Squashed 'external/xbyak/' content from commit 431abd86
git-subtree-dir: external/xbyak git-subtree-split: 431abd865e70a46d56f5aa0e1f87572decb60169
This commit is contained in:
@@ -0,0 +1,236 @@
|
||||
/*
 * Test program for the Xbyak CPU Cache Topology API.
 * Demonstrates the CpuTopology, CpuCache, LogicalCpu, and CpuMask classes.
 */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "xbyak/xbyak_util.h"
|
||||
|
||||
// The topology names used below (CpuTopology, LogicalCpu, CpuCache, CpuMask, ...)
// are written without qualification; presumably they live in Xbyak::util.
using namespace Xbyak::util;
|
||||
|
||||
// Writes a fixed-width rule of '=' characters plus a newline to stdout.
// Used to frame the section headings printed by this program.
void printSeparator()
{
    fputs("========================================\n", stdout);
}
|
||||
|
||||
void printSystemTopology(const CpuTopology& cpuTopo)
|
||||
{
|
||||
printSeparator();
|
||||
printf("CpuTopology Class - System CPU topology\n");
|
||||
printSeparator();
|
||||
|
||||
printf("System Configuration:\n");
|
||||
printf(" Logical CPUs: %zu\n", cpuTopo.getLogicalCpuNum());
|
||||
printf(" Physical Cores: %zu\n", cpuTopo.getPhysicalCoreNum());
|
||||
printf(" Cache Line Size:%u bytes\n", cpuTopo.getLineSize());
|
||||
printf(" Hybrid System: %s\n", cpuTopo.isHybrid() ? "Yes (P-cores + E-cores)" : "No");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void printLogicalCpuDetails(const CpuTopology& cpuTopo)
|
||||
{
|
||||
printSeparator();
|
||||
printf("LogicalCpu Class - Per-CPU topology information\n");
|
||||
printSeparator();
|
||||
|
||||
printf("Detailed CPU Topology (showing upto 32 Logical CPUs):\n");
|
||||
size_t maxCpusToPrint = 32;
|
||||
size_t numCpus = cpuTopo.getLogicalCpuNum();
|
||||
|
||||
for (size_t i = 0; i < numCpus && i < maxCpusToPrint; i++) {
|
||||
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(i);
|
||||
printf(" CPU %3zu: Core=%u Type=%s Siblings=", i, logCpu.coreId, getCoreTypeStr(logCpu.coreType));
|
||||
logCpu.getSiblings().put();
|
||||
}
|
||||
|
||||
if (numCpus > maxCpusToPrint) {
|
||||
printf(" ... (%zu more CPUs not shown)\n", numCpus - maxCpusToPrint);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Prints a byte count to stdout in the largest unit that fits:
//   size >= 1024 * 1024  -> megabytes with two decimals ("%.2f MB")
//   size >= 1024         -> kilobytes with two decimals ("%.2f KB")
//   otherwise            -> plain bytes ("%u B")
// No trailing newline is written; the caller finishes the line.
void printCacheSize(uint32_t size)
{
    const uint32_t bytesPerKb = 1024;
    const uint32_t bytesPerMb = 1024 * 1024;

    if (size < bytesPerKb) {
        printf("%u B", size);
        return;
    }
    if (size < bytesPerMb) {
        printf("%.2f KB", size / 1024.0);
        return;
    }
    printf("%.2f MB", size / (1024.0 * 1024.0));
}
|
||||
|
||||
// Comparator to group CPUs by their cache topology
|
||||
struct CacheTopologyComparator {
|
||||
const CpuTopology& cpuTopo;
|
||||
|
||||
CacheTopologyComparator(const CpuTopology& topo) : cpuTopo(topo) {}
|
||||
|
||||
bool operator()(size_t cpu1, size_t cpu2) const {
|
||||
const LogicalCpu& logi1 = cpuTopo.getLogicalCpu(cpu1);
|
||||
const LogicalCpu& logi2 = cpuTopo.getLogicalCpu(cpu2);
|
||||
|
||||
// Sort by core type (E-core before P-core)
|
||||
if (logi1.coreType != logi2.coreType) return logi1.coreType > logi2.coreType;
|
||||
|
||||
// Compare cache properties
|
||||
for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
|
||||
const CpuCache& cache1 = cpuTopo.getCache(cpu1, (CacheType)cType);
|
||||
const CpuCache& cache2 = cpuTopo.getCache(cpu2, (CacheType)cType);
|
||||
|
||||
if (cache1.size != cache2.size) return cache1.size < cache2.size;
|
||||
if (cache1.associativity != cache2.associativity) return cache1.associativity < cache2.associativity;
|
||||
size_t num1 = cache1.getSharedCpuNum();
|
||||
size_t num2 = cache2.getSharedCpuNum();
|
||||
if (num1 != num2) return num1 < num2;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Maps a logical-CPU index to a CpuMask. Keys are ordered by
// CacheTopologyComparator, so indices that compare equivalent under it
// share one entry.
typedef std::map<size_t, CpuMask, CacheTopologyComparator> TopologyGroupMap;
|
||||
|
||||
// Group CPUs by their cache topology
|
||||
TopologyGroupMap groupCpusByTopology(const CpuTopology& cpuTopo)
|
||||
{
|
||||
TopologyGroupMap group((CacheTopologyComparator(cpuTopo)));
|
||||
|
||||
for (uint32_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
|
||||
group[cpuIdx].append(cpuIdx);
|
||||
}
|
||||
return group;
|
||||
}
|
||||
|
||||
void printCacheHierarchy(const CpuTopology& cpuTopo, const TopologyGroupMap& group)
|
||||
{
|
||||
printSeparator();
|
||||
printf("CpuCache Class - Cache hierarchy and sharing\n");
|
||||
printSeparator();
|
||||
|
||||
// Print each unique cache topology group
|
||||
printf("Cache Hierarchy by Topology:\n");
|
||||
|
||||
for (TopologyGroupMap::const_iterator it = group.begin(); it != group.end(); ++it) {
|
||||
|
||||
const CpuMask& cpus = it->second;
|
||||
if (cpus.empty()) continue;
|
||||
|
||||
size_t firstCpu = cpus.get(0);
|
||||
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);
|
||||
|
||||
// Print core type and CPU list
|
||||
printf("\n%s CPUs ", getCoreTypeStr(logCpu.coreType));
|
||||
cpus.put();
|
||||
|
||||
// Print cache details for this topology
|
||||
for (int cType = 0; cType < CACHE_TYPE_NUM; cType++) {
|
||||
const CpuCache& cache = logCpu.cache[cType];
|
||||
if (cache.size > 0) {
|
||||
printf(" %s: ", getCacheTypeStr(cType));
|
||||
printCacheSize(cache.size);
|
||||
printf(" | %2u-way", cache.associativity);
|
||||
if (cache.isShared()) {
|
||||
printf(" | Shared by %zu CPUs", cache.getSharedCpuNum());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void printCacheSharingDetails(const CpuTopology& cpuTopo, const TopologyGroupMap& group)
|
||||
{
|
||||
printSeparator();
|
||||
printf("Cache Sharing Analysis\n");
|
||||
printSeparator();
|
||||
|
||||
// Print cache sharing analysis for each unique topology
|
||||
for (TopologyGroupMap::const_iterator it = group.begin(); it != group.end(); ++it) {
|
||||
|
||||
const CpuMask& cpus = it->second;
|
||||
if (cpus.empty()) continue;
|
||||
|
||||
size_t firstCpu = cpus.get(0);
|
||||
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);
|
||||
|
||||
printf("%s Topology (representative CPU %zu):\n", getCoreTypeStr(logCpu.coreType), firstCpu);
|
||||
|
||||
// Analyze each cache level
|
||||
for (int cType = 0; cType < CACHE_TYPE_NUM; cType++) {
|
||||
const CpuCache& cache = logCpu.cache[cType];
|
||||
if (cache.size > 0) {
|
||||
printf(" %s Cache:\n", getCacheTypeStr(cType));
|
||||
printf(" Size: ");
|
||||
printCacheSize(cache.size);
|
||||
printf("\n");
|
||||
|
||||
if (cache.isShared()) {
|
||||
printf(" Shared by %zu CPUs: ", cache.getSharedCpuNum());
|
||||
cache.sharedCpuIndices.put();
|
||||
} else {
|
||||
printf(" Private (not shared)\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
|
||||
void printSmallSample(const CpuTopology& cpuTopo)
|
||||
{
|
||||
printf("logical CPU num %zu %s\n", cpuTopo.getLogicalCpuNum(), cpuTopo.isHybrid() ? "hybrid" : "");
|
||||
if (!cpuTopo.isHybrid()) {
|
||||
cpuTopo.getLogicalCpu(0).put();
|
||||
return;
|
||||
}
|
||||
bool foundEcore = false;
|
||||
bool foundPcore = false;
|
||||
for (size_t i = 0; i < cpuTopo.getLogicalCpuNum(); i++) {
|
||||
const LogicalCpu& logi = cpuTopo.getLogicalCpu(i);
|
||||
if (!foundEcore && logi.coreType == Efficient) {
|
||||
logi.put();
|
||||
foundEcore = true;
|
||||
continue;
|
||||
}
|
||||
if (!foundPcore && logi.coreType == Performance) {
|
||||
logi.put();
|
||||
foundPcore = true;
|
||||
continue;
|
||||
}
|
||||
if (foundEcore && foundPcore) return;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
try
|
||||
{
|
||||
printf("\n");
|
||||
printf("Xbyak CPU Cache Topology API Test\n");
|
||||
printf("==================================\n");
|
||||
printf("\n");
|
||||
|
||||
Cpu cpu;
|
||||
CpuTopology cpuTopo(cpu);
|
||||
|
||||
const TopologyGroupMap group = groupCpusByTopology(cpuTopo);
|
||||
|
||||
printSystemTopology(cpuTopo);
|
||||
printLogicalCpuDetails(cpuTopo);
|
||||
printCacheHierarchy(cpuTopo, group);
|
||||
printCacheSharingDetails(cpuTopo, group);
|
||||
|
||||
printSeparator();
|
||||
printf("All tests completed successfully!\n");
|
||||
printSeparator();
|
||||
printf("\n");
|
||||
printSeparator();
|
||||
printSmallSample(cpuTopo);
|
||||
} catch (std::exception& e) {
|
||||
printf("Error: %s\n", e.what());
|
||||
return 1;
|
||||
}
|
||||
Reference in New Issue
Block a user