/* * Test program for Xbyak CPU Cache Topology API * Demonstrates the CpuTopology, CpuCache, LogicalCpu, and CpuMask classes */ #include #include #include #include #include "xbyak/xbyak_util.h" using namespace Xbyak::util; void printSeparator() { printf("========================================\n"); } void printSystemTopology(const CpuTopology& cpuTopo) { printSeparator(); printf("CpuTopology Class - System CPU topology\n"); printSeparator(); printf("System Configuration:\n"); printf(" Logical CPUs: %zu\n", cpuTopo.getLogicalCpuNum()); printf(" Physical Cores: %zu\n", cpuTopo.getPhysicalCoreNum()); printf(" Cache Line Size:%u bytes\n", cpuTopo.getLineSize()); printf(" Hybrid System: %s\n", cpuTopo.isHybrid() ? "Yes (P-cores + E-cores)" : "No"); printf("\n"); } void printLogicalCpuDetails(const CpuTopology& cpuTopo) { printSeparator(); printf("LogicalCpu Class - Per-CPU topology information\n"); printSeparator(); printf("Detailed CPU Topology (showing upto 32 Logical CPUs):\n"); size_t maxCpusToPrint = 32; size_t numCpus = cpuTopo.getLogicalCpuNum(); for (size_t i = 0; i < numCpus && i < maxCpusToPrint; i++) { const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(i); printf(" CPU %3zu: Core=%u Type=%s Siblings=", i, logCpu.coreId, getCoreTypeStr(logCpu.coreType)); logCpu.getSiblings().put(); } if (numCpus > maxCpusToPrint) { printf(" ... (%zu more CPUs not shown)\n", numCpus - maxCpusToPrint); } printf("\n"); } // Print cache size in appropriate unit (MB, KB, or B) void printCacheSize(uint32_t size) { if (size >= 1024 * 1024) { printf("%.2f MB", size / (1024.0 * 1024.0)); } else if (size >= 1024) { printf("%.2f KB", size / 1024.0); } else { printf("%u B", size); } } // Comparator to group CPUs by their cache topology struct CacheTopologyComparator { const CpuTopology& cpuTopo; CacheTopologyComparator(const CpuTopology& topo) : cpuTopo(topo) {} bool operator()(size_t cpu1, size_t cpu2) const { const LogicalCpu& logi1 = cpuTopo.getLogicalCpu(cpu1); const LogicalCpu& logi2 = cpuTopo.getLogicalCpu(cpu2); // Sort by core type (E-core before P-core) if (logi1.coreType != logi2.coreType) return logi1.coreType > logi2.coreType; // Compare cache properties for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) { const CpuCache& cache1 = cpuTopo.getCache(cpu1, (CacheType)cType); const CpuCache& cache2 = cpuTopo.getCache(cpu2, (CacheType)cType); if (cache1.size != cache2.size) return cache1.size < cache2.size; if (cache1.associativity != cache2.associativity) return cache1.associativity < cache2.associativity; size_t num1 = cache1.getSharedCpuNum(); size_t num2 = cache2.getSharedCpuNum(); if (num1 != num2) return num1 < num2; } return false; } }; typedef std::map TopologyGroupMap; // Group CPUs by their cache topology TopologyGroupMap groupCpusByTopology(const CpuTopology& cpuTopo) { TopologyGroupMap group((CacheTopologyComparator(cpuTopo))); for (uint32_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) { group[cpuIdx].append(cpuIdx); } return group; } void printCacheHierarchy(const CpuTopology& cpuTopo, const TopologyGroupMap& group) { printSeparator(); printf("CpuCache Class - Cache hierarchy and sharing\n"); printSeparator(); // Print each unique cache topology group printf("Cache Hierarchy by Topology:\n"); for (TopologyGroupMap::const_iterator it = group.begin(); it != group.end(); ++it) { const CpuMask& cpus = it->second; if (cpus.empty()) continue; size_t firstCpu = cpus.get(0); const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu); // Print core type and CPU list printf("\n%s CPUs ", getCoreTypeStr(logCpu.coreType)); cpus.put(); // Print cache details for this topology for (int cType = 0; cType < CACHE_TYPE_NUM; cType++) { const CpuCache& cache = logCpu.cache[cType]; if (cache.size > 0) { printf(" %s: ", getCacheTypeStr(cType)); printCacheSize(cache.size); printf(" | %2u-way", cache.associativity); if (cache.isShared()) { printf(" | Shared by %zu CPUs", cache.getSharedCpuNum()); } printf("\n"); } } } printf("\n"); } void printCacheSharingDetails(const CpuTopology& cpuTopo, const TopologyGroupMap& group) { printSeparator(); printf("Cache Sharing Analysis\n"); printSeparator(); // Print cache sharing analysis for each unique topology for (TopologyGroupMap::const_iterator it = group.begin(); it != group.end(); ++it) { const CpuMask& cpus = it->second; if (cpus.empty()) continue; size_t firstCpu = cpus.get(0); const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu); printf("%s Topology (representative CPU %zu):\n", getCoreTypeStr(logCpu.coreType), firstCpu); // Analyze each cache level for (int cType = 0; cType < CACHE_TYPE_NUM; cType++) { const CpuCache& cache = logCpu.cache[cType]; if (cache.size > 0) { printf(" %s Cache:\n", getCacheTypeStr(cType)); printf(" Size: "); printCacheSize(cache.size); printf("\n"); if (cache.isShared()) { printf(" Shared by %zu CPUs: ", cache.getSharedCpuNum()); cache.sharedCpuIndices.put(); } else { printf(" Private (not shared)\n"); } } } printf("\n"); } } void printSmallSample(const CpuTopology& cpuTopo) { printf("logical CPU num %zu %s\n", cpuTopo.getLogicalCpuNum(), cpuTopo.isHybrid() ? "hybrid" : ""); if (!cpuTopo.isHybrid()) { cpuTopo.getLogicalCpu(0).put(); return; } bool foundEcore = false; bool foundPcore = false; for (size_t i = 0; i < cpuTopo.getLogicalCpuNum(); i++) { const LogicalCpu& logi = cpuTopo.getLogicalCpu(i); if (!foundEcore && logi.coreType == Efficient) { logi.put(); foundEcore = true; continue; } if (!foundPcore && logi.coreType == Performance) { logi.put(); foundPcore = true; continue; } if (foundEcore && foundPcore) return; } } int main() try { printf("\n"); printf("Xbyak CPU Cache Topology API Test\n"); printf("==================================\n"); printf("\n"); Cpu cpu; CpuTopology cpuTopo(cpu); const TopologyGroupMap group = groupCpusByTopology(cpuTopo); printSystemTopology(cpuTopo); printLogicalCpuDetails(cpuTopo); printCacheHierarchy(cpuTopo, group); printCacheSharingDetails(cpuTopo, group); printSeparator(); printf("All tests completed successfully!\n"); printSeparator(); printf("\n"); printSeparator(); printSmallSample(cpuTopo); } catch (std::exception& e) { printf("Error: %s\n", e.what()); return 1; }