2201a02272
git-subtree-dir: external/xbyak git-subtree-split: 431abd865e70a46d56f5aa0e1f87572decb60169
237 lines
6.5 KiB
C++
237 lines
6.5 KiB
C++
/*
 * Test program for Xbyak CPU Cache Topology API
 * Demonstrates the CpuTopology, CpuCache, LogicalCpu, and CpuMask classes
 */
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <map>
|
|
#include <vector>
|
|
#include "xbyak/xbyak_util.h"
|
|
|
|
using namespace Xbyak::util;
|
|
|
|
// Write one horizontal rule of '=' characters (plus newline) to stdout.
// Used to frame the title of every report section.
void printSeparator()
{
    fputs("========================================\n", stdout);
}
|
|
|
|
void printSystemTopology(const CpuTopology& cpuTopo)
|
|
{
|
|
printSeparator();
|
|
printf("CpuTopology Class - System CPU topology\n");
|
|
printSeparator();
|
|
|
|
printf("System Configuration:\n");
|
|
printf(" Logical CPUs: %zu\n", cpuTopo.getLogicalCpuNum());
|
|
printf(" Physical Cores: %zu\n", cpuTopo.getPhysicalCoreNum());
|
|
printf(" Cache Line Size:%u bytes\n", cpuTopo.getLineSize());
|
|
printf(" Hybrid System: %s\n", cpuTopo.isHybrid() ? "Yes (P-cores + E-cores)" : "No");
|
|
printf("\n");
|
|
}
|
|
|
|
void printLogicalCpuDetails(const CpuTopology& cpuTopo)
|
|
{
|
|
printSeparator();
|
|
printf("LogicalCpu Class - Per-CPU topology information\n");
|
|
printSeparator();
|
|
|
|
printf("Detailed CPU Topology (showing upto 32 Logical CPUs):\n");
|
|
size_t maxCpusToPrint = 32;
|
|
size_t numCpus = cpuTopo.getLogicalCpuNum();
|
|
|
|
for (size_t i = 0; i < numCpus && i < maxCpusToPrint; i++) {
|
|
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(i);
|
|
printf(" CPU %3zu: Core=%u Type=%s Siblings=", i, logCpu.coreId, getCoreTypeStr(logCpu.coreType));
|
|
logCpu.getSiblings().put();
|
|
}
|
|
|
|
if (numCpus > maxCpusToPrint) {
|
|
printf(" ... (%zu more CPUs not shown)\n", numCpus - maxCpusToPrint);
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
// Print a byte count to stdout using the largest fitting unit:
//   size >= 1024*1024 -> "<x.xx> MB"
//   size >= 1024      -> "<x.xx> KB"
//   otherwise         -> "<n> B"
// No trailing newline is written; the caller finishes the line.
void printCacheSize(uint32_t size)
{
    const uint32_t kiloBytes = 1024;
    const uint32_t megaBytes = 1024 * 1024;

    // Smallest unit first, returning as soon as the right bucket is found
    if (size < kiloBytes) {
        printf("%u B", size);
        return;
    }
    if (size < megaBytes) {
        printf("%.2f KB", size / 1024.0);
        return;
    }
    printf("%.2f MB", size / (1024.0 * 1024.0));
}
|
|
|
|
// Comparator to group CPUs by their cache topology
|
|
struct CacheTopologyComparator {
|
|
const CpuTopology& cpuTopo;
|
|
|
|
CacheTopologyComparator(const CpuTopology& topo) : cpuTopo(topo) {}
|
|
|
|
bool operator()(size_t cpu1, size_t cpu2) const {
|
|
const LogicalCpu& logi1 = cpuTopo.getLogicalCpu(cpu1);
|
|
const LogicalCpu& logi2 = cpuTopo.getLogicalCpu(cpu2);
|
|
|
|
// Sort by core type (E-core before P-core)
|
|
if (logi1.coreType != logi2.coreType) return logi1.coreType > logi2.coreType;
|
|
|
|
// Compare cache properties
|
|
for (int cType = L1i; cType < CACHE_TYPE_NUM; cType++) {
|
|
const CpuCache& cache1 = cpuTopo.getCache(cpu1, (CacheType)cType);
|
|
const CpuCache& cache2 = cpuTopo.getCache(cpu2, (CacheType)cType);
|
|
|
|
if (cache1.size != cache2.size) return cache1.size < cache2.size;
|
|
if (cache1.associativity != cache2.associativity) return cache1.associativity < cache2.associativity;
|
|
size_t num1 = cache1.getSharedCpuNum();
|
|
size_t num2 = cache2.getSharedCpuNum();
|
|
if (num1 != num2) return num1 < num2;
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
// Map from a logical CPU index to a CpuMask. Keys are ordered by
// CacheTopologyComparator, so indices the comparator treats as equivalent
// share a single entry.
typedef std::map<size_t, CpuMask, CacheTopologyComparator> TopologyGroupMap;
|
|
|
|
// Group CPUs by their cache topology
|
|
TopologyGroupMap groupCpusByTopology(const CpuTopology& cpuTopo)
|
|
{
|
|
TopologyGroupMap group((CacheTopologyComparator(cpuTopo)));
|
|
|
|
for (uint32_t cpuIdx = 0; cpuIdx < cpuTopo.getLogicalCpuNum(); cpuIdx++) {
|
|
group[cpuIdx].append(cpuIdx);
|
|
}
|
|
return group;
|
|
}
|
|
|
|
void printCacheHierarchy(const CpuTopology& cpuTopo, const TopologyGroupMap& group)
|
|
{
|
|
printSeparator();
|
|
printf("CpuCache Class - Cache hierarchy and sharing\n");
|
|
printSeparator();
|
|
|
|
// Print each unique cache topology group
|
|
printf("Cache Hierarchy by Topology:\n");
|
|
|
|
for (TopologyGroupMap::const_iterator it = group.begin(); it != group.end(); ++it) {
|
|
|
|
const CpuMask& cpus = it->second;
|
|
if (cpus.empty()) continue;
|
|
|
|
size_t firstCpu = cpus.get(0);
|
|
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);
|
|
|
|
// Print core type and CPU list
|
|
printf("\n%s CPUs ", getCoreTypeStr(logCpu.coreType));
|
|
cpus.put();
|
|
|
|
// Print cache details for this topology
|
|
for (int cType = 0; cType < CACHE_TYPE_NUM; cType++) {
|
|
const CpuCache& cache = logCpu.cache[cType];
|
|
if (cache.size > 0) {
|
|
printf(" %s: ", getCacheTypeStr(cType));
|
|
printCacheSize(cache.size);
|
|
printf(" | %2u-way", cache.associativity);
|
|
if (cache.isShared()) {
|
|
printf(" | Shared by %zu CPUs", cache.getSharedCpuNum());
|
|
}
|
|
printf("\n");
|
|
}
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
|
|
void printCacheSharingDetails(const CpuTopology& cpuTopo, const TopologyGroupMap& group)
|
|
{
|
|
printSeparator();
|
|
printf("Cache Sharing Analysis\n");
|
|
printSeparator();
|
|
|
|
// Print cache sharing analysis for each unique topology
|
|
for (TopologyGroupMap::const_iterator it = group.begin(); it != group.end(); ++it) {
|
|
|
|
const CpuMask& cpus = it->second;
|
|
if (cpus.empty()) continue;
|
|
|
|
size_t firstCpu = cpus.get(0);
|
|
const LogicalCpu& logCpu = cpuTopo.getLogicalCpu(firstCpu);
|
|
|
|
printf("%s Topology (representative CPU %zu):\n", getCoreTypeStr(logCpu.coreType), firstCpu);
|
|
|
|
// Analyze each cache level
|
|
for (int cType = 0; cType < CACHE_TYPE_NUM; cType++) {
|
|
const CpuCache& cache = logCpu.cache[cType];
|
|
if (cache.size > 0) {
|
|
printf(" %s Cache:\n", getCacheTypeStr(cType));
|
|
printf(" Size: ");
|
|
printCacheSize(cache.size);
|
|
printf("\n");
|
|
|
|
if (cache.isShared()) {
|
|
printf(" Shared by %zu CPUs: ", cache.getSharedCpuNum());
|
|
cache.sharedCpuIndices.put();
|
|
} else {
|
|
printf(" Private (not shared)\n");
|
|
}
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
void printSmallSample(const CpuTopology& cpuTopo)
|
|
{
|
|
printf("logical CPU num %zu %s\n", cpuTopo.getLogicalCpuNum(), cpuTopo.isHybrid() ? "hybrid" : "");
|
|
if (!cpuTopo.isHybrid()) {
|
|
cpuTopo.getLogicalCpu(0).put();
|
|
return;
|
|
}
|
|
bool foundEcore = false;
|
|
bool foundPcore = false;
|
|
for (size_t i = 0; i < cpuTopo.getLogicalCpuNum(); i++) {
|
|
const LogicalCpu& logi = cpuTopo.getLogicalCpu(i);
|
|
if (!foundEcore && logi.coreType == Efficient) {
|
|
logi.put();
|
|
foundEcore = true;
|
|
continue;
|
|
}
|
|
if (!foundPcore && logi.coreType == Performance) {
|
|
logi.put();
|
|
foundPcore = true;
|
|
continue;
|
|
}
|
|
if (foundEcore && foundPcore) return;
|
|
}
|
|
}
|
|
|
|
int main()
|
|
try
|
|
{
|
|
printf("\n");
|
|
printf("Xbyak CPU Cache Topology API Test\n");
|
|
printf("==================================\n");
|
|
printf("\n");
|
|
|
|
Cpu cpu;
|
|
CpuTopology cpuTopo(cpu);
|
|
|
|
const TopologyGroupMap group = groupCpusByTopology(cpuTopo);
|
|
|
|
printSystemTopology(cpuTopo);
|
|
printLogicalCpuDetails(cpuTopo);
|
|
printCacheHierarchy(cpuTopo, group);
|
|
printCacheSharingDetails(cpuTopo, group);
|
|
|
|
printSeparator();
|
|
printf("All tests completed successfully!\n");
|
|
printSeparator();
|
|
printf("\n");
|
|
printSeparator();
|
|
printSmallSample(cpuTopo);
|
|
} catch (std::exception& e) {
|
|
printf("Error: %s\n", e.what());
|
|
return 1;
|
|
}
|