Squashed 'external/ircolib/' changes from ce3cd726c..de6e324bd

de6e324bd separate emu thread
10d3daf86 Roms List improvements
95d202f37 Let's make the rom list process on a separate thread so the emulator doesnt take ages to load.
fc306967f Wow the ROM Header was just completely busted. Game list view works now
bad1691ee fuck this shit
2b59e5f46 game list in progress
d26417b83 remappable inputs in progress
ac4af8106 input
e72abc240 update readme
430139dc9 Qt6 frontend
3080d4d45 Fix this small bug too
08cd13b85 Cop0 unused functions do not actually pose a threat (as per manual). They don't do anything, so shall we.
61bb4fb44 make idle loop detection a little more specific with where the load goes
b037de4c3 SAZDFsdff
12e81e73e need to figure out why n64-systemtest loops indefinitely at some address that appears to be valid (i think it's me not invalidating the cache properly)
204f0e13b idle skipping seems to work!
cb8bb634a sdkfjlasdf
58e5c89c1 Fix compilation issue on my machine (no idea)
24fb2898e attempting more serious idle skipping
214719577 Place rsp.Step inside cached interpreter. Gains about 3 more fps
bb97dcc23 mmmmm
920b77d38 wjkhasdfjhkasdf
430ccdab4 it's a start...
4f42a673a Cached interpreter plays Mario 64. Start looking into RSP as well
c9a030787 idle skipping works!
5fbda03ce new idea
366637aba Idle skipping... maybe?
609fa2fb0 Cache instructions implemented but broken lmao. Commented out for now
e140a6d12 - Stop using inheritance for CPU, instead use composition. - Introduce KAIZEN_JIT_ENABLED optional define instead of relying on __aarch64__ and the like. - More cache work
68e613057 prep cache impl
811b4d809 fix clang format
fda755f7d idk
d5024ebbf small MI refactor in preparation of (eventually) implementing the RDRAM interface properly
694b45341 Merge commit '206dcdedf195fb320913584180edb12c7731e396' as 'external/SDL'
206dcdedf Squashed 'external/SDL/' content from commit 4d17b99d0a
4d16e1cb4 need to update sdl
848b19920 Fix compilation error
db61b5299 Merge commit 'e94a94559f28e49678fbcf72199a5258137b0fe9' as 'external/imgui'
e94a94559 Squashed 'external/imgui/' content from commit 02e9b8cac
52edb3757 need to update imgui
c1a705e86 Emulate weird JALR behaviour
4b4c32f4b Fix exception for "unusable COP1" in 4 instructions i missed accidentally (again)
df5828142 Bug putting 0s in the log everywhere
f8b580048 Make isviewer a sink to file
8241e9735 Fix exception for "unusable COP1" in 4 instructions i missed accidentally
b29715f20 small changes
d9a620bc1 make use of my new small utility library
0d1aa938e Add 'external/ircolib/' from commit 'ce3cd726c8df8388d554abf8bb55d55020eb4450'
e64eb40b3 Fuck git

git-subtree-dir: external/ircolib
git-subtree-split: de6e324bde
This commit is contained in:
2026-06-15 11:56:38 +02:00
parent ce3cd726c8
commit 00cc9309cb
4479 changed files with 2943227 additions and 7 deletions
@@ -0,0 +1,528 @@
/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "query_pool.hpp"
#include "device.hpp"
#include <utility>
namespace Vulkan
{
// Translates a performance counter storage enum into a short label for log output.
// Values without a table entry are rendered as "???".
static const char *storage_to_str(VkPerformanceCounterStorageKHR storage)
{
	static const struct
	{
		VkPerformanceCounterStorageKHR value;
		const char *label;
	} storage_labels[] = {
		{ VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR, "float32" },
		{ VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR, "float64" },
		{ VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR, "int32" },
		{ VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR, "int64" },
		{ VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR, "uint32" },
		{ VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR, "uint64" },
	};

	for (auto &entry : storage_labels)
	{
		if (entry.value == storage)
			return entry.label;
	}

	return "???";
}
// Translates a performance counter scope enum into a human readable label for log output.
// Unrecognized scopes are rendered as "???".
static const char *scope_to_str(VkPerformanceCounterScopeKHR scope)
{
	if (scope == VK_QUERY_SCOPE_COMMAND_BUFFER_KHR)
		return "command buffer";
	if (scope == VK_QUERY_SCOPE_RENDER_PASS_KHR)
		return "render pass";
	if (scope == VK_QUERY_SCOPE_COMMAND_KHR)
		return "command";

	return "???";
}
// Translates a performance counter unit enum into the suffix printed after a counter value.
// Units without a table entry are rendered as "???".
static const char *unit_to_str(VkPerformanceCounterUnitKHR unit)
{
	static const struct
	{
		VkPerformanceCounterUnitKHR value;
		const char *suffix;
	} unit_suffixes[] = {
		{ VK_PERFORMANCE_COUNTER_UNIT_AMPS_KHR, "A" },
		{ VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR, "bytes" },
		{ VK_PERFORMANCE_COUNTER_UNIT_BYTES_PER_SECOND_KHR, "bytes / second" },
		{ VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR, "cycles" },
		{ VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR, "units" },
		{ VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR, "Hz" },
		{ VK_PERFORMANCE_COUNTER_UNIT_KELVIN_KHR, "K" },
		{ VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, "ns" },
		{ VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR, "%" },
		{ VK_PERFORMANCE_COUNTER_UNIT_VOLTS_KHR, "V" },
		{ VK_PERFORMANCE_COUNTER_UNIT_WATTS_KHR, "W" },
	};

	for (auto &entry : unit_suffixes)
	{
		if (entry.value == unit)
			return entry.suffix;
	}

	return "???";
}
void PerformanceQueryPool::log_available_counters(const VkPerformanceCounterKHR *counters,
const VkPerformanceCounterDescriptionKHR *descs,
uint32_t count)
{
for (uint32_t i = 0; i < count; i++)
{
LOGI(" %s: %s\n", descs[i].name, descs[i].description);
LOGI(" Storage: %s\n", storage_to_str(counters[i].storage));
LOGI(" Scope: %s\n", scope_to_str(counters[i].scope));
LOGI(" Unit: %s\n", unit_to_str(counters[i].unit));
}
}
// Binds this pool to a device and queue family, then enumerates the performance
// counters (and their descriptions) available on that queue family.
//
// Does nothing beyond storing the arguments if the device does not report
// performanceCounterQueryPools. If enumeration fails, an error is logged and the
// counter lists are left empty.
void PerformanceQueryPool::init_device(Device *device_, uint32_t queue_family_index_)
{
	device = device_;
	queue_family_index = queue_family_index_;

	if (!device->get_device_features().performance_query_features.performanceCounterQueryPools)
		return;

	const auto gpu = device->get_physical_device();

	// First call: only query how many counters exist.
	uint32_t num_counters = 0;
	if (vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
	        gpu, queue_family_index, &num_counters, nullptr, nullptr) != VK_SUCCESS)
	{
		LOGE("Failed to enumerate performance counters.\n");
		return;
	}

	counters.resize(num_counters, { VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR });
	counter_descriptions.resize(num_counters, { VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR });

	// Second call: fill in the counter and description arrays.
	if (vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
	        gpu, queue_family_index, &num_counters,
	        counters.data(), counter_descriptions.data()) != VK_SUCCESS)
	{
		LOGE("Failed to enumerate performance counters.\n");
		// Do not expose placeholder entries through get_num_counters() and friends.
		counters.clear();
		counter_descriptions.clear();
		return;
	}

	// num_counters now holds the number of entries actually written; drop any
	// unwritten tail so every exposed entry is valid.
	counters.resize(num_counters);
	counter_descriptions.resize(num_counters);
}
// Destroys the performance query pool if one was created by init_counters().
PerformanceQueryPool::~PerformanceQueryPool()
{
	if (pool == VK_NULL_HANDLE)
		return;

	auto &vk = device->get_device_table();
	vk.vkDestroyQueryPool(device->get_device(), pool, nullptr);
}
// Opens the performance query at the start of a command buffer.
// Resets query 0 from the host, begins it on `cmd`, then records a memory barrier
// across all commands. No-op when no query pool has been created.
void PerformanceQueryPool::begin_command_buffer(VkCommandBuffer cmd)
{
	if (pool == VK_NULL_HANDLE)
		return;

	auto &vk = device->get_device_table();

	// NOTE(review): this uses the EXT entry point while QueryPool in this file uses
	// the core vkResetQueryPool -- confirm both are loaded for the targeted API version.
	vk.vkResetQueryPoolEXT(device->get_device(), pool, 0, 1);
	vk.vkCmdBeginQuery(cmd, pool, 0, 0);

	VkMemoryBarrier full_barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
	full_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
	full_barrier.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT;

	vk.vkCmdPipelineBarrier(cmd,
	                        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
	                        0,
	                        1, &full_barrier,
	                        0, nullptr,
	                        0, nullptr);
}
// Closes the performance query at the end of a command buffer.
// Records a memory barrier across all commands and then ends query 0 on `cmd`.
// No-op when no query pool has been created.
void PerformanceQueryPool::end_command_buffer(VkCommandBuffer cmd)
{
	if (pool == VK_NULL_HANDLE)
		return;

	auto &vk = device->get_device_table();

	VkMemoryBarrier full_barrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
	full_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
	full_barrier.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT;

	vk.vkCmdPipelineBarrier(cmd,
	                        VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
	                        0,
	                        1, &full_barrier,
	                        0, nullptr,
	                        0, nullptr);

	vk.vkCmdEndQuery(cmd, pool, 0);
}
// Reads back the results of query 0 (waiting for them to become available) and logs
// one line per active counter, formatted according to the counter's storage type.
// Logs an error and returns early if no query pool has been set up.
void PerformanceQueryPool::report()
{
	if (pool == VK_NULL_HANDLE)
	{
		LOGE("No query pool is set up.\n");
		return;
	}

	auto &table = device->get_device_table();
	if (table.vkGetQueryPoolResults(device->get_device(), pool,
	                                0, 1,
	                                results.size() * sizeof(VkPerformanceCounterResultKHR),
	                                results.data(),
	                                sizeof(VkPerformanceCounterResultKHR),
	                                VK_QUERY_RESULT_WAIT_BIT) != VK_SUCCESS)
	{
		// Note: the values currently held in `results` are still logged below.
		LOGE("Getting performance counters did not succeed.\n");
	}

	size_t num_counters = results.size();

	LOGI("\n=== Profiling result ===\n");
	for (size_t i = 0; i < num_counters; i++)
	{
		// results[i] corresponds to the counter selected by active_indices[i].
		auto &counter = counters[active_indices[i]];
		auto &desc = counter_descriptions[active_indices[i]];
		const char *unit = unit_to_str(counter.unit);

		switch (counter.storage)
		{
		case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR:
			LOGI(" %s (%s): %d %s\n", desc.name, desc.description, results[i].int32, unit);
			break;
		case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR:
			LOGI(" %s (%s): %lld %s\n", desc.name, desc.description,
			     static_cast<long long>(results[i].int64), unit);
			break;
		case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR:
			LOGI(" %s (%s): %u %s\n", desc.name, desc.description, results[i].uint32, unit);
			break;
		case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR:
			// %llu expects an unsigned long long argument; a signed cast mismatches the specifier.
			LOGI(" %s (%s): %llu %s\n", desc.name, desc.description,
			     static_cast<unsigned long long>(results[i].uint64), unit);
			break;
		case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR:
			LOGI(" %s (%s): %g %s\n", desc.name, desc.description, results[i].float32, unit);
			break;
		case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR:
			LOGI(" %s (%s): %g %s\n", desc.name, desc.description, results[i].float64, unit);
			break;
		default:
			break;
		}
	}
	LOGI("================================\n\n");
}
// Returns how many entries the enumerated counter list currently holds.
uint32_t PerformanceQueryPool::get_num_counters() const
{
	const auto total = counters.size();
	return static_cast<uint32_t>(total);
}
// Returns a pointer to the first enumerated counter; pairs with get_num_counters().
const VkPerformanceCounterKHR *PerformanceQueryPool::get_available_counters() const
{
	const VkPerformanceCounterKHR *first = counters.data();
	return first;
}
// Returns a pointer to the first enumerated counter description.
const VkPerformanceCounterDescriptionKHR *PerformanceQueryPool::get_available_counter_descs() const
{
	const VkPerformanceCounterDescriptionKHR *first = counter_descriptions.data();
	return first;
}
bool PerformanceQueryPool::init_counters(const std::vector<std::string> &counter_names)
{
if (!device->get_device_features().performance_query_features.performanceCounterQueryPools)
{
LOGE("Device does not support VK_KHR_performance_query.\n");
return false;
}
if (!device->get_device_features().vk12_features.hostQueryReset)
{
LOGE("Device does not support host query reset.\n");
return false;
}
auto &table = device->get_device_table();
if (pool)
table.vkDestroyQueryPool(device->get_device(), pool, nullptr);
pool = VK_NULL_HANDLE;
VkQueryPoolPerformanceCreateInfoKHR performance_info = { VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR };
VkQueryPoolCreateInfo info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
info.pNext = &performance_info;
info.queryType = VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR;
info.queryCount = 1;
active_indices.clear();
for (auto &name : counter_names)
{
auto itr = find_if(begin(counter_descriptions), end(counter_descriptions), [&](const VkPerformanceCounterDescriptionKHR &desc) {
return name == desc.name;
});
if (itr != end(counter_descriptions))
{
LOGI("Found counter %s: %s\n", itr->name, itr->description);
active_indices.push_back(itr - begin(counter_descriptions));
}
}
if (active_indices.empty())
{
LOGW("No performance counters were enabled.\n");
return false;
}
performance_info.queueFamilyIndex = queue_family_index;
performance_info.counterIndexCount = active_indices.size();
performance_info.pCounterIndices = active_indices.data();
results.resize(active_indices.size());
uint32_t num_passes = 0;
vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(device->get_physical_device(),
&performance_info, &num_passes);
if (num_passes != 1)
{
LOGE("Implementation requires %u passes to query performance counters. Cannot create query pool.\n",
num_passes);
return false;
}
if (table.vkCreateQueryPool(device->get_device(), &info, nullptr, &pool) != VK_SUCCESS)
{
LOGE("Failed to create performance query pool.\n");
return false;
}
return true;
}
// Binds the timestamp query pool manager to a device. Timestamps are considered
// supported only when the device reports both timestampComputeAndGraphics and
// hostQueryReset; in that case the first pool is created right away.
QueryPool::QueryPool(Device *device_)
    : device(device_)
    , table(device_->get_device_table())
{
	const auto &limits = device->get_gpu_properties().limits;

	// Ignore timestampValidBits and friends for now.
	supports_timestamp = limits.timestampComputeAndGraphics &&
	                     device->get_device_features().vk12_features.hostQueryReset;

	if (!supports_timestamp)
		return;

	add_pool();
}
// Destroys every Vulkan query pool that add_pool() created.
QueryPool::~QueryPool()
{
	for (auto &entry : pools)
	{
		auto vk_device = device->get_device();
		table.vkDestroyQueryPool(vk_device, entry.pool, nullptr);
	}
}
// Collects all timestamps written since the previous begin() and recycles the pools.
// For every pool that holds written queries, the 64-bit results are read back (waiting
// for availability), forwarded to the matching cookies, and the used range is reset
// from the host. Afterwards all write cursors start again from zero.
void QueryPool::begin()
{
	for (unsigned i = 0; i <= pool_index && i < pools.size(); i++)
	{
		auto &current = pools[i];
		const auto written = current.index;
		if (written == 0)
			continue;

		table.vkGetQueryPoolResults(device->get_device(), current.pool,
		                            0, written,
		                            written * sizeof(uint64_t),
		                            current.query_results.data(),
		                            sizeof(uint64_t),
		                            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);

		// Hand each raw tick value to the handle that requested it.
		for (unsigned slot = 0; slot < written; slot++)
			current.cookies[slot]->signal_timestamp_ticks(current.query_results[slot]);

		table.vkResetQueryPool(device->get_device(), current.pool, 0, written);
	}

	pool_index = 0;
	for (size_t i = 0; i < pools.size(); i++)
		pools[i].index = 0;
}
void QueryPool::add_pool()
{
VkQueryPoolCreateInfo pool_info = { VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO };
pool_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
pool_info.queryCount = 64;
Pool pool;
table.vkCreateQueryPool(device->get_device(), &pool_info, nullptr, &pool.pool);
pool.size = pool_info.queryCount;
pool.index = 0;
pool.query_results.resize(pool.size);
pool.cookies.resize(pool.size);
table.vkResetQueryPool(device->get_device(), pool.pool, 0, pool.size);
pools.push_back(std::move(pool));
}
// Records a timestamp write for a single pipeline stage into `cmd` and returns the
// handle that receives the tick value on a later begin(). Returns an empty handle
// (after logging) when timestamps are unsupported. Moves on to the next pool, creating
// one if needed, when the current pool is full.
QueryPoolHandle QueryPool::write_timestamp(VkCommandBuffer cmd, VkPipelineStageFlags2 stage)
{
	if (!supports_timestamp)
	{
		LOGI("Timestamps are not supported on this implementation.\n");
		return {};
	}

	// At most one stage bit may be set.
	VK_ASSERT((stage & (stage - 1)) == 0);

	{
		auto &current = pools[pool_index];
		if (current.index >= current.size)
			pool_index++;
	}

	if (pool_index >= pools.size())
		add_pool();

	auto &target = pools[pool_index];

	auto handle = QueryPoolHandle(device->handle_pool.query.allocate(device, true));
	target.cookies[target.index] = handle;

	if (!device->get_device_features().vk13_features.synchronization2)
	{
		// Without synchronization2 the stage is converted for the legacy entry point.
		table.vkCmdWriteTimestamp(cmd, static_cast<VkPipelineStageFlagBits>(convert_vk_src_stage2(stage)),
		                          target.pool, target.index);
	}
	else
	{
		table.vkCmdWriteTimestamp2(cmd, stage, target.pool, target.index);
	}

	target.index++;
	return handle;
}
// Hands a query result object back to the query handle pool of the device it refers to.
void QueryPoolResultDeleter::operator()(QueryPoolResult *query)
{
	auto *owner = query->device;
	owner->handle_pool.query.free(query);
}
// Counts one more frame iteration, but only when the accumulated time is positive.
void TimestampInterval::mark_end_of_frame_context()
{
	if (!(total_time > 0.0))
		return;

	++total_frame_iterations;
}
uint64_t TimestampInterval::get_total_accumulations() const
{
return total_accumulations;
}
uint64_t TimestampInterval::get_total_frame_iterations() const
{
return total_frame_iterations;
}
double TimestampInterval::get_total_time() const
{
return total_time;
}
// Adds `t` to the running total and counts one more accumulation.
void TimestampInterval::accumulate_time(double t)
{
	++total_accumulations;
	total_time = total_time + t;
}
// Returns the total time divided by the number of frame iterations, or 0.0 when no
// frame iteration has been counted.
double TimestampInterval::get_time_per_iteration() const
{
	if (total_frame_iterations == 0)
		return 0.0;

	return total_time / static_cast<double>(total_frame_iterations);
}
// Returns the total time divided by the number of accumulations, or 0.0 when nothing
// has been accumulated.
double TimestampInterval::get_time_per_accumulation() const
{
	if (total_accumulations == 0)
		return 0.0;

	return total_time / static_cast<double>(total_accumulations);
}
const std::string &TimestampInterval::get_tag() const
{
return tag;
}
// Clears the accumulated time and both counters; the tag is left untouched.
void TimestampInterval::reset()
{
	total_frame_iterations = 0;
	total_accumulations = 0;
	total_time = 0.0;
}
// Creates an interval identified by `tag_`; the string is moved into the object.
TimestampInterval::TimestampInterval(std::string tag_)
    : tag(std::move(tag_))
{
}
// Returns the interval stored under the hash of `tag`, obtained via emplace_yield on
// the `timestamps` container with `tag` as the construction argument.
TimestampInterval *TimestampIntervalManager::get_timestamp_tag(const char *tag)
{
	Util::Hasher hasher;
	hasher.string(tag);

	const auto key = hasher.get();
	return timestamps.emplace_yield(key, tag);
}
void TimestampIntervalManager::mark_end_of_frame_context()
{
for (auto &timestamp : timestamps)
timestamp.mark_end_of_frame_context();
}
void TimestampIntervalManager::reset()
{
for (auto &timestamp : timestamps)
timestamp.reset();
}
void TimestampIntervalManager::log_simple(const TimestampIntervalReportCallback &func) const
{
for (auto &timestamp : timestamps)
{
if (timestamp.get_total_frame_iterations())
{
TimestampIntervalReport report = {};
report.time_per_accumulation = timestamp.get_time_per_accumulation();
report.time_per_frame_context = timestamp.get_time_per_iteration();
report.accumulations_per_frame_context =
double(timestamp.get_total_accumulations()) / double(timestamp.get_total_frame_iterations());
if (func)
{
func(timestamp.get_tag(), report);
}
else
{
LOGI("Timestamp tag report: %s\n", timestamp.get_tag().c_str());
LOGI(" %.3f ms / iteration\n", 1000.0 * report.time_per_accumulation);
LOGI(" %.3f ms / frame context\n", 1000.0 * report.time_per_frame_context);
LOGI(" %.3f iterations / frame context\n", report.accumulations_per_frame_context);
}
}
}
}
}