Files
kaizen/external/parallel-rdp/parallel-rdp-standalone/vulkan/device_fossilize.cpp
T
iris 00cc9309cb Squashed 'external/ircolib/' changes from ce3cd726c..de6e324bd
de6e324bd separate emu thread
10d3daf86 Roms List improvements
95d202f37 Let's make the rom list process on a separate thread so the emulator doesnt take ages to load.
fc306967f Wow the ROM Header was just completely busted. Game list view works now
bad1691ee fuck this shit
2b59e5f46 game list in progress
d26417b83 remappable inputs in progress
ac4af8106 input
e72abc240 update readme
430139dc9 Qt6 frontend
3080d4d45 Fix this small bug too
08cd13b85 Cop0 unused functions do not actually pose a threat (as per manual). They don't do anything, so shall we.
61bb4fb44 make idle loop detection a little more specific with where the load goes
b037de4c3 SAZDFsdff
12e81e73e need to figure out why n64-systemtest loops indefinitely at some address that appears to be valid (i think it's me not invalidating the cache properly)
204f0e13b idle skipping seems to work!
cb8bb634a sdkfjlasdf
58e5c89c1 Fix compilation issue on my machine (no idea)
24fb2898e attempting more serious idle skipping
214719577 Place rsp.Step inside cached interpreter. Gains about 3 more fps
bb97dcc23 mmmmm
920b77d38 wjkhasdfjhkasdf
430ccdab4 it's a start...
4f42a673a Cached interpreter plays Mario 64. Start looking into RSP as well
c9a030787 idle skipping works!
5fbda03ce new idea
366637aba Idle skipping... maybe?
609fa2fb0 Cache instructions implemented but broken lmao. Commented out for now
e140a6d12 - Stop using inheritance for CPU, instead use composition. - Introduce KAIZEN_JIT_ENABLED optional define instead of relying on __aarch64__ and the like. - More cache work
68e613057 prep cache impl
811b4d809 fix clang format
fda755f7d idk
d5024ebbf small MI refactor in preparation of (eventually) implementing the RDRAM interface properly
694b45341 Merge commit '206dcdedf195fb320913584180edb12c7731e396' as 'external/SDL'
206dcdedf Squashed 'external/SDL/' content from commit 4d17b99d0a
4d16e1cb4 need to update sdl
848b19920 Fix compilation error
db61b5299 Merge commit 'e94a94559f28e49678fbcf72199a5258137b0fe9' as 'external/imgui'
e94a94559 Squashed 'external/imgui/' content from commit 02e9b8cac
52edb3757 need to update imgui
c1a705e86 Emulate weird JALR behaviour
4b4c32f4b Fix exception for "unusable COP1" in 4 instructions i missed accidentally (again)
df5828142 Bug putting 0s in the log everywhere
f8b580048 Make isviewer a sink to file
8241e9735 Fix exception for "unusable COP1" in 4 instructions i missed accidentally
b29715f20 small changes
d9a620bc1 make use of my new small utility library
0d1aa938e Add 'external/ircolib/' from commit 'ce3cd726c8df8388d554abf8bb55d55020eb4450'
e64eb40b3 Fuck git

git-subtree-dir: external/ircolib
git-subtree-split: de6e324bde
2026-06-15 11:56:38 +02:00

1037 lines
34 KiB
C++

/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "device_fossilize.hpp"
#include "timer.hpp"
#include "thread_group.hpp"
#include "fossilize_db.hpp"
#include "dynamic_array.hpp"
namespace Vulkan
{
// Builds the recorder bookkeeping with the ready flag cleared.
// The register_* entry points in this file refuse to record while the flag is false.
Device::RecorderState::RecorderState()
{
	constexpr bool initially_ready = false;
	recorder_ready.store(initially_ready, std::memory_order_relaxed);
}
// Out-of-line destructor; nothing beyond ordinary member destruction happens here.
Device::RecorderState::~RecorderState() = default;
// Builds the replayer bookkeeping with all three progress counters zeroed.
Device::ReplayerState::ReplayerState()
{
	auto &counters = progress;
	counters.prepare.store(0, std::memory_order_relaxed);
	counters.modules.store(0, std::memory_order_relaxed);
	counters.pipelines.store(0, std::memory_order_relaxed);
}
// Out-of-line destructor; nothing beyond ordinary member destruction happens here.
Device::ReplayerState::~ReplayerState() = default;
// Records a sampler with the Fossilize recorder.
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_sampler(VkSampler sampler, Fossilize::Hash hash, const VkSamplerCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register sampler before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_sampler(sampler, info, hash);
	if (!recorded)
		LOGW("Failed to register sampler.\n");
}
// Records a sampler YCbCr conversion with the Fossilize recorder.
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_sampler_ycbcr_conversion(
		VkSamplerYcbcrConversion ycbcr, const VkSamplerYcbcrConversionCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register sampler YCbCr conversion before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_ycbcr_conversion(ycbcr, info);
	if (!recorded)
		LOGW("Failed to register YCbCr conversion.\n");
}
// Records a descriptor set layout with the Fossilize recorder.
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_descriptor_set_layout(VkDescriptorSetLayout layout, Fossilize::Hash hash, const VkDescriptorSetLayoutCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register descriptor set layout before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_descriptor_set_layout(layout, info, hash);
	if (!recorded)
		LOGW("Failed to register descriptor set layout.\n");
}
// Records a pipeline layout with the Fossilize recorder.
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_pipeline_layout(VkPipelineLayout layout, Fossilize::Hash hash, const VkPipelineLayoutCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register pipeline layout before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_pipeline_layout(layout, info, hash);
	if (!recorded)
		LOGW("Failed to register pipeline layout.\n");
}
// Records a shader module with the Fossilize recorder.
// Before recording, the module's create info is also registered with the feature filter
// held by the replayer state. Does nothing when recording is disabled, and only warns
// when the recorder has not been flagged ready yet or when the recorder rejects the object.
void Device::register_shader_module(VkShaderModule module, Fossilize::Hash hash, const VkShaderModuleCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register shader module before recorder is ready.\n");
		return;
	}

	auto *filter = replayer_state->feature_filter;
	filter->register_shader_module_info(module, &info);

	const bool recorded = recorder_state->recorder.record_shader_module(module, info, hash);
	if (!recorded)
		LOGW("Failed to register shader module.\n");
}
// Records a compute pipeline create info with the Fossilize recorder.
// No pipeline handle or base pipelines are passed along (VK_NULL_HANDLE, nullptr, 0).
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register compute pipeline before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_compute_pipeline(VK_NULL_HANDLE, info, nullptr, 0, hash);
	if (!recorded)
		LOGW("Failed to register compute pipeline.\n");
}
// Records a graphics pipeline create info with the Fossilize recorder.
// No pipeline handle or base pipelines are passed along (VK_NULL_HANDLE, nullptr, 0).
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register graphics pipeline before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_graphics_pipeline(VK_NULL_HANDLE, info, nullptr, 0, hash);
	if (!recorded)
		LOGW("Failed to register graphics pipeline.\n");
}
// Records a render pass (VkRenderPassCreateInfo2 form) with the Fossilize recorder.
// Does nothing when recording is disabled, and only warns when the recorder has not
// been flagged ready yet or when the recorder rejects the object.
void Device::register_render_pass(VkRenderPass render_pass, Fossilize::Hash hash, const VkRenderPassCreateInfo2KHR &info)
{
	if (!recorder_state)
		return;

	const bool ready = recorder_state->recorder_ready.load(std::memory_order_acquire);
	if (!ready)
	{
		LOGW("Attempting to register render pass before recorder is ready.\n");
		return;
	}

	const bool recorded = recorder_state->recorder.record_render_pass2(render_pass, info, hash);
	if (!recorded)
		LOGW("Failed to register render pass.\n");
}
// Replay callback for a shader module.
// Unsupported modules yield VK_NULL_HANDLE. Supported ones are inserted into the shader
// cache keyed by hash, and the hash itself is returned as a placeholder handle which the
// pipeline replay functions translate back into a real module later.
// The module progress counter is bumped in both cases; the function always returns true.
bool Device::enqueue_create_shader_module(Fossilize::Hash hash, const VkShaderModuleCreateInfo *create_info, VkShaderModule *module)
{
	const bool supported = replayer_state->feature_filter->shader_module_is_supported(create_info);
	if (supported)
	{
		ResourceLayout known_layout;

		// A resource layout that is already known is reused as-is. Avoids spinning up
		// SPIRV-Cross reflection and allows us to not even build it for release builds.
		const bool have_layout = shader_manager.get_resource_layout_by_shader_hash(hash, known_layout);
		if (have_layout)
			shaders.emplace_yield(hash, hash, this, create_info->pCode, create_info->codeSize, &known_layout);
		else
			shaders.emplace_yield(hash, hash, this, create_info->pCode, create_info->codeSize);

		// Placeholder handle, resolved later.
		*module = (VkShaderModule)hash;
	}
	else
	{
		*module = VK_NULL_HANDLE;
	}

	replayer_state->progress.modules.fetch_add(1, std::memory_order_release);
	return true;
}
// Compiles one replayed graphics pipeline and stores it on the matching Program.
//
// `info` arrives with placeholder handles: each stage module is a shader hash and the
// layout is an ImmutableSamplerBank pointer. Both are rewritten in place to real Vulkan
// handles before the pipeline is created.
//
// Returns false when the stage combination is not usable, a stage's shader cannot be found,
// pipeline creation fails, or no pipeline ends up stored. Returns true when the pipeline
// was stored, and also when it was skipped because no program could be requested or the
// feature filter rejected it. The pipeline progress counter is bumped exactly once on
// every path.
bool Device::fossilize_replay_graphics_pipeline(Fossilize::Hash hash, VkGraphicsPipelineCreateInfo &info)
{
	int vert_index = -1;
	int task_index = -1;
	int mesh_index = -1;
	int frag_index = -1;

	for (uint32_t i = 0; i < info.stageCount; i++)
	{
		switch (info.pStages[i].stage)
		{
		case VK_SHADER_STAGE_VERTEX_BIT:
			vert_index = int(i);
			break;

		case VK_SHADER_STAGE_TASK_BIT_EXT:
			task_index = int(i);
			break;

		case VK_SHADER_STAGE_MESH_BIT_EXT:
			mesh_index = int(i);
			break;

		case VK_SHADER_STAGE_FRAGMENT_BIT:
			frag_index = int(i);
			break;

		default:
			// Any other stage is not handled by this replay path.
			replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
			return false;
		}
	}

	// Need a fragment stage and exactly one of vertex / mesh.
	if (frag_index < 0 || (mesh_index < 0 && vert_index < 0) ||
	    (mesh_index >= 0 && vert_index >= 0))
	{
		replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
		return false;
	}

	auto *vert_shader = vert_index >= 0 ? shaders.find((Fossilize::Hash)info.pStages[vert_index].module) : nullptr;
	auto *task_shader = task_index >= 0 ? shaders.find((Fossilize::Hash)info.pStages[task_index].module) : nullptr;
	auto *mesh_shader = mesh_index >= 0 ? shaders.find((Fossilize::Hash)info.pStages[mesh_index].module) : nullptr;
	auto *frag_shader = shaders.find((Fossilize::Hash)info.pStages[frag_index].module);

	// Every stage that is present must have resolved to a shader. The task shader is
	// included here because its module is dereferenced below whenever task_index >= 0.
	const bool task_unresolved = task_index >= 0 && !task_shader;
	if ((!vert_shader && !mesh_shader) || !frag_shader || task_unresolved)
	{
		replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
		return false;
	}

	Program *ret;
	if (mesh_shader)
	{
		ret = request_program(task_shader, mesh_shader, frag_shader,
		                      reinterpret_cast<const ImmutableSamplerBank *>(info.layout));
	}
	else
	{
		ret = request_program(vert_shader, frag_shader,
		                      reinterpret_cast<const ImmutableSamplerBank *>(info.layout));
	}

	if (ret)
	{
		// The layout is dummy, resolve it here.
		info.layout = ret->get_pipeline_layout()->get_layout();

		// Resolve shader modules. The stage array is owned by the replayer and is patched in place.
		auto *stages = const_cast<VkPipelineShaderStageCreateInfo *>(info.pStages);
		if (vert_index >= 0)
			stages[vert_index].module = vert_shader->get_module();
		if (task_index >= 0)
			stages[task_index].module = task_shader->get_module();
		if (mesh_index >= 0)
			stages[mesh_index].module = mesh_shader->get_module();
		stages[frag_index].module = frag_shader->get_module();
	}

	// Skipped pipelines are reported as handled rather than as failures.
	if (!ret || !replayer_state->feature_filter->graphics_pipeline_is_supported(&info))
	{
		replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
		return true;
	}

#ifdef VULKAN_DEBUG
	LOGI("Replaying graphics pipeline.\n");
#endif

	// Translate the Vulkan dynamic state list into the command buffer dirty-bit mask
	// that is stored next to the pipeline.
	uint32_t dynamic_state = 0;
	if (info.pDynamicState)
	{
		for (uint32_t i = 0; i < info.pDynamicState->dynamicStateCount; i++)
		{
			switch (info.pDynamicState->pDynamicStates[i])
			{
			case VK_DYNAMIC_STATE_VIEWPORT:
				dynamic_state |= COMMAND_BUFFER_DIRTY_VIEWPORT_BIT;
				break;

			case VK_DYNAMIC_STATE_SCISSOR:
				dynamic_state |= COMMAND_BUFFER_DIRTY_SCISSOR_BIT;
				break;

			case VK_DYNAMIC_STATE_DEPTH_BIAS:
				dynamic_state |= COMMAND_BUFFER_DIRTY_DEPTH_BIAS_BIT;
				break;

			case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
			case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
			case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
				dynamic_state |= COMMAND_BUFFER_DIRTY_STENCIL_REFERENCE_BIT;
				break;

			default:
				break;
			}
		}
	}

	VkPipeline pipeline = VK_NULL_HANDLE;
	VkResult res = table->vkCreateGraphicsPipelines(device, pipeline_cache, 1, &info, nullptr, &pipeline);
	if (res != VK_SUCCESS)
	{
		LOGE("Failed to create graphics pipeline!\n");
		replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
		return false;
	}

	// add_pipeline hands back the pipeline that is actually stored for this hash.
	// If that is not ours, destroy the one just created.
	auto actual_pipe = ret->add_pipeline(hash, { pipeline, dynamic_state }).pipeline;
	if (actual_pipe != pipeline)
		table->vkDestroyPipeline(device, pipeline, nullptr);

	replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
	return actual_pipe != VK_NULL_HANDLE;
}
// Compiles one replayed compute pipeline and stores it on the matching Program.
//
// `info` arrives with a shader hash in place of the module and an ImmutableSamplerBank
// pointer in place of the layout; both are rewritten in place to real handles.
// Returns false when the shader is unknown, creation fails, or no pipeline ends up stored.
// Returns true when stored, or when skipped (no program / rejected by the feature filter).
// The pipeline progress counter is bumped exactly once on every path.
bool Device::fossilize_replay_compute_pipeline(Fossilize::Hash hash, VkComputePipelineCreateInfo &info)
{
	auto &pipeline_counter = replayer_state->progress.pipelines;

	// The module field carries the shader hash; look up the Shader* registered for it.
	auto *shader = shaders.find((Fossilize::Hash)info.stage.module);
	if (shader == nullptr)
	{
		pipeline_counter.fetch_add(1, std::memory_order_release);
		return false;
	}

	auto *program = request_program(shader, reinterpret_cast<const ImmutableSamplerBank *>(info.layout));
	if (program != nullptr)
	{
		// Swap the dummy layout and module for the real handles.
		info.layout = program->get_pipeline_layout()->get_layout();
		info.stage.module = shader->get_module();
	}

	const bool usable = program != nullptr &&
	                    replayer_state->feature_filter->compute_pipeline_is_supported(&info);
	if (!usable)
	{
		pipeline_counter.fetch_add(1, std::memory_order_release);
		return true;
	}

#ifdef VULKAN_DEBUG
	LOGI("Replaying compute pipeline.\n");
#endif

	VkPipeline created = VK_NULL_HANDLE;
	const VkResult result = table->vkCreateComputePipelines(device, pipeline_cache, 1, &info, nullptr, &created);
	if (result != VK_SUCCESS)
	{
		LOGE("Failed to create compute pipeline!\n");
		pipeline_counter.fetch_add(1, std::memory_order_release);
		return false;
	}

	// add_pipeline hands back the pipeline actually stored for this hash; drop ours if it differs.
	auto stored = program->add_pipeline(hash, { created, 0 }).pipeline;
	if (stored != created)
		table->vkDestroyPipeline(device, created, nullptr);

	pipeline_counter.fetch_add(1, std::memory_order_release);
	return stored != VK_NULL_HANDLE;
}
// Replay callback for a graphics pipeline.
// A pipeline that references a null shader module, render pass or layout is dropped:
// *pipeline is set to VK_NULL_HANDLE and the progress counter is bumped. Anything else is
// queued (hash + create info pointer) for the compile tasks. Always returns true.
bool Device::enqueue_create_graphics_pipeline(Fossilize::Hash hash,
                                              const VkGraphicsPipelineCreateInfo *create_info,
                                              VkPipeline *pipeline)
{
	bool missing_dependency = false;
	for (uint32_t stage = 0; stage < create_info->stageCount && !missing_dependency; stage++)
		missing_dependency = create_info->pStages[stage].module == VK_NULL_HANDLE;

	if (!missing_dependency)
		missing_dependency = create_info->renderPass == VK_NULL_HANDLE || create_info->layout == VK_NULL_HANDLE;

	if (missing_dependency)
	{
		*pipeline = VK_NULL_HANDLE;
		replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
		return true;
	}

	// The lifetime of create_info is tied to the replayer itself.
	auto *mutable_info = const_cast<VkGraphicsPipelineCreateInfo *>(create_info);
	replayer_state->graphics_pipelines.emplace_back(hash, mutable_info);
	return true;
}
// Replay callback for a compute pipeline.
// A pipeline with a null shader module or layout is dropped: *pipeline is set to
// VK_NULL_HANDLE and the progress counter is bumped. Anything else is queued
// (hash + create info pointer) for the compile tasks. Always returns true.
bool Device::enqueue_create_compute_pipeline(Fossilize::Hash hash,
                                             const VkComputePipelineCreateInfo *create_info,
                                             VkPipeline *pipeline)
{
	const bool has_module = create_info->stage.module != VK_NULL_HANDLE;
	const bool has_layout = create_info->layout != VK_NULL_HANDLE;

	if (has_module && has_layout)
	{
		// The lifetime of create_info is tied to the replayer itself.
		auto *mutable_info = const_cast<VkComputePipelineCreateInfo *>(create_info);
		replayer_state->compute_pipelines.emplace_back(hash, mutable_info);
	}
	else
	{
		*pipeline = VK_NULL_HANDLE;
		replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
	}

	return true;
}
// Replay callback for the VkRenderPassCreateInfo (non-2) form.
// Not handled here: every argument is ignored and false is returned unconditionally.
bool Device::enqueue_create_render_pass(Fossilize::Hash,
                                        const VkRenderPassCreateInfo *,
                                        VkRenderPass *)
{
	constexpr bool handled = false;
	return handled;
}
// Replay callback for a render pass in VkRenderPassCreateInfo2 form.
// Unsupported render passes yield VK_NULL_HANDLE through *render_pass. Supported ones are
// inserted into the render pass cache keyed by hash and their handle is written to
// *render_pass. Always returns true.
bool Device::enqueue_create_render_pass2(Fossilize::Hash hash, const VkRenderPassCreateInfo2 *create_info, VkRenderPass *render_pass)
{
	if (!replayer_state->feature_filter->render_pass2_is_supported(create_info))
	{
		// Write through the out-pointer; assigning to the pointer itself would leave
		// the caller's handle untouched.
		*render_pass = VK_NULL_HANDLE;
		return true;
	}

	auto *pass = render_passes.emplace_yield(hash, hash, this, *create_info);
	*render_pass = pass->get_render_pass();
	return true;
}
// Replay callback for ray tracing pipelines.
// Not handled here: every argument is ignored and false is returned unconditionally.
bool Device::enqueue_create_raytracing_pipeline(
		Fossilize::Hash, const VkRayTracingPipelineCreateInfoKHR *, VkPipeline *)
{
	constexpr bool handled = false;
	return handled;
}
// Replay callback for a sampler.
// Unsupported samplers set *vk_sampler to VK_NULL_HANDLE and return false. Otherwise an
// immutable sampler is inserted into the cache keyed by hash, and a pointer to that object
// (not a real VkSampler) is returned through *vk_sampler as a placeholder handle.
bool Device::enqueue_create_sampler(Fossilize::Hash hash, const VkSamplerCreateInfo *info, VkSampler *vk_sampler)
{
	const bool supported = replayer_state->feature_filter->sampler_is_supported(info);
	if (!supported)
	{
		*vk_sampler = VK_NULL_HANDLE;
		return false;
	}

	// YCbCr conversion create infos are replayed inline in Fossilize.
	// Only the first struct on the pNext chain is inspected.
	const ImmutableYcbcrConversion *conversion = nullptr;
	const auto *chained = static_cast<const VkSamplerYcbcrConversionCreateInfo *>(info->pNext);
	if (chained != nullptr && chained->sType == VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO)
		conversion = request_immutable_ycbcr_conversion(*chained);

	auto *immutable = immutable_samplers.emplace_yield(hash, hash, this, Sampler::fill_sampler_info(*info), conversion);
	*vk_sampler = reinterpret_cast<VkSampler>(immutable);
	return true;
}
// Replay callback for a descriptor set layout.
// No Vulkan object is created. Instead a zeroed array of VULKAN_NUM_BINDINGS immutable
// sampler pointers is taken from the base replayer's allocator, filled from the first
// immutable sampler of each binding entry, and returned through *layout as a placeholder
// handle. Unsupported layouts yield VK_NULL_HANDLE. Always returns true.
bool Device::enqueue_create_descriptor_set_layout(Fossilize::Hash, const VkDescriptorSetLayoutCreateInfo *info, VkDescriptorSetLayout *layout)
{
	const bool supported = replayer_state->feature_filter->descriptor_set_layout_is_supported(info);
	if (!supported)
	{
		*layout = VK_NULL_HANDLE;
		return true;
	}

	auto &allocator = replayer_state->base_replayer.get_allocator();
	auto *bank = allocator.allocate_n_cleared<const ImmutableSampler *>(VULKAN_NUM_BINDINGS);

	// NOTE(review): the slot is chosen by position in pBindings, not by the entry's
	// `binding` number, and bindingCount is not checked against VULKAN_NUM_BINDINGS.
	// Confirm both are intended.
	for (uint32_t slot = 0; slot < info->bindingCount; slot++)
	{
		const VkSampler *immutable = info->pBindings[slot].pImmutableSamplers;
		if (immutable == nullptr || immutable[0] == VK_NULL_HANDLE)
			continue;
		bank[slot] = reinterpret_cast<const ImmutableSampler *>(immutable[0]);
	}

	*layout = reinterpret_cast<VkDescriptorSetLayout>(bank);
	return true;
}
// Replay callback for a pipeline layout.
// No Vulkan object is created. Each set layout handle is treated as the placeholder array
// of immutable sampler pointers produced by enqueue_create_descriptor_set_layout, and is
// copied into one row of a zeroed ImmutableSamplerBank taken from the base replayer's
// allocator. The bank pointer is returned through *layout as a placeholder handle.
// Unsupported layouts yield VK_NULL_HANDLE. Always returns true.
bool Device::enqueue_create_pipeline_layout(Fossilize::Hash, const VkPipelineLayoutCreateInfo *info, VkPipelineLayout *layout)
{
	if (!replayer_state->feature_filter->pipeline_layout_is_supported(info))
	{
		*layout = VK_NULL_HANDLE;
		return true;
	}

	auto &alloc = replayer_state->base_replayer.get_allocator();
	auto *sampler_bank = alloc.allocate_cleared<ImmutableSamplerBank>();

	for (uint32_t i = 0; i < info->setLayoutCount; i++)
	{
		const auto *set_samplers = reinterpret_cast<const ImmutableSampler *const *>(info->pSetLayouts[i]);

		// A set layout that was replayed as VK_NULL_HANDLE has no sampler array to copy.
		// The row stays zeroed, and memcpy is never handed a null source.
		if (!set_samplers)
			continue;

		memcpy(sampler_bank->samplers[i], set_samplers, sizeof(sampler_bank->samplers[i]));
	}

	*layout = reinterpret_cast<VkPipelineLayout>(sampler_bank);
	return true;
}
// Copies a shipped Fossilize database from assets:// into cache:// when needed.
//
// The copy happens when assets://fossilize/db.foz exists as a file and either the cache has
// no db.foz, or the "iteration" marker files of cache and assets cannot both be read or
// differ. After a successful copy the assets iteration marker is mirrored into the cache.
// Failures are logged and abort the promotion.
void Device::promote_readonly_db_from_assets() const
{
	auto *fs = get_system_handles().filesystem;

	// We might want to be able to ship a Fossilize database so that we can prime all PSOs up front.
	Granite::FileStat s_cache = {};
	Granite::FileStat s_assets = {};
	bool cache_exists = fs->stat("cache://fossilize/db.foz", s_cache) && s_cache.type == Granite::PathType::File;
	bool assets_exists = fs->stat("assets://fossilize/db.foz", s_assets) && s_assets.type == Granite::PathType::File;

	bool overwrite = false;
	if (assets_exists)
	{
		if (!cache_exists)
		{
			overwrite = true;
		}
		else
		{
			// If an application updates the assets Foz DB for shipping updates, throw the old one away.
			std::string cache_iter, asset_iter;
			if (!fs->read_file_to_string("cache://fossilize/iteration", cache_iter) ||
			    !fs->read_file_to_string("assets://fossilize/iteration", asset_iter) ||
			    cache_iter != asset_iter)
			{
				overwrite = true;
			}
		}
	}

	if (!overwrite)
		return;

	// The Fossilize DB needs to work with a proper file system. The assets folder is highly virtual by nature.
	auto ro = fs->open_readonly_mapping("assets://fossilize/db.foz");
	if (!ro)
	{
		LOGE("Failed to open readonly Fossilize archive.\n");
		return;
	}

	if (!fs->write_buffer_to_file("cache://fossilize/db.foz", ro->data(), ro->get_size()))
	{
		// Terminated with a newline like every other log message in this file.
		LOGE("Failed to write to cache://fossilize/db.foz\n");
		return;
	}

	// Mirror the iteration marker so the next run sees cache and assets as matching.
	std::string asset_iter;
	if (fs->read_file_to_string("assets://fossilize/iteration", asset_iter))
		fs->write_string_to_file("cache://fossilize/iteration", asset_iter);
}
// Replays every database entry of the given resource tag through the base replayer.
// Entries that cannot be read are skipped silently; entries that fail to parse are
// reported with a warning.
void Device::replay_tag_simple(Fossilize::ResourceTag tag)
{
	auto &db = *replayer_state->db;

	// Two-step query: first the number of hashes, then the hashes themselves.
	size_t num_hashes = 0;
	db.get_hash_list_for_resource_tag(tag, &num_hashes, nullptr);
	std::vector<Fossilize::Hash> hashes(num_hashes);
	db.get_hash_list_for_resource_tag(tag, &num_hashes, hashes.data());

	Util::DynamicArray<uint8_t> blob;
	size_t blob_size = 0;

	for (size_t index = 0; index < hashes.size(); index++)
	{
		const auto hash = hashes[index];

		// Same two-step pattern per entry: query the size, then read the payload.
		const bool have_size = db.read_entry(tag, hash, &blob_size, nullptr, 0);
		if (!have_size)
			continue;

		blob.reserve(blob_size);
		const bool have_payload = db.read_entry(tag, hash, &blob_size, blob.data(), 0);
		if (!have_payload)
			continue;

		const bool parsed = replayer_state->base_replayer.parse(*this, &db, blob.data(), blob_size);
		if (!parsed)
			LOGW("Failed to replay object.\n");
	}
}
// Folds the write caches found in cache://fossilize into the read-only db.foz.
//
// - Exactly one write cache and no db.foz: the write cache is moved into place as db.foz.
// - Otherwise, if there is at least one write cache: the caches are merged into merge.foz
//   (seeded from db.foz when one exists), which then replaces db.foz.
// - No write caches: nothing to merge.
// Afterwards every write cache that was found, plus any merge.foz that was listed,
// is removed.
void Device::promote_write_cache_to_readonly() const
{
	auto *fs = get_system_handles().filesystem;
	auto entries = fs->list("cache://fossilize");

	std::vector<std::string> merge_paths_str;
	std::vector<std::string> del_paths_str;
	std::vector<const char *> merge_paths;
	merge_paths_str.reserve(entries.size());
	merge_paths.reserve(entries.size());

	bool have_read_only = false;

	// Classify the directory contents.
	for (auto &entry : entries)
	{
		if (entry.type != Granite::PathType::File)
			continue;
		if (entry.path == "fossilize/iteration" || entry.path == "fossilize/TOUCH")
			continue;

		if (entry.path == "fossilize/db.foz")
		{
			have_read_only = true;
			LOGI("Fossilize: Found read-only cache.\n");
		}
		else if (entry.path == "fossilize/merge.foz")
		{
			del_paths_str.emplace_back("cache://fossilize/merge.foz");
		}
		else
		{
			auto full_path = "cache://" + entry.path;
			merge_paths_str.push_back(full_path);
			del_paths_str.push_back(full_path);
			LOGI("Fossilize: Found write cache: %s.\n", merge_paths_str.back().c_str());
		}
	}

	const bool lone_write_cache = !have_read_only && merge_paths_str.size() == 1;

	if (lone_write_cache)
	{
		LOGI("Fossilize: No read-cache and one write cache. Replacing directly.\n");
		const bool promoted = fs->move_replace("cache://fossilize/db.foz", merge_paths_str.front());
		if (promoted)
			LOGI("Fossilize: Promoted write-only cache.\n");
		else
			LOGW("Fossilize: Failed to promote write-only cache.\n");
	}
	else if (merge_paths_str.empty())
	{
		LOGI("Fossilize: No write only files, nothing to do.\n");
	}
	else
	{
		auto append_path = fs->get_filesystem_path("cache://fossilize/merge.foz");
		bool should_merge;

		// Ensure that we have taken exclusive write access to this file.
		// Only one process will be able to pass this test until the file is removed.
		if (have_read_only)
		{
			LOGI("Fossilize: Attempting to merge caches.\n");
			should_merge = fs->move_yield("cache://fossilize/merge.foz", "cache://fossilize/db.foz");
		}
		else
		{
			// Scoped so the database object is destroyed before the merge below runs.
			auto db = std::unique_ptr<Fossilize::DatabaseInterface>(
					Fossilize::create_stream_archive_database(append_path.c_str(), Fossilize::DatabaseMode::ExclusiveOverWrite));
			should_merge = db && db->prepare();
		}

		if (!should_merge)
		{
			LOGW("Fossilize: Skipping merge due to unexpected error.\n");
		}
		else
		{
			// Translate to real filesystem paths in place; merge_paths borrows the C strings.
			for (auto &path : merge_paths_str)
			{
				path = fs->get_filesystem_path(path);
				merge_paths.push_back(path.c_str());
			}

			const bool merged = Fossilize::merge_concurrent_databases(append_path.c_str(), merge_paths.data(), merge_paths.size());
			if (!merged)
				LOGW("Fossilize: Failed to merge databases.\n");
			else if (fs->move_replace("cache://fossilize/db.foz", "cache://fossilize/merge.foz"))
				LOGI("Fossilize: Successfully merged caches.\n");
			else
				LOGW("Fossilize: Failed to replace existing read-only database.\n");
		}
	}

	// Cleanup any stale write-only files.
	// This can easily race against concurrent processes, so the cache will likely be destroyed by accident,
	// but that's ok. Running multiple Granite processes concurrently like this is questionable at best.
	for (auto &stale : del_paths_str)
		fs->remove(stale);
}
// Sets up Fossilize recording and kicks off asynchronous replay of the on-disk cache.
//
// filter:           feature filter used to decide which replayed objects are supported.
//                   A pointer to it is stored, so it must outlive the replayer state.
// pdf2:             physical device features, recorded into the recorder.
// application_info: application info, recorded into the recorder.
//
// Requires both a filesystem and a thread group system handle; without either, a warning
// is logged and nothing is set up. All heavy work is expressed as thread group tasks:
// cache maintenance -> recorder kick -> prepare -> parse (modules / graphics / compute)
// -> compile (graphics / compute) -> complete.
// NOTE(review): this reads add_dependency(a, b) as "a runs after b"; the signature is not
// visible here, so confirm against the thread group API.
void Device::init_pipeline_state(const Fossilize::FeatureFilter &filter,
const VkPhysicalDeviceFeatures2 &pdf2,
const VkApplicationInfo &application_info)
{
if (!get_system_handles().filesystem)
{
LOGW("Filesystem system handle must be provided to use Fossilize.\n");
return;
}
if (!get_system_handles().thread_group)
{
LOGW("Thread group system handle must be provided to use Fossilize.\n");
return;
}
// Fresh state objects; any previous replayer/recorder state is replaced.
replayer_state.reset(new ReplayerState);
recorder_state.reset(new RecorderState);
if (!recorder_state->recorder.record_application_info(application_info))
LOGW("Failed to record application info.\n");
if (!recorder_state->recorder.record_physical_device_features(&pdf2))
LOGW("Failed to record PDF2.\n");
// Paired with unlock_read() in the completion task further down.
lock.read_only_cache.lock_read();
// Only non-const usage is to register modules, and that is atomic within the implementation.
replayer_state->feature_filter = const_cast<Fossilize::FeatureFilter *>(&filter);
auto *group = get_system_handles().thread_group;
auto shader_manager_task = group->create_task([this]() {
init_shader_manager_cache();
});
shader_manager_task->set_desc("shader-manager-init");
// Cache maintenance contributes 20 + 50 + 20 to the "prepare" progress counter;
// the recorder kick task below adds the final 10.
auto cache_maintenance_task = group->create_task([this]() {
// Ensure we create the Fossilize cache folder.
// Also creates a timestamp.
get_system_handles().filesystem->open("cache://fossilize/TOUCH", Granite::FileMode::WriteOnly);
replayer_state->progress.prepare.fetch_add(20, std::memory_order_release);
promote_write_cache_to_readonly();
replayer_state->progress.prepare.fetch_add(50, std::memory_order_release);
promote_readonly_db_from_assets();
replayer_state->progress.prepare.fetch_add(20, std::memory_order_release);
});
cache_maintenance_task->set_desc("foz-cache-maintenance");
auto recorder_kick_task = group->create_task([this]() {
// Kick off recorder thread.
auto write_real_path = get_system_handles().filesystem->get_filesystem_path("cache://fossilize/db");
if (!write_real_path.empty())
{
recorder_state->db.reset(Fossilize::create_concurrent_database(
write_real_path.c_str(), Fossilize::DatabaseMode::Append, nullptr, 0));
recorder_state->recorder.set_database_enable_application_feature_links(false);
recorder_state->recorder.init_recording_thread(recorder_state->db.get());
}
// The flag is raised even when no write path was available; it is what the
// register_* functions test before recording.
recorder_state->recorder_ready.store(true, std::memory_order_release);
replayer_state->progress.prepare.fetch_add(10, std::memory_order_release);
});
recorder_kick_task->set_desc("foz-recorder-kick");
group->add_dependency(*recorder_kick_task, *cache_maintenance_task);
// Opens the read-only database, replays the simple object types synchronously within
// this task, and collects the hash lists that the parse tasks will consume.
auto prepare_task = group->create_task([this]() {
auto *fs = get_system_handles().filesystem;
auto read_real_path = fs->get_filesystem_path("cache://fossilize/db.foz");
if (read_real_path.empty())
{
// ~0u is the sentinel that query_initialization_progress() reports as 100.
replayer_state->progress.modules.store(~0u, std::memory_order_release);
replayer_state->progress.pipelines.store(~0u, std::memory_order_release);
return;
}
replayer_state->db.reset(
Fossilize::create_stream_archive_database(read_real_path.c_str(), Fossilize::DatabaseMode::ReadOnly));
if (replayer_state->db && !replayer_state->db->prepare())
{
LOGW("Failed to prepare read-only cache.\n");
replayer_state->db.reset();
}
if (replayer_state->db)
{
replay_tag_simple(Fossilize::RESOURCE_SAMPLER);
replay_tag_simple(Fossilize::RESOURCE_DESCRIPTOR_SET_LAYOUT);
replay_tag_simple(Fossilize::RESOURCE_PIPELINE_LAYOUT);
replay_tag_simple(Fossilize::RESOURCE_RENDER_PASS);
// For each tag: query the count first, size the vector, then fetch the hashes.
size_t count = 0;
replayer_state->db->get_hash_list_for_resource_tag(Fossilize::RESOURCE_SHADER_MODULE, &count, nullptr);
replayer_state->module_hashes.resize(count);
replayer_state->db->get_hash_list_for_resource_tag(Fossilize::RESOURCE_SHADER_MODULE, &count,
replayer_state->module_hashes.data());
replayer_state->db->get_hash_list_for_resource_tag(Fossilize::RESOURCE_GRAPHICS_PIPELINE, &count, nullptr);
replayer_state->graphics_hashes.resize(count);
replayer_state->db->get_hash_list_for_resource_tag(Fossilize::RESOURCE_GRAPHICS_PIPELINE, &count,
replayer_state->graphics_hashes.data());
replayer_state->db->get_hash_list_for_resource_tag(Fossilize::RESOURCE_COMPUTE_PIPELINE, &count, nullptr);
replayer_state->compute_hashes.resize(count);
replayer_state->db->get_hash_list_for_resource_tag(Fossilize::RESOURCE_COMPUTE_PIPELINE, &count,
replayer_state->compute_hashes.data());
replayer_state->progress.num_modules = replayer_state->module_hashes.size();
replayer_state->progress.num_pipelines =
replayer_state->graphics_hashes.size() + replayer_state->compute_hashes.size();
}
// Nothing to do for a category: mark it as complete right away.
if (replayer_state->progress.num_modules == 0)
replayer_state->progress.modules.store(~0u, std::memory_order_release);
if (replayer_state->progress.num_pipelines == 0)
replayer_state->progress.pipelines.store(~0u, std::memory_order_release);
});
prepare_task->set_desc("foz-prepare");
group->add_dependency(*prepare_task, *recorder_kick_task);
auto parse_modules_task = group->create_task();
parse_modules_task->set_desc("foz-parse-modules");
group->add_dependency(*parse_modules_task, *prepare_task);
group->add_dependency(*parse_modules_task, *shader_manager_task);
// Module parsing is split into NumTasks sub-tasks; sub-task i handles the contiguous
// slice [i * size / NumTasks, (i + 1) * size / NumTasks) of module_hashes.
for (unsigned i = 0; i < NumTasks; i++)
{
parse_modules_task->enqueue_task([this, i]() {
if (!replayer_state->db)
return;
// Each sub-task uses its own replayer instance and scratch buffer.
Fossilize::StateReplayer module_replayer;
Util::DynamicArray<uint8_t> buffer;
auto &db = *replayer_state->db;
size_t start = (i * replayer_state->module_hashes.size()) / NumTasks;
size_t end = ((i + 1) * replayer_state->module_hashes.size()) / NumTasks;
size_t size = 0;
for (; start < end; start++)
{
auto hash = replayer_state->module_hashes[start];
// NOTE(review): a failed read_entry skips the entry without bumping the modules
// counter, unlike a failed parse below — confirm this is intended.
if (!db.read_entry(Fossilize::RESOURCE_SHADER_MODULE, hash, &size, nullptr, Fossilize::PAYLOAD_READ_CONCURRENT_BIT))
continue;
buffer.reserve(size);
if (!db.read_entry(Fossilize::RESOURCE_SHADER_MODULE, hash, &size, buffer.data(), Fossilize::PAYLOAD_READ_CONCURRENT_BIT))
continue;
if (!module_replayer.parse(*this, &db, buffer.data(), size))
{
replayer_state->progress.modules.fetch_add(1, std::memory_order_release);
LOGW("Failed to parse module.\n");
}
}
});
}
// Graphics pipelines are parsed by a single task using the dedicated graphics replayer,
// which first copies handle references from the base replayer.
auto parse_graphics_task = group->create_task([this]() {
if (!replayer_state->db)
return;
auto &replayer = replayer_state->graphics_replayer;
replayer.copy_handle_references(replayer_state->base_replayer);
replayer.set_resolve_shader_module_handles(false);
size_t size = 0;
auto &db = *replayer_state->db;
Util::DynamicArray<uint8_t> buffer;
for (auto hash : replayer_state->graphics_hashes)
{
if (!db.read_entry(Fossilize::RESOURCE_GRAPHICS_PIPELINE, hash, &size, nullptr, Fossilize::PAYLOAD_READ_CONCURRENT_BIT))
continue;
buffer.reserve(size);
if (!db.read_entry(Fossilize::RESOURCE_GRAPHICS_PIPELINE, hash, &size, buffer.data(), Fossilize::PAYLOAD_READ_CONCURRENT_BIT))
continue;
if (!replayer.parse(*this, &db, buffer.data(), size))
{
replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
LOGW("Failed to parse graphics pipeline.\n");
}
}
});
parse_graphics_task->set_desc("foz-parse-graphics");
group->add_dependency(*parse_graphics_task, *prepare_task);
// Compute pipelines: same scheme as graphics, with the dedicated compute replayer.
auto parse_compute_task = group->create_task([this]() {
if (!replayer_state->db)
return;
auto &replayer = replayer_state->compute_replayer;
replayer.copy_handle_references(replayer_state->base_replayer);
replayer.set_resolve_shader_module_handles(false);
size_t size = 0;
auto &db = *replayer_state->db;
Util::DynamicArray<uint8_t> buffer;
for (auto hash : replayer_state->compute_hashes)
{
if (!db.read_entry(Fossilize::RESOURCE_COMPUTE_PIPELINE, hash, &size, nullptr, Fossilize::PAYLOAD_READ_CONCURRENT_BIT))
continue;
buffer.reserve(size);
if (!db.read_entry(Fossilize::RESOURCE_COMPUTE_PIPELINE, hash, &size, buffer.data(), Fossilize::PAYLOAD_READ_CONCURRENT_BIT))
continue;
if (!replayer.parse(*this, &db, buffer.data(), size))
{
replayer_state->progress.pipelines.fetch_add(1, std::memory_order_release);
LOGW("Failed to parse compute pipeline.\n");
}
}
});
parse_compute_task->set_desc("foz-parse-compute");
group->add_dependency(*parse_compute_task, *prepare_task);
// Compile stages depend on module parsing plus the matching pipeline parse task.
auto compile_graphics_task = group->create_task();
auto compile_compute_task = group->create_task();
compile_graphics_task->set_desc("foz-compile-graphics");
compile_compute_task->set_desc("foz-compile-compute");
group->add_dependency(*compile_graphics_task, *parse_modules_task);
group->add_dependency(*compile_graphics_task, *parse_graphics_task);
group->add_dependency(*compile_compute_task, *parse_modules_task);
group->add_dependency(*compile_compute_task, *parse_compute_task);
// The queued pipelines are split into NumTasks contiguous slices per pipeline kind,
// the same way module parsing is split above.
for (unsigned i = 0; i < NumTasks; i++)
{
compile_graphics_task->enqueue_task([this, i]() {
size_t start = (i * replayer_state->graphics_pipelines.size()) / NumTasks;
size_t end = ((i + 1) * replayer_state->graphics_pipelines.size()) / NumTasks;
for (; start < end; start++)
{
auto &pipe = replayer_state->graphics_pipelines[start];
fossilize_replay_graphics_pipeline(pipe.first, *pipe.second);
}
});
compile_compute_task->enqueue_task([this, i]() {
size_t start = (i * replayer_state->compute_pipelines.size()) / NumTasks;
size_t end = ((i + 1) * replayer_state->compute_pipelines.size()) / NumTasks;
for (; start < end; start++)
{
auto &pipe = replayer_state->compute_pipelines[start];
fossilize_replay_compute_pipeline(pipe.first, *pipe.second);
}
});
}
// Completion task: logs a summary, releases the read lock taken above, and drops all
// replay scratch state including the read-only database.
replayer_state->complete = get_system_handles().thread_group->create_task([this]() {
LOGI("Fossilize replay completed!\n Modules: %zu\n Graphics: %zu\n Compute: %zu\n",
replayer_state->module_hashes.size(),
replayer_state->graphics_hashes.size(),
replayer_state->compute_hashes.size());
lock.read_only_cache.unlock_read();
const auto cleanup = [](Fossilize::StateReplayer &r) {
r.forget_handle_references();
r.forget_pipeline_handle_references();
r.get_allocator().reset();
};
cleanup(replayer_state->base_replayer);
cleanup(replayer_state->graphics_replayer);
cleanup(replayer_state->compute_replayer);
replayer_state->graphics_pipelines.clear();
replayer_state->compute_pipelines.clear();
replayer_state->module_hashes.clear();
replayer_state->graphics_hashes.clear();
replayer_state->compute_hashes.clear();
replayer_state->db.reset();
});
replayer_state->complete->set_desc("foz-replay-complete");
group->add_dependency(*replayer_state->complete, *compile_graphics_task);
group->add_dependency(*replayer_state->complete, *compile_compute_task);
replayer_state->complete->flush();
// Handles kept so callers can block on individual stages:
// module_ready is the module parse task; pipeline_ready is an empty task that depends
// on both compile tasks.
replayer_state->module_ready = std::move(parse_modules_task);
replayer_state->module_ready->flush();
auto compile_task = group->create_task();
group->add_dependency(*compile_task, *compile_graphics_task);
group->add_dependency(*compile_task, *compile_compute_task);
replayer_state->pipeline_ready = std::move(compile_task);
replayer_state->pipeline_ready->flush();
}
// Tears down Fossilize state.
// Waits for the replay completion task (if one exists) before dropping the replayer
// state, then stops the recording thread and drops the recorder state.
void Device::flush_pipeline_state()
{
	if (replayer_state && replayer_state->complete)
		replayer_state->complete->wait();
	replayer_state.reset();

	if (!recorder_state)
		return;

	recorder_state->recorder.tear_down_recording_thread();
	recorder_state.reset();
}
// Reports initialization progress for the requested stage.
// Returns 100 when there is no replayer state. For CacheMaintenance the raw "prepare"
// counter is returned. For ShaderModules and Pipelines the result is 0 while the counter
// is 0, 100 when the counter holds the ~0u sentinel, and otherwise the percentage of
// processed items. Any other stage yields 0.
unsigned Device::query_initialization_progress(InitializationStage status) const
{
	if (!replayer_state)
		return 100;

	const auto &progress = replayer_state->progress;

	if (status == InitializationStage::CacheMaintenance)
		return progress.prepare.load(std::memory_order_acquire);

	if (status == InitializationStage::ShaderModules)
	{
		const unsigned done = progress.modules.load(std::memory_order_acquire);
		if (done == ~0u)
			return 100;
		// Avoid 0/0. The total is only read once work has been observed.
		return done ? (100u * done) / progress.num_modules : 0;
	}

	if (status == InitializationStage::Pipelines)
	{
		const unsigned done = progress.pipelines.load(std::memory_order_acquire);
		if (done == ~0u)
			return 100;
		// Avoid 0/0. The total is only read once work has been observed.
		return done ? (100u * done) / progress.num_pipelines : 0;
	}

	return 0;
}
// Blocks until the shader module parse task has finished.
// Returns immediately when there is no replayer state or no such task.
void Device::block_until_shader_module_ready()
{
	if (replayer_state && replayer_state->module_ready)
		replayer_state->module_ready->wait();
}
// Blocks until the pipeline compile task has finished.
// Returns immediately when there is no replayer state or no such task.
void Device::block_until_pipeline_ready()
{
	if (replayer_state && replayer_state->pipeline_ready)
		replayer_state->pipeline_ready->wait();
}
void Device::wait_shader_caches()
{
block_until_pipeline_ready();
}
}