00cc9309cb
de6e324bdseparate emu thread10d3daf86Roms List improvements95d202f37Let's make the rom list process on a separate thread so the emulator doesnt take ages to load.fc306967fWow the ROM Header was just completely busted. Game list view works nowbad1691eefuck this shit2b59e5f46game list in progressd26417b83remappable inputs in progressac4af8106inpute72abc240update readme430139dc9Qt6 frontend3080d4d45Fix this small bug too08cd13b85Cop0 unused functions do not actually pose a threat (as per manual). They don't do anything, so shall we.61bb4fb44make idle loop detection a little more specific with where the load goesb037de4c3SAZDFsdff12e81e73eneed to figure out why n64-systemtest loops indefinitely at some address that appears to be valid (i think it's me not invalidating the cache properly)204f0e13bidle skipping seems to work!cb8bb634asdkfjlasdf58e5c89c1Fix compilation issue on my machine (no idea)24fb2898eattempting more serious idle skipping214719577Place rsp.Step inside cached interpreter. Gains about 3 more fpsbb97dcc23mmmmm920b77d38wjkhasdfjhkasdf430ccdab4it's a start...4f42a673aCached interpreter plays Mario 64. Start looking into RSP as wellc9a030787idle skipping works!5fbda03cenew idea366637abaIdle skipping... maybe?609fa2fb0Cache instructions implemented but broken lmao. Commented out for nowe140a6d12- Stop using inheritance for CPU, instead use composition. - Introduce KAIZEN_JIT_ENABLED optional define instead of relying on __aarch64__ and the like. 
- More cache work68e613057prep cache impl811b4d809fix clang formatfda755f7didkd5024ebbfsmall MI refactor in preparation of (eventually) implementing the RDRAM interface properly694b45341Merge commit '206dcdedf195fb320913584180edb12c7731e396' as 'external/SDL'206dcdedfSquashed 'external/SDL/' content from commit 4d17b99d0a4d16e1cb4need to update sdl848b19920Fix compilation errordb61b5299Merge commit 'e94a94559f28e49678fbcf72199a5258137b0fe9' as 'external/imgui'e94a94559Squashed 'external/imgui/' content from commit 02e9b8cac52edb3757need to update imguic1a705e86Emulate weird JALR behaviour4b4c32f4bFix exception for "unusable COP1" in 4 instructions i missed accidentally (again)df5828142Bug putting 0s in the log everywheref8b580048Make isviewer a sink to file8241e9735Fix exception for "unusable COP1" in 4 instructions i missed accidentallyb29715f20small changesd9a620bc1make use of my new small utility library0d1aa938eAdd 'external/ircolib/' from commit 'ce3cd726c8df8388d554abf8bb55d55020eb4450'e64eb40b3Fuck git git-subtree-dir: external/ircolib git-subtree-split:de6e324bde
912 lines
37 KiB
C++
912 lines
37 KiB
C++
/* Copyright (c) 2017-2023 Hans-Kristian Arntzen
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining
|
|
* a copy of this software and associated documentation files (the
|
|
* "Software"), to deal in the Software without restriction, including
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
* the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be
|
|
* included in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "buffer.hpp"
|
|
#include "command_buffer.hpp"
|
|
#include "command_pool.hpp"
|
|
#include "fence.hpp"
|
|
#include "fence_manager.hpp"
|
|
#include "image.hpp"
|
|
#include "memory_allocator.hpp"
|
|
#include "render_pass.hpp"
|
|
#include "sampler.hpp"
|
|
#include "semaphore.hpp"
|
|
#include "semaphore_manager.hpp"
|
|
#include "event_manager.hpp"
|
|
#include "shader.hpp"
|
|
#include "context.hpp"
|
|
#include "query_pool.hpp"
|
|
#include "buffer_pool.hpp"
|
|
#include "indirect_layout.hpp"
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <functional>
|
|
#include <unordered_map>
|
|
#include <stdio.h>
|
|
|
|
#ifdef GRANITE_VULKAN_SYSTEM_HANDLES
|
|
#include "shader_manager.hpp"
|
|
#include "resource_manager.hpp"
|
|
#endif
|
|
|
|
#include <atomic>
|
|
#include <mutex>
|
|
#include <condition_variable>
|
|
|
|
#ifdef GRANITE_VULKAN_FOSSILIZE
|
|
#include "fossilize.hpp"
|
|
#endif
|
|
|
|
#include "quirks.hpp"
|
|
#include "small_vector.hpp"
|
|
|
|
// Forward declaration so this header can name Util::TimelineTraceFile
// (by pointer or reference) without needing its full definition here.
namespace Util
{
class TimelineTraceFile;
}
|
|
|
|
// Forward declaration so this header can name Granite::TaskGroup
// (by pointer or reference) without needing its full definition here.
namespace Granite
{
struct TaskGroup;
}
|
|
|
|
namespace Vulkan
|
|
{
|
|
// Style selector accepted by Device::get_swapchain_render_pass().
// Enumerators keep their implicit values 0, 1, 2 in declaration order.
enum class SwapchainRenderPass
{
    // NOTE(review): presumably a color attachment with no depth/stencil - confirm in the implementation.
    ColorOnly,
    // NOTE(review): presumably color plus a depth attachment - confirm.
    Depth,
    // NOTE(review): presumably color plus a combined depth/stencil attachment - confirm.
    DepthStencil
};
|
|
|
|
struct InitialImageBuffer
|
|
{
|
|
BufferHandle buffer;
|
|
Util::SmallVector<VkBufferImageCopy, 32> blits;
|
|
};
|
|
|
|
struct HandlePool
|
|
{
|
|
VulkanObjectPool<Buffer> buffers;
|
|
VulkanObjectPool<Image> images;
|
|
VulkanObjectPool<LinearHostImage> linear_images;
|
|
VulkanObjectPool<ImageView> image_views;
|
|
VulkanObjectPool<BufferView> buffer_views;
|
|
VulkanObjectPool<Sampler> samplers;
|
|
VulkanObjectPool<FenceHolder> fences;
|
|
VulkanObjectPool<SemaphoreHolder> semaphores;
|
|
VulkanObjectPool<EventHolder> events;
|
|
VulkanObjectPool<QueryPoolResult> query;
|
|
VulkanObjectPool<CommandBuffer> command_buffers;
|
|
VulkanObjectPool<BindlessDescriptorPool> bindless_descriptor_pool;
|
|
VulkanObjectPool<DeviceAllocationOwner> allocations;
|
|
};
|
|
|
|
// Abstract sink for debug-channel messages.
// Implementations override message() to receive a tagged payload of 32-bit words.
class DebugChannelInterface
{
public:
    // Virtual destructor so an implementation can be destroyed safely
    // through a DebugChannelInterface pointer.
    virtual ~DebugChannelInterface() = default;

    // A single 32-bit payload word, viewable as unsigned, signed or float.
    union Word
    {
        uint32_t u32;
        int32_t s32;
        float f32;
    };

    // Delivers one message to the sink.
    //   tag        - identifier string for the channel.
    //   code       - message code; meaning is defined by the sender.
    //   x, y, z    - three coordinates supplied by the sender
    //                (NOTE(review): presumably a shader invocation ID - confirm).
    //   word_count - number of entries in words.
    //   words      - payload array of word_count entries.
    virtual void message(const std::string &tag, uint32_t code, uint32_t x, uint32_t y, uint32_t z,
                         uint32_t word_count, const Word *words) = 0;
};
|
|
|
|
namespace Helper
|
|
{
|
|
struct WaitSemaphores
|
|
{
|
|
Util::SmallVector<VkSemaphoreSubmitInfo> binary_waits;
|
|
Util::SmallVector<VkSemaphoreSubmitInfo> timeline_waits;
|
|
};
|
|
|
|
class BatchComposer
|
|
{
|
|
public:
|
|
enum { MaxSubmissions = 8 };
|
|
|
|
BatchComposer();
|
|
void add_wait_submissions(WaitSemaphores &sem);
|
|
void add_wait_semaphore(SemaphoreHolder &sem, VkPipelineStageFlags2 stage);
|
|
void add_wait_semaphore(VkSemaphore sem, VkPipelineStageFlags2 stage);
|
|
void add_signal_semaphore(VkSemaphore sem, VkPipelineStageFlags2 stage, uint64_t count);
|
|
void add_command_buffer(VkCommandBuffer cmd);
|
|
|
|
void begin_batch();
|
|
Util::SmallVector<VkSubmitInfo2, MaxSubmissions> &bake(int profiling_iteration = -1);
|
|
|
|
private:
|
|
Util::SmallVector<VkSubmitInfo2, MaxSubmissions> submits;
|
|
VkPerformanceQuerySubmitInfoKHR profiling_infos[Helper::BatchComposer::MaxSubmissions];
|
|
|
|
Util::SmallVector<VkSemaphoreSubmitInfo> waits[MaxSubmissions];
|
|
Util::SmallVector<VkSemaphoreSubmitInfo> signals_[MaxSubmissions];
|
|
Util::SmallVector<VkCommandBufferSubmitInfo> cmds[MaxSubmissions];
|
|
|
|
unsigned submit_index = 0;
|
|
};
|
|
}
|
|
|
|
class Device
|
|
: public Util::IntrusivePtrEnabled<Device, std::default_delete<Device>, HandleCounter>
|
|
#ifdef GRANITE_VULKAN_FOSSILIZE
|
|
, public Fossilize::StateCreatorInterface
|
|
#endif
|
|
{
|
|
public:
|
|
// Device-based objects which need to poke at internal data structures when their lifetimes end.
|
|
// Don't want to expose a lot of internal guts to make this work.
|
|
friend class QueryPool;
|
|
friend struct QueryPoolResultDeleter;
|
|
friend class EventHolder;
|
|
friend struct EventHolderDeleter;
|
|
friend class SemaphoreHolder;
|
|
friend struct SemaphoreHolderDeleter;
|
|
friend class FenceHolder;
|
|
friend struct FenceHolderDeleter;
|
|
friend class Sampler;
|
|
friend struct SamplerDeleter;
|
|
friend class ImmutableSampler;
|
|
friend class ImmutableYcbcrConversion;
|
|
friend class Buffer;
|
|
friend struct BufferDeleter;
|
|
friend class BufferView;
|
|
friend struct BufferViewDeleter;
|
|
friend class ImageView;
|
|
friend struct ImageViewDeleter;
|
|
friend class Image;
|
|
friend struct ImageDeleter;
|
|
friend struct LinearHostImageDeleter;
|
|
friend class CommandBuffer;
|
|
friend struct CommandBufferDeleter;
|
|
friend class BindlessDescriptorPool;
|
|
friend struct BindlessDescriptorPoolDeleter;
|
|
friend class Program;
|
|
friend class WSI;
|
|
friend class Cookie;
|
|
friend class Framebuffer;
|
|
friend class PipelineLayout;
|
|
friend class FramebufferAllocator;
|
|
friend class RenderPass;
|
|
friend class Texture;
|
|
friend class DescriptorSetAllocator;
|
|
friend class Shader;
|
|
friend class ImageResourceHolder;
|
|
friend class DeviceAllocationOwner;
|
|
friend struct DeviceAllocationDeleter;
|
|
|
|
Device();
|
|
~Device();
|
|
|
|
// No move-copy.
|
|
void operator=(Device &&) = delete;
|
|
Device(Device &&) = delete;
|
|
|
|
// Only called by main thread, during setup phase.
|
|
void set_context(const Context &context);
|
|
|
|
// This is asynchronous in nature. See query_initialization_progress().
|
|
// Kicks off Fossilize and shader manager caching.
|
|
void begin_shader_caches();
|
|
// For debug or trivial applications, blocks until all shader cache work is done.
|
|
void wait_shader_caches();
|
|
|
|
void init_swapchain(const std::vector<VkImage> &swapchain_images, unsigned width, unsigned height, VkFormat format,
|
|
VkSurfaceTransformFlagBitsKHR transform, VkImageUsageFlags usage);
|
|
void set_swapchain_queue_family_support(uint32_t queue_family_support);
|
|
bool can_touch_swapchain_in_command_buffer(CommandBuffer::Type type) const;
|
|
void init_external_swapchain(const std::vector<ImageHandle> &swapchain_images);
|
|
void init_frame_contexts(unsigned count);
|
|
const VolkDeviceTable &get_device_table() const;
|
|
|
|
// Profiling
|
|
bool init_performance_counters(CommandBuffer::Type type, const std::vector<std::string> &names);
|
|
bool acquire_profiling();
|
|
void release_profiling();
|
|
void query_available_performance_counters(CommandBuffer::Type type,
|
|
uint32_t *count,
|
|
const VkPerformanceCounterKHR **counters,
|
|
const VkPerformanceCounterDescriptionKHR **desc);
|
|
|
|
ImageView &get_swapchain_view();
|
|
ImageView &get_swapchain_view(unsigned index);
|
|
unsigned get_num_swapchain_images() const;
|
|
unsigned get_num_frame_contexts() const;
|
|
unsigned get_swapchain_index() const;
|
|
unsigned get_current_frame_context() const;
|
|
|
|
size_t get_pipeline_cache_size();
|
|
bool get_pipeline_cache_data(uint8_t *data, size_t size);
|
|
bool init_pipeline_cache(const uint8_t *data, size_t size);
|
|
|
|
// Frame-pushing interface.
|
|
void next_frame_context();
|
|
|
|
// Normally, the main thread ensures forward progress of the frame context
|
|
// so that async tasks don't have to care about it,
|
|
// but in the case where async threads are continuously pumping Vulkan work
|
|
// in the background, they need to reclaim memory if WSI goes to sleep for a long period of time.
|
|
void next_frame_context_in_async_thread();
|
|
void set_enable_async_thread_frame_context(bool enable);
|
|
|
|
void wait_idle();
|
|
void end_frame_context();
|
|
|
|
// RenderDoc integration API for app-guided captures.
|
|
static bool init_renderdoc_capture();
|
|
// Calls next_frame_context() and begins a renderdoc capture.
|
|
void begin_renderdoc_capture();
|
|
// Calls next_frame_context() and ends the renderdoc capture.
|
|
void end_renderdoc_capture();
|
|
|
|
// Set names for objects for debuggers and profilers.
|
|
void set_name(const Buffer &buffer, const char *name);
|
|
void set_name(const Image &image, const char *name);
|
|
void set_name(const CommandBuffer &cmd, const char *name);
|
|
// Generic version.
|
|
void set_name(uint64_t object, VkObjectType type, const char *name);
|
|
|
|
// Submission interface, may be called from any thread at any time.
|
|
void flush_frame();
|
|
CommandBufferHandle request_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
|
|
CommandBufferHandle request_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
|
|
|
|
CommandBufferHandle request_profiled_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
|
|
CommandBufferHandle request_profiled_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
|
|
|
|
void submit(CommandBufferHandle &cmd, Fence *fence = nullptr,
|
|
unsigned semaphore_count = 0, Semaphore *semaphore = nullptr);
|
|
|
|
void submit_empty(CommandBuffer::Type type,
|
|
Fence *fence = nullptr,
|
|
SemaphoreHolder *semaphore = nullptr);
|
|
// Mark that work has been submitted in this frame context outside our control
|
|
// that accesses resources Vulkan::Device owns.
|
|
void submit_external(CommandBuffer::Type type);
|
|
void submit_discard(CommandBufferHandle &cmd);
|
|
QueueIndices get_physical_queue_type(CommandBuffer::Type queue_type) const;
|
|
void register_time_interval(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts,
|
|
const std::string &tag);
|
|
|
|
// Request shaders and programs. These objects are owned by the Device.
|
|
Shader *request_shader(const uint32_t *code, size_t size, const ResourceLayout *layout = nullptr);
|
|
Shader *request_shader_by_hash(Util::Hash hash);
|
|
Program *request_program(const uint32_t *task_data, size_t task_size,
|
|
const uint32_t *mesh_data, size_t mesh_size,
|
|
const uint32_t *fragment_data, size_t fragment_size,
|
|
const ResourceLayout *task_layout = nullptr,
|
|
const ResourceLayout *mesh_layout = nullptr,
|
|
const ResourceLayout *fragment_layout = nullptr);
|
|
Program *request_program(const uint32_t *vertex_data, size_t vertex_size,
|
|
const uint32_t *fragment_data, size_t fragment_size,
|
|
const ResourceLayout *vertex_layout = nullptr,
|
|
const ResourceLayout *fragment_layout = nullptr);
|
|
Program *request_program(const uint32_t *compute_data, size_t compute_size,
|
|
const ResourceLayout *layout = nullptr);
|
|
Program *request_program(Shader *task, Shader *mesh, Shader *fragment, const ImmutableSamplerBank *sampler_bank = nullptr);
|
|
Program *request_program(Shader *vertex, Shader *fragment, const ImmutableSamplerBank *sampler_bank = nullptr);
|
|
Program *request_program(Shader *compute, const ImmutableSamplerBank *sampler_bank = nullptr);
|
|
const IndirectLayout *request_indirect_layout(const IndirectLayoutToken *tokens,
|
|
uint32_t num_tokens, uint32_t stride);
|
|
|
|
const ImmutableYcbcrConversion *request_immutable_ycbcr_conversion(const VkSamplerYcbcrConversionCreateInfo &info);
|
|
const ImmutableSampler *request_immutable_sampler(const SamplerCreateInfo &info, const ImmutableYcbcrConversion *ycbcr);
|
|
|
|
// Map and unmap buffer objects.
|
|
void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
|
|
void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
|
|
void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access, VkDeviceSize offset, VkDeviceSize length);
|
|
void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access, VkDeviceSize offset, VkDeviceSize length);
|
|
|
|
void *map_linear_host_image(const LinearHostImage &image, MemoryAccessFlags access);
|
|
void unmap_linear_host_image_and_sync(const LinearHostImage &image, MemoryAccessFlags access);
|
|
|
|
// Create buffers and images.
|
|
BufferHandle create_buffer(const BufferCreateInfo &info, const void *initial = nullptr);
|
|
BufferHandle create_imported_host_buffer(const BufferCreateInfo &info, VkExternalMemoryHandleTypeFlagBits type, void *host_buffer);
|
|
ImageHandle create_image(const ImageCreateInfo &info, const ImageInitialData *initial = nullptr);
|
|
ImageHandle create_image_from_staging_buffer(const ImageCreateInfo &info, const InitialImageBuffer *buffer);
|
|
LinearHostImageHandle create_linear_host_image(const LinearHostImageCreateInfo &info);
|
|
// Does not create any default image views. Only wraps the VkImage
|
|
// as a non-owned handle for purposes of API interop.
|
|
ImageHandle wrap_image(const ImageCreateInfo &info, VkImage img);
|
|
DeviceAllocationOwnerHandle take_device_allocation_ownership(Image &image);
|
|
DeviceAllocationOwnerHandle allocate_memory(const MemoryAllocateInfo &info);
|
|
|
|
// Create staging buffers for images.
|
|
InitialImageBuffer create_image_staging_buffer(const ImageCreateInfo &info, const ImageInitialData *initial);
|
|
InitialImageBuffer create_image_staging_buffer(const TextureFormatLayout &layout);
|
|
|
|
// Create image view, buffer views and samplers.
|
|
ImageViewHandle create_image_view(const ImageViewCreateInfo &view_info);
|
|
BufferViewHandle create_buffer_view(const BufferViewCreateInfo &view_info);
|
|
SamplerHandle create_sampler(const SamplerCreateInfo &info);
|
|
|
|
BindlessDescriptorPoolHandle create_bindless_descriptor_pool(BindlessResourceType type,
|
|
unsigned num_sets, unsigned num_descriptors);
|
|
|
|
// Render pass helpers.
|
|
bool image_format_is_supported(VkFormat format, VkFormatFeatureFlags2KHR required, VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL) const;
|
|
void get_format_properties(VkFormat format, VkFormatProperties3KHR *properties) const;
|
|
bool get_image_format_properties(VkFormat format, VkImageType type, VkImageTiling tiling,
|
|
VkImageUsageFlags usage, VkImageCreateFlags flags,
|
|
const void *pNext,
|
|
VkImageFormatProperties2 *properties2) const;
|
|
|
|
VkFormat get_default_depth_stencil_format() const;
|
|
VkFormat get_default_depth_format() const;
|
|
ImageHandle get_transient_attachment(unsigned width, unsigned height, VkFormat format,
|
|
unsigned index = 0, unsigned samples = 1, unsigned layers = 1);
|
|
RenderPassInfo get_swapchain_render_pass(SwapchainRenderPass style);
|
|
|
|
// Semaphore API:
|
|
// Semaphores in Granite are abstracted to support both binary and timeline semaphores
|
|
// internally.
|
|
// In practice this means that semaphores behave like single-use binary semaphores,
|
|
// with one signal and one wait.
|
|
// A single semaphore handle is not reused for multiple submissions, and they must be recycled through
|
|
// the device. The intended use is device.submit(&sem), device.add_wait_semaphore(sem); dispose(sem);
|
|
// For timeline semaphores, the semaphore is just a proxy object which
|
|
// holds the internally owned VkSemaphore + timeline value and is otherwise lightweight.
|
|
//
|
|
// However, there are various use cases where we explicitly need semaphore objects:
|
|
// - Interoperate with other code that only accepts VkSemaphore.
|
|
// - Interoperate with external objects. We need to know whether to use binary or timeline.
|
|
// For timelines, we need to know which handle type to use (OPAQUE or ID3D12Fence).
|
|
// Binary external semaphore is always opaque with TEMPORARY semantics.
|
|
|
|
void add_wait_semaphore(CommandBuffer::Type type, Semaphore semaphore, VkPipelineStageFlags2 stages, bool flush);
|
|
|
|
// If transfer_ownership is set, Semaphore owns the VkSemaphore. Otherwise, application must
|
|
// free the semaphore when GPU usage of it is complete.
|
|
Semaphore request_semaphore(VkSemaphoreTypeKHR type, VkSemaphore handle = VK_NULL_HANDLE, bool transfer_ownership = false);
|
|
|
|
// Requests a binary or timeline semaphore that can be used to import/export.
|
|
// These semaphores cannot be used directly by add_wait_semaphore() and submit_empty().
|
|
// See request_timeline_semaphore_as_binary() for how to use timelines.
|
|
Semaphore request_semaphore_external(VkSemaphoreTypeKHR type,
|
|
VkExternalSemaphoreHandleTypeFlagBits handle_type);
|
|
|
|
// The created semaphore does not hold ownership of the VkSemaphore object.
|
|
// This is used when we want to wait on or signal an external timeline semaphore at a specific timeline value.
|
|
// We must collapse the timeline to a "binary" semaphore before we can call submit_empty or add_wait_semaphore().
|
|
Semaphore request_timeline_semaphore_as_binary(const SemaphoreHolder &holder, uint64_t value);
|
|
|
|
// A proxy semaphore which lets us grab a semaphore handle before we signal it.
|
|
// Move assignment can be used to move a payload.
|
|
// Mostly useful to deal better with render graph implementation.
|
|
// For the time being, however, we'll support moving the payload over to the proxy object.
|
|
Semaphore request_proxy_semaphore();
|
|
|
|
// For compat with existing code that uses this entry point.
|
|
// Shorthand for request_semaphore(VK_SEMAPHORE_TYPE_BINARY_KHR), leaving
// the handle and transfer_ownership arguments at their defaults.
inline Semaphore request_legacy_semaphore()
{
    return request_semaphore(VK_SEMAPHORE_TYPE_BINARY_KHR);
}
|
|
|
|
// Returns the VkDevice handle stored in this object.
// The member is initialized to VK_NULL_HANDLE.
inline VkDevice get_device() const
{
    return device;
}
|
|
|
|
// Returns the VkPhysicalDevice handle stored in this object.
// The member is initialized to VK_NULL_HANDLE.
inline VkPhysicalDevice get_physical_device() const
{
    return gpu;
}
|
|
|
|
// Returns the VkInstance handle stored in this object.
// The member is initialized to VK_NULL_HANDLE.
inline VkInstance get_instance() const
{
    return instance;
}
|
|
|
|
// Returns a reference to the stored VkPhysicalDeviceMemoryProperties.
// The reference points into this object.
inline const VkPhysicalDeviceMemoryProperties &get_memory_properties() const
{
    return mem_props;
}
|
|
|
|
// Returns a reference to the stored VkPhysicalDeviceProperties.
// The reference points into this object.
inline const VkPhysicalDeviceProperties &get_gpu_properties() const
{
    return gpu_props;
}
|
|
|
|
void get_memory_budget(HeapBudget *budget);
|
|
|
|
const Sampler &get_stock_sampler(StockSampler sampler) const;
|
|
|
|
#ifdef GRANITE_VULKAN_SYSTEM_HANDLES
|
|
// To obtain ShaderManager, ShaderModules must be observed to be complete
|
|
// in query_initialization_progress().
|
|
ShaderManager &get_shader_manager();
|
|
ResourceManager &get_resource_manager();
|
|
#endif
|
|
|
|
// Useful for loading screens or otherwise figuring out
|
|
// when we can start rendering in a stable state.
|
|
// Stages that can be passed to query_initialization_progress().
// Enumerators keep their implicit values 0, 1, 2 in declaration order.
enum class InitializationStage
{
    CacheMaintenance,
    // When this is done, shader modules and the shader manager have been populated.
    // At this stage it is safe to use shaders in a configuration where we
    // don't have SPIRV-Cross and/or shaderc to do on the fly compilation.
    // For shipping configurations. We can still compile pipelines, but it may stutter.
    ShaderModules,
    // When this is done, pipelines should never stutter if Fossilize knows about the pipeline.
    Pipelines
};
|
|
|
|
// 0 -> not started
|
|
// [1, 99] rough percentage of completion
|
|
// >= 100 done
|
|
unsigned query_initialization_progress(InitializationStage status) const;
|
|
|
|
// For some platforms, the device and queue might be shared, possibly across threads, so need some mechanism to
|
|
// lock the global device and queue.
|
|
void set_queue_lock(std::function<void ()> lock_callback,
|
|
std::function<void ()> unlock_callback);
|
|
|
|
// Alternative form, when we have to provide lock callbacks to external APIs.
|
|
void external_queue_lock();
|
|
void external_queue_unlock();
|
|
|
|
// Returns a reference to the ImplementationWorkarounds held by this object.
const ImplementationWorkarounds &get_workarounds() const
{
    return workarounds;
}
|
|
|
|
// Returns a reference to the DeviceFeatures stored in the ext member.
const DeviceFeatures &get_device_features() const
{
    return ext;
}
|
|
|
|
bool consumes_debug_markers() const
|
|
{
|
|
return debug_marker_sensitive;
|
|
}
|
|
|
|
bool swapchain_touched() const;
|
|
|
|
double convert_device_timestamp_delta(uint64_t start_ticks, uint64_t end_ticks) const;
|
|
// Writes a timestamp on host side, which is calibrated to the GPU timebase.
|
|
QueryPoolHandle write_calibrated_timestamp();
|
|
|
|
// A split version of VkEvent handling which lets us record a wait command before signal is recorded.
|
|
PipelineEvent begin_signal_event();
|
|
|
|
// Returns a reference to the Context::SystemHandles stored in this object.
const Context::SystemHandles &get_system_handles() const
{
    return system_handles;
}
|
|
|
|
void configure_default_geometry_samplers(float max_aniso, float lod_bias);
|
|
|
|
bool supports_subgroup_size_log2(bool subgroup_full_group,
|
|
uint8_t subgroup_minimum_size_log2,
|
|
uint8_t subgroup_maximum_size_log2,
|
|
VkShaderStageFlagBits stage = VK_SHADER_STAGE_COMPUTE_BIT) const;
|
|
|
|
const QueueInfo &get_queue_info() const;
|
|
|
|
void timestamp_log_reset();
|
|
void timestamp_log(const TimestampIntervalReportCallback &cb) const;
|
|
|
|
private:
|
|
VkInstance instance = VK_NULL_HANDLE;
|
|
VkPhysicalDevice gpu = VK_NULL_HANDLE;
|
|
VkDevice device = VK_NULL_HANDLE;
|
|
const VolkDeviceTable *table = nullptr;
|
|
const Context *ctx = nullptr;
|
|
QueueInfo queue_info;
|
|
unsigned num_thread_indices = 1;
|
|
|
|
std::atomic_uint64_t cookie;
|
|
|
|
uint64_t allocate_cookie();
|
|
void bake_program(Program &program, const ImmutableSamplerBank *sampler_bank);
|
|
void merge_combined_resource_layout(CombinedResourceLayout &layout, const Program &program);
|
|
|
|
void request_vertex_block(BufferBlock &block, VkDeviceSize size);
|
|
void request_index_block(BufferBlock &block, VkDeviceSize size);
|
|
void request_uniform_block(BufferBlock &block, VkDeviceSize size);
|
|
void request_staging_block(BufferBlock &block, VkDeviceSize size);
|
|
|
|
QueryPoolHandle write_timestamp(VkCommandBuffer cmd, VkPipelineStageFlags2 stage);
|
|
|
|
void set_acquire_semaphore(unsigned index, Semaphore acquire);
|
|
Semaphore consume_release_semaphore();
|
|
VkQueue get_current_present_queue() const;
|
|
CommandBuffer::Type get_current_present_queue_type() const;
|
|
|
|
const PipelineLayout *request_pipeline_layout(const CombinedResourceLayout &layout,
|
|
const ImmutableSamplerBank *immutable_samplers);
|
|
DescriptorSetAllocator *request_descriptor_set_allocator(const DescriptorSetLayout &layout,
|
|
const uint32_t *stages_for_sets,
|
|
const ImmutableSampler * const *immutable_samplers);
|
|
const Framebuffer &request_framebuffer(const RenderPassInfo &info);
|
|
const RenderPass &request_render_pass(const RenderPassInfo &info, bool compatible);
|
|
|
|
VkPhysicalDeviceMemoryProperties mem_props;
|
|
VkPhysicalDeviceProperties gpu_props;
|
|
|
|
DeviceFeatures ext;
|
|
bool debug_marker_sensitive = false;
|
|
void init_stock_samplers();
|
|
void init_stock_sampler(StockSampler sampler, float max_aniso, float lod_bias);
|
|
void init_timeline_semaphores();
|
|
void deinit_timeline_semaphores();
|
|
|
|
uint64_t update_wrapped_device_timestamp(uint64_t ts);
|
|
int64_t convert_timestamp_to_absolute_nsec(const QueryPoolResult &handle);
|
|
Context::SystemHandles system_handles;
|
|
|
|
QueryPoolHandle write_timestamp_nolock(VkCommandBuffer cmd, VkPipelineStageFlags2 stage);
|
|
QueryPoolHandle write_calibrated_timestamp_nolock();
|
|
void register_time_interval_nolock(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts,
|
|
const std::string &tag);
|
|
|
|
// Make sure this is deleted last.
|
|
HandlePool handle_pool;
|
|
|
|
// Calibrated timestamps.
|
|
void init_calibrated_timestamps();
|
|
void recalibrate_timestamps_fallback();
|
|
void recalibrate_timestamps();
|
|
bool resample_calibrated_timestamps();
|
|
VkTimeDomainEXT calibrated_time_domain = VK_TIME_DOMAIN_DEVICE_EXT;
|
|
int64_t calibrated_timestamp_device = 0;
|
|
int64_t calibrated_timestamp_host = 0;
|
|
int64_t calibrated_timestamp_device_accum = 0;
|
|
unsigned timestamp_calibration_counter = 0;
|
|
Vulkan::QueryPoolHandle frame_context_begin_ts;
|
|
|
|
struct Managers
|
|
{
|
|
DeviceAllocator memory;
|
|
FenceManager fence;
|
|
SemaphoreManager semaphore;
|
|
EventManager event;
|
|
BufferPool vbo, ibo, ubo, staging;
|
|
TimestampIntervalManager timestamps;
|
|
};
|
|
Managers managers;
|
|
|
|
struct
|
|
{
|
|
std::mutex memory_lock;
|
|
std::mutex lock;
|
|
std::condition_variable cond;
|
|
Util::RWSpinLock read_only_cache;
|
|
unsigned counter = 0;
|
|
bool async_frame_context = false;
|
|
} lock;
|
|
|
|
struct PerFrame
|
|
{
|
|
PerFrame(Device *device, unsigned index);
|
|
~PerFrame();
|
|
void operator=(const PerFrame &) = delete;
|
|
PerFrame(const PerFrame &) = delete;
|
|
|
|
void begin();
|
|
void trim_command_pools();
|
|
|
|
Device &device;
|
|
unsigned frame_index;
|
|
const VolkDeviceTable &table;
|
|
Managers &managers;
|
|
|
|
std::vector<CommandPool> cmd_pools[QUEUE_INDEX_COUNT];
|
|
VkSemaphore timeline_semaphores[QUEUE_INDEX_COUNT] = {};
|
|
uint64_t timeline_fences[QUEUE_INDEX_COUNT] = {};
|
|
|
|
QueryPool query_pool;
|
|
|
|
std::vector<BufferBlock> vbo_blocks;
|
|
std::vector<BufferBlock> ibo_blocks;
|
|
std::vector<BufferBlock> ubo_blocks;
|
|
std::vector<BufferBlock> staging_blocks;
|
|
|
|
std::vector<VkFence> wait_and_recycle_fences;
|
|
|
|
std::vector<DeviceAllocation> allocations;
|
|
std::vector<VkFramebuffer> destroyed_framebuffers;
|
|
std::vector<VkSampler> destroyed_samplers;
|
|
std::vector<VkImageView> destroyed_image_views;
|
|
std::vector<VkBufferView> destroyed_buffer_views;
|
|
std::vector<VkImage> destroyed_images;
|
|
std::vector<VkBuffer> destroyed_buffers;
|
|
std::vector<VkDescriptorPool> destroyed_descriptor_pools;
|
|
Util::SmallVector<CommandBufferHandle> submissions[QUEUE_INDEX_COUNT];
|
|
std::vector<VkSemaphore> recycled_semaphores;
|
|
std::vector<VkEvent> recycled_events;
|
|
std::vector<VkSemaphore> destroyed_semaphores;
|
|
std::vector<VkSemaphore> consumed_semaphores;
|
|
|
|
struct DebugChannel
|
|
{
|
|
DebugChannelInterface *iface;
|
|
std::string tag;
|
|
BufferHandle buffer;
|
|
};
|
|
std::vector<DebugChannel> debug_channels;
|
|
|
|
struct TimestampIntervalHandles
|
|
{
|
|
std::string tid;
|
|
QueryPoolHandle start_ts;
|
|
QueryPoolHandle end_ts;
|
|
TimestampInterval *timestamp_tag;
|
|
};
|
|
std::vector<TimestampIntervalHandles> timestamp_intervals;
|
|
|
|
bool in_destructor = false;
|
|
};
|
|
// The per frame structure must be destroyed after
|
|
// the hashmap data structures below, so it must be declared before.
|
|
std::vector<std::unique_ptr<PerFrame>> per_frame;
|
|
|
|
struct
|
|
{
|
|
Semaphore acquire;
|
|
Semaphore release;
|
|
std::vector<ImageHandle> swapchain;
|
|
VkQueue present_queue = VK_NULL_HANDLE;
|
|
Vulkan::CommandBuffer::Type present_queue_type = {};
|
|
uint32_t queue_family_support_mask = 0;
|
|
unsigned index = 0;
|
|
bool consumed = false;
|
|
} wsi;
|
|
bool can_touch_swapchain_in_command_buffer(QueueIndices physical_type) const;
|
|
|
|
struct QueueData
|
|
{
|
|
Util::SmallVector<Semaphore> wait_semaphores;
|
|
Util::SmallVector<VkPipelineStageFlags2> wait_stages;
|
|
bool need_fence = false;
|
|
|
|
VkSemaphore timeline_semaphore = VK_NULL_HANDLE;
|
|
uint64_t current_timeline = 0;
|
|
PerformanceQueryPool performance_query_pool;
|
|
} queue_data[QUEUE_INDEX_COUNT];
|
|
|
|
struct InternalFence
|
|
{
|
|
VkFence fence;
|
|
VkSemaphore timeline;
|
|
uint64_t value;
|
|
};
|
|
|
|
void submit_queue(QueueIndices physical_type, InternalFence *fence,
|
|
SemaphoreHolder *external_semaphore = nullptr,
|
|
unsigned semaphore_count = 0,
|
|
Semaphore *semaphore = nullptr,
|
|
int profiled_iteration = -1);
|
|
|
|
// Returns the PerFrame entry selected by frame_context_index.
// Asserts that the index is in range and that the entry is non-null.
PerFrame &frame()
{
    VK_ASSERT(frame_context_index < per_frame.size());
    VK_ASSERT(per_frame[frame_context_index]);
    return *per_frame[frame_context_index];
}
|
|
|
|
// Const overload: returns the PerFrame entry selected by frame_context_index.
// Asserts that the index is in range and that the entry is non-null.
const PerFrame &frame() const
{
    VK_ASSERT(frame_context_index < per_frame.size());
    VK_ASSERT(per_frame[frame_context_index]);
    return *per_frame[frame_context_index];
}
|
|
|
|
unsigned frame_context_index = 0;
|
|
|
|
uint32_t find_memory_type(BufferDomain domain, uint32_t mask) const;
|
|
uint32_t find_memory_type(ImageDomain domain, uint32_t mask) const;
|
|
uint32_t find_memory_type(uint32_t required, uint32_t mask) const;
|
|
bool memory_type_is_device_optimal(uint32_t type) const;
|
|
bool memory_type_is_host_visible(uint32_t type) const;
|
|
|
|
const ImmutableSampler *samplers[static_cast<unsigned>(StockSampler::Count)] = {};
|
|
|
|
VulkanCache<PipelineLayout> pipeline_layouts;
|
|
VulkanCache<DescriptorSetAllocator> descriptor_set_allocators;
|
|
VulkanCache<RenderPass> render_passes;
|
|
VulkanCache<Shader> shaders;
|
|
VulkanCache<Program> programs;
|
|
VulkanCache<ImmutableSampler> immutable_samplers;
|
|
VulkanCache<ImmutableYcbcrConversion> immutable_ycbcr_conversions;
|
|
VulkanCache<IndirectLayout> indirect_layouts;
|
|
|
|
FramebufferAllocator framebuffer_allocator;
|
|
TransientAttachmentAllocator transient_allocator;
|
|
VkPipelineCache pipeline_cache = VK_NULL_HANDLE;
|
|
|
|
void init_pipeline_cache();
|
|
void flush_pipeline_cache();
|
|
|
|
PerformanceQueryPool &get_performance_query_pool(QueueIndices physical_type);
|
|
void clear_wait_semaphores();
|
|
void submit_staging(CommandBufferHandle &cmd, bool flush);
|
|
PipelineEvent request_pipeline_event();
|
|
|
|
std::function<void ()> queue_lock_callback;
|
|
std::function<void ()> queue_unlock_callback;
|
|
void flush_frame(QueueIndices physical_type);
|
|
void submit_empty_inner(QueueIndices type, InternalFence *fence,
|
|
SemaphoreHolder *external_semaphore,
|
|
unsigned semaphore_count,
|
|
Semaphore *semaphore);
|
|
|
|
void collect_wait_semaphores(QueueData &data, Helper::WaitSemaphores &semaphores);
|
|
void emit_queue_signals(Helper::BatchComposer &composer,
|
|
SemaphoreHolder *external_semaphore,
|
|
VkSemaphore sem, uint64_t timeline, InternalFence *fence,
|
|
unsigned semaphore_count, Semaphore *semaphores);
|
|
VkResult submit_batches(Helper::BatchComposer &composer, VkQueue queue, VkFence fence,
|
|
int profiling_iteration = -1);
|
|
VkResult queue_submit(VkQueue queue, uint32_t count, const VkSubmitInfo2 *submits, VkFence fence);
|
|
|
|
void destroy_buffer(VkBuffer buffer);
|
|
void destroy_image(VkImage image);
|
|
void destroy_image_view(VkImageView view);
|
|
void destroy_buffer_view(VkBufferView view);
|
|
void destroy_sampler(VkSampler sampler);
|
|
void destroy_framebuffer(VkFramebuffer framebuffer);
|
|
void destroy_semaphore(VkSemaphore semaphore);
|
|
void consume_semaphore(VkSemaphore semaphore);
|
|
void recycle_semaphore(VkSemaphore semaphore);
|
|
void destroy_event(VkEvent event);
|
|
void free_memory(const DeviceAllocation &alloc);
|
|
void reset_fence(VkFence fence, bool observed_wait);
|
|
void destroy_descriptor_pool(VkDescriptorPool desc_pool);
|
|
|
|
void destroy_buffer_nolock(VkBuffer buffer);
|
|
void destroy_image_nolock(VkImage image);
|
|
void destroy_image_view_nolock(VkImageView view);
|
|
void destroy_buffer_view_nolock(VkBufferView view);
|
|
void destroy_sampler_nolock(VkSampler sampler);
|
|
void destroy_framebuffer_nolock(VkFramebuffer framebuffer);
|
|
void destroy_semaphore_nolock(VkSemaphore semaphore);
|
|
void consume_semaphore_nolock(VkSemaphore semaphore);
|
|
void recycle_semaphore_nolock(VkSemaphore semaphore);
|
|
void destroy_event_nolock(VkEvent event);
|
|
void free_memory_nolock(const DeviceAllocation &alloc);
|
|
void destroy_descriptor_pool_nolock(VkDescriptorPool desc_pool);
|
|
void reset_fence_nolock(VkFence fence, bool observed_wait);
|
|
|
|
void flush_frame_nolock();
|
|
CommandBufferHandle request_command_buffer_nolock(unsigned thread_index, CommandBuffer::Type type, bool profiled);
|
|
void submit_discard_nolock(CommandBufferHandle &cmd);
|
|
void submit_nolock(CommandBufferHandle cmd, Fence *fence,
|
|
unsigned semaphore_count, Semaphore *semaphore);
|
|
void submit_empty_nolock(QueueIndices physical_type, Fence *fence,
|
|
SemaphoreHolder *semaphore, int profiling_iteration);
|
|
void add_wait_semaphore_nolock(QueueIndices type, Semaphore semaphore,
|
|
VkPipelineStageFlags2 stages, bool flush);
|
|
|
|
void request_vertex_block_nolock(BufferBlock &block, VkDeviceSize size);
|
|
void request_index_block_nolock(BufferBlock &block, VkDeviceSize size);
|
|
void request_uniform_block_nolock(BufferBlock &block, VkDeviceSize size);
|
|
void request_staging_block_nolock(BufferBlock &block, VkDeviceSize size);
|
|
|
|
CommandBufferHandle request_secondary_command_buffer_for_thread(unsigned thread_index,
|
|
const Framebuffer *framebuffer,
|
|
unsigned subpass,
|
|
CommandBuffer::Type type = CommandBuffer::Type::Generic);
|
|
void add_frame_counter_nolock();
|
|
void decrement_frame_counter_nolock();
|
|
void submit_secondary(CommandBuffer &primary, CommandBuffer &secondary);
|
|
void wait_idle_nolock();
|
|
void end_frame_nolock();
|
|
|
|
void add_debug_channel_buffer(DebugChannelInterface *iface, std::string tag, BufferHandle buffer);
|
|
void parse_debug_channel(const PerFrame::DebugChannel &channel);
|
|
|
|
Fence request_legacy_fence();
|
|
|
|
#ifdef GRANITE_VULKAN_SYSTEM_HANDLES
// Only built when GRANITE_VULKAN_SYSTEM_HANDLES is defined:
// shader / resource managers and the shader manager cache hooks.
ShaderManager shader_manager;
ResourceManager resource_manager;

void init_shader_manager_cache();
void flush_shader_manager_cache();
#endif
|
|
|
|
#ifdef GRANITE_VULKAN_FOSSILIZE
// Only built when GRANITE_VULKAN_FOSSILIZE is defined.

// Overrides of virtual functions declared in a base class that is not
// visible in this section. Each receives a Fossilize hash, the create info
// and an output handle pointer.
bool enqueue_create_sampler(Fossilize::Hash hash,
                            const VkSamplerCreateInfo *create_info,
                            VkSampler *sampler) override;
bool enqueue_create_descriptor_set_layout(Fossilize::Hash hash,
                                          const VkDescriptorSetLayoutCreateInfo *create_info,
                                          VkDescriptorSetLayout *layout) override;
bool enqueue_create_pipeline_layout(Fossilize::Hash hash,
                                    const VkPipelineLayoutCreateInfo *create_info,
                                    VkPipelineLayout *layout) override;
bool enqueue_create_shader_module(Fossilize::Hash hash,
                                  const VkShaderModuleCreateInfo *create_info,
                                  VkShaderModule *module) override;
bool enqueue_create_render_pass(Fossilize::Hash hash,
                                const VkRenderPassCreateInfo *create_info,
                                VkRenderPass *render_pass) override;
bool enqueue_create_render_pass2(Fossilize::Hash hash,
                                 const VkRenderPassCreateInfo2 *create_info,
                                 VkRenderPass *render_pass) override;
bool enqueue_create_compute_pipeline(Fossilize::Hash hash,
                                     const VkComputePipelineCreateInfo *create_info,
                                     VkPipeline *pipeline) override;
bool enqueue_create_graphics_pipeline(Fossilize::Hash hash,
                                      const VkGraphicsPipelineCreateInfo *create_info,
                                      VkPipeline *pipeline) override;
bool enqueue_create_raytracing_pipeline(Fossilize::Hash hash,
                                        const VkRayTracingPipelineCreateInfoKHR *create_info,
                                        VkPipeline *pipeline) override;

// Pipeline replay helpers (non-virtual); they take the create info by mutable reference.
bool fossilize_replay_graphics_pipeline(Fossilize::Hash hash, VkGraphicsPipelineCreateInfo &info);
bool fossilize_replay_compute_pipeline(Fossilize::Hash hash, VkComputePipelineCreateInfo &info);

void replay_tag_simple(Fossilize::ResourceTag tag);

// Registration hooks, one per object kind.
void register_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo &info);
void register_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo &info);
void register_render_pass(VkRenderPass render_pass,
                          Fossilize::Hash hash,
                          const VkRenderPassCreateInfo2KHR &info);
void register_descriptor_set_layout(VkDescriptorSetLayout layout,
                                    Fossilize::Hash hash,
                                    const VkDescriptorSetLayoutCreateInfo &info);
void register_pipeline_layout(VkPipelineLayout layout,
                              Fossilize::Hash hash,
                              const VkPipelineLayoutCreateInfo &info);
void register_shader_module(VkShaderModule module,
                            Fossilize::Hash hash,
                            const VkShaderModuleCreateInfo &info);
void register_sampler(VkSampler sampler, Fossilize::Hash hash, const VkSamplerCreateInfo &info);
void register_sampler_ycbcr_conversion(VkSamplerYcbcrConversion ycbcr,
                                       const VkSamplerYcbcrConversionCreateInfo &info);

// Recorder / replayer state: forward-declared here, held through unique_ptr.
struct RecorderState;
std::unique_ptr<RecorderState> recorder_state;

struct ReplayerState;
std::unique_ptr<ReplayerState> replayer_state;

void promote_write_cache_to_readonly() const;
void promote_readonly_db_from_assets() const;

// Pipeline state setup / flush, plus two blocking waits
// (shader modules ready, pipelines ready).
void init_pipeline_state(const Fossilize::FeatureFilter &filter,
                         const VkPhysicalDeviceFeatures2 &pdf2,
                         const VkApplicationInfo &application_info);
void flush_pipeline_state();
void block_until_shader_module_ready();
void block_until_pipeline_ready();
#endif
|
|
|
|
ImplementationWorkarounds workarounds;
|
|
void init_workarounds();
|
|
|
|
void fill_buffer_sharing_indices(VkBufferCreateInfo &create_info, uint32_t *sharing_indices);
|
|
|
|
bool allocate_image_memory(DeviceAllocation *allocation, const ImageCreateInfo &info,
|
|
VkImage image, VkImageTiling tiling);
|
|
|
|
void promote_read_write_caches_to_read_only();
|
|
};
|
|
|
|
// A fairly complex helper used for async queue readbacks.
|
|
// Typically used for things like headless backend which emulates WSI through readbacks + encode.
|
|
struct OwnershipTransferInfo
|
|
{
|
|
CommandBuffer::Type old_queue;
|
|
CommandBuffer::Type new_queue;
|
|
VkImageLayout old_image_layout;
|
|
VkImageLayout new_image_layout;
|
|
VkPipelineStageFlags2 dst_pipeline_stage;
|
|
VkAccessFlags2 dst_access;
|
|
};
|
|
|
|
// For an image which was last accessed in old_queue, requests a command buffer
|
|
// for new_queue. Commands will be enqueued as necessary in new_queue to ensure that a complete ownership
|
|
// transfer has taken place.
|
|
// If queue family for old_queue differs from new_queue, a release barrier is enqueued in old_queue.
|
|
// In new_queue we perform either an acquire barrier or a simple pipeline barrier to change layout if required.
|
|
// If semaphore is a valid handle, it will be waited on in either old_queue to perform release barrier
|
|
// or new_queue depending on what is required.
|
|
// If the image uses CONCURRENT sharing mode, acquire/release barriers are skipped.
|
|
CommandBufferHandle request_command_buffer_with_ownership_transfer(
|
|
Device &device,
|
|
const Vulkan::Image &image,
|
|
const OwnershipTransferInfo &info,
|
|
const Vulkan::Semaphore &semaphore);
|
|
|
|
// Intrusive-pointer handle alias for Device.
using DeviceHandle = Util::IntrusivePtr<Device>;
|
|
}
|