Files
kaizen/external/parallel-rdp-standalone/vulkan/device.hpp
CocoSimone ac0f3ffd8c imgui
2022-07-11 00:53:03 +02:00

854 lines
33 KiB
C++

/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "buffer.hpp"
#include "command_buffer.hpp"
#include "command_pool.hpp"
#include "fence.hpp"
#include "fence_manager.hpp"
#include "image.hpp"
#include "memory_allocator.hpp"
#include "render_pass.hpp"
#include "sampler.hpp"
#include "semaphore.hpp"
#include "semaphore_manager.hpp"
#include "event_manager.hpp"
#include "shader.hpp"
#include "context.hpp"
#include "query_pool.hpp"
#include "buffer_pool.hpp"
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include <stdio.h>
#ifdef GRANITE_VULKAN_FILESYSTEM
#include "shader_manager.hpp"
#include "texture_manager.hpp"
#endif
#ifdef GRANITE_VULKAN_MT
#include <atomic>
#include <mutex>
#include <condition_variable>
#endif
#ifdef GRANITE_VULKAN_FOSSILIZE
#include "fossilize.hpp"
#endif
#include "quirks.hpp"
#include "small_vector.hpp"
namespace Util
{
class TimelineTraceFile;
}
namespace Granite
{
struct TaskGroup;
}
namespace Vulkan
{
enum class SwapchainRenderPass
{
ColorOnly,
Depth,
DepthStencil
};
struct InitialImageBuffer
{
BufferHandle buffer;
Util::SmallVector<VkBufferImageCopy, 32> blits;
};
struct HandlePool
{
VulkanObjectPool<Buffer> buffers;
VulkanObjectPool<Image> images;
VulkanObjectPool<LinearHostImage> linear_images;
VulkanObjectPool<ImageView> image_views;
VulkanObjectPool<BufferView> buffer_views;
VulkanObjectPool<Sampler> samplers;
VulkanObjectPool<FenceHolder> fences;
VulkanObjectPool<SemaphoreHolder> semaphores;
VulkanObjectPool<EventHolder> events;
VulkanObjectPool<QueryPoolResult> query;
VulkanObjectPool<CommandBuffer> command_buffers;
VulkanObjectPool<BindlessDescriptorPool> bindless_descriptor_pool;
VulkanObjectPool<DeviceAllocationOwner> allocations;
};
class DebugChannelInterface
{
public:
union Word
{
uint32_t u32;
int32_t s32;
float f32;
};
virtual void message(const std::string &tag, uint32_t code, uint32_t x, uint32_t y, uint32_t z,
uint32_t word_count, const Word *words) = 0;
};
namespace Helper
{
struct WaitSemaphores
{
Util::SmallVector<VkSemaphore> binary_waits;
Util::SmallVector<VkPipelineStageFlags> binary_wait_stages;
Util::SmallVector<VkSemaphore> timeline_waits;
Util::SmallVector<VkPipelineStageFlags> timeline_wait_stages;
Util::SmallVector<uint64_t> timeline_wait_counts;
};
class BatchComposer
{
public:
enum { MaxSubmissions = 8 };
explicit BatchComposer(bool split_binary_timeline_semaphores);
void add_wait_submissions(WaitSemaphores &sem);
void add_wait_semaphore(SemaphoreHolder &sem, VkPipelineStageFlags stage);
void add_signal_semaphore(VkSemaphore sem, uint64_t count);
void add_command_buffer(VkCommandBuffer cmd);
Util::SmallVector<VkSubmitInfo, MaxSubmissions> &bake(int profiling_iteration = -1);
private:
Util::SmallVector<VkSubmitInfo, MaxSubmissions> submits;
VkTimelineSemaphoreSubmitInfoKHR timeline_infos[Helper::BatchComposer::MaxSubmissions];
VkPerformanceQuerySubmitInfoKHR profiling_infos[Helper::BatchComposer::MaxSubmissions];
Util::SmallVector<VkSemaphore> waits[MaxSubmissions];
Util::SmallVector<uint64_t> wait_counts[MaxSubmissions];
Util::SmallVector<VkFlags> wait_stages[MaxSubmissions];
Util::SmallVector<VkSemaphore> signals[MaxSubmissions];
Util::SmallVector<uint64_t> signal_counts[MaxSubmissions];
Util::SmallVector<VkCommandBuffer> cmds[MaxSubmissions];
unsigned submit_index = 0;
bool split_binary_timeline_semaphores = false;
void begin_batch();
bool has_timeline_semaphore_in_batch(unsigned index) const;
bool has_binary_semaphore_in_batch(unsigned index) const;
};
}
class Device
: public Util::IntrusivePtrEnabled<Device, std::default_delete<Device>, HandleCounter>
#ifdef GRANITE_VULKAN_FOSSILIZE
, public Fossilize::StateCreatorInterface
#endif
{
public:
// Device-based objects which need to poke at internal data structures when their lifetimes end.
// Don't want to expose a lot of internal guts to make this work.
friend class QueryPool;
friend struct QueryPoolResultDeleter;
friend class EventHolder;
friend struct EventHolderDeleter;
friend class SemaphoreHolder;
friend struct SemaphoreHolderDeleter;
friend class FenceHolder;
friend struct FenceHolderDeleter;
friend class Sampler;
friend struct SamplerDeleter;
friend class ImmutableSampler;
friend class Buffer;
friend struct BufferDeleter;
friend class BufferView;
friend struct BufferViewDeleter;
friend class ImageView;
friend struct ImageViewDeleter;
friend class Image;
friend struct ImageDeleter;
friend struct LinearHostImageDeleter;
friend class CommandBuffer;
friend struct CommandBufferDeleter;
friend class BindlessDescriptorPool;
friend struct BindlessDescriptorPoolDeleter;
friend class Program;
friend class WSI;
friend class Cookie;
friend class Framebuffer;
friend class PipelineLayout;
friend class FramebufferAllocator;
friend class RenderPass;
friend class Texture;
friend class DescriptorSetAllocator;
friend class Shader;
friend class ImageResourceHolder;
friend class DeviceAllocationOwner;
friend struct DeviceAllocationDeleter;
Device();
~Device();
// No move-copy.
void operator=(Device &&) = delete;
Device(Device &&) = delete;
// Only called by main thread, during setup phase.
void set_context(const Context &context);
void init_swapchain(const std::vector<VkImage> &swapchain_images, unsigned width, unsigned height, VkFormat format,
VkSurfaceTransformFlagBitsKHR transform, VkImageUsageFlags usage);
void set_swapchain_queue_family_support(uint32_t queue_family_support);
bool can_touch_swapchain_in_command_buffer(CommandBuffer::Type type) const;
void init_external_swapchain(const std::vector<ImageHandle> &swapchain_images);
void init_frame_contexts(unsigned count);
const VolkDeviceTable &get_device_table() const;
// Profiling
bool init_performance_counters(const std::vector<std::string> &names);
bool acquire_profiling();
void release_profiling();
void query_available_performance_counters(CommandBuffer::Type type,
uint32_t *count,
const VkPerformanceCounterKHR **counters,
const VkPerformanceCounterDescriptionKHR **desc);
ImageView &get_swapchain_view();
ImageView &get_swapchain_view(unsigned index);
unsigned get_num_swapchain_images() const;
unsigned get_num_frame_contexts() const;
unsigned get_swapchain_index() const;
unsigned get_current_frame_context() const;
size_t get_pipeline_cache_size();
bool get_pipeline_cache_data(uint8_t *data, size_t size);
bool init_pipeline_cache(const uint8_t *data, size_t size);
// Frame-pushing interface.
void next_frame_context();
void wait_idle();
void end_frame_context();
// RenderDoc integration API for app-guided captures.
static bool init_renderdoc_capture();
// Calls next_frame_context() and begins a renderdoc capture.
void begin_renderdoc_capture();
// Calls next_frame_context() and ends the renderdoc capture.
void end_renderdoc_capture();
// Set names for objects for debuggers and profilers.
void set_name(const Buffer &buffer, const char *name);
void set_name(const Image &image, const char *name);
void set_name(const CommandBuffer &cmd, const char *name);
// Submission interface, may be called from any thread at any time.
void flush_frame();
CommandBufferHandle request_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
CommandBufferHandle request_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
CommandBufferHandle request_profiled_command_buffer(CommandBuffer::Type type = CommandBuffer::Type::Generic);
CommandBufferHandle request_profiled_command_buffer_for_thread(unsigned thread_index, CommandBuffer::Type type = CommandBuffer::Type::Generic);
void submit(CommandBufferHandle &cmd, Fence *fence = nullptr,
unsigned semaphore_count = 0, Semaphore *semaphore = nullptr);
void submit_empty(CommandBuffer::Type type,
Fence *fence = nullptr,
unsigned semaphore_count = 0,
Semaphore *semaphore = nullptr);
void submit_discard(CommandBufferHandle &cmd);
void add_wait_semaphore(CommandBuffer::Type type, Semaphore semaphore, VkPipelineStageFlags stages, bool flush);
QueueIndices get_physical_queue_type(CommandBuffer::Type queue_type) const;
void register_time_interval(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts,
std::string tag, std::string extra = {});
// Request shaders and programs. These objects are owned by the Device.
Shader *request_shader(const uint32_t *code, size_t size,
const ResourceLayout *layout = nullptr,
const ImmutableSamplerBank *sampler_bank = nullptr);
Shader *request_shader_by_hash(Util::Hash hash);
Program *request_program(const uint32_t *vertex_data, size_t vertex_size, const uint32_t *fragment_data,
size_t fragment_size,
const ResourceLayout *vertex_layout = nullptr,
const ResourceLayout *fragment_layout = nullptr);
Program *request_program(const uint32_t *compute_data, size_t compute_size,
const ResourceLayout *layout = nullptr,
const ImmutableSamplerBank *sampler_bank = nullptr);
Program *request_program(Shader *vertex, Shader *fragment);
Program *request_program(Shader *compute);
const ImmutableYcbcrConversion *request_immutable_ycbcr_conversion(const VkSamplerYcbcrConversionCreateInfo &info);
const ImmutableSampler *request_immutable_sampler(const SamplerCreateInfo &info, const ImmutableYcbcrConversion *ycbcr);
// Map and unmap buffer objects.
void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access);
void *map_host_buffer(const Buffer &buffer, MemoryAccessFlags access, VkDeviceSize offset, VkDeviceSize length);
void unmap_host_buffer(const Buffer &buffer, MemoryAccessFlags access, VkDeviceSize offset, VkDeviceSize length);
void *map_linear_host_image(const LinearHostImage &image, MemoryAccessFlags access);
void unmap_linear_host_image_and_sync(const LinearHostImage &image, MemoryAccessFlags access);
// Create buffers and images.
BufferHandle create_buffer(const BufferCreateInfo &info, const void *initial = nullptr);
BufferHandle create_imported_host_buffer(const BufferCreateInfo &info, VkExternalMemoryHandleTypeFlagBits type, void *host_buffer);
ImageHandle create_image(const ImageCreateInfo &info, const ImageInitialData *initial = nullptr);
ImageHandle create_image_from_staging_buffer(const ImageCreateInfo &info, const InitialImageBuffer *buffer);
LinearHostImageHandle create_linear_host_image(const LinearHostImageCreateInfo &info);
DeviceAllocationOwnerHandle take_device_allocation_ownership(Image &image);
DeviceAllocationOwnerHandle allocate_memory(const MemoryAllocateInfo &info);
// Create staging buffers for images.
InitialImageBuffer create_image_staging_buffer(const ImageCreateInfo &info, const ImageInitialData *initial);
InitialImageBuffer create_image_staging_buffer(const TextureFormatLayout &layout);
#ifndef _WIN32
ImageHandle create_imported_image(int fd,
VkDeviceSize size,
uint32_t memory_type,
VkExternalMemoryHandleTypeFlagBitsKHR handle_type,
const ImageCreateInfo &create_info);
#endif
// Create image view, buffer views and samplers.
ImageViewHandle create_image_view(const ImageViewCreateInfo &view_info);
BufferViewHandle create_buffer_view(const BufferViewCreateInfo &view_info);
SamplerHandle create_sampler(const SamplerCreateInfo &info);
BindlessDescriptorPoolHandle create_bindless_descriptor_pool(BindlessResourceType type,
unsigned num_sets, unsigned num_descriptors);
// Render pass helpers.
bool image_format_is_supported(VkFormat format, VkFormatFeatureFlags2KHR required, VkImageTiling tiling = VK_IMAGE_TILING_OPTIMAL) const;
void get_format_properties(VkFormat format, VkFormatProperties3KHR *properties) const;
bool get_image_format_properties(VkFormat format, VkImageType type, VkImageTiling tiling,
VkImageUsageFlags usage, VkImageCreateFlags flags,
VkImageFormatProperties2 *properties2) const;
VkFormat get_default_depth_stencil_format() const;
VkFormat get_default_depth_format() const;
ImageHandle get_transient_attachment(unsigned width, unsigned height, VkFormat format,
unsigned index = 0, unsigned samples = 1, unsigned layers = 1);
RenderPassInfo get_swapchain_render_pass(SwapchainRenderPass style);
// Request legacy (non-timeline) semaphores.
// Timeline semaphores are only used internally to reduce handle bloat.
Semaphore request_legacy_semaphore();
Semaphore request_external_semaphore(VkSemaphore semaphore, bool signalled);
#ifndef _WIN32
Semaphore request_imported_semaphore(int fd, VkExternalSemaphoreHandleTypeFlagBitsKHR handle_type);
#endif
// A proxy semaphore which lets us grab a semaphore handle before we signal it.
// Mostly useful to deal better with render graph implementation.
// TODO: When we require timeline semaphores, this could be a bit more elegant, and we could expose timeline directly.
// For time being however, we'll support moving the payload over to the proxy object.
Semaphore request_proxy_semaphore();
VkDevice get_device() const
{
return device;
}
VkPhysicalDevice get_physical_device() const
{
return gpu;
}
const VkPhysicalDeviceMemoryProperties &get_memory_properties() const
{
return mem_props;
}
const VkPhysicalDeviceProperties &get_gpu_properties() const
{
return gpu_props;
}
void get_memory_budget(HeapBudget *budget);
const Sampler &get_stock_sampler(StockSampler sampler) const;
#ifdef GRANITE_VULKAN_FILESYSTEM
ShaderManager &get_shader_manager();
TextureManager &get_texture_manager();
void init_shader_manager_cache();
void flush_shader_manager_cache();
#endif
// For some platforms, the device and queue might be shared, possibly across threads, so need some mechanism to
// lock the global device and queue.
void set_queue_lock(std::function<void ()> lock_callback,
std::function<void ()> unlock_callback);
const ImplementationWorkarounds &get_workarounds() const
{
return workarounds;
}
const DeviceFeatures &get_device_features() const
{
return ext;
}
bool swapchain_touched() const;
double convert_device_timestamp_delta(uint64_t start_ticks, uint64_t end_ticks) const;
// Writes a timestamp on host side, which is calibrated to the GPU timebase.
QueryPoolHandle write_calibrated_timestamp();
// A split version of VkEvent handling which lets us record a wait command before signal is recorded.
PipelineEvent begin_signal_event(VkPipelineStageFlags stages);
// Promotes any read-write cached state to read-only,
// which eliminates need to read/write lock.
// Can be called at any time as long as there is no
// racing access to:
// - Command buffer recording which uses pipelines (texture manager is fine).
// - request_shader()
// - request_program()
// Generally, this should be called before you call next_frame_context().
void promote_read_write_caches_to_read_only();
const Context::SystemHandles &get_system_handles() const
{
return system_handles;
}
void configure_default_geometry_samplers(float max_aniso, float lod_bias);
bool supports_subgroup_size_log2(bool subgroup_full_group,
uint8_t subgroup_minimum_size_log2,
uint8_t subgroup_maximum_size_log2) const;
const QueueInfo &get_queue_info() const;
void timestamp_log_reset();
void timestamp_log(const TimestampIntervalReportCallback &cb) const;
private:
VkInstance instance = VK_NULL_HANDLE;
VkPhysicalDevice gpu = VK_NULL_HANDLE;
VkDevice device = VK_NULL_HANDLE;
const VolkDeviceTable *table = nullptr;
QueueInfo queue_info;
unsigned num_thread_indices = 1;
#ifdef GRANITE_VULKAN_MT
std::atomic<uint64_t> cookie;
#else
uint64_t cookie = 0;
#endif
uint64_t allocate_cookie();
void bake_program(Program &program);
void request_vertex_block(BufferBlock &block, VkDeviceSize size);
void request_index_block(BufferBlock &block, VkDeviceSize size);
void request_uniform_block(BufferBlock &block, VkDeviceSize size);
void request_staging_block(BufferBlock &block, VkDeviceSize size);
QueryPoolHandle write_timestamp(VkCommandBuffer cmd, VkPipelineStageFlagBits stage);
void set_acquire_semaphore(unsigned index, Semaphore acquire);
Semaphore consume_release_semaphore();
VkQueue get_current_present_queue() const;
PipelineLayout *request_pipeline_layout(const CombinedResourceLayout &layout,
const ImmutableSamplerBank *immutable_samplers);
DescriptorSetAllocator *request_descriptor_set_allocator(const DescriptorSetLayout &layout,
const uint32_t *stages_for_sets,
const ImmutableSampler * const *immutable_samplers);
const Framebuffer &request_framebuffer(const RenderPassInfo &info);
const RenderPass &request_render_pass(const RenderPassInfo &info, bool compatible);
VkPhysicalDeviceMemoryProperties mem_props;
VkPhysicalDeviceProperties gpu_props;
DeviceFeatures ext;
void init_stock_samplers();
void init_stock_sampler(StockSampler sampler, float max_aniso, float lod_bias);
void init_timeline_semaphores();
void init_bindless();
void deinit_timeline_semaphores();
uint64_t update_wrapped_device_timestamp(uint64_t ts);
int64_t convert_timestamp_to_absolute_nsec(const QueryPoolResult &handle);
Context::SystemHandles system_handles;
QueryPoolHandle write_timestamp_nolock(VkCommandBuffer cmd, VkPipelineStageFlagBits stage);
QueryPoolHandle write_calibrated_timestamp_nolock();
void register_time_interval_nolock(std::string tid, QueryPoolHandle start_ts, QueryPoolHandle end_ts, std::string tag, std::string extra);
// Make sure this is deleted last.
HandlePool handle_pool;
// Calibrated timestamps.
void init_calibrated_timestamps();
void recalibrate_timestamps_fallback();
void recalibrate_timestamps();
bool resample_calibrated_timestamps();
VkTimeDomainEXT calibrated_time_domain = VK_TIME_DOMAIN_DEVICE_EXT;
int64_t calibrated_timestamp_device = 0;
int64_t calibrated_timestamp_host = 0;
int64_t calibrated_timestamp_device_accum = 0;
int64_t last_calibrated_timestamp_host = 0; // To ensure monotonicity after a recalibration.
unsigned timestamp_calibration_counter = 0;
Vulkan::QueryPoolHandle frame_context_begin_ts;
struct Managers
{
DeviceAllocator memory;
FenceManager fence;
SemaphoreManager semaphore;
EventManager event;
BufferPool vbo, ibo, ubo, staging;
TimestampIntervalManager timestamps;
};
Managers managers;
struct
{
#ifdef GRANITE_VULKAN_MT
std::mutex lock;
std::condition_variable cond;
#endif
unsigned counter = 0;
} lock;
struct PerFrame
{
PerFrame(Device *device, unsigned index);
~PerFrame();
void operator=(const PerFrame &) = delete;
PerFrame(const PerFrame &) = delete;
void begin();
void trim_command_pools();
Device &device;
unsigned frame_index;
const VolkDeviceTable &table;
Managers &managers;
std::vector<CommandPool> cmd_pools[QUEUE_INDEX_COUNT];
VkSemaphore timeline_semaphores[QUEUE_INDEX_COUNT] = {};
uint64_t timeline_fences[QUEUE_INDEX_COUNT] = {};
QueryPool query_pool;
std::vector<BufferBlock> vbo_blocks;
std::vector<BufferBlock> ibo_blocks;
std::vector<BufferBlock> ubo_blocks;
std::vector<BufferBlock> staging_blocks;
std::vector<VkFence> wait_fences;
std::vector<VkFence> recycle_fences;
std::vector<DeviceAllocation> allocations;
std::vector<VkFramebuffer> destroyed_framebuffers;
std::vector<VkSampler> destroyed_samplers;
std::vector<VkPipeline> destroyed_pipelines;
std::vector<VkImageView> destroyed_image_views;
std::vector<VkBufferView> destroyed_buffer_views;
std::vector<VkImage> destroyed_images;
std::vector<VkBuffer> destroyed_buffers;
std::vector<VkDescriptorPool> destroyed_descriptor_pools;
Util::SmallVector<CommandBufferHandle> submissions[QUEUE_INDEX_COUNT];
std::vector<VkSemaphore> recycled_semaphores;
std::vector<VkEvent> recycled_events;
std::vector<VkSemaphore> destroyed_semaphores;
std::vector<ImageHandle> keep_alive_images;
struct DebugChannel
{
DebugChannelInterface *iface;
std::string tag;
BufferHandle buffer;
};
std::vector<DebugChannel> debug_channels;
struct TimestampIntervalHandles
{
std::string tid;
QueryPoolHandle start_ts;
QueryPoolHandle end_ts;
TimestampInterval *timestamp_tag;
std::string extra;
};
std::vector<TimestampIntervalHandles> timestamp_intervals;
bool in_destructor = false;
};
// The per frame structure must be destroyed after
// the hashmap data structures below, so it must be declared before.
std::vector<std::unique_ptr<PerFrame>> per_frame;
struct
{
Semaphore acquire;
Semaphore release;
std::vector<ImageHandle> swapchain;
VkQueue present_queue = VK_NULL_HANDLE;
uint32_t queue_family_support_mask = 0;
unsigned index = 0;
bool consumed = false;
} wsi;
bool can_touch_swapchain_in_command_buffer(QueueIndices physical_type) const;
struct QueueData
{
Util::SmallVector<Semaphore> wait_semaphores;
Util::SmallVector<VkPipelineStageFlags> wait_stages;
bool need_fence = false;
VkSemaphore timeline_semaphore = VK_NULL_HANDLE;
uint64_t current_timeline = 0;
PerformanceQueryPool performance_query_pool;
} queue_data[QUEUE_INDEX_COUNT];
struct InternalFence
{
VkFence fence;
VkSemaphore timeline;
uint64_t value;
};
// Pending buffers which need to be copied from CPU to GPU before submitting graphics or compute work.
struct
{
std::vector<BufferBlock> vbo;
std::vector<BufferBlock> ibo;
std::vector<BufferBlock> ubo;
} dma;
void submit_queue(QueueIndices physical_type, InternalFence *fence,
unsigned semaphore_count = 0,
Semaphore *semaphore = nullptr,
int profiled_iteration = -1);
PerFrame &frame()
{
VK_ASSERT(frame_context_index < per_frame.size());
VK_ASSERT(per_frame[frame_context_index]);
return *per_frame[frame_context_index];
}
const PerFrame &frame() const
{
VK_ASSERT(frame_context_index < per_frame.size());
VK_ASSERT(per_frame[frame_context_index]);
return *per_frame[frame_context_index];
}
unsigned frame_context_index = 0;
uint32_t find_memory_type(BufferDomain domain, uint32_t mask) const;
uint32_t find_memory_type(ImageDomain domain, uint32_t mask) const;
uint32_t find_memory_type(uint32_t required, uint32_t mask) const;
bool memory_type_is_device_optimal(uint32_t type) const;
bool memory_type_is_host_visible(uint32_t type) const;
const ImmutableSampler *samplers[static_cast<unsigned>(StockSampler::Count)] = {};
VulkanCache<PipelineLayout> pipeline_layouts;
VulkanCache<DescriptorSetAllocator> descriptor_set_allocators;
VulkanCache<RenderPass> render_passes;
VulkanCache<Shader> shaders;
VulkanCache<Program> programs;
VulkanCache<ImmutableSampler> immutable_samplers;
VulkanCache<ImmutableYcbcrConversion> immutable_ycbcr_conversions;
DescriptorSetAllocator *bindless_sampled_image_allocator_fp = nullptr;
DescriptorSetAllocator *bindless_sampled_image_allocator_integer = nullptr;
FramebufferAllocator framebuffer_allocator;
TransientAttachmentAllocator transient_allocator;
VkPipelineCache pipeline_cache = VK_NULL_HANDLE;
void init_pipeline_cache();
void flush_pipeline_cache();
PerformanceQueryPool &get_performance_query_pool(QueueIndices physical_type);
void clear_wait_semaphores();
void submit_staging(CommandBufferHandle &cmd, VkBufferUsageFlags usage, bool flush);
PipelineEvent request_pipeline_event();
std::function<void ()> queue_lock_callback;
std::function<void ()> queue_unlock_callback;
void flush_frame(QueueIndices physical_type);
void sync_buffer_blocks();
void submit_empty_inner(QueueIndices type, InternalFence *fence,
unsigned semaphore_count,
Semaphore *semaphore);
void collect_wait_semaphores(QueueData &data, Helper::WaitSemaphores &semaphores);
void emit_queue_signals(Helper::BatchComposer &composer,
VkSemaphore sem, uint64_t timeline, InternalFence *fence,
unsigned semaphore_count, Semaphore *semaphores);
VkResult submit_batches(Helper::BatchComposer &composer, VkQueue queue, VkFence fence,
int profiling_iteration = -1);
void destroy_buffer(VkBuffer buffer);
void destroy_image(VkImage image);
void destroy_image_view(VkImageView view);
void destroy_buffer_view(VkBufferView view);
void destroy_pipeline(VkPipeline pipeline);
void destroy_sampler(VkSampler sampler);
void destroy_framebuffer(VkFramebuffer framebuffer);
void destroy_semaphore(VkSemaphore semaphore);
void recycle_semaphore(VkSemaphore semaphore);
void destroy_event(VkEvent event);
void free_memory(const DeviceAllocation &alloc);
void reset_fence(VkFence fence, bool observed_wait);
void keep_handle_alive(ImageHandle handle);
void destroy_descriptor_pool(VkDescriptorPool desc_pool);
void destroy_buffer_nolock(VkBuffer buffer);
void destroy_image_nolock(VkImage image);
void destroy_image_view_nolock(VkImageView view);
void destroy_buffer_view_nolock(VkBufferView view);
void destroy_pipeline_nolock(VkPipeline pipeline);
void destroy_sampler_nolock(VkSampler sampler);
void destroy_framebuffer_nolock(VkFramebuffer framebuffer);
void destroy_semaphore_nolock(VkSemaphore semaphore);
void recycle_semaphore_nolock(VkSemaphore semaphore);
void destroy_event_nolock(VkEvent event);
void free_memory_nolock(const DeviceAllocation &alloc);
void destroy_descriptor_pool_nolock(VkDescriptorPool desc_pool);
void reset_fence_nolock(VkFence fence, bool observed_wait);
void flush_frame_nolock();
CommandBufferHandle request_command_buffer_nolock(unsigned thread_index, CommandBuffer::Type type, bool profiled);
void submit_discard_nolock(CommandBufferHandle &cmd);
void submit_nolock(CommandBufferHandle cmd, Fence *fence,
unsigned semaphore_count, Semaphore *semaphore);
void submit_empty_nolock(QueueIndices physical_type, Fence *fence,
unsigned semaphore_count,
Semaphore *semaphore, int profiling_iteration);
void add_wait_semaphore_nolock(QueueIndices type, Semaphore semaphore, VkPipelineStageFlags stages,
bool flush);
void request_vertex_block_nolock(BufferBlock &block, VkDeviceSize size);
void request_index_block_nolock(BufferBlock &block, VkDeviceSize size);
void request_uniform_block_nolock(BufferBlock &block, VkDeviceSize size);
void request_staging_block_nolock(BufferBlock &block, VkDeviceSize size);
CommandBufferHandle request_secondary_command_buffer_for_thread(unsigned thread_index,
const Framebuffer *framebuffer,
unsigned subpass,
CommandBuffer::Type type = CommandBuffer::Type::Generic);
void add_frame_counter_nolock();
void decrement_frame_counter_nolock();
void submit_secondary(CommandBuffer &primary, CommandBuffer &secondary);
void wait_idle_nolock();
void end_frame_nolock();
void add_debug_channel_buffer(DebugChannelInterface *iface, std::string tag, BufferHandle buffer);
void parse_debug_channel(const PerFrame::DebugChannel &channel);
Fence request_legacy_fence();
#ifdef GRANITE_VULKAN_FILESYSTEM
ShaderManager shader_manager;
TextureManager texture_manager;
#endif
#ifdef GRANITE_VULKAN_FOSSILIZE
Fossilize::StateRecorder state_recorder;
bool enqueue_create_sampler(Fossilize::Hash hash, const VkSamplerCreateInfo *create_info, VkSampler *sampler) override;
bool enqueue_create_descriptor_set_layout(Fossilize::Hash hash, const VkDescriptorSetLayoutCreateInfo *create_info, VkDescriptorSetLayout *layout) override;
bool enqueue_create_pipeline_layout(Fossilize::Hash hash, const VkPipelineLayoutCreateInfo *create_info, VkPipelineLayout *layout) override;
bool enqueue_create_shader_module(Fossilize::Hash hash, const VkShaderModuleCreateInfo *create_info, VkShaderModule *module) override;
bool enqueue_create_render_pass(Fossilize::Hash hash, const VkRenderPassCreateInfo *create_info, VkRenderPass *render_pass) override;
bool enqueue_create_render_pass2(Fossilize::Hash hash, const VkRenderPassCreateInfo2 *create_info, VkRenderPass *render_pass) override;
bool enqueue_create_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo *create_info, VkPipeline *pipeline) override;
bool enqueue_create_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo *create_info, VkPipeline *pipeline) override;
bool enqueue_create_raytracing_pipeline(Fossilize::Hash hash, const VkRayTracingPipelineCreateInfoKHR *create_info, VkPipeline *pipeline) override;
void notify_replayed_resources_for_type() override;
VkPipeline fossilize_create_graphics_pipeline(Fossilize::Hash hash, VkGraphicsPipelineCreateInfo &info);
VkPipeline fossilize_create_compute_pipeline(Fossilize::Hash hash, VkComputePipelineCreateInfo &info);
void register_graphics_pipeline(Fossilize::Hash hash, const VkGraphicsPipelineCreateInfo &info);
void register_compute_pipeline(Fossilize::Hash hash, const VkComputePipelineCreateInfo &info);
void register_render_pass(VkRenderPass render_pass, Fossilize::Hash hash, const VkRenderPassCreateInfo &info);
void register_descriptor_set_layout(VkDescriptorSetLayout layout, Fossilize::Hash hash, const VkDescriptorSetLayoutCreateInfo &info);
void register_pipeline_layout(VkPipelineLayout layout, Fossilize::Hash hash, const VkPipelineLayoutCreateInfo &info);
void register_shader_module(VkShaderModule module, Fossilize::Hash hash, const VkShaderModuleCreateInfo &info);
//void register_sampler(VkSampler sampler, Fossilize::Hash hash, const VkSamplerCreateInfo &info);
struct
{
std::unordered_map<VkShaderModule, Shader *> shader_map;
std::unordered_map<VkRenderPass, RenderPass *> render_pass_map;
const Fossilize::FeatureFilter *feature_filter = nullptr;
#ifdef GRANITE_VULKAN_MT
// Need to forward-declare the type, avoid the ref-counted wrapper.
Granite::TaskGroup *pipeline_group = nullptr;
#endif
} replayer_state;
void init_pipeline_state(const Fossilize::FeatureFilter &filter);
void flush_pipeline_state();
#endif
ImplementationWorkarounds workarounds;
void init_workarounds();
void report_checkpoints();
void fill_buffer_sharing_indices(VkBufferCreateInfo &create_info, uint32_t *sharing_indices);
bool allocate_image_memory(DeviceAllocation *allocation, const ImageCreateInfo &info,
VkImage image, VkImageTiling tiling);
};
// A fairly complex helper used for async queue readbacks.
// Typically used for things like headless backend which emulates WSI through readbacks + encode.
struct OwnershipTransferInfo
{
CommandBuffer::Type old_queue;
CommandBuffer::Type new_queue;
VkImageLayout old_image_layout;
VkImageLayout new_image_layout;
VkPipelineStageFlags dst_pipeline_stage;
VkAccessFlags dst_access;
};
// For an image which was last accessed in old_queue, requests a command buffer
// for new_queue. Commands will be enqueued as necessary in new_queue to ensure that a complete ownership
// transfer has taken place.
// If queue family for old_queue differs from new_queue, a release barrier is enqueued in old_queue.
// In new_queue we perform either an acquire barrier or a simple pipeline barrier to change layout if required.
// If semaphore is a valid handle, it will be waited on in either old_queue to perform release barrier
// or new_queue depending on what is required.
// If the image uses CONCURRENT sharing mode, acquire/release barriers are skipped.
CommandBufferHandle request_command_buffer_with_ownership_transfer(
Device &device,
const Vulkan::Image &image,
const OwnershipTransferInfo &info,
const Vulkan::Semaphore &semaphore);
using DeviceHandle = Util::IntrusivePtr<Device>;
}