Squashed 'external/parallel-rdp/parallel-rdp-standalone/' content from commit 3f59f61f2c
git-subtree-dir: external/parallel-rdp/parallel-rdp-standalone git-subtree-split: 3f59f61f2c1c56424356003041df5e4a10612049
This commit is contained in:
284
parallel-rdp/rdp_device.hpp
Normal file
284
parallel-rdp/rdp_device.hpp
Normal file
@@ -0,0 +1,284 @@
|
||||
/* Copyright (c) 2020 Themaister
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <queue>
|
||||
#include "device.hpp"
|
||||
#include "video_interface.hpp"
|
||||
#include "rdp_renderer.hpp"
|
||||
#include "rdp_common.hpp"
|
||||
#include "command_ring.hpp"
|
||||
#include "worker_thread.hpp"
|
||||
#include "rdp_dump_write.hpp"
|
||||
|
||||
namespace RDP
|
||||
{
|
||||
// One 8-bit-per-channel colour value; scanout_sync() fills a vector of these.
struct RGBA
{
	uint8_t r;
	uint8_t g;
	uint8_t b;
	uint8_t a;
};
|
||||
|
||||
// Individual option bits. Each enumerator occupies a distinct bit so that
// several of them can be OR-ed together into a CommandProcessorFlags value,
// which is what the CommandProcessor constructor accepts.
enum CommandProcessorFlagBits
{
	// Host visibility options.
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_HIDDEN_RDRAM_BIT = 1 << 0,
	COMMAND_PROCESSOR_FLAG_HOST_VISIBLE_TMEM_BIT = 1 << 1,

	// Upscaling factor options.
	COMMAND_PROCESSOR_FLAG_UPSCALING_2X_BIT = 1 << 2,
	COMMAND_PROCESSOR_FLAG_UPSCALING_4X_BIT = 1 << 3,
	COMMAND_PROCESSOR_FLAG_UPSCALING_8X_BIT = 1 << 4,

	// Super-sampling options.
	COMMAND_PROCESSOR_FLAG_SUPER_SAMPLED_READ_BACK_BIT = 1 << 5,
	COMMAND_PROCESSOR_FLAG_SUPER_SAMPLED_DITHER_BIT = 1 << 6
};

// Bitwise combination of CommandProcessorFlagBits values.
using CommandProcessorFlags = uint32_t;
|
||||
|
||||
// Describes a single copy region of a CoherencyOperation.
// Every field starts out zeroed / null.
struct CoherencyCopy
{
	// Offsets and length of the region.
	size_t src_offset{0};
	size_t mask_offset{0};
	size_t dst_offset{0};
	size_t size{0};

	// First of `counters` atomic counters associated with this copy.
	std::atomic_uint32_t *counter_base{nullptr};
	unsigned counters{0};
};
|
||||
|
||||
struct CoherencyOperation
|
||||
{
|
||||
Vulkan::Fence fence;
|
||||
uint64_t timeline_value = 0;
|
||||
|
||||
uint8_t *dst = nullptr;
|
||||
const Vulkan::Buffer *src = nullptr;
|
||||
std::vector<CoherencyCopy> copies;
|
||||
std::atomic_uint32_t *unlock_cookie = nullptr;
|
||||
};
|
||||
|
||||
// These options control various behavior when upscaling to workaround glitches which arise naturally as part of upscaling.
//
// The options are stored in a union so that they can be accessed either
// field-by-field (u.options) or as packed 32-bit words (u.words).
struct Quirks
{
	// Defaults: native_resolution_tex_rect = true, native_texture_lod = false.
	inline Quirks()
	{
		// Zero the full word first. The two bool options do not cover all of
		// words[0], and without this the remaining bytes would be left
		// uninitialized, making the packed word non-deterministic.
		u.words[0] = 0;
		u.options.native_resolution_tex_rect = true;
		u.options.native_texture_lod = false;
	}

	// Enables or disables the native_resolution_tex_rect option.
	inline void set_native_resolution_tex_rect(bool enable)
	{
		u.options.native_resolution_tex_rect = enable;
	}

	// Enables or disables the native_texture_lod option.
	inline void set_native_texture_lod(bool enable)
	{
		u.options.native_texture_lod = enable;
	}

	union
	{
		struct Opts
		{
			// If true, force TEX_RECT and TEX_RECT_FLIP to render without upscaling.
			// Works around bilinear filtering bugs in Cycle1/Cycle2 mode where game assumed 1:1 pixel transfer.
			bool native_resolution_tex_rect;

			// Forces LOD to be computed as 1x upscale.
			// Fixes content which relies on LOD computation to select textures in clever ways.
			bool native_texture_lod;
		} options;

		// Packed view of the same storage as `options`.
		uint32_t words[1];
	} u;
};
|
||||
|
||||
class CommandProcessor
|
||||
{
|
||||
public:
|
||||
CommandProcessor(Vulkan::Device &device,
|
||||
void *rdram_ptr,
|
||||
size_t rdram_offset,
|
||||
size_t rdram_size,
|
||||
size_t hidden_rdram_size,
|
||||
CommandProcessorFlags flags);
|
||||
|
||||
~CommandProcessor();
|
||||
|
||||
void set_validation_interface(ValidationInterface *iface);
|
||||
|
||||
bool device_is_supported() const;
|
||||
|
||||
// Synchronization.
|
||||
void flush();
|
||||
uint64_t signal_timeline();
|
||||
void wait_for_timeline(uint64_t index);
|
||||
void idle();
|
||||
void begin_frame_context();
|
||||
|
||||
// Queues up state and drawing commands.
|
||||
void enqueue_command(unsigned num_words, const uint32_t *words);
|
||||
void enqueue_command_direct(unsigned num_words, const uint32_t *words);
|
||||
|
||||
void set_quirks(const Quirks &quirks);
|
||||
|
||||
// Interact with memory.
|
||||
void *begin_read_rdram();
|
||||
void end_write_rdram();
|
||||
void *begin_read_hidden_rdram();
|
||||
void end_write_hidden_rdram();
|
||||
size_t get_rdram_size() const;
|
||||
size_t get_hidden_rdram_size() const;
|
||||
void *get_tmem();
|
||||
|
||||
// Sets VI register
|
||||
void set_vi_register(VIRegister reg, uint32_t value);
|
||||
|
||||
Vulkan::ImageHandle scanout(const ScanoutOptions &opts = {});
|
||||
void scanout_sync(std::vector<RGBA> &colors, unsigned &width, unsigned &height, const ScanoutOptions &opts = {});
|
||||
void scanout_async_buffer(VIScanoutBuffer &buffer, const ScanoutOptions &opts = {});
|
||||
|
||||
// Support for modifying certain registers per-scanline.
|
||||
// The idea is that before we scanout(), we use set_vi_register() to
|
||||
// set frame-global VI register state.
|
||||
// While scanning out, we can support changing some state, in particular HStart and XStart
|
||||
// which allows various raster effects ala HDMA.
|
||||
// For sanity's sake, scanout() reads all memory at once. A fully beam-raced implementation
|
||||
// would render out images every scanline, but that would cripple performance and it's questionable
|
||||
// how this is useful, especially on a 3D console. The only failure case of this style of implementation
|
||||
// would be if a demo attempted to modify VRAM *after* it has been scanned out, i.e. a write-after-read
|
||||
// hazard.
|
||||
|
||||
// Latch registers are initialized to the values in set_vi_register() for each respective register.
|
||||
// After scanout(), the flags state is cleared to 0.
|
||||
void begin_vi_register_per_scanline(VideoInterface::PerScanlineRegisterFlags flags);
|
||||
void set_vi_register_for_scanline(VideoInterface::PerScanlineRegisterBits reg, uint32_t value);
|
||||
|
||||
// Between begin_vi_register_per_scanline() and scanout(), line must be monotonically increasing,
|
||||
// or the call is ignored. Initial value for the line counter is 0
|
||||
// (to set parameters for line 0, use global VI register state).
|
||||
// Currently set registers in set_vi_register_for_scanline() are considered to be the active VI register
|
||||
// values starting with VI line "vi_line", until the bottom of the frame or a new vi_line is set.
|
||||
// Register state is assumed to have been fixed from the last latched scanline up until vi_line.
|
||||
//
|
||||
// The units used for this value matches the hardware YStart registers,
|
||||
// i.e. the first active scanline is not 0, but VI_H_OFFSET_{NTSC,PAL}.
|
||||
// For every scanned line, vi_line should increment by 2.
|
||||
// vi_line must be less than VI_V_END_MAX (really, VI_V_END_{NTSC,PAL}), or it is ignored.
|
||||
void latch_vi_register_for_scanline(unsigned vi_line);
|
||||
|
||||
// Assumes that scanline register state does not change until end of frame.
|
||||
// Must be called before scanout(), or all per-scanline register state is ignored for the scanout.
|
||||
void end_vi_register_per_scanline();
|
||||
|
||||
// Intended flow is something like:
|
||||
// set_vi_register(reg, value0) // value0 used for line [0, 99]
|
||||
// begin_vi_register_per_scanline(flags);
|
||||
// set_vi_register_for_scanline(reg, value1); // value1 used for line [100, 199]
|
||||
// latch_vi_register_for_scanline(100);
|
||||
// set_vi_register_for_scanline(reg, value2);
|
||||
// latch_vi_register_for_scanline(200); // value2 used for line [200, VBlank]
|
||||
// end_vi_register_per_scanline();
|
||||
// scanout();
|
||||
|
||||
private:
|
||||
Vulkan::Device &device;
|
||||
Vulkan::BufferHandle rdram;
|
||||
Vulkan::BufferHandle hidden_rdram;
|
||||
Vulkan::BufferHandle tmem;
|
||||
size_t rdram_offset;
|
||||
size_t rdram_size;
|
||||
CommandProcessorFlags flags;
|
||||
#ifndef PARALLEL_RDP_SHADER_DIR
|
||||
std::unique_ptr<ShaderBank> shader_bank;
|
||||
#endif
|
||||
|
||||
// Tear-down order is important here.
|
||||
Renderer renderer;
|
||||
VideoInterface vi;
|
||||
CommandRing ring;
|
||||
|
||||
void clear_hidden_rdram();
|
||||
void clear_tmem();
|
||||
void clear_buffer(Vulkan::Buffer &buffer, uint32_t value);
|
||||
void init_renderer();
|
||||
void enqueue_command_inner(unsigned num_words, const uint32_t *words);
|
||||
|
||||
Vulkan::ImageHandle scanout(const ScanoutOptions &opts, VkImageLayout target_layout);
|
||||
|
||||
#define OP(x) void op_##x(const uint32_t *words)
|
||||
OP(fill_triangle); OP(fill_z_buffer_triangle); OP(texture_triangle); OP(texture_z_buffer_triangle);
|
||||
OP(shade_triangle); OP(shade_z_buffer_triangle); OP(shade_texture_triangle); OP(shade_texture_z_buffer_triangle);
|
||||
OP(texture_rectangle); OP(texture_rectangle_flip); OP(sync_load); OP(sync_pipe);
|
||||
OP(sync_tile); OP(sync_full); OP(set_key_gb); OP(set_key_r);
|
||||
OP(set_convert); OP(set_scissor); OP(set_prim_depth); OP(set_other_modes);
|
||||
OP(load_tlut); OP(set_tile_size); OP(load_block);
|
||||
OP(load_tile); OP(set_tile); OP(fill_rectangle); OP(set_fill_color);
|
||||
OP(set_fog_color); OP(set_blend_color); OP(set_prim_color); OP(set_env_color);
|
||||
OP(set_combine); OP(set_texture_image); OP(set_mask_image); OP(set_color_image);
|
||||
#undef OP
|
||||
|
||||
ScissorState scissor_state = {};
|
||||
StaticRasterizationState static_state = {};
|
||||
DepthBlendState depth_blend = {};
|
||||
|
||||
struct
|
||||
{
|
||||
uint32_t addr;
|
||||
uint32_t width;
|
||||
TextureFormat fmt;
|
||||
TextureSize size;
|
||||
} texture_image = {};
|
||||
|
||||
uint64_t timeline_value = 0;
|
||||
uint64_t thread_timeline_value = 0;
|
||||
|
||||
struct FenceExecutor
|
||||
{
|
||||
explicit inline FenceExecutor(Vulkan::Device *device_, uint64_t *ptr)
|
||||
: device(device_), value(ptr)
|
||||
{
|
||||
}
|
||||
|
||||
Vulkan::Device *device;
|
||||
uint64_t *value;
|
||||
bool is_sentinel(const CoherencyOperation &work) const;
|
||||
void perform_work(CoherencyOperation &work);
|
||||
void notify_work_locked(const CoherencyOperation &work);
|
||||
};
|
||||
WorkerThread<CoherencyOperation, FenceExecutor> timeline_worker;
|
||||
|
||||
uint8_t *host_rdram = nullptr;
|
||||
bool measure_stall_time = false;
|
||||
bool single_threaded_processing = false;
|
||||
bool is_supported = false;
|
||||
bool is_host_coherent = true;
|
||||
bool timestamp = false;
|
||||
|
||||
friend class Renderer;
|
||||
|
||||
void enqueue_coherency_operation(CoherencyOperation &&op);
|
||||
void drain_command_ring();
|
||||
void decode_triangle_setup(TriangleSetup &setup, const uint32_t *words) const;
|
||||
|
||||
Quirks quirks;
|
||||
|
||||
std::unique_ptr<RDPDumpWriter> dump_writer;
|
||||
bool dump_in_command_list = false;
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user