From 2147195774d0a5e41bc982df157ace7f3613c1b0 Mon Sep 17 00:00:00 2001 From: iris Date: Fri, 29 May 2026 10:24:57 +0200 Subject: [PATCH] Place rsp.Step inside cached interpreter. Gains about 3 more fps --- src/backend/Core.cpp | 6 +- src/backend/core/Interpreter.cpp | 1 + src/backend/core/Interpreter.hpp | 35 ----------- src/backend/core/JITUtils.hpp | 2 +- src/backend/core/Mem.cpp | 12 ---- src/backend/core/RSP.cpp | 102 ------------------------------- src/backend/core/RSP.hpp | 12 ---- 7 files changed, 4 insertions(+), 166 deletions(-) diff --git a/src/backend/Core.cpp b/src/backend/Core.cpp index 650260e..a2308ed 100644 --- a/src/backend/Core.cpp +++ b/src/backend/Core.cpp @@ -88,9 +88,6 @@ void Core::StepRSP(const u32 cpuCycles) { return; } - if (cpuType == CachedInterpreter) - return mmio.rsp.ExecuteCached(); - static constexpr u32 cpuRatio = 3, rspRatio = 2; regs.steps += cpuCycles; @@ -123,7 +120,8 @@ void Core::Run(const float volumeL, const float volumeR) { const u32 taken = StepCPU(); cycles += taken; frameCycles += taken; - StepRSP(taken); + if (cpuType == Interpreted) // because i will call it inside instead, when it's not the interpreter + StepRSP(taken); Scheduler::GetInstance().Tick(taken); } } diff --git a/src/backend/core/Interpreter.cpp b/src/backend/core/Interpreter.cpp index 9d57cbe..81e4192 100644 --- a/src/backend/core/Interpreter.cpp +++ b/src/backend/core/Interpreter.cpp @@ -152,6 +152,7 @@ u32 Interpreter::ExecuteCached() { // 0, making so the emulator halts cause the outer loop won't advance const auto blockCycles = line->cycles; for (u32 i = 0; i < line->len; i++) { + Core::GetInstance().StepRSP(1); if (!MaybeAdvance()) return i + 1; diff --git a/src/backend/core/Interpreter.hpp b/src/backend/core/Interpreter.hpp index a54bf0d..79657b7 100644 --- a/src/backend/core/Interpreter.hpp +++ b/src/backend/core/Interpreter.hpp @@ -5,41 +5,6 @@ namespace n64 { struct Core; -/* -static constexpr u32 MAX_INSTR_PER_BLOCK = 128; -static constexpr u32 MAX_LINES = 1 << 12; - -#define CACHE_GET_BLOCK(addr) (addr / MAX_LINES) -#define CACHE_GET_LINE(addr) ((addr & (MAX_LINES - 1)) >> 2) - -struct CachedLine { - std::array code = {}; - u32 len = 0; - u32 cycles = 0; -} __attribute__((__packed__)); - -struct CachedBlock { - CachedBlock() { lines.resize(MAX_LINES / 4); } - std::vector lines = {}; -}; - -struct CachedState { - std::vector blocks = {}; - bool exception = false; - - void Reset() { - for (auto block : blocks) { - if (block) - for (auto line : block->lines) - delete line; - - delete block; - } - blocks = {}; - blocks.resize(((u64)std::numeric_limits::max() + 1) / MAX_LINES); - } -}; -*/ struct Interpreter final { explicit Interpreter(Mem &, Registers &); diff --git a/src/backend/core/JITUtils.hpp b/src/backend/core/JITUtils.hpp index e6df973..9fe9f09 100644 --- a/src/backend/core/JITUtils.hpp +++ b/src/backend/core/JITUtils.hpp @@ -27,7 +27,7 @@ struct CachedState { std::vector *> blocks = {}; bool exception = false; - void EvictCachedBlock(u64 addr) { blocks[addr / MAX_LINES] = {}; } + void EvictCachedBlock(u32 addr) { blocks[addr / MAX_LINES] = {}; } void Reset() { for (auto block : blocks) { diff --git a/src/backend/core/Mem.cpp b/src/backend/core/Mem.cpp index 0d04f71..6eb887d 100644 --- a/src/backend/core/Mem.cpp +++ b/src/backend/core/Mem.cpp @@ -315,9 +315,6 @@ void Mem::Write(u32 paddr, u32 val) { bool is_imem = paddr & 0x1000; auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem; paddr = (paddr & 0xFFF) & ~3; - if (is_imem) { - mmio.rsp.cachedState.EvictCachedBlock(paddr); - } ircolib::WriteAccess(dest, paddr, val); return; } @@ -368,9 +365,6 @@ void Mem::Write(u32 paddr, u32 val) { bool is_imem = paddr & 0x1000; auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem; paddr = (paddr & 0xFFF) & ~3; - if (is_imem) { - mmio.rsp.cachedState.EvictCachedBlock(paddr); - } ircolib::WriteAccess(dest, paddr, val); return; } @@ -419,9 +413,6 @@ void Mem::Write(const u32 paddr, const u32 val) { if (ircolib::IsInsideRange(paddr, DMEM_REGION_START, RSP_MEM_REGION_END)) { bool is_imem = paddr & 0x1000; auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem; - if (is_imem) { - mmio.rsp.cachedState.EvictCachedBlock(paddr & 0xfff); - } ircolib::WriteAccess(dest, paddr & 0xfff, val); return; } @@ -469,9 +460,6 @@ void Mem::Write(const u32 paddr, u64 val) { if (ircolib::IsInsideRange(paddr, DMEM_REGION_START, RSP_MEM_REGION_END)) { bool is_imem = paddr & 0x1000; auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem; - if (is_imem) { - mmio.rsp.cachedState.EvictCachedBlock(paddr & 0xfff); - } val >>= 32; ircolib::WriteAccess(dest, paddr & 0xfff, val); return; diff --git a/src/backend/core/RSP.cpp b/src/backend/core/RSP.cpp index 77777da..160ebcf 100644 --- a/src/backend/core/RSP.cpp +++ b/src/backend/core/RSP.cpp @@ -31,39 +31,6 @@ void RSP::Reset() { steps = 0; } -/* -FORCE_INLINE void logRSP(const RSP& rsp, const u32 instr) { - debug("{:04X} {:08X} ", rsp.oldPC, instr); - for (auto gpr : rsp.gpr) { - debug("{:08X} ", gpr); - } - - for (auto vpr : rsp.vpr) { - for (int i = 0; i < 8; i++) { - debug("{:04X}", vpr.element[i]); - } - debug(" "); - } - - for (int i = 0; i < 8; i++) { - debug("{:04X}", rsp.acc.h.element[i]); - } - debug(" "); - - for (int i = 0; i < 8; i++) { - debug("{:04X}", rsp.acc.m.element[i]); - } - debug(" "); - - for (int i = 0; i < 8; i++) { - debug("{:04X}", rsp.acc.l.element[i]); - } - - debug(" {:04X} {:04X} {:02X}", rsp.GetVCC(), rsp.GetVCO(), rsp.GetVCE()); - debug("DMEM: {:02X}{:02X}", rsp.dmem[0x3c4], rsp.dmem[0x3c5]); -} -*/ - auto RSP::Read(const u32 addr) -> u32 { switch (addr) { case 0x04040000: @@ -177,9 +144,6 @@ void RSP::DMA() { auto &dst = spDMASPAddr.bank ? imem : dmem; u32 mem_address = spDMASPAddr.address & 0xFF8; - if (spDMASPAddr.bank) { - cachedState.EvictCachedBlock(mem_address); - } u32 dram_address = spDMADRAMAddr.address & 0xFFFFF8; trace("SP DMA from RDRAM to RSP (size: {} B, {:08X} to {:08X})", length, dram_address, mem_address); @@ -235,70 +199,4 @@ void RSP::Write(const u32 addr, const u32 val) { panic("Unimplemented SP register write {:08X}, val: {:08X}", addr, val); } } - -void RSP::CacheBlock(u16 addr) { - auto blockAddr = addr; - - CachedLine line; - u32 i; - bool fetchDelaySlot = false; - - for (i = 0; i < MAX_INSTR_PER_BLOCK; i++) { - Instruction instr = ircolib::ReadAccess(imem, addr & IMEM_DSIZE); - - addr += 4; - line.code[i] = instr; - - if (fetchDelaySlot) { - i++; - break; - } - - if (InstrEndsBlock(instr)) { - if (InstrHasDelaySlot(instr) && !fetchDelaySlot) { - fetchDelaySlot = true; - continue; - } - - if (i == 0) - i = 1; - - break; - } - } - - line.cycles = i; - line.len = i; - cachedState.blocks[CACHE_GET_BLOCK(blockAddr)]->lines[CACHE_GET_LINE(blockAddr)] = new CachedLine(line); - - return ExecuteCached(); -} - -void RSP::ExecuteCached() { - u16 addr = pc; - auto &blocks = cachedState.blocks; - if (!blocks[CACHE_GET_BLOCK(addr)]) { - blocks[CACHE_GET_BLOCK(addr)] = new CachedBlock(); - return CacheBlock(addr); - } - - const auto line = blocks[CACHE_GET_BLOCK(addr)]->lines[CACHE_GET_LINE(addr)]; - if (line) { - for (u32 i = 0; i < line->len; i++) { - prevDelaySlot = delaySlot; - delaySlot = false; - - oldPC = pc & 0xFFC; - pc = nextPC & 0xFFC; - nextPC += 4; - - Instruction instr = line->code[i]; - Exec(instr); - } - - return; - } - - return CacheBlock(addr); -} } // namespace n64 diff --git a/src/backend/core/RSP.hpp b/src/backend/core/RSP.hpp index 165f759..ba303d7 100644 --- a/src/backend/core/RSP.hpp +++ b/src/backend/core/RSP.hpp @@ -146,18 +146,10 @@ struct RSP { VPR l{}, h{}; } vcc, vco; - CachedState<4, 0xFFF> cachedState; - bool delaySlot = false, prevDelaySlot = false; - RSP(); void Reset(); - void ExecuteCached(); - void CacheBlock(u16 addr); - FORCE_INLINE void Step() { - prevDelaySlot = delaySlot; - delaySlot = false; gpr[0] = 0; const u32 instr = ircolib::ReadAccess(imem, pc & IMEM_DSIZE); oldPC = pc & 0xFFC; @@ -232,7 +224,6 @@ struct RSP { FORCE_INLINE void WriteWord(u32 addr, const u32 val) { addr &= 0xfff; - cachedState.EvictCachedBlock(addr); SET_RSP_WORD(addr, val); } @@ -243,7 +234,6 @@ struct RSP { FORCE_INLINE void WriteHalf(u32 addr, const u16 val) { addr &= 0xfff; - cachedState.EvictCachedBlock(addr); SET_RSP_HALF(addr, val); } @@ -254,7 +244,6 @@ struct RSP { FORCE_INLINE void WriteByte(u32 addr, const u8 val) { addr &= 0xfff; - cachedState.EvictCachedBlock(addr); RSP_BYTE(addr) = val; } @@ -392,7 +381,6 @@ struct RSP { FORCE_INLINE void branch(const u16 address, const bool cond) { if (cond) { nextPC = address & 0xFFC; - delaySlot = true; } } };