Place rsp.Step inside the cached interpreter. Gains about 3 more FPS

This commit is contained in:
2026-05-29 10:24:57 +02:00
parent bb97dcc23f
commit 2147195774
7 changed files with 4 additions and 166 deletions
+1 -3
View File
@@ -88,9 +88,6 @@ void Core::StepRSP(const u32 cpuCycles) {
return;
}
if (cpuType == CachedInterpreter)
return mmio.rsp.ExecuteCached();
static constexpr u32 cpuRatio = 3, rspRatio = 2;
regs.steps += cpuCycles;
@@ -123,6 +120,7 @@ void Core::Run(const float volumeL, const float volumeR) {
const u32 taken = StepCPU();
cycles += taken;
frameCycles += taken;
if (cpuType == Interpreted) // because i will call it inside instead, when it's not the interpreter
StepRSP(taken);
Scheduler::GetInstance().Tick(taken);
}
+1
View File
@@ -152,6 +152,7 @@ u32 Interpreter::ExecuteCached() {
// 0, making so the emulator halts cause the outer loop won't advance
const auto blockCycles = line->cycles;
for (u32 i = 0; i < line->len; i++) {
Core::GetInstance().StepRSP(1);
if (!MaybeAdvance())
return i + 1;
-35
View File
@@ -5,41 +5,6 @@
namespace n64 {
struct Core;
/*
static constexpr u32 MAX_INSTR_PER_BLOCK = 128;
static constexpr u32 MAX_LINES = 1 << 12;
#define CACHE_GET_BLOCK(addr) (addr / MAX_LINES)
#define CACHE_GET_LINE(addr) ((addr & (MAX_LINES - 1)) >> 2)
struct CachedLine {
std::array<Instruction, MAX_INSTR_PER_BLOCK> code = {};
u32 len = 0;
u32 cycles = 0;
} __attribute__((__packed__));
struct CachedBlock {
CachedBlock() { lines.resize(MAX_LINES / 4); }
std::vector<CachedLine *> lines = {};
};
struct CachedState {
std::vector<CachedBlock *> blocks = {};
bool exception = false;
void Reset() {
for (auto block : blocks) {
if (block)
for (auto line : block->lines)
delete line;
delete block;
}
blocks = {};
blocks.resize(((u64)std::numeric_limits<u32>::max() + 1) / MAX_LINES);
}
};
*/
struct Interpreter final {
explicit Interpreter(Mem &, Registers &);
+1 -1
View File
@@ -27,7 +27,7 @@ struct CachedState {
std::vector<CachedBlock<MAX_LINES / 4> *> blocks = {};
bool exception = false;
void EvictCachedBlock(u64 addr) { blocks[addr / MAX_LINES] = {}; }
void EvictCachedBlock(u32 addr) { blocks[addr / MAX_LINES] = {}; }
void Reset() {
for (auto block : blocks) {
-12
View File
@@ -315,9 +315,6 @@ void Mem::Write<u8>(u32 paddr, u32 val) {
bool is_imem = paddr & 0x1000;
auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem;
paddr = (paddr & 0xFFF) & ~3;
if (is_imem) {
mmio.rsp.cachedState.EvictCachedBlock(paddr);
}
ircolib::WriteAccess<u32>(dest, paddr, val);
return;
}
@@ -368,9 +365,6 @@ void Mem::Write<u16>(u32 paddr, u32 val) {
bool is_imem = paddr & 0x1000;
auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem;
paddr = (paddr & 0xFFF) & ~3;
if (is_imem) {
mmio.rsp.cachedState.EvictCachedBlock(paddr);
}
ircolib::WriteAccess<u32>(dest, paddr, val);
return;
}
@@ -419,9 +413,6 @@ void Mem::Write<u32>(const u32 paddr, const u32 val) {
if (ircolib::IsInsideRange(paddr, DMEM_REGION_START, RSP_MEM_REGION_END)) {
bool is_imem = paddr & 0x1000;
auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem;
if (is_imem) {
mmio.rsp.cachedState.EvictCachedBlock(paddr & 0xfff);
}
ircolib::WriteAccess<u32>(dest, paddr & 0xfff, val);
return;
}
@@ -469,9 +460,6 @@ void Mem::Write(const u32 paddr, u64 val) {
if (ircolib::IsInsideRange(paddr, DMEM_REGION_START, RSP_MEM_REGION_END)) {
bool is_imem = paddr & 0x1000;
auto &dest = is_imem ? mmio.rsp.imem : mmio.rsp.dmem;
if (is_imem) {
mmio.rsp.cachedState.EvictCachedBlock(paddr & 0xfff);
}
val >>= 32;
ircolib::WriteAccess<u32>(dest, paddr & 0xfff, val);
return;
-102
View File
@@ -31,39 +31,6 @@ void RSP::Reset() {
steps = 0;
}
/*
FORCE_INLINE void logRSP(const RSP& rsp, const u32 instr) {
debug("{:04X} {:08X} ", rsp.oldPC, instr);
for (auto gpr : rsp.gpr) {
debug("{:08X} ", gpr);
}
for (auto vpr : rsp.vpr) {
for (int i = 0; i < 8; i++) {
debug("{:04X}", vpr.element[i]);
}
debug(" ");
}
for (int i = 0; i < 8; i++) {
debug("{:04X}", rsp.acc.h.element[i]);
}
debug(" ");
for (int i = 0; i < 8; i++) {
debug("{:04X}", rsp.acc.m.element[i]);
}
debug(" ");
for (int i = 0; i < 8; i++) {
debug("{:04X}", rsp.acc.l.element[i]);
}
debug(" {:04X} {:04X} {:02X}", rsp.GetVCC(), rsp.GetVCO(), rsp.GetVCE());
debug("DMEM: {:02X}{:02X}", rsp.dmem[0x3c4], rsp.dmem[0x3c5]);
}
*/
auto RSP::Read(const u32 addr) -> u32 {
switch (addr) {
case 0x04040000:
@@ -177,9 +144,6 @@ void RSP::DMA<false>() {
auto &dst = spDMASPAddr.bank ? imem : dmem;
u32 mem_address = spDMASPAddr.address & 0xFF8;
if (spDMASPAddr.bank) {
cachedState.EvictCachedBlock(mem_address);
}
u32 dram_address = spDMADRAMAddr.address & 0xFFFFF8;
trace("SP DMA from RDRAM to RSP (size: {} B, {:08X} to {:08X})", length, dram_address, mem_address);
@@ -235,70 +199,4 @@ void RSP::Write(const u32 addr, const u32 val) {
panic("Unimplemented SP register write {:08X}, val: {:08X}", addr, val);
}
}
// Builds a cached line of instructions starting at `addr`, stores it in
// cachedState, and then immediately runs it through ExecuteCached().
//
// Instructions are read from imem one 32-bit word at a time (the read address
// is masked with IMEM_DSIZE) until one of the following happens:
//   - MAX_INSTR_PER_BLOCK instructions have been read,
//   - InstrEndsBlock() reports a block-ending instruction with no delay slot,
//   - the delay slot following a block-ending instruction has been read.
//
// `addr` is the start address of the line; it is also used (unmodified, via
// blockAddr) to pick the block/line slot the result is stored in.
void RSP::CacheBlock(u16 addr) {
// Remember the entry address: `addr` itself is advanced while fetching.
auto blockAddr = addr;
CachedLine line;
u32 i;
bool fetchDelaySlot = false;
for (i = 0; i < MAX_INSTR_PER_BLOCK; i++) {
Instruction instr = ircolib::ReadAccess<u32>(imem, addr & IMEM_DSIZE);
addr += 4;
line.code[i] = instr;
// The instruction just stored was the delay slot of the previous
// block-ending instruction: count it and stop.
if (fetchDelaySlot) {
i++;
break;
}
if (InstrEndsBlock(instr)) {
// `!fetchDelaySlot` is always true here, since the check above breaks
// out of the loop whenever fetchDelaySlot is set.
if (InstrHasDelaySlot(instr) && !fetchDelaySlot) {
fetchDelaySlot = true;
continue;
}
// A block-ending instruction at index 0 would otherwise produce a line of
// length 0; force a length of 1 so that instruction is part of the line.
// NOTE(review): for i > 0 the block-ending instruction is stored in
// code[i] but is not counted in len (len == i) — confirm this is intended.
if (i == 0)
i = 1;
break;
}
}
// Cycle count and instruction count of the line are both set to `i`.
line.cycles = i;
line.len = i;
// The block pointer is dereferenced without a null check; ExecuteCached()
// allocates the block before calling this function when it is missing.
// NOTE(review): any pointer already stored in this line slot is overwritten
// without being deleted — verify callers only reach here for empty slots.
cachedState.blocks[CACHE_GET_BLOCK(blockAddr)]->lines[CACHE_GET_LINE(blockAddr)] = new CachedLine(line);
return ExecuteCached();
}
void RSP::ExecuteCached() {
u16 addr = pc;
auto &blocks = cachedState.blocks;
if (!blocks[CACHE_GET_BLOCK(addr)]) {
blocks[CACHE_GET_BLOCK(addr)] = new CachedBlock<cachedState.MAX_LINES / 4>();
return CacheBlock(addr);
}
const auto line = blocks[CACHE_GET_BLOCK(addr)]->lines[CACHE_GET_LINE(addr)];
if (line) {
for (u32 i = 0; i < line->len; i++) {
prevDelaySlot = delaySlot;
delaySlot = false;
oldPC = pc & 0xFFC;
pc = nextPC & 0xFFC;
nextPC += 4;
Instruction instr = line->code[i];
Exec(instr);
}
return;
}
return CacheBlock(addr);
}
} // namespace n64
-12
View File
@@ -146,18 +146,10 @@ struct RSP {
VPR l{}, h{};
} vcc, vco;
CachedState<4, 0xFFF> cachedState;
bool delaySlot = false, prevDelaySlot = false;
RSP();
void Reset();
void ExecuteCached();
void CacheBlock(u16 addr);
FORCE_INLINE void Step() {
prevDelaySlot = delaySlot;
delaySlot = false;
gpr[0] = 0;
const u32 instr = ircolib::ReadAccess<u32>(imem, pc & IMEM_DSIZE);
oldPC = pc & 0xFFC;
@@ -232,7 +224,6 @@ struct RSP {
// Writes the 32-bit value `val` at `addr` through the SET_RSP_WORD macro.
// `addr` is first reduced to its low 12 bits (0x000-0xFFF), and the cached
// block covering that address is evicted before the store is performed.
FORCE_INLINE void WriteWord(u32 addr, const u32 val) {
addr &= 0xfff;
cachedState.EvictCachedBlock(addr);
SET_RSP_WORD(addr, val);
}
@@ -243,7 +234,6 @@ struct RSP {
// Writes the 16-bit value `val` at `addr` through the SET_RSP_HALF macro.
// `addr` is first reduced to its low 12 bits (0x000-0xFFF), and the cached
// block covering that address is evicted before the store is performed.
FORCE_INLINE void WriteHalf(u32 addr, const u16 val) {
addr &= 0xfff;
cachedState.EvictCachedBlock(addr);
SET_RSP_HALF(addr, val);
}
@@ -254,7 +244,6 @@ struct RSP {
// Writes the 8-bit value `val` at `addr` by assigning through RSP_BYTE(addr).
// `addr` is first reduced to its low 12 bits (0x000-0xFFF), and the cached
// block covering that address is evicted before the store is performed.
FORCE_INLINE void WriteByte(u32 addr, const u8 val) {
addr &= 0xfff;
cachedState.EvictCachedBlock(addr);
RSP_BYTE(addr) = val;
}
@@ -392,7 +381,6 @@ struct RSP {
// Conditional branch helper.
// When `cond` is true, nextPC is redirected to `address` masked with 0xFFC
// (only bits 2-11 are kept) and delaySlot is set.
// When `cond` is false nothing is modified.
FORCE_INLINE void branch(const u16 address, const bool cond) {
if (cond) {
nextPC = address & 0xFFC;
delaySlot = true;
}
}
};