Cached Interpreter now plays Mario. Gained about 12 fps. Start considering the RSP as well

This commit is contained in:
2026-05-28 17:32:03 +02:00
parent 16778c5549
commit fb2e743982
10 changed files with 170 additions and 116 deletions
+5
View File
@@ -19,6 +19,11 @@ struct Core {
return instance;
}
/// Reports whether an access to `vaddr` has to be rejected as an address error.
///
/// @param mask  Low-order bits that must be clear in `vaddr` (callers pass 0b1, 0b11 or
///              0b111, depending on the access width).
/// @param vaddr Virtual address being checked.
/// @return true when 64-bit addressing is disabled and `vaddr` differs from the value
///         obtained by converting it to `s32` and back, or when any bit selected by `mask`
///         is set in `vaddr`.
static inline bool IsAddressError(u8 mask, u64 vaddr) {
  // Bind by reference: GetRegs() returns a reference to the live register file, and a plain
  // `auto` would copy the whole Registers object on every single check.
  const auto &regs = GetRegs();
  const bool outside32BitRange = !regs.cop0.is64BitAddressing && s32(vaddr) != vaddr;
  const bool misaligned = (vaddr & mask) != 0;
  return outside32BitRange || misaligned;
}
/// Returns a reference to the `regs` member of the object handed out by GetInstance().
static Registers &GetRegs() { return GetInstance().regs; }
/// Returns a reference to the Mem object obtained by dereferencing the `mem` member of the
/// object handed out by GetInstance().
static Mem &GetMem() { return *GetInstance().mem; }
+68 -67
View File
@@ -14,7 +14,7 @@ bool Interpreter::ShouldServiceInterrupt() const {
return interrupts_pending && interrupts_enabled && !currently_handling_exception && !currently_handling_error;
}
void Interpreter::CheckCompareInterrupt() const {
void Interpreter::UpdateCompareInterrupt() const {
regs.cop0.count++;
regs.cop0.count &= 0x1FFFFFFFF;
if (regs.cop0.count == static_cast<u64>(regs.cop0.compare) << 1) {
@@ -36,12 +36,12 @@ bool Interpreter::Fetch(Instruction &instr, u64 vaddr) {
}
bool Interpreter::MaybeAdvance() {
CheckCompareInterrupt();
UpdateCompareInterrupt();
regs.prevDelaySlot = regs.delaySlot;
regs.delaySlot = false;
if (check_address_error(0b11, u64(regs.pc))) [[unlikely]] {
if (Core::IsAddressError(0b11, u64(regs.pc))) [[unlikely]] {
regs.cop0.HandleTLBException(regs.pc);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.pc);
return false;
@@ -60,12 +60,12 @@ bool Interpreter::MaybeAdvance() {
}
bool Interpreter::FetchThenMaybeAdvance(Instruction &instr) {
CheckCompareInterrupt();
UpdateCompareInterrupt();
regs.prevDelaySlot = regs.delaySlot;
regs.delaySlot = false;
if (check_address_error(0b11, u64(regs.pc))) [[unlikely]] {
if (Core::IsAddressError(0b11, u64(regs.pc))) [[unlikely]] {
regs.cop0.HandleTLBException(regs.pc);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.pc);
return false;
@@ -96,85 +96,86 @@ u32 Interpreter::Step() {
return 1;
}
u32 Interpreter::ExecuteCached() {
auto addr = regs.pc;
auto blockAddr = addr;
auto page = CACHE_GET_PAGE(addr);
auto offset = CACHE_GET_BLOCK(addr);
auto &lines = cachedState.lines;
auto &blocks = lines[page].blocks;
if (lines[page].blocks[offset].len > 0) {
info("Executing block @ pc 0x{:016X}", (u64)addr);
auto &block = lines[page].blocks[offset];
// i copy the block cycles here in case the block evicts itself when executing which would set the cycles to
// 0, making so the emulator halts cause the outer loop won't advance
const auto blockCycles = block.cycles;
for (u32 i = 0; i < block.len; i++) {
u32 paddr = 0;
if (!regs.cop0.MapVAddr(Cop0::LOAD, blockAddr, paddr)) {
regs.cop0.HandleTLBException(blockAddr);
regs.cop0.FireException(Cop0::GetTLBExceptionCode(regs.cop0.tlbError, Cop0::LOAD), 0, blockAddr);
return i + 1;
}
blockAddr += 4;
if (!MaybeAdvance())
return i + 1;
Instruction instr = block.code[i];
DecodeExecute(instr);
if (block.exception) {
block.exception = false;
return i + 1;
}
// Branch likely with false condition, it wasn't taken so don't execute the delay slot
if (IsBranchLikely(instr) && !regs.delaySlot)
break;
}
if (blockCycles == 0)
Scheduler::GetInstance().SkipToNext();
return blockCycles;
}
auto &block = lines[page].blocks[offset];
block.code.resize(MAX_INSTRUCTIONS_PER_BLOCK);
info("Compiling block @ pc 0x{:016X}", (u64)addr);
u32 Interpreter::CacheBlock(u32 addr) {
u32 blockAddr = addr;
CachedLine line;
u32 i;
bool fetchDelaySlot = false;
for (i = 0; i < MAX_INSTRUCTIONS_PER_BLOCK; i++) {
for (i = 0; i < MAX_INSTR_PER_BLOCK; i++) {
Instruction instr;
Fetch(instr, addr);
if (!Fetch(instr, addr))
return i + 1;
addr += 4;
block.code[i] = instr;
line.code[i] = instr;
if (fetchDelaySlot) {
i++;
break;
}
if (!InstrEndsBlock(instr))
continue;
if (InstrEndsBlock(instr)) {
if (InstrHasDelaySlot(instr) && !fetchDelaySlot) {
fetchDelaySlot = true;
continue;
}
if (InstrHasDelaySlot(instr) && !fetchDelaySlot) {
fetchDelaySlot = true;
continue;
if (i == 0)
i = 1;
break;
}
break;
}
block.cycles = i;
block.len = i;
line.cycles = i;
line.len = i;
cachedState.blocks[CACHE_GET_BLOCK(blockAddr)]->lines[CACHE_GET_LINE(blockAddr)] = new CachedLine(line);
return ExecuteCached();
}
/// Runs the cached instruction line that starts at the current PC, building it first via
/// CacheBlock() when the cache holds nothing for that address.
///
/// @return The line's cycle count when the loop finishes (either every instruction ran or a
///         branch-likely left `regs.delaySlot` clear), `index + 1` when execution stops early
///         at instruction `index`, or the result of CacheBlock() on a cache miss.
u32 Interpreter::ExecuteCached() {
  const u32 pc = regs.pc;
  auto &blockSlot = cachedState.blocks[CACHE_GET_BLOCK(pc)];

  // Nothing allocated for this block yet: create its line table, then build the line.
  if (blockSlot == nullptr) {
    blockSlot = new CachedBlock<cachedState.MAX_LINES / 4>();
    return CacheBlock(pc);
  }

  const auto cachedLine = blockSlot->lines[CACHE_GET_LINE(pc)];
  if (cachedLine == nullptr)
    return CacheBlock(pc);

  cachedState.exception = false;
  // Take the cycle count before running anything, so the value reported to the caller does
  // not depend on what the executed instructions do to the cache (the original author notes
  // that a block may evict itself while it runs, which would otherwise stall the outer loop).
  const auto totalCycles = cachedLine->cycles;

  for (u32 index = 0; index < cachedLine->len; ++index) {
    const u32 executedSoFar = index + 1;

    if (!MaybeAdvance())
      return executedSoFar;

    Instruction current = cachedLine->code[index];
    DecodeExecute(current);

    // SignalException() raises this flag; abandon the rest of the line when it is set.
    if (cachedState.exception)
      return executedSoFar;

    // Branch likely with false condition, it wasn't taken so don't execute the delay slot
    if (IsBranchLikely(current) && !regs.delaySlot)
      break;
  }

  if (totalCycles == 0) {
    panic("Cycles are 0!");
    Scheduler::GetInstance().SkipToNext();
  }

  return totalCycles;
}
} // namespace n64
+32 -28
View File
@@ -1,39 +1,45 @@
#pragma once
#include <Cache.hpp>
#include <Mem.hpp>
#include <JITUtils.hpp>
namespace n64 {
struct Core;
/*
static constexpr u32 MAX_INSTR_PER_BLOCK = 128;
static constexpr u32 MAX_LINES = 1 << 12;
static constexpr u32 CACHE_OUTER_SHIFT = 12;
static constexpr u32 MAX_LINES = 1 << (32 - CACHE_OUTER_SHIFT);
static constexpr u32 MAX_INSTRUCTIONS_PER_BLOCK = 256 / sizeof(Instruction);
static constexpr u32 MAX_BLOCKS_PER_LINE =
((1 << CACHE_OUTER_SHIFT) / sizeof(Instruction)) / MAX_INSTRUCTIONS_PER_BLOCK;
#define CACHE_GET_PAGE(addr) ((addr >> CACHE_OUTER_SHIFT) & (MAX_LINES - 1))
#define CACHE_GET_BLOCK(addr) ((addr & 0xF00) >> 8)
struct CachedBlock {
std::vector<Instruction> code = {};
u32 len = 0;
u32 cycles = 0;
bool exception = false;
} __attribute__((__packed__));
#define CACHE_GET_BLOCK(addr) (addr / MAX_LINES)
#define CACHE_GET_LINE(addr) ((addr & (MAX_LINES - 1)) >> 2)
struct CachedLine {
std::array<CachedBlock, MAX_BLOCKS_PER_LINE> blocks = {};
std::array<Instruction, MAX_INSTR_PER_BLOCK> code = {};
u32 len = 0;
u32 cycles = 0;
} __attribute__((__packed__));
struct CachedBlock {
CachedBlock() { lines.resize(MAX_LINES / 4); }
std::vector<CachedLine *> lines = {};
};
struct CachedState {
CachedState() { lines.resize(MAX_LINES); }
std::vector<CachedLine> lines = {};
std::vector<CachedBlock *> blocks = {};
bool exception = false;
void Reset() {
lines = {};
lines.resize(MAX_LINES);
for (auto block : blocks) {
if (block)
for (auto line : block->lines)
delete line;
delete block;
}
blocks = {};
blocks.resize(((u64)std::numeric_limits<u32>::max() + 1) / MAX_LINES);
}
};
*/
struct Interpreter final {
explicit Interpreter(Mem &, Registers &);
@@ -42,18 +48,17 @@ struct Interpreter final {
u32 ExecuteCached();
bool FetchThenMaybeAdvance(Instruction &);
bool MaybeAdvance();
u32 CacheBlock(u32 addr);
void SignalException(u64 addr) {
cachedState.lines[CACHE_GET_PAGE(addr)].blocks[CACHE_GET_BLOCK(addr)].exception = true;
}
void EvictCachedBlock(u64 addr) { cachedState.lines[CACHE_GET_PAGE(addr)].blocks[CACHE_GET_BLOCK(addr)] = {}; }
void SignalException(u32 addr) { cachedState.exception = true; }
void EvictCachedBlock(u32 addr) { cachedState.blocks[CACHE_GET_BLOCK(addr)] = {}; }
void Reset() {
cop2Latch = {};
cachedState.Reset();
}
CachedState cachedState;
CachedState<12, std::numeric_limits<u32>::max()> cachedState;
private:
friend struct Cop1;
@@ -71,10 +76,9 @@ struct Interpreter final {
bool Fetch(Instruction &, u64);
void CacheTypeData(u8, u64, u32, u32);
void CacheTypeInstruction(u8, u64, u32, u32);
#define check_address_error(mask, vaddr) \
(((!regs.cop0.is64BitAddressing) && (s32)(vaddr) != (vaddr)) || (((vaddr) & (mask)) != 0))
[[nodiscard]] bool ShouldServiceInterrupt() const;
void CheckCompareInterrupt() const;
void UpdateCompareInterrupt() const;
void cop2Decode(Instruction);
void special(Instruction);
+1 -1
View File
@@ -41,7 +41,7 @@ void JIT::InvalidateBlock(const u32 paddr) {
std::optional<u32> JIT::FetchInstruction(s64 vaddr) {
u32 paddr = 0;
if (check_address_error(0b11, vaddr)) [[unlikely]] {
if (Core::IsAddressError(0b11, vaddr)) [[unlikely]] {
/*regs.cop0.HandleTLBException(blockPC);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, blockPC);
return 1;*/
-3
View File
@@ -116,9 +116,6 @@ struct JIT final {
void BranchAbsTaken(s64 addr);
void BranchAbsTaken(const Xbyak::Reg64 &addr);
#define check_address_error(mask, vaddr) \
(((!regs.cop0.is64BitAddressing) && (s32)(vaddr) != (vaddr)) || (((vaddr) & (mask)) != 0))
[[nodiscard]] bool ShouldServiceInterrupt() const;
void CheckCompareInterrupt() const;
std::optional<u32> FetchInstruction(s64);
+42
View File
@@ -0,0 +1,42 @@
#pragma once
#include <Instruction.hpp>
#include <vector>
#include <array>
namespace n64 {
// Capacity, in instructions, of the `code` array held by each CachedLine.
static constexpr u32 MAX_INSTR_PER_BLOCK = 128;
// Address -> cache index helpers. Both expand to expressions that read `cachedState.MAX_LINES`,
// so an object named `cachedState` exposing MAX_LINES must be in scope at the point of use.
// The argument is parenthesised so that compound expressions such as `base + offset` or
// `a | b` are indexed as a whole instead of being split by operator precedence.
//
// CACHE_GET_BLOCK: which block an address belongs to (address divided by MAX_LINES).
#define CACHE_GET_BLOCK(addr) ((addr) / (cachedState.MAX_LINES))
// CACHE_GET_LINE: slot inside that block (offset within the block, divided by 4).
#define CACHE_GET_LINE(addr) (((addr) & ((cachedState.MAX_LINES) - 1)) >> 2)
// One cached run of instructions.
struct CachedLine {
// Instruction storage; fixed capacity of MAX_INSTR_PER_BLOCK entries.
std::array<Instruction, MAX_INSTR_PER_BLOCK> code = {};
// Number of entries in `code` that are in use (0 for a freshly constructed line).
u32 len = 0;
// Cycle count associated with this line (0 for a freshly constructed line).
u32 cycles = 0;
} __attribute__((__packed__));
// A table of `lineAmount` CachedLine pointers. Every slot is null right after construction.
// The table stores raw pointers and has no destructor of its own.
template <u32 lineAmount>
struct CachedBlock {
  CachedBlock() : lines(lineAmount) {}

  std::vector<CachedLine *> lines;
};
// Top-level cache table: one CachedBlock pointer per MAX_LINES-sized slice of the address
// range, plus a flag named `exception`.
//
// blockBits    - log2 of MAX_LINES.
// addressSpace - highest address covered; Reset() creates (addressSpace + 1) / MAX_LINES slots.
template <u32 blockBits, u64 addressSpace>
struct CachedState {
  static constexpr u32 MAX_LINES = 1 << blockBits;

  std::vector<CachedBlock<MAX_LINES / 4> *> blocks = {};
  bool exception = false;

  // Frees every allocated line and block, then leaves `blocks` holding one null slot per
  // block of the address range. The `exception` flag is left untouched.
  void Reset() {
    for (auto *blockPtr : blocks) {
      if (blockPtr == nullptr)
        continue;

      for (auto *linePtr : blockPtr->lines)
        delete linePtr;
      delete blockPtr;
    }

    constexpr u64 slotCount = (addressSpace + 1) / MAX_LINES;
    blocks.assign(slotCount, nullptr);
  }
};
} // namespace n64
+11 -11
View File
@@ -200,7 +200,7 @@ void Interpreter::lb(const Instruction instr) {
void Interpreter::lh(const Instruction instr) {
const u64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b1, address)) {
if (Core::IsAddressError(0b1, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
@@ -218,7 +218,7 @@ void Interpreter::lh(const Instruction instr) {
void Interpreter::lw(const Instruction instr) {
const s16 offset = instr;
const u64 address = regs.Read<s64>(instr.rs()) + offset;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
@@ -241,7 +241,7 @@ void Interpreter::ll(const Instruction instr) {
regs.cop0.FireException(Cop0::GetTLBExceptionCode(regs.cop0.tlbError, Cop0::LOAD), 0, regs.oldPC);
} else {
const s32 result = mem.Read<u32>(physical);
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
}
@@ -285,7 +285,7 @@ void Interpreter::lwr(const Instruction instr) {
void Interpreter::ld(const Instruction instr) {
const s64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b111, address)) {
if (Core::IsAddressError(0b111, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
@@ -313,7 +313,7 @@ void Interpreter::lld(const Instruction instr) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::GetTLBExceptionCode(regs.cop0.tlbError, Cop0::LOAD), 0, regs.oldPC);
} else {
if (check_address_error(0b111, address)) {
if (Core::IsAddressError(0b111, address)) {
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
} else {
regs.Write(instr.rt(), mem.Read<u64>(paddr));
@@ -367,7 +367,7 @@ void Interpreter::lbu(const Instruction instr) {
void Interpreter::lhu(const Instruction instr) {
const s64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b1, address)) {
if (Core::IsAddressError(0b1, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
@@ -384,7 +384,7 @@ void Interpreter::lhu(const Instruction instr) {
void Interpreter::lwu(const Instruction instr) {
const s64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
@@ -418,7 +418,7 @@ void Interpreter::sc(const Instruction instr) {
if (regs.cop0.llbit) {
regs.cop0.llbit = false;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
regs.Write(instr.rt(), 0);
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorStore, 0, regs.oldPC);
@@ -451,7 +451,7 @@ void Interpreter::scd(const Instruction instr) {
if (regs.cop0.llbit) {
regs.cop0.llbit = false;
if (check_address_error(0b111, address)) {
if (Core::IsAddressError(0b111, address)) {
regs.Write(instr.rt(), 0);
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorStore, 0, regs.oldPC);
@@ -489,7 +489,7 @@ void Interpreter::sh(const Instruction instr) {
void Interpreter::sw(const Instruction instr) {
const s16 offset = instr;
const u64 address = regs.Read<s64>(instr.rs()) + offset;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorStore, 0, regs.oldPC);
return;
@@ -507,7 +507,7 @@ void Interpreter::sw(const Instruction instr) {
void Interpreter::sd(const Instruction instr) {
const s64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b111, address)) {
if (Core::IsAddressError(0b111, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorStore, 0, regs.oldPC);
return;
+6 -6
View File
@@ -930,7 +930,7 @@ void JIT::lb(const Instruction instr) {
void JIT::ld(const Instruction instr) {
if (regs.IsRegConstant(instr.rs())) {
const s64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b111, address)) {
if (Core::IsAddressError(0b111, address)) {
// regs.cop0.HandleTLBException(address);
// regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
// return;
@@ -1014,7 +1014,7 @@ void JIT::ldr(const Instruction instr) {
void JIT::lh(const Instruction instr) {
if (regs.IsRegConstant(instr.rs())) {
const u64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b1, address)) {
if (Core::IsAddressError(0b1, address)) {
// regs.cop0.HandleTLBException(address);
// regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
// return;
@@ -1043,7 +1043,7 @@ void JIT::lhu(const Instruction instr) {
u32 paddr;
if (regs.IsRegConstant(instr.rs())) {
const s64 address = regs.Read<s64>(instr.rs()) + (s16)instr;
if (check_address_error(0b1, address)) {
if (Core::IsAddressError(0b1, address)) {
regs.cop0.HandleTLBException(address);
regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
return;
@@ -1080,7 +1080,7 @@ void JIT::lw(const Instruction instr) {
u32 paddr = 0;
if (regs.IsRegConstant(instr.rs())) {
const u64 address = regs.Read<s64>(instr.rs()) + offset;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
// regs.cop0.HandleTLBException(address);
// regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorLoad, 0, regs.oldPC);
// return;
@@ -1344,7 +1344,7 @@ void JIT::sw(const Instruction instr) {
if (regs.IsRegConstant(instr.rs(), instr.rt())) {
const s16 offset = instr;
const u64 address = regs.Read<s64>(instr.rs()) + offset;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
// regs.cop0.HandleTLBException(address);
// regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorStore, 0, regs.oldPC);
panic("[JIT]: Unhandled ADES exception in SW!");
@@ -1367,7 +1367,7 @@ void JIT::sw(const Instruction instr) {
if (regs.IsRegConstant(instr.rs())) {
const s16 offset = instr;
const u64 address = regs.Read<s64>(instr.rs()) + offset;
if (check_address_error(0b11, address)) {
if (Core::IsAddressError(0b11, address)) {
// regs.cop0.HandleTLBException(address);
// regs.cop0.FireException(Cop0::ExceptionCode::AddressErrorStore, 0, regs.oldPC);
panic("[JIT]: Unhandled ADES exception in SW!");
+4
View File
@@ -2,6 +2,7 @@
#include <Options.hpp>
#include <log.hpp>
#include <imgui.h>
#include <Core.hpp>
CPUSettings::CPUSettings() {
auto selectedCpuType = Options::GetInstance().GetValue<std::string>("cpu", "type");
@@ -40,10 +41,13 @@ void CPUSettings::render() {
if (modified) {
if (selectedCpuTypeIndex == 0) {
Options::GetInstance().SetValue<std::string>("cpu", "type", "interpreter");
n64::Core::GetInstance().cpuType = n64::Core::Interpreted;
} else if (selectedCpuTypeIndex == 1) {
Options::GetInstance().SetValue<std::string>("cpu", "type", "cached_interpreter");
n64::Core::GetInstance().cpuType = n64::Core::CachedInterpreter;
} else {
Options::GetInstance().SetValue<std::string>("cpu", "type", "jit");
n64::Core::GetInstance().cpuType = n64::Core::DynamicRecompiler;
}
}
}
+1
View File
@@ -3,6 +3,7 @@
#include <common.hpp>
namespace n64 {
struct Instruction {
Instruction() = default;
Instruction(u32 v) { instr.raw = v; }