[JIT]: Simplify register accesses, implement more instructions, rework some branching logic

This commit is contained in:
SimoneN64
2025-01-15 00:37:29 +01:00
parent 23ddc0b9f7
commit e065558147
9 changed files with 557 additions and 247 deletions

View File

@@ -3,7 +3,7 @@
#include <Scheduler.hpp>
namespace n64 {
Core::Core() : cpu(std::make_unique<Interpreter>(parallel)) {}
Core::Core() : cpu(std::make_unique<JIT>(parallel)) {}
void Core::Stop() {
render = false;
@@ -23,8 +23,7 @@ void Core::LoadROM(const std::string &rom_) {
std::string archive_types[] = {".zip", ".7z", ".rar", ".tar"};
auto extension = fs::path(rom).extension().string();
const bool isArchive = std::ranges::any_of(archive_types,
[&extension](const auto &e) { return e == extension; });
const bool isArchive = std::ranges::any_of(archive_types, [&extension](const auto &e) { return e == extension; });
cpu->GetMem().LoadROM(isArchive, rom);
GameDB::match(cpu->GetMem());

View File

@@ -2,7 +2,7 @@
#include <jit/helpers.hpp>
namespace n64 {
JIT::JIT(ParallelRDP &parallel) : regs(this), mem(regs, parallel) {}
JIT::JIT(ParallelRDP &parallel) : regs(this), mem(regs, parallel, this) { blockCache.resize(kUpperSize); }
bool JIT::ShouldServiceInterrupt() const {
const bool interrupts_pending = (regs.cop0.status.im & regs.cop0.cause.interruptPending) != 0;
@@ -23,18 +23,26 @@ void JIT::CheckCompareInterrupt() {
}
int JIT::Step() {
u32 instruction = 0;
s64 pc = regs.pc;
blockPC = regs.pc;
if (!blockCache[blockPC >> 8].empty()) {
if (blockCache[blockPC >> 8][blockPC >> 20]) {
return blockCache[blockPC >> 8][blockPC >> 20]();
}
} else {
blockCache[blockPC >> 8].resize(kLowerSize);
}
regs.block_delaySlot = false;
u32 instruction;
do {
// CheckCompareInterrupt();
// regs.prevDelaySlot = regs.delaySlot;
// regs.delaySlot = false;
if (check_address_error(0b11, u64(pc))) [[unlikely]] {
/*regs.cop0.HandleTLBException(pc);
regs.cop0.FireException(ExceptionCode::AddressErrorLoad, 0, pc);
if (check_address_error(0b11, u64(blockPC))) [[unlikely]] {
/*regs.cop0.HandleTLBException(blockPC);
regs.cop0.FireException(ExceptionCode::AddressErrorLoad, 0, blockPC);
return 1;*/
Util::panic("[JIT]: Unhandled exception ADL due to unaligned PC virtual value! (0x{:016lX})",
@@ -42,9 +50,10 @@ int JIT::Step() {
}
u32 paddr = 0;
if (!regs.cop0.MapVAddr(Cop0::LOAD, pc, paddr)) {
/*regs.cop0.HandleTLBException(pc);
regs.cop0.FireException(Cop0::GetTLBExceptionCode(regs.cop0.tlbError, Cop0::LOAD), 0, pc);
if (!regs.cop0.MapVAddr(Cop0::LOAD, blockPC, paddr)) {
/*regs.cop0.HandleTLBException(blockPC);
regs.cop0.FireException(Cop0::GetTLBExceptionCode(regs.cop0.tlbError, Cop0::LOAD), 0, blockPC);
return 1;*/
Util::panic(
"[JIT]: Unhandled exception TLB exception {} when retrieving PC physical address! (virtual: 0x{:016lX})",
@@ -58,15 +67,15 @@ int JIT::Step() {
return 1;
}*/
pc += 4;
blockPC += 4;
Emit(instruction);
}
while (!InstrEndsBlock(instruction));
// emit code to store the value of pc
return 1;
blockCache[regs.pc >> 8][regs.pc >> 20] = code.getCurr<BlockFn>();
return blockCache[regs.pc >> 8][regs.pc >> 20]();
}
std::vector<u8> JIT::Serialize() {

View File

@@ -9,13 +9,10 @@ struct Core;
static constexpr u32 kAddressSpaceSize = 0x8000'0000; // >> 20 = 0x800
static constexpr u32 kLowerSize = kAddressSpaceSize >> 20; // 0x800
static constexpr u32 kUpperSize = 1 << 20; // 0x100000
static constexpr u32 kUpperSize = kAddressSpaceSize >> 8; // 0x100000
static constexpr u32 kCodeCacheSize = 32_mb;
static constexpr u32 kCodeCacheAllocSize = kCodeCacheSize + 4096;
struct CodeGenerator : Xbyak::CodeGenerator {
CodeGenerator() : Xbyak::CodeGenerator{kCodeCacheSize} {}
};
#define REG(acc, x) code.acc[reinterpret_cast<uintptr_t>(&regs.x)]
struct JIT : BaseCPU {
explicit JIT(ParallelRDP &);
@@ -27,6 +24,8 @@ struct JIT : BaseCPU {
mem.Reset();
}
void InvalidateBlock(u32);
Mem &GetMem() override { return mem; }
Registers &GetRegs() override { return regs; }
@@ -34,23 +33,31 @@ struct JIT : BaseCPU {
[[nodiscard]] Disassembler::DisassemblyResult Disassemble(u32, u32) const override { return {}; }
private:
CodeGenerator code;
Xbyak::CodeGenerator code{kCodeCacheSize};
Registers regs;
Mem mem;
u64 cop2Latch{};
u64 blockPC = 0;
friend struct Cop1;
friend struct Registers;
using BlockFn = int (*)();
std::vector<std::vector<BlockFn>> blockCache;
template <typename T>
AddressFrame GPR(size_t index) {
Xbyak::Address GPR(const size_t index) const {
if constexpr (sizeof(T) == 1) {
return code.byte[offsetof(JIT, regs) + offsetof(Registers, gpr) + 8 * x];
return code.byte[reinterpret_cast<uintptr_t>(&regs.gpr[index])];
} else if constexpr (sizeof(T) == 2) {
return code.word[offsetof(JIT, regs) + offsetof(Registers, gpr) + 8 * x];
return code.word[reinterpret_cast<uintptr_t>(&regs.gpr[index])];
} else if constexpr (sizeof(T) == 4) {
return code.dword[offsetof(JIT, regs) + offsetof(Registers, gpr) + 8 * x];
return code.dword[reinterpret_cast<uintptr_t>(&regs.gpr[index])];
} else if constexpr (sizeof(T) == 8) {
return code.qword[offsetof(JIT, regs) + offsetof(Registers, gpr) + 8 * x];
return code.qword[reinterpret_cast<uintptr_t>(&regs.gpr[index])];
}
Util::panic("[JIT]: Invalid register addressing");
// never actually hit, but just to silence the warning "not all control paths return a value"
return Xbyak::Address{0};
}
// Credits to PCSX-Redux: https://github.com/grumpycoders/pcsx-redux
@@ -90,6 +97,7 @@ private:
code.call(functionPtr);
}
void SkipSlot();
void SkipSlotConstant();
void BranchTaken(s64 offs);
void BranchTaken(const Xbyak::Reg &offs);
@@ -110,8 +118,8 @@ private:
void addiu(u32);
void andi(u32);
void and_(u32);
void branch_constant(const bool cond, const s64 address);
void branch_likely_constant(const bool cond, const s64 address);
void branch_constant(bool cond, s64 address);
void branch_likely_constant(bool cond, s64 address);
void bltz(u32);
void bgez(u32);
void bltzl(u32);

View File

@@ -2,12 +2,11 @@
#include <Mem.hpp>
#include <backend/RomHelpers.hpp>
#include <cassert>
#include <core/Interpreter.hpp>
#include <core/registers/Registers.hpp>
#include <core/JIT.hpp>
#include <unarr.h>
namespace n64 {
Mem::Mem(Registers &regs, ParallelRDP &parallel) : mmio(*this, regs, parallel), flash(saveData) {
Mem::Mem(Registers &regs, ParallelRDP &parallel, JIT *jit) : mmio(*this, regs, parallel), flash(saveData), jit(jit) {
rom.cart.resize(CART_SIZE);
std::ranges::fill(rom.cart, 0);
}
@@ -318,7 +317,20 @@ u64 Mem::Read(Registers &regs, const u32 paddr) {
}
template <>
void Mem::Write<u8>(Registers &regs, u32 paddr, u32 val) {
// Byte store that first notifies the attached JIT: when a JIT pointer was supplied,
// jit->InvalidateBlock(paddr) is called, then the store is carried out by WriteInterpreter<u8>.
void Mem::WriteJIT<u8>(Registers &regs, const u32 paddr, const u32 val) {
  if (jit) {
    jit->InvalidateBlock(paddr);
  }

  return WriteInterpreter<u8>(regs, paddr, val);
}
// Byte store without JIT notification; forwards straight to WriteInterpreter<u8>.
template <>
void Mem::Write<u8>(Registers &regs, const u32 paddr, const u32 val) {
  return WriteInterpreter<u8>(regs, paddr, val);
}
template <>
void Mem::WriteInterpreter<u8>(Registers &regs, u32 paddr, u32 val) {
SI &si = mmio.si;
switch (paddr) {
@@ -359,7 +371,20 @@ void Mem::Write<u8>(Registers &regs, u32 paddr, u32 val) {
}
template <>
void Mem::Write<u16>(Registers &regs, u32 paddr, u32 val) {
// Halfword store that first notifies the attached JIT: when a JIT pointer was supplied,
// jit->InvalidateBlock(paddr) is called, then the store is carried out by WriteInterpreter<u16>.
void Mem::WriteJIT<u16>(Registers &regs, const u32 paddr, const u32 val) {
  if (jit) {
    jit->InvalidateBlock(paddr);
  }

  return WriteInterpreter<u16>(regs, paddr, val);
}
// Halfword store without JIT notification; forwards straight to WriteInterpreter<u16>.
template <>
void Mem::Write<u16>(Registers &regs, const u32 paddr, const u32 val) {
  return WriteInterpreter<u16>(regs, paddr, val);
}
template <>
void Mem::WriteInterpreter<u16>(Registers &regs, u32 paddr, u32 val) {
SI &si = mmio.si;
switch (paddr) {
@@ -399,8 +424,21 @@ void Mem::Write<u16>(Registers &regs, u32 paddr, u32 val) {
}
}
// Word store that first notifies the attached JIT: when a JIT pointer was supplied,
// jit->InvalidateBlock(paddr) is called, then the store is carried out by WriteInterpreter<u32>.
// JIT::sw emits calls to this specialization.
template <>
void Mem::WriteJIT<u32>(Registers &regs, const u32 paddr, const u32 val) {
  if (jit) {
    jit->InvalidateBlock(paddr);
  }

  return WriteInterpreter<u32>(regs, paddr, val);
}
// Word store without JIT notification; forwards straight to WriteInterpreter<u32>.
template <>
void Mem::Write<u32>(Registers &regs, const u32 paddr, const u32 val) {
  return WriteInterpreter<u32>(regs, paddr, val);
}
template <>
void Mem::WriteInterpreter<u32>(Registers &regs, const u32 paddr, const u32 val) {
SI &si = mmio.si;
switch (paddr) {
@@ -437,7 +475,16 @@ void Mem::Write<u32>(Registers &regs, const u32 paddr, const u32 val) {
}
}
void Mem::Write(const Registers &regs, const u32 paddr, u64 val) {
// 64-bit store that first notifies the attached JIT: when a JIT pointer was supplied,
// jit->InvalidateBlock(paddr) is called, then the store is carried out by the
// non-template WriteInterpreter overload.
void Mem::WriteJIT(const Registers &regs, const u32 paddr, const u64 val) {
  if (jit) {
    jit->InvalidateBlock(paddr);
  }

  return WriteInterpreter(regs, paddr, val);
}
// 64-bit store without JIT notification; forwards straight to the non-template WriteInterpreter overload.
void Mem::Write(const Registers &regs, const u32 paddr, const u64 val) {
  return WriteInterpreter(regs, paddr, val);
}
void Mem::WriteInterpreter(const Registers &regs, const u32 paddr, u64 val) {
SI &si = mmio.si;
switch (paddr) {

View File

@@ -79,7 +79,7 @@ struct Flash {
struct Mem {
~Mem() = default;
Mem(Registers &, ParallelRDP &);
Mem(Registers &, ParallelRDP &, JIT * = nullptr);
void Reset();
void LoadSRAM(SaveType, fs::path);
static std::vector<u8> OpenROM(const std::string &, size_t &);
@@ -131,6 +131,14 @@ struct Mem {
Flash flash;
private:
template <typename T>
void WriteInterpreter(Registers &, u32, u32);
void WriteInterpreter(const Registers &, u32, u64);
template <typename T>
void WriteJIT(Registers &, u32, u32);
void WriteJIT(const Registers &, u32, u64);
JIT *jit = nullptr;
friend struct SI;
friend struct PI;
friend struct AI;

View File

@@ -20,27 +20,33 @@ static bool SpecialEndsBlock(const u32 instr) {
}
}
static bool RegimmEndsBlock(const u32 instr) {
switch (instr >> 16 & 0x1F) {
case BLTZL:
case BGEZL:
case BLTZALL:
case BGEZALL:
return false;
default:
return true;
}
}
static bool InstrEndsBlock(const u32 instr) {
switch (instr >> 26 & 0x3f) {
case SPECIAL:
return SpecialEndsBlock(instr);
case REGIMM:
return RegimmEndsBlock(instr);
case J:
case JAL:
case BEQ:
case BNE:
case BLEZ:
case BGTZ:
case BEQL:
case BNEL:
case BLEZL:
case BGTZL:
return true;
default:
return false;
}
}
#define REG(acc, x) code.acc[offsetof(JIT, regs) + offsetof(Registers, x)]
#define GPR_constant_marker(x) code.byte[offsetof(JIT, regs) + offsetof(Registers, gprIsConstant) + x]
} // namespace n64

View File

@@ -8,19 +8,12 @@ namespace n64 {
using namespace Xbyak::util;
void JIT::lui(const u32 instr) {
if (RT(instr) == 0)
return;
u64 val = static_cast<s64>(static_cast<s16>(instr));
val <<= 16;
code.mov(GPR(RT(instr)), val);
code.mov(GPR_constant_marker(RT(instr)), 1);
regs.Write(RT(instr), val, true);
}
void JIT::add(const u32 instr) {
if (RD(instr) == 0)
return;
if (regs.IsRegConstant(RS(instr), RT(instr))) {
const u32 rs = regs.Read<s32>(RS(instr));
const u32 rt = regs.Read<s32>(RT(instr));
@@ -30,117 +23,137 @@ void JIT::add(const u32 instr) {
Util::panic("[JIT]: Unhandled Overflow exception in ADD!");
}
code.mov(code.eax, static_cast<s32>(result));
code.movsxd(code.rax, code.eax);
code.mov(GPR(RD(instr)), code.rax);
regs.Write<s32>(RD(instr), result, true);
return;
}
if (regs.IsRegConstant(RS(instr))) {
const u32 rs = regs.Read<s32>(RS(instr));
code.mov(code.eax, GPR(RT(instr)));
regs.Read<u32>(RT(instr), code.eax);
code.add(code.eax, rs);
code.movsxd(code.rax, code.eax);
code.mov(GPR(RD(instr)), code.rax);
regs.Write<s32>(RD(instr), code.eax);
return;
}
if (regs.IsRegConstant(RT(instr))) {
const u32 rt = regs.Read<s32>(RT(instr));
code.mov(code.eax, GPR(RS(instr)));
regs.Read<u32>(RS(instr), code.eax);
code.add(code.eax, rt);
code.movsxd(code.rax, code.eax);
code.mov(GPR(RD(instr)), code.rax);
regs.Write<s32>(RD(instr), code.eax);
return;
}
code.mov(code.edi, GPR(RT(instr)));
code.mov(code.eax, GPR(RS(instr)));
regs.Read<u32>(RT(instr), code.eax);
regs.Read<u32>(RS(instr), code.edi);
code.add(code.eax, code.edi);
code.movsxd(code.rax, code.eax);
code.mov(GPR(RD(instr)), code.rax);
regs.Write<s32>(RD(instr), code.eax);
}
void JIT::addu(u32 instr) {
if (RD(instr) == 0)
return;
if (regs.IsRegConstant(RS(instr), RT(instr))) {
const s32 rs = regs.Read<s32>(RS(instr));
const s32 rt = regs.Read<s32>(RT(instr));
const s32 result = rs + rt;
code.mov(code.eax, result);
code.movsxd(code.rax, code.eax);
code.mov(GPR(RD(instr)), code.rax);
code.mov(REG(byte, gprIsConstant), 1);
regs.Write<s32>(RD(instr), result);
return;
}
if (regs.IsRegConstant(RS(instr))) {
const s32 rs = regs.Read<s32>(RS(instr));
regs.Read<s32>(RT(instr), code.eax);
code.add(code.eax, rs);
regs.Write<s32>(RD(instr), code.eax);
return;
}
if (regs.IsRegConstant(RT(instr))) {
const s32 rs = regs.Read<s32>(RT(instr));
regs.Read<s32>(RS(instr), code.eax);
code.add(code.eax, rs);
regs.Write<s32>(RD(instr), code.eax);
return;
}
regs.Read<s32>(RS(instr), code.eax);
regs.Read<s32>(RT(instr), code.edi);
code.add(code.eax, code.edi);
regs.Write<s32>(RD(instr), code.eax);
}
void JIT::addi(u32 instr) {
if (RT(instr) == 0)
return;
u32 imm = s32(s16(instr));
if (regs.IsRegConstant(RS(instr))) {
auto rs = regs.Read<u32>(RS(instr));
u32 imm = s32(s16(instr));
u32 result = rs + imm;
if (check_signed_overflow(rs, imm, result)) {
Util::panic("[JIT]: Unhandled Overflow exception in ADDI!");
} else {
code.mov(code.eax, static_cast<s32>(result));
code.movsxd(code.rax, code.eax);
code.mov(GPR(RT(instr)), code.rax);
}
} else {
Util::panic("[JIT]: Implement non constant ADDI!");
regs.Write<s32>(RT(instr), static_cast<s32>(result), true);
return;
}
regs.Read<u32>(RS(instr), code.eax);
code.add(code.eax, imm);
regs.Write<s32>(RT(instr), code.eax);
}
void JIT::addiu(u32 instr) {
if (RT(instr) == 0)
return;
u32 imm = s32(s16(instr));
if (regs.IsRegConstant(RS(instr))) {
auto rs = regs.Read<u32>(RS(instr));
u32 imm = s32(s16(instr));
u32 result = rs + imm;
regs.Write(RT(instr), s32(result));
} else {
Util::panic("[JIT]: Implement non constant ADDIU!");
return;
}
regs.Read<u32>(RS(instr), code.eax);
code.add(code.eax, imm);
regs.Write<s32>(RT(instr), code.eax);
}
void JIT::andi(u32 instr) {
if (RT(instr) == 0)
return;
s64 imm = (u16)instr;
const s64 imm = static_cast<u16>(instr);
if (regs.IsRegConstant(RS(instr))) {
regs.Write(RT(instr), regs.Read<s64>(RS(instr)) & imm);
} else {
Util::panic("[JIT]: Implement non constant ANDI!");
return;
}
regs.Read<s64>(RS(instr), code.rax);
code.and_(code.rax, imm);
regs.Write<s64>(RT(instr), code.rax);
}
void JIT::and_(u32 instr) {
if (RD(instr) == 0)
return;
if (regs.IsRegConstant(RS(instr), RT(instr))) {
regs.Write(RD(instr), regs.Read<s64>(RS(instr)) & regs.Read<s64>(RT(instr)));
} else {
Util::panic("[JIT]: Implement non constant AND!");
return;
}
if (regs.IsRegConstant(RS(instr))) {
const auto rs = regs.Read<s64>(RS(instr));
regs.Read<s64>(RT(instr), code.rax);
code.and_(code.rax, rs);
regs.Write<s64>(RD(instr), code.rax);
return;
}
if (regs.IsRegConstant(RT(instr))) {
const auto rt = regs.Read<s64>(RT(instr));
regs.Read<s64>(RS(instr), code.rax);
code.and_(code.rax, rt);
regs.Write<s64>(RD(instr), code.rax);
return;
}
regs.Read<s64>(RS(instr), code.rax);
regs.Read<s64>(RT(instr), code.rdi);
code.and_(code.rdi, code.rax);
regs.Write<s64>(RD(instr), code.rdi);
}
void JIT::SkipSlot() {
@@ -154,6 +167,8 @@ void JIT::SkipSlot() {
code.mov(REG(qword, nextPC), code.rax);
}
// Advances blockPC by 4 without emitting any code.
void JIT::SkipSlotConstant() {
  blockPC = blockPC + 4;
}
void JIT::BranchTaken(const s64 offs) {
code.mov(code.rax, REG(qword, pc));
code.add(code.rax, offs);
@@ -205,7 +220,7 @@ void JIT::branch_likely_constant(const bool cond, const s64 offset) {
code.mov(REG(byte, delaySlot), code.al);
BranchTaken(offset);
} else {
SkipSlot();
SkipSlotConstant();
}
}
@@ -245,7 +260,7 @@ void JIT::bltz(const u32 instr) {
return;
}
code.mov(code.rax, GPR(RS(instr)));
code.mov(code.rax, GPR<s64>(RS(instr)));
code.cmp(code.rax, 0);
branch(offset, l);
}
@@ -462,9 +477,6 @@ void JIT::bgtzl(const u32 instr) {
}
void JIT::dadd(u32 instr) {
if (RD(instr) == 0)
return;
if (regs.IsRegConstant(RS(instr), RT(instr))) {
auto rs = regs.Read<u64>(RS(instr));
auto rt = regs.Read<u64>(RT(instr));
@@ -1155,7 +1167,7 @@ void JIT::sw(const u32 instr) {
code.mov(code.rsi, reinterpret_cast<uintptr_t>(&regs));
code.mov(code.edx, physical);
code.mov(code.rcx, regs.Read<s64>(RT(instr)));
emitMemberFunctionCall(&Mem::Write<u32>, &mem);
emitMemberFunctionCall(&Mem::WriteJIT<u32>, &mem);
}
return;
@@ -1180,7 +1192,7 @@ void JIT::sw(const u32 instr) {
code.mov(code.rsi, reinterpret_cast<uintptr_t>(&regs));
code.mov(code.edx, physical);
code.mov(code.rcx, GPR(RT(instr)));
emitMemberFunctionCall(&Mem::Write<u32>, &mem);
emitMemberFunctionCall(&Mem::WriteJIT<u32>, &mem);
}
return;
@@ -1200,7 +1212,7 @@ void JIT::sw(const u32 instr) {
code.mov(code.rsi, reinterpret_cast<uintptr_t>(&regs));
code.mov(code.edx, physical);
code.mov(code.rcx, regs.Read<s64>(RT(instr)));
emitMemberFunctionCall(&Mem::Write<u32>, &mem);
emitMemberFunctionCall(&Mem::WriteJIT<u32>, &mem);
return;
}
@@ -1218,7 +1230,7 @@ void JIT::sw(const u32 instr) {
code.mov(code.rsi, reinterpret_cast<uintptr_t>(&regs));
code.mov(code.edx, physical);
code.mov(code.rcx, GPR(RT(instr)));
emitMemberFunctionCall(&Mem::Write<u32>, &mem);
emitMemberFunctionCall(&Mem::WriteJIT<u32>, &mem);
}
void JIT::srlv(u32 instr) {

View File

@@ -1,3 +1,6 @@
#include "jit/helpers.hpp"
#include <core/registers/Registers.hpp>
#include <core/JIT.hpp>
@@ -11,6 +14,7 @@ void Registers::Reset() {
prevDelaySlot = false;
gpr.fill(0);
gprIsConstant.fill(false);
gprIsConstant[0] = true;
cop0.Reset();
cop1.Reset();
@@ -37,8 +41,48 @@ u64 Registers::Read<u64>(size_t idx) {
}
template <>
s64 Registers::Read<s64>(size_t idx) {
return s64(Read<u64>(idx));
// Emits a 64-bit load from the gpr[idx] slot into the 64-bit view of `reg`.
void Registers::Read<u64>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt64();
  emitter.mov(dst, jit->GPR<u64>(idx));
}
// Emits a 64-bit load from the gpr[idx] slot into the 64-bit view of `reg`
// (same emitted instruction as the u64 variant).
template <>
void Registers::Read<s64>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt64();
  emitter.mov(dst, jit->GPR<u64>(idx));
}
// Emits a 32-bit load from the gpr[idx] slot into the 32-bit view of `reg`.
template <>
void Registers::Read<u32>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt32();
  emitter.mov(dst, jit->GPR<u32>(idx));
}
// Emits a 32-bit load from the gpr[idx] slot into the 32-bit view of `reg`.
// No sign extension is emitted here.
template <>
void Registers::Read<s32>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt32();
  emitter.mov(dst, jit->GPR<s32>(idx));
}
// Emits a 16-bit load from the gpr[idx] slot into the 16-bit view of `reg`.
template <>
void Registers::Read<u16>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt16();
  emitter.mov(dst, jit->GPR<u16>(idx));
}
// Emits a 16-bit load from the gpr[idx] slot into the 16-bit view of `reg`.
// No sign extension is emitted here.
template <>
void Registers::Read<s16>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt16();
  emitter.mov(dst, jit->GPR<u16>(idx));
}
// Emits an 8-bit load from the gpr[idx] slot into the 8-bit view of `reg`.
template <>
void Registers::Read<u8>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt8();
  emitter.mov(dst, jit->GPR<u8>(idx));
}
// Emits an 8-bit load from the gpr[idx] slot into the 8-bit view of `reg`.
// No sign extension is emitted here.
template <>
void Registers::Read<s8>(size_t idx, Xbyak::Reg reg) {
  auto &emitter = jit->code;
  const auto dst = reg.cvt8();
  emitter.mov(dst, jit->GPR<s8>(idx));
}
// Returns the value produced by Read<u64>(idx), reinterpreted as a signed 64-bit integer.
template <>
s64 Registers::Read<s64>(const size_t idx) {
  const u64 raw = Read<u64>(idx);
  return static_cast<s64>(raw);
}
template <>
@@ -48,7 +92,7 @@ u32 Registers::Read<u32>(size_t idx) {
template <>
s32 Registers::Read<s32>(size_t idx) {
return s32(Read<u32>(idx));
return static_cast<s32>(Read<u32>(idx));
}
template <>
@@ -58,7 +102,7 @@ u16 Registers::Read<u16>(size_t idx) {
template <>
s16 Registers::Read<s16>(size_t idx) {
return s16(Read<u16>(idx));
return static_cast<s16>(Read<u16>(idx));
}
template <>
@@ -68,150 +112,321 @@ u8 Registers::Read<u8>(size_t idx) {
template <>
s8 Registers::Read<s8>(size_t idx) {
return s8(Read<u8>(idx));
return static_cast<s8>(Read<u8>(idx));
}
template <>
void Registers::Write<bool>(size_t idx, bool v) {
// Emits code that loads the immediate `v` into AL and stores that byte into the gpr[idx] slot.
void Registers::WriteJIT<bool>(size_t idx, bool v) {
  auto &emitter = jit->code;
  const auto &scratch = emitter.al;
  emitter.mov(scratch, v);
  emitter.mov(jit->GPR<u8>(idx), scratch);
}
template <>
void Registers::Write<bool>(size_t idx, bool v, bool isConstant) {
if (idx == 0)
return;
gprIsConstant[idx] = isConstant;
if (jit) {
jit->code.mov(jit->code.rax, v);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
if (isConstant) {
gpr[idx] = v;
return;
}
WriteJIT<bool>(idx, v);
return;
}
gpr[idx] = v;
gprIsConstant[idx] = true;
}
template <>
void Registers::Write<u64>(size_t idx, u64 v) {
// Emits code that loads the immediate `v` into RAX and stores all 64 bits into the gpr[idx] slot.
void Registers::WriteJIT<u64>(size_t idx, u64 v) {
  auto &emitter = jit->code;
  const auto &scratch = emitter.rax;
  emitter.mov(scratch, v);
  emitter.mov(jit->GPR<u64>(idx), scratch);
}
template <>
void Registers::Write<u64>(size_t idx, u64 v, bool isConstant) {
if (idx == 0)
return;
gprIsConstant[idx] = isConstant;
if (jit) {
jit->code.mov(jit->code.rax, v);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
if (isConstant) {
gpr[idx] = v;
return;
}
WriteJIT<u64>(idx, v);
return;
}
gpr[idx] = v;
gprIsConstant[idx] = true;
}
template <>
void Registers::Write<s64>(size_t idx, s64 v) {
Write<u64>(idx, v);
// Signed 64-bit write; converts the value to u64 and delegates to Write<u64>.
void Registers::Write<s64>(size_t idx, s64 v, bool isConstant) {
  const u64 asUnsigned = static_cast<u64>(v);
  Write<u64>(idx, asUnsigned, isConstant);
}
template <>
void Registers::Write<u32>(size_t idx, u32 v) {
// Emits code that loads the immediate `v` into EAX and stores those 32 bits into the gpr[idx] slot.
void Registers::WriteJIT<u32>(size_t idx, u32 v) {
  auto &emitter = jit->code;
  const auto &scratch = emitter.eax;
  emitter.mov(scratch, v);
  emitter.mov(jit->GPR<u32>(idx), scratch);
}
template <>
void Registers::Write<u32>(size_t idx, u32 v, bool isConstant) {
if (idx == 0)
return;
gprIsConstant[idx] = isConstant;
if (jit) {
jit->code.mov(jit->code.rax, v);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
if (isConstant) {
gpr[idx] = v;
return;
}
gpr[idx] = (u32)v;
gprIsConstant[idx] = true;
WriteJIT<u32>(idx, v);
return;
}
gpr[idx] = v;
}
template <>
void Registers::Write<s32>(size_t idx, s32 v) {
if (idx == 0)
return;
if(jit) {
void Registers::WriteJIT<s32>(size_t idx, s32 v) {
jit->code.mov(jit->code.eax, v);
jit->code.movsxd(jit->code.rax, jit->code.eax);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
jit->code.mov(jit->GPR<u64>(idx), jit->code.rax);
}
template <>
void Registers::Write<s32>(size_t idx, s32 v, bool isConstant) {
if (idx == 0)
return;
gprIsConstant[idx] = isConstant;
if (jit) {
if (isConstant) {
gpr[idx] = v;
return;
}
WriteJIT<s32>(idx, v);
return;
}
gpr[idx] = v;
gprIsConstant[idx] = true;
}
template <>
void Registers::Write<u16>(size_t idx, u16 v) {
if (idx == 0)
return;
if(jit) {
jit->code.mov(jit->code.rax, v);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
return;
}
gpr[idx] = (u16)v;
gprIsConstant[idx] = true;
// Emits code that loads the immediate `v` into AX and stores those 16 bits into the gpr[idx] slot.
void Registers::WriteJIT<u16>(size_t idx, u16 v) {
  auto &emitter = jit->code;
  const auto &scratch = emitter.ax;
  emitter.mov(scratch, v);
  emitter.mov(jit->GPR<u16>(idx), scratch);
}
template <>
void Registers::Write<s16>(size_t idx, s16 v) {
// Writes a u16 value to register `idx` (index 0 is never written).
// The constant flag for the register is updated first. With a JIT attached and a
// non-constant value, a store is emitted via WriteJIT<u16>; in every other case
// the value is assigned directly to gpr[idx].
void Registers::Write<u16>(size_t idx, u16 v, bool isConstant) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = isConstant;

  const bool emitStore = jit && !isConstant;
  if (emitStore) {
    WriteJIT<u16>(idx, v);
  } else {
    gpr[idx] = v;
  }
}
template <>
void Registers::WriteJIT<s16>(size_t idx, s16 v) {
jit->code.mov(jit->code.ax, v);
jit->code.movsx(jit->code.rax, jit->code.ax);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
jit->code.mov(jit->GPR<u64>(idx), jit->code.rax);
}
template <>
void Registers::Write<s16>(size_t idx, s16 v, bool isConstant) {
if (idx == 0)
return;
gprIsConstant[idx] = isConstant;
if (jit) {
if (isConstant) {
gpr[idx] = v;
return;
}
WriteJIT<s16>(idx, v);
return;
}
gpr[idx] = v;
gprIsConstant[idx] = true;
}
template <>
void Registers::Write<u8>(size_t idx, u8 v) {
if (idx == 0)
return;
if(jit) {
jit->code.mov(jit->code.rax, v);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
return;
}
gpr[idx] = (u8)v;
gprIsConstant[idx] = true;
// Emits code that loads the immediate `v` into AL and stores that byte into the gpr[idx] slot.
void Registers::WriteJIT<u8>(size_t idx, u8 v) {
  auto &emitter = jit->code;
  const auto &scratch = emitter.al;
  emitter.mov(scratch, v);
  emitter.mov(jit->GPR<u8>(idx), scratch);
}
template <>
void Registers::Write<s8>(size_t idx, s8 v) {
// Writes a u8 value to register `idx` (index 0 is never written).
// The constant flag for the register is updated first. With a JIT attached and a
// non-constant value, a store is emitted via WriteJIT<u8>; in every other case
// the value is assigned directly to gpr[idx].
void Registers::Write<u8>(size_t idx, u8 v, bool isConstant) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = isConstant;

  const bool emitStore = jit && !isConstant;
  if (emitStore) {
    WriteJIT<u8>(idx, v);
  } else {
    gpr[idx] = v;
  }
}
template <>
void Registers::WriteJIT<s8>(size_t idx, s8 v) {
jit->code.mov(jit->code.al, v);
jit->code.movsx(jit->code.rax, jit->code.al);
jit->code.mov(GPR(idx), jit->code.rax);
jit->code.mov(jit->code.rax, 1);
jit->code.mov(GPR_constant_marker(idx), jit->code.rax);
jit->code.mov(jit->GPR<u64>(idx), jit->code.rax);
}
template <>
void Registers::Write<s8>(size_t idx, s8 v, bool isConstant) {
if (idx == 0)
return;
gprIsConstant[idx] = isConstant;
if (jit) {
if (isConstant) {
gpr[idx] = v;
return;
}
WriteJIT<s8>(idx, v);
return;
}
gpr[idx] = v;
gprIsConstant[idx] = true;
}
// Emits code that sign-extends the low 8 bits of host register `v` to 64 bits and
// stores the result into the gpr[idx] slot. The register is marked non-constant.
// Writes to index 0 are dropped. Panics when no JIT is attached.
template <>
void Registers::Write<s8>(size_t idx, Xbyak::Reg v) {
  if (idx == 0)
    return;

  gprIsConstant[idx] = false;

  if (!jit)
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");

  jit->code.movsx(v.cvt64(), v.cvt8());
  // Store the 64-bit view explicitly: `v` keeps whatever width the caller passed, which need not
  // match the qword destination. The s16 and s32 overloads already store v.cvt64().
  jit->code.mov(jit->GPR<u64>(idx), v.cvt64());
}
// Emits a store of the low 8 bits of host register `v` into the gpr[idx] slot and
// marks the register non-constant. Writes to index 0 are dropped.
// NOTE(review): only one byte of the slot is written here, whereas the value-taking
// Write<u8> assigns the whole gpr[idx] — confirm the difference is intended.
template <>
void Registers::Write<u8>(size_t idx, Xbyak::Reg v) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = false;

  if (!jit) {
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");
  }

  const auto lowByte = v.cvt8();
  jit->code.mov(jit->GPR<u8>(idx), lowByte);
}
// Emits code that sign-extends the low 16 bits of host register `v` to 64 bits and
// stores the result into the gpr[idx] slot. The register is marked non-constant.
// Writes to index 0 are dropped.
template <>
void Registers::Write<s16>(size_t idx, Xbyak::Reg v) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = false;

  if (!jit) {
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");
  }

  auto &emitter = jit->code;
  const auto wide = v.cvt64();
  emitter.movsx(wide, v.cvt16());
  emitter.mov(jit->GPR<u64>(idx), wide);
}
// Emits a store of the low 16 bits of host register `v` into the gpr[idx] slot and
// marks the register non-constant. Writes to index 0 are dropped.
// NOTE(review): only two bytes of the slot are written here, whereas the value-taking
// Write<u16> assigns the whole gpr[idx] — confirm the difference is intended.
template <>
void Registers::Write<u16>(size_t idx, Xbyak::Reg v) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = false;

  if (!jit) {
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");
  }

  const auto lowHalf = v.cvt16();
  jit->code.mov(jit->GPR<u16>(idx), lowHalf);
}
// Emits code that sign-extends the low 32 bits of host register `v` to 64 bits and
// stores the result into the gpr[idx] slot. The register is marked non-constant.
// Writes to index 0 are dropped.
template <>
void Registers::Write<s32>(size_t idx, Xbyak::Reg v) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = false;

  if (!jit) {
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");
  }

  auto &emitter = jit->code;
  const auto wide = v.cvt64();
  emitter.movsxd(wide, v.cvt32());
  emitter.mov(jit->GPR<u64>(idx), wide);
}
// Emits a store of the low 32 bits of host register `v` into the gpr[idx] slot and
// marks the register non-constant. Writes to index 0 are dropped.
// NOTE(review): only four bytes of the slot are written here — confirm the upper half
// of gpr[idx] is meant to be left untouched.
template <>
void Registers::Write<u32>(size_t idx, Xbyak::Reg v) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = false;

  if (!jit) {
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");
  }

  const auto lowWord = v.cvt32();
  jit->code.mov(jit->GPR<u32>(idx), lowWord);
}
// Emits a 64-bit store of host register `v` into the gpr[idx] slot and marks the
// register non-constant. Writes to index 0 are dropped.
template <>
void Registers::Write<u64>(size_t idx, Xbyak::Reg v) {
  if (idx == 0) {
    return;
  }

  gprIsConstant[idx] = false;

  if (!jit) {
    Util::panic("Did you try to call Registers::Write(size_t, *Xbyak::Reg*) from the interpreter?");
  }

  const auto wide = v.cvt64();
  jit->code.mov(jit->GPR<u64>(idx), wide);
}
// Signed 64-bit register write; delegates to the u64 specialization.
template <>
void Registers::Write<s64>(size_t idx, Xbyak::Reg v) {
  return Write<u64>(idx, v);
}
} // namespace n64

View File

@@ -1,5 +1,6 @@
#pragma once
#include <array>
#include <xbyak.h>
#include <backend/core/registers/Cop1.hpp>
namespace n64 {
@@ -28,7 +29,7 @@ struct Registers {
Cop1 cop1;
s64 oldPC{}, pc{}, nextPC{};
s64 hi{}, lo{};
bool prevDelaySlot{}, delaySlot{};
bool prevDelaySlot{}, delaySlot{}, block_delaySlot{};
u32 steps = 0;
u32 extraCycles = 0;
@@ -43,9 +44,14 @@ struct Registers {
template <typename T>
T Read(size_t);
template <typename T>
void Write(size_t, T);
void Read(size_t, Xbyak::Reg);
template <typename T>
void Write(size_t, T, bool = false);
template <typename T>
void Write(size_t, Xbyak::Reg);
std::array<s64, 32> gpr{};
private:
template <typename T>
void WriteJIT(size_t, T);