diff --git a/src/backend/core/JIT.hpp b/src/backend/core/JIT.hpp index 685f7ecc..7ea7be0c 100644 --- a/src/backend/core/JIT.hpp +++ b/src/backend/core/JIT.hpp @@ -17,8 +17,6 @@ struct CodeGenerator : Xbyak::CodeGenerator { CodeGenerator() : Xbyak::CodeGenerator{kCodeCacheSize} {} }; -enum BranchCondition { EQ, NE, GT, GE, LT, LE, GTU, GEU, LTU, LEU }; - struct JIT : BaseCPU { explicit JIT(ParallelRDP &); ~JIT() override = default; @@ -42,6 +40,47 @@ private: u64 cop2Latch{}; friend struct Cop1; + + // Credits to PCSX-Redux: https://github.com/grumpycoders/pcsx-redux + // Sets dest to "pointer" + void loadAddress(const Xbyak::Reg64 dest, void *pointer) { code.mov(dest, reinterpret_cast(pointer)); } + + // Load a pointer to the JIT object in "reg" + void loadThisPointer(const Xbyak::Reg64 reg) { code.mov(reg, code.rbp); } + // Emit a call to a class member function, passing "thisObject" (+ an adjustment if necessary) + // As the function's "this" pointer. Only works with classes with single, non-virtual inheritance + // Hence the static asserts. Those are all we need though, thankfully. + template + void emitMemberFunctionCall(T func, void *thisObject) { + void *functionPtr; + auto thisPtr = reinterpret_cast(thisObject); + +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) + static_assert(sizeof(T) == 8, "[x64 JIT] Invalid size for member function pointer"); + std::memcpy(&functionPtr, &func, sizeof(T)); +#else + static_assert(sizeof(T) == 16, "[x64 JIT] Invalid size for member function pointer"); + uintptr_t arr[2]; + std::memcpy(arr, &func, sizeof(T)); + // First 8 bytes correspond to the actual pointer to the function + functionPtr = reinterpret_cast(arr[0]); + // Next 8 bytes correspond to the "this" pointer adjustment + thisPtr += arr[1]; +#endif + + // Load this pointer to arg1 + if (thisPtr == reinterpret_cast(this)) { + loadThisPointer(code.rdi); + } else { + loadAddress(code.rdi, reinterpret_cast(thisPtr)); + } + + code.call(functionPtr); + } + void SkipSlot(); + void BranchTaken(s64 offs); + void BranchTaken(const Xbyak::Reg &offs); + #define check_address_error(mask, vaddr) \ (((!regs.cop0.is64BitAddressing) && (s32)(vaddr) != (vaddr)) || (((vaddr) & (mask)) != 0)) @@ -59,14 +98,30 @@ private: void addiu(u32); void andi(u32); void and_(u32); - void b(u32 instr, BranchCondition, u32 reg1, u32 reg2); - void b(u32 instr, BranchCondition, u32 reg); - void blink(u32 instr, BranchCondition, u32 reg1, u32 reg2); - void blink(u32 instr, BranchCondition, u32 reg); - void bl(u32 instr, BranchCondition, u32 reg1, u32 reg2); - void bl(u32 instr, BranchCondition, u32 reg); - void bllink(u32 instr, BranchCondition, u32 reg1, u32 reg2); - void bllink(u32 instr, BranchCondition, u32 reg); + void branch(const Xbyak::Reg &address); + void branch_likely(const Xbyak::Reg &address); + void branch_constant(const bool cond, const s64 address); + void branch_likely_constant(const bool cond, const s64 address); + void bltz(u32); + void bgez(u32); + void bltzl(u32); + void bgezl(u32); + void bltzal(u32); + void bgezal(u32); + void bltzall(u32); + void bgezall(u32); + void beq(u32); + void beql(u32); + void bne(u32); + void bnel(u32); + void blez(u32); + void blezl(u32); + void bgtz(u32); + void bgtzl(u32); + void bfc1(u32 instr); + void blfc1(u32 instr); + void bfc0(u32 instr); + void blfc0(u32 instr); void dadd(u32); void daddu(u32); void daddi(u32); diff --git a/src/backend/core/registers/cop/cop0instructions.cpp b/src/backend/core/interpreter/cop0instructions.cpp similarity index 95% rename from src/backend/core/registers/cop/cop0instructions.cpp rename to src/backend/core/interpreter/cop0instructions.cpp index 27c7c0e1..15c684c7 100644 --- a/src/backend/core/registers/cop/cop0instructions.cpp +++ b/src/backend/core/interpreter/cop0instructions.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include diff --git a/src/backend/core/registers/cop/cop1instructions.cpp b/src/backend/core/interpreter/cop1instructions.cpp similarity index 94% rename from src/backend/core/registers/cop/cop1instructions.cpp rename to src/backend/core/interpreter/cop1instructions.cpp index ae16f900..6b9cb912 100644 --- a/src/backend/core/registers/cop/cop1instructions.cpp +++ b/src/backend/core/interpreter/cop1instructions.cpp @@ -1,8 +1,8 @@ #include #include -#include -#include -#include +#include +#include +#include #include namespace n64 { @@ -1220,71 +1220,7 @@ void Cop1::truncld(const u32 instr) { FGR_D(regs.cop0.status, FD(instr)) = fd; } -template -void Cop1::lwc1(T &cpu, Mem &mem, u32 instr) { - if constexpr (std::is_same_v) { - if (!CheckFPUUsable()) - return; - lwc1Interp(mem, instr); - } else if constexpr (std::is_same_v) { - lwc1JIT(cpu, mem, instr); - } else { - Util::panic("What the fuck did you just give me?!!"); - } -} - -template void Cop1::lwc1(Interpreter &, Mem &, u32); -template void Cop1::lwc1(JIT &, Mem &, u32); - -template -void Cop1::swc1(T &cpu, Mem &mem, u32 instr) { - if constexpr (std::is_same_v) { - if (!CheckFPUUsable()) - return; - swc1Interp(mem, instr); - } else if constexpr (std::is_same_v) { - swc1JIT(cpu, mem, instr); - } else { - Util::panic("What the fuck did you just give me?!!"); - } -} - -template void Cop1::swc1(Interpreter &, Mem &, u32); -template void Cop1::swc1(JIT &, Mem &, u32); - -template -void Cop1::ldc1(T &cpu, Mem &mem, u32 instr) { - if constexpr (std::is_same_v) { - if (!CheckFPUUsable()) - return; - ldc1Interp(mem, instr); - } else if constexpr (std::is_same_v) { - ldc1JIT(cpu, mem, instr); - } else { - Util::panic("What the fuck did you just give me?!!"); - } -} - -template void Cop1::ldc1(Interpreter &, Mem &, u32); -template void Cop1::ldc1(JIT &, Mem &, u32); - -template -void Cop1::sdc1(T &cpu, Mem &mem, u32 instr) { - if constexpr (std::is_same_v) { - if (!CheckFPUUsable()) - return; - sdc1Interp(mem, instr); - } else if constexpr (std::is_same_v) { - sdc1JIT(cpu, mem, instr); - } else { - Util::panic("What the fuck did you just give me?!!"); - } -} - -template void Cop1::sdc1(Interpreter &, Mem &, u32); -template void Cop1::sdc1(JIT &, Mem &, u32); - -void Cop1::lwc1Interp(Mem &mem, const u32 instr) { +void Cop1::lwc1(Mem &mem, u32 instr) { const u64 addr = static_cast(static_cast(instr)) + regs.Read(BASE(instr)); if (u32 physical; !regs.cop0.MapVAddr(Cop0::LOAD, addr, physical)) { @@ -1296,7 +1232,7 @@ void Cop1::lwc1Interp(Mem &mem, const u32 instr) { } } -void Cop1::swc1Interp(Mem &mem, const u32 instr) { +void Cop1::swc1(Mem &mem, u32 instr) { const u64 addr = static_cast(static_cast(instr)) + regs.Read(BASE(instr)); if (u32 physical; !regs.cop0.MapVAddr(Cop0::STORE, addr, physical)) { @@ -1307,14 +1243,7 @@ void Cop1::swc1Interp(Mem &mem, const u32 instr) { } } -void Cop1::unimplemented() { - if (!CheckFPUUsable()) - return; - SetCauseUnimplemented(); - regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); -} - -void Cop1::ldc1Interp(Mem &mem, const u32 instr) { +void Cop1::ldc1(Mem &mem, u32 instr) { const u64 addr = static_cast(static_cast(instr)) + regs.Read(BASE(instr)); if (u32 physical; !regs.cop0.MapVAddr(Cop0::LOAD, addr, physical)) { @@ -1326,7 +1255,7 @@ void Cop1::ldc1Interp(Mem &mem, const u32 instr) { } } -void Cop1::sdc1Interp(Mem &mem, const u32 instr) { +void Cop1::sdc1(Mem &mem, u32 instr) { const u64 addr = static_cast(static_cast(instr)) + regs.Read(BASE(instr)); if (u32 physical; !regs.cop0.MapVAddr(Cop0::STORE, addr, physical)) { @@ -1337,6 +1266,13 @@ void Cop1::sdc1Interp(Mem &mem, const u32 instr) { } } +void Cop1::unimplemented() { + if (!CheckFPUUsable()) + return; + SetCauseUnimplemented(); + regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); +} + void Cop1::mfc1(const u32 instr) { if (!CheckFPUUsable()) return; diff --git a/src/backend/core/interpreter/decode.cpp b/src/backend/core/interpreter/decode.cpp index db3bd216..9809dbe8 100644 --- a/src/backend/core/interpreter/decode.cpp +++ b/src/backend/core/interpreter/decode.cpp @@ -304,7 +304,38 @@ void Interpreter::Exec(const u32 instr) { regs.cop0.decode(*this, instr); break; case COP1: - regs.cop1.decode(*this, instr); + { + const u8 mask_sub = (instr >> 21) & 0x1F; + const u8 mask_branch = (instr >> 16) & 0x1F; + if (mask_sub == 0x08) { + switch (mask_branch) { + case 0: + if (!regs.cop1.CheckFPUUsable()) + return; + b(instr, !regs.cop1.fcr31.compare); + break; + case 1: + if (!regs.cop1.CheckFPUUsable()) + return; + b(instr, regs.cop1.fcr31.compare); + break; + case 2: + if (!regs.cop1.CheckFPUUsable()) + return; + bl(instr, !regs.cop1.fcr31.compare); + break; + case 3: + if (!regs.cop1.CheckFPUUsable()) + return; + bl(instr, regs.cop1.fcr31.compare); + break; + default: + Util::panic("Undefined BC COP1 {:02X}", mask_branch); + } + break; + } + regs.cop1.decode(instr); + } break; case COP2: cop2Decode(instr); @@ -387,13 +418,13 @@ void Interpreter::Exec(const u32 instr) { ll(instr); break; case LWC1: - regs.cop1.lwc1(*this, mem, instr); + regs.cop1.lwc1(mem, instr); break; case LLD: lld(instr); break; case LDC1: - regs.cop1.ldc1(*this, mem, instr); + regs.cop1.ldc1(mem, instr); break; case LD: ld(instr); @@ -402,13 +433,13 @@ void Interpreter::Exec(const u32 instr) { sc(instr); break; case SWC1: - regs.cop1.swc1(*this, mem, instr); + regs.cop1.swc1(mem, instr); break; case SCD: scd(instr); break; case SDC1: - regs.cop1.sdc1(*this, mem, instr); + regs.cop1.sdc1(mem, instr); break; case SD: sd(instr); diff --git a/src/backend/core/jit/decode.cpp b/src/backend/core/jit/decode.cpp index e0659fde..2532cf74 100644 --- a/src/backend/core/jit/decode.cpp +++ b/src/backend/core/jit/decode.cpp @@ -172,16 +172,16 @@ void JIT::regimm(const u32 instr) { // 000r_rccc switch (const u8 mask = instr >> 16 & 0x1F) { case BLTZ: - b(instr, LT, RS(instr)); + bltz(instr); break; case BGEZ: - b(instr, GE, RS(instr)); + bgez(instr); break; case BLTZL: - bl(instr, LT, RS(instr)); + bltzl(instr); break; case BGEZL: - bl(instr, GE, RS(instr)); + bgezl(instr); break; case TGEI: trap(regs.Read(RS(instr)) >= static_cast(static_cast(instr))); @@ -202,16 +202,16 @@ void JIT::regimm(const u32 instr) { trap(regs.Read(RS(instr)) != static_cast(static_cast(instr))); break; case BLTZAL: - blink(instr, LT, RS(instr)); + bltzal(instr); break; case BGEZAL: - blink(instr, GE, RS(instr)); + bgezal(instr); break; case BLTZALL: - bllink(instr, LT, RS(instr)); + bltzall(instr); break; case BGEZALL: - bllink(instr, GE, RS(instr)); + bgezall(instr); break; default: Util::panic("Unimplemented regimm {} {} ({:08X}) (pc: {:016X})", (mask >> 3) & 3, mask & 7, instr, @@ -234,16 +234,16 @@ void JIT::Emit(const u32 instr) { jal(instr); break; case BEQ: - b(instr, EQ, RS(instr), RT(instr)); + beq(instr); break; case BNE: - b(instr, NE, RS(instr), RT(instr)); + bne(instr); break; case BLEZ: - b(instr, LE, RS(instr)); + blez(instr); break; case BGTZ: - b(instr, GT, RS(instr)); + bgtz(instr); break; case ADDI: addi(instr); @@ -273,21 +273,52 @@ void JIT::Emit(const u32 instr) { regs.cop0.decode(*this, instr); break; case COP1: - regs.cop1.decode(*this, instr); + { + const u8 mask_sub = (instr >> 21) & 0x1F; + const u8 mask_branch = (instr >> 16) & 0x1F; + if (mask_sub == 0x08) { + switch (mask_branch) { + case 0: + // if (!regs.cop1.CheckFPUUsable()) + // return; + bfc0(instr); + break; + case 1: + // if (!regs.cop1.CheckFPUUsable()) + // return; + bfc1(instr); + break; + case 2: + // if (!regs.cop1.CheckFPUUsable()) + // return; + blfc0(instr); + break; + case 3: + // if (!regs.cop1.CheckFPUUsable()) + // return; + blfc1(instr); + break; + default: + Util::panic("Undefined BC COP1 {:02X}", mask_branch); + } + break; + } + regs.cop1.decode(instr); + } break; case COP2: break; case BEQL: - bl(instr, EQ, RS(instr), RT(instr)); + beql(instr); break; case BNEL: - bl(instr, NE, RS(instr), RT(instr)); + bnel(instr); break; case BLEZL: - bl(instr, LE, RS(instr)); + blezl(instr); break; case BGTZL: - bl(instr, GT, RS(instr)); + bgtzl(instr); break; case DADDI: daddi(instr); diff --git a/src/backend/core/registers/Cop1.cpp b/src/backend/core/registers/Cop1.cpp index a3b3604d..86a5acf5 100644 --- a/src/backend/core/registers/Cop1.cpp +++ b/src/backend/core/registers/Cop1.cpp @@ -12,20 +12,7 @@ void Cop1::Reset() { memset(fgr, 0, 32 * sizeof(FloatingPointReg)); } -template -void Cop1::decode(T &cpu, u32 instr) { - if constexpr (std::is_same_v) { - decodeInterp(cpu, instr); - } else { - Util::panic("What the fuck did you just give me?!"); - } -} - -template void Cop1::decode(Interpreter &, u32); -template void Cop1::decode(JIT &, u32); - -void Cop1::decodeInterp(Interpreter &cpu, u32 instr) { - +void Cop1::decode(const u32 instr) { const u8 mask_sub = (instr >> 21) & 0x1F; const u8 mask_fun = instr & 0x3F; const u8 mask_branch = (instr >> 16) & 0x1F; @@ -55,32 +42,6 @@ void Cop1::decodeInterp(Interpreter &cpu, u32 instr) { case 0x07: unimplemented(); break; - case 0x08: - switch (mask_branch) { - case 0: - if (!CheckFPUUsable()) - return; - cpu.b(instr, !fcr31.compare); - break; - case 1: - if (!CheckFPUUsable()) - return; - cpu.b(instr, fcr31.compare); - break; - case 2: - if (!CheckFPUUsable()) - return; - cpu.bl(instr, !fcr31.compare); - break; - case 3: - if (!CheckFPUUsable()) - return; - cpu.bl(instr, fcr31.compare); - break; - default: - Util::panic("Undefined BC COP1 {:02X}", mask_branch); - } - break; case 0x10: // s switch (mask_fun) { case 0x00: diff --git a/src/backend/core/registers/Cop1.hpp b/src/backend/core/registers/Cop1.hpp index 071aa902..4b5135d2 100644 --- a/src/backend/core/registers/Cop1.hpp +++ b/src/backend/core/registers/Cop1.hpp @@ -118,11 +118,12 @@ struct Cop1 { u32 fcr0{}; FCR31 fcr31{}; FloatingPointReg fgr[32]{}; + bool fgrIsConstant[32]{}; void Reset(); - template // either JIT or Interpreter - void decode(T &, u32); + void decode(u32); friend struct Interpreter; + friend struct JIT; template bool CheckFPUUsable(); @@ -159,8 +160,6 @@ private: auto FGR_S(const Cop0Status &, u32) -> T &; template auto FGR_D(const Cop0Status &, u32) -> T &; - void decodeInterp(Interpreter &, u32); - void decodeJIT(JIT &, u32); void absd(u32 instr); void abss(u32 instr); void adds(u32 instr); @@ -234,23 +233,11 @@ private: void negd(u32 instr); void sqrts(u32 instr); void sqrtd(u32 instr); - template - void lwc1(T &, Mem &, u32); - template - void swc1(T &, Mem &, u32); - template - void ldc1(T &, Mem &, u32); - template - void sdc1(T &, Mem &, u32); + void lwc1(Mem &, u32); + void swc1(Mem &, u32); + void ldc1(Mem &, u32); + void sdc1(Mem &, u32); - void lwc1Interp(Mem &, u32); - void swc1Interp(Mem &, u32); - void ldc1Interp(Mem &, u32); - void sdc1Interp(Mem &, u32); - void lwc1JIT(JIT &, Mem &, u32) { Util::panic("[JIT]: lwc1 not implemented!"); } - void swc1JIT(JIT &, Mem &, u32) { Util::panic("[JIT]: swc1 not implemented!"); } - void ldc1JIT(JIT &, Mem &, u32) { Util::panic("[JIT]: ldc1 not implemented!"); } - void sdc1JIT(JIT &, Mem &, u32) { Util::panic("[JIT]: sdc1 not implemented!"); } void mfc1(u32 instr); void dmfc1(u32 instr); void mtc1(u32 instr);