From 8fb79fefd5be8808c4dfe590c1d42fb7ea373afc Mon Sep 17 00:00:00 2001 From: Simone Date: Thu, 23 Nov 2023 10:07:02 +0100 Subject: [PATCH] JIT work --- src/backend/core/JIT.cpp | 12 +- src/backend/core/JIT.hpp | 66 +++--- src/backend/core/JIT/decode.cpp | 290 +++++++++++--------------- src/backend/core/JIT/instructions.cpp | 107 ---------- 4 files changed, 165 insertions(+), 310 deletions(-) diff --git a/src/backend/core/JIT.cpp b/src/backend/core/JIT.cpp index e58683a3..0eb4338d 100644 --- a/src/backend/core/JIT.cpp +++ b/src/backend/core/JIT.cpp @@ -83,15 +83,15 @@ _epilogue: } int JIT::Step() { - if(!blocks[regs.pc >> 20]) { - blocks[regs.pc >> 20] = (Fn*)calloc(4096, sizeof(Fn)); - blocks[regs.pc >> 20][regs.pc & 0xfff] = Recompile(); + if(!blocks[BLOCKCACHE_OUTER_INDEX(regs.pc)]) { + blocks[BLOCKCACHE_OUTER_INDEX(regs.pc)] = (Fn*)calloc(BLOCKCACHE_INNER_SIZE, sizeof(Fn)); // one zeroed Fn slot per inner index; a byte count here would under-allocate the table + blocks[BLOCKCACHE_OUTER_INDEX(regs.pc)][BLOCKCACHE_INNER_INDEX(regs.pc)] = Recompile(); } - if (!blocks[regs.pc >> 20][regs.pc & 0xfff]) { - blocks[regs.pc >> 20][regs.pc & 0xfff] = Recompile(); + if (!blocks[BLOCKCACHE_OUTER_INDEX(regs.pc)][BLOCKCACHE_INNER_INDEX(regs.pc)]) { + blocks[BLOCKCACHE_OUTER_INDEX(regs.pc)][BLOCKCACHE_INNER_INDEX(regs.pc)] = Recompile(); } - return blocks[regs.pc >> 20][regs.pc & 0xfff](); + return blocks[BLOCKCACHE_OUTER_INDEX(regs.pc)][BLOCKCACHE_INNER_INDEX(regs.pc)](); } } diff --git a/src/backend/core/JIT.hpp b/src/backend/core/JIT.hpp index a5976648..63384e10 100644 --- a/src/backend/core/JIT.hpp +++ b/src/backend/core/JIT.hpp @@ -9,6 +9,14 @@ namespace n64 { using Fn = int(*)(); #define GPR(x) qword[rdi + offsetof(Registers, gpr[(x)])] #define REG(ptr, member) ptr[rdi + offsetof(Registers, member)] +// 4KiB aligned pages +#define BLOCKCACHE_OUTER_SHIFT 12 +#define BLOCKCACHE_PAGE_SIZE (1 << BLOCKCACHE_OUTER_SHIFT) +#define BLOCKCACHE_OUTER_SIZE (0x80000000 >> BLOCKCACHE_OUTER_SHIFT) +// word aligned instructions +#define BLOCKCACHE_INNER_SIZE (BLOCKCACHE_PAGE_SIZE 
>> 2) +#define BLOCKCACHE_INNER_INDEX(physical) (((physical) & (BLOCKCACHE_PAGE_SIZE - 1)) >> 2) +#define BLOCKCACHE_OUTER_INDEX(physical) ((physical) >> BLOCKCACHE_OUTER_SHIFT) struct JIT : BaseCPU, Xbyak::CodeGenerator { JIT(); @@ -27,7 +35,7 @@ private: void emitMemberCall(T func, void* thisObj) { void* funcPtr; auto thisPtr = reinterpret_cast(thisObj); -/*#ifdef ABI_WINDOWS +#ifdef ABI_WINDOWS static_assert(sizeof(T) == 8, "[JIT]: Invalid size for member function pointer"); std::memcpy(&funcPtr, &func, sizeof(T)); #elif defined(ABI_UNIX) @@ -38,7 +46,7 @@ private: thisPtr += tmpArr[1]; #else Util::panic("Huh?!"); -#endif*/ +#endif push(rdi); if(thisPtr == reinterpret_cast(this)) { @@ -91,11 +99,8 @@ private: ret(); } - Fn* blocks[0x80000]{}; - - enum BranchCond { - LT, GT, GE, LE, EQ, NE - }; + static u8 codecache[1 << 25] __attribute__((aligned(4096))); + Fn* blocks[BLOCKCACHE_OUTER_SIZE]{}; u8 Read8(u64 addr) { return mem.Read(regs, addr); @@ -114,24 +119,22 @@ private: void addiu(u32); void andi(u32); void and_(u32); - void emitCondition(const std::string&, BranchCond); - template - void branch(const Xbyak::Reg64&, const T&, s64, BranchCond); - - template - void branch_likely(const Xbyak::Reg64&, const T&, s64, BranchCond); - - template - void b(u32, const Xbyak::Reg64&, const T&, BranchCond); - - template - void blink(u32, const Xbyak::Reg64&, const T&, BranchCond); - - template - void bl(u32, const Xbyak::Reg64&, const T&, BranchCond); - - template - void bllink(u32, const Xbyak::Reg64&, const T&, BranchCond); + void bltz(u32); + void bgez(u32); + void bltzl(u32); + void bgezl(u32); + void bltzal(u32); + void bgezal(u32); + void bltzall(u32); + void bgezall(u32); + void beq(u32); + void bne(u32); + void blez(u32); + void bgtz(u32); + void beql(u32); + void bnel(u32); + void blezl(u32); + void bgtzl(u32); void dadd(u32); void daddu(u32); void daddi(u32); @@ -200,7 +203,18 @@ private: void srav(u32); void srl(u32); void srlv(u32); - void trap(bool); + void 
tgei(u32); + void tgeiu(u32); + void tlti(u32); + void tltiu(u32); + void teqi(u32); + void tnei(u32); + void tge(u32); + void tgeu(u32); + void tlt(u32); + void tltu(u32); + void teq(u32); + void tne(u32); void or_(u32); void ori(u32); void xor_(u32); diff --git a/src/backend/core/JIT/decode.cpp b/src/backend/core/JIT/decode.cpp index 5a21cea2..b9008fa4 100644 --- a/src/backend/core/JIT/decode.cpp +++ b/src/backend/core/JIT/decode.cpp @@ -6,62 +6,62 @@ void JIT::special(u32 instr) { u8 mask = (instr & 0x3F); // 00rr_rccc switch (mask) { // TODO: named constants for clearer code - case 0: + case SLL: if (instr != 0) { sll(instr); } break; - case 0x02: srl(instr); break; - case 0x03: sra(instr); break; - case 0x04: sllv(instr); break; - case 0x06: srlv(instr); break; - case 0x07: srav(instr); break; - case 0x08: jr(instr); break; - case 0x09: jalr(instr); break; - case 0x0C: FireException(regs, ExceptionCode::Syscall, 0, regs.oldPC); break; - case 0x0D: FireException(regs, ExceptionCode::Breakpoint, 0, regs.oldPC); break; - case 0x0F: break; // SYNC - case 0x10: mfhi(instr); break; - case 0x11: mthi(instr); break; - case 0x12: mflo(instr); break; - case 0x13: mtlo(instr); break; - case 0x14: dsllv(instr); break; - case 0x16: dsrlv(instr); break; - case 0x17: dsrav(instr); break; - case 0x18: mult(instr); break; - case 0x19: multu(instr); break; - case 0x1A: div(instr); break; - case 0x1B: divu(instr); break; - case 0x1C: dmult(instr); break; - case 0x1D: dmultu(instr); break; - case 0x1E: ddiv(instr); break; - case 0x1F: ddivu(instr); break; - case 0x20: add(instr); break; - case 0x21: addu(instr); break; - case 0x22: sub(instr); break; - case 0x23: subu(instr); break; - case 0x24: and_(instr); break; - case 0x25: or_(instr); break; - case 0x26: xor_(instr); break; - case 0x27: nor(instr); break; - case 0x2A: slt(instr); break; - case 0x2B: sltu(instr); break; - case 0x2C: dadd(instr); break; - case 0x2D: daddu(instr); break; - case 0x2E: dsub(instr); break; - case 
0x2F: dsubu(instr); break; - case 0x30: trap(regs.gpr[RS(instr)] >= regs.gpr[RT(instr)]); break; - case 0x31: trap((u64)regs.gpr[RS(instr)] >= (u64)regs.gpr[RT(instr)]); break; - case 0x32: trap(regs.gpr[RS(instr)] < regs.gpr[RT(instr)]); break; - case 0x33: trap((u64)regs.gpr[RS(instr)] < (u64)regs.gpr[RT(instr)]); break; - case 0x34: trap(regs.gpr[RS(instr)] == regs.gpr[RT(instr)]); break; - case 0x36: trap(regs.gpr[RS(instr)] != regs.gpr[RT(instr)]); break; - case 0x38: dsll(instr); break; - case 0x3A: dsrl(instr); break; - case 0x3B: dsra(instr); break; - case 0x3C: dsll32(instr); break; - case 0x3E: dsrl32(instr); break; - case 0x3F: dsra32(instr); break; + case SRL: srl(instr); break; + case SRA: sra(instr); break; + case SLLV: sllv(instr); break; + case SRLV: srlv(instr); break; + case SRAV: srav(instr); break; + case JR: jr(instr); break; + case JALR: jalr(instr); break; + case SYSCALL: FireException(regs, ExceptionCode::Syscall, 0, regs.oldPC); break; + case BREAK: FireException(regs, ExceptionCode::Breakpoint, 0, regs.oldPC); break; + case SYNC: break; // SYNC + case MFHI: mfhi(instr); break; + case MTHI: mthi(instr); break; + case MFLO: mflo(instr); break; + case MTLO: mtlo(instr); break; + case DSLLV: dsllv(instr); break; + case DSRLV: dsrlv(instr); break; + case DSRAV: dsrav(instr); break; + case MULT: mult(instr); break; + case MULTU: multu(instr); break; + case DIV: div(instr); break; + case DIVU: divu(instr); break; + case DMULT: dmult(instr); break; + case DMULTU: dmultu(instr); break; + case DDIV: ddiv(instr); break; + case DDIVU: ddivu(instr); break; + case ADD: add(instr); break; + case ADDU: addu(instr); break; + case SUB: sub(instr); break; + case SUBU: subu(instr); break; + case AND: and_(instr); break; + case OR: or_(instr); break; + case XOR: xor_(instr); break; + case NOR: nor(instr); break; + case SLT: slt(instr); break; + case SLTU: sltu(instr); break; + case DADD: dadd(instr); break; + case DADDU: daddu(instr); break; + case DSUB: 
dsub(instr); break; + case DSUBU: dsubu(instr); break; + case TGE: tge(instr); break; + case TGEU: tgeu(instr); break; + case TLT: tlt(instr); break; + case TLTU: tltu(instr); break; + case TEQ: teq(instr); break; + case TNE: tne(instr); break; + case DSLL: dsll(instr); break; + case DSRL: dsrl(instr); break; + case DSRA: dsra(instr); break; + case DSLL32: dsll32(instr); break; + case DSRL32: dsrl32(instr); break; + case DSRA32: dsra32(instr); break; default: Util::panic("Unimplemented special {} {} ({:08X}) (pc: {:016X})", (mask >> 3) & 7, mask & 7, instr, (u64)regs.oldPC); } @@ -71,44 +71,20 @@ void JIT::regimm(u32 instr) { u8 mask = ((instr >> 16) & 0x1F); // 000r_rccc switch (mask) { // TODO: named constants for clearer code - case 0x00: { - mov(rax, GPR(RS(instr))); - b(instr, rax, 0, LT); - } break; - case 0x01: { - mov(rax, GPR(RS(instr))); - b(instr, rax, 0, GE); - } break; - case 0x02: { - mov(rax, GPR(RS(instr))); - bl(instr, rax, 0, LT); - } break; - case 0x03: { - mov(rax, GPR(RS(instr))); - bl(instr, rax, 0, GE); - } break; - case 0x08: trap(regs.gpr[RS(instr)] >= s64(s16(instr))); break; - case 0x09: trap(u64(regs.gpr[RS(instr)]) >= u64(s64(s16(instr)))); break; - case 0x0A: trap(regs.gpr[RS(instr)] < s64(s16(instr))); break; - case 0x0B: trap(u64(regs.gpr[RS(instr)]) < u64(s64(s16(instr)))); break; - case 0x0C: trap(regs.gpr[RS(instr)] == s64(s16(instr))); break; - case 0x0E: trap(regs.gpr[RS(instr)] != s64(s16(instr))); break; - case 0x10: { - mov(rax, GPR(RS(instr))); - blink(instr, rax, 0, LT); - } break; - case 0x11: { - mov(rax, GPR(RS(instr))); - blink(instr, rax, 0, GE); - } break; - case 0x12: { - mov(rax, GPR(RS(instr))); - bllink(instr, rax, 0, LT); - } break; - case 0x13: { - mov(rax, GPR(RS(instr))); - bllink(instr, rax, 0, GE); - } break; + case BLTZ: bltz(instr); break; + case BGEZ: bgez(instr); break; + case BLTZL: bltzl(instr); break; + case BGEZL: bgezl(instr); break; + case TGEI: tgei(instr); break; + case TGEIU: tgeiu(instr); 
break; + case TLTI: tlti(instr); break; + case TLTIU: tltiu(instr); break; + case TEQI: teqi(instr); break; + case TNEI: tnei(instr); break; + case BLTZAL: bltzal(instr); break; + case BGEZAL: bgezal(instr); break; + case BLTZALL: bltzall(instr); break; + case BGEZALL: bgezall(instr); break; default: Util::panic("Unimplemented regimm {} {} ({:08X}) (pc: {:016X})", (mask >> 3) & 3, mask & 7, instr, (u64)regs.oldPC); } @@ -135,88 +111,60 @@ void JIT::Emit(u32 instr) { u8 mask = (instr >> 26) & 0x3f; // 00rr_rccc switch(mask) { // TODO: named constants for clearer code - case 0x00: special(instr); break; - case 0x01: regimm(instr); break; - case 0x02: j(instr); break; - case 0x03: jal(instr); break; - case 0x04: { - mov(rax, GPR(RS(instr))); - mov(rcx, GPR(RT(instr))); - b(instr, rax, rcx, EQ); - } break; - case 0x05: { - mov(rax, GPR(RS(instr))); - mov(rcx, GPR(RT(instr))); - b(instr, rax, rcx, NE); - } break; - case 0x06: { - mov(rax, GPR(RS(instr))); - b(instr, rax, 0, LE); - } break; - case 0x07: { - mov(rax, GPR(RS(instr))); - b(instr, rax, 0, GT); - } break; - case 0x08: addi(instr); break; - case 0x09: addiu(instr); break; - case 0x0A: slti(instr); break; - case 0x0B: sltiu(instr); break; - case 0x0C: andi(instr); break; - case 0x0D: ori(instr); break; - case 0x0E: xori(instr); break; - case 0x0F: lui(instr); break; - case 0x10: regs.cop0.decode(*this, instr); break; - case 0x11: regs.cop1.decode(*this, instr); break; - case 0x12: cop2Decode(instr); break; - case 0x14: { - mov(rax, GPR(RS(instr))); - mov(rcx, GPR(RT(instr))); - bl(instr, rax, rcx, EQ); - } break; - case 0x15: { - mov(rax, GPR(RS(instr))); - mov(rcx, GPR(RT(instr))); - bl(instr, rax, rcx, NE); - } break; - case 0x16: { - mov(rax, GPR(RS(instr))); - bl(instr, rax, 0, LE); - } break; - case 0x17: { - mov(rax, GPR(RS(instr))); - bl(instr, rax, 0, GT); - } break; - case 0x18: daddi(instr); break; - case 0x19: daddiu(instr); break; - case 0x1A: ldl(instr); break; - case 0x1B: ldr(instr); break; + 
case SPECIAL: special(instr); break; + case REGIMM: regimm(instr); break; + case J: j(instr); break; + case JAL: jal(instr); break; + case BEQ: beq(instr); break; + case BNE: bne(instr); break; + case BLEZ: blez(instr); break; + case BGTZ: bgtz(instr); break; + case ADDI: addi(instr); break; + case ADDIU: addiu(instr); break; + case SLTI: slti(instr); break; + case SLTIU: sltiu(instr); break; + case ANDI: andi(instr); break; + case ORI: ori(instr); break; + case XORI: xori(instr); break; + case LUI: lui(instr); break; + case COP0: regs.cop0.decode(*this, instr); break; + case COP1: regs.cop1.decode(*this, instr); break; + case COP2: cop2Decode(instr); break; + case BEQL: beql(instr); break; + case BNEL: bnel(instr); break; + case BLEZL: blezl(instr); break; + case BGTZL: bgtzl(instr); break; + case DADDI: daddi(instr); break; + case DADDIU: daddiu(instr); break; + case LDL: ldl(instr); break; + case LDR: ldr(instr); break; case 0x1F: FireException(regs, ExceptionCode::ReservedInstruction, 0, regs.oldPC); break; - case 0x20: lb(instr); break; - case 0x21: lh(instr); break; - case 0x22: lwl(instr); break; - case 0x23: lw(instr); break; - case 0x24: lbu(instr); break; - case 0x25: lhu(instr); break; - case 0x26: lwr(instr); break; - case 0x27: lwu(instr); break; - case 0x28: sb(instr); break; - case 0x29: sh(instr); break; - case 0x2A: swl(instr); break; - case 0x2B: sw(instr); break; - case 0x2C: sdl(instr); break; - case 0x2D: sdr(instr); break; - case 0x2E: swr(instr); break; - case 0x2F: break; // CACHE - case 0x30: ll(instr); break; - case 0x31: regs.cop1.lwc1(*this, mem, instr); break; - case 0x34: lld(instr); break; - case 0x35: regs.cop1.ldc1(*this, mem, instr); break; - case 0x37: ld(instr); break; - case 0x38: sc(instr); break; - case 0x39: regs.cop1.swc1(*this, mem, instr); break; - case 0x3C: scd(instr); break; - case 0x3D: regs.cop1.sdc1(*this, mem, instr); break; - case 0x3F: sd(instr); break; + case LB: lb(instr); break; + case LH: lh(instr); break; + 
case LWL: lwl(instr); break; + case LW: lw(instr); break; + case LBU: lbu(instr); break; + case LHU: lhu(instr); break; + case LWR: lwr(instr); break; + case LWU: lwu(instr); break; + case SB: sb(instr); break; + case SH: sh(instr); break; + case SWL: swl(instr); break; + case SW: sw(instr); break; + case SDL: sdl(instr); break; + case SDR: sdr(instr); break; + case SWR: swr(instr); break; + case CACHE: break; // CACHE + case LL: ll(instr); break; + case LWC1: regs.cop1.lwc1(*this, mem, instr); break; + case LLD: lld(instr); break; + case LDC1: regs.cop1.ldc1(*this, mem, instr); break; + case LD: ld(instr); break; + case SC: sc(instr); break; + case SWC1: regs.cop1.swc1(*this, mem, instr); break; + case SCD: scd(instr); break; + case SDC1: regs.cop1.sdc1(*this, mem, instr); break; + case SD: sd(instr); break; default: Util::panic("Unimplemented instruction {:02X} ({:08X}) (pc: {:016X})", mask, instr, (u64)regs.oldPC); } diff --git a/src/backend/core/JIT/instructions.cpp b/src/backend/core/JIT/instructions.cpp index d3301fc6..8411ae1d 100644 --- a/src/backend/core/JIT/instructions.cpp +++ b/src/backend/core/JIT/instructions.cpp @@ -143,107 +143,6 @@ void JIT::ddivu(u32 instr) { L("ddivu_exit"); } -void JIT::emitCondition(const std::string& name, BranchCond cond) { - switch(cond) { - case LT: - jnl(name); - break; - case GT: - jng(name); - break; - case GE: - jnge(name); - break; - case LE: - jnle(name); - break; - case EQ: - jne(name); - break; - case NE: - je(name); - break; - } -} - -template -void JIT::branch(const Xbyak::Reg64& op1, const T& op2, s64 offset, BranchCond cond) { - cmp(op1, op2); - emitCondition("branch_false", cond); - - mov(byte[rdi + offsetof(Registers, delaySlot)], 1); - mov(rax, qword[rdi + offsetof(Registers, pc)]); - CodeGenerator::add(rax, offset); - mov(qword[rdi + offsetof(Registers, nextPC)], rax); - L("branch_false"); -} - -template void JIT::branch(const Xbyak::Reg64& op1, const Xbyak::Reg64& op2, s64 offset, BranchCond cond); 
-template void JIT::branch(const Xbyak::Reg64& op1, const int& op2, s64 offset, BranchCond cond); - -template -void JIT::branch_likely(const Xbyak::Reg64& op1, const T& op2, s64 offset, BranchCond cond) { - mov(rax, qword[rdi + offsetof(Registers, pc)]); - cmp(op1, op2); - emitCondition("branch_likely_false", cond); - - mov(byte[rdi + offsetof(Registers, delaySlot)], 1); - CodeGenerator::add(rax, offset); - mov(qword[rdi + offsetof(Registers, nextPC)], rax); - jmp("branch_likely_exit"); - - L("branch_likely_false"); - mov(qword[rdi + offsetof(Registers, oldPC)], rax); - mov(rcx, qword[rdi + offsetof(Registers, nextPC)]); - mov(qword[rdi + offsetof(Registers, pc)], rcx); - CodeGenerator::add(rcx, 4); - mov(qword[rdi + offsetof(Registers, nextPC)], rcx); - L("branch_likely_exit"); -} - -template void JIT::branch_likely(const Xbyak::Reg64& op1, const Xbyak::Reg64& op2, s64 offset, BranchCond cond); -template void JIT::branch_likely(const Xbyak::Reg64& op1, const int& op2, s64 offset, BranchCond cond); - -template -void JIT::b(u32 instr, const Xbyak::Reg64& op1, const T& op2, BranchCond cond) { - s16 imm = instr; - s64 offset = u64((s64)imm) << 2; - branch(op1, op2, offset, cond); -} -template void JIT::b(u32 instr, const Xbyak::Reg64& op1, const Xbyak::Reg64& op2, BranchCond cond); -template void JIT::b(u32 instr, const Xbyak::Reg64& op1, const int& op2, BranchCond cond); - -template -void JIT::blink(u32 instr, const Xbyak::Reg64& op1, const T& op2, BranchCond cond) { - s16 imm = instr; - s64 offset = u64((s64)imm) << 2; - mov(rcx, qword[rdi + offsetof(Registers, nextPC)]); - mov(GPR(31), rcx); - branch(op1, op2, offset, cond); -} -template void JIT::blink(u32 instr, const Xbyak::Reg64& op1, const Xbyak::Reg64& op2, BranchCond cond); -template void JIT::blink(u32 instr, const Xbyak::Reg64& op1, const int& op2, BranchCond cond); - -template -void JIT::bl(u32 instr, const Xbyak::Reg64& op1, const T& op2, BranchCond cond) { - s16 imm = instr; - s64 offset = u64((s64)imm) 
<< 2; - branch_likely(op1, op2, offset, cond); -} -template void JIT::bl(u32 instr, const Xbyak::Reg64& op1, const Xbyak::Reg64& op2, BranchCond cond); -template void JIT::bl(u32 instr, const Xbyak::Reg64& op1, const int& op2, BranchCond cond); - -template -void JIT::bllink(u32 instr, const Xbyak::Reg64& op1, const T& op2, BranchCond cond) { - mov(rcx, qword[rdi + offsetof(Registers, nextPC)]); - mov(GPR(31), rcx); - s16 imm = instr; - s64 offset = u64((s64)imm) << 2; - branch_likely(op1, op2, offset, cond); -} -template void JIT::bllink(u32 instr, const Xbyak::Reg64& op1, const Xbyak::Reg64& op2, BranchCond cond); -template void JIT::bllink(u32 instr, const Xbyak::Reg64& op1, const int& op2, BranchCond cond); - void JIT::lui(u32 instr) { u64 val = s64(s16(instr)); val <<= 16; @@ -1003,12 +902,6 @@ void JIT::mthi(u32 instr) { regs.hi = regs.gpr[RS(instr)]; } -void JIT::trap(bool cond) { - if(cond) { - FireException(regs, ExceptionCode::Trap, 0, regs.oldPC); - } -} - void JIT::mtc2(u32 instr) { }