More JIT work

This commit is contained in:
CocoSimone
2023-01-05 01:20:34 +01:00
parent 64630ea06b
commit 60d0dd2c31
7 changed files with 134 additions and 158 deletions

View File

@@ -15,7 +15,7 @@ Dynarec::Dynarec() : code(DEFAULT_MAX_CODE_SIZE, AutoGrow) {
}
dumpCode.open("jit.dump", std::ios::app | std::ios::binary);
code.setProtectMode(CodeGenerator::PROTECT_RWE);
code.ready();
}
void Dynarec::Recompile(Registers& regs, Mem& mem) {
@@ -23,7 +23,7 @@ void Dynarec::Recompile(Registers& regs, Mem& mem) {
u32 start_addr = regs.pc;
Fn block = code.getCurr<Fn>();
code.sub(code.rsp, 8);
code.sub(rsp, 8);
while(!prevBranch) {
instrInBlock++;
@@ -32,11 +32,11 @@ void Dynarec::Recompile(Registers& regs, Mem& mem) {
start_addr += 4;
code.mov(code.rdi, (u64)&regs);
code.mov(rdi, (u64)&regs);
branch = Exec(regs, mem, instr);
}
code.add(code.rsp, 8);
code.add(rsp, 8);
code.ret();
dumpCode.write(code.getCode<char*>(), code.getSize());
u32 pc = regs.pc & 0xffffffff;

View File

@@ -8,6 +8,9 @@ using namespace Xbyak;
using namespace Xbyak::util;
using Fn = void (*)();
// Byte distance from the start of `regs` to general-purpose register slot `x`.
// Expands against whatever object named `regs` is in scope at the use site.
#define GPR_OFFSET(x) (reinterpret_cast<uintptr_t>(&regs.gpr[(x)]) - reinterpret_cast<uintptr_t>(&regs))
// Byte distance from the start of `regs` to its member named `kind`.
// Like GPR_OFFSET, this relies on a `regs` object being visible where it expands.
#define REG_OFFSET(kind) (reinterpret_cast<uintptr_t>(&regs.kind) - reinterpret_cast<uintptr_t>(&regs))
struct Dynarec {
Dynarec();
~Dynarec();

View File

@@ -4,8 +4,6 @@
#include <Registers.hpp>
namespace n64::JIT {
#define GPR_OFFSET(x) ((uintptr_t)&regs.gpr[(x)] - (uintptr_t)&regs)
void Dynarec::cop2Decode(n64::Registers& regs, u32 instr) {
code.mov(rdi, (u64)this);
code.mov(rsi, (u64)&regs);
@@ -233,67 +231,61 @@ bool Dynarec::special(n64::Registers& regs, u32 instr) {
code.call(rax);
break;
case 0x30:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovge(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setge(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x31:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovae(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setae(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x32:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovl(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setl(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x33:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovb(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setb(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x34:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmove(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.sete(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x36:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovne(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setne(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
@@ -341,141 +333,121 @@ bool Dynarec::regimm(n64::Registers& regs, u32 instr) {
switch (mask) { // TODO: named constants for clearer code
case 0x00:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setl(dl);
code.mov(rax, (u64)b);
code.call(rax);
break;
case 0x01:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setge(dl);
code.mov(rax, (u64)b);
code.call(rax);
break;
case 0x02:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setl(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x03:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setge(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x08:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmovge(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.setge(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x09:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
u64(s64(s16(instr))));
code.cmovae(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, u64(s64(s16(instr))));
code.setae(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0A:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmovl(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.setl(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0B:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
u64(s64(s16(instr))));
code.cmovb(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, u64(s64(s16(instr))));
code.setb(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0C:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmove(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.sete(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0E:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmovne(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.setne(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x10:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setl(dl);
code.mov(rax, (u64)blink);
code.call(rax);
break;
case 0x11:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setge(dl);
code.mov(rax, (u64)blink);
code.call(rax);
break;
case 0x12:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setl(dl);
code.mov(rax, (u64)bllink);
code.call(rax);
break;
case 0x13:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setge(dl);
code.mov(rax, (u64)bllink);
code.call(rax);
break;
@@ -508,10 +480,10 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x04:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(rbx, rcx);
code.cmp(r8, rcx);
code.sete(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -519,10 +491,10 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x05:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(rbx, rcx);
code.cmp(r8, rcx);
code.setne(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -530,9 +502,9 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x06:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.test(rbx, rbx);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.test(r8, r8);
code.setnz(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -540,9 +512,9 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x07:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.test(rbx, rbx);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.test(r8, r8);
code.setg(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -593,43 +565,39 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
case 0x12: cop2Decode(regs, instr); break;
case 0x14:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmove(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, rcx);
code.sete(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x15:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovne(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, rcx);
code.setne(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x16:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovle(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setle(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x17:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovg(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setg(dl);
code.mov(rax, (u64)b);
code.call(rax);
break;

View File

@@ -148,14 +148,14 @@ void ddivu(Registers& regs, u32 instr) {
// Sets regs.delaySlot unconditionally and, when `cond` is true, writes
// `address` into the register state's program-counter fields.
//   regs    - register state that is modified in place
//   cond    - whether the branch is taken
//   address - target address stored when the branch is taken
void branch(Registers& regs, bool cond, s64 address) {
regs.delaySlot = true;
if (cond) {
// NOTE(review): both nextPC and pc are assigned here as shown. This listing
// appears to be a diff rendering with the removed and added line side by
// side -- confirm which single assignment is actually intended.
regs.nextPC = address;
regs.pc = address;
}
}
void branch_likely(Registers& regs, bool cond, s64 address) {
regs.delaySlot = true;
if (cond) {
regs.nextPC = address;
regs.pc = address;
} else {
regs.SetPC(regs.nextPC);
}