More JIT work

This commit is contained in:
CocoSimone
2023-01-05 01:20:34 +01:00
parent 64630ea06b
commit 60d0dd2c31
7 changed files with 134 additions and 158 deletions

View File

@@ -15,7 +15,7 @@ Dynarec::Dynarec() : code(DEFAULT_MAX_CODE_SIZE, AutoGrow) {
}
dumpCode.open("jit.dump", std::ios::app | std::ios::binary);
code.setProtectMode(CodeGenerator::PROTECT_RWE);
code.ready();
}
void Dynarec::Recompile(Registers& regs, Mem& mem) {
@@ -23,7 +23,7 @@ void Dynarec::Recompile(Registers& regs, Mem& mem) {
u32 start_addr = regs.pc;
Fn block = code.getCurr<Fn>();
code.sub(code.rsp, 8);
code.sub(rsp, 8);
while(!prevBranch) {
instrInBlock++;
@@ -32,11 +32,11 @@ void Dynarec::Recompile(Registers& regs, Mem& mem) {
start_addr += 4;
code.mov(code.rdi, (u64)&regs);
code.mov(rdi, (u64)&regs);
branch = Exec(regs, mem, instr);
}
code.add(code.rsp, 8);
code.add(rsp, 8);
code.ret();
dumpCode.write(code.getCode<char*>(), code.getSize());
u32 pc = regs.pc & 0xffffffff;

View File

@@ -8,6 +8,9 @@ using namespace Xbyak;
using namespace Xbyak::util;
using Fn = void (*)();
// Byte offsets into the guest register file. Both macros expect an object
// named `regs` to be in scope at the point of use and yield the distance, in
// bytes, from the start of `regs` to the requested member. The emitter adds
// the result to a host register that holds the address of `regs`.
//   GPR_OFFSET(x)     -> offset of regs.gpr[x]
//   REG_OFFSET(kind)  -> offset of regs.<kind>
#define GPR_OFFSET(x) (reinterpret_cast<uintptr_t>(&regs.gpr[(x)]) - reinterpret_cast<uintptr_t>(&regs))
#define REG_OFFSET(kind) (reinterpret_cast<uintptr_t>(&regs.kind) - reinterpret_cast<uintptr_t>(&regs))
struct Dynarec {
Dynarec();
~Dynarec();

View File

@@ -4,8 +4,6 @@
#include <Registers.hpp>
namespace n64::JIT {
// Byte offset of regs.gpr[x] from the start of `regs`; requires an object
// named `regs` to be in scope where the macro is expanded.
#define GPR_OFFSET(x) (reinterpret_cast<uintptr_t>(&regs.gpr[(x)]) - reinterpret_cast<uintptr_t>(&regs))
void Dynarec::cop2Decode(n64::Registers& regs, u32 instr) {
code.mov(rdi, (u64)this);
code.mov(rsi, (u64)&regs);
@@ -233,67 +231,61 @@ bool Dynarec::special(n64::Registers& regs, u32 instr) {
code.call(rax);
break;
case 0x30:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovge(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setge(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x31:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovae(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setae(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x32:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovl(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setl(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x33:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovb(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setb(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x34:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmove(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.sete(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
break;
case 0x36:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovne(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, rcx);
code.setne(sil);
code.mov(rax, (u64)trap);
code.call(rax);
res = true;
@@ -341,141 +333,121 @@ bool Dynarec::regimm(n64::Registers& regs, u32 instr) {
switch (mask) { // TODO: named constants for clearer code
case 0x00:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setl(dl);
code.mov(rax, (u64)b);
code.call(rax);
break;
case 0x01:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setge(dl);
code.mov(rax, (u64)b);
code.call(rax);
break;
case 0x02:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setl(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x03:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setge(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x08:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmovge(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.setge(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x09:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
u64(s64(s16(instr))));
code.cmovae(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, u64(s64(s16(instr))));
code.setae(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0A:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmovl(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.setl(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0B:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
u64(s64(s16(instr))));
code.cmovb(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, u64(s64(s16(instr))));
code.setb(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0C:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmove(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.sete(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x0E:
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
s64(s16(instr)));
code.cmovne(cl, ch);
code.mov(rsi, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rsi, rsi);
code.cmp(r8, s64(s16(instr)));
code.setne(sil);
code.mov(rax, (u64)trap);
code.call(rax);
break;
case 0x10:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setl(dl);
code.mov(rax, (u64)blink);
code.call(rax);
break;
case 0x11:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setge(dl);
code.mov(rax, (u64)blink);
code.call(rax);
break;
case 0x12:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovl(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setl(dl);
code.mov(rax, (u64)bllink);
code.call(rax);
break;
case 0x13:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovge(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(rcx, 0);
code.setge(dl);
code.mov(rax, (u64)bllink);
code.call(rax);
break;
@@ -508,10 +480,10 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x04:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(rbx, rcx);
code.cmp(r8, rcx);
code.sete(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -519,10 +491,10 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x05:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(rbx, rcx);
code.cmp(r8, rcx);
code.setne(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -530,9 +502,9 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x06:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.test(rbx, rbx);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.test(r8, r8);
code.setnz(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -540,9 +512,9 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
break;
case 0x07:
code.mov(rsi, (u64)instr);
code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]);
code.test(rbx, rbx);
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.test(r8, r8);
code.setg(dl);
code.mov(rax, (u64)b);
code.call(rax);
@@ -593,43 +565,39 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) {
case 0x12: cop2Decode(regs, instr); break;
case 0x14:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmove(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, rcx);
code.sete(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x15:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))],
qword[rdi + GPR_OFFSET(RT(instr))]);
code.cmovne(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, rcx);
code.setne(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x16:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovle(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setle(dl);
code.mov(rax, (u64)bl);
code.call(rax);
break;
case 0x17:
code.mov(rsi, (u64)instr);
code.mov(cl, 0);
code.mov(ch, 1);
code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0);
code.cmovg(cl, ch);
code.mov(rdx, cl.cvt64());
code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]);
code.xor_(rdx, rdx);
code.cmp(r8, 0);
code.setg(dl);
code.mov(rax, (u64)b);
code.call(rax);
break;

View File

@@ -148,14 +148,14 @@ void ddivu(Registers& regs, u32 instr) {
// Conditional branch helper.
// Always flags regs.delaySlot; when `cond` is true, the target `address` is
// written into the program-counter state of `regs`.
//   regs    - register file that is updated in place
//   cond    - whether the branch is taken
//   address - branch target
void branch(Registers& regs, bool cond, s64 address) {
regs.delaySlot = true;
if (cond) {
// NOTE(review): both assignments below are visible in this listing; it looks
// like one is the pre-change line and the other its replacement - confirm
// against the repository which of the two the function should keep.
regs.nextPC = address;
regs.pc = address;
}
}
void branch_likely(Registers& regs, bool cond, s64 address) {
regs.delaySlot = true;
if (cond) {
regs.nextPC = address;
regs.pc = address;
} else {
regs.SetPC(regs.nextPC);
}

View File

@@ -13,6 +13,11 @@ using namespace nlohmann;
namespace fs = std::filesystem;
/// Builds the game list for the directory `path`.
/// All of the work is delegated to Create(), which can also be called again
/// later on an existing GameList.
GameList::GameList(const std::string& path) {
  this->Create(path);
}
void GameList::Create(const std::string &path) {
threadDone = false;
if(!path.empty()) {
std::thread searchThread([path, this]() {
std::ifstream gameDbFile("resources/db.json");
@@ -20,6 +25,10 @@ GameList::GameList(const std::string& path) {
std::vector<u8> rom{};
for(const auto& p : fs::recursive_directory_iterator{path}) {
const auto filename = p.path().string();
if(threadDone) {
gamesList.clear();
break;
}
if(p.path().extension() == ".n64" || p.path().extension() == ".z64" || p.path().extension() == ".v64" ||
p.path().extension() == ".N64" || p.path().extension() == ".Z64" || p.path().extension() == ".V64") {
std::ifstream file(filename, std::ios::binary);

View File

@@ -3,13 +3,6 @@
#include <string>
#include <atomic>
/// Numeric identifiers whose names mirror the name/region/size/status fields
/// of GameInfo. Values are consecutive starting at zero.
/// NOTE(review): presumably used as column indices by the game-list widget -
/// confirm against the callers.
enum GameInfoID {
  Name_ID = 0,
  Region_ID = 1,
  Size_ID = 2,
  Status_ID = 3,
};
/// One entry of the game list. Every field is kept as text; the member order
/// is significant for aggregate initialisation.
struct GameInfo {
  std::string name;
  std::string region;
  std::string size;
  std::string status;
  std::string path;  // presumably the ROM's location on disk - confirm
};
@@ -18,10 +11,11 @@ struct GameList {
GameList(const std::string&);
~GameList() = default;
void Create(const std::string&);
bool RenderWidget(float, std::string&);
[[nodiscard]] std::vector<GameInfo> GetGamesList() const { return gamesList; }
std::atomic_bool threadDone = false;
private:
std::vector<GameInfo> gamesList{}, notMatch{};
std::atomic_bool threadDone = false;
};

View File

@@ -160,6 +160,7 @@ DrawData Window::Present(n64::Core& core) {
void Window::LoadROM(n64::Core& core, const std::string &path) {
if(!path.empty()) {
gameList.threadDone = true;
n64::CartInfo cartInfo = core.LoadROM(path);
std::ifstream gameDbFile("resources/db.json");
json gameDb = json::parse(gameDbFile);
@@ -243,6 +244,7 @@ float Window::Render(n64::Core& core) {
core.rom.clear();
Util::UpdateRPC(Util::Idling);
SDL_SetWindowTitle(window, windowTitle.c_str());
gameList.Create(settings.GetGamesDir());
core.Stop();
}
if (ImGui::MenuItem(core.pause ? "Resume" : "Pause", nullptr, false, core.romLoaded)) {