From 60d0dd2c3174dd4b96682d70a6e945d0cbb05bed Mon Sep 17 00:00:00 2001 From: CocoSimone Date: Thu, 5 Jan 2023 01:20:34 +0100 Subject: [PATCH] More JIT work --- src/backend/core/Dynarec.cpp | 8 +- src/backend/core/Dynarec.hpp | 3 + src/backend/core/dynarec/decode.cpp | 256 ++++++++++------------ src/backend/core/dynarec/instructions.cpp | 4 +- src/frontend/imgui/GameList.cpp | 9 + src/frontend/imgui/GameList.hpp | 10 +- src/frontend/imgui/Window.cpp | 2 + 7 files changed, 134 insertions(+), 158 deletions(-) diff --git a/src/backend/core/Dynarec.cpp b/src/backend/core/Dynarec.cpp index 6bfaecb8..3d021333 100644 --- a/src/backend/core/Dynarec.cpp +++ b/src/backend/core/Dynarec.cpp @@ -15,7 +15,7 @@ Dynarec::Dynarec() : code(DEFAULT_MAX_CODE_SIZE, AutoGrow) { } dumpCode.open("jit.dump", std::ios::app | std::ios::binary); - code.setProtectMode(CodeGenerator::PROTECT_RWE); + code.ready(); } void Dynarec::Recompile(Registers& regs, Mem& mem) { @@ -23,7 +23,7 @@ void Dynarec::Recompile(Registers& regs, Mem& mem) { u32 start_addr = regs.pc; Fn block = code.getCurr(); - code.sub(code.rsp, 8); + code.sub(rsp, 8); while(!prevBranch) { instrInBlock++; @@ -32,11 +32,11 @@ void Dynarec::Recompile(Registers& regs, Mem& mem) { start_addr += 4; - code.mov(code.rdi, (u64)&regs); + code.mov(rdi, (u64)&regs); branch = Exec(regs, mem, instr); } - code.add(code.rsp, 8); + code.add(rsp, 8); code.ret(); dumpCode.write(code.getCode(), code.getSize()); u32 pc = regs.pc & 0xffffffff; diff --git a/src/backend/core/Dynarec.hpp b/src/backend/core/Dynarec.hpp index 6e3672cc..2d220426 100644 --- a/src/backend/core/Dynarec.hpp +++ b/src/backend/core/Dynarec.hpp @@ -8,6 +8,9 @@ using namespace Xbyak; using namespace Xbyak::util; using Fn = void (*)(); +#define GPR_OFFSET(x) ((uintptr_t)&regs.gpr[(x)] - (uintptr_t)&regs) +#define REG_OFFSET(kind) ((uintptr_t)&regs.kind - (uintptr_t)&regs) + struct Dynarec { Dynarec(); ~Dynarec(); diff --git a/src/backend/core/dynarec/decode.cpp b/src/backend/core/dynarec/decode.cpp 
index 34647a22..9957d4fe 100644 --- a/src/backend/core/dynarec/decode.cpp +++ b/src/backend/core/dynarec/decode.cpp @@ -4,8 +4,6 @@ #include namespace n64::JIT { -#define GPR_OFFSET(x) ((uintptr_t)&regs.gpr[(x)] - (uintptr_t)&regs) - void Dynarec::cop2Decode(n64::Registers& regs, u32 instr) { code.mov(rdi, (u64)this); code.mov(rsi, (u64)&regs); @@ -233,67 +231,61 @@ bool Dynarec::special(n64::Registers& regs, u32 instr) { code.call(rax); break; case 0x30: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmovge(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, rcx); + code.setge(sil); code.mov(rax, (u64)trap); code.call(rax); res = true; break; case 0x31: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmovae(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, rcx); + code.setae(sil); code.mov(rax, (u64)trap); code.call(rax); res = true; break; case 0x32: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmovl(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, rcx); + code.setl(sil); code.mov(rax, (u64)trap); code.call(rax); res = true; break; case 0x33: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmovb(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rsi, rsi); 
+ code.cmp(r8, rcx); + code.setb(sil); code.mov(rax, (u64)trap); code.call(rax); res = true; break; case 0x34: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmove(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, rcx); + code.sete(sil); code.mov(rax, (u64)trap); code.call(rax); res = true; break; case 0x36: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmovne(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, rcx); + code.setne(sil); code.mov(rax, (u64)trap); code.call(rax); res = true; @@ -341,141 +333,121 @@ bool Dynarec::regimm(n64::Registers& regs, u32 instr) { switch (mask) { // TODO: named constants for clearer code case 0x00: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovl(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, 0); + code.setl(dl); code.mov(rax, (u64)b); code.call(rax); break; case 0x01: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovge(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, 0); + code.setge(dl); code.mov(rax, (u64)b); code.call(rax); break; case 0x02: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovl(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + 
code.cmp(r8, 0); + code.setl(dl); code.mov(rax, (u64)bl); code.call(rax); break; case 0x03: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovge(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, 0); + code.setge(dl); code.mov(rax, (u64)bl); code.call(rax); break; case 0x08: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - s64(s16(instr))); - code.cmovge(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, s64(s16(instr))); + code.setge(sil); code.mov(rax, (u64)trap); code.call(rax); break; case 0x09: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - u64(s64(s16(instr)))); - code.cmovae(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, u64(s64(s16(instr)))); + code.setae(sil); code.mov(rax, (u64)trap); code.call(rax); break; case 0x0A: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - s64(s16(instr))); - code.cmovl(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, s64(s16(instr))); + code.setl(sil); code.mov(rax, (u64)trap); code.call(rax); break; case 0x0B: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - u64(s64(s16(instr)))); - code.cmovb(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, u64(s64(s16(instr)))); + code.setb(sil); code.mov(rax, (u64)trap); code.call(rax); break; case 0x0C: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - s64(s16(instr))); - code.cmove(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, 
qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, s64(s16(instr))); + code.sete(sil); code.mov(rax, (u64)trap); code.call(rax); break; case 0x0E: - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - s64(s16(instr))); - code.cmovne(cl, ch); - code.mov(rsi, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rsi, rsi); + code.cmp(r8, s64(s16(instr))); + code.setne(sil); code.mov(rax, (u64)trap); code.call(rax); break; case 0x10: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovl(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(rcx, 0); + code.setl(dl); code.mov(rax, (u64)blink); code.call(rax); break; case 0x11: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovge(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(rcx, 0); + code.setge(dl); code.mov(rax, (u64)blink); code.call(rax); break; case 0x12: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovl(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(rcx, 0); + code.setl(dl); code.mov(rax, (u64)bllink); code.call(rax); break; case 0x13: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovge(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(rcx, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(rcx, 0); + code.setge(dl); code.mov(rax, (u64)bllink); code.call(rax); break; @@ -508,10 +480,10 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) { break; case 0x04: code.mov(rsi, 
(u64)instr); - code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); code.xor_(rdx, rdx); - code.cmp(rbx, rcx); + code.cmp(r8, rcx); code.sete(dl); code.mov(rax, (u64)b); code.call(rax); @@ -519,10 +491,10 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) { break; case 0x05: code.mov(rsi, (u64)instr); - code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); code.xor_(rdx, rdx); - code.cmp(rbx, rcx); + code.cmp(r8, rcx); code.setne(dl); code.mov(rax, (u64)b); code.call(rax); @@ -530,9 +502,9 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) { break; case 0x06: code.mov(rsi, (u64)instr); - code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]); - code.test(rbx, rbx); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); code.xor_(rdx, rdx); + code.test(r8, r8); code.setnz(dl); code.mov(rax, (u64)b); code.call(rax); @@ -540,9 +512,9 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) { break; case 0x07: code.mov(rsi, (u64)instr); - code.mov(rbx, qword[rdi + GPR_OFFSET(RS(instr))]); - code.test(rbx, rbx); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); code.xor_(rdx, rdx); + code.test(r8, r8); code.setg(dl); code.mov(rax, (u64)b); code.call(rax); @@ -593,43 +565,39 @@ bool Dynarec::Exec(n64::Registers& regs, Mem& mem, u32 instr) { case 0x12: cop2Decode(regs, instr); break; case 0x14: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmove(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, rcx); + code.sete(dl); code.mov(rax, (u64)bl); code.call(rax); break; case 0x15: code.mov(rsi, (u64)instr); - 
code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], - qword[rdi + GPR_OFFSET(RT(instr))]); - code.cmovne(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.mov(rcx, qword[rdi + GPR_OFFSET(RT(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, rcx); + code.setne(dl); code.mov(rax, (u64)bl); code.call(rax); break; case 0x16: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovle(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, 0); + code.setle(dl); code.mov(rax, (u64)bl); code.call(rax); break; case 0x17: code.mov(rsi, (u64)instr); - code.mov(cl, 0); - code.mov(ch, 1); - code.cmp(qword[rdi + GPR_OFFSET(RS(instr))], 0); - code.cmovg(cl, ch); - code.mov(rdx, cl.cvt64()); + code.mov(r8, qword[rdi + GPR_OFFSET(RS(instr))]); + code.xor_(rdx, rdx); + code.cmp(r8, 0); + code.setg(dl); code.mov(rax, (u64)b); code.call(rax); break; diff --git a/src/backend/core/dynarec/instructions.cpp b/src/backend/core/dynarec/instructions.cpp index 87481d18..b72036f9 100644 --- a/src/backend/core/dynarec/instructions.cpp +++ b/src/backend/core/dynarec/instructions.cpp @@ -148,14 +148,14 @@ void ddivu(Registers& regs, u32 instr) { void branch(Registers& regs, bool cond, s64 address) { regs.delaySlot = true; if (cond) { - regs.nextPC = address; + regs.pc = address; } } void branch_likely(Registers& regs, bool cond, s64 address) { regs.delaySlot = true; if (cond) { - regs.nextPC = address; + regs.pc = address; } else { regs.SetPC(regs.nextPC); } diff --git a/src/frontend/imgui/GameList.cpp b/src/frontend/imgui/GameList.cpp index 90f38e0f..2aacfc93 100644 --- a/src/frontend/imgui/GameList.cpp +++ b/src/frontend/imgui/GameList.cpp @@ -13,6 +13,11 @@ using namespace nlohmann; namespace fs = std::filesystem; GameList::GameList(const std::string& path) { + Create(path); +} 
+ +void GameList::Create(const std::string &path) { + threadDone = false; if(!path.empty()) { std::thread searchThread([path, this]() { std::ifstream gameDbFile("resources/db.json"); @@ -20,6 +25,10 @@ GameList::GameList(const std::string& path) { std::vector rom{}; for(const auto& p : fs::recursive_directory_iterator{path}) { const auto filename = p.path().string(); + if(threadDone) { + gamesList.clear(); + break; + } if(p.path().extension() == ".n64" || p.path().extension() == ".z64" || p.path().extension() == ".v64" || p.path().extension() == ".N64" || p.path().extension() == ".Z64" || p.path().extension() == ".V64") { std::ifstream file(filename, std::ios::binary); diff --git a/src/frontend/imgui/GameList.hpp b/src/frontend/imgui/GameList.hpp index c2c06fbf..5ac67588 100644 --- a/src/frontend/imgui/GameList.hpp +++ b/src/frontend/imgui/GameList.hpp @@ -3,13 +3,6 @@ #include #include -enum GameInfoID { - Name_ID, - Region_ID, - Size_ID, - Status_ID -}; - struct GameInfo { std::string name, region, size, status, path; }; @@ -18,10 +11,11 @@ struct GameList { GameList(const std::string&); ~GameList() = default; + void Create(const std::string&); bool RenderWidget(float, std::string&); [[nodiscard]] std::vector GetGamesList() const { return gamesList; } + std::atomic_bool threadDone = false; private: std::vector gamesList{}, notMatch{}; - std::atomic_bool threadDone = false; }; diff --git a/src/frontend/imgui/Window.cpp b/src/frontend/imgui/Window.cpp index 4eaed421..710a6bc6 100644 --- a/src/frontend/imgui/Window.cpp +++ b/src/frontend/imgui/Window.cpp @@ -160,6 +160,7 @@ DrawData Window::Present(n64::Core& core) { void Window::LoadROM(n64::Core& core, const std::string &path) { if(!path.empty()) { + gameList.threadDone = true; n64::CartInfo cartInfo = core.LoadROM(path); std::ifstream gameDbFile("resources/db.json"); json gameDb = json::parse(gameDbFile); @@ -243,6 +244,7 @@ float Window::Render(n64::Core& core) { core.rom.clear(); Util::UpdateRPC(Util::Idling); 
SDL_SetWindowTitle(window, windowTitle.c_str()); + gameList.Create(settings.GetGamesDir()); core.Stop(); } if (ImGui::MenuItem(core.pause ? "Resume" : "Pause", nullptr, false, core.romLoaded)) {