From 55f792d995ffa1d8299cba6ebcd094b7c3fb93de Mon Sep 17 00:00:00 2001 From: CocoSimone Date: Fri, 17 Feb 2023 17:16:56 +0100 Subject: [PATCH] Optimizations (are they?) --- src/backend/Debugger.cpp | 2 +- src/backend/MemoryRegions.hpp | 4 +- src/backend/core/Interpreter.cpp | 2 - src/backend/core/Mem.cpp | 83 +++--- src/backend/core/Mem.hpp | 38 +-- src/backend/core/RDP.cpp | 8 +- src/backend/core/RDP.hpp | 2 +- src/backend/core/dynarec/instructions.cpp | 236 +++++++++++------- src/backend/core/interpreter/instructions.cpp | 234 ++++++++++------- src/backend/core/mmio/AI.cpp | 2 +- src/backend/core/registers/Cop0.cpp | 6 +- src/backend/core/registers/Cop0.hpp | 4 + src/backend/core/rsp/instructions.cpp | 45 ++++ src/common.hpp | 7 +- src/frontend/imgui/GameList.cpp | 167 ------------- src/frontend/imgui/GameList.hpp | 21 -- src/frontend/imgui/Window.cpp | 14 +- src/frontend/imgui/Window.hpp | 4 - 18 files changed, 430 insertions(+), 449 deletions(-) delete mode 100644 src/frontend/imgui/GameList.cpp delete mode 100644 src/frontend/imgui/GameList.hpp diff --git a/src/backend/Debugger.cpp b/src/backend/Debugger.cpp index 959222e9..1db55f21 100644 --- a/src/backend/Debugger.cpp +++ b/src/backend/Debugger.cpp @@ -178,7 +178,7 @@ size_t debugGetMemory(void* user_data, char* buffer, size_t length, u32 address, printf("Checking memory at address 0x%08X\n", address); int printed = 0; u32 paddr; - if(!n64::MapVAddr(debugger->core.CpuGetRegs(), LOAD, address, paddr)) { + if(!n64::MapVAddr(debugger->core.CpuGetRegs(), n64::LOAD, address, paddr)) { return 0; } diff --git a/src/backend/MemoryRegions.hpp b/src/backend/MemoryRegions.hpp index 208a8f10..2525ba2b 100644 --- a/src/backend/MemoryRegions.hpp +++ b/src/backend/MemoryRegions.hpp @@ -13,6 +13,8 @@ #define PIF_BOOTROM_DSIZE (PIF_BOOTROM_SIZE - 1) #define ISVIEWER_SIZE (0x13FFFFFF - 0x13FF0020) #define ISVIEWER_DSIZE (ISVIEWER_SIZE - 1) +#define CART_SIZE 0xFC00000 +#define CART_DSIZE (CART_REGION_SIZE - 1) #define 
RDRAM_REGION_START 0 #define RDRAM_REGION_END RDRAM_DSIZE @@ -38,7 +40,7 @@ #define RI_REGION 0x04700000 ... 0x047FFFFF #define SI_REGION 0x04800000 ... 0x048FFFFF #define SRAM_REGION 0x08000000 ... 0x0FFFFFFF -#define CART_REGION 0x10000000 ... 0x1FBFFFFF +#define CART_REGION (CART_REGION_START) ... (CART_REGION_END) #define PIF_ROM_REGION 0x1FC00000 ... 0x1FC007BF #define PIF_RAM_REGION PIF_RAM_REGION_START ... PIF_RAM_REGION_END diff --git a/src/backend/core/Interpreter.cpp b/src/backend/core/Interpreter.cpp index 99624e9e..f20e91d6 100644 --- a/src/backend/core/Interpreter.cpp +++ b/src/backend/core/Interpreter.cpp @@ -22,8 +22,6 @@ inline void CheckCompareInterrupt(MI& mi, Registers& regs) { } void Interpreter::Step(Mem& mem) { - regs.gpr[0] = 0; - CheckCompareInterrupt(mem.mmio.mi, regs); regs.prevDelaySlot = regs.delaySlot; diff --git a/src/backend/core/Mem.cpp b/src/backend/core/Mem.cpp index 6692c6d5..e5a8caf2 100644 --- a/src/backend/core/Mem.cpp +++ b/src/backend/core/Mem.cpp @@ -12,10 +12,8 @@ Mem::Mem() { } void Mem::Reset() { - readPages.resize(PAGE_COUNT); - writePages.resize(PAGE_COUNT); - std::fill(readPages.begin(), readPages.end(), 0); - std::fill(writePages.begin(), writePages.end(), 0); + memset(readPages, 0, sizeof(readPages)); + memset(writePages, 0, sizeof(writePages)); for(int i = 0; i < RDRAM_SIZE / PAGE_SIZE; i++) { const auto addr = (i * PAGE_SIZE) & RDRAM_DSIZE; @@ -24,9 +22,16 @@ void Mem::Reset() { writePages[i] = pointer; } - sram.resize(SRAM_SIZE); - std::fill(sram.begin(), sram.end(), 0); - romMask = 0; + if(sram) { + free(sram); + } + + if(cart) { + free(cart); + } + + cart = (u8*)calloc(CART_SIZE, 1); + sram = (u8*)calloc(SRAM_SIZE, 1); mmio.Reset(); } @@ -39,22 +44,20 @@ CartInfo Mem::LoadROM(const std::string& filename) { } file.seekg(0, std::ios::end); - auto size = file.tellg(); - auto sizeAdjusted = Util::NextPow2(size); + size_t size = file.tellg(); + size_t sizeAdjusted = Util::NextPow2(size); romMask = sizeAdjusted - 1; file.seekg(0, 
std::ios::beg); - std::fill(cart.begin(), cart.end(), 0); - cart.resize(sizeAdjusted); - cart.insert(cart.begin(), std::istream_iterator(file), std::istream_iterator()); + file.read(reinterpret_cast<char*>(cart), size); file.close(); CartInfo result{}; u32 cicChecksum; - Util::SwapN64Rom(sizeAdjusted, cart.data(), result.crc, cicChecksum); - memcpy(mmio.rsp.dmem, cart.data(), 0x1000); + Util::SwapN64Rom(sizeAdjusted, cart, result.crc, cicChecksum); + memcpy(mmio.rsp.dmem, cart, 0x1000); SetCICType(result.cicType, cicChecksum); result.isPAL = IsROMPAL(); @@ -108,13 +111,13 @@ u8 Mem::Read8(n64::Registers &regs, u32 paddr) { int offs = 3 - (paddr & 3); return (w >> (offs * 8)) & 0xff; } - case 0x10000000 ... 0x1FBFFFFF: + case CART_REGION: paddr = (paddr + 2) & ~2; return cart[BYTE_ADDRESS(paddr) & romMask]; case 0x1FC00000 ... 0x1FC007BF: return pifBootrom[BYTE_ADDRESS(paddr) - 0x1FC00000]; - case 0x1FC007C0 ... 0x1FC007FF: - return pifRam[paddr - 0x1FC007C0]; + case PIF_RAM_REGION: + return pifRam[paddr - PIF_RAM_REGION_START]; case 0x00800000 ... 0x03FFFFFF: case 0x04200000 ... 0x042FFFFF: case 0x04900000 ... 0x0FFFFFFF: @@ -136,7 +139,7 @@ u16 Mem::Read16(n64::Registers &regs, u32 paddr) { } else { switch (paddr) { case 0x00000000 ... 0x007FFFFF: - return Util::ReadAccess(mmio.rdp.rdram.data(), HALF_ADDRESS(paddr)); + return Util::ReadAccess(mmio.rdp.rdram, HALF_ADDRESS(paddr)); case 0x04000000 ... 0x0403FFFF: if (paddr & 0x1000) return Util::ReadAccess(mmio.rsp.imem, HALF_ADDRESS(paddr) & IMEM_DSIZE); @@ -149,11 +152,11 @@ u16 Mem::Read16(n64::Registers &regs, u32 paddr) { return mmio.Read(paddr); case 0x10000000 ... 0x1FBFFFFF: paddr = (paddr + 2) & ~3; - return Util::ReadAccess(cart.data(), HALF_ADDRESS(paddr) & romMask); + return Util::ReadAccess(cart, HALF_ADDRESS(paddr) & romMask); case 0x1FC00000 ... 0x1FC007BF: return Util::ReadAccess(pifBootrom, HALF_ADDRESS(paddr) - 0x1FC00000); - case 0x1FC007C0 ... 
0x1FC007FF: - return be16toh(Util::ReadAccess(pifRam, paddr - 0x1FC007C0)); + case PIF_RAM_REGION: + return be16toh(Util::ReadAccess(pifRam, paddr - PIF_RAM_REGION_START)); case 0x00800000 ... 0x03FFFFFF: case 0x04200000 ... 0x042FFFFF: case 0x04900000 ... 0x0FFFFFFF: @@ -175,7 +178,7 @@ u32 Mem::Read32(n64::Registers &regs, u32 paddr) { } else { switch(paddr) { case 0x00000000 ... 0x007FFFFF: - return Util::ReadAccess(mmio.rdp.rdram.data(), paddr); + return Util::ReadAccess(mmio.rdp.rdram, paddr); case 0x04000000 ... 0x0403FFFF: if(paddr & 0x1000) return Util::ReadAccess(mmio.rsp.imem, paddr & IMEM_DSIZE); @@ -185,11 +188,11 @@ u32 Mem::Read32(n64::Registers &regs, u32 paddr) { case 0x04300000 ... 0x044FFFFF: case 0x04500000 ... 0x048FFFFF: return mmio.Read(paddr); case 0x10000000 ... 0x1FBFFFFF: - return Util::ReadAccess(cart.data(), paddr & romMask); + return Util::ReadAccess(cart, paddr & romMask); case 0x1FC00000 ... 0x1FC007BF: return Util::ReadAccess(pifBootrom, paddr - 0x1FC00000); - case 0x1FC007C0 ... 0x1FC007FF: - return be32toh(Util::ReadAccess(pifRam, paddr - 0x1FC007C0)); + case PIF_RAM_REGION: + return be32toh(Util::ReadAccess(pifRam, paddr - PIF_RAM_REGION_START)); case 0x00800000 ... 0x03FFFFFF: case 0x04200000 ... 0x042FFFFF: case 0x04900000 ... 0x0FFFFFFF: case 0x1FC00800 ... 0xFFFFFFFF: return 0; default: @@ -208,7 +211,7 @@ u64 Mem::Read64(n64::Registers &regs, u32 paddr) { } else { switch (paddr) { case 0x00000000 ... 0x007FFFFF: - return Util::ReadAccess(mmio.rdp.rdram.data(), paddr); + return Util::ReadAccess(mmio.rdp.rdram, paddr); case 0x04000000 ... 0x0403FFFF: if (paddr & 0x1000) return Util::ReadAccess(mmio.rsp.imem, paddr & IMEM_DSIZE); @@ -220,11 +223,11 @@ u64 Mem::Read64(n64::Registers &regs, u32 paddr) { case 0x04500000 ... 0x048FFFFF: return mmio.Read(paddr); case 0x10000000 ... 0x1FBFFFFF: - return Util::ReadAccess(cart.data(), paddr & romMask); + return Util::ReadAccess(cart, paddr & romMask); case 0x1FC00000 ... 
0x1FC007BF: return Util::ReadAccess(pifBootrom, paddr - 0x1FC00000); - case 0x1FC007C0 ... 0x1FC007FF: - return be64toh(Util::ReadAccess(pifRam, paddr - 0x1FC007C0)); + case PIF_RAM_REGION: + return be64toh(Util::ReadAccess(pifRam, paddr - PIF_RAM_REGION_START)); case 0x00800000 ... 0x03FFFFFF: case 0x04200000 ... 0x042FFFFF: case 0x04900000 ... 0x0FFFFFFF: @@ -283,9 +286,9 @@ void Mem::Write8(Registers& regs, u32 paddr, u32 val) { Util::panic("MMIO Write8!\n"); case 0x10000000 ... 0x1FBFFFFF: break; - case 0x1FC007C0 ... 0x1FC007FF: + case PIF_RAM_REGION: val = val << (8 * (3 - (paddr & 3))); - paddr = (paddr - 0x1FC007C0) & ~3; + paddr = (paddr - PIF_RAM_REGION_START) & ~3; Util::WriteAccess(pifRam, paddr, htobe32(val)); ProcessPIFCommands(pifRam, mmio.si.controller, *this); break; @@ -317,7 +320,7 @@ void Mem::Write16(Registers& regs, u32 paddr, u32 val) { } else { switch (paddr) { case 0x00000000 ... 0x007FFFFF: - Util::WriteAccess(mmio.rdp.rdram.data(), HALF_ADDRESS(paddr), val); + Util::WriteAccess(mmio.rdp.rdram, HALF_ADDRESS(paddr), val); break; case 0x04000000 ... 0x0403FFFF: val = val << (16 * !(paddr & 2)); @@ -334,10 +337,10 @@ void Mem::Write16(Registers& regs, u32 paddr, u32 val) { Util::panic("MMIO Write16!\n"); case 0x10000000 ... 0x1FBFFFFF: break; - case 0x1FC007C0 ... 0x1FC007FF: + case PIF_RAM_REGION: val = val << (16 * !(paddr & 2)); paddr &= ~3; - Util::WriteAccess(pifRam, paddr - 0x1FC007C0, htobe32(val)); + Util::WriteAccess(pifRam, paddr - PIF_RAM_REGION_START, htobe32(val)); ProcessPIFCommands(pifRam, mmio.si.controller, *this); break; case 0x00800000 ... 0x03FFFFFF: @@ -364,7 +367,7 @@ void Mem::Write32(Registers& regs, u32 paddr, u32 val) { } else { switch(paddr) { case 0x00000000 ... 0x007FFFFF: - Util::WriteAccess(mmio.rdp.rdram.data(), paddr, val); + Util::WriteAccess(mmio.rdp.rdram, paddr, val); break; case 0x04000000 ... 
0x0403FFFF: if(paddr & 0x1000) @@ -387,8 +390,8 @@ void Mem::Write32(Registers& regs, u32 paddr, u32 val) { Util::WriteAccess(isviewer, paddr - 0x13FF0020, htobe32(val)); break; case 0x14000000 ... 0x1FBFFFFF: break; - case 0x1FC007C0 ... 0x1FC007FF: - Util::WriteAccess(pifRam, paddr - 0x1FC007C0, htobe32(val)); + case PIF_RAM_REGION: + Util::WriteAccess(pifRam, paddr - PIF_RAM_REGION_START, htobe32(val)); ProcessPIFCommands(pifRam, mmio.si.controller, *this); break; case 0x00800000 ... 0x03FFFFFF: case 0x04200000 ... 0x042FFFFF: @@ -409,7 +412,7 @@ void Mem::Write64(Registers& regs, u32 paddr, u64 val) { } else { switch (paddr) { case 0x00000000 ... 0x007FFFFF: - Util::WriteAccess(mmio.rdp.rdram.data(), paddr, val); + Util::WriteAccess(mmio.rdp.rdram, paddr, val); break; case 0x04000000 ... 0x0403FFFF: val >>= 32; @@ -425,8 +428,8 @@ void Mem::Write64(Registers& regs, u32 paddr, u64 val) { Util::panic("MMIO Write64!\n"); case 0x10000000 ... 0x1FBFFFFF: break; - case 0x1FC007C0 ... 0x1FC007FF: - Util::WriteAccess(pifRam, paddr - 0x1FC007C0, htobe64(val)); + case PIF_RAM_REGION: + Util::WriteAccess(pifRam, paddr - PIF_RAM_REGION_START, htobe64(val)); ProcessPIFCommands(pifRam, mmio.si.controller, *this); break; case 0x00800000 ... 
0x03FFFFFF: diff --git a/src/backend/core/Mem.hpp b/src/backend/core/Mem.hpp index 43fb272f..5593d79e 100644 --- a/src/backend/core/Mem.hpp +++ b/src/backend/core/Mem.hpp @@ -5,10 +5,10 @@ #include #include #include +#include +#include namespace n64 { -struct Registers; - struct CartInfo { bool isPAL; u32 cicType; @@ -20,12 +20,14 @@ struct Dynarec; } struct Mem { - ~Mem() = default; + ~Mem() { + free(sram); free(cart); + } Mem(); void Reset(); CartInfo LoadROM(const std::string&); - [[nodiscard]] auto GetRDRAM() -> u8* { - return mmio.rdp.rdram.data(); + [[nodiscard]] auto GetRDRAM() const -> u8* { + return mmio.rdp.rdram; } u8 Read8(Registers&, u32); @@ -47,7 +49,7 @@ struct Mem { inline void DumpRDRAM() const { FILE *fp = fopen("rdram.dump", "wb"); u8 *temp = (u8*)calloc(RDRAM_SIZE, 1); - memcpy(temp, mmio.rdp.rdram.data(), RDRAM_SIZE); + memcpy(temp, mmio.rdp.rdram, RDRAM_SIZE); Util::SwapBuffer32(RDRAM_SIZE, temp); fwrite(temp, 1, RDRAM_SIZE, fp); free(temp); @@ -73,20 +75,19 @@ struct Mem { free(temp); fclose(fp); } - std::vector writePages, readPages; + uintptr_t writePages[PAGE_COUNT], readPages[PAGE_COUNT]; private: friend struct SI; friend struct PI; friend struct AI; - friend struct Cpu; friend struct RSP; friend struct Core; - std::vector cart, sram; + u8* sram = nullptr, *cart = nullptr; u8 pifBootrom[PIF_BOOTROM_SIZE]{}; u8 isviewer[ISVIEWER_SIZE]{}; - size_t romMask; + size_t romMask = 0; - void SetCICType(u32& cicType, u32 checksum) { + static void SetCICType(u32& cicType, u32 checksum) { switch(checksum) { case 0xEC8B1325: // 7102 cicType = CIC_NUS_7102; @@ -115,12 +116,15 @@ private: bool IsROMPAL() { static const char pal_codes[] = {'D', 'F', 'I', 'P', 'S', 'U', 'X', 'Y'}; - for (int i = 0; i < 8; i++) { - if (cart[0x3e] == pal_codes[i]) { - return true; - } - } - return false; + return std::any_of(std::begin(pal_codes), std::end(pal_codes), [this](char a) { + return cart[0x3e] == a; + }); + // for (char pal_code : pal_codes) { + // if (cart[0x3e] == pal_code) { + // return 
true; + // } + // } + // return false; } }; diff --git a/src/backend/core/RDP.cpp b/src/backend/core/RDP.cpp index bc0d18e7..5eeac0af 100644 --- a/src/backend/core/RDP.cpp +++ b/src/backend/core/RDP.cpp @@ -11,8 +11,10 @@ RDP::RDP() { void RDP::Reset() { dpc.status.raw = 0x80; - rdram.resize(RDRAM_SIZE); - std::fill(rdram.begin(), rdram.end(), 0); + if(rdram) { + free(rdram); + } + rdram = (u8*)calloc(RDRAM_SIZE, 1); memset(cmd_buf, 0, 0x100000); } @@ -144,7 +146,7 @@ void RDP::RunCommand(MI& mi, Registers& regs, RSP& rsp) { return; } for (int i = 0; i < len; i += 4) { - u32 cmd = Util::ReadAccess(rdram.data(), current + i); + u32 cmd = Util::ReadAccess(rdram, current + i); cmd_buf[remaining_cmds + (i >> 2)] = cmd; } } diff --git a/src/backend/core/RDP.hpp b/src/backend/core/RDP.hpp index 32dca6ff..1c88f313 100644 --- a/src/backend/core/RDP.hpp +++ b/src/backend/core/RDP.hpp @@ -57,7 +57,7 @@ struct RDP { RDP(); void Reset(); - std::vector rdram; + u8* rdram = nullptr; [[nodiscard]] auto Read(u32 addr) const -> u32; void Write(MI& mi, Registers& regs, RSP& rsp, u32 addr, u32 val); void WriteStatus(MI& mi, Registers& regs, RSP& rsp, u32 val); diff --git a/src/backend/core/dynarec/instructions.cpp b/src/backend/core/dynarec/instructions.cpp index d9bd6526..8beb8364 100644 --- a/src/backend/core/dynarec/instructions.cpp +++ b/src/backend/core/dynarec/instructions.cpp @@ -13,15 +13,19 @@ void add(Registers& regs, u32 instr) { if(check_signed_overflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = s32(result); + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = s32(result); + } } } void addu(Registers& regs, u32 instr) { - s32 rs = (s32)regs.gpr[RS(instr)]; - s32 rt = (s32)regs.gpr[RT(instr)]; - s32 result = rs + rt; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s32 rs = (s32)regs.gpr[RS(instr)]; + s32 rt = (s32)regs.gpr[RT(instr)]; + s32 result = rs + rt; + regs.gpr[RD(instr)] = result; + } } void 
addi(Registers& regs, u32 instr) { @@ -49,14 +53,18 @@ void dadd(Registers& regs, u32 instr) { if(check_signed_overflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = result; + } } } void daddu(Registers& regs, u32 instr) { - s64 rs = regs.gpr[RS(instr)]; - s64 rt = regs.gpr[RT(instr)]; - regs.gpr[RD(instr)] = rs + rt; + if(likely(RD(instr) != 0)) { + s64 rs = regs.gpr[RS(instr)]; + s64 rt = regs.gpr[RT(instr)]; + regs.gpr[RD(instr)] = rs + rt; + } } void daddi(Registers& regs, u32 instr) { @@ -545,11 +553,15 @@ void ori(Registers& regs, u32 instr) { } void or_(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RS(instr)] | regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RS(instr)] | regs.gpr[RT(instr)]; + } } void nor(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = ~(regs.gpr[RS(instr)] | regs.gpr[RT(instr)]); + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = ~(regs.gpr[RS(instr)] | regs.gpr[RT(instr)]); + } } void j(Registers& regs, u32 instr) { @@ -566,7 +578,9 @@ void jal(Registers& regs, u32 instr) { void jalr(Registers& regs, u32 instr) { branch(regs, true, regs.gpr[RS(instr)]); - regs.gpr[RD(instr)] = regs.pc + 4; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.pc + 4; + } } void slti(Registers& regs, u32 instr) { @@ -578,11 +592,15 @@ void sltiu(Registers& regs, u32 instr) { } void slt(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RS(instr)] < regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RS(instr)] < regs.gpr[RT(instr)]; + } } void sltu(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = (u64)regs.gpr[RS(instr)] < (u64)regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = (u64) regs.gpr[RS(instr)] < (u64) regs.gpr[RT(instr)]; + } } void xori(Registers& regs, u32 instr) { @@ -591,7 
+609,9 @@ void xori(Registers& regs, u32 instr) { } void xor_(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RT(instr)] ^ regs.gpr[RS(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RT(instr)] ^ regs.gpr[RS(instr)]; + } } void andi(Registers& regs, u32 instr) { @@ -600,110 +620,142 @@ void andi(Registers& regs, u32 instr) { } void and_(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RS(instr)] & regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RS(instr)] & regs.gpr[RT(instr)]; + } } void sll(Registers& regs, u32 instr) { - u8 sa = ((instr >> 6) & 0x1f); - s32 result = regs.gpr[RT(instr)] << sa; - regs.gpr[RD(instr)] = (s64)result; + if(likely(RD(instr) != 0)) { + u8 sa = ((instr >> 6) & 0x1f); + s32 result = regs.gpr[RT(instr)] << sa; + regs.gpr[RD(instr)] = (s64) result; + } } void sllv(Registers& regs, u32 instr) { - u8 sa = (regs.gpr[RS(instr)]) & 0x1F; - u32 rt = regs.gpr[RT(instr)]; - s32 result = rt << sa; - regs.gpr[RD(instr)] = (s64)result; + if(likely(RD(instr) != 0)) { + u8 sa = (regs.gpr[RS(instr)]) & 0x1F; + u32 rt = regs.gpr[RT(instr)]; + s32 result = rt << sa; + regs.gpr[RD(instr)] = (s64) result; + } } void dsll32(Registers& regs, u32 instr) { - u8 sa = ((instr >> 6) & 0x1f); - s64 result = regs.gpr[RT(instr)] << (sa + 32); - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + u8 sa = ((instr >> 6) & 0x1f); + s64 result = regs.gpr[RT(instr)] << (sa + 32); + regs.gpr[RD(instr)] = result; + } } void dsll(Registers& regs, u32 instr) { - u8 sa = ((instr >> 6) & 0x1f); - s64 result = regs.gpr[RT(instr)] << sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + u8 sa = ((instr >> 6) & 0x1f); + s64 result = regs.gpr[RT(instr)] << sa; + regs.gpr[RD(instr)] = result; + } } void dsllv(Registers& regs, u32 instr) { - s64 sa = regs.gpr[RS(instr)] & 63; - s64 result = regs.gpr[RT(instr)] << sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) 
!= 0)) { + s64 sa = regs.gpr[RS(instr)] & 63; + s64 result = regs.gpr[RT(instr)] << sa; + regs.gpr[RD(instr)] = result; + } } void srl(Registers& regs, u32 instr) { - u32 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - u32 result = rt >> sa; - regs.gpr[RD(instr)] = (s32)result; + if(likely(RD(instr) != 0)) { + u32 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + u32 result = rt >> sa; + regs.gpr[RD(instr)] = (s32) result; + } } void srlv(Registers& regs, u32 instr) { - u8 sa = (regs.gpr[RS(instr)] & 0x1F); - u32 rt = regs.gpr[RT(instr)]; - s32 result = rt >> sa; - regs.gpr[RD(instr)] = (s64)result; + if(likely(RD(instr) != 0)) { + u8 sa = (regs.gpr[RS(instr)] & 0x1F); + u32 rt = regs.gpr[RT(instr)]; + s32 result = rt >> sa; + regs.gpr[RD(instr)] = (s64) result; + } } void dsrl(Registers& regs, u32 instr) { - u64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - u64 result = rt >> sa; - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + u64 result = rt >> sa; + regs.gpr[RD(instr)] = s64(result); + } } void dsrlv(Registers& regs, u32 instr) { - u8 amount = (regs.gpr[RS(instr)] & 63); - u64 rt = regs.gpr[RT(instr)]; - u64 result = rt >> amount; - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u8 amount = (regs.gpr[RS(instr)] & 63); + u64 rt = regs.gpr[RT(instr)]; + u64 result = rt >> amount; + regs.gpr[RD(instr)] = s64(result); + } } void dsrl32(Registers& regs, u32 instr) { - u64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - u64 result = rt >> (sa + 32); - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + u64 result = rt >> (sa + 32); + regs.gpr[RD(instr)] = s64(result); + } } void sra(Registers& regs, u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - s32 result = rt >> sa; - regs.gpr[RD(instr)] = result; 
+ if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + s32 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void srav(Registers& regs, u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - s64 rs = regs.gpr[RS(instr)]; - u8 sa = rs & 0x1f; - s32 result = rt >> sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + s64 rs = regs.gpr[RS(instr)]; + u8 sa = rs & 0x1f; + s32 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void dsra(Registers& regs, u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - s64 result = rt >> sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + s64 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void dsrav(Registers& regs, u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - s64 rs = regs.gpr[RS(instr)]; - s64 sa = rs & 63; - s64 result = rt >> sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + s64 rs = regs.gpr[RS(instr)]; + s64 sa = rs & 63; + s64 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void dsra32(Registers& regs, u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - s64 result = rt >> (sa + 32); - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + s64 result = rt >> (sa + 32); + regs.gpr[RD(instr)] = result; + } } void jr(Registers& regs, u32 instr) { @@ -723,15 +775,19 @@ void dsub(Registers& regs, u32 instr) { if(check_signed_underflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = result; + } } } void dsubu(Registers& regs, u32 instr) { - u64 rt = regs.gpr[RT(instr)]; - u64 rs = regs.gpr[RS(instr)]; - u64 result = rs - rt; - 
regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u64 rt = regs.gpr[RT(instr)]; + u64 rs = regs.gpr[RS(instr)]; + u64 result = rs - rt; + regs.gpr[RD(instr)] = s64(result); + } } void sub(Registers& regs, u32 instr) { @@ -741,15 +797,19 @@ void sub(Registers& regs, u32 instr) { if(check_signed_underflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = result; + } } } void subu(Registers& regs, u32 instr) { - u32 rt = regs.gpr[RT(instr)]; - u32 rs = regs.gpr[RS(instr)]; - u32 result = rs - rt; - regs.gpr[RD(instr)] = (s64)((s32)result); + if(likely(RD(instr) != 0)) { + u32 rt = regs.gpr[RT(instr)]; + u32 rs = regs.gpr[RS(instr)]; + u32 result = rs - rt; + regs.gpr[RD(instr)] = (s64) ((s32) result); + } } void dmultu(Registers& regs, u32 instr) { @@ -785,11 +845,15 @@ void mult(Registers& regs, u32 instr) { } void mflo(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = regs.lo; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.lo; + } } void mfhi(Registers& regs, u32 instr) { - regs.gpr[RD(instr)] = regs.hi; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.hi; + } } void mtlo(Registers& regs, u32 instr) { diff --git a/src/backend/core/interpreter/instructions.cpp b/src/backend/core/interpreter/instructions.cpp index 1b1c3d2a..32760aee 100644 --- a/src/backend/core/interpreter/instructions.cpp +++ b/src/backend/core/interpreter/instructions.cpp @@ -12,15 +12,19 @@ void Interpreter::add(u32 instr) { if(check_signed_overflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = s32(result); + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = s32(result); + } } } void Interpreter::addu(u32 instr) { - s32 rs = (s32)regs.gpr[RS(instr)]; - s32 rt = (s32)regs.gpr[RT(instr)]; - s32 result = rs + rt; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { 
+ s32 rs = (s32)regs.gpr[RS(instr)]; + s32 rt = (s32)regs.gpr[RT(instr)]; + s32 result = rs + rt; + regs.gpr[RD(instr)] = result; + } } void Interpreter::addi(u32 instr) { @@ -48,14 +52,18 @@ void Interpreter::dadd(u32 instr) { if(check_signed_overflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = result; + } } } void Interpreter::daddu(u32 instr) { - s64 rs = regs.gpr[RS(instr)]; - s64 rt = regs.gpr[RT(instr)]; - regs.gpr[RD(instr)] = rs + rt; + if(likely(RD(instr) != 0)) { + s64 rs = regs.gpr[RS(instr)]; + s64 rt = regs.gpr[RT(instr)]; + regs.gpr[RD(instr)] = rs + rt; + } } void Interpreter::daddi(u32 instr) { @@ -583,11 +591,15 @@ void Interpreter::ori(u32 instr) { } void Interpreter::or_(u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RS(instr)] | regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RS(instr)] | regs.gpr[RT(instr)]; + } } void Interpreter::nor(u32 instr) { - regs.gpr[RD(instr)] = ~(regs.gpr[RS(instr)] | regs.gpr[RT(instr)]); + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = ~(regs.gpr[RS(instr)] | regs.gpr[RT(instr)]); + } } void Interpreter::j(u32 instr) { @@ -604,7 +616,9 @@ void Interpreter::jal(u32 instr) { void Interpreter::jalr(u32 instr) { branch(true, regs.gpr[RS(instr)]); - regs.gpr[RD(instr)] = regs.pc + 4; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.pc + 4; + } } void Interpreter::slti(u32 instr) { @@ -618,11 +632,15 @@ void Interpreter::sltiu(u32 instr) { } void Interpreter::slt(u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RS(instr)] < regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RS(instr)] < regs.gpr[RT(instr)]; + } } void Interpreter::sltu(u32 instr) { - regs.gpr[RD(instr)] = (u64)regs.gpr[RS(instr)] < (u64)regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = (u64) regs.gpr[RS(instr)] < (u64) 
regs.gpr[RT(instr)]; + } } void Interpreter::xori(u32 instr) { @@ -631,7 +649,9 @@ void Interpreter::xori(u32 instr) { } void Interpreter::xor_(u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RT(instr)] ^ regs.gpr[RS(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RT(instr)] ^ regs.gpr[RS(instr)]; + } } void Interpreter::andi(u32 instr) { @@ -640,110 +660,142 @@ void Interpreter::andi(u32 instr) { } void Interpreter::and_(u32 instr) { - regs.gpr[RD(instr)] = regs.gpr[RS(instr)] & regs.gpr[RT(instr)]; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.gpr[RS(instr)] & regs.gpr[RT(instr)]; + } } void Interpreter::sll(u32 instr) { - u8 sa = ((instr >> 6) & 0x1f); - s32 result = regs.gpr[RT(instr)] << sa; - regs.gpr[RD(instr)] = (s64)result; + if(likely(RD(instr) != 0)) { + u8 sa = ((instr >> 6) & 0x1f); + s32 result = regs.gpr[RT(instr)] << sa; + regs.gpr[RD(instr)] = (s64) result; + } } void Interpreter::sllv(u32 instr) { - u8 sa = (regs.gpr[RS(instr)]) & 0x1F; - u32 rt = regs.gpr[RT(instr)]; - s32 result = rt << sa; - regs.gpr[RD(instr)] = (s64)result; + if(likely(RD(instr) != 0)) { + u8 sa = (regs.gpr[RS(instr)]) & 0x1F; + u32 rt = regs.gpr[RT(instr)]; + s32 result = rt << sa; + regs.gpr[RD(instr)] = (s64) result; + } } void Interpreter::dsll32(u32 instr) { - u8 sa = ((instr >> 6) & 0x1f); - s64 result = regs.gpr[RT(instr)] << (sa + 32); - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + u8 sa = ((instr >> 6) & 0x1f); + s64 result = regs.gpr[RT(instr)] << (sa + 32); + regs.gpr[RD(instr)] = result; + } } void Interpreter::dsll(u32 instr) { - u8 sa = ((instr >> 6) & 0x1f); - s64 result = regs.gpr[RT(instr)] << sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + u8 sa = ((instr >> 6) & 0x1f); + s64 result = regs.gpr[RT(instr)] << sa; + regs.gpr[RD(instr)] = result; + } } void Interpreter::dsllv(u32 instr) { - s64 sa = regs.gpr[RS(instr)] & 63; - s64 result = regs.gpr[RT(instr)] << sa; - regs.gpr[RD(instr)] 
= result; + if(likely(RD(instr) != 0)) { + s64 sa = regs.gpr[RS(instr)] & 63; + s64 result = regs.gpr[RT(instr)] << sa; + regs.gpr[RD(instr)] = result; + } } void Interpreter::srl(u32 instr) { - u32 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - u32 result = rt >> sa; - regs.gpr[RD(instr)] = (s32)result; + if(likely(RD(instr) != 0)) { + u32 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + u32 result = rt >> sa; + regs.gpr[RD(instr)] = (s32) result; + } } void Interpreter::srlv(u32 instr) { - u8 sa = (regs.gpr[RS(instr)] & 0x1F); - u32 rt = regs.gpr[RT(instr)]; - s32 result = rt >> sa; - regs.gpr[RD(instr)] = (s64)result; + if(likely(RD(instr) != 0)) { + u8 sa = (regs.gpr[RS(instr)] & 0x1F); + u32 rt = regs.gpr[RT(instr)]; + s32 result = rt >> sa; + regs.gpr[RD(instr)] = (s64) result; + } } void Interpreter::dsrl(u32 instr) { - u64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - u64 result = rt >> sa; - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + u64 result = rt >> sa; + regs.gpr[RD(instr)] = s64(result); + } } void Interpreter::dsrlv(u32 instr) { - u8 amount = (regs.gpr[RS(instr)] & 63); - u64 rt = regs.gpr[RT(instr)]; - u64 result = rt >> amount; - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u8 amount = (regs.gpr[RS(instr)] & 63); + u64 rt = regs.gpr[RT(instr)]; + u64 result = rt >> amount; + regs.gpr[RD(instr)] = s64(result); + } } void Interpreter::dsrl32(u32 instr) { - u64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - u64 result = rt >> (sa + 32); - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + u64 result = rt >> (sa + 32); + regs.gpr[RD(instr)] = s64(result); + } } void Interpreter::sra(u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - s32 result = rt >> sa; - regs.gpr[RD(instr)] = 
result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + s32 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void Interpreter::srav(u32 instr) { s64 rt = regs.gpr[RT(instr)]; - s64 rs = regs.gpr[RS(instr)]; - u8 sa = rs & 0x1f; - s32 result = rt >> sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rs = regs.gpr[RS(instr)]; + u8 sa = rs & 0x1f; + s32 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void Interpreter::dsra(u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - s64 result = rt >> sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + s64 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void Interpreter::dsrav(u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - s64 rs = regs.gpr[RS(instr)]; - s64 sa = rs & 63; - s64 result = rt >> sa; - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + s64 rs = regs.gpr[RS(instr)]; + s64 sa = rs & 63; + s64 result = rt >> sa; + regs.gpr[RD(instr)] = result; + } } void Interpreter::dsra32(u32 instr) { - s64 rt = regs.gpr[RT(instr)]; - u8 sa = ((instr >> 6) & 0x1f); - s64 result = rt >> (sa + 32); - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + s64 rt = regs.gpr[RT(instr)]; + u8 sa = ((instr >> 6) & 0x1f); + s64 result = rt >> (sa + 32); + regs.gpr[RD(instr)] = result; + } } void Interpreter::jr(u32 instr) { @@ -758,15 +810,19 @@ void Interpreter::dsub(u32 instr) { if(check_signed_underflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = result; + } } } void Interpreter::dsubu(u32 instr) { - u64 rt = regs.gpr[RT(instr)]; - u64 rs = regs.gpr[RS(instr)]; - u64 result = rs - rt; - regs.gpr[RD(instr)] = s64(result); + if(likely(RD(instr) != 0)) { + u64 
rt = regs.gpr[RT(instr)]; + u64 rs = regs.gpr[RS(instr)]; + u64 result = rs - rt; + regs.gpr[RD(instr)] = s64(result); + } } void Interpreter::sub(u32 instr) { @@ -776,15 +832,19 @@ void Interpreter::sub(u32 instr) { if(check_signed_underflow(rs, rt, result)) { FireException(regs, ExceptionCode::Overflow, 0, true); } else { - regs.gpr[RD(instr)] = result; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = result; + } } } void Interpreter::subu(u32 instr) { - u32 rt = regs.gpr[RT(instr)]; - u32 rs = regs.gpr[RS(instr)]; - u32 result = rs - rt; - regs.gpr[RD(instr)] = (s64)((s32)result); + if(likely(RD(instr) != 0)) { + u32 rt = regs.gpr[RT(instr)]; + u32 rs = regs.gpr[RS(instr)]; + u32 result = rs - rt; + regs.gpr[RD(instr)] = (s64) ((s32) result); + } } void Interpreter::dmultu(u32 instr) { @@ -820,11 +880,15 @@ void Interpreter::mult(u32 instr) { } void Interpreter::mflo(u32 instr) { - regs.gpr[RD(instr)] = regs.lo; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.lo; + } } void Interpreter::mfhi(u32 instr) { - regs.gpr[RD(instr)] = regs.hi; + if(likely(RD(instr) != 0)) { + regs.gpr[RD(instr)] = regs.hi; + } } void Interpreter::mtlo(u32 instr) { diff --git a/src/backend/core/mmio/AI.cpp b/src/backend/core/mmio/AI.cpp index c625f563..90ade588 100644 --- a/src/backend/core/mmio/AI.cpp +++ b/src/backend/core/mmio/AI.cpp @@ -82,7 +82,7 @@ void AI::Step(Mem& mem, Registers& regs, int cpuCycles, float volumeL, float vol if(dmaLen[0] && dmaEnable) {u32 addrHi = ((dmaAddr[0] >> 13) + dmaAddrCarry) & 0x7FF; dmaAddr[0] = (addrHi << 13) | (dmaAddr[0] & 0x1FFF); - u32 data = Util::ReadAccess(mem.mmio.rdp.rdram.data(), dmaAddr[0] & RDRAM_DSIZE); + u32 data = Util::ReadAccess(mem.mmio.rdp.rdram, dmaAddr[0] & RDRAM_DSIZE); s16 l = s16(data >> 16); s16 r = s16(data); diff --git a/src/backend/core/registers/Cop0.cpp b/src/backend/core/registers/Cop0.cpp index 737a9158..9a495962 100644 --- a/src/backend/core/registers/Cop0.cpp +++ 
b/src/backend/core/registers/Cop0.cpp @@ -69,7 +69,7 @@ u64 Cop0::GetReg64(u8 addr) { case 23: case 24: case 25: case 31: return openbus; default: - Util::panic("Unsupported word read from COP0 register {}\n", index); + Util::panic("Unsupported dword read from COP0 register {}\n", index); } } @@ -131,7 +131,7 @@ void Cop0::SetReg32(u8 addr, u32 value) { case 23: case 24: case 25: case 31: break; default: - Util::panic("Unsupported word read from COP0 register {}\n", index); + Util::panic("Unsupported word write from COP0 register {}\n", index); } } @@ -159,7 +159,7 @@ void Cop0::SetReg64(u8 addr, u64 value) { case COP0_REG_LLADDR: LLAddr = value; break; case COP0_REG_ERROREPC: ErrorEPC = (s64)value; break; default: - Util::panic("Unsupported word write to COP0 register {}\n", addr); + Util::panic("Unsupported dword write to COP0 register {}\n", addr); } } diff --git a/src/backend/core/registers/Cop0.hpp b/src/backend/core/registers/Cop0.hpp index a86e463e..d165bb8b 100644 --- a/src/backend/core/registers/Cop0.hpp +++ b/src/backend/core/registers/Cop0.hpp @@ -268,6 +268,10 @@ private: struct Registers; enum class ExceptionCode : u8; +enum TLBAccessType { + LOAD, STORE +}; + TLBEntry* TLBTryMatch(Registers& regs, u64 vaddr, int* match); bool ProbeTLB(Registers& regs, TLBAccessType access_type, u64 vaddr, u32& paddr, int* match); void HandleTLBException(Registers& regs, u64 vaddr); diff --git a/src/backend/core/rsp/instructions.cpp b/src/backend/core/rsp/instructions.cpp index 0d3e9ce6..67d78150 100644 --- a/src/backend/core/rsp/instructions.cpp +++ b/src/backend/core/rsp/instructions.cpp @@ -80,6 +80,7 @@ inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) { } } +ARCH_TARGET("sse3", "avx2", "default") inline VPR Broadcast(const VPR& vt, int l0, int l1, int l2, int l3, int l4, int l5, int l6, int l7) { VPR vte{}; vte.element[ELEMENT_INDEX(0)] = vt.element[ELEMENT_INDEX(l0)]; @@ -93,6 +94,7 @@ inline VPR Broadcast(const VPR& vt, int l0, int l1, int l2, 
int l3, int l4, int return vte; } +ARCH_TARGET("sse3", "avx2", "default") inline VPR GetVTE(const VPR& vt, u8 e) { VPR vte{}; e &= 0xf; @@ -692,6 +694,7 @@ inline u16 unsignedClamp(s64 val) { return val; } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vabs(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; @@ -716,6 +719,7 @@ void RSP::vabs(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vadd(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; @@ -730,6 +734,7 @@ void RSP::vadd(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vaddc(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; @@ -744,6 +749,7 @@ void RSP::vaddc(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vch(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -776,6 +782,7 @@ void RSP::vch(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vcr(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -808,6 +815,7 @@ void RSP::vcr(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vcl(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -843,6 +851,7 @@ void RSP::vcl(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmov(u32 instr) { u8 e = E2(instr), vs = VS(instr) & 7; VPR& vd = vpr[VD(instr)]; @@ -885,6 +894,7 @@ inline bool IsSignExtension(s16 hi, s16 lo) { return false; } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmulf(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -906,6 +916,7 @@ void RSP::vmulf(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmulq(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); @@ -924,6 +935,7 @@ void RSP::vmulq(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmulu(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -945,6 +957,7 @@ void RSP::vmulu(u32 instr) { } } +ARCH_TARGET("sse4.2", 
"avx2", "default") void RSP::vmudl(u32 instr) { u8 e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -971,6 +984,7 @@ void RSP::vmudl(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmudh(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -989,6 +1003,7 @@ void RSP::vmudh(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmudm(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1005,6 +1020,7 @@ void RSP::vmudm(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmudn(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1030,6 +1046,7 @@ void RSP::vmudn(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmadh(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1052,6 +1069,7 @@ void RSP::vmadh(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmadl(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1080,6 +1098,7 @@ void RSP::vmadl(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmadm(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1099,6 +1118,7 @@ void RSP::vmadm(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmadn(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1124,6 +1144,7 @@ void RSP::vmadn(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmacf(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vs = vpr[VS(instr)]; @@ -1145,6 +1166,7 @@ void RSP::vmacf(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmacu(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vs = vpr[VS(instr)]; @@ -1165,6 +1187,7 @@ void RSP::vmacu(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmacq(u32 instr) { VPR& vd = vpr[VD(instr)]; @@ -1182,6 +1205,7 @@ void RSP::vmacq(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::veq(u32 instr) { int e = E2(instr); VPR& vd = vpr[VD(instr)]; @@ -1196,6 +1220,7 @@ void 
RSP::veq(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vne(u32 instr) { int e = E2(instr); VPR& vd = vpr[VD(instr)]; @@ -1210,6 +1235,7 @@ void RSP::vne(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vge(u32 instr) { int e = E2(instr); VPR& vd = vpr[VD(instr)]; @@ -1226,6 +1252,7 @@ void RSP::vge(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vlt(u32 instr) { int e = E2(instr); VPR& vd = vpr[VD(instr)]; @@ -1287,6 +1314,7 @@ inline u32 rsq(u32 input) { return result ^ mask; } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrcpl(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vt = vpr[VT(instr)]; @@ -1313,6 +1341,7 @@ void RSP::vrcpl(u32 instr) { vd.element[ELEMENT_INDEX(de)] = result; } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrcp(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vt = vpr[VT(instr)]; @@ -1330,6 +1359,7 @@ void RSP::vrcp(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrsq(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vt = vpr[VT(instr)]; @@ -1354,6 +1384,7 @@ static inline s64 sclip(s64 x, u32 bits) { return ((x & m) ^ b) - b; } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrndn(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); @@ -1385,6 +1416,7 @@ void RSP::vrndn(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrndp(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); @@ -1416,6 +1448,7 @@ void RSP::vrndp(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrsql(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vt = vpr[VT(instr)]; @@ -1441,6 +1474,7 @@ void RSP::vrsql(u32 instr) { vd.element[ELEMENT_INDEX(de)] = result; } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vrcph(u32 instr) { int e = E2(instr) & 7; int de = DE(instr) & 7; @@ -1457,6 +1491,7 @@ void RSP::vrcph(u32 instr) { divInLoaded = true; } +ARCH_TARGET("sse4.2", "avx2", "default") 
void RSP::vsar(u32 instr) { u8 e = E2(instr); switch(e) { @@ -1483,6 +1518,7 @@ void RSP::vsar(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vsubc(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1499,6 +1535,7 @@ void RSP::vsubc(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vsub(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1515,6 +1552,7 @@ void RSP::vsub(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vmrg(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1529,6 +1567,7 @@ void RSP::vmrg(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vxor(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1541,6 +1580,7 @@ void RSP::vxor(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vnxor(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1553,6 +1593,7 @@ void RSP::vnxor(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vand(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1565,6 +1606,7 @@ void RSP::vand(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vnand(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1577,6 +1619,7 @@ void RSP::vnand(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vnor(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1589,6 +1632,7 @@ void RSP::vnor(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vor(u32 instr) { int e = E2(instr); VPR& vs = vpr[VS(instr)]; @@ -1601,6 +1645,7 @@ void RSP::vor(u32 instr) { } } +ARCH_TARGET("sse4.2", "avx2", "default") void RSP::vzero(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); diff --git a/src/common.hpp b/src/common.hpp index 834e1e50..01063f25 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -34,7 +34,6 @@ using s128 = __int128_t; #define BYTE_INDEX(i) (15 - (i)) #define SI_DMA_DELAY 
(65536 * 2) - -enum TLBAccessType { - LOAD, STORE -}; \ No newline at end of file +#define ARCH_TARGET(...) __attribute__ ((target_clones (__VA_ARGS__))) +#define unlikely(exp) __builtin_expect(exp, 0) +#define likely(exp) __builtin_expect(exp, 1) \ No newline at end of file diff --git a/src/frontend/imgui/GameList.cpp b/src/frontend/imgui/GameList.cpp deleted file mode 100644 index 2aacfc93..00000000 --- a/src/frontend/imgui/GameList.cpp +++ /dev/null @@ -1,167 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace nlohmann; -namespace fs = std::filesystem; - -GameList::GameList(const std::string& path) { - Create(path); -} - -void GameList::Create(const std::string &path) { - threadDone = false; - if(!path.empty()) { - std::thread searchThread([path, this]() { - std::ifstream gameDbFile("resources/db.json"); - json gameDb = json::parse(gameDbFile); - std::vector rom{}; - for(const auto& p : fs::recursive_directory_iterator{path}) { - const auto filename = p.path().string(); - if(threadDone) { - gamesList.clear(); - break; - } - if(p.path().extension() == ".n64" || p.path().extension() == ".z64" || p.path().extension() == ".v64" || - p.path().extension() == ".N64" || p.path().extension() == ".Z64" || p.path().extension() == ".V64") { - std::ifstream file(filename, std::ios::binary); - file.unsetf(std::ios::skipws); - - if(!file.is_open()) { - Util::panic("Unable to open {}!", filename); - } - - file.seekg(0, std::ios::end); - auto size = file.tellg(); - auto sizeAdjusted = Util::NextPow2(size); - file.seekg(0, std::ios::beg); - - std::fill(rom.begin(), rom.end(), 0); - rom.resize(sizeAdjusted); - rom.insert(rom.begin(), std::istream_iterator(file), std::istream_iterator()); - file.close(); - - u32 crc{}; - Util::GetRomCRC(sizeAdjusted, rom.data(), crc); - - bool found = false; - - for(const auto& item : gameDb["items"]) { - const auto& crcEntry = item["crc"]; - if(!crcEntry.empty()) { - 
if(crcEntry.get() == fmt::format("{:08X}", crc)) { - found = true; - gamesList.push_back(GameInfo{ - item["name"].get(), - item["region"].get(), - fmt::format("{:.2f} MiB", float(size) / 1024 / 1024), - "Good", - p.path().string() - }); - } - } - } - - if(!found) { - gamesList.push_back(GameInfo{ - p.path().stem().string(), - "Unknown", - fmt::format("{:.2f} MiB", float(size) / 1024 / 1024), - "Unknown", - p.path().string() - }); - } - } - }; - - gameDbFile.close(); - threadDone = true; - }); - - searchThread.detach(); - } -} - -bool GameList::RenderWidget(float mainMenuBarHeight, std::string& rom) { - const auto windowSize = ImGui::GetIO().DisplaySize; - ImGui::SetNextWindowPos(ImVec2(0, mainMenuBarHeight)); - ImGui::SetNextWindowSize(ImVec2(windowSize.x, windowSize.y - mainMenuBarHeight)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.f, 0.f)); - ImGui::PushStyleVar(ImGuiStyleVar_WindowBorderSize, 0.f); - - ImGui::Begin( - "Games list", - nullptr, - ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_NoCollapse | ImGuiWindowFlags_NoResize | - ImGuiWindowFlags_NoBringToFrontOnFocus - ); - - static ImGuiTableFlags flags = - ImGuiTableFlags_Resizable | ImGuiTableFlags_Reorderable | ImGuiTableFlags_Hideable | ImGuiTableFlags_Sortable | ImGuiTableFlags_SortMulti - | ImGuiTableFlags_RowBg | ImGuiTableFlags_BordersOuter | ImGuiTableFlags_BordersV | ImGuiTableFlags_NoBordersInBody - | ImGuiTableFlags_ScrollY; - - bool toOpen = false; - if (ImGui::BeginTable("Games List", 4, flags)) { - ImGui::TableSetupColumn("Title"); - ImGui::TableSetupColumn("Region"); - ImGui::TableSetupColumn("Status"); - ImGui::TableSetupColumn("Size"); - ImGui::TableSetupScrollFreeze(0, 1); // Make row always visible - ImGui::TableHeadersRow(); - - int i = 0; - - for (const auto& entry : gamesList) { - ImGui::TableNextRow(ImGuiTableRowFlags_None); - ImGui::PushID(i); - ImGui::PushStyleVar(ImGuiStyleVar_SelectableTextAlign, ImVec2(0.0f, 0.5f)); - ImGui::TableSetColumnIndex(0); - - if 
(ImGui::Selectable(entry.name.c_str(), false, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap, ImVec2(0.0f, 20.f))) { - toOpen = true; - rom = entry.path; - } - - ImGui::TableSetColumnIndex(1); - - if (ImGui::Selectable(entry.region.c_str(), false, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap, ImVec2(0.0f, 20.f))) { - toOpen = true; - rom = entry.path; - } - - ImGui::TableSetColumnIndex(2); - - if (ImGui::Selectable(entry.status.c_str(), false, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap, ImVec2(0.0f, 20.f))) { - toOpen = true; - rom = entry.path; - } - - ImGui::TableSetColumnIndex(3); - - if (ImGui::Selectable(entry.size.c_str(), false, ImGuiSelectableFlags_SpanAllColumns | ImGuiSelectableFlags_AllowItemOverlap, ImVec2(0.0f, 20.f))) { - toOpen = true; - rom = entry.path; - } - - ImGui::PopStyleVar(); - ImGui::PopID(); - i++; - } - - ImGui::EndTable(); - } - - ImGui::End(); - ImGui::PopStyleVar(); - ImGui::PopStyleVar(); - - return toOpen; -} \ No newline at end of file diff --git a/src/frontend/imgui/GameList.hpp b/src/frontend/imgui/GameList.hpp deleted file mode 100644 index 5ac67588..00000000 --- a/src/frontend/imgui/GameList.hpp +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once -#include -#include -#include - -struct GameInfo { - std::string name, region, size, status, path; -}; - -struct GameList { - GameList(const std::string&); - ~GameList() = default; - - void Create(const std::string&); - bool RenderWidget(float, std::string&); - - [[nodiscard]] std::vector GetGamesList() const { return gamesList; } - std::atomic_bool threadDone = false; -private: - std::vector gamesList{}, notMatch{}; -}; diff --git a/src/frontend/imgui/Window.cpp b/src/frontend/imgui/Window.cpp index 48730827..20c6fcea 100644 --- a/src/frontend/imgui/Window.cpp +++ b/src/frontend/imgui/Window.cpp @@ -12,7 +12,7 @@ VkInstance instance{}; namespace fs = std::filesystem; -Window::Window(n64::Core& core) : 
settings(core), gameList(settings.GetGamesDir()) { +Window::Window(n64::Core& core) : settings(core) { InitSDL(); InitParallelRDP(core.mem.GetRDRAM(), window); InitImgui(); @@ -156,7 +156,6 @@ ImDrawData* Window::Present(n64::Core& core) { void Window::LoadROM(n64::Core& core, const std::string &path) { if(!path.empty()) { - gameList.threadDone = true; n64::CartInfo cartInfo = core.LoadROM(path); std::ifstream gameDbFile("resources/db.json"); json gameDb = json::parse(gameDbFile); @@ -181,7 +180,6 @@ void Window::LoadROM(n64::Core& core, const std::string &path) { Util::UpdateRPC(Util::Playing, gameName); windowTitle = "Gadolinium - " + gameName; shadowWindowTitle = windowTitle; - renderGameList = false; SDL_SetWindowTitle(window, windowTitle.c_str()); gameDbFile.close(); @@ -190,7 +188,6 @@ void Window::LoadROM(n64::Core& core, const std::string &path) { void Window::RenderMainMenuBar(n64::Core &core) { ImGui::BeginMainMenuBar(); - mainMenuBarHeight = ImGui::GetWindowSize().y; if (ImGui::BeginMenu("File")) { if (ImGui::MenuItem("Open", "O")) { @@ -222,12 +219,10 @@ void Window::RenderMainMenuBar(n64::Core &core) { LoadROM(core, core.rom); } if (ImGui::MenuItem("Stop")) { - renderGameList = true; windowTitle = "Gadolinium"; core.rom.clear(); Util::UpdateRPC(Util::Idling); SDL_SetWindowTitle(window, windowTitle.c_str()); - gameList.Create(settings.GetGamesDir()); core.Stop(); } if (ImGui::MenuItem(core.pause ? 
"Resume" : "Pause", nullptr, false, core.romLoaded)) { @@ -267,13 +262,6 @@ void Window::Render(n64::Core& core) { RenderMainMenuBar(core); } - static std::string rom{}; - if(renderGameList && gameList.RenderWidget(mainMenuBarHeight, rom)) { - LoadROM(core, rom); - renderGameList = false; - } - - mainMenuBarHeight = 0; settings.RenderWidget(showSettings); ImGui::PopFont(); diff --git a/src/frontend/imgui/Window.hpp b/src/frontend/imgui/Window.hpp index 2da49348..7605c499 100644 --- a/src/frontend/imgui/Window.hpp +++ b/src/frontend/imgui/Window.hpp @@ -7,7 +7,6 @@ #include #include #include -#include struct Window { explicit Window(n64::Core& core); @@ -17,12 +16,9 @@ struct Window { [[nodiscard]] bool gotClosed(SDL_Event event); ImFont *uiFont{}; Settings settings; - GameList gameList; void LoadROM(n64::Core& core, const std::string& path); private: - bool renderGameList = true; bool showSettings = false; - float mainMenuBarHeight = 0; SDL_Window* window{}; std::string windowTitle{"Gadolinium"}; std::string shadowWindowTitle{windowTitle};