From 24ec6ed6dd23e5ca8c2440a6a7477a3b55fef1f0 Mon Sep 17 00:00:00 2001 From: CocoSimone Date: Sun, 25 Sep 2022 14:55:21 +0200 Subject: [PATCH] Fire RSP DMAs upon RSP MTC0 to indexes 2 or 3 (unstucks Mario) --- src/common.hpp | 6 +- src/n64/Core.cpp | 2 +- src/n64/core/Mem.hpp | 2 +- src/n64/core/RSP.cpp | 50 +--------- src/n64/core/RSP.hpp | 61 +++++++++++- src/n64/core/cpu/decode.cpp | 2 +- .../core/cpu/registers/cop1instructions.cpp | 14 ++- src/n64/core/rsp/decode.cpp | 34 ++++--- src/n64/core/rsp/instructions.cpp | 95 +++++++++++++++++-- 9 files changed, 188 insertions(+), 78 deletions(-) diff --git a/src/common.hpp b/src/common.hpp index 4b756603..23857aec 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -39,10 +39,10 @@ using m128 = __m128i; #define BASE(x) RS(x) #define VT(x) (((x) >> 16) & 0x1F) #define VS(x) (((x) >> 11) & 0x1F) -#define VD(x) (((x) >> 6) & 0x1F) -#define E(x) BASE(x) +#define VD(x) (((x) >> 6) & 0x1F) +#define E(x) (((x) >> 7) & 0x0F) #define ELEMENT_INDEX(i) (7 - (i)) -#define BYTE_INDEX(i) (15 - (i)) +#define BYTE_INDEX(i) (15 - (i)) enum TLBAccessType { diff --git a/src/n64/Core.cpp b/src/n64/Core.cpp index 9bef20bd..077f9b8d 100644 --- a/src/n64/Core.cpp +++ b/src/n64/Core.cpp @@ -41,7 +41,7 @@ void Core::Run(Window& window, float volumeL, float volumeR) { for(;cycles <= mmio.vi.cyclesPerHalfline; cycles++, frameCycles++) { cpu.Step(mem); - mmio.rsp.Step(mmio.mi, cpu.regs, mmio.rdp); + mmio.rsp.Step(cpu.regs, mem); mmio.ai.Step(mem, cpu.regs, 1, volumeL, volumeR); } diff --git a/src/n64/core/Mem.hpp b/src/n64/core/Mem.hpp index b56b9879..067db48e 100644 --- a/src/n64/core/Mem.hpp +++ b/src/n64/core/Mem.hpp @@ -33,6 +33,7 @@ struct Mem { template void Write64(Registers&, u32, u64, s64); + MMIO mmio; u8 pifRam[PIF_RAM_SIZE]{}; private: friend struct SI; @@ -41,7 +42,6 @@ private: friend struct Cpu; friend struct RSP; friend struct Core; - MMIO mmio; std::vector cart, sram; u8 pifBootrom[PIF_BOOTROM_SIZE]{}; u8 isviewer[ISVIEWER_SIZE]{}; diff --git a/src/n64/core/RSP.cpp b/src/n64/core/RSP.cpp index aa8362c7..d565a01b 100644 --- a/src/n64/core/RSP.cpp +++ b/src/n64/core/RSP.cpp @@ -21,21 +21,21 @@ void RSP::Reset() { memset(imem, 0, IMEM_SIZE); memset(vpr, 0, 32 * sizeof(VPR)); memset(gpr, 0, 32); - vce = 0; + memset(&vce, 0, sizeof(VPR)); acc = {.h={}, .m={}, .l={}}; vcc = {.l = {}, .h = {}}; vco = {.l = {}, .h = {}}; semaphore = false; } -void RSP::Step(MI& mi, Registers& regs, RDP& rdp) { +void RSP::Step(Registers& regs, Mem& mem) { if(!spStatus.halt) { gpr[0] = 0; u32 instr = util::ReadAccess(imem, pc & IMEM_DSIZE); oldPC = pc & 0xFFC; pc = nextPC & 0xFFC; nextPC += 4; - Exec(mi, regs, rdp, instr); + Exec(regs, mem, instr); } } @@ -54,44 +54,6 @@ auto RSP::Read(u32 addr) -> u32{ } } -template -inline void DMA(SPDMALen len, Mem& mem, RSP& rsp, bool bank) { - u32 length = len.len + 1; - - length = (length + 0x7) & ~0x7; - - u8* dst, *src; - if constexpr (isDRAMdest) { - dst = mem.GetRDRAM(); - src = bank ? rsp.imem : rsp.dmem; - } else { - src = mem.GetRDRAM(); - dst = bank ? rsp.imem : rsp.dmem; - } - - u32 mem_address = rsp.spDMASPAddr.address & 0xFF8; - u32 dram_address = rsp.spDMADRAMAddr.address & 0xFFFFF8; - - for (int i = 0; i < len.count + 1; i++) { - for(int j = 0; j < length; j++) { - if constexpr (isDRAMdest) { - dst[dram_address + j] = src[(mem_address + j) & 0xFFF]; - } else { - dst[(mem_address + j) & 0xFFF] = src[dram_address + j]; - } - } - - int skip = i == len.count ? 0 : len.skip; - - dram_address += (length + skip) & 0xFFFFF8; - mem_address += length; - } - - rsp.lastSuccessfulSPAddr.address = mem_address & 0xFF8; - rsp.lastSuccessfulSPAddr.bank = bank; - rsp.lastSuccessfulDRAMAddr.address = dram_address & 0xFFFFF8; -} - void RSP::Write(Mem& mem, Registers& regs, u32 addr, u32 value) { MI& mi = mem.mmio.mi; switch (addr) { @@ -99,13 +61,11 @@ void RSP::Write(Mem& mem, Registers& regs, u32 addr, u32 value) { case 0x04040004: spDMADRAMAddr.raw = value & 0xFFFFF8; break; case 0x04040008: { spDMALen.raw = value; - DMA(spDMALen, mem, *this, spDMASPAddr.bank); - spDMALen.raw = 0xFF8 | (spDMALen.skip << 20); + DMA(spDMALen, mem.GetRDRAM(), *this, spDMASPAddr.bank); } break; case 0x0404000C: { spDMALen.raw = value; - DMA(spDMALen, mem, *this, spDMASPAddr.bank); - spDMALen.raw = 0xFF8 | (spDMALen.skip << 20); + DMA(spDMALen, mem.GetRDRAM(), *this, spDMASPAddr.bank); } break; case 0x04040010: { auto write = SPStatusWrite{.raw = value}; diff --git a/src/n64/core/RSP.hpp b/src/n64/core/RSP.hpp index 74152b00..9490160f 100644 --- a/src/n64/core/RSP.hpp +++ b/src/n64/core/RSP.hpp @@ -111,10 +111,10 @@ struct Registers; struct RSP { RSP(); void Reset(); - void Step(MI& mi, Registers& regs, RDP& rdp); + void Step(Registers& regs, Mem& mem); auto Read(u32 addr) -> u32; void Write(Mem& mem, Registers& regs, u32 addr, u32 value); - void Exec(MI& mi, Registers& regs, RDP& rdp, u32 instr); + void Exec(Registers& regs, Mem& mem, u32 instr); SPStatus spStatus; u16 oldPC{}, pc{}, nextPC{}; SPDMASPAddr spDMASPAddr{}; @@ -125,7 +125,7 @@ struct RSP { u8 dmem[DMEM_SIZE]{}, imem[IMEM_SIZE]{}; VPR vpr[32]{}; s32 gpr[32]{}; - u8 vce{}; + VPR vce{}; struct { VPR h{}, m{}, l{}; @@ -164,6 +164,15 @@ struct RSP { return val; } + inline u8 GetVCE() { + u8 value = 0; + for(int i = 0; i < 8; i++) { + bool l = vce.element[7 - i] != 0; + value |= (l << i); + } + return value; + } + inline u64 ReadDword(u32 addr, bool i) { if (i) { return GET_RSP_DWORD(addr, imem); @@ -277,9 +286,10 @@ struct RSP { void addi(u32 instr); void and_(u32 instr); void andi(u32 instr); - void cfc2(u32 instr); void b(u32 instr, bool cond); void bl(u32 instr, bool cond); + void cfc2(u32 instr); + void ctc2(u32 instr); void lb(u32 instr); void lh(u32 instr); void lw(u32 instr); @@ -317,7 +327,48 @@ struct RSP { void vne(u32 instr); void vsar(u32 instr); void mfc0(RDP& rdp, u32 instr); - void mtc0(MI& mi, Registers& regs, RDP& rdp, u32 instr); + void mtc0(Registers& regs, Mem& mem, u32 instr); + void mfc2(u32 instr); + void mtc2(u32 instr); + + template + inline void DMA(SPDMALen len, u8* rdram, RSP& rsp, bool bank) { + u32 length = len.len + 1; + + length = (length + 0x7) & ~0x7; + + u8* dst, *src; + if constexpr (isDRAMdest) { + dst = rdram; + src = bank ? rsp.imem : rsp.dmem; + } else { + src = rdram; + dst = bank ? rsp.imem : rsp.dmem; + } + + u32 mem_address = rsp.spDMASPAddr.address & 0xFF8; + u32 dram_address = rsp.spDMADRAMAddr.address & 0xFFFFF8; + + for (int i = 0; i < len.count + 1; i++) { + for(int j = 0; j < length; j++) { + if constexpr (isDRAMdest) { + dst[dram_address + j] = src[(mem_address + j) & 0xFFF]; + } else { + dst[(mem_address + j) & 0xFFF] = src[dram_address + j]; + } + } + + int skip = i == len.count ? 0 : len.skip; + + dram_address += (length + skip) & 0xFFFFF8; + mem_address += length; + } + + rsp.lastSuccessfulSPAddr.address = mem_address & 0xFF8; + rsp.lastSuccessfulSPAddr.bank = bank; + rsp.lastSuccessfulDRAMAddr.address = dram_address & 0xFFFFF8; + rsp.spDMALen.raw = 0xFF8 | (rsp.spDMALen.skip << 20); + } private: inline void branch(u16 address, bool cond) { if(cond) { diff --git a/src/n64/core/cpu/decode.cpp b/src/n64/core/cpu/decode.cpp index e829f174..18b14fee 100644 --- a/src/n64/core/cpu/decode.cpp +++ b/src/n64/core/cpu/decode.cpp @@ -148,7 +148,7 @@ void Cpu::Exec(Mem& mem, u32 instr) { case 0x3D: regs.cop1.sdc1(regs, mem, instr); break; case 0x3F: sd(mem, instr); break; default: - util::panic("Unimplemented instruction {} {} ({:08X}) (pc: {:016X})\n", (mask >> 3) & 7, mask & 7, instr, (u64)regs.oldPC); + util::panic("Unimplemented instruction {:02X} ({:08X}) (pc: {:016X})\n", mask, instr, (u64)regs.oldPC); } } } \ No newline at end of file diff --git a/src/n64/core/cpu/registers/cop1instructions.cpp b/src/n64/core/cpu/registers/cop1instructions.cpp index 23e9729d..98c209bb 100644 --- a/src/n64/core/cpu/registers/cop1instructions.cpp +++ b/src/n64/core/cpu/registers/cop1instructions.cpp @@ -198,7 +198,7 @@ void Cop1::cvtld(Registers& regs, u32 instr) { } template -inline bool CalculateCondition(T fs, T ft, u8 cond) { +inline bool CalculateCondition(Registers& regs, T fs, T ft, CompConds cond) { switch(cond) { case F: return false; case UN: return std::isnan(fs) || std::isnan(ft); @@ -207,7 +207,15 @@ inline bool CalculateCondition(T fs, T ft, u8 cond) { case OLT: return (!std::isnan(fs) && !std::isnan(ft)) && (fs < ft); case ULT: return (std::isnan(fs) || std::isnan(ft)) || (fs < ft); case OLE: return (!std::isnan(fs) && !std::isnan(ft)) && (fs <= ft); - default: return CalculateCondition(fs, ft, cond - 8); + case ULE: return (std::isnan(fs) || std::isnan(ft)) || (fs <= ft); + default: + if(std::isnan(fs) || std::isnan(ft)) { + regs.cop1.fcr31.flag_invalid_operation = true; + regs.cop1.fcr31.cause_invalid_operation = true; + FireException(regs, ExceptionCode::FloatingPointError, 0, regs.oldPC); + } + + return CalculateCondition(regs, fs, ft, static_cast(cond - 8)); } } @@ -216,7 +224,7 @@ void Cop1::ccond(Registers& regs, u32 instr, CompConds cond) { T fs = GetCop1Reg(regs.cop0, FS(instr)); T ft = GetCop1Reg(regs.cop0, FT(instr)); - fcr31.compare = CalculateCondition(fs, ft, cond); + fcr31.compare = CalculateCondition(regs, fs, ft, cond); } template void Cop1::ccond(Registers& regs, u32 instr, CompConds cond); diff --git a/src/n64/core/rsp/decode.cpp b/src/n64/core/rsp/decode.cpp index c7d052c9..4d9d3512 100644 --- a/src/n64/core/rsp/decode.cpp +++ b/src/n64/core/rsp/decode.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace n64 { inline void special(MI& mi, Registers& regs, RSP& rsp, u32 instr) { @@ -57,16 +58,16 @@ inline void regimm(RSP& rsp, u32 instr) { inline void lwc2(RSP& rsp, u32 instr) { u8 mask = (instr >> 11) & 0x1F; switch(mask) { - //case 0x04: rsp.lqv(instr); break; - default: util::panic("Unhandled RSP LWC2 {} {}\n", (mask >> 3) & 3, mask & 7); + case 0x04: rsp.lqv(instr); break; + default: util::panic("Unhandled RSP LWC2 {:06b}\n", mask); } } inline void swc2(RSP& rsp, u32 instr) { u8 mask = (instr >> 11) & 0x1F; switch(mask) { - //case 0x04: rsp.sqv(instr); break; - default: util::panic("Unhandled RSP SWC2 {} {}\n", (mask >> 3) & 3, mask & 7); + case 0x04: rsp.sqv(instr); break; + default: util::panic("Unhandled RSP SWC2 {:06b}\n", mask); } } @@ -76,7 +77,10 @@ inline void cop2(RSP& rsp, u32 instr) { switch(mask) { case 0x00: switch(mask_sub) { - //case 0x02: rsp.cfc2(instr); break; + case 0x00: rsp.mfc2(instr); break; + case 0x02: rsp.cfc2(instr); break; + case 0x04: rsp.mtc2(instr); break; + case 0x06: rsp.ctc2(instr); break; default: util::panic("Unhandled RSP COP2 sub ({:06b})\n", mask_sub); } break; @@ -89,17 +93,23 @@ inline void cop2(RSP& rsp, u32 instr) { } } -inline void cop0(MI& mi, Registers& regs, RSP& rsp, RDP& rdp, u32 instr) { +inline void cop0(Registers& regs, Mem& mem, u32 instr) { u8 mask = (instr >> 21) & 0x1F; + MMIO& mmio = mem.mmio; + RSP& rsp = mmio.rsp; + RDP& rdp = mmio.rdp; switch(mask) { case 0x00: rsp.mfc0(rdp, instr); break; - case 0x04: rsp.mtc0(mi, regs, rdp, instr); break; + case 0x04: rsp.mtc0(regs, mem, instr); break; default: util::panic("Unhandled RSP COP0 ({:06b})\n", mask); } } -void RSP::Exec(MI &mi, Registers ®s, RDP &rdp, u32 instr) { +void RSP::Exec(Registers ®s, Mem& mem, u32 instr) { u8 mask = (instr >> 26) & 0x3F; + MMIO& mmio = mem.mmio; + RDP& rdp = mmio.rdp; + MI& mi = mmio.mi; switch(mask) { case 0x00: special(mi, regs, *this, instr); break; case 0x01: regimm(*this, instr); break; @@ -116,8 +126,8 @@ void RSP::Exec(MI &mi, Registers ®s, RDP &rdp, u32 instr) { case 0x0D: ori(instr); break; case 0x0E: xori(instr); break; case 0x0F: lui(instr); break; - case 0x10: cop0(mi, regs, *this, rdp, instr); break; - //case 0x12: cop2(*this, instr); break; + case 0x10: cop0(regs, mem, instr); break; + case 0x12: cop2(*this, instr); break; case 0x20: lb(instr); break; case 0x21: lh(instr); break; case 0x23: case 0x27: @@ -128,8 +138,8 @@ void RSP::Exec(MI &mi, Registers ®s, RDP &rdp, u32 instr) { case 0x28: sb(instr); break; case 0x29: sh(instr); break; case 0x2B: sw(instr); break; - //case 0x32: lwc2(*this, instr); break; - //case 0x3A: swc2(*this, instr); break; + case 0x32: lwc2(*this, instr); break; + case 0x3A: swc2(*this, instr); break; default: util::panic("Unhandled RSP instruction ({:06b})\n", mask); } } diff --git a/src/n64/core/rsp/instructions.cpp b/src/n64/core/rsp/instructions.cpp index 1d5d0383..d2a66f18 100644 --- a/src/n64/core/rsp/instructions.cpp +++ b/src/n64/core/rsp/instructions.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace n64 { inline bool AcquireSemaphore(RSP& rsp) { @@ -16,28 +17,49 @@ inline void ReleaseSemaphore(RSP& rsp) { rsp.semaphore = false; } +inline int SignExt7bit(u8 val, int sa) { + s8 sval = ((val << 1) & 0x80) | val; + + s32 sval32 = sval; + u32 val32 = sval32; + return val32 << sa; +} + inline auto GetCop0Reg(RSP& rsp, RDP& rdp, u8 index) -> u32{ switch(index) { - case 0: return rsp.spDMASPAddr.raw; - case 1: return rsp.spDMADRAMAddr.raw; + case 0: return rsp.lastSuccessfulSPAddr.raw; + case 1: return rsp.lastSuccessfulDRAMAddr.raw; case 2: case 3: return rsp.spDMALen.raw; case 4: return rsp.spStatus.raw; case 5: return rsp.spStatus.dmaFull; - case 6: return 0; + case 6: return rsp.spStatus.dmaBusy; case 7: return AcquireSemaphore(rsp); + case 9: return rdp.dpc.end; + case 10: return rdp.dpc.current; case 11: return rdp.dpc.status.raw; + case 12: return 0; default: util::panic("Unhandled RSP COP0 register read at index {}\n", index); } return 0; } -inline void SetCop0Reg(MI& mi, Registers& regs, RSP& rsp, RDP& rdp, u8 index, u32 val) { +inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) { + MMIO& mmio = mem.mmio; + RSP& rsp = mmio.rsp; + RDP& rdp = mmio.rdp; + MI& mi = mmio.mi; switch(index) { case 0: rsp.spDMASPAddr.raw = val; break; case 1: rsp.spDMADRAMAddr.raw = val; break; case 2: - case 3: rsp.spDMALen.raw = val; break; + rsp.spDMALen.raw = val; + rsp.DMA(rsp.spDMALen, mem.GetRDRAM(), rsp, rsp.spDMASPAddr.bank); + break; + case 3: + rsp.spDMALen.raw = val; + rsp.DMA(rsp.spDMALen, mem.GetRDRAM(), rsp, rsp.spDMASPAddr.bank); + break; case 4: rsp.spStatus.raw = val; break; case 7: if(val == 0) { @@ -110,7 +132,37 @@ void RSP::andi(u32 instr) { } void RSP::cfc2(u32 instr) { + s16 value = 0; + switch(RD(instr) & 3) { + case 0: value = VCOasU16(); break; + case 1: value = VCCasU16(); break; + case 2 ... 3: value = GetVCE(); break; + } + gpr[RT(instr)] = s32(value); +} + +void RSP::ctc2(u32 instr) { + u16 value = gpr[RT(instr)]; + switch(RD(instr) & 3) { + case 0: + for(int i = 0; i < 8; i++) { + vco.h.element[7 - i] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; + vco.l.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; + } + break; + case 1: + for(int i = 0; i < 8; i++) { + vcc.h.element[7 - i] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; + vcc.l.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; + } + break; + case 2: case 3: + for(int i = 0; i < 8; i++) { + vce.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; + } + break; + } } void RSP::b(u32 instr, bool cond) { @@ -154,7 +206,13 @@ void RSP::lui(u32 instr) { } void RSP::lqv(u32 instr) { + int e = E(instr); + u32 addr = gpr[BASE(instr)] + SignExt7bit(instr & 0x7F, 4); + u32 end = ((addr & ~15) + 15); + for(int i = 0; addr + i <= end && i + e < 16; i++) { + vpr[VT(instr)].byte[BYTE_INDEX(i + e)] = ReadByte(addr + i); + } } void RSP::j(u32 instr) { @@ -222,7 +280,13 @@ void RSP::sub(u32 instr) { } void RSP::sqv(u32 instr) { + int e = E(instr); + u32 addr = gpr[BASE(instr)] + SignExt7bit(instr & 0x7F, 4); + u32 end = ((addr & ~15) + 15); + for(int i = 0; addr + i <= end; i++) { + WriteByte(addr + i, vpr[VT(instr)].byte[BYTE_INDEX((i + e) & 15)]); + } } void RSP::sllv(u32 instr) { @@ -303,7 +367,24 @@ void RSP::mfc0(RDP& rdp, u32 instr) { gpr[RT(instr)] = GetCop0Reg(*this, rdp, RD(instr)); } -void RSP::mtc0(MI& mi, Registers& regs, RDP& rdp, u32 instr) { - SetCop0Reg(mi, regs, *this, rdp, RD(instr), gpr[RT(instr)]); +void RSP::mtc0(Registers& regs, Mem& mem, u32 instr) { + SetCop0Reg(regs, mem, RD(instr), gpr[RT(instr)]); +} + +void RSP::mfc2(u32 instr) { + u8 hi = vpr[RD(instr)].byte[BYTE_INDEX(E(instr))]; + u8 lo = vpr[RD(instr)].byte[BYTE_INDEX((E(instr) + 1) & 0xF)]; + s16 elem = (hi << 8) | lo; + gpr[RT(instr)] = s32(elem); +} + +void RSP::mtc2(u32 instr) { + u16 element = gpr[RT(instr)]; + u8 lo = element; + u8 hi = element >> 8; + vpr[RD(instr)].byte[BYTE_INDEX(E(instr))] = hi; + if(E(instr) < 15) { + vpr[RD(instr)].byte[BYTE_INDEX(E(instr) + 1)] = lo; + } } } \ No newline at end of file