From 80c8718fef8974db517fd0f363e6075589d19f45 Mon Sep 17 00:00:00 2001
From: Simone Coco
Date: Wed, 5 Oct 2022 11:22:04 +0200
Subject: [PATCH] SUPER MARIO 64 BOOTS, works until peach letter

Implement the RSP opcodes that were still missing or stubbed out:

- LWC2/SWC2: add LPV, LUV, LTV and SPV, SUV, STV and wire them into the
  decoder.
- COP2: add VADDC, VCR, VRCP, VRSQL, VXNOR and VNOR, fill in the empty
  VMOV, VEQ and VNE bodies, route opcode 0x36 to the VRCPH handler and
  treat COP2 sub-opcodes 0x10, 0x14, 0x1C, 0x1E and 0x1F as no-ops.
- VCH: store VCO.h and VCE as full 0xffff/0 lane masks.
- VGE/VLT: combine the lane predicates with a logical OR.
- divIn/divOut become s16.
- The default frontend volume is lowered to 0.001.
---
 src/frontend/imgui/Window.hpp     |   2 +-
 src/n64/core/RSP.hpp              |  14 +-
 src/n64/core/rsp/decode.cpp       |  25 ++-
 src/n64/core/rsp/instructions.cpp | 302 +++++++++++++++++++++++++++++-
 4 files changed, 328 insertions(+), 15 deletions(-)

diff --git a/src/frontend/imgui/Window.hpp b/src/frontend/imgui/Window.hpp
index f7b4218a..3b7b79a8 100644
--- a/src/frontend/imgui/Window.hpp
+++ b/src/frontend/imgui/Window.hpp
@@ -15,7 +15,7 @@ struct Window {
   [[nodiscard]] bool gotClosed(SDL_Event event);
   ImFont *uiFont, *codeFont;
   u32 windowID;
-  float volumeL = 0.5, volumeR = 0.5;
+  float volumeL = 0.001, volumeR = 0.001;
   void LoadROM(n64::Core& core, const std::string& path);
 private:
   bool lockVolume = true;
diff --git a/src/n64/core/RSP.hpp b/src/n64/core/RSP.hpp
index 8f594ccf..a7a37bf0 100644
--- a/src/n64/core/RSP.hpp
+++ b/src/n64/core/RSP.hpp
@@ -127,7 +127,7 @@ struct RSP {
   VPR vpr[32]{};
   s32 gpr[32]{};
   VPR vce{};
-  u16 divIn{}, divOut{};
+  s16 divIn{}, divOut{};
   bool divInLoaded = false;
 
   struct {
@@ -340,11 +340,14 @@ struct RSP {
   void lbu(u32 instr);
   void lhu(u32 instr);
   void lui(u32 instr);
+  void luv(u32 instr);
   void ldv(u32 instr);
   void lsv(u32 instr);
   void llv(u32 instr);
   void lrv(u32 instr);
   void lqv(u32 instr);
+  void ltv(u32 instr);
+  void lpv(u32 instr);
   void j(u32 instr);
   void jal(u32 instr);
   void jr(u32 instr);
@@ -360,9 +363,12 @@ struct RSP {
   void sub(u32 instr);
   void sbv(u32 instr);
   void sdv(u32 instr);
+  void stv(u32 instr);
   void sqv(u32 instr);
   void ssv(u32 instr);
+  void suv(u32 instr);
   void slv(u32 instr);
+  void spv(u32 instr);
   void sllv(u32 instr);
   void srlv(u32 instr);
   void srav(u32 instr);
@@ -375,8 +381,10 @@ struct RSP {
   void sltiu(u32 instr);
   void vabs(u32 instr);
   void vadd(u32 instr);
+  void vaddc(u32 instr);
   void vand(u32 instr);
   void vch(u32 instr);
+  void vcr(u32 instr);
   void vcl(u32 instr);
   void vmacf(u32 instr);
   void vmadh(u32 instr);
@@ -393,12 +401,16 @@ struct RSP {
   void veq(u32 instr);
   void vne(u32 instr);
   void vge(u32 instr);
+  void vrcp(u32 instr);
   void vrcpl(u32 instr);
+  void vrsql(u32 instr);
   void vrcph(u32 instr);
   void vsar(u32 instr);
   void vsub(u32 instr);
   void vsubc(u32 instr);
   void vxor(u32 instr);
+  void vxnor(u32 instr);
+  void vnor(u32 instr);
   void vzero(u32 instr);
   void mfc0(RDP& rdp, u32 instr);
   void mtc0(Registers& regs, Mem& mem, u32 instr);
diff --git a/src/n64/core/rsp/decode.cpp b/src/n64/core/rsp/decode.cpp
index c82b4864..9eeacd1a 100644
--- a/src/n64/core/rsp/decode.cpp
+++ b/src/n64/core/rsp/decode.cpp
@@ -62,6 +62,9 @@ inline void lwc2(RSP& rsp, u32 instr) {
     case 0x03: rsp.ldv(instr); break;
     case 0x04: rsp.lqv(instr); break;
     case 0x05: rsp.lrv(instr); break;
+    case 0x06: rsp.lpv(instr); break;
+    case 0x07: rsp.luv(instr); break;
+    case 0x0B: rsp.ltv(instr); break;
     default: util::panic("Unhandled RSP LWC2 {:05b}\n", mask);
   }
 }
@@ -74,6 +77,9 @@ inline void swc2(RSP& rsp, u32 instr) {
     case 0x02: rsp.ssv(instr); break;
     case 0x03: rsp.sdv(instr); break;
     case 0x04: rsp.sqv(instr); break;
+    case 0x06: rsp.spv(instr); break;
+    case 0x07: rsp.suv(instr); break;
+    case 0x0B: rsp.stv(instr); break;
     default: util::panic("Unhandled RSP SWC2 {:05b}\n", mask);
   }
 }
@@ -88,7 +94,8 @@ inline void cop2(RSP& rsp, u32 instr) {
         case 0x02: rsp.cfc2(instr); break;
         case 0x04: rsp.mtc2(instr); break;
         case 0x06: rsp.ctc2(instr); break;
-        case 0x1E: rsp.vzero(instr); break;
+        case 0x10: case 0x1C: case 0x1E:
+        case 0x1F: case 0x14: break;
         default: util::panic("Unhandled RSP COP2 sub ({:05b})\n", mask_sub);
       }
       break;
@@ -104,20 +111,28 @@ inline void cop2(RSP& rsp, u32 instr) {
     case 0x10: rsp.vadd(instr); break;
     case 0x11: rsp.vsub(instr); break;
     case 0x13: rsp.vabs(instr); break;
+    case 0x14: rsp.vaddc(instr); break;
     case 0x15: rsp.vsubc(instr); break;
     case 0x1D: rsp.vsar(instr); break;
     case 0x20: rsp.vlt(instr); break;
-    //case 0x21: rsp.veq(instr); break;
-    //case 0x22: rsp.vne(instr); break;
+    case 0x21: rsp.veq(instr); break;
+    case 0x22: rsp.vne(instr); break;
     case 0x23: rsp.vge(instr); break;
     case 0x24: rsp.vcl(instr); break;
     case 0x25: rsp.vch(instr); break;
+    case 0x26: rsp.vcr(instr); break;
     case 0x27: rsp.vmrg(instr); break;
     case 0x28: rsp.vand(instr); break;
+    case 0x2A: rsp.vnor(instr); break;
     case 0x2C: rsp.vxor(instr); break;
+    case 0x2D: rsp.vxnor(instr); break;
     case 0x31: rsp.vrcpl(instr); break;
-    case 0x32: rsp.vrcph(instr); break;
-    //case 0x33: rsp.vmov(instr); break;
+    case 0x35: rsp.vrsql(instr); break;
+    case 0x32: case 0x36:
+      rsp.vrcph(instr);
+      break;
+    case 0x30: rsp.vrcp(instr); break;
+    case 0x33: rsp.vmov(instr); break;
     default: util::panic("Unhandled RSP COP2 ({:06b})\n", mask);
   }
 }
diff --git a/src/n64/core/rsp/instructions.cpp b/src/n64/core/rsp/instructions.cpp
index 8bba8cad..06a2751a 100644
--- a/src/n64/core/rsp/instructions.cpp
+++ b/src/n64/core/rsp/instructions.cpp
@@ -229,6 +229,54 @@ void RSP::lrv(u32 instr) {
   }
 }
 
+void RSP::lpv(u32 instr) {
+  int e = E1(instr);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
+
+  int addrOffset = addr & 7;
+  addr &= ~7;
+
+  for(int elem = 0; elem < 8; elem++) {
+    int elemOffset = (16 - e + (elem + addrOffset)) & 0xF;
+
+    u16 value = ReadByte(addr + elemOffset);
+    value <<= 8;
+    vpr[VT(instr)].element[ELEMENT_INDEX(elem)] = value;
+  }
+}
+
+void RSP::luv(u32 instr) {
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
+
+  int e = E1(instr);
+
+  int addrOffset = addr & 7;
+  addr &= ~7;
+
+  for (int elem = 0; elem < 8; elem++) {
+    int elemOffset = (16 - e + (elem + addrOffset)) & 0xF;
+
+    u16 value = ReadByte(addr + elemOffset);
+    value <<= 7;
+    vpr[VT(instr)].element[ELEMENT_INDEX(elem)] = value;
+  }
+}
+
+void RSP::suv(u32 instr) {
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
+
+  int start = E1(instr);
+  int end = start + 8;
+
+  for (int offset = start; offset < end; offset++) {
+    if((offset & 15) < 8) {
+      WriteByte(addr++, vpr[VT(instr)].element[ELEMENT_INDEX(offset & 7)] >> 7);
+    } else {
+      WriteByte(addr++, vpr[VT(instr)].byte[BYTE_INDEX((offset & 7) << 1)]);
+    }
+  }
+}
+
 void RSP::ldv(u32 instr) {
   int e = E1(instr);
   u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
@@ -338,6 +386,22 @@ void RSP::sqv(u32 instr) {
   }
 }
 
+void RSP::spv(u32 instr) {
+  int e = E1(instr);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
+
+  int start = e;
+  int end = start + 8;
+
+  for(int offset = start; offset < end; offset++) {
+    if((offset & 15) < 8) {
+      WriteByte(addr++, vpr[VT(instr)].byte[BYTE_INDEX((offset & 7) << 1)]);
+    } else {
+      WriteByte(addr++, vpr[VT(instr)].element[ELEMENT_INDEX(offset & 7)] >> 7); // 16-bit lane >> 7, mirroring suv
+    }
+  }
+}
+
 void RSP::sbv(u32 instr) {
   int e = E1(instr);
   u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
@@ -374,6 +438,47 @@ void RSP::slv(u32 instr) {
   }
 }
 
+void RSP::stv(u32 instr) {
+  u32 base = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
+  u32 addrOffset = base & 0x7;
+  base &= ~0x7;
+
+  u8 e = E1(instr) >> 1;
+
+  for (int i = 0; i < 8; i++) {
+    u32 address = base;
+    u32 offset = (i << 1) + addrOffset;
+
+    int reg = (VT(instr) & 0x18) | ((i + e) & 0x7);
+
+    u16 val = vpr[reg].element[ELEMENT_INDEX(i & 0x7)];
+    u16 hi = (val >> 8) & 0xFF;
+    u16 lo = (val >> 0) & 0xFF;
+
+    WriteByte(address + ((offset + 0) & 0xF), hi);
+    WriteByte(address + ((offset + 1) & 0xF), lo);
+  }
+}
+
+void RSP::ltv(u32 instr) {
+  u32 base = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
+  base &= ~0x7;
+
+  u8 e = E1(instr);
+
+  for (int i = 0; i < 8; i++) {
+    u32 address = base;
+    u32 offset = (i << 1) + e + (base & 8);
+
+    u16 hi = ReadByte(address + (offset & 0xF));
+    u16 lo = ReadByte(address + ((offset + 1) & 0xF));
+
+    int reg = (VT(instr) & 0x18) | ((i + (e >> 1)) & 0x7);
+
+    vpr[reg].element[ELEMENT_INDEX(i & 0x7)] = (hi << 8) | lo;
+  }
+}
+
 void RSP::sllv(u32 instr) {
   u8 sa = (gpr[RS(instr)]) & 0x1F;
   u32 rt = gpr[RT(instr)];
@@ -464,9 +569,7 @@ void RSP::vadd(u32 instr) {
   VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
 
   for(int i = 0; i < 8; i++) {
-    s16 vsE = vs.selement[i];
-    s16 vteE = vte.selement[i];
-    s32 result = vsE + vteE + (vco.l.element[i] != 0);
+    s32 result = vs.selement[i] + vte.selement[i] + (vco.l.element[i] != 0);
     acc.l.element[i] = result;
     vd.element[i] = clamp_signed(result);
     vco.l.element[i] = 0;
@@ -474,6 +577,20 @@ void RSP::vadd(u32 instr) {
   }
 }
 
+void RSP::vaddc(u32 instr) {
+  VPR& vs = vpr[VS(instr)];
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
+
+  for(int i = 0; i < 8; i++) {
+    u32 result = vs.element[i] + vte.element[i]; // unsigned sum: bit 16 is the carry-out
+    acc.l.element[i] = result;
+    vd.element[i] = result;
+    vco.l.element[i] = (result >> 16) & 1 ? 0xffff : 0;
+    vco.h.element[i] = 0;
+  }
+}
+
 void RSP::vch(u32 instr) {
   int e = E2(instr);
   VPR& vs = vpr[VS(instr)];
@@ -491,15 +608,15 @@ void RSP::vch(u32 instr) {
       vcc.l.element[i] = result <= 0 ? 0xffff : 0;
       vcc.h.element[i] = vteElem < 0 ? 0xffff : 0;
       vco.l.element[i] = 0xffff;
-      vco.h.element[i] = result != 0 && (u16)vsElem != ((u16)vteElem ^ 0xffff);
-      vce.element[i] = result == -1;
+      vco.h.element[i] = (result != 0 && (u16)vsElem != ((u16)vteElem ^ 0xffff)) ? 0xffff : 0;
+      vce.element[i] = result == -1 ? 0xffff : 0;
     } else {
       s16 result = vsElem - vteElem;
       acc.l.selement[i] = (result >= 0) ? vteElem : vsElem;
       vcc.l.element[i] = vteElem < 0 ? 0xffff : 0;
       vcc.h.element[i] = result >= 0 ? 0xffff : 0;
       vco.l.element[i] = 0;
-      vco.h.element[i] = result != 0 && (u16)vsElem != ((u16)vteElem ^ 0xffff);
+      vco.h.element[i] = (result != 0 && (u16)vsElem != ((u16)vteElem ^ 0xffff)) ? 0xffff : 0;
       vce.element[i] = 0;
     }
 
@@ -507,6 +624,38 @@ void RSP::vch(u32 instr) {
   }
 }
 
+void RSP::vcr(u32 instr) {
+  int e = E2(instr);
+  VPR& vs = vpr[VS(instr)];
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], e);
+
+  for(int i = 0; i < 8; i++) {
+    u16 vsE = vs.element[i];
+    u16 vteE = vte.element[i];
+
+    bool signDiff = (0x8000 & (vsE ^ vteE)) == 0x8000;
+
+    u16 vtAbs = signDiff ? ~vteE : vteE;
+
+    bool gte = s16(vteE) <= s16(signDiff ? 0xffff : vsE);
+    bool lte = (((signDiff ? vsE : 0) + vteE) & 0x8000) == 0x8000;
+
+    bool check = signDiff ? lte : gte;
+    u16 result = check ? vtAbs : vsE;
+
+    acc.l.element[i] = result;
+    vd.element[i] = result;
+
+    vcc.h.element[i] = gte ? 0xffff : 0;
+    vcc.l.element[i] = lte ? 0xffff : 0;
+
+    vco.l.element[i] = 0;
+    vco.h.element[i] = 0;
+    vce.element[i] = 0;
+  }
+}
+
 void RSP::vcl(u32 instr) {
   int e = E2(instr);
   VPR& vs = vpr[VS(instr)];
@@ -546,7 +695,35 @@ void RSP::vcl(u32 instr) {
 }
 
 void RSP::vmov(u32 instr) {
+  int e = E2(instr);
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], e);
 
+  u32 se;
+
+  switch (e) {
+    case 0 ... 1:
+      se = VS(instr) & 7;
+      break;
+    case 2 ... 3:
+      se = (e & 1) | (VS(instr) & 6);
+      break;
+    case 4 ... 7:
+      se = (e & 3) | (VS(instr) & 4);
+      break;
+    case 8 ... 15:
+      se = e & 7;
+      break;
+    default:
+      util::panic("VMOV: This should be unreachable!\n");
+  }
+
+  u8 de = VS(instr) & 7;
+
+  vd.element[ELEMENT_INDEX(de)] = vte.element[ELEMENT_INDEX(se)];
+  for(int i = 0; i < 8; i++) {
+    acc.l.element[i] = vte.element[i];
+  }
 }
 
 inline bool IsSignExtension(s16 hi, s16 lo) {
@@ -738,11 +915,31 @@ void RSP::vmacf(u32 instr) {
 }
 
 void RSP::veq(u32 instr) {
+  int e = E2(instr);
+  VPR& vd = vpr[VD(instr)];
+  VPR& vs = vpr[VS(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], e);
 
+  for(int i = 0; i < 8; i++) {
+    vcc.l.element[i] = (!vco.h.element[i] && (vs.element[i] == vte.element[i])) ? 0xffff : 0; // equal only while VCO.h is clear
+    acc.l.element[i] = vcc.l.element[i] ? vs.element[i] : vte.element[i];
+    vd.element[i] = acc.l.element[i];
+    vcc.h.element[i] = vco.h.element[i] = vco.l.element[i] = 0;
+  }
 }
 
 void RSP::vne(u32 instr) {
+  int e = E2(instr);
+  VPR& vd = vpr[VD(instr)];
+  VPR& vs = vpr[VS(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], e);
 
+  for(int i = 0; i < 8; i++) {
+    vcc.l.element[i] = vco.h.element[i] || (vs.element[i] != vte.element[i]) ? 0xffff : 0;
+    acc.l.element[i] = vcc.l.element[i] ? vs.element[i] : vte.element[i];
+    vd.element[i] = acc.l.element[i];
+    vcc.h.element[i] = vco.h.element[i] = vco.l.element[i] = 0;
+  }
 }
 
 void RSP::vge(u32 instr) {
@@ -754,7 +951,7 @@ void RSP::vge(u32 instr) {
   for(int i = 0; i < 8; i++) {
     bool eql = vs.element[i] == vte.element[i];
     bool neg = !(vco.h.element[i] & vco.l.element[i]) & eql;
-    vcc.l.element[i] = (neg | (vs.element[i] > vte.element[i])) ? 0xffff : 0;
+    vcc.l.element[i] = (neg || (vs.element[i] > vte.element[i])) ? 0xffff : 0;
     acc.l.element[i] = vcc.l.element[i] ? vs.element[i] : vte.element[i];
     vd.element[i] = acc.l.element[i];
     vcc.h.element[i] = vco.h.element[i] = vco.l.element[i] = 0;
@@ -770,7 +967,7 @@ void RSP::vlt(u32 instr) {
   for(int i = 0; i < 8; i++) {
     bool eql = vs.element[i] == vte.element[i];
     bool neg = vco.h.element[i] & vco.l.element[i] & eql;
-    vcc.l.element[i] = (neg | (vs.element[i] < vte.element[i])) ? 0xffff : 0;
+    vcc.l.element[i] = (neg || (vs.element[i] < vte.element[i])) ? 0xffff : 0;
     acc.l.element[i] = vcc.l.element[i] ? vs.element[i] : vte.element[i];
     vd.element[i] = acc.l.element[i];
     vcc.h.element[i] = vco.h.element[i] = vco.l.element[i] = 0;
@@ -799,6 +996,29 @@ inline u32 rcp(s32 sinput) {
   return result;
 }
 
+inline u32 rsq(u32 input) { // unsigned so the ROM index below is built with logical shifts
+  if (input == 0) {
+    return 0x7FFFFFFF;
+  } else if (input == 0xFFFF8000) {
+    return 0xFFFF0000;
+  } else if (input > 0xFFFF8000) {
+    input--;
+  }
+
+  s32 sinput = input;
+  s32 mask = sinput >> 31;
+  input ^= mask;
+
+  int shift = __builtin_clz(input) + 1;
+
+  int index = (((input << shift) >> 24) | ((shift & 1) << 8));
+  u32 rom = (((u32)rsq_rom[index]) << 14);
+  int r_shift = ((32 - shift) >> 1);
+  u32 result = (0x40000000 | rom) >> r_shift;
+
+  return result ^ mask;
+}
+
 void RSP::vrcpl(u32 instr) {
   VPR& vd = vpr[VD(instr)];
   VPR& vt = vpr[VT(instr)];
@@ -825,6 +1045,48 @@ void RSP::vrcpl(u32 instr) {
   vd.element[ELEMENT_INDEX(de)] = result;
 }
 
+void RSP::vrcp(u32 instr) {
+  VPR& vd = vpr[VD(instr)];
+  VPR& vt = vpr[VT(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
+  int e = E2(instr) & 7;
+  int de = DE(instr) & 7;
+  s32 input = vt.selement[ELEMENT_INDEX(e)];
+  s32 result = rcp(input);
+  vd.element[ELEMENT_INDEX(de)] = result;
+  divOut = result >> 16;
+  divInLoaded = false;
+
+  for (int i = 0; i < 8; i++) {
+    acc.l.element[i] = vte.element[i];
+  }
+}
+
+void RSP::vrsql(u32 instr) {
+  VPR& vd = vpr[VD(instr)];
+  VPR& vt = vpr[VT(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
+  int e = E2(instr) & 7;
+  int de = DE(instr) & 7;
+
+  s32 input;
+  if(divInLoaded) {
+    input = (divIn << 16) | vt.element[ELEMENT_INDEX(e)];
+  } else {
+    input = vt.selement[ELEMENT_INDEX(e)];
+  }
+
+  u32 result = rsq(input);
+  divOut = result >> 16;
+  divInLoaded = false;
+
+  for(int i = 0; i < 8; i++) {
+    acc.l.element[i] = vte.element[i];
+  }
+
+  vd.element[ELEMENT_INDEX(de)] = result;
+}
+
 void RSP::vrcph(u32 instr) {
   int e = E2(instr);
   int de = DE(instr);
@@ -923,6 +1185,18 @@ void RSP::vxor(u32 instr) {
   }
 }
 
+void RSP::vxnor(u32 instr) {
+  int e = E2(instr);
+  VPR& vs = vpr[VS(instr)];
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], e);
+
+  for(int i = 0; i < 8; i++) {
+    acc.l.element[i] = ~(vte.element[i] ^ vs.element[i]);
+    vd.element[i] = acc.l.element[i];
+  }
+}
+
 void RSP::vand(u32 instr) {
   int e = E2(instr);
   VPR& vs = vpr[VS(instr)];
@@ -935,6 +1209,18 @@ void RSP::vand(u32 instr) {
   }
 }
 
+void RSP::vnor(u32 instr) {
+  int e = E2(instr);
+  VPR& vs = vpr[VS(instr)];
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], e);
+
+  for(int i = 0; i < 8; i++) {
+    acc.l.element[i] = ~(vte.element[i] | vs.element[i]);
+    vd.element[i] = acc.l.element[i];
+  }
+}
+
 void RSP::vzero(u32 instr) {
   VPR& vs = vpr[VS(instr)];
   VPR& vd = vpr[VD(instr)];