diff --git a/src/backend/core/RSP.hpp b/src/backend/core/RSP.hpp
index cd4bcb6a..2f6cca99 100644
--- a/src/backend/core/RSP.hpp
+++ b/src/backend/core/RSP.hpp
@@ -258,6 +258,7 @@ struct RSP {
   void llv(u32 instr);
   void lrv(u32 instr);
   void lqv(u32 instr);
+  void lfv(u32 instr);
   void lhv(u32 instr);
   void ltv(u32 instr);
   void lpv(u32 instr);
@@ -306,6 +307,7 @@ struct RSP {
   void vcl(u32 instr);
   void vmacf(u32 instr);
   void vmacu(u32 instr);
+  void vmacq(u32 instr);
   void vmadh(u32 instr);
   void vmadl(u32 instr);
   void vmadm(u32 instr);
@@ -313,6 +315,7 @@ struct RSP {
   void vmov(u32 instr);
   void vmulf(u32 instr);
   void vmulu(u32 instr);
+  void vmulq(u32 instr);
   void vmudl(u32 instr);
   void vmudh(u32 instr);
   void vmudm(u32 instr);
@@ -326,6 +329,10 @@ struct RSP {
   void vrsq(u32 instr);
   void vrcpl(u32 instr);
   void vrsql(u32 instr);
+  void vrndp(u32 instr);
+  void vrndn(u32 instr);
+  // shared body of vrndp/vrndn; `positive` selects the vrndp behavior
+  void vrnd(u32 instr, bool positive);
   void vrcph(u32 instr);
   void vsar(u32 instr);
   void vsub(u32 instr);
@@ -334,6 +341,7 @@ struct RSP {
   void vnxor(u32 instr);
   void vor(u32 instr);
   void vnor(u32 instr);
+  void vzero(u32 instr);
   void mfc0(RDP& rdp, u32 instr);
   void mtc0(Registers& regs, Mem& mem, u32 instr);
   void mfc2(u32 instr);
diff --git a/src/backend/core/dynarec/cop/cop1instructions.cpp b/src/backend/core/dynarec/cop/cop1instructions.cpp
index 4d93d82b..5ee73242 100644
--- a/src/backend/core/dynarec/cop/cop1instructions.cpp
+++ b/src/backend/core/dynarec/cop/cop1instructions.cpp
@@ -90,7 +90,7 @@ void ceilwd(n64::Registers& regs, u32 instr) {
 }
 
 void cfc1(n64::Registers& regs, u32 instr) {
-  u8 fd = FD(instr);
+  u8 fd = RD(instr);
   s32 val = 0;
   switch(fd) {
     case 0: val = regs.cop1.fcr0; break;
diff --git a/src/backend/core/interpreter/cop/cop1instructions.cpp b/src/backend/core/interpreter/cop/cop1instructions.cpp
index c5cc1096..2b4936b0 100644
--- a/src/backend/core/interpreter/cop/cop1instructions.cpp
+++ b/src/backend/core/interpreter/cop/cop1instructions.cpp
@@ -90,18 +90,20 @@ void Cop1::ceilwd(Registers& regs, u32 instr) {
 }
 
 void Cop1::cfc1(Registers& regs, u32 instr) const {
-  u8 fd = FD(instr);
+  u8 fd = RD(instr);
   s32 val = 0;
   switch(fd) {
     case 0: val = fcr0; break;
-    case 31: val = fcr31.raw; break;
+    case 31:
+      val = fcr31.raw;
+      break;
     default: Util::panic("Undefined CFC1 with rd != 0 or 31\n");
   }
   regs.gpr[RT(instr)] = val;
 }
 
 void Cop1::ctc1(Registers& regs, u32 instr) {
-  u8 fs = FS(instr);
+  u8 fs = RD(instr);
   u32 val = regs.gpr[RT(instr)];
   switch(fs) {
     case 0: break;
diff --git a/src/backend/core/registers/Cop0.cpp b/src/backend/core/registers/Cop0.cpp
index 6270eec6..71e174e5 100644
--- a/src/backend/core/registers/Cop0.cpp
+++ b/src/backend/core/registers/Cop0.cpp
@@ -10,7 +10,7 @@ Cop0::Cop0() {
 
 void Cop0::Reset() {
   cause.raw = 0xB000007C;
-  status.raw = 0x241000E0;
+  status.raw = 0x34000000;
   PRId = 0x00000B22;
   Config = 0x7006E463;
   EPC = 0xFFFFFFFFFFFFFFFF;
diff --git a/src/backend/core/registers/Cop1.cpp b/src/backend/core/registers/Cop1.cpp
index 25efe05d..91fe3c46 100644
--- a/src/backend/core/registers/Cop1.cpp
+++ b/src/backend/core/registers/Cop1.cpp
@@ -10,7 +10,7 @@ Cop1::Cop1() {
 
 void Cop1::Reset() {
   fcr0 = 0;
-  fcr31.raw = 0;
+  fcr31.raw = 0x01000800;
   memset(fgr, 0, 32 * sizeof(FGR));
 }
 
diff --git a/src/backend/core/rsp/decode.cpp b/src/backend/core/rsp/decode.cpp
index 77432313..8b02fa1c 100644
--- a/src/backend/core/rsp/decode.cpp
+++ b/src/backend/core/rsp/decode.cpp
@@ -70,6 +70,8 @@ inline void lwc2(RSP& rsp, u32 instr) {
     case 0x06: rsp.lpv(instr); break;
     case 0x07: rsp.luv(instr); break;
     case 0x08: rsp.lhv(instr); break;
+    case 0x09: rsp.lfv(instr); break;
+    case 0x0A: break;
     case 0x0B: rsp.ltv(instr); break;
     default: Util::panic("Unhandled RSP LWC2 {:05b}\n", mask);
   }
@@ -114,22 +116,36 @@ inline void cop2(RSP& rsp, u32 instr) {
       }
       break;
     case 0x01: rsp.vmulu(instr); break;
+    case 0x02: rsp.vrndp(instr); break;
+    case 0x03: rsp.vmulq(instr); break;
     case 0x04: rsp.vmudl(instr); break;
     case 0x05: rsp.vmudm(instr); break;
     case 0x06: rsp.vmudn(instr); break;
     case 0x07: rsp.vmudh(instr); break;
     case 0x08: rsp.vmacf(instr); break;
     case 0x09: rsp.vmacu(instr); break;
+    case 0x0A: rsp.vrndn(instr); break;
+    case 0x0B: rsp.vmacq(instr); break;
     case 0x0C: rsp.vmadl(instr); break;
     case 0x0D: rsp.vmadm(instr); break;
     case 0x0E: rsp.vmadn(instr); break;
     case 0x0F: rsp.vmadh(instr); break;
     case 0x10: rsp.vadd(instr); break;
     case 0x11: rsp.vsub(instr); break;
+    case 0x12: rsp.vzero(instr); break;
     case 0x13: rsp.vabs(instr); break;
     case 0x14: rsp.vaddc(instr); break;
     case 0x15: rsp.vsubc(instr); break;
+    case 0x16: rsp.vzero(instr); break;
+    case 0x17: rsp.vzero(instr); break;
+    case 0x18: rsp.vzero(instr); break;
+    case 0x19: rsp.vzero(instr); break;
+    case 0x1A: rsp.vzero(instr); break;
+    case 0x1B: rsp.vzero(instr); break;
+    case 0x1C: rsp.vzero(instr); break;
     case 0x1D: rsp.vsar(instr); break;
+    case 0x1E: rsp.vzero(instr); break;
+    case 0x1F: rsp.vzero(instr); break;
     case 0x20: rsp.vlt(instr); break;
     case 0x21: rsp.veq(instr); break;
     case 0x22: rsp.vne(instr); break;
@@ -144,6 +160,8 @@ inline void cop2(RSP& rsp, u32 instr) {
     case 0x2B: rsp.vnor(instr); break;
     case 0x2C: rsp.vxor(instr); break;
     case 0x2D: rsp.vnxor(instr); break;
+    case 0x2E: rsp.vzero(instr); break;
+    case 0x2F: rsp.vzero(instr); break;
     case 0x31: rsp.vrcpl(instr); break;
     case 0x35: rsp.vrsql(instr); break;
     case 0x32: case 0x36:
@@ -152,6 +170,8 @@ inline void cop2(RSP& rsp, u32 instr) {
     case 0x30: rsp.vrcp(instr); break;
     case 0x33: rsp.vmov(instr); break;
     case 0x34: rsp.vrsq(instr); break;
+    case 0x38: case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E:
+      rsp.vzero(instr); break;
     case 0x37: case 0x3F: break;
     default: Util::panic("Unhandled RSP COP2 ({:06b})\n", mask);
   }
diff --git a/src/backend/core/rsp/instructions.cpp b/src/backend/core/rsp/instructions.cpp
index d815db5f..0d3e9ce6 100644
--- a/src/backend/core/rsp/instructions.cpp
+++ b/src/backend/core/rsp/instructions.cpp
@@ -455,6 +455,30 @@ void RSP::lhv(u32 instr) {
   }
 }
 
+// LFV: reads eight bytes via ReadByte (offsets wrap modulo 16 around the
+// 8-byte-aligned address), stores each shifted left by 7 into a temporary
+// vector, then copies bytes [start, min(start + 8, 16)) of it into VT.
+void RSP::lfv(u32 instr) {
+  VPR& vt = vpr[VT(instr)];
+  int start = E1(instr);
+  u32 address = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
+  u32 base = (address & 7) - start;
+  address &= ~7;
+
+  int end = std::min(start + 8, 16);
+
+  // TODO: should be possible to do with one loop
+  VPR tmp;
+  for (u32 offset = 0; offset < 4; offset++) {
+    tmp.element[ELEMENT_INDEX(offset + 0)] = ReadByte(address + ((base + offset * 4 + 0) & 15)) << 7;
+    tmp.element[ELEMENT_INDEX(offset + 4)] = ReadByte(address + ((base + offset * 4 + 8) & 15)) << 7;
+  }
+
+  for (u32 offset = start; offset < end; offset++) {
+    vt.byte[BYTE_INDEX(offset)] = tmp.byte[BYTE_INDEX(offset)];
+  }
+}
+
 void RSP::lrv(u32 instr) {
   u32 address = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
   int e = E1(instr);
@@ -885,6 +909,27 @@ void RSP::vmulf(u32 instr) {
   }
 }
 
+// VMULQ: multiplies the signed elements of VS and the GetVTE result for VT,
+// adding 31 to negative products. The product goes to the accumulator's high/mid
+// slices (low is cleared); VD gets signedClamp(product >> 1) with the low 4 bits cleared.
+void RSP::vmulq(u32 instr) {
+  VPR& vs = vpr[VS(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
+  VPR& vd = vpr[VD(instr)];
+
+  for(int i = 0; i < 8; i++) {
+    s32 product = vs.selement[i] * vte.selement[i];
+    if(product < 0) {
+      product += 31;
+    }
+
+    acc.h.element[i] = product >> 16;
+    acc.m.element[i] = product;
+    acc.l.element[i] = 0;
+    vd.element[i] = signedClamp(product >> 1) & ~15;
+  }
+}
+
 void RSP::vmulu(u32 instr) {
   int e = E2(instr);
   VPR& vs = vpr[VS(instr)];
@@ -1126,6 +1171,27 @@ void RSP::vmacu(u32 instr) {
   }
 }
 
+// VMACQ: rebuilds a 32-bit value from the accumulator's high/mid slices and,
+// when bit 5 is clear, adds 32 to negative values or subtracts 32 from values
+// >= 32. The result is written back to the high/mid slices and VD gets
+// signedClamp(value >> 1) with the low 4 bits cleared.
+void RSP::vmacq(u32 instr) {
+  VPR& vd = vpr[VD(instr)];
+
+  for(int i = 0; i < 8; i++) {
+    s32 product = (acc.h.element[i] << 16) | acc.m.element[i];
+    if(product < 0 && !(product & (1 << 5))) {
+      product += 32;
+    } else if(product >= 32 && !(product & (1 << 5))) {
+      product -= 32;
+    }
+    acc.h.element[i] = product >> 16;
+    acc.m.element[i] = product & 0xFFFF;
+
+    vd.element[i] = signedClamp(product >> 1) & ~15;
+  }
+}
+
 void RSP::veq(u32 instr) {
   int e = E2(instr);
   VPR& vd = vpr[VD(instr)];
@@ -1291,6 +1357,52 @@ void RSP::vrsq(u32 instr) {
   }
 }
 
+// from nall, in ares
+// Sign-extends the low `bits` bits of x.
+static inline s64 sclip(s64 x, u32 bits) {
+  u64 b = 1ull << (bits - 1);
+  u64 m = b * 2 - 1;
+  return ((x & m) ^ b) - b;
+}
+
+// Shared body of VRNDP/VRNDN. The rounding term is the sign-extended element of
+// the GetVTE result for VT, moved to the upper 16 bits when bit 0 of the VS field
+// is set. It is added to the 48-bit accumulator only when the accumulator's sign
+// matches: non-negative for VRNDP (positive == true), negative for VRNDN.
+void RSP::vrnd(u32 instr, bool positive) {
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
+  const bool upperHalf = VS(instr) & 1;
+
+  for(int i = 0; i < 8; i++) {
+    s32 product = (s16)vte.selement[i];
+    if(upperHalf) {
+      // multiply instead of shifting: << on a negative value is undefined before C++20
+      product *= 65536;
+    }
+
+    // rebuild the accumulator lane from its 16-bit slices and sign-extend it from 48 bits
+    s64 accum = sclip((s64)acc.h.element[i] << 32 | (s64)acc.m.element[i] << 16 | (s64)acc.l.element[i], 48);
+
+    if((accum >= 0) == positive) {
+      accum = sclip(accum + product, 48);
+    }
+
+    acc.h.element[i] = accum >> 32;
+    acc.m.element[i] = accum >> 16;
+    acc.l.element[i] = accum >> 0;
+    vd.element[i] = signedClamp(accum >> 16);
+  }
+}
+
+void RSP::vrndn(u32 instr) {
+  vrnd(instr, false);
+}
+
+void RSP::vrndp(u32 instr) {
+  vrnd(instr, true);
+}
+
 void RSP::vrsql(u32 instr) {
   VPR& vd = vpr[VD(instr)];
   VPR& vt = vpr[VT(instr)];
@@ -1476,6 +1588,20 @@ void RSP::vor(u32 instr) {
   }
 }
 
+// VZERO: stores the element-wise sum of the GetVTE result for VT and VS in the
+// accumulator's low slice, then zeroes VD.
+void RSP::vzero(u32 instr) {
+  VPR& vs = vpr[VS(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
+  VPR& vd = vpr[VD(instr)];
+
+  for(int i = 0; i < 8; i++) {
+    acc.l.element[i] = vte.element[i] + vs.element[i];
+  }
+
+  memset(&vd, 0, sizeof(VPR));
+}
+
 void RSP::mfc0(RDP& rdp, u32 instr) {
   gpr[RT(instr)] = GetCop0Reg(*this, rdp, RD(instr));
 }