RSP: add LDV/SDV/SSV, VMACF and VZERO, plus 48-bit accumulator helpers.

Review notes:
* vmacf widens the 32-bit lane product to 64 bits before doubling it;
  doubling in 32 bits overflows once the product reaches 0x40000000.
* This patch was re-flowed from a whitespace-collapsed copy: indentation and
  blank context lines are reconstructed, so use
  `git apply --ignore-whitespace` if the context does not match exactly.
* The index lines keep the blob ids of the original patch.

diff --git a/src/n64/core/RSP.hpp b/src/n64/core/RSP.hpp
index 9490160f..26aa1c04 100644
--- a/src/n64/core/RSP.hpp
+++ b/src/n64/core/RSP.hpp
@@ -142,6 +142,27 @@ struct RSP {
     nextPC = val += 4;
   }
 
+  // Combines accumulator lane `e` into one value, (h << 32) | (m << 16) | l,
+  // and sign-extends it from bit 47.
+  // NOTE(review): assumes the `element` lanes are unsigned 16-bit - confirm.
+  inline s64 GetACC(int e) {
+    s64 val = s64(acc.h.element[e]) << 32;
+    val |= s64(acc.m.element[e]) << 16;
+    val |= s64(acc.l.element[e]);
+    if((val & 0x800000000000) != 0) {
+      val |= 0xFFFF000000000000;
+    }
+    return val;
+  }
+
+  // Stores `val` into accumulator lane `e`: h gets val >> 32, m gets val >> 16
+  // and l gets val, each narrowed to the lane type on assignment.
+  inline void SetACC(int e, s64 val) {
+    acc.h.element[e] = val >> 32;
+    acc.m.element[e] = val >> 16;
+    acc.l.element[e] = val;
+  }
+
   inline u16 VCOasU16() {
     u16 val = 0;
     for(int i = 0; i < 8; i++) {
@@ -296,6 +317,7 @@ struct RSP {
   void lbu(u32 instr);
   void lhu(u32 instr);
   void lui(u32 instr);
+  void ldv(u32 instr);
   void lqv(u32 instr);
   void j(u32 instr);
   void jal(u32 instr);
@@ -311,6 +333,8 @@ struct RSP {
   void sw(u32 instr);
   void sub(u32 instr);
   void sqv(u32 instr);
+  void sdv(u32 instr);
+  void ssv(u32 instr);
   void sllv(u32 instr);
   void srlv(u32 instr);
   void srav(u32 instr);
@@ -323,9 +347,11 @@ struct RSP {
   void sltiu(u32 instr);
   void vabs(u32 instr);
   void vmov(u32 instr);
+  void vmacf(u32 instr);
   void veq(u32 instr);
   void vne(u32 instr);
   void vsar(u32 instr);
+  void vzero(u32 instr);
   void mfc0(RDP& rdp, u32 instr);
   void mtc0(Registers& regs, Mem& mem, u32 instr);
   void mfc2(u32 instr);
diff --git a/src/n64/core/rsp/decode.cpp b/src/n64/core/rsp/decode.cpp
index 4d9d3512..d70aa9a7 100644
--- a/src/n64/core/rsp/decode.cpp
+++ b/src/n64/core/rsp/decode.cpp
@@ -58,6 +58,7 @@ inline void regimm(RSP& rsp, u32 instr) {
 inline void lwc2(RSP& rsp, u32 instr) {
   u8 mask = (instr >> 11) & 0x1F;
   switch(mask) {
+    case 0x03: rsp.ldv(instr); break;
     case 0x04: rsp.lqv(instr); break;
     default: util::panic("Unhandled RSP LWC2 {:06b}\n", mask);
   }
@@ -66,6 +67,8 @@ inline void lwc2(RSP& rsp, u32 instr) {
 inline void swc2(RSP& rsp, u32 instr) {
   u8 mask = (instr >> 11) & 0x1F;
   switch(mask) {
+    case 0x01: rsp.ssv(instr); break;
+    case 0x03: rsp.sdv(instr); break;
     case 0x04: rsp.sqv(instr); break;
     default: util::panic("Unhandled RSP SWC2 {:06b}\n", mask);
   }
@@ -81,9 +84,11 @@ inline void cop2(RSP& rsp, u32 instr) {
         case 0x02: rsp.cfc2(instr); break;
         case 0x04: rsp.mtc2(instr); break;
         case 0x06: rsp.ctc2(instr); break;
-        default: util::panic("Unhandled RSP COP2 sub ({:06b})\n", mask_sub);
+        case 0x1E: rsp.vzero(instr); break;
+        default: util::panic("Unhandled RSP COP2 sub ({:05b})\n", mask_sub);
       }
       break;
+    case 0x08: rsp.vmacf(instr); break;
     //case 0x13: rsp.vabs(instr); break;
     //case 0x1D: rsp.vsar(instr); break;
     //case 0x21: rsp.veq(instr); break;
diff --git a/src/n64/core/rsp/instructions.cpp b/src/n64/core/rsp/instructions.cpp
index d2a66f18..ea56de1d 100644
--- a/src/n64/core/rsp/instructions.cpp
+++ b/src/n64/core/rsp/instructions.cpp
@@ -205,9 +205,12 @@ void RSP::lui(u32 instr) {
   gpr[RT(instr)] = imm;
 }
 
+// Low 7 bits of the instruction word, used below as the load/store offset.
+#define OFFSET(x) ((x) & 0x7F)
+
 void RSP::lqv(u32 instr) {
   int e = E(instr);
-  u32 addr = gpr[BASE(instr)] + SignExt7bit(instr & 0x7F, 4);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
   u32 end = ((addr & ~15) + 15);
 
   for(int i = 0; addr + i <= end && i + e < 16; i++) {
@@ -215,6 +218,19 @@ void RSP::lqv(u32 instr) {
   }
 }
 
+// Reads consecutive bytes from memory into vpr[VT] byte slots e..e+7 (mapped
+// through BYTE_INDEX), stopping after slot 15 so at most 8 bytes are loaded.
+void RSP::ldv(u32 instr) {
+  int e = E(instr);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
+  u32 end = e + 8 > 16 ? 16 : e + 8;
+
+  for(int i = e; i < end; i++) {
+    vpr[VT(instr)].byte[BYTE_INDEX(i)] = ReadByte(addr);
+    addr++;
+  }
+}
+
 void RSP::j(u32 instr) {
   u32 target = (instr & 0x3ffffff) << 2;
   nextPC = target;
@@ -281,7 +297,7 @@ void RSP::sub(u32 instr) {
 
 void RSP::sqv(u32 instr) {
   int e = E(instr);
-  u32 addr = gpr[BASE(instr)] + SignExt7bit(instr & 0x7F, 4);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
   u32 end = ((addr & ~15) + 15);
 
   for(int i = 0; addr + i <= end; i++) {
@@ -289,6 +305,30 @@ void RSP::sqv(u32 instr) {
   }
 }
 
+// Writes 8 consecutive bytes to memory, taken from vpr[VT] starting at byte
+// slot e; the slot index wraps modulo 16 before going through BYTE_INDEX.
+void RSP::sdv(u32 instr) {
+  int e = E(instr);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3);
+
+  for(int i = 0; i < 8; i++) {
+    WriteByte(addr + i, vpr[VT(instr)].byte[BYTE_INDEX((i + e) & 0xF)]);
+  }
+}
+
+// Writes one halfword built from vpr[VT] byte slots e (high 8 bits) and e + 1
+// (low 8 bits); the slot index wraps modulo 16 before going through BYTE_INDEX.
+void RSP::ssv(u32 instr) {
+  int e = E(instr);
+  u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 1);
+
+  u8 hi = vpr[VT(instr)].byte[BYTE_INDEX(e & 15)];
+  u8 lo = vpr[VT(instr)].byte[BYTE_INDEX((e + 1) & 15)];
+  u16 val = (u16)hi << 8 | lo;
+
+  WriteHalf(addr, val);
+}
+
 void RSP::sllv(u32 instr) {
   u8 sa = (gpr[RS(instr)]) & 0x1F;
   u32 rt = gpr[RT(instr)];
@@ -343,6 +383,13 @@ void RSP::sltiu(u32 instr) {
   gpr[RT(instr)] = (u32)gpr[RS(instr)] < imm;
 }
 
+// Saturates val to the s16 range [-32768, 32767].
+inline s16 clamp_signed(s64 val) {
+  if(val > 32767) return 32767;
+  if(val < -32768) return -32768;
+  return val;
+}
+
 void RSP::vabs(u32 instr) {
 
 }
@@ -351,6 +398,26 @@ void RSP::vmov(u32 instr) {
 
 }
 
+// For each of the 8 lanes: multiplies the signed lanes of vs and vte, adds
+// twice the product to the accumulator lane, and writes the new accumulator
+// shifted right by 16 and saturated to s16 into vd.
+void RSP::vmacf(u32 instr) {
+  VPR& vd = vpr[VD(instr)];
+  VPR& vs = vpr[VS(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E(instr));
+
+  for(int i = 0; i < 8; i++) {
+    s32 prod = vs.selement[i] * vte.selement[i];
+    // Widen before doubling: in s32, prod * 2 overflows once prod is 0x40000000.
+    s64 accDelta = s64(prod) * 2;
+    s64 accum = GetACC(i) + accDelta;
+    SetACC(i, accum);
+
+    s16 result = clamp_signed(accum >> 16);
+    vd.element[i] = result;
+  }
+}
+
 void RSP::veq(u32 instr) {
 
 }
@@ -363,6 +430,20 @@ void RSP::vsar(u32 instr) {
 
 }
 
+// Sets each low accumulator lane to vte + vs for that lane, then clears every
+// byte of vd.
+void RSP::vzero(u32 instr) {
+  VPR& vs = vpr[VS(instr)];
+  VPR& vd = vpr[VD(instr)];
+  VPR vte = GetVTE(vpr[VT(instr)], E(instr));
+
+  for(int i = 0; i < 8; i++) {
+    acc.l.element[i] = vte.element[i] + vs.element[i];
+  }
+
+  memset(&vd, 0, sizeof(VPR));
+}
+
 void RSP::mfc0(RDP& rdp, u32 instr) {
   gpr[RT(instr)] = GetCop0Reg(*this, rdp, RD(instr));
 }