#include #include #include #include namespace n64 { inline bool AcquireSemaphore(RSP& rsp) { if(rsp.semaphore) { return true; } else { rsp.semaphore = true; return false; } } inline void ReleaseSemaphore(RSP& rsp) { rsp.semaphore = false; } inline int SignExt7bit(u8 val, int sa) { s8 sval = ((val << 1) & 0x80) | val; s32 sval32 = sval; u32 val32 = sval32; return val32 << sa; } inline auto GetCop0Reg(RSP& rsp, RDP& rdp, u8 index) -> u32{ switch(index) { case 0: return rsp.lastSuccessfulSPAddr.raw; case 1: return rsp.lastSuccessfulDRAMAddr.raw; case 2: case 3: return rsp.spDMALen.raw; case 4: return rsp.spStatus.raw; case 5: return rsp.spStatus.dmaFull; case 6: return rsp.spStatus.dmaBusy; case 7: return AcquireSemaphore(rsp); case 9: return rdp.dpc.end; case 10: return rdp.dpc.current; case 11: return rdp.dpc.status.raw; case 12: return 0; default: util::panic("Unhandled RSP COP0 register read at index {}\n", index); } return 0; } inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) { MMIO& mmio = mem.mmio; RSP& rsp = mmio.rsp; RDP& rdp = mmio.rdp; MI& mi = mmio.mi; switch(index) { case 0: rsp.spDMASPAddr.raw = val; break; case 1: rsp.spDMADRAMAddr.raw = val; break; case 2: rsp.spDMALen.raw = val; rsp.DMA(rsp.spDMALen, mem.GetRDRAM(), rsp, rsp.spDMASPAddr.bank); break; case 3: rsp.spDMALen.raw = val; rsp.DMA(rsp.spDMALen, mem.GetRDRAM(), rsp, rsp.spDMASPAddr.bank); break; case 4: rsp.WriteStatus(mi, regs, val); break; case 7: if(val == 0) { ReleaseSemaphore(rsp); } break; case 8: rdp.dpc.start = val & 0xFFFFF8; rdp.dpc.current = rdp.dpc.start; break; case 9: rdp.dpc.end = val & 0xFFFFF8; rdp.RunCommand(mi, regs, rsp); break; case 11: rdp.StatusWrite(mi, regs, rsp, val); break; default: util::panic("Unhandled RSP COP0 register write at index {}\n", index); } } inline VPR Broadcast(VPR vt, int l0, int l1, int l2, int l3, int l4, int l5, int l6, int l7) { VPR vte{}; vte.element[ELEMENT_INDEX(0)] = vt.element[l0]; vte.element[ELEMENT_INDEX(1)] = vt.element[l1]; vte.element[ELEMENT_INDEX(2)] = vt.element[l2]; vte.element[ELEMENT_INDEX(3)] = vt.element[l3]; vte.element[ELEMENT_INDEX(4)] = vt.element[l4]; vte.element[ELEMENT_INDEX(5)] = vt.element[l5]; vte.element[ELEMENT_INDEX(6)] = vt.element[l6]; vte.element[ELEMENT_INDEX(7)] = vt.element[l7]; return vte; } inline VPR GetVTE(VPR vt, u8 e) { VPR vte{}; switch(e & 0xf) { case 0 ... 1: return vt; case 2 ... 3: vte = Broadcast(vt, e - 2, e - 2, e, e, e + 2, e + 2, e + 4, e + 4); break; case 4 ... 7: vte = Broadcast(vt, e - 4, e - 4, e - 4, e - 4, e, e, e, e); break; case 8 ... 15: { int index = ELEMENT_INDEX(e - 8); for (u16& vteE : vte.element) { vteE = vt.element[index]; } } break; } return vte; } void RSP::add(u32 instr) { gpr[RD(instr)] = gpr[RS(instr)] + gpr[RT(instr)]; } void RSP::addi(u32 instr) { s32 op1 = gpr[RS(instr)]; s16 op2 = instr; s32 result = op1 + op2; gpr[RT(instr)] = result; } void RSP::and_(u32 instr) { gpr[RD(instr)] = gpr[RT(instr)] & gpr[RS(instr)]; } void RSP::andi(u32 instr) { gpr[RT(instr)] = gpr[RS(instr)] & (u16)instr; } void RSP::cfc2(u32 instr) { s16 value = 0; switch(RD(instr) & 3) { case 0: value = VCOasU16(); break; case 1: value = VCCasU16(); break; case 2 ... 3: value = GetVCE(); break; } gpr[RT(instr)] = s32(value); } void RSP::ctc2(u32 instr) { u16 value = gpr[RT(instr)]; switch(RD(instr) & 3) { case 0: for(int i = 0; i < 8; i++) { vco.h.element[7 - i] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; vco.l.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; } break; case 1: for(int i = 0; i < 8; i++) { vcc.h.element[7 - i] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; vcc.l.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; } break; case 2: case 3: for(int i = 0; i < 8; i++) { vce.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; } break; } } void RSP::b(u32 instr, bool cond) { s32 address = ((s32)((s16)(instr & 0xFFFF) << 2)) + pc; branch(address, cond); } void RSP::bl(u32 instr, bool cond) { b(instr, cond); gpr[31] = pc + 4; } void RSP::lb(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; gpr[RT(instr)] = (s32)(s8)ReadByte(address); } void RSP::lh(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; gpr[RT(instr)] = (s32)(s16)ReadHalf(address); } void RSP::lw(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; gpr[RT(instr)] = ReadWord(address); } void RSP::lbu(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; gpr[RT(instr)] = ReadByte(address); } void RSP::lhu(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; gpr[RT(instr)] = ReadHalf(address); } void RSP::lui(u32 instr) { u32 imm = ((u16)instr) << 16; gpr[RT(instr)] = imm; } #define OFFSET(x) ((x) & 0x7F) void RSP::lqv(u32 instr) { int e = E1(instr); u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4); u32 end = ((addr & ~15) + 15); for(int i = 0; addr + i <= end && i + e < 16; i++) { vpr[VT(instr)].byte[BYTE_INDEX(i + e)] = ReadByte(addr + i); } } void RSP::ldv(u32 instr) { int e = E1(instr); u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3); u32 end = e + 8 > 16 ? 16 : e + 8; for(int i = e; i < end; i++) { vpr[VT(instr)].byte[BYTE_INDEX(i)] = ReadByte(addr); addr++; } } void RSP::j(u32 instr) { u32 target = (instr & 0x3ffffff) << 2; nextPC = target; } void RSP::jal(u32 instr) { j(instr); gpr[31] = pc + 4; } void RSP::jr(u32 instr) { nextPC = gpr[RS(instr)]; } void RSP::jalr(u32 instr) { jr(instr); gpr[RD(instr)] = pc + 4; } void RSP::nor(u32 instr) { gpr[RD(instr)] = ~(gpr[RT(instr)] | gpr[RS(instr)]); } void RSP::ori(u32 instr) { s32 op1 = gpr[RS(instr)]; u32 op2 = instr & 0xffff; s32 result = op1 | op2; gpr[RT(instr)] = result; } void RSP::xori(u32 instr) { s32 op1 = gpr[RS(instr)]; u32 op2 = instr & 0xffff; s32 result = op1 ^ op2; gpr[RT(instr)] = result; } void RSP::or_(u32 instr) { gpr[RD(instr)] = gpr[RT(instr)] | gpr[RS(instr)]; } void RSP::xor_(u32 instr) { gpr[RD(instr)] = gpr[RT(instr)] ^ gpr[RS(instr)]; } void RSP::sb(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; WriteByte(address, gpr[RT(instr)]); } void RSP::sh(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; WriteHalf(address, gpr[RT(instr)]); } void RSP::sw(u32 instr) { u32 address = gpr[BASE(instr)] + (s16)instr; WriteWord(address, gpr[RT(instr)]); } void RSP::sub(u32 instr) { gpr[RD(instr)] = gpr[RS(instr)] - gpr[RT(instr)]; } void RSP::sqv(u32 instr) { int e = E1(instr); u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4); u32 end = ((addr & ~15) + 15); for(int i = 0; addr + i <= end; i++) { WriteByte(addr + i, vpr[VT(instr)].byte[BYTE_INDEX((i + e) & 15)]); } } void RSP::sdv(u32 instr) { int e = E1(instr); u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 3); for(int i = 0; i < 8; i++) { WriteByte(addr + i, vpr[VT(instr)].byte[BYTE_INDEX((i + e) & 0xF)]); } } void RSP::ssv(u32 instr) { int e = E1(instr); u32 addr = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 1); u8 hi = vpr[VT(instr)].byte[BYTE_INDEX(e & 15)]; u8 lo = vpr[VT(instr)].byte[BYTE_INDEX((e + 1) & 15)]; u16 val = (u16)hi << 8 | lo; WriteHalf(addr, val); } void RSP::sllv(u32 instr) { u8 sa = (gpr[RS(instr)]) & 0x1F; u32 rt = gpr[RT(instr)]; u32 result = rt << sa; gpr[RD(instr)] = result; } void RSP::srlv(u32 instr) { u8 sa = (gpr[RS(instr)]) & 0x1F; u32 rt = gpr[RT(instr)]; u32 result = rt >> sa; gpr[RD(instr)] = result; } void RSP::srav(u32 instr) { u8 sa = gpr[RS(instr)] & 0x1F; s32 rt = gpr[RT(instr)]; s32 result = rt >> sa; gpr[RD(instr)] = result; } void RSP::sll(u32 instr) { u8 sa = (instr >> 6) & 0x1f; gpr[RD(instr)] = (u32)gpr[RT(instr)] << sa; } void RSP::srl(u32 instr) { u8 sa = (instr >> 6) & 0x1f; gpr[RD(instr)] = (u32)gpr[RT(instr)] >> sa; } void RSP::sra(u32 instr) { u8 sa = (instr >> 6) & 0x1f; gpr[RD(instr)] = gpr[RT(instr)] >> sa; } void RSP::slt(u32 instr) { gpr[RD(instr)] = gpr[RS(instr)] < gpr[RT(instr)]; } void RSP::sltu(u32 instr) { gpr[RD(instr)] = (u32)gpr[RS(instr)] < (u32)gpr[RT(instr)]; } void RSP::slti(u32 instr) { s32 imm = (s16)instr; gpr[RT(instr)] = gpr[RS(instr)] < imm; } void RSP::sltiu(u32 instr) { s32 imm = (s16)instr; gpr[RT(instr)] = (u32)gpr[RS(instr)] < imm; } inline s16 clamp_signed(s64 val) { if(val > 32767) return 32767; if(val < -32768) return -32768; return val; } void RSP::vabs(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { if(vs.selement[i] < 0) { if(vte.element[i] == 0x8000) { vd.element[i] = 0x7FFF; acc.l.element[i] = 0x8000; } else { vd.element[i] = -vte.selement[i]; acc.l.element[i] = -vte.selement[i]; } } else if (vs.element[i] == 0) { vd.element[i] = 0; acc.l.element[i] = 0; } else { vd.element[i] = vte.element[i]; acc.l.element[i] = vte.element[i]; } } } void RSP::vadd(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { s16 vsE = vs.selement[i]; s16 vteE = vte.selement[i]; s32 result = vsE + vteE + (vco.l.element[i] != 0); acc.l.element[i] = result; vd.element[i] = clamp_signed(result); vco.l.element[i] = 0; vco.h.element[i] = 0; } } void RSP::vmov(u32 instr) { } void RSP::vmacf(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR& vs = vpr[VS(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { s32 prod = vs.selement[i] * vte.selement[i]; s64 accDelta = prod * 2; s64 accum = GetACC(i) + accDelta; SetACC(i, accum); s16 result = clamp_signed(accum >> 16); vd.element[i] = result; } } void RSP::veq(u32 instr) { } void RSP::vne(u32 instr) { } void RSP::vsar(u32 instr) { u8 e = E2(instr); switch(e) { case 0x8: for(int i = 0; i < 8; i++) { vpr[VD(instr)].element[i] = acc.h.element[i]; } break; case 0x9: for(int i = 0; i < 8; i++) { vpr[VD(instr)].element[i] = acc.m.element[i]; } break; case 0xA: for(int i = 0; i < 8; i++) { vpr[VD(instr)].element[i] = acc.l.element[i]; } break; default: for(int i = 0; i < 8; i++) { vpr[VD(instr)].element[i] = 0; } break; } } void RSP::vzero(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { acc.l.element[i] = vte.element[i] + vs.element[i]; } memset(&vd, 0, sizeof(VPR)); } void RSP::mfc0(RDP& rdp, u32 instr) { gpr[RT(instr)] = GetCop0Reg(*this, rdp, RD(instr)); } void RSP::mtc0(Registers& regs, Mem& mem, u32 instr) { SetCop0Reg(regs, mem, RD(instr), gpr[RT(instr)]); } void RSP::mfc2(u32 instr) { u8 hi = vpr[RD(instr)].byte[BYTE_INDEX(E1(instr))]; u8 lo = vpr[RD(instr)].byte[BYTE_INDEX((E1(instr) + 1) & 0xF)]; s16 elem = (hi << 8) | lo; gpr[RT(instr)] = s32(elem); } void RSP::mtc2(u32 instr) { u16 element = gpr[RT(instr)]; u8 lo = element; u8 hi = element >> 8; vpr[RD(instr)].byte[BYTE_INDEX(E1(instr))] = hi; if(E1(instr) < 15) { vpr[RD(instr)].byte[BYTE_INDEX(E1(instr) + 1)] = lo; } } }