fix nasty bug on writing to RSP PC. Mario Kart 64 boots now

This commit is contained in:
CocoSimone
2022-10-10 23:35:14 +02:00
parent db4e2caf85
commit 0ac6f07054
19 changed files with 385 additions and 214 deletions

View File

@@ -39,10 +39,9 @@ inline auto GetCop0Reg(RSP& rsp, RDP& rdp, u8 index) -> u32{
case 9: return rdp.dpc.end;
case 10: return rdp.dpc.current;
case 11: return rdp.dpc.status.raw;
case 12: return 0;
case 12: return rdp.dpc.clock;
default: util::panic("Unhandled RSP COP0 register read at index {}\n", index);
}
return 0;
}
inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) {
@@ -67,15 +66,9 @@ inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) {
ReleaseSemaphore(rsp);
}
break;
case 8:
rdp.dpc.start = val & 0xFFFFF8;
rdp.dpc.current = rdp.dpc.start;
break;
case 9:
rdp.dpc.end = val & 0xFFFFF8;
rdp.RunCommand(mi, regs, rsp);
break;
case 11: rdp.StatusWrite(mi, regs, rsp, val); break;
case 8: rdp.WriteStart(val); break;
case 9: rdp.WriteEnd(mi, regs, rsp, val); break;
case 11: rdp.WriteStatus(mi, regs, rsp, val); break;
default: util::panic("Unhandled RSP COP0 register write at index {}\n", index);
}
}
@@ -135,12 +128,12 @@ void RSP::andi(u32 instr) {
void RSP::cfc2(u32 instr) {
s16 value = 0;
switch(RD(instr) & 3) {
case 0: value = VCOasU16(); break;
case 1: value = VCCasU16(); break;
case 2 ... 3: value = s8(GetVCE()); break;
case 0: value = GetVCO(); break;
case 1: value = GetVCC(); break;
case 2 ... 3: value = GetVCE(); break;
}
gpr[RT(instr)] = s32(value);
gpr[RT(instr)] = value;
}
void RSP::ctc2(u32 instr) {
@@ -167,11 +160,11 @@ void RSP::ctc2(u32 instr) {
}
void RSP::b(u32 instr, bool cond) {
s32 address = ((s32)((s16)(instr & 0xFFFF) << 2)) + pc;
u32 address = ((instr & 0xFFFF) << 2) + pc;
branch(address, cond);
}
void RSP::bl(u32 instr, bool cond) {
void RSP::blink(u32 instr, bool cond) {
b(instr, cond);
gpr[31] = pc + 4;
}
@@ -372,6 +365,17 @@ void RSP::sw(u32 instr) {
WriteWord(address, gpr[RT(instr)]);
}
void RSP::swv(u32 instr) {
u32 address = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
int base = address & 7;
address &= ~7;
for(int i = E1(instr); i < E1(instr) + 16; i++) {
WriteByte(address + (base & 15), vpr[VT(instr)].byte[BYTE_INDEX(i & 15)]);
base++;
}
}
void RSP::sub(u32 instr) {
gpr[RD(instr)] = gpr[RS(instr)] - gpr[RT(instr)];
}
@@ -533,8 +537,16 @@ void RSP::sltiu(u32 instr) {
gpr[RT(instr)] = (u32)gpr[RS(instr)] < imm;
}
inline s16 clamp(s64 val) {
return std::clamp<s64>(val, -32768, 32767);
inline s16 signedClamp(s64 val) {
if(val < -32768) return -32768;
else if(val > 32767) return 32767;
return val;
}
inline u16 unsignedClamp(s64 val) {
if(val < 0) return 0;
else if(val > 32767) return 65535;
return val;
}
void RSP::vabs(u32 instr) {
@@ -569,7 +581,7 @@ void RSP::vadd(u32 instr) {
for(int i = 0; i < 8; i++) {
s32 result = vs.selement[i] + vte.selement[i] + (vco.l.selement[i] != 0);
acc.l.element[i] = result;
vd.element[i] = clamp(result);
vd.element[i] = signedClamp(result);
vco.l.element[i] = 0;
vco.h.element[i] = 0;
}
@@ -581,10 +593,10 @@ void RSP::vaddc(u32 instr) {
VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
for(int i = 0; i < 8; i++) {
u32 result = vs.selement[i] + vte.selement[i];
u32 result = vs.element[i] + vte.element[i];
acc.l.element[i] = result;
vd.element[i] = result;
vco.l.element[i] = (result >> 16) & 1 ? 0xffff : 0;
vco.l.element[i] = ((result >> 16) & 1) ? 0xffff : 0;
vco.h.element[i] = 0;
}
}
@@ -610,7 +622,7 @@ void RSP::vch(u32 instr) {
vce.element[i] = result == -1 ? 0xffff : 0;
} else {
s16 result = vsElem - vteElem;
acc.l.selement[i] = (result >= 0) ? vteElem : vsElem;
acc.l.element[i] = (result >= 0) ? vteElem : vsElem;
vcc.l.element[i] = vteElem < 0 ? 0xffff : 0;
vcc.h.element[i] = result >= 0 ? 0xffff : 0;
vco.l.element[i] = 0;
@@ -660,26 +672,26 @@ void RSP::vcl(u32 instr) {
VPR& vd = vpr[VD(instr)];
VPR vte = GetVTE(vpr[VT(instr)], e);
for(int i = 0; i < 8; i++) {
u16 vsElem = vs.element[i];
u16 vteElem = vte.element[i];
for (int i = 0; i < 8; i++) {
u16 vs_element = vs.element[i];
u16 vte_element = vte.element[i];
acc.l.element[i] = vcc.l.element[i] ? -vteElem : vsElem;
if(vco.l.element[i]) {
if(!vco.h.element[i]) {
u16 clampSum = vsElem + vteElem;
bool overflow = (vsElem + vteElem) != clampSum;
u16 clamped_sum = vs_element + vte_element;
bool overflow = (vs_element + vte_element) != clamped_sum;
if(vce.element[i]) {
vcc.l.element[i] = !clampSum || !overflow ? 0xffff : 0;
vcc.l.element[i] = (!clamped_sum || !overflow) ? 0xffff : 0;
} else {
vcc.l.element[i] = !clampSum && !overflow ? 0xffff : 0;
vcc.l.element[i] = (!clamped_sum && !overflow) ? 0xffff : 0;
}
}
acc.l.element[i] = vcc.l.element[i] ? -vte_element : vs_element;
} else {
if(!vco.h.element[i]) {
vcc.h.element[i] = (s32(vsElem) - s32(vteElem) >= 0) ? 0xffff : 0;
vcc.h.element[i] = ((s32)vs_element - (s32)vte_element >= 0) ? 0xffff : 0;
}
acc.l.element[i] = vcc.h.element[i] ? vte_element : vs_element;
}
vco.l.element[i] = 0;
@@ -690,30 +702,30 @@ void RSP::vcl(u32 instr) {
}
void RSP::vmov(u32 instr) {
int e = E2(instr);
u8 e = E2(instr), vs = VS(instr) & 7;
VPR& vd = vpr[VD(instr)];
VPR vte = GetVTE(vpr[VT(instr)], e);
u8 se;
switch (e) {
e &= 7;
switch(e) {
case 0 ... 1:
se = VS(instr) & 7;
se = (e & 0b000) | (vs & 0b111);
break;
case 2 ... 3:
se = (e & 1) | (VS(instr) & 6);
se = (e & 0b001) | (vs & 0b110);
break;
case 4 ... 7:
se = (e & 3) | (VS(instr) & 4);
se = (e & 0b011) | (vs & 0b100);
break;
case 8 ... 15:
se = e & 7;
se = (e & 0b111) | (vs & 0b000);
break;
default:
util::panic("VMOV: This should be unreachable!\n");
}
u8 de = VS(instr) & 7;
u8 de = vs & 7;
vd.element[ELEMENT_INDEX(de)] = vte.element[ELEMENT_INDEX(se)];
@@ -742,18 +754,37 @@ void RSP::vmulf(u32 instr) {
SetACC(i, prod);
s16 result = clamp(prod >> 16);
s16 result = signedClamp(prod >> 16);
vd.element[i] = result;
}
}
void RSP::vmulu(u32 instr) {
int e = E2(instr);
VPR& vs = vpr[VS(instr)];
VPR& vd = vpr[VD(instr)];
VPR vte = GetVTE(vpr[VT(instr)], e);
for(int i = 0; i < 8; i++) {
s32 prod = vs.selement[i] * vte.selement[i];
s64 accum = prod * 2 + 0x8000;
SetACC(i, accum);
u16 result = unsignedClamp(accum >> 16);
vd.element[i] = result;
}
}
void RSP::vmudl(u32 instr) {
int e = E2(instr);
u8 e = E2(instr);
VPR& vs = vpr[VS(instr)];
VPR& vd = vpr[VD(instr)];
VPR vte = GetVTE(vpr[VT(instr)], e);
for(int i = 0; i < 8; i++) {
u64 prod = (u64)vs.selement[i] * (u64)vte.selement[i];
u64 op1 = vte.element[i];
u64 op2 = vs.element[i];
u64 prod = op1 * op2;
u64 accum = prod >> 16;
SetACC(i, accum);
@@ -780,7 +811,7 @@ void RSP::vmudh(u32 instr) {
s32 prod = vs.selement[i] * vte.selement[i];
s64 accum = prod;
s16 result = clamp(accum);
s16 result = signedClamp(accum);
accum <<= 16;
SetACC(i, accum);
@@ -798,7 +829,7 @@ void RSP::vmudm(u32 instr) {
s32 prod = vs.selement[i] * vte.element[i];
s64 accum = prod;
s16 result = clamp(accum >> 16);
s16 result = signedClamp(accum >> 16);
SetACC(i, accum);
vd.element[i] = result;
@@ -838,7 +869,7 @@ void RSP::vmadh(u32 instr) {
s64 accum = GetACC(i) + ((u64)prod << 16);
SetACC(i, accum);
s16 result = clamp(accum >> 16);
s16 result = signedClamp(accum >> 16);
vd.element[i] = result;
}
@@ -849,9 +880,13 @@ void RSP::vmadl(u32 instr) {
VPR& vs = vpr[VS(instr)];
VPR& vd = vpr[VD(instr)];
VPR vte = GetVTE(vpr[VT(instr)], e);
for(int i = 0; i < 8; i++) {
u64 prod = (u64)vs.selement[i] * (u64)vte.selement[i];
u64 accum = (prod >> 16) + GetACC(i);
u64 op1 = vte.element[i];
u64 op2 = vs.element[i];
u64 prod = op1 * op2;
u64 accDelta = prod >> 16;
u16 accum = GetACC(i) + accDelta;
SetACC(i, accum);
@@ -881,7 +916,7 @@ void RSP::vmadm(u32 instr) {
SetACC(i, accum);
accum = GetACC(i);
s16 result = clamp(accum >> 16);
s16 result = signedClamp(accum >> 16);
vd.element[i] = result;
}
@@ -923,7 +958,27 @@ void RSP::vmacf(u32 instr) {
s64 accum = GetACC(i) + accDelta;
SetACC(i, accum);
s16 result = clamp(accum >> 16);
s16 result = signedClamp(accum >> 16);
vd.element[i] = result;
}
}
void RSP::vmacu(u32 instr) {
VPR& vd = vpr[VD(instr)];
VPR& vs = vpr[VS(instr)];
VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
for(int i = 0; i < 8; i++) {
s16 op1 = vte.element[i];
s16 op2 = vs.element[i];
s32 prod = op1 * op2;
s64 accDelta = prod;
accDelta *= 2;
s64 accum = GetACC(i) + accDelta;
SetACC(i, accum);
accum = GetACC(i);
u16 result = unsignedClamp(accum >> 16);
vd.element[i] = result;
}
}
@@ -963,9 +1018,9 @@ void RSP::vge(u32 instr) {
VPR vte = GetVTE(vpr[VT(instr)], e);
for(int i = 0; i < 8; i++) {
bool eql = vs.element[i] == vte.element[i];
bool eql = vs.selement[i] == vte.selement[i];
bool neg = !(vco.h.element[i] && vco.l.element[i]) && eql;
vcc.l.element[i] = (neg || (vs.element[i] > vte.element[i])) ? 0xffff : 0;
vcc.l.element[i] = (neg || (vs.selement[i] > vte.selement[i])) ? 0xffff : 0;
acc.l.element[i] = vcc.l.element[i] ? vs.element[i] : vte.element[i];
vd.element[i] = acc.l.element[i];
vcc.h.element[i] = vco.h.element[i] = vco.l.element[i] = 0;
@@ -1185,7 +1240,7 @@ void RSP::vsub(u32 instr) {
for(int i = 0; i < 8; i++) {
s32 result = vs.selement[i] - vte.selement[i] - (vco.l.element[i] != 0);
acc.l.element[i] = result;
vd.element[i] = clamp(result);
vd.element[i] = signedClamp(result);
vco.l.element[i] = 0;
vco.h.element[i] = 0;
@@ -1288,7 +1343,7 @@ void RSP::mfc2(u32 instr) {
u8 hi = vpr[RD(instr)].byte[BYTE_INDEX(E1(instr))];
u8 lo = vpr[RD(instr)].byte[BYTE_INDEX((E1(instr) + 1) & 0xF)];
s16 elem = (hi << 8) | lo;
gpr[RT(instr)] = s32(elem);
gpr[RT(instr)] = elem;
}
void RSP::mtc2(u32 instr) {