new vector instructions + fixed stupid FPU bug

This commit is contained in:
CocoSimone
2023-02-10 03:02:48 +01:00
parent 3786a5fd3b
commit 0954bb23b1
7 changed files with 170 additions and 6 deletions

View File

@@ -258,6 +258,7 @@ struct RSP {
void llv(u32 instr);
void lrv(u32 instr);
void lqv(u32 instr);
void lfv(u32 instr);
void lhv(u32 instr);
void ltv(u32 instr);
void lpv(u32 instr);
@@ -306,6 +307,7 @@ struct RSP {
void vcl(u32 instr);
void vmacf(u32 instr);
void vmacu(u32 instr);
void vmacq(u32 instr);
void vmadh(u32 instr);
void vmadl(u32 instr);
void vmadm(u32 instr);
@@ -313,6 +315,7 @@ struct RSP {
void vmov(u32 instr);
void vmulf(u32 instr);
void vmulu(u32 instr);
void vmulq(u32 instr);
void vmudl(u32 instr);
void vmudh(u32 instr);
void vmudm(u32 instr);
@@ -326,6 +329,8 @@ struct RSP {
void vrsq(u32 instr);
void vrcpl(u32 instr);
void vrsql(u32 instr);
void vrndp(u32 instr);
void vrndn(u32 instr);
void vrcph(u32 instr);
void vsar(u32 instr);
void vsub(u32 instr);
@@ -334,6 +339,7 @@ struct RSP {
void vnxor(u32 instr);
void vor(u32 instr);
void vnor(u32 instr);
void vzero(u32 instr);
void mfc0(RDP& rdp, u32 instr);
void mtc0(Registers& regs, Mem& mem, u32 instr);
void mfc2(u32 instr);

View File

@@ -90,7 +90,7 @@ void ceilwd(n64::Registers& regs, u32 instr) {
}
void cfc1(n64::Registers& regs, u32 instr) {
u8 fd = FD(instr);
u8 fd = RD(instr);
s32 val = 0;
switch(fd) {
case 0: val = regs.cop1.fcr0; break;

View File

@@ -90,18 +90,20 @@ void Cop1::ceilwd(Registers& regs, u32 instr) {
}
// CFC1: copies an FPU control register into the general-purpose register
// selected by RT(instr).
//
// The control register number is decoded with RD(instr). Only register 0
// (fcr0) and register 31 (fcr31) are handled; any other number is reported
// through Util::panic.
//
// regs  - CPU register file; regs.gpr[RT(instr)] receives the value.
// instr - raw instruction word.
void Cop1::cfc1(Registers& regs, u32 instr) const {
  // A single declaration of the selector: the stale FD(instr) decode that
  // preceded it redeclared `fd` and has been removed.
  const u8 fd = RD(instr);
  s32 val = 0;
  switch(fd) {
    case 0:
      val = fcr0;
      break;
    case 31:
      val = fcr31.raw;
      break;
    default:
      Util::panic("Undefined CFC1 with rd != 0 or 31\n");
  }
  regs.gpr[RT(instr)] = val;
}
void Cop1::ctc1(Registers& regs, u32 instr) {
u8 fs = FS(instr);
u8 fs = RD(instr);
u32 val = regs.gpr[RT(instr)];
switch(fs) {
case 0: break;

View File

@@ -10,7 +10,7 @@ Cop0::Cop0() {
void Cop0::Reset() {
cause.raw = 0xB000007C;
status.raw = 0x241000E0;
status.raw = 0x34000000;
PRId = 0x00000B22;
Config = 0x7006E463;
EPC = 0xFFFFFFFFFFFFFFFF;

View File

@@ -10,7 +10,7 @@ Cop1::Cop1() {
// Puts the FPU state back to its power-on values: fcr0 is cleared, fcr31 is
// loaded with its reset pattern and the 32 FGR entries are zero-filled.
void Cop1::Reset() {
  fcr0 = 0;
  // Written exactly once; the earlier `fcr31.raw = 0` store was immediately
  // overwritten and has been dropped.
  // NOTE(review): the meaning of the individual bits in 0x01000800 is not
  // visible from here - confirm against the FCR31 layout.
  fcr31.raw = 0x01000800;
  memset(fgr, 0, 32 * sizeof(FGR));
}

View File

@@ -70,6 +70,8 @@ inline void lwc2(RSP& rsp, u32 instr) {
case 0x06: rsp.lpv(instr); break;
case 0x07: rsp.luv(instr); break;
case 0x08: rsp.lhv(instr); break;
case 0x09: rsp.lfv(instr); break;
case 0x0A: break;
case 0x0B: rsp.ltv(instr); break;
default: Util::panic("Unhandled RSP LWC2 {:05b}\n", mask);
}
@@ -114,22 +116,36 @@ inline void cop2(RSP& rsp, u32 instr) {
}
break;
case 0x01: rsp.vmulu(instr); break;
case 0x02: rsp.vrndp(instr); break;
case 0x03: rsp.vmulq(instr); break;
case 0x04: rsp.vmudl(instr); break;
case 0x05: rsp.vmudm(instr); break;
case 0x06: rsp.vmudn(instr); break;
case 0x07: rsp.vmudh(instr); break;
case 0x08: rsp.vmacf(instr); break;
case 0x09: rsp.vmacu(instr); break;
case 0x0A: rsp.vrndn(instr); break;
case 0x0B: rsp.vmacq(instr); break;
case 0x0C: rsp.vmadl(instr); break;
case 0x0D: rsp.vmadm(instr); break;
case 0x0E: rsp.vmadn(instr); break;
case 0x0F: rsp.vmadh(instr); break;
case 0x10: rsp.vadd(instr); break;
case 0x11: rsp.vsub(instr); break;
case 0x12: rsp.vzero(instr); break;
case 0x13: rsp.vabs(instr); break;
case 0x14: rsp.vaddc(instr); break;
case 0x15: rsp.vsubc(instr); break;
case 0x16: rsp.vzero(instr); break;
case 0x17: rsp.vzero(instr); break;
case 0x18: rsp.vzero(instr); break;
case 0x19: rsp.vzero(instr); break;
case 0x1A: rsp.vzero(instr); break;
case 0x1B: rsp.vzero(instr); break;
case 0x1C: rsp.vzero(instr); break;
case 0x1D: rsp.vsar(instr); break;
case 0x1E: rsp.vzero(instr); break;
case 0x1F: rsp.vzero(instr); break;
case 0x20: rsp.vlt(instr); break;
case 0x21: rsp.veq(instr); break;
case 0x22: rsp.vne(instr); break;
@@ -144,6 +160,8 @@ inline void cop2(RSP& rsp, u32 instr) {
case 0x2B: rsp.vnor(instr); break;
case 0x2C: rsp.vxor(instr); break;
case 0x2D: rsp.vnxor(instr); break;
case 0x2E: rsp.vzero(instr); break;
case 0x2F: rsp.vzero(instr); break;
case 0x31: rsp.vrcpl(instr); break;
case 0x35: rsp.vrsql(instr); break;
case 0x32: case 0x36:
@@ -152,6 +170,7 @@ inline void cop2(RSP& rsp, u32 instr) {
case 0x30: rsp.vrcp(instr); break;
case 0x33: rsp.vmov(instr); break;
case 0x34: rsp.vrsq(instr); break;
case 0x38 ... 0x3E: rsp.vzero(instr); break;
case 0x37: case 0x3F: break;
default: Util::panic("Unhandled RSP COP2 ({:06b})\n", mask);
}

View File

@@ -455,6 +455,27 @@ void RSP::lhv(u32 instr) {
}
}
// LFV: gathers eight bytes from memory into a temporary vector (each byte
// shifted left by 7 before being stored in an element slot), then copies the
// byte range [E1(instr), min(E1(instr) + 8, 16)) of that temporary into the
// vector register selected by VT(instr).
//
// instr - raw instruction word; BASE/OFFSET/VT/E1 are decoded from it.
void RSP::lfv(u32 instr) {
  const int firstByte = E1(instr);
  const int lastByte = std::min(firstByte + 8, 16);

  u32 address = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
  // Offset inside the 8-byte aligned window, biased by the element selector.
  const u32 base = (address & 7) - firstByte;
  address &= ~7;

  // TODO: should be possible to do with one loop
  // NOTE(review): relies on ELEMENT_INDEX mapping 0..7 onto eight distinct
  // slots so that `tmp` is fully written before it is read - confirm.
  VPR tmp;
  for (u32 lane = 0; lane < 4; lane++) {
    // Each source offset wraps within a 16-byte span.
    const u32 lowOffset = (base + lane * 4 + 0) & 15;
    const u32 highOffset = (base + lane * 4 + 8) & 15;
    tmp.element[ELEMENT_INDEX(lane + 0)] = ReadByte(address + lowOffset) << 7;
    tmp.element[ELEMENT_INDEX(lane + 4)] = ReadByte(address + highOffset) << 7;
  }

  // Only the selected byte window of the destination register is replaced.
  VPR& vt = vpr[VT(instr)];
  for (u32 index = firstByte; index < lastByte; index++) {
    vt.byte[BYTE_INDEX(index)] = tmp.byte[BYTE_INDEX(index)];
  }
}
void RSP::lrv(u32 instr) {
u32 address = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4);
int e = E1(instr);
@@ -885,6 +906,24 @@ void RSP::vmulf(u32 instr) {
}
}
// VMULQ: per lane, multiplies the signed elements of VS and the
// element-selected VT, adds 31 to negative products, stores the result in the
// high/middle accumulator (low accumulator cleared) and writes the clamped,
// halved product with its low four bits masked off to VD.
//
// instr - raw instruction word; VS/VT/VD/E2 are decoded from it.
void RSP::vmulq(u32 instr) {
  // Snapshot of VT with the element selector applied, taken before any
  // destination lane is written.
  const VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
  const VPR& vs = vpr[VS(instr)];
  VPR& vd = vpr[VD(instr)];

  for(int lane = 0; lane < 8; ++lane) {
    const s32 raw = vs.selement[lane] * vte.selement[lane];
    // Negative products are biased by 31 before being stored.
    const s32 rounded = raw < 0 ? raw + 31 : raw;

    acc.l.element[lane] = 0;
    acc.m.element[lane] = rounded;
    acc.h.element[lane] = rounded >> 16;

    vd.element[lane] = signedClamp(rounded >> 1) & ~15;
  }
}
void RSP::vmulu(u32 instr) {
int e = E2(instr);
VPR& vs = vpr[VS(instr)];
@@ -1126,6 +1165,23 @@ void RSP::vmacu(u32 instr) {
}
}
// VMACQ: per lane, rebuilds a 32-bit value from the high and middle
// accumulator halves and, when bit 5 of that value is clear, moves it by 32
// toward zero (negative values gain 32, values >= 32 lose 32). The adjusted
// value is written back to the high/middle accumulator and its clamped,
// halved form with the low four bits masked off is written to VD.
//
// instr - raw instruction word; only VD is decoded from it.
void RSP::vmacq(u32 instr) {
  constexpr s32 kBit5 = 1 << 5;
  VPR& vd = vpr[VD(instr)];

  for(int lane = 0; lane < 8; ++lane) {
    s32 value = (acc.h.element[lane] << 16) | acc.m.element[lane];

    // No adjustment at all when bit 5 is already set.
    if((value & kBit5) == 0) {
      if(value < 0) {
        value += 32;
      } else if(value >= 32) {
        value -= 32;
      }
    }

    acc.h.element[lane] = value >> 16;
    acc.m.element[lane] = value & 0xFFFF;
    vd.element[lane] = signedClamp(value >> 1) & ~15;
  }
}
void RSP::veq(u32 instr) {
int e = E2(instr);
VPR& vd = vpr[VD(instr)];
@@ -1291,6 +1347,75 @@ void RSP::vrsq(u32 instr) {
}
}
// from nall, in ares
// Keeps the low `bits` bits of `x` and sign-extends them: the result is the
// value of that bit field interpreted as a two's-complement number.
// `bits` must be at least 1 (the sign bit position is bits - 1).
static inline s64 sclip(s64 x, u32 bits) {
  const u64 signBit = 1ull << (bits - 1);
  const u64 fieldMask = signBit * 2 - 1;
  const u64 field = static_cast<u64>(x) & fieldMask;
  // Flipping then subtracting the sign bit propagates it into the upper bits.
  return static_cast<s64>((field ^ signBit) - signBit);
}
// VRNDN: per lane, sign-extends the 48-bit accumulator (high:middle:low) and,
// only when it is negative, adds the element-selected VT value (shifted left
// by 16 when bit 0 of the VS field is set), wrapping the sum to 48 bits. The
// accumulator is stored back and VD receives the clamped bits 16 and up.
//
// instr - raw instruction word; VD/VT/VS/E2 are decoded from it.
void RSP::vrndn(u32 instr) {
  const VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
  VPR& vd = vpr[VD(instr)];
  // The VS field is used as a flag here, not as a register number.
  const bool shiftAddend = (VS(instr) & 1) != 0;

  for(int lane = 0; lane < 8; ++lane) {
    s32 addend = static_cast<s16>(vte.selement[lane]);
    if(shiftAddend) {
      addend <<= 16;
    }

    // Pack high:middle:low, then shift up and back down by 16 to
    // sign-extend from bit 47.
    s64 accum = acc.h.element[lane];
    accum = (accum << 16) | acc.m.element[lane];
    accum = (accum << 16) | acc.l.element[lane];
    accum = (accum << 16) >> 16;

    if(accum < 0) {
      accum = sclip(accum + addend, 48);
    }

    acc.h.element[lane] = accum >> 32;
    acc.m.element[lane] = accum >> 16;
    acc.l.element[lane] = accum >> 0;
    vd.element[lane] = signedClamp(accum >> 16);
  }
}
// VRNDP: per lane, sign-extends the 48-bit accumulator (high:middle:low) and,
// only when it is zero or positive, adds the element-selected VT value
// (shifted left by 16 when bit 0 of the VS field is set), wrapping the sum to
// 48 bits. The accumulator is stored back and VD receives the clamped bits 16
// and up.
//
// instr - raw instruction word; VD/VT/VS/E2 are decoded from it.
void RSP::vrndp(u32 instr) {
  const VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
  VPR& vd = vpr[VD(instr)];
  // The VS field is used as a flag here, not as a register number.
  const bool shiftAddend = (VS(instr) & 1) != 0;

  for(int lane = 0; lane < 8; ++lane) {
    s32 addend = static_cast<s16>(vte.selement[lane]);
    if(shiftAddend) {
      addend <<= 16;
    }

    // Pack high:middle:low, then shift up and back down by 16 to
    // sign-extend from bit 47.
    s64 accum = acc.h.element[lane];
    accum = (accum << 16) | acc.m.element[lane];
    accum = (accum << 16) | acc.l.element[lane];
    accum = (accum << 16) >> 16;

    if(accum >= 0) {
      accum = sclip(accum + addend, 48);
    }

    acc.h.element[lane] = accum >> 32;
    acc.m.element[lane] = accum >> 16;
    acc.l.element[lane] = accum >> 0;
    vd.element[lane] = signedClamp(accum >> 16);
  }
}
void RSP::vrsql(u32 instr) {
VPR& vd = vpr[VD(instr)];
VPR& vt = vpr[VT(instr)];
@@ -1476,6 +1601,18 @@ void RSP::vor(u32 instr) {
}
}
// VZERO: stores the lane-wise sum of VS and the element-selected VT in the
// low accumulator, then clears the whole destination register VD.
//
// instr - raw instruction word; VS/VT/VD/E2 are decoded from it.
void RSP::vzero(u32 instr) {
  // VT is copied (with the element selector applied) up front; the sums are
  // all computed before VD is wiped, which matters if VD is also VS.
  const VPR vte = GetVTE(vpr[VT(instr)], E2(instr));
  const VPR& vs = vpr[VS(instr)];

  for(int lane = 0; lane < 8; ++lane) {
    acc.l.element[lane] = vte.element[lane] + vs.element[lane];
  }

  VPR& vd = vpr[VD(instr)];
  memset(&vd, 0, sizeof(VPR));
}
// MFC0: reads the coprocessor-0 register selected by RD(instr) through
// GetCop0Reg and stores the result in the GPR selected by RT(instr).
//
// rdp   - passed through to GetCop0Reg.
// instr - raw instruction word.
void RSP::mfc0(RDP& rdp, u32 instr) {
  const auto value = GetCop0Reg(*this, rdp, RD(instr));
  gpr[RT(instr)] = value;
}