// Patch overview. This is a git unified diff; in this copy the line breaks are collapsed
// and the include targets are missing, so it is a reading aid rather than an appliable patch.
//
// src/common.hpp
//   - Drops one include line and the m128 alias for __m128i.
// src/n64/core/Mem.cpp
//   - Mem::Write32, case 0x13FF0020 ... 0x13FFFFFF: the isviewer offset is now
//     paddr - 0x13FF0020 instead of paddr & ISVIEWER_DSIZE.
// src/n64/core/RSP.cpp
//   - RSP::Reset additionally sets divIn = 0, divOut = 0 and divInLoaded = false.
// src/n64/core/RSP.hpp
//   - Removes the vzero declaration.
// src/n64/core/rsp/instructions.cpp
//   - Broadcast and GetVTE take the source register as const VPR& instead of by value.
//   - GetVTE: cases 2 to 7 are folded into two case ranges that derive lane indices from e.
//   - cfc2 passes GetVCE() through s8(); ctc2 indexes flags with ELEMENT_INDEX(i) instead of 7 - i.
//   - clamp_signed is replaced by clamp, implemented with std::clamp; call sites are renamed.
//   - vadd and vmulf merge their temporaries; vaddc switches to selement operands and a u32 result.
//   - vmov: the previously commented-out switch is re-enabled and se selects the source lane.
//   - Removes the vzero definition; adds blank lines in vmadm and vmadn.
// src/n64/memory_regions.hpp
//   - ISVIEWER_SIZE is spelled (0x13FFFFFF - 0x13FF0020) instead of 0xFFDF.
//
// NOTE(review): std::clamp takes three arguments of one type T; clamp() passes an s64 together
//   with int literals, which looks like a template deduction conflict. Confirm that it builds
//   (an explicit std::clamp<s64> would avoid it) and that the algorithm header is included.
// NOTE(review): vaddc derives the carry from bit 16 of the sum. With signed operands the sum is
//   sign-extended, so bit 16 differs from the unsigned carry-out (for example 0xFFFF + 0x0001).
//   Confirm this change is intended.
// NOTE(review): the new ISVIEWER_SIZE expression still evaluates to 0xFFDF, while the inclusive
//   case range in Mem::Write32 spans 0xFFE0 addresses. The buffer declaration is not visible
//   here; confirm that a 32-bit write at the top of the range stays in bounds.
// NOTE(review): GetVTE switches on e & 0xf but computes lanes from the unmasked e; this
//   presumably relies on callers passing e below 16. Verify against the call sites.
// NOTE(review): in vmulf the product times 2 plus 0x8000 is evaluated in int before widening
//   to s64 (same as before the patch); confirm that the -32768 * -32768 case is acceptable.
diff --git a/src/common.hpp b/src/common.hpp index fa0aa7bd..df45ce0d 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include @@ -15,7 +14,6 @@ using s32 = int32_t; using s64 = int64_t; using u128 = __uint128_t; using s128 = __int128_t; -using m128 = __m128i; #define UINT128_MAX (((u128)0xFFFF'FFFF'FFFF'FFFF << 64) | 0xFFFF'FFFF'FFFF'FFFF) #define UINT128_MIN 0 diff --git a/src/n64/core/Mem.cpp b/src/n64/core/Mem.cpp index b9258b8c..99f13513 100644 --- a/src/n64/core/Mem.cpp +++ b/src/n64/core/Mem.cpp @@ -309,7 +309,7 @@ void Mem::Write32(Registers& regs, u64 vaddr, u32 val, s64 pc) { } } break; case 0x13FF0020 ... 0x13FFFFFF: - util::WriteAccess(isviewer, paddr & ISVIEWER_DSIZE, htobe32(val)); + util::WriteAccess(isviewer, paddr - 0x13FF0020, htobe32(val)); break; case 0x1FC007C0 ... 0x1FC007FF: util::WriteAccess(pifRam, paddr & PIF_RAM_DSIZE, htobe32(val)); diff --git a/src/n64/core/RSP.cpp b/src/n64/core/RSP.cpp index 3f36884c..bebec6d4 100644 --- a/src/n64/core/RSP.cpp +++ b/src/n64/core/RSP.cpp @@ -26,6 +26,9 @@ void RSP::Reset() { memset(&vcc, 0, 2 * sizeof(VPR)); memset(&vco, 0, 2 * sizeof(VPR)); semaphore = false; + divIn = 0; + divOut = 0; + divInLoaded = false; } void RSP::Step(Registers& regs, Mem& mem) { diff --git a/src/n64/core/RSP.hpp b/src/n64/core/RSP.hpp index bd7733d2..e081edc6 100644 --- a/src/n64/core/RSP.hpp +++ b/src/n64/core/RSP.hpp @@ -431,7 +431,6 @@ struct RSP { void vnxor(u32 instr); void vor(u32 instr); void vnor(u32 instr); - void vzero(u32 instr); void mfc0(RDP& rdp, u32 instr); void mtc0(Registers& regs, Mem& mem, u32 instr); void mfc2(u32 instr); diff --git a/src/n64/core/rsp/instructions.cpp b/src/n64/core/rsp/instructions.cpp index ed64632a..2807e1f3 100644 --- a/src/n64/core/rsp/instructions.cpp +++ b/src/n64/core/rsp/instructions.cpp @@ -80,7 +80,7 @@ inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) { } } -inline VPR Broadcast(VPR vt, int l0, 
int l1, int l2, int l3, int l4, int l5, int l6, int l7) { +inline VPR Broadcast(const VPR& vt, int l0, int l1, int l2, int l3, int l4, int l5, int l6, int l7) { VPR vte{}; vte.element[ELEMENT_INDEX(0)] = vt.element[l0]; vte.element[ELEMENT_INDEX(1)] = vt.element[l1]; @@ -93,27 +93,15 @@ inline VPR Broadcast(VPR vt, int l0, int l1, int l2, int l3, int l4, int l5, int return vte; } -inline VPR GetVTE(VPR vt, u8 e) { +inline VPR GetVTE(const VPR& vt, u8 e) { VPR vte{}; switch(e & 0xf) { case 0 ... 1: return vt; - case 2: - vte = Broadcast(vt, 0, 0, 2, 2, 4, 4, 6, 6); + case 2 ... 3: + vte = Broadcast(vt, e - 2, e - 2, e, e, e + 2, e + 2, e + 4, e + 4); break; - case 3: - vte = Broadcast(vt, 1, 1, 3, 3, 5, 5, 7, 7); - break; - case 4: - vte = Broadcast(vt, 0, 0, 0, 0, 4, 4, 4, 4); - break; - case 5: - vte = Broadcast(vt, 1, 1, 1, 1, 5, 5, 5, 5); - break; - case 6: - vte = Broadcast(vt, 2, 2, 2, 2, 6, 6, 6, 6); - break; - case 7: - vte = Broadcast(vt, 3, 3, 3, 3, 7, 7, 7, 7); + case 4 ... 7: + vte = Broadcast(vt, e - 4, e - 4, e - 4, e - 4, e, e, e, e); break; case 8 ... 15: { int index = ELEMENT_INDEX(e - 8); @@ -149,7 +137,7 @@ void RSP::cfc2(u32 instr) { switch(RD(instr) & 3) { case 0: value = VCOasU16(); break; case 1: value = VCCasU16(); break; - case 2 ... 3: value = GetVCE(); break; + case 2 ... 3: value = s8(GetVCE()); break; } gpr[RT(instr)] = s32(value); @@ -160,19 +148,19 @@ void RSP::ctc2(u32 instr) { switch(RD(instr) & 3) { case 0: for(int i = 0; i < 8; i++) { - vco.h.element[7 - i] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; - vco.l.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; + vco.h.element[ELEMENT_INDEX(i)] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; + vco.l.element[ELEMENT_INDEX(i)] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; } break; case 1: for(int i = 0; i < 8; i++) { - vcc.h.element[7 - i] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; - vcc.l.element[7 - i] = ((value >> i) & 1) == 1 ? 
0xFFFF : 0; + vcc.h.element[ELEMENT_INDEX(i)] = ((value >> (i + 8)) & 1) == 1 ? 0xFFFF : 0; + vcc.l.element[ELEMENT_INDEX(i)] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; } break; case 2: case 3: for(int i = 0; i < 8; i++) { - vce.element[7 - i] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; + vce.element[ELEMENT_INDEX(i)] = ((value >> i) & 1) == 1 ? 0xFFFF : 0; } break; } @@ -545,10 +533,8 @@ void RSP::sltiu(u32 instr) { gpr[RT(instr)] = (u32)gpr[RS(instr)] < imm; } -inline s16 clamp_signed(s64 val) { - if(val > 32767) return 32767; - if(val < -32768) return -32768; - return val; +inline s16 clamp(s64 val) { + return std::clamp(val, -32768, 32767); } void RSP::vabs(u32 instr) { @@ -581,12 +567,9 @@ void RSP::vadd(u32 instr) { VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { - s16 vsE = vs.selement[i]; - s16 vteE = vte.selement[i]; - - s32 result = vsE + vteE + (vco.l.element[i] != 0); + s32 result = vs.selement[i] + vte.selement[i] + (vco.l.selement[i] != 0); acc.l.element[i] = result; - vd.element[i] = clamp_signed(result); + vd.element[i] = clamp(result); vco.l.element[i] = 0; vco.h.element[i] = 0; } @@ -598,7 +581,7 @@ void RSP::vaddc(u32 instr) { VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { - s32 result = vs.element[i] + vte.element[i]; + u32 result = vs.selement[i] + vte.selement[i]; acc.l.element[i] = result; vd.element[i] = result; vco.l.element[i] = (result >> 16) & 1 ? 0xffff : 0; @@ -711,7 +694,7 @@ void RSP::vmov(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); - /*u8 se; + u8 se; switch (e) { case 0 ... 
1: @@ -728,11 +711,12 @@ void RSP::vmov(u32 instr) { break; default: util::panic("VMOV: This should be unreachable!\n"); - }*/ + } u8 de = VS(instr) & 7; - vd.element[ELEMENT_INDEX(de)] = vte.element[ELEMENT_INDEX(de)]; + vd.element[ELEMENT_INDEX(de)] = vte.element[ELEMENT_INDEX(se)]; + for(int i = 0; i < 8; i++) { acc.l.element[i] = vte.element[i]; } @@ -754,12 +738,11 @@ void RSP::vmulf(u32 instr) { VPR vte = GetVTE(vpr[VT(instr)], e); for(int i = 0; i < 8; i++) { - s32 prod = vs.selement[i] * vte.selement[i]; - s64 accum = (prod * 2) + 0x8000; + s64 prod = (vs.selement[i] * vte.selement[i]) * 2 + 0x8000; - SetACC(i, accum); + SetACC(i, prod); - s16 result = clamp_signed(accum >> 16); + s16 result = clamp(prod >> 16); vd.element[i] = result; } } @@ -797,7 +780,7 @@ void RSP::vmudh(u32 instr) { s32 prod = vs.selement[i] * vte.selement[i]; s64 accum = prod; - s16 result = clamp_signed(accum); + s16 result = clamp(accum); accum <<= 16; SetACC(i, accum); @@ -815,7 +798,7 @@ void RSP::vmudm(u32 instr) { s32 prod = vs.selement[i] * vte.element[i]; s64 accum = prod; - s16 result = clamp_signed(accum >> 16); + s16 result = clamp(accum >> 16); SetACC(i, accum); vd.element[i] = result; @@ -855,7 +838,7 @@ void RSP::vmadh(u32 instr) { s64 accum = GetACC(i) + ((u64)prod << 16); SetACC(i, accum); - s16 result = clamp_signed(accum >> 16); + s16 result = clamp(accum >> 16); vd.element[i] = result; } @@ -890,6 +873,7 @@ void RSP::vmadm(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); + for(int i = 0; i < 8; i++) { s32 prod = vs.selement[i] * vte.element[i]; s64 accum = GetACC(i); @@ -897,7 +881,7 @@ void RSP::vmadm(u32 instr) { SetACC(i, accum); accum = GetACC(i); - s16 result = clamp_signed(accum >> 16); + s16 result = clamp(accum >> 16); vd.element[i] = result; } @@ -908,6 +892,7 @@ void RSP::vmadn(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); + for(int i = 0; i < 8; 
i++) { s32 prod = vs.element[i] * vte.selement[i]; s64 accum = GetACC(i) + prod; @@ -938,7 +923,7 @@ void RSP::vmacf(u32 instr) { s64 accum = GetACC(i) + accDelta; SetACC(i, accum); - s16 result = clamp_signed(accum >> 16); + s16 result = clamp(accum >> 16); vd.element[i] = result; } } @@ -1200,7 +1185,7 @@ void RSP::vsub(u32 instr) { for(int i = 0; i < 8; i++) { s32 result = vs.selement[i] - vte.selement[i] - (vco.l.element[i] != 0); acc.l.element[i] = result; - vd.element[i] = clamp_signed(result); + vd.element[i] = clamp(result); vco.l.element[i] = 0; vco.h.element[i] = 0; @@ -1291,18 +1276,6 @@ void RSP::vor(u32 instr) { } } -void RSP::vzero(u32 instr) { - VPR& vs = vpr[VS(instr)]; - VPR& vd = vpr[VD(instr)]; - VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); - - for(int i = 0; i < 8; i++) { - acc.l.element[i] = vte.element[i] + vs.element[i]; - } - - memset(&vd, 0, sizeof(VPR)); -} - void RSP::mfc0(RDP& rdp, u32 instr) { gpr[RT(instr)] = GetCop0Reg(*this, rdp, RD(instr)); } diff --git a/src/n64/memory_regions.hpp b/src/n64/memory_regions.hpp index 20340fcf..9bf99bac 100644 --- a/src/n64/memory_regions.hpp +++ b/src/n64/memory_regions.hpp @@ -12,7 +12,7 @@ #define PIF_RAM_DSIZE (PIF_RAM_SIZE - 1) #define PIF_BOOTROM_SIZE 0x7C0 #define PIF_BOOTROM_DSIZE (PIF_BOOTROM_SIZE - 1) -#define ISVIEWER_SIZE 0xFFDF +#define ISVIEWER_SIZE (0x13FFFFFF - 0x13FF0020) #define ISVIEWER_DSIZE (ISVIEWER_SIZE - 1) #define RDRAM_REGION 0 ... RDRAM_DSIZE