Fire RSP DMAs upon RSP MTC0 to indexes 2 or 3 (unsticks Mario)

This commit is contained in:
CocoSimone
2022-09-25 14:55:21 +02:00
parent 6ad8d00a92
commit 24ec6ed6dd
9 changed files with 188 additions and 78 deletions

View File

@@ -39,10 +39,10 @@ using m128 = __m128i;
#define BASE(x) RS(x)
#define VT(x) (((x) >> 16) & 0x1F)
#define VS(x) (((x) >> 11) & 0x1F)
#define VD(x) (((x) >> 6) & 0x1F)
#define E(x) BASE(x)
#define VD(x) (((x) >> 6) & 0x1F)
#define E(x) (((x) >> 7) & 0x0F)
#define ELEMENT_INDEX(i) (7 - (i))
#define BYTE_INDEX(i) (15 - (i))
#define BYTE_INDEX(i) (15 - (i))
enum TLBAccessType {

View File

@@ -41,7 +41,7 @@ void Core::Run(Window& window, float volumeL, float volumeR) {
for(;cycles <= mmio.vi.cyclesPerHalfline; cycles++, frameCycles++) {
cpu.Step(mem);
mmio.rsp.Step(mmio.mi, cpu.regs, mmio.rdp);
mmio.rsp.Step(cpu.regs, mem);
mmio.ai.Step(mem, cpu.regs, 1, volumeL, volumeR);
}

View File

@@ -33,6 +33,7 @@ struct Mem {
template <bool tlb = true>
void Write64(Registers&, u32, u64, s64);
MMIO mmio;
u8 pifRam[PIF_RAM_SIZE]{};
private:
friend struct SI;
@@ -41,7 +42,6 @@ private:
friend struct Cpu;
friend struct RSP;
friend struct Core;
MMIO mmio;
std::vector<u8> cart, sram;
u8 pifBootrom[PIF_BOOTROM_SIZE]{};
u8 isviewer[ISVIEWER_SIZE]{};

View File

@@ -21,21 +21,21 @@ void RSP::Reset() {
memset(imem, 0, IMEM_SIZE);
memset(vpr, 0, 32 * sizeof(VPR));
memset(gpr, 0, 32);
vce = 0;
memset(&vce, 0, sizeof(VPR));
acc = {.h={}, .m={}, .l={}};
vcc = {.l = {}, .h = {}};
vco = {.l = {}, .h = {}};
semaphore = false;
}
void RSP::Step(MI& mi, Registers& regs, RDP& rdp) {
void RSP::Step(Registers& regs, Mem& mem) {
if(!spStatus.halt) {
gpr[0] = 0;
u32 instr = util::ReadAccess<u32>(imem, pc & IMEM_DSIZE);
oldPC = pc & 0xFFC;
pc = nextPC & 0xFFC;
nextPC += 4;
Exec(mi, regs, rdp, instr);
Exec(regs, mem, instr);
}
}
@@ -54,44 +54,6 @@ auto RSP::Read(u32 addr) -> u32{
}
}
/// Copies `len.count + 1` rows between RDRAM and one of the RSP memories.
///
/// @tparam isDRAMdest  true: RSP memory -> RDRAM; false: RDRAM -> RSP memory.
/// @param len   Transfer descriptor; each row is `len.len + 1` bytes rounded
///              up to a multiple of 8, and `len.skip` is added to the RDRAM
///              advance after every row except the last.
/// @param mem   Provides the RDRAM buffer through GetRDRAM().
/// @param rsp   RSP whose IMEM/DMEM and DMA address registers are used; its
///              lastSuccessful* registers are updated on completion.
/// @param bank  true selects IMEM, false selects DMEM.
template <bool isDRAMdest>
inline void DMA(SPDMALen len, Mem& mem, RSP& rsp, bool bank) {
  const u32 encodedLength = len.len + 1;
  const u32 rowBytes = (encodedLength + 0x7) & ~0x7;

  u8* const rdram = mem.GetRDRAM();
  u8* const spMem = bank ? rsp.imem : rsp.dmem;

  u32 spAddr = rsp.spDMASPAddr.address & 0xFF8;
  u32 dramAddr = rsp.spDMADRAMAddr.address & 0xFFFFF8;

  for (int row = 0; row < len.count + 1; row++) {
    for (u32 offset = 0; offset < rowBytes; offset++) {
      // The RSP-side index wraps at 0xFFF; the RDRAM index is used as-is.
      const u32 spIndex = (spAddr + offset) & 0xFFF;
      const u32 dramIndex = dramAddr + offset;
      if constexpr (isDRAMdest) {
        rdram[dramIndex] = spMem[spIndex];
      } else {
        spMem[spIndex] = rdram[dramIndex];
      }
    }

    // No skip is applied after the final row.
    const int skip = (row == len.count) ? 0 : len.skip;
    dramAddr += (rowBytes + skip) & 0xFFFFF8;
    spAddr += rowBytes;
  }

  rsp.lastSuccessfulSPAddr.address = spAddr & 0xFF8;
  rsp.lastSuccessfulSPAddr.bank = bank;
  rsp.lastSuccessfulDRAMAddr.address = dramAddr & 0xFFFFF8;
}
void RSP::Write(Mem& mem, Registers& regs, u32 addr, u32 value) {
MI& mi = mem.mmio.mi;
switch (addr) {
@@ -99,13 +61,11 @@ void RSP::Write(Mem& mem, Registers& regs, u32 addr, u32 value) {
case 0x04040004: spDMADRAMAddr.raw = value & 0xFFFFF8; break;
case 0x04040008: {
spDMALen.raw = value;
DMA<false>(spDMALen, mem, *this, spDMASPAddr.bank);
spDMALen.raw = 0xFF8 | (spDMALen.skip << 20);
DMA<false>(spDMALen, mem.GetRDRAM(), *this, spDMASPAddr.bank);
} break;
case 0x0404000C: {
spDMALen.raw = value;
DMA<true>(spDMALen, mem, *this, spDMASPAddr.bank);
spDMALen.raw = 0xFF8 | (spDMALen.skip << 20);
DMA<true>(spDMALen, mem.GetRDRAM(), *this, spDMASPAddr.bank);
} break;
case 0x04040010: {
auto write = SPStatusWrite{.raw = value};

View File

@@ -111,10 +111,10 @@ struct Registers;
struct RSP {
RSP();
void Reset();
void Step(MI& mi, Registers& regs, RDP& rdp);
void Step(Registers& regs, Mem& mem);
auto Read(u32 addr) -> u32;
void Write(Mem& mem, Registers& regs, u32 addr, u32 value);
void Exec(MI& mi, Registers& regs, RDP& rdp, u32 instr);
void Exec(Registers& regs, Mem& mem, u32 instr);
SPStatus spStatus;
u16 oldPC{}, pc{}, nextPC{};
SPDMASPAddr spDMASPAddr{};
@@ -125,7 +125,7 @@ struct RSP {
u8 dmem[DMEM_SIZE]{}, imem[IMEM_SIZE]{};
VPR vpr[32]{};
s32 gpr[32]{};
u8 vce{};
VPR vce{};
struct {
VPR h{}, m{}, l{};
@@ -164,6 +164,15 @@ struct RSP {
return val;
}
/// Packs the eight `vce` lanes into one byte: bit i of the result is set when
/// `vce.element[7 - i]` is non-zero.
inline u8 GetVCE() {
  u8 packed = 0;
  for (int lane = 0; lane < 8; lane++) {
    if (vce.element[lane] != 0) {
      // Lane 7 maps to bit 0, lane 0 to bit 7.
      packed |= (1 << (7 - lane));
    }
  }
  return packed;
}
inline u64 ReadDword(u32 addr, bool i) {
if (i) {
return GET_RSP_DWORD(addr, imem);
@@ -277,9 +286,10 @@ struct RSP {
void addi(u32 instr);
void and_(u32 instr);
void andi(u32 instr);
void cfc2(u32 instr);
void b(u32 instr, bool cond);
void bl(u32 instr, bool cond);
void cfc2(u32 instr);
void ctc2(u32 instr);
void lb(u32 instr);
void lh(u32 instr);
void lw(u32 instr);
@@ -317,7 +327,48 @@ struct RSP {
void vne(u32 instr);
void vsar(u32 instr);
void mfc0(RDP& rdp, u32 instr);
void mtc0(MI& mi, Registers& regs, RDP& rdp, u32 instr);
void mtc0(Registers& regs, Mem& mem, u32 instr);
void mfc2(u32 instr);
void mtc2(u32 instr);
/// Copies `len.count + 1` rows between RDRAM and one of the RSP memories.
///
/// @tparam isDRAMdest  true: RSP memory -> RDRAM; false: RDRAM -> RSP memory.
/// @param len    Transfer descriptor; each row is `len.len + 1` bytes rounded
///               up to a multiple of 8, and `len.skip` is added to the RDRAM
///               advance after every row except the last.
/// @param rdram  Base pointer of the RDRAM buffer.
/// @param rsp    RSP whose IMEM/DMEM and DMA address registers are used; its
///               lastSuccessful* registers and spDMALen are updated on
///               completion.
/// @param bank   true selects IMEM, false selects DMEM.
template <bool isDRAMdest>
inline void DMA(SPDMALen len, u8* rdram, RSP& rsp, bool bank) {
  const u32 encodedLength = len.len + 1;
  const u32 rowBytes = (encodedLength + 0x7) & ~0x7;

  u8* const spMem = bank ? rsp.imem : rsp.dmem;

  u32 spAddr = rsp.spDMASPAddr.address & 0xFF8;
  u32 dramAddr = rsp.spDMADRAMAddr.address & 0xFFFFF8;

  for (int row = 0; row < len.count + 1; row++) {
    for (u32 offset = 0; offset < rowBytes; offset++) {
      // The RSP-side index wraps at 0xFFF; the RDRAM index is used as-is.
      const u32 spIndex = (spAddr + offset) & 0xFFF;
      const u32 dramIndex = dramAddr + offset;
      if constexpr (isDRAMdest) {
        rdram[dramIndex] = spMem[spIndex];
      } else {
        spMem[spIndex] = rdram[dramIndex];
      }
    }

    // No skip is applied after the final row.
    const int skip = (row == len.count) ? 0 : len.skip;
    dramAddr += (rowBytes + skip) & 0xFFFFF8;
    spAddr += rowBytes;
  }

  rsp.lastSuccessfulSPAddr.address = spAddr & 0xFF8;
  rsp.lastSuccessfulSPAddr.bank = bank;
  rsp.lastSuccessfulDRAMAddr.address = dramAddr & 0xFFFFF8;
  // Length register after the transfer: low bits read 0xFF8, skip is kept.
  rsp.spDMALen.raw = 0xFF8 | (rsp.spDMALen.skip << 20);
}
private:
inline void branch(u16 address, bool cond) {
if(cond) {

View File

@@ -148,7 +148,7 @@ void Cpu::Exec(Mem& mem, u32 instr) {
case 0x3D: regs.cop1.sdc1(regs, mem, instr); break;
case 0x3F: sd(mem, instr); break;
default:
util::panic("Unimplemented instruction {} {} ({:08X}) (pc: {:016X})\n", (mask >> 3) & 7, mask & 7, instr, (u64)regs.oldPC);
util::panic("Unimplemented instruction {:02X} ({:08X}) (pc: {:016X})\n", mask, instr, (u64)regs.oldPC);
}
}
}

View File

@@ -198,7 +198,7 @@ void Cop1::cvtld(Registers& regs, u32 instr) {
}
template <typename T>
inline bool CalculateCondition(T fs, T ft, u8 cond) {
inline bool CalculateCondition(Registers& regs, T fs, T ft, CompConds cond) {
switch(cond) {
case F: return false;
case UN: return std::isnan(fs) || std::isnan(ft);
@@ -207,7 +207,15 @@ inline bool CalculateCondition(T fs, T ft, u8 cond) {
case OLT: return (!std::isnan(fs) && !std::isnan(ft)) && (fs < ft);
case ULT: return (std::isnan(fs) || std::isnan(ft)) || (fs < ft);
case OLE: return (!std::isnan(fs) && !std::isnan(ft)) && (fs <= ft);
default: return CalculateCondition(fs, ft, cond - 8);
case ULE: return (std::isnan(fs) || std::isnan(ft)) || (fs <= ft);
default:
if(std::isnan(fs) || std::isnan(ft)) {
regs.cop1.fcr31.flag_invalid_operation = true;
regs.cop1.fcr31.cause_invalid_operation = true;
FireException(regs, ExceptionCode::FloatingPointError, 0, regs.oldPC);
}
return CalculateCondition(regs, fs, ft, static_cast<CompConds>(cond - 8));
}
}
@@ -216,7 +224,7 @@ void Cop1::ccond(Registers& regs, u32 instr, CompConds cond) {
T fs = GetCop1Reg<T>(regs.cop0, FS(instr));
T ft = GetCop1Reg<T>(regs.cop0, FT(instr));
fcr31.compare = CalculateCondition(fs, ft, cond);
fcr31.compare = CalculateCondition(regs, fs, ft, cond);
}
template void Cop1::ccond<float>(Registers& regs, u32 instr, CompConds cond);

View File

@@ -2,6 +2,7 @@
#include <util.hpp>
#include <n64/core/cpu/Registers.hpp>
#include <Interrupt.hpp>
#include <Mem.hpp>
namespace n64 {
inline void special(MI& mi, Registers& regs, RSP& rsp, u32 instr) {
@@ -57,16 +58,16 @@ inline void regimm(RSP& rsp, u32 instr) {
inline void lwc2(RSP& rsp, u32 instr) {
u8 mask = (instr >> 11) & 0x1F;
switch(mask) {
//case 0x04: rsp.lqv(instr); break;
default: util::panic("Unhandled RSP LWC2 {} {}\n", (mask >> 3) & 3, mask & 7);
case 0x04: rsp.lqv(instr); break;
default: util::panic("Unhandled RSP LWC2 {:06b}\n", mask);
}
}
inline void swc2(RSP& rsp, u32 instr) {
u8 mask = (instr >> 11) & 0x1F;
switch(mask) {
//case 0x04: rsp.sqv(instr); break;
default: util::panic("Unhandled RSP SWC2 {} {}\n", (mask >> 3) & 3, mask & 7);
case 0x04: rsp.sqv(instr); break;
default: util::panic("Unhandled RSP SWC2 {:06b}\n", mask);
}
}
@@ -76,7 +77,10 @@ inline void cop2(RSP& rsp, u32 instr) {
switch(mask) {
case 0x00:
switch(mask_sub) {
//case 0x02: rsp.cfc2(instr); break;
case 0x00: rsp.mfc2(instr); break;
case 0x02: rsp.cfc2(instr); break;
case 0x04: rsp.mtc2(instr); break;
case 0x06: rsp.ctc2(instr); break;
default: util::panic("Unhandled RSP COP2 sub ({:06b})\n", mask_sub);
}
break;
@@ -89,17 +93,23 @@ inline void cop2(RSP& rsp, u32 instr) {
}
}
inline void cop0(MI& mi, Registers& regs, RSP& rsp, RDP& rdp, u32 instr) {
inline void cop0(Registers& regs, Mem& mem, u32 instr) {
u8 mask = (instr >> 21) & 0x1F;
MMIO& mmio = mem.mmio;
RSP& rsp = mmio.rsp;
RDP& rdp = mmio.rdp;
switch(mask) {
case 0x00: rsp.mfc0(rdp, instr); break;
case 0x04: rsp.mtc0(mi, regs, rdp, instr); break;
case 0x04: rsp.mtc0(regs, mem, instr); break;
default: util::panic("Unhandled RSP COP0 ({:06b})\n", mask);
}
}
void RSP::Exec(MI &mi, Registers &regs, RDP &rdp, u32 instr) {
void RSP::Exec(Registers &regs, Mem& mem, u32 instr) {
u8 mask = (instr >> 26) & 0x3F;
MMIO& mmio = mem.mmio;
RDP& rdp = mmio.rdp;
MI& mi = mmio.mi;
switch(mask) {
case 0x00: special(mi, regs, *this, instr); break;
case 0x01: regimm(*this, instr); break;
@@ -116,8 +126,8 @@ void RSP::Exec(MI &mi, Registers &regs, RDP &rdp, u32 instr) {
case 0x0D: ori(instr); break;
case 0x0E: xori(instr); break;
case 0x0F: lui(instr); break;
case 0x10: cop0(mi, regs, *this, rdp, instr); break;
//case 0x12: cop2(*this, instr); break;
case 0x10: cop0(regs, mem, instr); break;
case 0x12: cop2(*this, instr); break;
case 0x20: lb(instr); break;
case 0x21: lh(instr); break;
case 0x23: case 0x27:
@@ -128,8 +138,8 @@ void RSP::Exec(MI &mi, Registers &regs, RDP &rdp, u32 instr) {
case 0x28: sb(instr); break;
case 0x29: sh(instr); break;
case 0x2B: sw(instr); break;
//case 0x32: lwc2(*this, instr); break;
//case 0x3A: swc2(*this, instr); break;
case 0x32: lwc2(*this, instr); break;
case 0x3A: swc2(*this, instr); break;
default: util::panic("Unhandled RSP instruction ({:06b})\n", mask);
}
}

View File

@@ -1,6 +1,7 @@
#include <RSP.hpp>
#include <util.hpp>
#include <n64/core/cpu/Registers.hpp>
#include <Mem.hpp>
namespace n64 {
inline bool AcquireSemaphore(RSP& rsp) {
@@ -16,28 +17,49 @@ inline void ReleaseSemaphore(RSP& rsp) {
rsp.semaphore = false;
}
/// Sign-extends a 7-bit value (bit 6 is the sign) and shifts it left by `sa`.
///
/// @param val  Value whose bit 6 is copied into bit 7 before widening.
/// @param sa   Left shift applied to the widened 32-bit value.
/// @return     The shifted value, converted back to int.
inline int SignExt7bit(u8 val, int sa) {
  if (val & 0x40) {
    val |= 0x80;
  }
  const s32 widened = static_cast<s8>(val);
  // Shift as unsigned so a negative offset is not a signed left shift.
  return static_cast<u32>(widened) << sa;
}
inline auto GetCop0Reg(RSP& rsp, RDP& rdp, u8 index) -> u32{
switch(index) {
case 0: return rsp.spDMASPAddr.raw;
case 1: return rsp.spDMADRAMAddr.raw;
case 0: return rsp.lastSuccessfulSPAddr.raw;
case 1: return rsp.lastSuccessfulDRAMAddr.raw;
case 2:
case 3: return rsp.spDMALen.raw;
case 4: return rsp.spStatus.raw;
case 5: return rsp.spStatus.dmaFull;
case 6: return 0;
case 6: return rsp.spStatus.dmaBusy;
case 7: return AcquireSemaphore(rsp);
case 9: return rdp.dpc.end;
case 10: return rdp.dpc.current;
case 11: return rdp.dpc.status.raw;
case 12: return 0;
default: util::panic("Unhandled RSP COP0 register read at index {}\n", index);
}
return 0;
}
inline void SetCop0Reg(MI& mi, Registers& regs, RSP& rsp, RDP& rdp, u8 index, u32 val) {
inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) {
MMIO& mmio = mem.mmio;
RSP& rsp = mmio.rsp;
RDP& rdp = mmio.rdp;
MI& mi = mmio.mi;
switch(index) {
case 0: rsp.spDMASPAddr.raw = val; break;
case 1: rsp.spDMADRAMAddr.raw = val; break;
case 2:
case 3: rsp.spDMALen.raw = val; break;
rsp.spDMALen.raw = val;
rsp.DMA<false>(rsp.spDMALen, mem.GetRDRAM(), rsp, rsp.spDMASPAddr.bank);
break;
case 3:
rsp.spDMALen.raw = val;
rsp.DMA<true>(rsp.spDMALen, mem.GetRDRAM(), rsp, rsp.spDMASPAddr.bank);
break;
case 4: rsp.spStatus.raw = val; break;
case 7:
if(val == 0) {
@@ -110,7 +132,37 @@ void RSP::andi(u32 instr) {
}
/// CFC2: reads a vector control register into GPR rt.
///
/// `RD(instr) & 3` selects the source: 0 -> VCO, 1 -> VCC, 2 and 3 -> VCE.
/// The value is held as a signed 16-bit quantity and sign-extended to 32 bits
/// when stored.
void RSP::cfc2(u32 instr) {
  s16 value = 0;
  switch(RD(instr) & 3) {
    case 0: value = VCOasU16(); break;
    case 1: value = VCCasU16(); break;
    // Two standard labels instead of the GNU `2 ... 3` range extension,
    // matching how ctc2 spells the same selector.
    case 2: case 3: value = GetVCE(); break;
  }
  gpr[RT(instr)] = s32(value);
}
/// CTC2: writes the low 16 bits of GPR rt into a vector control register.
///
/// `RD(instr) & 3` selects the destination: 0 -> VCO, 1 -> VCC, 2 and 3 -> VCE.
/// Each source bit becomes a whole lane holding 0xFFFF (bit set) or 0 (bit
/// clear). Bits 0-7 fill the `l` half (or VCE), bits 8-15 fill the `h` half,
/// with bit i landing in lane `7 - i`.
void RSP::ctc2(u32 instr) {
  const u16 flags = gpr[RT(instr)];
  const auto laneValue = [flags](int bit) { return ((flags >> bit) & 1) == 1 ? 0xFFFF : 0; };

  const int selector = RD(instr) & 3;
  if (selector == 0) {
    for (int bit = 0; bit < 8; bit++) {
      vco.h.element[7 - bit] = laneValue(bit + 8);
      vco.l.element[7 - bit] = laneValue(bit);
    }
  } else if (selector == 1) {
    for (int bit = 0; bit < 8; bit++) {
      vcc.h.element[7 - bit] = laneValue(bit + 8);
      vcc.l.element[7 - bit] = laneValue(bit);
    }
  } else {
    // Selectors 2 and 3 both address VCE, which only has eight flags.
    for (int bit = 0; bit < 8; bit++) {
      vce.element[7 - bit] = laneValue(bit);
    }
  }
}
void RSP::b(u32 instr, bool cond) {
@@ -154,7 +206,13 @@ void RSP::lui(u32 instr) {
}
/// LQV: loads bytes from `addr` up to the end of its 16-byte aligned line into
/// vector register vt, starting at byte element `e`.
///
/// `addr` is GPR base plus the 7-bit signed offset scaled by 16. Loading stops
/// at the 16-byte boundary or when byte element 15 has been written, whichever
/// comes first.
void RSP::lqv(u32 instr) {
  const int e = E(instr);
  const u32 addr = gpr[BASE(instr)] + SignExt7bit(instr & 0x7F, 4);
  // Count the bytes left in the line instead of comparing `addr + i` against
  // an end address: for addr in the top 16 bytes of the u32 range the sum
  // wraps to 0 and an `addr + i <= end` test would never stop at the boundary.
  const int bytesToBoundary = 16 - static_cast<int>(addr & 15);
  for(int i = 0; i < bytesToBoundary && i + e < 16; i++) {
    vpr[VT(instr)].byte[BYTE_INDEX(i + e)] = ReadByte(addr + i);
  }
}
void RSP::j(u32 instr) {
@@ -222,7 +280,13 @@ void RSP::sub(u32 instr) {
}
/// SQV: stores bytes of vector register vt to memory from `addr` up to the end
/// of its 16-byte aligned line.
///
/// `addr` is GPR base plus the 7-bit signed offset scaled by 16. The source
/// byte element starts at `e` and wraps around modulo 16.
void RSP::sqv(u32 instr) {
  const int e = E(instr);
  const u32 addr = gpr[BASE(instr)] + SignExt7bit(instr & 0x7F, 4);
  // Count the bytes left in the line instead of comparing `addr + i` against
  // an end address: for addr in the top 16 bytes of the u32 range (e.g. a
  // small base with a negative offset) the end address is 0xFFFFFFFF, so
  // `addr + i <= end` is always true and the loop would never terminate.
  const int bytesToBoundary = 16 - static_cast<int>(addr & 15);
  for(int i = 0; i < bytesToBoundary; i++) {
    WriteByte(addr + i, vpr[VT(instr)].byte[BYTE_INDEX((i + e) & 15)]);
  }
}
void RSP::sllv(u32 instr) {
@@ -303,7 +367,24 @@ void RSP::mfc0(RDP& rdp, u32 instr) {
gpr[RT(instr)] = GetCop0Reg(*this, rdp, RD(instr));
}
void RSP::mtc0(MI& mi, Registers& regs, RDP& rdp, u32 instr) {
SetCop0Reg(mi, regs, *this, rdp, RD(instr), gpr[RT(instr)]);
/// MTC0: forwards the value of GPR rt to SetCop0Reg for the COP0 register
/// index taken from the rd field.
void RSP::mtc0(Registers& regs, Mem& mem, u32 instr) {
  const u8 cop0Index = RD(instr);
  const u32 value = gpr[RT(instr)];
  SetCop0Reg(regs, mem, cop0Index, value);
}
/// MFC2: reads a 16-bit value from vector register rd into GPR rt.
///
/// The high byte comes from byte element `e`, the low byte from element
/// `(e + 1) & 0xF` (wrapping within the register). The result is
/// sign-extended to 32 bits.
void RSP::mfc2(u32 instr) {
  const auto& source = vpr[RD(instr)];
  const int e = E(instr);
  const u8 hi = source.byte[BYTE_INDEX(e)];
  const u8 lo = source.byte[BYTE_INDEX((e + 1) & 0xF)];
  const s16 halfword = (hi << 8) | lo;
  gpr[RT(instr)] = s32(halfword);
}
/// MTC2: writes the low 16 bits of GPR rt into vector register rd.
///
/// The high byte goes to byte element `e`. The low byte goes to element
/// `e + 1`, and is dropped when `e` is 15 (no wrap-around on writes).
void RSP::mtc2(u32 instr) {
  auto& target = vpr[RD(instr)];
  const int e = E(instr);
  const u16 halfword = gpr[RT(instr)];

  target.byte[BYTE_INDEX(e)] = static_cast<u8>(halfword >> 8);
  if (e >= 15) {
    return;
  }
  target.byte[BYTE_INDEX(e + 1)] = static_cast<u8>(halfword);
}
}