diff --git a/external/parallel-rdp/parallel-rdp-standalone b/external/parallel-rdp/parallel-rdp-standalone index 18215758..84bdc0c5 160000 --- a/external/parallel-rdp/parallel-rdp-standalone +++ b/external/parallel-rdp/parallel-rdp-standalone @@ -1 +1 @@ -Subproject commit 18215758618a939a42d7b099af6e913a84fb4467 +Subproject commit 84bdc0c552cbbc82e4642f758c4fcfd8766e51f5 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 833c7e38..ba25d653 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -47,4 +47,10 @@ else() set(LIBRARIES ) endif() +if(${CMAKE_BUILD_TYPE} MATCHES Release) + target_compile_options(natsukashii PRIVATE -march=native -Ofast) +elseif(${CMAKE_BUILD_TYPE} MATCHES Debug) + target_compile_options(natsukashii PRIVATE -march=native -g) +endif() + target_link_libraries(natsukashii PRIVATE ${SDL2_LIBRARIES} ${LIBRARIES} capstone-static nfd parallel-rdp imgui fmt::fmt nlohmann_json::nlohmann_json) \ No newline at end of file diff --git a/src/common.hpp b/src/common.hpp index df45ce0d..db1e052e 100644 --- a/src/common.hpp +++ b/src/common.hpp @@ -15,10 +15,6 @@ using s64 = int64_t; using u128 = __uint128_t; using s128 = __int128_t; -#define UINT128_MAX (((u128)0xFFFF'FFFF'FFFF'FFFF << 64) | 0xFFFF'FFFF'FFFF'FFFF) -#define UINT128_MIN 0 -#define INT128_MAX (((u128)0x7FFF'FFFF'FFFF'FFFF << 64) | 0xFFFF'FFFF'FFFF'FFFF) -#define INT128_MIN (-(INT128_MAX) - 1LL) #define KiB * 1024 #define MiB ((KiB) * 1024) #define GiB ((MiB) * 1024) @@ -26,7 +22,6 @@ using s128 = __int128_t; #define N64_CYCLES_PER_FRAME ((N64_CPU_FREQ) / 60) #define HALF_ADDRESS(addr) ((addr) ^ 2) #define BYTE_ADDRESS(addr) ((addr) ^ 3) -#define ASPECT_RATIO ((float)4/3) #define RD(x) (((x) >> 11) & 0x1F) #define RT(x) (((x) >> 16) & 0x1F) diff --git a/src/frontend/imgui/Window.hpp b/src/frontend/imgui/Window.hpp index 0a6ac612..f7b4218a 100644 --- a/src/frontend/imgui/Window.hpp +++ b/src/frontend/imgui/Window.hpp @@ -15,7 +15,7 @@ struct Window { [[nodiscard]] bool gotClosed(SDL_Event event); ImFont *uiFont, *codeFont; u32 windowID; - float volumeL = 0.05, volumeR = 0.05; + float volumeL = 0.5, volumeR = 0.5; void LoadROM(n64::Core& core, const std::string& path); private: bool lockVolume = true; diff --git a/src/n64/Core.cpp b/src/n64/Core.cpp index 42a8f8b6..532c32b1 100644 --- a/src/n64/Core.cpp +++ b/src/n64/Core.cpp @@ -31,6 +31,7 @@ CartInfo Core::LoadROM(const std::string& rom_) { void Core::Run(Window& window, float volumeL, float volumeR) { MMIO& mmio = mem.mmio; Controller& controller = mmio.si.controller; + int cpuSteps = 0; for(int field = 0; field < mmio.vi.numFields; field++) { int frameCycles = 0; if(!pause && romLoaded) { @@ -43,7 +44,21 @@ void Core::Run(Window& window, float volumeL, float volumeR) { for(;cycles <= mmio.vi.cyclesPerHalfline; cycles++, frameCycles++) { cpu.Step(mem); - mmio.rsp.Step(cpu.regs, mem); + cpuSteps++; + if(mmio.rsp.spStatus.halt) { + mmio.rsp.steps = 0; + cpuSteps = 0; + } else { + if(cpuSteps > 2) { + mmio.rsp.steps += 2; + cpuSteps -= 3; + } + + while(mmio.rsp.steps > 0) { + mmio.rsp.steps--; + mmio.rsp.Step(cpu.regs, mem); + } + } mmio.ai.Step(mem, cpu.regs, 1, volumeL, volumeR); } @@ -71,8 +86,10 @@ void Core::Run(Window& window, float volumeL, float volumeR) { #define GET_AXIS(gamepad, axis) SDL_GameControllerGetAxis(gamepad, axis) void Core::UpdateController(const u8* state) { - Controller& controller = mem.mmio.si.controller; + Controller &controller = mem.mmio.si.controller; controller.raw = 0; + s8 xaxis = 0, yaxis = 0; + if(gamepadConnected) { bool A = GET_BUTTON(gamepad, SDL_CONTROLLER_BUTTON_A); bool B = GET_BUTTON(gamepad, SDL_CONTROLLER_BUTTON_X); @@ -85,23 +102,23 @@ void Core::UpdateController(const u8* state) { bool L = GET_BUTTON(gamepad, SDL_CONTROLLER_BUTTON_LEFTSHOULDER); bool R = GET_BUTTON(gamepad, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER); bool CUP = GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_RIGHTY) == 32767; - bool CDOWN = GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_RIGHTY) == -32768; - bool CLEFT = GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_RIGHTX) == -32768; + bool CDOWN = GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_RIGHTY) == -32767; + bool CLEFT = GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_RIGHTX) == -32767; bool CRIGHT = GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_RIGHTX) == 32767; controller.b1 = (A << 7) | (B << 6) | (Z << 5) | (START << 4) | - (DUP << 3) | (DDOWN << 2) | (DLEFT << 1) | DRIGHT; + (DUP << 3) | (DDOWN << 2) | (DLEFT << 1) | DRIGHT; controller.b2 = ((START && L && R) << 7) | (0 << 6) | (L << 5) | (R << 4) | - (CUP << 3) | (CDOWN << 2) | (CLEFT << 1) | CRIGHT; + (CUP << 3) | (CDOWN << 2) | (CLEFT << 1) | CRIGHT; - s8 xaxis = (s8)std::clamp((GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_LEFTX) >> 8), -127, 127); - s8 yaxis = (s8)std::clamp(-(GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_LEFTY) >> 8), -127, 127); + xaxis = (s8) std::clamp((GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_LEFTX) >> 8), -86, 86); + yaxis = (s8) std::clamp(-(GET_AXIS(gamepad, SDL_CONTROLLER_AXIS_LEFTY) >> 8), -86, 86); controller.b3 = xaxis; controller.b4 = yaxis; - if((controller.b2 >> 7) & 1) { + if ((controller.b2 >> 7) & 1) { controller.b1 &= ~0x10; controller.b3 = 0; controller.b4 = 0; @@ -126,24 +143,22 @@ void Core::UpdateController(const u8* state) { (state[SDL_SCANCODE_K] << 1) | (state[SDL_SCANCODE_L]); - s8 xaxis = 0; - if(state[SDL_SCANCODE_LEFT]) { - xaxis = -127; - } else if(state[SDL_SCANCODE_RIGHT]) { - xaxis = 127; + if (state[SDL_SCANCODE_LEFT]) { + xaxis = -86; + } else if (state[SDL_SCANCODE_RIGHT]) { + xaxis = 86; } - s8 yaxis = 0; - if(state[SDL_SCANCODE_DOWN]) { - yaxis = -127; - } else if(state[SDL_SCANCODE_UP]) { - yaxis = 127; + if (state[SDL_SCANCODE_DOWN]) { + yaxis = -86; + } else if (state[SDL_SCANCODE_UP]) { + yaxis = 86; } controller.b3 = xaxis; controller.b4 = yaxis; - if((controller.b2 >> 7) & 1) { + if ((controller.b2 >> 7) & 1) { controller.b1 &= ~0x10; controller.b3 = 0; controller.b4 = 0; diff --git a/src/n64/core/Audio.cpp b/src/n64/core/Audio.cpp index 8f83d1e4..fe1cd1f1 100644 --- a/src/n64/core/Audio.cpp +++ b/src/n64/core/Audio.cpp @@ -3,7 +3,7 @@ #include namespace n64 { -#define AUDIO_SAMPLE_RATE 48000 +#define AUDIO_SAMPLE_RATE 44100 #define SYSTEM_SAMPLE_FORMAT AUDIO_F32SYS #define SYSTEM_SAMPLE_SIZE 4 #define BYTES_PER_HALF_SECOND ((AUDIO_SAMPLE_RATE / 2) * SYSTEM_SAMPLE_SIZE) @@ -65,11 +65,13 @@ void InitAudio() { } } -void PushSample(float left, float volumeL, float volumeR, float right) { - float samples[2]{ left * volumeL, right * volumeR }; +void PushSample(float left, float volumeL, float right, float volumeR) { + float adjustedL = left * volumeL; + float adjustedR = right * volumeR; + float samples[2]{ adjustedL, adjustedR }; int availableBytes = SDL_AudioStreamAvailable(audioStream); - if(availableBytes < BYTES_PER_HALF_SECOND) { + if(availableBytes <= BYTES_PER_HALF_SECOND) { SDL_AudioStreamPut(audioStream, samples, 2 * sizeof(float)); } } diff --git a/src/n64/core/Mem.hpp b/src/n64/core/Mem.hpp index 2dfb1e7d..1026c316 100644 --- a/src/n64/core/Mem.hpp +++ b/src/n64/core/Mem.hpp @@ -59,27 +59,21 @@ private: case 0xEC8B1325: // 7102 cicType = CIC_NUS_7102; break; - case 0x1DEB51A9: // 6101 cicType = CIC_NUS_6101; break; - case 0xC08E5BD6: cicType = CIC_NUS_6102_7101; break; - case 0x03B8376A: cicType = CIC_NUS_6103_7103; break; - case 0xCF7F41DC: cicType = CIC_NUS_6105_7105; break; - case 0xD1059C6A: cicType = CIC_NUS_6106_7106; break; - default: util::warn("Could not determine CIC TYPE! Checksum: {:08X} is unknown!\n", checksum); cicType = UNKNOWN_CIC_TYPE; diff --git a/src/n64/core/RDP.cpp b/src/n64/core/RDP.cpp index 933eec6f..28cc41b3 100644 --- a/src/n64/core/RDP.cpp +++ b/src/n64/core/RDP.cpp @@ -40,26 +40,14 @@ auto RDP::Read(u32 addr) const -> u32{ void RDP::Write(MI& mi, Registers& regs, RSP& rsp, u32 addr, u32 val) { switch(addr) { - case 0x04100000: - if(!dpc.status.startValid) { - dpc.start = val & 0xFFFFF8; - } - dpc.status.startValid = true; - break; - case 0x04100004: - dpc.end = val & 0xFFFFF8; - if(dpc.status.startValid) { - dpc.current = dpc.start; - dpc.status.startValid = false; - } - RunCommand(mi, regs, rsp); - break; - case 0x0410000C: StatusWrite(mi, regs, rsp, val); break; + case 0x04100000: WriteStart(val); break; + case 0x04100004: WriteEnd(mi, regs, rsp, val); break; + case 0x0410000C: WriteStatus(mi, regs, rsp, val); break; default: util::panic("Unhandled DP Command Registers write (addr: {:08X}, val: {:08X})\n", addr, val); } } -void RDP::StatusWrite(MI& mi, Registers& regs, RSP& rsp, u32 val) { +void RDP::WriteStatus(MI& mi, Registers& regs, RSP& rsp, u32 val) { bool rdpUnfrozen = false; DPCStatusWrite temp{}; @@ -85,9 +73,53 @@ void RDP::StatusWrite(MI& mi, Registers& regs, RSP& rsp, u32 val) { } } +inline void logCommand(u8 cmd) { + switch(cmd) { + case 0x08: util::print("Fill triangle\n"); break; + case 0x09: util::print("Fill, zbuf triangle\n"); break; + case 0x0a: util::print("Texture triangle\n"); break; + case 0x0b: util::print("Texture, zbuf triangle\n"); break; + case 0x0c: util::print("Shade triangle\n"); break; + case 0x0d: util::print("Shade, zbuf triangle\n"); break; + case 0x0e: util::print("Shade, texture triangle\n"); break; + case 0x0f: util::print("Shade, texture, zbuf triangle\n"); break; + case 0x24: util::print("Texture rectangle\n"); break; + case 0x25: util::print("Texture rectangle flip\n"); break; + case 0x26: util::print("Sync load\n"); break; + case 0x27: util::print("Sync pipe\n"); break; + case 0x28: util::print("Sync tile\n"); break; + case 0x29: util::print("Sync full\n"); break; + case 0x2a: util::print("Set key gb\n"); break; + case 0x2b: util::print("Set key r\n"); break; + case 0x2c: util::print("Set convert\n"); break; + case 0x2d: util::print("Set scissor\n"); break; + case 0x2e: util::print("Set prim depth\n"); break; + case 0x2f: util::print("Set other modes\n"); break; + case 0x30: util::print("Load TLUT\n"); break; + case 0x32: util::print("Set tile size\n"); break; + case 0x33: util::print("Load block\n"); break; + case 0x34: util::print("Load tile\n"); break; + case 0x35: util::print("Set tile\n"); break; + case 0x36: util::print("Fill rectangle\n"); break; + case 0x37: util::print("Set fill color\n"); break; + case 0x38: util::print("Set fog color\n"); break; + case 0x39: util::print("Set blend color\n"); break; + case 0x3a: util::print("Set prim color\n"); break; + case 0x3b: util::print("Set env color\n"); break; + case 0x3c: util::print("Set combine\n"); break; + case 0x3d: util::print("Set texture image\n"); break; + case 0x3e: util::print("Set mask image\n"); break; + case 0x3f: util::print("Set color image\n"); break; + } +} + void RDP::RunCommand(MI& mi, Registers& regs, RSP& rsp) { + //if (dpc.status.freeze) { + // return; + //} dpc.status.pipeBusy = true; dpc.status.startGclk = true; + dpc.status.freeze = true; static int remaining_cmds = 0; @@ -97,7 +129,6 @@ void RDP::RunCommand(MI& mi, Registers& regs, RSP& rsp) { int len = end - current; if(len <= 0) return; - if(len + (remaining_cmds * 4) > 0xFFFFF) { return; } @@ -108,7 +139,7 @@ void RDP::RunCommand(MI& mi, Registers& regs, RSP& rsp) { cmd_buf[remaining_cmds + (i >> 2)] = cmd; } } else { - if(end > 0x7FFFFF || current > 0x7FFFFF) { + if(end > RDRAM_DSIZE || current > RDRAM_DSIZE) { return; } for(int i = 0; i < len; i += 4) { @@ -124,6 +155,7 @@ void RDP::RunCommand(MI& mi, Registers& regs, RSP& rsp) { while(buf_index < word_len) { u8 cmd = (cmd_buf[buf_index] >> 24) & 0x3F; + // logCommand(cmd); int cmd_len = cmd_lens[cmd]; if((buf_index + cmd_len) * 4 > len + (remaining_cmds * 4)) { @@ -159,8 +191,8 @@ void RDP::RunCommand(MI& mi, Registers& regs, RSP& rsp) { dpc.current = end; dpc.end = end; - dpc.status.freeze = false; dpc.status.cbufReady = true; + dpc.status.freeze = false; } void RDP::OnFullSync(MI& mi, Registers& regs) { diff --git a/src/n64/core/RDP.hpp b/src/n64/core/RDP.hpp index 30f3197e..4fd39dc8 100644 --- a/src/n64/core/RDP.hpp +++ b/src/n64/core/RDP.hpp @@ -60,8 +60,24 @@ struct RDP { std::vector dram; [[nodiscard]] auto Read(u32 addr) const -> u32; void Write(MI& mi, Registers& regs, RSP& rsp, u32 addr, u32 val); - void StatusWrite(MI& mi, Registers& regs, RSP& rsp, u32 val); + void WriteStatus(MI& mi, Registers& regs, RSP& rsp, u32 val); void RunCommand(MI& mi, Registers& regs, RSP& rsp); void OnFullSync(MI& mi, Registers& regs); + + inline void WriteStart(u32 val) { + if(!dpc.status.startValid) { + dpc.start = val & 0xFFFFF8; + } + dpc.status.startValid = true; + } + + inline void WriteEnd(MI& mi, Registers& regs, RSP& rsp, u32 val) { + dpc.end = val & 0xFFFFF8; + if(dpc.status.startValid) { + dpc.current = dpc.start; + dpc.status.startValid = false; + } + RunCommand(mi, regs, rsp); + } }; } // natsukashii diff --git a/src/n64/core/RSP.cpp b/src/n64/core/RSP.cpp index bebec6d4..95a3296f 100644 --- a/src/n64/core/RSP.cpp +++ b/src/n64/core/RSP.cpp @@ -13,7 +13,7 @@ void RSP::Reset() { spStatus.halt = true; oldPC = 0; pc = 0; - nextPC = 0; + nextPC = 4; spDMASPAddr.raw = 0; spDMADRAMAddr.raw = 0; spDMALen.raw = 0; @@ -32,14 +32,44 @@ void RSP::Reset() { } void RSP::Step(Registers& regs, Mem& mem) { - if(!spStatus.halt) { - gpr[0] = 0; - u32 instr = util::ReadAccess(imem, pc & IMEM_DSIZE); - oldPC = pc & 0xFFC; - pc = nextPC & 0xFFC; - nextPC += 4; - Exec(regs, mem, instr); + gpr[0] = 0; + u32 instr = util::ReadAccess(imem, pc & IMEM_DSIZE); + oldPC = pc & 0xFFC; + pc = nextPC & 0xFFC; + nextPC += 4; + Exec(regs, mem, instr); +/* + util::print("{:04X} {:08X} ", oldPC, instr); + for (int i = 0; i < 32; i++) { + util::print("{:08X} ", (u32)gpr[i]); } + + for (int i = 0; i < 32; i++) { + for (int e = 0; e < 8; e++) { + util::print("{:04X}", vpr[i].element[e]); + } + util::print(" "); + } + + for (int e = 0; e < 8; e++) { + util::print("{:04X}", acc.h.element[e]); + } + util::print(" "); + + for (int e = 0; e < 8; e++) { + util::print("{:04X}", acc.m.element[e]); + } + util::print(" "); + + for (int e = 0; e < 8; e++) { + util::print("{:04X}", acc.l.element[e]); + } + util::print(" "); + + util::print("{:04X} {:04X} {:02X}", GetVCC(), GetVCO(), GetVCE()); + + util::print("\n"); + */ } auto RSP::Read(u32 addr) -> u32{ @@ -74,9 +104,7 @@ void RSP::Write(Mem& mem, Registers& regs, u32 addr, u32 value) { case 0x0404001C: ReleaseSemaphore(); break; case 0x04080000: if(spStatus.halt) { - oldPC = pc & 0xFFC; - pc = value & 0xFFC; - nextPC = value & 0xFFC; + SetPC(value); } break; default: util::panic("Unimplemented SP register write {:08X}, val: {:08X}\n", addr, value); diff --git a/src/n64/core/RSP.hpp b/src/n64/core/RSP.hpp index e081edc6..f4c67d1c 100644 --- a/src/n64/core/RSP.hpp +++ b/src/n64/core/RSP.hpp @@ -129,6 +129,7 @@ struct RSP { VPR vce{}; s16 divIn{}, divOut{}; bool divInLoaded = false; + int steps = 0; struct { VPR h{}, m{}, l{}; @@ -141,16 +142,17 @@ struct RSP { bool semaphore = false; inline void SetPC(u16 val) { - pc = val; - nextPC = val += 4; + oldPC = pc & 0xFFC; + pc = val & 0xFFC; + nextPC = pc + 4; } inline s64 GetACC(int e) { s64 val = s64(acc.h.element[e]) << 32; - val |= s64(acc.m.element[e]) << 16; - val |= s64(acc.l.element[e]); - if((val & 0x800000000000) != 0) { - val |= 0xFFFF000000000000; + val |= s64(acc.m.element[e]) << 16; + val |= s64(acc.l.element[e]) << 00; + if((val & 0x0000800000000000) != 0) { + val |= 0xFFFF000000000000; } return val; } @@ -161,17 +163,38 @@ struct RSP { acc.l.element[e] = val; } - inline u16 VCOasU16() { - u16 val = 0; - for(int i = 0; i < 8; i++) { + inline u16 GetVCO() { + u16 value = 0; + for (int i = 0; i < 8; i++) { bool h = vco.h.element[7 - i] != 0; bool l = vco.l.element[7 - i] != 0; u32 mask = (l << i) | (h << (i + 8)); - val |= mask; + value |= mask; } - return val; + return value; } + inline u16 GetVCC() { + u16 value = 0; + for (int i = 0; i < 8; i++) { + bool h = vcc.h.element[7 - i] != 0; + bool l = vcc.l.element[7 - i] != 0; + u32 mask = (l << i) | (h << (i + 8)); + value |= mask; + } + return value; + } + + inline u8 GetVCE() { + u8 value = 0; + for(int i = 0; i < 8; i++) { + bool l = vce.element[ELEMENT_INDEX(i)] != 0; + value |= (l << i); + } + return value; + } + + inline void WriteStatus(MI& mi, Registers& regs, u32 value) { auto write = SPStatusWrite{.raw = value}; if(write.clearHalt && !write.setHalt) { @@ -197,26 +220,6 @@ struct RSP { CLEAR_SET(spStatus.signal7, write.clearSignal7, write.setSignal7); } - inline u16 VCCasU16() { - u16 val = 0; - for(int i = 0; i < 8; i++) { - bool h = vcc.h.element[7 - i] != 0; - bool l = vcc.l.element[7 - i] != 0; - u32 mask = (l << i) | (h << (i + 8)); - val |= mask; - } - return val; - } - - inline u8 GetVCE() { - u8 value = 0; - for(int i = 0; i < 8; i++) { - bool l = vce.element[7 - i] != 0; - value |= (l << i); - } - return value; - } - inline u64 ReadDword(u32 addr, bool i) { addr &= 0xfff; if (i) { @@ -347,7 +350,7 @@ struct RSP { void and_(u32 instr); void andi(u32 instr); void b(u32 instr, bool cond); - void bl(u32 instr, bool cond); + void blink(u32 instr, bool cond); void cfc2(u32 instr); void ctc2(u32 instr); void lb(u32 instr); @@ -376,6 +379,7 @@ struct RSP { void sb(u32 instr); void sh(u32 instr); void sw(u32 instr); + void swv(u32 instr); void sub(u32 instr); void sbv(u32 instr); void sdv(u32 instr); @@ -404,12 +408,14 @@ struct RSP { void vcr(u32 instr); void vcl(u32 instr); void vmacf(u32 instr); + void vmacu(u32 instr); void vmadh(u32 instr); void vmadl(u32 instr); void vmadm(u32 instr); void vmadn(u32 instr); void vmov(u32 instr); void vmulf(u32 instr); + void vmulu(u32 instr); void vmudl(u32 instr); void vmudh(u32 instr); void vmudm(u32 instr); diff --git a/src/n64/core/cpu/registers/cop1instructions.cpp b/src/n64/core/cpu/registers/cop1instructions.cpp index 98c209bb..d0a8cd3d 100644 --- a/src/n64/core/cpu/registers/cop1instructions.cpp +++ b/src/n64/core/cpu/registers/cop1instructions.cpp @@ -3,8 +3,24 @@ #include #include #include +#include namespace n64 { +inline int PushRoundingMode(const FCR31& fcr31) { + int og = fegetround(); + switch(fcr31.rounding_mode) { + case 0: fesetround(FE_TONEAREST); break; + case 1: fesetround(FE_TOWARDZERO); break; + case 2: fesetround(FE_UPWARD); break; + case 3: fesetround(FE_DOWNWARD); break; + } + + return og; +} + +#define PUSHROUNDINGMODE int og = PushRoundingMode(fcr31) +#define POPROUNDINGMODE fesetround(og) + void Cop1::absd(Registers& regs, u32 instr) { double fs = GetCop1Reg(regs.cop0, FS(instr)); SetCop1Reg(regs.cop0, FD(instr), fabs(fs)); @@ -368,22 +384,30 @@ void Cop1::sqrtd(Registers ®s, u32 instr) { void Cop1::roundls(Registers& regs, u32 instr) { float fs = GetCop1Reg(regs.cop0, FS(instr)); - SetReg(regs.cop0, FD(instr), (s32)roundf(fs)); + PUSHROUNDINGMODE; + SetReg(regs.cop0, FD(instr), (s32)nearbyintf(fs)); + POPROUNDINGMODE; } void Cop1::roundld(Registers& regs, u32 instr) { double fs = GetCop1Reg(regs.cop0, FS(instr)); - SetReg(regs.cop0, FD(instr), (s64)round(fs)); + PUSHROUNDINGMODE; + SetReg(regs.cop0, FD(instr), (s64)nearbyint(fs)); + POPROUNDINGMODE; } void Cop1::roundws(Registers& regs, u32 instr) { float fs = GetCop1Reg(regs.cop0, FS(instr)); - SetReg(regs.cop0, FD(instr), (s32)roundf(fs)); + PUSHROUNDINGMODE; + SetReg(regs.cop0, FD(instr), (s32)nearbyintf(fs)); + POPROUNDINGMODE; } void Cop1::roundwd(Registers& regs, u32 instr) { double fs = GetCop1Reg(regs.cop0, FS(instr)); - SetReg(regs.cop0, FD(instr), (s64)round(fs)); + PUSHROUNDINGMODE; + SetReg(regs.cop0, FD(instr), (s32)nearbyint(fs)); + POPROUNDINGMODE; } void Cop1::floorls(Registers& regs, u32 instr) { diff --git a/src/n64/core/mmio/AI.cpp b/src/n64/core/mmio/AI.cpp index 865be940..34019875 100644 --- a/src/n64/core/mmio/AI.cpp +++ b/src/n64/core/mmio/AI.cpp @@ -18,20 +18,18 @@ void AI::Reset() { } auto AI::Read(u32 addr) const -> u32 { - switch(addr) { - case 0x04500004: return dmaLen[0]; - case 0x0450000C: { - u32 val = 0; - val |= (dmaCount > 1); - val |= 1 << 20; - val |= 1 << 24; - val |= (dmaCount > 0) << 30; - val |= (dmaCount > 1) << 31; - return val; - } - default: util::panic("Unhandled AI read at addr {:08X}\n", addr); + if(addr == 0x0450000C) { + u32 val = 0; + val |= (dmaCount > 1); + val |= 1 << 20; + val |= 1 << 24; + val |= (dmaEnable << 25); + val |= (dmaCount > 0) << 30; + val |= (dmaCount > 1) << 31; + return val; } - return 0; + + return dmaLen[0]; } #define max(x, y) ((x) > (y) ? (x) : (y)) @@ -46,6 +44,7 @@ void AI::Write(Mem& mem, Registers& regs, u32 addr, u32 val) { case 0x04500004: { u32 len = (val & 0x3FFFF) & ~7; if((dmaCount < 2) && len) { + if(dmaCount == 0) InterruptRaise(mem.mmio.mi, regs, Interrupt::AI); dmaLen[dmaCount] = len; dmaCount++; } @@ -67,6 +66,7 @@ void AI::Write(Mem& mem, Registers& regs, u32 addr, u32 val) { } break; case 0x04500014: bitrate = val & 0xF; + dac.precision = bitrate + 1; break; default: util::panic("Unhandled AI write at addr {:08X} with val {:08X}\n", addr, val); } @@ -79,24 +79,26 @@ void AI::Step(Mem& mem, Registers& regs, int cpuCycles, float volumeL, float vol return; } - u32 address_hi = ((dmaAddr[0] >> 13) + dmaAddrCarry) & 0x7ff; - dmaAddr[0] = (address_hi << 13) | dmaAddr[0] & 0x1fff; - u32 data = mem.Read32(regs, dmaAddr[0], regs.pc); + if(dmaLen[0] && dmaEnable) { + u32 addrHi = ((dmaAddr[0] >> 13) + dmaAddrCarry) & 0x7FF; + dmaAddr[0] = (addrHi << 13) | (dmaAddr[0] & 0x1FFF); + u32 data = util::ReadAccess(mem.mmio.rdp.dram.data(), dmaAddr[0] & RDRAM_DSIZE); + s16 l = s16(data >> 16); + s16 r = s16(data); - s16 left = (s16)(data >> 16); - s16 right = (s16)data; - PushSample((float)left / INT16_MAX, volumeL, volumeR, (float)right / INT16_MAX); + PushSample((float)l / INT16_MAX, volumeL, (float)r / INT16_MAX, volumeR); - u32 address_lo = (dmaAddr[0] + 4) & 0x1fff; - dmaAddr[0] = (dmaAddr[0] & ~0x1fff) | address_lo; - dmaAddrCarry = (address_lo == 0); - dmaLen[0] -= 4; + u32 addrLo = (dmaAddr[0] + 4) & 0x1FFF; + dmaAddr[0] = (dmaAddr[0] & ~0x1FFF) | addrLo; + dmaAddrCarry = addrLo == 0; + dmaLen[0] -= 4; + } if(!dmaLen[0]) { - InterruptRaise(mem.mmio.mi, regs, Interrupt::AI); - if(--dmaCount > 0) { // If we have another DMA pending, start on that one. + if(--dmaCount > 0) { + InterruptRaise(mem.mmio.mi, regs, Interrupt::AI); dmaAddr[0] = dmaAddr[1]; - dmaLen[0] = dmaLen[1]; + dmaLen[0] = dmaLen[1]; } } diff --git a/src/n64/core/mmio/PI.cpp b/src/n64/core/mmio/PI.cpp index 8377550a..3f388812 100644 --- a/src/n64/core/mmio/PI.cpp +++ b/src/n64/core/mmio/PI.cpp @@ -42,7 +42,7 @@ auto PI::Read(MI& mi, u32 addr) const -> u32 { void PI::Write(Mem& mem, Registers& regs, u32 addr, u32 val) { MI& mi = mem.mmio.mi; switch(addr) { - case 0x04600000: dramAddr = val; break; + case 0x04600000: dramAddr = val & 0xFFFFFF; break; case 0x04600004: cartAddr = val; break; case 0x04600008: { u32 len = (val & 0x00FFFFFF) + 1; @@ -58,7 +58,6 @@ void PI::Write(Mem& mem, Registers& regs, u32 addr, u32 val) { dramAddr = dram_addr + len; cartAddr = cart_addr + len; InterruptRaise(mi, regs, Interrupt::PI); - status &= 0xFFFFFFFE; util::logdebug("PI DMA from RDRAM to CARTRIDGE (size: {} KiB, {:08X} to {:08X})\n", len, dramAddr, cartAddr); } break; case 0x0460000C: { diff --git a/src/n64/core/mmio/SI.cpp b/src/n64/core/mmio/SI.cpp index 42aebfa0..c1573140 100644 --- a/src/n64/core/mmio/SI.cpp +++ b/src/n64/core/mmio/SI.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace n64 { SI::SI() { @@ -16,6 +15,7 @@ void SI::Reset() { auto SI::Read(MI& mi, u32 addr) const -> u32 { switch(addr) { case 0x04800000: return dramAddr; + case 0x0480000C: return 0; case 0x04800018: { u32 val = 0; val |= status.dmaBusy; @@ -24,39 +24,33 @@ auto SI::Read(MI& mi, u32 addr) const -> u32 { val |= (status.intr << 12); return val; } - default: return 0; + default: return 0xFFFFFFFF; } } void SI::Write(Mem& mem, Registers& regs, u32 addr, u32 val) { switch(addr) { case 0x04800000: - dramAddr = val; + dramAddr = val & RDRAM_DSIZE; break; case 0x04800004: { - if(!(status.raw & 3)) { - ProcessPIFCommands(mem.pifRam, controller, mem); + ProcessPIFCommands(mem.pifRam, controller, mem); - pifAddr = (val & 0x7FC) & PIF_RAM_DSIZE; - for(int i = 0; i < 64; i++) { - mem.mmio.rdp.dram[BYTE_ADDRESS(dramAddr + i) & RDRAM_DSIZE] = mem.pifRam[pifAddr + i]; - } - InterruptRaise(mem.mmio.mi, regs, Interrupt::SI); - status.intr = 1; - //util::logdebug("SI DMA from PIF RAM to RDP RAM ({:08X} to {:08X})\n", pifAddr, dramAddr); + for(int i = 0; i < 64; i++) { + mem.mmio.rdp.dram[BYTE_ADDRESS(dramAddr + i)] = mem.pifRam[i]; } + InterruptRaise(mem.mmio.mi, regs, Interrupt::SI); + status.intr = 1; + util::logdebug("SI DMA from PIF RAM to RDRAM ({:08X} to {:08X})\n", val & 0x1FFFFFFF, dramAddr); } break; case 0x04800010: { - if(!(status.raw & 3)) { - pifAddr = (val & 0x7FC) & PIF_RAM_DSIZE; - for(int i = 0; i < 64; i++) { - mem.pifRam[pifAddr + i] = mem.mmio.rdp.dram[BYTE_ADDRESS(dramAddr + i) & RDRAM_DSIZE]; - } - ProcessPIFCommands(mem.pifRam, controller, mem); - InterruptRaise(mem.mmio.mi, regs, Interrupt::SI); - status.intr = 1; - //util::logdebug("SI DMA from RDP RAM to PIF RAM ({:08X} to {:08X})\n", dramAddr, pifAddr); + for(int i = 0; i < 64; i++) { + mem.pifRam[i] = mem.mmio.rdp.dram[BYTE_ADDRESS(dramAddr + i)]; } + ProcessPIFCommands(mem.pifRam, controller, mem); + InterruptRaise(mem.mmio.mi, regs, Interrupt::SI); + status.intr = 1; + util::logdebug("SI DMA from RDRAM to PIF RAM ({:08X} to {:08X})\n", dramAddr, val & 0x1FFFFFFF); } break; case 0x04800018: InterruptLower(mem.mmio.mi, regs, Interrupt::SI); diff --git a/src/n64/core/mmio/SI.hpp b/src/n64/core/mmio/SI.hpp index f919de71..1f05236e 100644 --- a/src/n64/core/mmio/SI.hpp +++ b/src/n64/core/mmio/SI.hpp @@ -25,7 +25,6 @@ struct SI { void Reset(); SIStatus status{}; u32 dramAddr{}; - u32 pifAddr{}; Controller controller{}; auto Read(MI&, u32) const -> u32; diff --git a/src/n64/core/rsp/decode.cpp b/src/n64/core/rsp/decode.cpp index c94b9200..9a8e65f2 100644 --- a/src/n64/core/rsp/decode.cpp +++ b/src/n64/core/rsp/decode.cpp @@ -50,8 +50,8 @@ inline void regimm(RSP& rsp, u32 instr) { switch(mask) { case 0x00: rsp.b(instr, (s32)rsp.gpr[RS(instr)] < 0); break; case 0x01: rsp.b(instr, (s32)rsp.gpr[RS(instr)] >= 0); break; - case 0x10: rsp.bl(instr, (s32)rsp.gpr[RS(instr)] < 0); break; - case 0x11: rsp.bl(instr, (s32)rsp.gpr[RS(instr)] >= 0); break; + case 0x10: rsp.blink(instr, (s32)rsp.gpr[RS(instr)] < 0); break; + case 0x11: rsp.blink(instr, (s32)rsp.gpr[RS(instr)] >= 0); break; default: util::panic("Unhandled RSP regimm instruction ({:05b})\n", mask); } } @@ -67,6 +67,7 @@ inline void lwc2(RSP& rsp, u32 instr) { case 0x05: rsp.lrv(instr); break; case 0x06: rsp.lpv(instr); break; case 0x07: rsp.luv(instr); break; + case 0x0A: printf("LWV\n"); break; case 0x0B: rsp.ltv(instr); break; default: util::panic("Unhandled RSP LWC2 {:05b}\n", mask); } @@ -83,6 +84,7 @@ inline void swc2(RSP& rsp, u32 instr) { case 0x04: rsp.sqv(instr); break; case 0x06: rsp.spv(instr); break; case 0x07: rsp.suv(instr); break; + case 0x0A: rsp.swv(instr); break; case 0x0B: rsp.stv(instr); break; default: util::panic("Unhandled RSP SWC2 {:05b}\n", mask); } @@ -106,11 +108,13 @@ inline void cop2(RSP& rsp, u32 instr) { } } break; + case 0x01: rsp.vmulu(instr); break; case 0x04: rsp.vmudl(instr); break; case 0x05: rsp.vmudm(instr); break; case 0x06: rsp.vmudn(instr); break; case 0x07: rsp.vmudh(instr); break; case 0x08: rsp.vmacf(instr); break; + case 0x09: rsp.vmacu(instr); break; case 0x0C: rsp.vmadl(instr); break; case 0x0D: rsp.vmadm(instr); break; case 0x0E: rsp.vmadn(instr); break; @@ -143,8 +147,9 @@ inline void cop2(RSP& rsp, u32 instr) { case 0x30: rsp.vrcp(instr); break; case 0x33: rsp.vmov(instr); break; case 0x34: rsp.vrsq(instr); break; - case 0x37: break; - case 0x3F: break; + case 0x37: case 0x3F: + printf("RSP VNULL or VNOP\n"); + break; default: util::panic("Unhandled RSP COP2 ({:06b})\n", mask); } } diff --git a/src/n64/core/rsp/instructions.cpp b/src/n64/core/rsp/instructions.cpp index 2807e1f3..84371537 100644 --- a/src/n64/core/rsp/instructions.cpp +++ b/src/n64/core/rsp/instructions.cpp @@ -39,10 +39,9 @@ inline auto GetCop0Reg(RSP& rsp, RDP& rdp, u8 index) -> u32{ case 9: return rdp.dpc.end; case 10: return rdp.dpc.current; case 11: return rdp.dpc.status.raw; - case 12: return 0; + case 12: return rdp.dpc.clock; default: util::panic("Unhandled RSP COP0 register read at index {}\n", index); } - return 0; } inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) { @@ -67,15 +66,9 @@ inline void SetCop0Reg(Registers& regs, Mem& mem, u8 index, u32 val) { ReleaseSemaphore(rsp); } break; - case 8: - rdp.dpc.start = val & 0xFFFFF8; - rdp.dpc.current = rdp.dpc.start; - break; - case 9: - rdp.dpc.end = val & 0xFFFFF8; - rdp.RunCommand(mi, regs, rsp); - break; - case 11: rdp.StatusWrite(mi, regs, rsp, val); break; + case 8: rdp.WriteStart(val); break; + case 9: rdp.WriteEnd(mi, regs, rsp, val); break; + case 11: rdp.WriteStatus(mi, regs, rsp, val); break; default: util::panic("Unhandled RSP COP0 register write at index {}\n", index); } } @@ -135,12 +128,12 @@ void RSP::andi(u32 instr) { void RSP::cfc2(u32 instr) { s16 value = 0; switch(RD(instr) & 3) { - case 0: value = VCOasU16(); break; - case 1: value = VCCasU16(); break; - case 2 ... 3: value = s8(GetVCE()); break; + case 0: value = GetVCO(); break; + case 1: value = GetVCC(); break; + case 2 ... 3: value = GetVCE(); break; } - gpr[RT(instr)] = s32(value); + gpr[RT(instr)] = value; } void RSP::ctc2(u32 instr) { @@ -167,11 +160,11 @@ void RSP::ctc2(u32 instr) { } void RSP::b(u32 instr, bool cond) { - s32 address = ((s32)((s16)(instr & 0xFFFF) << 2)) + pc; + u32 address = ((instr & 0xFFFF) << 2) + pc; branch(address, cond); } -void RSP::bl(u32 instr, bool cond) { +void RSP::blink(u32 instr, bool cond) { b(instr, cond); gpr[31] = pc + 4; } @@ -372,6 +365,17 @@ void RSP::sw(u32 instr) { WriteWord(address, gpr[RT(instr)]); } +void RSP::swv(u32 instr) { + u32 address = gpr[BASE(instr)] + SignExt7bit(OFFSET(instr), 4); + int base = address & 7; + address &= ~7; + + for(int i = E1(instr); i < E1(instr) + 16; i++) { + WriteByte(address + (base & 15), vpr[VT(instr)].byte[BYTE_INDEX(i & 15)]); + base++; + } +} + void RSP::sub(u32 instr) { gpr[RD(instr)] = gpr[RS(instr)] - gpr[RT(instr)]; } @@ -533,8 +537,16 @@ void RSP::sltiu(u32 instr) { gpr[RT(instr)] = (u32)gpr[RS(instr)] < imm; } -inline s16 clamp(s64 val) { - return std::clamp(val, -32768, 32767); +inline s16 signedClamp(s64 val) { + if(val < -32768) return -32768; + else if(val > 32767) return 32767; + return val; +} + +inline u16 unsignedClamp(s64 val) { + if(val < 0) return 0; + else if(val > 32767) return 65535; + return val; } void RSP::vabs(u32 instr) { @@ -569,7 +581,7 @@ void RSP::vadd(u32 instr) { for(int i = 0; i < 8; i++) { s32 result = vs.selement[i] + vte.selement[i] + (vco.l.selement[i] != 0); acc.l.element[i] = result; - vd.element[i] = clamp(result); + vd.element[i] = signedClamp(result); vco.l.element[i] = 0; vco.h.element[i] = 0; } @@ -581,10 +593,10 @@ void RSP::vaddc(u32 instr) { VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); for(int i = 0; i < 8; i++) { - u32 result = vs.selement[i] + vte.selement[i]; + u32 result = vs.element[i] + vte.element[i]; acc.l.element[i] = result; vd.element[i] = result; - vco.l.element[i] = (result >> 16) & 1 ? 0xffff : 0; + vco.l.element[i] = ((result >> 16) & 1) ? 0xffff : 0; vco.h.element[i] = 0; } } @@ -610,7 +622,7 @@ void RSP::vch(u32 instr) { vce.element[i] = result == -1 ? 0xffff : 0; } else { s16 result = vsElem - vteElem; - acc.l.selement[i] = (result >= 0) ? vteElem : vsElem; + acc.l.element[i] = (result >= 0) ? vteElem : vsElem; vcc.l.element[i] = vteElem < 0 ? 0xffff : 0; vcc.h.element[i] = result >= 0 ? 0xffff : 0; vco.l.element[i] = 0; @@ -660,26 +672,26 @@ void RSP::vcl(u32 instr) { VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); - for(int i = 0; i < 8; i++) { - u16 vsElem = vs.element[i]; - u16 vteElem = vte.element[i]; + for (int i = 0; i < 8; i++) { + u16 vs_element = vs.element[i]; + u16 vte_element = vte.element[i]; - acc.l.element[i] = vcc.l.element[i] ? -vteElem : vsElem; if(vco.l.element[i]) { if(!vco.h.element[i]) { - u16 clampSum = vsElem + vteElem; - bool overflow = (vsElem + vteElem) != clampSum; - + u16 clamped_sum = vs_element + vte_element; + bool overflow = (vs_element + vte_element) != clamped_sum; if(vce.element[i]) { - vcc.l.element[i] = !clampSum || !overflow ? 0xffff : 0; + vcc.l.element[i] = (!clamped_sum || !overflow) ? 0xffff : 0; } else { - vcc.l.element[i] = !clampSum && !overflow ? 0xffff : 0; + vcc.l.element[i] = (!clamped_sum && !overflow) ? 0xffff : 0; } } + acc.l.element[i] = vcc.l.element[i] ? -vte_element : vs_element; } else { if(!vco.h.element[i]) { - vcc.h.element[i] = (s32(vsElem) - s32(vteElem) >= 0) ? 0xffff : 0; + vcc.h.element[i] = ((s32)vs_element - (s32)vte_element >= 0) ? 0xffff : 0; } + acc.l.element[i] = vcc.h.element[i] ? vte_element : vs_element; } vco.l.element[i] = 0; @@ -690,30 +702,30 @@ void RSP::vcl(u32 instr) { } void RSP::vmov(u32 instr) { - int e = E2(instr); + u8 e = E2(instr), vs = VS(instr) & 7; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); u8 se; - - switch (e) { + e &= 7; + switch(e) { case 0 ... 1: - se = VS(instr) & 7; + se = (e & 0b000) | (vs & 0b111); break; case 2 ... 3: - se = (e & 1) | (VS(instr) & 6); + se = (e & 0b001) | (vs & 0b110); break; case 4 ... 7: - se = (e & 3) | (VS(instr) & 4); + se = (e & 0b011) | (vs & 0b100); break; case 8 ... 15: - se = e & 7; + se = (e & 0b111) | (vs & 0b000); break; default: util::panic("VMOV: This should be unreachable!\n"); } - u8 de = VS(instr) & 7; + u8 de = vs & 7; vd.element[ELEMENT_INDEX(de)] = vte.element[ELEMENT_INDEX(se)]; @@ -742,18 +754,37 @@ void RSP::vmulf(u32 instr) { SetACC(i, prod); - s16 result = clamp(prod >> 16); + s16 result = signedClamp(prod >> 16); + vd.element[i] = result; + } +} + +void RSP::vmulu(u32 instr) { + int e = E2(instr); + VPR& vs = vpr[VS(instr)]; + VPR& vd = vpr[VD(instr)]; + VPR vte = GetVTE(vpr[VT(instr)], e); + + for(int i = 0; i < 8; i++) { + s32 prod = vs.selement[i] * vte.selement[i]; + + s64 accum = prod * 2 + 0x8000; + SetACC(i, accum); + + u16 result = unsignedClamp(accum >> 16); vd.element[i] = result; } } void RSP::vmudl(u32 instr) { - int e = E2(instr); + u8 e = E2(instr); VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); for(int i = 0; i < 8; i++) { - u64 prod = (u64)vs.selement[i] * (u64)vte.selement[i]; + u64 op1 = vte.element[i]; + u64 op2 = vs.element[i]; + u64 prod = op1 * op2; u64 accum = prod >> 16; SetACC(i, accum); @@ -780,7 +811,7 @@ void RSP::vmudh(u32 instr) { s32 prod = vs.selement[i] * vte.selement[i]; s64 accum = prod; - s16 result = clamp(accum); + s16 result = signedClamp(accum); accum <<= 16; SetACC(i, accum); @@ -798,7 +829,7 @@ void RSP::vmudm(u32 instr) { s32 prod = vs.selement[i] * vte.element[i]; s64 accum = prod; - s16 result = clamp(accum >> 16); + s16 result = signedClamp(accum >> 16); SetACC(i, accum); vd.element[i] = result; @@ -838,7 +869,7 @@ void RSP::vmadh(u32 instr) { s64 accum = GetACC(i) + ((u64)prod << 16); SetACC(i, accum); - s16 result = clamp(accum >> 16); + s16 result = signedClamp(accum >> 16); vd.element[i] = result; } @@ -849,9 +880,13 @@ void RSP::vmadl(u32 instr) { VPR& vs = vpr[VS(instr)]; VPR& vd = vpr[VD(instr)]; VPR vte = GetVTE(vpr[VT(instr)], e); + for(int i = 0; i < 8; i++) { - u64 prod = (u64)vs.selement[i] * (u64)vte.selement[i]; - u64 accum = (prod >> 16) + GetACC(i); + u64 op1 = vte.element[i]; + u64 op2 = vs.element[i]; + u64 prod = op1 * op2; + u64 accDelta = prod >> 16; + u16 accum = GetACC(i) + accDelta; SetACC(i, accum); @@ -881,7 +916,7 @@ void RSP::vmadm(u32 instr) { SetACC(i, accum); accum = GetACC(i); - s16 result = clamp(accum >> 16); + s16 result = signedClamp(accum >> 16); vd.element[i] = result; } @@ -923,7 +958,27 @@ void RSP::vmacf(u32 instr) { s64 accum = GetACC(i) + accDelta; SetACC(i, accum); - s16 result = clamp(accum >> 16); + s16 result = signedClamp(accum >> 16); + vd.element[i] = result; + } +} + +void RSP::vmacu(u32 instr) { + VPR& vd = vpr[VD(instr)]; + VPR& vs = vpr[VS(instr)]; + VPR vte = GetVTE(vpr[VT(instr)], E2(instr)); + + for(int i = 0; i < 8; i++) { + s16 op1 = vte.element[i]; + s16 op2 = vs.element[i]; + s32 prod = op1 * op2; + s64 accDelta = prod; + accDelta *= 2; + s64 accum = GetACC(i) + accDelta; + SetACC(i, accum); + accum = GetACC(i); + + u16 result = unsignedClamp(accum >> 16); vd.element[i] = result; } } @@ -963,9 +1018,9 @@ void RSP::vge(u32 instr) { VPR vte = GetVTE(vpr[VT(instr)], e); for(int i = 0; i < 8; i++) { - bool eql = vs.element[i] == vte.element[i]; + bool eql = vs.selement[i] == vte.selement[i]; bool neg = !(vco.h.element[i] && vco.l.element[i]) && eql; - vcc.l.element[i] = (neg || (vs.element[i] > vte.element[i])) ? 0xffff : 0; + vcc.l.element[i] = (neg || (vs.selement[i] > vte.selement[i])) ? 0xffff : 0; acc.l.element[i] = vcc.l.element[i] ? vs.element[i] : vte.element[i]; vd.element[i] = acc.l.element[i]; vcc.h.element[i] = vco.h.element[i] = vco.l.element[i] = 0; @@ -1185,7 +1240,7 @@ void RSP::vsub(u32 instr) { for(int i = 0; i < 8; i++) { s32 result = vs.selement[i] - vte.selement[i] - (vco.l.element[i] != 0); acc.l.element[i] = result; - vd.element[i] = clamp(result); + vd.element[i] = signedClamp(result); vco.l.element[i] = 0; vco.h.element[i] = 0; @@ -1288,7 +1343,7 @@ void RSP::mfc2(u32 instr) { u8 hi = vpr[RD(instr)].byte[BYTE_INDEX(E1(instr))]; u8 lo = vpr[RD(instr)].byte[BYTE_INDEX((E1(instr) + 1) & 0xF)]; s16 elem = (hi << 8) | lo; - gpr[RT(instr)] = s32(elem); + gpr[RT(instr)] = elem; } void RSP::mtc2(u32 instr) { diff --git a/src/n64/memory_regions.hpp b/src/n64/memory_regions.hpp index 9bf99bac..cc9f920f 100644 --- a/src/n64/memory_regions.hpp +++ b/src/n64/memory_regions.hpp @@ -1,5 +1,4 @@ #pragma once - #define RDRAM_SIZE 0x800000 #define RDRAM_DSIZE (RDRAM_SIZE - 1) #define SRAM_SIZE 0x8000000