Undo TLB caching + minor improvements

This commit is contained in:
SimoneN64
2024-10-15 21:05:33 +02:00
parent a0d46ca24e
commit cf5b1def4f
8 changed files with 81 additions and 125 deletions

View File

@@ -65,7 +65,7 @@ void AI::Write(const u32 addr, const u32 val) {
const u32 oldDacFreq = dac.freq; const u32 oldDacFreq = dac.freq;
dacRate = val & 0x3FFF; dacRate = val & 0x3FFF;
dac.freq = std::max(1.f, (float)GetVideoFrequency(mem.IsROMPAL()) / (dacRate + 1)) * 1.037; dac.freq = std::max(1.f, (float)GetVideoFrequency(mem.IsROMPAL()) / (dacRate + 1)) * 1.037;
dac.period = N64_CPU_FREQ / dac.freq; dac.period = GetVideoFrequency(mem.IsROMPAL()) / dac.freq;
if (oldDacFreq != dac.freq) { if (oldDacFreq != dac.freq) {
device.AdjustSampleRate(dac.freq); device.AdjustSampleRate(dac.freq);
} }
@@ -95,7 +95,7 @@ void AI::Step(const u32 cpuCycles, const float volumeL, const float volumeR) {
const s16 r = s16(data); const s16 r = s16(data);
if (volumeR > 0 && volumeL > 0) { if (volumeR > 0 && volumeL > 0) {
device.PushSample((float)l / INT16_MAX, volumeL, (float)r / INT16_MAX, volumeR); device.PushSample((float)l / std::numeric_limits<s16>::max(), volumeL, (float)r / std::numeric_limits<s16>::max(), volumeR);
} }
const u32 addrLo = dmaAddr[0] + 4 & 0x1FFF; const u32 addrLo = dmaAddr[0] + 4 & 0x1FFF;

View File

@@ -38,7 +38,7 @@ void AudioDevice::PushSample(const float left, const float volumeL, const float
if (const auto availableBytes = static_cast<float>(SDL_GetAudioStreamAvailable(audioStream)); if (const auto availableBytes = static_cast<float>(SDL_GetAudioStreamAvailable(audioStream));
availableBytes <= BYTES_PER_HALF_SECOND) { availableBytes <= BYTES_PER_HALF_SECOND) {
SDL_PutAudioStreamData(audioStream, samples, 2 * sizeof(float)); SDL_PutAudioStreamData(audioStream, samples, 2 * SYSTEM_SAMPLE_SIZE);
} }
if (!running) { if (!running) {
@@ -47,10 +47,13 @@ void AudioDevice::PushSample(const float left, const float volumeL, const float
} }
} }
void AudioDevice::AdjustSampleRate(const int sampleRate) { void AudioDevice::AdjustSampleRate(int sampleRate) {
LockMutex(); LockMutex();
SDL_DestroyAudioStream(audioStream); SDL_DestroyAudioStream(audioStream);
if (sampleRate < 4000) { // hack for Animal Forest. It requests a frequency of 3000-something. Weird asf
sampleRate *= 4000.f / static_cast<float>(sampleRate);
}
request = {SYSTEM_SAMPLE_FORMAT, 2, sampleRate}; request = {SYSTEM_SAMPLE_FORMAT, 2, sampleRate};
audioStream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &request, nullptr, nullptr); audioStream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &request, nullptr, nullptr);

View File

@@ -249,7 +249,7 @@ auto PI::BusRead<u32, false>(u32 addr) -> u32 {
return mem.BackupRead<u32>(addr); return mem.BackupRead<u32>(addr);
case REGION_PI_ROM: case REGION_PI_ROM:
{ {
u32 index = addr - SREGION_PI_ROM; const u32 index = addr - SREGION_PI_ROM;
if (index > mem.rom.cart.size() - 3) { // -3 because we're reading an entire word if (index > mem.rom.cart.size() - 3) { // -3 because we're reading an entire word
switch (addr) { switch (addr) {
case REGION_CART_ISVIEWER_BUFFER: case REGION_CART_ISVIEWER_BUFFER:
@@ -261,9 +261,9 @@ auto PI::BusRead<u32, false>(u32 addr) -> u32 {
} }
Util::warn("Address 0x{:08X} accessed an index {}/0x{:X} outside the bounds of the ROM!", addr, index, index); Util::warn("Address 0x{:08X} accessed an index {}/0x{:X} outside the bounds of the ROM!", addr, index, index);
return 0; return 0;
} else {
return Util::ReadAccess<u32>(mem.rom.cart, index);
} }
return Util::ReadAccess<u32>(mem.rom.cart, index);
} }
default: default:
Util::panic("Should never end up here! Access to address {:08X} which did not match any PI bus regions!", addr); Util::panic("Should never end up here! Access to address {:08X} which did not match any PI bus regions!", addr);

View File

@@ -293,13 +293,7 @@ static FORCE_INLINE u64 getVPN(const u64 addr, const u64 pageMask) {
return vpn & ~mask; return vpn & ~mask;
} }
TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int* index) { TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int &index) const {
if (tlbCache.contains(vaddr)) {
if (index)
*index = tlbCache[vaddr].index;
return tlbCache[vaddr].entry;
}
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
if (TLBEntry *entry = &regs.cop0.tlb[i]; entry->initialized) { if (TLBEntry *entry = &regs.cop0.tlb[i]; entry->initialized) {
const u64 entry_vpn = getVPN(entry->entryHi.raw, entry->pageMask.raw); const u64 entry_vpn = getVPN(entry->entryHi.raw, entry->pageMask.raw);
@@ -309,10 +303,7 @@ TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int* index) {
if (const bool asid_match = entry->global || regs.cop0.entryHi.asid == entry->entryHi.asid; if (const bool asid_match = entry->global || regs.cop0.entryHi.asid == entry->entryHi.asid;
vpn_match && asid_match) { vpn_match && asid_match) {
tlbCache[vaddr].entry = entry; index = i;
tlbCache[vaddr].index = i;
if (index)
*index = i;
return entry; return entry;
} }
} }
@@ -321,44 +312,47 @@ TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int* index) {
return nullptr; return nullptr;
} }
bool Cop0::ProbeTLB(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) { TLBEntry *Cop0::TLBTryMatch(const u64 vaddr) const {
const TLBEntry *entry = TLBTryMatch(vaddr, nullptr); for (auto &i : regs.cop0.tlb) {
if (TLBEntry *entry = &i; entry->initialized) {
const u64 entry_vpn = getVPN(entry->entryHi.raw, entry->pageMask.raw);
const u64 vaddr_vpn = getVPN(vaddr, entry->pageMask.raw);
const bool vpn_match = entry_vpn == vaddr_vpn;
if (const bool asid_match = entry->global || regs.cop0.entryHi.asid == entry->entryHi.asid;
vpn_match && asid_match) {
return entry;
}
}
}
return nullptr;
}
bool Cop0::ProbeTLB(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) const {
const TLBEntry *entry = TLBTryMatch(vaddr);
if (!entry) { if (!entry) {
regs.cop0.tlbError = MISS; regs.cop0.tlbError = MISS;
return false; return false;
} }
const u32 mask = entry->pageMask.mask << 12 | 0xFFF; const u32 mask = entry->pageMask.mask << 12 | 0xFFF;
const u32 odd = vaddr & (mask + 1); const u32 odd = vaddr & mask + 1;
u32 pfn;
if (!odd) { const EntryLo entryLo = odd ? entry->entryLo1 : entry->entryLo0;
if (!entry->entryLo0.v) {
if (!entryLo.v) {
regs.cop0.tlbError = INVALID; regs.cop0.tlbError = INVALID;
return false; return false;
} }
if (accessType == STORE && !entry->entryLo0.d) { if (accessType == STORE && !entryLo.d) {
regs.cop0.tlbError = MODIFICATION; regs.cop0.tlbError = MODIFICATION;
return false; return false;
} }
pfn = entry->entryLo0.pfn; paddr = entryLo.pfn << 12 | vaddr & mask;
} else {
if (!entry->entryLo1.v) {
regs.cop0.tlbError = INVALID;
return false;
}
if (accessType == STORE && !entry->entryLo1.d) {
regs.cop0.tlbError = MODIFICATION;
return false;
}
pfn = entry->entryLo1.pfn;
}
paddr = pfn << 12 | vaddr & mask;
return true; return true;
} }
@@ -559,10 +553,10 @@ bool Cop0::UserMapVAddr32(const TLBAccessType accessType, const u64 vaddr, u32 &
} }
bool Cop0::MapVAddr32(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) { bool Cop0::MapVAddr32(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) {
switch (u32(vaddr) >> 29 & 7) { switch (static_cast<u32>(vaddr) >> 29 & 7) {
case 0 ... 3: case 0 ... 3:
case 7: case 7:
return ProbeTLB(accessType, s64(s32(vaddr)), paddr); return ProbeTLB(accessType, static_cast<s32>(vaddr), paddr);
case 4 ... 5: case 4 ... 5:
paddr = vaddr & 0x1FFFFFFF; paddr = vaddr & 0x1FFFFFFF;
return true; return true;

View File

@@ -146,27 +146,11 @@ union Index {
struct TLBEntry { struct TLBEntry {
bool initialized; bool initialized;
union { EntryLo entryLo0, entryLo1;
u32 raw;
struct {
unsigned : 1;
unsigned v : 1;
unsigned d : 1;
unsigned c : 3;
unsigned pfn : 20;
unsigned : 6;
};
} entryLo0, entryLo1;
EntryHi entryHi; EntryHi entryHi;
PageMask pageMask; PageMask pageMask;
bool global; bool global;
auto operator==(const TLBEntry& other) {
return initialized == other.initialized && entryLo0.raw == other.entryLo0.raw &&
entryLo1.raw == other.entryLo1.raw && entryHi.raw == other.entryHi.raw && pageMask.raw == other.pageMask.raw &&
global == other.global;
}
}; };
enum TLBError : u8 { NONE, MISS, INVALID, MODIFICATION, DISALLOWED_ADDRESS }; enum TLBError : u8 { NONE, MISS, INVALID, MODIFICATION, DISALLOWED_ADDRESS };
@@ -243,12 +227,6 @@ struct Cop0 {
s64 ErrorEPC{}; s64 ErrorEPC{};
u32 r31{}; u32 r31{};
TLBEntry tlb[32]{}; TLBEntry tlb[32]{};
struct TLBCachedEntry {
int index = -1;
TLBEntry *entry = nullptr;
};
std::unordered_map<u64, TLBCachedEntry> tlbCache;
TLBError tlbError = NONE; TLBError tlbError = NONE;
s64 openbus{}; s64 openbus{};
template <class T> template <class T>
@@ -280,7 +258,7 @@ struct Cop0 {
enum TLBAccessType { LOAD, STORE }; enum TLBAccessType { LOAD, STORE };
bool ProbeTLB(TLBAccessType accessType, u64 vaddr, u32 &paddr); bool ProbeTLB(TLBAccessType accessType, u64 vaddr, u32 &paddr) const;
void FireException(ExceptionCode code, int cop, s64 pc) const; void FireException(ExceptionCode code, int cop, s64 pc) const;
bool MapVAddr(TLBAccessType accessType, u64 vaddr, u32 &paddr); bool MapVAddr(TLBAccessType accessType, u64 vaddr, u32 &paddr);
bool UserMapVAddr32(TLBAccessType accessType, u64 vaddr, u32 &paddr); bool UserMapVAddr32(TLBAccessType accessType, u64 vaddr, u32 &paddr);
@@ -288,7 +266,8 @@ struct Cop0 {
bool UserMapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr); bool UserMapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr);
bool MapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr); bool MapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr);
TLBEntry *TLBTryMatch(u64 vaddr, int* index); TLBEntry *TLBTryMatch(u64 vaddr, int &index) const;
TLBEntry *TLBTryMatch(u64 vaddr) const;
void HandleTLBException(u64 vaddr) const; void HandleTLBException(u64 vaddr) const;
static ExceptionCode GetTLBExceptionCode(TLBError error, TLBAccessType accessType); static ExceptionCode GetTLBExceptionCode(TLBError error, TLBAccessType accessType);

View File

@@ -1,6 +1,7 @@
#include <core/registers/Cop0.hpp> #include <core/registers/Cop0.hpp>
#include <core/registers/Registers.hpp> #include <core/registers/Registers.hpp>
#include <log.hpp> #include <log.hpp>
#include <ranges>
namespace n64 { namespace n64 {
void Cop0::mtc0(const u32 instr) { SetReg32(RD(instr), regs.Read<u32>(RT(instr))); } void Cop0::mtc0(const u32 instr) { SetReg32(RD(instr), regs.Read<u32>(RT(instr))); }
@@ -50,17 +51,6 @@ void Cop0::tlbw(const int index_) {
Util::panic("TLBWI with TLB index {}", index_); Util::panic("TLBWI with TLB index {}", index_);
} }
for (auto &[key, cachedTlb] : tlbCache) {
auto &[cachedIndex, cachedEntry] = cachedTlb;
if (cachedEntry) {
if (*cachedEntry == tlb[index_]) {
cachedIndex = -1;
cachedEntry = nullptr;
break;
}
}
}
tlb[index_].entryHi.raw = entryHi.raw; tlb[index_].entryHi.raw = entryHi.raw;
tlb[index_].entryHi.vpn2 &= ~page_mask.mask; tlb[index_].entryHi.vpn2 &= ~page_mask.mask;
@@ -74,7 +64,7 @@ void Cop0::tlbw(const int index_) {
void Cop0::tlbp() { void Cop0::tlbp() {
int match = -1; int match = -1;
if (const TLBEntry *entry = TLBTryMatch(entryHi.raw, &match); entry && match >= 0) { if (const TLBEntry *entry = TLBTryMatch(entryHi.raw, match); entry && match >= 0) {
index.raw = match; index.raw = match;
} else { } else {
index.raw = 0; index.raw = 0;

View File

@@ -492,7 +492,7 @@ void Cop1::ceills(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundCeil<s64>(fs)); CHECK_FPE_CONST(s64, fd, Util::roundCeil(fs));
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd; FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -502,7 +502,7 @@ void Cop1::ceilld(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundCeil<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundCeil(fs))
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd; FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -512,7 +512,7 @@ void Cop1::ceilws(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil(fs))
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd; FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -522,7 +522,7 @@ void Cop1::ceilwd(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil(fs))
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd; FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -646,7 +646,7 @@ void Cop1::cvtwd(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent(fs))
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd; FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -656,7 +656,7 @@ void Cop1::cvtws(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent(fs))
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd; FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -666,7 +666,7 @@ void Cop1::cvtls(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundCurrent<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundCurrent(fs))
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd; FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -703,7 +703,7 @@ void Cop1::cvtld(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundCurrent<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundCurrent(fs))
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd; FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -1074,7 +1074,7 @@ void Cop1::roundls(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundNearest<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundNearest(fs))
if (fd != fs && SetCauseInexact()) { if (fd != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1088,7 +1088,7 @@ void Cop1::roundld(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundNearest<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundNearest(fs))
if (fd != fs && SetCauseInexact()) { if (fd != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1102,7 +1102,7 @@ void Cop1::roundws(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest(fs))
if (fd != fs && SetCauseInexact()) { if (fd != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1116,7 +1116,7 @@ void Cop1::roundwd(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest(fs))
if (fd != fs && SetCauseInexact()) { if (fd != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1130,7 +1130,7 @@ void Cop1::floorls(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundFloor<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundFloor(fs))
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd; FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -1140,7 +1140,7 @@ void Cop1::floorld(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundFloor<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundFloor(fs))
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd; FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -1150,7 +1150,7 @@ void Cop1::floorws(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor(fs))
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd; FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -1160,7 +1160,7 @@ void Cop1::floorwd(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor(fs))
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd; FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
} }
@@ -1170,7 +1170,7 @@ void Cop1::truncws(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc(fs))
if (static_cast<float>(fd) != fs && SetCauseInexact()) { if (static_cast<float>(fd) != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1184,7 +1184,7 @@ void Cop1::truncwd(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s32>(fs)) if (!CheckCVTArg<s32>(fs))
return; return;
CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc<s32>(fs)) CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc(fs))
if (static_cast<double>(fd) != fs && SetCauseInexact()) { if (static_cast<double>(fd) != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1198,7 +1198,7 @@ void Cop1::truncls(const u32 instr) {
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundTrunc<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundTrunc(fs))
if (static_cast<float>(fd) != fs && SetCauseInexact()) { if (static_cast<float>(fd) != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;
@@ -1212,7 +1212,7 @@ void Cop1::truncld(const u32 instr) {
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr)); const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
if (!CheckCVTArg<s64>(fs)) if (!CheckCVTArg<s64>(fs))
return; return;
CHECK_FPE_CONST(s64, fd, Util::roundTrunc<s64>(fs)) CHECK_FPE_CONST(s64, fd, Util::roundTrunc(fs))
if (static_cast<double>(fd) != fs && SetCauseInexact()) { if (static_cast<double>(fd) != fs && SetCauseInexact()) {
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
return; return;

View File

@@ -4,8 +4,7 @@
#include <immintrin.h> #include <immintrin.h>
namespace Util { namespace Util {
template <typename T> static FORCE_INLINE auto roundCeil(float f) {
static FORCE_INLINE T roundCeil(float f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128 t = _mm_set_ss(f); __m128 t = _mm_set_ss(f);
t = _mm_round_ss(t, t, _MM_FROUND_TO_POS_INF); t = _mm_round_ss(t, t, _MM_FROUND_TO_POS_INF);
@@ -15,8 +14,7 @@ static FORCE_INLINE T roundCeil(float f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundCeil(double f) {
static FORCE_INLINE T roundCeil(double f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128d t = _mm_set_sd(f); __m128d t = _mm_set_sd(f);
t = _mm_round_sd(t, t, _MM_FROUND_TO_POS_INF); t = _mm_round_sd(t, t, _MM_FROUND_TO_POS_INF);
@@ -26,8 +24,7 @@ static FORCE_INLINE T roundCeil(double f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundNearest(float f) {
static FORCE_INLINE T roundNearest(float f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128 t = _mm_set_ss(f); __m128 t = _mm_set_ss(f);
t = _mm_round_ss(t, t, _MM_FROUND_TO_NEAREST_INT); t = _mm_round_ss(t, t, _MM_FROUND_TO_NEAREST_INT);
@@ -37,8 +34,7 @@ static FORCE_INLINE T roundNearest(float f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundNearest(double f) {
static FORCE_INLINE T roundNearest(double f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128d t = _mm_set_sd(f); __m128d t = _mm_set_sd(f);
t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT); t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT);
@@ -48,8 +44,7 @@ static FORCE_INLINE T roundNearest(double f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundCurrent(float f) {
static FORCE_INLINE T roundCurrent(float f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
auto t = _mm_set_ss(f); auto t = _mm_set_ss(f);
t = _mm_round_ss(t, t, _MM_FROUND_CUR_DIRECTION); t = _mm_round_ss(t, t, _MM_FROUND_CUR_DIRECTION);
@@ -59,8 +54,7 @@ static FORCE_INLINE T roundCurrent(float f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundCurrent(double f) {
static FORCE_INLINE T roundCurrent(double f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
auto t = _mm_set_sd(f); auto t = _mm_set_sd(f);
t = _mm_round_sd(t, t, _MM_FROUND_CUR_DIRECTION); t = _mm_round_sd(t, t, _MM_FROUND_CUR_DIRECTION);
@@ -71,8 +65,7 @@ static FORCE_INLINE T roundCurrent(double f) {
} }
template <typename T> static FORCE_INLINE auto roundFloor(float f) {
static FORCE_INLINE T roundFloor(float f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128 t = _mm_set_ss(f); __m128 t = _mm_set_ss(f);
t = _mm_round_ss(t, t, _MM_FROUND_TO_NEG_INF); t = _mm_round_ss(t, t, _MM_FROUND_TO_NEG_INF);
@@ -82,8 +75,7 @@ static FORCE_INLINE T roundFloor(float f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundFloor(double f) {
static FORCE_INLINE T roundFloor(double f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128d t = _mm_set_sd(f); __m128d t = _mm_set_sd(f);
t = _mm_round_sd(t, t, _MM_FROUND_TO_NEG_INF); t = _mm_round_sd(t, t, _MM_FROUND_TO_NEG_INF);
@@ -93,8 +85,7 @@ static FORCE_INLINE T roundFloor(double f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundTrunc(float f) {
static FORCE_INLINE T roundTrunc(float f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128 t = _mm_set_ss(f); __m128 t = _mm_set_ss(f);
t = _mm_round_ss(t, t, _MM_FROUND_TO_ZERO); t = _mm_round_ss(t, t, _MM_FROUND_TO_ZERO);
@@ -104,8 +95,7 @@ static FORCE_INLINE T roundTrunc(float f) {
#endif #endif
} }
template <typename T> static FORCE_INLINE auto roundTrunc(double f) {
static FORCE_INLINE T roundTrunc(double f) {
#ifdef SIMD_SUPPORT #ifdef SIMD_SUPPORT
__m128d t = _mm_set_sd(f); __m128d t = _mm_set_sd(f);
t = _mm_round_sd(t, t, _MM_FROUND_TO_ZERO); t = _mm_round_sd(t, t, _MM_FROUND_TO_ZERO);