Undo TLB caching + minor improvements
This commit is contained in:
@@ -65,7 +65,7 @@ void AI::Write(const u32 addr, const u32 val) {
|
|||||||
const u32 oldDacFreq = dac.freq;
|
const u32 oldDacFreq = dac.freq;
|
||||||
dacRate = val & 0x3FFF;
|
dacRate = val & 0x3FFF;
|
||||||
dac.freq = std::max(1.f, (float)GetVideoFrequency(mem.IsROMPAL()) / (dacRate + 1)) * 1.037;
|
dac.freq = std::max(1.f, (float)GetVideoFrequency(mem.IsROMPAL()) / (dacRate + 1)) * 1.037;
|
||||||
dac.period = N64_CPU_FREQ / dac.freq;
|
dac.period = GetVideoFrequency(mem.IsROMPAL()) / dac.freq;
|
||||||
if (oldDacFreq != dac.freq) {
|
if (oldDacFreq != dac.freq) {
|
||||||
device.AdjustSampleRate(dac.freq);
|
device.AdjustSampleRate(dac.freq);
|
||||||
}
|
}
|
||||||
@@ -95,7 +95,7 @@ void AI::Step(const u32 cpuCycles, const float volumeL, const float volumeR) {
|
|||||||
const s16 r = s16(data);
|
const s16 r = s16(data);
|
||||||
|
|
||||||
if (volumeR > 0 && volumeL > 0) {
|
if (volumeR > 0 && volumeL > 0) {
|
||||||
device.PushSample((float)l / INT16_MAX, volumeL, (float)r / INT16_MAX, volumeR);
|
device.PushSample((float)l / std::numeric_limits<s16>::max(), volumeL, (float)r / std::numeric_limits<s16>::max(), volumeR);
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 addrLo = dmaAddr[0] + 4 & 0x1FFF;
|
const u32 addrLo = dmaAddr[0] + 4 & 0x1FFF;
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ void AudioDevice::PushSample(const float left, const float volumeL, const float
|
|||||||
|
|
||||||
if (const auto availableBytes = static_cast<float>(SDL_GetAudioStreamAvailable(audioStream));
|
if (const auto availableBytes = static_cast<float>(SDL_GetAudioStreamAvailable(audioStream));
|
||||||
availableBytes <= BYTES_PER_HALF_SECOND) {
|
availableBytes <= BYTES_PER_HALF_SECOND) {
|
||||||
SDL_PutAudioStreamData(audioStream, samples, 2 * sizeof(float));
|
SDL_PutAudioStreamData(audioStream, samples, 2 * SYSTEM_SAMPLE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!running) {
|
if (!running) {
|
||||||
@@ -47,10 +47,13 @@ void AudioDevice::PushSample(const float left, const float volumeL, const float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioDevice::AdjustSampleRate(const int sampleRate) {
|
void AudioDevice::AdjustSampleRate(int sampleRate) {
|
||||||
LockMutex();
|
LockMutex();
|
||||||
SDL_DestroyAudioStream(audioStream);
|
SDL_DestroyAudioStream(audioStream);
|
||||||
|
|
||||||
|
if (sampleRate < 4000) { // hack for Animal Forest. It requests a frequency of 3000-something. Weird asf
|
||||||
|
sampleRate *= 4000.f / static_cast<float>(sampleRate);
|
||||||
|
}
|
||||||
request = {SYSTEM_SAMPLE_FORMAT, 2, sampleRate};
|
request = {SYSTEM_SAMPLE_FORMAT, 2, sampleRate};
|
||||||
|
|
||||||
audioStream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &request, nullptr, nullptr);
|
audioStream = SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_PLAYBACK, &request, nullptr, nullptr);
|
||||||
|
|||||||
@@ -249,7 +249,7 @@ auto PI::BusRead<u32, false>(u32 addr) -> u32 {
|
|||||||
return mem.BackupRead<u32>(addr);
|
return mem.BackupRead<u32>(addr);
|
||||||
case REGION_PI_ROM:
|
case REGION_PI_ROM:
|
||||||
{
|
{
|
||||||
u32 index = addr - SREGION_PI_ROM;
|
const u32 index = addr - SREGION_PI_ROM;
|
||||||
if (index > mem.rom.cart.size() - 3) { // -3 because we're reading an entire word
|
if (index > mem.rom.cart.size() - 3) { // -3 because we're reading an entire word
|
||||||
switch (addr) {
|
switch (addr) {
|
||||||
case REGION_CART_ISVIEWER_BUFFER:
|
case REGION_CART_ISVIEWER_BUFFER:
|
||||||
@@ -261,9 +261,9 @@ auto PI::BusRead<u32, false>(u32 addr) -> u32 {
|
|||||||
}
|
}
|
||||||
Util::warn("Address 0x{:08X} accessed an index {}/0x{:X} outside the bounds of the ROM!", addr, index, index);
|
Util::warn("Address 0x{:08X} accessed an index {}/0x{:X} outside the bounds of the ROM!", addr, index, index);
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
|
||||||
return Util::ReadAccess<u32>(mem.rom.cart, index);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return Util::ReadAccess<u32>(mem.rom.cart, index);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
Util::panic("Should never end up here! Access to address {:08X} which did not match any PI bus regions!", addr);
|
Util::panic("Should never end up here! Access to address {:08X} which did not match any PI bus regions!", addr);
|
||||||
|
|||||||
@@ -293,13 +293,7 @@ static FORCE_INLINE u64 getVPN(const u64 addr, const u64 pageMask) {
|
|||||||
return vpn & ~mask;
|
return vpn & ~mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int* index) {
|
TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int &index) const {
|
||||||
if (tlbCache.contains(vaddr)) {
|
|
||||||
if (index)
|
|
||||||
*index = tlbCache[vaddr].index;
|
|
||||||
return tlbCache[vaddr].entry;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i = 0; i < 32; i++) {
|
for (int i = 0; i < 32; i++) {
|
||||||
if (TLBEntry *entry = ®s.cop0.tlb[i]; entry->initialized) {
|
if (TLBEntry *entry = ®s.cop0.tlb[i]; entry->initialized) {
|
||||||
const u64 entry_vpn = getVPN(entry->entryHi.raw, entry->pageMask.raw);
|
const u64 entry_vpn = getVPN(entry->entryHi.raw, entry->pageMask.raw);
|
||||||
@@ -309,10 +303,7 @@ TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int* index) {
|
|||||||
|
|
||||||
if (const bool asid_match = entry->global || regs.cop0.entryHi.asid == entry->entryHi.asid;
|
if (const bool asid_match = entry->global || regs.cop0.entryHi.asid == entry->entryHi.asid;
|
||||||
vpn_match && asid_match) {
|
vpn_match && asid_match) {
|
||||||
tlbCache[vaddr].entry = entry;
|
index = i;
|
||||||
tlbCache[vaddr].index = i;
|
|
||||||
if (index)
|
|
||||||
*index = i;
|
|
||||||
return entry;
|
return entry;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -321,44 +312,47 @@ TLBEntry *Cop0::TLBTryMatch(const u64 vaddr, int* index) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Cop0::ProbeTLB(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) {
|
TLBEntry *Cop0::TLBTryMatch(const u64 vaddr) const {
|
||||||
const TLBEntry *entry = TLBTryMatch(vaddr, nullptr);
|
for (auto &i : regs.cop0.tlb) {
|
||||||
|
if (TLBEntry *entry = &i; entry->initialized) {
|
||||||
|
const u64 entry_vpn = getVPN(entry->entryHi.raw, entry->pageMask.raw);
|
||||||
|
const u64 vaddr_vpn = getVPN(vaddr, entry->pageMask.raw);
|
||||||
|
|
||||||
|
const bool vpn_match = entry_vpn == vaddr_vpn;
|
||||||
|
|
||||||
|
if (const bool asid_match = entry->global || regs.cop0.entryHi.asid == entry->entryHi.asid;
|
||||||
|
vpn_match && asid_match) {
|
||||||
|
return entry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Cop0::ProbeTLB(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) const {
|
||||||
|
const TLBEntry *entry = TLBTryMatch(vaddr);
|
||||||
if (!entry) {
|
if (!entry) {
|
||||||
regs.cop0.tlbError = MISS;
|
regs.cop0.tlbError = MISS;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 mask = entry->pageMask.mask << 12 | 0xFFF;
|
const u32 mask = entry->pageMask.mask << 12 | 0xFFF;
|
||||||
const u32 odd = vaddr & (mask + 1);
|
const u32 odd = vaddr & mask + 1;
|
||||||
u32 pfn;
|
|
||||||
|
|
||||||
if (!odd) {
|
const EntryLo entryLo = odd ? entry->entryLo1 : entry->entryLo0;
|
||||||
if (!entry->entryLo0.v) {
|
|
||||||
|
if (!entryLo.v) {
|
||||||
regs.cop0.tlbError = INVALID;
|
regs.cop0.tlbError = INVALID;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (accessType == STORE && !entry->entryLo0.d) {
|
if (accessType == STORE && !entryLo.d) {
|
||||||
regs.cop0.tlbError = MODIFICATION;
|
regs.cop0.tlbError = MODIFICATION;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
pfn = entry->entryLo0.pfn;
|
paddr = entryLo.pfn << 12 | vaddr & mask;
|
||||||
} else {
|
|
||||||
if (!entry->entryLo1.v) {
|
|
||||||
regs.cop0.tlbError = INVALID;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (accessType == STORE && !entry->entryLo1.d) {
|
|
||||||
regs.cop0.tlbError = MODIFICATION;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
pfn = entry->entryLo1.pfn;
|
|
||||||
}
|
|
||||||
|
|
||||||
paddr = pfn << 12 | vaddr & mask;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -559,10 +553,10 @@ bool Cop0::UserMapVAddr32(const TLBAccessType accessType, const u64 vaddr, u32 &
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Cop0::MapVAddr32(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) {
|
bool Cop0::MapVAddr32(const TLBAccessType accessType, const u64 vaddr, u32 &paddr) {
|
||||||
switch (u32(vaddr) >> 29 & 7) {
|
switch (static_cast<u32>(vaddr) >> 29 & 7) {
|
||||||
case 0 ... 3:
|
case 0 ... 3:
|
||||||
case 7:
|
case 7:
|
||||||
return ProbeTLB(accessType, s64(s32(vaddr)), paddr);
|
return ProbeTLB(accessType, static_cast<s32>(vaddr), paddr);
|
||||||
case 4 ... 5:
|
case 4 ... 5:
|
||||||
paddr = vaddr & 0x1FFFFFFF;
|
paddr = vaddr & 0x1FFFFFFF;
|
||||||
return true;
|
return true;
|
||||||
|
|||||||
@@ -146,27 +146,11 @@ union Index {
|
|||||||
|
|
||||||
struct TLBEntry {
|
struct TLBEntry {
|
||||||
bool initialized;
|
bool initialized;
|
||||||
union {
|
EntryLo entryLo0, entryLo1;
|
||||||
u32 raw;
|
|
||||||
struct {
|
|
||||||
unsigned : 1;
|
|
||||||
unsigned v : 1;
|
|
||||||
unsigned d : 1;
|
|
||||||
unsigned c : 3;
|
|
||||||
unsigned pfn : 20;
|
|
||||||
unsigned : 6;
|
|
||||||
};
|
|
||||||
} entryLo0, entryLo1;
|
|
||||||
EntryHi entryHi;
|
EntryHi entryHi;
|
||||||
PageMask pageMask;
|
PageMask pageMask;
|
||||||
|
|
||||||
bool global;
|
bool global;
|
||||||
|
|
||||||
auto operator==(const TLBEntry& other) {
|
|
||||||
return initialized == other.initialized && entryLo0.raw == other.entryLo0.raw &&
|
|
||||||
entryLo1.raw == other.entryLo1.raw && entryHi.raw == other.entryHi.raw && pageMask.raw == other.pageMask.raw &&
|
|
||||||
global == other.global;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum TLBError : u8 { NONE, MISS, INVALID, MODIFICATION, DISALLOWED_ADDRESS };
|
enum TLBError : u8 { NONE, MISS, INVALID, MODIFICATION, DISALLOWED_ADDRESS };
|
||||||
@@ -243,12 +227,6 @@ struct Cop0 {
|
|||||||
s64 ErrorEPC{};
|
s64 ErrorEPC{};
|
||||||
u32 r31{};
|
u32 r31{};
|
||||||
TLBEntry tlb[32]{};
|
TLBEntry tlb[32]{};
|
||||||
struct TLBCachedEntry {
|
|
||||||
int index = -1;
|
|
||||||
TLBEntry *entry = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::unordered_map<u64, TLBCachedEntry> tlbCache;
|
|
||||||
TLBError tlbError = NONE;
|
TLBError tlbError = NONE;
|
||||||
s64 openbus{};
|
s64 openbus{};
|
||||||
template <class T>
|
template <class T>
|
||||||
@@ -280,7 +258,7 @@ struct Cop0 {
|
|||||||
|
|
||||||
enum TLBAccessType { LOAD, STORE };
|
enum TLBAccessType { LOAD, STORE };
|
||||||
|
|
||||||
bool ProbeTLB(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
bool ProbeTLB(TLBAccessType accessType, u64 vaddr, u32 &paddr) const;
|
||||||
void FireException(ExceptionCode code, int cop, s64 pc) const;
|
void FireException(ExceptionCode code, int cop, s64 pc) const;
|
||||||
bool MapVAddr(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
bool MapVAddr(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
||||||
bool UserMapVAddr32(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
bool UserMapVAddr32(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
||||||
@@ -288,7 +266,8 @@ struct Cop0 {
|
|||||||
bool UserMapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
bool UserMapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
||||||
bool MapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
bool MapVAddr64(TLBAccessType accessType, u64 vaddr, u32 &paddr);
|
||||||
|
|
||||||
TLBEntry *TLBTryMatch(u64 vaddr, int* index);
|
TLBEntry *TLBTryMatch(u64 vaddr, int &index) const;
|
||||||
|
TLBEntry *TLBTryMatch(u64 vaddr) const;
|
||||||
void HandleTLBException(u64 vaddr) const;
|
void HandleTLBException(u64 vaddr) const;
|
||||||
static ExceptionCode GetTLBExceptionCode(TLBError error, TLBAccessType accessType);
|
static ExceptionCode GetTLBExceptionCode(TLBError error, TLBAccessType accessType);
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#include <core/registers/Cop0.hpp>
|
#include <core/registers/Cop0.hpp>
|
||||||
#include <core/registers/Registers.hpp>
|
#include <core/registers/Registers.hpp>
|
||||||
#include <log.hpp>
|
#include <log.hpp>
|
||||||
|
#include <ranges>
|
||||||
|
|
||||||
namespace n64 {
|
namespace n64 {
|
||||||
void Cop0::mtc0(const u32 instr) { SetReg32(RD(instr), regs.Read<u32>(RT(instr))); }
|
void Cop0::mtc0(const u32 instr) { SetReg32(RD(instr), regs.Read<u32>(RT(instr))); }
|
||||||
@@ -50,17 +51,6 @@ void Cop0::tlbw(const int index_) {
|
|||||||
Util::panic("TLBWI with TLB index {}", index_);
|
Util::panic("TLBWI with TLB index {}", index_);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &[key, cachedTlb] : tlbCache) {
|
|
||||||
auto &[cachedIndex, cachedEntry] = cachedTlb;
|
|
||||||
if (cachedEntry) {
|
|
||||||
if (*cachedEntry == tlb[index_]) {
|
|
||||||
cachedIndex = -1;
|
|
||||||
cachedEntry = nullptr;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
tlb[index_].entryHi.raw = entryHi.raw;
|
tlb[index_].entryHi.raw = entryHi.raw;
|
||||||
tlb[index_].entryHi.vpn2 &= ~page_mask.mask;
|
tlb[index_].entryHi.vpn2 &= ~page_mask.mask;
|
||||||
|
|
||||||
@@ -74,7 +64,7 @@ void Cop0::tlbw(const int index_) {
|
|||||||
|
|
||||||
void Cop0::tlbp() {
|
void Cop0::tlbp() {
|
||||||
int match = -1;
|
int match = -1;
|
||||||
if (const TLBEntry *entry = TLBTryMatch(entryHi.raw, &match); entry && match >= 0) {
|
if (const TLBEntry *entry = TLBTryMatch(entryHi.raw, match); entry && match >= 0) {
|
||||||
index.raw = match;
|
index.raw = match;
|
||||||
} else {
|
} else {
|
||||||
index.raw = 0;
|
index.raw = 0;
|
||||||
|
|||||||
@@ -492,7 +492,7 @@ void Cop1::ceills(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundCeil<s64>(fs));
|
CHECK_FPE_CONST(s64, fd, Util::roundCeil(fs));
|
||||||
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -502,7 +502,7 @@ void Cop1::ceilld(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundCeil<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundCeil(fs))
|
||||||
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -512,7 +512,7 @@ void Cop1::ceilws(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil(fs))
|
||||||
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -522,7 +522,7 @@ void Cop1::ceilwd(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCeil(fs))
|
||||||
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -646,7 +646,7 @@ void Cop1::cvtwd(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent(fs))
|
||||||
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -656,7 +656,7 @@ void Cop1::cvtws(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundCurrent(fs))
|
||||||
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -666,7 +666,7 @@ void Cop1::cvtls(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundCurrent<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundCurrent(fs))
|
||||||
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -703,7 +703,7 @@ void Cop1::cvtld(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundCurrent<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundCurrent(fs))
|
||||||
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1074,7 +1074,7 @@ void Cop1::roundls(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundNearest<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundNearest(fs))
|
||||||
if (fd != fs && SetCauseInexact()) {
|
if (fd != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1088,7 +1088,7 @@ void Cop1::roundld(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundNearest<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundNearest(fs))
|
||||||
if (fd != fs && SetCauseInexact()) {
|
if (fd != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1102,7 +1102,7 @@ void Cop1::roundws(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest(fs))
|
||||||
if (fd != fs && SetCauseInexact()) {
|
if (fd != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1116,7 +1116,7 @@ void Cop1::roundwd(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundNearest(fs))
|
||||||
if (fd != fs && SetCauseInexact()) {
|
if (fd != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1130,7 +1130,7 @@ void Cop1::floorls(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundFloor<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundFloor(fs))
|
||||||
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1140,7 +1140,7 @@ void Cop1::floorld(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundFloor<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundFloor(fs))
|
||||||
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s64>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1150,7 +1150,7 @@ void Cop1::floorws(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor(fs))
|
||||||
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1160,7 +1160,7 @@ void Cop1::floorwd(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundFloor(fs))
|
||||||
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
FGR_D<s32>(regs.cop0.status, FD(instr)) = fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1170,7 +1170,7 @@ void Cop1::truncws(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc(fs))
|
||||||
if (static_cast<float>(fd) != fs && SetCauseInexact()) {
|
if (static_cast<float>(fd) != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1184,7 +1184,7 @@ void Cop1::truncwd(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s32>(fs))
|
if (!CheckCVTArg<s32>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc<s32>(fs))
|
CHECK_FPE_CONV_CONST(s32, fd, Util::roundTrunc(fs))
|
||||||
if (static_cast<double>(fd) != fs && SetCauseInexact()) {
|
if (static_cast<double>(fd) != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1198,7 +1198,7 @@ void Cop1::truncls(const u32 instr) {
|
|||||||
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<float>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundTrunc<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundTrunc(fs))
|
||||||
if (static_cast<float>(fd) != fs && SetCauseInexact()) {
|
if (static_cast<float>(fd) != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
@@ -1212,7 +1212,7 @@ void Cop1::truncld(const u32 instr) {
|
|||||||
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
const auto fs = FGR_S<double>(regs.cop0.status, FS(instr));
|
||||||
if (!CheckCVTArg<s64>(fs))
|
if (!CheckCVTArg<s64>(fs))
|
||||||
return;
|
return;
|
||||||
CHECK_FPE_CONST(s64, fd, Util::roundTrunc<s64>(fs))
|
CHECK_FPE_CONST(s64, fd, Util::roundTrunc(fs))
|
||||||
if (static_cast<double>(fd) != fs && SetCauseInexact()) {
|
if (static_cast<double>(fd) != fs && SetCauseInexact()) {
|
||||||
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC);
|
||||||
return;
|
return;
|
||||||
|
|||||||
@@ -4,8 +4,7 @@
|
|||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
|
|
||||||
namespace Util {
|
namespace Util {
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundCeil(float f) {
|
||||||
static FORCE_INLINE T roundCeil(float f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128 t = _mm_set_ss(f);
|
__m128 t = _mm_set_ss(f);
|
||||||
t = _mm_round_ss(t, t, _MM_FROUND_TO_POS_INF);
|
t = _mm_round_ss(t, t, _MM_FROUND_TO_POS_INF);
|
||||||
@@ -15,8 +14,7 @@ static FORCE_INLINE T roundCeil(float f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundCeil(double f) {
|
||||||
static FORCE_INLINE T roundCeil(double f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128d t = _mm_set_sd(f);
|
__m128d t = _mm_set_sd(f);
|
||||||
t = _mm_round_sd(t, t, _MM_FROUND_TO_POS_INF);
|
t = _mm_round_sd(t, t, _MM_FROUND_TO_POS_INF);
|
||||||
@@ -26,8 +24,7 @@ static FORCE_INLINE T roundCeil(double f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundNearest(float f) {
|
||||||
static FORCE_INLINE T roundNearest(float f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128 t = _mm_set_ss(f);
|
__m128 t = _mm_set_ss(f);
|
||||||
t = _mm_round_ss(t, t, _MM_FROUND_TO_NEAREST_INT);
|
t = _mm_round_ss(t, t, _MM_FROUND_TO_NEAREST_INT);
|
||||||
@@ -37,8 +34,7 @@ static FORCE_INLINE T roundNearest(float f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundNearest(double f) {
|
||||||
static FORCE_INLINE T roundNearest(double f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128d t = _mm_set_sd(f);
|
__m128d t = _mm_set_sd(f);
|
||||||
t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT);
|
t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT);
|
||||||
@@ -48,8 +44,7 @@ static FORCE_INLINE T roundNearest(double f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundCurrent(float f) {
|
||||||
static FORCE_INLINE T roundCurrent(float f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
auto t = _mm_set_ss(f);
|
auto t = _mm_set_ss(f);
|
||||||
t = _mm_round_ss(t, t, _MM_FROUND_CUR_DIRECTION);
|
t = _mm_round_ss(t, t, _MM_FROUND_CUR_DIRECTION);
|
||||||
@@ -59,8 +54,7 @@ static FORCE_INLINE T roundCurrent(float f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundCurrent(double f) {
|
||||||
static FORCE_INLINE T roundCurrent(double f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
auto t = _mm_set_sd(f);
|
auto t = _mm_set_sd(f);
|
||||||
t = _mm_round_sd(t, t, _MM_FROUND_CUR_DIRECTION);
|
t = _mm_round_sd(t, t, _MM_FROUND_CUR_DIRECTION);
|
||||||
@@ -71,8 +65,7 @@ static FORCE_INLINE T roundCurrent(double f) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundFloor(float f) {
|
||||||
static FORCE_INLINE T roundFloor(float f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128 t = _mm_set_ss(f);
|
__m128 t = _mm_set_ss(f);
|
||||||
t = _mm_round_ss(t, t, _MM_FROUND_TO_NEG_INF);
|
t = _mm_round_ss(t, t, _MM_FROUND_TO_NEG_INF);
|
||||||
@@ -82,8 +75,7 @@ static FORCE_INLINE T roundFloor(float f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundFloor(double f) {
|
||||||
static FORCE_INLINE T roundFloor(double f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128d t = _mm_set_sd(f);
|
__m128d t = _mm_set_sd(f);
|
||||||
t = _mm_round_sd(t, t, _MM_FROUND_TO_NEG_INF);
|
t = _mm_round_sd(t, t, _MM_FROUND_TO_NEG_INF);
|
||||||
@@ -93,8 +85,7 @@ static FORCE_INLINE T roundFloor(double f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundTrunc(float f) {
|
||||||
static FORCE_INLINE T roundTrunc(float f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128 t = _mm_set_ss(f);
|
__m128 t = _mm_set_ss(f);
|
||||||
t = _mm_round_ss(t, t, _MM_FROUND_TO_ZERO);
|
t = _mm_round_ss(t, t, _MM_FROUND_TO_ZERO);
|
||||||
@@ -104,8 +95,7 @@ static FORCE_INLINE T roundTrunc(float f) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
static FORCE_INLINE auto roundTrunc(double f) {
|
||||||
static FORCE_INLINE T roundTrunc(double f) {
|
|
||||||
#ifdef SIMD_SUPPORT
|
#ifdef SIMD_SUPPORT
|
||||||
__m128d t = _mm_set_sd(f);
|
__m128d t = _mm_set_sd(f);
|
||||||
t = _mm_round_sd(t, t, _MM_FROUND_TO_ZERO);
|
t = _mm_round_sd(t, t, _MM_FROUND_TO_ZERO);
|
||||||
|
|||||||
Reference in New Issue
Block a user