Fpu improvements
This commit is contained in:
@@ -18,9 +18,38 @@ FORCE_INLINE bool FireFPUException(Registers& regs) {
|
||||
}
|
||||
|
||||
#define CheckFPUException() do { if(FireFPUException(regs)) { return; } } while(0)
|
||||
#define CheckFPUUsable_PreserveCause() do { if(!regs.cop0.status.cu1) { FireException(regs, ExceptionCode::CoprocessorUnusable, 1, true); return; } } while(0)
|
||||
#define CheckFPUUsable() do { CheckFPUUsable_PreserveCause(); regs.cop1.fcr31.cause = 0; } while(0)
|
||||
#define CheckRound(a, b) do { if ((a) != (b)) { fcr31.cause_inexact_operation = true; if(!fcr31.enable_inexact_operation) { fcr31.flag_inexact_operation = true; } } CheckFPUException(); } while(0);
|
||||
#define CheckRound(a, b) do { if ((a) != (b)) { fcr31.cause_inexact_operation = true; if(!fcr31.enable_inexact_operation) { fcr31.flag_inexact_operation = true; } } CheckFPUException(); } while(0)
|
||||
#define SetCauseUnimplemented() do { regs.cop1.fcr31.cause_unimplemented_operation = true; } while(0)
|
||||
#define SetCauseUnderflow() do { \
|
||||
regs.cop1.fcr31.cause_underflow = true; \
|
||||
if(!regs.cop1.fcr31.enable_underflow) { \
|
||||
regs.cop1.fcr31.flag_underflow = true; \
|
||||
} \
|
||||
} while(0)
|
||||
#define SetCauseInexact() do { \
|
||||
regs.cop1.fcr31.cause_inexact_operation = true; \
|
||||
if(!regs.cop1.fcr31.enable_inexact_operation) { \
|
||||
regs.cop1.fcr31.flag_inexact_operation = true; \
|
||||
} \
|
||||
} while(0)
|
||||
#define SetCauseDivisionByZero() do { \
|
||||
regs.cop1.fcr31.cause_division_by_zero = true; \
|
||||
if(!regs.cop1.fcr31.enable_division_by_zero) { \
|
||||
regs.cop1.fcr31.flag_division_by_zero = true; \
|
||||
} \
|
||||
} while(0)
|
||||
#define SetCauseOverflow() do { \
|
||||
regs.cop1.fcr31.cause_overflow = true; \
|
||||
if(!regs.cop1.fcr31.enable_overflow) { \
|
||||
regs.cop1.fcr31.flag_overflow = true; \
|
||||
} \
|
||||
} while(0)
|
||||
#define SetCauseInvalid() do { \
|
||||
regs.cop1.fcr31.cause_invalid_operation = true; \
|
||||
if(!regs.cop1.fcr31.enable_invalid_operation) { \
|
||||
regs.cop1.fcr31.flag_invalid_operation = true; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
FORCE_INLINE int PushRoundingMode(const FCR31& fcr31) {
|
||||
int og = fegetround();
|
||||
@@ -44,52 +73,42 @@ FORCE_INLINE int PushRoundingMode(const FCR31& fcr31) {
|
||||
#define OP(T, op) do { \
|
||||
CheckFPUUsable(); \
|
||||
auto fs = GetFGR_FS<T>(regs.cop0, FS(instr)); \
|
||||
auto ft = GetFGR_FT<T>(FT(instr)); \
|
||||
auto ft = GetFGR_FT<T>(FT(instr)); \
|
||||
CheckArg(fs); \
|
||||
CheckArg(ft); \
|
||||
T result; \
|
||||
OP_CheckExcept({result = (op);}); \
|
||||
OP_CheckExcept({result = (op);}); \
|
||||
CheckResult(result); \
|
||||
SetFGR_Raw<T>(FD(instr), result); \
|
||||
} while(0)
|
||||
|
||||
template <typename T>
|
||||
FORCE_INLINE void SetCauseByArgCVT(Registers& regs, T f) {
|
||||
if constexpr(sizeof(T) == 4) {
|
||||
switch (std::fpclassify(f)) {
|
||||
case FP_NAN:
|
||||
case FP_INFINITE:
|
||||
case FP_SUBNORMAL:
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
break;
|
||||
T min, max;
|
||||
if constexpr(std::is_same_v<T, float>) {
|
||||
min = -2147483648.0f;
|
||||
max = 2147483648.0f;
|
||||
} else if constexpr(std::is_same_v<T, double>) {
|
||||
min = -9007199254740992.000000;
|
||||
max = 9007199254740992.000000;
|
||||
}
|
||||
|
||||
case FP_NORMAL:
|
||||
// Check overflow
|
||||
if (f >= 2147483648.0 || f < -2147483648.0) {
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
}
|
||||
break;
|
||||
switch (std::fpclassify(f)) {
|
||||
case FP_NAN:
|
||||
case FP_INFINITE:
|
||||
case FP_SUBNORMAL:
|
||||
SetCauseUnimplemented();
|
||||
break;
|
||||
|
||||
case FP_ZERO:
|
||||
break; // Fine
|
||||
}
|
||||
} else if constexpr(sizeof(T) == 8) {
|
||||
switch (std::fpclassify(f)) {
|
||||
case FP_NAN:
|
||||
case FP_INFINITE:
|
||||
case FP_SUBNORMAL:
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
break;
|
||||
case FP_NORMAL:
|
||||
// Check overflow
|
||||
if (f >= max || f <= min) {
|
||||
SetCauseUnimplemented();
|
||||
}
|
||||
break;
|
||||
|
||||
case FP_NORMAL:
|
||||
// Check overflow
|
||||
if (f >= 9007199254740992.000000 || f <= -9007199254740992.000000) {
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
}
|
||||
break;
|
||||
|
||||
case FP_ZERO:
|
||||
break; // Fine
|
||||
}
|
||||
case FP_ZERO:
|
||||
break; // Fine
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,48 +119,33 @@ FORCE_INLINE void SetFPUCauseRaised(Registers& regs, int raised) {
|
||||
|
||||
if (raised & FE_UNDERFLOW) {
|
||||
if (!regs.cop1.fcr31.fs || regs.cop1.fcr31.enable_underflow || regs.cop1.fcr31.enable_inexact_operation) {
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
SetCauseUnimplemented();
|
||||
return;
|
||||
} else {
|
||||
regs.cop1.fcr31.cause_underflow = true;
|
||||
if(!regs.cop1.fcr31.enable_underflow) {
|
||||
regs.cop1.fcr31.flag_underflow = true;
|
||||
}
|
||||
SetCauseUnderflow();
|
||||
}
|
||||
}
|
||||
|
||||
if (raised & FE_INEXACT) {
|
||||
regs.cop1.fcr31.cause_inexact_operation = true;
|
||||
if(!regs.cop1.fcr31.enable_inexact_operation) {
|
||||
regs.cop1.fcr31.flag_inexact_operation = true;
|
||||
}
|
||||
SetCauseInexact();
|
||||
}
|
||||
|
||||
if (raised & FE_DIVBYZERO) {
|
||||
regs.cop1.fcr31.cause_division_by_zero = true;
|
||||
if(!regs.cop1.fcr31.enable_division_by_zero) {
|
||||
regs.cop1.fcr31.flag_division_by_zero = true;
|
||||
}
|
||||
SetCauseDivisionByZero();
|
||||
}
|
||||
|
||||
if (raised & FE_OVERFLOW) {
|
||||
regs.cop1.fcr31.cause_overflow = true;
|
||||
if(!regs.cop1.fcr31.enable_overflow) {
|
||||
regs.cop1.fcr31.flag_overflow = true;
|
||||
}
|
||||
SetCauseOverflow();
|
||||
}
|
||||
|
||||
if (raised & FE_INVALID) {
|
||||
regs.cop1.fcr31.cause_invalid_operation = true;
|
||||
if(!regs.cop1.fcr31.enable_invalid_operation) {
|
||||
regs.cop1.fcr31.flag_invalid_operation = true;
|
||||
}
|
||||
SetCauseInvalid();
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE void SetFPUCauseCVTRaised(Registers& regs, int raised) {
|
||||
if(raised & FE_INVALID) {
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
SetCauseUnimplemented();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -154,16 +158,13 @@ FORCE_INLINE void SetCauseByArg(Registers& regs, T f) {
|
||||
switch(c) {
|
||||
case FP_NAN:
|
||||
if(isqnan(f)) {
|
||||
regs.cop1.fcr31.cause_invalid_operation = true;
|
||||
if(!regs.cop1.fcr31.enable_invalid_operation) {
|
||||
regs.cop1.fcr31.flag_invalid_operation = true;
|
||||
}
|
||||
SetCauseInvalid();
|
||||
} else {
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
SetCauseUnimplemented();
|
||||
}
|
||||
break;
|
||||
case FP_SUBNORMAL:
|
||||
regs.cop1.fcr31.cause_unimplemented_operation = true;
|
||||
SetCauseUnimplemented();
|
||||
break;
|
||||
case FP_INFINITE:
|
||||
case FP_ZERO:
|
||||
@@ -174,21 +175,77 @@ FORCE_INLINE void SetCauseByArg(Registers& regs, T f) {
|
||||
}
|
||||
}
|
||||
|
||||
#define PUSHROUNDINGMODE int og = PushRoundingMode(fcr31)
|
||||
#define POPROUNDINGMODE fesetround(og)
|
||||
|
||||
#define any_unordered(fs, ft) (std::isnan(fs) || std::isnan(ft))
|
||||
|
||||
#define F_TO_U32(f) (*((u32*)(&(f))))
|
||||
#define D_TO_U64(d) (*((u64*)(&(d))))
|
||||
#define U64_TO_D(d) (*((double*)(&(d))))
|
||||
#define U32_TO_F(f) (*((float*)(&(f))))
|
||||
|
||||
template <typename T>
|
||||
FORCE_INLINE void SetCauseOnResult(Registers& regs, T& d) {
|
||||
Cop1& cop1 = regs.cop1;
|
||||
int classification = std::fpclassify(d);
|
||||
T magic, min;
|
||||
if constexpr(std::is_same_v<T, float>) {
|
||||
u32 c = 0x7FBFFFFF;
|
||||
magic = U32_TO_F(c);
|
||||
min = FLT_MIN;
|
||||
} else if constexpr(std::is_same_v<T, double>) {
|
||||
u64 c = 0x7FF7FFFFFFFFFFFF;
|
||||
magic = U64_TO_D(c);
|
||||
min = DBL_MIN;
|
||||
}
|
||||
switch (classification) {
|
||||
case FP_NAN:
|
||||
d = magic; // set result to sNAN
|
||||
break;
|
||||
case FP_SUBNORMAL:
|
||||
if (!cop1.fcr31.fs || cop1.fcr31.enable_underflow || cop1.fcr31.enable_inexact_operation) {
|
||||
SetCauseUnimplemented();
|
||||
} else {
|
||||
// Since the if statement checks for the corresponding enable bits, it's safe to turn these cause bits on here.
|
||||
SetCauseUnderflow();
|
||||
SetCauseInexact();
|
||||
switch (cop1.fcr31.rounding_mode) {
|
||||
case 0:
|
||||
case 1:
|
||||
d = std::copysign(0, d);
|
||||
break;
|
||||
case 2:
|
||||
if (std::signbit(d)) {
|
||||
d = -(T)0;
|
||||
} else {
|
||||
d = min;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
if (std::signbit(d)) {
|
||||
d = -min;
|
||||
} else {
|
||||
d = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case FP_INFINITE:
|
||||
case FP_ZERO:
|
||||
case FP_NORMAL:
|
||||
break; // No-op, these are fine.
|
||||
default:
|
||||
Util::panic("Unknown FP classification: {}", classification);
|
||||
}
|
||||
}
|
||||
|
||||
#define CheckResult(f) do { SetCauseOnResult(regs, (f)); CheckFPUException(); } while(0)
|
||||
|
||||
#define any_unordered(fs, ft) (std::isnan(fs) || std::isnan(ft))
|
||||
|
||||
template <typename T>
|
||||
FORCE_INLINE bool isnan(T f) {
|
||||
if constexpr(std::is_same<T, float>::value) {
|
||||
if constexpr(std::is_same_v<T, float>) {
|
||||
u32 v = F_TO_U32(f);
|
||||
return ((v & 0x7F800000) == 0x7F800000) && ((v & 0x7FFFFF) != 0);
|
||||
} else if constexpr(std::is_same<T, double>::value) {
|
||||
} else if constexpr(std::is_same_v<T, double>) {
|
||||
u64 v = D_TO_U64(f);
|
||||
return ((v & 0x7FF0000000000000) == 0x7FF0000000000000) && ((v & 0xFFFFFFFFFFFFF) != 0);
|
||||
} else {
|
||||
@@ -198,10 +255,10 @@ FORCE_INLINE bool isnan(T f) {
|
||||
|
||||
template <typename T>
|
||||
FORCE_INLINE bool isqnan(T f) {
|
||||
if constexpr(std::is_same<T, float>::value) {
|
||||
if constexpr(std::is_same_v<T, float>) {
|
||||
u32 v = F_TO_U32(f);
|
||||
return (v & 0x7FC00000) == 0x7FC00000;
|
||||
} else if constexpr(std::is_same<T, double>::value) {
|
||||
} else if constexpr(std::is_same_v<T, double>) {
|
||||
u64 v = D_TO_U64(f);
|
||||
return (v & 0x7FF8000000000000) == 0x7FF8000000000000;
|
||||
} else {
|
||||
@@ -251,7 +308,7 @@ void Cop1::ceills(Registers& regs, u32 instr) {
|
||||
CheckCVTArg(fs);
|
||||
s64 result;
|
||||
CVT_OP_CheckExcept({ result = std::ceil(fs); });
|
||||
CheckRound(result, fs);
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -261,7 +318,7 @@ void Cop1::ceilws(Registers& regs, u32 instr) {
|
||||
CheckCVTArg(fs);
|
||||
s32 result;
|
||||
CVT_OP_CheckExcept({ result = std::ceil(fs); });
|
||||
CheckRound(result, fs);
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -271,7 +328,7 @@ void Cop1::ceilld(Registers& regs, u32 instr) {
|
||||
CheckCVTArg(fs);
|
||||
s64 result;
|
||||
CVT_OP_CheckExcept({ result = std::ceil(fs); });
|
||||
CheckRound(result, fs);
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -281,7 +338,7 @@ void Cop1::ceilwd(Registers& regs, u32 instr) {
|
||||
CheckCVTArg(fs);
|
||||
s32 result;
|
||||
CVT_OP_CheckExcept({ result = std::ceil(fs); });
|
||||
CheckRound(result, fs);
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -316,18 +373,20 @@ void Cop1::ctc1(Registers& regs, u32 instr) {
|
||||
void Cop1::cvtds(Registers& regs, u32 instr) {
|
||||
CheckFPUUsable();
|
||||
auto fs = GetFGR_FS<float>(regs.cop0, FS(instr));
|
||||
CheckCVTArg(fs);
|
||||
CheckArg(fs);
|
||||
double result;
|
||||
CVT_OP_CheckExcept({ result = double(fs); });
|
||||
OP_CheckExcept({ result = double(fs); });
|
||||
CheckResult(result);
|
||||
SetFGR_Raw(FD(instr), result);
|
||||
}
|
||||
|
||||
void Cop1::cvtsd(Registers& regs, u32 instr) {
|
||||
CheckFPUUsable();
|
||||
auto fs = GetFGR_FS<double>(regs.cop0, FS(instr));
|
||||
CheckCVTArg(fs);
|
||||
CheckArg(fs);
|
||||
float result;
|
||||
CVT_OP_CheckExcept({ result = float(fs); });
|
||||
OP_CheckExcept({ result = float(fs); });
|
||||
CheckResult(result);
|
||||
SetFGR_Raw(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -336,7 +395,10 @@ void Cop1::cvtwd(Registers& regs, u32 instr) {
|
||||
auto fs = GetFGR_FS<double>(regs.cop0, FS(instr));
|
||||
CheckCVTArg(fs);
|
||||
s32 result;
|
||||
PUSHROUNDING;
|
||||
CVT_OP_CheckExcept({ result = s32(fs); });
|
||||
POPROUNDING;
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -345,7 +407,10 @@ void Cop1::cvtws(Registers& regs, u32 instr) {
|
||||
auto fs = GetFGR_FS<float>(regs.cop0, FS(instr));
|
||||
CheckCVTArg(fs);
|
||||
s32 result;
|
||||
PUSHROUNDING;
|
||||
CVT_OP_CheckExcept({ result = s32(fs); });
|
||||
POPROUNDING;
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -354,19 +419,23 @@ void Cop1::cvtls(Registers& regs, u32 instr) {
|
||||
auto fs = GetFGR_FS<float>(regs.cop0, FS(instr));
|
||||
CheckCVTArg(fs);
|
||||
s64 result;
|
||||
CVT_OP_CheckExcept({ result = s64(fs); });
|
||||
PUSHROUNDING;
|
||||
CVT_OP_CheckExcept({ result = std::rint(fs); });
|
||||
POPROUNDING;
|
||||
CheckRound(fs, result);
|
||||
SetFGR(FD(instr), result);
|
||||
}
|
||||
|
||||
void Cop1::cvtsl(Registers& regs, u32 instr) {
|
||||
CheckFPUUsable();
|
||||
auto fs = GetFGR_FR<s64>(regs.cop0, FS(instr));
|
||||
if (fs >= (s64)0x0080000000000000 || fs < s64(0xff80'0000'0000'0000)) {
|
||||
fcr31.cause_unimplemented_operation = true;
|
||||
if (fs >= s64(0x0080000000000000) || fs < s64(0xff80000000000000)) {
|
||||
SetCauseUnimplemented();
|
||||
CheckFPUException();
|
||||
}
|
||||
float result;
|
||||
OP_CheckExcept({ result = float(fs); });
|
||||
CheckResult(result);
|
||||
SetFGR_Raw(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -375,6 +444,7 @@ void Cop1::cvtdw(Registers& regs, u32 instr) {
|
||||
auto fs = GetFGR_FS<s32>(regs.cop0, FS(instr));
|
||||
double result;
|
||||
OP_CheckExcept({ result = double(fs); });
|
||||
CheckResult(result);
|
||||
SetFGR_Raw(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -383,18 +453,21 @@ void Cop1::cvtsw(Registers& regs, u32 instr) {
|
||||
auto fs = GetFGR_FS<s32>(regs.cop0, FS(instr));
|
||||
float result;
|
||||
OP_CheckExcept({ result = float(fs); });
|
||||
CheckResult(result);
|
||||
SetFGR_Raw(FD(instr), result);
|
||||
}
|
||||
|
||||
void Cop1::cvtdl(Registers& regs, u32 instr) {
|
||||
CheckFPUUsable();
|
||||
auto fs = GetFGR_FR<s64>(regs.cop0, FS(instr));
|
||||
if (fs >= (s64)0x0080000000000000 || fs < s64(0xff80'0000'0000'0000)) {
|
||||
fcr31.cause_unimplemented_operation = true;
|
||||
|
||||
if (fs >= s64(0x0080000000000000) || fs < s64(0xff80000000000000)) {
|
||||
SetCauseUnimplemented();
|
||||
CheckFPUException();
|
||||
}
|
||||
double result;
|
||||
OP_CheckExcept({ result = double(fs); });
|
||||
CheckResult(result);
|
||||
SetFGR_Raw(FD(instr), result);
|
||||
}
|
||||
|
||||
@@ -485,7 +558,7 @@ void Cop1::negs(Registers ®s, u32 instr) {
|
||||
}
|
||||
|
||||
void Cop1::negd(Registers ®s, u32 instr) {
|
||||
OP(double, -ft);
|
||||
OP(double, -fs);
|
||||
}
|
||||
|
||||
void Cop1::sqrts(Registers ®s, u32 instr) {
|
||||
@@ -681,11 +754,6 @@ template void Cop1::sdc1<Interpreter>(Interpreter&, Mem&, u32);
|
||||
template void Cop1::sdc1<JIT>(JIT&, Mem&, u32);
|
||||
|
||||
void Cop1::lwc1Interp(Registers& regs, Mem& mem, u32 instr) {
|
||||
if(!regs.cop0.status.cu1) {
|
||||
FireException(regs, ExceptionCode::CoprocessorUnusable, 1, true);
|
||||
return;
|
||||
}
|
||||
|
||||
u64 addr = (s64)(s16)instr + regs.gpr[BASE(instr)];
|
||||
|
||||
u32 physical;
|
||||
@@ -699,11 +767,6 @@ void Cop1::lwc1Interp(Registers& regs, Mem& mem, u32 instr) {
|
||||
}
|
||||
|
||||
void Cop1::swc1Interp(Registers& regs, Mem& mem, u32 instr) {
|
||||
if(!regs.cop0.status.cu1) {
|
||||
FireException(regs, ExceptionCode::CoprocessorUnusable, 1, true);
|
||||
return;
|
||||
}
|
||||
|
||||
u64 addr = (s64)(s16)instr + regs.gpr[BASE(instr)];
|
||||
|
||||
u32 physical;
|
||||
@@ -722,11 +785,6 @@ void Cop1::unimplemented(Registers& regs) {
|
||||
}
|
||||
|
||||
void Cop1::ldc1Interp(Registers& regs, Mem& mem, u32 instr) {
|
||||
if(!regs.cop0.status.cu1) {
|
||||
FireException(regs, ExceptionCode::CoprocessorUnusable, 1, true);
|
||||
return;
|
||||
}
|
||||
|
||||
u64 addr = (s64)(s16)instr + regs.gpr[BASE(instr)];
|
||||
|
||||
u32 physical;
|
||||
@@ -740,11 +798,6 @@ void Cop1::ldc1Interp(Registers& regs, Mem& mem, u32 instr) {
|
||||
}
|
||||
|
||||
void Cop1::sdc1Interp(Registers& regs, Mem& mem, u32 instr) {
|
||||
if(!regs.cop0.status.cu1) {
|
||||
FireException(regs, ExceptionCode::CoprocessorUnusable, 1, true);
|
||||
return;
|
||||
}
|
||||
|
||||
u64 addr = (s64)(s16)instr + regs.gpr[BASE(instr)];
|
||||
|
||||
u32 physical;
|
||||
|
||||
Reference in New Issue
Block a user