diff --git a/src/backend/core/registers/cop/cop1instructions.cpp b/src/backend/core/registers/cop/cop1instructions.cpp index be6b7887..2e3da8e8 100644 --- a/src/backend/core/registers/cop/cop1instructions.cpp +++ b/src/backend/core/registers/cop/cop1instructions.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace n64 { template<> auto Cop1::FGR_T(Cop0Status& status, u32 index) -> s32& { @@ -431,7 +432,7 @@ void Cop1::ceills(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, ceilf32(fs)) + CHECK_FPE(s64, fd, Util::roundCeil(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -439,7 +440,7 @@ void Cop1::ceilld(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, ceilf64(fs)) + CHECK_FPE(s64, fd, Util::roundCeil(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -447,7 +448,7 @@ void Cop1::ceilws(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, ceilf32(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundCeil(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -455,7 +456,7 @@ void Cop1::ceilwd(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, ceilf64(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundCeil(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -544,7 +545,7 @@ void Cop1::cvtwd(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, rintf64(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundCurrent(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -552,7 +553,7 @@ void Cop1::cvtws(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, rintf32(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundCurrent(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -560,7 +561,7 @@ void Cop1::cvtls(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, rintf32(fs)) + CHECK_FPE(s64, fd, Util::roundCurrent(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -590,7 +591,7 @@ void Cop1::cvtld(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, rintf64(fs)) + CHECK_FPE(s64, fd, Util::roundCurrent(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -901,7 +902,7 @@ void Cop1::roundls(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, roundevenf(fs)) + CHECK_FPE(s64, fd, Util::roundNearest(fs)) if(fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -913,7 +914,7 @@ void Cop1::roundld(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, roundeven(fs)) + CHECK_FPE(s64, fd, Util::roundNearest(fs)) if(fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -925,7 +926,7 @@ void Cop1::roundws(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, roundevenf(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundNearest(fs)) if(fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -937,7 +938,7 @@ void Cop1::roundwd(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, roundeven(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundNearest(fs)) if(fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -949,7 +950,7 @@ void Cop1::floorls(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, floorf32(fs)) + CHECK_FPE(s64, fd, Util::roundFloor(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -957,7 +958,7 @@ void Cop1::floorld(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, floorf64(fs)) + CHECK_FPE(s64, fd, Util::roundFloor(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -965,7 +966,7 @@ void Cop1::floorws(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, floorf32(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundFloor(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -973,7 +974,7 @@ void Cop1::floorwd(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, floorf64(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundFloor(fs)) FGR_D(regs.cop0.status, FD(instr)) = fd; } @@ -981,7 +982,7 @@ void Cop1::truncws(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, truncf32(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundTrunc(fs)) if((float)fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -993,7 +994,7 @@ void Cop1::truncwd(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE_CONV(s32, fd, truncf64(fs)) + CHECK_FPE_CONV(s32, fd, Util::roundTrunc(fs)) if((double)fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -1005,7 +1006,7 @@ void Cop1::truncls(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, truncf32(fs)) + CHECK_FPE(s64, fd, Util::roundTrunc(fs)) if((float)fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; @@ -1017,7 +1018,7 @@ void Cop1::truncld(u32 instr) { if(!CheckFPUUsable()) return; auto fs = FGR_S(regs.cop0.status, FS(instr)); if(!CheckCVTArg(fs)) return; - CHECK_FPE(s64, fd, truncf64(fs)) + CHECK_FPE(s64, fd, Util::roundTrunc(fs)) if((double)fd != fs && SetCauseInexact()) { regs.cop0.FireException(ExceptionCode::FloatingPointError, 0, regs.oldPC); return; diff --git a/src/utils/FloatingPoint.hpp b/src/utils/FloatingPoint.hpp new file mode 100644 index 00000000..078d2f4c --- /dev/null +++ b/src/utils/FloatingPoint.hpp @@ -0,0 +1,121 @@ +// +// Created by simone on 6/25/24. +// + +#pragma once +#include +#include +#include + +namespace Util { +template +static inline T roundCeil(float f) { +#ifdef SIMD_SUPPORT + __m128 t = _mm_set_ss(f); + t = _mm_round_ss(t, t, _MM_FROUND_TO_POS_INF); + return _mm_cvtss_f32(t); +#else + return ceilf(f); +#endif +} + +template +static inline T roundCeil(double f) { +#ifdef SIMD_SUPPORT + __m128d t = _mm_set_sd(f); + t = _mm_round_sd(t, t, _MM_FROUND_TO_POS_INF); + return _mm_cvtsd_f64(t); +#else + return ceil(f); +#endif +} + +template +static inline T roundNearest(float f) { +#ifdef SIMD_SUPPORT + __m128 t = _mm_set_ss(f); + t = _mm_round_ss(t, t, _MM_FROUND_TO_NEAREST_INT); + return _mm_cvtss_f32(t); +#else + return roundf(f); +#endif +} + +template +static inline T roundNearest(double f) { +#ifdef SIMD_SUPPORT + __m128d t = _mm_set_sd(f); + t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT); + return _mm_cvtsd_f64(t); +#else + return round(f); +#endif +} + +template +static inline T roundCurrent(float f) { +#ifdef SIMD_SUPPORT + auto t = _mm_set_ss(f); + t = _mm_round_ss(t, t, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtss_f32(t); +#else + return rint(f); +#endif +} + +template +static inline T roundCurrent(double f) { +#ifdef SIMD_SUPPORT + auto t = _mm_set_sd(f); + t = _mm_round_sd(t, t, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtsd_f64(t); +#else + return rint(f); +#endif +} + + +template +static inline T roundFloor(float f) { +#ifdef SIMD_SUPPORT +__m128 t = _mm_set_ss(f); + t = _mm_round_ss(t, t, _MM_FROUND_TO_NEG_INF); + return _mm_cvtss_f32(t); +#else +return floor(f); +#endif +} + +template +static inline T roundFloor(double f) { +#ifdef SIMD_SUPPORT + __m128d t = _mm_set_sd(f); + t = _mm_round_sd(t, t, _MM_FROUND_TO_NEG_INF); + return _mm_cvtsd_f64(t); +#else + return floor(f); +#endif +} + +template +static inline T roundTrunc(float f) { +#ifdef SIMD_SUPPORT + __m128 t = _mm_set_ss(f); + t = _mm_round_ss(t, t, _MM_FROUND_TO_ZERO); + return _mm_cvtss_f32(t); +#else + return trunc(f); +#endif +} + +template +static inline T roundTrunc(double f) { +#ifdef SIMD_SUPPORT + __m128d t = _mm_set_sd(f); + t = _mm_round_sd(t, t, _MM_FROUND_TO_ZERO); + return _mm_cvtsd_f64(t); +#else + return trunc(f); +#endif +} +} \ No newline at end of file