diff --git a/external/ircolib/README.md b/external/ircolib/README.md
new file mode 100644
index 0000000..d5db4a5
--- /dev/null
+++ b/external/ircolib/README.md
@@ -0,0 +1,7 @@
+# ircolib
+
+A collection of useful functions that I kept copy-pasting between projects and therefore decided to gather in one place.
+
+## Flags
+
+To enable SIMD for `floats.hpp`, add a `#define SIMD_SUPPORT` before `#include "floats.hpp"`.
\ No newline at end of file
diff --git a/external/ircolib/file.hpp b/external/ircolib/file.hpp
new file mode 100644
index 0000000..1e9b11a
--- /dev/null
+++ b/external/ircolib/file.hpp
@@ -0,0 +1,54 @@
+#pragma once
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <vector>
+#include "types.hpp"
+
+namespace fs = std::filesystem;
+
+namespace ircolib {
+/// Reads the whole file at `path` as raw bytes; the result is empty if nothing could be read.
+static inline std::vector<u8> ReadFileBinary(const std::string &path) {
+  std::ifstream file(path, std::ios::binary);
+  return {std::istreambuf_iterator<char>{file}, {}};
+}
+
+/// Writes the bytes of `data` to the file at `path`.
+static inline void WriteFileBinary(const std::vector<u8> &data, const std::string &path) {
+  std::ofstream file(path, std::ios::binary);
+  std::copy(data.begin(), data.end(), std::ostreambuf_iterator<char>{file});
+}
+
+/// Writes `size` bytes starting at `data` to the file at `path`.
+/// Goes through std::ofstream like the sibling overloads, so a file that cannot be
+/// opened makes this a no-op instead of handing a null FILE* to fwrite/fclose.
+static inline void WriteFileBinary(const u8 *data, const size_t size, const std::string &path) {
+  std::ofstream file(path, std::ios::binary);
+  file.write(reinterpret_cast<const char *>(data), static_cast<std::streamsize>(size));
+}
+
+/// Writes the bytes of a fixed-size array to the file at `path`.
+template <size_t Size>
+static inline void WriteFileBinary(const std::array<u8, Size> &data, const std::string &path) {
+  std::ofstream file(path, std::ios::binary);
+  std::copy(data.begin(), data.end(), std::ostreambuf_iterator<char>{file});
+}
+
+/// Rounds `num` up to the next power of two; a power of two is returned unchanged.
+static inline size_t NextPow2(size_t num) {
+  // Taken from "Bit Twiddling Hacks" by Sean Anderson:
+  // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+  --num;
+  // Smear the highest set bit into every lower bit. Doubling the shift until it spans
+  // the whole word also covers a 64-bit size_t, which shifts of 1..16 alone do not.
+  for (size_t shift = 1; shift < sizeof(num) * 8; shift <<= 1) {
+    num |= num >> shift;
+  }
+  return num + 1;
+}
+} // namespace ircolib
diff --git a/external/ircolib/floats.hpp b/external/ircolib/floats.hpp
new file mode 100644
index 0000000..913f1de
--- /dev/null
+++ b/external/ircolib/floats.hpp
@@ -0,0 +1,121 @@
+#pragma once
+#include <cmath>
+#ifdef SIMD_SUPPORT
+#include <immintrin.h>
+#endif
+
+namespace ircolib {
+/// Rounds `f` toward positive infinity.
+static inline auto roundCeil(float f) {
+#ifdef SIMD_SUPPORT
+  __m128 t = _mm_set_ss(f);
+  t = _mm_round_ss(t, t, _MM_FROUND_TO_POS_INF);
+  return _mm_cvtss_f32(t);
+#else
+  return std::ceil(f);
+#endif
+}
+
+/// Rounds `f` toward positive infinity.
+static inline auto roundCeil(double f) {
+#ifdef SIMD_SUPPORT
+  __m128d t = _mm_set_sd(f);
+  t = _mm_round_sd(t, t, _MM_FROUND_TO_POS_INF);
+  return _mm_cvtsd_f64(t);
+#else
+  return std::ceil(f);
+#endif
+}
+
+/// Rounds `f` to the nearest integer value.
+/// NOTE(review): the SIMD path rounds ties to even while std::round rounds ties away
+/// from zero, so the two build modes can disagree on x.5 inputs - confirm which is wanted.
+static inline auto roundNearest(float f) {
+#ifdef SIMD_SUPPORT
+  __m128 t = _mm_set_ss(f);
+  t = _mm_round_ss(t, t, _MM_FROUND_TO_NEAREST_INT);
+  return _mm_cvtss_f32(t);
+#else
+  return std::round(f);
+#endif
+}
+
+/// Rounds `f` to the nearest integer value.
+/// NOTE(review): the SIMD path rounds ties to even while std::round rounds ties away
+/// from zero, so the two build modes can disagree on x.5 inputs - confirm which is wanted.
+static inline auto roundNearest(double f) {
+#ifdef SIMD_SUPPORT
+  __m128d t = _mm_set_sd(f);
+  t = _mm_round_sd(t, t, _MM_FROUND_TO_NEAREST_INT);
+  return _mm_cvtsd_f64(t);
+#else
+  return std::round(f);
+#endif
+}
+
+/// Rounds `f` to an integer value using the current rounding direction.
+static inline auto roundCurrent(float f) {
+#ifdef SIMD_SUPPORT
+  auto t = _mm_set_ss(f);
+  t = _mm_round_ss(t, t, _MM_FROUND_CUR_DIRECTION);
+  return _mm_cvtss_f32(t);
+#else
+  return std::rint(f);
+#endif
+}
+
+/// Rounds `f` to an integer value using the current rounding direction.
+static inline auto roundCurrent(double f) {
+#ifdef SIMD_SUPPORT
+  auto t = _mm_set_sd(f);
+  t = _mm_round_sd(t, t, _MM_FROUND_CUR_DIRECTION);
+  return _mm_cvtsd_f64(t);
+#else
+  return std::rint(f);
+#endif
+}
+
+/// Rounds `f` toward negative infinity.
+static inline auto roundFloor(float f) {
+#ifdef SIMD_SUPPORT
+  __m128 t = _mm_set_ss(f);
+  t = _mm_round_ss(t, t, _MM_FROUND_TO_NEG_INF);
+  return _mm_cvtss_f32(t);
+#else
+  return std::floor(f);
+#endif
+}
+
+/// Rounds `f` toward negative infinity.
+static inline auto roundFloor(double f) {
+#ifdef SIMD_SUPPORT
+  __m128d t = _mm_set_sd(f);
+  t = _mm_round_sd(t, t, _MM_FROUND_TO_NEG_INF);
+  return _mm_cvtsd_f64(t);
+#else
+  return std::floor(f);
+#endif
+}
+
+/// Rounds `f` toward zero.
+static inline auto roundTrunc(float f) {
+#ifdef SIMD_SUPPORT
+  __m128 t = _mm_set_ss(f);
+  t = _mm_round_ss(t, t, _MM_FROUND_TO_ZERO);
+  return _mm_cvtss_f32(t);
+#else
+  return std::trunc(f);
+#endif
+}
+
+/// Rounds `f` toward zero.
+static inline auto roundTrunc(double f) {
+#ifdef SIMD_SUPPORT
+  __m128d t = _mm_set_sd(f);
+  t = _mm_round_sd(t, t, _MM_FROUND_TO_ZERO);
+  return _mm_cvtsd_f64(t);
+#else
+  return std::trunc(f);
+#endif
+}
+} // namespace ircolib
diff --git a/external/ircolib/mem_access.hpp b/external/ircolib/mem_access.hpp
new file mode 100644
index 0000000..1b29bf3
--- /dev/null
+++ b/external/ircolib/mem_access.hpp
@@ -0,0 +1,177 @@
+#pragma once
+#include <array>
+#include <bit>
+#include <concepts>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <vector>
+#include "types.hpp"
+
+namespace ircolib {
+namespace detail {
+/// Copies a T out of the bytes at `src`. Unlike dereferencing a pointer obtained
+/// with reinterpret_cast, this is well-defined for any alignment of `src`.
+template <typename T>
+constexpr inline T LoadBytes(const u8 *src) {
+  T val{};
+  std::memcpy(&val, src, sizeof(T));
+  return val;
+}
+
+/// Copies the object representation of `val` to the bytes at `dst`.
+template <typename T>
+constexpr inline void StoreBytes(u8 *dst, const T &val) {
+  std::memcpy(dst, &val, sizeof(T));
+}
+} // namespace detail
+
+/// Returns the bytes of `val`, in host byte order, as a vector.
+static inline std::vector<u8> IntegralToBuffer(const std::integral auto &val) {
+  std::vector<u8> ret(sizeof(val));
+  std::memcpy(ret.data(), &val, sizeof(val));
+  return ret;
+}
+
+/// Returns true when `addr` lies in the closed interval [start, end].
+static inline constexpr bool IsInsideRange(const std::integral auto &addr,
+                                           const std::integral auto &start,
+                                           const std::integral auto &end) {
+  return addr >= start && addr <= end;
+}
+
+// The primary templates are declared up front so the u64 specializations can follow.
+template <typename T>
+static constexpr inline T ReadAccess(const u8 *data, const u32 index);
+template <typename T>
+static constexpr inline T ReadAccess(const std::vector<u8> &data, const u32 index);
+template <typename T, size_t Size>
+static constexpr inline T ReadAccess(const std::array<u8, Size> &data, const u32 index);
+
+template <typename T>
+static constexpr inline void WriteAccess(u8 *data, const u32 index, const T val);
+template <typename T>
+static constexpr inline void WriteAccess(std::vector<u8> &data, const u32 index, const T val);
+template <typename T, size_t Size>
+static constexpr inline void WriteAccess(std::array<u8, Size> &data, const u32 index, const T val);
+
+/// Reads a u64 as two 32-bit words: the word at `index` becomes the high half,
+/// the word at `index + 4` the low half.
+template <>
+constexpr inline u64 ReadAccess(const u8 *data, const u32 index) {
+  const u32 hi = detail::LoadBytes<u32>(data + index + 0);
+  const u32 lo = detail::LoadBytes<u32>(data + index + 4);
+  return (static_cast<u64>(hi) << 32) | static_cast<u64>(lo);
+}
+
+/// Reads a T from the bytes of `data` starting at `index`.
+template <typename T>
+static constexpr inline T ReadAccess(const u8 *data, const u32 index) {
+  return detail::LoadBytes<T>(data + index);
+}
+
+/// std::vector overload of the two-word u64 read (high word at `index`).
+template <>
+constexpr inline u64 ReadAccess(const std::vector<u8> &data, const u32 index) {
+  const u32 hi = detail::LoadBytes<u32>(data.data() + index + 0);
+  const u32 lo = detail::LoadBytes<u32>(data.data() + index + 4);
+  return (static_cast<u64>(hi) << 32) | static_cast<u64>(lo);
+}
+
+/// Reads a T from the bytes of `data` starting at `index`.
+template <typename T>
+static constexpr inline T ReadAccess(const std::vector<u8> &data, const u32 index) {
+  return detail::LoadBytes<T>(data.data() + index);
+}
+
+/// std::array overload of the two-word u64 read (high word at `index`).
+template <size_t Size>
+constexpr inline u64 ReadAccess(const std::array<u8, Size> &data, const u32 index) {
+  const u32 hi = detail::LoadBytes<u32>(data.data() + index + 0);
+  const u32 lo = detail::LoadBytes<u32>(data.data() + index + 4);
+  return (static_cast<u64>(hi) << 32) | static_cast<u64>(lo);
+}
+
+/// Reads a T from the bytes of `data` starting at `index`.
+template <typename T, size_t Size>
+static constexpr inline T ReadAccess(const std::array<u8, Size> &data, const u32 index) {
+  return detail::LoadBytes<T>(data.data() + index);
+}
+
+/// Writes a u64 as two 32-bit words: the high half at `index`, the low half at `index + 4`.
+template <size_t Size>
+constexpr inline void WriteAccess(std::array<u8, Size> &data, const u32 index, const u64 val) {
+  detail::StoreBytes(data.data() + index + 0, static_cast<u32>(val >> 32));
+  detail::StoreBytes(data.data() + index + 4, static_cast<u32>(val));
+}
+
+/// Writes `val` to the bytes of `data` starting at `index`.
+template <typename T, size_t Size>
+static constexpr inline void WriteAccess(std::array<u8, Size> &data, const u32 index, const T val) {
+  detail::StoreBytes(data.data() + index, val);
+}
+
+/// std::vector overload of the two-word u64 write (high half at `index`).
+template <>
+constexpr inline void WriteAccess(std::vector<u8> &data, const u32 index, const u64 val) {
+  detail::StoreBytes(data.data() + index + 0, static_cast<u32>(val >> 32));
+  detail::StoreBytes(data.data() + index + 4, static_cast<u32>(val));
+}
+
+/// Writes `val` to the bytes of `data` starting at `index`.
+template <typename T>
+static constexpr inline void WriteAccess(std::vector<u8> &data, const u32 index, const T val) {
+  detail::StoreBytes(data.data() + index, val);
+}
+
+/// Raw-pointer overload of the two-word u64 write (high half at `index`).
+template <>
+constexpr inline void WriteAccess(u8 *data, const u32 index, const u64 val) {
+  detail::StoreBytes(data + index + 0, static_cast<u32>(val >> 32));
+  detail::StoreBytes(data + index + 4, static_cast<u32>(val));
+}
+
+/// Writes `val` to the bytes of `data` starting at `index`.
+template <typename T>
+static constexpr inline void WriteAccess(u8 *data, const u32 index, const T val) {
+  detail::StoreBytes(data + index, val);
+}
+
+/// Byte-swaps every complete T-sized element of `data` in place. A trailing remainder
+/// shorter than sizeof(T) is left untouched rather than being accessed past the end.
+template <typename T>
+static constexpr inline void SwapBuffer(std::vector<u8> &data) {
+  for (size_t i = 0; i + sizeof(T) <= data.size(); i += sizeof(T)) {
+    u8 *elem = data.data() + i;
+    detail::StoreBytes(elem, std::byteswap(detail::LoadBytes<T>(elem)));
+  }
+}
+
+/// std::array overload of SwapBuffer; a trailing partial element is likewise left untouched.
+template <typename T, size_t Size>
+static constexpr inline void SwapBuffer(std::array<u8, Size> &data) {
+  for (size_t i = 0; i + sizeof(T) <= data.size(); i += sizeof(T)) {
+    u8 *elem = data.data() + i;
+    detail::StoreBytes(elem, std::byteswap(detail::LoadBytes<T>(elem)));
+  }
+}
+
+#ifdef _WIN32
+/// Allocates `size` bytes aligned to `alignment`; release with aligned_free.
+inline void *aligned_alloc(const size_t alignment, const size_t size) { return _aligned_malloc(size, alignment); }
+
+/// Releases memory obtained from aligned_alloc.
+inline void aligned_free(void *ptr) { _aligned_free(ptr); }
+#else
+/// Allocates at least `size` bytes aligned to `alignment`; release with aligned_free.
+inline void *aligned_alloc(const size_t alignment, const size_t size) {
+  // std::aligned_alloc requires the size to be a multiple of the alignment, so round it
+  // up (valid alignments are powers of two, which makes the mask form exact).
+  const size_t rounded = (size + alignment - 1) & ~(alignment - 1);
+  return std::aligned_alloc(alignment, rounded);
+}
+
+/// Releases memory obtained from aligned_alloc.
+inline void aligned_free(void *ptr) { std::free(ptr); }
+#endif
+} // namespace ircolib
diff --git a/external/ircolib/types.hpp b/external/ircolib/types.hpp
new file mode 100644
index 0000000..9752f90
--- /dev/null
+++ b/external/ircolib/types.hpp
@@ -0,0 +1,13 @@
+#pragma once
+#include <cstdint>
+
+namespace ircolib {
+using u8 = uint8_t;
+using u16 = uint16_t;
+using u32 = uint32_t;
+using u64 = uint64_t;
+using s8 = int8_t;
+using s16 = int16_t;
+using s32 = int32_t;
+using s64 = int64_t;
+} // namespace ircolib
\ No newline at end of file