preliminary dynarec work

This commit is contained in:
CocoSimone
2022-10-30 16:50:43 +01:00
parent 1265279069
commit 35b4d5ab9e
35 changed files with 6727 additions and 153 deletions

View File

@@ -62,8 +62,8 @@ target_include_directories(parallel-rdp PUBLIC
../../src/n64 ../../src/n64
../../src/n64/core/ ../../src/n64/core/
../../src/n64/core/mmio ../../src/n64/core/mmio
../../src/n64/core/cpu/ ../../src/n64/core/interpreter/
../../src/n64/core/cpu/registers ../../src/n64/core/interpreter/registers
parallel-rdp-standalone parallel-rdp-standalone
.. ..
../capstone/include ../capstone/include

27
external/xbyak/COPYRIGHT vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2007 MITSUNARI Shigeo
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of the copyright owner nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.

2951
external/xbyak/xbyak.h vendored Normal file

File diff suppressed because it is too large Load Diff

258
external/xbyak/xbyak_bin2hex.h vendored Normal file
View File

@@ -0,0 +1,258 @@
enum {
B00000000= 0,
B00000001= 1,
B00000010= 2,
B00000011= 3,
B00000100= 4,
B00000101= 5,
B00000110= 6,
B00000111= 7,
B00001000= 8,
B00001001= 9,
B00001010= 10,
B00001011= 11,
B00001100= 12,
B00001101= 13,
B00001110= 14,
B00001111= 15,
B00010000= 16,
B00010001= 17,
B00010010= 18,
B00010011= 19,
B00010100= 20,
B00010101= 21,
B00010110= 22,
B00010111= 23,
B00011000= 24,
B00011001= 25,
B00011010= 26,
B00011011= 27,
B00011100= 28,
B00011101= 29,
B00011110= 30,
B00011111= 31,
B00100000= 32,
B00100001= 33,
B00100010= 34,
B00100011= 35,
B00100100= 36,
B00100101= 37,
B00100110= 38,
B00100111= 39,
B00101000= 40,
B00101001= 41,
B00101010= 42,
B00101011= 43,
B00101100= 44,
B00101101= 45,
B00101110= 46,
B00101111= 47,
B00110000= 48,
B00110001= 49,
B00110010= 50,
B00110011= 51,
B00110100= 52,
B00110101= 53,
B00110110= 54,
B00110111= 55,
B00111000= 56,
B00111001= 57,
B00111010= 58,
B00111011= 59,
B00111100= 60,
B00111101= 61,
B00111110= 62,
B00111111= 63,
B01000000= 64,
B01000001= 65,
B01000010= 66,
B01000011= 67,
B01000100= 68,
B01000101= 69,
B01000110= 70,
B01000111= 71,
B01001000= 72,
B01001001= 73,
B01001010= 74,
B01001011= 75,
B01001100= 76,
B01001101= 77,
B01001110= 78,
B01001111= 79,
B01010000= 80,
B01010001= 81,
B01010010= 82,
B01010011= 83,
B01010100= 84,
B01010101= 85,
B01010110= 86,
B01010111= 87,
B01011000= 88,
B01011001= 89,
B01011010= 90,
B01011011= 91,
B01011100= 92,
B01011101= 93,
B01011110= 94,
B01011111= 95,
B01100000= 96,
B01100001= 97,
B01100010= 98,
B01100011= 99,
B01100100= 100,
B01100101= 101,
B01100110= 102,
B01100111= 103,
B01101000= 104,
B01101001= 105,
B01101010= 106,
B01101011= 107,
B01101100= 108,
B01101101= 109,
B01101110= 110,
B01101111= 111,
B01110000= 112,
B01110001= 113,
B01110010= 114,
B01110011= 115,
B01110100= 116,
B01110101= 117,
B01110110= 118,
B01110111= 119,
B01111000= 120,
B01111001= 121,
B01111010= 122,
B01111011= 123,
B01111100= 124,
B01111101= 125,
B01111110= 126,
B01111111= 127,
B10000000= 128,
B10000001= 129,
B10000010= 130,
B10000011= 131,
B10000100= 132,
B10000101= 133,
B10000110= 134,
B10000111= 135,
B10001000= 136,
B10001001= 137,
B10001010= 138,
B10001011= 139,
B10001100= 140,
B10001101= 141,
B10001110= 142,
B10001111= 143,
B10010000= 144,
B10010001= 145,
B10010010= 146,
B10010011= 147,
B10010100= 148,
B10010101= 149,
B10010110= 150,
B10010111= 151,
B10011000= 152,
B10011001= 153,
B10011010= 154,
B10011011= 155,
B10011100= 156,
B10011101= 157,
B10011110= 158,
B10011111= 159,
B10100000= 160,
B10100001= 161,
B10100010= 162,
B10100011= 163,
B10100100= 164,
B10100101= 165,
B10100110= 166,
B10100111= 167,
B10101000= 168,
B10101001= 169,
B10101010= 170,
B10101011= 171,
B10101100= 172,
B10101101= 173,
B10101110= 174,
B10101111= 175,
B10110000= 176,
B10110001= 177,
B10110010= 178,
B10110011= 179,
B10110100= 180,
B10110101= 181,
B10110110= 182,
B10110111= 183,
B10111000= 184,
B10111001= 185,
B10111010= 186,
B10111011= 187,
B10111100= 188,
B10111101= 189,
B10111110= 190,
B10111111= 191,
B11000000= 192,
B11000001= 193,
B11000010= 194,
B11000011= 195,
B11000100= 196,
B11000101= 197,
B11000110= 198,
B11000111= 199,
B11001000= 200,
B11001001= 201,
B11001010= 202,
B11001011= 203,
B11001100= 204,
B11001101= 205,
B11001110= 206,
B11001111= 207,
B11010000= 208,
B11010001= 209,
B11010010= 210,
B11010011= 211,
B11010100= 212,
B11010101= 213,
B11010110= 214,
B11010111= 215,
B11011000= 216,
B11011001= 217,
B11011010= 218,
B11011011= 219,
B11011100= 220,
B11011101= 221,
B11011110= 222,
B11011111= 223,
B11100000= 224,
B11100001= 225,
B11100010= 226,
B11100011= 227,
B11100100= 228,
B11100101= 229,
B11100110= 230,
B11100111= 231,
B11101000= 232,
B11101001= 233,
B11101010= 234,
B11101011= 235,
B11101100= 236,
B11101101= 237,
B11101110= 238,
B11101111= 239,
B11110000= 240,
B11110001= 241,
B11110010= 242,
B11110011= 243,
B11110100= 244,
B11110101= 245,
B11110110= 246,
B11110111= 247,
B11111000= 248,
B11111001= 249,
B11111010= 250,
B11111011= 251,
B11111100= 252,
B11111101= 253,
B11111110= 254,
B11111111= 255
};

2309
external/xbyak/xbyak_mnemonic.h vendored Normal file

File diff suppressed because it is too large Load Diff

994
external/xbyak/xbyak_util.h vendored Normal file
View File

@@ -0,0 +1,994 @@
#ifndef XBYAK_XBYAK_UTIL_H_
#define XBYAK_XBYAK_UTIL_H_
#ifdef XBYAK_ONLY_CLASS_CPU
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#ifndef XBYAK_THROW
#define XBYAK_THROW(x) ;
#define XBYAK_THROW_RET(x, y) return y;
#endif
#else
#include <string.h>
/**
utility class and functions for Xbyak
Xbyak::util::Clock ; rdtsc timer
Xbyak::util::Cpu ; detect CPU
*/
#include "xbyak.h"
#endif // XBYAK_ONLY_CLASS_CPU
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define XBYAK_INTEL_CPU_SPECIFIC
#endif
#ifdef XBYAK_INTEL_CPU_SPECIFIC
#ifdef _WIN32
#if defined(_MSC_VER) && (_MSC_VER < 1400) && defined(XBYAK32)
// Fallback definition of __cpuid for the 32-bit MSVC configuration selected by
// the surrounding #if (_MSC_VER < 1400 with XBYAK32).
// The first argument is the output array and the second is the CPUID leaf;
// the array is filled with { eax, ebx, ecx, edx }.
// The function is naked, so it addresses its arguments relative to esp itself.
static inline __declspec(naked) void __cpuid(int[4], int)
{
__asm {
// save ebx and esi; these two pushes are the "4 * 2" term in the offsets below
push ebx
push esi
mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn
cpuid
mov esi, dword ptr [esp + 4 * 2 + 4] // data
// store the four result registers into data[0..3]
mov dword ptr [esi], eax
mov dword ptr [esi + 4], ebx
mov dword ptr [esi + 8], ecx
mov dword ptr [esi + 12], edx
// restore the saved registers in reverse order of the pushes
pop esi
pop ebx
ret
}
}
#else
#include <intrin.h> // for __cpuid
#endif
#else
#ifndef __GNUC_PREREQ
#define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
#endif
#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
#include <cpuid.h>
#else
#if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm'
// ebx is not named as an asm operand in this configuration (see the BREG error
// quoted on the #if above), so it is saved on the stack, CPUID's ebx result is
// copied to esi (the "=S" output bound to b), and ebx is restored afterwards.
// The copy must read %ebx: reading %ebp would return the frame pointer instead
// of the CPUID result.
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
#else
#define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
#endif
#endif
#endif
#endif
#ifdef XBYAK_USE_VTUNE
// -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl
#include <jitprofiling.h>
#ifdef _MSC_VER
#pragma comment(lib, "libittnotify.lib")
#endif
#ifdef __linux__
#include <dlfcn.h>
#endif
#endif
#ifdef __linux__
#define XBYAK_USE_PERF
#endif
namespace Xbyak { namespace util {
// Topology level selector. The numeric values matter: Cpu compares them with
// the level type read from CPUID leaf 0xB and uses (level - 1) as an array index.
enum IntelCpuTopologyLevel {
	SmtLevel = 1,
	CoreLevel = 2
};
namespace local {

// Empty compile-time tag carrying a feature mask as two 64-bit halves
// (L = low half, H = high half).
template<uint64_t L, uint64_t H = 0>
struct TypeT {
};

// OR-ing two tags yields the tag whose halves are the bitwise OR of the operands' halves.
template<uint64_t LoA, uint64_t HiA, uint64_t LoB, uint64_t HiB>
TypeT<LoA | LoB, HiA | HiB> operator|(TypeT<LoA, HiA>, TypeT<LoB, HiB>)
{
	typedef TypeT<LoA | LoB, HiA | HiB> Merged;
	return Merged();
}

// Larger of the two arguments; x is returned when they compare equal.
template<typename T>
inline T max_(T x, T y)
{
	if (x >= y) return x;
	return y;
}

// Smaller of the two arguments; y is returned when they compare equal.
template<typename T>
inline T min_(T x, T y)
{
	if (x < y) return x;
	return y;
}

} // local
/**
CPU detection class
@note static inline const member is supported by c++17 or later, so use template hack
*/
class Cpu {
public:
/*
	Run-time set of CPU feature bits kept as two 64-bit halves:
	L is the low half and H is the high half of the mask.
*/
class Type {
	uint64_t L;
	uint64_t H;
public:
	Type(uint64_t L = 0, uint64_t H = 0)
		: L(L)
		, H(H)
	{
	}
	// build the run-time mask from a compile-time feature tag
	template<uint64_t L_, uint64_t H_>
	Type(local::TypeT<L_, H_>)
		: L(L_)
		, H(H_)
	{
	}
	Type& operator&=(const Type& rhs)
	{
		L &= rhs.L;
		H &= rhs.H;
		return *this;
	}
	Type& operator|=(const Type& rhs)
	{
		L |= rhs.L;
		H |= rhs.H;
		return *this;
	}
	Type operator&(const Type& rhs) const { return Type(L & rhs.L, H & rhs.H); }
	Type operator|(const Type& rhs) const { return Type(L | rhs.L, H | rhs.H); }
	bool operator==(const Type& rhs) const { return L == rhs.L && H == rhs.H; }
	bool operator!=(const Type& rhs) const { return L != rhs.L || H != rhs.H; }
	// deliberately not explicit, for backward compatibility
	operator bool() const { return L != 0 || H != 0; }
	uint64_t getL() const { return L; }
	uint64_t getH() const { return H; }
};
private:
// feature flags accumulated by the constructor and queried through has()
Type type_;
// system topology
// set to true by setNumCores() when the maximum basic CPUID leaf is at least 0xB
bool x2APIC_supported_;
static const size_t maxTopologyLevels = 2;
// indexed by (IntelCpuTopologyLevel - 1): [0] is the SMT level, [1] is the core level
uint32_t numCores_[maxTopologyLevels];
static const uint32_t maxNumberCacheLevels = 10;
// The three members below are filled by setCacheHierarchy(); only the first
// dataCacheLevels_ entries of the two arrays are written there.
uint32_t dataCacheSize_[maxNumberCacheLevels];
// NOTE: "Sharign" is a misspelling of "Sharing"; the public accessor is getCoresSharingDataCache()
uint32_t coresSharignDataCache_[maxNumberCacheLevels];
uint32_t dataCacheLevels_;
/*
	Pack the first four bytes of x into a 32-bit value, with x[0] in the least
	significant byte and x[3] in the most significant byte.
	@note despite the "BE" in the name this is a little-endian packing; the
	constructor compares the result directly against a CPUID register value.
	@param x [in] pointer to at least four readable bytes
*/
uint32_t get32bitAsBE(const char *x) const
{
	// Read through unsigned char: plain char may be signed, in which case a
	// byte >= 0x80 would be sign-extended (setting every higher bit of the
	// result) and a negative int would be left-shifted.
	const unsigned char *p = reinterpret_cast<const unsigned char*>(x);
	return uint32_t(p[0]) | (uint32_t(p[1]) << 8) | (uint32_t(p[2]) << 16) | (uint32_t(p[3]) << 24);
}
// Value with the n least significant bits set; n must be in [0, 31] because
// it is used as a shift count on a 32-bit operand.
uint32_t mask(int n) const
{
	const uint32_t onePastField = 1U << n;
	return onePastField - 1;
}
/*
	Decode eax of CPUID leaf 1 into stepping/model/family and their extended
	counterparts, then derive displayFamily and displayModel from them.
*/
void setFamily()
{
	uint32_t regs[4] = {};
	getCpuid(1, regs);
	const uint32_t eax = regs[0];
	stepping = eax & mask(4);
	model = (eax >> 4) & mask(4);
	family = (eax >> 8) & mask(4);
	// the two bits starting at bit 12 are not recorded:
	// type = (eax >> 12) & mask(2);
	extModel = (eax >> 16) & mask(4);
	extFamily = (eax >> 20) & mask(8);
	// the extended family only contributes when the base family is 0x0f
	displayFamily = (family == 0x0f) ? family + extFamily : family;
	// the extended model only contributes for families 6 and 0x0f
	const bool usesExtModel = (family == 6 || family == 0x0f);
	displayModel = usesExtModel ? (extModel << 4) + model : model;
}
/*
	Extract the bit field val[end:base], where base and end are BOTH inclusive
	bit positions (this is how every caller in this class passes them, e.g.
	(8, 15) for an 8-bit field and (22, 31) for a 10-bit field).
	@param val [in] source value
	@param base [in] position of the lowest bit of the field
	@param end [in] position of the highest bit of the field (base <= end <= 31)
	@return the field shifted down to bit 0
*/
uint32_t extractBit(uint32_t val, uint32_t base, uint32_t end)
{
	const uint32_t width = end - base + 1;
	const uint32_t shifted = val >> base;
	// a full 32-bit field cannot be masked with (1u << 32) - 1: that shift is undefined
	if (width >= 32) return shifted;
	return shifted & ((1u << width) - 1);
}
void setNumCores()
{
if (!has(tINTEL)) return;
uint32_t data[4] = {};
/* CAUTION: These numbers are configuration as shipped by Intel. */
getCpuidEx(0x0, 0, data);
if (data[0] >= 0xB) {
/*
if leaf 11 exists(x2APIC is supported),
we use it to get the number of smt cores and cores on socket
leaf 0xB can be zeroed-out by a hypervisor
*/
x2APIC_supported_ = true;
for (uint32_t i = 0; i < maxTopologyLevels; i++) {
getCpuidEx(0xB, i, data);
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
if (level == SmtLevel || level == CoreLevel) {
numCores_[level - 1] = extractBit(data[1], 0, 15);
}
}
/*
Fallback values in case a hypervisor has 0xB leaf zeroed-out.
*/
numCores_[SmtLevel - 1] = local::max_(1u, numCores_[SmtLevel - 1]);
numCores_[CoreLevel - 1] = local::max_(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]);
} else {
/*
Failed to deremine num of cores without x2APIC support.
TODO: USE initial APIC ID to determine ncores.
*/
numCores_[SmtLevel - 1] = 0;
numCores_[CoreLevel - 1] = 0;
}
}
/*
Walk the sub-leaves of CPUID leaf 4 and record, for every data or unified
cache found, its size (dataCacheSize_) and the number of cores sharing it
(coresSharignDataCache_). dataCacheLevels_ counts the recorded entries.
Does nothing unless tINTEL is set.
*/
void setCacheHierarchy()
{
if (!has(tINTEL)) return;
// cache type codes compared against the type field read from data[0]
const uint32_t NO_CACHE = 0;
const uint32_t DATA_CACHE = 1;
// const uint32_t INSTRUCTION_CACHE = 2;
const uint32_t UNIFIED_CACHE = 3;
uint32_t smt_width = 0;
uint32_t logical_cores = 0;
uint32_t data[4] = {};
// start from the leaf 0xB results when setNumCores() obtained them
if (x2APIC_supported_) {
smt_width = numCores_[0];
logical_cores = numCores_[1];
}
/*
Assumptions:
the first level of data cache is not shared (which is the
case for every existing architecture) and use this to
determine the SMT width for arch not supporting leaf 11.
when leaf 4 reports a number of core less than numCores_
on socket reported by leaf 11, then it is a correct number
of cores not an upperbound.
*/
// i is the leaf 4 sub-leaf index; the loop ends at the first NO_CACHE entry
// or once maxNumberCacheLevels entries have been recorded
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
getCpuidEx(0x4, i, data);
uint32_t cacheType = extractBit(data[0], 0, 4);
if (cacheType == NO_CACHE) break;
// instruction caches (and any other type) are skipped without being counted
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
actual_logical_cores = local::min_(actual_logical_cores, logical_cores);
}
assert(actual_logical_cores != 0);
// size = product of three (field + 1) values from data[1] and (data[2] + 1)
// presumably ways * partitions * line size * sets - confirm against the CPUID leaf 4 layout
dataCacheSize_[dataCacheLevels_] =
(extractBit(data[1], 22, 31) + 1)
* (extractBit(data[1], 12, 21) + 1)
* (extractBit(data[1], 0, 11) + 1)
* (data[2] + 1);
// without leaf 0xB, the first data cache seen defines the SMT width (see Assumptions above)
if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
// NOTE(review): if a unified cache is enumerated before any data cache while
// smt_width is still 0, the division below divides by zero when assert is compiled out
assert(smt_width != 0);
coresSharignDataCache_[dataCacheLevels_] = local::max_(actual_logical_cores / smt_width, 1u);
dataCacheLevels_++;
}
}
}
public:
// The fields below are filled by setFamily() from eax of CPUID leaf 1.
int model;
int family;
int stepping;
int extModel;
int extFamily;
int displayFamily; // family + extFamily when family == 0x0f, otherwise family
int displayModel; // (extModel << 4) + model when family is 6 or 0x0f, otherwise model
/*
	Return the count recorded for the requested topology level.
	SmtLevel : the SMT-level count itself
	CoreLevel : the core-level count divided by the SMT-level count
	Reports ERR_X2APIC_IS_NOT_SUPPORTED when x2APIC_supported_ is false or
	level is neither of the two values above.
*/
uint32_t getNumCores(IntelCpuTopologyLevel level) const
{
	if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
	const uint32_t smtCount = numCores_[SmtLevel - 1];
	if (level == SmtLevel) return smtCount;
	if (level == CoreLevel) return numCores_[CoreLevel - 1] / smtCount;
	XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
}
// Number of cache entries recorded in dataCacheSize_ / coresSharignDataCache_.
uint32_t getDataCacheLevels() const
{
	return dataCacheLevels_;
}
/*
	Number of cores sharing the i-th recorded cache.
	Reports ERR_BAD_PARAMETER when i is not below getDataCacheLevels().
*/
uint32_t getCoresSharingDataCache(uint32_t i) const
{
	const bool inRange = i < dataCacheLevels_;
	if (!inRange) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
	return coresSharignDataCache_[i];
}
/*
	Size value recorded for the i-th cache.
	Reports ERR_BAD_PARAMETER when i is not below getDataCacheLevels().
*/
uint32_t getDataCacheSize(uint32_t i) const
{
	const bool inRange = i < dataCacheLevels_;
	if (!inRange) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
	return dataCacheSize_[i];
}
/*
	Execute CPUID for leaf eaxIn and store the result as
	data[] = { eax, ebx, ecx, edx }
	When XBYAK_INTEL_CPU_SPECIFIC is not defined nothing is executed and data
	is left untouched.
*/
static inline void getCpuid(uint32_t eaxIn, uint32_t data[4])
{
#if !defined(XBYAK_INTEL_CPU_SPECIFIC)
	(void)eaxIn;
	(void)data;
#elif defined(_WIN32)
	__cpuid(reinterpret_cast<int*>(data), eaxIn);
#else
	__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
#endif
}
/*
	Execute CPUID for leaf eaxIn, sub-leaf ecxIn and store the result as
	data[] = { eax, ebx, ecx, edx }
	When XBYAK_INTEL_CPU_SPECIFIC is not defined nothing is executed and data
	is left untouched.
*/
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
{
#if !defined(XBYAK_INTEL_CPU_SPECIFIC)
	(void)eaxIn;
	(void)ecxIn;
	(void)data;
#elif defined(_WIN32)
	__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
#else
	__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
#endif
}
/*
	Return the 64-bit value read by xgetbv with index 0 (the constructor treats
	it as XFEATURE_ENABLED_MASK).
	Returns 0 when XBYAK_INTEL_CPU_SPECIFIC is not defined.
*/
static inline uint64_t getXfeature()
{
#if !defined(XBYAK_INTEL_CPU_SPECIFIC)
	return 0;
#elif defined(_MSC_VER)
	return _xgetbv(0);
#else
	uint32_t lo, hi;
	// the xgetbv mnemonic is not supported by gcc 4.2, so its opcode bytes are emitted directly
	// __asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
	__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c"(0));
	return (static_cast<uint64_t>(hi) << 32) | lo;
#endif
}
// XBYAK_SPLIT_ID(id) expands to the two template arguments of local::TypeT:
// first : bit (id % 64) when 0 <= id < 64, otherwise 0
// second : bit (id % 64) when id >= 64, otherwise 0
#define XBYAK_SPLIT_ID(id) ((0 <= id && id < 64) ? (1ull << (id % 64)) : 0), (id >= 64 ? (1ull << (id % 64)) : 0)
// XBYAK_DEFINE_TYPE(id, NAME) declares the static feature tag NAME for feature id;
// the constexpr form is used when the compiler check below passes.
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && (_MSC_VER >= 1700)) /* VS2012 */
#define XBYAK_DEFINE_TYPE(id, NAME) static const constexpr local::TypeT<XBYAK_SPLIT_ID(id)> NAME{}
#else
#define XBYAK_DEFINE_TYPE(id, NAME) static const local::TypeT<XBYAK_SPLIT_ID(id)> NAME
#endif
// Feature tags. Two names may share one id (37 and 43 below): they are aliases.
XBYAK_DEFINE_TYPE(0, tMMX);
XBYAK_DEFINE_TYPE(1, tMMX2);
XBYAK_DEFINE_TYPE(2, tCMOV);
XBYAK_DEFINE_TYPE(3, tSSE);
XBYAK_DEFINE_TYPE(4, tSSE2);
XBYAK_DEFINE_TYPE(5, tSSE3);
XBYAK_DEFINE_TYPE(6, tSSSE3);
XBYAK_DEFINE_TYPE(7, tSSE41);
XBYAK_DEFINE_TYPE(8, tSSE42);
XBYAK_DEFINE_TYPE(9, tPOPCNT);
XBYAK_DEFINE_TYPE(10, tAESNI);
XBYAK_DEFINE_TYPE(11, tAVX512_FP16);
XBYAK_DEFINE_TYPE(12, tOSXSAVE);
XBYAK_DEFINE_TYPE(13, tPCLMULQDQ);
XBYAK_DEFINE_TYPE(14, tAVX);
XBYAK_DEFINE_TYPE(15, tFMA);
XBYAK_DEFINE_TYPE(16, t3DN);
XBYAK_DEFINE_TYPE(17, tE3DN);
XBYAK_DEFINE_TYPE(18, tWAITPKG);
XBYAK_DEFINE_TYPE(19, tRDTSCP);
XBYAK_DEFINE_TYPE(20, tAVX2);
XBYAK_DEFINE_TYPE(21, tBMI1); // andn, bextr, blsi, blsmsk, blsr, tzcnt
XBYAK_DEFINE_TYPE(22, tBMI2); // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
XBYAK_DEFINE_TYPE(23, tLZCNT);
XBYAK_DEFINE_TYPE(24, tINTEL);
XBYAK_DEFINE_TYPE(25, tAMD);
XBYAK_DEFINE_TYPE(26, tENHANCED_REP); // enhanced rep movsb/stosb
XBYAK_DEFINE_TYPE(27, tRDRAND);
XBYAK_DEFINE_TYPE(28, tADX); // adcx, adox
XBYAK_DEFINE_TYPE(29, tRDSEED); // rdseed
XBYAK_DEFINE_TYPE(30, tSMAP); // stac
XBYAK_DEFINE_TYPE(31, tHLE); // xacquire, xrelease, xtest
XBYAK_DEFINE_TYPE(32, tRTM); // xbegin, xend, xabort
XBYAK_DEFINE_TYPE(33, tF16C); // vcvtph2ps, vcvtps2ph
XBYAK_DEFINE_TYPE(34, tMOVBE); // movbe
XBYAK_DEFINE_TYPE(35, tAVX512F);
XBYAK_DEFINE_TYPE(36, tAVX512DQ);
XBYAK_DEFINE_TYPE(37, tAVX512_IFMA);
XBYAK_DEFINE_TYPE(37, tAVX512IFMA);// = tAVX512_IFMA;
XBYAK_DEFINE_TYPE(38, tAVX512PF);
XBYAK_DEFINE_TYPE(39, tAVX512ER);
XBYAK_DEFINE_TYPE(40, tAVX512CD);
XBYAK_DEFINE_TYPE(41, tAVX512BW);
XBYAK_DEFINE_TYPE(42, tAVX512VL);
XBYAK_DEFINE_TYPE(43, tAVX512_VBMI);
XBYAK_DEFINE_TYPE(43, tAVX512VBMI); // = tAVX512_VBMI; // changed by Intel's manual
XBYAK_DEFINE_TYPE(44, tAVX512_4VNNIW);
XBYAK_DEFINE_TYPE(45, tAVX512_4FMAPS);
XBYAK_DEFINE_TYPE(46, tPREFETCHWT1);
XBYAK_DEFINE_TYPE(47, tPREFETCHW);
XBYAK_DEFINE_TYPE(48, tSHA);
XBYAK_DEFINE_TYPE(49, tMPX);
XBYAK_DEFINE_TYPE(50, tAVX512_VBMI2);
XBYAK_DEFINE_TYPE(51, tGFNI);
XBYAK_DEFINE_TYPE(52, tVAES);
XBYAK_DEFINE_TYPE(53, tVPCLMULQDQ);
XBYAK_DEFINE_TYPE(54, tAVX512_VNNI);
XBYAK_DEFINE_TYPE(55, tAVX512_BITALG);
XBYAK_DEFINE_TYPE(56, tAVX512_VPOPCNTDQ);
XBYAK_DEFINE_TYPE(57, tAVX512_BF16);
XBYAK_DEFINE_TYPE(58, tAVX512_VP2INTERSECT);
XBYAK_DEFINE_TYPE(59, tAMX_TILE);
XBYAK_DEFINE_TYPE(60, tAMX_INT8);
XBYAK_DEFINE_TYPE(61, tAMX_BF16);
XBYAK_DEFINE_TYPE(62, tAVX_VNNI);
XBYAK_DEFINE_TYPE(63, tCLFLUSHOPT);
// ids from 64 upward land in the second (high) template argument
XBYAK_DEFINE_TYPE(64, tCLDEMOTE);
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
XBYAK_DEFINE_TYPE(68, tAMX_FP16);
XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8);
XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT);
XBYAK_DEFINE_TYPE(71, tAVX_IFMA);
// both helper macros are local to this table
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
/*
	Query CPUID (and xgetbv when OSXSAVE is reported) and record every detected
	feature in type_, then fill in the family/model fields, the core counts and
	the data cache hierarchy.
*/
Cpu()
	: type_()
	, x2APIC_supported_(false)
	, numCores_()
	, dataCacheSize_()
	, coresSharignDataCache_()
	, dataCacheLevels_(0)
{
	uint32_t data[4] = {};
	// aliases into data[]; their values change with every getCpuid()/getCpuidEx() call
	const uint32_t& EAX = data[0];
	const uint32_t& EBX = data[1];
	const uint32_t& ECX = data[2];
	const uint32_t& EDX = data[3];
	getCpuid(0, data);
	const uint32_t maxNum = EAX;
	// ECX of leaf 0 is compared with the last four characters of the vendor string
	static const char intel[] = "ntel";
	static const char amd[] = "cAMD";
	if (ECX == get32bitAsBE(amd)) {
		type_ |= tAMD;
		getCpuid(0x80000001, data);
		if (EDX & (1U << 31)) {
			type_ |= t3DN;
			// 3DNow! implies support for PREFETCHW on AMD
			type_ |= tPREFETCHW;
		}
		if (EDX & (1U << 29)) {
			// Long mode implies support for PREFETCHW on AMD
			type_ |= tPREFETCHW;
		}
	} else if (ECX == get32bitAsBE(intel)) {
		// 'else if' is required: the AMD branch reloads data[] with leaf
		// 0x80000001, so after it ECX no longer holds vendor characters
		type_ |= tINTEL;
	}
	// Extended flags information
	getCpuid(0x80000000, data);
	const uint32_t maxExtendedNum = EAX;
	if (maxExtendedNum >= 0x80000001) {
		getCpuid(0x80000001, data);
		if (EDX & (1U << 31)) type_ |= t3DN;
		if (EDX & (1U << 30)) type_ |= tE3DN;
		if (EDX & (1U << 27)) type_ |= tRDTSCP;
		if (EDX & (1U << 22)) type_ |= tMMX2;
		if (EDX & (1U << 15)) type_ |= tCMOV;
		if (ECX & (1U << 5)) type_ |= tLZCNT;
		if (ECX & (1U << 8)) type_ |= tPREFETCHW;
	}
	if (maxExtendedNum >= 0x80000008) {
		getCpuid(0x80000008, data);
		if (EBX & (1U << 0)) type_ |= tCLZERO;
	}
	// basic feature flags (leaf 1)
	getCpuid(1, data);
	if (ECX & (1U << 0)) type_ |= tSSE3;
	if (ECX & (1U << 9)) type_ |= tSSSE3;
	if (ECX & (1U << 19)) type_ |= tSSE41;
	if (ECX & (1U << 20)) type_ |= tSSE42;
	if (ECX & (1U << 22)) type_ |= tMOVBE;
	if (ECX & (1U << 23)) type_ |= tPOPCNT;
	if (ECX & (1U << 25)) type_ |= tAESNI;
	if (ECX & (1U << 1)) type_ |= tPCLMULQDQ;
	if (ECX & (1U << 27)) type_ |= tOSXSAVE;
	if (ECX & (1U << 30)) type_ |= tRDRAND;
	if (ECX & (1U << 29)) type_ |= tF16C;
	if (EDX & (1U << 15)) type_ |= tCMOV;
	if (EDX & (1U << 23)) type_ |= tMMX;
	if (EDX & (1U << 25)) type_ |= tMMX2 | tSSE;
	if (EDX & (1U << 26)) type_ |= tSSE2;
	if (type_ & tOSXSAVE) {
		// check XFEATURE_ENABLED_MASK[2:1] = '11b'
		uint64_t bv = getXfeature();
		if ((bv & 6) == 6) {
			// ECX still holds the leaf 1 value here
			if (ECX & (1U << 28)) type_ |= tAVX;
			if (ECX & (1U << 12)) type_ |= tFMA;
			// do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support
#if !defined(__APPLE__)
			if (((bv >> 5) & 7) == 7)
#endif
			{
				getCpuidEx(7, 0, data);
				if (EBX & (1U << 16)) type_ |= tAVX512F;
				// every other AVX-512 flag is only recorded together with AVX512F
				if (type_ & tAVX512F) {
					if (EBX & (1U << 17)) type_ |= tAVX512DQ;
					if (EBX & (1U << 21)) type_ |= tAVX512_IFMA;
					if (EBX & (1U << 26)) type_ |= tAVX512PF;
					if (EBX & (1U << 27)) type_ |= tAVX512ER;
					if (EBX & (1U << 28)) type_ |= tAVX512CD;
					if (EBX & (1U << 30)) type_ |= tAVX512BW;
					if (EBX & (1U << 31)) type_ |= tAVX512VL;
					if (ECX & (1U << 1)) type_ |= tAVX512_VBMI;
					if (ECX & (1U << 6)) type_ |= tAVX512_VBMI2;
					if (ECX & (1U << 11)) type_ |= tAVX512_VNNI;
					if (ECX & (1U << 12)) type_ |= tAVX512_BITALG;
					if (ECX & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ;
					if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
					if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
					if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
					if ((type_ & tAVX512BW) && (EDX & (1U << 23))) type_ |= tAVX512_FP16;
				}
			}
		}
	}
	if (maxNum >= 7) {
		getCpuidEx(7, 0, data);
		const uint32_t maxNumSubLeaves = EAX;
		// AVX2 is only recorded when AVX was recorded above
		if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2;
		if (EBX & (1U << 3)) type_ |= tBMI1;
		if (EBX & (1U << 8)) type_ |= tBMI2;
		if (EBX & (1U << 9)) type_ |= tENHANCED_REP;
		if (EBX & (1U << 18)) type_ |= tRDSEED;
		if (EBX & (1U << 19)) type_ |= tADX;
		if (EBX & (1U << 20)) type_ |= tSMAP;
		if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
		if (EBX & (1U << 4)) type_ |= tHLE;
		if (EBX & (1U << 11)) type_ |= tRTM;
		if (EBX & (1U << 14)) type_ |= tMPX;
		if (EBX & (1U << 29)) type_ |= tSHA;
		if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
		if (ECX & (1U << 5)) type_ |= tWAITPKG;
		if (ECX & (1U << 8)) type_ |= tGFNI;
		if (ECX & (1U << 9)) type_ |= tVAES;
		if (ECX & (1U << 10)) type_ |= tVPCLMULQDQ;
		if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
		if (ECX & (1U << 27)) type_ |= tMOVDIRI;
		if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
		if (EDX & (1U << 24)) type_ |= tAMX_TILE;
		if (EDX & (1U << 25)) type_ |= tAMX_INT8;
		if (EDX & (1U << 22)) type_ |= tAMX_BF16;
		if (maxNumSubLeaves >= 1) {
			getCpuidEx(7, 1, data);
			if (EAX & (1U << 4)) type_ |= tAVX_VNNI;
			if (type_ & tAVX512F) {
				if (EAX & (1U << 5)) type_ |= tAVX512_BF16;
			}
			if (EAX & (1U << 21)) type_ |= tAMX_FP16;
			if (EAX & (1U << 23)) type_ |= tAVX_IFMA;
			if (EDX & (1U << 4)) type_ |= tAVX_VNNI_INT8;
			if (EDX & (1U << 5)) type_ |= tAVX_NE_CONVERT;
		}
	}
	// these rely on type_ (tINTEL) being final, so they run last
	setFamily();
	setNumCores();
	setCacheHierarchy();
}
/*
	Print the family/model/stepping fields and the derived display values with
	printf. Compiles to an empty function when XBYAK_ONLY_CLASS_CPU is defined.
*/
void putFamily() const
{
#ifdef XBYAK_ONLY_CLASS_CPU
	// nothing is printed in the CPU-class-only configuration
#else
	printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n",
		family, model, stepping, extFamily, extModel);
	printf("display:family=%X, model=%X\n", displayFamily, displayModel);
#endif
}
bool has(const Type& type) const
{
return (type & type_) == type;
}
};
#ifndef XBYAK_ONLY_CLASS_CPU
/*
	Accumulating timer built on getRdtsc().
	Each begin()/end() pair adds the difference of the two readings to the
	total returned by getClock() and increments getCount().
*/
class Clock {
public:
	// Read the time stamp counter; returns 0 when XBYAK_INTEL_CPU_SPECIFIC is not defined.
	static inline uint64_t getRdtsc()
	{
#if !defined(XBYAK_INTEL_CPU_SPECIFIC)
		// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
		return 0;
#elif defined(_MSC_VER)
		return __rdtsc();
#else
		uint32_t lo, hi;
		__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
		return (static_cast<uint64_t>(hi) << 32) | lo;
#endif
	}
	Clock()
		: clock_(0)
		, count_(0)
	{
	}
	// subtract the start reading; the matching end() adds the stop reading
	void begin()
	{
		const uint64_t start = getRdtsc();
		clock_ -= start;
	}
	void end()
	{
		const uint64_t stop = getRdtsc();
		clock_ += stop;
		++count_;
	}
	int getCount() const { return count_; }
	uint64_t getClock() const { return clock_; }
	// reset both the accumulated total and the pair counter
	void clear()
	{
		clock_ = 0;
		count_ = 0;
	}
private:
	uint64_t clock_;
	int count_;
};
#ifdef XBYAK64
// Flags that may be OR-ed into the tNum argument of StackFrame's constructor;
// StackFrame masks them out of the temporary-register count and records them
// as "rcx is used" / "rdx is used".
const int UseRCX = 1 << 6;
const int UseRDX = 1 << 7;
/*
	Fixed-capacity ordered list of up to maxTblNum 64-bit registers.
	Only the first size() entries are meaningful.
*/
class Pack {
	static const size_t maxTblNum = 15;
	Xbyak::Reg64 tbl_[maxTblNum];
	size_t n_;
public:
	Pack() : tbl_(), n_(0) {}
	// copy n registers from tbl; see init()
	Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); }
	// copy construction/assignment transfer only the n_ entries in use
	Pack(const Pack& rhs)
		: n_(rhs.n_)
	{
		for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
	}
	Pack& operator=(const Pack& rhs)
	{
		n_ = rhs.n_;
		for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i];
		return *this;
	}
	/*
		The constructors below take registers in REVERSE index order:
		the last argument becomes element 0 and the first argument becomes
		the highest element.
	*/
	Pack(const Xbyak::Reg64& t0)
	{ n_ = 1; tbl_[0] = t0; }
	Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 2; tbl_[0] = t0; tbl_[1] = t1; }
	Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 3; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; }
	Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 4; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; }
	Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 5; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; }
	Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 6; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; }
	Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 7; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; }
	Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 8; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; }
	Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 9; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; }
	Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 10; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; }
	Pack(const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 11; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; }
	Pack(const Xbyak::Reg64& tb, const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
	{ n_ = 12; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; tbl_[11] = tb; }
	/*
		Add t after the current last element.
		Reports ERR_BAD_PARAMETER when the pack already holds maxTblNum registers.
	*/
	Pack& append(const Xbyak::Reg64& t)
	{
		if (n_ == maxTblNum) {
			fprintf(stderr, "ERR Pack::can't append\n");
			XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
		}
		tbl_[n_++] = t;
		return *this;
	}
	/*
		Replace the contents with tbl[0..n).
		Reports ERR_BAD_PARAMETER when n exceeds maxTblNum.
	*/
	void init(const Xbyak::Reg64 *tbl, size_t n)
	{
		if (n > maxTblNum) {
			fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n);
			XBYAK_THROW(ERR_BAD_PARAMETER)
		}
		n_ = n;
		for (size_t i = 0; i < n; i++) {
			tbl_[i] = tbl[i];
		}
	}
	// Element n; reports ERR_BAD_PARAMETER when n is not below size().
	const Xbyak::Reg64& operator[](size_t n) const
	{
		if (n >= n_) {
			fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
			XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
		}
		return tbl_[n];
	}
	size_t size() const { return n_; }
	/*
		get tbl[pos, pos + num)
		@param pos [in] index of the first register to copy (pos <= size())
		@param num [in] number of registers; size_t(-1) means "up to the end"
		Reports ERR_BAD_PARAMETER when the requested range does not lie inside
		[0, size()).
	*/
	Pack sub(size_t pos, size_t num = size_t(-1)) const
	{
		const bool toEnd = (num == size_t(-1));
		// The range test is written so that it cannot wrap around:
		// "n_ - pos" is only evaluated once pos <= n_ is known, and
		// "pos + num" (which can overflow size_t) is never computed.
		if (pos > n_ || (!toEnd && num > n_ - pos)) {
			fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num);
			XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack())
		}
		if (toEnd) num = n_ - pos;
		Pack pack;
		pack.n_ = num;
		for (size_t i = 0; i < num; i++) {
			pack.tbl_[i] = tbl_[pos + i];
		}
		return pack;
	}
	// print the string form of every register in the pack on one line
	void put() const
	{
		for (size_t i = 0; i < n_; i++) {
			printf("%s ", tbl_[i].toString());
		}
		printf("\n");
	}
};
/*
Emits a function prolog into a CodeGenerator when constructed and the
matching epilog from close() (called by the destructor when makeEpilog is
true). The registers assigned to parameters and temporaries are exposed
through the public Pack references p and t.
*/
class StackFrame {
// noSaveNum : number of leading entries of getOrderTbl() that are used without being pushed
// rcxPos, rdxPos : index of rcx / rdx inside getOrderTbl() for this calling convention
#ifdef XBYAK64_WIN
static const int noSaveNum = 6;
static const int rcxPos = 0;
static const int rdxPos = 1;
#else
static const int noSaveNum = 8;
static const int rcxPos = 3;
static const int rdxPos = 2;
#endif
static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
Xbyak::CodeGenerator *code_;
int pNum_;
int tNum_; // temporary count with the UseRCX / UseRDX flag bits removed
bool useRcx_;
bool useRdx_;
int saveNum_; // number of registers pushed by the prolog
int P_; // local stack size in bytes that the prolog subtracts from rsp
bool makeEpilog_;
Xbyak::Reg64 pTbl_[4];
Xbyak::Reg64 tTbl_[maxRegNum];
Pack p_;
Pack t_;
// declared but not defined: StackFrame is not copyable
StackFrame(const StackFrame&);
void operator=(const StackFrame&);
public:
const Pack& p;
const Pack& t;
/*
make stack frame
@param code [in] code generator that receives the prolog (and later the epilog)
@param pNum [in] num of function parameter(0 <= pNum <= 4)
@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
@param stackSizeByte [in] local stack size
@param makeEpilog [in] automatically call close() if true
you can use
rax
gp0, ..., gp(pNum - 1)
gt0, ..., gt(tNum-1)
rcx if tNum & UseRCX
rdx if tNum & UseRDX
rsp[0..stackSizeByte - 1]
*/
StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true)
: code_(code)
, pNum_(pNum)
, tNum_(tNum & ~(UseRCX | UseRDX))
, useRcx_((tNum & UseRCX) != 0)
, useRdx_((tNum & UseRDX) != 0)
, saveNum_(0)
, P_(0)
, makeEpilog_(makeEpilog)
, p(p_)
, t(t_)
{
using namespace Xbyak;
if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM)
const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM)
const Reg64& _rsp = code->rsp;
// push every register needed beyond the first noSaveNum entries of the order table
saveNum_ = local::max_(0, allRegNum - noSaveNum);
const int *tbl = getOrderTbl() + noSaveNum;
for (int i = 0; i < saveNum_; i++) {
code->push(Reg64(tbl[i]));
}
// round the local area up to whole 8-byte slots
P_ = (stackSizeByte + 7) / 8;
if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
P_ *= 8;
if (P_ > 0) code->sub(_rsp, P_);
// pos walks the order table; parameters take the first pNum slots, temporaries the following ones
int pos = 0;
for (int i = 0; i < pNum; i++) {
pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
}
for (int i = 0; i < tNum_; i++) {
tTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
}
// when rcx / rdx carries one of the pNum parameters but is requested for free use,
// the parameter is moved to r10 / r11 (the register getRegIdx() hands out in its place)
if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx);
if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx);
p_.init(pTbl_, pNum);
t_.init(tTbl_, tNum_);
}
/*
make epilog manually
@param callRet [in] call ret() if true
*/
void close(bool callRet = true)
{
using namespace Xbyak;
const Reg64& _rsp = code_->rsp;
const int *tbl = getOrderTbl() + noSaveNum;
// undo the prolog: release the local area, then pop in reverse push order
if (P_ > 0) code_->add(_rsp, P_);
for (int i = 0; i < saveNum_; i++) {
code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
}
if (callRet) code_->ret();
}
// emits the epilog (including ret) unless makeEpilog was false
~StackFrame()
{
if (!makeEpilog_) return;
close();
}
private:
// Register allocation order: the first noSaveNum entries are used without saving,
// the remaining ones are pushed by the prolog before use.
const int *getOrderTbl() const
{
using namespace Xbyak;
static const int tbl[] = {
#ifdef XBYAK64_WIN
Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI,
#else
Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11,
#endif
Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15
};
return &tbl[0];
}
// Return the next register index from the order table and advance pos.
// With useRcx_ / useRdx_ set, rcx / rdx are replaced by r10 / r11, and the
// table's own r10 / r11 entries are skipped so they are not handed out twice.
int getRegIdx(int& pos) const
{
assert(pos < maxRegNum);
using namespace Xbyak;
const int *tbl = getOrderTbl();
int r = tbl[pos++];
if (useRcx_) {
if (r == Operand::RCX) { return Operand::R10; }
if (r == Operand::R10) { r = tbl[pos++]; }
}
if (useRdx_) {
if (r == Operand::RDX) { return Operand::R11; }
if (r == Operand::R11) { return tbl[pos++]; }
}
return r;
}
};
#endif
/*
    Registers JIT-generated code ranges with an external profiler.

    The backend is selected at runtime with init(), but it must also be compiled in:
    XBYAK_USE_PERF enables the perf map-file writer and XBYAK_USE_VTUNE enables
    VTune JIT notifications. Without the matching macro, init() leaves the mode
    at None and set() does nothing.
*/
class Profiler {
    int mode_;
    const char *suffix_; // appended to every function name; never null
    const void *startAddr_; // start of the next range for set(funcName, endAddr)
#ifdef XBYAK_USE_PERF
    FILE *fp_; // map file opened by init(Perf), released by close()
#endif
public:
    enum {
        None = 0,
        Perf = 1,
        VTune = 2
    };
    Profiler()
        : mode_(None)
        , suffix_("")
        , startAddr_(0)
#ifdef XBYAK_USE_PERF
        , fp_(0)
#endif
    {
    }
    /*
        append suffix to funcName
        @param suffix [in] stored by pointer (not copied), so it must stay valid
        for later set() calls; a null pointer is treated as ""
    */
    void setNameSuffix(const char *suffix)
    {
        // set() passes suffix_ to strlen() and "%s", so it must never be null
        suffix_ = suffix ? suffix : "";
    }
    /*
        set the start address used by the next set(funcName, endAddr)
    */
    void setStartAddr(const void *startAddr)
    {
        startAddr_ = startAddr;
    }
    /*
        select the profiler backend
        @param mode [in] None, Perf or VTune; any other value behaves like None
        @note the mode stays None if the backend is not compiled in, if the perf
        map file can't be opened, or if VTune sampling is not active
    */
    void init(int mode)
    {
        mode_ = None;
        switch (mode) {
        default:
        case None:
            return;
        case Perf:
#ifdef XBYAK_USE_PERF
            close(); // drop a map file left open by an earlier init(Perf)
            {
                const int pid = getpid();
                char name[128];
                snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid);
                fp_ = fopen(name, "a+");
                if (fp_ == 0) {
                    fprintf(stderr, "can't open %s\n", name);
                    return;
                }
            }
            mode_ = Perf;
#endif
            return;
        case VTune:
#ifdef XBYAK_USE_VTUNE
            dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling
            if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) {
                fprintf(stderr, "VTune profiling is not active\n");
                return;
            }
            mode_ = VTune;
#endif
            return;
        }
    }
    ~Profiler()
    {
        close();
    }
    /*
        close the perf map file if it is open (no-op otherwise)
    */
    void close()
    {
#ifdef XBYAK_USE_PERF
        if (fp_ == 0) return;
        fclose(fp_);
        fp_ = 0;
#endif
    }
    /*
        register [startAddr, startAddr + funcSize) under the name funcName + suffix
        @param funcName [in] function name (must not be null)
        @param startAddr [in] first byte of the generated code
        @param funcSize [in] size of the generated code in bytes
    */
    void set(const char *funcName, const void *startAddr, size_t funcSize) const
    {
        if (mode_ == None) return;
#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE)
        (void)funcName;
        (void)startAddr;
        (void)funcSize;
#endif
#ifdef XBYAK_USE_PERF
        if (mode_ == Perf) {
            if (fp_ == 0) return;
            // "%llx" takes an unsigned long long, so convert the address to an unsigned type
            fprintf(fp_, "%llx %zx %s%s", (unsigned long long)(size_t)startAddr, funcSize, funcName, suffix_);
            /*
                perf does not recognize the function name which is less than 3,
                so append '_' at the end of the name if necessary
            */
            size_t n = strlen(funcName) + strlen(suffix_);
            for (size_t i = n; i < 3; i++) {
                fprintf(fp_, "_");
            }
            fprintf(fp_, "\n");
            fflush(fp_);
        }
#endif
#ifdef XBYAK_USE_VTUNE
        if (mode_ != VTune) return;
        char className[] = "";
        char fileName[] = "";
        iJIT_Method_Load jmethod = {};
        jmethod.method_id = iJIT_GetNewMethodID();
        jmethod.class_file_name = className;
        jmethod.source_file_name = fileName;
        jmethod.method_load_address = const_cast<void*>(startAddr);
        jmethod.method_size = funcSize;
        jmethod.line_number_size = 0;
        char buf[128]; // names longer than the buffer are truncated by snprintf
        snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_);
        jmethod.method_name = buf;
        iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod);
#endif
    }
    /*
        for continuous set
        funcSize = endAddr - <previous set endAddr>
        the range starts at the address given to setStartAddr() (or at the
        previous endAddr), and endAddr becomes the start of the next range
    */
    void set(const char *funcName, const void *endAddr)
    {
        set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_);
        startAddr_ = endAddr;
    }
};
#endif // XBYAK_ONLY_CLASS_CPU
} } // end of util
#endif

View File

@@ -25,21 +25,20 @@ add_executable(natsukashii
${FRONTEND_HEADERS} ${FRONTEND_HEADERS}
main.cpp main.cpp
common.hpp common.hpp
util.hpp util.hpp)
n64/Scheduler.cpp
n64/Scheduler.hpp)
target_include_directories(natsukashii PRIVATE target_include_directories(natsukashii PRIVATE
. .
n64 n64
n64/core n64/core
n64/core/cpu/ n64/core/interpreter/
n64/core/cpu/registers n64/core/interpreter/registers
n64/core/mmio n64/core/mmio
n64/core/rsp n64/core/rsp
frontend frontend
frontend/imgui frontend/imgui
../external ../external
../external/xbyak
../external/imgui/imgui ../external/imgui/imgui
../external/parallel-rdp/ ../external/parallel-rdp/
../external/nativefiledialog-extended/src/include ../external/nativefiledialog-extended/src/include
@@ -54,8 +53,8 @@ file(REMOVE
${PROJECT_BINARY_DIR}/resources/shader.vert) ${PROJECT_BINARY_DIR}/resources/shader.vert)
if(WIN32) if(WIN32)
add_compile_definitions(NOMINMAX _CRT_SECURE_NO_WARNINGS) target_compile_definitions(natsukashii PUBLIC NOMINMAX _CRT_SECURE_NO_WARNINGS)
add_compile_options(/EHa) target_compile_options(natsukashii PUBLIC /EHa)
endif() endif()
target_link_libraries(natsukashii PRIVATE SDL2::SDL2main SDL2::SDL2 capstone-static nfd parallel-rdp fmt::fmt imgui nlohmann_json::nlohmann_json) target_link_libraries(natsukashii PRIVATE SDL2::SDL2main SDL2::SDL2 capstone-static nfd parallel-rdp fmt::fmt imgui nlohmann_json::nlohmann_json)

View File

@@ -7,11 +7,12 @@
namespace n64 { namespace n64 {
Core::Core() { Core::Core() {
cpu = std::make_unique<Interpreter>();
Stop(); Stop();
} }
void Core::Stop() { void Core::Stop() {
cpu.Reset(); cpu->Reset();
mem.Reset(); mem.Reset();
pause = true; pause = true;
romLoaded = false; romLoaded = false;
@@ -19,13 +20,13 @@ void Core::Stop() {
CartInfo Core::LoadROM(const std::string& rom_) { CartInfo Core::LoadROM(const std::string& rom_) {
rom = rom_; rom = rom_;
cpu.Reset(); cpu->Reset();
mem.Reset(); mem.Reset();
pause = false; pause = false;
romLoaded = true; romLoaded = true;
CartInfo cartInfo = mem.LoadROM(rom); CartInfo cartInfo = mem.LoadROM(rom);
DoPIFHLE(mem, cpu.regs, cartInfo); DoPIFHLE(mem, cpu->regs, cartInfo);
return cartInfo; return cartInfo;
} }
@@ -41,11 +42,11 @@ void Core::Run(Window& window, float volumeL, float volumeR) {
mmio.vi.current = (i << 1) + field; mmio.vi.current = (i << 1) + field;
if ((mmio.vi.current & 0x3FE) == mmio.vi.intr) { if ((mmio.vi.current & 0x3FE) == mmio.vi.intr) {
InterruptRaise(mmio.mi, cpu.regs, Interrupt::VI); InterruptRaise(mmio.mi, cpu->regs, Interrupt::VI);
} }
for(;cycles <= mmio.vi.cyclesPerHalfline; cycles++, frameCycles++) { for(;cycles <= mmio.vi.cyclesPerHalfline; cycles++, frameCycles++) {
cpu.Step(mem); cpu->Step(mem);
cpuSteps++; cpuSteps++;
if(mmio.rsp.spStatus.halt) { if(mmio.rsp.spStatus.halt) {
mmio.rsp.steps = 0; mmio.rsp.steps = 0;
@@ -58,25 +59,25 @@ void Core::Run(Window& window, float volumeL, float volumeR) {
while(mmio.rsp.steps > 0) { while(mmio.rsp.steps > 0) {
mmio.rsp.steps--; mmio.rsp.steps--;
mmio.rsp.Step(cpu.regs, mem); mmio.rsp.Step(cpu->regs, mem);
} }
} }
mmio.ai.Step(mem, cpu.regs, 1, volumeL, volumeR); mmio.ai.Step(mem, cpu->regs, 1, volumeL, volumeR);
scheduler.tick(1, mem, cpu.regs); scheduler.tick(1, mem, cpu->regs);
} }
cycles -= mmio.vi.cyclesPerHalfline; cycles -= mmio.vi.cyclesPerHalfline;
} }
if ((mmio.vi.current & 0x3FE) == mmio.vi.intr) { if ((mmio.vi.current & 0x3FE) == mmio.vi.intr) {
InterruptRaise(mmio.mi, cpu.regs, Interrupt::VI); InterruptRaise(mmio.mi, cpu->regs, Interrupt::VI);
} }
UpdateScreenParallelRdp(*this, window, GetVI()); UpdateScreenParallelRdp(*this, window, GetVI());
int missedCycles = N64_CYCLES_PER_FRAME - frameCycles; int missedCycles = N64_CYCLES_PER_FRAME - frameCycles;
mmio.ai.Step(mem, cpu.regs, missedCycles, volumeL, volumeR); mmio.ai.Step(mem, cpu->regs, missedCycles, volumeL, volumeR);
} else if(pause && romLoaded) { } else if(pause && romLoaded) {
UpdateScreenParallelRdp(*this, window, GetVI()); UpdateScreenParallelRdp(*this, window, GetVI());
} else if(pause && !romLoaded) { } else if(pause && !romLoaded) {

View File

@@ -1,6 +1,6 @@
#pragma once #pragma once
#include <SDL_events.h> #include <SDL_events.h>
#include <Cpu.hpp> #include <Interpreter.hpp>
#include <Mem.hpp> #include <Mem.hpp>
#include <string> #include <string>
@@ -27,6 +27,6 @@ struct Core {
bool done = false; bool done = false;
std::string rom; std::string rom;
Mem mem; Mem mem;
Cpu cpu; std::unique_ptr<BaseCpu> cpu;
}; };
} }

View File

@@ -1,6 +1,6 @@
#include <Scheduler.hpp> #include <Scheduler.hpp>
#include <Mem.hpp> #include <Mem.hpp>
#include <Registers.hpp> #include <core/registers/Registers.hpp>
Scheduler scheduler; Scheduler scheduler;

12
src/n64/core/BaseCpu.hpp Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include <core/registers/Registers.hpp>
namespace n64 {
// Common interface of the CPU back-ends (Interpreter and Dynarec derive from it).
// It owns the register file so callers can reach `regs` without knowing which
// back-end is active.
struct BaseCpu {
  virtual ~BaseCpu() {}
  Registers regs;
  // Advance the CPU; the default implementation does nothing.
  virtual void Step(Mem& mem) {}
  // Return the CPU to its initial state; the default implementation does nothing.
  virtual void Reset() {}
};
}

11
src/n64/core/Dynarec.cpp Normal file
View File

@@ -0,0 +1,11 @@
#include <Dynarec.hpp>
namespace n64 {
// Placeholder: the recompiler keeps no state to reset yet, so this is empty.
void Dynarec::Reset() {
}
// Placeholder: no code is translated or executed yet; `mem` is unused.
void Dynarec::Step(Mem &mem) {
}
}

12
src/n64/core/Dynarec.hpp Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include <BaseCpu.hpp>
#include <xbyak/xbyak.h>
namespace n64 {
// CPU back-end intended to run guest code through JIT-generated host code.
// Preliminary: only the BaseCpu overrides and the code emitter are declared here.
struct Dynarec : BaseCpu {
void Step(Mem& mem) override;
void Reset() override;
private:
// xbyak emitter that owns the buffer for generated host code.
Xbyak::CodeGenerator code;
};
}

View File

@@ -1,8 +1,8 @@
#include <n64/core/Cpu.hpp> #include <n64/core/Interpreter.hpp>
#include <util.hpp> #include <util.hpp>
namespace n64 { namespace n64 {
void Cpu::Reset() { void Interpreter::Reset() {
regs.Reset(); regs.Reset();
} }
@@ -25,7 +25,7 @@ inline void CheckCompareInterrupt(MI& mi, Registers& regs) {
} }
} }
inline void Cpu::disassembly(u32 instr) { inline void Interpreter::disassembly(u32 instr) {
size_t count; size_t count;
cs_insn *insn; cs_insn *insn;
@@ -45,7 +45,7 @@ inline void Cpu::disassembly(u32 instr) {
printf("ERROR: Failed to disassemble given code!\n"); printf("ERROR: Failed to disassemble given code!\n");
} }
void Cpu::Step(Mem& mem) { void Interpreter::Step(Mem& mem) {
regs.gpr[0] = 0; regs.gpr[0] = 0;
CheckCompareInterrupt(mem.mmio.mi, regs); CheckCompareInterrupt(mem.mmio.mi, regs);

View File

@@ -1,24 +1,24 @@
#pragma once #pragma once
#include <Registers.hpp> #include <BaseCpu.hpp>
#include <core/registers/Registers.hpp>
#include <Mem.hpp> #include <Mem.hpp>
#include <capstone/capstone.h> #include <capstone/capstone.h>
#include <vector> #include <vector>
namespace n64 { namespace n64 {
struct Cpu { struct Interpreter : BaseCpu {
Cpu() { Interpreter() {
if(cs_open(CS_ARCH_MIPS, CS_MODE_MIPS64, &handle) != CS_ERR_OK) { if(cs_open(CS_ARCH_MIPS, CS_MODE_MIPS64, &handle) != CS_ERR_OK) {
util::panic("Could not initialize capstone!\n"); util::panic("Could not initialize capstone!\n");
} }
Reset(); Reset();
} }
~Cpu() { ~Interpreter() {
cs_close(&handle); cs_close(&handle);
} }
void Reset(); void Reset() override;
void Step(Mem&); void Step(Mem&) override;
Registers regs;
private: private:
csh handle; csh handle;

View File

@@ -1,7 +1,7 @@
#include <n64/core/MMIO.hpp> #include <n64/core/MMIO.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/Mem.hpp> #include <n64/core/Mem.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
namespace n64 { namespace n64 {
MMIO::MMIO () { MMIO::MMIO () {

View File

@@ -1,8 +1,8 @@
#include <Mem.hpp> #include <Mem.hpp>
#include <fstream> #include <fstream>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <n64/core/cpu/registers/Cop0.hpp> #include <core/registers/Cop0.hpp>
#include <n64/core/Cpu.hpp> #include <n64/core/Interpreter.hpp>
namespace n64 { namespace n64 {
Mem::Mem() { Mem::Mem() {

View File

@@ -1,5 +1,5 @@
#include <n64/core/cpu/registers/Cop0.hpp> #include <core/registers/Cop0.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <util.hpp> #include <util.hpp>
namespace n64 { namespace n64 {

View File

@@ -1,5 +1,5 @@
#include <n64/core/cpu/registers/Cop1.hpp> #include <core/registers/Cop1.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <n64/core/Mem.hpp> #include <n64/core/Mem.hpp>
#include <cmath> #include <cmath>
#include <cfenv> #include <cfenv>

View File

@@ -1,8 +1,8 @@
#include <n64/core/Cpu.hpp> #include <n64/core/Interpreter.hpp>
#include <util.hpp> #include <util.hpp>
namespace n64 { namespace n64 {
void Cpu::special(Mem& mem, u32 instr) { void Interpreter::special(Mem& mem, u32 instr) {
u8 mask = (instr & 0x3F); u8 mask = (instr & 0x3F);
// 00rr_rccc // 00rr_rccc
switch (mask) { // TODO: named constants for clearer code switch (mask) { // TODO: named constants for clearer code
@@ -67,7 +67,7 @@ void Cpu::special(Mem& mem, u32 instr) {
} }
} }
void Cpu::regimm(u32 instr) { void Interpreter::regimm(u32 instr) {
u8 mask = ((instr >> 16) & 0x1F); u8 mask = ((instr >> 16) & 0x1F);
// 000r_rccc // 000r_rccc
switch (mask) { // TODO: named constants for clearer code switch (mask) { // TODO: named constants for clearer code
@@ -90,7 +90,7 @@ void Cpu::regimm(u32 instr) {
} }
} }
void Cpu::Exec(Mem& mem, u32 instr) { void Interpreter::Exec(Mem& mem, u32 instr) {
u8 mask = (instr >> 26) & 0x3f; u8 mask = (instr >> 26) & 0x3f;
// 00rr_rccc // 00rr_rccc
switch(mask) { // TODO: named constants for clearer code switch(mask) { // TODO: named constants for clearer code

View File

@@ -1,4 +1,4 @@
#include <n64/core/Cpu.hpp> #include <n64/core/Interpreter.hpp>
#include <util.hpp> #include <util.hpp>
#define se_imm(x) ((s16)((x) & 0xFFFF)) #define se_imm(x) ((s16)((x) & 0xFFFF))
@@ -7,7 +7,7 @@
#define check_signed_underflow(op1, op2, res) (((((op1) ^ (op2)) & ((op1) ^ (res))) >> ((sizeof(res) * 8) - 1)) & 1) #define check_signed_underflow(op1, op2, res) (((((op1) ^ (op2)) & ((op1) ^ (res))) >> ((sizeof(res) * 8) - 1)) & 1)
namespace n64 { namespace n64 {
void Cpu::add(u32 instr) { void Interpreter::add(u32 instr) {
u32 rs = (s32)regs.gpr[RS(instr)]; u32 rs = (s32)regs.gpr[RS(instr)];
u32 rt = (s32)regs.gpr[RT(instr)]; u32 rt = (s32)regs.gpr[RT(instr)];
u32 result = rs + rt; u32 result = rs + rt;
@@ -18,14 +18,14 @@ void Cpu::add(u32 instr) {
} }
} }
void Cpu::addu(u32 instr) { void Interpreter::addu(u32 instr) {
s32 rs = (s32)regs.gpr[RS(instr)]; s32 rs = (s32)regs.gpr[RS(instr)];
s32 rt = (s32)regs.gpr[RT(instr)]; s32 rt = (s32)regs.gpr[RT(instr)];
s32 result = rs + rt; s32 result = rs + rt;
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::addi(u32 instr) { void Interpreter::addi(u32 instr) {
u32 rs = regs.gpr[RS(instr)]; u32 rs = regs.gpr[RS(instr)];
u32 imm = s32(s16(instr)); u32 imm = s32(s16(instr));
u32 result = rs + imm; u32 result = rs + imm;
@@ -36,14 +36,14 @@ void Cpu::addi(u32 instr) {
} }
} }
void Cpu::addiu(u32 instr) { void Interpreter::addiu(u32 instr) {
s32 rs = (s32)regs.gpr[RS(instr)]; s32 rs = (s32)regs.gpr[RS(instr)];
s16 imm = (s16)(instr); s16 imm = (s16)(instr);
s32 result = rs + imm; s32 result = rs + imm;
regs.gpr[RT(instr)] = result; regs.gpr[RT(instr)] = result;
} }
void Cpu::dadd(u32 instr) { void Interpreter::dadd(u32 instr) {
u64 rs = regs.gpr[RS(instr)]; u64 rs = regs.gpr[RS(instr)];
u64 rt = regs.gpr[RT(instr)]; u64 rt = regs.gpr[RT(instr)];
u64 result = rt + rs; u64 result = rt + rs;
@@ -54,13 +54,13 @@ void Cpu::dadd(u32 instr) {
} }
} }
void Cpu::daddu(u32 instr) { void Interpreter::daddu(u32 instr) {
s64 rs = regs.gpr[RS(instr)]; s64 rs = regs.gpr[RS(instr)];
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
regs.gpr[RD(instr)] = rs + rt; regs.gpr[RD(instr)] = rs + rt;
} }
void Cpu::daddi(u32 instr) { void Interpreter::daddi(u32 instr) {
u64 imm = s64(s16(instr)); u64 imm = s64(s16(instr));
u64 rs = regs.gpr[RS(instr)]; u64 rs = regs.gpr[RS(instr)];
u64 result = imm + rs; u64 result = imm + rs;
@@ -71,13 +71,13 @@ void Cpu::daddi(u32 instr) {
} }
} }
void Cpu::daddiu(u32 instr) { void Interpreter::daddiu(u32 instr) {
s16 imm = (s16)(instr); s16 imm = (s16)(instr);
s64 rs = regs.gpr[RS(instr)]; s64 rs = regs.gpr[RS(instr)];
regs.gpr[RT(instr)] = rs + imm; regs.gpr[RT(instr)] = rs + imm;
} }
void Cpu::div(u32 instr) { void Interpreter::div(u32 instr) {
s64 dividend = (s32)regs.gpr[RS(instr)]; s64 dividend = (s32)regs.gpr[RS(instr)];
s64 divisor = (s32)regs.gpr[RT(instr)]; s64 divisor = (s32)regs.gpr[RT(instr)];
@@ -96,7 +96,7 @@ void Cpu::div(u32 instr) {
} }
} }
void Cpu::divu(u32 instr) { void Interpreter::divu(u32 instr) {
u32 dividend = regs.gpr[RS(instr)]; u32 dividend = regs.gpr[RS(instr)];
u32 divisor = regs.gpr[RT(instr)]; u32 divisor = regs.gpr[RT(instr)];
if(divisor == 0) { if(divisor == 0) {
@@ -110,7 +110,7 @@ void Cpu::divu(u32 instr) {
} }
} }
void Cpu::ddiv(u32 instr) { void Interpreter::ddiv(u32 instr) {
s64 dividend = regs.gpr[RS(instr)]; s64 dividend = regs.gpr[RS(instr)];
s64 divisor = regs.gpr[RT(instr)]; s64 divisor = regs.gpr[RT(instr)];
if (dividend == 0x8000000000000000 && divisor == 0xFFFFFFFFFFFFFFFF) { if (dividend == 0x8000000000000000 && divisor == 0xFFFFFFFFFFFFFFFF) {
@@ -131,7 +131,7 @@ void Cpu::ddiv(u32 instr) {
} }
} }
void Cpu::ddivu(u32 instr) { void Interpreter::ddivu(u32 instr) {
u64 dividend = regs.gpr[RS(instr)]; u64 dividend = regs.gpr[RS(instr)];
u64 divisor = regs.gpr[RT(instr)]; u64 divisor = regs.gpr[RT(instr)];
if(divisor == 0) { if(divisor == 0) {
@@ -145,14 +145,14 @@ void Cpu::ddivu(u32 instr) {
} }
} }
void Cpu::branch(bool cond, s64 address) { void Interpreter::branch(bool cond, s64 address) {
regs.delaySlot = true; regs.delaySlot = true;
if (cond) { if (cond) {
regs.nextPC = address; regs.nextPC = address;
} }
} }
void Cpu::branch_likely(bool cond, s64 address) { void Interpreter::branch_likely(bool cond, s64 address) {
regs.delaySlot = true; regs.delaySlot = true;
if (cond) { if (cond) {
regs.nextPC = address; regs.nextPC = address;
@@ -161,44 +161,44 @@ void Cpu::branch_likely(bool cond, s64 address) {
} }
} }
void Cpu::b(u32 instr, bool cond) { void Interpreter::b(u32 instr, bool cond) {
s64 offset = (s64)se_imm(instr) << 2; s64 offset = (s64)se_imm(instr) << 2;
s64 address = regs.pc + offset; s64 address = regs.pc + offset;
branch(cond, address); branch(cond, address);
} }
void Cpu::blink(u32 instr, bool cond) { void Interpreter::blink(u32 instr, bool cond) {
regs.gpr[31] = regs.nextPC; regs.gpr[31] = regs.nextPC;
s64 offset = (s64)se_imm(instr) << 2; s64 offset = (s64)se_imm(instr) << 2;
s64 address = regs.pc + offset; s64 address = regs.pc + offset;
branch(cond, address); branch(cond, address);
} }
void Cpu::bl(u32 instr, bool cond) { void Interpreter::bl(u32 instr, bool cond) {
s64 offset = (s64)se_imm(instr) << 2; s64 offset = (s64)se_imm(instr) << 2;
s64 address = regs.pc + offset; s64 address = regs.pc + offset;
branch_likely(cond, address); branch_likely(cond, address);
} }
void Cpu::bllink(u32 instr, bool cond) { void Interpreter::bllink(u32 instr, bool cond) {
regs.gpr[31] = regs.nextPC; regs.gpr[31] = regs.nextPC;
s64 offset = (s64)se_imm(instr) << 2; s64 offset = (s64)se_imm(instr) << 2;
s64 address = regs.pc + offset; s64 address = regs.pc + offset;
branch_likely(cond, address); branch_likely(cond, address);
} }
void Cpu::lui(u32 instr) { void Interpreter::lui(u32 instr) {
s64 val = (s16)instr; s64 val = (s16)instr;
val <<= 16; val <<= 16;
regs.gpr[RT(instr)] = val; regs.gpr[RT(instr)] = val;
} }
void Cpu::lb(Mem& mem, u32 instr) { void Interpreter::lb(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
regs.gpr[RT(instr)] = (s8)mem.Read8(regs, address, regs.oldPC); regs.gpr[RT(instr)] = (s8)mem.Read8(regs, address, regs.oldPC);
} }
void Cpu::lh(Mem& mem, u32 instr) { void Interpreter::lh(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b1)) { if (check_address_error(address, 0b1)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -209,7 +209,7 @@ void Cpu::lh(Mem& mem, u32 instr) {
regs.gpr[RT(instr)] = (s16)mem.Read16(regs, address, regs.oldPC); regs.gpr[RT(instr)] = (s16)mem.Read16(regs, address, regs.oldPC);
} }
void Cpu::lw(Mem& mem, u32 instr) { void Interpreter::lw(Mem& mem, u32 instr) {
s16 offset = instr; s16 offset = instr;
u64 address = regs.gpr[RS(instr)] + offset; u64 address = regs.gpr[RS(instr)] + offset;
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
@@ -227,7 +227,7 @@ void Cpu::lw(Mem& mem, u32 instr) {
} }
} }
void Cpu::ll(Mem& mem, u32 instr) { void Interpreter::ll(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 physical; u32 physical;
if (!MapVAddr(regs, LOAD, address, physical)) { if (!MapVAddr(regs, LOAD, address, physical)) {
@@ -241,7 +241,7 @@ void Cpu::ll(Mem& mem, u32 instr) {
regs.cop0.LLAddr = physical >> 4; regs.cop0.LLAddr = physical >> 4;
} }
void Cpu::lwl(Mem& mem, u32 instr) { void Interpreter::lwl(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr = 0; u32 paddr = 0;
if(!MapVAddr(regs, LOAD, address, paddr)) { if(!MapVAddr(regs, LOAD, address, paddr)) {
@@ -256,7 +256,7 @@ void Cpu::lwl(Mem& mem, u32 instr) {
} }
} }
void Cpu::lwr(Mem& mem, u32 instr) { void Interpreter::lwr(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr = 0; u32 paddr = 0;
if(!MapVAddr(regs, LOAD, address, paddr)) { if(!MapVAddr(regs, LOAD, address, paddr)) {
@@ -271,7 +271,7 @@ void Cpu::lwr(Mem& mem, u32 instr) {
} }
} }
void Cpu::ld(Mem& mem, u32 instr) { void Interpreter::ld(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b111)) { if (check_address_error(address, 0b111)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -283,7 +283,7 @@ void Cpu::ld(Mem& mem, u32 instr) {
regs.gpr[RT(instr)] = value; regs.gpr[RT(instr)] = value;
} }
void Cpu::lld(Mem& mem, u32 instr) { void Interpreter::lld(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr; u32 paddr;
if (!MapVAddr(regs, LOAD, address, paddr)) { if (!MapVAddr(regs, LOAD, address, paddr)) {
@@ -297,7 +297,7 @@ void Cpu::lld(Mem& mem, u32 instr) {
regs.cop0.LLAddr = paddr >> 4; regs.cop0.LLAddr = paddr >> 4;
} }
void Cpu::ldl(Mem& mem, u32 instr) { void Interpreter::ldl(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr = 0; u32 paddr = 0;
if (!MapVAddr(regs, LOAD, address, paddr)) { if (!MapVAddr(regs, LOAD, address, paddr)) {
@@ -312,7 +312,7 @@ void Cpu::ldl(Mem& mem, u32 instr) {
} }
} }
void Cpu::ldr(Mem& mem, u32 instr) { void Interpreter::ldr(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr; u32 paddr;
if (!MapVAddr(regs, LOAD, address, paddr)) { if (!MapVAddr(regs, LOAD, address, paddr)) {
@@ -327,13 +327,13 @@ void Cpu::ldr(Mem& mem, u32 instr) {
} }
} }
void Cpu::lbu(Mem& mem, u32 instr) { void Interpreter::lbu(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u8 value = mem.Read8(regs, address, regs.oldPC); u8 value = mem.Read8(regs, address, regs.oldPC);
regs.gpr[RT(instr)] = value; regs.gpr[RT(instr)] = value;
} }
void Cpu::lhu(Mem& mem, u32 instr) { void Interpreter::lhu(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b1)) { if (check_address_error(address, 0b1)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -345,7 +345,7 @@ void Cpu::lhu(Mem& mem, u32 instr) {
regs.gpr[RT(instr)] = value; regs.gpr[RT(instr)] = value;
} }
void Cpu::lwu(Mem& mem, u32 instr) { void Interpreter::lwu(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -357,12 +357,12 @@ void Cpu::lwu(Mem& mem, u32 instr) {
regs.gpr[RT(instr)] = value; regs.gpr[RT(instr)] = value;
} }
void Cpu::sb(Mem& mem, u32 instr) { void Interpreter::sb(Mem& mem, u32 instr) {
u32 address = regs.gpr[RS(instr)] + (s16)instr; u32 address = regs.gpr[RS(instr)] + (s16)instr;
mem.Write8(regs, address, regs.gpr[RT(instr)], regs.oldPC); mem.Write8(regs, address, regs.gpr[RT(instr)], regs.oldPC);
} }
void Cpu::sc(Mem& mem, u32 instr) { void Interpreter::sc(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -377,7 +377,7 @@ void Cpu::sc(Mem& mem, u32 instr) {
regs.cop0.llbit = false; regs.cop0.llbit = false;
} }
void Cpu::scd(Mem& mem, u32 instr) { void Interpreter::scd(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b111)) { if (check_address_error(address, 0b111)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -393,7 +393,7 @@ void Cpu::scd(Mem& mem, u32 instr) {
regs.cop0.llbit = false; regs.cop0.llbit = false;
} }
void Cpu::sh(Mem& mem, u32 instr) { void Interpreter::sh(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b1)) { if (check_address_error(address, 0b1)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -410,7 +410,7 @@ void Cpu::sh(Mem& mem, u32 instr) {
} }
} }
void Cpu::sw(Mem& mem, u32 instr) { void Interpreter::sw(Mem& mem, u32 instr) {
s16 offset = instr; s16 offset = instr;
u64 address = regs.gpr[RS(instr)] + offset; u64 address = regs.gpr[RS(instr)] + offset;
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
@@ -428,7 +428,7 @@ void Cpu::sw(Mem& mem, u32 instr) {
} }
} }
void Cpu::sd(Mem& mem, u32 instr) { void Interpreter::sd(Mem& mem, u32 instr) {
s64 address = regs.gpr[RS(instr)] + (s16)instr; s64 address = regs.gpr[RS(instr)] + (s16)instr;
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -446,7 +446,7 @@ void Cpu::sd(Mem& mem, u32 instr) {
} }
void Cpu::sdl(Mem& mem, u32 instr) { void Interpreter::sdl(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr; u32 paddr;
if (!MapVAddr(regs, STORE, address, paddr)) { if (!MapVAddr(regs, STORE, address, paddr)) {
@@ -461,7 +461,7 @@ void Cpu::sdl(Mem& mem, u32 instr) {
} }
} }
void Cpu::sdr(Mem& mem, u32 instr) { void Interpreter::sdr(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr; u32 paddr;
if (!MapVAddr(regs, STORE, address, paddr)) { if (!MapVAddr(regs, STORE, address, paddr)) {
@@ -476,7 +476,7 @@ void Cpu::sdr(Mem& mem, u32 instr) {
} }
} }
void Cpu::swl(Mem& mem, u32 instr) { void Interpreter::swl(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr; u32 paddr;
if (!MapVAddr(regs, STORE, address, paddr)) { if (!MapVAddr(regs, STORE, address, paddr)) {
@@ -491,7 +491,7 @@ void Cpu::swl(Mem& mem, u32 instr) {
} }
} }
void Cpu::swr(Mem& mem, u32 instr) { void Interpreter::swr(Mem& mem, u32 instr) {
u64 address = regs.gpr[RS(instr)] + (s16)instr; u64 address = regs.gpr[RS(instr)] + (s16)instr;
u32 paddr; u32 paddr;
if (!MapVAddr(regs, STORE, address, paddr)) { if (!MapVAddr(regs, STORE, address, paddr)) {
@@ -506,21 +506,21 @@ void Cpu::swr(Mem& mem, u32 instr) {
} }
} }
void Cpu::ori(u32 instr) { void Interpreter::ori(u32 instr) {
s64 imm = (u16)instr; s64 imm = (u16)instr;
s64 result = imm | regs.gpr[RS(instr)]; s64 result = imm | regs.gpr[RS(instr)];
regs.gpr[RT(instr)] = result; regs.gpr[RT(instr)] = result;
} }
void Cpu::or_(u32 instr) { void Interpreter::or_(u32 instr) {
regs.gpr[RD(instr)] = regs.gpr[RS(instr)] | regs.gpr[RT(instr)]; regs.gpr[RD(instr)] = regs.gpr[RS(instr)] | regs.gpr[RT(instr)];
} }
void Cpu::nor(u32 instr) { void Interpreter::nor(u32 instr) {
regs.gpr[RD(instr)] = ~(regs.gpr[RS(instr)] | regs.gpr[RT(instr)]); regs.gpr[RD(instr)] = ~(regs.gpr[RS(instr)] | regs.gpr[RT(instr)]);
} }
void Cpu::j(u32 instr) { void Interpreter::j(u32 instr) {
s32 target = (instr & 0x3ffffff) << 2; s32 target = (instr & 0x3ffffff) << 2;
s64 address = (regs.oldPC & ~0xfffffff) | target; s64 address = (regs.oldPC & ~0xfffffff) | target;
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
@@ -531,124 +531,124 @@ void Cpu::j(u32 instr) {
branch(true, address); branch(true, address);
} }
void Cpu::jal(u32 instr) { void Interpreter::jal(u32 instr) {
regs.gpr[31] = regs.nextPC; regs.gpr[31] = regs.nextPC;
j(instr); j(instr);
} }
void Cpu::jalr(u32 instr) { void Interpreter::jalr(u32 instr) {
branch(true, regs.gpr[RS(instr)]); branch(true, regs.gpr[RS(instr)]);
regs.gpr[RD(instr)] = regs.pc + 4; regs.gpr[RD(instr)] = regs.pc + 4;
} }
void Cpu::slti(u32 instr) { void Interpreter::slti(u32 instr) {
regs.gpr[RT(instr)] = regs.gpr[RS(instr)] < se_imm(instr); regs.gpr[RT(instr)] = regs.gpr[RS(instr)] < se_imm(instr);
} }
void Cpu::sltiu(u32 instr) { void Interpreter::sltiu(u32 instr) {
regs.gpr[RT(instr)] = (u64)regs.gpr[RS(instr)] < se_imm(instr); regs.gpr[RT(instr)] = (u64)regs.gpr[RS(instr)] < se_imm(instr);
} }
void Cpu::slt(u32 instr) { void Interpreter::slt(u32 instr) {
regs.gpr[RD(instr)] = regs.gpr[RS(instr)] < regs.gpr[RT(instr)]; regs.gpr[RD(instr)] = regs.gpr[RS(instr)] < regs.gpr[RT(instr)];
} }
void Cpu::sltu(u32 instr) { void Interpreter::sltu(u32 instr) {
regs.gpr[RD(instr)] = (u64)regs.gpr[RS(instr)] < (u64)regs.gpr[RT(instr)]; regs.gpr[RD(instr)] = (u64)regs.gpr[RS(instr)] < (u64)regs.gpr[RT(instr)];
} }
void Cpu::xori(u32 instr) { void Interpreter::xori(u32 instr) {
s64 imm = (u16)instr; s64 imm = (u16)instr;
regs.gpr[RT(instr)] = regs.gpr[RS(instr)] ^ imm; regs.gpr[RT(instr)] = regs.gpr[RS(instr)] ^ imm;
} }
void Cpu::xor_(u32 instr) { void Interpreter::xor_(u32 instr) {
regs.gpr[RD(instr)] = regs.gpr[RT(instr)] ^ regs.gpr[RS(instr)]; regs.gpr[RD(instr)] = regs.gpr[RT(instr)] ^ regs.gpr[RS(instr)];
} }
void Cpu::andi(u32 instr) { void Interpreter::andi(u32 instr) {
s64 imm = (u16)instr; s64 imm = (u16)instr;
regs.gpr[RT(instr)] = regs.gpr[RS(instr)] & imm; regs.gpr[RT(instr)] = regs.gpr[RS(instr)] & imm;
} }
void Cpu::and_(u32 instr) { void Interpreter::and_(u32 instr) {
regs.gpr[RD(instr)] = regs.gpr[RS(instr)] & regs.gpr[RT(instr)]; regs.gpr[RD(instr)] = regs.gpr[RS(instr)] & regs.gpr[RT(instr)];
} }
void Cpu::sll(u32 instr) { void Interpreter::sll(u32 instr) {
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
s32 result = regs.gpr[RT(instr)] << sa; s32 result = regs.gpr[RT(instr)] << sa;
regs.gpr[RD(instr)] = (s64)result; regs.gpr[RD(instr)] = (s64)result;
} }
void Cpu::sllv(u32 instr) { void Interpreter::sllv(u32 instr) {
u8 sa = (regs.gpr[RS(instr)]) & 0x1F; u8 sa = (regs.gpr[RS(instr)]) & 0x1F;
u32 rt = regs.gpr[RT(instr)]; u32 rt = regs.gpr[RT(instr)];
s32 result = rt << sa; s32 result = rt << sa;
regs.gpr[RD(instr)] = (s64)result; regs.gpr[RD(instr)] = (s64)result;
} }
void Cpu::dsll32(u32 instr) { void Interpreter::dsll32(u32 instr) {
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
s64 result = regs.gpr[RT(instr)] << (sa + 32); s64 result = regs.gpr[RT(instr)] << (sa + 32);
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::dsll(u32 instr) { void Interpreter::dsll(u32 instr) {
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
s64 result = regs.gpr[RT(instr)] << sa; s64 result = regs.gpr[RT(instr)] << sa;
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::dsllv(u32 instr) { void Interpreter::dsllv(u32 instr) {
s64 sa = regs.gpr[RS(instr)] & 63; s64 sa = regs.gpr[RS(instr)] & 63;
s64 result = regs.gpr[RT(instr)] << sa; s64 result = regs.gpr[RT(instr)] << sa;
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::srl(u32 instr) { void Interpreter::srl(u32 instr) {
u32 rt = regs.gpr[RT(instr)]; u32 rt = regs.gpr[RT(instr)];
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
u32 result = rt >> sa; u32 result = rt >> sa;
regs.gpr[RD(instr)] = (s32)result; regs.gpr[RD(instr)] = (s32)result;
} }
void Cpu::srlv(u32 instr) { void Interpreter::srlv(u32 instr) {
u8 sa = (regs.gpr[RS(instr)] & 0x1F); u8 sa = (regs.gpr[RS(instr)] & 0x1F);
u32 rt = regs.gpr[RT(instr)]; u32 rt = regs.gpr[RT(instr)];
s32 result = rt >> sa; s32 result = rt >> sa;
regs.gpr[RD(instr)] = (s64)result; regs.gpr[RD(instr)] = (s64)result;
} }
void Cpu::dsrl(u32 instr) { void Interpreter::dsrl(u32 instr) {
u64 rt = regs.gpr[RT(instr)]; u64 rt = regs.gpr[RT(instr)];
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
u64 result = rt >> sa; u64 result = rt >> sa;
regs.gpr[RD(instr)] = s64(result); regs.gpr[RD(instr)] = s64(result);
} }
void Cpu::dsrlv(u32 instr) { void Interpreter::dsrlv(u32 instr) {
u8 amount = (regs.gpr[RS(instr)] & 63); u8 amount = (regs.gpr[RS(instr)] & 63);
u64 rt = regs.gpr[RT(instr)]; u64 rt = regs.gpr[RT(instr)];
u64 result = rt >> amount; u64 result = rt >> amount;
regs.gpr[RD(instr)] = s64(result); regs.gpr[RD(instr)] = s64(result);
} }
void Cpu::dsrl32(u32 instr) { void Interpreter::dsrl32(u32 instr) {
u64 rt = regs.gpr[RT(instr)]; u64 rt = regs.gpr[RT(instr)];
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
u64 result = rt >> (sa + 32); u64 result = rt >> (sa + 32);
regs.gpr[RD(instr)] = s64(result); regs.gpr[RD(instr)] = s64(result);
} }
void Cpu::sra(u32 instr) { void Interpreter::sra(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
s32 result = rt >> sa; s32 result = rt >> sa;
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::srav(u32 instr) { void Interpreter::srav(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
s64 rs = regs.gpr[RS(instr)]; s64 rs = regs.gpr[RS(instr)];
u8 sa = rs & 0x1f; u8 sa = rs & 0x1f;
@@ -656,14 +656,14 @@ void Cpu::srav(u32 instr) {
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::dsra(u32 instr) { void Interpreter::dsra(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
s64 result = rt >> sa; s64 result = rt >> sa;
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::dsrav(u32 instr) { void Interpreter::dsrav(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
s64 rs = regs.gpr[RS(instr)]; s64 rs = regs.gpr[RS(instr)];
s64 sa = rs & 63; s64 sa = rs & 63;
@@ -671,14 +671,14 @@ void Cpu::dsrav(u32 instr) {
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::dsra32(u32 instr) { void Interpreter::dsra32(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
u8 sa = ((instr >> 6) & 0x1f); u8 sa = ((instr >> 6) & 0x1f);
s64 result = rt >> (sa + 32); s64 result = rt >> (sa + 32);
regs.gpr[RD(instr)] = result; regs.gpr[RD(instr)] = result;
} }
void Cpu::jr(u32 instr) { void Interpreter::jr(u32 instr) {
s64 address = regs.gpr[RS(instr)]; s64 address = regs.gpr[RS(instr)];
if (check_address_error(address, 0b11)) { if (check_address_error(address, 0b11)) {
HandleTLBException(regs, address); HandleTLBException(regs, address);
@@ -688,7 +688,7 @@ void Cpu::jr(u32 instr) {
branch(true, address); branch(true, address);
} }
void Cpu::dsub(u32 instr) { void Interpreter::dsub(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
s64 rs = regs.gpr[RS(instr)]; s64 rs = regs.gpr[RS(instr)];
s64 result = rs - rt; s64 result = rs - rt;
@@ -699,14 +699,14 @@ void Cpu::dsub(u32 instr) {
} }
} }
void Cpu::dsubu(u32 instr) { void Interpreter::dsubu(u32 instr) {
u64 rt = regs.gpr[RT(instr)]; u64 rt = regs.gpr[RT(instr)];
u64 rs = regs.gpr[RS(instr)]; u64 rs = regs.gpr[RS(instr)];
u64 result = rs - rt; u64 result = rs - rt;
regs.gpr[RD(instr)] = s64(result); regs.gpr[RD(instr)] = s64(result);
} }
void Cpu::sub(u32 instr) { void Interpreter::sub(u32 instr) {
s32 rt = regs.gpr[RT(instr)]; s32 rt = regs.gpr[RT(instr)];
s32 rs = regs.gpr[RS(instr)]; s32 rs = regs.gpr[RS(instr)];
s32 result = rs - rt; s32 result = rs - rt;
@@ -717,14 +717,14 @@ void Cpu::sub(u32 instr) {
} }
} }
void Cpu::subu(u32 instr) { void Interpreter::subu(u32 instr) {
u32 rt = regs.gpr[RT(instr)]; u32 rt = regs.gpr[RT(instr)];
u32 rs = regs.gpr[RS(instr)]; u32 rs = regs.gpr[RS(instr)];
u32 result = rs - rt; u32 result = rs - rt;
regs.gpr[RD(instr)] = (s64)((s32)result); regs.gpr[RD(instr)] = (s64)((s32)result);
} }
void Cpu::dmultu(u32 instr) { void Interpreter::dmultu(u32 instr) {
u64 rt = regs.gpr[RT(instr)]; u64 rt = regs.gpr[RT(instr)];
u64 rs = regs.gpr[RS(instr)]; u64 rs = regs.gpr[RS(instr)];
u128 result = (u128)rt * (u128)rs; u128 result = (u128)rt * (u128)rs;
@@ -732,7 +732,7 @@ void Cpu::dmultu(u32 instr) {
regs.hi = (s64)(result >> 64); regs.hi = (s64)(result >> 64);
} }
void Cpu::dmult(u32 instr) { void Interpreter::dmult(u32 instr) {
s64 rt = regs.gpr[RT(instr)]; s64 rt = regs.gpr[RT(instr)];
s64 rs = regs.gpr[RS(instr)]; s64 rs = regs.gpr[RS(instr)];
s128 result = (s128)rt * (s128)rs; s128 result = (s128)rt * (s128)rs;
@@ -740,7 +740,7 @@ void Cpu::dmult(u32 instr) {
regs.hi = result >> 64; regs.hi = result >> 64;
} }
void Cpu::multu(u32 instr) { void Interpreter::multu(u32 instr) {
u32 rt = regs.gpr[RT(instr)]; u32 rt = regs.gpr[RT(instr)];
u32 rs = regs.gpr[RS(instr)]; u32 rs = regs.gpr[RS(instr)];
u64 result = (u64)rt * (u64)rs; u64 result = (u64)rt * (u64)rs;
@@ -748,7 +748,7 @@ void Cpu::multu(u32 instr) {
regs.hi = (s64)((s32)(result >> 32)); regs.hi = (s64)((s32)(result >> 32));
} }
void Cpu::mult(u32 instr) { void Interpreter::mult(u32 instr) {
s32 rt = regs.gpr[RT(instr)]; s32 rt = regs.gpr[RT(instr)];
s32 rs = regs.gpr[RS(instr)]; s32 rs = regs.gpr[RS(instr)];
s64 result = (s64)rt * (s64)rs; s64 result = (s64)rt * (s64)rs;
@@ -756,23 +756,23 @@ void Cpu::mult(u32 instr) {
regs.hi = (s64)((s32)(result >> 32)); regs.hi = (s64)((s32)(result >> 32));
} }
void Cpu::mflo(u32 instr) { void Interpreter::mflo(u32 instr) {
regs.gpr[RD(instr)] = regs.lo; regs.gpr[RD(instr)] = regs.lo;
} }
void Cpu::mfhi(u32 instr) { void Interpreter::mfhi(u32 instr) {
regs.gpr[RD(instr)] = regs.hi; regs.gpr[RD(instr)] = regs.hi;
} }
void Cpu::mtlo(u32 instr) { void Interpreter::mtlo(u32 instr) {
regs.lo = regs.gpr[RS(instr)]; regs.lo = regs.gpr[RS(instr)];
} }
void Cpu::mthi(u32 instr) { void Interpreter::mthi(u32 instr) {
regs.hi = regs.gpr[RS(instr)]; regs.hi = regs.gpr[RS(instr)];
} }
void Cpu::trap(bool cond) { void Interpreter::trap(bool cond) {
if(cond) { if(cond) {
FireException(regs, ExceptionCode::Trap, 0, regs.oldPC); FireException(regs, ExceptionCode::Trap, 0, regs.oldPC);
} }

View File

@@ -1,7 +1,7 @@
#include <n64/core/mmio/AI.hpp> #include <n64/core/mmio/AI.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/Mem.hpp> #include <n64/core/Mem.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <n64/core/Audio.hpp> #include <n64/core/Audio.hpp>
namespace n64 { namespace n64 {

View File

@@ -1,6 +1,6 @@
#include <n64/core/mmio/Interrupt.hpp> #include <n64/core/mmio/Interrupt.hpp>
#include <n64/core/mmio/MI.hpp> #include <n64/core/mmio/MI.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
namespace n64 { namespace n64 {
void InterruptRaise(MI &mi, Registers &regs, Interrupt intr) { void InterruptRaise(MI &mi, Registers &regs, Interrupt intr) {

View File

@@ -1,5 +1,5 @@
#include <n64/core/mmio/MI.hpp> #include <n64/core/mmio/MI.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/mmio/Interrupt.hpp> #include <n64/core/mmio/Interrupt.hpp>

View File

@@ -1,7 +1,7 @@
#include <n64/core/mmio/PI.hpp> #include <n64/core/mmio/PI.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/Mem.hpp> #include <n64/core/Mem.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
namespace n64 { namespace n64 {
PI::PI() { PI::PI() {

View File

@@ -1,6 +1,6 @@
#include <n64/core/mmio/PIF.hpp> #include <n64/core/mmio/PIF.hpp>
#include <n64/core/Mem.hpp> #include <n64/core/Mem.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <util.hpp> #include <util.hpp>
#include "m64.hpp" #include "m64.hpp"

View File

@@ -1,6 +1,6 @@
#include <n64/core/mmio/VI.hpp> #include <n64/core/mmio/VI.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <n64/core/mmio/MI.hpp> #include <n64/core/mmio/MI.hpp>
#include <n64/core/mmio/Interrupt.hpp> #include <n64/core/mmio/Interrupt.hpp>

View File

@@ -1,7 +1,7 @@
#include <Cop0.hpp> #include <core/registers/Cop0.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <n64/core/Cpu.hpp> #include <n64/core/Interpreter.hpp>
namespace n64 { namespace n64 {
Cop0::Cop0() { Cop0::Cop0() {

View File

@@ -1,6 +1,6 @@
#include <n64/core/cpu/registers/Cop1.hpp> #include <core/registers/Cop1.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <n64/core/Cpu.hpp> #include <n64/core/Interpreter.hpp>
#include <util.hpp> #include <util.hpp>
namespace n64 { namespace n64 {
@@ -14,7 +14,7 @@ void Cop1::Reset() {
memset(fgr, 0, 32 * sizeof(FGR)); memset(fgr, 0, 32 * sizeof(FGR));
} }
void Cop1::decode(Cpu& cpu, u32 instr) { void Cop1::decode(Interpreter& cpu, u32 instr) {
Registers& regs = cpu.regs; Registers& regs = cpu.regs;
if(!regs.cop0.status.cu1) { if(!regs.cop0.status.cu1) {
FireException(regs, ExceptionCode::CoprocessorUnusable, 1, regs.oldPC); FireException(regs, ExceptionCode::CoprocessorUnusable, 1, regs.oldPC);

View File

@@ -1,5 +1,5 @@
#pragma once #pragma once
#include <n64/core/cpu/registers/Cop0.hpp> #include <core/registers/Cop0.hpp>
namespace n64 { namespace n64 {
union FCR31 { union FCR31 {
@@ -54,7 +54,7 @@ union FGR {
s64 raw; s64 raw;
}; };
struct Cpu; struct Interpreter;
struct Registers; struct Registers;
struct Cop1 { struct Cop1 {
@@ -63,8 +63,8 @@ struct Cop1 {
FCR31 fcr31{}; FCR31 fcr31{};
FGR fgr[32]{}; FGR fgr[32]{};
void Reset(); void Reset();
void decode(Cpu&, u32); void decode(Interpreter&, u32);
friend struct Cpu; friend struct Interpreter;
private: private:
template <typename T> template <typename T>
inline void SetReg(Cop0& cop0, u8 index, T value) { inline void SetReg(Cop0& cop0, u8 index, T value) {

View File

@@ -1,4 +1,4 @@
#include <Registers.hpp> #include <core/registers/Registers.hpp>
namespace n64 { namespace n64 {
Registers::Registers() { Registers::Registers() {

View File

@@ -1,6 +1,6 @@
#pragma once #pragma once
#include <Cop0.hpp> #include <core/registers/Cop0.hpp>
#include <Cop1.hpp> #include <core/registers/Cop1.hpp>
namespace n64 { namespace n64 {
struct Registers { struct Registers {

View File

@@ -1,6 +1,6 @@
#include <n64/core/RSP.hpp> #include <n64/core/RSP.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <Interrupt.hpp> #include <Interrupt.hpp>
#include <Mem.hpp> #include <Mem.hpp>

View File

@@ -1,6 +1,6 @@
#include <RSP.hpp> #include <RSP.hpp>
#include <util.hpp> #include <util.hpp>
#include <n64/core/cpu/Registers.hpp> #include <core/registers/Registers.hpp>
#include <Mem.hpp> #include <Mem.hpp>
#include <rcp_rsq.hpp> #include <rcp_rsq.hpp>