00cc9309cb
de6e324bdseparate emu thread10d3daf86Roms List improvements95d202f37Let's make the rom list process on a separate thread so the emulator doesnt take ages to load.fc306967fWow the ROM Header was just completely busted. Game list view works nowbad1691eefuck this shit2b59e5f46game list in progressd26417b83remappable inputs in progressac4af8106inpute72abc240update readme430139dc9Qt6 frontend3080d4d45Fix this small bug too08cd13b85Cop0 unused functions do not actually pose a threat (as per manual). They don't do anything, so shall we.61bb4fb44make idle loop detection a little more specific with where the load goesb037de4c3SAZDFsdff12e81e73eneed to figure out why n64-systemtest loops indefinitely at some address that appears to be valid (i think it's me not invalidating the cache properly)204f0e13bidle skipping seems to work!cb8bb634asdkfjlasdf58e5c89c1Fix compilation issue on my machine (no idea)24fb2898eattempting more serious idle skipping214719577Place rsp.Step inside cached interpreter. Gains about 3 more fpsbb97dcc23mmmmm920b77d38wjkhasdfjhkasdf430ccdab4it's a start...4f42a673aCached interpreter plays Mario 64. Start looking into RSP as wellc9a030787idle skipping works!5fbda03cenew idea366637abaIdle skipping... maybe?609fa2fb0Cache instructions implemented but broken lmao. Commented out for nowe140a6d12- Stop using inheritance for CPU, instead use composition. - Introduce KAIZEN_JIT_ENABLED optional define instead of relying on __aarch64__ and the like. 
- More cache work68e613057prep cache impl811b4d809fix clang formatfda755f7didkd5024ebbfsmall MI refactor in preparation of (eventually) implementing the RDRAM interface properly694b45341Merge commit '206dcdedf195fb320913584180edb12c7731e396' as 'external/SDL'206dcdedfSquashed 'external/SDL/' content from commit 4d17b99d0a4d16e1cb4need to update sdl848b19920Fix compilation errordb61b5299Merge commit 'e94a94559f28e49678fbcf72199a5258137b0fe9' as 'external/imgui'e94a94559Squashed 'external/imgui/' content from commit 02e9b8cac52edb3757need to update imguic1a705e86Emulate weird JALR behaviour4b4c32f4bFix exception for "unusable COP1" in 4 instructions i missed accidentally (again)df5828142Bug putting 0s in the log everywheref8b580048Make isviewer a sink to file8241e9735Fix exception for "unusable COP1" in 4 instructions i missed accidentallyb29715f20small changesd9a620bc1make use of my new small utility library0d1aa938eAdd 'external/ircolib/' from commit 'ce3cd726c8df8388d554abf8bb55d55020eb4450'e64eb40b3Fuck git git-subtree-dir: external/ircolib git-subtree-split:de6e324bde
188 lines
4.8 KiB
C
188 lines
4.8 KiB
C
/* Bra86.c -- Branch converter for X86 code (BCJ)
2023-04-02 : Igor Pavlov : Public domain */
|
|
|
|
#include "Precomp.h"
|
|
|
|
#include "Bra.h"
|
|
#include "CpuArch.h"
|
|
|
|
|
|
/* Select the "pointer-folded pc" variant of BR_PC_INIT / BR_PC_GET when the
   pointer size is known to be 4 or 8 bytes. With it, the buffer address is
   subtracted from (pc) once up front, and the per-branch value is then
   recomputed as (pc + p). */
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

/* BR_PC_INIT : one-time adjustment of (pc), executed before scanning starts.
   BR_PC_GET  : value that is added to / subtracted from a branch operand;
                evaluated while (p) points at the 4-byte operand.

   Both variants expect these names in the expanding scope: pc, p, and
   (non-optimized variant only) size and lim, where lim == start + size - 4.
   In the optimized variant the converter additionally does (pc += 4) before
   BR_PC_INIT, so in both variants BR_PC_GET evaluates (modulo 2^32) to
     original_pc + 4 + (p - start_of_buffer). */
#ifdef BR_CONV_USE_OPT_PC_PTR
// fold the low 32 bits of the buffer address into pc once; later it is just (pc + p)
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
// bias pc by the buffer size, then subtract the distance still left to (lim)
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

/* Apply offset (c) to operand (v): add it when (encoding) is non-zero,
   subtract it otherwise.
   - Reads a variable named (encoding) from the expanding scope.
   - Expands to a complete if/else statement including its terminating ';',
     so call sites write it WITHOUT a trailing semicolon. */
#define BR_CONVERT_VAL(v, c) if (encoding) (v) += (c); else (v) -= (c);
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

/* Forms the identifier of the shared converter implementation by token
   pasting, e.g. Z7_BRANCH_CONV_ST(X86) -> z7_BranchConvSt_X86. */
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name

/* Non-zero when the low 8 bits of (b) are 0x00 or 0xFF:
   adding 1 maps those two values to 0x01 / 0x100, the only results whose
   bits 1..7 are all clear. Any other low byte yields 0. */
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)

/* Helpers for scanning 4 bytes at a time for a branch opcode candidate,
   i.e. a byte b with (b & 0xfe) == 0xe8 (0xE8 or 0xE9).

   BR86_PREPARE_BCJ_SCAN : placed BEFORE the scanner does (p += 4).
   BR86_IS_BCJ_BYTE(n)   : used AFTER (p += 4); non-zero when byte (n), n = 0..3,
                           of the 4-byte group just stepped over is a candidate. */
#ifdef MY_CPU_LE_UNALIGN
  /* One 32-bit load via GetUi32; XOR with 0xe8e8e8e8 turns every candidate
     byte into 0x00 or 0x01, which the 0xfe lane mask then detects.
     NOTE(review): lane (n) matching byte (n) presumes GetUi32 is a
     little-endian read - confirm in CpuArch.h. */
  #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
  #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
  /* Byte-wise variant: nothing to prepare; index back into the group that
     (p) has already been advanced past. */
  #define BR86_PREPARE_BCJ_SCAN
  // bad for MSVC X86 (partial write to byte reg):
  #define BR86_IS_BCJ_BYTE(n) ((p[(n) - 4] & 0xfe) == 0xe8)
  // bad for old MSVC (partial write to byte reg):
  // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0)
#endif

static
|
|
Z7_FORCE_INLINE
|
|
Z7_ATTRIB_NO_VECTOR
|
|
Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
|
|
{
|
|
if (size < 5)
|
|
return p;
|
|
{
|
|
// Byte *p = data;
|
|
const Byte *lim = p + size - 4;
|
|
unsigned mask = (unsigned)*state; // & 7;
|
|
#ifdef BR_CONV_USE_OPT_PC_PTR
|
|
/* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
|
|
because call/jump offset is relative to the next instruction.
|
|
if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
|
|
because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
|
|
*/
|
|
pc += 4;
|
|
#endif
|
|
BR_PC_INIT
|
|
goto start;
|
|
|
|
for (;; mask |= 4)
|
|
{
|
|
// cont: mask |= 4;
|
|
start:
|
|
if (p >= lim)
|
|
goto fin;
|
|
{
|
|
BR86_PREPARE_BCJ_SCAN
|
|
p += 4;
|
|
if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
|
|
if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
|
|
if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
|
|
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
|
|
}
|
|
goto main_loop;
|
|
|
|
m0: p--;
|
|
m1: p--;
|
|
m2: p--;
|
|
if (mask == 0)
|
|
goto a3;
|
|
if (p > lim)
|
|
goto fin_p;
|
|
|
|
// if (((0x17u >> mask) & 1) == 0)
|
|
if (mask > 4 || mask == 3)
|
|
{
|
|
mask >>= 1;
|
|
continue; // goto cont;
|
|
}
|
|
mask >>= 1;
|
|
if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
|
|
continue; // goto cont;
|
|
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
|
|
{
|
|
UInt32 v = GetUi32(p);
|
|
UInt32 c;
|
|
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
|
|
c = BR_PC_GET;
|
|
BR_CONVERT_VAL(v, c)
|
|
{
|
|
mask <<= 3;
|
|
if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
|
|
{
|
|
v ^= (((UInt32)0x100 << mask) - 1);
|
|
#ifdef MY_CPU_X86
|
|
// for X86 : we can recalculate (c) to reduce register pressure
|
|
c = BR_PC_GET;
|
|
#endif
|
|
BR_CONVERT_VAL(v, c)
|
|
}
|
|
mask = 0;
|
|
}
|
|
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
|
|
v &= (1 << 25) - 1; v -= (1 << 24);
|
|
SetUi32(p, v)
|
|
p += 4;
|
|
goto main_loop;
|
|
}
|
|
|
|
main_loop:
|
|
if (p >= lim)
|
|
goto fin;
|
|
for (;;)
|
|
{
|
|
BR86_PREPARE_BCJ_SCAN
|
|
p += 4;
|
|
if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
|
|
if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
|
|
if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
|
|
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
|
|
if (p >= lim)
|
|
goto fin;
|
|
}
|
|
|
|
a0: p--;
|
|
a1: p--;
|
|
a2: p--;
|
|
a3:
|
|
if (p > lim)
|
|
goto fin_p;
|
|
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
|
|
{
|
|
UInt32 v = GetUi32(p);
|
|
UInt32 c;
|
|
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
|
|
c = BR_PC_GET;
|
|
BR_CONVERT_VAL(v, c)
|
|
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
|
|
v &= (1 << 25) - 1; v -= (1 << 24);
|
|
SetUi32(p, v)
|
|
p += 4;
|
|
goto main_loop;
|
|
}
|
|
}
|
|
|
|
fin_p:
|
|
p--;
|
|
fin:
|
|
// the following processing for tail is optional and can be commented
|
|
/*
|
|
lim += 4;
|
|
for (; p < lim; p++, mask >>= 1)
|
|
if ((*p & 0xfe) == 0xe8)
|
|
break;
|
|
*/
|
|
*state = (UInt32)mask;
|
|
return p;
|
|
}
|
|
}
|
|
|
|
|
|
/* Defines one non-inlined entry point that forwards to the force-inlined
   shared implementation Z7_BRANCH_CONV_ST(name) with a constant (encoding).
     name     : converter name token (pasted into the implementation's name)
     m        : function-like macro that turns (name) into the entry point's identifier
     encoding : constant passed through as the implementation's last argument */
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
  Z7_NO_INLINE \
  Z7_ATTRIB_NO_VECTOR \
  Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
  { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }

/* X86 entry points. The one generated with encoding == 0 is always built;
   the one with encoding == 1 is omitted when Z7_EXTRACT_ONLY is defined.
   NOTE(review): Z7_BRANCH_CONV_ST_DEC / Z7_BRANCH_CONV_ST_ENC are not defined
   in this file - presumably name-forming macros from Bra.h; confirm there. */
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
#ifndef Z7_EXTRACT_ONLY
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
#endif