00cc9309cb
de6e324bdseparate emu thread10d3daf86Roms List improvements95d202f37Let's make the rom list process on a separate thread so the emulator doesnt take ages to load.fc306967fWow the ROM Header was just completely busted. Game list view works nowbad1691eefuck this shit2b59e5f46game list in progressd26417b83remappable inputs in progressac4af8106inpute72abc240update readme430139dc9Qt6 frontend3080d4d45Fix this small bug too08cd13b85Cop0 unused functions do not actually pose a threat (as per manual). They don't do anything, so shall we.61bb4fb44make idle loop detection a little more specific with where the load goesb037de4c3SAZDFsdff12e81e73eneed to figure out why n64-systemtest loops indefinitely at some address that appears to be valid (i think it's me not invalidating the cache properly)204f0e13bidle skipping seems to work!cb8bb634asdkfjlasdf58e5c89c1Fix compilation issue on my machine (no idea)24fb2898eattempting more serious idle skipping214719577Place rsp.Step inside cached interpreter. Gains about 3 more fpsbb97dcc23mmmmm920b77d38wjkhasdfjhkasdf430ccdab4it's a start...4f42a673aCached interpreter plays Mario 64. Start looking into RSP as wellc9a030787idle skipping works!5fbda03cenew idea366637abaIdle skipping... maybe?609fa2fb0Cache instructions implemented but broken lmao. Commented out for nowe140a6d12- Stop using inheritance for CPU, instead use composition. - Introduce KAIZEN_JIT_ENABLED optional define instead of relying on __aarch64__ and the like. 
- More cache work68e613057prep cache impl811b4d809fix clang formatfda755f7didkd5024ebbfsmall MI refactor in preparation of (eventually) implementing the RDRAM interface properly694b45341Merge commit '206dcdedf195fb320913584180edb12c7731e396' as 'external/SDL'206dcdedfSquashed 'external/SDL/' content from commit 4d17b99d0a4d16e1cb4need to update sdl848b19920Fix compilation errordb61b5299Merge commit 'e94a94559f28e49678fbcf72199a5258137b0fe9' as 'external/imgui'e94a94559Squashed 'external/imgui/' content from commit 02e9b8cac52edb3757need to update imguic1a705e86Emulate weird JALR behaviour4b4c32f4bFix exception for "unusable COP1" in 4 instructions i missed accidentally (again)df5828142Bug putting 0s in the log everywheref8b580048Make isviewer a sink to file8241e9735Fix exception for "unusable COP1" in 4 instructions i missed accidentallyb29715f20small changesd9a620bc1make use of my new small utility library0d1aa938eAdd 'external/ircolib/' from commit 'ce3cd726c8df8388d554abf8bb55d55020eb4450'e64eb40b3Fuck git git-subtree-dir: external/ircolib git-subtree-split:de6e324bde
333 lines
12 KiB
C
333 lines
12 KiB
C
/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2)
|
|
2023-03-02 : Igor Pavlov : Public domain */
|
|
|
|
#ifndef ZIP7_INC_BCJ2_H
|
|
#define ZIP7_INC_BCJ2_H
|
|
|
|
#include "7zTypes.h"
|
|
|
|
EXTERN_C_BEGIN
|
|
|
|
/* Number of BCJ2 streams (MAIN, CALL, JUMP, RC).
   It is the length of the bufs[] / lims[] arrays in CBcj2Dec and CBcj2Enc,
   and the first value used by the BCJ2_DEC_STATE_* / BCJ2_ENC_STATE_* enums. */
#define BCJ2_NUM_STREAMS 4
|
|
|
|
/* Stream indices: positions inside the bufs[] / lims[] arrays
   of CBcj2Dec and CBcj2Enc. */
enum
{
  BCJ2_STREAM_MAIN = 0,
  BCJ2_STREAM_CALL = 1,
  BCJ2_STREAM_JUMP = 2,
  BCJ2_STREAM_RC   = 3
};
|
|
|
|
enum
|
|
{
|
|
BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,
|
|
BCJ2_DEC_STATE_ORIG_1,
|
|
BCJ2_DEC_STATE_ORIG_2,
|
|
BCJ2_DEC_STATE_ORIG_3,
|
|
|
|
BCJ2_DEC_STATE_ORIG,
|
|
BCJ2_DEC_STATE_ERROR /* after detected data error */
|
|
};
|
|
|
|
enum
|
|
{
|
|
BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
|
|
BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */
|
|
};
|
|
|
|
|
|
/* BCJ2_IS_32BIT_STREAM(s):
     non-zero if (s) is BCJ2_STREAM_CALL or BCJ2_STREAM_JUMP.
     Same result as ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP),
     done with one unsigned subtraction and one comparison:
     values below BCJ2_STREAM_CALL wrap around to a large unsigned number. */
#define BCJ2_IS_32BIT_STREAM(s) (((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2u)
|
|
|
|
/*
|
|
CBcj2Dec / CBcj2Enc
|
|
bufs sizes:
|
|
BUF_SIZE(n) = lims[n] - bufs[n]
|
|
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
|
|
(BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
|
|
(BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
|
|
*/
|
|
|
|
// typedef UInt32 CBcj2Prob;
|
|
/* Element type of the probs[] arrays in CBcj2Dec and CBcj2Enc. */
typedef UInt16 CBcj2Prob;
|
|
|
|
/*
|
|
BCJ2 encoder / decoder internal requirements:
|
|
- If last bytes of stream contain marker (e8/e9/0f8x), then
|
|
there is also encoded symbol (0 : no conversion) in RC stream.
|
|
- One case of overlapped instructions is supported,
|
|
if last byte of converted instruction is (0f) and next byte is (8x):
|
|
marker [xx xx xx 0f] 8x
|
|
then the pair (0f 8x) is treated as marker.
|
|
*/
|
|
|
|
/* ---------- BCJ2 Decoder ---------- */
|
|
|
|
/*
|
|
CBcj2Dec:
|
|
(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
|
|
bufs[BCJ2_STREAM_MAIN] >= dest &&
|
|
bufs[BCJ2_STREAM_MAIN] - dest >=
|
|
BUF_SIZE(BCJ2_STREAM_CALL) +
|
|
BUF_SIZE(BCJ2_STREAM_JUMP)
|
|
reserve = bufs[BCJ2_STREAM_MAIN] - dest -
|
|
( BUF_SIZE(BCJ2_STREAM_CALL) +
|
|
BUF_SIZE(BCJ2_STREAM_JUMP) )
|
|
and additional conditions:
|
|
if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init())
|
|
{
|
|
(reserve != 1) : if (ver < v23.00)
|
|
}
|
|
else // if there are more than one calls of Bcj2Dec_Decode() after Bcj2Dec_Init())
|
|
{
|
|
(reserve >= 6) : if (ver < v23.00)
|
|
(reserve >= 4) : if (ver >= v23.00)
|
|
We need that (reserve) because after first call of Bcj2Dec_Decode(),
|
|
CBcj2Dec::temp can contain up to 4 bytes for writing to (dest).
|
|
}
|
|
(reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode().
|
|
(reserve == 0) also is allowed in case of multi-call, if we use fixed buffers,
|
|
and (reserve) is calculated from full (final) sizes of all streams before first call.
|
|
*/
|
|
|
|
typedef struct
|
|
{
|
|
const Byte *bufs[BCJ2_NUM_STREAMS];
|
|
const Byte *lims[BCJ2_NUM_STREAMS];
|
|
Byte *dest;
|
|
const Byte *destLim;
|
|
|
|
unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
|
|
|
|
UInt32 ip; /* property of starting base for decoding */
|
|
UInt32 temp; /* Byte temp[4]; */
|
|
UInt32 range;
|
|
UInt32 code;
|
|
CBcj2Prob probs[2 + 256];
|
|
} CBcj2Dec;
|
|
|
|
|
|
/* Note:
|
|
Bcj2Dec_Init() sets (CBcj2Dec::ip = 0)
|
|
if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init()
|
|
*/
|
|
/* Initializes the decoder object *p; call it before the first Bcj2Dec_Decode().
   It leaves (p->ip == 0). */
void Bcj2Dec_Init(CBcj2Dec *p);
|
|
|
|
|
|
/* Bcj2Dec_Decode():
|
|
returns:
|
|
SZ_OK
|
|
SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct
|
|
*/
|
|
/* Decodes with the stream buffers and the destination window held in *p.
   It can be called more than once after a single Bcj2Dec_Init() (multi-call decoding).
   Returns SZ_OK or SZ_ERROR_DATA. */
SRes Bcj2Dec_Decode(CBcj2Dec *p);
|
|
|
|
/* To check that decoding was finished you can compare
|
|
sizes of processed streams with sizes known from another sources.
|
|
You must do at least one mandatory check from the two following options:
|
|
- the check for size of processed output (ORIG) stream.
|
|
- the check for size of processed input (MAIN) stream.
|
|
additional optional checks:
|
|
- the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC)
|
|
- the checks Bcj2Dec_IsMaybeFinished*()
|
|
also before actual decoding you can check that the
|
|
following condition is met for stream sizes:
|
|
( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) )
|
|
*/
|
|
|
|
/* Bcj2Dec_IsMaybeFinished_state_MAIN(p):
     non-zero if (p->state == BCJ2_STREAM_MAIN), which means that the decoder
     is ready for additional input data in the BCJ2_STREAM_MAIN stream.
     It is only a hint: (state == BCJ2_STREAM_MAIN) is also allowed
     for non-finished decoding. */
#define Bcj2Dec_IsMaybeFinished_state_MAIN(p) (BCJ2_STREAM_MAIN == (p)->state)
|
|
|
|
/* Bcj2Dec_IsMaybeFinished_code(p):
     non-zero if (p->code == 0).
     When stream decoding was finished correctly, the range decoder part of
     CBcj2Dec was finished too, and then (CBcj2Dec::code == 0).
     It is only a hint: (CBcj2Dec::code == 0) is also allowed
     for non-finished decoding. */
#define Bcj2Dec_IsMaybeFinished_code(p) (0 == (p)->code)
|
|
|
|
/* Bcj2Dec_IsMaybeFinished(p):
     non-zero if (p->state == BCJ2_STREAM_MAIN) and (p->code == 0).
     Use it only as an additional check, after at least one mandatory check
     from the two following options:
       - the check for size of processed output (ORIG) stream.
       - the check for size of processed input (MAIN) stream. */
#define Bcj2Dec_IsMaybeFinished(p) \
  ((p)->state == BCJ2_STREAM_MAIN && (p)->code == 0)
|
|
|
|
|
|
|
|
/* ---------- BCJ2 Encoder ---------- */
|
|
|
|
/* Value for CBcj2Enc::finishMode: tells the encoder how to treat
   the end of the current input (srcLim). */
typedef enum
{
  BCJ2_ENC_FINISH_MODE_CONTINUE   = 0,  /* further calls can provide more input data */
  BCJ2_ENC_FINISH_MODE_END_BLOCK  = 1,  /* finish current block without RC flushing */
  BCJ2_ENC_FINISH_MODE_END_STREAM = 2   /* finish the stream fully, including RC flushing */
} EBcj2Enc_FinishMode;
|
|
|
|
/*
|
|
BCJ2_ENC_FINISH_MODE_CONTINUE:
|
|
process non finished encoding.
|
|
It notifies the encoder that additional further calls
|
|
can provide more input data (src) than provided by current call.
|
|
In that case the CBcj2Enc encoder still can move (src) pointer
|
|
up to (srcLim), but CBcj2Enc encoder can store some of the last
|
|
processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer.
|
|
at return:
|
|
(CBcj2Enc::src will point to position that includes
|
|
processed data and data copied to (temp[]) buffer)
|
|
That data from (temp[]) buffer will be used in further calls.
|
|
|
|
BCJ2_ENC_FINISH_MODE_END_BLOCK:
|
|
finish encoding of current block (ended at srcLim) without RC flushing.
|
|
at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG &&
|
|
CBcj2Enc::src == CBcj2Enc::srcLim)
|
|
: it shows that block encoding was finished. And the encoder is
|
|
ready for new (src) data or for stream finish operation.
|
|
finished block means
|
|
{
|
|
CBcj2Enc has completed block encoding up to (srcLim).
|
|
(1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP sequences will
|
|
not cross block boundary at (srcLim).
|
|
temporary CBcj2Enc buffer for (ORIG) src data is empty.
|
|
3 output uncompressed streams (MAIN, CALL, JUMP) were flushed.
|
|
RC stream was not flushed. And RC stream will cross block boundary.
|
|
}
|
|
Note: some possible implementation of BCJ2 encoder could
|
|
write branch marker (e8/e9/0f8x) in one call of Bcj2Enc_Encode(),
|
|
and it could calculate symbol for RC in another call of Bcj2Enc_Encode().
|
|
BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol.
|
|
And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls.
|
|
So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK
|
|
to ensure that RC symbol is calculated and written in proper block.
|
|
|
|
BCJ2_ENC_FINISH_MODE_END_STREAM
|
|
finish encoding of stream (ended at srcLim) fully including RC flushing.
|
|
at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED)
|
|
: it shows that stream encoding was finished fully,
|
|
and all output streams were flushed fully.
|
|
also Bcj2Enc_IsFinished() can be called.
|
|
*/
|
|
|
|
|
|
/*
|
|
32-bit relative offset in JUMP/CALL commands is
|
|
- (mod 4 GiB) for 32-bit x86 code
|
|
- signed Int32 for 64-bit x86-64 code
|
|
BCJ2 encoder also does internal relative to absolute address conversions.
|
|
And there are 2 possible ways to do it:
|
|
before v23: we used 32-bit variables and (mod 4 GiB) conversion
|
|
since v23: we use 64-bit variables and (signed Int32 offset) conversion.
|
|
The absolute address condition for conversion in v23:
|
|
((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64)
|
|
note that if (fileSize64 > 2 GiB), there is a difference between
|
|
old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23).
|
|
And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases.
|
|
*/
|
|
|
|
/*
|
|
// for old (v22) way for conversion:
|
|
typedef UInt32 CBcj2Enc_ip_unsigned;
|
|
typedef Int32 CBcj2Enc_ip_signed;
|
|
#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31)
|
|
*/
|
|
/* Unsigned 64-bit type of the encoder position / size fields
   (CBcj2Enc::ip64, fileIp64, fileSize64_minus1). */
typedef UInt64 CBcj2Enc_ip_unsigned;
|
|
/* Signed 64-bit counterpart of CBcj2Enc_ip_unsigned. */
typedef Int64 CBcj2Enc_ip_signed;
|
|
|
|
/* Maximum size of file that can be used for the conversion condition:
   the largest CBcj2Enc_ip_unsigned value minus 1. */
#define BCJ2_ENC_FileSize_MAX (~(CBcj2Enc_ip_unsigned)0 - 1)
|
|
|
|
/* Default value of the fileSize64_minus1 variable (all bits set).
   It means that the absolute address limitation will not be used. */
#define BCJ2_ENC_FileSizeField_UNLIMITED (~(CBcj2Enc_ip_unsigned)0)
|
|
|
|
/* Converts a file size to the value that can later be stored
   in CBcj2Enc::fileSize64_minus1, that is (size - 1)
   computed in CBcj2Enc_ip_unsigned. */
#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(size) \
  ((CBcj2Enc_ip_unsigned)(size) - 1u)
|
|
|
|
/* Bcj2Enc_SET_FileSize(p, fileSize):
     sets the CBcj2Enc::fileSize64_minus1 variable of (*p) from the size of file.
     The macro expands to one parenthesized assignment expression and does NOT
     contain a trailing semicolon: the caller writes the ';'.
     So it can be used as the only statement of an unbraced if / else branch. */
#define Bcj2Enc_SET_FileSize(p, fileSize) \
  ((p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize))
|
|
|
|
|
|
typedef struct
|
|
{
|
|
Byte *bufs[BCJ2_NUM_STREAMS];
|
|
const Byte *lims[BCJ2_NUM_STREAMS];
|
|
const Byte *src;
|
|
const Byte *srcLim;
|
|
|
|
unsigned state;
|
|
EBcj2Enc_FinishMode finishMode;
|
|
|
|
Byte context;
|
|
Byte flushRem;
|
|
Byte isFlushState;
|
|
|
|
Byte cache;
|
|
UInt32 range;
|
|
UInt64 low;
|
|
UInt64 cacheSize;
|
|
|
|
// UInt32 context; // for marker version, it can include marker flag.
|
|
|
|
/* (ip64) and (fileIp64) correspond to virtual source stream position
|
|
that doesn't include data in temp[] */
|
|
CBcj2Enc_ip_unsigned ip64; /* current (ip) position */
|
|
CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */
|
|
CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */
|
|
UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */
|
|
// UInt32 relatExcludeBits;
|
|
|
|
UInt32 tempTarget;
|
|
unsigned tempPos; /* the number of bytes that were copied to temp[] buffer
|
|
(tempPos <= 4) outside of Bcj2Enc_Encode() */
|
|
// Byte temp[4]; // for marker version
|
|
Byte temp[8];
|
|
CBcj2Prob probs[2 + 256];
|
|
} CBcj2Enc;
|
|
|
|
/* Initializes the encoder object *p. */
void Bcj2Enc_Init(CBcj2Enc *p);
|
|
|
|
|
|
/*
|
|
Bcj2Enc_Encode(): at exit:
|
|
  p->state < BCJ2_NUM_STREAMS : we need more buffer space for output stream
    (bufs[p->state] == lims[p->state])
  p->state == BCJ2_ENC_STATE_ORIG : we need more data in input src stream
    (src == srcLim)
  p->state == BCJ2_ENC_STATE_FINISHED : after fully encoded stream
|
|
*/
|
|
/* Encodes input from p->src (up to p->srcLim) into the p->bufs[] output streams,
   according to p->finishMode.
   There is no return value: the caller inspects p->state after the call. */
void Bcj2Enc_Encode(CBcj2Enc *p);
|
|
|
|
/* Bcj2Enc encoder can look ahead for up to 4 bytes of source stream.
|
|
CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer.
|
|
(CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after
|
|
fully processed data and after data copied to temp buffer.
|
|
So if the caller needs to get real number of fully processed input
|
|
bytes (without look ahead data in temp buffer),
|
|
the caller must subtract (CBcj2Enc::tempPos) value from processed size
|
|
value that is calculated based on current (CBcj2Enc::src):
|
|
cur_processed_pos = Calc_Big_Processed_Pos(enc.src) -
|
|
Bcj2Enc_Get_AvailInputSize_in_Temp(&enc);
|
|
*/
|
|
/* Bcj2Enc_Get_AvailInputSize_in_Temp(enc):
     the size of input data that was stored in the temp[] buffer
     (the value of CBcj2Enc::tempPos). */
#define Bcj2Enc_Get_AvailInputSize_in_Temp(enc) ((enc)->tempPos)
|
|
|
|
/* Bcj2Enc_IsFinished(enc):
     non-zero if (CBcj2Enc::flushRem == 0).
     It can be called after encoding with BCJ2_ENC_FINISH_MODE_END_STREAM. */
#define Bcj2Enc_IsFinished(enc) (0 == (enc)->flushRem)
|
|
|
|
/* Note : the decoder supports overlapping of marker (0f 80).
|
|
But we can eliminate such overlapping cases by setting
|
|
the limit for relative offset conversion as
|
|
CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB)
|
|
*/
|
|
/* Default value for CBcj2Enc::relatLimit:
   (0x0f << 24) == 240 MiB. */
#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0F000000)
|
|
/* Largest value allowed for CBcj2Enc::relatLimit: (1 << 31). */
#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)0x80000000)
|
|
// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5
|
|
|
|
EXTERN_C_END
|
|
|
|
#endif
|