mirror of
https://github.com/ZDoom/Raze.git
synced 2024-12-11 05:11:07 +00:00
aa54d3af10
LZMA update plus several ZScript improvements.
290 lines
7.9 KiB
C
290 lines
7.9 KiB
C
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2023-03-01 : Igor Pavlov : Public domain */
|
|
|
|
#include "Precomp.h"
|
|
|
|
#include "Bcj2.h"
|
|
#include "CpuArch.h"
|
|
|
|
/* Range-decoder constants used by Bcj2Dec_Init and Bcj2Dec_Decode. */

/* When p->range falls below this value the decoder shifts range and code
   left by 8 bits and pulls one more byte from the BCJ2_STREAM_RC stream. */
#define kTopValue ((UInt32)1 << 24)

/* Number of fractional bits in an adaptive probability value. */
#define kNumBitModelTotalBits 11

/* Upper bound of a probability value; every prob starts at half of it. */
#define kBitModelTotal (1 << kNumBitModelTotalBits)

/* Shift applied to the prob adjustment after each decoded bit:
   a larger value makes the probabilities adapt more slowly. */
#define kNumMoveBits 5
|
|
|
|
// UInt32 bcj2_stats[256 + 2][2];
|
|
|
|
/*
  Bcj2Dec_Init: put the decoder into its start-of-stream state.

  p : decoder object to reset. Only the fields written below are touched;
      the stream pointers (bufs / lims) and the output pointers
      (dest / destLim) are not modified here.

  After this call:
    - state is BCJ2_STREAM_RC,
    - ip, temp and code are 0,
    - range is 0, which makes the next Bcj2Dec_Decode() call take its
      "range <= 5" start-up path and load the initial range-coder bytes,
    - every entry of p->probs[] is set to kBitModelTotal / 2.
*/
void Bcj2Dec_Init(CBcj2Dec *p)
{
  unsigned i;
  p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
  p->ip = 0;
  p->temp = 0;
  p->range = 0;
  p->code = 0;
  for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
    p->probs[i] = kBitModelTotal >> 1;
}
|
|
|
|
/*
  Bcj2Dec_Decode: run the BCJ2 decoder until it cannot make progress,
  then return so the caller can refill an input stream or drain the output.

  p : decoder object. The function reads input through p->bufs[i] up to
      p->lims[i] (streams BCJ2_STREAM_MAIN, the CALL / JUMP address streams
      and BCJ2_STREAM_RC) and writes output through p->dest up to p->destLim.
      All of these pointers are advanced in place.

  Returns:
    SZ_ERROR_DATA - the range-coder header is invalid (a non-zero first byte,
                    or an initial code of 0xffffffff); p->state is left at
                    BCJ2_DEC_STATE_ERROR.
    SZ_OK         - otherwise. p->state then tells why decoding stopped:
                      BCJ2_STREAM_RC / BCJ2_STREAM_MAIN / CALL / JUMP stream id:
                        that input stream reached its limit;
                      BCJ2_DEC_STATE_ORIG:
                        the output buffer is full while copying MAIN bytes;
                      BCJ2_DEC_STATE_ORIG_0 + n (n = 0..3):
                        the output buffer filled up after n bytes of a
                        4-byte converted address were written; the remaining
                        bytes are kept in p->temp.

  p->temp carries (v) between calls: the most recent bytes seen (used as
  context for probability selection), or the pending bytes of a partially
  written address.

  NOTE(review): the CALL / JUMP streams are consumed 4 bytes at a time after
  only a "cur == lim" check, so the caller presumably supplies them in
  multiples of 4 bytes - confirm against Bcj2.h.
*/
SRes Bcj2Dec_Decode(CBcj2Dec *p)
{
  UInt32 v = p->temp;
  // const Byte *src;

  /* Start-up: after Bcj2Dec_Init() range is 0. Here (range) is used as a
     counter of header bytes already read (0..4); it becomes a real range
     value (0xffffffff) once all 5 bytes have been loaded into (code). */
  if (p->range <= 5)
  {
    UInt32 code = p->code;
    p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
    for (; p->range != 5; p->range++)
    {
      /* the first header byte must be zero */
      if (p->range == 1 && code != 0)
        return SZ_ERROR_DATA;
      if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
      {
        /* need more RC data; progress is kept in p->range / p->code */
        p->state = BCJ2_STREAM_RC;
        return SZ_OK;
      }
      code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
      p->code = code;
    }
    if (code == 0xffffffff)
      return SZ_ERROR_DATA;
    p->range = 0xffffffff;
  }
  // else
  {
    /* Resume work that a previous call had to interrupt. */
    unsigned state = p->state;
    // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
    if (BCJ2_IS_32BIT_STREAM(state))
    {
      /* The previous call stopped because this address stream was empty:
         fetch the 4-byte value now, or return again if it is still empty. */
      const Byte *cur = p->bufs[state];
      if (cur == p->lims[state])
        return SZ_OK;
      p->bufs[state] = cur + 4;
      {
        /* stored value minus the position after the 4-byte field */
        const UInt32 ip = p->ip + 4;
        v = GetBe32a(cur) - ip;
        p->ip = ip;
      }
      state = BCJ2_DEC_STATE_ORIG_0;
    }
    if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
    {
      /* Write the remaining bytes of the converted value, low byte first.
         (state - BCJ2_DEC_STATE_ORIG_0) is the number of bytes already written. */
      Byte *dest = p->dest;
      for (;;)
      {
        if (dest == p->destLim)
        {
          p->state = state;
          p->temp = v;
          return SZ_OK;
        }
        *dest++ = (Byte)v;
        p->dest = dest;
        if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
          break;
        v >>= 8;
      }
    }
  }

  // src = p->bufs[BCJ2_STREAM_MAIN];
  for (;;)
  {
    /*
    if (BCJ2_IS_32BIT_STREAM(p->state))
      p->state = BCJ2_DEC_STATE_OK;
    else
    */
    {
      /* Normalize the range coder before the next bit is decoded. */
      if (p->range < kTopValue)
      {
        if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
        {
          p->state = BCJ2_STREAM_RC;
          p->temp = v;
          return SZ_OK;
        }
        p->range <<= 8;
        p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
      }
      {
        const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
        const Byte *srcLim;
        Byte *dest = p->dest;
        {
          /* srcLim bounds the unrolled loop: min(input left, output left),
             rounded down to a multiple of NUM_ITERS. */
          const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
          SizeT num = (SizeT)(p->destLim - dest);
          if (num >= rem)
            num = rem;
        #define NUM_ITERS 4
        #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
          num &= ~((SizeT)NUM_ITERS - 1);   // if (NUM_ITERS == (1 << x))
        #else
          num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
        #endif
          srcLim = src + num;
        }

        /* ONE_ITER copies one MAIN byte to the output and shifts it into (v),
           so that after the step:  v = (previous byte << 24) | current byte.
           It breaks out of the enclosing loop when the byte just copied is
             0xE8 or 0xE9                        (first test), or
             0x80..0x8F preceded by byte 0x0F    (second test).
           (src) itself is NOT advanced by the macro. */
        #define NUM_SHIFT_BITS  24
        #define ONE_ITER(indx) { \
          const unsigned b = src[indx]; \
          *dest++ = (Byte)b; \
          v = (v << NUM_SHIFT_BITS) | b; \
          if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
          if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
            ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
          /* ++dest */; /* v = b; */ }

        /* Fast path: NUM_ITERS bytes per pass, no per-byte limit checks. */
        if (src != srcLim)
        for (;;)
        {
          /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
             But we can remove dependency chain with v = b in the end of loop. */
          ONE_ITER(0)
          #if (NUM_ITERS > 1)
            ONE_ITER(1)
          #if (NUM_ITERS > 2)
            ONE_ITER(2)
          #if (NUM_ITERS > 3)
            ONE_ITER(3)
          #if (NUM_ITERS > 4)
            ONE_ITER(4)
          #if (NUM_ITERS > 5)
            ONE_ITER(5)
          #if (NUM_ITERS > 6)
            ONE_ITER(6)
          #if (NUM_ITERS > 7)
            ONE_ITER(7)
          #endif
          #endif
          #endif
          #endif
          #endif
          #endif
          #endif

          src += NUM_ITERS;
          if (src == srcLim)
            break;
        }

        /* Tail: reached only when the fast path found no branch opcode
           (a break above leaves src != srcLim). Handles the last
           (< NUM_ITERS) bytes one at a time with explicit limit checks. */
        if (src == srcLim)
      #if (NUM_ITERS > 1)
        for (;;)
      #endif
        {
        #if (NUM_ITERS > 1)
          if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
        #endif
          {
            /* out of MAIN input or out of output space: save position and stop */
            const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
            p->bufs[BCJ2_STREAM_MAIN] = src;
            p->dest = dest;
            p->ip += (UInt32)num;
            /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
            p->state =
              src == p->lims[BCJ2_STREAM_MAIN] ?
                (unsigned)BCJ2_STREAM_MAIN :
                (unsigned)BCJ2_DEC_STATE_ORIG;
            p->temp = v;
            return SZ_OK;
          }
          #if (NUM_ITERS > 1)
          ONE_ITER(0)
          src++;
          #endif
        }

        /* A branch opcode was found. (src) is stale after a break, so the
           number of consumed bytes is derived from how far (dest) moved. */
        {
          const SizeT num = (SizeT)(dest - p->dest);
          p->dest = dest; // p->dest += num;
          p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
          p->ip += (UInt32)num;
        }
        {
          /* Decode one range-coded bit that tells whether the 4 bytes after
             this opcode were moved to the CALL / JUMP stream (bit 1) or are
             still inline in MAIN (bit 0). */
          UInt32 bound, ttt;
          CBcj2Prob *prob; // unsigned index;
          /*
          prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
              2 + (Byte)(v >> 8) :
              ((v >> 5) & 1));  // ((Byte)v < 0xe8 ? 0 : 1));
          */
          {
            /* Branch-free probability index:
                 c == 1 only when the opcode byte is 0xE8;
                 0F 8x -> 0,  E9 -> 1,  (prev byte) E8 -> prev byte + 2 */
            const unsigned c = ((v + 0x17) >> 6) & 1;
            prob = p->probs + (unsigned)
                (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
                // (Byte)
                // 8x->0     : e9->1     : xxe8->xx+2
                // 8x->0x100 : e9->0x101 : xxe8->xx
                // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
                // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
          }
          ttt = *prob;
          bound = (p->range >> kNumBitModelTotalBits) * ttt;
          if (p->code < bound)
          {
            /* bit 0: nothing to convert, go on scanning MAIN */
            // bcj2_stats[prob - p->probs][0]++;
            p->range = bound;
            *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
            continue;
          }
          {
            /* bit 1: fall through to the address conversion below */
            // bcj2_stats[prob - p->probs][1]++;
            p->range -= bound;
            p->code -= bound;
            *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
          }
        }
      }
    }
    {
      /* Fetch the 4-byte value from the CALL stream (opcode E8) or from the
         following stream id (E9 and 0F 8x), subtract the position after the
         field, and write the result to the output as 4 bytes, low byte first. */
      /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
      // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
      const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
      const Byte *cur = p->bufs[cj];
      Byte *dest;
      SizeT rem;
      if (cur == p->lims[cj])
      {
        /* address stream empty: the entry code of the next call retries */
        p->state = cj;
        break;
      }
      v = GetBe32a(cur);
      p->bufs[cj] = cur + 4;
      {
        const UInt32 ip = p->ip + 4;
        v -= ip;
        p->ip = ip;
      }
      dest = p->dest;
      rem = (SizeT)(p->destLim - dest);
      if (rem < 4)
      {
        /* Not enough room for all 4 bytes: write what fits and keep the
           rest in p->temp; state records how many bytes were written. */
        if ((unsigned)rem > 0) { dest[0] = (Byte)v;  v >>= 8;
        if ((unsigned)rem > 1) { dest[1] = (Byte)v;  v >>= 8;
        if ((unsigned)rem > 2) { dest[2] = (Byte)v;  v >>= 8; }}}
        p->temp = v;
        p->dest = dest + rem;
        p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
        break;
      }
      SetUi32(dest, v)
      /* keep only the last written byte as "previous byte" context */
      v >>= 24;
      p->dest = dest + 4;
    }
  }

  /* Reached only through the two breaks above. Normalize once here if RC
     data is available (if it is not, the range stays below kTopValue). */
  if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
  {
    p->range <<= 8;
    p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
  }
  return SZ_OK;
}
|
|
|
|
/* Remove the helper macros defined in this file so that they are not
   visible to any code that follows this point. */
#undef NUM_ITERS
#undef ONE_ITER
#undef NUM_SHIFT_BITS
#undef kTopValue
#undef kNumBitModelTotalBits
#undef kBitModelTotal
#undef kNumMoveBits
|