diff --git a/lzma/C/7zVersion.h b/lzma/C/7zVersion.h index 69ef69853..f3fb623aa 100644 --- a/lzma/C/7zVersion.h +++ b/lzma/C/7zVersion.h @@ -1,7 +1,7 @@ #define MY_VER_MAJOR 18 -#define MY_VER_MINOR 01 +#define MY_VER_MINOR 05 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "18.01" +#define MY_VERSION_NUMBERS "18.05" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,7 +10,7 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2018-01-28" +#define MY_DATE "2018-04-30" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" diff --git a/lzma/C/Bcj2.c b/lzma/C/Bcj2.c index 0efff4e5d..9a0046a65 100644 --- a/lzma/C/Bcj2.c +++ b/lzma/C/Bcj2.c @@ -1,5 +1,5 @@ /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) -2017-04-03 : Igor Pavlov : Public domain */ +2018-04-28 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -232,10 +232,10 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) if (rem < 4) { - SizeT i; - SetUi32(p->temp, val); - for (i = 0; i < rem; i++) - dest[i] = p->temp[i]; + p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8; + p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8; + p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8; + p->temp[3] = (Byte)val; p->dest = dest + rem; p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; break; diff --git a/lzma/C/CpuArch.h b/lzma/C/CpuArch.h index 51dd607b0..056116dde 100644 --- a/lzma/C/CpuArch.h +++ b/lzma/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2017-06-30 : Igor Pavlov : Public domain */ +2017-09-04 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H @@ -174,7 +174,7 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #ifndef MY_CPU_NAME #ifdef MY_CPU_LE #define MY_CPU_NAME "LE" - #elif MY_CPU_BE + #elif defined(MY_CPU_BE) #define MY_CPU_NAME "BE" #else /* diff --git a/lzma/C/Lzma2Dec.c b/lzma/C/Lzma2Dec.c index 63853c6df..ecde55080 100644 --- a/lzma/C/Lzma2Dec.c +++ b/lzma/C/Lzma2Dec.c @@ -1,5 +1,5 @@ /* Lzma2Dec.c -- LZMA2 Decoder -2017-04-03 : Igor Pavlov : Public domain */ +2018-02-19 : Igor Pavlov : Public domain */ /* #define SHOW_DEBUG_INFO */ @@ -14,28 +14,22 @@ #include "Lzma2Dec.h" /* -00000000 - EOS -00000001 U U - Uncompressed Reset Dic -00000010 U U - Uncompressed No Reset -100uuuuu U U P P - LZMA no reset -101uuuuu U U P P - LZMA reset state -110uuuuu U U P P S - LZMA reset state + new prop -111uuuuu U U P P S - LZMA reset state + new prop + reset dic +00000000 - End of data +00000001 U U - Uncompressed, reset dic, need reset state and set new prop +00000010 U U - Uncompressed, no reset +100uuuuu U U P P - LZMA, no reset +101uuuuu U U P P - LZMA, reset state +110uuuuu U U P P S - LZMA, reset state + set new prop +111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic u, U - Unpack Size P - Pack Size S - Props */ -#define LZMA2_CONTROL_LZMA (1 << 7) -#define LZMA2_CONTROL_COPY_NO_RESET 2 #define LZMA2_CONTROL_COPY_RESET_DIC 1 -#define LZMA2_CONTROL_EOF 0 -#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & LZMA2_CONTROL_LZMA) == 0) - -#define LZMA2_GET_LZMA_MODE(p) (((p)->control >> 5) & 3) -#define LZMA2_IS_THERE_PROP(mode) ((mode) >= 2) +#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0) #define LZMA2_LCLP_MAX 4 #define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11)) @@ -91,9 +85,11 @@ SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) void Lzma2Dec_Init(CLzma2Dec *p) { p->state = LZMA2_STATE_CONTROL; - p->needInitDic = True; - p->needInitState = True; - p->needInitProp = True; + p->needInitLevel = 0xE0; + p->isExtraMode = False; + p->unpackSize = 0; + + // p->decoder.dicPos = 0; // we can use it instead of full init LzmaDec_Init(&p->decoder); } @@ -102,19 +98,26 @@ static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) switch (p->state) { case LZMA2_STATE_CONTROL: + p->isExtraMode = False; p->control = b; - PRF(printf("\n %4X ", (unsigned)p->decoder.dicPos)); - PRF(printf(" %2X", (unsigned)b)); + PRF(printf("\n %8X", (unsigned)p->decoder.dicPos)); + PRF(printf(" %02X", (unsigned)b)); if (b == 0) return LZMA2_STATE_FINISHED; if (LZMA2_IS_UNCOMPRESSED_STATE(p)) { - if (b > 2) + if (b == LZMA2_CONTROL_COPY_RESET_DIC) + p->needInitLevel = 0xC0; + else if (b > 2 || p->needInitLevel == 0xE0) return LZMA2_STATE_ERROR; - p->unpackSize = 0; } else + { + if (b < p->needInitLevel) + return LZMA2_STATE_ERROR; + p->needInitLevel = 0; p->unpackSize = (UInt32)(b & 0x1F) << 16; + } return LZMA2_STATE_UNPACK0; case LZMA2_STATE_UNPACK0: @@ -124,8 +127,8 @@ static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) case LZMA2_STATE_UNPACK1: p->unpackSize |= (UInt32)b; p->unpackSize++; - PRF(printf(" %8u", (unsigned)p->unpackSize)); - return (LZMA2_IS_UNCOMPRESSED_STATE(p)) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; + PRF(printf(" %7u", (unsigned)p->unpackSize)); + return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; case LZMA2_STATE_PACK0: p->packSize = (UInt32)b << 8; @@ -134,9 +137,9 @@ static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) case LZMA2_STATE_PACK1: p->packSize |= (UInt32)b; p->packSize++; - PRF(printf(" %8u", (unsigned)p->packSize)); - return LZMA2_IS_THERE_PROP(LZMA2_GET_LZMA_MODE(p)) ? LZMA2_STATE_PROP: - (p->needInitProp ? LZMA2_STATE_ERROR : LZMA2_STATE_DATA); + // if (p->packSize < 5) return LZMA2_STATE_ERROR; + PRF(printf(" %5u", (unsigned)p->packSize)); + return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA; case LZMA2_STATE_PROP: { @@ -145,13 +148,12 @@ static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) return LZMA2_STATE_ERROR; lc = b % 9; b /= 9; - p->decoder.prop.pb = b / 5; + p->decoder.prop.pb = (Byte)(b / 5); lp = b % 5; if (lc + lp > LZMA2_LCLP_MAX) return LZMA2_STATE_ERROR; - p->decoder.prop.lc = lc; - p->decoder.prop.lp = lp; - p->needInitProp = False; + p->decoder.prop.lc = (Byte)lc; + p->decoder.prop.lp = (Byte)lp; return LZMA2_STATE_DATA; } } @@ -231,11 +233,6 @@ SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, if (p->state == LZMA2_STATE_DATA) { Bool initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); - if (initDic) - p->needInitProp = p->needInitState = True; - else if (p->needInitDic) - break; - p->needInitDic = False; LzmaDec_InitDicAndState(&p->decoder, initDic, False); } @@ -257,23 +254,17 @@ SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, if (p->state == LZMA2_STATE_DATA) { - unsigned mode = LZMA2_GET_LZMA_MODE(p); - Bool initDic = (mode == 3); - Bool initState = (mode != 0); - if ((!initDic && p->needInitDic) || (!initState && p->needInitState)) - break; - + Bool initDic = (p->control >= 0xE0); + Bool initState = (p->control >= 0xA0); LzmaDec_InitDicAndState(&p->decoder, initDic, initState); - p->needInitDic = False; - p->needInitState = False; p->state = LZMA2_STATE_DATA_CONT; } if (inCur > p->packSize) inCur = (SizeT)p->packSize; - - res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status); + res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status); + src += inCur; *srcLen += inCur; p->packSize -= (UInt32)inCur; @@ -310,6 +301,129 @@ SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, } + + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, + const Byte *src, SizeT *srcLen, + int checkFinishBlock) +{ + SizeT inSize = *srcLen; + *srcLen = 0; + + while (p->state != LZMA2_STATE_ERROR) + { + if (p->state == LZMA2_STATE_FINISHED) + return LZMA_STATUS_FINISHED_WITH_MARK; + + if (outSize == 0 && !checkFinishBlock) + return LZMA_STATUS_NOT_FINISHED; + + if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) + { + if (*srcLen == inSize) + return LZMA_STATUS_NEEDS_MORE_INPUT; + (*srcLen)++; + + p->state = Lzma2Dec_UpdateState(p, *src++); + + if (p->state == LZMA2_STATE_UNPACK0) + { + // if (p->decoder.dicPos != 0) + if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0) + return LZMA2_PARSE_STATUS_NEW_BLOCK; + // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED; + } + + // The following code can be commented. + // It's not big problem, if we read additional input bytes. + // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state. + + if (outSize == 0 && p->state != LZMA2_STATE_FINISHED) + { + // checkFinishBlock is true. So we expect that block must be finished, + // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here + // break; + return LZMA_STATUS_NOT_FINISHED; + } + + if (p->state == LZMA2_STATE_DATA) + return LZMA2_PARSE_STATUS_NEW_CHUNK; + + continue; + } + + if (outSize == 0) + return LZMA_STATUS_NOT_FINISHED; + + { + SizeT inCur = inSize - *srcLen; + + if (LZMA2_IS_UNCOMPRESSED_STATE(p)) + { + if (inCur == 0) + return LZMA_STATUS_NEEDS_MORE_INPUT; + if (inCur > p->unpackSize) + inCur = p->unpackSize; + if (inCur > outSize) + inCur = outSize; + p->decoder.dicPos += inCur; + src += inCur; + *srcLen += inCur; + outSize -= inCur; + p->unpackSize -= (UInt32)inCur; + p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; + } + else + { + p->isExtraMode = True; + + if (inCur == 0) + { + if (p->packSize != 0) + return LZMA_STATUS_NEEDS_MORE_INPUT; + } + else if (p->state == LZMA2_STATE_DATA) + { + p->state = LZMA2_STATE_DATA_CONT; + if (*src != 0) + { + // first byte of lzma chunk must be Zero + *srcLen += 1; + p->packSize--; + break; + } + } + + if (inCur > p->packSize) + inCur = (SizeT)p->packSize; + + src += inCur; + *srcLen += inCur; + p->packSize -= (UInt32)inCur; + + if (p->packSize == 0) + { + SizeT rem = outSize; + if (rem > p->unpackSize) + rem = p->unpackSize; + p->decoder.dicPos += rem; + p->unpackSize -= (UInt32)rem; + outSize -= rem; + if (p->unpackSize == 0) + p->state = LZMA2_STATE_CONTROL; + } + } + } + } + + p->state = LZMA2_STATE_ERROR; + return LZMA_STATUS_NOT_SPECIFIED; +} + + + + SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen, inSize = *srcLen; diff --git a/lzma/C/Lzma2Dec.h b/lzma/C/Lzma2Dec.h index 917af990d..b8ddeac89 100644 --- a/lzma/C/Lzma2Dec.h +++ b/lzma/C/Lzma2Dec.h @@ -1,5 +1,5 @@ /* Lzma2Dec.h -- LZMA2 Decoder -2017-04-03 : Igor Pavlov : Public domain */ +2018-02-19 : Igor Pavlov : Public domain */ #ifndef __LZMA2_DEC_H #define __LZMA2_DEC_H @@ -12,25 +12,24 @@ EXTERN_C_BEGIN typedef struct { - CLzmaDec decoder; - UInt32 packSize; - UInt32 unpackSize; unsigned state; Byte control; - Bool needInitDic; - Bool needInitState; - Bool needInitProp; + Byte needInitLevel; + Byte isExtraMode; + Byte _pad_; + UInt32 packSize; + UInt32 unpackSize; + CLzmaDec decoder; } CLzma2Dec; #define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) -#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc); -#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc); +#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) +#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); void Lzma2Dec_Init(CLzma2Dec *p); - /* finishMode: It has meaning only if the decoding reaches output limit (*destLen or dicLimit). @@ -53,6 +52,47 @@ SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); +/* ---------- LZMA2 block and chunk parsing ---------- */ + +/* +Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data. +It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code: + - LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input. + - LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read. + CLzma2Dec::unpackSize contains unpack size of that chunk +*/ + +typedef enum +{ +/* + LZMA_STATUS_NOT_SPECIFIED // data error + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED // + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK // unused +*/ + LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1, + LZMA2_PARSE_STATUS_NEW_CHUNK +} ELzma2ParseStatus; + +ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, + SizeT outSize, // output size + const Byte *src, SizeT *srcLen, + int checkFinishBlock // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position. + ); + +/* +LZMA2 parser doesn't decode LZMA chunks, so we must read + full input LZMA chunk to decode some part of LZMA chunk. + +Lzma2Dec_GetUnpackExtra() returns the value that shows + max possible number of output bytes that can be output by decoder + at current input positon. +*/ + +#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0); + + /* ---------- One Call Interface ---------- */ /* diff --git a/lzma/C/LzmaDec.c b/lzma/C/LzmaDec.c index e96fa975b..cccb93267 100644 --- a/lzma/C/LzmaDec.c +++ b/lzma/C/LzmaDec.c @@ -1,8 +1,9 @@ /* LzmaDec.c -- LZMA Decoder -2017-04-03 : Igor Pavlov : Public domain */ +2018-02-28 : Igor Pavlov : Public domain */ #include "Precomp.h" +/* #include "CpuArch.h" */ #include "LzmaDec.h" #include @@ -24,9 +25,16 @@ #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ { UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_1(p); i = (i + i) + 1; A1; } -#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) -#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } +#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } + +#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ + { UPDATE_0(p + i); A0; } else \ + { UPDATE_1(p + i); A1; } +#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) +#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) +#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) + #define TREE_DECODE(probs, limit, i) \ { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } @@ -46,12 +54,15 @@ i -= 0x40; } #endif -#define NORMAL_LITER_DEC GET_BIT(prob + symbol, symbol) +#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol) #define MATCHED_LITER_DEC \ - matchByte <<= 1; \ - bit = (matchByte & offs); \ - probLit = prob + offs + bit + symbol; \ - GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + matchByte += matchByte; \ + bit = offs; \ + offs &= matchByte; \ + probLit = prob + (offs + bit + symbol); \ + GET_BIT2(probLit, symbol, offs ^= bit; , ;) + + #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } @@ -66,25 +77,28 @@ { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } +#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ + { UPDATE_0_CHECK; i += m; m += m; } else \ + { UPDATE_1_CHECK; m += m; i += m; } + + #define kNumPosBitsMax 4 #define kNumPosStatesMax (1 << kNumPosBitsMax) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) -#define LenChoice 0 -#define LenChoice2 (LenChoice + 1) -#define LenLow (LenChoice2 + 1) -#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) -#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define LenLow 0 +#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits)) #define kNumLenProbs (LenHigh + kLenNumHighSymbols) +#define LenChoice LenLow +#define LenChoice2 (LenLow + (1 << kLenNumLowBits)) #define kNumStates 12 +#define kNumStates2 16 #define kNumLitStates 7 #define kStartPosModelIndex 4 @@ -98,54 +112,117 @@ #define kAlignTableSize (1 << kNumAlignBits) #define kMatchMinLen 2 -#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) -#define IsMatch 0 -#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +/* External ASM code needs same CLzmaProb array layout. So don't change it. */ + +/* (probs_1664) is faster and better for code size at some platforms */ +/* +#ifdef MY_CPU_X86_OR_AMD64 +*/ +#define kStartOffset 1664 +#define GET_PROBS p->probs_1664 +/* +#define GET_PROBS p->probs + kStartOffset +#else +#define kStartOffset 0 +#define GET_PROBS p->probs +#endif +*/ + +#define SpecPos (-kStartOffset) +#define IsRep0Long (SpecPos + kNumFullDistances) +#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax)) +#define LenCoder (RepLenCoder + kNumLenProbs) +#define IsMatch (LenCoder + kNumLenProbs) +#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax)) +#define IsRep (Align + kAlignTableSize) #define IsRepG0 (IsRep + kNumStates) #define IsRepG1 (IsRepG0 + kNumStates) #define IsRepG2 (IsRepG1 + kNumStates) -#define IsRep0Long (IsRepG2 + kNumStates) -#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) -#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) -#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) -#define LenCoder (Align + kAlignTableSize) -#define RepLenCoder (LenCoder + kNumLenProbs) -#define Literal (RepLenCoder + kNumLenProbs) +#define PosSlot (IsRepG2 + kNumStates) +#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define NUM_BASE_PROBS (Literal + kStartOffset) -#define LZMA_BASE_SIZE 1846 -#define LZMA_LIT_SIZE 0x300 - -#if Literal != LZMA_BASE_SIZE -StopCompilingDueBUG +#if Align != 0 && kStartOffset != 0 + #error Stop_Compiling_Bad_LZMA_kAlign #endif -#define LzmaProps_GetNumProbs(p) (Literal + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) +#if NUM_BASE_PROBS != 1984 + #error Stop_Compiling_Bad_LZMA_PROBS +#endif + + +#define LZMA_LIT_SIZE 0x300 + +#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + + +#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4) +#define COMBINED_PS_STATE (posState + state) +#define GET_LEN_STATE (posState) #define LZMA_DIC_MIN (1 << 12) -/* First LZMA-symbol is always decoded. -And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +/* +p->remainLen : shows status of LZMA decoder: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state +*/ + +/* ---------- LZMA_DECODE_REAL ---------- */ +/* +LzmaDec_DecodeReal_3() can be implemented in external ASM file. +3 - is the code compatibility version of that function for check at link time. +*/ + +#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3 + +/* +LZMA_DECODE_REAL() +In: + RangeCoder is normalized + if (p->dicPos == limit) + { + LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. + So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol + is not END_OF_PAYALOAD_MARKER, then function returns error code. + } + +Processing: + first LZMA symbol will be decoded in any case + All checks for limits are at the end of main loop, + It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + Out: + RangeCoder is normalized Result: SZ_OK - OK SZ_ERROR_DATA - Error p->remainLen: < kMatchSpecLenStart : normal remain = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : Flush marker (unused now) - = kMatchSpecLenStart + 2 : State Init Marker (unused now) */ -static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) -{ - CLzmaProb *probs = p->probs; - unsigned state = p->state; +#ifdef _LZMA_DEC_OPT + +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); + +#else + +static +int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; - unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; unsigned lc = p->prop.lc; + unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); Byte *dic = p->dic; SizeT dicBufSize = p->dicBufSize; @@ -164,17 +241,16 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = processedPos & pbMask; + unsigned posState = CALC_POS_STATE(processedPos, pbMask); - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0(prob) { unsigned symbol; UPDATE_0(prob); prob = probs + Literal; if (processedPos != 0 || checkDicSize != 0) - prob += ((UInt32)LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + - (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); processedPos++; if (state < kNumLitStates) @@ -240,13 +316,16 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte else { UPDATE_1(prob); + /* + // that case was checked before with kBadRepCode if (checkDicSize == 0 && processedPos == 0) return SZ_ERROR_DATA; + */ prob = probs + IsRepG0 + state; IF_BIT_0(prob) { UPDATE_0(prob); - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0(prob) { UPDATE_0(prob); @@ -299,7 +378,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; offset = 0; lim = (1 << kLenNumLowBits); } @@ -310,15 +389,15 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; - lim = (1 << kLenNumMidBits); + lim = (1 << kLenNumLowBits); } else { UPDATE_1(probLen); probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; + offset = kLenNumLowSymbols * 2; lim = (1 << kLenNumHighBits); } } @@ -331,7 +410,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; len = 1; TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len); @@ -345,7 +424,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte IF_BIT_0(probLen) { UPDATE_0(probLen); - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); len = 1; TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len); @@ -356,7 +435,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte UPDATE_1(probLen); probLen = prob + LenHigh; TREE_DECODE(probLen, (1 << kLenNumHighBits), len); - len += kLenNumLowSymbols + kLenNumMidSymbols; + len += kLenNumLowSymbols * 2; } } } @@ -376,16 +455,16 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte if (posSlot < kEndPosModelIndex) { distance <<= numDirectBits; - prob = probs + SpecPos + distance - posSlot - 1; + prob = probs + SpecPos; { - UInt32 mask = 1; - unsigned i = 1; + UInt32 m = 1; + distance++; do { - GET_BIT2(prob + i, i, ; , distance |= mask); - mask <<= 1; + REV_BIT_VAR(prob, distance, m); } - while (--numDirectBits != 0); + while (--numDirectBits); + distance -= m; } } else @@ -412,19 +491,20 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte } */ } - while (--numDirectBits != 0); + while (--numDirectBits); prob = probs + Align; distance <<= kNumAlignBits; { unsigned i = 1; - GET_BIT2(prob + i, i, ; , distance |= 1); - GET_BIT2(prob + i, i, ; , distance |= 2); - GET_BIT2(prob + i, i, ; , distance |= 4); - GET_BIT2(prob + i, i, ; , distance |= 8); + REV_BIT_CONST(prob, i, 1); + REV_BIT_CONST(prob, i, 2); + REV_BIT_CONST(prob, i, 4); + REV_BIT_LAST (prob, i, 8); + distance |= i; } if (distance == (UInt32)0xFFFFFFFF) { - len += kMatchSpecLenStart; + len = kMatchSpecLenStart; state -= kNumStates; break; } @@ -435,20 +515,12 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte rep2 = rep1; rep1 = rep0; rep0 = distance + 1; - if (checkDicSize == 0) - { - if (distance >= processedPos) - { - p->dicPos = dicPos; - return SZ_ERROR_DATA; - } - } - else if (distance >= checkDicSize) + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) { p->dicPos = dicPos; return SZ_ERROR_DATA; } - state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; } len += kMatchMinLen; @@ -511,6 +583,7 @@ static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte return SZ_OK; } +#endif static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { @@ -519,7 +592,7 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) Byte *dic = p->dic; SizeT dicPos = p->dicPos; SizeT dicBufSize = p->dicBufSize; - unsigned len = p->remainLen; + unsigned len = (unsigned)p->remainLen; SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ SizeT rem = limit - dicPos; if (rem < len) @@ -540,6 +613,14 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) } } + +#define kRange0 0xFFFFFFFF +#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) +#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) +#if kBadRepCode != (0xC0000000 - 0x400) + #error Stop_Compiling_Bad_LZMA_Check +#endif + static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { do @@ -550,9 +631,13 @@ static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte UInt32 rem = p->prop.dicSize - p->processedPos; if (limit - p->dicPos > rem) limit2 = p->dicPos + rem; + + if (p->processedPos == 0) + if (p->code >= kBadRepCode) + return SZ_ERROR_DATA; } - - RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); + + RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) p->checkDicSize = p->prop.dicSize; @@ -561,9 +646,6 @@ static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - if (p->remainLen > kMatchSpecLenStart) - p->remainLen = kMatchSpecLenStart; - return 0; } @@ -580,17 +662,17 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS UInt32 range = p->range; UInt32 code = p->code; const Byte *bufLimit = buf + inSize; - const CLzmaProb *probs = p->probs; - unsigned state = p->state; + const CLzmaProb *probs = GET_PROBS; + unsigned state = (unsigned)p->state; ELzmaDummy res; { const CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); + unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); - prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK @@ -618,10 +700,11 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned bit; const CLzmaProb *probLit; - matchByte <<= 1; - bit = (matchByte & offs); - probLit = prob + offs + bit + symbol; - GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + matchByte += matchByte; + bit = offs; + offs &= matchByte; + probLit = prob + (offs + bit + symbol); + GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; ) } while (symbol < 0x100); } @@ -648,7 +731,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + prob = probs + IsRep0Long + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; @@ -691,7 +774,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; - probLen = prob + LenLow + (posState << kLenNumLowBits); + probLen = prob + LenLow + GET_LEN_STATE; offset = 0; limit = 1 << kLenNumLowBits; } @@ -702,15 +785,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(probLen) { UPDATE_0_CHECK; - probLen = prob + LenMid + (posState << kLenNumMidBits); + probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); offset = kLenNumLowSymbols; - limit = 1 << kLenNumMidBits; + limit = 1 << kLenNumLowBits; } else { UPDATE_1_CHECK; probLen = prob + LenHigh; - offset = kLenNumLowSymbols + kLenNumMidSymbols; + offset = kLenNumLowSymbols * 2; limit = 1 << kLenNumHighBits; } } @@ -722,7 +805,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned posSlot; prob = probs + PosSlot + - ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << + ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); if (posSlot >= kStartPosModelIndex) @@ -733,7 +816,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS if (posSlot < kEndPosModelIndex) { - prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); } else { @@ -745,17 +828,18 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS code -= range & (((code - range) >> 31) - 1); /* if (code >= range) code -= range; */ } - while (--numDirectBits != 0); + while (--numDirectBits); prob = probs + Align; numDirectBits = kNumAlignBits; } { unsigned i = 1; + unsigned m = 1; do { - GET_BIT_CHECK(prob + i, i); + REV_BIT_CHECK(prob, i, m); } - while (--numDirectBits != 0); + while (--numDirectBits); } } } @@ -768,18 +852,17 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) { - p->needFlush = 1; - p->remainLen = 0; + p->remainLen = kMatchSpecLenStart + 1; p->tempBufSize = 0; if (initDic) { p->processedPos = 0; p->checkDicSize = 0; - p->needInitState = 1; + p->remainLen = kMatchSpecLenStart + 2; } if (initState) - p->needInitState = 1; + p->remainLen = kMatchSpecLenStart + 2; } void LzmaDec_Init(CLzmaDec *p) @@ -788,53 +871,54 @@ void LzmaDec_Init(CLzmaDec *p) LzmaDec_InitDicAndState(p, True, True); } -static void LzmaDec_InitStateReal(CLzmaDec *p) -{ - SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); - SizeT i; - CLzmaProb *probs = p->probs; - for (i = 0; i < numProbs; i++) - probs[i] = kBitModelTotal >> 1; - p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; - p->state = 0; - p->needInitState = 0; -} SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; - LzmaDec_WriteRem(p, dicLimit); *status = LZMA_STATUS_NOT_SPECIFIED; + if (p->remainLen > kMatchSpecLenStart) + { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize != 0 && p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + p->code = + ((UInt32)p->tempBuf[1] << 24) + | ((UInt32)p->tempBuf[2] << 16) + | ((UInt32)p->tempBuf[3] << 8) + | ((UInt32)p->tempBuf[4]); + p->range = 0xFFFFFFFF; + p->tempBufSize = 0; + + if (p->remainLen > kMatchSpecLenStart + 1) + { + SizeT numProbs = LzmaProps_GetNumProbs(&p->prop); + SizeT i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + } + + p->remainLen = 0; + } + + LzmaDec_WriteRem(p, dicLimit); + while (p->remainLen != kMatchSpecLenStart) { - int checkEndMarkNow; + int checkEndMarkNow = 0; - if (p->needFlush) - { - for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) - p->tempBuf[p->tempBufSize++] = *src++; - if (p->tempBufSize < RC_INIT_SIZE) - { - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; - } - if (p->tempBuf[0] != 0) - return SZ_ERROR_DATA; - p->code = - ((UInt32)p->tempBuf[1] << 24) - | ((UInt32)p->tempBuf[2] << 16) - | ((UInt32)p->tempBuf[3] << 8) - | ((UInt32)p->tempBuf[4]); - p->range = 0xFFFFFFFF; - p->needFlush = 0; - p->tempBufSize = 0; - } - - checkEndMarkNow = 0; if (p->dicPos >= dicLimit) { if (p->remainLen == 0 && p->code == 0) @@ -855,9 +939,6 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr checkEndMarkNow = 1; } - if (p->needInitState) - LzmaDec_InitStateReal(p); - if (p->tempBufSize == 0) { SizeT processed; @@ -930,11 +1011,14 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr p->tempBufSize = 0; } } - if (p->code == 0) - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; + + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; } + SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; @@ -1011,10 +1095,10 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) if (d >= (9 * 5 * 5)) return SZ_ERROR_UNSUPPORTED; - p->lc = d % 9; + p->lc = (Byte)(d % 9); d /= 9; - p->pb = d / 5; - p->lp = d % 5; + p->pb = (Byte)(d / 5); + p->lp = (Byte)(d % 5); return SZ_OK; } @@ -1026,9 +1110,10 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl { LzmaDec_FreeProbs(p, alloc); p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb)); - p->numProbs = numProbs; if (!p->probs) return SZ_ERROR_MEM; + p->probs_1664 = p->probs + 1664; + p->numProbs = numProbs; } return SZ_OK; } diff --git a/lzma/C/LzmaDec.h b/lzma/C/LzmaDec.h index d6af92203..1f0927ab1 100644 --- a/lzma/C/LzmaDec.h +++ b/lzma/C/LzmaDec.h @@ -1,5 +1,5 @@ /* LzmaDec.h -- LZMA Decoder -2017-04-03 : Igor Pavlov : Public domain */ +2018-04-21 : Igor Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H @@ -12,11 +12,13 @@ EXTERN_C_BEGIN /* _LZMA_PROB32 can increase the speed on some CPUs, but memory usage for CLzmaDec::probs will be doubled in that case */ +typedef #ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; /* ---------- LZMA Properties ---------- */ @@ -25,7 +27,10 @@ EXTERN_C_BEGIN typedef struct _CLzmaProps { - unsigned lc, lp, pb; + Byte lc; + Byte lp; + Byte pb; + Byte _pad_; UInt32 dicSize; } CLzmaProps; @@ -47,32 +52,34 @@ SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); typedef struct { + /* Don't change this structure. ASM code can use it. */ CLzmaProps prop; CLzmaProb *probs; + CLzmaProb *probs_1664; Byte *dic; - const Byte *buf; - UInt32 range, code; - SizeT dicPos; SizeT dicBufSize; + SizeT dicPos; + const Byte *buf; + UInt32 range; + UInt32 code; UInt32 processedPos; UInt32 checkDicSize; - unsigned state; UInt32 reps[4]; - unsigned remainLen; - int needFlush; - int needInitState; + UInt32 state; + UInt32 remainLen; + UInt32 numProbs; unsigned tempBufSize; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; } CLzmaDec; -#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } +#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } void LzmaDec_Init(CLzmaDec *p); /* There are two types of LZMA streams: - 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. - 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + - Stream with end mark. That end mark adds about 6 bytes to compressed size. + - Stream without end mark. You must know exact uncompressed size to decompress such stream. */ typedef enum { @@ -132,8 +139,8 @@ LzmaDec_Allocate* can return: SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc); -SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAllocPtr alloc); -void LzmaDec_Free(CLzmaDec *state, ISzAllocPtr alloc); +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc); +void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc); /* ---------- Dictionary Interface ---------- */ @@ -142,7 +149,7 @@ void LzmaDec_Free(CLzmaDec *state, ISzAllocPtr alloc); You must work with CLzmaDec variables directly in this interface. STEPS: - LzmaDec_Constr() + LzmaDec_Construct() LzmaDec_Allocate() for (each new stream) { diff --git a/lzma/C/LzmaEnc.c b/lzma/C/LzmaEnc.c index 9b7e69104..24e582649 100644 --- a/lzma/C/LzmaEnc.c +++ b/lzma/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2017-06-22 : Igor Pavlov : Public domain */ +2018-04-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -26,15 +26,6 @@ static unsigned g_STAT_OFFSET = 0; #define kLzmaMaxHistorySize ((UInt32)3 << 29) /* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ -#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) - -#define kBlockSize (9 << 10) -#define kUnpackBlockSize (1 << 18) -#define kMatchArraySize (1 << 21) -#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) - -#define kNumMaxDirectBits (31) - #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) @@ -111,9 +102,9 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } -static UInt32 GetPosSlot1(UInt32 pos) +static unsigned GetPosSlot1(UInt32 pos) { - UInt32 res; + unsigned res; BSR2_RET(pos, res); return res; } @@ -146,18 +137,18 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos) /* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */ /* -#define BSR2_RET(pos, res) { UInt32 zz = 6 + ((kNumLogBits - 1) & \ +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ res = p->g_FastPos[pos >> zz] + (zz * 2); } */ /* -#define BSR2_RET(pos, res) { UInt32 zz = 6 + ((kNumLogBits - 1) & \ +#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \ (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \ res = p->g_FastPos[pos >> zz] + (zz * 2); } */ -#define BSR2_RET(pos, res) { UInt32 zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ +#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \ res = p->g_FastPos[pos >> zz] + (zz * 2); } /* @@ -168,32 +159,32 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos) #define GetPosSlot1(pos) p->g_FastPos[pos] #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } -#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } #endif #define LZMA_NUM_REPS 4 -typedef unsigned CState; +typedef UInt16 CState; +typedef UInt16 CExtra; typedef struct { UInt32 price; - CState state; - int prev1IsChar; - int prev2; - - UInt32 posPrev2; - UInt32 backPrev2; - - UInt32 posPrev; - UInt32 backPrev; - UInt32 backs[LZMA_NUM_REPS]; + CExtra extra; + // 0 : normal + // 1 : LIT : MATCH + // > 1 : MATCH (extra-1) : LIT : REP0 (len) + UInt32 len; + UInt32 dist; + UInt32 reps[LZMA_NUM_REPS]; } COptimal; + #define kNumOpts (1 << 12) +#define kPackReserve (1 + kNumOpts * 2) #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 @@ -201,22 +192,21 @@ typedef struct #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) - #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kAlignMask (kAlignTableSize - 1) #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 -#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) - #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) +typedef #ifdef _LZMA_PROB32 -#define CLzmaProb UInt32 + UInt32 #else -#define CLzmaProb UInt16 + UInt16 #endif + CLzmaProb; #define LZMA_PB_MAX 4 #define LZMA_LC_MAX 8 @@ -224,15 +214,11 @@ typedef struct #define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) - #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) -#define kLenNumMidBits 3 -#define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) - -#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols) #define LZMA_MATCH_LEN_MIN 2 #define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) @@ -242,27 +228,23 @@ typedef struct typedef struct { - CLzmaProb choice; - CLzmaProb choice2; - CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; - CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)]; CLzmaProb high[kLenNumHighSymbols]; } CLenEnc; typedef struct { - CLenEnc p; - UInt32 tableSize; + unsigned tableSize; + unsigned counters[LZMA_NUM_PB_STATES_MAX]; UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; - UInt32 counters[LZMA_NUM_PB_STATES_MAX]; } CLenPriceEnc; typedef struct { UInt32 range; - Byte cache; + unsigned cache; UInt64 low; UInt64 cacheSize; Byte *buf; @@ -278,48 +260,54 @@ typedef struct { CLzmaProb *litProbs; - UInt32 state; + unsigned state; UInt32 reps[LZMA_NUM_REPS]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; CLzmaProb isRep[kNumStates]; CLzmaProb isRepG0[kNumStates]; CLzmaProb isRepG1[kNumStates]; CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb posEncoders[kNumFullDistances]; - CLenPriceEnc lenEnc; - CLenPriceEnc repLenEnc; + CLenEnc lenProbs; + CLenEnc repLenProbs; + } CSaveState; +typedef UInt32 CProbPrice; + + typedef struct { void *matchFinderObj; IMatchFinder matchFinder; - UInt32 optimumEndIndex; - UInt32 optimumCurrentIndex; + unsigned optCur; + unsigned optEnd; - UInt32 longestMatchLength; - UInt32 numPairs; + unsigned longestMatchLen; + unsigned numPairs; UInt32 numAvail; - UInt32 numFastBytes; - UInt32 additionalOffset; + unsigned state; + unsigned numFastBytes; + unsigned additionalOffset; UInt32 reps[LZMA_NUM_REPS]; - UInt32 state; + unsigned lpMask, pbMask; + CLzmaProb *litProbs; + CRangeEnc rc; + + UInt32 backRes; unsigned lc, lp, pb; - unsigned lpMask, pbMask; unsigned lclp; - CLzmaProb *litProbs; - Bool fastMode; Bool writeEndMark; Bool finished; @@ -328,19 +316,19 @@ typedef struct UInt64 nowPos64; - UInt32 matchPriceCount; - UInt32 alignPriceCount; + unsigned matchPriceCount; + unsigned alignPriceCount; - UInt32 distTableSize; + unsigned distTableSize; UInt32 dictSize; SRes result; - CRangeEnc rc; - #ifndef _7ZIP_ST Bool mtMode; + // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; + // end of CMatchFinderMt is used in BT and HASH threads #endif CMatchFinder matchFinderBase; @@ -349,33 +337,37 @@ typedef struct Byte pad[128]; #endif - COptimal opt[kNumOpts]; - - #ifndef LZMA_LOG_BSR - Byte g_FastPos[1 << kNumLogBits]; - #endif + // LZ thread + CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + UInt32 alignPrices[kAlignTableSize]; UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; - UInt32 alignPrices[kAlignTableSize]; - CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; CLzmaProb isRep[kNumStates]; CLzmaProb isRepG0[kNumStates]; CLzmaProb isRepG1[kNumStates]; CLzmaProb isRepG2[kNumStates]; + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; - CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; - CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; - CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + CLzmaProb posEncoders[kNumFullDistances]; + CLenEnc lenProbs; + CLenEnc repLenProbs; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif + CLenPriceEnc lenEnc; CLenPriceEnc repLenEnc; + COptimal opt[kNumOpts]; + CSaveState saveState; #ifndef _7ZIP_ST @@ -384,58 +376,62 @@ typedef struct } CLzmaEnc; + +#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); + void LzmaEnc_SaveState(CLzmaEncHandle pp) { CLzmaEnc *p = (CLzmaEnc *)pp; CSaveState *dest = &p->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb)); } + void LzmaEnc_RestoreState(CLzmaEncHandle pp) { CLzmaEnc *dest = (CLzmaEnc *)pp; const CSaveState *p = &dest->saveState; - int i; - dest->lenEnc = p->lenEnc; - dest->repLenEnc = p->repLenEnc; + dest->state = p->state; - for (i = 0; i < kNumStates; i++) - { - memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); - memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); - } - for (i = 0; i < kNumLenToPosStates; i++) - memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); - memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); - memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); - memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); - memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); - memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); - memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); - memcpy(dest->reps, p->reps, sizeof(p->reps)); + dest->lenProbs = p->lenProbs; + dest->repLenProbs = p->repLenProbs; + + COPY_ARR(dest, p, reps); + + COPY_ARR(dest, p, posAlignEncoder); + COPY_ARR(dest, p, isRep); + COPY_ARR(dest, p, isRepG0); + COPY_ARR(dest, p, isRepG1); + COPY_ARR(dest, p, isRepG2); + COPY_ARR(dest, p, isMatch); + COPY_ARR(dest, p, isRep0Long); + COPY_ARR(dest, p, posSlotEncoder); + COPY_ARR(dest, p, posEncoders); + memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb)); } + + SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) { CLzmaEnc *p = (CLzmaEnc *)pp; @@ -464,7 +460,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->fastMode = (props.algo == 0); p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); { - UInt32 numHashBytes = 4; + unsigned numHashBytes = 4; if (props.btMode) { if (props.numHashBytes < 2) @@ -501,14 +497,19 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) } +#define kState_Start 0 +#define kState_LitAfterMatch 4 +#define kState_LitAfterRep 5 +#define kState_MatchAfterLit 7 +#define kState_RepAfterLit 8 -static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; -static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; -static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; -static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; - -#define IsCharState(s) ((s) < 7) +static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const Byte kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const Byte kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; +#define IsLitState(s) ((s) < 7) +#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1) #define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1) #define kInfinityPrice (1 << 30) @@ -519,9 +520,11 @@ static void RangeEnc_Construct(CRangeEnc *p) p->bufBase = NULL; } -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) #define RC_BUF_SIZE (1 << 16) + static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) { if (!p->bufBase) @@ -543,10 +546,10 @@ static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) static void RangeEnc_Init(CRangeEnc *p) { /* Stream.Init(); */ - p->low = 0; p->range = 0xFFFFFFFF; - p->cacheSize = 1; p->cache = 0; + p->low = 0; + p->cacheSize = 0; p->buf = p->bufBase; @@ -554,7 +557,7 @@ static void RangeEnc_Init(CRangeEnc *p) p->res = SZ_OK; } -static void RangeEnc_FlushStream(CRangeEnc *p) +MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { size_t num; if (p->res != SZ_OK) @@ -566,25 +569,36 @@ static void RangeEnc_FlushStream(CRangeEnc *p) p->buf = p->bufBase; } -static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) { - if ((UInt32)p->low < (UInt32)0xFF000000 || (unsigned)(p->low >> 32) != 0) + UInt32 low = (UInt32)p->low; + unsigned high = (unsigned)(p->low >> 32); + p->low = (UInt32)(low << 8); + if (low < (UInt32)0xFF000000 || high != 0) { - Byte temp = p->cache; - do { Byte *buf = p->buf; - *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); + *buf++ = (Byte)(p->cache + high); + p->cache = (unsigned)(low >> 24); p->buf = buf; if (buf == p->bufLim) RangeEnc_FlushStream(p); - temp = 0xFF; + if (p->cacheSize == 0) + return; + } + high += 0xFF; + for (;;) + { + Byte *buf = p->buf; + *buf++ = (Byte)(high); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + if (--p->cacheSize == 0) + return; } - while (--p->cacheSize != 0); - p->cache = (Byte)((UInt32)p->low >> 24); } p->cacheSize++; - p->low = (UInt32)p->low << 8; } static void RangeEnc_FlushData(CRangeEnc *p) @@ -594,78 +608,121 @@ static void RangeEnc_FlushData(CRangeEnc *p) RangeEnc_ShiftLow(p); } -static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, unsigned numBits) -{ - do - { - p->range >>= 1; - p->low += p->range & (0 - ((value >> --numBits) & 1)); - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } - } - while (numBits != 0); -} +#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); } -static void RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol) +#define RC_BIT_PRE(p, prob) \ + ttt = *(prob); \ + newBound = (range >> kNumBitModelTotalBits) * ttt; + +// #define _LZMA_ENC_USE_BRANCH + +#ifdef _LZMA_ENC_USE_BRANCH + +#define RC_BIT(p, prob, symbol) { \ + RC_BIT_PRE(p, prob) \ + if (symbol == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \ + else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#else + +#define RC_BIT(p, prob, symbol) { \ + UInt32 mask; \ + RC_BIT_PRE(p, prob) \ + mask = 0 - (UInt32)symbol; \ + range &= mask; \ + mask &= newBound; \ + range -= mask; \ + (p)->low += mask; \ + mask = (UInt32)symbol - 1; \ + range += newBound & mask; \ + mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ + mask += ((1 << kNumMoveBits) - 1); \ + ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ + *(prob) = (CLzmaProb)ttt; \ + RC_NORM(p) \ + } + +#endif + + + + +#define RC_BIT_0_BASE(p, prob) \ + range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); + +#define RC_BIT_1_BASE(p, prob) \ + range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \ + +#define RC_BIT_0(p, prob) \ + RC_BIT_0_BASE(p, prob) \ + RC_NORM(p) + +#define RC_BIT_1(p, prob) \ + RC_BIT_1_BASE(p, prob) \ + RC_NORM(p) + +static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob) { - UInt32 ttt = *prob; - UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; - if (symbol == 0) - { - p->range = newBound; - ttt += (kBitModelTotal - ttt) >> kNumMoveBits; - } - else - { - p->low += newBound; - p->range -= newBound; - ttt -= ttt >> kNumMoveBits; - } - *prob = (CLzmaProb)ttt; - if (p->range < kTopValue) - { - p->range <<= 8; - RangeEnc_ShiftLow(p); - } + UInt32 range, ttt, newBound; + range = p->range; + RC_BIT_PRE(p, prob) + RC_BIT_0(p, prob) + p->range = range; } static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol) { + UInt32 range = p->range; symbol |= 0x100; do { - RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + UInt32 ttt, newBound; + // RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + CLzmaProb *prob = probs + (symbol >> 8); + UInt32 bit = (symbol >> 7) & 1; symbol <<= 1; + RC_BIT(p, prob, bit); } while (symbol < 0x10000); + p->range = range; } static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte) { + UInt32 range = p->range; UInt32 offs = 0x100; symbol |= 0x100; do { + UInt32 ttt, newBound; + CLzmaProb *prob; + UInt32 bit; matchByte <<= 1; - RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + // RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + prob = probs + (offs + (matchByte & offs) + (symbol >> 8)); + bit = (symbol >> 7) & 1; symbol <<= 1; offs &= ~(matchByte ^ symbol); + RC_BIT(p, prob, bit); } while (symbol < 0x10000); + p->range = range; } -static void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) + + +static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) { UInt32 i; - for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) + for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++) { - const int kCyclesBits = kNumBitPriceShiftBits; - UInt32 w = i; - UInt32 bitCount = 0; - int j; + const unsigned kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1)); + unsigned bitCount = 0; + unsigned j; for (j = 0; j < kCyclesBits; j++) { w = w * w; @@ -676,37 +733,41 @@ static void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) bitCount++; } } - ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + // printf("\n%3d: %5d", i, ProbPrices[i]); } } #define GET_PRICE(prob, symbol) \ - p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + p->ProbPrices[((prob) ^ (unsigned)(((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICEa(prob, symbol) \ - ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + ProbPrices[((prob) ^ (unsigned)((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] -#define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] -#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] +#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] -static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, const UInt32 *ProbPrices) + +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, const CProbPrice *ProbPrices) { UInt32 price = 0; symbol |= 0x100; do { - price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); - symbol <<= 1; + unsigned bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[symbol], bit); } - while (symbol < 0x10000); + while (symbol >= 2); return price; } -static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, const UInt32 *ProbPrices) + +static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, const CProbPrice *ProbPrices) { UInt32 price = 0; UInt32 offs = 0x100; @@ -723,520 +784,525 @@ static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt } -static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) +static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBits, UInt32 symbol) { - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0;) + UInt32 range = rc->range; + unsigned m = 1; + do { - UInt32 bit; - i--; - bit = (symbol >> i) & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); + UInt32 ttt, newBound; + unsigned bit = symbol & 1; + // RangeEnc_EncodeBit(rc, probs + m, bit); + symbol >>= 1; + RC_BIT(rc, probs + m, bit); m = (m << 1) | bit; } + while (--numBits); + rc->range = range; } -static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) -{ - UInt32 m = 1; - int i; - for (i = 0; i < numBitLevels; i++) - { - UInt32 bit = symbol & 1; - RangeEnc_EncodeBit(rc, probs + m, bit); - m = (m << 1) | bit; - symbol >>= 1; - } -} - -static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, const UInt32 *ProbPrices) -{ - UInt32 price = 0; - symbol |= (1 << numBitLevels); - while (symbol != 1) - { - price += GET_PRICEa(probs[symbol >> 1], symbol & 1); - symbol >>= 1; - } - return price; -} - -static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, const UInt32 *ProbPrices) -{ - UInt32 price = 0; - UInt32 m = 1; - int i; - for (i = numBitLevels; i != 0; i--) - { - UInt32 bit = symbol & 1; - symbol >>= 1; - price += GET_PRICEa(probs[m], bit); - m = (m << 1) | bit; - } - return price; -} static void LenEnc_Init(CLenEnc *p) { unsigned i; - p->choice = p->choice2 = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++) p->low[i] = kProbInitValue; - for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) - p->mid[i] = kProbInitValue; for (i = 0; i < kLenNumHighSymbols; i++) p->high[i] = kProbInitValue; } -static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState) +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned symbol, unsigned posState) { - if (symbol < kLenNumLowSymbols) + UInt32 range, ttt, newBound; + CLzmaProb *probs = p->low; + range = rc->range; + RC_BIT_PRE(rc, probs); + if (symbol >= kLenNumLowSymbols) { - RangeEnc_EncodeBit(rc, &p->choice, 0); - RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice, 1); - if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) + RC_BIT_1(rc, probs); + probs += kLenNumLowSymbols; + RC_BIT_PRE(rc, probs); + if (symbol >= kLenNumLowSymbols * 2) { - RangeEnc_EncodeBit(rc, &p->choice2, 0); - RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); - } - else - { - RangeEnc_EncodeBit(rc, &p->choice2, 1); - RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); - } - } -} - -static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, const UInt32 *ProbPrices) -{ - UInt32 a0 = GET_PRICE_0a(p->choice); - UInt32 a1 = GET_PRICE_1a(p->choice); - UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); - UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); - UInt32 i = 0; - for (i = 0; i < kLenNumLowSymbols; i++) - { - if (i >= numSymbols) + RC_BIT_1(rc, probs); + rc->range = range; + // RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols * 2); + LitEnc_Encode(rc, p->high, symbol - kLenNumLowSymbols * 2); return; - prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); + } + symbol -= kLenNumLowSymbols; } - for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) + + // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, symbol); { - if (i >= numSymbols) - return; - prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); + unsigned m; + unsigned bit; + RC_BIT_0(rc, probs); + probs += (posState << (1 + kLenNumLowBits)); + bit = (symbol >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; + bit = (symbol >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; + bit = symbol & 1; RC_BIT(rc, probs + m, bit); + rc->range = range; } - for (; i < numSymbols; i++) - prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); } -static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc *p, UInt32 posState, const UInt32 *ProbPrices) +static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices) { - LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); - p->counters[posState] = p->tableSize; + unsigned i; + for (i = 0; i < 8; i += 2) + { + UInt32 price = startPrice; + UInt32 prob; + price += GET_PRICEa(probs[1 ], (i >> 2)); + price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1); + prob = probs[4 + (i >> 1)]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + } } -static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, const UInt32 *ProbPrices) + +MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTable( + CLenPriceEnc *p, unsigned posState, + const CLenEnc *enc, + const CProbPrice *ProbPrices) { - UInt32 posState; + // int y; for (y = 0; y < 100; y++) { + UInt32 a; + unsigned i, numSymbols; + + UInt32 *prices = p->prices[posState]; + { + const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits)); + SetPrices_3(probs, GET_PRICEa_0(enc->low[0]), prices, ProbPrices); + a = GET_PRICEa_1(enc->low[0]); + SetPrices_3(probs + kLenNumLowSymbols, a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]), prices + kLenNumLowSymbols, ProbPrices); + a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]); + } + numSymbols = p->tableSize; + p->counters[posState] = numSymbols; + for (i = kLenNumLowSymbols * 2; i < numSymbols; i += 1) + { + prices[i] = a + + // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices); + LitEnc_GetPrice(enc->high, i - kLenNumLowSymbols * 2, ProbPrices); + /* + unsigned sym = (i - kLenNumLowSymbols * 2) >> 1; + UInt32 price = a + RcTree_GetPrice(enc->high, kLenNumHighBits - 1, sym, ProbPrices); + UInt32 prob = enc->high[(1 << 7) + sym]; + prices[i ] = price + GET_PRICEa_0(prob); + prices[i + 1] = price + GET_PRICEa_1(prob); + */ + } + // } +} + +static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, unsigned numPosStates, + const CLenEnc *enc, + const CProbPrice *ProbPrices) +{ + unsigned posState; for (posState = 0; posState < numPosStates; posState++) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); -} - -static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, const UInt32 *ProbPrices) -{ - LenEnc_Encode(&p->p, rc, symbol, posState); - if (updatePrice) - if (--p->counters[posState] == 0) - LenPriceEnc_UpdateTable(p, posState, ProbPrices); + LenPriceEnc_UpdateTable(p, posState, enc, ProbPrices); } - - -static void MovePos(CLzmaEnc *p, UInt32 num) -{ +/* #ifdef SHOW_STAT g_STAT_OFFSET += num; printf("\n MovePos %u", num); #endif +*/ - if (num != 0) - { - p->additionalOffset += num; - p->matchFinder.Skip(p->matchFinderObj, num); - } -} +#define MOVE_POS(p, num) { \ + p->additionalOffset += (num); \ + p->matchFinder.Skip(p->matchFinderObj, (num)); } -static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes) + +static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { - UInt32 lenRes = 0, numPairs; + unsigned numPairs; + + p->additionalOffset++; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + *numPairsRes = numPairs; #ifdef SHOW_STAT printf("\n i = %u numPairs = %u ", g_STAT_OFFSET, numPairs / 2); g_STAT_OFFSET++; { - UInt32 i; + unsigned i; for (i = 0; i < numPairs; i += 2) printf("%2u %6u | ", p->matches[i], p->matches[i + 1]); } #endif - if (numPairs > 0) + if (numPairs == 0) + return 0; { - lenRes = p->matches[(size_t)numPairs - 2]; - if (lenRes == p->numFastBytes) + unsigned len = p->matches[(size_t)numPairs - 2]; + if (len != p->numFastBytes) + return len; { UInt32 numAvail = p->numAvail; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; { - const Byte *pbyCur = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - const Byte *pby = pbyCur + lenRes; + const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + const Byte *p2 = p1 + len; ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; - const Byte *pbyLim = pbyCur + numAvail; - for (; pby != pbyLim && *pby == pby[dif]; pby++); - lenRes = (UInt32)(pby - pbyCur); + const Byte *lim = p1 + numAvail; + for (; p2 != lim && *p2 == p2[dif]; p2++); + return (unsigned)(p2 - p1); } } } - p->additionalOffset++; - *numDistancePairsRes = numPairs; - return lenRes; } +#define MARK_LIT ((UInt32)(Int32)-1) -#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); (p)->prev1IsChar = False; -#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False; -#define IsShortRep(p) ((p)->backPrev == 0) +#define MakeAs_Lit(p) { (p)->dist = MARK_LIT; (p)->extra = 0; } +#define MakeAs_ShortRep(p) { (p)->dist = 0; (p)->extra = 0; } +#define IsShortRep(p) ((p)->dist == 0) -static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState) -{ - return - GET_PRICE_0(p->isRepG0[state]) + - GET_PRICE_0(p->isRep0Long[state][posState]); -} -static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState) +#define GetPrice_ShortRep(p, state, posState) \ + ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState])) + +#define GetPrice_Rep_0(p, state, posState) ( \ + GET_PRICE_1(p->isMatch[state][posState]) \ + + GET_PRICE_1(p->isRep0Long[state][posState])) \ + + GET_PRICE_1(p->isRep[state]) \ + + GET_PRICE_0(p->isRepG0[state]) + + +static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) { UInt32 price; + UInt32 prob = p->isRepG0[state]; if (repIndex == 0) { - price = GET_PRICE_0(p->isRepG0[state]); + price = GET_PRICE_0(prob); price += GET_PRICE_1(p->isRep0Long[state][posState]); } else { - price = GET_PRICE_1(p->isRepG0[state]); + price = GET_PRICE_1(prob); + prob = p->isRepG1[state]; if (repIndex == 1) - price += GET_PRICE_0(p->isRepG1[state]); + price += GET_PRICE_0(prob); else { - price += GET_PRICE_1(p->isRepG1[state]); + price += GET_PRICE_1(prob); price += GET_PRICE(p->isRepG2[state], repIndex - 2); } } return price; } -static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) -{ - return p->repLenEnc.prices[posState][(size_t)len - LZMA_MATCH_LEN_MIN] + - GetPureRepPrice(p, repIndex, state, posState); -} -static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur) +static unsigned Backward(CLzmaEnc *p, unsigned cur) { - UInt32 posMem = p->opt[cur].posPrev; - UInt32 backMem = p->opt[cur].backPrev; - p->optimumEndIndex = cur; - do + unsigned wr = cur + 1; + p->optEnd = wr; + + for (;;) { - if (p->opt[cur].prev1IsChar) + UInt32 dist = p->opt[cur].dist; + UInt32 len = p->opt[cur].len; + UInt32 extra = p->opt[cur].extra; + cur -= len; + + if (extra) { - MakeAsChar(&p->opt[posMem]) - p->opt[posMem].posPrev = posMem - 1; - if (p->opt[cur].prev2) + wr--; + p->opt[wr].len = len; + cur -= extra; + len = extra; + if (extra == 1) { - p->opt[(size_t)posMem - 1].prev1IsChar = False; - p->opt[(size_t)posMem - 1].posPrev = p->opt[cur].posPrev2; - p->opt[(size_t)posMem - 1].backPrev = p->opt[cur].backPrev2; + p->opt[wr].dist = dist; + dist = MARK_LIT; + } + else + { + p->opt[wr].dist = 0; + len--; + wr--; + p->opt[wr].dist = MARK_LIT; + p->opt[wr].len = 1; } } + + if (cur == 0) { - UInt32 posPrev = posMem; - UInt32 backCur = backMem; - - backMem = p->opt[posPrev].backPrev; - posMem = p->opt[posPrev].posPrev; - - p->opt[posPrev].backPrev = backCur; - p->opt[posPrev].posPrev = cur; - cur = posPrev; + p->backRes = dist; + p->optCur = wr; + return len; } + + wr--; + p->opt[wr].dist = dist; + p->opt[wr].len = len; } - while (cur != 0); - *backRes = p->opt[0].backPrev; - p->optimumCurrentIndex = p->opt[0].posPrev; - return p->optimumCurrentIndex; } -#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * (UInt32)0x300) -static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) + +#define LIT_PROBS(pos, prevByte) \ + (p->litProbs + (UInt32)3 * (((((pos) << 8) + (prevByte)) & p->lpMask) << p->lc)) + + +static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) { - UInt32 lenEnd, cur; - UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; + unsigned last, cur; + UInt32 reps[LZMA_NUM_REPS]; + unsigned repLens[LZMA_NUM_REPS]; UInt32 *matches; { - - UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, len; - UInt32 matchPrice, repMatchPrice, normalMatchPrice; - const Byte *data; - Byte curByte, matchByte; - - if (p->optimumEndIndex != p->optimumCurrentIndex) - { - const COptimal *opt = &p->opt[p->optimumCurrentIndex]; - UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; - *backRes = opt->backPrev; - p->optimumCurrentIndex = opt->posPrev; - return lenRes; - } - p->optimumCurrentIndex = p->optimumEndIndex = 0; - - if (p->additionalOffset == 0) - mainLen = ReadMatchDistances(p, &numPairs); - else - { - mainLen = p->longestMatchLength; - numPairs = p->numPairs; - } - - numAvail = p->numAvail; - if (numAvail < 2) - { - *backRes = (UInt32)(-1); - return 1; - } - if (numAvail > LZMA_MATCH_LEN_MAX) - numAvail = LZMA_MATCH_LEN_MAX; - - data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repMaxIndex = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 lenTest; - const Byte *data2; - reps[i] = p->reps[i]; - data2 = data - reps[i] - 1; - if (data[0] != data2[0] || data[1] != data2[1]) + UInt32 numAvail; + unsigned numPairs, mainLen, repMaxIndex, i, posState; + UInt32 matchPrice, repMatchPrice; + const Byte *data; + Byte curByte, matchByte; + + p->optCur = p->optEnd = 0; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else { - repLens[i] = 0; - continue; + mainLen = p->longestMatchLen; + numPairs = p->numPairs; } - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); - repLens[i] = lenTest; - if (lenTest > repLens[repMaxIndex]) - repMaxIndex = i; - } - if (repLens[repMaxIndex] >= p->numFastBytes) - { - UInt32 lenRes; - *backRes = repMaxIndex; - lenRes = repLens[repMaxIndex]; - MovePos(p, lenRes - 1); - return lenRes; - } - - matches = p->matches; - if (mainLen >= p->numFastBytes) - { - *backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); - return mainLen; - } - curByte = *data; - matchByte = *(data - (reps[0] + 1)); - - if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) - { - *backRes = (UInt32)-1; - return 1; - } - - p->opt[0].state = (CState)p->state; - - posState = (position & p->pbMask); - - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + - (!IsCharState(p->state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : + + numAvail = p->numAvail; + if (numAvail < 2) + { + p->backRes = MARK_LIT; + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned len; + const Byte *data2; + reps[i] = p->reps[i]; + data2 = data - reps[i]; + if (data[0] != data2[0] || data[1] != data2[1]) + { + repLens[i] = 0; + continue; + } + for (len = 2; len < numAvail && data[len] == data2[len]; len++); + repLens[i] = len; + if (len > repLens[repMaxIndex]) + repMaxIndex = i; + } + + if (repLens[repMaxIndex] >= p->numFastBytes) + { + unsigned len; + p->backRes = repMaxIndex; + len = repLens[repMaxIndex]; + MOVE_POS(p, len - 1) + return len; + } + + matches = p->matches; + + if (mainLen >= p->numFastBytes) + { + p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) + return mainLen; + } + + curByte = *data; + matchByte = *(data - reps[0]); + + if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) + { + p->backRes = MARK_LIT; + return 1; + } + + p->opt[0].state = (CState)p->state; + + posState = (position & p->pbMask); + + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsLitState(p->state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } - - MakeAsChar(&p->opt[1]); - - matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); - repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); - - if (matchByte == curByte) - { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); - if (shortRepPrice < p->opt[1].price) - { - p->opt[1].price = shortRepPrice; - MakeAsShortRep(&p->opt[1]); } - } - lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); - - if (lenEnd < 2) - { - *backRes = p->opt[1].backPrev; - return 1; - } - - p->opt[1].posPrev = 0; - for (i = 0; i < LZMA_NUM_REPS; i++) - p->opt[0].backs[i] = reps[i]; - - len = lenEnd; - do - p->opt[len--].price = kInfinityPrice; - while (len >= 2); - - for (i = 0; i < LZMA_NUM_REPS; i++) - { - UInt32 repLen = repLens[i]; - UInt32 price; - if (repLen < 2) - continue; - price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); - do + + MakeAs_Lit(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + if (matchByte == curByte) { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][(size_t)repLen - 2]; - COptimal *opt = &p->opt[repLen]; - if (curAndLenPrice < opt->price) + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = i; - opt->prev1IsChar = False; + p->opt[1].price = shortRepPrice; + MakeAs_ShortRep(&p->opt[1]); } } - while (--repLen >= 2); - } - - normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); - - len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); - if (len <= mainLen) - { - UInt32 offs = 0; - while (len > matches[offs]) - offs += 2; - for (; ; len++) + + last = (mainLen >= repLens[repMaxIndex] ? mainLen : repLens[repMaxIndex]); + + if (last < 2) { - COptimal *opt; - UInt32 distance = matches[(size_t)offs + 1]; + p->backRes = p->opt[1].dist; + return 1; + } + + p->opt[1].len = 1; + + p->opt[0].reps[0] = reps[0]; + p->opt[0].reps[1] = reps[1]; + p->opt[0].reps[2] = reps[2]; + p->opt[0].reps[3] = reps[3]; + + { + unsigned len = last; + do + p->opt[len--].price = kInfinityPrice; + while (len >= 2); + } - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][(size_t)len - LZMA_MATCH_LEN_MIN]; - UInt32 lenToPosState = GetLenToPosState(len); - if (distance < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][distance]; - else + // ---------- REP ---------- + + for (i = 0; i < LZMA_NUM_REPS; i++) + { + unsigned repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPrice_PureRep(p, i, p->state, posState); + do { - UInt32 slot; - GetPosSlot2(distance, slot); - curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; + UInt32 price2 = price + p->repLenEnc.prices[posState][(size_t)repLen - 2]; + COptimal *opt = &p->opt[repLen]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = repLen; + opt->dist = i; + opt->extra = 0; + } } - opt = &p->opt[len]; - if (curAndLenPrice < opt->price) + while (--repLen >= 2); + } + + + // ---------- MATCH ---------- + { + unsigned len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2); + if (len <= mainLen) { - opt->price = curAndLenPrice; - opt->posPrev = 0; - opt->backPrev = distance + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } - if (len == matches[offs]) - { - offs += 2; - if (offs == numPairs) - break; + unsigned offs = 0; + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + while (len > matches[offs]) + offs += 2; + + for (; ; len++) + { + COptimal *opt; + UInt32 dist = matches[(size_t)offs + 1]; + UInt32 price2 = normalMatchPrice + p->lenEnc.prices[posState][(size_t)len - LZMA_MATCH_LEN_MIN]; + unsigned lenToPosState = GetLenToPosState(len); + + if (dist < kNumFullDistances) + price2 += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + { + unsigned slot; + GetPosSlot2(dist, slot); + price2 += p->alignPrices[dist & kAlignMask]; + price2 += p->posSlotPrices[lenToPosState][slot]; + } + + opt = &p->opt[len]; + + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } + + if (len == matches[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } } } - } + - cur = 0; + cur = 0; #ifdef SHOW_STAT2 /* if (position >= 0) */ { unsigned i; printf("\n pos = %4X", position); - for (i = cur; i <= lenEnd; i++) + for (i = cur; i <= last; i++) printf("\nprice[%4X] = %u", position - cur + i, p->opt[i].price); } #endif - } + + + // ---------- Optimal Parsing ---------- + for (;;) { - UInt32 numAvail; - UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; - UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; - Bool nextIsChar; + UInt32 numAvail, numAvailFull; + unsigned newLen, numPairs, prev, state, posState, startLen; + UInt32 curPrice, litPrice, matchPrice, repMatchPrice; + Bool nextIsLit; Byte curByte, matchByte; const Byte *data; - COptimal *curOpt; - COptimal *nextOpt; + COptimal *curOpt, *nextOpt; - cur++; - if (cur == lenEnd) - return Backward(p, backRes, cur); + if (++cur == last) + return Backward(p, cur); newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) { p->numPairs = numPairs; - p->longestMatchLength = newLen; - return Backward(p, backRes, cur); + p->longestMatchLen = newLen; + return Backward(p, cur); } - position++; + curOpt = &p->opt[cur]; - posPrev = curOpt->posPrev; - if (curOpt->prev1IsChar) - { - posPrev--; - if (curOpt->prev2) - { - state = p->opt[curOpt->posPrev2].state; - if (curOpt->backPrev2 < LZMA_NUM_REPS) - state = kRepNextStates[state]; - else - state = kMatchNextStates[state]; - } - else - state = p->opt[posPrev].state; - state = kLiteralNextStates[state]; - } - else - state = p->opt[posPrev].state; - if (posPrev == cur - 1) + prev = cur - curOpt->len; + + if (curOpt->len == 1) { + state = p->opt[prev].state; if (IsShortRep(curOpt)) state = kShortRepNextStates[state]; else @@ -1244,92 +1310,136 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) } else { - UInt32 pos; const COptimal *prevOpt; - if (curOpt->prev1IsChar && curOpt->prev2) + UInt32 b0; + UInt32 dist = curOpt->dist; + + if (curOpt->extra) { - posPrev = curOpt->posPrev2; - pos = curOpt->backPrev2; - state = kRepNextStates[state]; + prev -= curOpt->extra; + state = kState_RepAfterLit; + if (curOpt->extra == 1) + state = (dist < LZMA_NUM_REPS) ? kState_RepAfterLit : kState_MatchAfterLit; } else { - pos = curOpt->backPrev; - if (pos < LZMA_NUM_REPS) + state = p->opt[prev].state; + if (dist < LZMA_NUM_REPS) state = kRepNextStates[state]; else state = kMatchNextStates[state]; } - prevOpt = &p->opt[posPrev]; - if (pos < LZMA_NUM_REPS) + + prevOpt = &p->opt[prev]; + b0 = prevOpt->reps[0]; + + if (dist < LZMA_NUM_REPS) { - UInt32 i; - reps[0] = prevOpt->backs[pos]; - for (i = 1; i <= pos; i++) - reps[i] = prevOpt->backs[(size_t)i - 1]; - for (; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[i]; + if (dist == 0) + { + reps[0] = b0; + reps[1] = prevOpt->reps[1]; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[1] = b0; + b0 = prevOpt->reps[1]; + if (dist == 1) + { + reps[0] = b0; + reps[2] = prevOpt->reps[2]; + reps[3] = prevOpt->reps[3]; + } + else + { + reps[2] = b0; + reps[0] = prevOpt->reps[dist]; + reps[3] = prevOpt->reps[dist ^ 1]; + } + } } else { - UInt32 i; - reps[0] = (pos - LZMA_NUM_REPS); - for (i = 1; i < LZMA_NUM_REPS; i++) - reps[i] = prevOpt->backs[(size_t)i - 1]; + reps[0] = (dist - LZMA_NUM_REPS + 1); + reps[1] = b0; + reps[2] = prevOpt->reps[1]; + reps[3] = prevOpt->reps[2]; } } + curOpt->state = (CState)state; + curOpt->reps[0] = reps[0]; + curOpt->reps[1] = reps[1]; + curOpt->reps[2] = reps[2]; + curOpt->reps[3] = reps[3]; - curOpt->backs[0] = reps[0]; - curOpt->backs[1] = reps[1]; - curOpt->backs[2] = reps[2]; - curOpt->backs[3] = reps[3]; - - curPrice = curOpt->price; - nextIsChar = False; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; curByte = *data; - matchByte = *(data - (reps[0] + 1)); + matchByte = *(data - reps[0]); + position++; posState = (position & p->pbMask); - curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); - { - const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); - curAnd1Price += - (!IsCharState(state) ? - LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : - LitEnc_GetPrice(probs, curByte, p->ProbPrices)); - } + /* + The order of Price checks: + < LIT + <= SHORT_REP + < LIT : REP_0 + < REP [ : LIT : REP_0 ] + < MATCH [ : LIT : REP_0 ] + */ + + curPrice = curOpt->price; + litPrice = curPrice + GET_PRICE_0(p->isMatch[state][posState]); nextOpt = &p->opt[(size_t)cur + 1]; + nextIsLit = False; - if (curAnd1Price < nextOpt->price) + // if (litPrice >= nextOpt->price) litPrice = 0; else // 18.new { - nextOpt->price = curAnd1Price; - nextOpt->posPrev = cur; - MakeAsChar(nextOpt); - nextIsChar = True; + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + litPrice += (!IsLitState(state) ? + LitEnc_Matched_GetPrice(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + + if (litPrice < nextOpt->price) + { + nextOpt->price = litPrice; + nextOpt->len = 1; + MakeAs_Lit(nextOpt); + nextIsLit = True; + } } matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); - if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) + // ---------- SHORT_REP ---------- + // if (IsLitState(state)) // 18.new + if (matchByte == curByte) + // if (repMatchPrice < nextOpt->price) // 18.new + if (nextOpt->len < 2 + || (nextOpt->dist != 0 + && nextOpt->extra <= 1 // 17.old + )) { - UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); - if (shortRepPrice <= nextOpt->price) + UInt32 shortRepPrice = repMatchPrice + GetPrice_ShortRep(p, state, posState); + if (shortRepPrice <= nextOpt->price) // 17.old + // if (shortRepPrice < nextOpt->price) // 18.new { nextOpt->price = shortRepPrice; - nextOpt->posPrev = cur; - MakeAsShortRep(nextOpt); - nextIsChar = True; + nextOpt->len = 1; + MakeAs_ShortRep(nextOpt); + nextIsLit = False; } } + numAvailFull = p->numAvail; { UInt32 temp = kNumOpts - 1 - cur; - if (temp < numAvailFull) + if (numAvailFull > temp) numAvailFull = temp; } @@ -1337,41 +1447,53 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) continue; numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); - if (!nextIsChar && matchByte != curByte) /* speed optimization */ - { - /* try Literal + rep0 */ - UInt32 temp; - UInt32 lenTest2; - const Byte *data2 = data - reps[0] - 1; - UInt32 limit = p->numFastBytes + 1; - if (limit > numAvailFull) - limit = numAvailFull; + // numAvail <= p->numFastBytes - for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++); - lenTest2 = temp - 1; - if (lenTest2 >= 2) + // ---------- LIT : REP_0 ---------- + + if ( + // litPrice != 0 && // 18.new + !nextIsLit + && matchByte != curByte + && numAvailFull > 2) + { + const Byte *data2 = data - reps[0]; + if (data[1] == data2[1] && data[2] == data2[2]) { - UInt32 state2 = kLiteralNextStates[state]; - UInt32 posStateNext = (position + 1) & p->pbMask; - UInt32 nextRepMatchPrice = curAnd1Price + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - /* for (; lenTest2 >= 2; lenTest2--) */ + unsigned len; + unsigned limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + for (len = 3; len < limit && data[len] == data2[len]; len++); + { - UInt32 curAndLenPrice; - COptimal *opt; - UInt32 offset = cur + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + unsigned state2 = kLiteralNextStates[state]; + unsigned posState2 = (position + 1) & p->pbMask; + UInt32 price = litPrice + GetPrice_Rep_0(p, state2, posState2); { - opt->price = curAndLenPrice; - opt->posPrev = cur + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = False; + unsigned offset = cur + len; + while (last < offset) + p->opt[++last].price = kInfinityPrice; + + // do + { + UInt32 price2; + COptimal *opt; + len--; + // price2 = price + GetPrice_Len_Rep_0(p, len, state2, posState2); + price2 = price + p->repLenEnc.prices[posState2][len - LZMA_MATCH_LEN_MIN]; + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len; + opt->dist = 0; + opt->extra = 1; + } + } + // while (len >= 3); } } } @@ -1379,87 +1501,105 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) startLen = 2; /* speed optimization */ { - UInt32 repIndex; - for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) - { - UInt32 lenTest; - UInt32 lenTestTemp; - UInt32 price; - const Byte *data2 = data - reps[repIndex] - 1; - if (data[0] != data2[0] || data[1] != data2[1]) - continue; - for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); - while (lenEnd < cur + lenTest) - p->opt[++lenEnd].price = kInfinityPrice; - lenTestTemp = lenTest; - price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); - do + // ---------- REP ---------- + unsigned repIndex = 0; // 17.old + // unsigned repIndex = IsLitState(state) ? 0 : 1; // 18.notused + for (; repIndex < LZMA_NUM_REPS; repIndex++) { - UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][(size_t)lenTest - 2]; - COptimal *opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = repIndex; - opt->prev1IsChar = False; - } - } - while (--lenTest >= 2); - lenTest = lenTestTemp; - - if (repIndex == 0) - startLen = lenTest + 1; + unsigned len; + UInt32 price; + const Byte *data2 = data - reps[repIndex]; + if (data[0] != data2[0] || data[1] != data2[1]) + continue; - /* if (_maxMode) */ + for (len = 2; len < numAvail && data[len] == data2[len]; len++); + + // if (len < startLen) continue; // 18.new: speed optimization + + while (last < cur + len) + p->opt[++last].price = kInfinityPrice; { - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; + unsigned len2 = len; + price = repMatchPrice + GetPrice_PureRep(p, repIndex, state, posState); + do + { + UInt32 price2 = price + p->repLenEnc.prices[posState][(size_t)len2 - 2]; + COptimal *opt = &p->opt[cur + len2]; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len2; + opt->dist = repIndex; + opt->extra = 0; + } + } + while (--len2 >= 2); + } + + if (repIndex == 0) startLen = len + 1; // 17.old + // startLen = len + 1; // 18.new + + /* if (_maxMode) */ + { + // ---------- REP : LIT : REP_0 ---------- + // numFastBytes + 1 + numFastBytes + + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; if (limit > numAvailFull) limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) + + for (; len2 < limit && data[len2] == data2[len2]; len2++); + + len2 -= len; + if (len2 >= 3) { - UInt32 nextRepMatchPrice; - UInt32 state2 = kRepNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = - price + p->repLenEnc.prices[posState][(size_t)lenTest - 2] + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[(size_t)lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (position + lenTest + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); + unsigned state2 = kRepNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + price += + p->repLenEnc.prices[posState][(size_t)len - 2] + + GET_PRICE_0(p->isMatch[state2][posState2]) + + LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); - /* for (; lenTest2 >= 2; lenTest2--) */ + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterRep; + posState2 = (posState2 + 1) & p->pbMask; + + + price += GetPrice_Rep_0(p, state2, posState2); { - UInt32 curAndLenPrice; - COptimal *opt; - UInt32 offset = cur + lenTest + 1 + lenTest2; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); - opt = &p->opt[offset]; - if (curAndLenPrice < opt->price) + unsigned offset = cur + len + len2; + while (last < offset) + p->opt[++last].price = kInfinityPrice; + // do { - opt->price = curAndLenPrice; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = repIndex; + unsigned price2; + COptimal *opt; + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + p->repLenEnc.prices[posState2][len2 - LZMA_MATCH_LEN_MIN]; + + opt = &p->opt[offset]; + // offset--; + if (price2 < opt->price) + { + opt->price = price2; + opt->len = len2; + opt->extra = (CExtra)(len + 1); + opt->dist = repIndex; + } } + // while (len2 >= 3); } } } + } } - } - /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ + + + // ---------- MATCH ---------- + /* for (unsigned len = 2; len <= newLen; len++) */ if (newLen > numAvail) { newLen = numAvail; @@ -1467,134 +1607,148 @@ static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) matches[numPairs] = newLen; numPairs += 2; } + if (newLen >= startLen) { UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); - UInt32 offs, curBack, posSlot; - UInt32 lenTest; - while (lenEnd < cur + newLen) - p->opt[++lenEnd].price = kInfinityPrice; + UInt32 dist; + unsigned offs, posSlot, len; + while (last < cur + newLen) + p->opt[++last].price = kInfinityPrice; offs = 0; while (startLen > matches[offs]) offs += 2; - curBack = matches[(size_t)offs + 1]; - GetPosSlot2(curBack, posSlot); - for (lenTest = /*2*/ startLen; ; lenTest++) + dist = matches[(size_t)offs + 1]; + + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); + + for (len = /*2*/ startLen; ; len++) { - UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][(size_t)lenTest - LZMA_MATCH_LEN_MIN]; + UInt32 price = normalMatchPrice + p->lenEnc.prices[posState][(size_t)len - LZMA_MATCH_LEN_MIN]; { - UInt32 lenToPosState = GetLenToPosState(lenTest); - COptimal *opt; - if (curBack < kNumFullDistances) - curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; - else - curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; - - opt = &p->opt[cur + lenTest]; - if (curAndLenPrice < opt->price) - { - opt->price = curAndLenPrice; - opt->posPrev = cur; - opt->backPrev = curBack + LZMA_NUM_REPS; - opt->prev1IsChar = False; - } + COptimal *opt; + unsigned lenToPosState = len - 2; lenToPosState = GetLenToPosState2(lenToPosState); + if (dist < kNumFullDistances) + price += p->distancesPrices[lenToPosState][dist & (kNumFullDistances - 1)]; + else + price += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[dist & kAlignMask]; + + opt = &p->opt[cur + len]; + if (price < opt->price) + { + opt->price = price; + opt->len = len; + opt->dist = dist + LZMA_NUM_REPS; + opt->extra = 0; + } } - if (/*_maxMode && */lenTest == matches[offs]) + if (/*_maxMode && */ len == matches[offs]) { - /* Try Match + Literal + Rep0 */ - const Byte *data2 = data - curBack - 1; - UInt32 lenTest2 = lenTest + 1; - UInt32 limit = lenTest2 + p->numFastBytes; + // MATCH : LIT : REP_0 + + const Byte *data2 = data - dist - 1; + unsigned len2 = len + 1; + unsigned limit = len2 + p->numFastBytes; if (limit > numAvailFull) limit = numAvailFull; - for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); - lenTest2 -= lenTest + 1; - if (lenTest2 >= 2) + + for (; len2 < limit && data[len2] == data2[len2]; len2++); + + len2 -= len; + + if (len2 >= 3) { - UInt32 nextRepMatchPrice; - UInt32 state2 = kMatchNextStates[state]; - UInt32 posStateNext = (position + lenTest) & p->pbMask; - UInt32 curAndLenCharPrice = curAndLenPrice + - GET_PRICE_0(p->isMatch[state2][posStateNext]) + - LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[(size_t)lenTest - 1]), - data[lenTest], data2[lenTest], p->ProbPrices); - state2 = kLiteralNextStates[state2]; - posStateNext = (posStateNext + 1) & p->pbMask; - nextRepMatchPrice = curAndLenCharPrice + - GET_PRICE_1(p->isMatch[state2][posStateNext]) + - GET_PRICE_1(p->isRep[state2]); - - /* for (; lenTest2 >= 2; lenTest2--) */ + unsigned state2 = kMatchNextStates[state]; + unsigned posState2 = (position + len) & p->pbMask; + unsigned offset; + price += GET_PRICE_0(p->isMatch[state2][posState2]); + price += LitEnc_Matched_GetPrice(LIT_PROBS(position + len, data[(size_t)len - 1]), + data[len], data2[len], p->ProbPrices); + + // state2 = kLiteralNextStates[state2]; + state2 = kState_LitAfterMatch; + + posState2 = (posState2 + 1) & p->pbMask; + price += GetPrice_Rep_0(p, state2, posState2); + + offset = cur + len + len2; + while (last < offset) + p->opt[++last].price = kInfinityPrice; + // do { - UInt32 offset = cur + lenTest + 1 + lenTest2; - UInt32 curAndLenPrice2; + UInt32 price2; COptimal *opt; - while (lenEnd < offset) - p->opt[++lenEnd].price = kInfinityPrice; - curAndLenPrice2 = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + len2--; + // price2 = price + GetPrice_Len_Rep_0(p, len2, state2, posState2); + price2 = price + p->repLenEnc.prices[posState2][len2 - LZMA_MATCH_LEN_MIN]; opt = &p->opt[offset]; - if (curAndLenPrice2 < opt->price) + // offset--; + if (price2 < opt->price) { - opt->price = curAndLenPrice2; - opt->posPrev = cur + lenTest + 1; - opt->backPrev = 0; - opt->prev1IsChar = True; - opt->prev2 = True; - opt->posPrev2 = cur; - opt->backPrev2 = curBack + LZMA_NUM_REPS; + opt->price = price2; + opt->len = len2; + opt->extra = (CExtra)(len + 1); + opt->dist = dist + LZMA_NUM_REPS; } } + // while (len2 >= 3); } + offs += 2; if (offs == numPairs) break; - curBack = matches[(size_t)offs + 1]; - if (curBack >= kNumFullDistances) - GetPosSlot2(curBack, posSlot); + dist = matches[(size_t)offs + 1]; + // if (dist >= kNumFullDistances) + GetPosSlot2(dist, posSlot); } } } } } + + #define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) -static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) + + +static unsigned GetOptimumFast(CLzmaEnc *p) { - UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; + UInt32 numAvail, mainDist; + unsigned mainLen, numPairs, repIndex, repLen, i; const Byte *data; - const UInt32 *matches; if (p->additionalOffset == 0) mainLen = ReadMatchDistances(p, &numPairs); else { - mainLen = p->longestMatchLength; + mainLen = p->longestMatchLen; numPairs = p->numPairs; } numAvail = p->numAvail; - *backRes = (UInt32)-1; + p->backRes = MARK_LIT; if (numAvail < 2) return 1; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; - repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len; - const Byte *data2 = data - p->reps[i] - 1; + unsigned len; + const Byte *data2 = data - p->reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) continue; for (len = 2; len < numAvail && data[len] == data2[len]; len++); if (len >= p->numFastBytes) { - *backRes = i; - MovePos(p, len - 1); + p->backRes = i; + MOVE_POS(p, len - 1) return len; } if (len > repLen) @@ -1604,84 +1758,152 @@ static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) } } - matches = p->matches; if (mainLen >= p->numFastBytes) { - *backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; - MovePos(p, mainLen - 1); + p->backRes = p->matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + MOVE_POS(p, mainLen - 1) return mainLen; } mainDist = 0; /* for GCC */ + if (mainLen >= 2) { - mainDist = matches[(size_t)numPairs - 1]; - while (numPairs > 2 && mainLen == matches[(size_t)numPairs - 4] + 1) + mainDist = p->matches[(size_t)numPairs - 1]; + while (numPairs > 2) { - if (!ChangePair(matches[(size_t)numPairs - 3], mainDist)) + UInt32 dist2; + if (mainLen != p->matches[(size_t)numPairs - 4] + 1) + break; + dist2 = p->matches[(size_t)numPairs - 3]; + if (!ChangePair(dist2, mainDist)) break; numPairs -= 2; - mainLen = matches[(size_t)numPairs - 2]; - mainDist = matches[(size_t)numPairs - 1]; + mainLen--; + mainDist = dist2; } if (mainLen == 2 && mainDist >= 0x80) mainLen = 1; } - if (repLen >= 2 && ( - (repLen + 1 >= mainLen) || - (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || - (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) + if (repLen >= 2) + if ( repLen + 1 >= mainLen + || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) + || (repLen + 3 >= mainLen && mainDist >= (1 << 15))) { - *backRes = repIndex; - MovePos(p, repLen - 1); + p->backRes = repIndex; + MOVE_POS(p, repLen - 1) return repLen; } if (mainLen < 2 || numAvail <= 2) return 1; - p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); - if (p->longestMatchLength >= 2) { - UInt32 newDistance = matches[(size_t)p->numPairs - 1]; - if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || - (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || - (p->longestMatchLength > mainLen + 1) || - (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) - return 1; + unsigned len1 = ReadMatchDistances(p, &p->numPairs); + p->longestMatchLen = len1; + + if (len1 >= 2) + { + UInt32 newDist = p->matches[(size_t)p->numPairs - 1]; + if ( (len1 >= mainLen && newDist < mainDist) + || (len1 == mainLen + 1 && !ChangePair(mainDist, newDist)) + || (len1 > mainLen + 1) + || (len1 + 1 >= mainLen && mainLen >= 3 && ChangePair(newDist, mainDist))) + return 1; + } } data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + for (i = 0; i < LZMA_NUM_REPS; i++) { - UInt32 len, limit; - const Byte *data2 = data - p->reps[i] - 1; + unsigned len, limit; + const Byte *data2 = data - p->reps[i]; if (data[0] != data2[0] || data[1] != data2[1]) continue; limit = mainLen - 1; - for (len = 2; len < limit && data[len] == data2[len]; len++); - if (len >= limit) - return 1; + for (len = 2;; len++) + { + if (len >= limit) + return 1; + if (data[len] != data2[len]) + break; + } + } + + p->backRes = mainDist + LZMA_NUM_REPS; + if (mainLen != 2) + { + MOVE_POS(p, mainLen - 2) } - *backRes = mainDist + LZMA_NUM_REPS; - MovePos(p, mainLen - 2); return mainLen; } -static void WriteEndMarker(CLzmaEnc *p, UInt32 posState) + + + +static void WriteEndMarker(CLzmaEnc *p, unsigned posState) { - UInt32 len; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + UInt32 range; + range = p->rc.range; + { + UInt32 ttt, newBound; + CLzmaProb *prob = &p->isMatch[p->state][posState]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_1(&p->rc, prob) + prob = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, prob) + RC_BIT_0(&p->rc, prob) + } p->state = kMatchNextStates[p->state]; - len = LZMA_MATCH_LEN_MIN; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); - RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + + p->rc.range = range; + LenEnc_Encode(&p->lenProbs, &p->rc, 0, posState); + range = p->rc.range; + + { + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[0], (1 << kNumPosSlotBits) - 1); + CLzmaProb *probs = p->posSlotEncoder[0]; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < (1 << kNumPosSlotBits)); + } + { + // RangeEnc_EncodeDirectBits(&p->rc, ((UInt32)1 << (30 - kNumAlignBits)) - 1, 30 - kNumAlignBits); UInt32 range = p->range; + unsigned numBits = 30 - kNumAlignBits; + do + { + range >>= 1; + p->rc.low += range; + RC_NORM(&p->rc) + } + while (--numBits); + } + + { + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); + CLzmaProb *probs = p->posAlignEncoder; + unsigned m = 1; + do + { + UInt32 ttt, newBound; + RC_BIT_PRE(p, probs + m) + RC_BIT_1(&p->rc, probs + m); + m = (m << 1) + 1; + } + while (m < kAlignTableSize); + } + p->rc.range = range; } + static SRes CheckErrors(CLzmaEnc *p) { if (p->result != SZ_OK) @@ -1695,7 +1917,8 @@ static SRes CheckErrors(CLzmaEnc *p) return p->result; } -static SRes Flush(CLzmaEnc *p, UInt32 nowPos) + +MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; @@ -1706,47 +1929,108 @@ static SRes Flush(CLzmaEnc *p, UInt32 nowPos) return CheckErrors(p); } + + static void FillAlignPrices(CLzmaEnc *p) { - UInt32 i; - for (i = 0; i < kAlignTableSize; i++) - p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + unsigned i; + const CProbPrice *ProbPrices = p->ProbPrices; + const CLzmaProb *probs = p->posAlignEncoder; p->alignPriceCount = 0; + for (i = 0; i < kAlignTableSize / 2; i++) + { + UInt32 price = 0; + unsigned symbol = i; + unsigned m = 1; + unsigned bit; + UInt32 prob; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit; + prob = probs[m]; + p->alignPrices[i ] = price + GET_PRICEa_0(prob); + p->alignPrices[i + 8] = price + GET_PRICEa_1(prob); + // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + } } + static void FillDistancesPrices(CLzmaEnc *p) { UInt32 tempPrices[kNumFullDistances]; - UInt32 i, lenToPosState; + unsigned i, lenToPosState; + + const CProbPrice *ProbPrices = p->ProbPrices; + p->matchPriceCount = 0; + for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { - UInt32 posSlot = GetPosSlot1(i); - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); + unsigned posSlot = GetPosSlot1(i); + unsigned footerBits = ((posSlot >> 1) - 1); + unsigned base = ((2 | (posSlot & 1)) << footerBits); + // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices); + + const CLzmaProb *probs = p->posEncoders + base; + UInt32 price = 0; + unsigned m = 1; + unsigned symbol = i - base; + do + { + unsigned bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) + bit; + } + while (--footerBits); + tempPrices[i] = price; } for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { - UInt32 posSlot; + unsigned posSlot; const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState]; - for (posSlot = 0; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); - for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) - posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + unsigned distTableSize = p->distTableSize; + const CLzmaProb *probs = encoder; + for (posSlot = 0; posSlot < distTableSize; posSlot += 2) + { + // posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); + UInt32 price = 0; + unsigned bit; + unsigned symbol = (posSlot >> 1) + (1 << (kNumPosSlotBits - 1)); + UInt32 prob; + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[symbol], bit); + prob = probs[(posSlot >> 1) + (1 << (kNumPosSlotBits - 1))]; + posSlotPrices[posSlot ] = price + GET_PRICEa_0(prob); + posSlotPrices[posSlot + 1] = price + GET_PRICEa_1(prob); + } + for (posSlot = kEndPosModelIndex; posSlot < distTableSize; posSlot++) + posSlotPrices[posSlot] += ((UInt32)(((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); { UInt32 *distancesPrices = p->distancesPrices[lenToPosState]; - for (i = 0; i < kStartPosModelIndex; i++) - distancesPrices[i] = posSlotPrices[i]; - for (; i < kNumFullDistances; i++) - distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + { + distancesPrices[0] = posSlotPrices[0]; + distancesPrices[1] = posSlotPrices[1]; + distancesPrices[2] = posSlotPrices[2]; + distancesPrices[3] = posSlotPrices[3]; + } + for (i = 4; i < kNumFullDistances; i += 2) + { + UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)]; + distancesPrices[i ] = slotPrice + tempPrices[i]; + distancesPrices[i + 1] = slotPrice + tempPrices[i + 1]; + } } } - p->matchPriceCount = 0; } + + void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); @@ -1770,6 +2054,7 @@ void LzmaEnc_Construct(CLzmaEnc *p) LzmaEnc_InitPriceTables(p->ProbPrices); p->litProbs = NULL; p->saveState.litProbs = NULL; + } CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) @@ -1806,7 +2091,8 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAlloc_Free(alloc, p); } -static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) + +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->needInit) @@ -1824,13 +2110,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize if (p->nowPos64 == 0) { - UInt32 numPairs; + unsigned numPairs; Byte curByte; if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) return Flush(p, nowPos32); ReadMatchDistances(p, &numPairs); - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); - p->state = kLiteralNextStates[p->state]; + RangeEnc_EncodeBit_0(&p->rc, &p->isMatch[kState_Start][0]); + // p->state = kLiteralNextStates[p->state]; curByte = *(p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset); LitEnc_Encode(&p->rc, p->litProbs, curByte); p->additionalOffset--; @@ -1838,109 +2124,225 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize } if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + for (;;) { - UInt32 pos, len, posState; - + UInt32 dist; + unsigned len, posState; + UInt32 range, ttt, newBound; + CLzmaProb *probs; + if (p->fastMode) - len = GetOptimumFast(p, &pos); + len = GetOptimumFast(p); else - len = GetOptimum(p, nowPos32, &pos); + { + unsigned oci = p->optCur; + if (p->optEnd == oci) + len = GetOptimum(p, nowPos32); + else + { + const COptimal *opt = &p->opt[oci]; + len = opt->len; + p->backRes = opt->dist; + p->optCur = oci + 1; + } + } + + posState = (unsigned)nowPos32 & p->pbMask; + range = p->rc.range; + probs = &p->isMatch[p->state][posState]; + + RC_BIT_PRE(&p->rc, probs) + + dist = p->backRes; #ifdef SHOW_STAT2 - printf("\n pos = %4X, len = %u pos = %u", nowPos32, len, pos); + printf("\n pos = %6X, len = %3u pos = %6u", nowPos32, len, dist); #endif - posState = nowPos32 & p->pbMask; - if (len == 1 && pos == (UInt32)-1) + if (dist == MARK_LIT) { Byte curByte; - CLzmaProb *probs; const Byte *data; + unsigned state; - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); + RC_BIT_0(&p->rc, probs); + p->rc.range = range; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; - curByte = *data; probs = LIT_PROBS(nowPos32, *(data - 1)); - if (IsCharState(p->state)) + curByte = *data; + state = p->state; + p->state = kLiteralNextStates[state]; + if (IsLitState(state)) LitEnc_Encode(&p->rc, probs, curByte); else - LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); - p->state = kLiteralNextStates[p->state]; + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0])); } else { - RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); - if (pos < LZMA_NUM_REPS) + RC_BIT_1(&p->rc, probs); + probs = &p->isRep[p->state]; + RC_BIT_PRE(&p->rc, probs) + + if (dist < LZMA_NUM_REPS) { - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); - if (pos == 0) + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG0[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 0) { - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); - RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); + RC_BIT_0(&p->rc, probs); + probs = &p->isRep0Long[p->state][posState]; + RC_BIT_PRE(&p->rc, probs) + if (len != 1) + { + RC_BIT_1_BASE(&p->rc, probs); + } + else + { + RC_BIT_0_BASE(&p->rc, probs); + p->state = kShortRepNextStates[p->state]; + } } else { - UInt32 distance = p->reps[pos]; - RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); - if (pos == 1) - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG1[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 1) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[1]; + } else { - RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); - RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); - if (pos == 3) + RC_BIT_1(&p->rc, probs); + probs = &p->isRepG2[p->state]; + RC_BIT_PRE(&p->rc, probs) + if (dist == 2) + { + RC_BIT_0_BASE(&p->rc, probs); + dist = p->reps[2]; + } + else + { + RC_BIT_1_BASE(&p->rc, probs); + dist = p->reps[3]; p->reps[3] = p->reps[2]; + } p->reps[2] = p->reps[1]; } p->reps[1] = p->reps[0]; - p->reps[0] = distance; + p->reps[0] = dist; } - if (len == 1) - p->state = kShortRepNextStates[p->state]; - else + + RC_NORM(&p->rc) + + p->rc.range = range; + + if (len != 1) { - LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + LenEnc_Encode(&p->repLenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + if (!p->fastMode) + if (--p->repLenEnc.counters[posState] == 0) + LenPriceEnc_UpdateTable(&p->repLenEnc, posState, &p->repLenProbs, p->ProbPrices); + p->state = kRepNextStates[p->state]; } } else { - UInt32 posSlot; - RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + unsigned posSlot; + RC_BIT_0(&p->rc, probs); + p->rc.range = range; p->state = kMatchNextStates[p->state]; - LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); - pos -= LZMA_NUM_REPS; - GetPosSlot(pos, posSlot); - RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); - - if (posSlot >= kStartPosModelIndex) - { - UInt32 footerBits = ((posSlot >> 1) - 1); - UInt32 base = ((2 | (posSlot & 1)) << footerBits); - UInt32 posReduced = pos - base; - if (posSlot < kEndPosModelIndex) - RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); - else - { - RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); - RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); - p->alignPriceCount++; - } - } + LenEnc_Encode(&p->lenProbs, &p->rc, len - LZMA_MATCH_LEN_MIN, posState); + if (!p->fastMode) + if (--p->lenEnc.counters[posState] == 0) + LenPriceEnc_UpdateTable(&p->lenEnc, posState, &p->lenProbs, p->ProbPrices); + + dist -= LZMA_NUM_REPS; p->reps[3] = p->reps[2]; p->reps[2] = p->reps[1]; p->reps[1] = p->reps[0]; - p->reps[0] = pos; + p->reps[0] = dist + 1; + p->matchPriceCount++; + GetPosSlot(dist, posSlot); + // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); + { + UInt32 symbol = posSlot + (1 << kNumPosSlotBits); + range = p->rc.range; + probs = p->posSlotEncoder[GetLenToPosState(len)]; + do + { + CLzmaProb *prob = probs + (symbol >> kNumPosSlotBits); + UInt32 bit = (symbol >> (kNumPosSlotBits - 1)) & 1; + symbol <<= 1; + RC_BIT(&p->rc, prob, bit); + } + while (symbol < (1 << kNumPosSlotBits * 2)); + p->rc.range = range; + } + + if (dist >= kStartPosModelIndex) + { + unsigned footerBits = ((posSlot >> 1) - 1); + + if (dist < kNumFullDistances) + { + unsigned base = ((2 | (posSlot & 1)) << footerBits); + RcTree_ReverseEncode(&p->rc, p->posEncoders + base, footerBits, dist - base); + } + else + { + UInt32 pos2 = (dist | 0xF) << (32 - footerBits); + range = p->rc.range; + // RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + /* + do + { + range >>= 1; + p->rc.low += range & (0 - ((dist >> --footerBits) & 1)); + RC_NORM(&p->rc) + } + while (footerBits > kNumAlignBits); + */ + do + { + range >>= 1; + p->rc.low += range & (0 - (pos2 >> 31)); + pos2 += pos2; + RC_NORM(&p->rc) + } + while (pos2 != 0xF0000000); + + + // RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + + { + unsigned m = 1; + unsigned bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; + bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); + p->rc.range = range; + p->alignPriceCount++; + } + } + } } } - p->additionalOffset -= len; + nowPos32 += len; + p->additionalOffset -= len; + if (p->additionalOffset == 0) { UInt32 processed; + if (!p->fastMode) { if (p->matchPriceCount >= (1 << 7)) @@ -1948,13 +2350,15 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize if (p->alignPriceCount >= kAlignTableSize) FillAlignPrices(p); } + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) break; processed = nowPos32 - startPos32; - if (useLimits) + + if (maxPackSize) { - if (processed + kNumOpts + 300 >= maxUnpackSize || - RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) + if (processed + kNumOpts + 300 >= maxUnpackSize + || RangeEnc_GetProcessed_sizet(&p->rc) + kPackReserve >= maxPackSize) break; } else if (processed >= (1 << 17)) @@ -1964,10 +2368,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize } } } + p->nowPos64 += nowPos32 - startPos32; return Flush(p, nowPos32); } + + #define kBigHashDicLimit ((UInt32)1 << 24) static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) @@ -2004,7 +2411,10 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, #ifndef _7ZIP_ST if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); + RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, + LZMA_MATCH_LEN_MAX + + 1 /* 18.04 */ + , allocBig)); p->matchFinderObj = &p->matchFinderMt; p->matchFinderBase.bigHash = (Byte)( (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0); @@ -2024,17 +2434,21 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, void LzmaEnc_Init(CLzmaEnc *p) { - UInt32 i; + unsigned i; p->state = 0; - for (i = 0 ; i < LZMA_NUM_REPS; i++) - p->reps[i] = 0; + p->reps[0] = + p->reps[1] = + p->reps[2] = + p->reps[3] = 1; RangeEnc_Init(&p->rc); + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; for (i = 0; i < kNumStates; i++) { - UInt32 j; + unsigned j; for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { p->isMatch[i][j] = kProbInitValue; @@ -2046,39 +2460,38 @@ void LzmaEnc_Init(CLzmaEnc *p) p->isRepG2[i] = kProbInitValue; } - { - UInt32 num = (UInt32)0x300 << (p->lp + p->lc); - CLzmaProb *probs = p->litProbs; - for (i = 0; i < num; i++) - probs[i] = kProbInitValue; - } - { for (i = 0; i < kNumLenToPosStates; i++) { CLzmaProb *probs = p->posSlotEncoder[i]; - UInt32 j; + unsigned j; for (j = 0; j < (1 << kNumPosSlotBits); j++) probs[j] = kProbInitValue; } } { - for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + for (i = 0; i < kNumFullDistances; i++) p->posEncoders[i] = kProbInitValue; } - LenEnc_Init(&p->lenEnc.p); - LenEnc_Init(&p->repLenEnc.p); + { + UInt32 num = (UInt32)0x300 << (p->lp + p->lc); + UInt32 k; + CLzmaProb *probs = p->litProbs; + for (k = 0; k < num; k++) + probs[k] = kProbInitValue; + } - for (i = 0; i < (1 << kNumAlignBits); i++) - p->posAlignEncoder[i] = kProbInitValue; - p->optimumEndIndex = 0; - p->optimumCurrentIndex = 0; + LenEnc_Init(&p->lenProbs); + LenEnc_Init(&p->repLenProbs); + + p->optEnd = 0; + p->optCur = 0; p->additionalOffset = 0; p->pbMask = (1 << p->pb) - 1; - p->lpMask = (1 << p->lp) - 1; + p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); } void LzmaEnc_InitPrices(CLzmaEnc *p) @@ -2092,14 +2505,14 @@ void LzmaEnc_InitPrices(CLzmaEnc *p) p->lenEnc.tableSize = p->repLenEnc.tableSize = p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); } static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { - UInt32 i; - for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) + unsigned i; + for (i = kEndPosModelIndex / 2; i < kDicLogSizeMax; i++) if (p->dictSize <= ((UInt32)1 << i)) break; p->distTableSize = i * 2; @@ -2220,11 +2633,15 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, if (reInit) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); + nowPos64 = p->nowPos64; RangeEnc_Init(&p->rc); p->rc.outStream = &outStream.vt; - res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); + if (desiredPackSize == 0) + return SZ_ERROR_OUTPUT_EOF; + + res = LzmaEnc_CodeOneBlock(p, desiredPackSize, *unpackSize); *unpackSize = (UInt32)(p->nowPos64 - nowPos64); *destLen -= outStream.rem; @@ -2247,7 +2664,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) for (;;) { - res = LzmaEnc_CodeOneBlock(p, False, 0, 0); + res = LzmaEnc_CodeOneBlock(p, 0, 0); if (res != SZ_OK || p->finished) break; if (progress)