- updated LZMA to 21.06

https://www.7-zip.org/sdk.html
https://www.7-zip.org/a/lzma2106.7z
This commit is contained in:
alexey.lysiuk 2021-11-27 16:00:09 +02:00
parent d0697d9801
commit 121b89306a
35 changed files with 5933 additions and 1782 deletions

View file

@ -1,5 +1,5 @@
/* 7z.h -- 7z interface
2017-04-03 : Igor Pavlov : Public domain */
2018-07-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
@ -91,6 +91,8 @@ typedef struct
UInt64 *CoderUnpackSizes; // for all coders in all folders
Byte *CodersData;
UInt64 RangeLimit;
} CSzAr;
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);

View file

@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions
2018-12-31 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc)
return SZ_OK;
}
void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc)
#define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; }
void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL;
ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL;
@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p)
p->CoderUnpackSizes = NULL;
p->CodersData = NULL;
p->RangeLimit = 0;
}
static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc)
@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd)
return SZ_ERROR_ARCHIVE;
if (propsSize >= 0x80)
return SZ_ERROR_UNSUPPORTED;
coder->PropsOffset = sd->Data - dataStart;
coder->PropsOffset = (size_t)(sd->Data - dataStart);
coder->PropsSize = (Byte)propsSize;
sd->Data += (size_t)propsSize;
sd->Size -= (size_t)propsSize;
@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
{
UInt32 numCoders, ci, numInStreams = 0;
p->FoCodersOffsets[fo] = sd.Data - startBufPtr;
p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr);
RINOK(SzReadNumber32(&sd, &numCoders));
if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX)
@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p,
p->FoToCoderUnpackSizes[fo] = numCodersOutStreams;
{
size_t dataSize = sd.Data - startBufPtr;
const size_t dataSize = (size_t)(sd.Data - startBufPtr);
p->FoStartPackStreamIndex[fo] = packStreamIndex;
p->FoCodersOffsets[fo] = dataSize;
MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc);
@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i))
numSubDigests += numStreams;
}
ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data;
ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data);
continue;
}
if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd)
@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdSizes.Data = sd->Data;
RINOK(SkipNumbers(sd, numUnpackSizesInData));
ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data;
ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data);
RINOK(ReadID(sd, &type));
}
@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi)
{
ssi->sdCRCs.Data = sd->Data;
RINOK(SkipBitUi32s(sd, numSubDigests));
ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data;
ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data);
}
else
{
@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p,
if (type == k7zIdPackInfo)
{
RINOK(ReadNumber(sd, dataOffset));
if (*dataOffset > p->RangeLimit)
return SZ_ERROR_ARCHIVE;
RINOK(ReadPackInfo(p, sd, alloc));
if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset)
return SZ_ERROR_ARCHIVE;
RINOK(ReadID(sd, &type));
}
if (type == k7zIdUnpackInfo)
@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size
return SZ_ERROR_ARCHIVE;
for (p = data + pos;
#ifdef _WIN32
*(const UInt16 *)p != 0
*(const UInt16 *)(const void *)p != 0
#else
p[0] != 0 || p[1] != 0
#endif
; p += 2);
pos = p - data + 2;
pos = (size_t)(p - data) + 2;
*offsets++ = (pos >> 1);
}
while (--numFiles);
@ -1133,6 +1139,8 @@ static SRes SzReadHeader2(
SRes res;
SzAr_Init(&tempAr);
tempAr.RangeLimit = p->db.RangeLimit;
res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX,
p->startPosAfterHeader, &tempAr, allocTemp);
*numTempBufs = tempAr.NumFolders;
@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2(
nextHeaderSize = GetUi64(header + 20);
nextHeaderCRC = GetUi32(header + 28);
p->startPosAfterHeader = startArcPos + k7zStartHeaderSize;
p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize;
if (CrcCalc(header + 12, 20) != GetUi32(header + 8))
return SZ_ERROR_CRC;
p->db.RangeLimit = nextHeaderOffset;
nextHeaderSizeT = (size_t)nextHeaderSize;
if (nextHeaderSizeT != nextHeaderSize)
return SZ_ERROR_MEM;
@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2(
{
Int64 pos = 0;
RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END));
if ((UInt64)pos < startArcPos + nextHeaderOffset ||
(UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
(UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset ||
(UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset ||
(UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize)
return SZ_ERROR_INPUT_EOF;
}
RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset));
RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset));
if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp))
return SZ_ERROR_MEM;
@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2(
Buf_Init(&tempBuf);
SzAr_Init(&tempAr);
tempAr.RangeLimit = p->db.RangeLimit;
res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp);
SzAr_Free(&tempAr, allocTemp);

View file

@ -1,5 +1,5 @@
/* 7zCrc.c -- CRC32 init
2017-06-06 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -26,8 +26,20 @@
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
extern
CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT4;
extern
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
extern
CRC_FUNC g_CrcUpdateT0_32;
CRC_FUNC g_CrcUpdateT0_32;
extern
CRC_FUNC g_CrcUpdateT0_64;
CRC_FUNC g_CrcUpdateT0_64;
extern
CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
return v;
}
/* ---------- hardware CRC ---------- */
#ifdef MY_CPU_LE
#if defined(MY_CPU_ARM_OR_ARM64)
// #pragma message("ARM*")
#if defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#define USE_ARM64_CRC
#endif
#endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4))
#if !defined(__ARM_FEATURE_CRC32)
#define __ARM_FEATURE_CRC32 1
#if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif
#endif
#if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC
#include <arm_acle.h>
#endif
#endif
#else
// no hardware CRC
// #define USE_CRC_EMU
#ifdef USE_CRC_EMU
#pragma message("ARM64 CRC emulation")
MY_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data);
return v;
}
MY_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
MY_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
#endif // USE_CRC_EMU
#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#define T0_32_UNROLL_BYTES (4 * 4)
#define T0_64_UNROLL_BYTES (4 * 8)
#ifndef ATTRIB_CRC
#define ATTRIB_CRC
#endif
// #pragma message("USE ARM HW CRC")
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_32_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_32_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
}
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_64_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_64_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
}
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#endif // MY_CPU_LE
void MY_FAST_CALL CrcGenerateTable()
{
UInt32 i;
@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable()
}
}
#endif
#endif
#ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC
if (CPU_IsSupported_CRC32())
{
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate =
#if defined(MY_CPU_ARM)
CrcUpdateT0_32;
#else
CrcUpdateT0_64;
#endif
}
#endif
#ifdef USE_CRC_EMU
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate = CrcUpdateT0_64;
#endif
#endif
}

View file

@ -1,5 +1,5 @@
/* 7zCrcOpt.c -- CRC32 calculation
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -9,6 +9,7 @@
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x300)[((v ) & 0xFF)]
^ (table + 0x200)[((v >> 8) & 0xFF)]
@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
return v;
}
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
d = *((const UInt32 *)p + 1);
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x000)[((v ) & 0xFF)]
^ (table + 0x100)[((v >> 8) & 0xFF)]
@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)p;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
d = *((const UInt32 *)p + 1);
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]

View file

@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder
2019-02-02 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -21,17 +21,20 @@
#endif
#define k_Copy 0
#define k_Delta 3
#ifndef _7Z_NO_METHOD_LZMA2
#define k_LZMA2 0x21
#endif
#define k_LZMA 0x30101
#define k_BCJ 0x3030103
#define k_BCJ2 0x303011B
#ifndef _7Z_NO_METHODS_FILTERS
#define k_Delta 3
#define k_BCJ 0x3030103
#define k_PPC 0x3030205
#define k_IA64 0x3030401
#define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805
#endif
#ifdef _7ZIP_PPMD_SUPPPORT
@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp)
return *p->cur++;
if (p->res == SZ_OK)
{
size_t size = p->cur - p->begin;
size_t size = (size_t)(p->cur - p->begin);
p->processed += size;
p->res = ILookInStream_Skip(p->inStream, size);
size = (1 << 25);
@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
Ppmd7_Init(&ppmd, order);
}
{
CPpmd7z_RangeDec rc;
Ppmd7z_RangeDec_CreateVTable(&rc);
rc.Stream = &s.vt;
if (!Ppmd7z_RangeDec_Init(&rc))
ppmd.rc.dec.Stream = &s.vt;
if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec))
res = SZ_ERROR_DATA;
else if (s.extra)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else
else if (!s.extra)
{
SizeT i;
for (i = 0; i < outSize; i++)
Byte *buf = outBuffer;
const Byte *lim = buf + outSize;
for (; buf != lim; buf++)
{
int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt);
int sym = Ppmd7z_DecodeSymbol(&ppmd);
if (s.extra || sym < 0)
break;
outBuffer[i] = (Byte)sym;
*buf = (Byte)sym;
}
if (i != outSize)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc))
if (buf != lim)
res = SZ_ERROR_DATA;
else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec))
{
/* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */
res = SZ_ERROR_DATA;
}
}
if (s.extra)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else if (s.processed + (size_t)(s.cur - s.begin) != inSize)
res = SZ_ERROR_DATA;
}
Ppmd7_Free(&ppmd, allocMain);
return res;
@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
return SZ_ERROR_UNSUPPORTED;
}
#ifndef _7Z_NO_METHODS_FILTERS
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
#endif
static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData,

View file

@ -1,5 +1,5 @@
/* 7zStream.c -- 7z Stream functions
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
{
Int64 t = offset;
Int64 t = (Int64)offset;
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
}

View file

@ -1,11 +1,13 @@
/* 7zTypes.h -- Basic types
2018-08-04 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
#else
#include <errno.h>
#endif
#include <stddef.h>
@ -43,18 +45,115 @@ EXTERN_C_BEGIN
typedef int SRes;
#ifdef _MSC_VER
#if _MSC_VER > 1200
#define MY_ALIGN(n) __declspec(align(n))
#else
#define MY_ALIGN(n)
#endif
#else
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
#else
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32
// #define ENV_HAVE_LSTAT
typedef int WRes;
#define MY__FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_WIN32
#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \
| (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \
((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
/*
#define ERROR_FILE_NOT_FOUND 2L
#define ERROR_ACCESS_DENIED 5L
#define ERROR_NO_MORE_FILES 18L
#define ERROR_LOCK_VIOLATION 33L
#define ERROR_FILE_EXISTS 80L
#define ERROR_DISK_FULL 112L
#define ERROR_NEGATIVE_SEEK 131L
#define ERROR_ALREADY_EXISTS 183L
#define ERROR_DIRECTORY 267L
#define ERROR_TOO_MANY_POSTS 298L
#define ERROR_INTERNAL_ERROR 1359L
#define ERROR_INVALID_REPARSE_DATA 4392L
#define ERROR_REPARSE_TAG_INVALID 4393L
#define ERROR_REPARSE_TAG_MISMATCH 4394L
*/
// we use errno equivalents for some WIN32 errors:
#define ERROR_INVALID_FUNCTION EINVAL
#define ERROR_ALREADY_EXISTS EEXIST
#define ERROR_FILE_EXISTS EEXIST
#define ERROR_PATH_NOT_FOUND ENOENT
#define ERROR_FILE_NOT_FOUND ENOENT
#define ERROR_DISK_FULL ENOSPC
// #define ERROR_INVALID_HANDLE EBADF
// we use FACILITY_WIN32 for errors that has no errno equivalent
// Too many posts were made to a semaphore.
#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/
// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
#define TEXT(quote) quote
#define FILE_ATTRIBUTE_READONLY 0x0001
#define FILE_ATTRIBUTE_HIDDEN 0x0002
#define FILE_ATTRIBUTE_SYSTEM 0x0004
#define FILE_ATTRIBUTE_DIRECTORY 0x0010
#define FILE_ATTRIBUTE_ARCHIVE 0x0020
#define FILE_ATTRIBUTE_DEVICE 0x0040
#define FILE_ATTRIBUTE_NORMAL 0x0080
#define FILE_ATTRIBUTE_TEMPORARY 0x0100
#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
#define FILE_ATTRIBUTE_COMPRESSED 0x0800
#define FILE_ATTRIBUTE_OFFLINE 0x1000
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
#endif
@ -63,6 +162,10 @@ typedef int WRes;
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
#ifndef RINOK_WRes
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
#endif
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
@ -75,6 +178,40 @@ typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifndef _WIN32
typedef int INT;
typedef Int32 INT32;
typedef unsigned int UINT;
typedef UInt32 UINT32;
typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
typedef UINT32 ULONG;
#undef DWORD
typedef UINT32 DWORD;
#define VOID void
#define HRESULT LONG
typedef void *LPVOID;
// typedef void VOID;
// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits)
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
typedef long LONG_PTR;
typedef unsigned long DWORD_PTR;
typedef size_t SIZE_T;
#endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
@ -128,25 +265,37 @@ typedef int BoolInt;
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
#else
#else // _MSC_VER
#define MY_NO_INLINE
#define MY_FORCE_INLINE
#define MY_CDECL
#define MY_FAST_CALL
/* inline keyword : for C++ / C99 */
/* GCC, clang: */
/*
#if defined (__GNUC__) && (__GNUC__ >= 4)
#define MY_FORCE_INLINE __attribute__((always_inline))
#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \
|| defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
#else
#define MY_NO_INLINE
#endif
*/
#define MY_FORCE_INLINE
#define MY_CDECL
#if defined(_M_IX86) \
|| defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl))
#define MY_FAST_CALL
#elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi))
#define MY_FAST_CALL
#else
#define MY_FAST_CALL
#endif
#endif // _MSC_VER
/* The following interfaces use first parameter as pointer to structure */
@ -335,12 +484,11 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members"
*/
#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
@ -353,6 +501,7 @@ struct ISzAlloc
*/
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifdef _WIN32

View file

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 19
#define MY_VER_MINOR 00
#define MY_VER_MAJOR 21
#define MY_VER_MINOR 06
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "19.00"
#define MY_VERSION_NUMBERS "21.06"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2019-02-21"
#define MY_DATE "2021-11-24"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR

View file

@ -1,5 +1,5 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2018-04-28 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim;
Byte *dest;
SizeT num = p->lims[BCJ2_STREAM_MAIN] - src;
SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
if (num == 0)
{
@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
dest = p->dest;
if (num > (SizeT)(p->destLim - dest))
{
num = p->destLim - dest;
num = (SizeT)(p->destLim - dest);
if (num == 0)
{
p->state = BCJ2_DEC_STATE_ORIG;
@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
break;
}
num = src - p->bufs[BCJ2_STREAM_MAIN];
num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
if (src == srcLim)
{
@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
p->ip += 4;
val -= p->ip;
dest = p->dest;
rem = p->destLim - dest;
rem = (SizeT)(p->destLim - dest);
if (rem < 4)
{

View file

@ -1,5 +1,5 @@
/* Bra.c -- Converters for RISC code
2017-04-04 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
return p - data;
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
UInt32 b3;
if (p > lim)
return p - data;
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
for (;;)
{
if (p >= lim)
return p - data;
return (SizeT)(p - data);
/*
v = GetBe32(p);
p += 4;

View file

@ -1,5 +1,5 @@
/* Bra86.c -- Converter for x86 code (BCJ)
2017-04-03 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding
break;
{
SizeT d = (SizeT)(p - data - pos);
SizeT d = (SizeT)(p - data) - pos;
pos = (SizeT)(p - data);
if (p >= limit)
{

View file

@ -1,9 +1,13 @@
/* Compiler.h
2017-04-03 : Igor Pavlov : Public domain */
2021-01-05 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#ifdef _MSC_VER
#ifdef UNDER_CE
@ -25,6 +29,12 @@
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#endif
#define UNUSED_VAR(x) (void)x;

View file

@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
2018-02-18: Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag)
#define CHECK_CPUID_IS_SUPPORTED
#endif
#ifndef USE_ASM
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define MY__cpuidex __cpuidex
#else
/*
__cpuid (function == 4) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) doesn't clear ECX
We still can use __cpuid for low (function) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FAST_CALL / NO_INLINE function,
So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/
static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
UNUSED_VAR(subFunction);
__cpuid(CPUInfo, function);
}
#define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
#pragma message("======== MY__cpuidex_HACK WAS USED ========")
#endif
#else
#define MY__cpuidex(info, func, func2) __cpuid(info, func)
#pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
#endif
#endif
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
#endif
"=c" (*c) ,
"=d" (*d)
: "0" (function)) ;
: "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
__cpuid(CPUInfo, function);
*a = CPUInfo[0];
*b = CPUInfo[1];
*c = CPUInfo[2];
*d = CPUInfo[3];
MY__cpuidex(CPUInfo, (int)function, 0);
*a = (UInt32)CPUInfo[0];
*b = (UInt32)CPUInfo[1];
*c = (UInt32)CPUInfo[2];
*d = (UInt32)CPUInfo[3];
#endif
}
@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder()
}
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <windows.h>
#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported()
{
OSVERSIONINFO vi;
@ -188,13 +231,101 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
#define CHECK_SYS_SSE_SUPPORT
#endif
BoolInt CPU_Is_Aes_Supported()
static UInt32 X86_CPUID_ECX_Get_Flags()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p))
return 0;
return p.c;
}
BoolInt CPU_IsSupported_AES()
{
return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3()
{
return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41()
{
return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p))
return False;
return (p.c >> 25) & 1;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
return (d[1] >> 29) & 1;
}
}
// #include <stdio.h>
#ifdef _WIN32
#include <Windows.h>
#endif
BoolInt CPU_IsSupported_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False;
#endif
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5); // avx2
}
}
BoolInt CPU_IsSupported_VAES_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False;
#endif
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5) // avx2
// & (d[1] >> 31) // avx512vl
& (d[2] >> 9); // vaes // VEX-256/EVEX
}
}
BoolInt CPU_IsSupported_PageGB()
@ -215,4 +346,133 @@ BoolInt CPU_IsSupported_PageGB()
}
}
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _WIN32
#include <Windows.h>
BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else
#if defined(__APPLE__)
/*
#include <stdio.h>
#include <string.h>
static void Print_sysctlbyname(const char *name)
{
size_t bufSize = 256;
char buf[256];
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
{
int i;
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
for (i = 0; i < 20; i++)
printf(" %2x", (unsigned)(Byte)buf[i]);
}
}
*/
static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
{
UInt32 val = 0;
if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
return 1;
return 0;
}
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
BoolInt CPU_IsSupported_CRC32(void)
{
return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}
BoolInt CPU_IsSupported_NEON(void)
{
return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
}
#ifdef MY_CPU_ARM64
#define APPLE_CRYPTO_SUPPORT_VAL 1
#else
#define APPLE_CRYPTO_SUPPORT_VAL 0
#endif
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#else // __APPLE__
#include <sys/auxv.h>
#define USE_HWCAP
#ifdef USE_HWCAP
#include <asm/hwcap.h>
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
MY_HWCAP_CHECK_FUNC_2(name, name)
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
MY_HWCAP_CHECK_FUNC(NEON)
#endif // USE_HWCAP
MY_HWCAP_CHECK_FUNC (CRC32)
MY_HWCAP_CHECK_FUNC (SHA1)
MY_HWCAP_CHECK_FUNC (SHA2)
MY_HWCAP_CHECK_FUNC (AES)
#endif // __APPLE__
#endif // _WIN32
#endif // MY_CPU_ARM_OR_ARM64
#ifdef __APPLE__
#include <sys/sysctl.h>
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
return sysctlbyname(name, buf, bufSize, NULL, 0);
}
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
size_t bufSize = sizeof(*val);
int res = My_sysctlbyname_Get(name, val, &bufSize);
if (res == 0 && bufSize != sizeof(*val))
return EFAULT;
return res;
}
#endif

View file

@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
2018-02-18 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT can be used to select fast code branch
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if defined(_M_X64) \
@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "x64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
#define MY_CPU_32BIT
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
#define MY_CPU_NAME "arm"
#define MY_CPU_32BIT
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_NAME "armt"
#else
#define MY_CPU_NAME "arm"
#endif
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
@ -84,17 +97,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#if defined(__ppc64__) \
|| defined(__powerpc64__)
|| defined(__powerpc64__) \
|| defined(__ppc__) \
|| defined(__powerpc__) \
|| defined(__PPC__) \
|| defined(_POWER)
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(_LP64) \
|| defined(__64BIT__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "ppc64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#elif defined(__ppc__) \
|| defined(__powerpc__)
#else
#define MY_CPU_NAME "ppc"
#define MY_CPU_32BIT
#define MY_CPU_SIZEOF_POINTER 4
/* #define MY_CPU_32BIT */
#endif
#endif
@ -111,6 +136,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define MY_CPU_X86_OR_AMD64
#endif
#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
#define MY_CPU_ARM_OR_ARM64
#endif
#ifdef _WIN32
@ -170,6 +199,40 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#error Stop_Compiling_Bad_32_64_BIT
#endif
#ifdef __SIZEOF_POINTER__
#ifdef MY_CPU_SIZEOF_POINTER
#if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#else
#define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
#endif
#endif
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
#if defined (_LP64)
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
#define MY_CPU_pragma_pop __pragma(pack(pop))
#else
#define MY_CPU_pragma_pack_push_1
#define MY_CPU_pragma_pop
#endif
#else
#ifdef __xlC__
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
#define MY_CPU_pragma_pop _Pragma("pack()")
#else
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
#define MY_CPU_pragma_pop _Pragma("pack(pop)")
#endif
#endif
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
@ -189,8 +252,12 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) \
|| defined(__ARM_FEATURE_UNALIGNED)
|| defined(MY_CPU_ARM64)
#define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
@ -200,11 +267,15 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#endif
#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else
@ -218,8 +289,6 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
@ -230,19 +299,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
#endif
#ifndef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
#define MY__has_builtin(x) 0
#endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
@ -253,8 +332,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
@ -262,9 +341,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
@ -325,10 +404,37 @@ int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder();
BoolInt CPU_Is_Aes_Supported();
BoolInt CPU_IsSupported_PageGB();
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void);
BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
BoolInt CPU_IsSupported_CRC32(void);
BoolInt CPU_IsSupported_NEON(void);
#if defined(_WIN32)
BoolInt CPU_IsSupported_CRYPTO(void);
#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
#else
BoolInt CPU_IsSupported_SHA1(void);
BoolInt CPU_IsSupported_SHA2(void);
BoolInt CPU_IsSupported_AES(void);
#endif
#endif
#if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* Delta.c -- Delta converter
2009-05-26 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -12,53 +12,158 @@ void Delta_Init(Byte *state)
state[i] = 0;
}
static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
{
unsigned i;
for (i = 0; i < size; i++)
dest[i] = src[i];
}
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
Byte buf[DELTA_STATE_SIZE];
unsigned j = 0;
MyMemCpy(buf, state, delta);
Byte temp[DELTA_STATE_SIZE];
if (size == 0)
return;
{
SizeT i;
for (i = 0; i < size;)
unsigned i = 0;
do
temp[i] = state[i];
while (++i != delta);
}
if (size <= delta)
{
unsigned i = 0, k;
do
{
for (j = 0; j < delta && i < size; i++, j++)
Byte b = *data;
*data++ = (Byte)(b - temp[i]);
temp[i] = b;
}
while (++i != size);
k = 0;
do
{
if (i == delta)
i = 0;
state[k] = temp[i++];
}
while (++k != delta);
return;
}
{
Byte *p = data + size - delta;
{
unsigned i = 0;
do
state[i] = *p++;
while (++i != delta);
}
{
const Byte *lim = data + delta;
ptrdiff_t dif = -(ptrdiff_t)delta;
if (((ptrdiff_t)size + dif) & 1)
{
Byte b = data[i];
data[i] = (Byte)(b - buf[j]);
buf[j] = b;
--p; *p = (Byte)(*p - p[dif]);
}
while (p != lim)
{
--p; *p = (Byte)(*p - p[dif]);
--p; *p = (Byte)(*p - p[dif]);
}
dif = -dif;
do
{
--p; *p = (Byte)(*p - temp[--dif]);
}
while (dif != 0);
}
}
if (j == delta)
j = 0;
MyMemCpy(state, buf + j, delta - j);
MyMemCpy(state + delta - j, buf, j);
}
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
Byte buf[DELTA_STATE_SIZE];
unsigned j = 0;
MyMemCpy(buf, state, delta);
unsigned i;
const Byte *lim;
if (size == 0)
return;
i = 0;
lim = data + size;
if (size <= delta)
{
SizeT i;
for (i = 0; i < size;)
do
*data = (Byte)(*data + state[i++]);
while (++data != lim);
for (; delta != i; state++, delta--)
*state = state[i];
data -= i;
}
else
{
/*
#define B(n) b ## n
#define I(n) Byte B(n) = state[n];
#define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
#define F(n) if (data != lim) { U(n) }
if (delta == 1)
{
for (j = 0; j < delta && i < size; i++, j++)
I(0)
if ((lim - data) & 1) { U(0) }
while (data != lim) { U(0) U(0) }
data -= 1;
}
else if (delta == 2)
{
I(0) I(1)
lim -= 1; while (data < lim) { U(0) U(1) }
lim += 1; F(0)
data -= 2;
}
else if (delta == 3)
{
I(0) I(1) I(2)
lim -= 2; while (data < lim) { U(0) U(1) U(2) }
lim += 2; F(0) F(1)
data -= 3;
}
else if (delta == 4)
{
I(0) I(1) I(2) I(3)
lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
lim += 3; F(0) F(1) F(2)
data -= 4;
}
else
*/
{
do
{
buf[j] = data[i] = (Byte)(buf[j] + data[i]);
*data = (Byte)(*data + state[i++]);
data++;
}
while (i != delta);
{
ptrdiff_t dif = -(ptrdiff_t)delta;
do
*data = (Byte)(*data + data[dif]);
while (++data != lim);
data += dif;
}
}
}
if (j == delta)
j = 0;
MyMemCpy(state, buf + j, delta - j);
MyMemCpy(state + delta - j, buf, j);
do
*state++ = *data;
while (++data != lim);
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
2017-06-10 : Igor Pavlov : Public domain */
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
@ -15,7 +15,7 @@ typedef struct _CMatchFinder
Byte *buffer;
UInt32 pos;
UInt32 posLimit;
UInt32 streamPos;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
@ -51,17 +51,19 @@ typedef struct _CMatchFinder
UInt64 expectedDataSize;
} CMatchFinder;
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
*/
int MatchFinder_NeedMove(CMatchFinder *p);
Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/*
#define Inline_MatchFinder_InitPos(p, val) \
(p)->pos = (val); \
(p)->streamPos = (val);
*/
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
(p)->pos -= (subValue); \
(p)->streamPos -= (subValue);
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
@ -91,7 +104,7 @@ Conditions:
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
@ -101,21 +114,23 @@ typedef struct _IMatchFinder
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
} IMatchFinder;
} IMatchFinder2;
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_3(CMatchFinder *p, int readData);
void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void LzFindPrepare(void);
EXTERN_C_END
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2018-07-04 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
@ -9,31 +9,26 @@
EXTERN_C_BEGIN
#define kMtHashBlockSize (1 << 13)
#define kMtHashNumBlocks (1 << 3)
#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1)
#define kMtBtBlockSize (1 << 14)
#define kMtBtNumBlocks (1 << 6)
#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1)
typedef struct _CMtSync
{
UInt32 numProcessedBlocks;
CThread thread;
UInt64 affinity;
BoolInt wasCreated;
BoolInt needStart;
BoolInt csWasInitialized;
BoolInt csWasEntered;
BoolInt exit;
BoolInt stopWriting;
CThread thread;
CAutoResetEvent canStart;
CAutoResetEvent wasStarted;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
BoolInt csWasInitialized;
BoolInt csWasEntered;
CCriticalSection cs;
UInt32 numProcessedBlocks;
// UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
@ -49,18 +44,23 @@ typedef struct _CMatchFinderMt
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
UInt32 btBufPos;
UInt32 btBufPosLimit;
const UInt32 *btBufPos;
const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
UInt32 *hash;
UInt32 fixedHashSize;
// UInt32 hash4Mask;
UInt32 historySize;
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
UInt32 failure_LZ_BT; // failure in BT transfered to LZ
// UInt32 failure_LZ_LZ; // failure in LZ tables
UInt32 failureBuf[1];
// UInt32 crc[256];
/* LZ + BT */
CMtSync btSync;
Byte btDummy[kMtCacheLineDummy];
@ -70,6 +70,8 @@ typedef struct _CMatchFinderMt
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
UInt32 failure_BT;
CLzRef *son;
UInt32 matchMaxLen;
@ -77,7 +79,7 @@ typedef struct _CMatchFinderMt
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be historySize + 1 */
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
@ -87,13 +89,19 @@ typedef struct _CMatchFinderMt
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
// CMatchFinder MatchFinder;
} CMatchFinderMt;
// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable);
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END

View file

@ -0,0 +1,578 @@
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "LzFind.h"
// #include "LzFindMt.h"
// #define LOG_ITERS
// #define LOG_THREAD
#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
// #define PRF(x)
#endif
#ifdef LOG_ITERS
#include <stdio.h>
UInt64 g_NumIters_Tree;
UInt64 g_NumIters_Loop;
UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif
// ---------- BT THREAD ----------
#define USE_SON_PREFETCH
#define USE_LONG_MATCH_OPT
#define kEmptyHashValue 0
// #define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
/*
MY_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
{
do
{
UInt32 delta;
if (hash == size)
break;
delta = *hash++;
if (delta == 0 || delta > (UInt32)pos)
return NULL;
lenLimit++;
if (delta == (UInt32)pos)
{
CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
const Byte *len0 = cur, *len1 = cur;
UInt32 cutValue = _cutValue;
const Byte *maxLen = cur + _maxLen;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
hash++;
pos++;
cur++;
lenLimit++;
{
CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
#else
const UInt32 p0 = ptr[0 + (diff * 2)];
const UInt32 p1 = ptr[1 + (diff * 2)];
ptr[0] = p0;
ptr[1] = p1;
// ptr[0] = ptr[0 + (diff * 2)];
// ptr[1] = ptr[1 + (diff * 2)];
#endif
}
// PrintSon(son + 2, pos - 1);
// printf("\npos = %x delta = %x\n", pos, delta);
len++;
*d++ = 2;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
if (delta >= curMatch)
return NULL;
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
if (delta >= curMatch)
return NULL;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= pos)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/
/* define cbs if you use 2 functions.
GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
do not define cbs if you use 1 function:
GetMatchesSpecN_2()
*/
// #define cbs _cyclicBufferSize
/*
we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes);
MY_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
lenLimit++;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 cutValue = _cutValue;
const Byte *len0 = cur, *len1 = cur;
const Byte *maxLen = cur + _maxLen;
// if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// SPEC code
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
*d++ = 2;
*d++ = (UInt32)(lenLimit - cur);
*d++ = delta - 1;
cur++;
lenLimit++;
// SPEC
_cyclicBufferPos++;
{
// SPEC code
CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
const CLzRef *src = dest + ((diff
+ (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
// CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
pos++;
hash++;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
} // for() end for long matches
}
#endif
break; // break from TREE iterations
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
if (delta >= curMatch)
return NULL;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
if (delta >= curMatch)
return NULL;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= cbs)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
/*
typedef UInt32 uint32plus; // size_t
UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 *_distances = ++d;
uint32plus len0 = 0, len1 = 0;
UInt32 cutValue = _cutValue;
uint32plus maxLen = _maxLen;
// lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const Byte *pb = cur - delta;
uint32plus len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
while (++len != lenLimit)
if (pb[len] != cur[len])
break;
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)len;
*d++ = delta - 1;
if (len == lenLimit)
{
{
const UInt32 pair1 = pair[1];
*ptr0 = pair1;
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
}
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
for (;;)
{
*d++ = 2;
*d++ = (UInt32)lenLimit;
*d++ = delta - 1;
_cyclicBufferPos++;
{
CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
const CLzRef *src = dest + ((diff +
(ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
hash++;
pos++;
cur++;
pb++;
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta;
if (pb[len] < cur[len])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
{
if (delta >= curMatch)
return NULL;
delta = (UInt32)pos - delta;
if (delta >= cbs
// delta >= _cyclicBufferSize || delta >= pos
|| --cutValue == 0)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/

View file

@ -1,57 +1,34 @@
/* LzHash.h -- HASH functions for LZ algorithms
2015-04-12 : Igor Pavlov : Public domain */
2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
/*
(kHash2Size >= (1 << 8)) : Required
(kHash3Size >= (1 << 16)) : Required
*/
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
#define kHash4Size (1 << 20)
// #define kHash4Size (1 << 20)
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
/*
We use up to 3 crc values for hash:
crc0
crc1 << Shift_1
crc2 << Shift_2
(Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
Small values for Shift are not good for collision rate.
Big value for Shift_2 increases the minimum size
of hash table, that will be slow for small files.
*/
#define HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
#define HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
#define HASH5_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
temp ^= (p->crc[cur[3]] << 5); \
h4 = temp & (kHash4Size - 1); \
hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
#define MT_HASH2_CALC \
h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
#define MT_HASH3_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
#define MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \
h3 = temp & (kHash3Size - 1); \
h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
#define kLzHash_CrcShift_1 5
#define kLzHash_CrcShift_2 10
#endif

View file

@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2019-02-02 : Igor Pavlov : Public domain */
2021-02-09 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p)
LzmaDec_Init(&p->decoder);
}
static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
// ELzma2State
static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
switch (p->state)
{

View file

@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder
2018-07-04 : Igor Pavlov : Public domain */
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@ -13,10 +13,12 @@
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
#define RC_INIT_SIZE 5
#ifndef _LZMA_DEC_OPT
#define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
@ -62,9 +64,10 @@
probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#endif // _LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
@ -114,6 +117,9 @@
#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
#define kMatchSpecLen_Error_Data (1 << 9)
#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1)
/* External ASM code needs same CLzmaProb array layout. So don't change it. */
/* (probs_1664) is faster and better for code size at some platforms */
@ -166,10 +172,12 @@
/*
p->remainLen : shows status of LZMA decoder:
< kMatchSpecLenStart : normal remain
= kMatchSpecLenStart : finished
= kMatchSpecLenStart + 1 : need init range coder
= kMatchSpecLenStart + 2 : need init range coder and state
< kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset
= kMatchSpecLenStart : the LZMA stream was finished with end mark
= kMatchSpecLenStart + 1 : need init range coder
= kMatchSpecLenStart + 2 : need init range coder and state
= kMatchSpecLen_Error_Fail : Internal Code Failure
= kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error
*/
/* ---------- LZMA_DECODE_REAL ---------- */
@ -188,23 +196,31 @@ In:
{
LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
is not END_OF_PAYALOAD_MARKER, then function returns error code.
is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary,
the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later.
}
Processing:
first LZMA symbol will be decoded in any case
All checks for limits are at the end of main loop,
It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
The first LZMA symbol will be decoded in any case.
All main checks for limits are at the end of main loop,
It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for
next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX),
that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit.
So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte.
Out:
RangeCoder is normalized
Result:
SZ_OK - OK
SZ_ERROR_DATA - Error
p->remainLen:
< kMatchSpecLenStart : normal remain
= kMatchSpecLenStart : finished
p->remainLen:
< kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset
= kMatchSpecLenStart : the LZMA stream was finished with end mark
SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary
p->remainLen : undefined
p->reps[*] : undefined
*/
@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
else
{
UPDATE_1(prob);
/*
// that case was checked before with kBadRepCode
if (checkDicSize == 0 && processedPos == 0)
return SZ_ERROR_DATA;
*/
prob = probs + IsRepG0 + state;
IF_BIT_0(prob)
{
@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob)
{
UPDATE_0(prob);
// that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
// The caller doesn't allow (dicPos == limit) case here
// so we don't need the following check:
// if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; }
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
processedPos++;
@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
len += kMatchSpecLen_Error_Data + kMatchMinLen;
// len = kMatchSpecLen_Error_Data;
// len += kMatchMinLen;
break;
}
}
@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
if ((rem = limit - dicPos) == 0)
{
p->dicPos = dicPos;
return SZ_ERROR_DATA;
/*
We stop decoding and return SZ_OK, and we can resume decoding later.
Any error conditions can be tested later in caller code.
For more strict mode we can stop decoding with error
// len += kMatchSpecLen_Error_Data;
*/
break;
}
curLen = ((rem < len) ? (unsigned)rem : len);
@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->buf = buf;
p->range = range;
p->code = code;
p->remainLen = (UInt32)len;
p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too.
p->dicPos = dicPos;
p->processedPos = processedPos;
p->reps[0] = rep0;
@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
p->reps[2] = rep2;
p->reps[3] = rep3;
p->state = (UInt32)state;
if (len >= kMatchSpecLen_Error_Data)
return SZ_ERROR_DATA;
return SZ_OK;
}
#endif
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
unsigned len = (unsigned)p->remainLen;
if (len == 0 /* || len >= kMatchSpecLenStart */)
return;
{
Byte *dic = p->dic;
SizeT dicPos = p->dicPos;
SizeT dicBufSize = p->dicBufSize;
unsigned len = (unsigned)p->remainLen;
SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
SizeT rem = limit - dicPos;
if (rem < len)
len = (unsigned)(rem);
Byte *dic;
SizeT dicBufSize;
SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
{
SizeT rem = limit - dicPos;
if (rem < len)
{
len = (unsigned)(rem);
if (len == 0)
return;
}
}
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
p->checkDicSize = p->prop.dicSize;
p->processedPos += (UInt32)len;
p->remainLen -= (UInt32)len;
while (len != 0)
dic = p->dic;
rep0 = p->reps[0];
dicBufSize = p->dicBufSize;
do
{
len--;
dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
dicPos++;
}
while (--len);
p->dicPos = dicPos;
}
}
/*
At staring of new stream we have one of the following symbols:
- Literal - is allowed
- Non-Rep-Match - is allowed only if it's end marker symbol
- Rep-Match - is not allowed
We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code
*/
#define kRange0 0xFFFFFFFF
#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
#error Stop_Compiling_Bad_LZMA_Check
#endif
/*
LzmaDec_DecodeReal2():
It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize).
We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(),
and we support the following state of (p->checkDicSize):
if (total_processed < p->prop.dicSize) then
{
(total_processed == p->processedPos)
(p->checkDicSize == 0)
}
else
(p->checkDicSize == p->prop.dicSize)
*/
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{
do
if (p->checkDicSize == 0)
{
SizeT limit2 = limit;
if (p->checkDicSize == 0)
{
UInt32 rem = p->prop.dicSize - p->processedPos;
if (limit - p->dicPos > rem)
limit2 = p->dicPos + rem;
if (p->processedPos == 0)
if (p->code >= kBadRepCode)
return SZ_ERROR_DATA;
}
RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
UInt32 rem = p->prop.dicSize - p->processedPos;
if (limit - p->dicPos > rem)
limit = p->dicPos + rem;
}
{
int res = LZMA_DECODE_REAL(p, limit, bufLimit);
if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
p->checkDicSize = p->prop.dicSize;
LzmaDec_WriteRem(p, limit);
return res;
}
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
return 0;
}
typedef enum
{
DUMMY_ERROR, /* unexpected end of input stream */
DUMMY_INPUT_EOF, /* need more input data */
DUMMY_LIT,
DUMMY_MATCH,
DUMMY_REP
} ELzmaDummy;
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH)
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut)
{
UInt32 range = p->range;
UInt32 code = p->code;
const Byte *bufLimit = buf + inSize;
const Byte *bufLimit = *bufOut;
const CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state;
ELzmaDummy res;
for (;;)
{
const CLzmaProb *prob;
UInt32 bound;
unsigned ttt;
unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1);
prob = probs + IsMatch + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK
/* if (bufLimit - buf >= 7) return DUMMY_LIT; */
prob = probs + Literal;
if (p->checkDicSize != 0 || p->processedPos != 0)
prob += ((UInt32)LZMA_LIT_SIZE *
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) +
((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
if (state < kNumLitStates)
{
@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
IF_BIT_0_CHECK(prob)
{
UPDATE_0_CHECK;
NORMALIZE_CHECK;
return DUMMY_REP;
break;
}
else
{
@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
{
unsigned numDirectBits = ((posSlot >> 1) - 1);
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
if (posSlot < kEndPosModelIndex)
{
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS
}
}
}
break;
}
NORMALIZE_CHECK;
*bufOut = buf;
return res;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
{
p->remainLen = kMatchSpecLenStart + 1;
@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p)
}
/*
LZMA supports optional end_marker.
So the decoder can lookahead for one additional LZMA-Symbol to check end_marker.
That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream.
When the decoder reaches dicLimit, it looks (finishMode) parameter:
if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead
if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position
When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways:
1) Strict mode (default) : the decoder returns SZ_ERROR_DATA.
2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller
must check (status) value. The caller can show the error,
if the end of stream is expected, and the (status) is noit
LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK.
*/
#define RETURN__NOT_FINISHED__FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
(*srcLen) = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
if (p->remainLen > kMatchSpecLenStart)
{
if (p->remainLen > kMatchSpecLenStart + 2)
return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA;
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
p->tempBuf[p->tempBufSize++] = *src++;
if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
| ((UInt32)p->tempBuf[2] << 16)
| ((UInt32)p->tempBuf[3] << 8)
| ((UInt32)p->tempBuf[4]);
if (p->checkDicSize == 0
&& p->processedPos == 0
&& p->code >= kBadRepCode)
return SZ_ERROR_DATA;
p->range = 0xFFFFFFFF;
p->tempBufSize = 0;
@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
p->remainLen = 0;
}
LzmaDec_WriteRem(p, dicLimit);
while (p->remainLen != kMatchSpecLenStart)
for (;;)
{
if (p->remainLen == kMatchSpecLenStart)
{
if (p->code != 0)
return SZ_ERROR_DATA;
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
}
LzmaDec_WriteRem(p, dicLimit);
{
// (p->remainLen == 0 || p->dicPos == dicLimit)
int checkEndMarkNow = 0;
if (p->dicPos >= dicLimit)
@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
}
if (p->remainLen != 0)
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
RETURN__NOT_FINISHED__FOR_FINISH;
}
checkEndMarkNow = 1;
}
// (p->remainLen == 0)
if (p->tempBufSize == 0)
{
SizeT processed;
const Byte *bufLimit;
int dummyProcessed = -1;
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
int dummyRes = LzmaDec_TryDummy(p, src, inSize);
if (dummyRes == DUMMY_ERROR)
const Byte *bufOut = src + inSize;
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut);
if (dummyRes == DUMMY_INPUT_EOF)
{
memcpy(p->tempBuf, src, inSize);
p->tempBufSize = (unsigned)inSize;
size_t i;
if (inSize >= LZMA_REQUIRED_INPUT_MAX)
break;
(*srcLen) += inSize;
p->tempBufSize = (unsigned)inSize;
for (i = 0; i < inSize; i++)
p->tempBuf[i] = src[i];
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
dummyProcessed = (int)(bufOut - src);
if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX)
break;
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_ERROR_DATA;
unsigned i;
(*srcLen) += (unsigned)dummyProcessed;
p->tempBufSize = (unsigned)dummyProcessed;
for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH;
}
bufLimit = src;
// we will decode only one iteration
}
else
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
p->buf = src;
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
return SZ_ERROR_DATA;
processed = (SizeT)(p->buf - src);
(*srcLen) += processed;
src += processed;
inSize -= processed;
}
else
{
unsigned rem = p->tempBufSize, lookAhead = 0;
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
p->tempBuf[rem++] = src[lookAhead++];
p->tempBufSize = rem;
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
if (dummyRes == DUMMY_ERROR)
int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit);
SizeT processed = (SizeT)(p->buf - src);
if (dummyProcessed < 0)
{
(*srcLen) += (SizeT)lookAhead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
if (processed > inSize)
break;
}
if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
else if ((unsigned)dummyProcessed != processed)
break;
src += processed;
inSize -= processed;
(*srcLen) += processed;
if (res != SZ_OK)
{
*status = LZMA_STATUS_NOT_FINISHED;
p->remainLen = kMatchSpecLen_Error_Data;
return SZ_ERROR_DATA;
}
}
continue;
}
{
// we have some data in (p->tempBuf)
// in strict mode: tempBufSize is not enough for one Symbol decoding.
// in relaxed mode: tempBufSize not larger than required for one Symbol decoding.
unsigned rem = p->tempBufSize;
unsigned ahead = 0;
int dummyProcessed = -1;
while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize)
p->tempBuf[rem++] = src[ahead++];
// ahead - the size of new data copied from (src) to (p->tempBuf)
// rem - the size of temp buffer including new data from (src)
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
{
const Byte *bufOut = p->tempBuf + rem;
ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut);
if (dummyRes == DUMMY_INPUT_EOF)
{
if (rem >= LZMA_REQUIRED_INPUT_MAX)
break;
p->tempBufSize = rem;
(*srcLen) += (SizeT)ahead;
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
dummyProcessed = (int)(bufOut - p->tempBuf);
if ((unsigned)dummyProcessed < p->tempBufSize)
break;
if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes))
{
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH;
}
}
p->buf = p->tempBuf;
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
return SZ_ERROR_DATA;
{
unsigned kkk = (unsigned)(p->buf - p->tempBuf);
if (rem < kkk)
return SZ_ERROR_FAIL; /* some internal error */
rem -= kkk;
if (lookAhead < rem)
return SZ_ERROR_FAIL; /* some internal error */
lookAhead -= rem;
// we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf)
int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf);
SizeT processed = (SizeT)(p->buf - p->tempBuf);
rem = p->tempBufSize;
if (dummyProcessed < 0)
{
if (processed > LZMA_REQUIRED_INPUT_MAX)
break;
if (processed < rem)
break;
}
else if ((unsigned)dummyProcessed != processed)
break;
processed -= rem;
src += processed;
inSize -= processed;
(*srcLen) += processed;
p->tempBufSize = 0;
if (res != SZ_OK)
{
p->remainLen = kMatchSpecLen_Error_Data;
return SZ_ERROR_DATA;
}
}
(*srcLen) += (SizeT)lookAhead;
src += lookAhead;
inSize -= (SizeT)lookAhead;
p->tempBufSize = 0;
}
}
}
if (p->code != 0)
return SZ_ERROR_DATA;
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
/* Some unexpected error: internal error of code, memory corruption or hardware failure */
p->remainLen = kMatchSpecLen_Error_Fail;
return SZ_ERROR_FAIL;
}
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT outSize = *destLen;

View file

@ -1,5 +1,5 @@
/* LzmaDec.h -- LZMA Decoder
2018-04-21 : Igor Pavlov : Public domain */
2020-03-19 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
@ -181,6 +181,7 @@ Returns:
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
@ -223,6 +224,7 @@ Returns:
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,

View file

@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
2019-01-10: Igor Pavlov : Public domain */
2021-11-18: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -12,6 +12,7 @@
#include <stdio.h>
#endif
#include "CpuArch.h"
#include "LzmaEnc.h"
#include "LzFind.h"
@ -19,12 +20,25 @@
#include "LzFindMt.h"
#endif
/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
void LzmaEnc_Finish(CLzmaEncHandle pp);
void LzmaEnc_SaveState(CLzmaEncHandle pp);
void LzmaEnc_RestoreState(CLzmaEncHandle pp);
#ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0;
#endif
#define kLzmaMaxHistorySize ((UInt32)3 << 29)
/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
/* for good normalization speed we still reserve 256 MB before 4 GB range */
#define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits)
@ -36,7 +50,7 @@ static unsigned g_STAT_OFFSET = 0;
#define kNumMoveReducingBits 4
#define kNumBitPriceShiftBits 4
#define kBitPrice (1 << kNumBitPriceShiftBits)
// #define kBitPrice (1 << kNumBitPriceShiftBits)
#define REP_LEN_COUNT 64
@ -47,6 +61,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->writeEndMark = 0;
p->affinity = 0;
}
void LzmaEncProps_Normalize(CLzmaEncProps *p)
@ -55,16 +70,21 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (level < 0) level = 5;
p->level = level;
if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
if (p->dictSize == 0)
p->dictSize =
( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
( level <= 6 ? ((UInt32)1 << (level + 19)) :
( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
)));
if (p->dictSize > p->reduceSize)
{
unsigned i;
UInt32 reduceSize = (UInt32)p->reduceSize;
for (i = 11; i <= 30; i++)
{
if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
}
UInt32 v = (UInt32)p->reduceSize;
const UInt32 kReduceMin = ((UInt32)1 << 12);
if (v < kReduceMin)
v = kReduceMin;
if (p->dictSize > v)
p->dictSize = v;
}
if (p->lc < 0) p->lc = 3;
@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
if (p->numHashBytes < 0) p->numHashBytes = 4;
if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
if (p->numThreads < 0)
p->numThreads =
@ -93,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
return props.dictSize;
}
#if (_MSC_VER >= 1400)
/* BSR code is fast for some new CPUs */
/* #define LZMA_LOG_BSR */
/*
x86/x64:
BSR:
IF (SRC == 0) ZF = 1, DEST is undefined;
AMD : DEST is unchanged;
IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit
BSR is slow in some processors
LZCNT:
IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64)
IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits
IF (DEST == 0) ZF = 1;
LZCNT works only in new processors starting from Haswell.
if LZCNT is not supported by processor, then it's executed as BSR.
LZCNT can be faster than BSR, if supported.
*/
// #define LZMA_LOG_BSR
#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */
#if (defined(__clang__) && (__clang_major__ >= 6)) \
|| (defined(__GNUC__) && (__GNUC__ >= 6))
#define LZMA_LOG_BSR
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
// #if defined(MY_CPU_ARM_OR_ARM64)
#define LZMA_LOG_BSR
// #endif
#endif
#endif
// #include <intrin.h>
#ifdef LZMA_LOG_BSR
#define kDicLogSizeMaxCompress 32
#if defined(__clang__) \
|| defined(__GNUC__)
#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
/*
C code: : (30 - __builtin_clz(x))
gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31)
clang10 for x64 : 31 + (bsr(x) xor -32)
*/
static unsigned GetPosSlot1(UInt32 pos)
#define MY_clz(x) ((unsigned)__builtin_clz(x))
// __lzcnt32
// __builtin_ia32_lzcnt_u32
#else // #if defined(_MSC_VER)
#ifdef MY_CPU_ARM_OR_ARM64
#define MY_clz _CountLeadingZeros
#else // if defined(MY_CPU_X86_OR_AMD64)
// #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU)
// _BitScanReverse code is not optimal for some MSVC compilers
#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \
res = (zz + zz) + (pos >> zz); }
#endif // MY_CPU_X86_OR_AMD64
#endif // _MSC_VER
#ifndef BSR2_RET
#define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \
res = (zz + zz) + (pos >> zz); }
#endif
unsigned GetPosSlot1(UInt32 pos);
unsigned GetPosSlot1(UInt32 pos)
{
unsigned res;
BSR2_RET(pos, res);
@ -113,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
#else
#define kNumLogBits (9 + sizeof(size_t) / 2)
/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
#else // ! LZMA_LOG_BSR
#define kNumLogBits (11 + sizeof(size_t) / 8 * 3)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
@ -163,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos)
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
#endif
#endif // LZMA_LOG_BSR
#define LZMA_NUM_REPS 4
@ -193,7 +280,7 @@ typedef struct
#define kNumLenToPosStates 4
#define kNumPosSlotBits 6
#define kDicLogSizeMin 0
// #define kDicLogSizeMin 0
#define kDicLogSizeMax 32
#define kDistTableSizeMax (kDicLogSizeMax * 2)
@ -299,7 +386,7 @@ typedef UInt32 CProbPrice;
typedef struct
{
void *matchFinderObj;
IMatchFinder matchFinder;
IMatchFinder2 matchFinder;
unsigned optCur;
unsigned optEnd;
@ -344,10 +431,14 @@ typedef struct
// begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt;
// end of CMatchFinderMt is used in BT and HASH threads
// #else
// CMatchFinder matchFinderBase;
#endif
CMatchFinder matchFinderBase;
// we suppose that we have 8-bytes alignment after CMatchFinder
#ifndef _7ZIP_ST
Byte pad[128];
#endif
@ -355,8 +446,10 @@ typedef struct
// LZ thread
CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
// we want {len , dist} pairs to be 8-bytes aligned in matches array
UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2];
// we want 8-bytes alignment here
UInt32 alignPrices[kAlignTableSize];
UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
@ -385,12 +478,19 @@ typedef struct
CSaveState saveState;
// BoolInt mf_Failure;
#ifndef _7ZIP_ST
Byte pad2[128];
#endif
} CLzmaEnc;
#define MFB (p->matchFinderBase)
/*
#ifndef _7ZIP_ST
#define MFB (p->matchFinderMt.MatchFinder)
#endif
*/
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr));
@ -455,41 +555,51 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.lc > LZMA_LC_MAX
|| props.lp > LZMA_LP_MAX
|| props.pb > LZMA_PB_MAX
|| props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
|| props.dictSize > kLzmaMaxHistorySize)
|| props.pb > LZMA_PB_MAX)
return SZ_ERROR_PARAM;
if (props.dictSize > kLzmaMaxHistorySize)
props.dictSize = kLzmaMaxHistorySize;
#ifndef LZMA_LOG_BSR
{
const UInt64 dict64 = props.dictSize;
if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress))
return SZ_ERROR_PARAM;
}
#endif
p->dictSize = props.dictSize;
{
unsigned fb = props.fb;
unsigned fb = (unsigned)props.fb;
if (fb < 5)
fb = 5;
if (fb > LZMA_MATCH_LEN_MAX)
fb = LZMA_MATCH_LEN_MAX;
p->numFastBytes = fb;
}
p->lc = props.lc;
p->lp = props.lp;
p->pb = props.pb;
p->lc = (unsigned)props.lc;
p->lp = (unsigned)props.lp;
p->pb = (unsigned)props.pb;
p->fastMode = (props.algo == 0);
// p->_maxMode = True;
p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
MFB.btMode = (Byte)(props.btMode ? 1 : 0);
{
unsigned numHashBytes = 4;
if (props.btMode)
{
if (props.numHashBytes < 2)
numHashBytes = 2;
else if (props.numHashBytes < 4)
numHashBytes = props.numHashBytes;
if (props.numHashBytes < 2) numHashBytes = 2;
else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes;
}
p->matchFinderBase.numHashBytes = numHashBytes;
if (props.numHashBytes >= 5) numHashBytes = 5;
MFB.numHashBytes = numHashBytes;
}
p->matchFinderBase.cutValue = props.mc;
MFB.cutValue = props.mc;
p->writeEndMark = props.writeEndMark;
p->writeEndMark = (BoolInt)props.writeEndMark;
#ifndef _7ZIP_ST
/*
@ -500,6 +610,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
}
*/
p->multiThread = (props.numThreads > 1);
p->matchFinderMt.btSync.affinity =
p->matchFinderMt.hashSync.affinity = props.affinity;
#endif
return SZ_OK;
@ -509,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
p->matchFinderBase.expectedDataSize = expectedDataSiize;
MFB.expectedDataSize = expectedDataSiize;
}
@ -536,8 +648,8 @@ static void RangeEnc_Construct(CRangeEnc *p)
p->bufBase = NULL;
}
#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize)
#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
#define RC_BUF_SIZE (1 << 16)
@ -556,12 +668,11 @@ static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->bufBase);
p->bufBase = 0;
p->bufBase = NULL;
}
static void RangeEnc_Init(CRangeEnc *p)
{
/* Stream.Init(); */
p->range = 0xFFFFFFFF;
p->cache = 0;
p->low = 0;
@ -575,12 +686,12 @@ static void RangeEnc_Init(CRangeEnc *p)
MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
{
size_t num;
if (p->res != SZ_OK)
return;
num = p->buf - p->bufBase;
if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
p->res = SZ_ERROR_WRITE;
const size_t num = (size_t)(p->buf - p->bufBase);
if (p->res == SZ_OK)
{
if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
p->res = SZ_ERROR_WRITE;
}
p->processed += num;
p->buf = p->bufBase;
}
@ -656,7 +767,7 @@ static void RangeEnc_FlushData(CRangeEnc *p)
range += newBound & mask; \
mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
mask += ((1 << kNumMoveBits) - 1); \
ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \
*(prob) = (CLzmaProb)ttt; \
RC_NORM(p) \
}
@ -749,7 +860,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
bitCount++;
}
}
ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
// printf("\n%3d: %5d", i, ProbPrices[i]);
}
}
@ -985,7 +1096,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
p->additionalOffset++;
p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
{
const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches);
// if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; }
numPairs = (unsigned)(d - p->matches);
}
*numPairsRes = numPairs;
#ifdef SHOW_STAT
@ -1001,7 +1116,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
if (numPairs == 0)
return 0;
{
unsigned len = p->matches[(size_t)numPairs - 2];
const unsigned len = p->matches[(size_t)numPairs - 2];
if (len != p->numFastBytes)
return len;
{
@ -1011,7 +1126,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
{
const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
const Byte *p2 = p1 + len;
ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1];
const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1];
const Byte *lim = p1 + numAvail;
for (; p2 != lim && *p2 == p2[dif]; p2++)
{}
@ -1167,6 +1282,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
repLens[i] = len;
if (len > repLens[repMaxIndex])
repMaxIndex = i;
if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization
break;
}
if (repLens[repMaxIndex] >= p->numFastBytes)
@ -1179,10 +1296,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
matches = p->matches;
#define MATCHES matches
// #define MATCHES p->matches
if (mainLen >= p->numFastBytes)
{
p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS;
p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS;
MOVE_POS(p, mainLen - 1)
return mainLen;
}
@ -1276,13 +1395,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (len < 2)
len = 2;
else
while (len > matches[offs])
while (len > MATCHES[offs])
offs += 2;
for (; ; len++)
{
COptimal *opt;
UInt32 dist = matches[(size_t)offs + 1];
UInt32 dist = MATCHES[(size_t)offs + 1];
UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len);
unsigned lenToPosState = GetLenToPosState(len);
@ -1306,7 +1425,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
opt->extra = 0;
}
if (len == matches[offs])
if (len == MATCHES[offs])
{
offs += 2;
if (offs == numPairs)
@ -1727,8 +1846,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (newLen > numAvail)
{
newLen = numAvail;
for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2);
matches[numPairs] = (UInt32)newLen;
for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2);
MATCHES[numPairs] = (UInt32)newLen;
numPairs += 2;
}
@ -1747,9 +1866,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
offs = 0;
while (startLen > matches[offs])
while (startLen > MATCHES[offs])
offs += 2;
dist = matches[(size_t)offs + 1];
dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
@ -1776,7 +1895,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
}
}
if (len == matches[offs])
if (len == MATCHES[offs])
{
// if (p->_maxMode) {
// MATCH : LIT : REP_0
@ -1841,7 +1960,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
offs += 2;
if (offs == numPairs)
break;
dist = matches[(size_t)offs + 1];
dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot);
}
@ -2059,8 +2178,23 @@ static SRes CheckErrors(CLzmaEnc *p)
return p->result;
if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE;
if (p->matchFinderBase.result != SZ_OK)
#ifndef _7ZIP_ST
if (
// p->mf_Failure ||
(p->mtMode &&
( // p->matchFinderMt.failure_LZ_LZ ||
p->matchFinderMt.failure_LZ_BT))
)
{
p->result = MY_HRES_ERROR__INTERNAL_ERROR;
// printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
}
#endif
if (MFB.result != SZ_OK)
p->result = SZ_ERROR_READ;
if (p->result != SZ_OK)
p->finished = True;
return p->result;
@ -2198,14 +2332,14 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
void LzmaEnc_Construct(CLzmaEnc *p)
static void LzmaEnc_Construct(CLzmaEnc *p)
{
RangeEnc_Construct(&p->rc);
MatchFinder_Construct(&p->matchFinderBase);
MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST
p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt);
p->matchFinderMt.MatchFinder = &p->matchFinderBase;
#endif
{
@ -2221,7 +2355,6 @@ void LzmaEnc_Construct(CLzmaEnc *p)
LzmaEnc_InitPriceTables(p->ProbPrices);
p->litProbs = NULL;
p->saveState.litProbs = NULL;
}
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
@ -2233,7 +2366,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
return p;
}
void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->litProbs);
ISzAlloc_Free(alloc, p->saveState.litProbs);
@ -2241,13 +2374,13 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
p->saveState.litProbs = NULL;
}
void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
#ifndef _7ZIP_ST
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif
MatchFinder_Free(&p->matchFinderBase, allocBig);
MatchFinder_Free(&MFB, allocBig);
LzmaEnc_FreeLits(p, alloc);
RangeEnc_Free(&p->rc, alloc);
}
@ -2259,11 +2392,18 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
}
MY_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
{
UInt32 nowPos32, startPos32;
if (p->needInit)
{
#ifndef _7ZIP_ST
if (p->mtMode)
{
RINOK(MatchFinderMt_InitMt(&p->matchFinderMt));
}
#endif
p->matchFinder.Init(p->matchFinderObj);
p->needInit = 0;
}
@ -2521,12 +2661,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
// { int y; for (y = 0; y < 100; y++) {
FillDistancesPrices(p);
// }}
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
}
if (p->repLenEncCounter <= 0)
{
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
}
@ -2559,11 +2699,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
UInt32 beforeSize = kNumOpts;
UInt32 dictSize;
if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM;
#ifndef _7ZIP_ST
p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif
{
@ -2582,36 +2724,56 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
}
}
p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
if (beforeSize + p->dictSize < keepWindowSize)
beforeSize = keepWindowSize - p->dictSize;
dictSize = p->dictSize;
if (dictSize == ((UInt32)2 << 30) ||
dictSize == ((UInt32)3 << 30))
{
/* 21.03 : here we reduce the dictionary for 2 reasons:
1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary.
2) we want to elimate useless last MatchFinder_Normalize3() for corner cases,
where data size is aligned for 1 GB: 5/6/8 GB.
That reducing must be >= 1 for such corner cases. */
dictSize -= 1;
}
if (beforeSize + dictSize < keepWindowSize)
beforeSize = keepWindowSize - dictSize;
/* in worst case we can look ahead for
max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes.
we send larger value for (keepAfter) to MantchFinder_Create():
(numFastBytes + LZMA_MATCH_LEN_MAX + 1)
*/
#ifndef _7ZIP_ST
if (p->mtMode)
{
RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
LZMA_MATCH_LEN_MAX
+ 1 /* 18.04 */
RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig));
p->matchFinderObj = &p->matchFinderMt;
p->matchFinderBase.bigHash = (Byte)(
(p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
MFB.bigHash = (Byte)(
(p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
}
else
#endif
{
if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
if (!MatchFinder_Create(&MFB, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */
, allocBig))
return SZ_ERROR_MEM;
p->matchFinderObj = &p->matchFinderBase;
MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
p->matchFinderObj = &MFB;
MatchFinder_CreateVTable(&MFB, &p->matchFinder);
}
return SZ_OK;
}
void LzmaEnc_Init(CLzmaEnc *p)
static void LzmaEnc_Init(CLzmaEnc *p)
{
unsigned i;
p->state = 0;
@ -2675,12 +2837,14 @@ void LzmaEnc_Init(CLzmaEnc *p)
p->additionalOffset = 0;
p->pbMask = (1 << p->pb) - 1;
p->pbMask = ((unsigned)1 << p->pb) - 1;
p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
// p->mf_Failure = False;
}
void LzmaEnc_InitPrices(CLzmaEnc *p)
static void LzmaEnc_InitPrices(CLzmaEnc *p)
{
if (!p->fastMode)
{
@ -2694,8 +2858,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p)
p->repLenEncCounter = REP_LEN_COUNT;
LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices);
LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices);
}
static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
@ -2719,7 +2883,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
p->matchFinderBase.stream = inStream;
MFB.stream = inStream;
p->needInit = 1;
p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
@ -2730,16 +2894,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
p->matchFinderBase.stream = inStream;
MFB.stream = inStream;
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
}
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen)
{
p->matchFinderBase.directInput = 1;
p->matchFinderBase.bufferBase = (Byte *)src;
p->matchFinderBase.directInputRem = srcLen;
MFB.directInput = 1;
MFB.bufferBase = (Byte *)src;
MFB.directInputRem = srcLen;
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
@ -2781,19 +2945,23 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
size = p->rem;
p->overflow = True;
}
memcpy(p->data, data, size);
p->rem -= size;
p->data += size;
if (size != 0)
{
memcpy(p->data, data, size);
p->rem -= size;
p->data += size;
}
return size;
}
/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
{
const CLzmaEnc *p = (CLzmaEnc *)pp;
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
}
*/
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
{
@ -2841,6 +3009,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
}
MY_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
{
SRes res = SZ_OK;
@ -2870,7 +3039,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
LzmaEnc_Finish(p);
/*
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase))
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
res = SZ_ERROR_FAIL;
}
*/
@ -2889,35 +3058,43 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
unsigned i;
UInt32 dictSize = p->dictSize;
if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
if (dictSize >= ((UInt32)1 << 22))
{
UInt32 kDictMask = ((UInt32)1 << 20) - 1;
if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
dictSize = (dictSize + kDictMask) & ~kDictMask;
}
else for (i = 11; i <= 30; i++)
{
if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
}
const CLzmaEnc *p = (const CLzmaEnc *)pp;
const UInt32 dictSize = p->dictSize;
UInt32 v;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
// we write aligned dictionary value to properties for lzma decoder
if (dictSize >= ((UInt32)1 << 21))
{
const UInt32 kDictMask = ((UInt32)1 << 20) - 1;
v = (dictSize + kDictMask) & ~kDictMask;
if (v < dictSize)
v = dictSize;
}
else
{
unsigned i = 11 * 2;
do
{
v = (UInt32)(2 + (i & 1)) << (i >> 1);
i++;
}
while (v < dictSize);
}
for (i = 0; i < 4; i++)
props[1 + i] = (Byte)(dictSize >> (8 * i));
return SZ_OK;
SetUi32(props + 1, v);
return SZ_OK;
}
}
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp)
{
return ((CLzmaEnc *)pp)->writeEndMark;
return (unsigned)((CLzmaEnc *)pp)->writeEndMark;
}
@ -2974,3 +3151,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
LzmaEnc_Destroy(p, alloc, allocBig);
return res;
}
/*
#ifndef _7ZIP_ST
void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2])
{
const CLzmaEnc *p = (CLzmaEnc *)pp;
lz_threads[0] = p->matchFinderMt.hashSync.thread;
lz_threads[1] = p->matchFinderMt.btSync.thread;
}
#endif
*/

View file

@ -1,5 +1,5 @@
/* LzmaEnc.h -- LZMA Encoder
2017-07-27 : Igor Pavlov : Public domain */
2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
UInt64 affinity;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);

View file

@ -1,5 +1,5 @@
/* Ppmd.h -- PPMD codec common code
2017-04-03 : Igor Pavlov : Public domain
2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H
@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
EXTERN_C_BEGIN
#ifdef MY_CPU_32BIT
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
/*
PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields.
if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
PPMD code works slightly faster in (PPMD_32BIT) mode.
*/
#define PPMD_32BIT
#endif
@ -28,7 +37,7 @@ EXTERN_C_BEGIN
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
#pragma pack(push, 1)
MY_CPU_pragma_pack_push_1
/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
/* SEE-contexts for PPM-contexts with masked symbols */
@ -40,41 +49,114 @@ typedef struct
} CPpmd_See;
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
{ (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); }
{ (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
typedef struct
{
Byte Symbol;
Byte Freq;
UInt16 SuccessorLow;
UInt16 SuccessorHigh;
UInt16 Successor_0;
UInt16 Successor_1;
} CPpmd_State;
#pragma pack(pop)
typedef struct CPpmd_State2_
{
Byte Symbol;
Byte Freq;
} CPpmd_State2;
typedef
#ifdef PPMD_32BIT
CPpmd_State *
#else
UInt32
#endif
CPpmd_State_Ref;
typedef struct CPpmd_State4_
{
UInt16 Successor_0;
UInt16 Successor_1;
} CPpmd_State4;
typedef
#ifdef PPMD_32BIT
void *
#else
UInt32
#endif
CPpmd_Void_Ref;
MY_CPU_pragma_pop
/*
PPMD code can write full CPpmd_State structure data to CPpmd*_Context
at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
If we use pointers to different types, but that point to shared
memory space, we can have aliasing problem (strict aliasing).
XLC compiler in -O2 mode can change the order of memory write instructions
in relation to read instructions, if we have use pointers to different types.
To solve that aliasing problem we use combined CPpmd*_Context structure
with unions that contain the fields from both structures:
the original CPpmd*_Context and CPpmd_State.
So we can access the fields from both structures via one pointer,
and the compiler doesn't change the order of write instructions
in relation to read instructions.
If we don't use memory write instructions to shared memory in
some local code, and we use only reading instructions (read only),
then probably it's safe to use pointers to different types for reading.
*/
#ifdef PPMD_32BIT
#define Ppmd_Ref_Type(type) type *
#define Ppmd_GetRef(p, ptr) (ptr)
#define Ppmd_GetPtr(p, ptr) (ptr)
#define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
#else
#define Ppmd_Ref_Type(type) UInt32
#define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
#define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
#endif // PPMD_32BIT
typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref;
typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref;
/*
#ifdef MY_CPU_LE_UNALIGN
// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
#else
*/
/*
We can write 16-bit halves to 32-bit (Successor) field in any selected order.
But the native order is more consistent way.
So we use the native order, if LE/BE order can be detected here at compile time.
*/
#ifdef MY_CPU_BE
#define Ppmd_GET_SUCCESSOR(p) \
( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
#define Ppmd_SET_SUCCESSOR(p, v) { \
(p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
(p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
#else
#define Ppmd_GET_SUCCESSOR(p) \
( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
#define Ppmd_SET_SUCCESSOR(p, v) { \
(p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
(p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
#endif
// #endif
typedef
#ifdef PPMD_32BIT
Byte *
#else
UInt32
#endif
CPpmd_Byte_Ref;
#define PPMD_SetAllBitsIn256Bytes(p) \
{ size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \

File diff suppressed because it is too large Load diff

View file

@ -1,10 +1,8 @@
/* Ppmd7.h -- PPMdH compression codec
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* This code supports virtual RangeDecoder and includes the implementation
of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H.
If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */
/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD7_H
#define __PPMD7_H
@ -21,23 +19,56 @@ EXTERN_C_BEGIN
struct CPpmd7_Context_;
typedef
#ifdef PPMD_32BIT
struct CPpmd7_Context_ *
#else
UInt32
#endif
CPpmd7_Context_Ref;
typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref;
// MY_CPU_pragma_pack_push_1
typedef struct CPpmd7_Context_
{
UInt16 NumStats;
UInt16 SummFreq;
CPpmd_State_Ref Stats;
union
{
UInt16 SummFreq;
CPpmd_State2 State2;
} Union2;
union
{
CPpmd_State_Ref Stats;
CPpmd_State4 State4;
} Union4;
CPpmd7_Context_Ref Suffix;
} CPpmd7_Context;
#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq)
// MY_CPU_pragma_pop
#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
typedef struct
{
UInt32 Range;
UInt32 Code;
UInt32 Low;
IByteIn *Stream;
} CPpmd7_RangeDec;
typedef struct
{
UInt32 Range;
Byte Cache;
// Byte _dummy_[3];
UInt64 Low;
UInt64 CacheSize;
IByteOut *Stream;
} CPpmd7z_RangeEnc;
typedef struct
{
@ -48,17 +79,30 @@ typedef struct
UInt32 Size;
UInt32 GlueCount;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 AlignOffset;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
Byte Indx2Units[PPMD_NUM_INDEXES];
union
{
CPpmd7_RangeDec dec;
CPpmd7z_RangeEnc enc;
} rc;
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256];
Byte NS2BSIndx[256], NS2Indx[256];
Byte ExpEscape[16];
CPpmd_See DummySee, See[25][16];
UInt16 BinSumm[128][64];
// int LastSymbol;
} CPpmd7;
void Ppmd7_Construct(CPpmd7 *p);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
/* ---------- Internal Functions ---------- */
extern const Byte PPMD7_kExpEscape[16];
#ifdef PPMD_32BIT
#define Ppmd7_GetPtr(p, ptr) (ptr)
#define Ppmd7_GetContext(p, ptr) (ptr)
#define Ppmd7_GetStats(p, ctx) ((ctx)->Stats)
#else
#define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs)))
#define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats)))
#endif
#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context)
#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd7_Update1(CPpmd7 *p);
void Ppmd7_Update1_0(CPpmd7 *p);
void Ppmd7_Update2(CPpmd7 *p);
void Ppmd7_UpdateBin(CPpmd7 *p);
#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3))
#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4))
// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3))
// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4))
#define Ppmd7_GetBinSumm(p) \
&p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \
p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \
(p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \
2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \
((p->RunLength >> 26) & 0x20)]
&p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \
[ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \
+ PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \
+ (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ]
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
/*
We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure:
1) Ppmd7a_*: original PPMdH
2) Ppmd7z_*: modified PPMdH with 7z Range Coder
Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH)
*/
/* ---------- Decode ---------- */
typedef struct IPpmd7_RangeDec IPpmd7_RangeDec;
#define PPMD7_SYM_END (-1)
#define PPMD7_SYM_ERROR (-2)
struct IPpmd7_RangeDec
{
UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total);
void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size);
UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0);
};
/*
You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init()
typedef struct
{
IPpmd7_RangeDec vt;
UInt32 Range;
UInt32 Code;
IByteIn *Stream;
} CPpmd7z_RangeDec;
Ppmd7*_DecodeSymbol()
out:
>= 0 : decoded byte
-1 : PPMD7_SYM_END : End of payload marker
-2 : PPMD7_SYM_ERROR : Data error
*/
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p);
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p);
/* Ppmd7a_* : original PPMdH */
BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p);
#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7a_DecodeSymbol(CPpmd7 *p);
/* Ppmd7z_* : modified PPMdH with 7z Range Coder */
BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p);
#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc);
int Ppmd7z_DecodeSymbol(CPpmd7 *p);
// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim);
/* ---------- Encode ---------- */
typedef struct
{
UInt64 Low;
UInt32 Range;
Byte Cache;
UInt64 CacheSize;
IByteOut *Stream;
} CPpmd7z_RangeEnc;
void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p);
void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p);
void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol);
void Ppmd7z_Init_RangeEnc(CPpmd7 *p);
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p);
// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol);
void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim);
EXTERN_C_END

View file

@ -1,6 +1,8 @@
/* Ppmd7Dec.c -- PPMdH Decoder
2018-07-04 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#define kTopValue (1 << 24)
BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p)
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
if (IByteIn_Read(p->Stream) != 0)
if (READ_BYTE(p) != 0)
return False;
for (i = 0; i < 4; i++)
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt);
static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total)
#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \
{ (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8;
#define RC_NORM_1(p) RC_NORM_BASE(p) }
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
#define R (&p->rc.dec)
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
GET_Ppmd7z_RangeDec
return p->Code / (p->Range /= total);
R->Code -= start * R->Range;
R->Range *= size;
RC_NORM_LOCAL(R)
}
static void Range_Normalize(CPpmd7z_RangeDec *p)
{
if (p->Range < kTopValue)
{
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
p->Range <<= 8;
if (p->Range < kTopValue)
{
p->Code = (p->Code << 8) | IByteIn_Read(p->Stream);
p->Range <<= 8;
}
}
}
static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size)
{
GET_Ppmd7z_RangeDec
p->Code -= start * p->Range;
p->Range *= size;
Range_Normalize(p);
}
static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0)
{
GET_Ppmd7z_RangeDec
UInt32 newBound = (p->Range >> 14) * size0;
UInt32 symbol;
if (p->Code < newBound)
{
symbol = 0;
p->Range = newBound;
}
else
{
symbol = 1;
p->Code -= newBound;
p->Range -= newBound;
}
Range_Normalize(p);
return symbol;
}
void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p)
{
p->vt.GetThreshold = Range_GetThreshold;
p->vt.Decode = Range_Decode;
p->vt.DecodeBit = Range_DecodeBit;
}
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define MASK(sym) ((signed char *)charMask)[sym]
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc)
#define MASK(sym) ((unsigned char *)charMask)[sym]
// MY_FORCE_INLINE
// static
int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq))
UInt32 summFreq = p->MinContext->Union2.SummFreq;
count = RC_GetThreshold(summFreq);
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
Byte symbol;
rc->Decode(rc, 0, s->Freq);
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd7_Update1_0(p);
return symbol;
return sym;
}
p->PrevSuccess = 0;
i = p->MinContext->NumStats - 1;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((hiCnt += (++s)->Freq) > count)
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte symbol;
rc->Decode(rc, hiCnt - s->Freq, s->Freq);
Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd7_Update1(p);
return symbol;
return sym;
}
}
while (--i);
if (count >= p->MinContext->SummFreq)
return -2;
p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol];
rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt);
if (hiCnt >= summFreq)
return PPMD7_SYM_ERROR;
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
i = p->MinContext->NumStats - 1;
do { MASK((--s)->Symbol) = 0; } while (--i);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
if (rc->DecodeBit(rc, *prob) == 0)
UInt32 pr = *prob;
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte symbol;
*prob = (UInt16)PPMD_UPDATE_PROB_0(*prob);
symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
Ppmd7_UpdateBin(p);
return symbol;
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM_1(R)
/* we can use single byte normalization here because of
(min(BinSumm[][]) = 95) > (1 << (14 - 8)) */
// sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return sym;
}
*prob = (UInt16)PPMD_UPDATE_PROB_1(*prob);
p->InitEsc = PPMD7_kExpEscape[*prob >> 10];
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(size0);
R->Code -= size0;
R->Range -= size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_State *ps[256], *s;
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
unsigned i, num, numMasked = p->MinContext->NumStats;
CPpmd7_Context *mc;
unsigned numMasked;
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!p->MinContext->Suffix)
return -1;
p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix);
if (!mc->Suffix)
return PPMD7_SYM_END;
mc = Ppmd7_GetContext(p, mc->Suffix);
}
while (p->MinContext->NumStats == numMasked);
hiCnt = 0;
s = Ppmd7_GetStats(p, p->MinContext);
i = 0;
num = p->MinContext->NumStats - numMasked;
do
{
int k = (int)(MASK(s->Symbol));
hiCnt += (s->Freq & k);
ps[i] = s++;
i -= k;
}
while (i != num);
while (mc->NumStats == numMasked);
s = Ppmd7_GetStats(p, mc);
{
unsigned num = mc->NumStats;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
count = rc->GetThreshold(rc, freqSum);
count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
Byte symbol;
CPpmd_State **pps = ps;
for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++);
s = *pps;
rc->Decode(rc, hiCnt - s->Freq, s->Freq);
Byte sym;
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
};
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
symbol = s->Symbol;
sym = s->Symbol;
Ppmd7_Update2(p);
return symbol;
return sym;
}
if (count >= freqSum)
return -2;
rc->Decode(rc, hiCnt, freqSum - hiCnt);
return PPMD7_SYM_ERROR;
RC_Decode(hiCnt, freqSum - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
do { MASK(ps[--i]->Symbol) = 0; } while (i != 0);
s = Ppmd7_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}
/*
Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
{
int sym = 0;
if (buf != lim)
do
{
sym = Ppmd7z_DecodeSymbol(p);
if (sym < 0)
break;
*buf = (Byte)sym;
}
while (++buf < lim);
p->LastSymbol = sym;
return buf;
}
*/

View file

@ -1,8 +1,10 @@
/* Threads.c -- multithreading library
2017-06-26 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
#ifndef UNDER_CE
#include <process.h>
#endif
@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p)
return 0;
}
WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); }
WRes Handle_WaitObject(HANDLE h)
{
DWORD dw = WaitForSingleObject(h, INFINITE);
/*
(dw) result:
WAIT_OBJECT_0 // 0
WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space
WAIT_FAILED // 0xFFFFFFFF
*/
if (dw == WAIT_FAILED)
{
dw = GetLastError();
if (dw == 0)
return WAIT_FAILED;
}
return (WRes)dw;
}
#define Thread_Wait(p) Handle_WaitObject(*(p))
WRes Thread_Wait_Close(CThread *p)
{
WRes res = Thread_Wait(p);
WRes res2 = Thread_Close(p);
return (res != 0 ? res : res2);
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
#ifdef UNDER_CE
DWORD threadId;
*p = CreateThread(0, 0, func, param, 0, &threadId);
#else
unsigned threadId;
*p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId);
*p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
#endif
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return HandleToWRes(*p);
}
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
#ifdef UNDER_CE
UNUSED_VAR(affinity)
return Thread_Create(p, func, param);
#else
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
HANDLE h;
WRes wres;
unsigned threadId;
h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
*p = h;
wres = HandleToWRes(h);
if (h)
{
{
// DWORD_PTR prevMask =
SetThreadAffinityMask(h, (DWORD_PTR)affinity);
/*
if (prevMask == 0)
{
// affinity change is non-critical error, so we can ignore it
// wres = GetError();
}
*/
}
{
DWORD prevSuspendCount = ResumeThread(h);
/* ResumeThread() returns:
0 : was_not_suspended
1 : was_resumed
-1 : error
*/
if (prevSuspendCount == (DWORD)-1)
wres = GetError();
}
}
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return wres;
#endif
}
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
{
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
@ -68,10 +145,22 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
return HandleToWRes(*p);
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// if (Semaphore_IsCreated(p))
{
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
}
return Semaphore_Create(p, initCount, maxCount);
}
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
@ -80,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
/* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */
/* InitializeCriticalSection() can raise exception:
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
Windows Vista+ : no exceptions */
#ifdef _MSC_VER
__try
#endif
@ -89,7 +180,361 @@ WRes CriticalSection_Init(CCriticalSection *p)
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
__except (EXCEPTION_EXECUTE_HANDLER) { return 1; }
__except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
#else // _WIN32
// ---------- POSIX ----------
#ifndef __APPLE__
#ifndef _7ZIP_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
#define _GNU_SOURCE
#endif
#endif
#include "Threads.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifdef _7ZIP_AFFINITY_SUPPORTED
// #include <sched.h>
#endif
// #include <stdio.h>
// #define PRF(p) p
#define PRF(p)
#define Print(s) PRF(printf("\n%s\n", s))
// #include <stdio.h>
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
{
// new thread in Posix probably inherits affinity from parrent thread
Print("Thread_Create_With_CpuSet");
pthread_attr_t attr;
int ret;
// int ret2;
p->_created = 0;
RINOK(pthread_attr_init(&attr));
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
if (!ret)
{
if (cpuSet)
{
#ifdef _7ZIP_AFFINITY_SUPPORTED
/*
printf("\n affinity :");
unsigned i;
for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
{
Byte b = *((const Byte *)cpuSet + i);
char temp[32];
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
temp[0] = GET_HEX_CHAR((b & 0xF));
temp[1] = GET_HEX_CHAR((b >> 4));
// temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
// temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
temp[2] = 0;
printf("%s", temp);
}
printf("\n");
*/
// ret2 =
pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
#endif
}
ret = pthread_create(&p->_tid, &attr, func, param);
if (!ret)
{
p->_created = 1;
/*
if (cpuSet)
{
// ret2 =
pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
}
*/
}
}
// ret2 =
pthread_attr_destroy(&attr);
// if (ret2 != 0) ret = ret2;
return ret;
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
return Thread_Create_With_CpuSet(p, func, param, NULL);
}
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
Print("Thread_Create_WithAffinity");
CCpuSet cs;
unsigned i;
CpuSet_Zero(&cs);
for (i = 0; i < sizeof(affinity) * 8; i++)
{
if (affinity == 0)
break;
if (affinity & 1)
{
CpuSet_Set(&cs, i);
}
affinity >>= 1;
}
return Thread_Create_With_CpuSet(p, func, param, &cs);
}
WRes Thread_Close(CThread *p)
{
// Print("Thread_Close");
int ret;
if (!p->_created)
return 0;
ret = pthread_detach(p->_tid);
p->_tid = 0;
p->_created = 0;
return ret;
}
WRes Thread_Wait_Close(CThread *p)
{
// Print("Thread_Wait_Close");
void *thread_return;
int ret;
if (!p->_created)
return EINVAL;
ret = pthread_join(p->_tid, &thread_return);
// probably we can't use that (_tid) after pthread_join(), so we close thread here
p->_created = 0;
p->_tid = 0;
return ret;
}
static WRes Event_Create(CEvent *p, int manualReset, int signaled)
{
RINOK(pthread_mutex_init(&p->_mutex, NULL));
RINOK(pthread_cond_init(&p->_cond, NULL));
p->_manual_reset = manualReset;
p->_state = (signaled ? True : False);
p->_created = 1;
return 0;
}
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
{ return Event_Create(p, True, signaled); }
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
{ return ManualResetEvent_Create(p, 0); }
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
{ return Event_Create(p, False, signaled); }
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
{ return AutoResetEvent_Create(p, 0); }
WRes Event_Set(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond);
int res2 = pthread_mutex_unlock(&p->_mutex);
return (res2 ? res2 : res1);
}
WRes Event_Reset(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
p->_state = False;
return pthread_mutex_unlock(&p->_mutex);
}
WRes Event_Wait(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
while (p->_state == False)
{
// ETIMEDOUT
// ret =
pthread_cond_wait(&p->_cond, &p->_mutex);
// if (ret != 0) break;
}
if (p->_manual_reset == False)
{
p->_state = False;
}
return pthread_mutex_unlock(&p->_mutex);
}
WRes Event_Close(CEvent *p)
{
if (!p->_created)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (initCount > maxCount || maxCount < 1)
return EINVAL;
RINOK(pthread_mutex_init(&p->_mutex, NULL));
RINOK(pthread_cond_init(&p->_cond, NULL));
p->_count = initCount;
p->_maxCount = maxCount;
p->_created = 1;
return 0;
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (Semaphore_IsCreated(p))
{
/*
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
*/
if (initCount > maxCount || maxCount < 1)
return EINVAL;
// return EINVAL; // for debug
p->_count = initCount;
p->_maxCount = maxCount;
return 0;
}
return Semaphore_Create(p, initCount, maxCount);
}
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
{
UInt32 newCount;
int ret;
if (releaseCount < 1)
return EINVAL;
RINOK(pthread_mutex_lock(&p->_mutex));
newCount = p->_count + releaseCount;
if (newCount > p->_maxCount)
ret = ERROR_TOO_MANY_POSTS; // EINVAL;
else
{
p->_count = newCount;
ret = pthread_cond_broadcast(&p->_cond);
}
RINOK(pthread_mutex_unlock(&p->_mutex));
return ret;
}
WRes Semaphore_Wait(CSemaphore *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
while (p->_count < 1)
{
pthread_cond_wait(&p->_cond, &p->_mutex);
}
p->_count--;
return pthread_mutex_unlock(&p->_mutex);
}
WRes Semaphore_Close(CSemaphore *p)
{
if (!p->_created)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
WRes CriticalSection_Init(CCriticalSection *p)
{
// Print("CriticalSection_Init");
if (!p)
return EINTR;
return pthread_mutex_init(&p->_mutex, NULL);
}
void CriticalSection_Enter(CCriticalSection *p)
{
// Print("CriticalSection_Enter");
if (p)
{
// int ret =
pthread_mutex_lock(&p->_mutex);
}
}
void CriticalSection_Leave(CCriticalSection *p)
{
// Print("CriticalSection_Leave");
if (p)
{
// int ret =
pthread_mutex_unlock(&p->_mutex);
}
}
void CriticalSection_Delete(CCriticalSection *p)
{
// Print("CriticalSection_Delete");
if (p)
{
// int ret =
pthread_mutex_destroy(&p->_mutex);
}
}
LONG InterlockedIncrement(LONG volatile *addend)
{
// Print("InterlockedIncrement");
#ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend + 1;
*addend = val;
return val;
#else
return __sync_add_and_fetch(addend, 1);
#endif
}
#endif // _WIN32

View file

@ -1,38 +1,110 @@
/* Threads.h -- multithreading library
2017-06-18 : Igor Pavlov : Public domain */
2021-07-12 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H
#define __7Z_THREADS_H
#ifdef _WIN32
#include <windows.h>
#include <Windows.h>
#else
#if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef _7ZIP_AFFINITY_DISABLE
#define _7ZIP_AFFINITY_SUPPORTED
// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE
#endif
#endif
#endif
#include <pthread.h>
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
#ifdef _WIN32
WRes HandlePtr_Close(HANDLE *h);
WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread;
#define Thread_Construct(p) *(p) = NULL
#define Thread_Construct(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p)
#define Thread_Wait(p) Handle_WaitObject(*(p))
// #define Thread_Wait(p) Handle_WaitObject(*(p))
typedef
#ifdef UNDER_CE
DWORD
#ifdef UNDER_CE
DWORD
#else
unsigned
#endif
THREAD_FUNC_RET_TYPE;
typedef DWORD_PTR CAffinityMask;
typedef DWORD_PTR CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; }
#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); }
#else // _WIN32
typedef struct _CThread
{
pthread_t _tid;
int _created;
} CThread;
#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; }
#define Thread_WasCreated(p) ((p)->_created != 0)
WRes Thread_Close(CThread *p);
// #define Thread_Wait Thread_Wait_Close
typedef void * THREAD_FUNC_RET_TYPE;
typedef UInt64 CAffinityMask;
#ifdef _7ZIP_AFFINITY_SUPPORTED
typedef cpu_set_t CCpuSet;
#define CpuSet_Zero(p) CPU_ZERO(p)
#define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
#else
unsigned
typedef UInt64 CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; }
#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); }
#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
#endif
THREAD_FUNC_RET_TYPE;
#endif // _WIN32
#define THREAD_FUNC_CALL_TYPE MY_STD_CALL
#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE
typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *);
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param);
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity);
WRes Thread_Wait_Close(CThread *p);
#ifdef _WIN32
#define Thread_Create_With_CpuSet(p, func, param, cs) \
Thread_Create_With_Affinity(p, func, param, *cs)
#else
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
#endif
#ifdef _WIN32
typedef HANDLE CEvent;
typedef CEvent CAutoResetEvent;
@ -54,6 +126,7 @@ typedef HANDLE CSemaphore;
#define Semaphore_Close(p) HandlePtr_Close(p)
#define Semaphore_Wait(p) Handle_WaitObject(*(p))
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
WRes Semaphore_Release1(CSemaphore *p);
@ -63,6 +136,68 @@ WRes CriticalSection_Init(CCriticalSection *p);
#define CriticalSection_Enter(p) EnterCriticalSection(p)
#define CriticalSection_Leave(p) LeaveCriticalSection(p)
#else // _WIN32
typedef struct _CEvent
{
int _created;
int _manual_reset;
int _state;
pthread_mutex_t _mutex;
pthread_cond_t _cond;
} CEvent;
typedef CEvent CAutoResetEvent;
typedef CEvent CManualResetEvent;
#define Event_Construct(p) (p)->_created = 0
#define Event_IsCreated(p) ((p)->_created)
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
WRes Event_Set(CEvent *p);
WRes Event_Reset(CEvent *p);
WRes Event_Wait(CEvent *p);
WRes Event_Close(CEvent *p);
typedef struct _CSemaphore
{
int _created;
UInt32 _count;
UInt32 _maxCount;
pthread_mutex_t _mutex;
pthread_cond_t _cond;
} CSemaphore;
#define Semaphore_Construct(p) (p)->_created = 0
#define Semaphore_IsCreated(p) ((p)->_created)
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount);
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num);
#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1)
WRes Semaphore_Wait(CSemaphore *p);
WRes Semaphore_Close(CSemaphore *p);
typedef struct _CCriticalSection
{
pthread_mutex_t _mutex;
} CCriticalSection;
WRes CriticalSection_Init(CCriticalSection *p);
void CriticalSection_Delete(CCriticalSection *cs);
void CriticalSection_Enter(CCriticalSection *cs);
void CriticalSection_Leave(CCriticalSection *cs);
LONG InterlockedIncrement(LONG volatile *addend);
#endif // _WIN32
EXTERN_C_END
#endif

View file

@ -25,7 +25,7 @@ set( LZMA_FILES
C/Ppmd7Dec.c )
if( WIN32 )
set( LZMA_FILES ${LZMA_FILES} C/LzFindMt.c C/Threads.c )
set( LZMA_FILES ${LZMA_FILES} C/LzFindMt.c C/LzFindOpt.c C/Threads.c )
else()
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_7ZIP_ST" )
endif()

View file

@ -1,6 +1,71 @@
HISTORY of the LZMA SDK
-----------------------
21.06 2021-11-24
-------------------------
- Bug in LZMA encoder in file LzmaEnc.c was fixed:
LzmaEnc_MemEncode(), LzmaEncode() and LzmaCompress() could work incorrectly,
if size value for output buffer is smaller than size required for all compressed data.
LzmaEnc_Encode() could work incorrectly,
if callback ISeqOutStream::Write() doesn't write all compressed data.
NCompress::NLzma::CEncoder::Code() could work incorrectly,
if callback ISequentialOutStream::Write() returns error code.
- Bug in versions 21.00-21.05 was fixed:
7-Zip didn't set attributes of directories during archive extracting.
21.04 beta 2021-11-02
-------------------------
- 7-Zip now reduces the number of working CPU threads for compression,
if RAM size is not enough for compression with big LZMA2 dictionary.
- 7-Zip now can create and check "file.sha256" text files that contain the list
of file names and SHA-256 checksums in format compatible with sha256sum program.
21.03 beta 2021-07-20
-------------------------
- The maximum dictionary size for LZMA/LZMA2 compressing was increased to 4 GB (3840 MiB).
- Minor speed optimizations in LZMA/LZMA2 compressing.
21.02 alpha 2021-05-06
-------------------------
- The command line version of 7-Zip for macOS was released.
- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux
was increased by 20%-60%.
21.01 alpha 2021-03-09
-------------------------
- The command line version of 7-Zip for Linux was released.
- The improvements for speed of ARM64 version using hardware CPU instructions
for AES, CRC-32, SHA-1 and SHA-256.
- Some bugs were fixed.
20.02 alpha 2020-08-08
-------------------------
- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64.
It allows to increase the compression speed for big 7z archives, if there is a big number
of CPU cores and threads.
- The speed of PPMd compressing/decompressing was increased for 7z archives.
- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system
to modify "Last Access Time" property of source files for archiving and hashing operations.
- Some bugs were fixed.
20.00 alpha 2020-02-06
-------------------------
- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5,
that can work faster than bt4 and hc4 match finders for the data with big redundancy.
- The compression ratio was improved for Fast and Fastest compression levels with the
following default settings:
- Fastest level (-mx1) : hc5 match finder with 256 KB dictionary.
- Fast level (-mx3) : hc5 match finder with 4 MB dictionary.
- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra
compression levels.
19.00 2019-02-21
-------------------------
- Encryption strength for 7z archives was increased:

View file

@ -1,4 +1,4 @@
LZMA SDK 19.00
LZMA SDK 21.06
--------------
LZMA SDK provides the documentation, samples, header files,
@ -62,14 +62,61 @@ LZMA SDK Contents
UNIX/Linux version
------------------
To compile C++ version of file->file LZMA encoding, go to directory
CPP/7zip/Bundles/LzmaCon
and call make to recompile it:
make -f makefile.gcc clean all
There are several otpions to compile 7-Zip with different compilers: gcc and clang.
Also 7-Zip code contains two versions for some critical parts of code: in C and in Assembeler.
So if you compile the version with Assembeler code, you will get faster 7-Zip binary.
7-Zip's assembler code uses the following syntax for different platforms:
1) x86 and x86-64 (AMD64): MASM syntax.
There are 2 programs that supports MASM syntax in Linux.
' 'Asmc Macro Assembler and JWasm. But JWasm now doesn't support some
cpu instructions used in 7-Zip.
So you must install Asmc Macro Assembler in Linux, if you want to compile fastest version
of 7-Zip x86 and x86-64:
https://github.com/nidud/asmc
2) arm64: GNU assembler for ARM64 with preprocessor.
That systax of that arm64 assembler code in 7-Zip is supported by GCC and CLANG for ARM64.
There are different binaries that can be compiled from 7-Zip source.
There are 2 main files in folder for compiling:
makefile - that can be used for compiling Windows version of 7-Zip with nmake command
makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip with make command
At first you must change the current folder to folder that contains `makefile.gcc`:
cd CPP/7zip/Bundles/Alone7z
Then you can compile `makefile.gcc` with the command:
make -j -f makefile.gcc
Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile
7-Zip binaries with optimized code and optimzing options.
To compile with GCC without assembler:
cd CPP/7zip/Bundles/Alone7z
make -j -f ../../cmpl_gcc.mak
To compile with CLANG without assembler:
make -j -f ../../cmpl_clang.mak
To compile 7-Zip for x86-64 with asmc assembler:
make -j -f ../../cmpl_gcc_x64.mak
To compile 7-Zip for arm64 with assembler:
make -j -f ../../cmpl_gcc_arm64.mak
To compile 7-Zip for arm64 for macOS:
make -j -f ../../cmpl_mac_arm64.mak
Also you can change some compiler options in the mak files:
cmpl_gcc.mak
var_gcc.mak
warn_gcc.mak
In some UNIX/Linux versions you must compile LZMA with static libraries.
To compile with static libraries, you can use
LIB = -lm -static
Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux):