From 4db7a20f7ab1c4bdb631e97ed4c7f86038c4336e Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Thu, 18 Nov 2021 21:29:08 -0500 Subject: [PATCH 01/39] - apply clearscope to `ApplyDamageFactor` as well --- wadsrc/static/zscript/actors/actor.zs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wadsrc/static/zscript/actors/actor.zs b/wadsrc/static/zscript/actors/actor.zs index b47ee93655..3124a7bd07 100644 --- a/wadsrc/static/zscript/actors/actor.zs +++ b/wadsrc/static/zscript/actors/actor.zs @@ -684,7 +684,7 @@ class Actor : Thinker native native Actor, Actor SpawnPlayerMissile(class type, double angle = 1e37, double x = 0, double y = 0, double z = 0, out FTranslatedLineTarget pLineTarget = null, bool nofreeaim = false, bool noautoaim = false, int aimflags = 0); native void SpawnTeleportFog(Vector3 pos, bool beforeTele, bool setTarget); native Actor RoughMonsterSearch(int distance, bool onlyseekable = false, bool frontonly = false, double fov = 0); - native int ApplyDamageFactor(Name damagetype, int damage); + native clearscope int ApplyDamageFactor(Name damagetype, int damage); native int GetModifiedDamage(Name damagetype, int damage, bool passive, Actor inflictor = null, Actor source = null, int flags = 0); native bool CheckBossDeath(); native bool CheckFov(Actor target, double fov); From 312b5ce66e9c8c2769adf9f70979b88800fa07d0 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 27 Oct 2021 10:48:22 -0400 Subject: [PATCH 02/39] - add SDL hint to not minimize the window on focus loss --- src/common/platform/posix/sdl/hardware.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/platform/posix/sdl/hardware.cpp b/src/common/platform/posix/sdl/hardware.cpp index ea008169e9..ce877e943f 100644 --- a/src/common/platform/posix/sdl/hardware.cpp +++ b/src/common/platform/posix/sdl/hardware.cpp @@ -67,6 +67,7 @@ void I_InitGraphics () #ifdef __APPLE__ SDL_SetHint(SDL_HINT_VIDEO_MAC_FULLSCREEN_SPACES, "0"); #endif // __APPLE__ + SDL_SetHint(SDL_HINT_VIDEO_MINIMIZE_ON_FOCUS_LOSS, "0"); if (SDL_InitSubSystem (SDL_INIT_VIDEO) < 0) { From c4f7760ab22306a3b867fdf65f031b882102a661 Mon Sep 17 00:00:00 2001 From: Mitch Richters Date: Wed, 17 Nov 2021 18:41:03 +1100 Subject: [PATCH 03/39] - `D_ProcessEvents()`: Delay `EV_KeyUp` events until any `EV_KeyDown` events for the corresponding key have been processed. This makes the mouse under SDL a lot better. --- src/common/engine/d_event.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/common/engine/d_event.cpp b/src/common/engine/d_event.cpp index 3a779a5fc4..cd92a7229c 100644 --- a/src/common/engine/d_event.cpp +++ b/src/common/engine/d_event.cpp @@ -67,11 +67,20 @@ CVAR(Bool, m_filter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) void D_ProcessEvents (void) { - event_t *ev; + bool keywasdown[NUM_KEYS] = { false }; + TArray delayedevents; + while (eventtail != eventhead) { - ev = &events[eventtail]; + event_t *ev = &events[eventtail]; eventtail = (eventtail + 1) & (MAXEVENTS - 1); + + if (ev->type == EV_KeyUp && keywasdown[ev->data1]) + { + delayedevents.Push(ev); + continue; + } + if (ev->type == EV_None) continue; if (ev->type == EV_DeviceChange) @@ -85,7 +94,12 @@ void D_ProcessEvents (void) continue; // menu ate the event } - G_Responder (ev); + keywasdown[ev->data1] = G_Responder(ev) && ev->type == EV_KeyDown; + } + + for (auto& ev: delayedevents) + { + D_PostEvent(ev); } } From b1fea228bef90041f3560c5a1559d6a3c96673b5 Mon Sep 17 00:00:00 2001 From: Mitch Richters Date: Sat, 20 Nov 2021 22:38:54 +1100 Subject: [PATCH 04/39] - `D_ProcessEvents()`: Fix bad setup with `delayedevents` array that was holding pointers to items in the `events[]` array instead of making a copy. --- src/common/engine/d_event.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/engine/d_event.cpp b/src/common/engine/d_event.cpp index cd92a7229c..52fedd4287 100644 --- a/src/common/engine/d_event.cpp +++ b/src/common/engine/d_event.cpp @@ -68,7 +68,7 @@ CVAR(Bool, m_filter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) void D_ProcessEvents (void) { bool keywasdown[NUM_KEYS] = { false }; - TArray delayedevents; + TArray delayedevents; while (eventtail != eventhead) { @@ -77,7 +77,7 @@ void D_ProcessEvents (void) if (ev->type == EV_KeyUp && keywasdown[ev->data1]) { - delayedevents.Push(ev); + delayedevents.Push(*ev); continue; } @@ -99,7 +99,7 @@ void D_ProcessEvents (void) for (auto& ev: delayedevents) { - D_PostEvent(ev); + D_PostEvent(&ev); } } From 4530a7b583b7f8243d33d46f0857cb82a6112911 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 21 Nov 2021 10:10:58 +0100 Subject: [PATCH 05/39] - minor optimization to last PR to use a FixedBitArray to reduce stack impact of the check array. --- src/common/engine/d_event.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/engine/d_event.cpp b/src/common/engine/d_event.cpp index 52fedd4287..7a51d054ad 100644 --- a/src/common/engine/d_event.cpp +++ b/src/common/engine/d_event.cpp @@ -67,9 +67,10 @@ CVAR(Bool, m_filter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) void D_ProcessEvents (void) { - bool keywasdown[NUM_KEYS] = { false }; + FixedBitArray keywasdown; TArray delayedevents; + keywasdown.Zero(); while (eventtail != eventhead) { event_t *ev = &events[eventtail]; @@ -94,7 +95,7 @@ void D_ProcessEvents (void) continue; // menu ate the event } - keywasdown[ev->data1] = G_Responder(ev) && ev->type == EV_KeyDown; + if (G_Responder(ev) && ev->type == EV_KeyDown) keywasdown.Set(ev->data1); } for (auto& ev: delayedevents) From 54ad3433b1880291a02b9d71a16e99ee46dec2dd Mon Sep 17 00:00:00 2001 From: Emile Belanger Date: Sun, 21 Nov 2021 12:01:42 +0000 Subject: [PATCH 06/39] GLES: Load default shader if we try to load user shader to avoid crash --- src/common/rendering/gles/gles_shader.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common/rendering/gles/gles_shader.h b/src/common/rendering/gles/gles_shader.h index 66fcb1ebd5..b9c98fb6e7 100644 --- a/src/common/rendering/gles/gles_shader.h +++ b/src/common/rendering/gles/gles_shader.h @@ -480,7 +480,10 @@ public: { return mMaterialShaders[eff]; } - return NULL; + else // This can happen if we try and active a user shader which is not loaded, so return default shader so it does not crash + { + return mMaterialShaders[0]; + } } }; From d0697d980120a8a80cdf9840c54fdd4d3165a038 Mon Sep 17 00:00:00 2001 From: Player701 <{ID}+{username}@users.noreply.github.com> Date: Thu, 25 Nov 2021 16:14:01 +0300 Subject: [PATCH 07/39] - Fixed some issues with the scaling feature of DStatusBarCore::DrawString --- src/common/statusbar/base_sbar.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/common/statusbar/base_sbar.cpp b/src/common/statusbar/base_sbar.cpp index 9ea4a6d260..0ee2f7701a 100644 --- a/src/common/statusbar/base_sbar.cpp +++ b/src/common/statusbar/base_sbar.cpp @@ -754,7 +754,7 @@ void DStatusBarCore::DrawString(FFont* font, const FString& cstring, double x, d { if (ch == ' ') { - x += monospaced ? spacing : font->GetSpaceWidth() + spacing; + x += (monospaced ? spacing : font->GetSpaceWidth() + spacing) * scaleX; continue; } else if (ch == TEXTCOLOR_ESCAPE) @@ -774,7 +774,7 @@ void DStatusBarCore::DrawString(FFont* font, const FString& cstring, double x, d width += font->GetDefaultKerning(); if (!monospaced) //If we are monospaced lets use the offset - x += (c->GetDisplayLeftOffset() + 1); //ignore x offsets since we adapt to character size + x += c->GetDisplayLeftOffset() * scaleX + 1; //ignore x offsets since we adapt to character size double rx, ry, rw, rh; rx = x + drawOffset.X; @@ -825,12 +825,12 @@ void DStatusBarCore::DrawString(FFont* font, const FString& cstring, double x, d DTA_LegacyRenderStyle, ERenderStyle(style), TAG_DONE); - dx = monospaced - ? spacing - : width + spacing - (c->GetDisplayLeftOffset() + 1); - // Take text scale into account - x += dx * scaleX; + dx = monospaced + ? spacing * scaleX + : (double(width) + spacing - c->GetDisplayLeftOffset()) * scaleX - 1; + + x += dx; } } From 121b89306aac3d32ecac61c966ad3344cfdc332b Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sat, 27 Nov 2021 16:00:09 +0200 Subject: [PATCH 08/39] - updated LZMA to 21.06 https://www.7-zip.org/sdk.html https://www.7-zip.org/a/lzma2106.7z --- libraries/lzma/C/7z.h | 4 +- libraries/lzma/C/7zArcIn.c | 44 +- libraries/lzma/C/7zCrc.c | 196 +++- libraries/lzma/C/7zCrcOpt.c | 16 +- libraries/lzma/C/7zDec.c | 47 +- libraries/lzma/C/7zStream.c | 4 +- libraries/lzma/C/7zTypes.h | 191 +++- libraries/lzma/C/7zVersion.h | 10 +- libraries/lzma/C/Bcj2.c | 10 +- libraries/lzma/C/Bra.c | 14 +- libraries/lzma/C/Bra86.c | 4 +- libraries/lzma/C/Compiler.h | 12 +- libraries/lzma/C/CpuArch.c | 280 +++++- libraries/lzma/C/CpuArch.h | 154 ++- libraries/lzma/C/Delta.c | 167 +++- libraries/lzma/C/LzFind.c | 1320 ++++++++++++++++++-------- libraries/lzma/C/LzFind.h | 41 +- libraries/lzma/C/LzFindMt.c | 1345 +++++++++++++++++++-------- libraries/lzma/C/LzFindMt.h | 46 +- libraries/lzma/C/LzFindOpt.c | 578 ++++++++++++ libraries/lzma/C/LzHash.h | 63 +- libraries/lzma/C/Lzma2Dec.c | 5 +- libraries/lzma/C/LzmaDec.c | 422 ++++++--- libraries/lzma/C/LzmaDec.h | 4 +- libraries/lzma/C/LzmaEnc.c | 439 ++++++--- libraries/lzma/C/LzmaEnc.h | 4 +- libraries/lzma/C/Ppmd.h | 138 ++- libraries/lzma/C/Ppmd7.c | 898 +++++++++++++----- libraries/lzma/C/Ppmd7.h | 175 ++-- libraries/lzma/C/Ppmd7Dec.c | 338 ++++--- libraries/lzma/C/Threads.c | 465 ++++++++- libraries/lzma/C/Threads.h | 151 ++- libraries/lzma/CMakeLists.txt | 2 +- libraries/lzma/DOC/lzma-history.txt | 65 ++ libraries/lzma/DOC/lzma-sdk.txt | 63 +- 35 files changed, 5933 insertions(+), 1782 deletions(-) create mode 100644 libraries/lzma/C/LzFindOpt.c diff --git a/libraries/lzma/C/7z.h b/libraries/lzma/C/7z.h index 6c7886e38b..304f75ffc5 100644 --- a/libraries/lzma/C/7z.h +++ b/libraries/lzma/C/7z.h @@ -1,5 +1,5 @@ /* 7z.h -- 7z interface -2017-04-03 : Igor Pavlov : Public domain */ +2018-07-02 : Igor Pavlov : Public domain */ #ifndef __7Z_H #define __7Z_H @@ -91,6 +91,8 @@ typedef struct UInt64 *CoderUnpackSizes; // for all coders in all folders Byte *CodersData; + + UInt64 RangeLimit; } CSzAr; UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); diff --git a/libraries/lzma/C/7zArcIn.c b/libraries/lzma/C/7zArcIn.c index f74d0fad51..0d9dec41e5 100644 --- a/libraries/lzma/C/7zArcIn.c +++ b/libraries/lzma/C/7zArcIn.c @@ -1,5 +1,5 @@ /* 7zArcIn.c -- 7z Input functions -2018-12-31 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -75,7 +75,7 @@ static SRes SzBitUi32s_Alloc(CSzBitUi32s *p, size_t num, ISzAllocPtr alloc) return SZ_OK; } -void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) +static void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; @@ -83,7 +83,7 @@ void SzBitUi32s_Free(CSzBitUi32s *p, ISzAllocPtr alloc) #define SzBitUi64s_Init(p) { (p)->Defs = NULL; (p)->Vals = NULL; } -void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) +static void SzBitUi64s_Free(CSzBitUi64s *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Defs); p->Defs = NULL; ISzAlloc_Free(alloc, p->Vals); p->Vals = NULL; @@ -105,6 +105,8 @@ static void SzAr_Init(CSzAr *p) p->CoderUnpackSizes = NULL; p->CodersData = NULL; + + p->RangeLimit = 0; } static void SzAr_Free(CSzAr *p, ISzAllocPtr alloc) @@ -502,7 +504,7 @@ SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd) return SZ_ERROR_ARCHIVE; if (propsSize >= 0x80) return SZ_ERROR_UNSUPPORTED; - coder->PropsOffset = sd->Data - dataStart; + coder->PropsOffset = (size_t)(sd->Data - dataStart); coder->PropsSize = (Byte)propsSize; sd->Data += (size_t)propsSize; sd->Size -= (size_t)propsSize; @@ -677,7 +679,7 @@ static SRes ReadUnpackInfo(CSzAr *p, { UInt32 numCoders, ci, numInStreams = 0; - p->FoCodersOffsets[fo] = sd.Data - startBufPtr; + p->FoCodersOffsets[fo] = (size_t)(sd.Data - startBufPtr); RINOK(SzReadNumber32(&sd, &numCoders)); if (numCoders == 0 || numCoders > k_Scan_NumCoders_MAX) @@ -797,7 +799,7 @@ static SRes ReadUnpackInfo(CSzAr *p, p->FoToCoderUnpackSizes[fo] = numCodersOutStreams; { - size_t dataSize = sd.Data - startBufPtr; + const size_t dataSize = (size_t)(sd.Data - startBufPtr); p->FoStartPackStreamIndex[fo] = packStreamIndex; p->FoCodersOffsets[fo] = dataSize; MY_ALLOC_ZE_AND_CPY(p->CodersData, dataSize, startBufPtr, alloc); @@ -885,7 +887,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) if (numStreams != 1 || !SzBitWithVals_Check(&p->FolderCRCs, i)) numSubDigests += numStreams; } - ssi->sdNumSubStreams.Size = sd->Data - ssi->sdNumSubStreams.Data; + ssi->sdNumSubStreams.Size = (size_t)(sd->Data - ssi->sdNumSubStreams.Data); continue; } if (type == k7zIdCRC || type == k7zIdSize || type == k7zIdEnd) @@ -907,7 +909,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { ssi->sdSizes.Data = sd->Data; RINOK(SkipNumbers(sd, numUnpackSizesInData)); - ssi->sdSizes.Size = sd->Data - ssi->sdSizes.Data; + ssi->sdSizes.Size = (size_t)(sd->Data - ssi->sdSizes.Data); RINOK(ReadID(sd, &type)); } @@ -919,7 +921,7 @@ static SRes ReadSubStreamsInfo(CSzAr *p, CSzData *sd, CSubStreamInfo *ssi) { ssi->sdCRCs.Data = sd->Data; RINOK(SkipBitUi32s(sd, numSubDigests)); - ssi->sdCRCs.Size = sd->Data - ssi->sdCRCs.Data; + ssi->sdCRCs.Size = (size_t)(sd->Data - ssi->sdCRCs.Data); } else { @@ -947,7 +949,11 @@ static SRes SzReadStreamsInfo(CSzAr *p, if (type == k7zIdPackInfo) { RINOK(ReadNumber(sd, dataOffset)); + if (*dataOffset > p->RangeLimit) + return SZ_ERROR_ARCHIVE; RINOK(ReadPackInfo(p, sd, alloc)); + if (p->PackPositions[p->NumPackStreams] > p->RangeLimit - *dataOffset) + return SZ_ERROR_ARCHIVE; RINOK(ReadID(sd, &type)); } if (type == k7zIdUnpackInfo) @@ -1028,12 +1034,12 @@ static SRes SzReadFileNames(const Byte *data, size_t size, UInt32 numFiles, size return SZ_ERROR_ARCHIVE; for (p = data + pos; #ifdef _WIN32 - *(const UInt16 *)p != 0 + *(const UInt16 *)(const void *)p != 0 #else p[0] != 0 || p[1] != 0 #endif ; p += 2); - pos = p - data + 2; + pos = (size_t)(p - data) + 2; *offsets++ = (pos >> 1); } while (--numFiles); @@ -1133,6 +1139,8 @@ static SRes SzReadHeader2( SRes res; SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + res = SzReadAndDecodePackedStreams(inStream, sd, tempBufs, NUM_ADDITIONAL_STREAMS_MAX, p->startPosAfterHeader, &tempAr, allocTemp); *numTempBufs = tempAr.NumFolders; @@ -1526,11 +1534,13 @@ static SRes SzArEx_Open2( nextHeaderSize = GetUi64(header + 20); nextHeaderCRC = GetUi32(header + 28); - p->startPosAfterHeader = startArcPos + k7zStartHeaderSize; + p->startPosAfterHeader = (UInt64)startArcPos + k7zStartHeaderSize; if (CrcCalc(header + 12, 20) != GetUi32(header + 8)) return SZ_ERROR_CRC; + p->db.RangeLimit = nextHeaderOffset; + nextHeaderSizeT = (size_t)nextHeaderSize; if (nextHeaderSizeT != nextHeaderSize) return SZ_ERROR_MEM; @@ -1543,13 +1553,13 @@ static SRes SzArEx_Open2( { Int64 pos = 0; RINOK(ILookInStream_Seek(inStream, &pos, SZ_SEEK_END)); - if ((UInt64)pos < startArcPos + nextHeaderOffset || - (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset || - (UInt64)pos < startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) + if ((UInt64)pos < (UInt64)startArcPos + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset || + (UInt64)pos < (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset + nextHeaderSize) return SZ_ERROR_INPUT_EOF; } - RINOK(LookInStream_SeekTo(inStream, startArcPos + k7zStartHeaderSize + nextHeaderOffset)); + RINOK(LookInStream_SeekTo(inStream, (UInt64)startArcPos + k7zStartHeaderSize + nextHeaderOffset)); if (!Buf_Create(&buf, nextHeaderSizeT, allocTemp)) return SZ_ERROR_MEM; @@ -1575,6 +1585,8 @@ static SRes SzArEx_Open2( Buf_Init(&tempBuf); SzAr_Init(&tempAr); + tempAr.RangeLimit = p->db.RangeLimit; + res = SzReadAndDecodePackedStreams(inStream, &sd, &tempBuf, 1, p->startPosAfterHeader, &tempAr, allocTemp); SzAr_Free(&tempAr, allocTemp); diff --git a/libraries/lzma/C/7zCrc.c b/libraries/lzma/C/7zCrc.c index b4d84f0233..f186324ddc 100644 --- a/libraries/lzma/C/7zCrc.c +++ b/libraries/lzma/C/7zCrc.c @@ -1,5 +1,5 @@ /* 7zCrc.c -- CRC32 init -2017-06-06 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -26,8 +26,20 @@ typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); +extern CRC_FUNC g_CrcUpdateT4; +CRC_FUNC g_CrcUpdateT4; +extern CRC_FUNC g_CrcUpdateT8; +CRC_FUNC g_CrcUpdateT8; +extern +CRC_FUNC g_CrcUpdateT0_32; +CRC_FUNC g_CrcUpdateT0_32; +extern +CRC_FUNC g_CrcUpdateT0_64; +CRC_FUNC g_CrcUpdateT0_64; +extern +CRC_FUNC g_CrcUpdate; CRC_FUNC g_CrcUpdate; UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; @@ -44,6 +56,7 @@ UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -53,6 +66,166 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U return v; } + +/* ---------- hardware CRC ---------- */ + +#ifdef MY_CPU_LE + +#if defined(MY_CPU_ARM_OR_ARM64) + +// #pragma message("ARM*") + + #if defined(_MSC_VER) + #if defined(MY_CPU_ARM64) + #if (_MSC_VER >= 1910) + #define USE_ARM64_CRC + #endif + #endif + #elif (defined(__clang__) && (__clang_major__ >= 3)) \ + || (defined(__GNUC__) && (__GNUC__ > 4)) + #if !defined(__ARM_FEATURE_CRC32) + #define __ARM_FEATURE_CRC32 1 + #if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers + #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) + #endif + #endif + #if defined(__ARM_FEATURE_CRC32) + #define USE_ARM64_CRC + #include + #endif + #endif + +#else + +// no hardware CRC + +// #define USE_CRC_EMU + +#ifdef USE_CRC_EMU + +#pragma message("ARM64 CRC emulation") + +MY_FORCE_INLINE +UInt32 __crc32b(UInt32 v, UInt32 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); + return v; +} + +MY_FORCE_INLINE +UInt32 __crc32w(UInt32 v, UInt32 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + return v; +} + +MY_FORCE_INLINE +UInt32 __crc32d(UInt32 v, UInt64 data) +{ + const UInt32 *table = g_CrcTable; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8; + return v; +} + +#endif // USE_CRC_EMU + +#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE) + + + +#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) + +#define T0_32_UNROLL_BYTES (4 * 4) +#define T0_64_UNROLL_BYTES (4 * 8) + +#ifndef ATTRIB_CRC +#define ATTRIB_CRC +#endif +// #pragma message("USE ARM HW CRC") + +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) +{ + const Byte *p = (const Byte *)data; + UNUSED_VAR(table); + + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + + if (size >= T0_32_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= (T0_32_UNROLL_BYTES - 1); + lim -= size; + do + { + v = __crc32w(v, *(const UInt32 *)(const void *)(p)); + v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + v = __crc32w(v, *(const UInt32 *)(const void *)(p)); + v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4; + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); +ATTRIB_CRC +UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) +{ + const Byte *p = (const Byte *)data; + UNUSED_VAR(table); + + for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--) + v = __crc32b(v, *p++); + + if (size >= T0_64_UNROLL_BYTES) + { + const Byte *lim = p + size; + size &= (T0_64_UNROLL_BYTES - 1); + lim -= size; + do + { + v = __crc32d(v, *(const UInt64 *)(const void *)(p)); + v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; + v = __crc32d(v, *(const UInt64 *)(const void *)(p)); + v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8; + } + while (p != lim); + } + + for (; size != 0; size--) + v = __crc32b(v, *p++); + + return v; +} + +#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) + +#endif // MY_CPU_LE + + + + void MY_FAST_CALL CrcGenerateTable() { UInt32 i; @@ -123,6 +296,27 @@ void MY_FAST_CALL CrcGenerateTable() } } #endif + #endif + #ifdef MY_CPU_LE + #ifdef USE_ARM64_CRC + if (CPU_IsSupported_CRC32()) + { + g_CrcUpdateT0_32 = CrcUpdateT0_32; + g_CrcUpdateT0_64 = CrcUpdateT0_64; + g_CrcUpdate = + #if defined(MY_CPU_ARM) + CrcUpdateT0_32; + #else + CrcUpdateT0_64; + #endif + } + #endif + + #ifdef USE_CRC_EMU + g_CrcUpdateT0_32 = CrcUpdateT0_32; + g_CrcUpdateT0_64 = CrcUpdateT0_64; + g_CrcUpdate = CrcUpdateT0_64; + #endif #endif } diff --git a/libraries/lzma/C/7zCrcOpt.c b/libraries/lzma/C/7zCrcOpt.c index 73beba298d..69fad9ca2b 100644 --- a/libraries/lzma/C/7zCrcOpt.c +++ b/libraries/lzma/C/7zCrcOpt.c @@ -1,5 +1,5 @@ /* 7zCrcOpt.c -- CRC32 calculation -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -9,6 +9,7 @@ #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) +UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -16,7 +17,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U v = CRC_UPDATE_BYTE_2(v, *p); for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x300)[((v ) & 0xFF)] ^ (table + 0x200)[((v >> 8) & 0xFF)] @@ -28,6 +29,7 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U return v; } +UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) { const Byte *p = (const Byte *)data; @@ -36,13 +38,13 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U for (; size >= 8; size -= 8, p += 8) { UInt32 d; - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x700)[((v ) & 0xFF)] ^ (table + 0x600)[((v >> 8) & 0xFF)] ^ (table + 0x500)[((v >> 16) & 0xFF)] ^ (table + 0x400)[((v >> 24))]; - d = *((const UInt32 *)p + 1); + d = *((const UInt32 *)(const void *)p + 1); v ^= (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)] @@ -72,7 +74,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co v = CRC_UPDATE_BYTE_2_BE(v, *p); for (; size >= 4; size -= 4, p += 4) { - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x000)[((v ) & 0xFF)] ^ (table + 0x100)[((v >> 8) & 0xFF)] @@ -94,13 +96,13 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, co for (; size >= 8; size -= 8, p += 8) { UInt32 d; - v ^= *(const UInt32 *)p; + v ^= *(const UInt32 *)(const void *)p; v = (table + 0x400)[((v ) & 0xFF)] ^ (table + 0x500)[((v >> 8) & 0xFF)] ^ (table + 0x600)[((v >> 16) & 0xFF)] ^ (table + 0x700)[((v >> 24))]; - d = *((const UInt32 *)p + 1); + d = *((const UInt32 *)(const void *)p + 1); v ^= (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)] diff --git a/libraries/lzma/C/7zDec.c b/libraries/lzma/C/7zDec.c index 7c46352111..fbfd016e1e 100644 --- a/libraries/lzma/C/7zDec.c +++ b/libraries/lzma/C/7zDec.c @@ -1,5 +1,5 @@ /* 7zDec.c -- Decoding from 7z folder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -21,17 +21,20 @@ #endif #define k_Copy 0 -#define k_Delta 3 +#ifndef _7Z_NO_METHOD_LZMA2 #define k_LZMA2 0x21 +#endif #define k_LZMA 0x30101 -#define k_BCJ 0x3030103 #define k_BCJ2 0x303011B +#ifndef _7Z_NO_METHODS_FILTERS +#define k_Delta 3 +#define k_BCJ 0x3030103 #define k_PPC 0x3030205 #define k_IA64 0x3030401 #define k_ARM 0x3030501 #define k_ARMT 0x3030701 #define k_SPARC 0x3030805 - +#endif #ifdef _7ZIP_PPMD_SUPPPORT @@ -56,7 +59,7 @@ static Byte ReadByte(const IByteIn *pp) return *p->cur++; if (p->res == SZ_OK) { - size_t size = p->cur - p->begin; + size_t size = (size_t)(p->cur - p->begin); p->processed += size; p->res = ILookInStream_Skip(p->inStream, size); size = (1 << 25); @@ -101,28 +104,32 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c Ppmd7_Init(&ppmd, order); } { - CPpmd7z_RangeDec rc; - Ppmd7z_RangeDec_CreateVTable(&rc); - rc.Stream = &s.vt; - if (!Ppmd7z_RangeDec_Init(&rc)) + ppmd.rc.dec.Stream = &s.vt; + if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec)) res = SZ_ERROR_DATA; - else if (s.extra) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else + else if (!s.extra) { - SizeT i; - for (i = 0; i < outSize; i++) + Byte *buf = outBuffer; + const Byte *lim = buf + outSize; + for (; buf != lim; buf++) { - int sym = Ppmd7_DecodeSymbol(&ppmd, &rc.vt); + int sym = Ppmd7z_DecodeSymbol(&ppmd); if (s.extra || sym < 0) break; - outBuffer[i] = (Byte)sym; + *buf = (Byte)sym; } - if (i != outSize) - res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); - else if (s.processed + (s.cur - s.begin) != inSize || !Ppmd7z_RangeDec_IsFinishedOK(&rc)) + if (buf != lim) res = SZ_ERROR_DATA; + else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) + { + /* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */ + res = SZ_ERROR_DATA; + } } + if (s.extra) + res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA); + else if (s.processed + (size_t)(s.cur - s.begin) != inSize) + res = SZ_ERROR_DATA; } Ppmd7_Free(&ppmd, allocMain); return res; @@ -365,7 +372,9 @@ static SRes CheckSupportedFolder(const CSzFolder *f) return SZ_ERROR_UNSUPPORTED; } +#ifndef _7Z_NO_METHODS_FILTERS #define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break; +#endif static SRes SzFolder_Decode2(const CSzFolder *folder, const Byte *propsData, diff --git a/libraries/lzma/C/7zStream.c b/libraries/lzma/C/7zStream.c index 6b5aa1621d..28a14604fb 100644 --- a/libraries/lzma/C/7zStream.c +++ b/libraries/lzma/C/7zStream.c @@ -1,5 +1,5 @@ /* 7zStream.c -- 7z Stream functions -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -37,7 +37,7 @@ SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf) SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) { - Int64 t = offset; + Int64 t = (Int64)offset; return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); } diff --git a/libraries/lzma/C/7zTypes.h b/libraries/lzma/C/7zTypes.h index 65b3af63c7..3f66a7b51f 100644 --- a/libraries/lzma/C/7zTypes.h +++ b/libraries/lzma/C/7zTypes.h @@ -1,11 +1,13 @@ /* 7zTypes.h -- Basic types -2018-08-04 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H #ifdef _WIN32 /* #include */ +#else +#include #endif #include @@ -43,18 +45,115 @@ EXTERN_C_BEGIN typedef int SRes; +#ifdef _MSC_VER + #if _MSC_VER > 1200 + #define MY_ALIGN(n) __declspec(align(n)) + #else + #define MY_ALIGN(n) + #endif +#else + #define MY_ALIGN(n) __attribute__ ((aligned(n))) +#endif + + #ifdef _WIN32 /* typedef DWORD WRes; */ typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) -#else +// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) +#else // _WIN32 + +// #define ENV_HAVE_LSTAT typedef int WRes; -#define MY__FACILITY_WIN32 7 -#define MY__FACILITY__WRes MY__FACILITY_WIN32 -#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000))) + +// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT +#define MY__FACILITY_ERRNO 0x800 +#define MY__FACILITY_WIN32 7 +#define MY__FACILITY__WRes MY__FACILITY_ERRNO + +#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ + ( (HRESULT)(x) & 0x0000FFFF) \ + | (MY__FACILITY__WRes << 16) \ + | (HRESULT)0x80000000 )) + +#define MY_SRes_HRESULT_FROM_WRes(x) \ + ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x)) + +// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno) +#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x) + +/* +#define ERROR_FILE_NOT_FOUND 2L +#define ERROR_ACCESS_DENIED 5L +#define ERROR_NO_MORE_FILES 18L +#define ERROR_LOCK_VIOLATION 33L +#define ERROR_FILE_EXISTS 80L +#define ERROR_DISK_FULL 112L +#define ERROR_NEGATIVE_SEEK 131L +#define ERROR_ALREADY_EXISTS 183L +#define ERROR_DIRECTORY 267L +#define ERROR_TOO_MANY_POSTS 298L + +#define ERROR_INTERNAL_ERROR 1359L +#define ERROR_INVALID_REPARSE_DATA 4392L +#define ERROR_REPARSE_TAG_INVALID 4393L +#define ERROR_REPARSE_TAG_MISMATCH 4394L +*/ + +// we use errno equivalents for some WIN32 errors: + +#define ERROR_INVALID_FUNCTION EINVAL +#define ERROR_ALREADY_EXISTS EEXIST +#define ERROR_FILE_EXISTS EEXIST +#define ERROR_PATH_NOT_FOUND ENOENT +#define ERROR_FILE_NOT_FOUND ENOENT +#define ERROR_DISK_FULL ENOSPC +// #define ERROR_INVALID_HANDLE EBADF + +// we use FACILITY_WIN32 for errors that has no errno equivalent +// Too many posts were made to a semaphore. +#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL) +#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) +#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) + +// if (MY__FACILITY__WRes != FACILITY_WIN32), +// we use FACILITY_WIN32 for COM errors: +#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) +#define E_INVALIDARG ((HRESULT)0x80070057L) +#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) + +/* +// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: +#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) +#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) +*/ + +// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits +typedef long INT_PTR; +typedef unsigned long UINT_PTR; + +#define TEXT(quote) quote + +#define FILE_ATTRIBUTE_READONLY 0x0001 +#define FILE_ATTRIBUTE_HIDDEN 0x0002 +#define FILE_ATTRIBUTE_SYSTEM 0x0004 +#define FILE_ATTRIBUTE_DIRECTORY 0x0010 +#define FILE_ATTRIBUTE_ARCHIVE 0x0020 +#define FILE_ATTRIBUTE_DEVICE 0x0040 +#define FILE_ATTRIBUTE_NORMAL 0x0080 +#define FILE_ATTRIBUTE_TEMPORARY 0x0100 +#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200 +#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400 +#define FILE_ATTRIBUTE_COMPRESSED 0x0800 +#define FILE_ATTRIBUTE_OFFLINE 0x1000 +#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000 +#define FILE_ATTRIBUTE_ENCRYPTED 0x4000 + +#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */ #endif @@ -63,6 +162,10 @@ typedef int WRes; #define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #endif +#ifndef RINOK_WRes +#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } +#endif + typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; @@ -75,6 +178,40 @@ typedef int Int32; typedef unsigned int UInt32; #endif + +#ifndef _WIN32 + +typedef int INT; +typedef Int32 INT32; +typedef unsigned int UINT; +typedef UInt32 UINT32; +typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility +typedef UINT32 ULONG; + +#undef DWORD +typedef UINT32 DWORD; + +#define VOID void + +#define HRESULT LONG + +typedef void *LPVOID; +// typedef void VOID; +// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR; +// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits) +typedef long INT_PTR; +typedef unsigned long UINT_PTR; +typedef long LONG_PTR; +typedef unsigned long DWORD_PTR; + +typedef size_t SIZE_T; + +#endif // _WIN32 + + +#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) + + #ifdef _SZ_NO_INT_64 /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. @@ -128,25 +265,37 @@ typedef int BoolInt; #define MY_CDECL __cdecl #define MY_FAST_CALL __fastcall -#else +#else // _MSC_VER -#define MY_NO_INLINE -#define MY_FORCE_INLINE -#define MY_CDECL -#define MY_FAST_CALL - -/* inline keyword : for C++ / C99 */ - -/* GCC, clang: */ -/* -#if defined (__GNUC__) && (__GNUC__ >= 4) -#define MY_FORCE_INLINE __attribute__((always_inline)) +#if (defined(__GNUC__) && (__GNUC__ >= 4)) \ + || (defined(__clang__) && (__clang_major__ >= 4)) \ + || defined(__INTEL_COMPILER) \ + || defined(__xlC__) #define MY_NO_INLINE __attribute__((noinline)) +// #define MY_FORCE_INLINE __attribute__((always_inline)) inline +#else +#define MY_NO_INLINE #endif -*/ +#define MY_FORCE_INLINE + + +#define MY_CDECL + +#if defined(_M_IX86) \ + || defined(__i386__) +// #define MY_FAST_CALL __attribute__((fastcall)) +// #define MY_FAST_CALL __attribute__((cdecl)) +#define MY_FAST_CALL +#elif defined(MY_CPU_AMD64) +// #define MY_FAST_CALL __attribute__((ms_abi)) +#define MY_FAST_CALL +#else +#define MY_FAST_CALL #endif +#endif // _MSC_VER + /* The following interfaces use first parameter as pointer to structure */ @@ -335,12 +484,11 @@ struct ISzAlloc GCC 4.8.1 : classes with non-public variable members" */ -#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) - +#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) #endif -#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr)) +#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) /* #define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) @@ -353,6 +501,7 @@ struct ISzAlloc */ +#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) #ifdef _WIN32 diff --git a/libraries/lzma/C/7zVersion.h b/libraries/lzma/C/7zVersion.h index c176823a4d..a8cbdb9c65 100644 --- a/libraries/lzma/C/7zVersion.h +++ b/libraries/lzma/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 19 -#define MY_VER_MINOR 00 +#define MY_VER_MAJOR 21 +#define MY_VER_MINOR 06 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "19.00" +#define MY_VERSION_NUMBERS "21.06" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2019-02-21" +#define MY_DATE "2021-11-24" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2018 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/libraries/lzma/C/Bcj2.c b/libraries/lzma/C/Bcj2.c index 9a0046a650..c7b956708f 100644 --- a/libraries/lzma/C/Bcj2.c +++ b/libraries/lzma/C/Bcj2.c @@ -1,5 +1,5 @@ /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) -2018-04-28 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -123,7 +123,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; const Byte *srcLim; Byte *dest; - SizeT num = p->lims[BCJ2_STREAM_MAIN] - src; + SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src); if (num == 0) { @@ -134,7 +134,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) dest = p->dest; if (num > (SizeT)(p->destLim - dest)) { - num = p->destLim - dest; + num = (SizeT)(p->destLim - dest); if (num == 0) { p->state = BCJ2_DEC_STATE_ORIG; @@ -168,7 +168,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) break; } - num = src - p->bufs[BCJ2_STREAM_MAIN]; + num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]); if (src == srcLim) { @@ -228,7 +228,7 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p) p->ip += 4; val -= p->ip; dest = p->dest; - rem = p->destLim - dest; + rem = (SizeT)(p->destLim - dest); if (rem < 4) { diff --git a/libraries/lzma/C/Bra.c b/libraries/lzma/C/Bra.c index aed17e330d..3b854d9cad 100644 --- a/libraries/lzma/C/Bra.c +++ b/libraries/lzma/C/Bra.c @@ -1,5 +1,5 @@ /* Bra.c -- Converters for RISC code -2017-04-04 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -22,7 +22,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; if (p[-1] == 0xEB) break; @@ -43,7 +43,7 @@ SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; if (p[-1] == 0xEB) break; @@ -78,7 +78,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) { UInt32 b3; if (p > lim) - return p - data; + return (SizeT)(p - data); b1 = p[1]; b3 = p[3]; p += 2; @@ -113,7 +113,7 @@ SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) { UInt32 b3; if (p > lim) - return p - data; + return (SizeT)(p - data); b1 = p[1]; b3 = p[3]; p += 2; @@ -162,7 +162,7 @@ SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); p += 4; /* if ((v & 0xFC000003) == 0x48000001) */ if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) @@ -196,7 +196,7 @@ SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) for (;;) { if (p >= lim) - return p - data; + return (SizeT)(p - data); /* v = GetBe32(p); p += 4; diff --git a/libraries/lzma/C/Bra86.c b/libraries/lzma/C/Bra86.c index 93ed4d762b..10a0fbd161 100644 --- a/libraries/lzma/C/Bra86.c +++ b/libraries/lzma/C/Bra86.c @@ -1,5 +1,5 @@ /* Bra86.c -- Converter for x86 code (BCJ) -2017-04-03 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -25,7 +25,7 @@ SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding break; { - SizeT d = (SizeT)(p - data - pos); + SizeT d = (SizeT)(p - data) - pos; pos = (SizeT)(p - data); if (p >= limit) { diff --git a/libraries/lzma/C/Compiler.h b/libraries/lzma/C/Compiler.h index 0cc409d8a8..a9816fa5ad 100644 --- a/libraries/lzma/C/Compiler.h +++ b/libraries/lzma/C/Compiler.h @@ -1,9 +1,13 @@ /* Compiler.h -2017-04-03 : Igor Pavlov : Public domain */ +2021-01-05 : Igor Pavlov : Public domain */ #ifndef __7Z_COMPILER_H #define __7Z_COMPILER_H + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wunused-private-field" + #endif + #ifdef _MSC_VER #ifdef UNDER_CE @@ -25,6 +29,12 @@ #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information #endif + #ifdef __clang__ + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" + // #pragma clang diagnostic ignored "-Wreserved-id-macro" + #endif + #endif #define UNUSED_VAR(x) (void)x; diff --git a/libraries/lzma/C/CpuArch.c b/libraries/lzma/C/CpuArch.c index 02e482e088..fa9afe3970 100644 --- a/libraries/lzma/C/CpuArch.c +++ b/libraries/lzma/C/CpuArch.c @@ -1,5 +1,5 @@ /* CpuArch.c -- CPU specific code -2018-02-18: Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -55,6 +55,47 @@ static UInt32 CheckFlag(UInt32 flag) #define CHECK_CPUID_IS_SUPPORTED #endif +#ifndef USE_ASM + #ifdef _MSC_VER + #if _MSC_VER >= 1600 + #define MY__cpuidex __cpuidex + #else + +/* + __cpuid (function == 4) requires subfunction number in ECX. + MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction. + __cpuid() in new MSVC clears ECX. + __cpuid() in old MSVC (14.00) doesn't clear ECX + We still can use __cpuid for low (function) values that don't require ECX, + but __cpuid() in old MSVC will be incorrect for some function values: (function == 4). + So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction, + where ECX value is first parameter for FAST_CALL / NO_INLINE function, + So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and + old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value. + + DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!! +*/ + +static +MY_NO_INLINE +void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function) +{ + UNUSED_VAR(subFunction); + __cpuid(CPUInfo, function); +} + + #define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func) + #pragma message("======== MY__cpuidex_HACK WAS USED ========") + #endif + #else + #define MY__cpuidex(info, func, func2) __cpuid(info, func) + #pragma message("======== (INCORRECT ?) cpuid WAS USED ========") + #endif +#endif + + + + void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) { #ifdef USE_ASM @@ -99,18 +140,20 @@ void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d) #endif "=c" (*c) , "=d" (*d) - : "0" (function)) ; + : "0" (function), "c"(0) ) ; #endif #else int CPUInfo[4]; - __cpuid(CPUInfo, function); - *a = CPUInfo[0]; - *b = CPUInfo[1]; - *c = CPUInfo[2]; - *d = CPUInfo[3]; + + MY__cpuidex(CPUInfo, (int)function, 0); + + *a = (UInt32)CPUInfo[0]; + *b = (UInt32)CPUInfo[1]; + *c = (UInt32)CPUInfo[2]; + *d = (UInt32)CPUInfo[3]; #endif } @@ -174,7 +217,7 @@ BoolInt CPU_Is_InOrder() } #if !defined(MY_CPU_AMD64) && defined(_WIN32) -#include +#include static BoolInt CPU_Sys_Is_SSE_Supported() { OSVERSIONINFO vi; @@ -188,13 +231,101 @@ static BoolInt CPU_Sys_Is_SSE_Supported() #define CHECK_SYS_SSE_SUPPORT #endif -BoolInt CPU_Is_Aes_Supported() + +static UInt32 X86_CPUID_ECX_Get_Flags() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + if (!x86cpuid_CheckAndRead(&p)) + return 0; + return p.c; +} + +BoolInt CPU_IsSupported_AES() +{ + return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; +} + +BoolInt CPU_IsSupported_SSSE3() +{ + return (X86_CPUID_ECX_Get_Flags() >> 9) & 1; +} + +BoolInt CPU_IsSupported_SSE41() +{ + return (X86_CPUID_ECX_Get_Flags() >> 19) & 1; +} + +BoolInt CPU_IsSupported_SHA() { Cx86cpuid p; CHECK_SYS_SSE_SUPPORT if (!x86cpuid_CheckAndRead(&p)) return False; - return (p.c >> 25) & 1; + + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + return (d[1] >> 29) & 1; + } +} + +// #include + +#ifdef _WIN32 +#include +#endif + +BoolInt CPU_IsSupported_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5); // avx2 + } +} + +BoolInt CPU_IsSupported_VAES_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5) // avx2 + // & (d[1] >> 31) // avx512vl + & (d[2] >> 9); // vaes // VEX-256/EVEX + } } BoolInt CPU_IsSupported_PageGB() @@ -215,4 +346,133 @@ BoolInt CPU_IsSupported_PageGB() } } + +#elif defined(MY_CPU_ARM_OR_ARM64) + +#ifdef _WIN32 + +#include + +BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } + +#else + +#if defined(__APPLE__) + +/* +#include +#include +static void Print_sysctlbyname(const char *name) +{ + size_t bufSize = 256; + char buf[256]; + int res = sysctlbyname(name, &buf, &bufSize, NULL, 0); + { + int i; + printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize); + for (i = 0; i < 20; i++) + printf(" %2x", (unsigned)(Byte)buf[i]); + + } +} +*/ + +static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) +{ + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + + /* + Print_sysctlbyname("hw.pagesize"); + Print_sysctlbyname("machdep.cpu.brand_string"); + */ + +BoolInt CPU_IsSupported_CRC32(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); +} + +#ifdef MY_CPU_ARM64 +#define APPLE_CRYPTO_SUPPORT_VAL 1 +#else +#define APPLE_CRYPTO_SUPPORT_VAL 0 +#endif + +BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; } +BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } + + +#else // __APPLE__ + +#include + +#define USE_HWCAP + +#ifdef USE_HWCAP + +#include + + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + +#ifdef MY_CPU_ARM64 + #define MY_HWCAP_CHECK_FUNC(name) \ + MY_HWCAP_CHECK_FUNC_2(name, name) + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +// MY_HWCAP_CHECK_FUNC (ASIMD) +#elif defined(MY_CPU_ARM) + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; } + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) +#endif + +#else // USE_HWCAP + + #define MY_HWCAP_CHECK_FUNC(name) \ + BoolInt CPU_IsSupported_ ## name() { return 0; } + MY_HWCAP_CHECK_FUNC(NEON) + +#endif // USE_HWCAP + +MY_HWCAP_CHECK_FUNC (CRC32) +MY_HWCAP_CHECK_FUNC (SHA1) +MY_HWCAP_CHECK_FUNC (SHA2) +MY_HWCAP_CHECK_FUNC (AES) + +#endif // __APPLE__ +#endif // _WIN32 + +#endif // MY_CPU_ARM_OR_ARM64 + + + +#ifdef __APPLE__ + +#include + +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) +{ + return sysctlbyname(name, buf, bufSize, NULL, 0); +} + +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) +{ + size_t bufSize = sizeof(*val); + int res = My_sysctlbyname_Get(name, val, &bufSize); + if (res == 0 && bufSize != sizeof(*val)) + return EFAULT; + return res; +} + #endif diff --git a/libraries/lzma/C/CpuArch.h b/libraries/lzma/C/CpuArch.h index bd42938802..529d3a502b 100644 --- a/libraries/lzma/C/CpuArch.h +++ b/libraries/lzma/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2018-02-18 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H @@ -14,6 +14,10 @@ MY_CPU_BE means that CPU is BIG ENDIAN. If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform. MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses. + +MY_CPU_64BIT means that processor can work with 64-bit registers. + MY_CPU_64BIT can be used to select fast code branch + MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8) */ #if defined(_M_X64) \ @@ -24,8 +28,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY_CPU_AMD64 #ifdef __ILP32__ #define MY_CPU_NAME "x32" + #define MY_CPU_SIZEOF_POINTER 4 #else #define MY_CPU_NAME "x64" + #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT #endif @@ -35,7 +41,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem || defined(__i386__) #define MY_CPU_X86 #define MY_CPU_NAME "x86" - #define MY_CPU_32BIT + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif @@ -59,8 +66,14 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem || defined(__THUMBEL__) \ || defined(__THUMBEB__) #define MY_CPU_ARM - #define MY_CPU_NAME "arm" - #define MY_CPU_32BIT + + #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) + #define MY_CPU_NAME "armt" + #else + #define MY_CPU_NAME "arm" + #endif + /* #define MY_CPU_32BIT */ + #define MY_CPU_SIZEOF_POINTER 4 #endif @@ -84,17 +97,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #if defined(__ppc64__) \ - || defined(__powerpc64__) + || defined(__powerpc64__) \ + || defined(__ppc__) \ + || defined(__powerpc__) \ + || defined(__PPC__) \ + || defined(_POWER) + +#if defined(__ppc64__) \ + || defined(__powerpc64__) \ + || defined(_LP64) \ + || defined(__64BIT__) #ifdef __ILP32__ #define MY_CPU_NAME "ppc64-32" + #define MY_CPU_SIZEOF_POINTER 4 #else #define MY_CPU_NAME "ppc64" + #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT -#elif defined(__ppc__) \ - || defined(__powerpc__) +#else #define MY_CPU_NAME "ppc" - #define MY_CPU_32BIT + #define MY_CPU_SIZEOF_POINTER 4 + /* #define MY_CPU_32BIT */ +#endif #endif @@ -111,6 +136,10 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define MY_CPU_X86_OR_AMD64 #endif +#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64) +#define MY_CPU_ARM_OR_ARM64 +#endif + #ifdef _WIN32 @@ -170,6 +199,40 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #error Stop_Compiling_Bad_32_64_BIT #endif +#ifdef __SIZEOF_POINTER__ + #ifdef MY_CPU_SIZEOF_POINTER + #if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__ + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE + #endif + #else + #define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__ + #endif +#endif + +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +#if defined (_LP64) + #error Stop_Compiling_Bad_MY_CPU_PTR_SIZE +#endif +#endif + +#ifdef _MSC_VER + #if _MSC_VER >= 1300 + #define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1)) + #define MY_CPU_pragma_pop __pragma(pack(pop)) + #else + #define MY_CPU_pragma_pack_push_1 + #define MY_CPU_pragma_pop + #endif +#else + #ifdef __xlC__ + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") + #define MY_CPU_pragma_pop _Pragma("pack()") + #else + #define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)") + #define MY_CPU_pragma_pop _Pragma("pack(pop)") + #endif +#endif + #ifndef MY_CPU_NAME #ifdef MY_CPU_LE @@ -189,8 +252,12 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - || defined(MY_CPU_ARM64) \ - || defined(__ARM_FEATURE_UNALIGNED) + || defined(MY_CPU_ARM64) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) + /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. + So we can't use unaligned 64-bit operations. */ #define MY_CPU_LE_UNALIGN #endif #endif @@ -200,11 +267,15 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #define GetUi16(p) (*(const UInt16 *)(const void *)(p)) #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#endif -#define SetUi16(p, v) { *(UInt16 *)(p) = (v); } -#define SetUi32(p, v) { *(UInt32 *)(p) = (v); } -#define SetUi64(p, v) { *(UInt64 *)(p) = (v); } +#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } +#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } +#ifdef MY_CPU_LE_UNALIGN_64 +#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif #else @@ -218,8 +289,6 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem ((UInt32)((const Byte *)(p))[2] << 16) | \ ((UInt32)((const Byte *)(p))[3] << 24)) -#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) - #define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ _ppp_[0] = (Byte)_vvv_; \ _ppp_[1] = (Byte)(_vvv_ >> 8); } @@ -230,19 +299,29 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem _ppp_[2] = (Byte)(_vvv_ >> 16); \ _ppp_[3] = (Byte)(_vvv_ >> 24); } +#endif + + +#ifndef MY_CPU_LE_UNALIGN_64 + +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) + #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ SetUi32(_ppp2_ , (UInt32)_vvv2_); \ SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } #endif + + + #ifdef __has_builtin #define MY__has_builtin(x) __has_builtin(x) #else #define MY__has_builtin(x) 0 #endif -#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300) +#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300) /* Note: we use bswap instruction, that is unsupported in 386 cpu */ @@ -253,8 +332,8 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem #pragma intrinsic(_byteswap_uint64) /* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) +#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v) @@ -262,9 +341,9 @@ MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned mem (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \ || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) ) -/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */ -#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p)) -#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p)) +/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */ +#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p)) +#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p)) #define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v) @@ -325,10 +404,37 @@ int x86cpuid_GetFirm(const Cx86cpuid *p); #define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF)) #define x86cpuid_GetStepping(ver) (ver & 0xF) -BoolInt CPU_Is_InOrder(); -BoolInt CPU_Is_Aes_Supported(); -BoolInt CPU_IsSupported_PageGB(); +BoolInt CPU_Is_InOrder(void); +BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX2(void); +BoolInt CPU_IsSupported_VAES_AVX2(void); +BoolInt CPU_IsSupported_SSSE3(void); +BoolInt CPU_IsSupported_SSE41(void); +BoolInt CPU_IsSupported_SHA(void); +BoolInt CPU_IsSupported_PageGB(void); + +#elif defined(MY_CPU_ARM_OR_ARM64) + +BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); + +#if defined(_WIN32) +BoolInt CPU_IsSupported_CRYPTO(void); +#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO +#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO +#else +BoolInt CPU_IsSupported_SHA1(void); +BoolInt CPU_IsSupported_SHA2(void); +BoolInt CPU_IsSupported_AES(void); +#endif + +#endif + +#if defined(__APPLE__) +int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); +int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); #endif EXTERN_C_END diff --git a/libraries/lzma/C/Delta.c b/libraries/lzma/C/Delta.c index e3edd21edb..c4a4499fe2 100644 --- a/libraries/lzma/C/Delta.c +++ b/libraries/lzma/C/Delta.c @@ -1,5 +1,5 @@ /* Delta.c -- Delta converter -2009-05-26 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,53 +12,158 @@ void Delta_Init(Byte *state) state[i] = 0; } -static void MyMemCpy(Byte *dest, const Byte *src, unsigned size) -{ - unsigned i; - for (i = 0; i < size; i++) - dest[i] = src[i]; -} void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size) { - Byte buf[DELTA_STATE_SIZE]; - unsigned j = 0; - MyMemCpy(buf, state, delta); + Byte temp[DELTA_STATE_SIZE]; + + if (size == 0) + return; + { - SizeT i; - for (i = 0; i < size;) + unsigned i = 0; + do + temp[i] = state[i]; + while (++i != delta); + } + + if (size <= delta) + { + unsigned i = 0, k; + do { - for (j = 0; j < delta && i < size; i++, j++) + Byte b = *data; + *data++ = (Byte)(b - temp[i]); + temp[i] = b; + } + while (++i != size); + + k = 0; + + do + { + if (i == delta) + i = 0; + state[k] = temp[i++]; + } + while (++k != delta); + + return; + } + + { + Byte *p = data + size - delta; + { + unsigned i = 0; + do + state[i] = *p++; + while (++i != delta); + } + { + const Byte *lim = data + delta; + ptrdiff_t dif = -(ptrdiff_t)delta; + + if (((ptrdiff_t)size + dif) & 1) { - Byte b = data[i]; - data[i] = (Byte)(b - buf[j]); - buf[j] = b; + --p; *p = (Byte)(*p - p[dif]); } + + while (p != lim) + { + --p; *p = (Byte)(*p - p[dif]); + --p; *p = (Byte)(*p - p[dif]); + } + + dif = -dif; + + do + { + --p; *p = (Byte)(*p - temp[--dif]); + } + while (dif != 0); } } - if (j == delta) - j = 0; - MyMemCpy(state, buf + j, delta - j); - MyMemCpy(state + delta - j, buf, j); } + void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size) { - Byte buf[DELTA_STATE_SIZE]; - unsigned j = 0; - MyMemCpy(buf, state, delta); + unsigned i; + const Byte *lim; + + if (size == 0) + return; + + i = 0; + lim = data + size; + + if (size <= delta) { - SizeT i; - for (i = 0; i < size;) + do + *data = (Byte)(*data + state[i++]); + while (++data != lim); + + for (; delta != i; state++, delta--) + *state = state[i]; + data -= i; + } + else + { + /* + #define B(n) b ## n + #define I(n) Byte B(n) = state[n]; + #define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); } + #define F(n) if (data != lim) { U(n) } + + if (delta == 1) { - for (j = 0; j < delta && i < size; i++, j++) + I(0) + if ((lim - data) & 1) { U(0) } + while (data != lim) { U(0) U(0) } + data -= 1; + } + else if (delta == 2) + { + I(0) I(1) + lim -= 1; while (data < lim) { U(0) U(1) } + lim += 1; F(0) + data -= 2; + } + else if (delta == 3) + { + I(0) I(1) I(2) + lim -= 2; while (data < lim) { U(0) U(1) U(2) } + lim += 2; F(0) F(1) + data -= 3; + } + else if (delta == 4) + { + I(0) I(1) I(2) I(3) + lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) } + lim += 3; F(0) F(1) F(2) + data -= 4; + } + else + */ + { + do { - buf[j] = data[i] = (Byte)(buf[j] + data[i]); + *data = (Byte)(*data + state[i++]); + data++; + } + while (i != delta); + + { + ptrdiff_t dif = -(ptrdiff_t)delta; + do + *data = (Byte)(*data + data[dif]); + while (++data != lim); + data += dif; } } } - if (j == delta) - j = 0; - MyMemCpy(state, buf + j, delta - j); - MyMemCpy(state + delta - j, buf, j); + + do + *state++ = *data; + while (++data != lim); } diff --git a/libraries/lzma/C/LzFind.c b/libraries/lzma/C/LzFind.c index df55e86c14..bf157a0b24 100644 --- a/libraries/lzma/C/LzFind.c +++ b/libraries/lzma/C/LzFind.c @@ -1,20 +1,69 @@ /* LzFind.c -- Match finder for LZ algorithms -2018-07-08 : Igor Pavlov : Public domain */ +2021-09-03 : Igor Pavlov : Public domain */ #include "Precomp.h" #include +// #include +#include "CpuArch.h" #include "LzFind.h" #include "LzHash.h" -#define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) -#define kMaxHistorySize ((UInt32)7 << 29) +#define kBlockMoveAlign (1 << 7) // alignment for memmove() +#define kBlockSizeAlign (1 << 16) // alignment for block allocation +#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictinary + +#define kEmptyHashValue 0 + +#define kMaxValForNormalize ((UInt32)0) +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug + +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses + +#define GET_AVAIL_BYTES(p) \ + Inline_MatchFinder_GetNumAvailableBytes(p) + + +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) +#define kFix5HashSize kFix4HashSize + +/* + HASH2_CALC: + if (hv) match, then cur[0] and cur[1] also match +*/ +#define HASH2_CALC hv = GetUi16(cur); + +// (crc[0 ... 255] & 0xFF) provides one-to-one correspondence to [0 ... 255] + +/* + HASH3_CALC: + if (cur[0]) and (h2) match, then cur[1] also match + if (cur[0]) and (hv) match, then cur[1] and cur[2] also match +*/ +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + hv = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + temp ^= (p->crc[cur[3]] << kLzHash_CrcShift_1); \ + /* h4 = temp & p->hash4Mask; */ /* (kHash4Size - 1); */ \ + hv = (temp ^ (p->crc[cur[4]] << kLzHash_CrcShift_2)) & p->hashMask; } + +#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; -#define kStartMaxLen 3 static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) { @@ -25,46 +74,57 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) } } -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ -static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) +static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc) { - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) - { - p->blockSize = blockSize; - return 1; - } + if (blockSize == 0) + return 0; if (!p->bufferBase || p->blockSize != blockSize) { + // size_t blockSizeT; LzInWindow_Free(p, alloc); p->blockSize = blockSize; - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); + // blockSizeT = blockSize; + + // printf("\nblockSize = 0x%x\n", blockSize); + /* + #if defined _WIN64 + // we can allocate 4GiB, but still use UInt32 for (p->blockSize) + // we use UInt32 type for (p->blockSize), because + // we don't want to wrap over 4 GiB, + // when we use (p->streamPos - p->pos) that is UInt32. + if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign) + { + blockSizeT = ((size_t)1 << 32); + printf("\nchanged to blockSizeT = 4GiB\n"); + } + #endif + */ + + p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufferBase); + // return 0; // for debug } return (p->bufferBase != NULL); } -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } +static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) -{ - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} +MY_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) return; - /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ + /* We use (p->streamPos - p->pos) value. + (p->streamPos < p->pos) is allowed. */ if (p->directInput) { - UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); + UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); if (curSize > p->directInputRem) curSize = (UInt32)p->directInputRem; p->directInputRem -= curSize; @@ -76,10 +136,22 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { - Byte *dest = p->buffer + (p->streamPos - p->pos); - size_t size = (p->bufferBase + p->blockSize - dest); + Byte *dest = p->buffer + GET_AVAIL_BYTES(p); + size_t size = (size_t)(p->bufferBase + p->blockSize - dest); if (size == 0) + { + /* we call ReadBlock() after NeedMove() and MoveBlock(). + NeedMove() and MoveBlock() povide more than (keepSizeAfter) + to the end of (blockSize). + So we don't execute this branch in normal code flow. + We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock(). + */ + // p->result = SZ_ERROR_FAIL; // we can show error here return; + } + + // #define kRead 3 + // if (size > kRead) size = kRead; // for debug p->result = ISeqInStream_Read(p->stream, dest, &size); if (p->result != SZ_OK) @@ -90,41 +162,52 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) return; } p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) + if (GET_AVAIL_BYTES(p) > p->keepSizeAfter) return; + /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function + (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */ } + + // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter) } + + +MY_NO_INLINE void MatchFinder_MoveBlock(CMatchFinder *p) { + const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; + const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; + p->buffer = p->bufferBase + keepBefore; memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); - p->buffer = p->bufferBase + p->keepSizeBefore; + p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + keepBefore + (size_t)GET_AVAIL_BYTES(p)); } +/* We call MoveBlock() before ReadBlock(). + So MoveBlock() can be wasteful operation, if the whole input data + can fit in current block even without calling MoveBlock(). + in important case where (dataSize <= historySize) + condition (p->blockSize > dataSize + p->keepSizeAfter) is met + So there is no MoveBlock() in that case case. +*/ + int MatchFinder_NeedMove(CMatchFinder *p) { if (p->directInput) return 0; - /* if (p->streamEndWasReached) return 0; */ + if (p->streamEndWasReached || p->result != SZ_OK) + return 0; return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) { - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) + if (p->keepSizeAfter >= GET_AVAIL_BYTES(p)) MatchFinder_ReadBlock(p); } -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) -{ - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} + static void MatchFinder_SetDefaultSettings(CMatchFinder *p) { @@ -175,39 +258,74 @@ static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); } +#if (kBlockSizeReserveMin < kBlockSizeAlign * 2) + #error Stop_Compiling_Bad_Reserve +#endif + + + +static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) +{ + UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter); + /* + if (historySize > kMaxHistorySize) + return 0; + */ + // printf("\nhistorySize == 0x%x\n", historySize); + + if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow + return 0; + + { + const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign; + const UInt32 rem = kBlockSizeMax - blockSize; + const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)) + + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here + if (blockSize >= kBlockSizeMax + || rem < kBlockSizeReserveMin) // we reject settings that will be slow + return 0; + if (reserve >= rem) + blockSize = kBlockSizeMax; + else + { + blockSize += reserve; + blockSize &= ~(UInt32)(kBlockSizeAlign - 1); + } + } + // printf("\n LzFind_blockSize = %x\n", blockSize); + // printf("\n LzFind_blockSize = %d\n", blockSize >> 20); + return blockSize; +} + + int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) { - UInt32 sizeReserv; - - if (historySize > kMaxHistorySize) - { - MatchFinder_Free(p, alloc); - return 0; - } - - sizeReserv = historySize >> 1; - if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; - else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; - - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); - + /* we need one additional byte in (p->keepSizeBefore), + since we use MoveBlock() after (p->pos++) and before dictionary using */ + // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - - if (LzInWindow_Create(p, sizeReserv, alloc)) + + keepAddBufferAfter += matchMaxLen; + /* we need (p->keepSizeAfter >= p->numHashBytes) */ + if (keepAddBufferAfter < p->numHashBytes) + keepAddBufferAfter = p->numHashBytes; + // keepAddBufferAfter -= 2; // for debug + p->keepSizeAfter = keepAddBufferAfter; + + if (p->directInput) + p->blockSize = 0; + if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) { - UInt32 newCyclicBufferSize = historySize + 1; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it UInt32 hs; p->matchMaxLen = matchMaxLen; { + // UInt32 hs4; p->fixedHashSize = 0; - if (p->numHashBytes == 2) - hs = (1 << 16) - 1; - else + hs = (1 << 16) - 1; + if (p->numHashBytes != 2) { hs = historySize; if (hs > p->expectedDataSize) @@ -218,9 +336,9 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs |= (hs >> 2); hs |= (hs >> 4); hs |= (hs >> 8); + // we propagated 16 bits in (hs). Low 16 bits must be set later hs >>= 1; - hs |= 0xFFFF; /* don't change it! It's required for Deflate */ - if (hs > (1 << 24)) + if (hs >= (1 << 24)) { if (p->numHashBytes == 3) hs = (1 << 24) - 1; @@ -228,12 +346,30 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs >>= 1; /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */ } + + // hs = ((UInt32)1 << 25) - 1; // for test + + // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) + hs |= (1 << 16) - 1; /* don't change it! */ + + // bt5: we adjust the size with recommended minimum size + if (p->numHashBytes >= 5) + hs |= (256 << kLzHash_CrcShift_2) - 1; } p->hashMask = hs; hs++; + + /* + hs4 = (1 << 20); + if (hs4 > hs) + hs4 = hs; + // hs4 = (1 << 16); // for test + p->hash4Mask = hs4 - 1; + */ + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; - if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + // if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; hs += p->fixedHashSize; } @@ -242,13 +378,17 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, size_t numSons; p->historySize = historySize; p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; + p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) numSons = newCyclicBufferSize; if (p->btMode) numSons <<= 1; newSize = hs + numSons; + // aligned size is not required here, but it can be better for some loops + #define NUM_REFS_ALIGN_MASK 0xF + newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; + if (p->hash && p->numRefs == newSize) return 1; @@ -268,33 +408,43 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, return 0; } + static void MatchFinder_SetLimits(CMatchFinder *p) { - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; + UInt32 k; + UInt32 n = kMaxValForNormalize - p->pos; + if (n == 0) + n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0) - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; - - if (limit2 <= p->keepSizeAfter) + k = p->cyclicBufferSize - p->cyclicBufferPos; + if (k < n) + n = k; + + k = GET_AVAIL_BYTES(p); { - if (limit2 > 0) - limit2 = 1; + const UInt32 ksa = p->keepSizeAfter; + UInt32 mm = p->matchMaxLen; + if (k > ksa) + k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock + else if (k >= mm) + { + // the limitation for (p->lenLimit) update + k -= mm; // optimization : to reduce the number of checks + k++; + // k = 1; // non-optimized version : for debug + } + else + { + mm = k; + if (k != 0) + k = 1; + } + p->lenLimit = mm; } - else - limit2 -= p->keepSizeAfter; + if (k < n) + n = k; - if (limit2 < limit) - limit = limit2; - - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; + p->posLimit = p->pos + n; } @@ -302,7 +452,7 @@ void MatchFinder_Init_LowHash(CMatchFinder *p) { size_t i; CLzRef *items = p->hash; - size_t numItems = p->fixedHashSize; + const size_t numItems = p->fixedHashSize; for (i = 0; i < numItems; i++) items[i] = kEmptyHashValue; } @@ -312,72 +462,315 @@ void MatchFinder_Init_HighHash(CMatchFinder *p) { size_t i; CLzRef *items = p->hash + p->fixedHashSize; - size_t numItems = (size_t)p->hashMask + 1; + const size_t numItems = (size_t)p->hashMask + 1; for (i = 0; i < numItems; i++) items[i] = kEmptyHashValue; } -void MatchFinder_Init_3(CMatchFinder *p, int readData) +void MatchFinder_Init_4(CMatchFinder *p) { - p->cyclicBufferPos = 0; p->buffer = p->bufferBase; - p->pos = - p->streamPos = p->cyclicBufferSize; + { + /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. + the code in CMatchFinderMt expects (pos = 1) */ + p->pos = + p->streamPos = + 1; // it's smallest optimal value. do not change it + // 0; // for debug + } p->result = SZ_OK; p->streamEndWasReached = 0; - - if (readData) - MatchFinder_ReadBlock(p); - - MatchFinder_SetLimits(p); } +// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug + void MatchFinder_Init(CMatchFinder *p) { MatchFinder_Init_HighHash(p); MatchFinder_Init_LowHash(p); - MatchFinder_Init_3(p, True); + MatchFinder_Init_4(p); + // if (readData) + MatchFinder_ReadBlock(p); + + /* if we init (cyclicBufferPos = pos), then we can use one variable + instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */ + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos) + // p->cyclicBufferPos = 0; // smallest value + // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses. + MatchFinder_SetLimits(p); } - -static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) + + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + #define USE_SATUR_SUB_128 + #define USE_AVX2 + #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1600) + #define USE_SATUR_SUB_128 + #if (_MSC_VER >= 1900) + #define USE_AVX2 + #include // avx + #endif + #endif + #endif + +// #elif defined(MY_CPU_ARM_OR_ARM64) +#elif defined(MY_CPU_ARM64) + + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_SATUR_SUB_128 + #ifdef MY_CPU_ARM64 + // #define ATTRIB_SSE41 __attribute__((__target__(""))) + #else + // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif + + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1910) + #define USE_SATUR_SUB_128 + #endif + #endif + + #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #include + #else + #include + #endif + +#endif + +/* +#ifndef ATTRIB_SSE41 + #define ATTRIB_SSE41 +#endif +#ifndef ATTRIB_AVX2 + #define ATTRIB_AVX2 +#endif +*/ + +#ifdef USE_SATUR_SUB_128 + +// #define _SHOW_HW_STATUS + +#ifdef _SHOW_HW_STATUS +#include +#define _PRF(x) x +_PRF(;) +#else +#define _PRF(x) +#endif + +#ifdef MY_CPU_ARM_OR_ARM64 + +#ifdef MY_CPU_ARM64 +// #define FORCE_SATUR_SUB_128 +#endif + +typedef uint32x4_t v128; +#define SASUB_128(i) \ + *(v128 *)(void *)(items + (i) * 4) = \ + vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); + +#else + +#include // sse4.1 + +typedef __m128i v128; +#define SASUB_128(i) \ + *(v128 *)(void *)(items + (i) * 4) = \ + _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 + +#endif + + + +MY_NO_INLINE +static +#ifdef ATTRIB_SSE41 +ATTRIB_SSE41 +#endif +void LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - return (p->pos - p->historySize - 1) & kNormalizeMask; + v128 sub2 = + #ifdef MY_CPU_ARM_OR_ARM64 + vdupq_n_u32(subValue); + #else + _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + #endif + do + { + SASUB_128(0) + SASUB_128(1) + SASUB_128(2) + SASUB_128(3) + items += 4 * 4; + } + while (items != lim); } + + +#ifdef USE_AVX2 + +#include // avx + +#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 + +MY_NO_INLINE +static +#ifdef ATTRIB_AVX2 +ATTRIB_AVX2 +#endif +void LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + __m256i sub2 = _mm256_set_epi32( + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + do + { + SASUB_256(0) + SASUB_256(1) + items += 2 * 8; + } + while (items != lim); +} +#endif // USE_AVX2 + +#ifndef FORCE_SATUR_SUB_128 +typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( + UInt32 subValue, CLzRef *items, const CLzRef *lim); +static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; +#endif // FORCE_SATUR_SUB_128 + +#endif // USE_SATUR_SUB_128 + + +// kEmptyHashValue must be zero +// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; +#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; + +#ifdef FORCE_SATUR_SUB_128 + +#define DEFAULT_SaturSub LzFind_SaturSub_128 + +#else + +#define DEFAULT_SaturSub LzFind_SaturSub_32 + +MY_NO_INLINE +static void LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + do + { + UInt32 v; + SASUB_32(0) + SASUB_32(1) + SASUB_32(2) + SASUB_32(3) + SASUB_32(4) + SASUB_32(5) + SASUB_32(6) + SASUB_32(7) + items += 8; + } + while (items != lim); +} + +#endif + + +MY_NO_INLINE void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - size_t i; - for (i = 0; i < numItems; i++) + #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) + + CLzRef *lim; + + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) { - UInt32 value = items[i]; - if (value <= subValue) - value = kEmptyHashValue; - else - value -= subValue; - items[i] = value; + UInt32 v; + SASUB_32(0); + items++; + } + + { + #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) + lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); + numItems &= K_NORM_ALIGN_MASK; + if (items != lim) + { + #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) + if (g_LzFind_SaturSub) + g_LzFind_SaturSub(subValue, items, lim); + else + #endif + DEFAULT_SaturSub(subValue, items, lim); + } + items = lim; + } + + + for (; numItems != 0; numItems--) + { + UInt32 v; + SASUB_32(0); + items++; } } -static void MatchFinder_Normalize(CMatchFinder *p) -{ - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->numRefs); - MatchFinder_ReduceOffsets(p, subValue); -} +// call MatchFinder_CheckLimits() only after (p->pos++) update + MY_NO_INLINE static void MatchFinder_CheckLimits(CMatchFinder *p) { + if (// !p->streamEndWasReached && p->result == SZ_OK && + p->keepSizeAfter == GET_AVAIL_BYTES(p)) + { + // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p)) + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); + } + if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); + if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data. + /* + if we disable normalization for last bytes of data, and + if (data_size == 4 GiB), we don't call wastfull normalization, + but (pos) will be wrapped over Zero (0) in that case. + And we cannot resume later to normal operation + */ + { + // MatchFinder_Normalize(p); + /* after normalization we need (p->pos >= p->historySize + 1); */ + /* we can reduce subValue to aligned value, if want to keep alignment + of (p->pos) and (p->buffer) for speculated accesses. */ + const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; + // const UInt32 subValue = (1 << 15); // for debug + // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + Inline_MatchFinder_ReduceOffsets(p, subValue); + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + } + if (p->cyclicBufferPos == p->cyclicBufferSize) p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); } @@ -386,9 +779,9 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) (lenLimit > maxLen) */ MY_FORCE_INLINE -static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, unsigned maxLen) +static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, unsigned maxLen) { /* son[_cyclicBufferPos] = curMatch; @@ -396,7 +789,7 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances; + return d; { const Byte *pb = cur - delta; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; @@ -409,10 +802,10 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos if (maxLen < len) { maxLen = len; - *distances++ = len; - *distances++ = delta - 1; + *d++ = len; + *d++ = delta - 1; if (len == lenLimit) - return distances; + return d; } } } @@ -421,35 +814,41 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos const Byte *lim = cur + lenLimit; son[_cyclicBufferPos] = curMatch; + do { - UInt32 delta = pos - curMatch; + UInt32 delta; + + if (curMatch == 0) + break; + // if (curMatch2 >= curMatch) return NULL; + delta = pos - curMatch; if (delta >= _cyclicBufferSize) break; { ptrdiff_t diff; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; - diff = (ptrdiff_t)0 - delta; - if (cur[maxLen] == cur[maxLen + diff]) + diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) { const Byte *c = cur; while (*c == c[diff]) { if (++c == lim) { - distances[0] = (UInt32)(lim - cur); - distances[1] = delta - 1; - return distances + 2; + d[0] = (UInt32)(lim - cur); + d[1] = delta - 1; + return d + 2; } } { - unsigned len = (unsigned)(c - cur); + const unsigned len = (unsigned)(c - cur); if (maxLen < len) { maxLen = len; - distances[0] = (UInt32)len; - distances[1] = delta - 1; - distances += 2; + d[0] = (UInt32)len; + d[1] = delta - 1; + d += 2; } } } @@ -457,31 +856,36 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos } while (--cutValue); - return distances; + return d; } MY_FORCE_INLINE UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, UInt32 maxLen) + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, UInt32 maxLen) { CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); unsigned len0 = 0, len1 = 0; - for (;;) + + UInt32 cmCheck; + + // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (cmCheck < curMatch) + do { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } + const UInt32 delta = pos - curMatch; { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); - UInt32 pair0 = pair[0]; + const UInt32 pair0 = pair[0]; if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) @@ -491,48 +895,60 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt if (maxLen < len) { maxLen = (UInt32)len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; + *d++ = (UInt32)len; + *d++ = delta - 1; if (len == lenLimit) { *ptr1 = pair0; *ptr0 = pair[1]; - return distances; + return d; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; + // const UInt32 curMatch2 = pair[1]; + // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + // curMatch = curMatch2; + curMatch = pair[1]; ptr1 = pair + 1; - curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; + curMatch = pair[0]; ptr0 = pair; - curMatch = *ptr0; len0 = len; } } } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return d; } + static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); unsigned len0 = 0, len1 = 0; - for (;;) + + UInt32 cmCheck; + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (// curMatch >= pos || // failure + cmCheck < curMatch) + do { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } + const UInt32 delta = pos - curMatch; { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; @@ -554,80 +970,108 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const if (pb[len] < cur[len]) { *ptr1 = curMatch; + curMatch = pair[1]; ptr1 = pair + 1; - curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; + curMatch = pair[0]; ptr0 = pair; - curMatch = *ptr0; len0 = len; } } } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return; } + #define MOVE_POS \ ++p->cyclicBufferPos; \ p->buffer++; \ - if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } -#define MOVE_POS_RET MOVE_POS return (UInt32)offset; +#define MOVE_POS_RET MOVE_POS return distances; -static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } +MY_NO_INLINE +static void MatchFinder_MovePos(CMatchFinder *p) +{ + /* we go here at the end of stream data, when (avail < num_hash_bytes) + We don't update sons[cyclicBufferPos << btMode]. + So (sons) record will contain junk. And we cannot resume match searching + to normal operation, even if we will provide more input data in buffer. + p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue + if (p->btMode) + p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue + */ + MOVE_POS; +} #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ + unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ cur = p->buffer; -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) +#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); + +#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ + distances = func(MF_PARAMS(p), \ + distances, (UInt32)_maxLen_); MOVE_POS_RET; + +#define GET_MATCHES_FOOTER_BT(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) + +#define GET_MATCHES_FOOTER_HC(_maxLen_) \ + GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) -#define GET_MATCHES_FOOTER(offset, maxLen) \ - offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET; -#define SKIP_FOOTER \ - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; #define UPDATE_maxLen { \ - ptrdiff_t diff = (ptrdiff_t)0 - d2; \ + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ const Byte *c = cur + maxLen; \ const Byte *lim = cur + lenLimit; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } -static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - unsigned offset; GET_MATCHES_HEADER(2) HASH2_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 1) + GET_MATCHES_FOOTER_BT(1) } -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - unsigned offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = 0; - GET_MATCHES_FOOTER(offset, 2) + GET_MATCHES_FOOTER_BT(2) } -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +#define SET_mmm \ + mmm = p->cyclicBufferSize; \ + if (pos < mmm) \ + mmm = pos; + + +static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { + UInt32 mmm; UInt32 h2, d2, pos; - unsigned maxLen, offset; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(3) @@ -643,29 +1087,32 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) hash[h2] = pos; (hash + kFix3HashSize)[hv] = pos; - maxLen = 2; - offset = 0; + SET_mmm - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + maxLen = 2; + + if (d2 < mmm && *(cur - d2) == *cur) { UPDATE_maxLen distances[0] = (UInt32)maxLen; distances[1] = d2 - 1; - offset = 2; + distances += 2; if (maxLen == lenLimit) { - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET; } } - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { + UInt32 mmm; UInt32 h2, h3, d2, d3, pos; - unsigned maxLen, offset; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) @@ -676,53 +1123,63 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; - maxLen = 0; - offset = 0; + SET_mmm + + maxLen = 3; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) - { UPDATE_maxLen - distances[(size_t)offset - 2] = (UInt32)maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); - MOVE_POS_RET; + SkipMatchesSpec(MF_PARAMS(p)); + MOVE_POS_RET } + break; } - if (maxLen < 3) - maxLen = 3; - - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -/* -static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos; + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -733,73 +1190,69 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; + // d4 = pos - (hash + kFix4HashSize)[h4]; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; + // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; - maxLen = 0; - offset = 0; + SET_mmm - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + maxLen = 4; + + for (;;) { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; d2 = d3; } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET; } + break; } - - if (maxLen < 4) - maxLen = 4; - GET_MATCHES_FOOTER(offset, maxLen) + GET_MATCHES_FOOTER_BT(maxLen) } -*/ -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { + UInt32 mmm; UInt32 h2, h3, d2, d3, pos; - unsigned maxLen, offset; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) @@ -816,48 +1269,57 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; - maxLen = 0; - offset = 0; + SET_mmm - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) - { - maxLen = 2; - distances[0] = 2; - distances[1] = d2 - 1; - offset = 2; - } - - if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - maxLen = 3; - distances[(size_t)offset + 1] = d3 - 1; - offset += 2; - d2 = d3; - } - - if (offset != 0) + maxLen = 3; + + for (;;) { + if (d2 < mmm && *(cur - d2) == *cur) + { + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + // distances[-2] = 3; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + d2 = d3; + distances[1] = d3 - 1; + distances += 2; + } + else + break; + UPDATE_maxLen - distances[(size_t)offset - 2] = (UInt32)maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 3) - maxLen = 3; - - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(maxLen); } -/* -static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -865,242 +1327,237 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) hash = p->hash; pos = p->pos; - + d2 = pos - hash [h2]; d3 = pos - (hash + kFix3HashSize)[h3]; - d4 = pos - (hash + kFix4HashSize)[h4]; + // d4 = pos - (hash + kFix4HashSize)[h4]; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = pos; (hash + kFix3HashSize)[h3] = pos; - (hash + kFix4HashSize)[h4] = pos; + // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; - maxLen = 0; - offset = 0; + SET_mmm + + maxLen = 4; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + for (;;) { - distances[0] = maxLen = 2; - distances[1] = d2 - 1; - offset = 2; - if (*(cur - d2 + 2) == cur[2]) - distances[0] = maxLen = 3; - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { - distances[2] = maxLen = 3; - distances[3] = d3 - 1; - offset = 4; + distances[0] = 2; + distances[1] = d2 - 1; + distances += 2; + if (*(cur - d2 + 2) == cur[2]) + { + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; + d2 = d3; + } + else + break; + } + else if (d3 < mmm && *(cur - d3) == *cur) + { + distances[1] = d3 - 1; + distances += 2; d2 = d3; } - } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) - { - distances[0] = maxLen = 3; - distances[1] = d3 - 1; - offset = 2; - d2 = d3; - } - - if (d2 != d4 && d4 < p->cyclicBufferSize - && *(cur - d4) == *cur - && *(cur - d4 + 3) == *(cur + 3)) - { - maxLen = 4; - distances[(size_t)offset + 1] = d4 - 1; - offset += 2; - d2 = d4; - } - - if (offset != 0) - { + else + break; + + distances[-2] = 3; + if (*(cur - d2 + 3) != cur[3]) + break; UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; + distances[-2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } + break; } - if (maxLen < 4) - maxLen = 4; - - offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances + offset, maxLen) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(maxLen); } -*/ -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - unsigned offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), - distances, 2) - (distances)); - MOVE_POS_RET + GET_MATCHES_FOOTER_HC(2) } + static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(2) { - SKIP_HEADER(2) HASH2_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(3) { - SKIP_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(3) { UInt32 h2; UInt32 *hash; - SKIP_HEADER(3) HASH3_CALC; hash = p->hash; curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = (hash + kFix3HashSize)[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(4) { UInt32 h2, h3; UInt32 *hash; - SKIP_HEADER(4) HASH4_CALC; hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = (hash + kFix4HashSize)[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } -/* static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(5) { - UInt32 h2, h3, h4; + UInt32 h2, h3; UInt32 *hash; - SKIP_HEADER(5) HASH5_CALC; hash = p->hash; curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = + // (hash + kFix4HashSize)[h4] = (hash + kFix5HashSize)[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } -*/ + + +#define HC_SKIP_HEADER(minLen) \ + do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ + Byte *cur; \ + UInt32 *hash; \ + UInt32 *son; \ + UInt32 pos = p->pos; \ + UInt32 num2 = num; \ + /* (p->pos == p->posLimit) is not allowed here !!! */ \ + { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + num -= num2; \ + { const UInt32 cycPos = p->cyclicBufferPos; \ + son = p->son + cycPos; \ + p->cyclicBufferPos = cycPos + num2; } \ + cur = p->buffer; \ + hash = p->hash; \ + do { \ + UInt32 curMatch; \ + UInt32 hv; + + +#define HC_SKIP_FOOTER \ + cur++; pos++; *son++ = curMatch; \ + } while (--num2); \ + p->buffer = cur; \ + p->pos = pos; \ + if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ + }} while(num); \ + static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { + HC_SKIP_HEADER(4) + UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(4) HASH4_CALC; - hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + (hash + kFix4HashSize)[hv] = pos; + + HC_SKIP_FOOTER } -/* + static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { - UInt32 h2, h3, h4; - UInt32 *hash; - SKIP_HEADER(5) - HASH5_CALC; - hash = p->hash; - curMatch = hash + kFix5HashSize)[hv]; + HC_SKIP_HEADER(5) + + UInt32 h2, h3; + HASH5_CALC + curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[h4] = - (hash + kFix5HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + // (hash + kFix4HashSize)[h4] = + (hash + kFix5HashSize)[hv] = pos; + + HC_SKIP_FOOTER } -*/ + void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { - SKIP_HEADER(3) + HC_SKIP_HEADER(3) + HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + curMatch = hash[hv]; + hash[hv] = pos; + + HC_SKIP_FOOTER } -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { vTable->Init = (Mf_Init_Func)MatchFinder_Init; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { - /* if (p->numHashBytes <= 4) */ + if (p->numHashBytes <= 4) { vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; } - /* else { vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip; } - */ } else if (p->numHashBytes == 2) { @@ -1112,16 +1569,53 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; } - else /* if (p->numHashBytes == 4) */ + else if (p->numHashBytes == 4) { vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; } - /* else { vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; } - */ +} + + + +void LzFindPrepare() +{ + #ifndef FORCE_SATUR_SUB_128 + #ifdef USE_SATUR_SUB_128 + LZFIND_SATUR_SUB_CODE_FUNC f = NULL; + #ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + // #pragma message ("=== LzFind NEON") + _PRF(printf("\n=== LzFind NEON\n")); + f = LzFind_SaturSub_128; + } + // f = 0; // for debug + } + #else // MY_CPU_ARM_OR_ARM64 + if (CPU_IsSupported_SSE41()) + { + // #pragma message ("=== LzFind SSE41") + _PRF(printf("\n=== LzFind SSE41\n")); + f = LzFind_SaturSub_128; + + #ifdef USE_AVX2 + if (CPU_IsSupported_AVX2()) + { + // #pragma message ("=== LzFind AVX2") + _PRF(printf("\n=== LzFind AVX2\n")); + f = LzFind_SaturSub_256; + } + #endif + } + #endif // MY_CPU_ARM_OR_ARM64 + g_LzFind_SaturSub = f; + #endif // USE_SATUR_SUB_128 + #endif // FORCE_SATUR_SUB_128 } diff --git a/libraries/lzma/C/LzFind.h b/libraries/lzma/C/LzFind.h index 42c13be157..eea873ff61 100644 --- a/libraries/lzma/C/LzFind.h +++ b/libraries/lzma/C/LzFind.h @@ -1,5 +1,5 @@ /* LzFind.h -- Match finder for LZ algorithms -2017-06-10 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H @@ -15,7 +15,7 @@ typedef struct _CMatchFinder Byte *buffer; UInt32 pos; UInt32 posLimit; - UInt32 streamPos; + UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ UInt32 lenLimit; UInt32 cyclicBufferPos; @@ -51,17 +51,19 @@ typedef struct _CMatchFinder UInt64 expectedDataSize; } CMatchFinder; -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer) -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos)) +/* #define Inline_MatchFinder_IsFinishedOK(p) \ ((p)->streamEndWasReached \ && (p)->streamPos == (p)->pos \ && (!(p)->directInput || (p)->directInputRem == 0)) +*/ int MatchFinder_NeedMove(CMatchFinder *p); -Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */ void MatchFinder_MoveBlock(CMatchFinder *p); void MatchFinder_ReadIfRequired(CMatchFinder *p); @@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); + +/* +#define Inline_MatchFinder_InitPos(p, val) \ + (p)->pos = (val); \ + (p)->streamPos = (val); +*/ + +#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ + (p)->pos -= (subValue); \ + (p)->streamPos -= (subValue); + UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, UInt32 *distances, UInt32 maxLen); /* @@ -91,7 +104,7 @@ Conditions: typedef void (*Mf_Init_Func)(void *object); typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); -typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef struct _IMatchFinder @@ -101,21 +114,23 @@ typedef struct _IMatchFinder Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; Mf_GetMatches_Func GetMatches; Mf_Skip_Func Skip; -} IMatchFinder; +} IMatchFinder2; -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p); -void MatchFinder_Init_3(CMatchFinder *p, int readData); +void MatchFinder_Init_4(CMatchFinder *p); void MatchFinder_Init(CMatchFinder *p); -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void LzFindPrepare(void); + EXTERN_C_END #endif diff --git a/libraries/lzma/C/LzFindMt.c b/libraries/lzma/C/LzFindMt.c index bb0f42c302..da339ebf5e 100644 --- a/libraries/lzma/C/LzFindMt.c +++ b/libraries/lzma/C/LzFindMt.c @@ -1,97 +1,215 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2018-12-29 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #include "Precomp.h" -#include "LzHash.h" +// #include +#include "CpuArch.h" + +#include "LzHash.h" #include "LzFindMt.h" +// #define LOG_ITERS + +// #define LOG_THREAD + +#ifdef LOG_THREAD +#include +#define PRF(x) x +#else +#define PRF(x) +#endif + +#ifdef LOG_ITERS +#include +extern UInt64 g_NumIters_Tree; +extern UInt64 g_NumIters_Loop; +extern UInt64 g_NumIters_Bytes; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +#define kMtHashBlockSize ((UInt32)1 << 17) +#define kMtHashNumBlocks (1 << 1) + +#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize) + +#define kMtBtBlockSize ((UInt32)1 << 16) +#define kMtBtNumBlocks (1 << 4) + +#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize) + +/* + HASH functions: + We use raw 8/16 bits from a[1] and a[2], + xored with crc(a[0]) and crc(a[3]). + We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches. + our crc() function provides one-to-one correspondence for low 8-bit values: + (crc[0...0xFF] & 0xFF) <-> [0...0xFF] +*/ + +#define MF(mt) ((mt)->MatchFinder) +#define MF_CRC (p->crc) + +// #define MF(mt) (&(mt)->MatchFinder) +// #define MF_CRC (p->MatchFinder.crc) + +#define MT_HASH2_CALC \ + h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +/* +#define MT_HASH3_CALC__NO_2 { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define __MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + h2 = temp & (kHash2Size - 1); \ + temp ^= ((UInt32)cur[2] << 8); \ + h3 = temp & (kHash3Size - 1); \ + h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; } + // (kHash4Size - 1); +*/ + + +MY_NO_INLINE static void MtSync_Construct(CMtSync *p) { + p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; p->csWasEntered = False; Thread_Construct(&p->thread); Event_Construct(&p->canStart); - Event_Construct(&p->wasStarted); Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); } -static void MtSync_GetNextBlock(CMtSync *p) + +#define DEBUG_BUFFER_LOCK // define it to debug lock state + +#ifdef DEBUG_BUFFER_LOCK +#include +#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1); +#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1); +#else +#define BUFFER_MUST_BE_LOCKED(p) +#define BUFFER_MUST_BE_UNLOCKED(p) +#endif + +#define LOCK_BUFFER(p) { \ + BUFFER_MUST_BE_UNLOCKED(p); \ + CriticalSection_Enter(&(p)->cs); \ + (p)->csWasEntered = True; } + +#define UNLOCK_BUFFER(p) { \ + BUFFER_MUST_BE_LOCKED(p); \ + CriticalSection_Leave(&(p)->cs); \ + (p)->csWasEntered = False; } + + +MY_NO_INLINE +static UInt32 MtSync_GetNextBlock(CMtSync *p) { + UInt32 numBlocks = 0; if (p->needStart) { + BUFFER_MUST_BE_UNLOCKED(p) p->numProcessedBlocks = 1; p->needStart = False; p->stopWriting = False; p->exit = False; - Event_Reset(&p->wasStarted); Event_Reset(&p->wasStopped); - Event_Set(&p->canStart); - Event_Wait(&p->wasStarted); - - // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder); } else { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - p->numProcessedBlocks++; + UNLOCK_BUFFER(p) + // we free current block + numBlocks = p->numProcessedBlocks++; Semaphore_Release1(&p->freeSemaphore); } + + // buffer is UNLOCKED here Semaphore_Wait(&p->filledSemaphore); - CriticalSection_Enter(&p->cs); - p->csWasEntered = True; + LOCK_BUFFER(p); + return numBlocks; } -/* MtSync_StopWriting must be called if Writing was started */ +/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ + +MY_NO_INLINE static void MtSync_StopWriting(CMtSync *p) { - UInt32 myNumBlocks = p->numProcessedBlocks; if (!Thread_WasCreated(&p->thread) || p->needStart) return; - p->stopWriting = True; + + PRF(printf("\nMtSync_StopWriting %p\n", p)); + if (p->csWasEntered) { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; + /* we don't use buffer in this thread after StopWriting(). + So we UNLOCK buffer. + And we restore default UNLOCKED state for stopped thread */ + UNLOCK_BUFFER(p) } - Semaphore_Release1(&p->freeSemaphore); - - Event_Wait(&p->wasStopped); - while (myNumBlocks++ != p->numProcessedBlocks) - { - Semaphore_Wait(&p->filledSemaphore); - Semaphore_Release1(&p->freeSemaphore); - } + /* We send (p->stopWriting) message and release freeSemaphore + to free current block. + So the thread will see (p->stopWriting) at some + iteration after Wait(freeSemaphore). + The thread doesn't need to fill all avail free blocks, + so we can get fast thread stop. + */ + + p->stopWriting = True; + Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!! + + PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p)); + Event_Wait(&p->wasStopped); + PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p)); + + /* 21.03 : we don't restore samaphore counters here. + We will recreate and reinit samaphores in next start */ + p->needStart = True; } + +MY_NO_INLINE static void MtSync_Destruct(CMtSync *p) { + PRF(printf("\nMtSync_Destruct %p\n", p)); + if (Thread_WasCreated(&p->thread)) { + /* we want thread to be in Stopped state before sending EXIT command. + note: stop(btSync) will stop (htSync) also */ MtSync_StopWriting(p); + /* thread in Stopped state here : (p->needStart == true) */ p->exit = True; - if (p->needStart) - Event_Set(&p->canStart); - Thread_Wait(&p->thread); - Thread_Close(&p->thread); + // if (p->needStart) // it's (true) + Event_Set(&p->canStart); // we send EXIT command to thread + Thread_Wait_Close(&p->thread); // we wait thread finishing } + if (p->csWasInitialized) { CriticalSection_Delete(&p->cs); p->csWasInitialized = False; } + p->csWasEntered = False; Event_Close(&p->canStart); - Event_Close(&p->wasStarted); Event_Close(&p->wasStopped); Semaphore_Close(&p->freeSemaphore); Semaphore_Close(&p->filledSemaphore); @@ -99,80 +217,251 @@ static void MtSync_Destruct(CMtSync *p) p->wasCreated = False; } -#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } -static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) +// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } +// we want to get real system error codes here instead of SZ_ERROR_THREAD +#define RINOK_THREAD(x) RINOK(x) + + +// call it before each new file (when new starting is required): +MY_NO_INLINE +static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) { + WRes wres; + // BUFFER_MUST_BE_UNLOCKED(p) + if (!p->needStart || p->csWasEntered) + return SZ_ERROR_FAIL; + wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks); + if (wres == 0) + wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks); + return MY_SRes_HRESULT_FROM_WRes(wres); +} + + +static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) +{ + WRes wres; + if (p->wasCreated) return SZ_OK; RINOK_THREAD(CriticalSection_Init(&p->cs)); p->csWasInitialized = True; + p->csWasEntered = False; RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); - - RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); - RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; - - RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); + p->exit = True; /* p->exit is unused before (canStart) Event. + But in case of some unexpected code failure we will get fast exit from thread */ + + // return ERROR_TOO_MANY_POSTS; // for debug + // return EINVAL; // for debug + + if (p->affinity != 0) + wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); + else + wres = Thread_Create(&p->thread, startAddress, obj); + + RINOK_THREAD(wres); p->wasCreated = True; return SZ_OK; } -static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) + +MY_NO_INLINE +static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) { - SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); - if (res != SZ_OK) - MtSync_Destruct(p); - return res; + const WRes wres = MtSync_Create_WRes(p, startAddress, obj); + if (wres == 0) + return 0; + MtSync_Destruct(p); + return MY_SRes_HRESULT_FROM_WRes(wres); } -void MtSync_Init(CMtSync *p) { p->needStart = True; } + +// ---------- HASH THREAD ---------- #define kMtMaxValForNormalize 0xFFFFFFFF +// #define kMtMaxValForNormalize ((1 << 21)) // for debug +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses + +#ifdef MY_CPU_LE_UNALIGN + #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8) +#else + #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16)) +#endif + +#define GetHeads_DECL(name) \ + static void GetHeads ## name(const Byte *p, UInt32 pos, \ + UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) + +#define GetHeads_LOOP(v) \ + for (; numHeads != 0; numHeads--) { \ + const UInt32 value = (v); \ + p++; \ + *heads++ = pos - hash[value]; \ + hash[value] = pos++; } #define DEF_GetHeads2(name, v, action) \ - static void GetHeads ## name(const Byte *p, UInt32 pos, \ - UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ - { action; for (; numHeads != 0; numHeads--) { \ - const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } - + GetHeads_DECL(name) { action \ + GetHeads_LOOP(v) } + #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) -DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) -DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) -DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) -DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) -/* DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) */ +DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +DEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask) +DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) +// BT3 is not good for crc collisions for big hashMask values. + +/* +GetHeads_DECL(3b) +{ + UNUSED_VAR(hashMask); + UNUSED_VAR(crc); + { + const Byte *pLim = p + numHeads; + if (numHeads == 0) + return; + pLim--; + while (p < pLim) + { + UInt32 v1 = GetUi32(p); + UInt32 v0 = v1 & 0xFFFFFF; + UInt32 h0, h1; + p += 2; + v1 >>= 8; + h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++; + h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++; + heads += 2; + } + if (p == pLim) + { + UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16); + *heads = pos - hash[v0]; + hash[v0] = pos; + } + } +} +*/ + +/* +GetHeads_DECL(4) +{ + unsigned sh = 0; + UNUSED_VAR(crc) + while ((hashMask & 0x80000000) == 0) + { + hashMask <<= 1; + sh++; + } + GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh) +} +#define GetHeads4b GetHeads4 +*/ + +#define USE_GetHeads_LOCAL_CRC + +#ifdef USE_GetHeads_LOCAL_CRC + +GetHeads_DECL(4) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + // crc1[i] = rotlFixed(v, 8) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(4b) +{ + UInt32 crc0[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + crc0[i] = crc[i] & hashMask; + } + GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p)) +} + +GetHeads_DECL(5) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + UInt32 crc2[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + crc2[i] = (v << kLzHash_CrcShift_2) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1)) +} + +GetHeads_DECL(5b) +{ + UInt32 crc0[256]; + UInt32 crc1[256]; + { + unsigned i; + for (i = 0; i < 256; i++) + { + UInt32 v = crc[i]; + crc0[i] = v & hashMask; + crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; + } + } + GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p)) +} + +#else + +DEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask) +DEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask) +DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask) + +#endif + static void HashThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->hashSync; + PRF(printf("\nHashThreadFunc\n")); + for (;;) { - UInt32 numProcessedBlocks = 0; + UInt32 blockIndex = 0; + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n")); Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n")); + if (p->exit) + { + PRF(printf("\nHashThreadFunc : exit \n")); + return; + } - MatchFinder_Init_HighHash(mt->MatchFinder); + MatchFinder_Init_HighHash(MF(mt)); for (;;) { - if (p->exit) - return; - if (p->stopWriting) - { - p->numProcessedBlocks = numProcessedBlocks; - Event_Set(&p->wasStopped); - break; - } + PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos)); { - CMatchFinder *mf = mt->MatchFinder; + CMatchFinder *mf = MF(mt); if (MatchFinder_NeedMove(mf)) { CriticalSection_Enter(&mt->btSync.cs); @@ -185,194 +474,178 @@ static void HashThreadFunc(CMatchFinderMt *mt) mt->pointerToCurPos -= offset; mt->buffer -= offset; } - CriticalSection_Leave(&mt->btSync.cs); CriticalSection_Leave(&mt->hashSync.cs); + CriticalSection_Leave(&mt->btSync.cs); continue; } Semaphore_Wait(&p->freeSemaphore); + if (p->exit) // exit is unexpected here. But we check it here for some failure case + return; + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + MatchFinder_ReadIfRequired(mf); - if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) { - UInt32 subValue = (mf->pos - mf->historySize - 1); - MatchFinder_ReduceOffsets(mf, subValue); - MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); - } - { - UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; - UInt32 num = mf->streamPos - mf->pos; + UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++); + UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf); heads[0] = 2; heads[1] = num; + + /* heads[1] contains the number of avail bytes: + if (avail < mf->numHashBytes) : + { + it means that stream was finished + HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes. + HASH_THREAD doesn't stop, + HASH_THREAD fills only the header (2 numbers) for all next blocks: + {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0} + } + else + { + HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes; + } + */ + if (num >= mf->numHashBytes) { num = num - mf->numHashBytes + 1; if (num > kMtHashBlockSize - 2) num = kMtHashBlockSize - 2; - mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + + if (mf->pos > (UInt32)kMtMaxValForNormalize - num) + { + const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + Inline_MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); + } + heads[0] = 2 + num; + mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); } - mf->pos += num; + + mf->pos += num; // wrap over zero is allowed at the end of stream mf->buffer += num; } } Semaphore_Release1(&p->filledSemaphore); - } - } + } // for() processing end + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); + } // for() thread end } -static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) -{ - MtSync_GetNextBlock(&p->hashSync); - p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; - p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; - p->hashNumAvail = p->hashBuf[p->hashBufPos++]; -} -#define kEmptyHashValue 0 + + +// ---------- BT THREAD ---------- + +/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap. + here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */ +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug #define MFMT_GM_INLINE #ifdef MFMT_GM_INLINE /* - we use size_t for _cyclicBufferPos instead of UInt32 + we use size_t for (pos) instead of UInt32 to eliminate "movsx" BUG in old MSVC x64 compiler. */ -MY_NO_INLINE -static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, - size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *distances, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) -{ - do - { - UInt32 *_distances = ++distances; - UInt32 delta = *hash++; - CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); - unsigned len0 = 0, len1 = 0; - UInt32 cutValue = _cutValue; - unsigned maxLen = (unsigned)_maxLen; - - /* - if (size > 1) - { - UInt32 delta = *hash; - if (delta < _cyclicBufferSize) - { - UInt32 cyc1 = _cyclicBufferPos + 1; - CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1); - Byte b = *(cur + 1 - delta); - _distances[0] = pair[0]; - _distances[1] = b; - } - } - */ - if (cutValue == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - } - else - for(;;) - { - { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - unsigned len = (len0 < len1 ? len0 : len1); - UInt32 pair0 = *pair; - if (pb[len] == cur[len]) - { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - maxLen = len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; - if (len == lenLimit) - { - UInt32 pair1 = pair[1]; - *ptr1 = pair0; - *ptr0 = pair1; - break; - } - } - } - { - UInt32 curMatch = pos - delta; - // delta = pos - *pair; - // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31]; - if (pb[len] < cur[len]) - { - delta = pos - pair[1]; - *ptr1 = curMatch; - ptr1 = pair + 1; - len1 = len; - } - else - { - delta = pos - *pair; - *ptr0 = curMatch; - ptr0 = pair; - len0 = len; - } - } - } - if (--cutValue == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - break; - } - } - pos++; - _cyclicBufferPos++; - cur++; - { - UInt32 num = (UInt32)(distances - _distances); - _distances[-1] = num; - } - } - while (distances < limit && --size != 0); - *posRes = pos; - return distances; -} +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); #endif - -static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) +static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) { UInt32 numProcessed = 0; UInt32 curPos = 2; - UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2 - distances[1] = p->hashNumAvail; + /* GetMatchesSpec() functions don't create (len = 1) + in [len, dist] match pairs, if (p->numHashBytes >= 2) + Also we suppose here that (matchMaxLen >= 2). + So the following code for (reserve) is not required + UInt32 reserve = (p->matchMaxLen * 2); + const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX + if (reserve < kNumHashBytes_Max - 1) + reserve = kNumHashBytes_Max - 1; + const UInt32 limit = kMtBtBlockSize - (reserve); + */ + + const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + + d[1] = p->hashNumAvail; + + if (p->failure_BT) + { + // printf("\n == 1 BtGetMatches() p->failure_BT\n"); + d[0] = 0; + // d[1] = 0; + return; + } while (curPos < limit) { if (p->hashBufPos == p->hashBufPosLimit) { - MatchFinderMt_GetNextBlock_Hash(p); - distances[1] = numProcessed + p->hashNumAvail; - if (p->hashNumAvail >= p->numHashBytes) + // MatchFinderMt_GetNextBlock_Hash(p); + UInt32 avail; + { + const UInt32 bi = MtSync_GetNextBlock(&p->hashSync); + const UInt32 k = GET_HASH_BLOCK_OFFSET(bi); + const UInt32 *h = p->hashBuf + k; + avail = h[1]; + p->hashBufPosLimit = k + h[0]; + p->hashNumAvail = avail; + p->hashBufPos = k + 2; + } + + { + /* we must prevent UInt32 overflow for avail total value, + if avail was increased with new hash block */ + UInt32 availSum = numProcessed + avail; + if (availSum < numProcessed) + availSum = (UInt32)(Int32)-1; + d[1] = availSum; + } + + if (avail >= p->numHashBytes) continue; - distances[0] = curPos + p->hashNumAvail; - distances += curPos; - for (; p->hashNumAvail != 0; p->hashNumAvail--) - *distances++ = 0; + + // if (p->hashBufPos != p->hashBufPosLimit) exit(1); + + /* (avail < p->numHashBytes) + It means that stream was finished. + And (avail) - is a number of remaining bytes, + we fill (d) for (avail) bytes for LZ_THREAD (receiver). + but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */ + + /* here we suppose that we have space enough: + (kMtBtBlockSize - curPos >= p->hashNumAvail) */ + p->hashNumAvail = 0; + d[0] = curPos + avail; + d += curPos; + for (; avail != 0; avail--) + *d++ = 0; return; } { UInt32 size = p->hashBufPosLimit - p->hashBufPos; - UInt32 lenLimit = p->matchMaxLen; UInt32 pos = p->pos; UInt32 cyclicBufferPos = p->cyclicBufferPos; + UInt32 lenLimit = p->matchMaxLen; if (lenLimit >= p->hashNumAvail) lenLimit = p->hashNumAvail; { @@ -384,10 +657,18 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) size = size2; } + if (pos > (UInt32)kMtMaxValForNormalize - size) + { + const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1); + pos -= subValue; + p->pos = pos; + MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); + } + #ifndef MFMT_GM_INLINE while (curPos < limit && size-- != 0) { - UInt32 *startDistances = distances + curPos; + UInt32 *startDistances = d + curPos; UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, startDistances + 1, p->numHashBytes - 1) - startDistances); @@ -399,81 +680,112 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) } #else { - UInt32 posRes; - curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, - distances + limit, - size, &posRes) - distances); - p->hashBufPos += posRes - pos; - cyclicBufferPos += posRes - pos; - p->buffer += posRes - pos; - pos = posRes; + UInt32 posRes = pos; + const UInt32 *d_end; + { + d_end = GetMatchesSpecN_2( + p->buffer + lenLimit - 1, + pos, p->buffer, p->son, p->cutValue, d + curPos, + p->numHashBytes - 1, p->hashBuf + p->hashBufPos, + d + limit, p->hashBuf + p->hashBufPos + size, + cyclicBufferPos, p->cyclicBufferSize, + &posRes); + } + { + if (!d_end) + { + // printf("\n == 2 BtGetMatches() p->failure_BT\n"); + // internal data failure + p->failure_BT = True; + d[0] = 0; + // d[1] = 0; + return; + } + } + curPos = (UInt32)(d_end - d); + { + const UInt32 processed = posRes - pos; + pos = posRes; + p->hashBufPos += processed; + cyclicBufferPos += processed; + p->buffer += processed; + } } #endif - numProcessed += pos - p->pos; - p->hashNumAvail -= pos - p->pos; - p->pos = pos; + { + const UInt32 processed = pos - p->pos; + numProcessed += processed; + p->hashNumAvail -= processed; + p->pos = pos; + } if (cyclicBufferPos == p->cyclicBufferSize) cyclicBufferPos = 0; p->cyclicBufferPos = cyclicBufferPos; } } - distances[0] = curPos; + d[0] = curPos; } + static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) { CMtSync *sync = &p->hashSync; + + BUFFER_MUST_BE_UNLOCKED(sync) + if (!sync->needStart) { - CriticalSection_Enter(&sync->cs); - sync->csWasEntered = True; + LOCK_BUFFER(sync) } - BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); + BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex)); + + /* We suppose that we have called GetNextBlock() from start. + So buffer is LOCKED */ - if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) - { - UInt32 subValue = p->pos - p->cyclicBufferSize; - MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); - p->pos -= subValue; - } - - if (!sync->needStart) - { - CriticalSection_Leave(&sync->cs); - sync->csWasEntered = False; - } + UNLOCK_BUFFER(sync) } -void BtThreadFunc(CMatchFinderMt *mt) + +MY_NO_INLINE +static void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; for (;;) { UInt32 blockIndex = 0; Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); + for (;;) { + PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos)); + /* (p->exit == true) is possible after (p->canStart) at first loop iteration + and is unexpected after more Wait(freeSemaphore) iterations */ if (p->exit) return; - if (p->stopWriting) - { - p->numProcessedBlocks = blockIndex; - MtSync_StopWriting(&mt->hashSync); - Event_Set(&p->wasStopped); - break; - } + Semaphore_Wait(&p->freeSemaphore); + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + BtFillBlock(mt, blockIndex++); + Semaphore_Release1(&p->filledSemaphore); } + + // we stop HASH_THREAD here + MtSync_StopWriting(&mt->hashSync); + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); } } + void MatchFinderMt_Construct(CMatchFinderMt *p) { p->hashBuf = NULL; @@ -489,14 +801,37 @@ static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) { - MtSync_Destruct(&p->hashSync); + /* + HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs. + So we must be sure that HASH_THREAD will not use CriticalSection(s) + after deleting CriticalSection here. + + we call ReleaseStream(p) + that calls StopWriting(btSync) + that calls StopWriting(hashSync), if it's required to stop HASH_THREAD. + after StopWriting() it's safe to destruct MtSync(s) in any order */ + + MatchFinderMt_ReleaseStream(p); + MtSync_Destruct(&p->btSync); + MtSync_Destruct(&p->hashSync); + + LOG_ITER( + printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n", + (UInt32)(g_NumIters_Tree / 1000), + (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), + (UInt32)(g_NumIters_Loop / 1000), + (UInt32)(g_NumIters_Bytes / 1000) + )); + MatchFinderMt_FreeMem(p, alloc); } + #define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) #define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) { @@ -509,16 +844,17 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) return 0; } + SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) { - CMatchFinder *mf = p->MatchFinder; + CMatchFinder *mf = MF(p); p->historySize = historySize; if (kMtBtBlockSize <= matchMaxLen * 4) return SZ_ERROR_PARAM; if (!p->hashBuf) { - p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); + p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32)); if (!p->hashBuf) return SZ_ERROR_MEM; p->btBuf = p->hashBuf + kHashBufferSize; @@ -528,253 +864,457 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) return SZ_ERROR_MEM; - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); return SZ_OK; } -/* Call it after ReleaseStream / SetStream */ + +SRes MatchFinderMt_InitMt(CMatchFinderMt *p) +{ + RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); + return MtSync_Init(&p->btSync, kMtBtNumBlocks); +} + + static void MatchFinderMt_Init(CMatchFinderMt *p) { - CMatchFinder *mf = p->MatchFinder; + CMatchFinder *mf = MF(p); p->btBufPos = - p->btBufPosLimit = 0; + p->btBufPosLimit = NULL; p->hashBufPos = p->hashBufPosLimit = 0; + p->hashNumAvail = 0; // 21.03 + + p->failure_BT = False; /* Init without data reading. We don't want to read data in this thread */ - MatchFinder_Init_3(mf, False); + MatchFinder_Init_4(mf); + MatchFinder_Init_LowHash(mf); p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); p->btNumAvailBytes = 0; - p->lzPos = p->historySize + 1; + p->failure_LZ_BT = False; + // p->failure_LZ_LZ = False; + + p->lzPos = + 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug p->hash = mf->hash; p->fixedHashSize = mf->fixedHashSize; + // p->hash4Mask = mf->hash4Mask; p->crc = mf->crc; + // memcpy(p->crc, mf->crc, sizeof(mf->crc)); p->son = mf->son; p->matchMaxLen = mf->matchMaxLen; p->numHashBytes = mf->numHashBytes; - p->pos = mf->pos; - p->buffer = mf->buffer; - p->cyclicBufferPos = mf->cyclicBufferPos; + + /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */ + // mf->streamPos = mf->pos = 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug + + /* we must init (p->pos = mf->pos) for BT, because + BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */ + p->pos = mf->pos; // do not change it + + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); p->cyclicBufferSize = mf->cyclicBufferSize; + p->buffer = mf->buffer; p->cutValue = mf->cutValue; + // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses. } + /* ReleaseStream is required to finish multithreading */ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) { + // Sleep(1); // for debug MtSync_StopWriting(&p->btSync); + // Sleep(200); // for debug /* p->MatchFinder->ReleaseStream(); */ } -static void MatchFinderMt_Normalize(CMatchFinderMt *p) + +MY_NO_INLINE +static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; + if (p->failure_LZ_BT) + p->btBufPos = p->failureBuf; + else + { + const UInt32 bi = MtSync_GetNextBlock(&p->btSync); + const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi); + { + const UInt32 numItems = bt[0]; + p->btBufPosLimit = bt + numItems; + p->btNumAvailBytes = bt[1]; + p->btBufPos = bt + 2; + if (numItems < 2 || numItems > kMtBtBlockSize) + { + p->failureBuf[0] = 0; + p->btBufPos = p->failureBuf; + p->btBufPosLimit = p->failureBuf + 1; + p->failure_LZ_BT = True; + // p->btNumAvailBytes = 0; + /* we don't want to decrease AvailBytes, that was load before. + that can be unxepected for the code that have loaded anopther value before */ + } + } + + if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize) + { + /* we don't check (lzPos) over exact avail bytes in (btBuf). + (fixedHashSize) is small, so normalization is fast */ + const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + p->lzPos -= subValue; + MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize); + } + } + return p->btNumAvailBytes; } -static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) -{ - UInt32 blockIndex; - MtSync_GetNextBlock(&p->btSync); - blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; - p->btBufPosLimit += p->btBuf[p->btBufPos++]; - p->btNumAvailBytes = p->btBuf[p->btBufPos++]; - if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) - MatchFinderMt_Normalize(p); -} + static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) { return p->pointerToCurPos; } + #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); + static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) { - GET_NEXT_BLOCK_IF_REQUIRED; - return p->btNumAvailBytes; + if (p->btBufPos != p->btBufPosLimit) + return p->btNumAvailBytes; + return MatchFinderMt_GetNextBlock_Bt(p); } -static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) + +// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; } +#define CHECK_FAILURE_LZ(_match_, _pos_) + +static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { - UInt32 h2, curMatch2; + UInt32 h2, c2; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; + const UInt32 m = p->lzPos; MT_HASH2_CALC - curMatch2 = hash[h2]; - hash[h2] = lzPos; + c2 = hash[h2]; + hash[h2] = m; - if (curMatch2 >= matchMinPos) - if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + if (c2 >= matchMinPos) + { + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - *distances++ = 2; - *distances++ = lzPos - curMatch2 - 1; + *d++ = 2; + *d++ = m - c2 - 1; } + } - return distances; + return d; } -static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { - UInt32 h2, h3, curMatch2, curMatch3; + UInt32 h2, h3, c2, c3; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; + const UInt32 m = p->lzPos; MT_HASH3_CALC - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + if (c2 >= matchMinPos) { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - distances[0] = 3; - return distances + 2; + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + return d + 2; + } + d[0] = 2; + d += 2; } - distances[0] = 2; - distances += 2; } - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + if (c3 >= matchMinPos) { - *distances++ = 3; - *distances++ = lzPos - curMatch3 - 1; + CHECK_FAILURE_LZ(c3, m) + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } } - return distances; + return d; } -/* -static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) -{ - UInt32 h2, h3, h4, curMatch2, curMatch3, curMatch4; - UInt32 *hash = p->hash; - const Byte *cur = p->pointerToCurPos; - UInt32 lzPos = p->lzPos; - MT_HASH4_CALC - - curMatch2 = hash[ h2]; - curMatch3 = (hash + kFix3HashSize)[h3]; - curMatch4 = (hash + kFix4HashSize)[h4]; - - hash[ h2] = lzPos; - (hash + kFix3HashSize)[h3] = lzPos; - (hash + kFix4HashSize)[h4] = lzPos; - - if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch2 - 1; - if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) - { - distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; - return distances + 2; - } - distances[0] = 2; - distances += 2; - } - - if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) - { - distances[1] = lzPos - curMatch3 - 1; - if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) - { - distances[0] = 4; - return distances + 2; - } - distances[0] = 3; - distances += 2; - } - - if (curMatch4 >= matchMinPos) - if ( - cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && - cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] - ) - { - *distances++ = 4; - *distances++ = lzPos - curMatch4 - 1; - } - - return distances; -} -*/ #define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; -static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) +/* +static +UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) { - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; - p->btNumAvailBytes--; + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + UInt32 matchMinPos; + UInt32 avail = p->btNumAvailBytes - 1; + p->btBufPos = btLim; + { - UInt32 i; - for (i = 0; i < len; i += 2) + p->btNumAvailBytes = avail; + + #define BT_HASH_BYTES_MAX 5 + + matchMinPos = p->lzPos; + + if (len != 0) + matchMinPos -= bt[1]; + else if (avail < (BT_HASH_BYTES_MAX - 1) - 1) { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances[0] = v0; - distances[1] = v1; - distances += 2; + INCREASE_LZ_POS + return d; + } + else + { + const UInt32 hs = p->historySize; + if (matchMinPos > hs) + matchMinPos -= hs; + else + matchMinPos = 1; } } + + for (;;) + { + + UInt32 h2, h3, c2, c3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 m = p->lzPos; + MT_HASH3_CALC + + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + d += 2; + break; + } + // else + { + d[0] = 2; + d += 2; + } + } + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } + break; + } + + if (len != 0) + { + do + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + while (bt != btLim); + } INCREASE_LZ_POS - return len; + return d; +} +*/ + + +static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) +{ + UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + const UInt32 m = p->lzPos; + MT_HASH3_CALC + // MT_HASH4_CALC + c2 = hash[h2]; + c3 = (hash + kFix3HashSize)[h3]; + // c4 = (hash + kFix4HashSize)[h4]; + + hash[h2] = m; + (hash + kFix3HashSize)[h3] = m; + // (hash + kFix4HashSize)[h4] = m; + + #define _USE_H2 + + #ifdef _USE_H2 + if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3; + // return d + 2; + + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + return d; + } + d[0] = 2; + d += 2; + } + #endif + + if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + d[1] = m - c3 - 1; + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3]) + { + d[0] = 4; + return d + 2; + } + d[0] = 3; + d += 2; + } + + #ifdef _USE_H4 + if (c4 >= matchMinPos) + if ( + cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && + cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] + ) + { + *d++ = 4; + *d++ = m - c4 - 1; + } + #endif + + return d; } -static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) -{ - const UInt32 *btBuf = p->btBuf + p->btBufPos; - UInt32 len = *btBuf++; - p->btBufPos += 1 + len; +static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + p->btBufPos = btLim; + p->btNumAvailBytes--; + INCREASE_LZ_POS + { + while (bt != btLim) + { + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; + } + } + return d; +} + + + +static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +{ + const UInt32 *bt = p->btBufPos; + UInt32 len = *bt++; + const UInt32 avail = p->btNumAvailBytes - 1; + p->btNumAvailBytes = avail; + p->btBufPos = bt + len; if (len == 0) { - /* change for bt5 ! */ - if (p->btNumAvailBytes-- >= 4) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); + #define BT_HASH_BYTES_MAX 5 + if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) + { + UInt32 m = p->lzPos; + if (m > p->historySize) + m -= p->historySize; + else + m = 1; + d = p->MixMatchesFunc(p, m, d); + } } else { - /* Condition: there are matches in btBuf with length < p->numHashBytes */ - UInt32 *distances2; - p->btNumAvailBytes--; - distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); + /* + first match pair from BinTree: (match_len, match_dist), + (match_len >= numHashBytes). + MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) + */ + d = p->MixMatchesFunc(p, p->lzPos - bt[1], d); + // if (d) // check for failure do { - UInt32 v0 = btBuf[0]; - UInt32 v1 = btBuf[1]; - btBuf += 2; - distances2[0] = v0; - distances2[1] = v1; - distances2 += 2; + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; + bt += 2; + d[0] = v0; + d[1] = v1; + d += 2; } - while ((len -= 2) != 0); - len = (UInt32)(distances2 - (distances)); + while (len -= 2); } INCREASE_LZ_POS - return len; + return d; } #define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; -#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0); +#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) { @@ -803,12 +1343,16 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) } /* +// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip(). +// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream. + static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(4) - UInt32 h2, h3, h4; - MT_HASH4_CALC - (hash + kFix4HashSize)[h4] = + UInt32 h2, h3; // h4 + MT_HASH3_CALC + // MT_HASH4_CALC + // (hash + kFix4HashSize)[h4] = (hash + kFix3HashSize)[h3] = hash[ h2] = p->lzPos; @@ -816,14 +1360,14 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) } */ -void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) { vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; - switch (p->MatchFinder->numHashBytes) + switch (MF(p)->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; @@ -832,22 +1376,25 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; case 3: - p->GetHeadsFunc = GetHeads3; + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; break; - default: - /* case 4: */ - p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; + case 4: + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; + + // it's fast inline version of GetMatches() + // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; break; - /* default: - p->GetHeadsFunc = GetHeads5; + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; + vTable->Skip = + (Mf_Skip_Func)MatchFinderMt3_Skip; + // (Mf_Skip_Func)MatchFinderMt4_Skip; break; - */ } } diff --git a/libraries/lzma/C/LzFindMt.h b/libraries/lzma/C/LzFindMt.h index ef431e3f55..660b7244d1 100644 --- a/libraries/lzma/C/LzFindMt.h +++ b/libraries/lzma/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2018-07-04 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_MT_H #define __LZ_FIND_MT_H @@ -9,31 +9,26 @@ EXTERN_C_BEGIN -#define kMtHashBlockSize (1 << 13) -#define kMtHashNumBlocks (1 << 3) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) - -#define kMtBtBlockSize (1 << 14) -#define kMtBtNumBlocks (1 << 6) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) - typedef struct _CMtSync { + UInt32 numProcessedBlocks; + CThread thread; + UInt64 affinity; + BoolInt wasCreated; BoolInt needStart; + BoolInt csWasInitialized; + BoolInt csWasEntered; + BoolInt exit; BoolInt stopWriting; - CThread thread; CAutoResetEvent canStart; - CAutoResetEvent wasStarted; CAutoResetEvent wasStopped; CSemaphore freeSemaphore; CSemaphore filledSemaphore; - BoolInt csWasInitialized; - BoolInt csWasEntered; CCriticalSection cs; - UInt32 numProcessedBlocks; + // UInt32 numBlocks_Sent; } CMtSync; typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); @@ -49,18 +44,23 @@ typedef struct _CMatchFinderMt /* LZ */ const Byte *pointerToCurPos; UInt32 *btBuf; - UInt32 btBufPos; - UInt32 btBufPosLimit; + const UInt32 *btBufPos; + const UInt32 *btBufPosLimit; UInt32 lzPos; UInt32 btNumAvailBytes; UInt32 *hash; UInt32 fixedHashSize; + // UInt32 hash4Mask; UInt32 historySize; const UInt32 *crc; Mf_Mix_Matches MixMatchesFunc; - + UInt32 failure_LZ_BT; // failure in BT transfered to LZ + // UInt32 failure_LZ_LZ; // failure in LZ tables + UInt32 failureBuf[1]; + // UInt32 crc[256]; + /* LZ + BT */ CMtSync btSync; Byte btDummy[kMtCacheLineDummy]; @@ -70,6 +70,8 @@ typedef struct _CMatchFinderMt UInt32 hashBufPos; UInt32 hashBufPosLimit; UInt32 hashNumAvail; + UInt32 failure_BT; + CLzRef *son; UInt32 matchMaxLen; @@ -77,7 +79,7 @@ typedef struct _CMatchFinderMt UInt32 pos; const Byte *buffer; UInt32 cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be historySize + 1 */ + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ UInt32 cutValue; /* BT + Hash */ @@ -87,13 +89,19 @@ typedef struct _CMatchFinderMt /* Hash */ Mf_GetHeads GetHeadsFunc; CMatchFinder *MatchFinder; + // CMatchFinder MatchFinder; } CMatchFinderMt; +// only for Mt part void MatchFinderMt_Construct(CMatchFinderMt *p); void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc); + SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); -void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable); + +/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */ +SRes MatchFinderMt_InitMt(CMatchFinderMt *p); void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); EXTERN_C_END diff --git a/libraries/lzma/C/LzFindOpt.c b/libraries/lzma/C/LzFindOpt.c new file mode 100644 index 0000000000..8ff006e074 --- /dev/null +++ b/libraries/lzma/C/LzFindOpt.c @@ -0,0 +1,578 @@ +/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms +2021-07-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" +#include "LzFind.h" + +// #include "LzFindMt.h" + +// #define LOG_ITERS + +// #define LOG_THREAD + +#ifdef LOG_THREAD +#include +#define PRF(x) x +#else +// #define PRF(x) +#endif + +#ifdef LOG_ITERS +#include +UInt64 g_NumIters_Tree; +UInt64 g_NumIters_Loop; +UInt64 g_NumIters_Bytes; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +// ---------- BT THREAD ---------- + +#define USE_SON_PREFETCH +#define USE_LONG_MATCH_OPT + +#define kEmptyHashValue 0 + +// #define CYC_TO_POS_OFFSET 0 + +// #define CYC_TO_POS_OFFSET 1 // for debug + +/* +MY_NO_INLINE +UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) +{ + do + { + UInt32 delta; + if (hash == size) + break; + delta = *hash++; + + if (delta == 0 || delta > (UInt32)pos) + return NULL; + + lenLimit++; + + if (delta == (UInt32)pos) + { + CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2; + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1; + CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + + const Byte *len0 = cur, *len1 = cur; + UInt32 cutValue = _cutValue; + const Byte *maxLen = cur + _maxLen; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1); + const Byte *len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + hash++; + pos++; + cur++; + lenLimit++; + { + CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; + #else + const UInt32 p0 = ptr[0 + (diff * 2)]; + const UInt32 p1 = ptr[1 + (diff * 2)]; + ptr[0] = p0; + ptr[1] = p1; + // ptr[0] = ptr[0 + (diff * 2)]; + // ptr[1] = ptr[1 + (diff * 2)]; + #endif + } + // PrintSon(son + 2, pos - 1); + // printf("\npos = %x delta = %x\n", pos, delta); + len++; + *d++ = 2; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + if (delta >= curMatch) + return NULL; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + if (delta >= curMatch) + return NULL; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= pos) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ + +/* define cbs if you use 2 functions. + GetMatchesSpecN_1() : (pos < _cyclicBufferSize) + GetMatchesSpecN_2() : (pos >= _cyclicBufferSize) + + do not define cbs if you use 1 function: + GetMatchesSpecN_2() +*/ + +// #define cbs _cyclicBufferSize + +/* + we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32 + to eliminate "movsx" BUG in old MSVC x64 compiler. +*/ + +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); + +MY_NO_INLINE +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + lenLimit++; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + + UInt32 cutValue = _cutValue; + const Byte *len0 = cur, *len1 = cur; + const Byte *maxLen = cur + _maxLen; + + // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // SPEC code + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + const Byte *len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + *d++ = 2; + *d++ = (UInt32)(lenLimit - cur); + *d++ = delta - 1; + cur++; + lenLimit++; + // SPEC + _cyclicBufferPos++; + { + // SPEC code + CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1); + // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + pos++; + hash++; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } // for() end for long matches + } + #endif + + break; // break from TREE iterations + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + if (delta >= curMatch) + return NULL; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + if (delta >= curMatch) + return NULL; + } + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= cbs) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} + + + +/* +typedef UInt32 uint32plus; // size_t + +UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + UInt32 *_distances = ++d; + uint32plus len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + uint32plus maxLen = _maxLen; + // lenLimit++; // const Byte *lenLimit = cur + _lenLimit; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + const Byte *pb = cur - delta; + uint32plus len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)len; + *d++ = delta - 1; + if (len == lenLimit) + { + { + const UInt32 pair1 = pair[1]; + *ptr0 = pair1; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + } + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + for (;;) + { + *d++ = 2; + *d++ = (UInt32)lenLimit; + *d++ = delta - 1; + _cyclicBufferPos++; + { + CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1); + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + hash++; + pos++; + cur++; + pb++; + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; + if (pb[len] < cur[len]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + { + if (delta >= curMatch) + return NULL; + delta = (UInt32)pos - delta; + if (delta >= cbs + // delta >= _cyclicBufferSize || delta >= pos + || --cutValue == 0) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ diff --git a/libraries/lzma/C/LzHash.h b/libraries/lzma/C/LzHash.h index e7c942303d..77b898cfab 100644 --- a/libraries/lzma/C/LzHash.h +++ b/libraries/lzma/C/LzHash.h @@ -1,57 +1,34 @@ /* LzHash.h -- HASH functions for LZ algorithms -2015-04-12 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZ_HASH_H #define __LZ_HASH_H +/* + (kHash2Size >= (1 << 8)) : Required + (kHash3Size >= (1 << 16)) : Required +*/ + #define kHash2Size (1 << 10) #define kHash3Size (1 << 16) -#define kHash4Size (1 << 20) +// #define kHash4Size (1 << 20) #define kFix3HashSize (kHash2Size) #define kFix4HashSize (kHash2Size + kHash3Size) -#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) +// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) -#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8); +/* + We use up to 3 crc values for hash: + crc0 + crc1 << Shift_1 + crc2 << Shift_2 + (Shift_1 = 5) and (Shift_2 = 10) is good tradeoff. + Small values for Shift are not good for collision rate. + Big value for Shift_2 increases the minimum size + of hash table, that will be slow for small files. +*/ -#define HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } - -#define HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; } - -#define HASH5_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - temp ^= (p->crc[cur[3]] << 5); \ - h4 = temp & (kHash4Size - 1); \ - hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; } - -/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ -#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; - - -#define MT_HASH2_CALC \ - h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); - -#define MT_HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } - -#define MT_HASH4_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ - h2 = temp & (kHash2Size - 1); \ - temp ^= ((UInt32)cur[2] << 8); \ - h3 = temp & (kHash3Size - 1); \ - h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } +#define kLzHash_CrcShift_1 5 +#define kLzHash_CrcShift_2 10 #endif diff --git a/libraries/lzma/C/Lzma2Dec.c b/libraries/lzma/C/Lzma2Dec.c index 4e138a4aef..ac970a843d 100644 --- a/libraries/lzma/C/Lzma2Dec.c +++ b/libraries/lzma/C/Lzma2Dec.c @@ -1,5 +1,5 @@ /* Lzma2Dec.c -- LZMA2 Decoder -2019-02-02 : Igor Pavlov : Public domain */ +2021-02-09 : Igor Pavlov : Public domain */ /* #define SHOW_DEBUG_INFO */ @@ -93,7 +93,8 @@ void Lzma2Dec_Init(CLzma2Dec *p) LzmaDec_Init(&p->decoder); } -static ELzma2State Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) +// ELzma2State +static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) { switch (p->state) { diff --git a/libraries/lzma/C/LzmaDec.c b/libraries/lzma/C/LzmaDec.c index ba3e1dd50e..d6742e5af8 100644 --- a/libraries/lzma/C/LzmaDec.c +++ b/libraries/lzma/C/LzmaDec.c @@ -1,5 +1,5 @@ /* LzmaDec.c -- LZMA Decoder -2018-07-04 : Igor Pavlov : Public domain */ +2021-04-01 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -13,10 +13,12 @@ #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) -#define kNumMoveBits 5 #define RC_INIT_SIZE 5 +#ifndef _LZMA_DEC_OPT + +#define kNumMoveBits 5 #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) @@ -62,9 +64,10 @@ probLit = prob + (offs + bit + symbol); \ GET_BIT2(probLit, symbol, offs ^= bit; , ;) +#endif // _LZMA_DEC_OPT -#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define UPDATE_0_CHECK range = bound; @@ -114,6 +117,9 @@ #define kMatchMinLen 2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) +#define kMatchSpecLen_Error_Data (1 << 9) +#define kMatchSpecLen_Error_Fail (kMatchSpecLen_Error_Data - 1) + /* External ASM code needs same CLzmaProb array layout. So don't change it. */ /* (probs_1664) is faster and better for code size at some platforms */ @@ -166,10 +172,12 @@ /* p->remainLen : shows status of LZMA decoder: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished - = kMatchSpecLenStart + 1 : need init range coder - = kMatchSpecLenStart + 2 : need init range coder and state + < kMatchSpecLenStart : the number of bytes to be copied with (p->rep0) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + = kMatchSpecLenStart + 1 : need init range coder + = kMatchSpecLenStart + 2 : need init range coder and state + = kMatchSpecLen_Error_Fail : Internal Code Failure + = kMatchSpecLen_Error_Data + [0 ... 273] : LZMA Data Error */ /* ---------- LZMA_DECODE_REAL ---------- */ @@ -188,23 +196,31 @@ In: { LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases. So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol - is not END_OF_PAYALOAD_MARKER, then function returns error code. + is not END_OF_PAYALOAD_MARKER, then the function doesn't write any byte to dictionary, + the function returns SZ_OK, and the caller can use (p->remainLen) and (p->reps[0]) later. } Processing: - first LZMA symbol will be decoded in any case - All checks for limits are at the end of main loop, - It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit), + The first LZMA symbol will be decoded in any case. + All main checks for limits are at the end of main loop, + It decodes additional LZMA-symbols while (p->buf < bufLimit && dicPos < limit), RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked. + But if (p->buf < bufLimit), the caller provided at least (LZMA_REQUIRED_INPUT_MAX + 1) bytes for + next iteration before limit (bufLimit + LZMA_REQUIRED_INPUT_MAX), + that is enough for worst case LZMA symbol with one additional RangeCoder normalization for one bit. + So that function never reads bufLimit [LZMA_REQUIRED_INPUT_MAX] byte. Out: RangeCoder is normalized Result: SZ_OK - OK - SZ_ERROR_DATA - Error - p->remainLen: - < kMatchSpecLenStart : normal remain - = kMatchSpecLenStart : finished + p->remainLen: + < kMatchSpecLenStart : the number of bytes to be copied with (p->reps[0]) offset + = kMatchSpecLenStart : the LZMA stream was finished with end mark + + SZ_ERROR_DATA - error, when the MATCH-Symbol refers out of dictionary + p->remainLen : undefined + p->reps[*] : undefined */ @@ -316,11 +332,6 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit else { UPDATE_1(prob); - /* - // that case was checked before with kBadRepCode - if (checkDicSize == 0 && processedPos == 0) - return SZ_ERROR_DATA; - */ prob = probs + IsRepG0 + state; IF_BIT_0(prob) { @@ -329,6 +340,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit IF_BIT_0(prob) { UPDATE_0(prob); + + // that case was checked before with kBadRepCode + // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } + // The caller doesn't allow (dicPos == limit) case here + // so we don't need the following check: + // if (dicPos == limit) { state = state < kNumLitStates ? 9 : 11; len = 1; break; } + dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; processedPos++; @@ -518,8 +536,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) { - p->dicPos = dicPos; - return SZ_ERROR_DATA; + len += kMatchSpecLen_Error_Data + kMatchMinLen; + // len = kMatchSpecLen_Error_Data; + // len += kMatchMinLen; + break; } } @@ -532,8 +552,13 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit if ((rem = limit - dicPos) == 0) { - p->dicPos = dicPos; - return SZ_ERROR_DATA; + /* + We stop decoding and return SZ_OK, and we can resume decoding later. + Any error conditions can be tested later in caller code. + For more strict mode we can stop decoding with error + // len += kMatchSpecLen_Error_Data; + */ + break; } curLen = ((rem < len) ? (unsigned)rem : len); @@ -572,7 +597,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit p->buf = buf; p->range = range; p->code = code; - p->remainLen = (UInt32)len; + p->remainLen = (UInt32)len; // & (kMatchSpecLen_Error_Data - 1); // we can write real length for error matches too. p->dicPos = dicPos; p->processedPos = processedPos; p->reps[0] = rep0; @@ -580,40 +605,61 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit p->reps[2] = rep2; p->reps[3] = rep3; p->state = (UInt32)state; - + if (len >= kMatchSpecLen_Error_Data) + return SZ_ERROR_DATA; return SZ_OK; } #endif + + static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) { - if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + unsigned len = (unsigned)p->remainLen; + if (len == 0 /* || len >= kMatchSpecLenStart */) + return; { - Byte *dic = p->dic; SizeT dicPos = p->dicPos; - SizeT dicBufSize = p->dicBufSize; - unsigned len = (unsigned)p->remainLen; - SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ - SizeT rem = limit - dicPos; - if (rem < len) - len = (unsigned)(rem); + Byte *dic; + SizeT dicBufSize; + SizeT rep0; /* we use SizeT to avoid the BUG of VC14 for AMD64 */ + { + SizeT rem = limit - dicPos; + if (rem < len) + { + len = (unsigned)(rem); + if (len == 0) + return; + } + } if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) p->checkDicSize = p->prop.dicSize; p->processedPos += (UInt32)len; p->remainLen -= (UInt32)len; - while (len != 0) + dic = p->dic; + rep0 = p->reps[0]; + dicBufSize = p->dicBufSize; + do { - len--; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; dicPos++; } + while (--len); p->dicPos = dicPos; } } +/* +At staring of new stream we have one of the following symbols: + - Literal - is allowed + - Non-Rep-Match - is allowed only if it's end marker symbol + - Rep-Match - is not allowed +We use early check of (RangeCoder:Code) over kBadRepCode to simplify main decoding code +*/ + #define kRange0 0xFFFFFFFF #define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)) #define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))) @@ -621,69 +667,77 @@ static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) #error Stop_Compiling_Bad_LZMA_Check #endif + +/* +LzmaDec_DecodeReal2(): + It calls LZMA_DECODE_REAL() and it adjusts limit according (p->checkDicSize). + +We correct (p->checkDicSize) after LZMA_DECODE_REAL() and in LzmaDec_WriteRem(), +and we support the following state of (p->checkDicSize): + if (total_processed < p->prop.dicSize) then + { + (total_processed == p->processedPos) + (p->checkDicSize == 0) + } + else + (p->checkDicSize == p->prop.dicSize) +*/ + static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) { - do + if (p->checkDicSize == 0) { - SizeT limit2 = limit; - if (p->checkDicSize == 0) - { - UInt32 rem = p->prop.dicSize - p->processedPos; - if (limit - p->dicPos > rem) - limit2 = p->dicPos + rem; - - if (p->processedPos == 0) - if (p->code >= kBadRepCode) - return SZ_ERROR_DATA; - } - - RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit)); - + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit = p->dicPos + rem; + } + { + int res = LZMA_DECODE_REAL(p, limit, bufLimit); if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize) p->checkDicSize = p->prop.dicSize; - - LzmaDec_WriteRem(p, limit); + return res; } - while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); - - return 0; } + + typedef enum { - DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_INPUT_EOF, /* need more input data */ DUMMY_LIT, DUMMY_MATCH, DUMMY_REP } ELzmaDummy; -static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) + +#define IS_DUMMY_END_MARKER_POSSIBLE(dummyRes) ((dummyRes) == DUMMY_MATCH) + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byte **bufOut) { UInt32 range = p->range; UInt32 code = p->code; - const Byte *bufLimit = buf + inSize; + const Byte *bufLimit = *bufOut; const CLzmaProb *probs = GET_PROBS; unsigned state = (unsigned)p->state; ELzmaDummy res; + for (;;) { const CLzmaProb *prob; UInt32 bound; unsigned ttt; - unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1); + unsigned posState = CALC_POS_STATE(p->processedPos, ((unsigned)1 << p->prop.pb) - 1); prob = probs + IsMatch + COMBINED_PS_STATE; IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK - /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ - prob = probs + Literal; if (p->checkDicSize != 0 || p->processedPos != 0) prob += ((UInt32)LZMA_LIT_SIZE * - ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + - (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + ((((p->processedPos) & (((unsigned)1 << (p->prop.lp)) - 1)) << p->prop.lc) + + ((unsigned)p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); if (state < kNumLitStates) { @@ -735,8 +789,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS IF_BIT_0_CHECK(prob) { UPDATE_0_CHECK; - NORMALIZE_CHECK; - return DUMMY_REP; + break; } else { @@ -812,8 +865,6 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS { unsigned numDirectBits = ((posSlot >> 1) - 1); - /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ - if (posSlot < kEndPosModelIndex) { prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits); @@ -844,12 +895,15 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inS } } } + break; } NORMALIZE_CHECK; + + *bufOut = buf; return res; } - +void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState) { p->remainLen = kMatchSpecLenStart + 1; @@ -872,16 +926,41 @@ void LzmaDec_Init(CLzmaDec *p) } +/* +LZMA supports optional end_marker. +So the decoder can lookahead for one additional LZMA-Symbol to check end_marker. +That additional LZMA-Symbol can require up to LZMA_REQUIRED_INPUT_MAX bytes in input stream. +When the decoder reaches dicLimit, it looks (finishMode) parameter: + if (finishMode == LZMA_FINISH_ANY), the decoder doesn't lookahead + if (finishMode != LZMA_FINISH_ANY), the decoder lookahead, if end_marker is possible for current position + +When the decoder lookahead, and the lookahead symbol is not end_marker, we have two ways: + 1) Strict mode (default) : the decoder returns SZ_ERROR_DATA. + 2) The relaxed mode (alternative mode) : we could return SZ_OK, and the caller + must check (status) value. The caller can show the error, + if the end of stream is expected, and the (status) is noit + LZMA_STATUS_FINISHED_WITH_MARK or LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK. +*/ + + +#define RETURN__NOT_FINISHED__FOR_FINISH \ + *status = LZMA_STATUS_NOT_FINISHED; \ + return SZ_ERROR_DATA; // for strict mode + // return SZ_OK; // for relaxed mode + + SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT inSize = *srcLen; (*srcLen) = 0; - *status = LZMA_STATUS_NOT_SPECIFIED; if (p->remainLen > kMatchSpecLenStart) { + if (p->remainLen > kMatchSpecLenStart + 2) + return p->remainLen == kMatchSpecLen_Error_Fail ? SZ_ERROR_FAIL : SZ_ERROR_DATA; + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) p->tempBuf[p->tempBufSize++] = *src++; if (p->tempBufSize != 0 && p->tempBuf[0] != 0) @@ -896,6 +975,12 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr | ((UInt32)p->tempBuf[2] << 16) | ((UInt32)p->tempBuf[3] << 8) | ((UInt32)p->tempBuf[4]); + + if (p->checkDicSize == 0 + && p->processedPos == 0 + && p->code >= kBadRepCode) + return SZ_ERROR_DATA; + p->range = 0xFFFFFFFF; p->tempBufSize = 0; @@ -913,10 +998,21 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr p->remainLen = 0; } - LzmaDec_WriteRem(p, dicLimit); - - while (p->remainLen != kMatchSpecLenStart) + for (;;) { + if (p->remainLen == kMatchSpecLenStart) + { + if (p->code != 0) + return SZ_ERROR_DATA; + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return SZ_OK; + } + + LzmaDec_WriteRem(p, dicLimit); + + { + // (p->remainLen == 0 || p->dicPos == dicLimit) + int checkEndMarkNow = 0; if (p->dicPos >= dicLimit) @@ -933,92 +1029,174 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr } if (p->remainLen != 0) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + RETURN__NOT_FINISHED__FOR_FINISH; } checkEndMarkNow = 1; } + // (p->remainLen == 0) + if (p->tempBufSize == 0) { - SizeT processed; const Byte *bufLimit; + int dummyProcessed = -1; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) { - int dummyRes = LzmaDec_TryDummy(p, src, inSize); - if (dummyRes == DUMMY_ERROR) + const Byte *bufOut = src + inSize; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, src, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) { - memcpy(p->tempBuf, src, inSize); - p->tempBufSize = (unsigned)inSize; + size_t i; + if (inSize >= LZMA_REQUIRED_INPUT_MAX) + break; (*srcLen) += inSize; + p->tempBufSize = (unsigned)inSize; + for (i = 0; i < inSize; i++) + p->tempBuf[i] = src[i]; *status = LZMA_STATUS_NEEDS_MORE_INPUT; return SZ_OK; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + + dummyProcessed = (int)(bufOut - src); + if ((unsigned)dummyProcessed > LZMA_REQUIRED_INPUT_MAX) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) { - *status = LZMA_STATUS_NOT_FINISHED; - return SZ_ERROR_DATA; + unsigned i; + (*srcLen) += (unsigned)dummyProcessed; + p->tempBufSize = (unsigned)dummyProcessed; + for (i = 0; i < (unsigned)dummyProcessed; i++) + p->tempBuf[i] = src[i]; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; } + bufLimit = src; + // we will decode only one iteration } else bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; - if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) - return SZ_ERROR_DATA; - processed = (SizeT)(p->buf - src); - (*srcLen) += processed; - src += processed; - inSize -= processed; - } - else - { - unsigned rem = p->tempBufSize, lookAhead = 0; - while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) - p->tempBuf[rem++] = src[lookAhead++]; - p->tempBufSize = rem; - if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { - int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem); - if (dummyRes == DUMMY_ERROR) + int res = LzmaDec_DecodeReal2(p, dicLimit, bufLimit); + + SizeT processed = (SizeT)(p->buf - src); + + if (dummyProcessed < 0) { - (*srcLen) += (SizeT)lookAhead; - *status = LZMA_STATUS_NEEDS_MORE_INPUT; - return SZ_OK; + if (processed > inSize) + break; } - if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + else if ((unsigned)dummyProcessed != processed) + break; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + + if (res != SZ_OK) { - *status = LZMA_STATUS_NOT_FINISHED; + p->remainLen = kMatchSpecLen_Error_Data; return SZ_ERROR_DATA; } } + continue; + } + + { + // we have some data in (p->tempBuf) + // in strict mode: tempBufSize is not enough for one Symbol decoding. + // in relaxed mode: tempBufSize not larger than required for one Symbol decoding. + + unsigned rem = p->tempBufSize; + unsigned ahead = 0; + int dummyProcessed = -1; + + while (rem < LZMA_REQUIRED_INPUT_MAX && ahead < inSize) + p->tempBuf[rem++] = src[ahead++]; + + // ahead - the size of new data copied from (src) to (p->tempBuf) + // rem - the size of temp buffer including new data from (src) + + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + const Byte *bufOut = p->tempBuf + rem; + + ELzmaDummy dummyRes = LzmaDec_TryDummy(p, p->tempBuf, &bufOut); + + if (dummyRes == DUMMY_INPUT_EOF) + { + if (rem >= LZMA_REQUIRED_INPUT_MAX) + break; + p->tempBufSize = rem; + (*srcLen) += (SizeT)ahead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + + dummyProcessed = (int)(bufOut - p->tempBuf); + + if ((unsigned)dummyProcessed < p->tempBufSize) + break; + + if (checkEndMarkNow && !IS_DUMMY_END_MARKER_POSSIBLE(dummyRes)) + { + (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; + p->tempBufSize = (unsigned)dummyProcessed; + // p->remainLen = kMatchSpecLen_Error_Data; + RETURN__NOT_FINISHED__FOR_FINISH; + } + } + p->buf = p->tempBuf; - if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) - return SZ_ERROR_DATA; { - unsigned kkk = (unsigned)(p->buf - p->tempBuf); - if (rem < kkk) - return SZ_ERROR_FAIL; /* some internal error */ - rem -= kkk; - if (lookAhead < rem) - return SZ_ERROR_FAIL; /* some internal error */ - lookAhead -= rem; + // we decode one symbol from (p->tempBuf) here, so the (bufLimit) is equal to (p->buf) + int res = LzmaDec_DecodeReal2(p, dicLimit, p->buf); + + SizeT processed = (SizeT)(p->buf - p->tempBuf); + rem = p->tempBufSize; + + if (dummyProcessed < 0) + { + if (processed > LZMA_REQUIRED_INPUT_MAX) + break; + if (processed < rem) + break; + } + else if ((unsigned)dummyProcessed != processed) + break; + + processed -= rem; + + src += processed; + inSize -= processed; + (*srcLen) += processed; + p->tempBufSize = 0; + + if (res != SZ_OK) + { + p->remainLen = kMatchSpecLen_Error_Data; + return SZ_ERROR_DATA; + } } - (*srcLen) += (SizeT)lookAhead; - src += lookAhead; - inSize -= (SizeT)lookAhead; - p->tempBufSize = 0; } + } } - - if (p->code != 0) - return SZ_ERROR_DATA; - *status = LZMA_STATUS_FINISHED_WITH_MARK; - return SZ_OK; + + /* Some unexpected error: internal error of code, memory corruption or hardware failure */ + p->remainLen = kMatchSpecLen_Error_Fail; + return SZ_ERROR_FAIL; } + SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) { SizeT outSize = *destLen; diff --git a/libraries/lzma/C/LzmaDec.h b/libraries/lzma/C/LzmaDec.h index 1f0927ab13..6f1296250c 100644 --- a/libraries/lzma/C/LzmaDec.h +++ b/libraries/lzma/C/LzmaDec.h @@ -1,5 +1,5 @@ /* LzmaDec.h -- LZMA Decoder -2018-04-21 : Igor Pavlov : Public domain */ +2020-03-19 : Igor Pavlov : Public domain */ #ifndef __LZMA_DEC_H #define __LZMA_DEC_H @@ -181,6 +181,7 @@ Returns: LZMA_STATUS_NEEDS_MORE_INPUT LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, @@ -223,6 +224,7 @@ Returns: SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). + SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure */ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, diff --git a/libraries/lzma/C/LzmaEnc.c b/libraries/lzma/C/LzmaEnc.c index 46a0db000f..b04a7b7b71 100644 --- a/libraries/lzma/C/LzmaEnc.c +++ b/libraries/lzma/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2019-01-10: Igor Pavlov : Public domain */ +2021-11-18: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,6 +12,7 @@ #include #endif +#include "CpuArch.h" #include "LzmaEnc.h" #include "LzFind.h" @@ -19,12 +20,25 @@ #include "LzFindMt.h" #endif +/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); +void LzmaEnc_Finish(CLzmaEncHandle pp); +void LzmaEnc_SaveState(CLzmaEncHandle pp); +void LzmaEnc_RestoreState(CLzmaEncHandle pp); + #ifdef SHOW_STAT static unsigned g_STAT_OFFSET = 0; #endif -#define kLzmaMaxHistorySize ((UInt32)3 << 29) -/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ +/* for good normalization speed we still reserve 256 MB before 4 GB range */ +#define kLzmaMaxHistorySize ((UInt32)15 << 28) #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) @@ -36,7 +50,7 @@ static unsigned g_STAT_OFFSET = 0; #define kNumMoveReducingBits 4 #define kNumBitPriceShiftBits 4 -#define kBitPrice (1 << kNumBitPriceShiftBits) +// #define kBitPrice (1 << kNumBitPriceShiftBits) #define REP_LEN_COUNT 64 @@ -47,6 +61,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->reduceSize = (UInt64)(Int64)-1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->writeEndMark = 0; + p->affinity = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -55,16 +70,21 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (level < 0) level = 5; p->level = level; - if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26))); + if (p->dictSize == 0) + p->dictSize = + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) + ))); + if (p->dictSize > p->reduceSize) { - unsigned i; - UInt32 reduceSize = (UInt32)p->reduceSize; - for (i = 11; i <= 30; i++) - { - if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } - if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } - } + UInt32 v = (UInt32)p->reduceSize; + const UInt32 kReduceMin = ((UInt32)1 << 12); + if (v < kReduceMin) + v = kReduceMin; + if (p->dictSize > v) + p->dictSize = v; } if (p->lc < 0) p->lc = 3; @@ -74,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); - if (p->numHashBytes < 0) p->numHashBytes = 4; - if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5); + if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1); if (p->numThreads < 0) p->numThreads = @@ -93,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) return props.dictSize; } -#if (_MSC_VER >= 1400) -/* BSR code is fast for some new CPUs */ -/* #define LZMA_LOG_BSR */ + +/* +x86/x64: + +BSR: + IF (SRC == 0) ZF = 1, DEST is undefined; + AMD : DEST is unchanged; + IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit + BSR is slow in some processors + +LZCNT: + IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64) + IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits + IF (DEST == 0) ZF = 1; + +LZCNT works only in new processors starting from Haswell. +if LZCNT is not supported by processor, then it's executed as BSR. +LZCNT can be faster than BSR, if supported. +*/ + +// #define LZMA_LOG_BSR + +#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */ + + #if (defined(__clang__) && (__clang_major__ >= 6)) \ + || (defined(__GNUC__) && (__GNUC__ >= 6)) + #define LZMA_LOG_BSR + #elif defined(_MSC_VER) && (_MSC_VER >= 1300) + // #if defined(MY_CPU_ARM_OR_ARM64) + #define LZMA_LOG_BSR + // #endif + #endif #endif +// #include + #ifdef LZMA_LOG_BSR -#define kDicLogSizeMaxCompress 32 +#if defined(__clang__) \ + || defined(__GNUC__) -#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } +/* + C code: : (30 - __builtin_clz(x)) + gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31) + clang10 for x64 : 31 + (bsr(x) xor -32) +*/ -static unsigned GetPosSlot1(UInt32 pos) + #define MY_clz(x) ((unsigned)__builtin_clz(x)) + // __lzcnt32 + // __builtin_ia32_lzcnt_u32 + +#else // #if defined(_MSC_VER) + + #ifdef MY_CPU_ARM_OR_ARM64 + + #define MY_clz _CountLeadingZeros + + #else // if defined(MY_CPU_X86_OR_AMD64) + + // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU) + // _BitScanReverse code is not optimal for some MSVC compilers + #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \ + res = (zz + zz) + (pos >> zz); } + + #endif // MY_CPU_X86_OR_AMD64 + +#endif // _MSC_VER + + +#ifndef BSR2_RET + + #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \ + res = (zz + zz) + (pos >> zz); } + +#endif + + +unsigned GetPosSlot1(UInt32 pos); +unsigned GetPosSlot1(UInt32 pos) { unsigned res; BSR2_RET(pos, res); @@ -113,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos) #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } -#else -#define kNumLogBits (9 + sizeof(size_t) / 2) -/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */ +#else // ! LZMA_LOG_BSR + +#define kNumLogBits (11 + sizeof(size_t) / 8 * 3) #define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) @@ -163,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos) #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } -#endif +#endif // LZMA_LOG_BSR #define LZMA_NUM_REPS 4 @@ -193,7 +280,7 @@ typedef struct #define kNumLenToPosStates 4 #define kNumPosSlotBits 6 -#define kDicLogSizeMin 0 +// #define kDicLogSizeMin 0 #define kDicLogSizeMax 32 #define kDistTableSizeMax (kDicLogSizeMax * 2) @@ -299,7 +386,7 @@ typedef UInt32 CProbPrice; typedef struct { void *matchFinderObj; - IMatchFinder matchFinder; + IMatchFinder2 matchFinder; unsigned optCur; unsigned optEnd; @@ -344,10 +431,14 @@ typedef struct // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; // end of CMatchFinderMt is used in BT and HASH threads + // #else + // CMatchFinder matchFinderBase; #endif - CMatchFinder matchFinderBase; + + // we suppose that we have 8-bytes alignment after CMatchFinder + #ifndef _7ZIP_ST Byte pad[128]; #endif @@ -355,8 +446,10 @@ typedef struct // LZ thread CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + // we want {len , dist} pairs to be 8-bytes aligned in matches array + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2]; + // we want 8-bytes alignment here UInt32 alignPrices[kAlignTableSize]; UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; @@ -385,12 +478,19 @@ typedef struct CSaveState saveState; + // BoolInt mf_Failure; #ifndef _7ZIP_ST Byte pad2[128]; #endif } CLzmaEnc; +#define MFB (p->matchFinderBase) +/* +#ifndef _7ZIP_ST +#define MFB (p->matchFinderMt.MatchFinder) +#endif +*/ #define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); @@ -455,41 +555,51 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX - || props.pb > LZMA_PB_MAX - || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress) - || props.dictSize > kLzmaMaxHistorySize) + || props.pb > LZMA_PB_MAX) return SZ_ERROR_PARAM; + + if (props.dictSize > kLzmaMaxHistorySize) + props.dictSize = kLzmaMaxHistorySize; + + #ifndef LZMA_LOG_BSR + { + const UInt64 dict64 = props.dictSize; + if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress)) + return SZ_ERROR_PARAM; + } + #endif + p->dictSize = props.dictSize; { - unsigned fb = props.fb; + unsigned fb = (unsigned)props.fb; if (fb < 5) fb = 5; if (fb > LZMA_MATCH_LEN_MAX) fb = LZMA_MATCH_LEN_MAX; p->numFastBytes = fb; } - p->lc = props.lc; - p->lp = props.lp; - p->pb = props.pb; + p->lc = (unsigned)props.lc; + p->lp = (unsigned)props.lp; + p->pb = (unsigned)props.pb; p->fastMode = (props.algo == 0); // p->_maxMode = True; - p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); + MFB.btMode = (Byte)(props.btMode ? 1 : 0); { unsigned numHashBytes = 4; if (props.btMode) { - if (props.numHashBytes < 2) - numHashBytes = 2; - else if (props.numHashBytes < 4) - numHashBytes = props.numHashBytes; + if (props.numHashBytes < 2) numHashBytes = 2; + else if (props.numHashBytes < 4) numHashBytes = (unsigned)props.numHashBytes; } - p->matchFinderBase.numHashBytes = numHashBytes; + if (props.numHashBytes >= 5) numHashBytes = 5; + + MFB.numHashBytes = numHashBytes; } - p->matchFinderBase.cutValue = props.mc; + MFB.cutValue = props.mc; - p->writeEndMark = props.writeEndMark; + p->writeEndMark = (BoolInt)props.writeEndMark; #ifndef _7ZIP_ST /* @@ -500,6 +610,8 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } */ p->multiThread = (props.numThreads > 1); + p->matchFinderMt.btSync.affinity = + p->matchFinderMt.hashSync.affinity = props.affinity; #endif return SZ_OK; @@ -509,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.expectedDataSize = expectedDataSiize; + MFB.expectedDataSize = expectedDataSiize; } @@ -536,8 +648,8 @@ static void RangeEnc_Construct(CRangeEnc *p) p->bufBase = NULL; } -#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) -#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) +#define RangeEnc_GetProcessed(p) ( (p)->processed + (size_t)((p)->buf - (p)->bufBase) + (p)->cacheSize) +#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + (size_t)((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize) #define RC_BUF_SIZE (1 << 16) @@ -556,12 +668,11 @@ static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc) static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->bufBase); - p->bufBase = 0; + p->bufBase = NULL; } static void RangeEnc_Init(CRangeEnc *p) { - /* Stream.Init(); */ p->range = 0xFFFFFFFF; p->cache = 0; p->low = 0; @@ -575,12 +686,12 @@ static void RangeEnc_Init(CRangeEnc *p) MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) { - size_t num; - if (p->res != SZ_OK) - return; - num = p->buf - p->bufBase; - if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) - p->res = SZ_ERROR_WRITE; + const size_t num = (size_t)(p->buf - p->bufBase); + if (p->res == SZ_OK) + { + if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + } p->processed += num; p->buf = p->bufBase; } @@ -656,7 +767,7 @@ static void RangeEnc_FlushData(CRangeEnc *p) range += newBound & mask; \ mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \ mask += ((1 << kNumMoveBits) - 1); \ - ttt += (Int32)(mask - ttt) >> kNumMoveBits; \ + ttt += (UInt32)((Int32)(mask - ttt) >> kNumMoveBits); \ *(prob) = (CLzmaProb)ttt; \ RC_NORM(p) \ } @@ -749,7 +860,7 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices) bitCount++; } } - ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + ProbPrices[i] = (CProbPrice)(((unsigned)kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); // printf("\n%3d: %5d", i, ProbPrices[i]); } } @@ -985,7 +1096,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) p->additionalOffset++; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + { + const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; } + numPairs = (unsigned)(d - p->matches); + } *numPairsRes = numPairs; #ifdef SHOW_STAT @@ -1001,7 +1116,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) if (numPairs == 0) return 0; { - unsigned len = p->matches[(size_t)numPairs - 2]; + const unsigned len = p->matches[(size_t)numPairs - 2]; if (len != p->numFastBytes) return len; { @@ -1011,7 +1126,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; const Byte *p2 = p1 + len; - ptrdiff_t dif = (ptrdiff_t)-1 - p->matches[(size_t)numPairs - 1]; + const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; const Byte *lim = p1 + numAvail; for (; p2 != lim && *p2 == p2[dif]; p2++) {} @@ -1167,6 +1282,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) repLens[i] = len; if (len > repLens[repMaxIndex]) repMaxIndex = i; + if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization + break; } if (repLens[repMaxIndex] >= p->numFastBytes) @@ -1179,10 +1296,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } matches = p->matches; + #define MATCHES matches + // #define MATCHES p->matches if (mainLen >= p->numFastBytes) { - p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS; MOVE_POS(p, mainLen - 1) return mainLen; } @@ -1276,13 +1395,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (len < 2) len = 2; else - while (len > matches[offs]) + while (len > MATCHES[offs]) offs += 2; for (; ; len++) { COptimal *opt; - UInt32 dist = matches[(size_t)offs + 1]; + UInt32 dist = MATCHES[(size_t)offs + 1]; UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); unsigned lenToPosState = GetLenToPosState(len); @@ -1306,7 +1425,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) opt->extra = 0; } - if (len == matches[offs]) + if (len == MATCHES[offs]) { offs += 2; if (offs == numPairs) @@ -1727,8 +1846,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (newLen > numAvail) { newLen = numAvail; - for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); - matches[numPairs] = (UInt32)newLen; + for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2); + MATCHES[numPairs] = (UInt32)newLen; numPairs += 2; } @@ -1747,9 +1866,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } offs = 0; - while (startLen > matches[offs]) + while (startLen > MATCHES[offs]) offs += 2; - dist = matches[(size_t)offs + 1]; + dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) GetPosSlot2(dist, posSlot); @@ -1776,7 +1895,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } } - if (len == matches[offs]) + if (len == MATCHES[offs]) { // if (p->_maxMode) { // MATCH : LIT : REP_0 @@ -1841,7 +1960,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) offs += 2; if (offs == numPairs) break; - dist = matches[(size_t)offs + 1]; + dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) GetPosSlot2(dist, posSlot); } @@ -2059,8 +2178,23 @@ static SRes CheckErrors(CLzmaEnc *p) return p->result; if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) + + #ifndef _7ZIP_ST + if ( + // p->mf_Failure || + (p->mtMode && + ( // p->matchFinderMt.failure_LZ_LZ || + p->matchFinderMt.failure_LZ_BT)) + ) + { + p->result = MY_HRES_ERROR__INTERNAL_ERROR; + // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); + } + #endif + + if (MFB.result != SZ_OK) p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) p->finished = True; return p->result; @@ -2198,14 +2332,14 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) -void LzmaEnc_Construct(CLzmaEnc *p) +static void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); + MatchFinder_Construct(&MFB); #ifndef _7ZIP_ST + p->matchFinderMt.MatchFinder = &MFB; MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; #endif { @@ -2221,7 +2355,6 @@ void LzmaEnc_Construct(CLzmaEnc *p) LzmaEnc_InitPriceTables(p->ProbPrices); p->litProbs = NULL; p->saveState.litProbs = NULL; - } CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) @@ -2233,7 +2366,7 @@ CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) return p; } -void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) +static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->litProbs); ISzAlloc_Free(alloc, p->saveState.litProbs); @@ -2241,13 +2374,13 @@ void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc) p->saveState.litProbs = NULL; } -void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) +static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) { #ifndef _7ZIP_ST MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); #endif - MatchFinder_Free(&p->matchFinderBase, allocBig); + MatchFinder_Free(&MFB, allocBig); LzmaEnc_FreeLits(p, alloc); RangeEnc_Free(&p->rc, alloc); } @@ -2259,11 +2392,18 @@ void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) } +MY_NO_INLINE static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->needInit) { + #ifndef _7ZIP_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + } + #endif p->matchFinder.Init(p->matchFinderObj); p->needInit = 0; } @@ -2521,12 +2661,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa // { int y; for (y = 0; y < 100; y++) { FillDistancesPrices(p); // }} - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); } if (p->repLenEncCounter <= 0) { p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } } @@ -2559,11 +2699,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { UInt32 beforeSize = kNumOpts; + UInt32 dictSize; + if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; #ifndef _7ZIP_ST - p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0)); + p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif { @@ -2582,36 +2724,56 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, } } - p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; + + dictSize = p->dictSize; + if (dictSize == ((UInt32)2 << 30) || + dictSize == ((UInt32)3 << 30)) + { + /* 21.03 : here we reduce the dictionary for 2 reasons: + 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary. + 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases, + where data size is aligned for 1 GB: 5/6/8 GB. + That reducing must be >= 1 for such corner cases. */ + dictSize -= 1; + } + + if (beforeSize + dictSize < keepWindowSize) + beforeSize = keepWindowSize - dictSize; + + /* in worst case we can look ahead for + max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes. + we send larger value for (keepAfter) to MantchFinder_Create(): + (numFastBytes + LZMA_MATCH_LEN_MAX + 1) + */ #ifndef _7ZIP_ST if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, - LZMA_MATCH_LEN_MAX - + 1 /* 18.04 */ + RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ , allocBig)); p->matchFinderObj = &p->matchFinderMt; - p->matchFinderBase.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0); + MFB.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else #endif { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + if (!MatchFinder_Create(&MFB, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */ + , allocBig)) return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + p->matchFinderObj = &MFB; + MatchFinder_CreateVTable(&MFB, &p->matchFinder); } return SZ_OK; } -void LzmaEnc_Init(CLzmaEnc *p) +static void LzmaEnc_Init(CLzmaEnc *p) { unsigned i; p->state = 0; @@ -2675,12 +2837,14 @@ void LzmaEnc_Init(CLzmaEnc *p) p->additionalOffset = 0; - p->pbMask = (1 << p->pb) - 1; + p->pbMask = ((unsigned)1 << p->pb) - 1; p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); + + // p->mf_Failure = False; } -void LzmaEnc_InitPrices(CLzmaEnc *p) +static void LzmaEnc_InitPrices(CLzmaEnc *p) { if (!p->fastMode) { @@ -2694,8 +2858,8 @@ void LzmaEnc_InitPrices(CLzmaEnc *p) p->repLenEncCounter = REP_LEN_COUNT; - LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices); - LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->lenEnc, (unsigned)1 << p->pb, &p->lenProbs, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, (unsigned)1 << p->pb, &p->repLenProbs, p->ProbPrices); } static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) @@ -2719,7 +2883,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; + MFB.stream = inStream; p->needInit = 1; p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); @@ -2730,16 +2894,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; + MFB.stream = inStream; p->needInit = 1; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) { - p->matchFinderBase.directInput = 1; - p->matchFinderBase.bufferBase = (Byte *)src; - p->matchFinderBase.directInputRem = srcLen; + MFB.directInput = 1; + MFB.bufferBase = (Byte *)src; + MFB.directInputRem = srcLen; } SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, @@ -2781,19 +2945,23 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s size = p->rem; p->overflow = True; } - memcpy(p->data, data, size); - p->rem -= size; - p->data += size; + if (size != 0) + { + memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + } return size; } +/* UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { const CLzmaEnc *p = (CLzmaEnc *)pp; return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } - +*/ const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { @@ -2841,6 +3009,7 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, } +MY_NO_INLINE static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) { SRes res = SZ_OK; @@ -2870,7 +3039,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) LzmaEnc_Finish(p); /* - if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) + if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) res = SZ_ERROR_FAIL; } */ @@ -2889,35 +3058,43 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) { - CLzmaEnc *p = (CLzmaEnc *)pp; - unsigned i; - UInt32 dictSize = p->dictSize; if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - if (dictSize >= ((UInt32)1 << 22)) { - UInt32 kDictMask = ((UInt32)1 << 20) - 1; - if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) - dictSize = (dictSize + kDictMask) & ~kDictMask; - } - else for (i = 11; i <= 30; i++) - { - if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } - if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } - } + const CLzmaEnc *p = (const CLzmaEnc *)pp; + const UInt32 dictSize = p->dictSize; + UInt32 v; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + // we write aligned dictionary value to properties for lzma decoder + if (dictSize >= ((UInt32)1 << 21)) + { + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; + v = (dictSize + kDictMask) & ~kDictMask; + if (v < dictSize) + v = dictSize; + } + else + { + unsigned i = 11 * 2; + do + { + v = (UInt32)(2 + (i & 1)) << (i >> 1); + i++; + } + while (v < dictSize); + } - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; + SetUi32(props + 1, v); + return SZ_OK; + } } unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) { - return ((CLzmaEnc *)pp)->writeEndMark; + return (unsigned)((CLzmaEnc *)pp)->writeEndMark; } @@ -2974,3 +3151,15 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, LzmaEnc_Destroy(p, alloc, allocBig); return res; } + + +/* +#ifndef _7ZIP_ST +void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + lz_threads[0] = p->matchFinderMt.hashSync.thread; + lz_threads[1] = p->matchFinderMt.btSync.thread; +} +#endif +*/ diff --git a/libraries/lzma/C/LzmaEnc.h b/libraries/lzma/C/LzmaEnc.h index 9194ee576d..bc2ed5042b 100644 --- a/libraries/lzma/C/LzmaEnc.h +++ b/libraries/lzma/C/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2017-07-27 : Igor Pavlov : Public domain */ +2019-10-30 : Igor Pavlov : Public domain */ #ifndef __LZMA_ENC_H #define __LZMA_ENC_H @@ -29,6 +29,8 @@ typedef struct _CLzmaEncProps UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ + + UInt64 affinity; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); diff --git a/libraries/lzma/C/Ppmd.h b/libraries/lzma/C/Ppmd.h index a5c1e3ef25..b198792087 100644 --- a/libraries/lzma/C/Ppmd.h +++ b/libraries/lzma/C/Ppmd.h @@ -1,5 +1,5 @@ /* Ppmd.h -- PPMD codec common code -2017-04-03 : Igor Pavlov : Public domain +2021-04-13 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #ifndef __PPMD_H @@ -9,7 +9,16 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ EXTERN_C_BEGIN -#ifdef MY_CPU_32BIT +#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4) +/* + PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block. + if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields. + if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields. + if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed, + if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional, + and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit. + PPMD code works slightly faster in (PPMD_32BIT) mode. +*/ #define PPMD_32BIT #endif @@ -28,7 +37,7 @@ EXTERN_C_BEGIN #define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4) #define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4) -#pragma pack(push, 1) +MY_CPU_pragma_pack_push_1 /* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */ /* SEE-contexts for PPM-contexts with masked symbols */ @@ -40,41 +49,114 @@ typedef struct } CPpmd_See; #define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ - { (p)->Summ <<= 1; (p)->Count = (Byte)(3 << (p)->Shift++); } + { (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); } + typedef struct { Byte Symbol; Byte Freq; - UInt16 SuccessorLow; - UInt16 SuccessorHigh; + UInt16 Successor_0; + UInt16 Successor_1; } CPpmd_State; -#pragma pack(pop) +typedef struct CPpmd_State2_ +{ + Byte Symbol; + Byte Freq; +} CPpmd_State2; -typedef - #ifdef PPMD_32BIT - CPpmd_State * - #else - UInt32 - #endif - CPpmd_State_Ref; +typedef struct CPpmd_State4_ +{ + UInt16 Successor_0; + UInt16 Successor_1; +} CPpmd_State4; -typedef - #ifdef PPMD_32BIT - void * - #else - UInt32 - #endif - CPpmd_Void_Ref; +MY_CPU_pragma_pop + +/* + PPMD code can write full CPpmd_State structure data to CPpmd*_Context + at (byte offset = 2) instead of some fields of original CPpmd*_Context structure. + + If we use pointers to different types, but that point to shared + memory space, we can have aliasing problem (strict aliasing). + + XLC compiler in -O2 mode can change the order of memory write instructions + in relation to read instructions, if we have use pointers to different types. + + To solve that aliasing problem we use combined CPpmd*_Context structure + with unions that contain the fields from both structures: + the original CPpmd*_Context and CPpmd_State. + So we can access the fields from both structures via one pointer, + and the compiler doesn't change the order of write instructions + in relation to read instructions. + + If we don't use memory write instructions to shared memory in + some local code, and we use only reading instructions (read only), + then probably it's safe to use pointers to different types for reading. +*/ + + + +#ifdef PPMD_32BIT + + #define Ppmd_Ref_Type(type) type * + #define Ppmd_GetRef(p, ptr) (ptr) + #define Ppmd_GetPtr(p, ptr) (ptr) + #define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr) + +#else + + #define Ppmd_Ref_Type(type) UInt32 + #define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) + #define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs))) + #define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs)) + +#endif // PPMD_32BIT + + +typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref; +typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref; +typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref; + + +/* +#ifdef MY_CPU_LE_UNALIGN +// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache. +#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0) +#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v) + +#else +*/ + +/* + We can write 16-bit halves to 32-bit (Successor) field in any selected order. + But the native order is more consistent way. + So we use the native order, if LE/BE order can be detected here at compile time. +*/ + +#ifdef MY_CPU_BE + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); } + +#else + + #define Ppmd_GET_SUCCESSOR(p) \ + ( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) ) + + #define Ppmd_SET_SUCCESSOR(p, v) { \ + (p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \ + (p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); } + +#endif + +// #endif -typedef - #ifdef PPMD_32BIT - Byte * - #else - UInt32 - #endif - CPpmd_Byte_Ref; #define PPMD_SetAllBitsIn256Bytes(p) \ { size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \ diff --git a/libraries/lzma/C/Ppmd7.c b/libraries/lzma/C/Ppmd7.c index 470aadccf1..cf401cb37c 100644 --- a/libraries/lzma/C/Ppmd7.c +++ b/libraries/lzma/C/Ppmd7.c @@ -1,5 +1,5 @@ /* Ppmd7.c -- PPMdH codec -2018-07-04 : Igor Pavlov : Public domain +2021-04-13 : Igor Pavlov : Public domain This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Precomp.h" @@ -8,7 +8,12 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #include "Ppmd7.h" -const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +/* define PPMD7_ORDER_0_SUPPPORT to suport order-0 mode, unsupported by orignal PPMd var.H. code */ +// #define PPMD7_ORDER_0_SUPPPORT + +MY_ALIGN(16) +static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; +MY_ALIGN(16) static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; #define MAX_FREQ 124 @@ -16,13 +21,10 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x #define U2B(nu) ((UInt32)(nu) * UNIT_SIZE) #define U2I(nu) (p->Units2Indx[(size_t)(nu) - 1]) -#define I2U(indx) (p->Indx2Units[indx]) +#define I2U(indx) ((unsigned)p->Indx2Units[indx]) +#define I2U_UInt16(indx) ((UInt16)p->Indx2Units[indx]) -#ifdef PPMD_32BIT - #define REF(ptr) (ptr) -#else - #define REF(ptr) ((UInt32)((Byte *)(ptr) - (p)->Base)) -#endif +#define REF(ptr) Ppmd_GetRef(p, ptr) #define STATS_REF(ptr) ((CPpmd_State_Ref)REF(ptr)) @@ -35,13 +37,7 @@ typedef CPpmd7_Context * CTX_PTR; struct CPpmd7_Node_; -typedef - #ifdef PPMD_32BIT - struct CPpmd7_Node_ * - #else - UInt32 - #endif - CPpmd7_Node_Ref; +typedef Ppmd_Ref_Type(struct CPpmd7_Node_) CPpmd7_Node_Ref; typedef struct CPpmd7_Node_ { @@ -51,17 +47,13 @@ typedef struct CPpmd7_Node_ CPpmd7_Node_Ref Prev; } CPpmd7_Node; -#ifdef PPMD_32BIT - #define NODE(ptr) (ptr) -#else - #define NODE(offs) ((CPpmd7_Node *)(p->Base + (offs))) -#endif +#define NODE(r) Ppmd_GetPtr_Type(p, r, CPpmd7_Node) void Ppmd7_Construct(CPpmd7 *p) { unsigned i, k, m; - p->Base = 0; + p->Base = NULL; for (i = 0, k = 0; i < PPMD_NUM_INDEXES; i++) { @@ -77,6 +69,7 @@ void Ppmd7_Construct(CPpmd7 *p) for (i = 0; i < 3; i++) p->NS2Indx[i] = (Byte)i; + for (m = i, k = 1; i < 256; i++) { p->NS2Indx[i] = (Byte)m; @@ -84,54 +77,63 @@ void Ppmd7_Construct(CPpmd7 *p) k = (++m) - 2; } - memset(p->HB2Flag, 0, 0x40); - memset(p->HB2Flag + 0x40, 8, 0x100 - 0x40); + memcpy(p->ExpEscape, PPMD7_kExpEscape, 16); } + void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc) { ISzAlloc_Free(alloc, p->Base); p->Size = 0; - p->Base = 0; + p->Base = NULL; } + BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc) { if (!p->Base || p->Size != size) { - size_t size2; Ppmd7_Free(p, alloc); - size2 = 0 - #ifndef PPMD_32BIT - + UNIT_SIZE - #endif - ; - p->AlignOffset = - #ifdef PPMD_32BIT - (4 - size) & 3; - #else - 4 - (size & 3); - #endif - if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size + size2)) == 0) + p->AlignOffset = (4 - size) & 3; + if ((p->Base = (Byte *)ISzAlloc_Alloc(alloc, p->AlignOffset + size)) == NULL) return False; p->Size = size; } return True; } + + +// ---------- Internal Memory Allocator ---------- + +/* We can use CPpmd7_Node in list of free units (as in Ppmd8) + But we still need one additional list walk pass in GlueFreeBlocks(). + So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode() +*/ + +#define EMPTY_NODE 0 + + static void InsertNode(CPpmd7 *p, void *node, unsigned indx) { *((CPpmd_Void_Ref *)node) = p->FreeList[indx]; + // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; + p->FreeList[indx] = REF(node); + } + static void *RemoveNode(CPpmd7 *p, unsigned indx) { CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); p->FreeList[indx] = *node; + // CPpmd7_Node *node = NODE((CPpmd7_Node_Ref)p->FreeList[indx]); + // p->FreeList[indx] = node->Next; return node; } + static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) { unsigned i, nu = I2U(oldIndx) - I2U(newIndx); @@ -144,123 +146,167 @@ static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) InsertNode(p, ptr, i); } + +/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ + +typedef union _CPpmd7_Node_Union +{ + CPpmd7_Node Node; + CPpmd7_Node_Ref NextRef; +} CPpmd7_Node_Union; + +/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks() + we use single linked list similar to Ppmd8 code */ + + static void GlueFreeBlocks(CPpmd7 *p) { - #ifdef PPMD_32BIT - CPpmd7_Node headItem; - CPpmd7_Node_Ref head = &headItem; - #else - CPpmd7_Node_Ref head = p->AlignOffset + p->Size; - #endif - - CPpmd7_Node_Ref n = head; - unsigned i; - + /* + we use first UInt16 field of 12-bytes UNITs as record type stamp + CPpmd_State { Byte Symbol; Byte Freq; : Freq != 0 + CPpmd7_Context { UInt16 NumStats; : NumStats != 0 + CPpmd7_Node { UInt16 Stamp : Stamp == 0 for free record + : Stamp == 1 for head record and guard + Last 12-bytes UNIT in array is always contains 12-bytes order-0 CPpmd7_Context record. + */ + CPpmd7_Node_Ref head, n = 0; + p->GlueCount = 255; - /* create doubly-linked list of free blocks */ - for (i = 0; i < PPMD_NUM_INDEXES; i++) - { - UInt16 nu = I2U(i); - CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; - p->FreeList[i] = 0; - while (next != 0) - { - CPpmd7_Node *node = NODE(next); - node->Next = n; - n = NODE(n)->Prev = next; - next = *(const CPpmd7_Node_Ref *)node; - node->Stamp = 0; - node->NU = (UInt16)nu; - } - } - NODE(head)->Stamp = 1; - NODE(head)->Next = n; - NODE(n)->Prev = head; + + /* we set guard NODE at LoUnit */ if (p->LoUnit != p->HiUnit) - ((CPpmd7_Node *)p->LoUnit)->Stamp = 1; - - /* Glue free blocks */ - while (n != head) + ((CPpmd7_Node *)(void *)p->LoUnit)->Stamp = 1; + { - CPpmd7_Node *node = NODE(n); - UInt32 nu = (UInt32)node->NU; - for (;;) + /* Create list of free blocks. + We still need one additional list walk pass before Glue. */ + unsigned i; + for (i = 0; i < PPMD_NUM_INDEXES; i++) { - CPpmd7_Node *node2 = NODE(n) + nu; - nu += node2->NU; - if (node2->Stamp != 0 || nu >= 0x10000) - break; - NODE(node2->Prev)->Next = node2->Next; - NODE(node2->Next)->Prev = node2->Prev; - node->NU = (UInt16)nu; + const UInt16 nu = I2U_UInt16(i); + CPpmd7_Node_Ref next = (CPpmd7_Node_Ref)p->FreeList[i]; + p->FreeList[i] = 0; + while (next != 0) + { + /* Don't change the order of the following commands: */ + CPpmd7_Node_Union *un = (CPpmd7_Node_Union *)NODE(next); + const CPpmd7_Node_Ref tmp = next; + next = un->NextRef; + un->Node.Stamp = EMPTY_NODE; + un->Node.NU = nu; + un->Node.Next = n; + n = tmp; + } } - n = node->Next; } - + + head = n; + /* Glue and Fill must walk the list in same direction */ + { + /* Glue free blocks */ + CPpmd7_Node_Ref *prev = &head; + while (n) + { + CPpmd7_Node *node = NODE(n); + UInt32 nu = node->NU; + n = node->Next; + if (nu == 0) + { + *prev = n; + continue; + } + prev = &node->Next; + for (;;) + { + CPpmd7_Node *node2 = node + nu; + nu += node2->NU; + if (node2->Stamp != EMPTY_NODE || nu >= 0x10000) + break; + node->NU = (UInt16)nu; + node2->NU = 0; + } + } + } + /* Fill lists of free blocks */ - for (n = NODE(head)->Next; n != head;) + for (n = head; n != 0;) { CPpmd7_Node *node = NODE(n); - unsigned nu; - CPpmd7_Node_Ref next = node->Next; - for (nu = node->NU; nu > 128; nu -= 128, node += 128) + UInt32 nu = node->NU; + unsigned i; + n = node->Next; + if (nu == 0) + continue; + for (; nu > 128; nu -= 128, node += 128) InsertNode(p, node, PPMD_NUM_INDEXES - 1); if (I2U(i = U2I(nu)) != nu) { unsigned k = I2U(--i); - InsertNode(p, node + k, nu - k - 1); + InsertNode(p, node + k, (unsigned)nu - k - 1); } InsertNode(p, node, i); - n = next; } } + +MY_NO_INLINE static void *AllocUnitsRare(CPpmd7 *p, unsigned indx) { unsigned i; - void *retVal; + if (p->GlueCount == 0) { GlueFreeBlocks(p); if (p->FreeList[indx] != 0) return RemoveNode(p, indx); } + i = indx; + do { if (++i == PPMD_NUM_INDEXES) { UInt32 numBytes = U2B(I2U(indx)); + Byte *us = p->UnitsStart; p->GlueCount--; - return ((UInt32)(p->UnitsStart - p->Text) > numBytes) ? (p->UnitsStart -= numBytes) : (NULL); + return ((UInt32)(us - p->Text) > numBytes) ? (p->UnitsStart = us - numBytes) : NULL; } } while (p->FreeList[i] == 0); - retVal = RemoveNode(p, i); - SplitBlock(p, retVal, i, indx); - return retVal; + + { + void *block = RemoveNode(p, i); + SplitBlock(p, block, i, indx); + return block; + } } + static void *AllocUnits(CPpmd7 *p, unsigned indx) { - UInt32 numBytes; if (p->FreeList[indx] != 0) return RemoveNode(p, indx); - numBytes = U2B(I2U(indx)); - if (numBytes <= (UInt32)(p->HiUnit - p->LoUnit)) { - void *retVal = p->LoUnit; - p->LoUnit += numBytes; - return retVal; + UInt32 numBytes = U2B(I2U(indx)); + Byte *lo = p->LoUnit; + if ((UInt32)(p->HiUnit - lo) >= numBytes) + { + p->LoUnit = lo + numBytes; + return lo; + } } return AllocUnitsRare(p, indx); } -#define MyMem12Cpy(dest, src, num) \ - { UInt32 *d = (UInt32 *)dest; const UInt32 *s = (const UInt32 *)src; UInt32 n = num; \ - do { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; s += 3; d += 3; } while (--n); } +#define MyMem12Cpy(dest, src, num) \ + { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ + do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } + + +/* static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU) { unsigned i0 = U2I(oldNU); @@ -277,20 +323,25 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU SplitBlock(p, oldPtr, i0, i1); return oldPtr; } +*/ -#define SUCCESSOR(p) ((CPpmd_Void_Ref)((p)->SuccessorLow | ((UInt32)(p)->SuccessorHigh << 16))) +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) { - (p)->SuccessorLow = (UInt16)((UInt32)(v) & 0xFFFF); - (p)->SuccessorHigh = (UInt16)(((UInt32)(v) >> 16) & 0xFFFF); + Ppmd_SET_SUCCESSOR(p, v); } -static void RestartModel(CPpmd7 *p) + + +MY_NO_INLINE +static +void RestartModel(CPpmd7 *p) { - unsigned i, k, m; + unsigned i, k; memset(p->FreeList, 0, sizeof(p->FreeList)); + p->Text = p->Base + p->AlignOffset; p->HiUnit = p->Text + p->Size; p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE; @@ -300,57 +351,110 @@ static void RestartModel(CPpmd7 *p) p->RunLength = p->InitRL = -(Int32)((p->MaxOrder < 12) ? p->MaxOrder : 12) - 1; p->PrevSuccess = 0; - p->MinContext = p->MaxContext = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ - p->MinContext->Suffix = 0; - p->MinContext->NumStats = 256; - p->MinContext->SummFreq = 256 + 1; - p->FoundState = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ - p->LoUnit += U2B(256 / 2); - p->MinContext->Stats = REF(p->FoundState); - for (i = 0; i < 256; i++) { - CPpmd_State *s = &p->FoundState[i]; - s->Symbol = (Byte)i; - s->Freq = 1; - SetSuccessor(s, 0); + CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ + CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ + + p->LoUnit += U2B(256 / 2); + p->MaxContext = p->MinContext = mc; + p->FoundState = s; + + mc->NumStats = 256; + mc->Union2.SummFreq = 256 + 1; + mc->Union4.Stats = REF(s); + mc->Suffix = 0; + + for (i = 0; i < 256; i++, s++) + { + s->Symbol = (Byte)i; + s->Freq = 1; + SetSuccessor(s, 0); + } + + #ifdef PPMD7_ORDER_0_SUPPPORT + if (p->MaxOrder == 0) + { + CPpmd_Void_Ref r = REF(mc); + s = p->FoundState; + for (i = 0; i < 256; i++, s++) + SetSuccessor(s, r); + return; + } + #endif } for (i = 0; i < 128; i++) + + + for (k = 0; k < 8; k++) { + unsigned m; UInt16 *dest = p->BinSumm[i] + k; UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)); for (m = 0; m < 64; m += 8) dest[m] = val; } - + + for (i = 0; i < 25; i++) - for (k = 0; k < 16; k++) + { + + CPpmd_See *s = p->See[i]; + + + + unsigned summ = ((5 * i + 10) << (PPMD_PERIOD_BITS - 4)); + for (k = 0; k < 16; k++, s++) { - CPpmd_See *s = &p->See[i][k]; - s->Summ = (UInt16)((5 * i + 10) << (s->Shift = PPMD_PERIOD_BITS - 4)); + s->Summ = (UInt16)summ; + s->Shift = (PPMD_PERIOD_BITS - 4); s->Count = 4; } + } + + p->DummySee.Summ = 0; /* unused */ + p->DummySee.Shift = PPMD_PERIOD_BITS; + p->DummySee.Count = 64; /* unused */ } + void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder) { p->MaxOrder = maxOrder; + RestartModel(p); - p->DummySee.Shift = PPMD_PERIOD_BITS; - p->DummySee.Summ = 0; /* unused */ - p->DummySee.Count = 64; /* unused */ } -static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) + + +/* + CreateSuccessors() + It's called when (FoundState->Successor) is RAW-Successor, + that is the link to position in Raw text. + So we create Context records and write the links to + FoundState->Successor and to identical RAW-Successors in suffix + contexts of MinContex. + + The function returns: + if (OrderFall == 0) then MinContext is already at MAX order, + { return pointer to new or existing context of same MAX order } + else + { return pointer to new real context that will be (Order+1) in comparison with MinContext + + also it can return pointer to real context of same order, +*/ + +MY_NO_INLINE +static CTX_PTR CreateSuccessors(CPpmd7 *p) { - CPpmd_State upState; CTX_PTR c = p->MinContext; CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); - CPpmd_State *ps[PPMD7_MAX_ORDER]; + Byte newSym, newFreq; unsigned numPs = 0; - - if (!skip) + CPpmd_State *ps[PPMD7_MAX_ORDER]; + + if (p->OrderFall != 0) ps[numPs++] = p->FoundState; while (c->Suffix) @@ -358,44 +462,70 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) CPpmd_Void_Ref successor; CPpmd_State *s; c = SUFFIX(c); + + if (c->NumStats != 1) { - for (s = STATS(c); s->Symbol != p->FoundState->Symbol; s++); + Byte sym = p->FoundState->Symbol; + for (s = STATS(c); s->Symbol != sym; s++); + } else + { s = ONE_STATE(c); + + } successor = SUCCESSOR(s); if (successor != upBranch) { + // (c) is real record Context here, c = CTX(successor); if (numPs == 0) + { + // (c) is real record MAX Order Context here, + // So we don't need to create any new contexts. return c; + } break; } ps[numPs++] = s; } - upState.Symbol = *(const Byte *)Ppmd7_GetPtr(p, upBranch); - SetSuccessor(&upState, upBranch + 1); + // All created contexts will have single-symbol with new RAW-Successor + // All new RAW-Successors will point to next position in RAW text + // after FoundState->Successor + + newSym = *(const Byte *)Ppmd7_GetPtr(p, upBranch); + upBranch++; + if (c->NumStats == 1) - upState.Freq = ONE_STATE(c)->Freq; + newFreq = ONE_STATE(c)->Freq; else { UInt32 cf, s0; CPpmd_State *s; - for (s = STATS(c); s->Symbol != upState.Symbol; s++); - cf = s->Freq - 1; - s0 = c->SummFreq - c->NumStats - cf; - upState.Freq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : ((2 * cf + 3 * s0 - 1) / (2 * s0)))); + for (s = STATS(c); s->Symbol != newSym; s++); + cf = (UInt32)s->Freq - 1; + s0 = (UInt32)c->Union2.SummFreq - c->NumStats - cf; + /* + cf - is frequency of symbol that will be Successor in new context records. + s0 - is commulative frequency sum of another symbols from parent context. + max(newFreq)= (s->Freq + 1), when (s0 == 1) + we have requirement (Ppmd7Context_OneState()->Freq <= 128) in BinSumm[] + so (s->Freq < 128) - is requirement for multi-symbol contexts + */ + newFreq = (Byte)(1 + ((2 * cf <= s0) ? (5 * cf > s0) : (2 * cf + s0 - 1) / (2 * s0) + 1)); } + // Create new single-symbol contexts from low order to high order in loop + do { - /* Create Child */ - CTX_PTR c1; /* = AllocContext(p); */ + CTX_PTR c1; + /* = AllocContext(p); */ if (p->HiUnit != p->LoUnit) - c1 = (CTX_PTR)(p->HiUnit -= UNIT_SIZE); + c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); else if (p->FreeList[0] != 0) c1 = (CTX_PTR)RemoveNode(p, 0); else @@ -404,8 +534,11 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) if (!c1) return NULL; } + c1->NumStats = 1; - *ONE_STATE(c1) = upState; + ONE_STATE(c1)->Symbol = newSym; + ONE_STATE(c1)->Freq = newFreq; + SetSuccessor(ONE_STATE(c1), upBranch); c1->Suffix = REF(c); SetSuccessor(ps[--numPs], REF(c1)); c = c1; @@ -415,21 +548,26 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p, BoolInt skip) return c; } -static void SwapStates(CPpmd_State *t1, CPpmd_State *t2) -{ - CPpmd_State tmp = *t1; - *t1 = *t2; - *t2 = tmp; -} -static void UpdateModel(CPpmd7 *p) + +#define SwapStates(s) \ + { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } + + +void Ppmd7_UpdateModel(CPpmd7 *p); +MY_NO_INLINE +void Ppmd7_UpdateModel(CPpmd7 *p) { - CPpmd_Void_Ref successor, fSuccessor = SUCCESSOR(p->FoundState); - CTX_PTR c; + CPpmd_Void_Ref maxSuccessor, minSuccessor; + CTX_PTR c, mc; unsigned s0, ns; - + + + if (p->FoundState->Freq < MAX_FREQ / 4 && p->MinContext->Suffix != 0) { + /* Update Freqs in Suffix Context */ + c = SUFFIX(p->MinContext); if (c->NumStats == 1) @@ -441,27 +579,39 @@ static void UpdateModel(CPpmd7 *p) else { CPpmd_State *s = STATS(c); - if (s->Symbol != p->FoundState->Symbol) + Byte sym = p->FoundState->Symbol; + + if (s->Symbol != sym) { - do { s++; } while (s->Symbol != p->FoundState->Symbol); + do + { + // s++; if (s->Symbol == sym) break; + s++; + } + while (s->Symbol != sym); + if (s[0].Freq >= s[-1].Freq) { - SwapStates(&s[0], &s[-1]); + SwapStates(s); s--; } } + if (s->Freq < MAX_FREQ - 9) { - s->Freq += 2; - c->SummFreq += 2; + s->Freq = (Byte)(s->Freq + 2); + c->Union2.SummFreq = (UInt16)(c->Union2.SummFreq + 2); } } } + if (p->OrderFall == 0) { - p->MinContext = p->MaxContext = CreateSuccessors(p, True); - if (p->MinContext == 0) + /* MAX ORDER context */ + /* (FoundState->Successor) is RAW-Successor. */ + p->MaxContext = p->MinContext = CreateSuccessors(p); + if (!p->MinContext) { RestartModel(p); return; @@ -469,45 +619,93 @@ static void UpdateModel(CPpmd7 *p) SetSuccessor(p->FoundState, REF(p->MinContext)); return; } + + + /* NON-MAX ORDER context */ - *p->Text++ = p->FoundState->Symbol; - successor = REF(p->Text); - if (p->Text >= p->UnitsStart) { - RestartModel(p); - return; + Byte *text = p->Text; + *text++ = p->FoundState->Symbol; + p->Text = text; + if (text >= p->UnitsStart) + { + RestartModel(p); + return; + } + maxSuccessor = REF(text); } - if (fSuccessor) + minSuccessor = SUCCESSOR(p->FoundState); + + if (minSuccessor) { - if (fSuccessor <= successor) + // there is Successor for FoundState in MinContext. + // So the next context will be one order higher than MinContext. + + if (minSuccessor <= maxSuccessor) { - CTX_PTR cs = CreateSuccessors(p, False); - if (cs == NULL) + // minSuccessor is RAW-Successor. So we will create real contexts records: + CTX_PTR cs = CreateSuccessors(p); + if (!cs) { RestartModel(p); return; } - fSuccessor = REF(cs); + minSuccessor = REF(cs); } + + // minSuccessor now is real Context pointer that points to existing (Order+1) context + if (--p->OrderFall == 0) { - successor = fSuccessor; + /* + if we move to MaxOrder context, then minSuccessor will be common Succesor for both: + MinContext that is (MaxOrder - 1) + MaxContext that is (MaxOrder) + so we don't need new RAW-Successor, and we can use real minSuccessor + as succssors for both MinContext and MaxContext. + */ + maxSuccessor = minSuccessor; + + /* + if (MaxContext != MinContext) + { + there was order fall from MaxOrder and we don't need current symbol + to transfer some RAW-Succesors to real contexts. + So we roll back pointer in raw data for one position. + } + */ p->Text -= (p->MaxContext != p->MinContext); } } else { - SetSuccessor(p->FoundState, successor); - fSuccessor = REF(p->MinContext); + /* + FoundState has NULL-Successor here. + And only root 0-order context can contain NULL-Successors. + We change Successor in FoundState to RAW-Successor, + And next context will be same 0-order root Context. + */ + SetSuccessor(p->FoundState, maxSuccessor); + minSuccessor = REF(p->MinContext); } - - s0 = p->MinContext->SummFreq - (ns = p->MinContext->NumStats) - (p->FoundState->Freq - 1); - - for (c = p->MaxContext; c != p->MinContext; c = SUFFIX(c)) + + mc = p->MinContext; + c = p->MaxContext; + + p->MaxContext = p->MinContext = CTX(minSuccessor); + + if (c == mc) + return; + + // s0 : is pure Escape Freq + s0 = mc->Union2.SummFreq - (ns = mc->NumStats) - ((unsigned)p->FoundState->Freq - 1); + + do { unsigned ns1; - UInt32 cf, sf; + UInt32 sum; + if ((ns1 = c->NumStats) != 1) { if ((ns1 & 1) == 0) @@ -527,80 +725,127 @@ static void UpdateModel(CPpmd7 *p) oldPtr = STATS(c); MyMem12Cpy(ptr, oldPtr, oldNU); InsertNode(p, oldPtr, i); - c->Stats = STATS_REF(ptr); + c->Union4.Stats = STATS_REF(ptr); } } - c->SummFreq = (UInt16)(c->SummFreq + (2 * ns1 < ns) + 2 * ((4 * ns1 <= ns) & (c->SummFreq <= 8 * ns1))); + sum = c->Union2.SummFreq; + /* max increase of Escape_Freq is 3 here. + total increase of Union2.SummFreq for all symbols is less than 256 here */ + sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)); + /* original PPMdH uses 16-bit variable for (sum) here. + But (sum < 0x9000). So we don't truncate (sum) to 16-bit */ + // sum = (UInt16)sum; } else { + // instead of One-symbol context we create 2-symbol context CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); if (!s) { RestartModel(p); return; } - *s = *ONE_STATE(c); - c->Stats = REF(s); - if (s->Freq < MAX_FREQ / 4 - 1) - s->Freq <<= 1; - else - s->Freq = MAX_FREQ - 4; - c->SummFreq = (UInt16)(s->Freq + p->InitEsc + (ns > 3)); - } - cf = 2 * (UInt32)p->FoundState->Freq * (c->SummFreq + 6); - sf = (UInt32)s0 + c->SummFreq; - if (cf < 6 * sf) - { - cf = 1 + (cf > sf) + (cf >= 4 * sf); - c->SummFreq += 3; - } - else - { - cf = 4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); - c->SummFreq = (UInt16)(c->SummFreq + cf); + { + unsigned freq = c->Union2.State2.Freq; + // s = *ONE_STATE(c); + s->Symbol = c->Union2.State2.Symbol; + s->Successor_0 = c->Union4.State4.Successor_0; + s->Successor_1 = c->Union4.State4.Successor_1; + // SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of + // (Successor_0 and Successor_1) in LE/BE. + c->Union4.Stats = REF(s); + if (freq < MAX_FREQ / 4 - 1) + freq <<= 1; + else + freq = MAX_FREQ - 4; + // (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context + s->Freq = (Byte)freq; + // max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here + sum = freq + p->InitEsc + (ns > 3); + } } + { CPpmd_State *s = STATS(c) + ns1; - SetSuccessor(s, successor); + UInt32 cf = 2 * (sum + 6) * (UInt32)p->FoundState->Freq; + UInt32 sf = (UInt32)s0 + sum; s->Symbol = p->FoundState->Symbol; - s->Freq = (Byte)cf; c->NumStats = (UInt16)(ns1 + 1); + SetSuccessor(s, maxSuccessor); + + if (cf < 6 * sf) + { + cf = (UInt32)1 + (cf > sf) + (cf >= 4 * sf); + sum += 3; + /* It can add (0, 1, 2) to Escape_Freq */ + } + else + { + cf = (UInt32)4 + (cf >= 9 * sf) + (cf >= 12 * sf) + (cf >= 15 * sf); + sum += cf; + } + + c->Union2.SummFreq = (UInt16)sum; + s->Freq = (Byte)cf; } + c = SUFFIX(c); } - p->MaxContext = p->MinContext = CTX(fSuccessor); + while (c != mc); } + + +MY_NO_INLINE static void Rescale(CPpmd7 *p) { unsigned i, adder, sumFreq, escFreq; CPpmd_State *stats = STATS(p->MinContext); CPpmd_State *s = p->FoundState; + + /* Sort the list by Freq */ + if (s != stats) { CPpmd_State tmp = *s; - for (; s != stats; s--) + do s[0] = s[-1]; + while (--s != stats); *s = tmp; } - escFreq = p->MinContext->SummFreq - s->Freq; - s->Freq += 4; - adder = (p->OrderFall != 0); - s->Freq = (Byte)((s->Freq + adder) >> 1); + sumFreq = s->Freq; + escFreq = p->MinContext->Union2.SummFreq - sumFreq; + + /* + if (p->OrderFall == 0), adder = 0 : it's allowed to remove symbol from MAX Order context + if (p->OrderFall != 0), adder = 1 : it's NOT allowed to remove symbol from NON-MAX Order context + */ + + adder = (p->OrderFall != 0); + + #ifdef PPMD7_ORDER_0_SUPPPORT + adder |= (p->MaxOrder == 0); // we don't remove symbols from order-0 context + #endif + + sumFreq = (sumFreq + 4 + adder) >> 1; + i = (unsigned)p->MinContext->NumStats - 1; + s->Freq = (Byte)sumFreq; - i = p->MinContext->NumStats - 1; do { - escFreq -= (++s)->Freq; - s->Freq = (Byte)((s->Freq + adder) >> 1); - sumFreq += s->Freq; - if (s[0].Freq > s[-1].Freq) + unsigned freq = (++s)->Freq; + escFreq -= freq; + freq = (freq + adder) >> 1; + sumFreq += freq; + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) { + CPpmd_State tmp = *s; CPpmd_State *s1 = s; - CPpmd_State tmp = *s1; do + { s1[0] = s1[-1]; - while (--s1 != stats && tmp.Freq > s1[-1].Freq); + } + while (--s1 != stats && freq > s1[-1].Freq); *s1 = tmp; } } @@ -608,47 +853,89 @@ static void Rescale(CPpmd7 *p) if (s->Freq == 0) { - unsigned numStats = p->MinContext->NumStats; - unsigned n0, n1; - do { i++; } while ((--s)->Freq == 0); + /* Remove all items with Freq == 0 */ + CPpmd7_Context *mc; + unsigned numStats, numStatsNew, n0, n1; + + i = 0; do { i++; } while ((--s)->Freq == 0); + + /* We increase (escFreq) for the number of removed symbols. + So we will have (0.5) increase for Escape_Freq in avarage per + removed symbol after Escape_Freq halving */ escFreq += i; - p->MinContext->NumStats = (UInt16)(p->MinContext->NumStats - i); - if (p->MinContext->NumStats == 1) + mc = p->MinContext; + numStats = mc->NumStats; + numStatsNew = numStats - i; + mc->NumStats = (UInt16)(numStatsNew); + n0 = (numStats + 1) >> 1; + + if (numStatsNew == 1) { - CPpmd_State tmp = *stats; + /* Create Single-Symbol context */ + unsigned freq = stats->Freq; + do { - tmp.Freq = (Byte)(tmp.Freq - (tmp.Freq >> 1)); escFreq >>= 1; + freq = (freq + 1) >> 1; } while (escFreq > 1); - InsertNode(p, stats, U2I(((numStats + 1) >> 1))); - *(p->FoundState = ONE_STATE(p->MinContext)) = tmp; + + s = ONE_STATE(mc); + *s = *stats; + s->Freq = (Byte)freq; // (freq <= 260 / 4) + p->FoundState = s; + InsertNode(p, stats, U2I(n0)); return; } - n0 = (numStats + 1) >> 1; - n1 = (p->MinContext->NumStats + 1) >> 1; + + n1 = (numStatsNew + 1) >> 1; if (n0 != n1) - p->MinContext->Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + { + // p->MinContext->Union4.Stats = STATS_REF(ShrinkUnits(p, stats, n0, n1)); + unsigned i0 = U2I(n0); + unsigned i1 = U2I(n1); + if (i0 != i1) + { + if (p->FreeList[i1] != 0) + { + void *ptr = RemoveNode(p, i1); + p->MinContext->Union4.Stats = STATS_REF(ptr); + MyMem12Cpy(ptr, (const void *)stats, n1); + InsertNode(p, stats, i0); + } + else + SplitBlock(p, stats, i0, i1); + } + } + } + { + CPpmd7_Context *mc = p->MinContext; + mc->Union2.SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); + // Escape_Freq halving here + p->FoundState = STATS(mc); } - p->MinContext->SummFreq = (UInt16)(sumFreq + escFreq - (escFreq >> 1)); - p->FoundState = STATS(p->MinContext); } + CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) { CPpmd_See *see; - unsigned nonMasked = p->MinContext->NumStats - numMasked; - if (p->MinContext->NumStats != 256) + const CPpmd7_Context *mc = p->MinContext; + unsigned numStats = mc->NumStats; + if (numStats != 256) { - see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + - (nonMasked < (unsigned)SUFFIX(p->MinContext)->NumStats - p->MinContext->NumStats) + - 2 * (unsigned)(p->MinContext->SummFreq < 11 * p->MinContext->NumStats) + - 4 * (unsigned)(numMasked > nonMasked) + + unsigned nonMasked = numStats - numMasked; + see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]] + + (nonMasked < (unsigned)SUFFIX(mc)->NumStats - numStats) + + 2 * (unsigned)(mc->Union2.SummFreq < 11 * numStats) + + 4 * (unsigned)(numMasked > nonMasked) + p->HiBitsFlag; { - unsigned r = (see->Summ >> see->Shift); - see->Summ = (UInt16)(see->Summ - r); + // if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ + unsigned summ = (UInt16)see->Summ; // & 0xFFFF + unsigned r = (summ >> see->Shift); + see->Summ = (UInt16)(summ - r); *escFreq = r + (r == 0); } } @@ -660,53 +947,158 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq) return see; } + static void NextContext(CPpmd7 *p) { CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); - if (p->OrderFall == 0 && (Byte *)c > p->Text) - p->MinContext = p->MaxContext = c; + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; else - UpdateModel(p); + Ppmd7_UpdateModel(p); } + void Ppmd7_Update1(CPpmd7 *p) { CPpmd_State *s = p->FoundState; - s->Freq += 4; - p->MinContext->SummFreq += 4; - if (s[0].Freq > s[-1].Freq) + unsigned freq = s->Freq; + freq += 4; + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > s[-1].Freq) { - SwapStates(&s[0], &s[-1]); + SwapStates(s); p->FoundState = --s; - if (s->Freq > MAX_FREQ) + if (freq > MAX_FREQ) Rescale(p); } NextContext(p); } + void Ppmd7_Update1_0(CPpmd7 *p) { - p->PrevSuccess = (2 * p->FoundState->Freq > p->MinContext->SummFreq); - p->RunLength += p->PrevSuccess; - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) + CPpmd_State *s = p->FoundState; + CPpmd7_Context *mc = p->MinContext; + unsigned freq = s->Freq; + unsigned summFreq = mc->Union2.SummFreq; + p->PrevSuccess = (2 * freq > summFreq); + p->RunLength += (int)p->PrevSuccess; + mc->Union2.SummFreq = (UInt16)(summFreq + 4); + freq += 4; + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) Rescale(p); NextContext(p); } + +/* void Ppmd7_UpdateBin(CPpmd7 *p) { - p->FoundState->Freq = (Byte)(p->FoundState->Freq + (p->FoundState->Freq < 128 ? 1: 0)); + unsigned freq = p->FoundState->Freq; + p->FoundState->Freq = (Byte)(freq + (freq < 128)); p->PrevSuccess = 1; p->RunLength++; NextContext(p); } +*/ void Ppmd7_Update2(CPpmd7 *p) { - p->MinContext->SummFreq += 4; - if ((p->FoundState->Freq += 4) > MAX_FREQ) - Rescale(p); + CPpmd_State *s = p->FoundState; + unsigned freq = s->Freq; + freq += 4; p->RunLength = p->InitRL; - UpdateModel(p); + p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); + s->Freq = (Byte)freq; + if (freq > MAX_FREQ) + Rescale(p); + Ppmd7_UpdateModel(p); } + + + +/* +PPMd Memory Map: +{ + [ 0 ] contains subset of original raw text, that is required to create context + records, Some symbols are not written, when max order context was reached + [ Text ] free area + [ UnitsStart ] CPpmd_State vectors and CPpmd7_Context records + [ LoUnit ] free area for CPpmd_State and CPpmd7_Context items +[ HiUnit ] CPpmd7_Context records + [ Size ] end of array +} + +These addresses don't cross at any time. +And the following condtions is true for addresses: + (0 <= Text < UnitsStart <= LoUnit <= HiUnit <= Size) + +Raw text is BYTE--aligned. +the data in block [ UnitsStart ... Size ] contains 12-bytes aligned UNITs. + +Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record. +The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. +The code doesn't free UNITs allocated for CPpmd7_Context records. + +The code calls RestartModel(), when there is no free memory for allocation. +And RestartModel() changes the state to orignal start state, with full free block. + + +The code allocates UNITs with the following order: + +Allocation of 1 UNIT for Context record + - from free space (HiUnit) down to (LoUnit) + - from FreeList[0] + - AllocUnitsRare() + +AllocUnits() for CPpmd_State vectors: + - from FreeList[i] + - from free space (LoUnit) up to (HiUnit) + - AllocUnitsRare() + +AllocUnitsRare() + - if (GlueCount == 0) + { Glue lists, GlueCount = 255, allocate from FreeList[i]] } + - loop for all higher sized FreeList[...] lists + - from (UnitsStart - Text), GlueCount-- + - ERROR + + +Each Record with Context contains the CPpmd_State vector, where each +CPpmd_State contains the link to Successor. +There are 3 types of Successor: + 1) NULL-Successor - NULL pointer. NULL-Successor links can be stored + only in 0-order Root Context Record. + We use 0 value as NULL-Successor + 2) RAW-Successor - the link to position in raw text, + that "RAW-Successor" is being created after first + occurrence of new symbol for some existing context record. + (RAW-Successor > 0). + 3) RECORD-Successor - the link to CPpmd7_Context record of (Order+1), + that record is being created when we go via RAW-Successor again. + +For any successors at any time: the following condtions are true for Successor links: +(NULL-Successor < RAW-Successor < UnitsStart <= RECORD-Successor) + + +---------- Symbol Frequency, SummFreq and Range in Range_Coder ---------- + +CPpmd7_Context::SummFreq = Sum(Stats[].Freq) + Escape_Freq + +The PPMd code tries to fulfill the condition: + (SummFreq <= (256 * 128 = RC::kBot)) + +We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) +So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. +If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. +SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions. +Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for +max-order context. + +When the PPMd code still break (Total <= RC::Range) condition in range coder, +we have two ways to resolve that problem: + 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. + 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. +*/ diff --git a/libraries/lzma/C/Ppmd7.h b/libraries/lzma/C/Ppmd7.h index 610539a047..d31809aebd 100644 --- a/libraries/lzma/C/Ppmd7.h +++ b/libraries/lzma/C/Ppmd7.h @@ -1,10 +1,8 @@ -/* Ppmd7.h -- PPMdH compression codec -2018-07-04 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ - -/* This code supports virtual RangeDecoder and includes the implementation -of RangeCoder from 7z, instead of RangeCoder from original PPMd var.H. -If you need the compatibility with original PPMd var.H, you can use external RangeDecoder */ +/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #ifndef __PPMD7_H #define __PPMD7_H @@ -21,23 +19,56 @@ EXTERN_C_BEGIN struct CPpmd7_Context_; -typedef - #ifdef PPMD_32BIT - struct CPpmd7_Context_ * - #else - UInt32 - #endif - CPpmd7_Context_Ref; +typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref; + +// MY_CPU_pragma_pack_push_1 typedef struct CPpmd7_Context_ { UInt16 NumStats; - UInt16 SummFreq; - CPpmd_State_Ref Stats; + + + union + { + UInt16 SummFreq; + CPpmd_State2 State2; + } Union2; + + union + { + CPpmd_State_Ref Stats; + CPpmd_State4 State4; + } Union4; + CPpmd7_Context_Ref Suffix; } CPpmd7_Context; -#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->SummFreq) +// MY_CPU_pragma_pop + +#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2) + + + + +typedef struct +{ + UInt32 Range; + UInt32 Code; + UInt32 Low; + IByteIn *Stream; +} CPpmd7_RangeDec; + + +typedef struct +{ + UInt32 Range; + Byte Cache; + // Byte _dummy_[3]; + UInt64 Low; + UInt64 CacheSize; + IByteOut *Stream; +} CPpmd7z_RangeEnc; + typedef struct { @@ -48,17 +79,30 @@ typedef struct UInt32 Size; UInt32 GlueCount; - Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; UInt32 AlignOffset; + Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart; - Byte Indx2Units[PPMD_NUM_INDEXES]; + + + + union + { + CPpmd7_RangeDec dec; + CPpmd7z_RangeEnc enc; + } rc; + + Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment Byte Units2Indx[128]; CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES]; - Byte NS2Indx[256], NS2BSIndx[256], HB2Flag[256]; + + Byte NS2BSIndx[256], NS2Indx[256]; + Byte ExpEscape[16]; CPpmd_See DummySee, See[25][16]; UInt16 BinSumm[128][64]; + // int LastSymbol; } CPpmd7; + void Ppmd7_Construct(CPpmd7 *p); BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc); void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc); @@ -68,74 +112,69 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder); /* ---------- Internal Functions ---------- */ -extern const Byte PPMD7_kExpEscape[16]; - -#ifdef PPMD_32BIT - #define Ppmd7_GetPtr(p, ptr) (ptr) - #define Ppmd7_GetContext(p, ptr) (ptr) - #define Ppmd7_GetStats(p, ctx) ((ctx)->Stats) -#else - #define Ppmd7_GetPtr(p, offs) ((void *)((p)->Base + (offs))) - #define Ppmd7_GetContext(p, offs) ((CPpmd7_Context *)Ppmd7_GetPtr((p), (offs))) - #define Ppmd7_GetStats(p, ctx) ((CPpmd_State *)Ppmd7_GetPtr((p), ((ctx)->Stats))) -#endif +#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr) +#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context) +#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State) void Ppmd7_Update1(CPpmd7 *p); void Ppmd7_Update1_0(CPpmd7 *p); void Ppmd7_Update2(CPpmd7 *p); -void Ppmd7_UpdateBin(CPpmd7 *p); + +#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3)) +#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4)) +// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3)) +// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4)) #define Ppmd7_GetBinSumm(p) \ - &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1][p->PrevSuccess + \ - p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] + \ - (p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]) + \ - 2 * p->HB2Flag[(unsigned)Ppmd7Context_OneState(p->MinContext)->Symbol] + \ - ((p->RunLength >> 26) & 0x20)] + &p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \ + [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \ + + p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \ + + PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \ + + (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ] CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale); +/* +We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure: + 1) Ppmd7a_*: original PPMdH + 2) Ppmd7z_*: modified PPMdH with 7z Range Coder +Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH) +*/ + /* ---------- Decode ---------- */ -typedef struct IPpmd7_RangeDec IPpmd7_RangeDec; +#define PPMD7_SYM_END (-1) +#define PPMD7_SYM_ERROR (-2) -struct IPpmd7_RangeDec -{ - UInt32 (*GetThreshold)(const IPpmd7_RangeDec *p, UInt32 total); - void (*Decode)(const IPpmd7_RangeDec *p, UInt32 start, UInt32 size); - UInt32 (*DecodeBit)(const IPpmd7_RangeDec *p, UInt32 size0); -}; +/* +You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init() -typedef struct -{ - IPpmd7_RangeDec vt; - UInt32 Range; - UInt32 Code; - IByteIn *Stream; -} CPpmd7z_RangeDec; +Ppmd7*_DecodeSymbol() +out: + >= 0 : decoded byte + -1 : PPMD7_SYM_END : End of payload marker + -2 : PPMD7_SYM_ERROR : Data error +*/ -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p); -BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p); +/* Ppmd7a_* : original PPMdH */ +BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p); +#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0) +int Ppmd7a_DecodeSymbol(CPpmd7 *p); + +/* Ppmd7z_* : modified PPMdH with 7z Range Coder */ +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p); #define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0) - -int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc); +int Ppmd7z_DecodeSymbol(CPpmd7 *p); +// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim); /* ---------- Encode ---------- */ -typedef struct -{ - UInt64 Low; - UInt32 Range; - Byte Cache; - UInt64 CacheSize; - IByteOut *Stream; -} CPpmd7z_RangeEnc; - -void Ppmd7z_RangeEnc_Init(CPpmd7z_RangeEnc *p); -void Ppmd7z_RangeEnc_FlushData(CPpmd7z_RangeEnc *p); - -void Ppmd7_EncodeSymbol(CPpmd7 *p, CPpmd7z_RangeEnc *rc, int symbol); +void Ppmd7z_Init_RangeEnc(CPpmd7 *p); +void Ppmd7z_Flush_RangeEnc(CPpmd7 *p); +// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol); +void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim); EXTERN_C_END diff --git a/libraries/lzma/C/Ppmd7Dec.c b/libraries/lzma/C/Ppmd7Dec.c index 311e9f9ddb..55d74ff9dc 100644 --- a/libraries/lzma/C/Ppmd7Dec.c +++ b/libraries/lzma/C/Ppmd7Dec.c @@ -1,6 +1,8 @@ -/* Ppmd7Dec.c -- PPMdH Decoder -2018-07-04 : Igor Pavlov : Public domain -This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ +/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder +2021-04-13 : Igor Pavlov : Public domain +This code is based on: + PPMd var.H (2001): Dmitry Shkarin : Public domain */ + #include "Precomp.h" @@ -8,184 +10,288 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ #define kTopValue (1 << 24) -BoolInt Ppmd7z_RangeDec_Init(CPpmd7z_RangeDec *p) + +#define READ_BYTE(p) IByteIn_Read((p)->Stream) + +BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p) { unsigned i; p->Code = 0; p->Range = 0xFFFFFFFF; - if (IByteIn_Read(p->Stream) != 0) + if (READ_BYTE(p) != 0) return False; for (i = 0; i < 4; i++) - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); + p->Code = (p->Code << 8) | READ_BYTE(p); return (p->Code < 0xFFFFFFFF); } -#define GET_Ppmd7z_RangeDec CPpmd7z_RangeDec *p = CONTAINER_FROM_VTBL(pp, CPpmd7z_RangeDec, vt); - -static UInt32 Range_GetThreshold(const IPpmd7_RangeDec *pp, UInt32 total) +#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \ + { (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8; + +#define RC_NORM_1(p) RC_NORM_BASE(p) } +#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} + +// we must use only one type of Normalization from two: LOCAL or REMOTE +#define RC_NORM_LOCAL(p) // RC_NORM(p) +#define RC_NORM_REMOTE(p) RC_NORM(p) + +#define R (&p->rc.dec) + +MY_FORCE_INLINE +// MY_NO_INLINE +static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) { - GET_Ppmd7z_RangeDec - return p->Code / (p->Range /= total); + + + R->Code -= start * R->Range; + R->Range *= size; + RC_NORM_LOCAL(R) } -static void Range_Normalize(CPpmd7z_RangeDec *p) -{ - if (p->Range < kTopValue) - { - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); - p->Range <<= 8; - if (p->Range < kTopValue) - { - p->Code = (p->Code << 8) | IByteIn_Read(p->Stream); - p->Range <<= 8; - } - } -} - -static void Range_Decode(const IPpmd7_RangeDec *pp, UInt32 start, UInt32 size) -{ - GET_Ppmd7z_RangeDec - p->Code -= start * p->Range; - p->Range *= size; - Range_Normalize(p); -} - -static UInt32 Range_DecodeBit(const IPpmd7_RangeDec *pp, UInt32 size0) -{ - GET_Ppmd7z_RangeDec - UInt32 newBound = (p->Range >> 14) * size0; - UInt32 symbol; - if (p->Code < newBound) - { - symbol = 0; - p->Range = newBound; - } - else - { - symbol = 1; - p->Code -= newBound; - p->Range -= newBound; - } - Range_Normalize(p); - return symbol; -} - -void Ppmd7z_RangeDec_CreateVTable(CPpmd7z_RangeDec *p) -{ - p->vt.GetThreshold = Range_GetThreshold; - p->vt.Decode = Range_Decode; - p->vt.DecodeBit = Range_DecodeBit; -} +#define RC_Decode(start, size) RangeDec_Decode(p, start, size); +#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) +#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) -#define MASK(sym) ((signed char *)charMask)[sym] +#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) +typedef CPpmd7_Context * CTX_PTR; +#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) +void Ppmd7_UpdateModel(CPpmd7 *p); -int Ppmd7_DecodeSymbol(CPpmd7 *p, const IPpmd7_RangeDec *rc) +#define MASK(sym) ((unsigned char *)charMask)[sym] +// MY_FORCE_INLINE +// static +int Ppmd7z_DecodeSymbol(CPpmd7 *p) { size_t charMask[256 / sizeof(size_t)]; + if (p->MinContext->NumStats != 1) { CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); unsigned i; UInt32 count, hiCnt; - if ((count = rc->GetThreshold(rc, p->MinContext->SummFreq)) < (hiCnt = s->Freq)) + UInt32 summFreq = p->MinContext->Union2.SummFreq; + + + + + count = RC_GetThreshold(summFreq); + hiCnt = count; + + if ((Int32)(count -= s->Freq) < 0) { - Byte symbol; - rc->Decode(rc, 0, s->Freq); + Byte sym; + RC_DecodeFinal(0, s->Freq); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update1_0(p); - return symbol; + return sym; } + p->PrevSuccess = 0; - i = p->MinContext->NumStats - 1; + i = (unsigned)p->MinContext->NumStats - 1; + do { - if ((hiCnt += (++s)->Freq) > count) + if ((Int32)(count -= (++s)->Freq) < 0) { - Byte symbol; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); + Byte sym; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update1(p); - return symbol; + return sym; } } while (--i); - if (count >= p->MinContext->SummFreq) - return -2; - p->HiBitsFlag = p->HB2Flag[p->FoundState->Symbol]; - rc->Decode(rc, hiCnt, p->MinContext->SummFreq - hiCnt); + + if (hiCnt >= summFreq) + return PPMD7_SYM_ERROR; + + hiCnt -= count; + RC_Decode(hiCnt, summFreq - hiCnt); + + p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); PPMD_SetAllBitsIn256Bytes(charMask); - MASK(s->Symbol) = 0; - i = p->MinContext->NumStats - 1; - do { MASK((--s)->Symbol) = 0; } while (--i); + // i = p->MinContext->NumStats - 1; + // do { MASK((--s)->Symbol) = 0; } while (--i); + { + CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); + MASK(s->Symbol) = 0; + do + { + unsigned sym0 = s2[0].Symbol; + unsigned sym1 = s2[1].Symbol; + s2 += 2; + MASK(sym0) = 0; + MASK(sym1) = 0; + } + while (s2 < s); + } } else { + CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); UInt16 *prob = Ppmd7_GetBinSumm(p); - if (rc->DecodeBit(rc, *prob) == 0) + UInt32 pr = *prob; + UInt32 size0 = (R->Range >> 14) * pr; + pr = PPMD_UPDATE_PROB_1(pr); + + if (R->Code < size0) { - Byte symbol; - *prob = (UInt16)PPMD_UPDATE_PROB_0(*prob); - symbol = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; - Ppmd7_UpdateBin(p); - return symbol; + Byte sym; + *prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); + + // RangeDec_DecodeBit0(size0); + R->Range = size0; + RC_NORM_1(R) + /* we can use single byte normalization here because of + (min(BinSumm[][]) = 95) > (1 << (14 - 8)) */ + + // sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol; + // Ppmd7_UpdateBin(p); + { + unsigned freq = s->Freq; + CTX_PTR c = CTX(SUCCESSOR(s)); + sym = s->Symbol; + p->FoundState = s; + p->PrevSuccess = 1; + p->RunLength++; + s->Freq = (Byte)(freq + (freq < 128)); + // NextContext(p); + if (p->OrderFall == 0 && (const Byte *)c > p->Text) + p->MaxContext = p->MinContext = c; + else + Ppmd7_UpdateModel(p); + } + return sym; } - *prob = (UInt16)PPMD_UPDATE_PROB_1(*prob); - p->InitEsc = PPMD7_kExpEscape[*prob >> 10]; + + *prob = (UInt16)pr; + p->InitEsc = p->ExpEscape[pr >> 10]; + + // RangeDec_DecodeBit1(size0); + + R->Code -= size0; + R->Range -= size0; + RC_NORM_LOCAL(R) + PPMD_SetAllBitsIn256Bytes(charMask); MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; p->PrevSuccess = 0; } + for (;;) { - CPpmd_State *ps[256], *s; + CPpmd_State *s, *s2; UInt32 freqSum, count, hiCnt; + CPpmd_See *see; - unsigned i, num, numMasked = p->MinContext->NumStats; + CPpmd7_Context *mc; + unsigned numMasked; + RC_NORM_REMOTE(R) + mc = p->MinContext; + numMasked = mc->NumStats; + do { p->OrderFall++; - if (!p->MinContext->Suffix) - return -1; - p->MinContext = Ppmd7_GetContext(p, p->MinContext->Suffix); + if (!mc->Suffix) + return PPMD7_SYM_END; + mc = Ppmd7_GetContext(p, mc->Suffix); } - while (p->MinContext->NumStats == numMasked); - hiCnt = 0; - s = Ppmd7_GetStats(p, p->MinContext); - i = 0; - num = p->MinContext->NumStats - numMasked; - do - { - int k = (int)(MASK(s->Symbol)); - hiCnt += (s->Freq & k); - ps[i] = s++; - i -= k; - } - while (i != num); + while (mc->NumStats == numMasked); + s = Ppmd7_GetStats(p, mc); + + { + unsigned num = mc->NumStats; + unsigned num2 = num / 2; + + num &= 1; + hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num); + s += num; + p->MinContext = mc; + + do + { + unsigned sym0 = s[0].Symbol; + unsigned sym1 = s[1].Symbol; + s += 2; + hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0))); + hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1))); + } + while (--num2); + } + see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum); freqSum += hiCnt; - count = rc->GetThreshold(rc, freqSum); + + + + + count = RC_GetThreshold(freqSum); if (count < hiCnt) { - Byte symbol; - CPpmd_State **pps = ps; - for (hiCnt = 0; (hiCnt += (*pps)->Freq) <= count; pps++); - s = *pps; - rc->Decode(rc, hiCnt - s->Freq, s->Freq); + Byte sym; + + s = Ppmd7_GetStats(p, p->MinContext); + hiCnt = count; + // count -= s->Freq & (unsigned)(MASK(s->Symbol)); + // if ((Int32)count >= 0) + { + for (;;) + { + count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; + }; + } + s--; + RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); + + // new (see->Summ) value can overflow over 16-bits in some rare cases Ppmd_See_Update(see); p->FoundState = s; - symbol = s->Symbol; + sym = s->Symbol; Ppmd7_Update2(p); - return symbol; + return sym; } + if (count >= freqSum) - return -2; - rc->Decode(rc, hiCnt, freqSum - hiCnt); + return PPMD7_SYM_ERROR; + + RC_Decode(hiCnt, freqSum - hiCnt); + + // We increase (see->Summ) for sum of Freqs of all non_Masked symbols. + // new (see->Summ) value can overflow over 16-bits in some rare cases see->Summ = (UInt16)(see->Summ + freqSum); - do { MASK(ps[--i]->Symbol) = 0; } while (i != 0); + + s = Ppmd7_GetStats(p, p->MinContext); + s2 = s + p->MinContext->NumStats; + do + { + MASK(s->Symbol) = 0; + s++; + } + while (s != s2); } } + +/* +Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim) +{ + int sym = 0; + if (buf != lim) + do + { + sym = Ppmd7z_DecodeSymbol(p); + if (sym < 0) + break; + *buf = (Byte)sym; + } + while (++buf < lim); + p->LastSymbol = sym; + return buf; +} +*/ diff --git a/libraries/lzma/C/Threads.c b/libraries/lzma/C/Threads.c index 930ad271b4..7b4f5b5dc0 100644 --- a/libraries/lzma/C/Threads.c +++ b/libraries/lzma/C/Threads.c @@ -1,8 +1,10 @@ /* Threads.c -- multithreading library -2017-06-26 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #include "Precomp.h" +#ifdef _WIN32 + #ifndef UNDER_CE #include #endif @@ -29,28 +31,103 @@ WRes HandlePtr_Close(HANDLE *p) return 0; } -WRes Handle_WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); } +WRes Handle_WaitObject(HANDLE h) +{ + DWORD dw = WaitForSingleObject(h, INFINITE); + /* + (dw) result: + WAIT_OBJECT_0 // 0 + WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space + WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space + WAIT_FAILED // 0xFFFFFFFF + */ + if (dw == WAIT_FAILED) + { + dw = GetLastError(); + if (dw == 0) + return WAIT_FAILED; + } + return (WRes)dw; +} + +#define Thread_Wait(p) Handle_WaitObject(*(p)) + +WRes Thread_Wait_Close(CThread *p) +{ + WRes res = Thread_Wait(p); + WRes res2 = Thread_Close(p); + return (res != 0 ? res : res2); +} WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ - + #ifdef UNDER_CE - + DWORD threadId; *p = CreateThread(0, 0, func, param, 0, &threadId); - + #else - + unsigned threadId; - *p = (HANDLE)_beginthreadex(NULL, 0, func, param, 0, &threadId); - + *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); + #endif /* maybe we must use errno here, but probably GetLastError() is also OK. */ return HandleToWRes(*p); } + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + #ifdef UNDER_CE + + UNUSED_VAR(affinity) + return Thread_Create(p, func, param); + + #else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + { + // DWORD_PTR prevMask = + SetThreadAffinityMask(h, (DWORD_PTR)affinity); + /* + if (prevMask == 0) + { + // affinity change is non-critical error, so we can ignore it + // wres = GetError(); + } + */ + } + { + DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return wres; + + #endif +} + + static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled) { *p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL); @@ -68,10 +145,22 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEven WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) { + // negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore() *p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL); return HandleToWRes(*p); } +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + // if (Semaphore_IsCreated(p)) + { + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + } + return Semaphore_Create(p, initCount, maxCount); +} + static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount) { return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); } WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num) @@ -80,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); } WRes CriticalSection_Init(CCriticalSection *p) { - /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ + /* InitializeCriticalSection() can raise exception: + Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception + Windows Vista+ : no exceptions */ #ifdef _MSC_VER __try #endif @@ -89,7 +180,361 @@ WRes CriticalSection_Init(CCriticalSection *p) /* InitializeCriticalSectionAndSpinCount(p, 0); */ } #ifdef _MSC_VER - __except (EXCEPTION_EXECUTE_HANDLER) { return 1; } + __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; } #endif return 0; } + + + + +#else // _WIN32 + +// ---------- POSIX ---------- + +#ifndef __APPLE__ +#ifndef _7ZIP_AFFINITY_DISABLE +// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET +#define _GNU_SOURCE +#endif +#endif + +#include "Threads.h" + +#include +#include +#include +#ifdef _7ZIP_AFFINITY_SUPPORTED +// #include +#endif + + +// #include +// #define PRF(p) p +#define PRF(p) + +#define Print(s) PRF(printf("\n%s\n", s)) + +// #include + +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) +{ + // new thread in Posix probably inherits affinity from parrent thread + Print("Thread_Create_With_CpuSet"); + + pthread_attr_t attr; + int ret; + // int ret2; + + p->_created = 0; + + RINOK(pthread_attr_init(&attr)); + + ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + if (!ret) + { + if (cpuSet) + { + #ifdef _7ZIP_AFFINITY_SUPPORTED + + /* + printf("\n affinity :"); + unsigned i; + for (i = 0; i < sizeof(*cpuSet) && i < 8; i++) + { + Byte b = *((const Byte *)cpuSet + i); + char temp[32]; + #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) + temp[0] = GET_HEX_CHAR((b & 0xF)); + temp[1] = GET_HEX_CHAR((b >> 4)); + // temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian + // temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian + temp[2] = 0; + printf("%s", temp); + } + printf("\n"); + */ + + // ret2 = + pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + #endif + } + + ret = pthread_create(&p->_tid, &attr, func, param); + + if (!ret) + { + p->_created = 1; + /* + if (cpuSet) + { + // ret2 = + pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet); + // if (ret2) ret = ret2; + } + */ + } + } + // ret2 = + pthread_attr_destroy(&attr); + // if (ret2 != 0) ret = ret2; + return ret; +} + + +WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) +{ + return Thread_Create_With_CpuSet(p, func, param, NULL); +} + + +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) +{ + Print("Thread_Create_WithAffinity"); + CCpuSet cs; + unsigned i; + CpuSet_Zero(&cs); + for (i = 0; i < sizeof(affinity) * 8; i++) + { + if (affinity == 0) + break; + if (affinity & 1) + { + CpuSet_Set(&cs, i); + } + affinity >>= 1; + } + return Thread_Create_With_CpuSet(p, func, param, &cs); +} + + +WRes Thread_Close(CThread *p) +{ + // Print("Thread_Close"); + int ret; + if (!p->_created) + return 0; + + ret = pthread_detach(p->_tid); + p->_tid = 0; + p->_created = 0; + return ret; +} + + +WRes Thread_Wait_Close(CThread *p) +{ + // Print("Thread_Wait_Close"); + void *thread_return; + int ret; + if (!p->_created) + return EINVAL; + + ret = pthread_join(p->_tid, &thread_return); + // probably we can't use that (_tid) after pthread_join(), so we close thread here + p->_created = 0; + p->_tid = 0; + return ret; +} + + + +static WRes Event_Create(CEvent *p, int manualReset, int signaled) +{ + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_manual_reset = manualReset; + p->_state = (signaled ? True : False); + p->_created = 1; + return 0; +} + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) + { return Event_Create(p, True, signaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) + { return Event_Create(p, False, signaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + + +WRes Event_Set(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = True; + int res1 = pthread_cond_broadcast(&p->_cond); + int res2 = pthread_mutex_unlock(&p->_mutex); + return (res2 ? res2 : res1); +} + +WRes Event_Reset(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + p->_state = False; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Wait(CEvent *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_state == False) + { + // ETIMEDOUT + // ret = + pthread_cond_wait(&p->_cond, &p->_mutex); + // if (ret != 0) break; + } + if (p->_manual_reset == False) + { + p->_state = False; + } + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Event_Close(CEvent *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + RINOK(pthread_mutex_init(&p->_mutex, NULL)); + RINOK(pthread_cond_init(&p->_cond, NULL)); + p->_count = initCount; + p->_maxCount = maxCount; + p->_created = 1; + return 0; +} + + +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (Semaphore_IsCreated(p)) + { + /* + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + */ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + // return EINVAL; // for debug + p->_count = initCount; + p->_maxCount = maxCount; + return 0; + } + return Semaphore_Create(p, initCount, maxCount); +} + + +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + UInt32 newCount; + int ret; + + if (releaseCount < 1) + return EINVAL; + + RINOK(pthread_mutex_lock(&p->_mutex)); + + newCount = p->_count + releaseCount; + if (newCount > p->_maxCount) + ret = ERROR_TOO_MANY_POSTS; // EINVAL; + else + { + p->_count = newCount; + ret = pthread_cond_broadcast(&p->_cond); + } + RINOK(pthread_mutex_unlock(&p->_mutex)); + return ret; +} + +WRes Semaphore_Wait(CSemaphore *p) +{ + RINOK(pthread_mutex_lock(&p->_mutex)); + while (p->_count < 1) + { + pthread_cond_wait(&p->_cond, &p->_mutex); + } + p->_count--; + return pthread_mutex_unlock(&p->_mutex); +} + +WRes Semaphore_Close(CSemaphore *p) +{ + if (!p->_created) + return 0; + p->_created = 0; + { + int res1 = pthread_mutex_destroy(&p->_mutex); + int res2 = pthread_cond_destroy(&p->_cond); + return (res1 ? res1 : res2); + } +} + + + +WRes CriticalSection_Init(CCriticalSection *p) +{ + // Print("CriticalSection_Init"); + if (!p) + return EINTR; + return pthread_mutex_init(&p->_mutex, NULL); +} + +void CriticalSection_Enter(CCriticalSection *p) +{ + // Print("CriticalSection_Enter"); + if (p) + { + // int ret = + pthread_mutex_lock(&p->_mutex); + } +} + +void CriticalSection_Leave(CCriticalSection *p) +{ + // Print("CriticalSection_Leave"); + if (p) + { + // int ret = + pthread_mutex_unlock(&p->_mutex); + } +} + +void CriticalSection_Delete(CCriticalSection *p) +{ + // Print("CriticalSection_Delete"); + if (p) + { + // int ret = + pthread_mutex_destroy(&p->_mutex); + } +} + +LONG InterlockedIncrement(LONG volatile *addend) +{ + // Print("InterlockedIncrement"); + #ifdef USE_HACK_UNSAFE_ATOMIC + LONG val = *addend + 1; + *addend = val; + return val; + #else + return __sync_add_and_fetch(addend, 1); + #endif +} + +#endif // _WIN32 diff --git a/libraries/lzma/C/Threads.h b/libraries/lzma/C/Threads.h index e53ace4356..9e70ecab4c 100644 --- a/libraries/lzma/C/Threads.h +++ b/libraries/lzma/C/Threads.h @@ -1,38 +1,110 @@ /* Threads.h -- multithreading library -2017-06-18 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #ifndef __7Z_THREADS_H #define __7Z_THREADS_H #ifdef _WIN32 -#include +#include +#else + +#if defined(__linux__) +#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) +#ifndef _7ZIP_AFFINITY_DISABLE +#define _7ZIP_AFFINITY_SUPPORTED +// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED") +// #define _GNU_SOURCE +#endif +#endif +#endif + +#include + #endif #include "7zTypes.h" EXTERN_C_BEGIN +#ifdef _WIN32 + WRes HandlePtr_Close(HANDLE *h); WRes Handle_WaitObject(HANDLE h); typedef HANDLE CThread; -#define Thread_Construct(p) *(p) = NULL + +#define Thread_Construct(p) { *(p) = NULL; } #define Thread_WasCreated(p) (*(p) != NULL) #define Thread_Close(p) HandlePtr_Close(p) -#define Thread_Wait(p) Handle_WaitObject(*(p)) +// #define Thread_Wait(p) Handle_WaitObject(*(p)) typedef -#ifdef UNDER_CE - DWORD + #ifdef UNDER_CE + DWORD + #else + unsigned + #endif + THREAD_FUNC_RET_TYPE; + +typedef DWORD_PTR CAffinityMask; +typedef DWORD_PTR CCpuSet; + +#define CpuSet_Zero(p) { *(p) = 0; } +#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } + +#else // _WIN32 + +typedef struct _CThread +{ + pthread_t _tid; + int _created; +} CThread; + +#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } +#define Thread_WasCreated(p) ((p)->_created != 0) +WRes Thread_Close(CThread *p); +// #define Thread_Wait Thread_Wait_Close + +typedef void * THREAD_FUNC_RET_TYPE; + +typedef UInt64 CAffinityMask; + +#ifdef _7ZIP_AFFINITY_SUPPORTED + +typedef cpu_set_t CCpuSet; +#define CpuSet_Zero(p) CPU_ZERO(p) +#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) +#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) + #else - unsigned + +typedef UInt64 CCpuSet; +#define CpuSet_Zero(p) { *(p) = 0; } +#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } +#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) + #endif - THREAD_FUNC_RET_TYPE; + + +#endif // _WIN32 + #define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param); +WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity); +WRes Thread_Wait_Close(CThread *p); + +#ifdef _WIN32 +#define Thread_Create_With_CpuSet(p, func, param, cs) \ + Thread_Create_With_Affinity(p, func, param, *cs) +#else +WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); +#endif + + +#ifdef _WIN32 typedef HANDLE CEvent; typedef CEvent CAutoResetEvent; @@ -54,6 +126,7 @@ typedef HANDLE CSemaphore; #define Semaphore_Close(p) HandlePtr_Close(p) #define Semaphore_Wait(p) Handle_WaitObject(*(p)) WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); WRes Semaphore_Release1(CSemaphore *p); @@ -63,6 +136,68 @@ WRes CriticalSection_Init(CCriticalSection *p); #define CriticalSection_Enter(p) EnterCriticalSection(p) #define CriticalSection_Leave(p) LeaveCriticalSection(p) + +#else // _WIN32 + +typedef struct _CEvent +{ + int _created; + int _manual_reset; + int _state; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CEvent; + +typedef CEvent CAutoResetEvent; +typedef CEvent CManualResetEvent; + +#define Event_Construct(p) (p)->_created = 0 +#define Event_IsCreated(p) ((p)->_created) + +WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); +WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); +WRes Event_Set(CEvent *p); +WRes Event_Reset(CEvent *p); +WRes Event_Wait(CEvent *p); +WRes Event_Close(CEvent *p); + + +typedef struct _CSemaphore +{ + int _created; + UInt32 _count; + UInt32 _maxCount; + pthread_mutex_t _mutex; + pthread_cond_t _cond; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->_created = 0 +#define Semaphore_IsCreated(p) ((p)->_created) + +WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +#define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + + +typedef struct _CCriticalSection +{ + pthread_mutex_t _mutex; +} CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +void CriticalSection_Delete(CCriticalSection *cs); +void CriticalSection_Enter(CCriticalSection *cs); +void CriticalSection_Leave(CCriticalSection *cs); + +LONG InterlockedIncrement(LONG volatile *addend); + +#endif // _WIN32 + EXTERN_C_END #endif diff --git a/libraries/lzma/CMakeLists.txt b/libraries/lzma/CMakeLists.txt index 570265049c..627d8a43c8 100644 --- a/libraries/lzma/CMakeLists.txt +++ b/libraries/lzma/CMakeLists.txt @@ -25,7 +25,7 @@ set( LZMA_FILES C/Ppmd7Dec.c ) if( WIN32 ) - set( LZMA_FILES ${LZMA_FILES} C/LzFindMt.c C/Threads.c ) + set( LZMA_FILES ${LZMA_FILES} C/LzFindMt.c C/LzFindOpt.c C/Threads.c ) else() set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_7ZIP_ST" ) endif() diff --git a/libraries/lzma/DOC/lzma-history.txt b/libraries/lzma/DOC/lzma-history.txt index 48ee74813d..ae380a1598 100644 --- a/libraries/lzma/DOC/lzma-history.txt +++ b/libraries/lzma/DOC/lzma-history.txt @@ -1,6 +1,71 @@ HISTORY of the LZMA SDK ----------------------- +21.06 2021-11-24 +------------------------- +- Bug in LZMA encoder in file LzmaEnc.c was fixed: + LzmaEnc_MemEncode(), LzmaEncode() and LzmaCompress() could work incorrectly, + if size value for output buffer is smaller than size required for all compressed data. + LzmaEnc_Encode() could work incorrectly, + if callback ISeqOutStream::Write() doesn't write all compressed data. + NCompress::NLzma::CEncoder::Code() could work incorrectly, + if callback ISequentialOutStream::Write() returns error code. +- Bug in versions 21.00-21.05 was fixed: + 7-Zip didn't set attributes of directories during archive extracting. + + +21.04 beta 2021-11-02 +------------------------- +- 7-Zip now reduces the number of working CPU threads for compression, + if RAM size is not enough for compression with big LZMA2 dictionary. +- 7-Zip now can create and check "file.sha256" text files that contain the list + of file names and SHA-256 checksums in format compatible with sha256sum program. + + +21.03 beta 2021-07-20 +------------------------- +- The maximum dictionary size for LZMA/LZMA2 compressing was increased to 4 GB (3840 MiB). +- Minor speed optimizations in LZMA/LZMA2 compressing. + + +21.02 alpha 2021-05-06 +------------------------- +- The command line version of 7-Zip for macOS was released. +- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux + was increased by 20%-60%. + + +21.01 alpha 2021-03-09 +------------------------- +- The command line version of 7-Zip for Linux was released. +- The improvements for speed of ARM64 version using hardware CPU instructions + for AES, CRC-32, SHA-1 and SHA-256. +- Some bugs were fixed. + + +20.02 alpha 2020-08-08 +------------------------- +- The default number of LZMA2 chunks per solid block in 7z archive was increased to 64. + It allows to increase the compression speed for big 7z archives, if there is a big number + of CPU cores and threads. +- The speed of PPMd compressing/decompressing was increased for 7z archives. +- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system + to modify "Last Access Time" property of source files for archiving and hashing operations. +- Some bugs were fixed. + + +20.00 alpha 2020-02-06 +------------------------- +- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, + that can work faster than bt4 and hc4 match finders for the data with big redundancy. +- The compression ratio was improved for Fast and Fastest compression levels with the + following default settings: + - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary. + - Fast level (-mx3) : hc5 match finder with 4 MB dictionary. +- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra + compression levels. + + 19.00 2019-02-21 ------------------------- - Encryption strength for 7z archives was increased: diff --git a/libraries/lzma/DOC/lzma-sdk.txt b/libraries/lzma/DOC/lzma-sdk.txt index b0e14a2e29..6bbb90f0c9 100644 --- a/libraries/lzma/DOC/lzma-sdk.txt +++ b/libraries/lzma/DOC/lzma-sdk.txt @@ -1,4 +1,4 @@ -LZMA SDK 19.00 +LZMA SDK 21.06 -------------- LZMA SDK provides the documentation, samples, header files, @@ -62,14 +62,61 @@ LZMA SDK Contents UNIX/Linux version ------------------ -To compile C++ version of file->file LZMA encoding, go to directory -CPP/7zip/Bundles/LzmaCon -and call make to recompile it: - make -f makefile.gcc clean all +There are several otpions to compile 7-Zip with different compilers: gcc and clang. +Also 7-Zip code contains two versions for some critical parts of code: in C and in Assembeler. +So if you compile the version with Assembeler code, you will get faster 7-Zip binary. + +7-Zip's assembler code uses the following syntax for different platforms: + +1) x86 and x86-64 (AMD64): MASM syntax. + There are 2 programs that supports MASM syntax in Linux. +' 'Asmc Macro Assembler and JWasm. But JWasm now doesn't support some + cpu instructions used in 7-Zip. + So you must install Asmc Macro Assembler in Linux, if you want to compile fastest version + of 7-Zip x86 and x86-64: + https://github.com/nidud/asmc + +2) arm64: GNU assembler for ARM64 with preprocessor. + That systax of that arm64 assembler code in 7-Zip is supported by GCC and CLANG for ARM64. + +There are different binaries that can be compiled from 7-Zip source. +There are 2 main files in folder for compiling: + makefile - that can be used for compiling Windows version of 7-Zip with nmake command + makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip with make command + +At first you must change the current folder to folder that contains `makefile.gcc`: + + cd CPP/7zip/Bundles/Alone7z + +Then you can compile `makefile.gcc` with the command: + + make -j -f makefile.gcc + +Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile +7-Zip binaries with optimized code and optimzing options. + +To compile with GCC without assembler: + cd CPP/7zip/Bundles/Alone7z + make -j -f ../../cmpl_gcc.mak + +To compile with CLANG without assembler: + make -j -f ../../cmpl_clang.mak + +To compile 7-Zip for x86-64 with asmc assembler: + make -j -f ../../cmpl_gcc_x64.mak + +To compile 7-Zip for arm64 with assembler: + make -j -f ../../cmpl_gcc_arm64.mak + +To compile 7-Zip for arm64 for macOS: + make -j -f ../../cmpl_mac_arm64.mak + +Also you can change some compiler options in the mak files: + cmpl_gcc.mak + var_gcc.mak + warn_gcc.mak + -In some UNIX/Linux versions you must compile LZMA with static libraries. -To compile with static libraries, you can use -LIB = -lm -static Also you can use p7zip (port of 7-Zip for POSIX systems like Unix or Linux): From 9fac6058b766be33886e2028d88e1e298bb01d24 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sat, 27 Nov 2021 16:26:07 +0200 Subject: [PATCH 09/39] - enabled LZMA multithreading on all platforms --- libraries/lzma/CMakeLists.txt | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/libraries/lzma/CMakeLists.txt b/libraries/lzma/CMakeLists.txt index 627d8a43c8..2ef5b078fb 100644 --- a/libraries/lzma/CMakeLists.txt +++ b/libraries/lzma/CMakeLists.txt @@ -4,7 +4,9 @@ make_release_only() set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_7ZIP_PPMD_SUPPPORT" ) -set( LZMA_FILES +find_package(Threads) + +add_library( lzma STATIC C/7zArcIn.c C/7zBuf.c C/7zCrc.c @@ -18,17 +20,13 @@ set( LZMA_FILES C/CpuArch.c C/Delta.c C/LzFind.c + C/LzFindMt.c + C/LzFindOpt.c C/Lzma2Dec.c C/LzmaDec.c C/LzmaEnc.c C/Ppmd7.c - C/Ppmd7Dec.c ) - -if( WIN32 ) - set( LZMA_FILES ${LZMA_FILES} C/LzFindMt.c C/LzFindOpt.c C/Threads.c ) -else() - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_7ZIP_ST" ) -endif() - -add_library( lzma STATIC ${LZMA_FILES} ) -target_link_libraries( lzma ) + C/Ppmd7Dec.c + C/Threads.c +) +target_link_libraries( lzma Threads::Threads ) From bf1577a984928fa01c885a64fd283540a7d1d5c5 Mon Sep 17 00:00:00 2001 From: Sean Baggaley Date: Sun, 28 Nov 2021 23:40:25 +0000 Subject: [PATCH 10/39] ACS: fixed an old regression in which printing a map char array did not dereference the given array variable --- src/playsim/p_acs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/playsim/p_acs.cpp b/src/playsim/p_acs.cpp index 545bdd9c8d..215fb0e34e 100644 --- a/src/playsim/p_acs.cpp +++ b/src/playsim/p_acs.cpp @@ -8570,7 +8570,7 @@ scriptwait: int capacity, offset, a, c; if (CharArrayParms(capacity, offset, a, Stack, sp, pcd == PCD_PRINTMAPCHRANGE)) { - while (capacity-- && (c = activeBehavior->GetArrayVal (a, offset)) != '\0') + while (capacity-- && (c = activeBehavior->GetArrayVal (*activeBehavior->MapVars[a], offset)) != '\0') { work += (char)c; offset++; From bb8534ed00329f582e336a5e30e84502eda48c2a Mon Sep 17 00:00:00 2001 From: drfrag Date: Mon, 29 Nov 2021 10:57:48 +0100 Subject: [PATCH 11/39] Fix LZMA compilation with VS 32 bit. (patch by Igor Pavlov) --- libraries/lzma/C/LzFind.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/libraries/lzma/C/LzFind.c b/libraries/lzma/C/LzFind.c index bf157a0b24..1b73c28484 100644 --- a/libraries/lzma/C/LzFind.c +++ b/libraries/lzma/C/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2021-09-03 : Igor Pavlov : Public domain */ +2021-11-29 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -600,7 +600,9 @@ static #ifdef ATTRIB_SSE41 ATTRIB_SSE41 #endif -void LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) +void +MY_FAST_CALL +LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { v128 sub2 = #ifdef MY_CPU_ARM_OR_ARM64 @@ -632,7 +634,9 @@ static #ifdef ATTRIB_AVX2 ATTRIB_AVX2 #endif -void LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) +void +MY_FAST_CALL +LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) { __m256i sub2 = _mm256_set_epi32( (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, @@ -669,7 +673,10 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; #define DEFAULT_SaturSub LzFind_SaturSub_32 MY_NO_INLINE -static void LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) +static +void +MY_FAST_CALL +LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) { do { From 2ce5b49cabbad444938a6d3b3685c0556144f129 Mon Sep 17 00:00:00 2001 From: Player701 <{ID}+{username}@users.noreply.github.com> Date: Mon, 6 Dec 2021 16:51:19 +0300 Subject: [PATCH 12/39] - Exported the "paused" global variable to ZScript --- src/g_game.cpp | 1 + wadsrc/static/zscript/engine/base.zs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/g_game.cpp b/src/g_game.cpp index dac60dbf34..f38a21b3e3 100644 --- a/src/g_game.cpp +++ b/src/g_game.cpp @@ -3140,3 +3140,4 @@ DEFINE_GLOBAL(demoplayback) DEFINE_GLOBAL(automapactive); DEFINE_GLOBAL(Net_Arbitrator); DEFINE_GLOBAL(netgame); +DEFINE_GLOBAL(paused); diff --git a/wadsrc/static/zscript/engine/base.zs b/wadsrc/static/zscript/engine/base.zs index d4b8964668..41bdb07f9d 100644 --- a/wadsrc/static/zscript/engine/base.zs +++ b/wadsrc/static/zscript/engine/base.zs @@ -183,6 +183,7 @@ struct _ native // These are the global variables, the struct is only here to av native MenuDelegateBase menuDelegate; native readonly int consoleplayer; native readonly double NotifyFontScale; + native readonly int paused; } struct System native @@ -202,7 +203,6 @@ struct System native } return false; } - } struct MusPlayingInfo native From bbd91be5d59fe9b7d199ebe95cea527bdcdb2546 Mon Sep 17 00:00:00 2001 From: Major Cooke Date: Tue, 14 Dec 2021 02:32:40 -0600 Subject: [PATCH 13/39] Added NoTrim for TEXTURES. - This can be applied either in or outside of a definition of a sprite. - Simply adding "NoTrim" inside a definition will apply it. - Syntax outside of a sprite is `NoTrim `. --- src/common/textures/formats/multipatchtexture.h | 1 + src/common/textures/gametexture.cpp | 2 +- src/common/textures/gametexture.h | 3 +++ src/common/textures/multipatchtexturebuilder.cpp | 5 +++++ src/common/textures/texturemanager.cpp | 16 ++++++++++++++++ 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/common/textures/formats/multipatchtexture.h b/src/common/textures/formats/multipatchtexture.h index 48223ec5ea..c8cb16f24c 100644 --- a/src/common/textures/formats/multipatchtexture.h +++ b/src/common/textures/formats/multipatchtexture.h @@ -120,6 +120,7 @@ struct BuildInfo bool bComplex = false; bool textual = false; bool bNoDecals = false; + bool bNoTrim = false; int LeftOffset[2] = {}; int TopOffset[2] = {}; FGameTexture *texture = nullptr; diff --git a/src/common/textures/gametexture.cpp b/src/common/textures/gametexture.cpp index c5979a7a97..8d00190817 100644 --- a/src/common/textures/gametexture.cpp +++ b/src/common/textures/gametexture.cpp @@ -311,7 +311,7 @@ void FGameTexture::SetupSpriteData() if (i == 1 && ShouldExpandSprite()) { - spi.mTrimResult = Base->TrimBorders(spi.trim); // get the trim size before adding the empty frame + spi.mTrimResult = Base->TrimBorders(spi.trim) && !GetNoTrimming(); // get the trim size before adding the empty frame spi.spriteWidth += 2; spi.spriteHeight += 2; } diff --git a/src/common/textures/gametexture.h b/src/common/textures/gametexture.h index e9851fbe32..518f262d1b 100644 --- a/src/common/textures/gametexture.h +++ b/src/common/textures/gametexture.h @@ -59,6 +59,7 @@ enum EGameTexFlags GTexf_BrightmapChecked = 128, // Check for a colormap-based brightmap was already done. GTexf_AutoMaterialsAdded = 256, // AddAutoMaterials has been called on this texture. GTexf_OffsetsNotForFont = 512, // The offsets must be ignored when using this texture in a font. + GTexf_NoTrim = 1024, // Don't perform trimming on this texture. }; // Refactoring helper to allow piece by piece adjustment of the API @@ -155,6 +156,8 @@ public: bool expandSprites() { return expandSprite == -1? ShouldExpandSprite() : !!expandSprite; } bool useWorldPanning() const { return !!(flags & GTexf_WorldPanning); } void SetWorldPanning(bool on) { if (on) flags |= GTexf_WorldPanning; else flags &= ~GTexf_WorldPanning; } + void SetNoTrimming(bool on) { if (on) flags |= GTexf_NoTrim; else flags &= ~GTexf_NoTrim; } + bool GetNoTrimming() { return !!(flags & GTexf_NoTrim); } bool allowNoDecals() const { return !!(flags & GTexf_NoDecals); } void SetNoDecals(bool on) { if (on) flags |= GTexf_NoDecals; else flags &= ~GTexf_NoDecals; } void SetOffsetsNotForFont() { flags |= GTexf_OffsetsNotForFont; } diff --git a/src/common/textures/multipatchtexturebuilder.cpp b/src/common/textures/multipatchtexturebuilder.cpp index 6a49d87ae9..cdd4e7e00f 100644 --- a/src/common/textures/multipatchtexturebuilder.cpp +++ b/src/common/textures/multipatchtexturebuilder.cpp @@ -144,6 +144,7 @@ void FMultipatchTextureBuilder::MakeTexture(BuildInfo &buildinfo, ETextureType u buildinfo.texture->SetScale((float)buildinfo.Scale.X, (float)buildinfo.Scale.Y); buildinfo.texture->SetWorldPanning(buildinfo.bWorldPanning); buildinfo.texture->SetNoDecals(buildinfo.bNoDecals); + buildinfo.texture->SetNoTrimming(buildinfo.bNoTrim); TexMan.AddGameTexture(buildinfo.texture); } @@ -669,6 +670,10 @@ void FMultipatchTextureBuilder::ParseTexture(FScanner &sc, ETextureType UseType, { buildinfo.bNoDecals = true; } + else if (sc.Compare("NoTrim")) + { + buildinfo.bNoTrim = true; + } else if (sc.Compare("Patch")) { TexPartBuild part; diff --git a/src/common/textures/texturemanager.cpp b/src/common/textures/texturemanager.cpp index 65e267b842..21ca34346b 100644 --- a/src/common/textures/texturemanager.cpp +++ b/src/common/textures/texturemanager.cpp @@ -811,6 +811,22 @@ void FTextureManager::ParseTextureDef(int lump, FMultipatchTextureBuilder &build } //else Printf("Unable to define hires texture '%s'\n", tex->Name); } + else if (sc.Compare("notrim")) + { + sc.MustGetString(); + + FTextureID id = TexMan.CheckForTexture(sc.String, ETextureType::Sprite); + if (id.isValid()) + { + FGameTexture *tex = TexMan.GetGameTexture(id); + + if (tex) tex->SetNoTrimming(true); + else sc.ScriptError("NoTrim: %s not found", sc.String); + } + else + sc.ScriptError("NoTrim: %s is not a sprite", sc.String); + + } else if (sc.Compare("texture")) { build.ParseTexture(sc, ETextureType::Override, lump); From b4c74fabd3847e35a80c072ff0aad485950faab6 Mon Sep 17 00:00:00 2001 From: Major Cooke Date: Tue, 14 Dec 2021 07:42:44 -0600 Subject: [PATCH 14/39] Added NoTrim support for ANIMDEFS, same syntax as TEXTURES outside a definition. --- src/gamedata/textures/animations.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gamedata/textures/animations.cpp b/src/gamedata/textures/animations.cpp index 55dcf060c4..dd963cc909 100644 --- a/src/gamedata/textures/animations.cpp +++ b/src/gamedata/textures/animations.cpp @@ -455,6 +455,17 @@ void FTextureAnimator::ParseAnim (FScanner &sc, ETextureType usetype) defined = 1; ani = ParseRangeAnim (sc, picnum, usetype, missing); } + else if (sc.Compare("notrim")) + { + if (picnum.isValid()) + { + auto tex = TexMan.GetGameTexture(picnum); + + if (tex) tex->SetNoTrimming(true); + else sc.ScriptError("NoTrim: %s not found", sc.String); + } + else sc.ScriptError("NoTrim: %s is not a sprite", sc.String); + } else if (sc.Compare ("pic")) { if (defined == 1) From 395b5695adf10c843b4bc6a574ee505a4da0f193 Mon Sep 17 00:00:00 2001 From: drfrag Date: Thu, 23 Dec 2021 19:28:28 +0100 Subject: [PATCH 15/39] - Fixed: ActorMover was not changing its tracer's flags. --- wadsrc/static/zscript/actors/shared/movingcamera.zs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wadsrc/static/zscript/actors/shared/movingcamera.zs b/wadsrc/static/zscript/actors/shared/movingcamera.zs index 1323700089..b77c34c37e 100644 --- a/wadsrc/static/zscript/actors/shared/movingcamera.zs +++ b/wadsrc/static/zscript/actors/shared/movingcamera.zs @@ -493,8 +493,8 @@ class ActorMover : PathFollower { LinkContext ctx; tracer.UnlinkFromWorld (ctx); - bNoBlockmap = true; - bSolid = false; + tracer.bNoBlockmap = true; + tracer.bSolid = false; tracer.LinkToWorld (ctx); } if (tracer.bIsMonster) From 2945e10a0e63f53cab044a752c7fd98b6951f328 Mon Sep 17 00:00:00 2001 From: drfrag Date: Fri, 24 Dec 2021 16:03:53 +0100 Subject: [PATCH 16/39] - More wrong tracer flags in ActorMover. --- wadsrc/static/zscript/actors/shared/movingcamera.zs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wadsrc/static/zscript/actors/shared/movingcamera.zs b/wadsrc/static/zscript/actors/shared/movingcamera.zs index b77c34c37e..8f76bd1790 100644 --- a/wadsrc/static/zscript/actors/shared/movingcamera.zs +++ b/wadsrc/static/zscript/actors/shared/movingcamera.zs @@ -488,7 +488,7 @@ class ActorMover : PathFollower Super.Activate (activator); let tracer = self.tracer; special1 = tracer.bNoGravity + (tracer.bNoBlockmap<<1) + (tracer.bSolid<<2) + (tracer.bInvulnerable<<4) + (tracer.bDormant<<8); - bNoGravity = true; + tracer.bNoGravity = true; if (args[2] & 128) { LinkContext ctx; @@ -499,8 +499,8 @@ class ActorMover : PathFollower } if (tracer.bIsMonster) { - bInvulnerable = true; - bDormant = true; + tracer.bInvulnerable = true; + tracer.bDormant = true; } // Don't let the renderer interpolate between the actor's // old position and its new position. From f02060f822d27d4f955faf8d1151132a97331f9c Mon Sep 17 00:00:00 2001 From: Player701 <{ID}+{username}@users.noreply.github.com> Date: Mon, 27 Dec 2021 14:27:35 +0300 Subject: [PATCH 17/39] - Fixed crash with Scroll_Texture_Offsets with non-zero tag --- src/maploader/specials.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/maploader/specials.cpp b/src/maploader/specials.cpp index b0a0761310..64127bbabe 100644 --- a/src/maploader/specials.cpp +++ b/src/maploader/specials.cpp @@ -1414,9 +1414,9 @@ void MapLoader::SpawnScrollers() else { auto it = Level->GetLineIdIterator(l->args[1]); - while (int ln = it.Next()) + while ((s = it.Next()) >= 0) { - Level->CreateThinker(EScroll::sc_side, dx, dy, control, nullptr, Level->lines[ln].sidedef[0], accel, SCROLLTYPE(l->args[0])); + Level->CreateThinker(EScroll::sc_side, dx, dy, control, nullptr, Level->lines[s].sidedef[0], accel, SCROLLTYPE(l->args[0])); } } break; From dae8793066266ea2a8eb4949a3dcc5a78a0bad3c Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 28 Dec 2021 16:33:42 +0100 Subject: [PATCH 18/39] - moved Chex1 widescreen assets to the proper place. --- .../filter/chex.chex1/{ => graphics}/titlepic.lmp | Bin .../filter/chex.chex1/{ => graphics}/wimap0.lmp | Bin 2 files changed, 0 insertions(+), 0 deletions(-) rename wadsrc_widepix/static/filter/chex.chex1/{ => graphics}/titlepic.lmp (100%) rename wadsrc_widepix/static/filter/chex.chex1/{ => graphics}/wimap0.lmp (100%) diff --git a/wadsrc_widepix/static/filter/chex.chex1/titlepic.lmp b/wadsrc_widepix/static/filter/chex.chex1/graphics/titlepic.lmp similarity index 100% rename from wadsrc_widepix/static/filter/chex.chex1/titlepic.lmp rename to wadsrc_widepix/static/filter/chex.chex1/graphics/titlepic.lmp diff --git a/wadsrc_widepix/static/filter/chex.chex1/wimap0.lmp b/wadsrc_widepix/static/filter/chex.chex1/graphics/wimap0.lmp similarity index 100% rename from wadsrc_widepix/static/filter/chex.chex1/wimap0.lmp rename to wadsrc_widepix/static/filter/chex.chex1/graphics/wimap0.lmp From 0947f60c3ba994db5fac9eb7235ad4d4bdce831e Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Tue, 28 Dec 2021 12:03:32 -0500 Subject: [PATCH 19/39] - move and copy SHT2E0 into the game-appropriate filter for Doom 2 games and wadsmoosh --- .../patches => doom.id.doom2/sprites}/SHT2E0.lmp | Bin .../static/filter/doom.id.doom2/textures.txt | 5 ----- .../filter/doom.id.wadsmoosh/sprites/SHT2E0.lmp | Bin 0 -> 8263 bytes .../static/filter/doom.id.wadsmoosh/textures.txt | 5 ----- 4 files changed, 10 deletions(-) rename wadsrc_widepix/static/filter/{doom.id/patches => doom.id.doom2/sprites}/SHT2E0.lmp (100%) delete mode 100644 wadsrc_widepix/static/filter/doom.id.doom2/textures.txt create mode 100644 wadsrc_widepix/static/filter/doom.id.wadsmoosh/sprites/SHT2E0.lmp delete mode 100644 wadsrc_widepix/static/filter/doom.id.wadsmoosh/textures.txt diff --git a/wadsrc_widepix/static/filter/doom.id/patches/SHT2E0.lmp b/wadsrc_widepix/static/filter/doom.id.doom2/sprites/SHT2E0.lmp similarity index 100% rename from wadsrc_widepix/static/filter/doom.id/patches/SHT2E0.lmp rename to wadsrc_widepix/static/filter/doom.id.doom2/sprites/SHT2E0.lmp diff --git a/wadsrc_widepix/static/filter/doom.id.doom2/textures.txt b/wadsrc_widepix/static/filter/doom.id.doom2/textures.txt deleted file mode 100644 index 4e2c7335ff..0000000000 --- a/wadsrc_widepix/static/filter/doom.id.doom2/textures.txt +++ /dev/null @@ -1,5 +0,0 @@ -sprite SHT2E0, 233, 63 -{ - offset 32, -105 - patch "/patches/SHT2E0.lmp", 0, 0 { } -} diff --git a/wadsrc_widepix/static/filter/doom.id.wadsmoosh/sprites/SHT2E0.lmp b/wadsrc_widepix/static/filter/doom.id.wadsmoosh/sprites/SHT2E0.lmp new file mode 100644 index 0000000000000000000000000000000000000000..b73a84fcb47843aabe3a729b1868362800ebaa62 GIT binary patch literal 8263 zcmZ{pYm8jyS;yZQdw1>Fj^o6awPVMZ*m0c1*)*43&+W{)&$*Au&6Z{BF-hvGH3iviSwMVIG@O$8&z~Q~b3BCm02K)CZ)ds%}-UbJ5RqE%!7s1!S z-u>hT{3iHoFmjtxF8FouCMX|J$_Bp%-UQ{_l`_GvfH%PR!J~KJ6Zi^v2aF$7DgnO? zUIhOPMh_|V5O^B=D)?jYb?^Z=a9F7afDfJpzX(1LUI2dq-U9y!-U0ss-UaW0_rZUH z?}GmV{|){Jd=Gp8J_O$fKL9@jAAui%{{{aCa-fbVRRU$O1MCCG!Gpj7KMy_)J`er~ zyanC?rE#Us02g$?=fIzWuY>P_V@Jsom*O=7JuG-*Z1@Kjx>!tPycnfUEjfN-q&^}N$fg~ei3<%hJF#+^*S%&xMf;a zUL9Fo4PD!Hd6gu#+0ggG&}o>6RYq4=1KVmqt&GO8W0@9uE2AM4(=g5a(%ALuHVRE{ zUmA;JvtFxN`hFBy^+w$`jC^WW9D7cqVF2Dlk>{ID!{f>1?l=xCx7M_|^gEXqrgmp? zcN95}<5`wXh?Bd0-||AMXd z&1xNY8`WAZe`GK3J)5^?tzNHG^bNdmO}k;#@IQZe9}PFp!kl%kWO*0-u zUbAVt&1TiG>Q#d@-G4BS({`!t)$4WJMT^aST*~h|6vttlqN&+zT9P-Ong;2=_XrIV z2Vq?D4Bz+Vk!`t_?UAta;}Mr11aYG8+6~CA?^t5%{CE(Aq3;KzN$wihw5kpMrI7a= zCGvo$VN5jKu3@*1r}YZ`aqd_c243Lm$NIjQcI>86l_bey*Y?mEd&T{sOSW{^**o#c za=eDcbDKh&_RuTCq>yJ$l*^e&(2RaWCs6+8(6zcjT&M5KvN-Y`8fcry&_9cmArgin zg1BXRv1l>s6rNTQ9}V0*b&}N-#=cEUSxtwQtk-#1#~(U|*Z6w!6f#~b_N5&y!;S+7 z9gUi})WD8mG@JQ}GhrBoA-X);uVpnX#H!@1uEk8-@oaK>=h+ZBiUmB^^IJyU2xIis zD6u$4@MOcz$ImAT>a=JO6|#1v?#GU4Hf=|otXXty1KGnDMApMa^tQYp_B=G(m5Nsi zq`9abe$?nkN!%R|cpjc2u9deuFJOWQp;oaBa;mv$@;gTRs2;jb8M zwT!CamQvsKsJUm8J=^9%zUNVBEV&_#Ce%D_&lRFoH@ql~v>w*JV_}6_XyT@|A&vTo zqqYmtYZ!J^N@;X))}co{8M3!LCew*hbFn-I=0aZMr)N7siv}>7Ua6GDp=P47waL^( zc$Vc59;=H`Tb>Wo4nj9{8C_8+OJmny&q;M3AfsBJFAbmkA@R{XaVEdliG;Hyg#(?KrE@MH*87Bl!3VGQ zn>1~hfp0YoH({U6yC;-WA~H<7Y1D4&Z}&uTOs;g+%_ydH&dl0YRlp6j^6ix`8rElg%uK{YI>tq`;3 zfD2u&eC(1~Wj*4&n8!{*g}X_1AhW0$l$gNs;DRg!&x?mty@Xw<0Pbt24dJw4TXlA^ zV2hoPR(#)d9O|RZGE7X%wrtIY>`2Is20<5eo|1}!UoOL@SsWI>b{Q^nreM=rk_v}} zHCSZQw{hT^bQDe8lGH|;ZAxQcL&lg*TU*Qvb~U7g?OGUz?$QrY>NBH-t`)#0XEbTS zm2|--bV!(kyN&KgN!o5l2(xMV5oa1x@He`d-8dSiBZR z`A8KugP(0)LLOWeQSg>sPv{ieVylsSa(OKwDt+ZUYaAe8lUOmmr9hmqJe@*1Kom5w z!EpgOFHEvcK3Uc5!t=tuc6gId>MWOdwRM{cFqhM|$^wGlQTKg={THp`8za5Yqcf%* zpv*40L4&?zps~FZL6!uZ1q%g*^O)c=b;A<3*d+q8!pk~mI+kRs*mII1{qKS+-Fne7ulB?0aF9p+ZK3@EwXJ7ZK>X4*9&%d^EV0Y^Dvi_ToZ8 zmPr6RE3Ru`fp*1Flu$1*FKy#G3^YkOooKO1*V$kti%bM6z?{i${ zHaJ|f2@74kXAQBDISjv;!7AreZ5_I4N+ecw~?X7BHJBNih#nPx!2G>^I;`Ni3$ER)9!(z}Jl>Me@JQ*B+{!B&yI5 z^6`Vs(J%P2U|8A#DG(3FHu>T@mvjV~;o5S5Llg^?t)f<|a5|TII#dq{Q9#(VjOVH2zHikxh{)@sHY|`RfH|%FvM>qmUm3?iEDCF8obFQ@v?H747d!UH1u4Z zv8hUh0_bl4Trrs{PJpBF&y*JM+jFOmH|OY5@-<}&`RiLjsvu= zY;sHtId-<9)zx+^7RNpTk1Wwhj8vEGX1>ZoH%Oqdj|z$Am7yai*VQx;)~*TsG%7XO zi>y$QlQ>hH)xs>3!;_}SERiE$6sM_hZh3%oV2F)@6vs)R89^ir8DnyG;9}Q-!Iu^J zp8KSNq=L_Bv<1HL;iHh3DXgVMK8)LnGtJ%$SgvCLcWL#Q1`%oThWAJV*$lHbot@u$ z9%=F{3o}RuvN9(S<`lwG!Eq+P$li8NMEL}35v5Y1Rd!(vAtxu}B*{iphU9^>mPI!; zE#y+2MEX;T!e%2UT#?g)x8E(!%2y6CX^XO`$^xp2>lV|OYG)(jXMV@sIL?+~VM z43Y~^gQSeTi)(Le@F+iXkeS1&%@5LY`Qpu+H`gw{vbM3lo}ayaV}nO&`QpVlHa0dc zu1&nU#>_i+U}NLv#_MbAZ@iA|`oy1SYin!yJ^ME{*4N&+u?~IW)%EOUdg}bG+GSWjQ!=>sF{>#xPH zy!KlDGp@5EUAg-G{PZpQ)xtvY_T)bOcS&EJg_V{3#BCzbUtV5bTI}~0xJsq``0a9U zX=Qn2WpKO0=91re2e+4&)UL77;RAjD*de(uV`}iE-(Tp+7eV>aBf}?K&lY-Z*{bxj z{&2Pph&`TVJR3iX{o%7AwY$`Ar~2u!fw_T%ke7QarFNztA35IX^j5am!e8$6RY?T+ z-YP$GqSM)~9?@O13oC4&TN3m0ow@BgmS*Ssib<=qNSYtHtK03&cKXYOot349&iwpR ziO9-j5fL*dd%f=5?6s@c=DJ-f&_QCpUlJcTWyVkT`}1>ibJu33XRghP(awA?D;3-6 zK+iaOU?bJ-^?HcR&Rpv(G2TTF#^gi7_$h34y4~K=;^HDXM|@_kk0~;(h2&T=ej1@} zZ*gfw*Sd?j+1Xy2h?ImudL{Q~p!XM7N`u-XHaF8v+2S##7%qj_k+X>PmR7cUp*!E1 z?zRg-Y>J>%>+m_O(oFmsve`D>?o1EI`GQZp6H)o@_wYTvUVlluvQ4zpmlzX$J}5*{ zdg$I>4;PE%ZwhxiOFAJ5OI%+NMfgImSEv>FP3ithSt`zvZ&R9$D?fODr?W87yiLBl zgs=REybNO*vV3_xkl)s;0|ULjgjTdv#w#r^)zJEXOkO#8SUIcXg(-Qmi#G@ci?LOC||H#LM}i`)Dj$ zlZL{>D7~<7?b^)D z^p)xPEosS0(Tf9b#8Q6bUKZ}$wJT3QedUw$$w0MO-6k!;YT@zoB7F7fr=I%cOm85p zWrxBHAaMlY{OCP}>=RF2nf)nQa`=3ab#js)J4bM+z5LYlmZ%OLj}euUirS3tTSao_ ziOWyT%zunh?KYC_{P^8N!P#z<6bak(REkjKAU`3KU~cBg%hR(9QL5z%F$5%J`GdN< zP7lR81F?aW1k6kMQ{tOtq&_Y?TA4&VGQwBXT4<0KUCDti(L}7soM!T(XpoOWM(2ZC zBRs;AIdcM$VItn&u7)0C>srq5KK`>Lc)o;{vv&>zBt?Uk5?4k>*b!Ov=h%a~gVbzg z5aWvr%RIh^4NO0lln7miF|yq6i1qX1os1c+l(dbInhNpjJk4?;_j3gtqCs8sP}YZ$aJe^yMd~ zXM;#0F00X%(b0T@1H_ffv`7%W|2{X>DA{{x|InEz(S9QMiCeeaKoCiOZ`$U7zVpc5 zEz|nOqkDeLD8G7bnM1?m?zuUvKclVltDAl&zC`>^hvegrPtWtcaAmaHedh5iSLf&X o{ZC#_&(8AW;Oy)(m#3%sA+ghW?ujS(ow47)dX=9b`=t8+0a6+Vz5oCK literal 0 HcmV?d00001 diff --git a/wadsrc_widepix/static/filter/doom.id.wadsmoosh/textures.txt b/wadsrc_widepix/static/filter/doom.id.wadsmoosh/textures.txt deleted file mode 100644 index 4e2c7335ff..0000000000 --- a/wadsrc_widepix/static/filter/doom.id.wadsmoosh/textures.txt +++ /dev/null @@ -1,5 +0,0 @@ -sprite SHT2E0, 233, 63 -{ - offset 32, -105 - patch "/patches/SHT2E0.lmp", 0, 0 { } -} From 6aea7694bc39f9d7c0f921cc9d6c579af9f8b9cd Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sat, 1 Jan 2022 15:58:40 +0200 Subject: [PATCH 20/39] - updated LZMA to 21.07 https://www.7-zip.org/sdk.html https://www.7-zip.org/a/lzma2107.7z --- libraries/lzma/C/7zTypes.h | 3 ++- libraries/lzma/C/7zVersion.h | 6 ++--- libraries/lzma/C/LzFindMt.c | 6 ++--- libraries/lzma/C/Threads.c | 10 ++++----- libraries/lzma/C/Threads.h | 35 ++++++++++++++++++++++++++--- libraries/lzma/DOC/lzma-history.txt | 9 ++++++++ libraries/lzma/DOC/lzma-sdk.txt | 2 +- 7 files changed, 55 insertions(+), 16 deletions(-) diff --git a/libraries/lzma/C/7zTypes.h b/libraries/lzma/C/7zTypes.h index 3f66a7b51f..fe4fde3ffc 100644 --- a/libraries/lzma/C/7zTypes.h +++ b/libraries/lzma/C/7zTypes.h @@ -1,5 +1,5 @@ /* 7zTypes.h -- Basic types -2021-07-13 : Igor Pavlov : Public domain */ +2021-12-25 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H @@ -105,6 +105,7 @@ typedef int WRes; // we use errno equivalents for some WIN32 errors: +#define ERROR_INVALID_PARAMETER EINVAL #define ERROR_INVALID_FUNCTION EINVAL #define ERROR_ALREADY_EXISTS EEXIST #define ERROR_FILE_EXISTS EEXIST diff --git a/libraries/lzma/C/7zVersion.h b/libraries/lzma/C/7zVersion.h index a8cbdb9c65..e9363d37be 100644 --- a/libraries/lzma/C/7zVersion.h +++ b/libraries/lzma/C/7zVersion.h @@ -1,7 +1,7 @@ #define MY_VER_MAJOR 21 -#define MY_VER_MINOR 06 +#define MY_VER_MINOR 07 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "21.06" +#define MY_VERSION_NUMBERS "21.07" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,7 +10,7 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2021-11-24" +#define MY_DATE "2021-12-26" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" diff --git a/libraries/lzma/C/LzFindMt.c b/libraries/lzma/C/LzFindMt.c index da339ebf5e..4e67fc3f2f 100644 --- a/libraries/lzma/C/LzFindMt.c +++ b/libraries/lzma/C/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2021-07-12 : Igor Pavlov : Public domain */ +2021-12-21 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -832,8 +832,8 @@ void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) #define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) -static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } -static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) +static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } +static THREAD_FUNC_DECL BtThreadFunc2(void *p) { Byte allocaDummy[0x180]; unsigned i = 0; diff --git a/libraries/lzma/C/Threads.c b/libraries/lzma/C/Threads.c index 7b4f5b5dc0..58eb90ffa0 100644 --- a/libraries/lzma/C/Threads.c +++ b/libraries/lzma/C/Threads.c @@ -1,11 +1,11 @@ /* Threads.c -- multithreading library -2021-07-12 : Igor Pavlov : Public domain */ +2021-12-21 : Igor Pavlov : Public domain */ #include "Precomp.h" #ifdef _WIN32 -#ifndef UNDER_CE +#ifndef USE_THREADS_CreateThread #include #endif @@ -63,10 +63,10 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ - #ifdef UNDER_CE + #ifdef USE_THREADS_CreateThread DWORD threadId; - *p = CreateThread(0, 0, func, param, 0, &threadId); + *p = CreateThread(NULL, 0, func, param, 0, &threadId); #else @@ -82,7 +82,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) { - #ifdef UNDER_CE + #ifdef USE_THREADS_CreateThread UNUSED_VAR(affinity) return Thread_Create(p, func, param); diff --git a/libraries/lzma/C/Threads.h b/libraries/lzma/C/Threads.h index 9e70ecab4c..89ecb92be9 100644 --- a/libraries/lzma/C/Threads.h +++ b/libraries/lzma/C/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2021-07-12 : Igor Pavlov : Public domain */ +2021-12-21 : Igor Pavlov : Public domain */ #ifndef __7Z_THREADS_H #define __7Z_THREADS_H @@ -38,8 +38,14 @@ typedef HANDLE CThread; #define Thread_Close(p) HandlePtr_Close(p) // #define Thread_Wait(p) Handle_WaitObject(*(p)) +#ifdef UNDER_CE + // if (USE_THREADS_CreateThread is defined), we use _beginthreadex() + // if (USE_THREADS_CreateThread is not definned), we use CreateThread() + #define USE_THREADS_CreateThread +#endif + typedef - #ifdef UNDER_CE + #ifdef USE_THREADS_CreateThread DWORD #else unsigned @@ -90,7 +96,30 @@ typedef UInt64 CCpuSet; #define THREAD_FUNC_CALL_TYPE MY_STD_CALL -#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + +#if defined(_WIN32) && defined(__GNUC__) +/* GCC compiler for x86 32-bit uses the rule: + the stack is 16-byte aligned before CALL instruction for function calling. + But only root function main() contains instructions that + set 16-byte alignment for stack pointer. And another functions + just keep alignment, if it was set in some parent function. + + The problem: + if we create new thread in MinGW (GCC) 32-bit x86 via _beginthreadex() or CreateThread(), + the root function of thread doesn't set 16-byte alignment. + And stack frames in all child functions also will be unaligned in that case. + + Here we set (force_align_arg_pointer) attribute for root function of new thread. + Do we need (force_align_arg_pointer) also for another systems? */ + + #define THREAD_FUNC_ATTRIB_ALIGN_ARG __attribute__((force_align_arg_pointer)) + // #define THREAD_FUNC_ATTRIB_ALIGN_ARG // for debug : bad alignment in SSE functions +#else + #define THREAD_FUNC_ATTRIB_ALIGN_ARG +#endif + +#define THREAD_FUNC_DECL THREAD_FUNC_ATTRIB_ALIGN_ARG THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + typedef THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE * THREAD_FUNC_TYPE)(void *); WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param); WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity); diff --git a/libraries/lzma/DOC/lzma-history.txt b/libraries/lzma/DOC/lzma-history.txt index ae380a1598..0963c7bf79 100644 --- a/libraries/lzma/DOC/lzma-history.txt +++ b/libraries/lzma/DOC/lzma-history.txt @@ -1,6 +1,15 @@ HISTORY of the LZMA SDK ----------------------- +21.07 2021-12-26 +------------------------- +- New switches: -spm and -im!{file_path} to exclude directories from processing + for specified paths that don't contain path separator character at the end of path. +- The sorting order of files in archives was slightly changed to be more consistent + for cases where the name of some directory is the same as the prefix part of the name + of another directory or file. + + 21.06 2021-11-24 ------------------------- - Bug in LZMA encoder in file LzmaEnc.c was fixed: diff --git a/libraries/lzma/DOC/lzma-sdk.txt b/libraries/lzma/DOC/lzma-sdk.txt index 6bbb90f0c9..9621c8d5db 100644 --- a/libraries/lzma/DOC/lzma-sdk.txt +++ b/libraries/lzma/DOC/lzma-sdk.txt @@ -1,4 +1,4 @@ -LZMA SDK 21.06 +LZMA SDK 21.07 -------------- LZMA SDK provides the documentation, samples, header files, From 1c517d19fa7fc73d43b96ba7665ca73c6a3add58 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Wed, 29 Dec 2021 10:25:31 +0100 Subject: [PATCH 21/39] - Backend update from Raze. This is mainly code cleanup from setting the compiler to a stricter warning level. --- src/common/2d/v_2ddrawer.cpp | 3 +- src/common/2d/v_draw.cpp | 17 +- src/common/audio/music/music.cpp | 8 +- src/common/audio/sound/oalsound.cpp | 1 - src/common/audio/sound/s_environment.cpp | 2 +- src/common/audio/sound/s_reverbedit.cpp | 3 - src/common/audio/sound/s_sound.cpp | 5 +- src/common/audio/sound/s_soundinternal.h | 5 +- src/common/console/c_console.cpp | 12 +- src/common/console/c_cvars.cpp | 15 +- src/common/console/c_dispatch.cpp | 13 +- src/common/engine/i_net.cpp | 2 +- src/common/engine/palettecontainer.cpp | 5 +- src/common/engine/sc_man.cpp | 4 +- src/common/engine/serializer.cpp | 26 +- src/common/engine/serializer.h | 8 +- src/common/filesystem/file_directory.cpp | 2 - src/common/filesystem/file_wad.cpp | 13 +- src/common/filesystem/filesystem.cpp | 18 +- src/common/filesystem/resourcefile.cpp | 4 +- src/common/filesystem/resourcefile.h | 1 + src/common/fonts/font.cpp | 17 +- src/common/fonts/singlelumpfont.cpp | 4 +- src/common/fonts/specialfont.cpp | 8 +- src/common/fonts/v_font.cpp | 6 +- src/common/fonts/v_text.cpp | 1 - src/common/menu/joystickmenu.cpp | 14 +- src/common/menu/menudef.cpp | 12 +- src/common/menu/savegamemanager.cpp | 4 +- src/common/models/model.cpp | 4 +- src/common/models/models_md2.cpp | 16 +- src/common/models/models_md3.cpp | 34 +-- src/common/models/models_voxel.cpp | 7 +- src/common/platform/posix/sdl/i_system.cpp | 4 +- src/common/platform/win32/base_sysfb.cpp | 2 +- src/common/platform/win32/i_crash.cpp | 8 +- src/common/platform/win32/i_dijoy.cpp | 8 - src/common/platform/win32/i_input.cpp | 2 - src/common/platform/win32/i_mouse.cpp | 1 - src/common/platform/win32/i_system.cpp | 4 +- src/common/platform/win32/st_start.cpp | 2 +- src/common/platform/win32/st_start_util.cpp | 4 +- src/common/platform/win32/win32glvideo.cpp | 2 - src/common/rendering/gl/gl_framebuffer.cpp | 1 - src/common/rendering/gl/gl_hwtexture.cpp | 2 - src/common/rendering/gl/gl_renderbuffers.cpp | 2 - src/common/rendering/gl/gl_renderstate.cpp | 1 - src/common/rendering/gl/gl_shader.cpp | 6 +- .../rendering/gles/gles_framebuffer.cpp | 2 +- src/common/rendering/gles/gles_hwtexture.cpp | 30 ++- .../rendering/gles/gles_postprocess.cpp | 1 - .../rendering/gles/gles_renderbuffers.cpp | 3 - src/common/rendering/gles/gles_renderer.cpp | 1 - .../rendering/gles/gles_renderstate.cpp | 21 +- .../hwrenderer/data/flatvertices.cpp | 1 - .../hwrenderer/data/hw_dynlightdata.h | 2 + .../hwrenderer/data/hw_shaderpatcher.cpp | 13 +- .../rendering/hwrenderer/data/hw_shadowmap.h | 1 + .../rendering/hwrenderer/data/hw_vrmodes.cpp | 6 +- src/common/rendering/hwrenderer/hw_draw2d.cpp | 1 - src/common/rendering/r_videoscale.cpp | 1 - src/common/rendering/v_video.cpp | 2 - .../vulkan/renderer/vk_renderbuffers.cpp | 12 +- .../rendering/vulkan/system/vk_device.cpp | 16 +- .../vulkan/system/vk_framebuffer.cpp | 5 +- .../vulkan/textures/vk_hwtexture.cpp | 12 +- src/common/scripting/backend/codegen.cpp | 28 +-- src/common/scripting/core/dynarrays.cpp | 2 +- src/common/scripting/core/vmdisasm.cpp | 5 +- src/common/scripting/frontend/zcc_compile.cpp | 27 +- src/common/scripting/jit/jit_runtime.cpp | 4 +- src/common/scripting/vm/vmexec.h | 25 +- src/common/scripting/vm/vmframe.cpp | 2 - src/common/statusbar/base_sbar.cpp | 16 +- src/common/textures/bitmap.cpp | 5 +- src/common/textures/formats/jpegtexture.cpp | 2 +- src/common/textures/formats/pcxtexture.cpp | 2 +- src/common/textures/gametexture.cpp | 2 +- src/common/textures/gametexture.h | 8 +- src/common/textures/image.cpp | 2 - src/common/textures/imagetexture.cpp | 2 +- .../textures/multipatchtexturebuilder.cpp | 11 +- src/common/textures/texturemanager.cpp | 13 +- src/common/thirdparty/base64.h | 2 + src/common/thirdparty/gain_analysis.cpp | 56 ++--- src/common/thirdparty/m_crc32.h | 5 - src/common/thirdparty/rapidjson/document.h | 4 +- src/common/thirdparty/strnatcmp.c | 4 - src/common/utility/basics.h | 7 + src/common/utility/cmdlib.cpp | 4 +- src/common/utility/filereadermusicinterface.h | 3 +- src/common/utility/i_time.cpp | 38 +++ src/common/utility/i_time.h | 6 + src/common/utility/memarena.cpp | 8 + src/common/utility/memarena.h | 1 + src/common/utility/palentry.h | 1 + src/common/utility/palette.cpp | 7 +- src/common/utility/tarray.h | 230 +++--------------- src/common/utility/tflags.h | 60 ++--- src/common/utility/utf8.cpp | 1 - src/common/utility/vectors.h | 9 +- src/common/utility/zstrformat.cpp | 4 +- 102 files changed, 491 insertions(+), 588 deletions(-) diff --git a/src/common/2d/v_2ddrawer.cpp b/src/common/2d/v_2ddrawer.cpp index 1827123499..443acb2937 100644 --- a/src/common/2d/v_2ddrawer.cpp +++ b/src/common/2d/v_2ddrawer.cpp @@ -749,7 +749,6 @@ void F2DDrawer::AddPoly(FGameTexture *texture, FVector2 *points, int npoints, void F2DDrawer::AddPoly(FGameTexture* img, FVector4* vt, size_t vtcount, const unsigned int* ind, size_t idxcount, int translation, PalEntry color, FRenderStyle style, int clipx1, int clipy1, int clipx2, int clipy2) { RenderCommand dg; - int method = 0; if (!img || !img->isValid()) return; @@ -835,13 +834,13 @@ void F2DDrawer::AddFlatFill(int left, int top, int right, int bottom, FGameTextu dg.mFlags = DTF_Wrap; float fs = 1.f / float(flatscale); - bool flipc = false; float sw = GetClassicFlatScalarWidth(); float sh = GetClassicFlatScalarHeight(); switch (local_origin) { + default: case 0: fU1 = float(left) / (float)src->GetDisplayWidth() * fs; fV1 = float(top) / (float)src->GetDisplayHeight() * fs; diff --git a/src/common/2d/v_draw.cpp b/src/common/2d/v_draw.cpp index 4a99b8cac0..a7d213cb43 100644 --- a/src/common/2d/v_draw.cpp +++ b/src/common/2d/v_draw.cpp @@ -144,7 +144,7 @@ int GetUIScale(F2DDrawer *drawer, int altval) // Default should try to scale to 640x400 int vscale = drawer->GetHeight() / 400; int hscale = drawer->GetWidth() / 640; - scaleval = clamp(vscale, 1, hscale); + scaleval = max(1, min(vscale, hscale)); } else scaleval = uiscale; @@ -165,7 +165,7 @@ int GetConScale(F2DDrawer* drawer, int altval) // Default should try to scale to 640x400 int vscale = drawer->GetHeight() / 800; int hscale = drawer->GetWidth() / 1280; - scaleval = clamp(vscale, 1, hscale); + scaleval = max(1, min(vscale, hscale)); } else scaleval = (uiscale+1) / 2; @@ -671,7 +671,6 @@ bool ParseDrawTextureTags(F2DDrawer *drawer, FGameTexture *img, double x, double { INTBOOL boolval; int intval; - bool translationset = false; bool fillcolorset = false; if (!fortext) @@ -1016,10 +1015,16 @@ bool ParseDrawTextureTags(F2DDrawer *drawer, FGameTexture *img, double x, double case DTA_CenterOffsetRel: assert(fortext == false); if (fortext) return false; - if (ListGetInt(tags)) + intval = ListGetInt(tags); + if (intval == 1) { - parms->left = img->GetDisplayLeftOffset() + img->GetDisplayWidth() * 0.5; - parms->top = img->GetDisplayTopOffset() + img->GetDisplayHeight() * 0.5; + parms->left = img->GetDisplayLeftOffset() + (img->GetDisplayWidth() * 0.5); + parms->top = img->GetDisplayTopOffset() + (img->GetDisplayHeight() * 0.5); + } + else if (intval == 2) + { + parms->left = img->GetDisplayLeftOffset() + floor(img->GetDisplayWidth() * 0.5); + parms->top = img->GetDisplayTopOffset() + floor(img->GetDisplayHeight() * 0.5); } break; diff --git a/src/common/audio/music/music.cpp b/src/common/audio/music/music.cpp index 1b36041da4..d8766ae900 100644 --- a/src/common/audio/music/music.cpp +++ b/src/common/audio/music/music.cpp @@ -603,13 +603,13 @@ static void CheckReplayGain(const char *musicname, EMidiDevice playertype, const { float* sbuf = (float*)readbuffer.Data(); int numsamples = fmt.mBufferSize / 8; - auto index = lbuffer.Reserve(numsamples); + auto addr = lbuffer.Reserve(numsamples); rbuffer.Reserve(numsamples); for (int i = 0; i < numsamples; i++) { - lbuffer[index + i] = sbuf[i * 2] * 32768.f; - rbuffer[index + i] = sbuf[i * 2 + 1] * 32768.f; + lbuffer[addr + i] = sbuf[i * 2] * 32768.f; + rbuffer[addr + i] = sbuf[i * 2 + 1] * 32768.f; } } float accTime = lbuffer.Size() / (float)fmt.mSampleRate; @@ -684,8 +684,6 @@ bool S_ChangeMusic(const char* musicname, int order, bool looping, bool force) return true; } - int lumpnum = -1; - int length = 0; ZMusic_MusicStream handle = nullptr; MidiDeviceSetting* devp = MidiDevices.CheckKey(musicname); diff --git a/src/common/audio/sound/oalsound.cpp b/src/common/audio/sound/oalsound.cpp index 4ef36cb4d8..f3c46746dc 100644 --- a/src/common/audio/sound/oalsound.cpp +++ b/src/common/audio/sound/oalsound.cpp @@ -329,7 +329,6 @@ public: virtual FString GetStats() { FString stats; - size_t pos = 0, len = 0; ALfloat volume; ALint offset; ALint processed; diff --git a/src/common/audio/sound/s_environment.cpp b/src/common/audio/sound/s_environment.cpp index c5f1a5cf7f..7a2dbd9a50 100644 --- a/src/common/audio/sound/s_environment.cpp +++ b/src/common/audio/sound/s_environment.cpp @@ -530,7 +530,7 @@ void S_ReadReverbDef (FScanner &sc) { const ReverbContainer *def; ReverbContainer *newenv; - REVERB_PROPERTIES props; + REVERB_PROPERTIES props = {}; char *name; int id1, id2, i, j; bool inited[NUM_REVERB_FIELDS]; diff --git a/src/common/audio/sound/s_reverbedit.cpp b/src/common/audio/sound/s_reverbedit.cpp index 398d4f2f59..9787ffa8cb 100644 --- a/src/common/audio/sound/s_reverbedit.cpp +++ b/src/common/audio/sound/s_reverbedit.cpp @@ -309,7 +309,6 @@ DEFINE_ACTION_FUNCTION(DReverbEdit, GetValue) } } ACTION_RETURN_FLOAT(v); - return 1; } DEFINE_ACTION_FUNCTION(DReverbEdit, SetValue) @@ -337,14 +336,12 @@ DEFINE_ACTION_FUNCTION(DReverbEdit, SetValue) } ACTION_RETURN_FLOAT(v); - return 1; } DEFINE_ACTION_FUNCTION(DReverbEdit, GrayCheck) { PARAM_PROLOGUE; ACTION_RETURN_BOOL(CurrentEnv->Builtin); - return 1; } DEFINE_ACTION_FUNCTION(DReverbEdit, GetSelectedEnvironment) diff --git a/src/common/audio/sound/s_sound.cpp b/src/common/audio/sound/s_sound.cpp index 46119a3222..5201891c00 100644 --- a/src/common/audio/sound/s_sound.cpp +++ b/src/common/audio/sound/s_sound.cpp @@ -164,7 +164,6 @@ void SoundEngine::CacheSound (sfxinfo_t *sfx) { if (GSnd && !sfx->bTentative) { - sfxinfo_t *orig = sfx; while (!sfx->bRandomHeader && sfx->link != sfxinfo_t::NO_LINK) { sfx = &S_sfx[sfx->link]; @@ -1085,13 +1084,14 @@ void SoundEngine::SetPitch(FSoundChan *chan, float pitch) // Is a sound being played by a specific emitter? //========================================================================== -int SoundEngine::GetSoundPlayingInfo (int sourcetype, const void *source, int sound_id) +int SoundEngine::GetSoundPlayingInfo (int sourcetype, const void *source, int sound_id, int chann) { int count = 0; if (sound_id > 0) { for (FSoundChan *chan = Channels; chan != NULL; chan = chan->NextChan) { + if (chann != -1 && chann != chan->EntChannel) continue; if (chan->OrgID == sound_id && (sourcetype == SOURCE_Any || (chan->SourceType == sourcetype && chan->Source == source))) @@ -1104,6 +1104,7 @@ int SoundEngine::GetSoundPlayingInfo (int sourcetype, const void *source, int so { for (FSoundChan* chan = Channels; chan != NULL; chan = chan->NextChan) { + if (chann != -1 && chann != chan->EntChannel) continue; if ((sourcetype == SOURCE_Any || (chan->SourceType == sourcetype && chan->Source == source))) { count++; diff --git a/src/common/audio/sound/s_soundinternal.h b/src/common/audio/sound/s_soundinternal.h index d35cb524a5..13ca33e7a4 100644 --- a/src/common/audio/sound/s_soundinternal.h +++ b/src/common/audio/sound/s_soundinternal.h @@ -249,9 +249,6 @@ public: blockNewSounds = on; } - virtual int SoundSourceIndex(FSoundChan* chan) { return 0; } - virtual void SetSource(FSoundChan* chan, int index) {} - virtual void StopChannel(FSoundChan* chan); sfxinfo_t* LoadSound(sfxinfo_t* sfx); const sfxinfo_t* GetSfx(unsigned snd) @@ -303,7 +300,7 @@ public: bool IsSourcePlayingSomething(int sourcetype, const void* actor, int channel, int sound_id = -1); // Stop and resume music, during game PAUSE. - int GetSoundPlayingInfo(int sourcetype, const void* source, int sound_id); + int GetSoundPlayingInfo(int sourcetype, const void* source, int sound_id, int chan = -1); void UnloadAllSounds(); void Reset(); void MarkUsed(int num); diff --git a/src/common/console/c_console.cpp b/src/common/console/c_console.cpp index b40708808e..8d4f8b44ab 100644 --- a/src/common/console/c_console.cpp +++ b/src/common/console/c_console.cpp @@ -331,16 +331,16 @@ void C_DeinitConsole () // at runtime.) for (size_t i = 0; i < countof(Commands); ++i) { - FConsoleCommand *cmd = Commands[i]; + FConsoleCommand *command = Commands[i]; - while (cmd != NULL) + while (command != NULL) { - FConsoleCommand *next = cmd->m_Next; - if (cmd->IsAlias()) + FConsoleCommand *nextcmd = command->m_Next; + if (command->IsAlias()) { - delete cmd; + delete command; } - cmd = next; + command = nextcmd; } } diff --git a/src/common/console/c_cvars.cpp b/src/common/console/c_cvars.cpp index 7e5bd94ba3..6510d561ef 100644 --- a/src/common/console/c_cvars.cpp +++ b/src/common/console/c_cvars.cpp @@ -333,6 +333,7 @@ UCVarValue FBaseCVar::FromBool (bool value, ECVarType type) break; default: + ret.Int = 0; break; } @@ -363,6 +364,7 @@ UCVarValue FBaseCVar::FromInt (int value, ECVarType type) break; default: + ret.Int = 0; break; } @@ -395,6 +397,7 @@ UCVarValue FBaseCVar::FromFloat (float value, ECVarType type) break; default: + ret.Int = 0; break; } @@ -456,6 +459,7 @@ UCVarValue FBaseCVar::FromString (const char *value, ECVarType type) break; default: + ret.Int = 0; break; } @@ -1426,12 +1430,12 @@ void C_ArchiveCVars (FConfigFile *f, uint32_t filter) cvar = cvar->m_Next; } qsort(cvarlist.Data(), cvarlist.Size(), sizeof(FBaseCVar*), cvarcmp); - for (auto cvar : cvarlist) + for (auto cv : cvarlist) { - const char* const value = (cvar->Flags & CVAR_ISDEFAULT) - ? cvar->GetGenericRep(CVAR_String).String - : cvar->SafeValue.GetChars(); - f->SetValueForKey(cvar->GetName(), value); + const char* const value = (cv->Flags & CVAR_ISDEFAULT) + ? cv->GetGenericRep(CVAR_String).String + : cv->SafeValue.GetChars(); + f->SetValueForKey(cv->GetName(), value); } } @@ -1643,7 +1647,6 @@ CCMD (archivecvar) void C_ListCVarsWithoutDescription() { FBaseCVar* var = CVars; - int count = 0; while (var) { diff --git a/src/common/console/c_dispatch.cpp b/src/common/console/c_dispatch.cpp index 004c5be3ef..76e0f19e21 100644 --- a/src/common/console/c_dispatch.cpp +++ b/src/common/console/c_dispatch.cpp @@ -282,8 +282,8 @@ void C_DoCommand (const char *cmd, int keynum) } else { - auto cmd = new FStoredCommand(com, beg); - delayedCommandQueue.AddCommand(cmd); + auto command = new FStoredCommand(com, beg); + delayedCommandQueue.AddCommand(command); } } } @@ -373,8 +373,8 @@ void AddCommandString (const char *text, int keynum) // Note that deferred commands lose track of which key // (if any) they were pressed from. *brkpt = ';'; - auto cmd = new FWaitingCommand(brkpt, tics, UnsafeExecutionContext); - delayedCommandQueue.AddCommand(cmd); + auto command = new FWaitingCommand(brkpt, tics, UnsafeExecutionContext); + delayedCommandQueue.AddCommand(command); } return; } @@ -851,8 +851,8 @@ CCMD (key) for (i = 1; i < argv.argc(); ++i) { - unsigned int key = MakeKey (argv[i]); - Printf (" 0x%08x\n", key); + unsigned int hash = MakeKey (argv[i]); + Printf (" 0x%08x\n", hash); } } } @@ -1014,7 +1014,6 @@ void FExecList::AddPullins(TArray &wads, FConfigFile *config) const FExecList *C_ParseExecFile(const char *file, FExecList *exec) { char cmd[4096]; - int retval = 0; FileReader fr; diff --git a/src/common/engine/i_net.cpp b/src/common/engine/i_net.cpp index 64fd4acaf8..40bedd5628 100644 --- a/src/common/engine/i_net.cpp +++ b/src/common/engine/i_net.cpp @@ -736,7 +736,7 @@ bool HostGame (int i) { // If we send the packets eight times to each guest, // hopefully at least one of them will get through. - for (int i = 8; i != 0; --i) + for (int ii = 8; ii != 0; --ii) { PreSend (&packet, 2, &sendaddress[node]); } diff --git a/src/common/engine/palettecontainer.cpp b/src/common/engine/palettecontainer.cpp index 73fb2bdb90..9648f2528c 100644 --- a/src/common/engine/palettecontainer.cpp +++ b/src/common/engine/palettecontainer.cpp @@ -234,7 +234,7 @@ void PaletteContainer::UpdateTranslation(int trans, FRemapTable* remap) int PaletteContainer::AddTranslation(int slot, FRemapTable* remap, int count) { - uint32_t id; + uint32_t id = 0; for (int i = 0; i < count; i++) { auto newremap = AddRemap(&remap[i]); @@ -265,7 +265,7 @@ FRemapTable *PaletteContainer::TranslationToTable(int translation) unsigned int type = GetTranslationType(translation); unsigned int index = GetTranslationIndex(translation); - if (type < 0 || type >= TranslationTables.Size() || index >= NumTranslations(type)) + if (type >= TranslationTables.Size() || index >= NumTranslations(type)) { return uniqueRemaps[0]; // this is the identity table. } @@ -649,7 +649,6 @@ bool FRemapTable::AddTint(int start, int end, int r, int g, int b, int amount) bool FRemapTable::AddToTranslation(const char *range) { int start,end; - bool desaturated = false; FScanner sc; sc.OpenMem("translation", range, int(strlen(range))); diff --git a/src/common/engine/sc_man.cpp b/src/common/engine/sc_man.cpp index 29b0b445b7..68bc8542dc 100644 --- a/src/common/engine/sc_man.cpp +++ b/src/common/engine/sc_man.cpp @@ -1131,9 +1131,7 @@ FString FScanner::TokenName (int token, const char *string) } else { - FString work; - work.Format ("Unknown(%d)", token); - return work; + work.Format("Unknown(%d)", token); } return work; } diff --git a/src/common/engine/serializer.cpp b/src/common/engine/serializer.cpp index 505bd044db..f123fb42de 100644 --- a/src/common/engine/serializer.cpp +++ b/src/common/engine/serializer.cpp @@ -290,6 +290,28 @@ bool FSerializer::BeginObject(const char *name) // //========================================================================== +bool FSerializer::HasObject(const char* name) +{ + if (isReading()) + { + auto val = r->FindKey(name); + if (val != nullptr) + { + if (val->IsObject()) + { + return true; + } + } + } + return false; +} + +//========================================================================== +// +// +// +//========================================================================== + void FSerializer::EndObject() { if (isWriting()) @@ -619,7 +641,6 @@ void FSerializer::ReadObjects(bool hubtravel) if (BeginObject(nullptr)) { FString clsname; // do not deserialize the class type directly so that we can print appropriate errors. - int pindex = -1; Serialize(*this, "classtype", clsname, nullptr); PClass *cls = PClass::FindClass(clsname); @@ -643,6 +664,7 @@ void FSerializer::ReadObjects(bool hubtravel) if (!founderrors) { // Reset to start; + unsigned size = r->mObjects.Size(); r->mObjects.Last().mIndex = 0; for (unsigned i = 0; i < r->mDObjects.Size(); i++) @@ -652,7 +674,6 @@ void FSerializer::ReadObjects(bool hubtravel) { if (obj != nullptr) { - int pindex = -1; try { obj->SerializeUserVars(*this); @@ -660,6 +681,7 @@ void FSerializer::ReadObjects(bool hubtravel) } catch (CRecoverableError &err) { + r->mObjects.Clamp(size); // close all inner objects. // In case something in here throws an error, let's continue and deal with it later. Printf(TEXTCOLOR_RED "'%s'\n while restoring %s\n", err.GetMessage(), obj ? obj->GetClass()->TypeName.GetChars() : "invalid object"); mErrors++; diff --git a/src/common/engine/serializer.h b/src/common/engine/serializer.h index 400a6f4aa9..4797ca20df 100644 --- a/src/common/engine/serializer.h +++ b/src/common/engine/serializer.h @@ -85,6 +85,7 @@ public: void ReadObjects(bool hubtravel); bool BeginObject(const char *name); void EndObject(); + bool HasObject(const char* name); bool BeginArray(const char *name); void EndArray(); unsigned GetSize(const char *group); @@ -234,7 +235,7 @@ FSerializer &Serialize(FSerializer &arc, const char *key, FSoundID &sid, FSoundI FSerializer &Serialize(FSerializer &arc, const char *key, FString &sid, FString *def); FSerializer &Serialize(FSerializer &arc, const char *key, NumericValue &sid, NumericValue *def); -template +template >*/> FSerializer &Serialize(FSerializer &arc, const char *key, T *&value, T **) { DObject *v = static_cast(value); @@ -301,6 +302,11 @@ FSerializer& Serialize(FSerializer& arc, const char* key, FixedBitArray& v return arc.SerializeMemory(key, value.Storage(), value.StorageSize()); } +inline FSerializer& Serialize(FSerializer& arc, const char* key, BitArray& value, BitArray* def) +{ + return arc.SerializeMemory(key, value.Storage().Data(), value.Storage().Size()); +} + template<> FSerializer& Serialize(FSerializer& arc, const char* key, PClass*& clst, PClass** def); template<> FSerializer& Serialize(FSerializer& arc, const char* key, FFont*& font, FFont** def); template<> FSerializer &Serialize(FSerializer &arc, const char *key, Dictionary *&dict, Dictionary **def); diff --git a/src/common/filesystem/file_directory.cpp b/src/common/filesystem/file_directory.cpp index 8c7b578763..9d215784dd 100644 --- a/src/common/filesystem/file_directory.cpp +++ b/src/common/filesystem/file_directory.cpp @@ -41,8 +41,6 @@ #include "printf.h" #include "findfile.h" - - //========================================================================== // // Zip Lump diff --git a/src/common/filesystem/file_wad.cpp b/src/common/filesystem/file_wad.cpp index 054b0721a7..216fab58b0 100644 --- a/src/common/filesystem/file_wad.cpp +++ b/src/common/filesystem/file_wad.cpp @@ -308,14 +308,14 @@ void FWadFile::SetNamespace(const char *startmarker, const char *endmarker, name // We have found no F_START but one or more F_END markers. // mark all lumps before the last F_END marker as potential flats. unsigned int end = markers[markers.Size()-1].index; - for(unsigned int i = 0; i < end; i++) + for(unsigned int ii = 0; ii < end; ii++) { - if (Lumps[i].LumpSize == 4096) + if (Lumps[ii].LumpSize == 4096) { // We can't add this to the flats namespace but // it needs to be flagged for the texture manager. - DPrintf(DMSG_NOTIFY, "Marking %s as potential flat\n", Lumps[i].getName()); - Lumps[i].Flags |= LUMPF_MAYBEFLAT; + DPrintf(DMSG_NOTIFY, "Marking %s as potential flat\n", Lumps[ii].getName()); + Lumps[ii].Flags |= LUMPF_MAYBEFLAT; } } } @@ -428,18 +428,19 @@ void FWadFile::SkinHack () namespc++; } } + // needless to say, this check is entirely useless these days as map names can be more diverse.. if ((lump->getName()[0] == 'M' && lump->getName()[1] == 'A' && lump->getName()[2] == 'P' && lump->getName()[3] >= '0' && lump->getName()[3] <= '9' && lump->getName()[4] >= '0' && lump->getName()[4] <= '9' && - lump->getName()[5] >= '\0') + lump->getName()[5] == '\0') || (lump->getName()[0] == 'E' && lump->getName()[1] >= '0' && lump->getName()[1] <= '9' && lump->getName()[2] == 'M' && lump->getName()[3] >= '0' && lump->getName()[3] <= '9' && - lump->getName()[4] >= '\0')) + lump->getName()[4] == '\0')) { hasmap = true; } diff --git a/src/common/filesystem/filesystem.cpp b/src/common/filesystem/filesystem.cpp index 47932334d8..6151925a08 100644 --- a/src/common/filesystem/filesystem.cpp +++ b/src/common/filesystem/filesystem.cpp @@ -218,7 +218,6 @@ void FileSystem::InitMultipleFiles (TArray &filenames, bool quiet, Lump for(unsigned i=0;i