cache1d changes, also implements lz4compressionlevel cvar to control the speed vs size trade-off for savegame compression

git-svn-id: https://svn.eduke32.com/eduke32@6807 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2018-04-05 04:39:30 +00:00
parent 34525aa853
commit 1b2ab0edad
5 changed files with 108 additions and 101 deletions

View file

@ -47,7 +47,12 @@ void kclose(int32_t handle);
void krename(int32_t crcval, int32_t filenum, const char *newname); void krename(int32_t crcval, int32_t filenum, const char *newname);
char const * kfileparent(int32_t handle); char const * kfileparent(int32_t handle);
typedef struct { intptr_t *hand; int32_t leng; char *lock ; } cactype; typedef struct
{
intptr_t *hand;
int32_t leng;
char * lock;
} cactype;
enum { enum {
CACHE1D_FIND_FILE = 1, CACHE1D_FIND_FILE = 1,
@ -72,6 +77,7 @@ typedef struct _CACHE1D_FIND_REC {
void klistfree(CACHE1D_FIND_REC *rec); void klistfree(CACHE1D_FIND_REC *rec);
CACHE1D_FIND_REC *klistpath(const char *path, const char *mask, int32_t type); CACHE1D_FIND_REC *klistpath(const char *path, const char *mask, int32_t type);
extern int32_t lz4CompressionLevel;
int32_t kdfread(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil); int32_t kdfread(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil);
int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil); int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil);
#if 0 #if 0

View file

@ -7,6 +7,7 @@
#include "a.h" #include "a.h"
#include "polymost.h" #include "polymost.h"
#include "cache1d.h"
// input // input
char inputdevices=0; char inputdevices=0;
@ -529,6 +530,7 @@ int32_t baselayer_init(void)
#endif #endif
static osdcvardata_t cvars_engine[] = static osdcvardata_t cvars_engine[] =
{ {
{ "lz4compressionlevel","adjust LZ4 compression level used for savegames",(void *) &lz4CompressionLevel, CVAR_INT, 1, 32 },
{ "r_usenewaspect","enable/disable new screen aspect ratio determination code",(void *) &r_usenewaspect, CVAR_BOOL, 0, 1 }, { "r_usenewaspect","enable/disable new screen aspect ratio determination code",(void *) &r_usenewaspect, CVAR_BOOL, 0, 1 },
{ "r_screenaspect","if using r_usenewaspect and in fullscreen, screen aspect ratio in the form XXYY, e.g. 1609 for 16:9", { "r_screenaspect","if using r_usenewaspect and in fullscreen, screen aspect ratio in the form XXYY, e.g. 1609 for 16:9",
(void *) &r_screenxy, SCREENASPECT_CVAR_TYPE, 0, 9999 }, (void *) &r_screenxy, SCREENASPECT_CVAR_TYPE, 0, 9999 },

View file

@ -110,7 +110,6 @@ int32_t kpzbufload(char const * const filnam)
static int32_t cachesize = 0; static int32_t cachesize = 0;
static char zerochar = 0; static char zerochar = 0;
static intptr_t cachestart = 0; static intptr_t cachestart = 0;
static int32_t agecount = 0;
static int32_t lockrecip[200]; static int32_t lockrecip[200];
int32_t cacnum = 0; int32_t cacnum = 0;
@ -148,19 +147,11 @@ void initcache(intptr_t dacachestart, int32_t dacachesize)
for (i=1; i<200; i++) for (i=1; i<200; i++)
lockrecip[i] = tabledivide32_noinline(1<<28, 200-i); lockrecip[i] = tabledivide32_noinline(1<<28, 200-i);
// The following code was relocated here from engine.c, since this // we allocate this block with aligned_alloc, but I'm leaving this here in
// function is only ever called once (from there), and it seems to // case we run on any platforms that just implement it as regular malloc
// really belong here:
//
// initcache((FP_OFF(pic)+15)&0xfffffff0,(cachesize-((-FP_OFF(pic))&15))&0xfffffff0);
//
// I'm not sure why it's necessary, but the code is making sure the
// cache starts on a multiple of 16 bytes? -- SA
//printf("BEFORE: cachestart = %x, cachesize = %d\n", dacachestart, dacachesize);
cachestart = ((uintptr_t)dacachestart+15)&~(uintptr_t)0xf; cachestart = ((uintptr_t)dacachestart+15)&~(uintptr_t)0xf;
cachesize = (dacachesize-(((uintptr_t)(dacachestart))&0xf))&~(uintptr_t)0xf; cachesize = (dacachesize-(((uintptr_t)(dacachestart))&0xf))&~(uintptr_t)0xf;
//printf("AFTER : cachestart = %x, cachesize = %d\n", cachestart, cachesize);
cac[0].leng = cachesize; cac[0].leng = cachesize;
cac[0].lock = &zerochar; cac[0].lock = &zerochar;
@ -183,45 +174,40 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
#else #else
static inline void inc_and_check_cacnum(void) static inline void inc_and_check_cacnum(void)
{ {
if (++cacnum > MAXCACHEOBJECTS) if (EDUKE32_PREDICT_FALSE(++cacnum > MAXCACHEOBJECTS))
reportandexit("Too many objects in cache! (cacnum > MAXCACHEOBJECTS)"); reportandexit("Too many objects in cache! (cacnum > MAXCACHEOBJECTS)");
} }
void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
{ {
int32_t i, z, bestz=0, bestval, besto=0, o1, sucklen, suckz; if (EDUKE32_PREDICT_FALSE(*newlockptr == 0))
reportandexit("ALLOCACHE CALLED WITH LOCK OF 0!");
//printf(" ==> asking for %d bytes, ", newbytes);
// Make all requests a multiple of 16 bytes // Make all requests a multiple of 16 bytes
newbytes = (newbytes+15)&0xfffffff0; newbytes = (newbytes + 15) & ~0xf;
//printf("allocated %d bytes\n", newbytes);
if ((unsigned)newbytes > (unsigned)cachesize) if (EDUKE32_PREDICT_FALSE((unsigned)newbytes > (unsigned)cachesize))
{ {
Bprintf("Cachesize: %d\n",cachesize); Bprintf("Cachesize: %d\n",cachesize);
Bprintf("*Newhandle: 0x%" PRIxPTR ", Newbytes: %d, *Newlock: %d\n",(intptr_t)newhandle,newbytes,*newlockptr); Bprintf("*Newhandle: 0x%" PRIxPTR ", Newbytes: %d, *Newlock: %d\n",(intptr_t)newhandle,newbytes,*newlockptr);
reportandexit("BUFFER TOO BIG TO FIT IN CACHE!"); reportandexit("BUFFER TOO BIG TO FIT IN CACHE!");
} }
if (*newlockptr == 0) int32_t bestz = 0;
{ int32_t besto = 0;
reportandexit("ALLOCACHE CALLED WITH LOCK OF 0!"); int32_t bestval = 0x7fffffff;
}
//Find best place for (native_t z=cacnum-1, o1=cachesize; z>=0; z--)
bestval = 0x7fffffff; o1 = cachesize;
for (z=cacnum-1; z>=0; z--)
{ {
int32_t zz, o2, daval;
o1 -= cac[z].leng; o1 -= cac[z].leng;
o2 = o1+newbytes; int32_t o2 = o1 + newbytes;
if (o2 > cachesize) if (o2 > cachesize)
continue; continue;
daval = 0; int32_t daval = 0;
for (i=o1,zz=z; i<o2; i+=cac[zz++].leng)
for (native_t i=o1, zz=z; i<o2; i+=cac[zz++].leng)
{ {
if (*cac[zz].lock == 0) if (*cac[zz].lock == 0)
continue; continue;
@ -236,24 +222,32 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
// - smaller item size // - smaller item size
// - smaller lock byte value (but in [1 .. 199]) // - smaller lock byte value (but in [1 .. 199])
daval += mulscale32(cac[zz].leng + 65536, lockrecip[*cac[zz].lock]); daval += mulscale32(cac[zz].leng + 65536, lockrecip[*cac[zz].lock]);
if (daval >= bestval) if (daval >= bestval)
break; break;
} }
if (daval < bestval) if (daval < bestval)
{ {
bestval = daval; besto = o1; bestz = z; bestval = daval;
if (bestval == 0) break; besto = o1;
bestz = z;
if (bestval == 0)
break;
} }
} }
//printf("%d %d %d\n",besto,newbytes,*newlockptr); //printf("%d %d %d\n",besto,newbytes,*newlockptr);
if (bestval == 0x7fffffff) if (EDUKE32_PREDICT_FALSE(bestval == 0x7fffffff))
reportandexit("CACHE SPACE ALL LOCKED UP!"); reportandexit("CACHE SPACE ALL LOCKED UP!");
//Suck things out //Suck things out
for (sucklen=-newbytes,suckz=bestz; sucklen<0; sucklen+=cac[suckz++].leng) int32_t sucklen = -newbytes;
int32_t suckz = bestz;
for (;sucklen<0; sucklen+=cac[suckz++].leng)
if (*cac[suckz].lock) if (*cac[suckz].lock)
*cac[suckz].hand = 0; *cac[suckz].hand = 0;
@ -262,6 +256,7 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
cacnum -= suckz; cacnum -= suckz;
Bmemmove(&cac[bestz], &cac[bestz + suckz], (cacnum - bestz) * sizeof(cactype)); Bmemmove(&cac[bestz], &cac[bestz + suckz], (cacnum - bestz) * sizeof(cactype));
cac[bestz].hand = newhandle; cac[bestz].hand = newhandle;
*newhandle = cachestart + besto; *newhandle = cachestart + besto;
cac[bestz].leng = newbytes; cac[bestz].leng = newbytes;
@ -271,11 +266,9 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
if (sucklen <= 0) if (sucklen <= 0)
return; return;
bestz++; if (++bestz == cacnum)
if (bestz == cacnum)
{ {
inc_and_check_cacnum(); inc_and_check_cacnum();
cac[bestz].leng = sucklen; cac[bestz].leng = sucklen;
cac[bestz].lock = &zerochar; cac[bestz].lock = &zerochar;
return; return;
@ -289,8 +282,9 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
inc_and_check_cacnum(); inc_and_check_cacnum();
for (z=cacnum-1; z>bestz; z--) for (native_t z=cacnum-1; z>bestz; z--)
cac[z] = cac[z-1]; cac[z] = cac[z-1];
cac[bestz].leng = sucklen; cac[bestz].leng = sucklen;
cac[bestz].lock = &zerochar; cac[bestz].lock = &zerochar;
} }
@ -299,11 +293,13 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
void agecache(void) void agecache(void)
{ {
#ifndef DEBUG_ALLOCACHE_AS_MALLOC #ifndef DEBUG_ALLOCACHE_AS_MALLOC
bssize_t cnt = (cacnum>>4); static int32_t agecount;
if (agecount >= cacnum) if (agecount >= cacnum)
agecount = cacnum-1; agecount = cacnum-1;
native_t cnt = (cacnum>>5);
if (agecount < 0 || !cnt) if (agecount < 0 || !cnt)
return; return;
@ -313,8 +309,7 @@ void agecache(void)
if (cac[agecount].lock && (((*cac[agecount].lock)-2)&255) < 198) if (cac[agecount].lock && (((*cac[agecount].lock)-2)&255) < 198)
(*cac[agecount].lock)--; (*cac[agecount].lock)--;
agecount--; if (--agecount < 0)
if (agecount < 0)
agecount = cacnum-1; agecount = cacnum-1;
} }
#endif #endif
@ -323,20 +318,27 @@ void agecache(void)
static void reportandexit(const char *errormessage) static void reportandexit(const char *errormessage)
{ {
#ifndef DEBUG_ALLOCACHE_AS_MALLOC #ifndef DEBUG_ALLOCACHE_AS_MALLOC
int32_t i, j;
//setvmode(0x3); //setvmode(0x3);
j = 0; int32_t j = 0;
for (i=0; i<cacnum; i++) for (native_t i = 0; i < cacnum; i++)
{ {
Bprintf("%d- ",i); Bprintf("%zu- ", i);
if (cac[i].hand) Bprintf("ptr: 0x%" PRIxPTR ", ",*cac[i].hand);
else Bprintf("ptr: NULL, "); if (cac[i].hand)
Bprintf("ptr: 0x%" PRIxPTR ", ", *cac[i].hand);
else
Bprintf("ptr: NULL, ");
Bprintf("leng: %d, ", cac[i].leng); Bprintf("leng: %d, ", cac[i].leng);
if (cac[i].lock) Bprintf("lock: %d\n",*cac[i].lock);
else Bprintf("lock: NULL\n"); if (cac[i].lock)
Bprintf("lock: %d\n", *cac[i].lock);
else
Bprintf("lock: NULL\n");
j += cac[i].leng; j += cac[i].leng;
} }
Bprintf("Cachesize = %d\n", cachesize); Bprintf("Cachesize = %d\n", cachesize);
Bprintf("Cacnum = %d\n", cacnum); Bprintf("Cacnum = %d\n", cacnum);
Bprintf("Cache length sum = %d\n", j); Bprintf("Cache length sum = %d\n", j);
@ -1704,6 +1706,14 @@ int32_t kdfread(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil)
return c1d_read_compressed(buffer, dasizeof, count, (intptr_t)fil); return c1d_read_compressed(buffer, dasizeof, count, (intptr_t)fil);
} }
// LZ4_COMPRESSION_ACCELERATION_VALUE can be tuned for performance/space trade-off
// (lower number = higher compression ratio, higher number = faster compression speed)
#define LZ4_COMPRESSION_ACCELERATION_VALUE 15
static char compressedDataStackBuf[131072];
int32_t lz4CompressionLevel = LZ4_COMPRESSION_ACCELERATION_VALUE;
int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil) int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil)
{ {
int32_t leng; int32_t leng;
@ -1711,12 +1721,13 @@ int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil)
// read compressed data length // read compressed data length
if (c1d_readfunc(fil, &leng, 4) != 4) if (c1d_readfunc(fil, &leng, 4) != 4)
return -1; return -1;
leng = B_LITTLE32(leng); leng = B_LITTLE32(leng);
char compressedDataStackBuf[100000];
char *pCompressedData = compressedDataStackBuf; char *pCompressedData = compressedDataStackBuf;
if (leng > 100000)
pCompressedData = (char*) Bmalloc(leng); if (leng > ARRAY_SSIZE(compressedDataStackBuf))
pCompressedData = (char *)Xaligned_alloc(16, leng);
if (c1d_readfunc(fil, pCompressedData, leng) != leng) if (c1d_readfunc(fil, pCompressedData, leng) != leng)
return -1; return -1;
@ -1724,7 +1735,7 @@ int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil)
int32_t decompressedLength = LZ4_decompress_safe(pCompressedData, (char*) buffer, leng, dasizeof*count); int32_t decompressedLength = LZ4_decompress_safe(pCompressedData, (char*) buffer, leng, dasizeof*count);
if (pCompressedData != compressedDataStackBuf) if (pCompressedData != compressedDataStackBuf)
free(pCompressedData); Baligned_free(pCompressedData);
return decompressedLength/dasizeof; return decompressedLength/dasizeof;
} }
@ -1794,25 +1805,22 @@ void dfwrite(const void *buffer, bsize_t dasizeof, bsize_t count, BFILE *fil)
c1d_write_compressed(buffer, dasizeof, count, (intptr_t)fil); c1d_write_compressed(buffer, dasizeof, count, (intptr_t)fil);
} }
// LZ4_COMPRESSION_ACCELERATION_VALUE can be tuned for performance/space trade-off
// (lower number = higher compression ratio, higher number = faster compression speed)
#define LZ4_COMPRESSION_ACCELERATION_VALUE 15
void dfwrite_LZ4(const void *buffer, bsize_t dasizeof, bsize_t count, BFILE *fil) void dfwrite_LZ4(const void *buffer, bsize_t dasizeof, bsize_t count, BFILE *fil)
{ {
char compressedDataStackBuf[100000];
char * pCompressedData = compressedDataStackBuf; char * pCompressedData = compressedDataStackBuf;
int32_t maxCompressedSize = LZ4_compressBound(dasizeof*count); int32_t const maxCompressedSize = LZ4_compressBound(dasizeof * count);
if (maxCompressedSize > 100000)
pCompressedData = (char*) Bmalloc(maxCompressedSize);
const int32_t leng = LZ4_compress_fast((const char*) buffer, pCompressedData, dasizeof*count, maxCompressedSize, LZ4_COMPRESSION_ACCELERATION_VALUE); if (maxCompressedSize > ARRAY_SSIZE(compressedDataStackBuf))
const int32_t swleng = B_LITTLE32(leng); pCompressedData = (char *)Xaligned_alloc(16, maxCompressedSize);
int32_t const leng = LZ4_compress_fast((const char*) buffer, pCompressedData, dasizeof*count, maxCompressedSize, lz4CompressionLevel);
int32_t const swleng = B_LITTLE32(leng);
c1d_writefunc((intptr_t) fil, &swleng, 4); c1d_writefunc((intptr_t) fil, &swleng, 4);
c1d_writefunc((intptr_t) fil, pCompressedData, leng); c1d_writefunc((intptr_t) fil, pCompressedData, leng);
if (pCompressedData != compressedDataStackBuf) if (pCompressedData != compressedDataStackBuf)
free(pCompressedData); Baligned_free(pCompressedData);
} }

View file

@ -39,8 +39,6 @@
L_State g_engState; L_State g_engState;
#endif #endif
#define CACHEAGETIME 16
////////// //////////
// Compilation switches for optional/extended engine features // Compilation switches for optional/extended engine features
@ -165,7 +163,6 @@ int32_t globalx1, globaly2, globalx3, globaly3;
int32_t sloptable[16384]; int32_t sloptable[16384];
static intptr_t slopalookup[16384]; // was 2048 static intptr_t slopalookup[16384]; // was 2048
static int32_t lastageclock;
static int32_t no_radarang2 = 0; static int32_t no_radarang2 = 0;
static int16_t radarang[1280], *radarang2; static int16_t radarang[1280], *radarang2;
@ -9988,10 +9985,9 @@ void nextpage(void)
case 480: case 480:
break; break;
} }
faketimerhandler();
if ((totalclock >= lastageclock+CACHEAGETIME) || (totalclock < lastageclock)) faketimerhandler();
{ lastageclock = totalclock; agecache(); } agecache();
#ifdef USE_OPENGL #ifdef USE_OPENGL
omdtims = mdtims; mdtims = getticks(); omdtims = mdtims; mdtims = getticks();

View file

@ -567,12 +567,7 @@ int32_t loadpics(const char *filename, int32_t askedsize)
cachesize = (Bgetsysmemsize() <= (uint32_t)askedsize) ? (int32_t)((Bgetsysmemsize() / 100) * 60) : askedsize; cachesize = (Bgetsysmemsize() <= (uint32_t)askedsize) ? (int32_t)((Bgetsysmemsize() / 100) * 60) : askedsize;
// NOTE: this doesn't make a lot of sense on modern OSs... // NOTE: this doesn't make a lot of sense on modern OSs...
while ((pic = Bmalloc(cachesize)) == NULL) pic = Xaligned_alloc(cachesize, 16);
{
cachesize -= 65536;
if (cachesize < 65536)
return -1;
}
initcache((intptr_t) pic, cachesize); initcache((intptr_t) pic, cachesize);
E_RecalcPicSiz(); E_RecalcPicSiz();
@ -793,5 +788,5 @@ void Buninitart(void)
if (artfil != -1) if (artfil != -1)
kclose(artfil); kclose(artfil);
DO_FREE_AND_NULL(pic); ALIGNED_FREE_AND_NULL(pic);
} }