diff --git a/source/build/include/cache1d.h b/source/build/include/cache1d.h index 7eb931aa6..d3995d8f0 100644 --- a/source/build/include/cache1d.h +++ b/source/build/include/cache1d.h @@ -47,31 +47,37 @@ void kclose(int32_t handle); void krename(int32_t crcval, int32_t filenum, const char *newname); char const * kfileparent(int32_t handle); -typedef struct { intptr_t *hand; int32_t leng; char *lock ; } cactype; +typedef struct +{ + intptr_t *hand; + int32_t leng; + char * lock; +} cactype; enum { - CACHE1D_FIND_FILE = 1, - CACHE1D_FIND_DIR = 2, - CACHE1D_FIND_DRIVE = 4, - CACHE1D_FIND_NOCURDIR = 8, + CACHE1D_FIND_FILE = 1, + CACHE1D_FIND_DIR = 2, + CACHE1D_FIND_DRIVE = 4, + CACHE1D_FIND_NOCURDIR = 8, - CACHE1D_OPT_NOSTACK = 0x100, + CACHE1D_OPT_NOSTACK = 0x100, - // the lower the number, the higher the priority - CACHE1D_SOURCE_DRIVE = 0, - CACHE1D_SOURCE_CURDIR = 1, - CACHE1D_SOURCE_PATH = 2, // + path stack depth - CACHE1D_SOURCE_ZIP = 0x7ffffffe, - CACHE1D_SOURCE_GRP = 0x7fffffff, + // the lower the number, the higher the priority + CACHE1D_SOURCE_DRIVE = 0, + CACHE1D_SOURCE_CURDIR = 1, + CACHE1D_SOURCE_PATH = 2, // + path stack depth + CACHE1D_SOURCE_ZIP = 0x7ffffffe, + CACHE1D_SOURCE_GRP = 0x7fffffff, }; typedef struct _CACHE1D_FIND_REC { - char *name; - int32_t type, source; - struct _CACHE1D_FIND_REC *next, *prev, *usera, *userb; + char *name; + int32_t type, source; + struct _CACHE1D_FIND_REC *next, *prev, *usera, *userb; } CACHE1D_FIND_REC; void klistfree(CACHE1D_FIND_REC *rec); CACHE1D_FIND_REC *klistpath(const char *path, const char *mask, int32_t type); +extern int32_t lz4CompressionLevel; int32_t kdfread(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil); int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil); #if 0 diff --git a/source/build/src/baselayer.cpp b/source/build/src/baselayer.cpp index 76b48c482..8b5559676 100644 --- a/source/build/src/baselayer.cpp +++ b/source/build/src/baselayer.cpp @@ -7,6 +7,7 @@ 
#include "a.h" #include "polymost.h" +#include "cache1d.h" // input char inputdevices=0; @@ -529,6 +530,7 @@ int32_t baselayer_init(void) #endif static osdcvardata_t cvars_engine[] = { + { "lz4compressionlevel","adjust LZ4 compression level used for savegames",(void *) &lz4CompressionLevel, CVAR_INT, 1, 32 }, { "r_usenewaspect","enable/disable new screen aspect ratio determination code",(void *) &r_usenewaspect, CVAR_BOOL, 0, 1 }, { "r_screenaspect","if using r_usenewaspect and in fullscreen, screen aspect ratio in the form XXYY, e.g. 1609 for 16:9", (void *) &r_screenxy, SCREENASPECT_CVAR_TYPE, 0, 9999 }, diff --git a/source/build/src/cache1d.cpp b/source/build/src/cache1d.cpp index a6d8aff3a..66fee8a93 100644 --- a/source/build/src/cache1d.cpp +++ b/source/build/src/cache1d.cpp @@ -110,7 +110,6 @@ int32_t kpzbufload(char const * const filnam) static int32_t cachesize = 0; static char zerochar = 0; static intptr_t cachestart = 0; -static int32_t agecount = 0; static int32_t lockrecip[200]; int32_t cacnum = 0; @@ -148,19 +147,11 @@ void initcache(intptr_t dacachestart, int32_t dacachesize) for (i=1; i<200; i++) lockrecip[i] = tabledivide32_noinline(1<<28, 200-i); - // The following code was relocated here from engine.c, since this - // function is only ever called once (from there), and it seems to - // really belong here: - // - // initcache((FP_OFF(pic)+15)&0xfffffff0,(cachesize-((-FP_OFF(pic))&15))&0xfffffff0); - // - // I'm not sure why it's necessary, but the code is making sure the - // cache starts on a multiple of 16 bytes? 
-- SA + // we allocate this block with aligned_alloc, but I'm leaving this here in + // case we run on any platforms that just implement it as regular malloc -//printf("BEFORE: cachestart = %x, cachesize = %d\n", dacachestart, dacachesize); cachestart = ((uintptr_t)dacachestart+15)&~(uintptr_t)0xf; cachesize = (dacachesize-(((uintptr_t)(dacachestart))&0xf))&~(uintptr_t)0xf; -//printf("AFTER : cachestart = %x, cachesize = %d\n", cachestart, cachesize); cac[0].leng = cachesize; cac[0].lock = &zerochar; @@ -183,45 +174,40 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) #else static inline void inc_and_check_cacnum(void) { - if (++cacnum > MAXCACHEOBJECTS) + if (EDUKE32_PREDICT_FALSE(++cacnum > MAXCACHEOBJECTS)) reportandexit("Too many objects in cache! (cacnum > MAXCACHEOBJECTS)"); } void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) { - int32_t i, z, bestz=0, bestval, besto=0, o1, sucklen, suckz; + if (EDUKE32_PREDICT_FALSE(*newlockptr == 0)) + reportandexit("ALLOCACHE CALLED WITH LOCK OF 0!"); -//printf(" ==> asking for %d bytes, ", newbytes); // Make all requests a multiple of 16 bytes - newbytes = (newbytes+15)&0xfffffff0; -//printf("allocated %d bytes\n", newbytes); + newbytes = (newbytes + 15) & ~0xf; - if ((unsigned)newbytes > (unsigned)cachesize) + if (EDUKE32_PREDICT_FALSE((unsigned)newbytes > (unsigned)cachesize)) { Bprintf("Cachesize: %d\n",cachesize); Bprintf("*Newhandle: 0x%" PRIxPTR ", Newbytes: %d, *Newlock: %d\n",(intptr_t)newhandle,newbytes,*newlockptr); reportandexit("BUFFER TOO BIG TO FIT IN CACHE!"); } - if (*newlockptr == 0) - { - reportandexit("ALLOCACHE CALLED WITH LOCK OF 0!"); - } + int32_t bestz = 0; + int32_t besto = 0; + int32_t bestval = 0x7fffffff; - //Find best place - bestval = 0x7fffffff; o1 = cachesize; - for (z=cacnum-1; z>=0; z--) + for (native_t z=cacnum-1, o1=cachesize; z>=0; z--) { - int32_t zz, o2, daval; - o1 -= cac[z].leng; - o2 = o1+newbytes; + int32_t o2 = o1 + newbytes; if (o2 
> cachesize) continue; - daval = 0; - for (i=o1,zz=z; i= bestval) break; } if (daval < bestval) { - bestval = daval; besto = o1; bestz = z; - if (bestval == 0) break; + bestval = daval; + besto = o1; + bestz = z; + + if (bestval == 0) + break; } } //printf("%d %d %d\n",besto,newbytes,*newlockptr); - if (bestval == 0x7fffffff) + if (EDUKE32_PREDICT_FALSE(bestval == 0x7fffffff)) reportandexit("CACHE SPACE ALL LOCKED UP!"); //Suck things out - for (sucklen=-newbytes,suckz=bestz; sucklen<0; sucklen+=cac[suckz++].leng) + int32_t sucklen = -newbytes; + int32_t suckz = bestz; + + for (;sucklen<0; sucklen+=cac[suckz++].leng) if (*cac[suckz].lock) *cac[suckz].hand = 0; @@ -261,9 +255,10 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) suckz -= bestz+1; cacnum -= suckz; - Bmemmove(&cac[bestz], &cac[bestz+suckz], (cacnum-bestz)*sizeof(cactype)); + Bmemmove(&cac[bestz], &cac[bestz + suckz], (cacnum - bestz) * sizeof(cactype)); + cac[bestz].hand = newhandle; - *newhandle = cachestart + besto; + *newhandle = cachestart + besto; cac[bestz].leng = newbytes; cac[bestz].lock = newlockptr; @@ -271,11 +266,9 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) if (sucklen <= 0) return; - bestz++; - if (bestz == cacnum) + if (++bestz == cacnum) { inc_and_check_cacnum(); - cac[bestz].leng = sucklen; cac[bestz].lock = &zerochar; return; @@ -289,8 +282,9 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) inc_and_check_cacnum(); - for (z=cacnum-1; z>bestz; z--) + for (native_t z=cacnum-1; z>bestz; z--) cac[z] = cac[z-1]; + cac[bestz].leng = sucklen; cac[bestz].lock = &zerochar; } @@ -299,11 +293,13 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) void agecache(void) { #ifndef DEBUG_ALLOCACHE_AS_MALLOC - bssize_t cnt = (cacnum>>4); + static int32_t agecount; if (agecount >= cacnum) agecount = cacnum-1; + native_t cnt = (cacnum>>5); + if (agecount < 0 || !cnt) return; @@ -313,8 +309,7 @@ void 
agecache(void) if (cac[agecount].lock && (((*cac[agecount].lock)-2)&255) < 198) (*cac[agecount].lock)--; - agecount--; - if (agecount < 0) + if (--agecount < 0) agecount = cacnum-1; } #endif @@ -323,25 +318,32 @@ void agecache(void) static void reportandexit(const char *errormessage) { #ifndef DEBUG_ALLOCACHE_AS_MALLOC - int32_t i, j; - //setvmode(0x3); - j = 0; - for (i=0; i 100000) - pCompressedData = (char*) Bmalloc(leng); + char *pCompressedData = compressedDataStackBuf; + + if (leng > ARRAY_SSIZE(compressedDataStackBuf)) + pCompressedData = (char *)Xaligned_alloc(16, leng); if (c1d_readfunc(fil, pCompressedData, leng) != leng) return -1; @@ -1724,7 +1735,7 @@ int32_t kdfread_LZ4(void *buffer, bsize_t dasizeof, bsize_t count, int32_t fil) int32_t decompressedLength = LZ4_decompress_safe(pCompressedData, (char*) buffer, leng, dasizeof*count); if (pCompressedData != compressedDataStackBuf) - free(pCompressedData); + Baligned_free(pCompressedData); return decompressedLength/dasizeof; } @@ -1794,25 +1805,22 @@ void dfwrite(const void *buffer, bsize_t dasizeof, bsize_t count, BFILE *fil) c1d_write_compressed(buffer, dasizeof, count, (intptr_t)fil); } -// LZ4_COMPRESSION_ACCELERATION_VALUE can be tuned for performance/space trade-off -// (lower number = higher compression ratio, higher number = faster compression speed) -#define LZ4_COMPRESSION_ACCELERATION_VALUE 15 void dfwrite_LZ4(const void *buffer, bsize_t dasizeof, bsize_t count, BFILE *fil) { - char compressedDataStackBuf[100000]; - char* pCompressedData = compressedDataStackBuf; - int32_t maxCompressedSize = LZ4_compressBound(dasizeof*count); - if (maxCompressedSize > 100000) - pCompressedData = (char*) Bmalloc(maxCompressedSize); + char * pCompressedData = compressedDataStackBuf; + int32_t const maxCompressedSize = LZ4_compressBound(dasizeof * count); - const int32_t leng = LZ4_compress_fast((const char*) buffer, pCompressedData, dasizeof*count, maxCompressedSize, LZ4_COMPRESSION_ACCELERATION_VALUE); - const 
int32_t swleng = B_LITTLE32(leng); + if (maxCompressedSize > ARRAY_SSIZE(compressedDataStackBuf)) + pCompressedData = (char *)Xaligned_alloc(16, maxCompressedSize); + + int32_t const leng = LZ4_compress_fast((const char*) buffer, pCompressedData, dasizeof*count, maxCompressedSize, lz4CompressionLevel); + int32_t const swleng = B_LITTLE32(leng); c1d_writefunc((intptr_t) fil, &swleng, 4); c1d_writefunc((intptr_t) fil, pCompressedData, leng); if (pCompressedData != compressedDataStackBuf) - free(pCompressedData); + Baligned_free(pCompressedData); } diff --git a/source/build/src/engine.cpp b/source/build/src/engine.cpp index fc90a2c34..2d82fb12b 100644 --- a/source/build/src/engine.cpp +++ b/source/build/src/engine.cpp @@ -39,8 +39,6 @@ L_State g_engState; #endif -#define CACHEAGETIME 16 - ////////// // Compilation switches for optional/extended engine features @@ -165,7 +163,6 @@ int32_t globalx1, globaly2, globalx3, globaly3; int32_t sloptable[16384]; static intptr_t slopalookup[16384]; // was 2048 -static int32_t lastageclock; static int32_t no_radarang2 = 0; static int16_t radarang[1280], *radarang2; @@ -9988,10 +9985,9 @@ void nextpage(void) case 480: break; } - faketimerhandler(); - if ((totalclock >= lastageclock+CACHEAGETIME) || (totalclock < lastageclock)) - { lastageclock = totalclock; agecache(); } + faketimerhandler(); + agecache(); #ifdef USE_OPENGL omdtims = mdtims; mdtims = getticks(); diff --git a/source/build/src/tiles.cpp b/source/build/src/tiles.cpp index c3c45418c..4f281cf59 100644 --- a/source/build/src/tiles.cpp +++ b/source/build/src/tiles.cpp @@ -567,12 +567,7 @@ int32_t loadpics(const char *filename, int32_t askedsize) cachesize = (Bgetsysmemsize() <= (uint32_t)askedsize) ? (int32_t)((Bgetsysmemsize() / 100) * 60) : askedsize; // NOTE: this doesn't make a lot of sense on modern OSs... 
- while ((pic = Bmalloc(cachesize)) == NULL) - { - cachesize -= 65536; - if (cachesize < 65536) - return -1; - } + pic = Xaligned_alloc(16, cachesize); /* arguments are (alignment, size) */ initcache((intptr_t) pic, cachesize); E_RecalcPicSiz(); @@ -793,5 +788,5 @@ void Buninitart(void) if (artfil != -1) kclose(artfil); - DO_FREE_AND_NULL(pic); + ALIGNED_FREE_AND_NULL(pic); }