From 0cf527914feb62cddefbb998218cab845f063bb4 Mon Sep 17 00:00:00 2001 From: terminx Date: Sat, 25 Oct 2014 03:31:41 +0000 Subject: [PATCH] Minor optimizations (loop unrolling) to LZW functions in cache1d, improves throughput ~20% on my i7 git-svn-id: https://svn.eduke32.com/eduke32@4665 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/src/cache1d.c | 106 +++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 18 deletions(-) diff --git a/polymer/eduke32/build/src/cache1d.c b/polymer/eduke32/build/src/cache1d.c index da8890427..364c4730d 100644 --- a/polymer/eduke32/build/src/cache1d.c +++ b/polymer/eduke32/build/src/cache1d.c @@ -1468,8 +1468,17 @@ C1D_STATIC int32_t c1d_read_compressed(void *buffer, bsize_t dasizeof, bsize_t c if (dasizeof > LZWSIZE) { - count *= dasizeof; - dasizeof = 1; + if (count > LZWSIZE) + { + count *= dasizeof; + dasizeof = 1; + } + else + { + i = count; + count = dasizeof; + dasizeof = i; + } } ptr = (char *)buffer; @@ -1488,7 +1497,18 @@ C1D_STATIC int32_t c1d_read_compressed(void *buffer, bsize_t dasizeof, bsize_t c if (k) return -1; } - for (j=0; j= 4) + { + for (; j LZWSIZE) + if (dasizeof > LZWSIZE && count > LZWSIZE) { - count *= dasizeof; - dasizeof = 1; + if (count > LZWSIZE) + { + count *= dasizeof; + dasizeof = 1; + } + else + { + i = count; + count = dasizeof; + dasizeof = i; + } } - ptr = (char*)buffer; - Bmemcpy(lzwrawbuf, ptr, (int32_t)dasizeof); k = dasizeof; @@ -1539,7 +1566,18 @@ C1D_STATIC void c1d_write_compressed(const void *buffer, bsize_t dasizeof, bsize for (i=1; i= 4) + { + for (; j=0; i--) + for (i=255; i>=4; i-=4) + { + lzwtmpbuf[i] = i, lzwcodenext[i] = (i+1)&255; + lzwtmpbuf[i-1] = i-1, lzwcodenext[i-1] = (i) &255; + lzwtmpbuf[i-2] = i-2, lzwcodenext[i-2] = (i-1)&255; + lzwtmpbuf[i-3] = i-3, lzwcodenext[i-3] = (i-2)&255; + lzwcodehead[i] = lzwcodehead[i-1] = lzwcodehead[i-2] = lzwcodehead[i-3] = -1; + } + + for (; i>=0; i--) { lzwtmpbuf[i] = i; lzwcodenext[i] = (i+1)&255; @@ -1588,8 +1635,7 @@ static int32_t lzwcompress(const char *lzwinbuf, int32_t uncompleng, char *lzwou { int32_t newaddr; - bytecnt1++; - if (bytecnt1 == uncompleng) + if (++bytecnt1 == uncompleng) break; // (*) see XXX below if (lzwcodehead[addr] < 0) @@ -1648,7 +1694,16 @@ static int32_t lzwcompress(const char *lzwinbuf, int32_t uncompleng, char *lzwou // Failed compressing, mark this in the stream. shortptr[1] = 0; - for (i=0; i=0; i--) + for (i=255; i>=4; i-=4) { - lzwbuf2[i] = i; - lzwbuf3[i] = i; + lzwbuf2[i] = lzwbuf3[i] = i; + lzwbuf2[i-1] = lzwbuf3[i-1] = i-1; + lzwbuf2[i-2] = lzwbuf3[i-2] = i-2; + lzwbuf2[i-3] = lzwbuf3[i-3] = i-3; } + lzwbuf2[i] = lzwbuf3[i] = i; + lzwbuf2[i-1] = lzwbuf3[i-1] = i-1; + lzwbuf2[i-2] = lzwbuf3[i-2] = i-2; + currstr = 256; bitcnt = (4<<3); outbytecnt = 0; numbits = 8; oneupnumbits = (1<<8); do @@ -1700,7 +1761,16 @@ static int32_t lzwuncompress(const char *lzwinbuf, int32_t compleng, char *lzwou lzwtmpbuf[leng] = lzwbuf2[dat]; lzwoutbuf[outbytecnt++] = dat; - for (i=leng-1; i>=0; i--) + + for (i=leng-1; i>=4; i-=4, outbytecnt+=4) + { + lzwoutbuf[outbytecnt] = lzwtmpbuf[i]; + lzwoutbuf[outbytecnt+1] = lzwtmpbuf[i-1]; + lzwoutbuf[outbytecnt+2] = lzwtmpbuf[i-2]; + lzwoutbuf[outbytecnt+3] = lzwtmpbuf[i-3]; + } + + for (; i>=0; i--) lzwoutbuf[outbytecnt++] = lzwtmpbuf[i]; lzwbuf2[currstr-1] = dat; lzwbuf2[currstr] = dat;