Faster CRC32 implementation (slicing-by-4 for 32-bit platforms and slicing-by-8 for 64-bit)

git-svn-id: https://svn.eduke32.com/eduke32@4625 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2014-09-30 04:15:04 +00:00
parent 31d2f15b24
commit 423b87a707
6 changed files with 111 additions and 53 deletions

View file

@ -7,30 +7,16 @@
extern "C" {
#endif
extern uint32_t crc32table[256];
#define POLY 0xEDB88320
void initcrc32table(void);
#ifdef BITNESS64
extern uint32_t crc32table[8][256];
#else
extern uint32_t crc32table[4][256];
#endif
uint32_t crc32once(uint8_t *blk, uint32_t len);
// Seeds a running CRC accumulator with the all-ones start value.
// A null crcvar is accepted and leaves nothing modified.
static inline void crc32init(uint32_t *crcvar)
{
    if (crcvar)
    {
        *crcvar = 0xffffffffl;
    }
}
// Folds len bytes starting at blk into the running CRC held in *crcvar,
// one byte per table lookup. The accumulator is read once up front and
// written back once at the end.
static inline void crc32block(uint32_t *crcvar, uint8_t *blk, uint32_t len)
{
    uint32_t acc = *crcvar;
    uint32_t pos;

    for (pos = 0; pos < len; pos++)
    {
        // The table index is the low byte of (accumulator XOR next input byte).
        acc = (acc >> 8) ^ crc32table[(acc ^ blk[pos]) & 0xffl];
    }

    *crcvar = acc;
}
// Finalises a running CRC by inverting every bit of *crcvar.
// The inverted value is both stored back into *crcvar and returned.
static inline uint32_t crc32finish(uint32_t *crcvar)
{
    const uint32_t inverted = *crcvar ^ 0xffffffffl;

    *crcvar = inverted;
    return inverted;
}
extern uint32_t crc32(const void* data, size_t length, uint32_t crc);
extern void initcrc32table(void);
#ifdef EXTERNC
}

View file

@ -1,26 +1,102 @@
// based on http://create.stephan-brumme.com/crc32/Crc32.cpp, zlib license
#include "compat.h"
#include "crc32.h"
uint32_t crc32table[256];
// Slicing lookup tables: eight slices when BITNESS64 is defined, four otherwise.
// They are filled by initcrc32table(), which must run before crc32() is used.
#ifdef BITNESS64
uint32_t crc32table[8][256];
#else
uint32_t crc32table[4][256];
#endif

// Assembles four consecutive input bytes into a 32-bit value, first byte in the
// low 8 bits. Reading byte-wise keeps the access valid for any buffer alignment
// and any host byte order, so no endian-specific code path is needed.
static inline uint32_t crc32_load_le32(const uint8_t *p)
{
    return  (uint32_t)p[0]        |
           ((uint32_t)p[1] <<  8) |
           ((uint32_t)p[2] << 16) |
           ((uint32_t)p[3] << 24);
}

// Computes the CRC32 of a byte range using table slicing.
//
//   data   - start of the input bytes; only read, any alignment is accepted
//   length - number of bytes to process
//   crc    - 0 to start a new checksum, or the value returned by a previous
//            call to continue that checksum over further data
//
// Returns the finalised checksum. The value is inverted on entry and again on
// exit, which is what makes feeding a previous result back in continue the
// same running CRC.
//
// based on http://create.stephan-brumme.com/crc32/Crc32.cpp, zlib license
uint32_t crc32(const void* data, size_t length, uint32_t crc)
{
    const uint8_t *cur = (const uint8_t *) data;

    crc = ~crc;

#ifdef BITNESS64
    // process eight bytes at once (Slicing-by-8)
    while (length >= 8)
    {
        const uint32_t one = crc32_load_le32(cur) ^ crc;
        const uint32_t two = crc32_load_le32(cur + 4);

        // The earliest input byte has the most bytes following it in this
        // group, so it uses the highest-numbered slice.
        crc = crc32table[0][(two>>24) & 0xFF] ^
              crc32table[1][(two>>16) & 0xFF] ^
              crc32table[2][(two>> 8) & 0xFF] ^
              crc32table[3][ two      & 0xFF] ^
              crc32table[4][(one>>24) & 0xFF] ^
              crc32table[5][(one>>16) & 0xFF] ^
              crc32table[6][(one>> 8) & 0xFF] ^
              crc32table[7][ one      & 0xFF];

        cur    += 8;
        length -= 8;
    }
#else
    // process four bytes at once (Slicing-by-4)
    while (length >= 4)
    {
        const uint32_t one = crc32_load_le32(cur) ^ crc;

        crc = crc32table[0][(one>>24) & 0xFF] ^
              crc32table[1][(one>>16) & 0xFF] ^
              crc32table[2][(one>> 8) & 0xFF] ^
              crc32table[3][ one      & 0xFF];

        cur    += 4;
        length -= 4;
    }
#endif

    // remaining bytes (up to 7 with Slicing-by-8, up to 3 with Slicing-by-4),
    // one table lookup per byte
    while (length-- > 0)
        crc = (crc >> 8) ^ crc32table[0][(crc & 0xFF) ^ *cur++];

    return ~crc;
}
// Fills crc32table with the lookup values that the CRC routines index into.
// NOTE(review): this span comes from a diff view (see the "@ -1,26 +1,102 @"
// hunk header above) and appears to interleave the replaced single-table loop
// with its multi-table replacement, so it does not read as one compilable
// function as shown -- confirm against the real crc32.c before relying on it.
void initcrc32table(void)
{
uint32_t i,j,k;
// algorithm and polynomial same as that used by infozip's zip
for (i=0; i<256; i++)
int i;
// Slice 0: push every possible byte value (0..0xFF) through eight
// shift-and-conditionally-XOR steps with POLY.
for (i = 0; i <= 0xFF; i++)
{
j = i;
for (k=8; k; k--)
j = (j&1) ? (0xedb88320L^(j>>1)) : (j>>1);
crc32table[i] = j;
uint32_t j, crc = i;
for (j = 0; j < 8; j++)
// (crc & 1) * POLY evaluates to POLY when the low bit is set and to 0 otherwise.
crc = (crc >> 1) ^ ((crc & 1) * POLY);
crc32table[0][i] = crc;
}
// Higher slices: each entry is the previous slice's entry shifted right by
// one byte and XORed with the slice-0 entry selected by the byte shifted out.
for (i = 0; i <= 0xFF; i++)
{
crc32table[1][i] = (crc32table[0][i] >> 8) ^ crc32table[0][crc32table[0][i] & 0xFF];
crc32table[2][i] = (crc32table[1][i] >> 8) ^ crc32table[0][crc32table[1][i] & 0xFF];
crc32table[3][i] = (crc32table[2][i] >> 8) ^ crc32table[0][crc32table[2][i] & 0xFF];
// Slices 4..7 exist only when BITNESS64 is defined (the table is then declared [8][256]).
#ifdef BITNESS64
crc32table[4][i] = (crc32table[3][i] >> 8) ^ crc32table[0][crc32table[3][i] & 0xFF];
crc32table[5][i] = (crc32table[4][i] >> 8) ^ crc32table[0][crc32table[4][i] & 0xFF];
crc32table[6][i] = (crc32table[5][i] >> 8) ^ crc32table[0][crc32table[5][i] & 0xFF];
crc32table[7][i] = (crc32table[6][i] >> 8) ^ crc32table[0][crc32table[6][i] & 0xFF];
#endif
}
}
// One-shot checksum of a single buffer: seeds a local accumulator with
// crc32init(), feeds it the len bytes at blk through crc32block(), and
// returns what crc32finish() produces.
uint32_t crc32once(uint8_t *blk, uint32_t len)
{
    uint32_t running;

    crc32init(&running);
    crc32block(&running, blk, len);

    return crc32finish(&running);
}

View file

@ -7923,22 +7923,22 @@ static int32_t loadtables(void)
reciptable[i] = divscale30(2048, i+2048);
for (i=0; i<=512; i++)
sintable[i] = (int16_t)(16384*sin(i*BANG2RAD));
sintable[i] = (int16_t)(16384.f * sinf((float)i * BANG2RAD));
for (i=513; i<1024; i++)
sintable[i] = sintable[1024-i];
for (i=1024; i<2048; i++)
sintable[i] = -sintable[i-1024];
for (i=0; i<640; i++)
radarang[i] = (int16_t)(-64*atan((640-0.5-i)/160)/BANG2RAD);
radarang[i] = (int16_t)(atanf(((float)(640-i)-0.5f) * (1.f/160.f)) * (-64.f * (1.f/BANG2RAD)));
for (i=0; i<640; i++)
radarang[1279-i] = -radarang[i];
#ifdef B_LITTLE_ENDIAN
i = 0;
if (crc32once((uint8_t *)sintable, sizeof(sintable)) != 0xee1e7aba)
if (crc32((uint8_t *)sintable, sizeof(sintable), 0) != 0xee1e7aba)
i |= 1;
if (crc32once((uint8_t *)radarang, 640*sizeof(radarang[0])) != 0xee893d92)
if (crc32((uint8_t *)radarang, 640*sizeof(radarang[0]), 0) != 0xee893d92)
i |= 2;
if (i != 0)
@ -8155,7 +8155,7 @@ static int32_t loadpalette(void)
#endif
// If Duke3D 1.5 GRP or LameDuke, ...
if (crc32once((uint8_t *)transluc, 65536)==0x94a1fac6 || lamedukep)
if (crc32((uint8_t *)transluc, 65536, 0)==0x94a1fac6 || lamedukep)
{
int32_t i;
// ... fix up translucency table so that transluc(255,x)

View file

@ -340,14 +340,12 @@ static int32_t _internal_osdfunc_fileinfo(const osdfuncparm_t *parm)
length = kfilelength(i);
crctime = getticks();
crc32init(&crc);
do
{
j = kread(i,buf,256);
crc32block(&crc,(uint8_t *)buf,j);
crc = crc32((uint8_t *)buf,j,crc);
}
while (j == 256);
crc32finish(&crc);
crctime = getticks() - crctime;
klseek(i, 0, BSEEK_SET);

View file

@ -384,21 +384,19 @@ int32_t ScanGroups(void)
{
int32_t b, fh;
int32_t crcval;
int32_t crcval = 0;
fh = openfrompath(sidx->name, BO_RDONLY|BO_BINARY, BS_IREAD);
if (fh < 0) continue;
if (Bfstat(fh, &st)) continue;
initprintf(" Checksumming %s...", sidx->name);
crc32init((uint32_t *)&crcval);
do
{
b = read(fh, buf, BUFFER_SIZE);
if (b > 0) crc32block((uint32_t *)&crcval, (uint8_t *)buf, b);
if (b > 0) crcval = crc32((uint8_t *)buf, b, crcval);
}
while (b == BUFFER_SIZE);
crc32finish((uint32_t *)&crcval);
close(fh);
initprintf(" Done\n");

View file

@ -766,7 +766,7 @@ void Net_SendChallenge()
tempnetbuf[0] = PACKET_AUTH;
B_BUF16(&tempnetbuf[1], BYTEVERSION);
B_BUF16(&tempnetbuf[3], NETVERSION);
B_BUF32(&tempnetbuf[5], crc32once((uint8_t *)g_netPassword, Bstrlen(g_netPassword)));
B_BUF32(&tempnetbuf[5], crc32((uint8_t *)g_netPassword, Bstrlen(g_netPassword), 0));
tempnetbuf[9] = myconnectindex;
enet_peer_send(g_netClientPeer, CHAN_GAMESTATE, enet_packet_create(&tempnetbuf[0], 10, ENET_PACKET_FLAG_RELIABLE));
@ -786,7 +786,7 @@ void Net_ReceiveChallenge(uint8_t *pbuf, int32_t packbufleng, ENetEvent *event)
initprintf("Bad client protocol: version %u.%u\n", byteVersion, netVersion);
return;
}
if (crc != crc32once((uint8_t *)g_netPassword, Bstrlen(g_netPassword)))
if (crc != crc32((uint8_t *)g_netPassword, Bstrlen(g_netPassword), 0))
{
enet_peer_disconnect_later(event->peer, DISC_BAD_PASSWORD);
initprintf("Bad password from client.\n");