- say goodbye to libdivide!

This commit is contained in:
Christoph Oelckers 2020-07-14 20:21:16 +02:00
parent 2f3d405f8b
commit 424716bb88
8 changed files with 13 additions and 140 deletions

View file

@ -3606,7 +3606,7 @@ void viewLoadingScreenWide(void)
}
else
{
int width = roundscale(xdim, 240, ydim);
int width = scale(xdim, 240, ydim);
int nCount = (width+kLoadScreenWideBackWidth-1)/kLoadScreenWideBackWidth;
for (int i = 0; i < nCount; i++)
{

View file

@ -6,6 +6,7 @@
// by Jonathon Fowler (jf@jonof.id.au)
// by the EDuke32 team (development@voidpoint.com)
#include "templates.h"
#ifndef pragmas_h_
#define pragmas_h_
@ -36,32 +37,6 @@ extern libdivide::libdivide_s64_t divtable64[DIVTABLESIZE];
extern libdivide::libdivide_s32_t divtable32[DIVTABLESIZE];
extern void initdivtables(void);
static inline uint32_t divideu32(uint32_t const n, uint32_t const d)
{
static libdivide::libdivide_u32_t udiv;
static uint32_t lastd;
if (d == lastd)
goto skip;
udiv = libdivide::libdivide_u32_gen((lastd = d));
skip:
return libdivide::libdivide_u32_do(n, &udiv);
}
static inline uint64_t divideu64(uint64_t const n, uint64_t const d)
{
static libdivide::libdivide_u64_t udiv;
static uint64_t lastd;
if (d == lastd)
goto skip;
udiv = libdivide::libdivide_u64_gen((lastd = d));
skip:
return libdivide::libdivide_u64_do(n, &udiv);
}
static inline int64_t tabledivide64(int64_t const n, int64_t const d)
{
static libdivide::libdivide_s64_t sdiv;
@ -76,28 +51,9 @@ skip:
return libdivide::libdivide_s64_do(n, dptr);
}
static inline int32_t tabledivide32(int32_t const n, int32_t const d)
{
static libdivide::libdivide_s32_t sdiv;
static int32_t lastd;
auto const dptr = ((uint32_t)d < DIVTABLESIZE) ? &divtable32[d] : &sdiv;
// Fixed-point divide: returns (eax * 2^ecx) / ebx, computed in 64 bits and
// truncated to 32 bits on return.
// The shift is done on the unsigned representation because left-shifting a
// negative signed value is undefined behaviour before C++20; the resulting bit
// pattern is the one a plain signed shift produces on two's-complement targets.
// Callers must pass ebx != 0 and a shift count in [0, 63].
static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx)
{
    auto const widened = static_cast<uint64_t>(static_cast<int64_t>(eax));
    auto const shifted = static_cast<int64_t>(widened << ecx);
    return static_cast<int32_t>(shifted / ebx);
}
if (d == lastd || dptr != &sdiv)
goto skip;
sdiv = libdivide::libdivide_s32_gen((lastd = d));
skip:
return libdivide::libdivide_s32_do(n, dptr);
}
extern uint32_t divideu32_noinline(uint32_t n, uint32_t d);
extern uint64_t divideu64_noinline(uint64_t n, uint64_t d);
extern int32_t tabledivide32_noinline(int32_t n, int32_t d);
// Fixed-point divide routed through tabledivide64(): shifts the widened eax
// left by ecx (passed through by()), divides by ebx, and narrows with dw().
static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx)
{
    auto const numerator = qw(eax) << by(ecx);
    return dw(tabledivide64(numerator, ebx));
}
// 64-bit fixed-point divide routed through tabledivide64():
// shifts eax left by ecx, then divides the result by ebx.
static inline int64_t divscale64(int64_t eax, int64_t ebx, int64_t ecx)
{
    int64_t const numerator = eax << ecx;
    return tabledivide64(numerator, ebx);
}
// 64-bit fixed-point divide: returns (eax * 2^ecx) / ebx.
// The shift is done on the unsigned representation because left-shifting a
// negative signed value is undefined behaviour before C++20; the resulting bit
// pattern is the one a plain signed shift produces on two's-complement targets.
// Callers must pass ebx != 0 and a shift count in [0, 63].
static inline int64_t divscale64(int64_t eax, int64_t ebx, int64_t ecx)
{
    auto const shifted = static_cast<int64_t>(static_cast<uint64_t>(eax) << ecx);
    return shifted / ebx;
}
#define EDUKE32_SCALER_PRAGMA(a) \
static FORCE_INLINE int32_t divscale##a(int32_t eax, int32_t ebx) { return divscale(eax, ebx, a); }
@ -106,17 +62,7 @@ EDUKE32_GENERATE_PRAGMAS EDUKE32_SCALER_PRAGMA(32)
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx)
{
return dw(tabledivide64(qw(eax) * edx, ecx));
}
static FORCE_INLINE int32_t scaleadd(int32_t eax, int32_t edx, int32_t addend, int32_t ecx)
{
return dw(tabledivide64(qw(eax) * edx + addend, ecx));
}
static inline int32_t roundscale(int32_t eax, int32_t edx, int32_t ecx)
{
return scaleadd(eax, edx, ecx / 2, ecx);
return int64_t(eax) * edx / ecx;
}

View file

@ -135,7 +135,6 @@ static void getclosestpointonwall_internal(vec2_t const p, int32_t const dawall,
}
i = ((i << 15) / j) << 15;
//i = tabledivide64((i << 15), j) << 15;
*closest = { (int32_t)(w.x + ((d.x * i) >> 30)), (int32_t)(w.y + ((d.y * i) >> 30)) };
}
@ -906,7 +905,7 @@ int32_t lintersect(const int32_t originX, const int32_t originY, const int32_t o
t = rayDotLineEndDiff;
}
t = tabledivide64(t << 24L, rayLengthSquared);
t = (t << 24) / rayLengthSquared;
*intersectionX = originX + mulscale24(ray.x, t);
*intersectionY = originY + mulscale24(ray.y, t);
@ -934,7 +933,7 @@ int32_t lintersect(const int32_t originX, const int32_t originY, const int32_t o
return 0;
}
int64_t t = tabledivide64(((int64_t) originDiffCrossLineVec) << 24L, rayCrossLineVec);
int64_t t = (int64_t(originDiffCrossLineVec) << 24) / rayCrossLineVec;
// For sake of completeness/readability, alternative to the above approach for an early out & avoidance of an extra division:
*intersectionX = originX + mulscale24(ray.x, t);
@ -2386,11 +2385,11 @@ void renderDrawMapView(int32_t dax, int32_t day, int32_t zoome, int16_t ang)
//relative alignment stuff
ox = v2.x-v1.x; oy = v2.y-v1.y;
i = ox*ox+oy*oy; if (i == 0) continue; i = tabledivide32_noinline(65536*16384, i);
i = ox*ox+oy*oy; if (i == 0) continue; i = 65536*16384 / i;
globalx1 = mulscale10(dmulscale10(ox,bakgvect.x,oy,bakgvect.y),i);
globaly1 = mulscale10(dmulscale10(ox,bakgvect.y,-oy,bakgvect.x),i);
ox = v1.y-v4.y; oy = v4.x-v1.x;
i = ox*ox+oy*oy; if (i == 0) continue; i = tabledivide32_noinline(65536*16384, i);
i = ox*ox+oy*oy; if (i == 0) continue; i = 65536 * 16384 / i;
globalx2 = mulscale10(dmulscale10(ox,bakgvect.x,oy,bakgvect.y),i);
globaly2 = mulscale10(dmulscale10(ox,bakgvect.y,-oy,bakgvect.x),i);

View file

@ -989,7 +989,7 @@ static void polymost_internal_nonparallaxed(vec2f_t n0, vec2f_t n1, float ryp0,
}
else
{
int i = nsqrtasm(uhypsq(xy.x,xy.y)); if (i == 0) i = 1024; else i = tabledivide32(1048576, i);
int i = nsqrtasm(uhypsq(xy.x,xy.y)); if (i == 0) i = 1024; else i = 1048576 / i;
r = i * (1.f/1048576.f);
}
@ -1207,7 +1207,7 @@ static inline int polymost_getclosestpointonwall(vec2_t const * const pos, int32
if (i > j)
return 1;
i = tabledivide64((i << 15), j) << 15;
i = ((i << 15) / j) << 15;
n->x = w.x + ((d.x * i) >> 30);
n->y = w.y + ((d.y * i) >> 30);

View file

@ -22,6 +22,3 @@ void initdivtables(void)
}
}
// Out-of-line entry point for divideu32(); forwards both arguments unchanged.
uint32_t divideu32_noinline(uint32_t n, uint32_t d)
{
    uint32_t const quotient = divideu32(n, d);
    return quotient;
}
// Out-of-line entry point for divideu64(); forwards both arguments unchanged.
uint64_t divideu64_noinline(uint64_t n, uint64_t d)
{
    uint64_t const quotient = divideu64(n, d);
    return quotient;
}
// Out-of-line entry point for tabledivide32(); forwards both arguments unchanged.
int32_t tabledivide32_noinline(int32_t n, int32_t d)
{
    int32_t const quotient = tabledivide32(n, d);
    return quotient;
}

View file

@ -9,6 +9,7 @@
#include "scriptfile.h"
#include "baselayer.h"
#include "compat.h"
#include "common.h"
#include "filesystem.h"

View file

@ -44,7 +44,7 @@ ATTRIBUTE((flatten)) void timerUpdateClock(void)
uint64_t numerator = (elapsedTime.count() * (uint64_t) timerticspersec * steady_clock::period::num);
uint64_t freq = timerGetFreqU64();
int n = tabledivide64(numerator, freq);
int n = numerator / freq;
if (n <= 0) return;

View file

@ -488,76 +488,6 @@ static int32_t readspecdata(const dataspec_t *spec, FileReader *fil, uint8_t **d
#define VAL(bits,p) (*(UINT(bits) const *)(p))
#define WVAL(bits,p) (*(UINT(bits) *)(p))
// Compares the data at `ptr` with the snapshot at `dump`, writes a record of the
// differences at *diffvar, and brings `dump` up to date with `ptr` as it goes.
//   ptr     - data being examined (only read)
//   dump    - earlier snapshot; every differing element is overwritten with the value from ptr
//   size    - used with cnt to compute the total byte length (size * cnt); also selects the comparison width
//   cnt     - multiplier for size; cnt == 1 enables the single-value fast path
//   diffvar - in/out cursor into the diff buffer; advanced past whatever was written
// Relies on the VAL/WVAL accessors plus the UINT/BYTES helper macros.
// NOTE(review): the casts through VAL/WVAL presumably assume the target tolerates
// unaligned typed stores into the byte-oriented diff buffer - confirm.
static void docmpsd(const void *ptr, void *dump, uint32_t size, uint32_t cnt, uint8_t **diffvar)
{
uint8_t *retdiff = *diffvar;
// Hail to the C preprocessor, baby!
// CPSINGLEVAL: single-value path. If the Datbits-wide value differs, store the new value
// both in the diff buffer and in the snapshot, then advance *diffvar by one value.
// Nothing is written (and *diffvar is left alone) when the values match.
#define CPSINGLEVAL(Datbits) \
if (VAL(Datbits, ptr) != VAL(Datbits, dump)) \
{ \
WVAL(Datbits, retdiff) = WVAL(Datbits, dump) = VAL(Datbits, ptr); \
*diffvar = retdiff + BYTES(Datbits); \
}
// A lone element of size 8/4/2/1 is handled here and returns immediately.
// Any other size has no matching case, so control continues to the array path below.
if (cnt == 1)
switch (size)
{
case 8: CPSINGLEVAL(64); return;
case 4: CPSINGLEVAL(32); return;
case 2: CPSINGLEVAL(16); return;
case 1: CPSINGLEVAL(8); return;
}
// CPELTS: walks nelts elements; for each one that differs it updates the snapshot and
// emits an (index, value) pair, the index being Idxbits wide and the value Datbits wide.
// After the loop an index of -1 is written as the end-of-list marker.
// Expects p, op and nelts to be declared by the expanding macro (CPDATA).
#define CPELTS(Idxbits, Datbits) \
do \
{ \
for (int i = 0; i < nelts; i++) \
{ \
if (*p != *op) \
{ \
*op = *p; \
WVAL(Idxbits, retdiff) = i; \
retdiff += BYTES(Idxbits); \
WVAL(Datbits, retdiff) = *p; \
retdiff += BYTES(Datbits); \
} \
p++; \
op++; \
} \
WVAL(Idxbits, retdiff) = -1; \
retdiff += BYTES(Idxbits); \
} while (0)
// CPDATA: views both buffers as arrays of Datbits-wide elements, computes the element
// count from the total byte size, and picks the index width from that count:
// more than 65536 elements -> 32-bit indices, more than 256 -> 16-bit, otherwise 8-bit.
// NOTE(review): with exactly 256 (or 65536) elements the highest index would have the same
// bit pattern as the -1 end marker if UINT(8)/UINT(16) are plain unsigned 8/16-bit types.
// Confirm against the code that reads these records before changing the thresholds.
#define CPDATA(Datbits) \
do \
{ \
auto p = (UINT(Datbits) const *)ptr; \
auto op = (UINT(Datbits) *)dump; \
int nelts = tabledivide32_noinline(size * cnt, BYTES(Datbits)); \
if (nelts > 65536) \
CPELTS(32, Datbits); \
else if (nelts > 256) \
CPELTS(16, Datbits); \
else \
CPELTS(8, Datbits); \
} while (0)
// Choose the widest comparison unit that the size permits:
// exactly 8 -> 64-bit, multiple of 4 -> 32-bit, even -> 16-bit, otherwise bytes.
if (size == 8)
CPDATA(64);
else if ((size & 3) == 0)
CPDATA(32);
else if ((size & 1) == 0)
CPDATA(16);
else
CPDATA(8);
// Publish the advanced write cursor back to the caller.
*diffvar = retdiff;
#undef CPELTS
#undef CPSINGLEVAL
#undef CPDATA
}
// get the number of elements to be monitored for changes
static int32_t getnumvar(const dataspec_t *spec)