From 424716bb880a41c48e530b1c2aa35eff990cecd4 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Tue, 14 Jul 2020 20:21:16 +0200 Subject: [PATCH] - say goodbye to libdivide! --- source/blood/src/view.cpp | 2 +- source/build/include/pragmas.h | 62 ++---------------------- source/build/src/engine.cpp | 9 ++-- source/build/src/polymost.cpp | 4 +- source/build/src/pragmas.cpp | 3 -- source/build/src/scriptfile.cpp | 1 + source/build/src/timer.cpp | 2 +- source/games/duke/src/zz_savegame.cpp | 70 --------------------------- 8 files changed, 13 insertions(+), 140 deletions(-) diff --git a/source/blood/src/view.cpp b/source/blood/src/view.cpp index 165b6859e..af4a09b26 100644 --- a/source/blood/src/view.cpp +++ b/source/blood/src/view.cpp @@ -3606,7 +3606,7 @@ void viewLoadingScreenWide(void) } else { - int width = roundscale(xdim, 240, ydim); + int width = scale(xdim, 240, ydim); int nCount = (width+kLoadScreenWideBackWidth-1)/kLoadScreenWideBackWidth; for (int i = 0; i < nCount; i++) { diff --git a/source/build/include/pragmas.h b/source/build/include/pragmas.h index cc89dad71..88b716f8f 100644 --- a/source/build/include/pragmas.h +++ b/source/build/include/pragmas.h @@ -6,6 +6,7 @@ // by Jonathon Fowler (jf@jonof.id.au) // by the EDuke32 team (development@voidpoint.com) +#include "templates.h" #ifndef pragmas_h_ #define pragmas_h_ @@ -36,32 +37,6 @@ extern libdivide::libdivide_s64_t divtable64[DIVTABLESIZE]; extern libdivide::libdivide_s32_t divtable32[DIVTABLESIZE]; extern void initdivtables(void); -static inline uint32_t divideu32(uint32_t const n, uint32_t const d) -{ - static libdivide::libdivide_u32_t udiv; - static uint32_t lastd; - - if (d == lastd) - goto skip; - - udiv = libdivide::libdivide_u32_gen((lastd = d)); -skip: - return libdivide::libdivide_u32_do(n, &udiv); -} - -static inline uint64_t divideu64(uint64_t const n, uint64_t const d) -{ - static libdivide::libdivide_u64_t udiv; - static uint64_t lastd; - - if (d == lastd) - goto skip; - - udiv = libdivide::libdivide_u64_gen((lastd = d)); -skip: - return libdivide::libdivide_u64_do(n, &udiv); -} - static inline int64_t tabledivide64(int64_t const n, int64_t const d) { static libdivide::libdivide_s64_t sdiv; @@ -76,28 +51,9 @@ skip: return libdivide::libdivide_s64_do(n, dptr); } -static inline int32_t tabledivide32(int32_t const n, int32_t const d) -{ - static libdivide::libdivide_s32_t sdiv; - static int32_t lastd; - auto const dptr = ((uint32_t)d < DIVTABLESIZE) ? &divtable32[d] : &sdiv; +static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx) { return (int64_t(eax) << ecx) / ebx; } - if (d == lastd || dptr != &sdiv) - goto skip; - - sdiv = libdivide::libdivide_s32_gen((lastd = d)); -skip: - return libdivide::libdivide_s32_do(n, dptr); -} - -extern uint32_t divideu32_noinline(uint32_t n, uint32_t d); -extern uint64_t divideu64_noinline(uint64_t n, uint64_t d); -extern int32_t tabledivide32_noinline(int32_t n, int32_t d); - - -static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx) { return dw(tabledivide64(qw(eax) << by(ecx), ebx)); } - -static inline int64_t divscale64(int64_t eax, int64_t ebx, int64_t ecx) { return tabledivide64(eax << ecx, ebx); } +static inline int64_t divscale64(int64_t eax, int64_t ebx, int64_t ecx) { return (eax << ecx) / ebx; } #define EDUKE32_SCALER_PRAGMA(a) \ static FORCE_INLINE int32_t divscale##a(int32_t eax, int32_t ebx) { return divscale(eax, ebx, a); } @@ -106,17 +62,7 @@ EDUKE32_GENERATE_PRAGMAS EDUKE32_SCALER_PRAGMA(32) static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { - return dw(tabledivide64(qw(eax) * edx, ecx)); -} - -static FORCE_INLINE int32_t scaleadd(int32_t eax, int32_t edx, int32_t addend, int32_t ecx) -{ - return dw(tabledivide64(qw(eax) * edx + addend, ecx)); -} - -static inline int32_t roundscale(int32_t eax, int32_t edx, int32_t ecx) -{ - return scaleadd(eax, edx, ecx / 2, ecx); + return int64_t(eax) * edx / ecx; } diff --git a/source/build/src/engine.cpp b/source/build/src/engine.cpp index 38d3f10b1..a47434be6 100644 --- a/source/build/src/engine.cpp +++ b/source/build/src/engine.cpp @@ -135,7 +135,6 @@ static void getclosestpointonwall_internal(vec2_t const p, int32_t const dawall, } i = ((i << 15) / j) << 15; - //i = tabledivide64((i << 15), j) << 15; *closest = { (int32_t)(w.x + ((d.x * i) >> 30)), (int32_t)(w.y + ((d.y * i) >> 30)) }; } @@ -906,7 +905,7 @@ int32_t lintersect(const int32_t originX, const int32_t originY, const int32_t o t = rayDotLineEndDiff; } - t = tabledivide64(t << 24L, rayLengthSquared); + t = (t << 24) / rayLengthSquared; *intersectionX = originX + mulscale24(ray.x, t); *intersectionY = originY + mulscale24(ray.y, t); @@ -934,7 +933,7 @@ int32_t lintersect(const int32_t originX, const int32_t originY, const int32_t o return 0; } - int64_t t = tabledivide64(((int64_t) originDiffCrossLineVec) << 24L, rayCrossLineVec); + int64_t t = (int64_t(originDiffCrossLineVec) << 24) / rayCrossLineVec; // For sake of completeness/readability, alternative to the above approach for an early out & avoidance of an extra division: *intersectionX = originX + mulscale24(ray.x, t); @@ -2386,11 +2385,11 @@ void renderDrawMapView(int32_t dax, int32_t day, int32_t zoome, int16_t ang) //relative alignment stuff ox = v2.x-v1.x; oy = v2.y-v1.y; - i = ox*ox+oy*oy; if (i == 0) continue; i = tabledivide32_noinline(65536*16384, i); + i = ox*ox+oy*oy; if (i == 0) continue; i = 65536*16384 / i; globalx1 = mulscale10(dmulscale10(ox,bakgvect.x,oy,bakgvect.y),i); globaly1 = mulscale10(dmulscale10(ox,bakgvect.y,-oy,bakgvect.x),i); ox = v1.y-v4.y; oy = v4.x-v1.x; - i = ox*ox+oy*oy; if (i == 0) continue; i = tabledivide32_noinline(65536*16384, i); + i = ox*ox+oy*oy; if (i == 0) continue; i = 65536 * 16384 / i; globalx2 = mulscale10(dmulscale10(ox,bakgvect.x,oy,bakgvect.y),i); globaly2 = mulscale10(dmulscale10(ox,bakgvect.y,-oy,bakgvect.x),i); diff --git a/source/build/src/polymost.cpp b/source/build/src/polymost.cpp index 9d27ff38b..723572ff5 100644 --- a/source/build/src/polymost.cpp +++ b/source/build/src/polymost.cpp @@ -989,7 +989,7 @@ static void polymost_internal_nonparallaxed(vec2f_t n0, vec2f_t n1, float ryp0, } else { - int i = nsqrtasm(uhypsq(xy.x,xy.y)); if (i == 0) i = 1024; else i = tabledivide32(1048576, i); + int i = nsqrtasm(uhypsq(xy.x,xy.y)); if (i == 0) i = 1024; else i = 1048576 / i; r = i * (1.f/1048576.f); } @@ -1207,7 +1207,7 @@ static inline int polymost_getclosestpointonwall(vec2_t const * const pos, int32 if (i > j) return 1; - i = tabledivide64((i << 15), j) << 15; + i = ((i << 15) / j) << 15; n->x = w.x + ((d.x * i) >> 30); n->y = w.y + ((d.y * i) >> 30); diff --git a/source/build/src/pragmas.cpp b/source/build/src/pragmas.cpp index 9685796a0..6d199c47c 100644 --- a/source/build/src/pragmas.cpp +++ b/source/build/src/pragmas.cpp @@ -22,6 +22,3 @@ void initdivtables(void) } } -uint32_t divideu32_noinline(uint32_t n, uint32_t d) { return divideu32(n, d); } -uint64_t divideu64_noinline(uint64_t n, uint64_t d) { return divideu64(n, d); } -int32_t tabledivide32_noinline(int32_t n, int32_t d) { return tabledivide32(n, d); } diff --git a/source/build/src/scriptfile.cpp b/source/build/src/scriptfile.cpp index 9ccc94fe2..245e9eb12 100644 --- a/source/build/src/scriptfile.cpp +++ b/source/build/src/scriptfile.cpp @@ -9,6 +9,7 @@ #include "scriptfile.h" #include "baselayer.h" #include "compat.h" +#include "common.h" #include "filesystem.h" diff --git a/source/build/src/timer.cpp b/source/build/src/timer.cpp index 5cba8cd21..a641f0821 100644 --- a/source/build/src/timer.cpp +++ b/source/build/src/timer.cpp @@ -44,7 +44,7 @@ ATTRIBUTE((flatten)) void timerUpdateClock(void) uint64_t numerator = (elapsedTime.count() * (uint64_t) timerticspersec * steady_clock::period::num); uint64_t freq = timerGetFreqU64(); - int n = tabledivide64(numerator, freq); + int n = numerator / freq; if (n <= 0) return; diff --git a/source/games/duke/src/zz_savegame.cpp b/source/games/duke/src/zz_savegame.cpp index ab5109d99..406a9a2e4 100644 --- a/source/games/duke/src/zz_savegame.cpp +++ b/source/games/duke/src/zz_savegame.cpp @@ -488,76 +488,6 @@ static int32_t readspecdata(const dataspec_t *spec, FileReader *fil, uint8_t **d #define VAL(bits,p) (*(UINT(bits) const *)(p)) #define WVAL(bits,p) (*(UINT(bits) *)(p)) -static void docmpsd(const void *ptr, void *dump, uint32_t size, uint32_t cnt, uint8_t **diffvar) -{ - uint8_t *retdiff = *diffvar; - - // Hail to the C preprocessor, baby! -#define CPSINGLEVAL(Datbits) \ - if (VAL(Datbits, ptr) != VAL(Datbits, dump)) \ - { \ - WVAL(Datbits, retdiff) = WVAL(Datbits, dump) = VAL(Datbits, ptr); \ - *diffvar = retdiff + BYTES(Datbits); \ - } - - if (cnt == 1) - switch (size) - { - case 8: CPSINGLEVAL(64); return; - case 4: CPSINGLEVAL(32); return; - case 2: CPSINGLEVAL(16); return; - case 1: CPSINGLEVAL(8); return; - } - -#define CPELTS(Idxbits, Datbits) \ - do \ - { \ - for (int i = 0; i < nelts; i++) \ - { \ - if (*p != *op) \ - { \ - *op = *p; \ - WVAL(Idxbits, retdiff) = i; \ - retdiff += BYTES(Idxbits); \ - WVAL(Datbits, retdiff) = *p; \ - retdiff += BYTES(Datbits); \ - } \ - p++; \ - op++; \ - } \ - WVAL(Idxbits, retdiff) = -1; \ - retdiff += BYTES(Idxbits); \ - } while (0) - -#define CPDATA(Datbits) \ - do \ - { \ - auto p = (UINT(Datbits) const *)ptr; \ - auto op = (UINT(Datbits) *)dump; \ - int nelts = tabledivide32_noinline(size * cnt, BYTES(Datbits)); \ - if (nelts > 65536) \ - CPELTS(32, Datbits); \ - else if (nelts > 256) \ - CPELTS(16, Datbits); \ - else \ - CPELTS(8, Datbits); \ - } while (0) - - if (size == 8) - CPDATA(64); - else if ((size & 3) == 0) - CPDATA(32); - else if ((size & 1) == 0) - CPDATA(16); - else - CPDATA(8); - - *diffvar = retdiff; - -#undef CPELTS -#undef CPSINGLEVAL -#undef CPDATA -} // get the number of elements to be monitored for changes static int32_t getnumvar(const dataspec_t *spec)