- removed integer square root code.

On modern systems, using the sqrt function with a cast to int no longer has any relevant performance disadvantage, so there's no need for all of this.
This commit is contained in:
Christoph Oelckers 2021-03-14 23:38:39 +01:00
parent 7c68261fbf
commit a484e39e05
5 changed files with 21 additions and 193 deletions

View file

@ -1511,6 +1511,8 @@ source_group("Core\\2D" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/2d
source_group("Core\\Console" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/console/.+")
source_group("Core\\DObject" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/dobject/.+")
source_group("Core\\Menu" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/menu/.+")
source_group("Core\\Rendering" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/rendering/.+")
source_group("Core\\Rendering\\Scene" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/rendering/scene/.+")
source_group("Rendering" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/glbackend/.+")
source_group("Platform" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/platform/.+")
source_group("Platform\\Win32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/platform/win32/.+")

View file

@ -513,7 +513,11 @@ static FORCE_INLINE int32_t krand(void)
int32_t krand(void);
#endif
int32_t ksqrt(uint32_t num);
// Integer square root: returns floor(sqrt(num)) for any 32-bit unsigned input.
//
// The operand is widened to double rather than float. A float only has a
// 24-bit mantissa, so inputs above 2^24 are rounded before the root is taken
// (e.g. 0xFFFFFFFF becomes 2^32 and would produce 65536 instead of 65535).
// A double represents every uint32_t exactly, so the truncated result is exact.
inline int32_t ksqrt(uint32_t num)
{
	return (int32_t)sqrt((double)num);
}
int32_t getangle(int32_t xvect, int32_t yvect);
fixed_t gethiq16angle(int32_t xvect, int32_t yvect);

View file

@ -175,34 +175,6 @@ static FORCE_INLINE void clipmove_tweak_pos(const vec3_t *pos, int32_t gx, int32
}
}
// Ceiling z of sector 'sectnum' at map position (dax, day), computed with the
// legacy square root helper (ksqrtasm_old).
// Returns the sector's plain ceilingz when bit 1 of ceilingstat is clear or
// when the sector's first wall has zero length.
int32_t getceilzofslope_old(int32_t sectnum, int32_t dax, int32_t day)
{
	if ((sector[sectnum].ceilingstat & 2) == 0)
		return sector[sectnum].ceilingz;

	int32_t const firstwall = sector[sectnum].wallptr;
	int32_t dx = wall[wall[firstwall].point2].x - wall[firstwall].x;
	int32_t dy = wall[wall[firstwall].point2].y - wall[firstwall].y;

	int32_t const len = (ksqrtasm_old(dx*dx + dy*dy));
	if (len == 0)
		return (sector[sectnum].ceilingz);

	// Scale the first wall's direction vector by heinum / length.
	int32_t const scale = DivScale(sector[sectnum].ceilingheinum, len, 15);
	dx *= scale;
	dy *= scale;

	return sector[sectnum].ceilingz + DMulScale(dx, day - wall[firstwall].y, -dy, dax - wall[firstwall].x, 23);
}
// Floor z of sector 'sectnum' at map position (dax, day), computed with the
// legacy square root helper (ksqrtasm_old).
// Returns the sector's plain floorz when bit 1 of floorstat is clear or when
// the sector's first wall has zero length.
int32_t getflorzofslope_old(int32_t sectnum, int32_t dax, int32_t day)
{
	if ((sector[sectnum].floorstat & 2) == 0)
		return sector[sectnum].floorz;

	int32_t const firstwall = sector[sectnum].wallptr;
	int32_t dx = wall[wall[firstwall].point2].x - wall[firstwall].x;
	int32_t dy = wall[wall[firstwall].point2].y - wall[firstwall].y;

	int32_t const len = (ksqrtasm_old(dx*dx + dy*dy));
	if (len == 0)
		return sector[sectnum].floorz;

	// Scale the first wall's direction vector by heinum / length.
	int32_t const scale = DivScale(sector[sectnum].floorheinum, len, 15);
	dx *= scale;
	dy *= scale;

	return sector[sectnum].floorz + DMulScale(dx, day - wall[firstwall].y, -dy, dax - wall[firstwall].x, 23);
}
// Returns: should clip?
static int cliptestsector(int const dasect, int const nextsect, int32_t const flordist, int32_t const ceildist, vec2_t const pos, int32_t const posz)
{
@ -214,20 +186,6 @@ static int cliptestsector(int const dasect, int const nextsect, int32_t const fl
{
case ENGINECOMPATIBILITY_NONE:
break;
case ENGINECOMPATIBILITY_19950829:
{
int32_t daz = getflorzofslope_old(dasect, pos.x, pos.y);
int32_t daz2 = getflorzofslope_old(nextsect, pos.x, pos.y);
if (daz2 < daz && (sec2->floorstat&1) == 0)
if (posz >= daz2-(flordist-1)) return 1;
daz = getceilzofslope_old(dasect, pos.x, pos.y);
daz2 = getceilzofslope_old(nextsect, pos.x, pos.y);
if (daz2 > daz && (sec2->ceilingstat&1) == 0)
if (posz <= daz2+(ceildist-1)) return 1;
return 0;
}
default:
{
int32_t daz = getflorzofslope(dasect, pos.x, pos.y);
@ -491,7 +449,7 @@ int32_t clipmove(vec3_t * const pos, int16_t * const sectnum, int32_t xvect, int
//Extra walldist for sprites on sector lines
vec2_t const diff = { goal.x - (pos->x), goal.y - (pos->y) };
int32_t const rad = clip_nsqrtasm(compat_maybe_truncate_to_int32(uhypsq(diff.x, diff.y))) + MAXCLIPDIST + walldist + 8;
int32_t const rad = ksqrt(compat_maybe_truncate_to_int32(uhypsq(diff.x, diff.y))) + MAXCLIPDIST + walldist + 8;
vec2_t const clipMin = { cent.x - rad, cent.y - rad };
vec2_t const clipMax = { cent.x + rad, cent.y + rad };
@ -989,11 +947,6 @@ void getzrange(const vec3_t *pos, int16_t sectnum,
vec2_t closest = pos->vec2;
if (enginecompatibility_mode == ENGINECOMPATIBILITY_NONE)
getsectordist(closest, sectnum, &closest);
if (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
{
*ceilz = getceilzofslope_old(sectnum,closest.x,closest.y);
*florz = getflorzofslope_old(sectnum,closest.x,closest.y);
}
else
getzsofslope(sectnum,closest.x,closest.y,ceilz,florz);
*ceilhit = sectnum+16384; *florhit = sectnum+16384;
@ -1061,11 +1014,6 @@ void getzrange(const vec3_t *pos, int16_t sectnum,
closest = pos->vec2;
if (enginecompatibility_mode == ENGINECOMPATIBILITY_NONE)
getsectordist(closest, k, &closest);
if (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
{
daz = getceilzofslope_old(k, closest.x,closest.y);
daz2 = getflorzofslope_old(k, closest.x,closest.y);
}
else
getzsofslope(k, closest.x,closest.y, &daz,&daz2);
@ -1240,7 +1188,7 @@ static int32_t hitscan_trysector(const vec3_t *sv, usectorptr_t sec, hitdata_t *
auto const wal2 = (uwallptr_t)&wall[wal->point2];
int32_t j, dax=wal2->x-wal->x, day=wal2->y-wal->y;
i = nsqrtasm(compat_maybe_truncate_to_int32(uhypsq(dax,day))); if (i == 0) return 1; //continue;
i = ksqrt(compat_maybe_truncate_to_int32(uhypsq(dax,day))); if (i == 0) return 1; //continue;
i = DivScale(heinum,i, 15);
dax *= i; day *= i;

View file

@ -80,8 +80,6 @@ static int32_t no_radarang2 = 0;
static int16_t radarang[1280];
static int32_t qradarang[10240];
// Lookup tables for the table-based integer square roots. All three are filled
// by initksqrt(): sqrtable/shlookup are read by nsqrtasm(), sqrtable_old by
// ksqrtasm_old().
uint16_t ATTRIBUTE((used)) sqrtable[4096], ATTRIBUTE((used)) shlookup[4096+256], ATTRIBUTE((used)) sqrtable_old[2048];
const char *engineerrstr = "No error";
int32_t showfirstwall=0;
@ -312,70 +310,6 @@ static void renderDrawMaskedWall(int16_t damaskwallcnt)
}
// Bit-by-bit integer square root of 'c'.
// 'acc' starts at 2^30 and 'bit' at 2^29; 'bit' drops by a factor of four
// every round, and the loop ends once it has been shifted down to zero.
static uint32_t msqrtasm(uint32_t c)
{
	uint32_t acc = 0x40000000u;
	uint32_t bit = 0x20000000u;

	for (;;)
	{
		if (c >= acc)
		{
			c -= acc;
			acc += bit << 2;
		}
		acc = (acc - bit) >> 1;
		bit >>= 2;
		if (bit == 0)
			break;
	}

	// Final adjustment step before the last bit is dropped.
	if (c >= acc)
		++acc;

	return acc >> 1;
}
//
// initksqrt (internal)
//
// Fills the three square-root lookup tables:
//   sqrtable[4096]         - doubled msqrtasm() result for each table slot
//   shlookup[4096+256]     - two packed shift counts per slot
//   sqrtable_old[2048]     - nearest integer root of (i << 20)
//
static inline void initksqrt(void)
{
int32_t i, j, k;
uint32_t root, num;
int32_t temp;
// j is the next power of four that i has not reached yet; k counts how many
// such thresholds i has already crossed (0 for i == 0, 1 for 1..3, 2 for 4..15, ...).
j = 1; k = 0;
for (i=0; i<4096; i++)
{
if (i >= j) { j <<= 2; k++; }
// 131072 == 1 << 17, i.e. half of the 1 << 18 step between consecutive slots.
sqrtable[i] = (uint16_t)(msqrtasm((i<<18)+131072)<<1);
// Low byte holds k*2, high byte holds 10-k.
shlookup[i] = (k<<1)+((10-k)<<8);
// The extra 256 entries after index 4096 use the same packing with k+6.
if (i < 256) shlookup[i+4096] = ((k+6)<<1)+((10-(k+6))<<8);
}
for(i=0;i<2048;i++)
{
root = 128;
num = i<<20;
// Newton iteration root = (root + num/root) / 2, starting from 128.
// The comparison is done in unsigned arithmetic (root is uint32_t), so the
// loop stops once the previous and new estimate differ by at most one.
do
{
temp = root;
root = (root+num/root)>>1;
} while((temp-root+1) > 2);
// temp is now the signed error root*root - num.
temp = root*root-num;
// Step root down while (root-1)^2 is closer to num than root^2 ...
while (abs(int32_t(temp-2*root+1)) < abs(temp))
{
temp += 1-int(2*root);
root--;
}
// ... then step it up while (root+1)^2 is closer.
while (abs(int32_t(temp+2*root+1)) < abs(temp))
{
temp += 2*root+1;
root++;
}
sqrtable_old[i] = root;
}
}
static int32_t engineLoadTables(void)
{
static char tablesloaded = 0;
@ -384,8 +318,6 @@ static int32_t engineLoadTables(void)
{
int32_t i;
initksqrt();
for (i=0; i<2048; i++)
reciptable[i] = DivScale(2048, i+2048, 30);
@ -1648,7 +1580,7 @@ void renderDrawMapView(int32_t dax, int32_t day, int32_t zoome, int16_t ang)
{
ox = wall[wall[startwall].point2].x - wall[startwall].x;
oy = wall[wall[startwall].point2].y - wall[startwall].y;
i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) continue;
i = ksqrt(uhypsq(ox,oy)); if (i == 0) continue;
i = 1048576/i;
globalx1 = MulScale(DMulScale(ox,bakgvect.x,oy,bakgvect.y, 10),i, 10);
globaly1 = MulScale(DMulScale(ox,bakgvect.y,-oy,bakgvect.x, 10),i, 10);
@ -1659,7 +1591,7 @@ void renderDrawMapView(int32_t dax, int32_t day, int32_t zoome, int16_t ang)
globaly2 = -globaly1;
int32_t const daslope = sector[s].floorheinum;
i = nsqrtasm(daslope*daslope+16777216);
i = ksqrt(daslope*daslope+16777216);
set_globalpos(globalposx, MulScale(globalposy,i, 12), globalposz);
globalx2 = MulScale(globalx2,i, 12);
globaly2 = MulScale(globaly2,i, 12);
@ -2049,16 +1981,6 @@ fixed_t gethiq16angle(int32_t xvect, int32_t yvect)
return rv;
}
//
// ksqrt
//
// Integer square root dispatcher: uses the legacy table routine when the
// engine runs in ENGINECOMPATIBILITY_19950829 mode, nsqrtasm otherwise.
//
int32_t ksqrt(uint32_t num)
{
	return (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
		? ksqrtasm_old(num)
		: nsqrtasm(num);
}
// Gets the BUILD unit height and z offset of a sprite.
// Returns the z offset, 'height' may be NULL.
int32_t spriteheightofsptr(uspriteptr_t spr, int32_t *height, int32_t alsotileyofs)
@ -2962,7 +2884,7 @@ int32_t getceilzofslopeptr(usectorptr_t sec, int32_t dax, int32_t day)
vec2_t const w = *(vec2_t const *)wal;
vec2_t const d = { wal2->x - w.x, wal2->y - w.y };
int const i = nsqrtasm(uhypsq(d.x,d.y))<<5;
int const i = ksqrt(uhypsq(d.x,d.y))<<5;
if (i == 0) return sec->ceilingz;
int const j = DMulScale(d.x, day-w.y, -d.y, dax-w.x, 3);
@ -2981,7 +2903,7 @@ int32_t getflorzofslopeptr(usectorptr_t sec, int32_t dax, int32_t day)
vec2_t const w = *(vec2_t const *)wal;
vec2_t const d = { wal2->x - w.x, wal2->y - w.y };
int const i = nsqrtasm(uhypsq(d.x,d.y))<<5;
int const i = ksqrt(uhypsq(d.x,d.y))<<5;
if (i == 0) return sec->floorz;
int const j = DMulScale(d.x, day-w.y, -d.y, dax-w.x, 3);
@ -3001,7 +2923,7 @@ void getzsofslopeptr(usectorptr_t sec, int32_t dax, int32_t day, int32_t *ceilz,
vec2_t const d = { wal2->x - wal->x, wal2->y - wal->y };
int const i = nsqrtasm(uhypsq(d.x,d.y))<<5;
int const i = ksqrt(uhypsq(d.x,d.y))<<5;
if (i == 0) return;
int const j = DMulScale(d.x,day-wal->y, -d.y,dax-wal->x, 3);
@ -3026,7 +2948,7 @@ void alignceilslope(int16_t dasect, int32_t x, int32_t y, int32_t z)
return;
sector[dasect].ceilingheinum = Scale((z-sector[dasect].ceilingz)<<8,
nsqrtasm(uhypsq(dax,day)), i);
ksqrt(uhypsq(dax,day)), i);
if (sector[dasect].ceilingheinum == 0)
sector[dasect].ceilingstat &= ~2;
else sector[dasect].ceilingstat |= 2;
@ -3047,7 +2969,7 @@ void alignflorslope(int16_t dasect, int32_t x, int32_t y, int32_t z)
return;
sector[dasect].floorheinum = Scale((z-sector[dasect].floorz)<<8,
nsqrtasm(uhypsq(dax,day)), i);
ksqrt(uhypsq(dax,day)), i);
if (sector[dasect].floorheinum == 0)
sector[dasect].floorstat &= ~2;
else sector[dasect].floorstat |= 2;

View file

@ -23,63 +23,15 @@
extern int32_t globalx1, globaly2;
extern uint16_t sqrtable[4096], shlookup[4096+256],sqrtable_old[2048];
// Table-driven integer square root (C port of the original assembly routine,
// reverse engineered by JBF, 2003-09-01).
//
// shlookup supplies two packed shift counts: bits 0-7 are applied to 'a' before
// the sqrtable lookup, bits 8-15 are applied to the result afterwards.
static inline int32_t nsqrtasm(uint32_t a)
{
	// Inputs with any of the top 8 bits set use the extra entries stored
	// after index 4096; everything else is indexed by a >> 12.
	uint16_t const shifts = (a & 0xff000000)
		? shlookup[(a >> 24) + 4096]
		: shlookup[a >> 12];

	a >>= shifts & 0xff;
	a = (a & 0xffff0000) | (sqrtable[a]);
	a >>= ((shifts & 0xff00) >> 8);

	return a;
}
static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
// Packs the signs of a, b, c, d into a clip mask (original trick by Ken).
// Low nibble: one bit per negative argument (a -> bit 3 ... d -> bit 0).
// High nibble: the bitwise inverse of the low nibble.
static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
	int32_t const negbits = ((a < 0) << 3) + ((b < 0) << 2) + ((c < 0) << 1) + (d < 0);
	int32_t const inverted = (negbits << 4) ^ 0xf0;
	return (inverted | negbits);
}
// Legacy table-based integer square root.
// The absolute value of 'n' is reduced by factors of four until it indexes
// sqrtable_old (2048 entries); the table value is then shifted left once per
// reduction step and finally right by 10.
inline int32_t ksqrtasm_old(int32_t n)
{
	n = abs((int32_t)n);

	uint32_t shift = 0;
	for (; n >= 2048; n >>= 2)
		++shift;

	uint32_t const s = sqrtable_old[n];
	return (s << shift) >> 10;
}
// Square root used by the clipping code: the legacy routine in
// ENGINECOMPATIBILITY_19950829 mode, nsqrtasm in every other mode.
inline int32_t clip_nsqrtasm(int32_t n)
{
	return (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
		? ksqrtasm_old(n)
		: nsqrtasm(n);
}
extern int16_t thesector[MAXWALLSB], thewall[MAXWALLSB];
extern int16_t bunchfirst[MAXWALLSB], bunchlast[MAXWALLSB];