mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-15 20:20:54 +00:00
- removed integer square root code.
On modern systems, using the sqrt function with a cast to int no longer has any relevant performance disadvantage, so there is no need for all of this.
This commit is contained in:
parent
7c68261fbf
commit
a484e39e05
5 changed files with 21 additions and 193 deletions
|
@ -1511,6 +1511,8 @@ source_group("Core\\2D" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/2d
|
|||
source_group("Core\\Console" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/console/.+")
|
||||
source_group("Core\\DObject" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/dobject/.+")
|
||||
source_group("Core\\Menu" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/menu/.+")
|
||||
source_group("Core\\Rendering" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/rendering/.+")
|
||||
source_group("Core\\Rendering\\Scene" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/core/rendering/scene/.+")
|
||||
source_group("Rendering" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/glbackend/.+")
|
||||
source_group("Platform" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/platform/.+")
|
||||
source_group("Platform\\Win32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/platform/win32/.+")
|
||||
|
|
|
@ -513,7 +513,11 @@ static FORCE_INLINE int32_t krand(void)
|
|||
int32_t krand(void);
|
||||
#endif
|
||||
|
||||
// Integer square root: returns sqrt(num) truncated toward zero.
int32_t ksqrt(uint32_t num);
inline int32_t ksqrt(uint32_t num)
{
    // The root is taken in double precision on purpose. A float carries only
    // 24 significant bits, so a large 32-bit input is rounded before the root
    // is computed (0xFFFFFFFF becomes 2^32 and would yield 65536 instead of
    // 65535). Every uint32_t value is exactly representable as a double.
    return (int32_t)sqrt((double)num);
}
|
||||
|
||||
int32_t getangle(int32_t xvect, int32_t yvect);
|
||||
fixed_t gethiq16angle(int32_t xvect, int32_t yvect);
|
||||
|
||||
|
|
|
@ -175,34 +175,6 @@ static FORCE_INLINE void clipmove_tweak_pos(const vec3_t *pos, int32_t gx, int32
|
|||
}
|
||||
}
|
||||
|
||||
int32_t getceilzofslope_old(int32_t sectnum, int32_t dax, int32_t day)
|
||||
{
|
||||
int32_t dx, dy, i, j;
|
||||
|
||||
if (!(sector[sectnum].ceilingstat&2)) return sector[sectnum].ceilingz;
|
||||
j = sector[sectnum].wallptr;
|
||||
dx = wall[wall[j].point2].x-wall[j].x;
|
||||
dy = wall[wall[j].point2].y-wall[j].y;
|
||||
i = (ksqrtasm_old(dx*dx+dy*dy)); if (i == 0) return(sector[sectnum].ceilingz);
|
||||
i = DivScale(sector[sectnum].ceilingheinum,i, 15);
|
||||
dx *= i; dy *= i;
|
||||
return sector[sectnum].ceilingz+DMulScale(dx,day-wall[j].y,-dy,dax-wall[j].x, 23);
|
||||
}
|
||||
|
||||
int32_t getflorzofslope_old(int32_t sectnum, int32_t dax, int32_t day)
|
||||
{
|
||||
int32_t dx, dy, i, j;
|
||||
|
||||
if (!(sector[sectnum].floorstat&2)) return sector[sectnum].floorz;
|
||||
j = sector[sectnum].wallptr;
|
||||
dx = wall[wall[j].point2].x-wall[j].x;
|
||||
dy = wall[wall[j].point2].y-wall[j].y;
|
||||
i = (ksqrtasm_old(dx*dx+dy*dy)); if (i == 0) return sector[sectnum].floorz;
|
||||
i = DivScale(sector[sectnum].floorheinum,i, 15);
|
||||
dx *= i; dy *= i;
|
||||
return sector[sectnum].floorz+DMulScale(dx,day-wall[j].y,-dy,dax-wall[j].x, 23);
|
||||
}
|
||||
|
||||
// Returns: should clip?
|
||||
static int cliptestsector(int const dasect, int const nextsect, int32_t const flordist, int32_t const ceildist, vec2_t const pos, int32_t const posz)
|
||||
{
|
||||
|
@ -214,20 +186,6 @@ static int cliptestsector(int const dasect, int const nextsect, int32_t const fl
|
|||
{
|
||||
case ENGINECOMPATIBILITY_NONE:
|
||||
break;
|
||||
case ENGINECOMPATIBILITY_19950829:
|
||||
{
|
||||
int32_t daz = getflorzofslope_old(dasect, pos.x, pos.y);
|
||||
int32_t daz2 = getflorzofslope_old(nextsect, pos.x, pos.y);
|
||||
|
||||
if (daz2 < daz && (sec2->floorstat&1) == 0)
|
||||
if (posz >= daz2-(flordist-1)) return 1;
|
||||
daz = getceilzofslope_old(dasect, pos.x, pos.y);
|
||||
daz2 = getceilzofslope_old(nextsect, pos.x, pos.y);
|
||||
if (daz2 > daz && (sec2->ceilingstat&1) == 0)
|
||||
if (posz <= daz2+(ceildist-1)) return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
{
|
||||
int32_t daz = getflorzofslope(dasect, pos.x, pos.y);
|
||||
|
@ -491,7 +449,7 @@ int32_t clipmove(vec3_t * const pos, int16_t * const sectnum, int32_t xvect, int
|
|||
|
||||
//Extra walldist for sprites on sector lines
|
||||
vec2_t const diff = { goal.x - (pos->x), goal.y - (pos->y) };
|
||||
int32_t const rad = clip_nsqrtasm(compat_maybe_truncate_to_int32(uhypsq(diff.x, diff.y))) + MAXCLIPDIST + walldist + 8;
|
||||
int32_t const rad = ksqrt(compat_maybe_truncate_to_int32(uhypsq(diff.x, diff.y))) + MAXCLIPDIST + walldist + 8;
|
||||
vec2_t const clipMin = { cent.x - rad, cent.y - rad };
|
||||
vec2_t const clipMax = { cent.x + rad, cent.y + rad };
|
||||
|
||||
|
@ -989,11 +947,6 @@ void getzrange(const vec3_t *pos, int16_t sectnum,
|
|||
vec2_t closest = pos->vec2;
|
||||
if (enginecompatibility_mode == ENGINECOMPATIBILITY_NONE)
|
||||
getsectordist(closest, sectnum, &closest);
|
||||
if (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
|
||||
{
|
||||
*ceilz = getceilzofslope_old(sectnum,closest.x,closest.y);
|
||||
*florz = getflorzofslope_old(sectnum,closest.x,closest.y);
|
||||
}
|
||||
else
|
||||
getzsofslope(sectnum,closest.x,closest.y,ceilz,florz);
|
||||
*ceilhit = sectnum+16384; *florhit = sectnum+16384;
|
||||
|
@ -1061,11 +1014,6 @@ void getzrange(const vec3_t *pos, int16_t sectnum,
|
|||
closest = pos->vec2;
|
||||
if (enginecompatibility_mode == ENGINECOMPATIBILITY_NONE)
|
||||
getsectordist(closest, k, &closest);
|
||||
if (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
|
||||
{
|
||||
daz = getceilzofslope_old(k, closest.x,closest.y);
|
||||
daz2 = getflorzofslope_old(k, closest.x,closest.y);
|
||||
}
|
||||
else
|
||||
getzsofslope(k, closest.x,closest.y, &daz,&daz2);
|
||||
|
||||
|
@ -1240,7 +1188,7 @@ static int32_t hitscan_trysector(const vec3_t *sv, usectorptr_t sec, hitdata_t *
|
|||
auto const wal2 = (uwallptr_t)&wall[wal->point2];
|
||||
int32_t j, dax=wal2->x-wal->x, day=wal2->y-wal->y;
|
||||
|
||||
i = nsqrtasm(compat_maybe_truncate_to_int32(uhypsq(dax,day))); if (i == 0) return 1; //continue;
|
||||
i = ksqrt(compat_maybe_truncate_to_int32(uhypsq(dax,day))); if (i == 0) return 1; //continue;
|
||||
i = DivScale(heinum,i, 15);
|
||||
dax *= i; day *= i;
|
||||
|
||||
|
|
|
@ -80,8 +80,6 @@ static int32_t no_radarang2 = 0;
|
|||
static int16_t radarang[1280];
|
||||
static int32_t qradarang[10240];
|
||||
|
||||
uint16_t ATTRIBUTE((used)) sqrtable[4096], ATTRIBUTE((used)) shlookup[4096+256], ATTRIBUTE((used)) sqrtable_old[2048];
|
||||
|
||||
const char *engineerrstr = "No error";
|
||||
|
||||
int32_t showfirstwall=0;
|
||||
|
@ -312,70 +310,6 @@ static void renderDrawMaskedWall(int16_t damaskwallcnt)
|
|||
}
|
||||
|
||||
|
||||
// Bit-by-bit integer square root: returns floor(sqrt(c)).
// 'trial' holds (partial result + current test bit); 'half' is half of the
// current test bit and walks down two bit positions per round.
static uint32_t msqrtasm(uint32_t c)
{
    uint32_t trial = 0x40000000u;

    for (uint32_t half = 0x20000000u; half != 0; half >>= 2)
    {
        if (c >= trial)
        {
            // The test bit belongs to the root: consume it from the remainder.
            c -= trial;
            trial += half << 2;
        }
        trial = (trial - half) >> 1;
    }

    // The lowest result bit is decided here, outside the loop.
    return (c >= trial ? trial + 1 : trial) >> 1;
}
|
||||
|
||||
//
|
||||
// initksqrt (internal)
|
||||
//
|
||||
static inline void initksqrt(void)
|
||||
{
|
||||
int32_t i, j, k;
|
||||
uint32_t root, num;
|
||||
int32_t temp;
|
||||
|
||||
j = 1; k = 0;
|
||||
for (i=0; i<4096; i++)
|
||||
{
|
||||
if (i >= j) { j <<= 2; k++; }
|
||||
sqrtable[i] = (uint16_t)(msqrtasm((i<<18)+131072)<<1);
|
||||
shlookup[i] = (k<<1)+((10-k)<<8);
|
||||
if (i < 256) shlookup[i+4096] = ((k+6)<<1)+((10-(k+6))<<8);
|
||||
}
|
||||
|
||||
for(i=0;i<2048;i++)
|
||||
{
|
||||
root = 128;
|
||||
num = i<<20;
|
||||
do
|
||||
{
|
||||
temp = root;
|
||||
root = (root+num/root)>>1;
|
||||
} while((temp-root+1) > 2);
|
||||
temp = root*root-num;
|
||||
while (abs(int32_t(temp-2*root+1)) < abs(temp))
|
||||
{
|
||||
temp += 1-int(2*root);
|
||||
root--;
|
||||
}
|
||||
while (abs(int32_t(temp+2*root+1)) < abs(temp))
|
||||
{
|
||||
temp += 2*root+1;
|
||||
root++;
|
||||
}
|
||||
sqrtable_old[i] = root;
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t engineLoadTables(void)
|
||||
{
|
||||
static char tablesloaded = 0;
|
||||
|
@ -384,8 +318,6 @@ static int32_t engineLoadTables(void)
|
|||
{
|
||||
int32_t i;
|
||||
|
||||
initksqrt();
|
||||
|
||||
for (i=0; i<2048; i++)
|
||||
reciptable[i] = DivScale(2048, i+2048, 30);
|
||||
|
||||
|
@ -1648,7 +1580,7 @@ void renderDrawMapView(int32_t dax, int32_t day, int32_t zoome, int16_t ang)
|
|||
{
|
||||
ox = wall[wall[startwall].point2].x - wall[startwall].x;
|
||||
oy = wall[wall[startwall].point2].y - wall[startwall].y;
|
||||
i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) continue;
|
||||
i = ksqrt(uhypsq(ox,oy)); if (i == 0) continue;
|
||||
i = 1048576/i;
|
||||
globalx1 = MulScale(DMulScale(ox,bakgvect.x,oy,bakgvect.y, 10),i, 10);
|
||||
globaly1 = MulScale(DMulScale(ox,bakgvect.y,-oy,bakgvect.x, 10),i, 10);
|
||||
|
@ -1659,7 +1591,7 @@ void renderDrawMapView(int32_t dax, int32_t day, int32_t zoome, int16_t ang)
|
|||
globaly2 = -globaly1;
|
||||
|
||||
int32_t const daslope = sector[s].floorheinum;
|
||||
i = nsqrtasm(daslope*daslope+16777216);
|
||||
i = ksqrt(daslope*daslope+16777216);
|
||||
set_globalpos(globalposx, MulScale(globalposy,i, 12), globalposz);
|
||||
globalx2 = MulScale(globalx2,i, 12);
|
||||
globaly2 = MulScale(globaly2,i, 12);
|
||||
|
@ -2049,16 +1981,6 @@ fixed_t gethiq16angle(int32_t xvect, int32_t yvect)
|
|||
return rv;
|
||||
}
|
||||
|
||||
//
|
||||
// ksqrt
|
||||
//
|
||||
int32_t ksqrt(uint32_t num)
|
||||
{
|
||||
if (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
|
||||
return ksqrtasm_old(num);
|
||||
return nsqrtasm(num);
|
||||
}
|
||||
|
||||
// Gets the BUILD unit height and z offset of a sprite.
|
||||
// Returns the z offset, 'height' may be NULL.
|
||||
int32_t spriteheightofsptr(uspriteptr_t spr, int32_t *height, int32_t alsotileyofs)
|
||||
|
@ -2962,7 +2884,7 @@ int32_t getceilzofslopeptr(usectorptr_t sec, int32_t dax, int32_t day)
|
|||
vec2_t const w = *(vec2_t const *)wal;
|
||||
vec2_t const d = { wal2->x - w.x, wal2->y - w.y };
|
||||
|
||||
int const i = nsqrtasm(uhypsq(d.x,d.y))<<5;
|
||||
int const i = ksqrt(uhypsq(d.x,d.y))<<5;
|
||||
if (i == 0) return sec->ceilingz;
|
||||
|
||||
int const j = DMulScale(d.x, day-w.y, -d.y, dax-w.x, 3);
|
||||
|
@ -2981,7 +2903,7 @@ int32_t getflorzofslopeptr(usectorptr_t sec, int32_t dax, int32_t day)
|
|||
vec2_t const w = *(vec2_t const *)wal;
|
||||
vec2_t const d = { wal2->x - w.x, wal2->y - w.y };
|
||||
|
||||
int const i = nsqrtasm(uhypsq(d.x,d.y))<<5;
|
||||
int const i = ksqrt(uhypsq(d.x,d.y))<<5;
|
||||
if (i == 0) return sec->floorz;
|
||||
|
||||
int const j = DMulScale(d.x, day-w.y, -d.y, dax-w.x, 3);
|
||||
|
@ -3001,7 +2923,7 @@ void getzsofslopeptr(usectorptr_t sec, int32_t dax, int32_t day, int32_t *ceilz,
|
|||
|
||||
vec2_t const d = { wal2->x - wal->x, wal2->y - wal->y };
|
||||
|
||||
int const i = nsqrtasm(uhypsq(d.x,d.y))<<5;
|
||||
int const i = ksqrt(uhypsq(d.x,d.y))<<5;
|
||||
if (i == 0) return;
|
||||
|
||||
int const j = DMulScale(d.x,day-wal->y, -d.y,dax-wal->x, 3);
|
||||
|
@ -3026,7 +2948,7 @@ void alignceilslope(int16_t dasect, int32_t x, int32_t y, int32_t z)
|
|||
return;
|
||||
|
||||
sector[dasect].ceilingheinum = Scale((z-sector[dasect].ceilingz)<<8,
|
||||
nsqrtasm(uhypsq(dax,day)), i);
|
||||
ksqrt(uhypsq(dax,day)), i);
|
||||
if (sector[dasect].ceilingheinum == 0)
|
||||
sector[dasect].ceilingstat &= ~2;
|
||||
else sector[dasect].ceilingstat |= 2;
|
||||
|
@ -3047,7 +2969,7 @@ void alignflorslope(int16_t dasect, int32_t x, int32_t y, int32_t z)
|
|||
return;
|
||||
|
||||
sector[dasect].floorheinum = Scale((z-sector[dasect].floorz)<<8,
|
||||
nsqrtasm(uhypsq(dax,day)), i);
|
||||
ksqrt(uhypsq(dax,day)), i);
|
||||
if (sector[dasect].floorheinum == 0)
|
||||
sector[dasect].floorstat &= ~2;
|
||||
else sector[dasect].floorstat |= 2;
|
||||
|
|
|
@ -23,63 +23,15 @@
|
|||
extern int32_t globalx1, globaly2;
|
||||
|
||||
|
||||
extern uint16_t sqrtable[4096], shlookup[4096+256],sqrtable_old[2048];
|
||||
|
||||
|
||||
static inline int32_t nsqrtasm(uint32_t a)
|
||||
{
|
||||
// JBF 20030901: This was a damn lot simpler to reverse engineer than
|
||||
// msqrtasm was. Really, it was just like simplifying an algebra equation.
|
||||
uint16_t c;
|
||||
|
||||
if (a & 0xff000000) // test eax, 0xff000000 / jnz short over24
|
||||
{
|
||||
c = shlookup[(a >> 24) + 4096]; // mov ebx, eax
|
||||
// over24: shr ebx, 24
|
||||
// mov cx, word ptr shlookup[ebx*2+8192]
|
||||
}
|
||||
else
|
||||
{
|
||||
c = shlookup[a >> 12]; // mov ebx, eax
|
||||
// shr ebx, 12
|
||||
// mov cx, word ptr shlookup[ebx*2]
|
||||
// jmp short under24
|
||||
}
|
||||
a >>= c&0xff; // under24: shr eax, cl
|
||||
a = (a&0xffff0000)|(sqrtable[a]); // mov ax, word ptr sqrtable[eax*2]
|
||||
a >>= ((c&0xff00) >> 8); // mov cl, ch
|
||||
// shr eax, cl
|
||||
return a;
|
||||
}
|
||||
|
||||
// Builds an 8-bit mask from the signs of the four arguments.
// Low nibble: bit 3 = (a < 0), bit 2 = (b < 0), bit 1 = (c < 0), bit 0 = (d < 0).
// High nibble: the bitwise complement of the low nibble.
static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
    int32_t const signs = ((a < 0) << 3) | ((b < 0) << 2) | ((c < 0) << 1) | (d < 0);

    return ((signs << 4) ^ 0xf0) | signs;
}
|
||||
|
||||
|
||||
|
||||
inline int32_t ksqrtasm_old(int32_t n)
|
||||
{
|
||||
uint32_t shift = 0;
|
||||
n = abs((int32_t)n);
|
||||
while (n >= 2048)
|
||||
{
|
||||
n >>= 2;
|
||||
++shift;
|
||||
}
|
||||
uint32_t const s = sqrtable_old[n];
|
||||
return (s << shift) >> 10;
|
||||
}
|
||||
|
||||
inline int32_t clip_nsqrtasm(int32_t n)
|
||||
{
|
||||
if (enginecompatibility_mode == ENGINECOMPATIBILITY_19950829)
|
||||
return ksqrtasm_old(n);
|
||||
return nsqrtasm(n);
|
||||
}
|
||||
|
||||
|
||||
extern int16_t thesector[MAXWALLSB], thewall[MAXWALLSB];
|
||||
extern int16_t bunchfirst[MAXWALLSB], bunchlast[MAXWALLSB];
|
||||
|
|
Loading…
Reference in a new issue