From 54721d7461d0753de3fa44c9a7cc86b61aa0a657 Mon Sep 17 00:00:00 2001 From: helixhorned Date: Sun, 1 Jul 2012 22:11:14 +0000 Subject: [PATCH] Make ksqrt take uint32_t, add helper function uint32_t uhypsq(int32_t,int32_t). uhypsq calculates the squared hypotenuse (dx*dx + dy*dy) using unsigned multiplication. This is permissible since for arbitrary int32s a and b, the following holds in two's complement arithmetic: (int32_t)((uint32_t)a * b) == (int32_t)((int64_t)a * b) ("Signed and unsigned multiplication is the same on the bit level.") This fixes various overflows where wall lengths for walls of length > 46340 are calculated, but does not rid us of other overflows in the same vein (usually dot products between vectors where one point is a wall vertex and the other a position in a sector). git-svn-id: https://svn.eduke32.com/eduke32@2791 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/include/build.h | 7 +++++- polymer/eduke32/build/src/build.c | 2 +- polymer/eduke32/build/src/engine.c | 28 +++++++++++++----------- polymer/eduke32/source/astub.c | 2 +- polymer/eduke32/source/gameexec.c | 4 ++-- polymer/eduke32/source/lunatic/defs.ilua | 2 +- polymer/eduke32/source/lunatic/test.elua | 2 +- polymer/eduke32/source/m32exec.c | 4 ++-- polymer/eduke32/source/player.c | 18 ++++++++------- 9 files changed, 39 insertions(+), 30 deletions(-) diff --git a/polymer/eduke32/build/include/build.h b/polymer/eduke32/build/include/build.h index 8f216cb0f..1f8ced013 100644 --- a/polymer/eduke32/build/include/build.h +++ b/polymer/eduke32/build/include/build.h @@ -656,7 +656,7 @@ int32_t checksectorpointer(int16_t i, int16_t sectnum); void getmousevalues(int32_t *mousx, int32_t *mousy, int32_t *bstatus) ATTRIBUTE((nonnull(1,2,3))); int32_t krand(void); -int32_t ksqrt(int32_t num); +int32_t ksqrt(uint32_t num); // int32_t getangle(int32_t xvect, int32_t yvect); // @@ -665,6 +665,11 @@ int32_t ksqrt(int32_t num); EXTERN int16_t radarang[1280]; +static inline uint32_t uhypsq(int32_t dx, int32_t 
dy) +{ + return (uint32_t)dx*dx + (uint32_t)dy*dy; +} + static inline int32_t getangle(int32_t xvect, int32_t yvect) { if ((xvect|yvect) == 0) return(0); diff --git a/polymer/eduke32/build/src/build.c b/polymer/eduke32/build/src/build.c index a25c3f17c..afdb3c801 100644 --- a/polymer/eduke32/build/src/build.c +++ b/polymer/eduke32/build/src/build.c @@ -8345,7 +8345,7 @@ int32_t wallength(int16_t i) if (hypsq > (int64_t)INT32_MAX) return (int32_t)sqrt((double)hypsq); else - return ksqrt((int32_t)hypsq); + return ksqrt((uint32_t)hypsq); #else return ksqrt(dax*dax + day*day); #endif diff --git a/polymer/eduke32/build/src/engine.c b/polymer/eduke32/build/src/engine.c index 0059b1283..7adee2bdd 100644 --- a/polymer/eduke32/build/src/engine.c +++ b/polymer/eduke32/build/src/engine.c @@ -1611,7 +1611,7 @@ int32_t clipmapinfo_load(void) } else { - int32_t tmp = ksqrt(wall[w].x*wall[w].x + wall[w].y*wall[w].y); + int32_t tmp = ksqrt(uhypsq(wall[w].x, wall[w].y)); if (tmp > maxdist) maxdist = tmp; } @@ -3212,7 +3212,7 @@ static int32_t wallmost(int16_t *mostbuf, int32_t w, int32_t sectnum, char dasta fw = sector[sectnum].wallptr; i = wall[fw].point2; dx = wall[i].x-wall[fw].x; dy = wall[i].y-wall[fw].y; - dasqr = krecipasm(nsqrtasm(dx*dx+dy*dy)); + dasqr = krecipasm(nsqrtasm(uhypsq(dx,dy))); if (xb1[w] == 0) { xv = cosglobalang+sinviewingrangeglobalang; yv = singlobalang-cosviewingrangeglobalang; } @@ -3238,6 +3238,7 @@ static int32_t wallmost(int16_t *mostbuf, int32_t w, int32_t sectnum, char dasta { xv = cosglobalang-sinviewingrangeglobalang; yv = singlobalang+cosviewingrangeglobalang; } else { xv = (x2+x1)-globalposx; yv = (y2+y1)-globalposy; } + // XXX: OVERFLOW with huge sectors and sloped ceilings/floors! 
i = xv*(y1-globalposy)-yv*(x1-globalposx); j = yv*x2-xv*y2; if (klabs(j) > klabs(i>>3)) i = divscale28(i,j); if (dastat == 0) @@ -3392,7 +3393,7 @@ static int32_t setup_globals_cf1(const sectortype *sec, int32_t pal, int32_t zd, j = sec->wallptr; ox = wall[wall[j].point2].x - wall[j].x; oy = wall[wall[j].point2].y - wall[j].y; - i = nsqrtasm(ox*ox+oy*oy); if (i == 0) i = 1024; else i = 1048576/i; + i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) i = 1024; else i = 1048576/i; globalx1 = mulscale10(dmulscale10(ox,singlobalang,-oy,cosglobalang),i); globaly1 = mulscale10(dmulscale10(ox,cosglobalang,oy,singlobalang),i); globalx2 = -globalx1; @@ -4180,7 +4181,7 @@ static void grouscan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat) wal = &wall[sec->wallptr]; wx = wall[wal->point2].x - wal->x; wy = wall[wal->point2].y - wal->y; - dasqr = krecipasm(nsqrtasm(wx*wx+wy*wy)); + dasqr = krecipasm(nsqrtasm(uhypsq(wx,wy))); i = mulscale21(daslope,dasqr); wx *= i; wy *= i; @@ -4200,6 +4201,7 @@ static void grouscan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat) dx = mulscale14(wall[wal->point2].x-wal->x,dasqr); dy = mulscale14(wall[wal->point2].y-wal->y,dasqr); + // NOTE: sector[].*heinum is int16_t and not supposed to be <0. 
i = nsqrtasm(daslope*daslope+16777216); x = globalx; y = globaly; @@ -8886,7 +8888,7 @@ void drawmapview(int32_t dax, int32_t day, int32_t zoome, int16_t ang) { ox = wall[wall[startwall].point2].x - wall[startwall].x; oy = wall[wall[startwall].point2].y - wall[startwall].y; - i = nsqrtasm(ox*ox+oy*oy); if (i == 0) continue; + i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) continue; i = 1048576/i; globalx1 = mulscale10(dmulscale10(ox,bakgxvect,oy,bakgyvect),i); globaly1 = mulscale10(dmulscale10(ox,bakgyvect,-oy,bakgxvect),i); @@ -10810,7 +10812,7 @@ int32_t inside(int32_t x, int32_t y, int16_t sectnum) // // ksqrt // -int32_t ksqrt(int32_t num) +int32_t ksqrt(uint32_t num) { return(nsqrtasm(num)); } @@ -11152,7 +11154,7 @@ static int32_t hitscan_trysector(const vec3_t *sv, const sectortype *sec, hitdat { wal = &wall[sec->wallptr]; wal2 = &wall[wal->point2]; dax = wal2->x-wal->x; day = wal2->y-wal->y; - i = nsqrtasm(dax*dax+day*day); if (i == 0) return 1; //continue; + i = nsqrtasm(uhypsq(dax,day)); if (i == 0) return 1; //continue; i = divscale15(heinum,i); dax *= i; day *= i; @@ -12068,7 +12070,7 @@ int32_t clipmove(vec3_t *pos, int16_t *sectnum, cy = (pos->y+goaly)>>1; //Extra walldist for sprites on sector lines gx = goalx-(pos->x); gy = goaly-(pos->y); - rad = nsqrtasm(gx*gx + gy*gy) + MAXCLIPDIST+walldist + 8; + rad = nsqrtasm(uhypsq(gx,gy)) + MAXCLIPDIST+walldist + 8; xmin = cx-rad; ymin = cy-rad; xmax = cx+rad; ymax = cy+rad; @@ -14127,7 +14129,7 @@ int32_t getceilzofslope(int16_t sectnum, int32_t dax, int32_t day) wal = &wall[sector[sectnum].wallptr]; // floor(sqrt(2**31-1)) == 46340 dx = wall[wal->point2].x-wal->x; dy = wall[wal->point2].y-wal->y; - i = (nsqrtasm(dx*dx+dy*dy)<<5); if (i == 0) return(sector[sectnum].ceilingz); + i = (nsqrtasm(uhypsq(dx,dy))<<5); if (i == 0) return(sector[sectnum].ceilingz); j = dmulscale3(dx,day-wal->y,-dy,dax-wal->x); return(sector[sectnum].ceilingz+(scale(sector[sectnum].ceilingheinum,j>>1,i)<<1)); } @@ -14147,7 +14149,7 @@ 
int32_t getflorzofslope(int16_t sectnum, int32_t dax, int32_t day) wal = &wall[sector[sectnum].wallptr]; dx = wall[wal->point2].x-wal->x; dy = wall[wal->point2].y-wal->y; - i = (nsqrtasm(dx*dx+dy*dy)<<5); if (i == 0) return(sector[sectnum].floorz); + i = (nsqrtasm(uhypsq(dx,dy))<<5); if (i == 0) return(sector[sectnum].floorz); j = dmulscale3(dx,day-wal->y,-dy,dax-wal->x); return(sector[sectnum].floorz+(scale(sector[sectnum].floorheinum,j>>1,i)<<1)); } @@ -14169,7 +14171,7 @@ void getzsofslope(int16_t sectnum, int32_t dax, int32_t day, int32_t *ceilz, int { wal = &wall[sec->wallptr]; wal2 = &wall[wal->point2]; dx = wal2->x-wal->x; dy = wal2->y-wal->y; - i = (nsqrtasm(dx*dx+dy*dy)<<5); if (i == 0) return; // XXX: OVERFLOW + i = (nsqrtasm(uhypsq(dx,dy))<<5); if (i == 0) return; j = dmulscale3(dx,day-wal->y,-dy,dax-wal->x); if (sec->ceilingstat&2) *ceilz = (*ceilz)+(scale(sec->ceilingheinum,j>>1,i)<<1); if (sec->floorstat&2) *florz = (*florz)+(scale(sec->floorheinum,j>>1,i)<<1); @@ -14191,7 +14193,7 @@ void alignceilslope(int16_t dasect, int32_t x, int32_t y, int32_t z) i = (y-wal->y)*dax - (x-wal->x)*day; if (i == 0) return; sector[dasect].ceilingheinum = scale((z-sector[dasect].ceilingz)<<8, - nsqrtasm(dax*dax+day*day),i); + nsqrtasm(uhypsq(dax,day)), i); if (sector[dasect].ceilingheinum == 0) sector[dasect].ceilingstat &= ~2; else sector[dasect].ceilingstat |= 2; @@ -14212,7 +14214,7 @@ void alignflorslope(int16_t dasect, int32_t x, int32_t y, int32_t z) i = (y-wal->y)*dax - (x-wal->x)*day; if (i == 0) return; sector[dasect].floorheinum = scale((z-sector[dasect].floorz)<<8, - nsqrtasm(dax*dax+day*day),i); + nsqrtasm(uhypsq(dax,day)), i); if (sector[dasect].floorheinum == 0) sector[dasect].floorstat &= ~2; else sector[dasect].floorstat |= 2; diff --git a/polymer/eduke32/source/astub.c b/polymer/eduke32/source/astub.c index 63323d8dd..506868679 100644 --- a/polymer/eduke32/source/astub.c +++ b/polymer/eduke32/source/astub.c @@ -6577,7 +6577,7 @@ static void 
Keys3d(void) case 7:ma+=512; break; } - a = ksqrt(mouseax*mouseax + mouseay*mouseay); + a = ksqrt(uhypsq(mouseax,mouseay)); if (a) { int32_t mult = (stat&8) ? 8192 : 8192*2; diff --git a/polymer/eduke32/source/gameexec.c b/polymer/eduke32/source/gameexec.c index ad5c6e413..4d4db0a9d 100644 --- a/polymer/eduke32/source/gameexec.c +++ b/polymer/eduke32/source/gameexec.c @@ -2637,7 +2637,7 @@ nullquote: if (hypsq > (int64_t)INT_MAX) Gv_SetVarX(retvar, (int32_t)sqrt((double)hypsq)); else - Gv_SetVarX(retvar, ksqrt((int32_t)hypsq)); + Gv_SetVarX(retvar, ksqrt((uint32_t)hypsq)); continue; } @@ -3607,7 +3607,7 @@ nullquote: // syntax sqrt int32_t lInVarID=*insptr++, lOutVarID=*insptr++; - Gv_SetVarX(lOutVarID, ksqrt(Gv_GetVarX(lInVarID))); + Gv_SetVarX(lOutVarID, ksqrt((uint32_t)Gv_GetVarX(lInVarID))); continue; } diff --git a/polymer/eduke32/source/lunatic/defs.ilua b/polymer/eduke32/source/lunatic/defs.ilua index 80a0251c5..05d2fae59 100644 --- a/polymer/eduke32/source/lunatic/defs.ilua +++ b/polymer/eduke32/source/lunatic/defs.ilua @@ -472,7 +472,7 @@ playerdata_t g_player[MAXPLAYERS]; -- functions ffi.cdef[[ -int32_t ksqrt(int32_t num); +int32_t ksqrt(uint32_t num); ]] ffi.cdef "double gethitickms(void);" diff --git a/polymer/eduke32/source/lunatic/test.elua b/polymer/eduke32/source/lunatic/test.elua index 78dcfe579..8f59b4c73 100644 --- a/polymer/eduke32/source/lunatic/test.elua +++ b/polymer/eduke32/source/lunatic/test.elua @@ -159,7 +159,7 @@ gameevent(gv.EVENT_ENTERLEVEL, N, t, (t*1000)/N, sum) printf("sqrt(0xffffffff) = %f(ksqrt) %f(math.sqrt)", - gv.ksqrt(-1), math.sqrt(0xffffffff)) + gv.ksqrt(0xffffffff), math.sqrt(0xffffffff)) end ) diff --git a/polymer/eduke32/source/m32exec.c b/polymer/eduke32/source/m32exec.c index 9f5375265..795de25c3 100644 --- a/polymer/eduke32/source/m32exec.c +++ b/polymer/eduke32/source/m32exec.c @@ -881,7 +881,7 @@ skip_check: // syntax sqrt int32_t lInVarID=*insptr++, lOutVarID=*insptr++; - Gv_SetVarX(lOutVarID, 
ksqrt(Gv_GetVarX(lInVarID))); + Gv_SetVarX(lOutVarID, ksqrt((uint32_t)Gv_GetVarX(lInVarID))); continue; } @@ -1793,7 +1793,7 @@ badindex: if (hypsq > (int64_t)INT32_MAX) Gv_SetVarX(retvar, (int32_t)sqrt((double)hypsq)); else - Gv_SetVarX(retvar, ksqrt((int32_t)hypsq)); + Gv_SetVarX(retvar, ksqrt((uint32_t)hypsq)); continue; } diff --git a/polymer/eduke32/source/player.c b/polymer/eduke32/source/player.c index d9bdf89af..797bcaa7e 100644 --- a/polymer/eduke32/source/player.c +++ b/polymer/eduke32/source/player.c @@ -4686,9 +4686,9 @@ void P_ProcessInput(int32_t snum) } else p->weapon_sway = p->bobcounter; - // NOTE: This overflows if the difference is too great, e.g. used to do + // NOTE: This silently wraps if the difference is too great, e.g. used to do // that when teleported by silent SE7s. - s->xvel = ksqrt((p->pos.x-p->bobposx)*(p->pos.x-p->bobposx)+(p->pos.y-p->bobposy)*(p->pos.y-p->bobposy)); + s->xvel = ksqrt(uhypsq(p->pos.x-p->bobposx, p->pos.y-p->bobposy)); if (p->on_ground) p->bobcounter += sprite[p->i].xvel>>1; @@ -5630,7 +5630,9 @@ void computergetinput(int32_t snum, input_t *syn) for (TRAVERSE_CONNECT(i)) if (i != snum && !(GTFLAGS(GAMETYPE_TDM) && g_player[snum].ps->team == g_player[i].ps->team)) { - dist = ksqrt((sprite[g_player[i].ps->i].x-x1)*(sprite[g_player[i].ps->i].x-x1)+(sprite[g_player[i].ps->i].y-y1)*(sprite[g_player[i].ps->i].y-y1)); + const spritetype *const pspr = &sprite[g_player[i].ps->i]; + + dist = ksqrt(uhypsq(pspr->x-x1, pspr->y-y1)); x2 = sprite[g_player[i].ps->i].x; y2 = sprite[g_player[i].ps->i].y; @@ -5742,7 +5744,7 @@ void computergetinput(int32_t snum, input_t *syn) fightdist = fdmatrix[p->curr_weapon][g_player[goalplayer[snum]].ps->curr_weapon]; if (fightdist < 128) fightdist = 128; - dist = ksqrt((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)); + dist = ksqrt(uhypsq(x2-x1, y2-y1)); if (dist == 0) dist = 1; daang = getangle(x2+(g_player[goalplayer[snum]].ps->vel.x>>14)-x1,y2+(g_player[goalplayer[snum]].ps->vel.y>>14)-y1); zang = 
100-((z2-z1)*8)/dist; @@ -5860,7 +5862,7 @@ void computergetinput(int32_t snum, input_t *syn) break; } - dist = ksqrt(dx*dx+dy*dy); + dist = ksqrt(uhypsq(dx,dy)); if (dist > l) { l = dist; @@ -5906,7 +5908,7 @@ void computergetinput(int32_t snum, input_t *syn) { dx = wall[wall[i].point2].x-wall[i].x; dy = wall[wall[i].point2].y-wall[i].y; - dist = ksqrt(dx*dx+dy*dy); + dist = ksqrt(uhypsq(dx,dy)); if ((wall[i].nextsector == goalsect[snum]) && (dist > l)) { l = dist; @@ -5962,7 +5964,7 @@ void computergetinput(int32_t snum, input_t *syn) } x2 = goalx[snum]; y2 = goaly[snum]; - dist = ksqrt((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)); + dist = ksqrt(uhypsq(x2-x1, y2-y1)); if (!dist) return; daang = getangle(x2-x1,y2-y1); syn->fvel += (x2-x1)*2047/dist; @@ -6044,7 +6046,7 @@ void computergetinput(int32_t snum, input_t *syn) { x2 = goalx[snum]; y2 = goaly[snum]; - dist = ksqrt((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)); + dist = ksqrt(uhypsq(x2-x1, y2-y1)); if (!dist) return; daang = getangle(x2-x1,y2-y1); if ((goalwall[snum] >= 0) && (dist < 4096))