From 58d8ce1a4f336dee721ab9a15c20c8c633db5eb2 Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 22 Nov 2020 10:25:04 +0100 Subject: [PATCH] - cleanup of m_fixed.h. With fixed point math barely being used anywhere with the vast majority of calls in the software voxel drawer it makes sense to reduce the function interface to the minimum possible and move the shift value into the function call as an argument. --- src/common/utility/cmdlib.h | 10 ++++ src/common/utility/m_fixed.h | 42 +------------- src/playsim/p_acs.cpp | 10 +++- src/playsim/p_maputl.cpp | 4 +- src/rendering/r_utility.h | 8 +-- src/rendering/swrenderer/plane/r_skyplane.cpp | 2 + .../swrenderer/things/r_visiblesprite.cpp | 1 - src/rendering/swrenderer/things/r_voxel.cpp | 56 +++++++++---------- src/utility/nodebuilder/nodebuild_utility.cpp | 4 +- 9 files changed, 58 insertions(+), 79 deletions(-) diff --git a/src/common/utility/cmdlib.h b/src/common/utility/cmdlib.h index c3ea527f2d..9c85aa05f9 100644 --- a/src/common/utility/cmdlib.h +++ b/src/common/utility/cmdlib.h @@ -90,4 +90,14 @@ struct MD5Context; void md5Update(FileReader& file, MD5Context& md5, unsigned len); void uppercopy(char* to, const char* from); +inline void fillshort(void* buff, size_t count, uint16_t clear) +{ + int16_t* b2 = (int16_t*)buff; + for (size_t i = 0; i < count; ++i) + { + b2[i] = clear; + } +} + + #endif diff --git a/src/common/utility/m_fixed.h b/src/common/utility/m_fixed.h index b765f997ad..99115df6e8 100644 --- a/src/common/utility/m_fixed.h +++ b/src/common/utility/m_fixed.h @@ -6,48 +6,12 @@ #include "basics.h" -// Modern compilers are smart enough to do these multiplications intelligently. -__forceinline int32_t MulScale14(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 14); } // only used by R_DrawVoxel -__forceinline int32_t MulScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 30); } // only used once in the node builder -__forceinline int32_t MulScale32(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 32); } // only used by R_DrawVoxel - -__forceinline uint32_t UMulScale16(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 16); } // used for sky drawing - -__forceinline int32_t DMulScale3(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 3); } // used for setting up slopes for Build maps -__forceinline int32_t DMulScale6(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 6); } // only used by R_DrawVoxel -__forceinline int32_t DMulScale10(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 10); } // only used by R_DrawVoxel -__forceinline int32_t DMulScale18(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 18); } // only used by R_DrawVoxel -__forceinline int32_t DMulScale32(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 32); } // used by R_PointOnSide. - -// Sadly, for divisions this is not true but these are so infrequently used that the C versions are just fine, despite not being fully optimal. -__forceinline int32_t DivScale6(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 6) / b); } // only used by R_DrawVoxel -__forceinline int32_t DivScale21(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 21) / b); } // only used by R_DrawVoxel -__forceinline int32_t DivScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 30) / b); } // only used once in the node builder - -__forceinline void fillshort(void *buff, unsigned int count, uint16_t clear) -{ - int16_t *b2 = (int16_t *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} +__forceinline constexpr int32_t MulScale(int32_t a, int32_t b, int32_t shift) { return (int32_t)(((int64_t)a * b) >> shift); } +__forceinline constexpr int32_t DMulScale(int32_t a, int32_t b, int32_t c, int32_t d, int32_t shift) { return (int32_t)(((int64_t)a * b + (int64_t)c * d) >> shift); } +__forceinline constexpr int32_t DivScale(int32_t a, int32_t b, int shift) { return (int32_t)(((int64_t)a << shift) / b); } #include "xs_Float.h" -inline int32_t FixedDiv (int32_t a, int32_t b) -{ - if ((uint32_t)abs(a) >> (31-16) >= (uint32_t)abs (b)) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - - return (int32_t)(((int64_t)a << 16) / b); -} - -__forceinline constexpr int32_t FixedMul(int32_t a, int32_t b) -{ - return (int32_t)(((int64_t)a * b) >> 16); -} - inline fixed_t FloatToFixed(double f) { return xs_Fix<16>::ToFix(f); diff --git a/src/playsim/p_acs.cpp b/src/playsim/p_acs.cpp index e04de2b993..7a8516eee7 100644 --- a/src/playsim/p_acs.cpp +++ b/src/playsim/p_acs.cpp @@ -9050,15 +9050,19 @@ scriptwait: break; case PCD_FIXEDMUL: - STACK(2) = FixedMul (STACK(2), STACK(1)); + STACK(2) = MulScale(STACK(2), STACK(1), 16); sp--; break; case PCD_FIXEDDIV: - STACK(2) = FixedDiv (STACK(2), STACK(1)); + { + int a = STACK(2), b = STACK(1); + // Overflow check. + if ((uint32_t)abs(a) >> (31 - 16) >= (uint32_t)abs(b)) STACK(2) = (a ^ b) < 0 ? FIXED_MIN : FIXED_MAX; + else STACK(2) = DivScale(STACK(2), STACK(1), 16); sp--; break; - + } case PCD_SETGRAVITY: Level->gravity = ACSToDouble(STACK(1)); sp--; diff --git a/src/playsim/p_maputl.cpp b/src/playsim/p_maputl.cpp index ffc7a1f430..1dec3bf6bd 100644 --- a/src/playsim/p_maputl.cpp +++ b/src/playsim/p_maputl.cpp @@ -1929,8 +1929,8 @@ int P_VanillaPointOnLineSide(double x, double y, const line_t* line) auto dx = FloatToFixed(x - line->v1->fX()); auto dy = FloatToFixed(y - line->v1->fY()); - auto left = FixedMul( int(delta.Y * 256) , dx ); - auto right = FixedMul( dy , int(delta.X * 256) ); + auto left = MulScale( int(delta.Y * 256) , dx, 16 ); + auto right = MulScale( dy , int(delta.X * 256), 16 ); if (right < left) return 0; // front side diff --git a/src/rendering/r_utility.h b/src/rendering/r_utility.h index bdc9aea171..be6f01b2f5 100644 --- a/src/rendering/r_utility.h +++ b/src/rendering/r_utility.h @@ -91,17 +91,17 @@ const double r_Yaspect = 200.0; // Why did I make this a variable? It's never // //========================================================================== -inline int R_PointOnSide (fixed_t x, fixed_t y, const node_t *node) +inline constexpr int R_PointOnSide (fixed_t x, fixed_t y, const node_t *node) { - return DMulScale32 (y-node->y, node->dx, node->x-x, node->dy) > 0; + return DMulScale (y-node->y, node->dx, node->x-x, node->dy, 32) > 0; } inline int R_PointOnSide(double x, double y, const node_t *node) { - return DMulScale32(FLOAT2FIXED(y) - node->y, node->dx, node->x - FLOAT2FIXED(x), node->dy) > 0; + return DMulScale(FLOAT2FIXED(y) - node->y, node->dx, node->x - FLOAT2FIXED(x), node->dy, 32) > 0; } inline int R_PointOnSide(const DVector2 &pos, const node_t *node) { - return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0; + return DMulScale(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy, 32) > 0; } // Used for interpolation waypoints. diff --git a/src/rendering/swrenderer/plane/r_skyplane.cpp b/src/rendering/swrenderer/plane/r_skyplane.cpp index 08a263a018..cb1069913a 100644 --- a/src/rendering/swrenderer/plane/r_skyplane.cpp +++ b/src/rendering/swrenderer/plane/r_skyplane.cpp @@ -222,6 +222,8 @@ namespace swrenderer DrawSky(pl); } + static uint32_t UMulScale16(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 16); } + void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat) { RenderPortal *renderportal = Thread->Portal.get(); diff --git a/src/rendering/swrenderer/things/r_visiblesprite.cpp b/src/rendering/swrenderer/things/r_visiblesprite.cpp index 4102f912dd..16e3435c50 100644 --- a/src/rendering/swrenderer/things/r_visiblesprite.cpp +++ b/src/rendering/swrenderer/things/r_visiblesprite.cpp @@ -239,7 +239,6 @@ namespace swrenderer // killough 3/27/98: end special clipping for deep water / fake ceilings else if (!spr->IsVoxel() && spr->floorclip) { // [RH] Move floorclip stuff from R_DrawVisSprite to here - //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); int clip = xs_RoundToInt(viewport->CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); if (clip < botclip) { diff --git a/src/rendering/swrenderer/things/r_voxel.cpp b/src/rendering/swrenderer/things/r_voxel.cpp index 5dfbb58844..dc8ef9de1c 100644 --- a/src/rendering/swrenderer/things/r_voxel.cpp +++ b/src/rendering/swrenderer/things/r_voxel.cpp @@ -323,8 +323,8 @@ namespace swrenderer sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; // Select mip level - i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); - i = DivScale6(i, MIN(daxscale, dayscale)); + i = abs(DMulScale(dasprx - globalposx, cosang, daspry - globalposy, sinang, 6)); + i = DivScale(i, MIN(daxscale, dayscale), 6); j = xs_Fix<13>::ToFix(viewport->FocalLengthX); for (k = 0; i >= j && k < voxobj->NumMips; ++k) { @@ -338,10 +338,10 @@ namespace swrenderer maxslabz >>= k; daxscale <<= (k + 8); dayscale <<= (k + 8); - dazscale = FixedDiv(dayscale, FLOAT2FIXED(viewport->BaseYaspectMul)); + dazscale = DivScale(dayscale, FLOAT2FIXED(viewport->BaseYaspectMul), 16); daxscale = fixed_t(daxscale / viewport->YaspectMul); daxscale = Scale(daxscale, xdimenscale, viewport->viewwindow.centerxwide << 9); - dayscale = Scale(dayscale, FixedMul(xdimenscale, viewport->viewingrangerecip), viewport->viewwindow.centerxwide << 9); + dayscale = Scale(dayscale, MulScale(xdimenscale, viewport->viewingrangerecip, 16), viewport->viewwindow.centerxwide << 9); daxscalerecip = (1 << 30) / daxscale; dayscalerecip = (1 << 30) / dayscale; @@ -350,26 +350,26 @@ namespace swrenderer fixed_t piv_y = fixed_t(mip->Pivot.Y*256.); fixed_t piv_z = fixed_t(mip->Pivot.Z*256.); - x = FixedMul(globalposx - dasprx, daxscalerecip); - y = FixedMul(globalposy - daspry, daxscalerecip); - backx = (DMulScale10(x, sprcosang, y, sprsinang) + piv_x) >> 8; - backy = (DMulScale10(y, sprcosang, x, -sprsinang) + piv_y) >> 8; + x = MulScale(globalposx - dasprx, daxscalerecip, 16); + y = MulScale(globalposy - daspry, daxscalerecip, 16); + backx = (DMulScale(x, sprcosang, y, sprsinang, 10) + piv_x) >> 8; + backy = (DMulScale(y, sprcosang, x, -sprsinang, 10) + piv_y) >> 8; cbackx = clamp(backx, 0, mip->SizeX - 1); cbacky = clamp(backy, 0, mip->SizeY - 1); - sprcosang = MulScale14(daxscale, sprcosang); - sprsinang = MulScale14(daxscale, sprsinang); + sprcosang = MulScale(daxscale, sprcosang, 14); + sprsinang = MulScale(daxscale, sprsinang, 14); - x = (dasprx - globalposx) - DMulScale18(piv_x, sprcosang, piv_y, -sprsinang); - y = (daspry - globalposy) - DMulScale18(piv_y, sprcosang, piv_x, sprsinang); + x = (dasprx - globalposx) - DMulScale(piv_x, sprcosang, piv_y, -sprsinang, 18); + y = (daspry - globalposy) - DMulScale(piv_y, sprcosang, piv_x, sprsinang, 18); - cosang = FixedMul(cosang, dayscalerecip); - sinang = FixedMul(sinang, dayscalerecip); + cosang = MulScale(cosang, dayscalerecip, 16); + sinang = MulScale(sinang, dayscalerecip, 16); gxstart = y*cosang - x*sinang; gystart = x*cosang + y*sinang; - gxinc = DMulScale10(sprsinang, cosang, sprcosang, -sinang); - gyinc = DMulScale10(sprcosang, cosang, sprsinang, sinang); + gxinc = DMulScale(sprsinang, cosang, sprcosang, -sinang, 10); + gyinc = DMulScale(sprcosang, cosang, sprsinang, sinang, 10); if ((abs(globalposz - dasprz) >> 10) >= abs(dazscale)) return; x = 0; y = 0; j = MAX(mip->SizeX, mip->SizeY); @@ -381,7 +381,7 @@ namespace swrenderer ggyinc[i] = y; y += gyinc; } - syoff = DivScale21(globalposz - dasprz, FixedMul(dazscale, 0xE800)) + (piv_z << 7); + syoff = DivScale(globalposz - dasprz, MulScale(dazscale, 0xE800, 16), 21) + (piv_z << 7); yoff = (abs(gxinc) + abs(gyinc)) >> 1; bool useSlabDataBgra = !drawerargs.DrawerNeedsPalInput() && viewport->RenderTarget->IsBgra(); @@ -446,12 +446,12 @@ namespace swrenderer uint8_t oand16 = oand + 16; uint8_t oand32 = oand + 32; - if (yi > 0) { dagxinc = gxinc; dagyinc = FixedMul(gyinc, viewport->viewingrangerecip); } - else { dagxinc = -gxinc; dagyinc = -FixedMul(gyinc, viewport->viewingrangerecip); } + if (yi > 0) { dagxinc = gxinc; dagyinc = MulScale(gyinc, viewport->viewingrangerecip, 16); } + else { dagxinc = -gxinc; dagyinc = -MulScale(gyinc, viewport->viewingrangerecip, 16); } /* Fix for non 90 degree viewing ranges */ - nxoff = FixedMul(x2 - x1, viewport->viewingrangerecip); - x1 = FixedMul(x1, viewport->viewingrangerecip); + nxoff = MulScale(x2 - x1, viewport->viewingrangerecip, 16); + x1 = MulScale(x1, viewport->viewingrangerecip, 16); ggxstart = gxstart + ggyinc[ys]; ggystart = gystart - ggxinc[ys]; @@ -462,7 +462,7 @@ namespace swrenderer uint8_t *slabxoffs = &SlabData[mip->OffsetX[x]]; short *xyoffs = &mip->OffsetXY[x * (mip->SizeY + 1)]; - nx = FixedMul(ggxstart + ggxinc[x], viewport->viewingrangerecip) + x1; + nx = MulScale(ggxstart + ggxinc[x], viewport->viewingrangerecip, 16) + x1; ny = ggystart + ggyinc[x]; for (y = ys; y != ye; y += yi, nx += dagyinc, ny -= dagxinc) { @@ -522,20 +522,20 @@ namespace swrenderer if (k < 0) { if ((voxptr->backfacecull & oand32) == 0) continue; - z2 = MulScale32(l2, k) + viewport->viewwindow.centery; /* Below slab */ + z2 = MulScale(l2, k, 32) + viewport->viewwindow.centery; /* Below slab */ } else { if ((voxptr->backfacecull & oand) == 0) continue; /* Middle of slab */ - z2 = MulScale32(l1, k) + viewport->viewwindow.centery; + z2 = MulScale(l1, k, 32) + viewport->viewwindow.centery; } - z1 = MulScale32(l1, j) + viewport->viewwindow.centery; + z1 = MulScale(l1, j, 32) + viewport->viewwindow.centery; } else { if ((voxptr->backfacecull & oand16) == 0) continue; - z1 = MulScale32(l2, j) + viewport->viewwindow.centery; /* Above slab */ - z2 = MulScale32(l1, j + (zleng << 15)) + viewport->viewwindow.centery; + z1 = MulScale(l2, j, 32) + viewport->viewwindow.centery; /* Above slab */ + z2 = MulScale(l1, j + (zleng << 15), 32) + viewport->viewwindow.centery; } if (z2 <= z1) continue; @@ -546,7 +546,7 @@ namespace swrenderer } else { - if (z2 - z1 >= 1024) yinc = FixedDiv(zleng, z2 - z1); + if (z2 - z1 >= 1024) yinc = DivScale(zleng, z2 - z1, 16); else yinc = (((1 << 24) - 1) / (z2 - z1)) * zleng >> 8; } // [RH] Clip each column separately, not just by the first one. diff --git a/src/utility/nodebuilder/nodebuild_utility.cpp b/src/utility/nodebuilder/nodebuild_utility.cpp index ffd5f61e8c..d94f84d079 100644 --- a/src/utility/nodebuilder/nodebuild_utility.cpp +++ b/src/utility/nodebuilder/nodebuild_utility.cpp @@ -453,8 +453,8 @@ void FNodeBuilder::FindPolyContainers (TArray &spots, TArrayx, v1->y, dx, dy) <= 0) { - fixed_t t = DivScale30 (center.fixY() - v1->y, dy); - fixed_t sx = v1->x + MulScale30 (dx, t); + fixed_t t = DivScale (center.fixY() - v1->y, dy, 30); + fixed_t sx = v1->x + MulScale(dx, t, 30); fixed_t dist = sx - spot->x; if (dist < closestdist && dist >= 0)