diff --git a/src/common/utility/cmdlib.h b/src/common/utility/cmdlib.h index c3ea527f2..9c85aa05f 100644 --- a/src/common/utility/cmdlib.h +++ b/src/common/utility/cmdlib.h @@ -90,4 +90,14 @@ struct MD5Context; void md5Update(FileReader& file, MD5Context& md5, unsigned len); void uppercopy(char* to, const char* from); +inline void fillshort(void* buff, size_t count, uint16_t clear) +{ + int16_t* b2 = (int16_t*)buff; + for (size_t i = 0; i < count; ++i) + { + b2[i] = clear; + } +} + + #endif diff --git a/src/common/utility/m_fixed.h b/src/common/utility/m_fixed.h index b765f997a..99115df6e 100644 --- a/src/common/utility/m_fixed.h +++ b/src/common/utility/m_fixed.h @@ -6,48 +6,12 @@ #include "basics.h" -// Modern compilers are smart enough to do these multiplications intelligently. -__forceinline int32_t MulScale14(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 14); } // only used by R_DrawVoxel -__forceinline int32_t MulScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 30); } // only used once in the node builder -__forceinline int32_t MulScale32(int32_t a, int32_t b) { return (int32_t)(((int64_t)a * b) >> 32); } // only used by R_DrawVoxel - -__forceinline uint32_t UMulScale16(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 16); } // used for sky drawing - -__forceinline int32_t DMulScale3(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 3); } // used for setting up slopes for Build maps -__forceinline int32_t DMulScale6(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 6); } // only used by R_DrawVoxel -__forceinline int32_t DMulScale10(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 10); } // only used by R_DrawVoxel -__forceinline int32_t DMulScale18(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 18); } // only used by R_DrawVoxel -__forceinline int32_t DMulScale32(int32_t a, int32_t b, int32_t c, int32_t d) { return (int32_t)(((int64_t)a*b + (int64_t)c*d) >> 32); } // used by R_PointOnSide. - -// Sadly, for divisions this is not true but these are so infrequently used that the C versions are just fine, despite not being fully optimal. -__forceinline int32_t DivScale6(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 6) / b); } // only used by R_DrawVoxel -__forceinline int32_t DivScale21(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 21) / b); } // only used by R_DrawVoxel -__forceinline int32_t DivScale30(int32_t a, int32_t b) { return (int32_t)(((int64_t)a << 30) / b); } // only used once in the node builder - -__forceinline void fillshort(void *buff, unsigned int count, uint16_t clear) -{ - int16_t *b2 = (int16_t *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} +__forceinline constexpr int32_t MulScale(int32_t a, int32_t b, int32_t shift) { return (int32_t)(((int64_t)a * b) >> shift); } +__forceinline constexpr int32_t DMulScale(int32_t a, int32_t b, int32_t c, int32_t d, int32_t shift) { return (int32_t)(((int64_t)a * b + (int64_t)c * d) >> shift); } +__forceinline constexpr int32_t DivScale(int32_t a, int32_t b, int shift) { return (int32_t)(((int64_t)a << shift) / b); } #include "xs_Float.h" -inline int32_t FixedDiv (int32_t a, int32_t b) -{ - if ((uint32_t)abs(a) >> (31-16) >= (uint32_t)abs (b)) - return (a^b)<0 ? FIXED_MIN : FIXED_MAX; - - return (int32_t)(((int64_t)a << 16) / b); -} - -__forceinline constexpr int32_t FixedMul(int32_t a, int32_t b) -{ - return (int32_t)(((int64_t)a * b) >> 16); -} - inline fixed_t FloatToFixed(double f) { return xs_Fix<16>::ToFix(f); diff --git a/src/playsim/p_acs.cpp b/src/playsim/p_acs.cpp index e04de2b99..7a8516eee 100644 --- a/src/playsim/p_acs.cpp +++ b/src/playsim/p_acs.cpp @@ -9050,15 +9050,19 @@ scriptwait: break; case PCD_FIXEDMUL: - STACK(2) = FixedMul (STACK(2), STACK(1)); + STACK(2) = MulScale(STACK(2), STACK(1), 16); sp--; break; case PCD_FIXEDDIV: - STACK(2) = FixedDiv (STACK(2), STACK(1)); + { + int a = STACK(2), b = STACK(1); + // Overflow check. + if ((uint32_t)abs(a) >> (31 - 16) >= (uint32_t)abs(b)) STACK(2) = (a ^ b) < 0 ? FIXED_MIN : FIXED_MAX; + else STACK(2) = DivScale(STACK(2), STACK(1), 16); sp--; break; - + } case PCD_SETGRAVITY: Level->gravity = ACSToDouble(STACK(1)); sp--; diff --git a/src/playsim/p_maputl.cpp b/src/playsim/p_maputl.cpp index ffc7a1f43..1dec3bf6b 100644 --- a/src/playsim/p_maputl.cpp +++ b/src/playsim/p_maputl.cpp @@ -1929,8 +1929,8 @@ int P_VanillaPointOnLineSide(double x, double y, const line_t* line) auto dx = FloatToFixed(x - line->v1->fX()); auto dy = FloatToFixed(y - line->v1->fY()); - auto left = FixedMul( int(delta.Y * 256) , dx ); - auto right = FixedMul( dy , int(delta.X * 256) ); + auto left = MulScale( int(delta.Y * 256) , dx, 16 ); + auto right = MulScale( dy , int(delta.X * 256), 16 ); if (right < left) return 0; // front side diff --git a/src/rendering/r_utility.h b/src/rendering/r_utility.h index bdc9aea17..be6f01b2f 100644 --- a/src/rendering/r_utility.h +++ b/src/rendering/r_utility.h @@ -91,17 +91,17 @@ const double r_Yaspect = 200.0; // Why did I make this a variable? It's never // //========================================================================== -inline int R_PointOnSide (fixed_t x, fixed_t y, const node_t *node) +inline constexpr int R_PointOnSide (fixed_t x, fixed_t y, const node_t *node) { - return DMulScale32 (y-node->y, node->dx, node->x-x, node->dy) > 0; + return DMulScale (y-node->y, node->dx, node->x-x, node->dy, 32) > 0; } inline int R_PointOnSide(double x, double y, const node_t *node) { - return DMulScale32(FLOAT2FIXED(y) - node->y, node->dx, node->x - FLOAT2FIXED(x), node->dy) > 0; + return DMulScale(FLOAT2FIXED(y) - node->y, node->dx, node->x - FLOAT2FIXED(x), node->dy, 32) > 0; } inline int R_PointOnSide(const DVector2 &pos, const node_t *node) { - return DMulScale32(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy) > 0; + return DMulScale(FLOAT2FIXED(pos.Y) - node->y, node->dx, node->x - FLOAT2FIXED(pos.X), node->dy, 32) > 0; } // Used for interpolation waypoints. diff --git a/src/rendering/swrenderer/plane/r_skyplane.cpp b/src/rendering/swrenderer/plane/r_skyplane.cpp index 08a263a01..cb1069913 100644 --- a/src/rendering/swrenderer/plane/r_skyplane.cpp +++ b/src/rendering/swrenderer/plane/r_skyplane.cpp @@ -222,6 +222,8 @@ namespace swrenderer DrawSky(pl); } + static uint32_t UMulScale16(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 16); } + void RenderSkyPlane::DrawSkyColumnStripe(int start_x, int y1, int y2, double scale, double texturemid, double yrepeat) { RenderPortal *renderportal = Thread->Portal.get(); diff --git a/src/rendering/swrenderer/things/r_visiblesprite.cpp b/src/rendering/swrenderer/things/r_visiblesprite.cpp index 4102f912d..16e3435c5 100644 --- a/src/rendering/swrenderer/things/r_visiblesprite.cpp +++ b/src/rendering/swrenderer/things/r_visiblesprite.cpp @@ -239,7 +239,6 @@ namespace swrenderer // killough 3/27/98: end special clipping for deep water / fake ceilings else if (!spr->IsVoxel() && spr->floorclip) { // [RH] Move floorclip stuff from R_DrawVisSprite to here - //int clip = ((FLOAT2FIXED(CenterY) - FixedMul (spr->texturemid - (spr->pic->GetHeight() << FRACBITS) + spr->floorclip, spr->yscale)) >> FRACBITS); int clip = xs_RoundToInt(viewport->CenterY - (spr->texturemid - spr->pic->GetHeight() + spr->floorclip) * spr->yscale); if (clip < botclip) { diff --git a/src/rendering/swrenderer/things/r_voxel.cpp b/src/rendering/swrenderer/things/r_voxel.cpp index 5dfbb5884..dc8ef9de1 100644 --- a/src/rendering/swrenderer/things/r_voxel.cpp +++ b/src/rendering/swrenderer/things/r_voxel.cpp @@ -323,8 +323,8 @@ namespace swrenderer sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; // Select mip level - i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); - i = DivScale6(i, MIN(daxscale, dayscale)); + i = abs(DMulScale(dasprx - globalposx, cosang, daspry - globalposy, sinang, 6)); + i = DivScale(i, MIN(daxscale, dayscale), 6); j = xs_Fix<13>::ToFix(viewport->FocalLengthX); for (k = 0; i >= j && k < voxobj->NumMips; ++k) { @@ -338,10 +338,10 @@ namespace swrenderer maxslabz >>= k; daxscale <<= (k + 8); dayscale <<= (k + 8); - dazscale = FixedDiv(dayscale, FLOAT2FIXED(viewport->BaseYaspectMul)); + dazscale = DivScale(dayscale, FLOAT2FIXED(viewport->BaseYaspectMul), 16); daxscale = fixed_t(daxscale / viewport->YaspectMul); daxscale = Scale(daxscale, xdimenscale, viewport->viewwindow.centerxwide << 9); - dayscale = Scale(dayscale, FixedMul(xdimenscale, viewport->viewingrangerecip), viewport->viewwindow.centerxwide << 9); + dayscale = Scale(dayscale, MulScale(xdimenscale, viewport->viewingrangerecip, 16), viewport->viewwindow.centerxwide << 9); daxscalerecip = (1 << 30) / daxscale; dayscalerecip = (1 << 30) / dayscale; @@ -350,26 +350,26 @@ namespace swrenderer fixed_t piv_y = fixed_t(mip->Pivot.Y*256.); fixed_t piv_z = fixed_t(mip->Pivot.Z*256.); - x = FixedMul(globalposx - dasprx, daxscalerecip); - y = FixedMul(globalposy - daspry, daxscalerecip); - backx = (DMulScale10(x, sprcosang, y, sprsinang) + piv_x) >> 8; - backy = (DMulScale10(y, sprcosang, x, -sprsinang) + piv_y) >> 8; + x = MulScale(globalposx - dasprx, daxscalerecip, 16); + y = MulScale(globalposy - daspry, daxscalerecip, 16); + backx = (DMulScale(x, sprcosang, y, sprsinang, 10) + piv_x) >> 8; + backy = (DMulScale(y, sprcosang, x, -sprsinang, 10) + piv_y) >> 8; cbackx = clamp(backx, 0, mip->SizeX - 1); cbacky = clamp(backy, 0, mip->SizeY - 1); - sprcosang = MulScale14(daxscale, sprcosang); - sprsinang = MulScale14(daxscale, sprsinang); + sprcosang = MulScale(daxscale, sprcosang, 14); + sprsinang = MulScale(daxscale, sprsinang, 14); - x = (dasprx - globalposx) - DMulScale18(piv_x, sprcosang, piv_y, -sprsinang); - y = (daspry - globalposy) - DMulScale18(piv_y, sprcosang, piv_x, sprsinang); + x = (dasprx - globalposx) - DMulScale(piv_x, sprcosang, piv_y, -sprsinang, 18); + y = (daspry - globalposy) - DMulScale(piv_y, sprcosang, piv_x, sprsinang, 18); - cosang = FixedMul(cosang, dayscalerecip); - sinang = FixedMul(sinang, dayscalerecip); + cosang = MulScale(cosang, dayscalerecip, 16); + sinang = MulScale(sinang, dayscalerecip, 16); gxstart = y*cosang - x*sinang; gystart = x*cosang + y*sinang; - gxinc = DMulScale10(sprsinang, cosang, sprcosang, -sinang); - gyinc = DMulScale10(sprcosang, cosang, sprsinang, sinang); + gxinc = DMulScale(sprsinang, cosang, sprcosang, -sinang, 10); + gyinc = DMulScale(sprcosang, cosang, sprsinang, sinang, 10); if ((abs(globalposz - dasprz) >> 10) >= abs(dazscale)) return; x = 0; y = 0; j = MAX(mip->SizeX, mip->SizeY); @@ -381,7 +381,7 @@ namespace swrenderer ggyinc[i] = y; y += gyinc; } - syoff = DivScale21(globalposz - dasprz, FixedMul(dazscale, 0xE800)) + (piv_z << 7); + syoff = DivScale(globalposz - dasprz, MulScale(dazscale, 0xE800, 16), 21) + (piv_z << 7); yoff = (abs(gxinc) + abs(gyinc)) >> 1; bool useSlabDataBgra = !drawerargs.DrawerNeedsPalInput() && viewport->RenderTarget->IsBgra(); @@ -446,12 +446,12 @@ namespace swrenderer uint8_t oand16 = oand + 16; uint8_t oand32 = oand + 32; - if (yi > 0) { dagxinc = gxinc; dagyinc = FixedMul(gyinc, viewport->viewingrangerecip); } - else { dagxinc = -gxinc; dagyinc = -FixedMul(gyinc, viewport->viewingrangerecip); } + if (yi > 0) { dagxinc = gxinc; dagyinc = MulScale(gyinc, viewport->viewingrangerecip, 16); } + else { dagxinc = -gxinc; dagyinc = -MulScale(gyinc, viewport->viewingrangerecip, 16); } /* Fix for non 90 degree viewing ranges */ - nxoff = FixedMul(x2 - x1, viewport->viewingrangerecip); - x1 = FixedMul(x1, viewport->viewingrangerecip); + nxoff = MulScale(x2 - x1, viewport->viewingrangerecip, 16); + x1 = MulScale(x1, viewport->viewingrangerecip, 16); ggxstart = gxstart + ggyinc[ys]; ggystart = gystart - ggxinc[ys]; @@ -462,7 +462,7 @@ namespace swrenderer uint8_t *slabxoffs = &SlabData[mip->OffsetX[x]]; short *xyoffs = &mip->OffsetXY[x * (mip->SizeY + 1)]; - nx = FixedMul(ggxstart + ggxinc[x], viewport->viewingrangerecip) + x1; + nx = MulScale(ggxstart + ggxinc[x], viewport->viewingrangerecip, 16) + x1; ny = ggystart + ggyinc[x]; for (y = ys; y != ye; y += yi, nx += dagyinc, ny -= dagxinc) { @@ -522,20 +522,20 @@ namespace swrenderer if (k < 0) { if ((voxptr->backfacecull & oand32) == 0) continue; - z2 = MulScale32(l2, k) + viewport->viewwindow.centery; /* Below slab */ + z2 = MulScale(l2, k, 32) + viewport->viewwindow.centery; /* Below slab */ } else { if ((voxptr->backfacecull & oand) == 0) continue; /* Middle of slab */ - z2 = MulScale32(l1, k) + viewport->viewwindow.centery; + z2 = MulScale(l1, k, 32) + viewport->viewwindow.centery; } - z1 = MulScale32(l1, j) + viewport->viewwindow.centery; + z1 = MulScale(l1, j, 32) + viewport->viewwindow.centery; } else { if ((voxptr->backfacecull & oand16) == 0) continue; - z1 = MulScale32(l2, j) + viewport->viewwindow.centery; /* Above slab */ - z2 = MulScale32(l1, j + (zleng << 15)) + viewport->viewwindow.centery; + z1 = MulScale(l2, j, 32) + viewport->viewwindow.centery; /* Above slab */ + z2 = MulScale(l1, j + (zleng << 15), 32) + viewport->viewwindow.centery; } if (z2 <= z1) continue; @@ -546,7 +546,7 @@ namespace swrenderer } else { - if (z2 - z1 >= 1024) yinc = FixedDiv(zleng, z2 - z1); + if (z2 - z1 >= 1024) yinc = DivScale(zleng, z2 - z1, 16); else yinc = (((1 << 24) - 1) / (z2 - z1)) * zleng >> 8; } // [RH] Clip each column separately, not just by the first one. diff --git a/src/utility/nodebuilder/nodebuild_utility.cpp b/src/utility/nodebuilder/nodebuild_utility.cpp index ffd5f61e8..d94f84d07 100644 --- a/src/utility/nodebuilder/nodebuild_utility.cpp +++ b/src/utility/nodebuilder/nodebuild_utility.cpp @@ -453,8 +453,8 @@ void FNodeBuilder::FindPolyContainers (TArray &spots, TArrayx, v1->y, dx, dy) <= 0) { - fixed_t t = DivScale30 (center.fixY() - v1->y, dy); - fixed_t sx = v1->x + MulScale30 (dx, t); + fixed_t t = DivScale (center.fixY() - v1->y, dy, 30); + fixed_t sx = v1->x + MulScale(dx, t, 30); fixed_t dist = sx - spot->x; if (dist < closestdist && dist >= 0)