Merge branch 'sw-tilted-npo2-span-opt' into 'next'

NPO2 slope span optimization

See merge request STJr/SRB2!1216
This commit is contained in:
Nev3r 2021-05-29 09:30:58 -04:00
commit b978bc4507
6 changed files with 2225 additions and 80 deletions

View file

@ -93,6 +93,7 @@ set(SRB2_CORE_HEADERS
i_video.h
info.h
keys.h
libdivide.h
lzf.h
m_aatree.h
m_anigif.h

2111
src/libdivide.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -25,6 +25,7 @@
#include "w_wad.h"
#include "z_zone.h"
#include "console.h" // Until buffering gets finished
#include "libdivide.h" // used by NPO2 tilted span functions
#ifdef HWRENDER
#include "hardware/hw_main.h"

View file

@ -106,6 +106,9 @@ void R_DrawTiltedSpan_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -145,12 +148,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -194,12 +198,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -225,12 +230,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -261,12 +267,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -299,6 +306,9 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -337,12 +347,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -386,12 +397,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -417,12 +429,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -453,12 +466,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -490,6 +504,9 @@ void R_DrawTiltedSplat_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -529,12 +546,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -582,12 +600,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -615,12 +634,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -654,12 +674,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -1401,6 +1422,9 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -1440,12 +1464,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}
@ -1489,12 +1514,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}
@ -1520,12 +1546,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}
@ -1556,12 +1583,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}

View file

@ -247,6 +247,7 @@
<ClInclude Include="..\i_tcp.h" />
<ClInclude Include="..\i_video.h" />
<ClInclude Include="..\keys.h" />
<ClInclude Include="..\libdivide.h" />
<ClInclude Include="..\lua_hook.h" />
<ClInclude Include="..\lua_hud.h" />
<ClInclude Include="..\lua_libs.h" />

View file

@ -402,6 +402,9 @@
<ClInclude Include="..\tables.h">
<Filter>P_Play</Filter>
</ClInclude>
<ClInclude Include="..\libdivide.h">
<Filter>R_Rend</Filter>
</ClInclude>
<ClInclude Include="..\r_bsp.h">
<Filter>R_Rend</Filter>
</ClInclude>