NPO2 slope span optimization

This commit is contained in:
Hannu Hanhi 2020-10-22 00:31:28 +03:00
parent 1cd73315f1
commit 86ad187f05
6 changed files with 2196 additions and 80 deletions

View file

@ -87,6 +87,7 @@ set(SRB2_CORE_HEADERS
i_video.h
info.h
keys.h
libdivide.h
lzf.h
m_aatree.h
m_anigif.h

2082
src/libdivide.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -25,6 +25,7 @@
#include "w_wad.h"
#include "z_zone.h"
#include "console.h" // Until buffering gets finished
#include "libdivide.h" // used by NPO2 tilted span functions
#ifdef HWRENDER
#include "hardware/hw_main.h"

View file

@ -83,6 +83,9 @@ void R_DrawTiltedSpan_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -122,12 +125,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -174,12 +178,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -205,12 +210,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -241,12 +247,13 @@ void R_DrawTiltedSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = colormap[source[((y * ds_flatwidth) + x)]];
}
@ -279,6 +286,9 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -317,12 +327,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -369,12 +380,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -400,12 +412,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -436,12 +449,13 @@ void R_DrawTiltedTranslucentSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dest);
}
@ -473,6 +487,9 @@ void R_DrawTiltedSplat_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -512,12 +529,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -568,12 +586,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -601,12 +620,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -640,12 +660,13 @@ void R_DrawTiltedSplat_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
val = source[((y * ds_flatwidth) + x)];
}
@ -864,6 +885,9 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
double endz, endu, endv;
UINT32 stepu, stepv;
struct libdivide_u32_t x_divider = libdivide_u32_gen(ds_flatwidth);
struct libdivide_u32_t y_divider = libdivide_u32_gen(ds_flatheight);
iz = ds_szp->z + ds_szp->y*(centery-ds_y) + ds_szp->x*(ds_x1-centerx);
// Lighting is simple. It's just linear interpolation from start to end
@ -903,12 +927,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}
@ -955,12 +980,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}
@ -986,12 +1012,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}
@ -1022,12 +1049,13 @@ void R_DrawTiltedTranslucentWaterSpan_NPO2_8(void)
// Carefully align all of my Friends.
if (x < 0)
x = ds_flatwidth - ((UINT32)(ds_flatwidth - x) % ds_flatwidth);
x += (libdivide_u32_do((UINT32)(-x-1), &x_divider) + 1) * ds_flatwidth;
else
x -= libdivide_u32_do((UINT32)x, &x_divider) * ds_flatwidth;
if (y < 0)
y = ds_flatheight - ((UINT32)(ds_flatheight - y) % ds_flatheight);
x %= ds_flatwidth;
y %= ds_flatheight;
y += (libdivide_u32_do((UINT32)(-y-1), &y_divider) + 1) * ds_flatheight;
else
y -= libdivide_u32_do((UINT32)y, &y_divider) * ds_flatheight;
*dest = *(ds_transmap + (colormap[source[((y * ds_flatwidth) + x)]] << 8) + *dsrc++);
}

View file

@ -244,6 +244,7 @@
<ClInclude Include="..\i_tcp.h" />
<ClInclude Include="..\i_video.h" />
<ClInclude Include="..\keys.h" />
<ClInclude Include="..\libdivide.h" />
<ClInclude Include="..\lua_hook.h" />
<ClInclude Include="..\lua_hud.h" />
<ClInclude Include="..\lua_libs.h" />

View file

@ -402,6 +402,9 @@
<ClInclude Include="..\tables.h">
<Filter>P_Play</Filter>
</ClInclude>
<ClInclude Include="..\libdivide.h">
<Filter>R_Rend</Filter>
</ClInclude>
<ClInclude Include="..\r_bsp.h">
<Filter>R_Rend</Filter>
</ClInclude>