mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-13 11:30:44 +00:00
Optimize nonpow2_mhline and nonpow2_thline by replacing the divide operation with a cheaper multiplication
Patch from Nuke.YKT. git-svn-id: https://svn.eduke32.com/eduke32@7362 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
425c183c67
commit
e9d53fce0a
1 changed file with 7 additions and 7 deletions
|
@ -3181,14 +3181,14 @@ static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, uint32
|
||||||
const char *const A_C_RESTRICT buf = (char *)bufplc;
|
const char *const A_C_RESTRICT buf = (char *)bufplc;
|
||||||
const char *const A_C_RESTRICT pal = (char *)asm3;
|
const char *const A_C_RESTRICT pal = (char *)asm3;
|
||||||
|
|
||||||
const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
|
const uint32_t xmul = globalxspan;
|
||||||
const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
|
const uint32_t ymul = globalyspan;
|
||||||
const uint32_t yspan = globalyspan;
|
const uint32_t yspan = globalyspan;
|
||||||
const int32_t xinc = asm1, yinc = asm2;
|
const int32_t xinc = asm1, yinc = asm2;
|
||||||
|
|
||||||
for (cntup16>>=16; cntup16>0; cntup16--)
|
for (cntup16>>=16; cntup16>0; cntup16--)
|
||||||
{
|
{
|
||||||
ch = buf[(divideu32(bx, xdiv))*yspan + divideu32(by, ydiv)];
|
ch = buf[mulscale31(bx>>1, xmul)*yspan + mulscale31(by>>1, ymul)];
|
||||||
|
|
||||||
if (ch != 255) *p = pal[ch];
|
if (ch != 255) *p = pal[ch];
|
||||||
bx += xinc;
|
bx += xinc;
|
||||||
|
@ -3206,8 +3206,8 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, uint32
|
||||||
const char *const A_C_RESTRICT pal = (char *)asm3;
|
const char *const A_C_RESTRICT pal = (char *)asm3;
|
||||||
const char *const A_C_RESTRICT trans = paletteGetBlendTable(globalblend);
|
const char *const A_C_RESTRICT trans = paletteGetBlendTable(globalblend);
|
||||||
|
|
||||||
const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
|
const uint32_t xmul = globalxspan;
|
||||||
const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
|
const uint32_t ymul = globalyspan;
|
||||||
const uint32_t yspan = globalyspan;
|
const uint32_t yspan = globalyspan;
|
||||||
const int32_t xinc = asm1, yinc = asm2;
|
const int32_t xinc = asm1, yinc = asm2;
|
||||||
|
|
||||||
|
@ -3215,7 +3215,7 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, uint32
|
||||||
{
|
{
|
||||||
for (cntup16>>=16; cntup16>0; cntup16--)
|
for (cntup16>>=16; cntup16>0; cntup16--)
|
||||||
{
|
{
|
||||||
ch = buf[divideu32(bx, xdiv)*yspan + divideu32(by, ydiv)];
|
ch = buf[mulscale31(bx>>1, xmul)*yspan + mulscale31(by>>1, ymul)];
|
||||||
if (ch != 255) *p = trans[(*p)|(pal[ch]<<8)];
|
if (ch != 255) *p = trans[(*p)|(pal[ch]<<8)];
|
||||||
bx += xinc;
|
bx += xinc;
|
||||||
by += yinc;
|
by += yinc;
|
||||||
|
@ -3226,7 +3226,7 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, uint32
|
||||||
{
|
{
|
||||||
for (cntup16>>=16; cntup16>0; cntup16--)
|
for (cntup16>>=16; cntup16>0; cntup16--)
|
||||||
{
|
{
|
||||||
ch = buf[divideu32(bx, xdiv)*yspan + divideu32(by, ydiv)];
|
ch = buf[mulscale31(bx>>1, xmul)*yspan + mulscale31(by>>1, ymul)];
|
||||||
if (ch != 255) *p = trans[((*p)<<8)|pal[ch]];
|
if (ch != 255) *p = trans[((*p)<<8)|pal[ch]];
|
||||||
bx += xinc;
|
bx += xinc;
|
||||||
by += yinc;
|
by += yinc;
|
||||||
|
|
Loading…
Reference in a new issue