mirror of
https://github.com/DrBeef/Raze.git
synced 2025-01-18 15:11:51 +00:00
Optimization of C replacements of basic texture mapping functions, part 1
Affected functions: hlineasm4, vlineasm1, mvlineasm1, tvlineasm1.
Optimizations:
- Declare all used variables as locals, const-qualified where possible, in each function. This removes unnecessary loads from memory in the loops.
- Rewrite "for (; cnt>=0; cnt--) {...}" as "cnt++; do {...} while (--cnt);" in the last three (yes, these functions iterate cnt+1 times). This makes them functionally equivalent to the asm versions (madness ensues for cnt < 0) and allows the compiler to remove one 'test' instruction at the end of each loop.
- In the translucence function, replace addition with ORing.
Observations (system: Core2 Duo, Linux x86_64): with a 1680x1050 window fully covered by the respective type of wall (simple, masked, translucent masked), fps increases by 3-4 from the baseline of approx. 60.
git-svn-id: https://svn.eduke32.com/eduke32@2405 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
5119b135d1
commit
792ac2fdf0
1 changed files with 64 additions and 32 deletions
|
@ -30,28 +30,36 @@ void settransnormal(void) { transmode = 0; }
|
|||
// Switches translucent drawing to the "reverse" ordering by setting
// transmode to 1. tvlineasm1 tests this flag to decide which of the two
// pixels (existing vs. incoming) forms the low byte of the table index.
void settransreverse(void)
{
    transmode = 1;
}
|
||||
|
||||
|
||||
//Ceiling/floor horizontal line functions
|
||||
///// Ceiling/floor horizontal line functions /////
|
||||
|
||||
void sethlinesizes(int32_t logx, int32_t logy, intptr_t bufplc)
|
||||
{ glogx = logx; glogy = logy; gbuf = (char *)bufplc; }
|
||||
// Stores the base address of the lookup table that hlineasm4 indexes
// (offset by its paloffs argument) to translate texels into output pixels.
void setpalookupaddress(char *paladdr)
{
    ghlinepal = paladdr;
}
|
||||
// Stores the per-pixel texture coordinate steps for hlineasm4, which
// subtracts them from bx/by after every pixel. Note that hlineasm4
// overwrites both values from asm1/asm2 when called with skiploadincs == 0.
void setuphlineasm4(int32_t bxinc, int32_t byinc)
{
    gbxinc = bxinc;
    gbyinc = byinc;
}
|
||||
void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, uint32_t bx, intptr_t p)
|
||||
{
|
||||
char *palptr;
|
||||
|
||||
palptr = (char *)&ghlinepal[paloffs];
|
||||
if (!skiploadincs) { gbxinc = asm1; gbyinc = asm2; }
|
||||
for (; cnt>=0; cnt--)
|
||||
|
||||
{
|
||||
*((char *)p) = palptr[gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))]];
|
||||
bx -= gbxinc;
|
||||
by -= gbyinc;
|
||||
p--;
|
||||
const char *const palptr = &ghlinepal[paloffs];
|
||||
const char *const buf = gbuf;
|
||||
const int32_t bxinc = gbxinc, byinc = gbyinc;
|
||||
const int32_t logx = glogx, logy = glogy;
|
||||
char *pp = (char *)p;
|
||||
|
||||
for (; cnt>=0; cnt--)
|
||||
{
|
||||
*pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]];
|
||||
bx -= bxinc;
|
||||
by -= byinc;
|
||||
pp--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Sloped ceiling/floor vertical line functions
|
||||
///// Sloped ceiling/floor vertical line functions /////
|
||||
|
||||
void setupslopevlin(int32_t logylogx, intptr_t bufplc, int32_t pinc)
|
||||
{
|
||||
glogx = (logylogx&255); glogy = (logylogx>>8);
|
||||
|
@ -77,61 +85,85 @@ void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t
|
|||
}
|
||||
|
||||
|
||||
//Wall,face sprite/wall sprite vertical line functions
|
||||
///// Wall,face sprite/wall sprite vertical line functions /////
|
||||
|
||||
// Stores the shift count that vlineasm1 applies to its texture coordinate
// (vplc >> glogy) to obtain the texel index.
void setupvlineasm(int32_t neglogy)
{
    glogy = neglogy;
}
|
||||
// cnt+1 loop iterations!
|
||||
void vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
|
||||
{
|
||||
gbuf = (char *)bufplc;
|
||||
gpal = (char *)paloffs;
|
||||
for (; cnt>=0; cnt--)
|
||||
const char *const buf = (char *)bufplc;
|
||||
const char *const pal = (char *)paloffs;
|
||||
const int32_t logy = glogy, ourbpl = bpl;
|
||||
char *pp = (char *)p;
|
||||
|
||||
cnt++;
|
||||
|
||||
do
|
||||
{
|
||||
*((char *)p) = gpal[gbuf[vplc>>glogy]];
|
||||
p += bpl;
|
||||
*pp = pal[buf[vplc>>logy]];
|
||||
pp += ourbpl;
|
||||
vplc += vinc;
|
||||
}
|
||||
while (--cnt);
|
||||
}
|
||||
|
||||
// Stores the shift count that mvlineasm1 applies to its texture coordinate
// (vplc >> glogy) to obtain the texel index.
void setupmvlineasm(int32_t neglogy)
{
    glogy = neglogy;
}
|
||||
// cnt+1 loop iterations!
|
||||
void mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
|
||||
{
|
||||
char ch;
|
||||
|
||||
gbuf = (char *)bufplc;
|
||||
gpal = (char *)paloffs;
|
||||
for (; cnt>=0; cnt--)
|
||||
const char *const buf = (char *)bufplc;
|
||||
const char *const pal = (char *)paloffs;
|
||||
const int32_t logy = glogy, ourbpl = bpl;
|
||||
char *pp = (char *)p;
|
||||
|
||||
cnt++;
|
||||
|
||||
do
|
||||
{
|
||||
ch = gbuf[vplc>>glogy]; if (ch != 255) *((char *)p) = gpal[ch];
|
||||
p += bpl;
|
||||
ch = buf[vplc>>logy]; if (ch != 255) *pp = pal[ch];
|
||||
pp += ourbpl;
|
||||
vplc += vinc;
|
||||
}
|
||||
while (--cnt);
|
||||
}
|
||||
|
||||
// Stores the shift count that tvlineasm1 applies to its texture coordinate
// (vplc >> glogy) to obtain the texel index.
void setuptvlineasm(int32_t neglogy)
{
    glogy = neglogy;
}
|
||||
// cnt+1 loop iterations!
|
||||
void tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
|
||||
{
|
||||
char ch;
|
||||
|
||||
gbuf = (char *)bufplc;
|
||||
gpal = (char *)paloffs;
|
||||
if (transmode)
|
||||
const char *const buf = (char *)bufplc;
|
||||
const char *const pal = (char *)paloffs;
|
||||
const char *const trans = (char *)gtrans;
|
||||
const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
|
||||
char *pp = (char *)p;
|
||||
|
||||
cnt++;
|
||||
|
||||
if (transm)
|
||||
{
|
||||
for (; cnt>=0; cnt--)
|
||||
do
|
||||
{
|
||||
ch = gbuf[vplc>>glogy];
|
||||
if (ch != 255) *((char *)p) = gtrans[(*((char *)p))+(gpal[ch]<<8)];
|
||||
p += bpl;
|
||||
ch = buf[vplc>>glogy];
|
||||
if (ch != 255) *pp = trans[(*pp)|(pal[ch]<<8)];
|
||||
pp += ourbpl;
|
||||
vplc += vinc;
|
||||
}
|
||||
while (--cnt);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (; cnt>=0; cnt--)
|
||||
do
|
||||
{
|
||||
ch = gbuf[vplc>>glogy];
|
||||
if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<8)+gpal[ch]];
|
||||
p += bpl;
|
||||
ch = buf[vplc>>logy];
|
||||
if (ch != 255) *pp = trans[((*pp)<<8)|pal[ch]];
|
||||
pp += ourbpl;
|
||||
vplc += vinc;
|
||||
}
|
||||
while (--cnt);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue