Optimization of C replacements of basic texture mapping functions, part 1

Affected functions: hlineasm4, vlineasm1, mvlineasm1, tvlineasm1.

Optimizations:
 - declare all used variables as locals in each function, const-qualified
   where possible. This removes unnecessary loads from memory in the loops.
 - rewrite "for (; cnt>=0; cnt--) {...}" to "cnt++; do {...} while (--cnt);"
   in the last three (yes, these functions iterate cnt+1 times). This
   makes them functionally equivalent to the asm versions (madness ensues for
   cnt < 0) and allows the compiler to remove one 'test' instruction at the
   end of each loop.
 - in the translucence function (tvlineasm1), replace addition with bitwise OR

Observations (system: Core2 Duo Linux x86_64):
 With a 1680x1050 window fully covered by the respective type of wall (simple,
 masked, trans. masked), fps increases by 3-4 from the baseline of approx. 60.

git-svn-id: https://svn.eduke32.com/eduke32@2405 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
helixhorned 2012-03-04 20:12:30 +00:00
parent 5119b135d1
commit 792ac2fdf0

View file

@ -30,28 +30,36 @@ void settransnormal(void) { transmode = 0; }
void settransreverse(void) { transmode = 1; } void settransreverse(void) { transmode = 1; }
//Ceiling/floor horizontal line functions ///// Ceiling/floor horizontal line functions /////
void sethlinesizes(int32_t logx, int32_t logy, intptr_t bufplc) void sethlinesizes(int32_t logx, int32_t logy, intptr_t bufplc)
{ glogx = logx; glogy = logy; gbuf = (char *)bufplc; } { glogx = logx; glogy = logy; gbuf = (char *)bufplc; }
void setpalookupaddress(char *paladdr) { ghlinepal = paladdr; } void setpalookupaddress(char *paladdr) { ghlinepal = paladdr; }
void setuphlineasm4(int32_t bxinc, int32_t byinc) { gbxinc = bxinc; gbyinc = byinc; } void setuphlineasm4(int32_t bxinc, int32_t byinc) { gbxinc = bxinc; gbyinc = byinc; }
void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, uint32_t bx, intptr_t p) void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, uint32_t bx, intptr_t p)
{ {
char *palptr;
palptr = (char *)&ghlinepal[paloffs];
if (!skiploadincs) { gbxinc = asm1; gbyinc = asm2; } if (!skiploadincs) { gbxinc = asm1; gbyinc = asm2; }
for (; cnt>=0; cnt--)
{ {
*((char *)p) = palptr[gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))]]; const char *const palptr = &ghlinepal[paloffs];
bx -= gbxinc; const char *const buf = gbuf;
by -= gbyinc; const int32_t bxinc = gbxinc, byinc = gbyinc;
p--; const int32_t logx = glogx, logy = glogy;
char *pp = (char *)p;
for (; cnt>=0; cnt--)
{
*pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]];
bx -= bxinc;
by -= byinc;
pp--;
}
} }
} }
//Sloped ceiling/floor vertical line functions ///// Sloped ceiling/floor vertical line functions /////
void setupslopevlin(int32_t logylogx, intptr_t bufplc, int32_t pinc) void setupslopevlin(int32_t logylogx, intptr_t bufplc, int32_t pinc)
{ {
glogx = (logylogx&255); glogy = (logylogx>>8); glogx = (logylogx&255); glogy = (logylogx>>8);
@ -77,61 +85,85 @@ void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t
} }
//Wall,face sprite/wall sprite vertical line functions ///// Wall,face sprite/wall sprite vertical line functions /////
void setupvlineasm(int32_t neglogy) { glogy = neglogy; } void setupvlineasm(int32_t neglogy) { glogy = neglogy; }
// cnt+1 loop iterations!
void vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p) void vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{ {
gbuf = (char *)bufplc; const char *const buf = (char *)bufplc;
gpal = (char *)paloffs; const char *const pal = (char *)paloffs;
for (; cnt>=0; cnt--) const int32_t logy = glogy, ourbpl = bpl;
char *pp = (char *)p;
cnt++;
do
{ {
*((char *)p) = gpal[gbuf[vplc>>glogy]]; *pp = pal[buf[vplc>>logy]];
p += bpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
} }
while (--cnt);
} }
void setupmvlineasm(int32_t neglogy) { glogy = neglogy; } void setupmvlineasm(int32_t neglogy) { glogy = neglogy; }
// cnt+1 loop iterations!
void mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p) void mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{ {
char ch; char ch;
gbuf = (char *)bufplc; const char *const buf = (char *)bufplc;
gpal = (char *)paloffs; const char *const pal = (char *)paloffs;
for (; cnt>=0; cnt--) const int32_t logy = glogy, ourbpl = bpl;
char *pp = (char *)p;
cnt++;
do
{ {
ch = gbuf[vplc>>glogy]; if (ch != 255) *((char *)p) = gpal[ch]; ch = buf[vplc>>logy]; if (ch != 255) *pp = pal[ch];
p += bpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
} }
while (--cnt);
} }
void setuptvlineasm(int32_t neglogy) { glogy = neglogy; } void setuptvlineasm(int32_t neglogy) { glogy = neglogy; }
// cnt+1 loop iterations!
void tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p) void tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{ {
char ch; char ch;
gbuf = (char *)bufplc; const char *const buf = (char *)bufplc;
gpal = (char *)paloffs; const char *const pal = (char *)paloffs;
if (transmode) const char *const trans = (char *)gtrans;
const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
char *pp = (char *)p;
cnt++;
if (transm)
{ {
for (; cnt>=0; cnt--) do
{ {
ch = gbuf[vplc>>glogy]; ch = buf[vplc>>glogy];
if (ch != 255) *((char *)p) = gtrans[(*((char *)p))+(gpal[ch]<<8)]; if (ch != 255) *pp = trans[(*pp)|(pal[ch]<<8)];
p += bpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
} }
while (--cnt);
} }
else else
{ {
for (; cnt>=0; cnt--) do
{ {
ch = gbuf[vplc>>glogy]; ch = buf[vplc>>logy];
if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<8)+gpal[ch]]; if (ch != 255) *pp = trans[((*pp)<<8)|pal[ch]];
p += bpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
} }
while (--cnt);
} }
} }