From dca9d1357c89fca557dc6c8944e554e4a905dbeb Mon Sep 17 00:00:00 2001 From: helixhorned Date: Wed, 7 Mar 2012 19:42:37 +0000 Subject: [PATCH] Optimizations and tweaks for C replacements of mhline and thline. Hlines for masked and translucent masked ceiling/floor (sprites). - apply the 'for (...; cnt>0; cnt--) { ... }' --> 'do { ... } while (--cnt)' transformation, making these functions iterate cnt+1 times like the asm version. This also fixes an off-by-one issue where sprites or masked ceilings/floors had a one-pixel non-drawn line to the right. - This time, only declare-as-local two 'extern' globals (asm1 and asm2). It seems that I was too eager with "localing" all file-scoped vars earlier. GCC is able to remove the loads from memory inside the loop by itself, whereas clang is not. This is not trivial, since it has to prove that the 'screen' pointer passed to the functions will never alias these globals. git-svn-id: https://svn.eduke32.com/eduke32@2424 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/src/a-c.c | 38 +++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/polymer/eduke32/build/src/a-c.c b/polymer/eduke32/build/src/a-c.c index 4a5201b80..2d6f99eff 100644 --- a/polymer/eduke32/build/src/a-c.c +++ b/polymer/eduke32/build/src/a-c.c @@ -169,54 +169,70 @@ void tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intp //Floor sprite horizontal line functions void msethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; } +// cntup16>>16 + 1 iterations void mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p) { char ch; + const int32_t xinc = asm1, yinc = asm2; + UNREFERENCED_PARAMETER(junk); gbuf = (char *)bufplc; gpal = (char *)asm3; - for (cntup16>>=16; cntup16>0; cntup16--) + + cntup16>>=16; + cntup16++; + do { ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))]; if (ch != 255) *((char *)p) = gpal[ch]; - bx += asm1; - by += asm2; + bx += xinc; + by += yinc; p++; } +
while (--cntup16); } void tsethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; } +// cntup16>>16 + 1 iterations void thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p) { char ch; + const int32_t xinc = asm1, yinc = asm2; + UNREFERENCED_PARAMETER(junk); gbuf = (char *)bufplc; gpal = (char *)asm3; + + cntup16>>=16; + cntup16++; + if (transmode) { - for (cntup16>>=16; cntup16>0; cntup16--) + do { ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))]; - if (ch != 255) *((char *)p) = gtrans[(*((char *)p))+(gpal[ch]<<8)]; - bx += asm1; - by += asm2; + if (ch != 255) *((char *)p) = gtrans[(*((char *)p))|(gpal[ch]<<8)]; + bx += xinc; + by += yinc; p++; } + while (--cntup16); } else { - for (cntup16>>=16; cntup16>0; cntup16--) + do { ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))]; - if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<8)+gpal[ch]]; - bx += asm1; - by += asm2; + if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<8)|gpal[ch]]; + bx += xinc; + by += yinc; p++; } + while (--cntup16); } }