// A.ASM replacement using C
// Mainly by Ken Silverman, with things melded with my port by
// Jonathon Fowler (jf@jonof.id.au)
//
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
//
// This file has been modified from Ken Silverman's original release
// by Jonathon Fowler (jf@jonof.id.au)
// by the EDuke32 team (development@voidpoint.com)

#include "a.h"
#include "pragmas.h"

#ifdef ENGINE_USING_A_C

#define BITSOFPRECISION 3
#define BITSOFPRECISIONPOW 8

// Compile code to saturate vplc for sprites to prevent stray lines at the
// bottom of non-y-flipped ones?
#define USE_SATURATE_VPLC
// Also for translucent masks?
//#define USE_SATURATE_VPLC_TRANS

extern intptr_t asm1, asm2, asm3, asm4;
extern int32_t globalx3, globaly3;

#ifdef USE_ASM64
# define A64_ASSIGN(var, val) var=val
#else
# define A64_ASSIGN(var, val)
#endif

#ifdef USE_ASM64
// variables for a64.yasm
int32_t a64_bpl, a64_transmode, a64_glogy;
intptr_t a64_paloffs;
char *a64_gtrans;
#endif

static int32_t bpl, transmode = 0;
static char *gbuf;
static int32_t glogx, glogy;
int32_t gpinc;
static int32_t gbxinc, gbyinc;
static char *gpal, *ghlinepal, *gtrans;
static char *gpal2;

//Global variable functions
void setvlinebpl(int32_t dabpl) { A64_ASSIGN(a64_bpl, dabpl); bpl = dabpl; }
void fixtransluscence(intptr_t datransoff) { A64_ASSIGN(a64_gtrans, (char *)datransoff); gtrans = (char *)datransoff; }
void settransnormal(void) { A64_ASSIGN(a64_transmode, 0); transmode = 0; }
void settransreverse(void) { A64_ASSIGN(a64_transmode, 1); transmode = 1; }


///// Ceiling/floor horizontal line functions /////

void sethlinesizes(int32_t logx, int32_t logy, intptr_t bufplc)
{
    glogx = logx;
    glogy = logy;
    gbuf = (char *)bufplc;
}

void setpalookupaddress(char *paladdr) { ghlinepal = paladdr; }
void setuphlineasm4(int32_t bxinc, int32_t byinc) { gbxinc = bxinc; gbyinc = byinc; }

void hlineasm4(bssize_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, uint32_t bx, intptr_t p)
{
    Bassert(gbuf);

    if (!skiploadincs)
    {
        gbxinc = asm1;
        gbyinc = asm2;
    }

    const char *const A_C_RESTRICT palptr = &ghlinepal[paloffs];
    const char *const A_C_RESTRICT buf = gbuf;
    const vec2_t inc = { gbxinc, gbyinc };
    const vec2_t log = { glogx, glogy };
    const vec2_t log32 = { 32-log.x, 32-log.y };
    char *pp = (char *)p;

#ifdef CLASSIC_SLICE_BY_4
    for (; cnt>=4; cnt-=4, pp-=4)
    {
#if 1
        *pp     = palptr[buf[((bx>>log32.x)<<log.y)+(by>>log32.y)]];
        *(pp-1) = palptr[buf[(((bx-inc.x)>>log32.x)<<log.y)+((by-inc.y)>>log32.y)]];
        *(pp-2) = palptr[buf[(((bx-(inc.x<<1))>>log32.x)<<log.y)+((by-(inc.y<<1))>>log32.y)]];
        *(pp-3) = palptr[buf[(((bx-(inc.x*3))>>log32.x)<<log.y)+((by-(inc.y*3))>>log32.y)]];
#else
        *(int32_t *)(pp-3) = palptr[buf[(((bx-(inc.x*3))>>log32.x)<<log.y)+((by-(inc.y*3))>>log32.y)]] +
                             (palptr[buf[(((bx-(inc.x<<1))>>log32.x)<<log.y)+((by-(inc.y<<1))>>log32.y)]]<<8) +
                             (palptr[buf[(((bx-inc.x)>>log32.x)<<log.y)+((by-inc.y)>>log32.y)]]<<16) +
                             (palptr[buf[((bx>>log32.x)<<log.y)+(by>>log32.y)]]<<24);
#endif
        bx -= inc.x<<2;
        by -= inc.y<<2;
    }
#endif

    for (; cnt>=0; cnt--, pp--)
    {
        *pp = palptr[buf[((bx>>log32.x)<<log.y)+(by>>log32.y)]];
        bx -= inc.x;
        by -= inc.y;
    }
}
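
/*
   Note on the fixed-point texture indexing used by hlineasm4 (and by
   mhline/thline further down): bx and by are 32-bit unsigned u/v coordinates
   whose integer parts live in the top glogx/glogy bits, and tiles are stored
   column-major, so the texel offset is (u << logy) + v.  A minimal sketch of
   that lookup for a single pixel (illustration only; the helper name is mine,
   not part of the engine):
*/
#if 0
static inline uint32_t hlinepix(uint32_t bx, uint32_t by, int32_t logx, int32_t logy)
{
    uint32_t const u = bx >> (32-logx);  // integer u, 0 .. (1<<logx)-1
    uint32_t const v = by >> (32-logy);  // integer v, 0 .. (1<<logy)-1
    return (u << logy) + v;              // column-major texel offset
}
#endif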

///// Sloped ceiling/floor vertical line functions /////

extern int32_t sloptable[16384];

void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, bssize_t cnt, int32_t bx, int32_t by)
{
    intptr_t * A_C_RESTRICT slopalptr;
    int32_t bz, bzinc;
    uint32_t u, v;

    bz = asm3;
    bzinc = (asm1>>3);
    slopalptr = (intptr_t *)slopaloffs;

    for (; cnt>0; cnt--)
    {
        i = (sloptable[(bz>>6)+8192]);
        bz += bzinc;
        u = bx+(inthi_t)globalx3*i;
        v = by+(inthi_t)globaly3*i;
        (*(char *)p) = *(char *)(((intptr_t)slopalptr[0])+gbuf[((u>>(32-glogx))<<glogy)+(v>>(32-glogy))]);
        slopalptr--;
        p += gpinc;
    }
}


///// Wall,face sprite/wall sprite vertical line functions /////

extern int32_t globaltilesizy;

static inline uint32_t ourmulscale32(uint32_t a, uint32_t b)
{
    return ((uint64_t)a*b)>>32;
}

static inline int32_t getpix(int32_t logy, const char *buf, uint32_t vplc)
{
    return logy ? buf[vplc>>logy] : buf[ourmulscale32(vplc,globaltilesizy)];
}

void setupvlineasm(int32_t neglogy) { glogy = neglogy; }

// cnt+1 loop iterations!
int32_t vlineasm1(int32_t vinc, intptr_t paloffs, bssize_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
    const char *const A_C_RESTRICT buf = (char *)bufplc;
    const char *const A_C_RESTRICT pal = (char *)paloffs;
    const int32_t logy = glogy, ourbpl = bpl;
    char *pp = (char *)p;

    cnt++;

    if (logy)
    {
#ifdef CLASSIC_SLICE_BY_4
        for (; cnt>=4; cnt-=4)
        {
            *pp = pal[buf[vplc>>logy]];
            *(pp+ourbpl) = pal[buf[(vplc+vinc)>>logy]];
            *(pp+(ourbpl<<1)) = pal[buf[(vplc+(vinc<<1))>>logy]];
            *(pp+(ourbpl*3)) = pal[buf[(vplc+(vinc*3))>>logy]];
            pp += ourbpl<<2;
            vplc += vinc<<2;
        }
#endif
        while (cnt--)
        {
            *pp = pal[buf[vplc>>logy]];
            pp += ourbpl;
            vplc += vinc;
        }
    }
    else
    {
#ifdef CLASSIC_SLICE_BY_4
        for (; cnt>=4; cnt-=4)
        {
            *pp = pal[buf[ourmulscale32(vplc, globaltilesizy)]];
            *(pp+ourbpl) = pal[buf[ourmulscale32((vplc+vinc), globaltilesizy)]];
            *(pp+(ourbpl<<1)) = pal[buf[ourmulscale32((vplc+(vinc<<1)), globaltilesizy)]];
            *(pp+(ourbpl*3)) = pal[buf[ourmulscale32((vplc+(vinc*3)), globaltilesizy)]];
            pp += ourbpl<<2;
            vplc += vinc<<2;
        }
#endif
        while (cnt--)
        {
            *pp = pal[buf[ourmulscale32(vplc,globaltilesizy)]];
            pp += ourbpl;
            vplc += vinc;
        }
    }

    return vplc;
}

extern intptr_t palookupoffse[4];
extern uint32_t vplce[4];
extern int32_t vince[4];
extern intptr_t bufplce[4];
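
// The *asm4 variants below draw four adjacent screen columns per iteration;
// the engine fills in palookupoffse/vplce/vince/bufplce for those four columns
// before calling them, and the updated vplc values are written back to vplce[].
// With USE_VECTOR_EXT the four vplc/vinc values live in a GCC/Clang vector
// type, so the per-column increments become a single vector add.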

#if (EDUKE32_GCC_PREREQ(4,7) || __has_extension(attribute_ext_vector_type)) && defined BITNESS64
// XXX: The "Ubuntu clang version 3.5-1ubuntu1 (trunk) (based on LLVM 3.5)"
// does not compile us with USE_VECTOR_EXT. Maybe a newer one does?
# if !defined __clang__
#  define USE_VECTOR_EXT
# endif
#endif

#ifdef USE_VECTOR_EXT
typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
#endif

#ifdef USE_SATURATE_VPLC
// After "vplc += vinc" overflows, vplc ends up smaller than vinc; OR-ing in
// g_saturate (-1 when saturation is enabled) then clamps vplc to 0xFFFFFFFF,
// so the texture row index sticks at the last texel instead of wrapping back
// to row 0 (the "stray lines" mentioned at the top of this file).
# define saturate_vplc(vplc, vinc) vplc |= g_saturate & -(vplc < (uint32_t)vinc)
// NOTE: the vector types yield -1 for logical "true":
# define saturate_vplc_vec(vplc, vinc) vplc |= g_saturate & (vplc < vinc)
# ifdef USE_SATURATE_VPLC_TRANS
# define saturate_vplc_trans(vplc, vinc) saturate_vplc(vplc, vinc)
# else
# define saturate_vplc_trans(vplc, vinc)
# endif
#else
# define saturate_vplc(vplc, vinc)
# define saturate_vplc_vec(vplc, vinc)
# define saturate_vplc_trans(vplc, vinc)
#endif

#ifdef CLASSIC_NONPOW2_YSIZE_WALLS
// cnt >= 1
static void vlineasm4nlogy(bssize_t cnt, char *p, char *const A_C_RESTRICT * pal, char *const A_C_RESTRICT * buf,
# ifdef USE_VECTOR_EXT
                           uint32_vec4 vplc, const uint32_vec4 vinc)
# else
                           uint32_t * vplc, const int32_t *vinc)
# endif
{
    const int32_t ourbpl = bpl;

    do
    {
        p[0] = pal[0][buf[0][ourmulscale32(vplc[0], globaltilesizy)]];
        p[1] = pal[1][buf[1][ourmulscale32(vplc[1], globaltilesizy)]];
        p[2] = pal[2][buf[2][ourmulscale32(vplc[2], globaltilesizy)]];
        p[3] = pal[3][buf[3][ourmulscale32(vplc[3], globaltilesizy)]];

# if defined USE_VECTOR_EXT
        vplc += vinc;
# else
        vplc[0] += vinc[0];
        vplc[1] += vinc[1];
        vplc[2] += vinc[2];
        vplc[3] += vinc[3];
# endif
        p += ourbpl;
    }
    while (--cnt);

    Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}
#endif

// cnt >= 1
void vlineasm4(bssize_t cnt, char *p)
{
    char *const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
    char *const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};

#ifdef USE_VECTOR_EXT
    uint32_vec4 vinc = {(uint32_t)vince[0], (uint32_t)vince[1], (uint32_t)vince[2], (uint32_t)vince[3]};
    uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
#else
    const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
    uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
#endif
    const int32_t logy = glogy, ourbpl = bpl;

#ifdef CLASSIC_NONPOW2_YSIZE_WALLS
    if (EDUKE32_PREDICT_FALSE(!logy))
    {
        // This should only happen when 'globalshiftval = 0' has been set in engine.c.
        vlineasm4nlogy(cnt, p, pal, buf, vplc, vinc);
        return;
    }
#else
    assert(logy);
#endif

    // just fucking shoot me
#ifdef CLASSIC_SLICE_BY_4
    for (; cnt>=4; cnt-=4)
    {
        p[0] = pal[0][buf[0][vplc[0]>>logy]];
        p[1] = pal[1][buf[1][vplc[1]>>logy]];
        p[2] = pal[2][buf[2][vplc[2]>>logy]];
        p[3] = pal[3][buf[3][vplc[3]>>logy]];
        (p+ourbpl)[0] = pal[0][buf[0][(vplc[0]+vinc[0])>>logy]];
        (p+ourbpl)[1] = pal[1][buf[1][(vplc[1]+vinc[1])>>logy]];
        (p+ourbpl)[2] = pal[2][buf[2][(vplc[2]+vinc[2])>>logy]];
        (p+ourbpl)[3] = pal[3][buf[3][(vplc[3]+vinc[3])>>logy]];
        (p+(ourbpl<<1))[0] = pal[0][buf[0][(vplc[0]+(vinc[0]<<1))>>logy]];
        (p+(ourbpl<<1))[1] = pal[1][buf[1][(vplc[1]+(vinc[1]<<1))>>logy]];
        (p+(ourbpl<<1))[2] = pal[2][buf[2][(vplc[2]+(vinc[2]<<1))>>logy]];
        (p+(ourbpl<<1))[3] = pal[3][buf[3][(vplc[3]+(vinc[3]<<1))>>logy]];
        (p+(ourbpl*3))[0] = pal[0][buf[0][(vplc[0]+(vinc[0]*3))>>logy]];
        (p+(ourbpl*3))[1] = pal[1][buf[1][(vplc[1]+(vinc[1]*3))>>logy]];
        (p+(ourbpl*3))[2] = pal[2][buf[2][(vplc[2]+(vinc[2]*3))>>logy]];
        (p+(ourbpl*3))[3] = pal[3][buf[3][(vplc[3]+(vinc[3]*3))>>logy]];

#if defined USE_VECTOR_EXT
        vplc += vinc<<2;
#else
        vplc[0] += vinc[0]<<2;
        vplc[1] += vinc[1]<<2;
        vplc[2] += vinc[2]<<2;
        vplc[3] += vinc[3]<<2;
#endif
        p += ourbpl<<2;
    }
#endif

    while (cnt--)
    {
        p[0] = pal[0][buf[0][vplc[0]>>logy]];
        p[1] = pal[1][buf[1][vplc[1]>>logy]];
        p[2] = pal[2][buf[2][vplc[2]>>logy]];
        p[3] = pal[3][buf[3][vplc[3]>>logy]];

#if defined USE_VECTOR_EXT
        vplc += vinc;
#else
        vplc[0] += vinc[0];
        vplc[1] += vinc[1];
        vplc[2] += vinc[2];
        vplc[3] += vinc[3];
#endif
        p += ourbpl;
    }

    Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}

#ifdef USE_SATURATE_VPLC
static int32_t g_saturate;  // -1 if saturating vplc is requested, 0 else
# define set_saturate(dosaturate) g_saturate = -(int)!!dosaturate
#else
# define set_saturate(dosaturate) UNREFERENCED_PARAMETER(dosaturate)
#endif

void setupmvlineasm(int32_t neglogy, int32_t dosaturate)
{
    glogy = neglogy;
    set_saturate(dosaturate);
}
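
// In the masked (mvline*) and translucent (tvline*, thline, tspritevline)
// variants below, palette index 255 is the transparent color: texels equal to
// 255 are skipped and the framebuffer byte is left untouched.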

// cnt+1 loop iterations!
int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, bssize_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
    char ch;

    const char *const A_C_RESTRICT buf = (char *)bufplc;
    const char *const A_C_RESTRICT pal = (char *)paloffs;
    const int32_t logy = glogy, ourbpl = bpl;
    char *pp = (char *)p;

    cnt++;

    if (!logy)
    {
        do
        {
            ch = buf[ourmulscale32(vplc,globaltilesizy)];
            if (ch != 255) *pp = pal[ch];
            pp += ourbpl;
            vplc += vinc;
            saturate_vplc(vplc, vinc);
        }
        while (--cnt);

        return vplc;
    }

    do
    {
        if (buf[vplc>>logy] != 255) *pp = pal[buf[vplc>>logy]];
        pp += ourbpl;
        vplc += vinc;
        saturate_vplc(vplc, vinc);
    }
    while (--cnt);

    return vplc;
}

// cnt >= 1
void mvlineasm4(bssize_t cnt, char *p)
{
    char *const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
    char *const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};

#ifdef USE_VECTOR_EXT
    uint32_vec4 vinc = {(uint32_t)vince[0], (uint32_t)vince[1], (uint32_t)vince[2], (uint32_t)vince[3]};
    uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
#else
    const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
    uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
#endif
    const int32_t logy = glogy, ourbpl = bpl;
    char ch;

    if (logy)
    {
        do
        {
            ch = buf[0][vplc[0]>>logy]; if (ch != 255) p[0] = pal[0][ch];
            ch = buf[1][vplc[1]>>logy]; if (ch != 255) p[1] = pal[1][ch];
            ch = buf[2][vplc[2]>>logy]; if (ch != 255) p[2] = pal[2][ch];
            ch = buf[3][vplc[3]>>logy]; if (ch != 255) p[3] = pal[3][ch];

#if !defined USE_VECTOR_EXT
            vplc[0] += vinc[0];
            vplc[1] += vinc[1];
            vplc[2] += vinc[2];
            vplc[3] += vinc[3];
            saturate_vplc(vplc[0], vinc[0]);
            saturate_vplc(vplc[1], vinc[1]);
            saturate_vplc(vplc[2], vinc[2]);
            saturate_vplc(vplc[3], vinc[3]);
#else
            vplc += vinc;
            saturate_vplc_vec(vplc, vinc);
#endif
            p += ourbpl;
        }
        while (--cnt);
    }
    else
    {
        do
        {
            ch = buf[0][ourmulscale32(vplc[0],globaltilesizy)]; if (ch != 255) p[0] = pal[0][ch];
            ch = buf[1][ourmulscale32(vplc[1],globaltilesizy)]; if (ch != 255) p[1] = pal[1][ch];
            ch = buf[2][ourmulscale32(vplc[2],globaltilesizy)]; if (ch != 255) p[2] = pal[2][ch];
            ch = buf[3][ourmulscale32(vplc[3],globaltilesizy)]; if (ch != 255) p[3] = pal[3][ch];

#if !defined USE_VECTOR_EXT
            vplc[0] += vinc[0];
            vplc[1] += vinc[1];
            vplc[2] += vinc[2];
            vplc[3] += vinc[3];
            saturate_vplc(vplc[0], vinc[0]);
            saturate_vplc(vplc[1], vinc[1]);
            saturate_vplc(vplc[2], vinc[2]);
            saturate_vplc(vplc[3], vinc[3]);
#else
            vplc += vinc;
            saturate_vplc_vec(vplc, vinc);
#endif
            p += ourbpl;
        }
        while (--cnt);
    }

    Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}

#ifdef USE_ASM64
# define GLOGY a64_glogy
#else
# define GLOGY glogy
#endif

void setuptvlineasm(int32_t neglogy, int32_t dosaturate)
{
    GLOGY = neglogy;
    set_saturate(dosaturate);
}
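
/*
   The translucent variants below (tvlineasm1/2, thline, tspritevline) blend
   through gtrans, the 256x256 lookup table installed by fixtransluscence():
   one byte of the index is the existing framebuffer pixel, the other the
   shaded texel, and transmode (settransnormal/settransreverse) picks which of
   the two goes into the high byte.  A minimal sketch of that lookup, with a
   helper name of my own choosing (illustration only):
*/
#if 0
static inline char blendpix(const char *trans, uint8_t dst, uint8_t src, int32_t transmode)
{
    uint8_t const shift = transmode<<3;             // 0 or 8
    return trans[(dst<<(8-shift)) | (src<<shift)];  // 64K blend table lookup
}
#endif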

#if !defined USE_ASM64
// cnt+1 loop iterations!
int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, bssize_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
    char ch;

    const char *const A_C_RESTRICT buf = (char *)bufplc;
    const char *const A_C_RESTRICT pal = (char *)paloffs;
    const char *const A_C_RESTRICT trans = (char *)gtrans;
    const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
    char *pp = (char *)p;

    cnt++;

    uint8_t const shift = transm<<3;

    do
    {
        ch = getpix(logy, buf, vplc);
        if (ch != 255) *pp = trans[((*pp)<<(8-shift))|(pal[ch]<<shift)];
        pp += ourbpl;
        vplc += vinc;
        saturate_vplc_trans(vplc, vinc);
    }
    while (--cnt);

    return vplc;
}
#endif

void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2)
{
    GLOGY = neglogy;
    A64_ASSIGN(a64_paloffs, paloffs1);
    gpal = (char *)paloffs1;
    gpal2 = (char *)paloffs2;
}

#if !defined USE_ASM64
// Pass: asm1 = vinc2, asm2 = end framebuffer pointer (the row count is derived from it)
// Return: asm1 = vplc1, asm2 = vplc2
void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc2, uint32_t vplc1, intptr_t p)
{
    char ch;

    bssize_t cnt = tabledivide32(asm2-p-1, bpl);  // >= 1
    const int32_t vinc2 = asm1;

    const char *const A_C_RESTRICT buf1 = (char *)bufplc1;
    const char *const A_C_RESTRICT buf2 = (char *)bufplc2;
    const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
    char *pp = (char *)p;

    cnt++;

    uint8_t const shift = transm<<3;

    do
    {
        ch = getpix(logy, buf1, vplc1);
        if (ch != 255) pp[0] = gtrans[(pp[0]<<(8-shift))|(gpal[ch]<<shift)];
        ch = getpix(logy, buf2, vplc2);
        if (ch != 255) pp[1] = gtrans[(pp[1]<<(8-shift))|(gpal2[ch]<<shift)];

        pp += ourbpl;
        vplc1 += vinc1;
        vplc2 += vinc2;
        saturate_vplc_trans(vplc1, vinc1);
        saturate_vplc_trans(vplc2, vinc2);
    }
    while (--cnt > 0);

    asm1 = vplc1;
    asm2 = vplc2;
}
#endif

//Floor sprite horizontal line functions
void msethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; }

// cntup16>>16 + 1 iterations
void mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p)
{
    char ch;

    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    gbuf = (char *)bufplc;
    gpal = (char *)asm3;

    cntup16>>=16;
    cntup16++;

    do
    {
        ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
        if (ch != 255) *((char *)p) = gpal[ch];
        bx += xinc;
        by += yinc;
        p++;
    }
    while (--cntup16);
}

void tsethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; }

// cntup16>>16 + 1 iterations
void thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p)
{
    char ch;

    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    gbuf = (char *)bufplc;
    gpal = (char *)asm3;

    cntup16>>=16;
    cntup16++;

    uint8_t const shift = transmode<<3;

    do
    {
        ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
        if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<(8-shift))|(gpal[ch]<<shift)];
        bx += xinc;
        by += yinc;
        p++;
    }
    while (--cntup16);
}


//Rotatesprite vertical line functions
void setupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
{
    gpal = (char *)paloffs;
    gbxinc = bxinc;
    gbyinc = byinc;
    glogy = ysiz;
}

void spritevline(int32_t bx, int32_t by, bssize_t cnt, intptr_t bufplc, intptr_t p)
{
    gbuf = (char *)bufplc;

    for (; cnt>1; cnt--)
    {
        (*(char *)p) = gpal[gbuf[(bx>>16)*glogy+(by>>16)]];
        bx += gbxinc;
        by += gbyinc;
        p += bpl;
    }
}

//Rotatesprite vertical line functions
void msetupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
{
    gpal = (char *)paloffs;
    gbxinc = bxinc;
    gbyinc = byinc;
    glogy = ysiz;
}

void mspritevline(int32_t bx, int32_t by, bssize_t cnt, intptr_t bufplc, intptr_t p)
{
    char ch;

    gbuf = (char *)bufplc;

    for (; cnt>1; cnt--)
    {
        ch = gbuf[(bx>>16)*glogy+(by>>16)];
        if (ch != 255) (*(char *)p) = gpal[ch];
        bx += gbxinc;
        by += gbyinc;
        p += bpl;
    }
}

void tsetupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
{
    gpal = (char *)paloffs;
    gbxinc = bxinc;
    gbyinc = byinc;
    glogy = ysiz;
}

void tspritevline(int32_t bx, int32_t by, bssize_t cnt, intptr_t bufplc, intptr_t p)
{
    char ch;

    gbuf = (char *)bufplc;

    uint8_t const shift = transmode<<3;

    for (; cnt>1; cnt--)
    {
        ch = gbuf[(bx>>16)*glogy+(by>>16)];
        if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<(8-shift))+(gpal[ch]<<shift)];
        bx += gbxinc;
        by += gbyinc;
        p += bpl;
    }
}

void setupdrawslab(int32_t dabpl, intptr_t pal)
{
    bpl = dabpl;
    gpal = (char *)pal;
}

void drawslab(int32_t dx, int32_t v, int32_t dy, int32_t vi, intptr_t vptr, intptr_t p)
{
    int32_t x;

    while (dy > 0)
    {
        char c = gpal[(int32_t)(*(char *)((v>>16)+vptr))];
        for (x=0; x < dx; x++)
            ((char *)p)[x] = c;
        p += bpl;
        v += vi;
        dy--;
    }
}

#if 0
void stretchhline(intptr_t p0, int32_t u, bssize_t cnt, int32_t uinc, intptr_t rptr, intptr_t p)
{
    p0 = p-(cnt<<2);
    do
    {
        p--;
        *(char *)p = *(char *)((u>>16)+rptr);
        u -= uinc;
    }
    while (p > p0);
}
#endif

#endif
/*
 * vim:ts=4:
 */