diff --git a/polymer/eduke32/build/include/build.h b/polymer/eduke32/build/include/build.h index 59882a7ba..f53c6f765 100644 --- a/polymer/eduke32/build/include/build.h +++ b/polymer/eduke32/build/include/build.h @@ -1287,9 +1287,6 @@ static inline int32_t getclosestcol(int32_t r, int32_t g, int32_t b) typedef struct s_equation { float a, b, c; } _equation; -typedef struct s_point2d { - float x, y; -} _point2d; int32_t wallvisible(int32_t x, int32_t y, int16_t wallnum); #define STATUS2DSIZ 144 diff --git a/polymer/eduke32/build/include/common.h b/polymer/eduke32/build/include/common.h index 26a6a39bd..1ea240ef0 100644 --- a/polymer/eduke32/build/include/common.h +++ b/polymer/eduke32/build/include/common.h @@ -72,6 +72,13 @@ void G_AddDefModule(const char *buffer); void G_AddClipMap(const char *buffer); #endif +// returns a buffer of size BMAX_PATH +static inline char *dup_filename(const char *fn) +{ + char * const buf = (char *) Xmalloc(BMAX_PATH); + return Bstrncpyz(buf, fn, BMAX_PATH); +} + int32_t getatoken(scriptfile *sf, const tokenlist *tl, int32_t ntokens); int32_t G_CheckCmdSwitch(int32_t argc, const char **argv, const char *str); @@ -92,34 +99,30 @@ int32_t maybe_append_ext(char *wbuf, int32_t wbufsiz, const char *fn, const char // individual x/y(/z) distances are passed instead. static inline int32_t sepldist(const int32_t dx, const int32_t dy) { - int32_t x = klabs(dx); - int32_t y = klabs(dy); + vec2_t d ={ klabs(dx), klabs(dy) }; - if (x < y) - swaplong(&x, &y); + if (d.x < d.y) + swaplong(&d.x, &d.y); - { - int32_t t = y + (y>>1); - return x - (x>>5) - (x>>7) + (t>>2) + (t>>6); - } + d.y += (d.y>>1); + + return d.x - (d.x>>5) - (d.x>>7) + (d.y>>2) + (d.y>>6); } // dz: in Build coordinates -static inline int32_t sepdist(int32_t dx, int32_t dy, int32_t dz) +static inline int32_t sepdist(const int32_t dx, const int32_t dy, const int32_t dz) { - int32_t x = klabs(dx); - int32_t y = klabs(dy); - int32_t z = klabs(dz>>4); + vec3_t d ={ klabs(dx), klabs(dy), klabs(dz>>4) }; - if (x < y) - swaplong(&x, &y); - if (x < z) - swaplong(&x, &z); + if (d.x < d.y) + swaplong(&d.x, &d.y); - { - int32_t t = y + z; - return x - (x>>4) + (t>>2) + (t>>3); - } + if (d.x < d.z) + swaplong(&d.x, &d.z); + + d.y += d.z; + + return d.x - (d.x>>4) + (d.y>>2) + (d.y>>3); } int32_t ldist(const spritetype *s1, const spritetype *s2); diff --git a/polymer/eduke32/build/include/editor.h b/polymer/eduke32/build/include/editor.h index 1a3b37026..aa492fe16 100644 --- a/polymer/eduke32/build/include/editor.h +++ b/polymer/eduke32/build/include/editor.h @@ -128,8 +128,6 @@ extern void M32_DrawRoomsAndMasks(void); extern void yax_tweakpicnums(int32_t bunchnum, int32_t cf, int32_t restore); // editor-only extern void M32_ResetFakeRORTiles(void); -extern int32_t kopen4loadfrommod(const char *filename, char searchfirst); - // set to 1 to enable: #define M32_UNDO 1 extern int32_t map_revision; diff --git a/polymer/eduke32/build/include/pragmas.h b/polymer/eduke32/build/include/pragmas.h index 1cffc97c9..a323dfbdd 100644 --- a/polymer/eduke32/build/include/pragmas.h +++ b/polymer/eduke32/build/include/pragmas.h @@ -21,6 +21,13 @@ _scaler(25) _scaler(26) _scaler(27) _scaler(28)\ _scaler(29) _scaler(30) _scaler(31) extern int32_t dmval; +#if !defined(NOASM) && defined __cplusplus +extern "C" { +#endif +extern int32_t reciptable[2048], fpuasm; +#if !defined(NOASM) && defined __cplusplus +}; +#endif // break the C version of divscale out from the others // because asm version overflows in drawmapview() @@ -193,6 +200,13 @@ void clearbufbyte(void *D, int32_t c, int32_t a); void copybufbyte(const void *S, void *D, int32_t c); void copybufreverse(const void *S, void *D, int32_t c); +static inline int32_t krecipasm(int32_t i) +{ + // Ken did this + float f = (float) i; i = *(int32_t *) &f; + return((reciptable[(i>>12)&2047]>>(((i-0x3f800000)>>23)&31))^(i>>31)); +} + #endif #undef qw diff --git a/polymer/eduke32/build/include/pragmas_arm.h b/polymer/eduke32/build/include/pragmas_arm.h index 17db9fe82..786a8d210 100644 --- a/polymer/eduke32/build/include/pragmas_arm.h +++ b/polymer/eduke32/build/include/pragmas_arm.h @@ -52,4 +52,11 @@ void swapbuf4(void* a, void* b, int32_t c); void clearbufbyte(void *D, int32_t c, int32_t a); void copybufbyte(const void *S, void *D, int32_t c); void copybufreverse(const void *S, void *D, int32_t c); + +static inline int32_t krecipasm(int32_t i) +{ + // Ken did this + float f = (float) i; i = *(int32_t *) &f; + return((reciptable[(i>>12)&2047]>>(((i-0x3f800000)>>23)&31))^(i>>31)); +} #endif diff --git a/polymer/eduke32/build/include/pragmas_ppc.h b/polymer/eduke32/build/include/pragmas_ppc.h index dc490e66b..93b13a3be 100644 --- a/polymer/eduke32/build/include/pragmas_ppc.h +++ b/polymer/eduke32/build/include/pragmas_ppc.h @@ -249,5 +249,12 @@ static inline int32_t umax(int32_t a, int32_t b) { if ((uint32_t) a < (uint32_t) static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return a; return b; } static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return b; return a; } +static inline int32_t krecipasm(int32_t i) +{ + // Ken did this + float f = (float) i; i = *(int32_t *) &f; + return((reciptable[(i>>12)&2047]>>(((i-0x3f800000)>>23)&31))^(i>>31)); +} + #endif // __pragmas_ppc_h__ #endif // __pragmas_h__ diff --git a/polymer/eduke32/build/include/pragmas_x86_gcc.h b/polymer/eduke32/build/include/pragmas_x86_gcc.h index 77fd0019e..318adc2f6 100644 --- a/polymer/eduke32/build/include/pragmas_x86_gcc.h +++ b/polymer/eduke32/build/include/pragmas_x86_gcc.h @@ -585,6 +585,17 @@ void copybufreverse(const void *S, void *D, int32_t c); : "ebx", "edi", "memory", "cc"); \ 0; }) +#define krecipasm(a) \ + ({ int32_t __a=(a); \ + __asm__ __volatile__ ( \ + "movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \ + "addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \ + "movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \ + "andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \ + "shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \ + "sarl %%cl, %%eax; xorl %%ebx, %%eax" \ + : "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \ + __a; }) //}}} diff --git a/polymer/eduke32/build/include/pragmas_x86_msvc.h b/polymer/eduke32/build/include/pragmas_x86_msvc.h index 27e2f905a..d1852e51a 100644 --- a/polymer/eduke32/build/include/pragmas_x86_msvc.h +++ b/polymer/eduke32/build/include/pragmas_x86_msvc.h @@ -474,6 +474,32 @@ static __inline void swapchar2(void *a, void *b, int32_t s) mov[eax], dx } } + +//0x007ff000 is (11<<13), 0x3f800000 is (127<<23) +static inline int32_t krecipasm(int32_t a) +{ + _asm + { + push ebx + mov eax, a + mov fpuasm, eax + fild dword ptr fpuasm + add eax, eax + fstp dword ptr fpuasm + sbb ebx, ebx + mov eax, fpuasm + mov ecx, eax + and eax, 0x007ff000 + shr eax, 10 + sub ecx, 0x3f800000 + shr ecx, 23 + mov eax, dword ptr reciptable[eax] + sar eax, cl + xor eax, ebx + pop ebx + } +} + //}}} #endif // __pragmas_x86_h__ diff --git a/polymer/eduke32/build/src/a-c.c b/polymer/eduke32/build/src/a-c.c index a16c35212..7a8ed3f8a 100644 --- a/polymer/eduke32/build/src/a-c.c +++ b/polymer/eduke32/build/src/a-c.c @@ -11,8 +11,6 @@ #ifdef ENGINE_USING_A_C -int32_t krecip(int32_t num); // from engine.c - #define BITSOFPRECISION 3 #define BITSOFPRECISIONPOW 8 @@ -23,8 +21,7 @@ int32_t krecip(int32_t num); // from engine.c //#define USE_SATURATE_VPLC_TRANS extern intptr_t asm1, asm2, asm3, asm4; -extern int32_t fpuasm, globalx3, globaly3; -extern void *reciptable; +extern int32_t globalx3, globaly3; #ifdef USE_ASM64 # define A64_ASSIGN(var, val) var=val @@ -40,8 +37,10 @@ char *a64_gtrans; #endif static int32_t bpl, transmode = 0; -static int32_t glogx, glogy, gbxinc, gbyinc, gpinc; -static char *gbuf, *gpal, *ghlinepal, *gtrans; +int32_t glogx, glogy, gpinc; +static int32_t gbxinc, gbyinc; +char *gbuf; +static char *gpal, *ghlinepal, *gtrans; static char *gpal2; //Global variable functions @@ -71,24 +70,26 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, const int32_t bxinc = gbxinc, byinc = gbyinc; const int32_t logx = glogx, logy = glogy; char *pp = (char *)p; + const uint8_t logx32 = 32-logx, logy32 = 32-logy; - for (; cnt>=4; cnt -= 4) +#ifdef CLASSIC_SLICE_BY_4 + for (; cnt>=4; cnt-=4, pp-=4) { - *pp = palptr[buf[((bx>>(32-logx))<>(32-logy))]]; pp--; - *pp = palptr[buf[(((bx-bxinc)>>(32-logx))<>(32-logy))]]; pp--; - *pp = palptr[buf[(((bx-(bxinc<<1))>>(32-logx))<>(32-logy))]]; pp--; - *pp = palptr[buf[(((bx-(bxinc*3))>>(32-logx))<>(32-logy))]]; pp--; + *pp = palptr[buf[((bx>>logx32)<>logy32)]]; + *(pp-1) = palptr[buf[(((bx-bxinc)>>logx32)<>logy32)]]; + *(pp-2) = palptr[buf[(((bx-(bxinc<<1))>>logx32)<>logy32)]]; + *(pp-3) = palptr[buf[(((bx-(bxinc*3))>>logx32)<>logy32)]]; bx -= bxinc<<2; by -= byinc<<2; } +#endif - for (; cnt>=0; cnt--) + for (; cnt>=0; cnt--, pp--) { - *pp = palptr[buf[((bx>>(32-logx))<>(32-logy))]]; + *pp = palptr[buf[((bx>>logx32)<>logy32)]]; bx -= bxinc; by -= byinc; - pp--; } } } @@ -96,11 +97,6 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, ///// Sloped ceiling/floor vertical line functions ///// -void setupslopevlin(int32_t logylogx, intptr_t bufplc, int32_t pinc) -{ - glogx = (logylogx&255); glogy = (logylogx>>8); - gbuf = (char *)bufplc; gpinc = pinc; -} void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t bx, int32_t by) { intptr_t *slopalptr; @@ -111,7 +107,7 @@ void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t slopalptr = (intptr_t *)slopaloffs; for (; cnt>0; cnt--) { - i = krecip(bz>>6); bz += bzinc; + i = krecipasm(bz>>6); bz += bzinc; u = bx+(inthi_t)globalx3*i; v = by+(inthi_t)globaly3*i; (*(char *)p) = *(char *)(((intptr_t)slopalptr[0])+gbuf[((u>>(32-glogx))<>(32-glogy))]); @@ -224,6 +220,37 @@ typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16))); # define saturate_vplc_trans(vplc, vinc) #endif +// cnt >= 1 +void vlineasm4nlogy(int32_t cnt, char *p, char *const *pal, char *const *buf, +#ifdef USE_VECTOR_EXT + uint32_vec4 vplc, const uint32_vec4 vinc) +#else + uint32_t * vplc, const int32_t *vinc) +#endif +{ + const int32_t ourbpl = bpl; + + do + { + p[0] = pal[0][buf[0][ourmulscale32(vplc[0], globaltilesizy)]]; + p[1] = pal[1][buf[1][ourmulscale32(vplc[1], globaltilesizy)]]; + p[2] = pal[2][buf[2][ourmulscale32(vplc[2], globaltilesizy)]]; + p[3] = pal[3][buf[3][ourmulscale32(vplc[3], globaltilesizy)]]; + +#if defined USE_VECTOR_EXT + vplc += vinc; +#else + vplc[0] += vinc[0]; + vplc[1] += vinc[1]; + vplc[2] += vinc[2]; + vplc[3] += vinc[3]; +#endif + p += ourbpl; + } while (--cnt); + + Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4); +} + // cnt >= 1 void vlineasm4(int32_t cnt, char *p) { @@ -238,28 +265,10 @@ void vlineasm4(int32_t cnt, char *p) #endif const int32_t logy = glogy, ourbpl = bpl; - if (!logy) + if (!logy) // I had an assert on logy for quite a while that NEVER triggered... { - do - { - p[0] = pal[0][buf[0][ourmulscale32(vplc[0],globaltilesizy)]]; - p[1] = pal[1][buf[1][ourmulscale32(vplc[1],globaltilesizy)]]; - p[2] = pal[2][buf[2][ourmulscale32(vplc[2],globaltilesizy)]]; - p[3] = pal[3][buf[3][ourmulscale32(vplc[3],globaltilesizy)]]; - -#if defined USE_VECTOR_EXT - vplc += vinc; -#else - vplc[0] += vinc[0]; - vplc[1] += vinc[1]; - vplc[2] += vinc[2]; - vplc[3] += vinc[3]; -#endif - p += ourbpl; - } - while (--cnt); - - goto skip; + vlineasm4nlogy(cnt, p, pal, buf, vplc, vinc); + return; } // just fucking shoot me @@ -313,11 +322,7 @@ void vlineasm4(int32_t cnt, char *p) p += ourbpl; } -skip: - vplce[0] = vplc[0]; - vplce[1] = vplc[1]; - vplce[2] = vplc[2]; - vplce[3] = vplc[3]; + Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4); } #ifdef USE_SATURATE_VPLC @@ -450,10 +455,7 @@ void mvlineasm4(int32_t cnt, char *p) while (--cnt); } - vplce[0] = vplc[0]; - vplce[1] = vplc[1]; - vplce[2] = vplc[2]; - vplce[3] = vplc[3]; + Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4); } #ifdef USE_ASM64 diff --git a/polymer/eduke32/build/src/cache1d.c b/polymer/eduke32/build/src/cache1d.c index fd99612ca..da8890427 100644 --- a/polymer/eduke32/build/src/cache1d.c +++ b/polymer/eduke32/build/src/cache1d.c @@ -163,11 +163,9 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr) *newhandle = (intptr_t)Xmalloc(newbytes); } #else -static void inc_and_check_cacnum(void) +static inline void inc_and_check_cacnum(void) { - cacnum++; - - if (cacnum > MAXCACHEOBJECTS) + if (++cacnum > MAXCACHEOBJECTS) reportandexit("Too many objects in cache! (cacnum > MAXCACHEOBJECTS)"); } diff --git a/polymer/eduke32/build/src/common.c b/polymer/eduke32/build/src/common.c index 2a46fca28..122f9dbb1 100644 --- a/polymer/eduke32/build/src/common.c +++ b/polymer/eduke32/build/src/common.c @@ -173,15 +173,6 @@ int32_t fnlist_getnames(fnlist_t *fnl, const char *dirname, const char *pattern, //// -// returns a buffer of size BMAX_PATH -char *dup_filename(const char *fn) -{ - char *buf = (char *)Xmalloc(BMAX_PATH); - - return Bstrncpyz(buf, fn, BMAX_PATH); -} - - // Copy FN to WBUF and append an extension if it's not there, which is checked // case-insensitively. // Returns: 1 if not all characters could be written to WBUF, 0 else. diff --git a/polymer/eduke32/build/src/engine.c b/polymer/eduke32/build/src/engine.c index e944064de..65123e808 100644 --- a/polymer/eduke32/build/src/engine.c +++ b/polymer/eduke32/build/src/engine.c @@ -2048,31 +2048,6 @@ static inline int32_t msqrtasm(int32_t c) } } -//0x007ff000 is (11<<13), 0x3f800000 is (127<<23) -static inline int32_t krecipasm(int32_t a) -{ - _asm - { - push ebx - mov eax, a - mov fpuasm, eax - fild dword ptr fpuasm - add eax, eax - fstp dword ptr fpuasm - sbb ebx, ebx - mov eax, fpuasm - mov ecx, eax - and eax, 0x007ff000 - shr eax, 10 - sub ecx, 0x3f800000 - shr ecx, 23 - mov eax, dword ptr reciptable[eax] - sar eax, cl - xor eax, ebx - pop ebx - } -} - static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d) { _asm @@ -2163,18 +2138,6 @@ static inline int32_t getkensmessagecrc(void *b) : "=a" (__r) : "c" (__c) : "edx","ebx", "cc"); \ __r; }) -#define krecipasm(a) \ - ({ int32_t __a=(a); \ - __asm__ __volatile__ ( \ - "movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \ - "addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \ - "movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \ - "andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \ - "shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \ - "sarl %%cl, %%eax; xorl %%ebx, %%eax" \ - : "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \ - __a; }) - #define getclipmask(a,b,c,d) \ ({ int32_t __a=(a), __b=(b), __c=(c), __d=(d); \ __asm__ __volatile__ ("sarl $31, %%eax; addl %%ebx, %%ebx; adcl %%eax, %%eax; " \ @@ -2252,14 +2215,6 @@ static inline int32_t msqrtasm(uint32_t c) return a; } -static inline int32_t krecipasm(int32_t i) -{ - // Ken did this - float f = (float)i; i = *(int32_t *)&f; - return((reciptable[(i>>12)&2047]>>(((i-0x3f800000)>>23)&31))^(i>>31)); -} - - static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d) { // Ken did this @@ -2467,9 +2422,7 @@ void fade_screen_black(int32_t moreopaquep) { #ifdef USE_OPENGL if (getrendermode() >= REND_POLYMOST) - { fullscreen_tint_gl(0,0,0, moreopaquep ? 168 : 84); - } else #endif { @@ -2481,10 +2434,19 @@ void fade_screen_black(int32_t moreopaquep) const char *const trans = getblendtab(0); const int32_t shiftamnt = ((!!moreopaquep)*8); const int32_t dimprod = xdim*ydim; + int32_t i = 0; - int32_t i; +#ifdef CLASSIC_SLICE_BY_4 + for (; i>8); + gbuf = (char *) bufplc; gpinc = pinc; +#else setupslopevlin(logylogx, bufplc, pinc); +#endif gglogx = (logylogx&255); gglogy = (logylogx>>8); ggbuf = (char *)bufplc; ggpinc = pinc; ggpal = palookup[globalpal] + getpalookupsh(0); @@ -5310,8 +5282,8 @@ static void drawvox(int32_t dasprx, int32_t daspry, int32_t dasprz, int32_t dasp daxscale = scale(daxscale,xdimenscale,xdimen<<8); dayscale = scale(dayscale,mulscale16(xdimenscale,viewingrangerecip),xdimen<<8); - daxscalerecip = tabledivide32_noinline(1<<30, daxscale); - dayscalerecip = tabledivide32_noinline(1<<30, dayscale); + daxscalerecip = divideu32_noinline(1<<30, daxscale); + dayscalerecip = divideu32_noinline(1<<30, dayscale); longptr = (int32_t *)davoxptr; daxsiz = B_LITTLE32(longptr[0]); daysiz = B_LITTLE32(longptr[1]); //dazsiz = B_LITTLE32(longptr[2]); @@ -5772,21 +5744,21 @@ draw_as_face_sprite: #ifdef CLASSIC_SLICE_BY_4 for (; x<=rx-4; x+=4) { - uwall[x] = max(startumost[x+windowx1]-windowy1, (int16_t) startum); - uwall[x+1] = max(startumost[x+windowx1+1]-windowy1, (int16_t) startum); - uwall[x+2] = max(startumost[x+windowx1+2]-windowy1, (int16_t) startum); - uwall[x+3] = max(startumost[x+windowx1+3]-windowy1, (int16_t) startum); + uwall[x] = max(startumost[windowx1+x]-windowy1, (int16_t) startum); + uwall[x+1] = max(startumost[windowx1+x+1]-windowy1, (int16_t) startum); + uwall[x+2] = max(startumost[windowx1+x+2]-windowy1, (int16_t) startum); + uwall[x+3] = max(startumost[windowx1+x+3]-windowy1, (int16_t) startum); - dwall[x] = min(startdmost[x+windowx1]-windowy1, (int16_t) startdm); - dwall[x+1] = min(startdmost[x+windowx1+1]-windowy1, (int16_t) startdm); - dwall[x+2] = min(startdmost[x+windowx1+2]-windowy1, (int16_t) startdm); - dwall[x+3] = min(startdmost[x+windowx1+3]-windowy1, (int16_t) startdm); + dwall[x] = min(startdmost[windowx1+x]-windowy1, (int16_t) startdm); + dwall[x+1] = min(startdmost[windowx1+x+1]-windowy1, (int16_t) startdm); + dwall[x+2] = min(startdmost[windowx1+x+2]-windowy1, (int16_t) startdm); + dwall[x+3] = min(startdmost[windowx1+x+3]-windowy1, (int16_t) startdm); } #endif for (; x<=rx; x++) { - uwall[x] = max(startumost[x+windowx1]-windowy1,(int16_t)startum); - dwall[x] = min(startdmost[x+windowx1]-windowy1,(int16_t)startdm); + uwall[x] = max(startumost[windowx1+x]-windowy1,(int16_t)startum); + dwall[x] = min(startdmost[windowx1+x]-windowy1,(int16_t)startdm); } daclip = 0; @@ -5814,7 +5786,7 @@ draw_as_face_sprite: k = smoststart[i] - xb1[j]; x = dalx2; #ifdef CLASSIC_SLICE_BY_4 // ok, this one is really by 2 ;) - for (x=dalx2; x<=darx2-2; x+=2) + for (; x<=darx2-2; x+=2) { if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x]; if (smost[k+x+1] > uwall[x+1]) uwall[x+1] = smost[k+x+1]; @@ -6014,22 +5986,54 @@ draw_as_face_sprite: { int32_t hplc = divscale19(xdimenscale,sy1); const int32_t hplc2 = divscale19(xdimenscale,sy2); - int32_t hinc = sx2-sx1 ? (hplc2-hplc)/(sx2-sx1) : 0; + const int32_t idiv = sx2-sx1; + int32_t hinc[4] ={ idiv ? tabledivide32(hplc2-hplc, idiv) : 0 }; #ifdef HIGH_PRECISION_SPRITE - const float cc = ((1<<19)*(float)xdimen*yxaspect)/320.f; - float hplcf = cc/sy1; - const float hincf = sx2-sx1 ? (cc/sy2 - hplcf)/(sx2-sx1) : 0; + const float cc = ((1<<19)*fxdimen*(float)yxaspect) * (1.f/320.f); const float loopcc = ((cstat&8) ? -1 : 1)*((float)(1<<30)*(1<<24)) / (yspan*tspr->yrepeat); + float hplcf = cc/sy1; + float hincf[4] = {idiv ? (cc/sy2 - hplcf)/idiv : 0}; + +#ifdef CLASSIC_SLICE_BY_4 + hincf[1] = hincf[0] * 2.f; + hincf[2] = hincf[0] * 3.f; + hincf[3] = hincf[0] * 4.f; +#endif // CLASSIC_SLICE_BY_4 +#endif // HIGH_PRECISION_SPRITE +#ifdef CLASSIC_SLICE_BY_4 + hinc[1] = hinc[0]<<1; + hinc[2] = hinc[0]*3; + hinc[3] = hinc[0]<<2; #endif - for (i=sx1; i<=sx2; i++) + i = sx1; + +#ifdef CLASSIC_SLICE_BY_4 + for (; i<=sx2-4; i+=4) { swall[i] = (krecipasm(hplc)<<2); - hplc += hinc; + swall[i+1] = (krecipasm(hplc+hinc[0])<<2); + swall[i+2] = (krecipasm(hplc+hinc[1])<<2); + swall[i+3] = (krecipasm(hplc+hinc[2])<<2); + hplc += hinc[3]; +#ifdef HIGH_PRECISION_SPRITE + swallf[i] = loopcc/hplcf; + swallf[i+1] = loopcc/(hplcf+hincf[0]); + swallf[i+2] = loopcc/(hplcf+hincf[1]); + swallf[i+3] = loopcc/(hplcf+hincf[2]); + hplcf += hincf[3]; +#endif // HIGH_PRECISION_SPRITE + } +#endif // CLASSIC_SLICE_BY_4 + + for (; i<=sx2; i++) + { + swall[i] = (krecipasm(hplc)<<2); + hplc += hinc[0]; #ifdef HIGH_PRECISION_SPRITE swallf[i] = loopcc/hplcf; - hplcf += hincf; + hplcf += hincf[0]; #endif } } @@ -6117,12 +6121,30 @@ draw_as_face_sprite: break; case 1: k = smoststart[i] - xb1[j]; - for (x=dalx2; x<=darx2; x++) + x = dalx2; +#ifdef CLASSIC_SLICE_BY_4 + for (; x<=darx2-2; x+=2) + { + if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x]; + if (smost[k+x+1] > uwall[x+1]) uwall[x+1] = smost[k+x+1]; + } +#endif + for (; x<=darx2; x++) if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x]; break; case 2: k = smoststart[i] - xb1[j]; - for (x=dalx2; x<=darx2; x++) + x = dalx2; +#ifdef CLASSIC_SLICE_BY_4 + for (; x<=darx2-4; x+=4) + { + if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x]; + if (smost[k+x+1] < dwall[x+1]) dwall[x+1] = smost[k+x+1]; + if (smost[k+x+2] < dwall[x+2]) dwall[x+2] = smost[k+x+2]; + if (smost[k+x+3] < dwall[x+3]) dwall[x+3] = smost[k+x+3]; + } +#endif + for (; x<=darx2; x++) if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x]; break; } @@ -6513,12 +6535,30 @@ draw_as_face_sprite: break; case 1: k = smoststart[i] - xb1[j]; - for (x=dalx2; x<=darx2; x++) + x = dalx2; +#ifdef CLASSIC_SLICE_BY_4 + for (; x<=darx2-2; x+=2) + { + if (smost[k+x] > lwall[x]) lwall[x] = smost[k+x]; + if (smost[k+x+1] > lwall[x+1]) lwall[x+1] = smost[k+x+1]; + } +#endif + for (; x<=darx2; x++) if (smost[k+x] > lwall[x]) lwall[x] = smost[k+x]; break; case 2: k = smoststart[i] - xb1[j]; - for (x=dalx2; x<=darx2; x++) + x = dalx2; +#ifdef CLASSIC_SLICE_BY_4 + for (; x<=darx2-4; x+=4) + { + if (smost[k+x] < swall[x]) swall[x] = smost[k+x]; + if (smost[k+x+1] < swall[x+1]) swall[x+1] = smost[k+x+1]; + if (smost[k+x+2] < swall[x+2]) swall[x+2] = smost[k+x+2]; + if (smost[k+x+3] < swall[x+3]) swall[x+3] = smost[k+x+3]; + } +#endif + for (; x<=darx2; x++) if (smost[k+x] < swall[x]) swall[x] = smost[k+x]; break; } @@ -7871,37 +7911,13 @@ static void dosetaspect(void) oxyaspect = xyaspect; j = xyaspect*320; horizlookup2[horizycent-1] = divscale26(131072,j); - for (i=0; i < horizycent-1-4; i += 4) - { - horizlookup[i] = divscale28(1, i -(horizycent-1)); - horizlookup[i+1] = divscale28(1, i+1-(horizycent-1)); - horizlookup[i+2] = divscale28(1, i+2-(horizycent-1)); - horizlookup[i+3] = divscale28(1, i+3-(horizycent-1)); - - horizlookup2[i] = divscale14(klabs(horizlookup[i]), j); - horizlookup2[i+1] = divscale14(klabs(horizlookup[i+1]), j); - horizlookup2[i+2] = divscale14(klabs(horizlookup[i+2]), j); - horizlookup2[i+3] = divscale14(klabs(horizlookup[i+3]), j); - } - for (; i < horizycent-1; i++) + for (i=0; i < horizycent-1; i++) { horizlookup[i] = divscale28(1, i-(horizycent-1)); horizlookup2[i] = divscale14(klabs(horizlookup[i]), j); } - for (i=horizycent; i < ydim*4-1-4; i += 4) - { - horizlookup[i] = divscale28(1, i -(horizycent-1)); - horizlookup[i+1] = divscale28(1, i+1-(horizycent-1)); - horizlookup[i+2] = divscale28(1, i+2-(horizycent-1)); - horizlookup[i+3] = divscale28(1, i+3-(horizycent-1)); - - horizlookup2[i] = divscale14(klabs(horizlookup[i]), j); - horizlookup2[i+1] = divscale14(klabs(horizlookup[i+1]), j); - horizlookup2[i+2] = divscale14(klabs(horizlookup[i+2]), j); - horizlookup2[i+3] = divscale14(klabs(horizlookup[i+3]), j); - } - for (; i < ydim*4-1; i++) + for (i=horizycent; i < ydim*4-1; i++) { horizlookup[i] = divscale28(1, i-(horizycent-1)); horizlookup2[i] = divscale14(klabs(horizlookup[i]), j); @@ -9331,21 +9347,23 @@ int32_t drawrooms(int32_t daposx, int32_t daposy, int32_t daposz, static inline _equation equation(float x1, float y1, float x2, float y2) { _equation ret; + const float f = x2-x1; - if ((x2 - x1) != 0) - { - ret.a = (float)(y2 - y1)/(float)(x2 - x1); - ret.b = -1; - ret.c = (y1 - (ret.a * x1)); - } - else // vertical + // vertical + if (f == 0.f) { ret.a = 1; ret.b = 0; ret.c = -x1; + + return ret; } - return (ret); + ret.a = (float) (y2 - y1)/f; + ret.b = -1; + ret.c = (y1 - (ret.a * x1)); + + return ret; } int32_t wallvisible(int32_t x, int32_t y, int16_t wallnum) @@ -9436,21 +9454,11 @@ static inline void drawmaskleaf(_maskleaf* wall) } #endif -static inline int32_t sameside(const _equation *eq, const _point2d *p1, const _point2d *p2) +static inline int32_t sameside(const _equation *eq, const vec2f_t *p1, const vec2f_t *p2) { - float sign1, sign2; - - sign1 = eq->a * p1->x + eq->b * p1->y + eq->c; - sign2 = eq->a * p2->x + eq->b * p2->y + eq->c; - - sign1 = sign1 * sign2; - if (sign1 > 0) - { - //OSD_Printf("SAME SIDE !\n"); - return 1; - } - //OSD_Printf("OPPOSITE SIDE !\n"); - return 0; + const float sign1 = (eq->a * p1->x) + (eq->b * p1->y) + eq->c; + const float sign2 = (eq->a * p2->x) + (eq->b * p2->y) + eq->c; + return (sign1 * sign2) > 0.f; } // x1, y1: in/out @@ -9621,7 +9629,7 @@ killsprite: } #endif { - _point2d pos; + vec2f_t pos; pos.x = fglobalposx; pos.y = fglobalposy; @@ -9630,7 +9638,7 @@ killsprite: // Writing e.g. "while (maskwallcnt--)" is wrong! while (maskwallcnt) { - _point2d dot, dot2, middle; + vec2f_t dot, dot2, middle; // PLAG: sorting stuff _equation maskeq, p1eq, p2eq; @@ -9658,7 +9666,7 @@ killsprite: i--; if (tspriteptr[i] != NULL) { - _point2d spr; + vec2f_t spr; const spritetype *tspr = tspriteptr[i]; spr.x = (float)tspr->x; @@ -11551,11 +11559,10 @@ static const char *E_GetArtFileName(int32_t tilefilei) //<-1: per-map ART issue static int32_t E_ReadArtFile(int32_t tilefilei) { - int32_t fil; - const char *fn = E_GetArtFileName(tilefilei); const int32_t permap = (tilefilei >= MAXARTFILES_BASE); // is it a per-map ART file? int16_t *tilesizx, *tilesizy; + int32_t fil; if ((fil = kopen4load(fn,0)) != -1) { @@ -11742,28 +11749,6 @@ void loadtile(int16_t tilenume) } #endif - // dummy tiles for highres replacements and tilefromtexture definitions - if (faketilesiz[tilenume]) - { - if (faketilesiz[tilenume] == -1) - { - walock[tilenume] = 255; // permanent tile - allocache(&waloff[tilenume], dasiz, &walock[tilenume]); - Bmemset((char *)waloff[tilenume],0,dasiz); - } - else if (faketiledata[tilenume] != NULL) - { - walock[tilenume] = 255; - allocache(&waloff[tilenume], dasiz, &walock[tilenume]); - LZ4_decompress_fast(faketiledata[tilenume], (char *)waloff[tilenume], dasiz); - Bfree(faketiledata[tilenume]); - faketiledata[tilenume] = NULL; - } - - faketimerhandler(); - return; - } - // Allocate storage if necessary. if (waloff[tilenume] == 0) { @@ -11771,6 +11756,16 @@ void loadtile(int16_t tilenume) allocache(&waloff[tilenume],dasiz,&walock[tilenume]); } + // dummy tiles for highres replacements and tilefromtexture definitions + if (faketilesiz[tilenume]) + { + if (faketilesiz[tilenume] != -1 && faketiledata[tilenume] != NULL) + LZ4_decompress_fast(faketiledata[tilenume], (char *) waloff[tilenume], dasiz); + + faketimerhandler(); + return; + } + // Potentially switch open ART file. if (i != artfilnum) { @@ -12082,14 +12077,6 @@ int32_t ksqrt(uint32_t num) return nsqrtasm(num); } -// -// krecip -// -int32_t krecip(int32_t num) -{ - return krecipasm(num); -} - #ifdef LUNATIC int32_t Mulscale(int32_t a, int32_t b, int32_t sh) { @@ -13250,14 +13237,14 @@ static int32_t clipsprite_try(const spritetype *spr, int32_t xmin, int32_t ymin, if ((spr->cstat&48)!=32) // face/wall sprite { int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT; - maxcorrection = tabledivide32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1); + maxcorrection = divideu32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1); } else // floor sprite { int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT; int32_t tempint2 = clipmapinfo.sector[k].CM_YREPEAT; - maxcorrection = max(tabledivide32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1), - tabledivide32_noinline(maxcorrection * (int32_t)spr->yrepeat, tempint2)); + maxcorrection = max(divideu32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1), + divideu32_noinline(maxcorrection * (int32_t)spr->yrepeat, tempint2)); } maxcorrection -= MAXCLIPDIST; @@ -15430,20 +15417,32 @@ void setviewback(void) // void squarerotatetile(int16_t tilenume) { - int32_t siz; + int32_t siz = tilesiz[tilenume].x; + int32_t i, j; + char *ptr1, *ptr2; - //supports square tiles only for rotation part - if ((siz = tilesiz[tilenume].x) == tilesiz[tilenume].y) + if (siz != tilesiz[tilenume].y) + return; + + for (i=siz-1; i>=3; i-=4) { - int32_t i = siz-1; + ptr2 = ptr1 = (char *) (waloff[tilenume]+i*(siz+1)); + swapchar(--ptr1, (ptr2 -= siz)); + j=(i>>1)-1; + for (; j>=0; j--) swapchar2((ptr1 -= 2), (ptr2 -= (siz<<1)), siz); - for (; i>=0; i--) - { - int32_t j=(i>>1)-1; - char *ptr1 = (char *)(waloff[tilenume]+i*(siz+1)), *ptr2 = ptr1; - if (i&1) swapchar(--ptr1, (ptr2 -= siz)); - for (; j>=0; j--) swapchar2((ptr1 -= 2), (ptr2 -= (siz<<1)), siz); - } + ptr2 = ptr1 = (char *) (waloff[tilenume]+(i-1)*(siz+1)); + j=((i-1)>>1)-1; + for (; j>=0; j--) swapchar2((ptr1 -= 2), (ptr2 -= (siz<<1)), siz); + + ptr2 = ptr1 = (char *) (waloff[tilenume]+(i-2)*(siz+1)); + swapchar(--ptr1, (ptr2 -= siz)); + j=((i-2)>>1)-1; + for (; j>=0; j--) swapchar2((ptr1 -= 2), (ptr2 -= (siz<<1)), siz); + + ptr2 = ptr1 = (char *) (waloff[tilenume]+(i-3)*(siz+1)); + j=((i-3)>>1)-1; + for (; j>=0; j--) swapchar2((ptr1 -= 2), (ptr2 -= (siz<<1)), siz); } } diff --git a/polymer/eduke32/build/src/kplib.c b/polymer/eduke32/build/src/kplib.c index b89fe3444..9a66c5000 100644 --- a/polymer/eduke32/build/src/kplib.c +++ b/polymer/eduke32/build/src/kplib.c @@ -828,7 +828,7 @@ static int32_t kpngrend(const char *kfilebuf, int32_t kfilength, } else if (i == (int32_t)LSWAPIB(0x45544c50)) //PLTE (must be before IDAT) { - paleng = tabledivide32(leng, 3); + paleng = leng/3; for (i=paleng-1; i>=0; i--) palcol[i] = LSWAPIB((LSWAPIL(*(int32_t *)&filptr[i*3])>>8)|0xff000000); } else if (i == (int32_t)LSWAPIB(0x44474b62)) //bKGD (must be after PLTE and before IDAT) diff --git a/polymer/eduke32/build/src/texcache.c b/polymer/eduke32/build/src/texcache.c index 46fe8c1c4..42c1fd4cc 100644 --- a/polymer/eduke32/build/src/texcache.c +++ b/polymer/eduke32/build/src/texcache.c @@ -29,10 +29,8 @@ static const char *texcache_errorstr[TEXCACHEERRORS] = { static pthtyp *texcache_tryart(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int32_t dameth) { - pthtyp *pth; const int32_t j = dapicnum&(GLTEXCACHEADSIZ-1); - - if (hicprecaching) return NULL; + pthtyp *pth; // load from art for (pth=texcache.list[j]; pth; pth=pth->next) @@ -62,10 +60,10 @@ static pthtyp *texcache_tryart(int32_t dapicnum, int32_t dapalnum, int32_t dasha pthtyp *texcache_fetchmulti(pthtyp *pth, hicreplctyp *si, int32_t dapicnum, int32_t dameth) { - int32_t i; const int32_t j = dapicnum&(GLTEXCACHEADSIZ-1); + int32_t i; - for (i = (GLTEXCACHEADSIZ - 1); i >= 0; i--) + for (i = 0; i <= (GLTEXCACHEADSIZ - 1); i++) { const pthtyp *pth2; @@ -110,7 +108,7 @@ pthtyp *texcache_fetch(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int3 if (!si) { if (dapalnum >= (MAXPALOOKUPS - RESERVEDPALS)) return NULL; - return texcache_tryart(dapicnum, dapalnum, dashade, dameth); + return hicprecaching ? NULL : texcache_tryart(dapicnum, dapalnum, dashade, dameth); } /* if palette > 0 && replacement found @@ -141,7 +139,7 @@ pthtyp *texcache_fetch(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int3 if (tilestat == -2) // bad filename hicclearsubst(dapicnum, dapalnum); if (drawingskybox) return NULL; - return texcache_tryart(dapicnum, dapalnum, dashade, dameth); + return hicprecaching ? NULL : texcache_tryart(dapicnum, dapalnum, dashade, dameth); } } @@ -163,7 +161,7 @@ pthtyp *texcache_fetch(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int3 hicclearsubst(dapicnum, dapalnum); Bfree(pth); if (drawingskybox) return NULL; - return texcache_tryart(dapicnum, dapalnum, dashade, dameth); + return hicprecaching ? NULL : texcache_tryart(dapicnum, dapalnum, dashade, dameth); } pth->palnum = si->palnum; @@ -180,12 +178,7 @@ static void texcache_closefiles(void) Bclose(texcache.filehandle); texcache.filehandle = -1; } - - if (texcache.index) - { - Bfclose(texcache.index); - texcache.index = NULL; - } + MAYBE_FCLOSE_AND_NULL(texcache.index); } void texcache_freeptrs(void) @@ -208,10 +201,9 @@ void texcache_freeptrs(void) } } -void texcache_clearmemcache(void) +static inline void texcache_clearmemcache(void) { - Bfree(texcache.memcache.ptr); - texcache.memcache.ptr = NULL; + DO_FREE_AND_NULL(texcache.memcache.ptr); texcache.memcache.size = -1; }