Additional engine cleanups and optimizations. About as generic as it sounds. Also changes internal dummytile/tilefromtexture behavior so that the LZ4 compressed version of a texture is kept and the cache1d entries associated with the tiles can expire, like any other tile.

git-svn-id: https://svn.eduke32.com/eduke32@4661 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2014-10-25 03:30:38 +00:00
parent 019315dd59
commit cbe91be9ff
14 changed files with 324 additions and 279 deletions

View file

@ -1287,9 +1287,6 @@ static inline int32_t getclosestcol(int32_t r, int32_t g, int32_t b)
// Coefficients of a 2D line in implicit form a*x + b*y + c = 0
// (equation() fills these in; sameside() evaluates a*x + b*y + c).
typedef struct s_equation {
float a, b, c;
} _equation;
// 2D point with single-precision float coordinates.
typedef struct s_point2d {
float x, y;
} _point2d;
int32_t wallvisible(int32_t x, int32_t y, int16_t wallnum);
#define STATUS2DSIZ 144

View file

@ -72,6 +72,13 @@ void G_AddDefModule(const char *buffer);
void G_AddClipMap(const char *buffer);
#endif
// Duplicates a file name into a newly Xmalloc'd buffer of BMAX_PATH bytes.
// The copy is performed by Bstrncpyz() with BMAX_PATH as the limit, and the
// value returned is whatever Bstrncpyz() returns.
// NOTE(review): presumably the caller is responsible for freeing the buffer.
static inline char *dup_filename(const char *fn)
{
    char * const copy = (char *) Xmalloc(BMAX_PATH);

    return Bstrncpyz(copy, fn, BMAX_PATH);
}
int32_t getatoken(scriptfile *sf, const tokenlist *tl, int32_t ntokens);
int32_t G_CheckCmdSwitch(int32_t argc, const char **argv, const char *str);
@ -92,34 +99,30 @@ int32_t maybe_append_ext(char *wbuf, int32_t wbufsiz, const char *fn, const char
// individual x/y(/z) distances are passed instead.
// Distance estimate from separate x/y deltas.
// With hi = max(|dx|,|dy|) and lo = min(|dx|,|dy|), returns
//   hi - hi/32 - hi/128 + t/4 + t/64,  where t = lo + lo/2
// using shifts only. NOTE(review): this appears to approximate the
// Euclidean length sqrt(dx*dx + dy*dy) -- confirm intended accuracy.
static inline int32_t sepldist(const int32_t dx, const int32_t dy)
{
    vec2_t d = { klabs(dx), klabs(dy) };

    // keep the larger magnitude in d.x
    if (d.x < d.y)
        swaplong(&d.x, &d.y);

    d.y += (d.y>>1);

    return d.x - (d.x>>5) - (d.x>>7) + (d.y>>2) + (d.y>>6);
}
// dz: in Build coordinates
static inline int32_t sepdist(int32_t dx, int32_t dy, int32_t dz)
static inline int32_t sepdist(const int32_t dx, const int32_t dy, const int32_t dz)
{
int32_t x = klabs(dx);
int32_t y = klabs(dy);
int32_t z = klabs(dz>>4);
vec3_t d ={ klabs(dx), klabs(dy), klabs(dz>>4) };
if (x < y)
swaplong(&x, &y);
if (x < z)
swaplong(&x, &z);
if (d.x < d.y)
swaplong(&d.x, &d.y);
{
int32_t t = y + z;
return x - (x>>4) + (t>>2) + (t>>3);
}
if (d.x < d.z)
swaplong(&d.x, &d.z);
d.y += d.z;
return d.x - (d.x>>4) + (d.y>>2) + (d.y>>3);
}
int32_t ldist(const spritetype *s1, const spritetype *s2);

View file

@ -128,8 +128,6 @@ extern void M32_DrawRoomsAndMasks(void);
extern void yax_tweakpicnums(int32_t bunchnum, int32_t cf, int32_t restore); // editor-only
extern void M32_ResetFakeRORTiles(void);
extern int32_t kopen4loadfrommod(const char *filename, char searchfirst);
// set to 1 to enable:
#define M32_UNDO 1
extern int32_t map_revision;

View file

@ -21,6 +21,13 @@ _scaler(25) _scaler(26) _scaler(27) _scaler(28)\
_scaler(29) _scaler(30) _scaler(31)
extern int32_t dmval;
#if !defined(NOASM) && defined __cplusplus
extern "C" {
#endif
extern int32_t reciptable[2048], fpuasm;
#if !defined(NOASM) && defined __cplusplus
};
#endif
// break the C version of divscale out from the others
// because asm version overflows in drawmapview()
@ -193,6 +200,13 @@ void clearbufbyte(void *D, int32_t c, int32_t a);
void copybufbyte(const void *S, void *D, int32_t c);
void copybufreverse(const void *S, void *D, int32_t c);
// Portable C version of krecipasm: table-driven lookup keyed on the bit
// pattern of (float)i (Ken Silverman's trick).
// NOTE(review): presumably yields a fixed-point approximate reciprocal of i;
// the scale depends on how reciptable[] is filled -- confirm at the init site.
static inline int32_t krecipasm(int32_t i)
{
    // Read the float's bits through a union instead of *(int32_t *)&f, which
    // breaks the strict-aliasing rule. All bit manipulation is done unsigned
    // so it does not depend on signed overflow or on how negative values are
    // right-shifted.
    union { float f; uint32_t u; } pun;
    uint32_t bits, shift;
    int32_t signmask;

    pun.f = (float) i;
    bits = pun.u;

    // exponent minus the bias 127 (0x3f800000 == 127<<23), reduced to 0..31
    shift = ((bits - 0x3f800000u) >> 23) & 31;

    // -1 when the float's sign bit is set, 0 otherwise
    signmask = -(int32_t) (bits >> 31);

    // bits 12..22 (the top 11 mantissa bits) select one of 2048 table entries
    return (reciptable[(bits >> 12) & 2047] >> shift) ^ signmask;
}
#endif
#undef qw

View file

@ -52,4 +52,11 @@ void swapbuf4(void* a, void* b, int32_t c);
void clearbufbyte(void *D, int32_t c, int32_t a);
void copybufbyte(const void *S, void *D, int32_t c);
void copybufreverse(const void *S, void *D, int32_t c);
// Portable C version of krecipasm: table-driven lookup keyed on the bit
// pattern of (float)i (Ken Silverman's trick).
// NOTE(review): presumably yields a fixed-point approximate reciprocal of i;
// the scale depends on how reciptable[] is filled -- confirm at the init site.
static inline int32_t krecipasm(int32_t i)
{
    // Read the float's bits through a union instead of *(int32_t *)&f, which
    // breaks the strict-aliasing rule. All bit manipulation is done unsigned
    // so it does not depend on signed overflow or on how negative values are
    // right-shifted.
    union { float f; uint32_t u; } pun;
    uint32_t bits, shift;
    int32_t signmask;

    pun.f = (float) i;
    bits = pun.u;

    // exponent minus the bias 127 (0x3f800000 == 127<<23), reduced to 0..31
    shift = ((bits - 0x3f800000u) >> 23) & 31;

    // -1 when the float's sign bit is set, 0 otherwise
    signmask = -(int32_t) (bits >> 31);

    // bits 12..22 (the top 11 mantissa bits) select one of 2048 table entries
    return (reciptable[(bits >> 12) & 2047] >> shift) ^ signmask;
}
#endif

View file

@ -249,5 +249,12 @@ static inline int32_t umax(int32_t a, int32_t b) { if ((uint32_t) a < (uint32_t)
static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return a; return b; }
static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return b; return a; }
// Portable C version of krecipasm: table-driven lookup keyed on the bit
// pattern of (float)i (Ken Silverman's trick).
// NOTE(review): presumably yields a fixed-point approximate reciprocal of i;
// the scale depends on how reciptable[] is filled -- confirm at the init site.
static inline int32_t krecipasm(int32_t i)
{
    // Read the float's bits through a union instead of *(int32_t *)&f, which
    // breaks the strict-aliasing rule. All bit manipulation is done unsigned
    // so it does not depend on signed overflow or on how negative values are
    // right-shifted.
    union { float f; uint32_t u; } pun;
    uint32_t bits, shift;
    int32_t signmask;

    pun.f = (float) i;
    bits = pun.u;

    // exponent minus the bias 127 (0x3f800000 == 127<<23), reduced to 0..31
    shift = ((bits - 0x3f800000u) >> 23) & 31;

    // -1 when the float's sign bit is set, 0 otherwise
    signmask = -(int32_t) (bits >> 31);

    // bits 12..22 (the top 11 mantissa bits) select one of 2048 table entries
    return (reciptable[(bits >> 12) & 2047] >> shift) ^ signmask;
}
#endif // __pragmas_ppc_h__
#endif // __pragmas_h__

View file

@ -585,6 +585,17 @@ void copybufreverse(const void *S, void *D, int32_t c);
: "ebx", "edi", "memory", "cc"); \
0; })
// krecipasm(a): GCC-style inline assembly variant (x86).
// Sequence, as written below:
//  - a is stored to the global fpuasm, loaded with fildl and written back
//    with fstps, so fpuasm then holds the single-precision bit pattern of a;
//  - addl/sbbl leave -1 in ebx when a is negative, 0 otherwise;
//  - (bits & 0x007ff000) >> 10 is used as a byte offset into reciptable;
//  - the loaded entry is shifted right arithmetically by %cl, taken from
//    (bits - 0x3f800000) >> 23, and then XORed with the mask in ebx.
// Clobbers ebx and ecx and writes the global fpuasm (hence "memory").
// Keep comments outside the macro body: a // comment placed before a
// line-continuation backslash would absorb the following line.
#define krecipasm(a) \
({ int32_t __a=(a); \
__asm__ __volatile__ ( \
"movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \
"addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \
"movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \
"andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \
"shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \
"sarl %%cl, %%eax; xorl %%ebx, %%eax" \
: "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \
__a; })
//}}}

View file

@ -474,6 +474,32 @@ static __inline void swapchar2(void *a, void *b, int32_t s)
mov[eax], dx
}
}
// krecipasm: MSVC inline-assembly variant (x86).
//0x007ff000 is (2047<<12), i.e. an 11-bit mask at bit 12; 0x3f800000 is (127<<23)
// Steps, as written below:
//  - a is stored to the global fpuasm, loaded with fild and written back
//    with fstp, so fpuasm then holds the single-precision bit pattern of a;
//  - add eax,eax / sbb ebx,ebx leave -1 in ebx when a is negative, else 0;
//  - (bits & 0x007ff000) >> 10 is used as a byte offset into reciptable;
//  - the loaded entry is shifted right arithmetically by cl, taken from
//    (bits - 0x3f800000) >> 23, and then XORed with the mask in ebx.
// There is no return statement; the result is left in eax.
// NOTE(review): relies on the compiler treating eax as the return value.
static inline int32_t krecipasm(int32_t a)
{
_asm
{
push ebx
mov eax, a
mov fpuasm, eax
fild dword ptr fpuasm
add eax, eax
fstp dword ptr fpuasm
sbb ebx, ebx
mov eax, fpuasm
mov ecx, eax
and eax, 0x007ff000
shr eax, 10
sub ecx, 0x3f800000
shr ecx, 23
mov eax, dword ptr reciptable[eax]
sar eax, cl
xor eax, ebx
pop ebx
}
}
//}}}
#endif // __pragmas_x86_h__

View file

@ -11,8 +11,6 @@
#ifdef ENGINE_USING_A_C
int32_t krecip(int32_t num); // from engine.c
#define BITSOFPRECISION 3
#define BITSOFPRECISIONPOW 8
@ -23,8 +21,7 @@ int32_t krecip(int32_t num); // from engine.c
//#define USE_SATURATE_VPLC_TRANS
extern intptr_t asm1, asm2, asm3, asm4;
extern int32_t fpuasm, globalx3, globaly3;
extern void *reciptable;
extern int32_t globalx3, globaly3;
#ifdef USE_ASM64
# define A64_ASSIGN(var, val) var=val
@ -40,8 +37,10 @@ char *a64_gtrans;
#endif
static int32_t bpl, transmode = 0;
static int32_t glogx, glogy, gbxinc, gbyinc, gpinc;
static char *gbuf, *gpal, *ghlinepal, *gtrans;
int32_t glogx, glogy, gpinc;
static int32_t gbxinc, gbyinc;
char *gbuf;
static char *gpal, *ghlinepal, *gtrans;
static char *gpal2;
//Global variable functions
@ -71,24 +70,26 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by,
const int32_t bxinc = gbxinc, byinc = gbyinc;
const int32_t logx = glogx, logy = glogy;
char *pp = (char *)p;
const uint8_t logx32 = 32-logx, logy32 = 32-logy;
for (; cnt>=4; cnt -= 4)
#ifdef CLASSIC_SLICE_BY_4
for (; cnt>=4; cnt-=4, pp-=4)
{
*pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]]; pp--;
*pp = palptr[buf[(((bx-bxinc)>>(32-logx))<<logy)+((by-byinc)>>(32-logy))]]; pp--;
*pp = palptr[buf[(((bx-(bxinc<<1))>>(32-logx))<<logy)+((by-(byinc<<1))>>(32-logy))]]; pp--;
*pp = palptr[buf[(((bx-(bxinc*3))>>(32-logx))<<logy)+((by-(byinc*3))>>(32-logy))]]; pp--;
*pp = palptr[buf[((bx>>logx32)<<logy)+(by>>logy32)]];
*(pp-1) = palptr[buf[(((bx-bxinc)>>logx32)<<logy)+((by-byinc)>>logy32)]];
*(pp-2) = palptr[buf[(((bx-(bxinc<<1))>>logx32)<<logy)+((by-(byinc<<1))>>logy32)]];
*(pp-3) = palptr[buf[(((bx-(bxinc*3))>>logx32)<<logy)+((by-(byinc*3))>>logy32)]];
bx -= bxinc<<2;
by -= byinc<<2;
}
#endif
for (; cnt>=0; cnt--)
for (; cnt>=0; cnt--, pp--)
{
*pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]];
*pp = palptr[buf[((bx>>logx32)<<logy)+(by>>logy32)]];
bx -= bxinc;
by -= byinc;
pp--;
}
}
}
@ -96,11 +97,6 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by,
///// Sloped ceiling/floor vertical line functions /////
// Stores the parameters later read by slopevlin() into file-level state:
//   glogx <- low 8 bits of logylogx, glogy <- logylogx shifted right by 8,
//   gbuf  <- bufplc reinterpreted as a char pointer, gpinc <- pinc.
void setupslopevlin(int32_t logylogx, intptr_t bufplc, int32_t pinc)
{
glogx = (logylogx&255); glogy = (logylogx>>8);
gbuf = (char *)bufplc; gpinc = pinc;
}
void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t bx, int32_t by)
{
intptr_t *slopalptr;
@ -111,7 +107,7 @@ void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t
slopalptr = (intptr_t *)slopaloffs;
for (; cnt>0; cnt--)
{
i = krecip(bz>>6); bz += bzinc;
i = krecipasm(bz>>6); bz += bzinc;
u = bx+(inthi_t)globalx3*i;
v = by+(inthi_t)globaly3*i;
(*(char *)p) = *(char *)(((intptr_t)slopalptr[0])+gbuf[((u>>(32-glogx))<<glogy)+(v>>(32-glogy))]);
@ -225,21 +221,15 @@ typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
#endif
// cnt >= 1
void vlineasm4(int32_t cnt, char *p)
{
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
void vlineasm4nlogy(int32_t cnt, char *p, char *const *pal, char *const *buf,
#ifdef USE_VECTOR_EXT
uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
uint32_vec4 vplc, const uint32_vec4 vinc)
#else
const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
uint32_t * vplc, const int32_t *vinc)
#endif
const int32_t logy = glogy, ourbpl = bpl;
if (!logy)
{
const int32_t ourbpl = bpl;
do
{
p[0] = pal[0][buf[0][ourmulscale32(vplc[0], globaltilesizy)]];
@ -256,10 +246,29 @@ void vlineasm4(int32_t cnt, char *p)
vplc[3] += vinc[3];
#endif
p += ourbpl;
}
while (--cnt);
} while (--cnt);
goto skip;
Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}
// cnt >= 1
void vlineasm4(int32_t cnt, char *p)
{
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT
uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
#else
const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
#endif
const int32_t logy = glogy, ourbpl = bpl;
if (!logy) // I had an assert on logy for quite a while that NEVER triggered...
{
vlineasm4nlogy(cnt, p, pal, buf, vplc, vinc);
return;
}
// just fucking shoot me
@ -313,11 +322,7 @@ void vlineasm4(int32_t cnt, char *p)
p += ourbpl;
}
skip:
vplce[0] = vplc[0];
vplce[1] = vplc[1];
vplce[2] = vplc[2];
vplce[3] = vplc[3];
Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}
#ifdef USE_SATURATE_VPLC
@ -450,10 +455,7 @@ void mvlineasm4(int32_t cnt, char *p)
while (--cnt);
}
vplce[0] = vplc[0];
vplce[1] = vplc[1];
vplce[2] = vplc[2];
vplce[3] = vplc[3];
Bmemcpy(&vplce[0], &vplc[0], sizeof(uint32_t) * 4);
}
#ifdef USE_ASM64

View file

@ -163,11 +163,9 @@ void allocache(intptr_t *newhandle, int32_t newbytes, char *newlockptr)
*newhandle = (intptr_t)Xmalloc(newbytes);
}
#else
static void inc_and_check_cacnum(void)
static inline void inc_and_check_cacnum(void)
{
cacnum++;
if (cacnum > MAXCACHEOBJECTS)
if (++cacnum > MAXCACHEOBJECTS)
reportandexit("Too many objects in cache! (cacnum > MAXCACHEOBJECTS)");
}

View file

@ -173,15 +173,6 @@ int32_t fnlist_getnames(fnlist_t *fnl, const char *dirname, const char *pattern,
////
// Copies fn into a newly Xmalloc'd buffer of BMAX_PATH bytes using
// Bstrncpyz() and returns that call's result.
// NOTE(review): presumably the caller owns and frees the buffer.
char *dup_filename(const char *fn)
{
char *buf = (char *)Xmalloc(BMAX_PATH);
return Bstrncpyz(buf, fn, BMAX_PATH);
}
// Copy FN to WBUF and append an extension if it's not there, which is checked
// case-insensitively.
// Returns: 1 if not all characters could be written to WBUF, 0 else.

View file

@ -2048,31 +2048,6 @@ static inline int32_t msqrtasm(int32_t c)
}
}
// krecipasm: MSVC inline-assembly variant (x86).
//0x007ff000 is (2047<<12), i.e. an 11-bit mask at bit 12; 0x3f800000 is (127<<23)
// Steps, as written below:
//  - a is stored to the global fpuasm, loaded with fild and written back
//    with fstp, so fpuasm then holds the single-precision bit pattern of a;
//  - add eax,eax / sbb ebx,ebx leave -1 in ebx when a is negative, else 0;
//  - (bits & 0x007ff000) >> 10 is used as a byte offset into reciptable;
//  - the loaded entry is shifted right arithmetically by cl, taken from
//    (bits - 0x3f800000) >> 23, and then XORed with the mask in ebx.
// There is no return statement; the result is left in eax.
// NOTE(review): relies on the compiler treating eax as the return value.
static inline int32_t krecipasm(int32_t a)
{
_asm
{
push ebx
mov eax, a
mov fpuasm, eax
fild dword ptr fpuasm
add eax, eax
fstp dword ptr fpuasm
sbb ebx, ebx
mov eax, fpuasm
mov ecx, eax
and eax, 0x007ff000
shr eax, 10
sub ecx, 0x3f800000
shr ecx, 23
mov eax, dword ptr reciptable[eax]
sar eax, cl
xor eax, ebx
pop ebx
}
}
static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
_asm
@ -2163,18 +2138,6 @@ static inline int32_t getkensmessagecrc(void *b)
: "=a" (__r) : "c" (__c) : "edx","ebx", "cc"); \
__r; })
// krecipasm(a): GCC-style inline assembly variant (x86).
// Sequence, as written below:
//  - a is stored to the global fpuasm, loaded with fildl and written back
//    with fstps, so fpuasm then holds the single-precision bit pattern of a;
//  - addl/sbbl leave -1 in ebx when a is negative, 0 otherwise;
//  - (bits & 0x007ff000) >> 10 is used as a byte offset into reciptable;
//  - the loaded entry is shifted right arithmetically by %cl, taken from
//    (bits - 0x3f800000) >> 23, and then XORed with the mask in ebx.
// Clobbers ebx and ecx and writes the global fpuasm (hence "memory").
// Keep comments outside the macro body: a // comment placed before a
// line-continuation backslash would absorb the following line.
#define krecipasm(a) \
({ int32_t __a=(a); \
__asm__ __volatile__ ( \
"movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \
"addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \
"movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \
"andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \
"shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \
"sarl %%cl, %%eax; xorl %%ebx, %%eax" \
: "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \
__a; })
#define getclipmask(a,b,c,d) \
({ int32_t __a=(a), __b=(b), __c=(c), __d=(d); \
__asm__ __volatile__ ("sarl $31, %%eax; addl %%ebx, %%ebx; adcl %%eax, %%eax; " \
@ -2252,14 +2215,6 @@ static inline int32_t msqrtasm(uint32_t c)
return a;
}
// Portable C version of krecipasm: table-driven lookup keyed on the bit
// pattern of (float)i (Ken Silverman's trick).
// NOTE(review): presumably yields a fixed-point approximate reciprocal of i;
// the scale depends on how reciptable[] is filled -- confirm at the init site.
static inline int32_t krecipasm(int32_t i)
{
    // Read the float's bits through a union instead of *(int32_t *)&f, which
    // breaks the strict-aliasing rule. All bit manipulation is done unsigned
    // so it does not depend on signed overflow or on how negative values are
    // right-shifted.
    union { float f; uint32_t u; } pun;
    uint32_t bits, shift;
    int32_t signmask;

    pun.f = (float) i;
    bits = pun.u;

    // exponent minus the bias 127 (0x3f800000 == 127<<23), reduced to 0..31
    shift = ((bits - 0x3f800000u) >> 23) & 31;

    // -1 when the float's sign bit is set, 0 otherwise
    signmask = -(int32_t) (bits >> 31);

    // bits 12..22 (the top 11 mantissa bits) select one of 2048 table entries
    return (reciptable[(bits >> 12) & 2047] >> shift) ^ signmask;
}
static inline int32_t getclipmask(int32_t a, int32_t b, int32_t c, int32_t d)
{
// Ken did this
@ -2467,9 +2422,7 @@ void fade_screen_black(int32_t moreopaquep)
{
#ifdef USE_OPENGL
if (getrendermode() >= REND_POLYMOST)
{
fullscreen_tint_gl(0,0,0, moreopaquep ? 168 : 84);
}
else
#endif
{
@ -2481,10 +2434,19 @@ void fade_screen_black(int32_t moreopaquep)
const char *const trans = getblendtab(0);
const int32_t shiftamnt = ((!!moreopaquep)*8);
const int32_t dimprod = xdim*ydim;
int32_t i = 0;
int32_t i;
#ifdef CLASSIC_SLICE_BY_4
for (; i<dimprod-4; i+=4)
{
p[i] = trans[p[i]<<shiftamnt];
p[i+1] = trans[p[i+1]<<shiftamnt];
p[i+2] = trans[p[i+2]<<shiftamnt];
p[i+3] = trans[p[i+3]<<shiftamnt];
}
#endif
for (i=0; i<dimprod; i++)
for (; i<dimprod; i++)
p[i] = trans[p[i]<<shiftamnt];
}
enddrawing();
@ -4181,7 +4143,7 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_
//
// ceilspritehline (internal)
//
static inline void ceilspritehline(int32_t x2, int32_t y)
static void ceilspritehline(int32_t x2, int32_t y)
{
int32_t x1, v, bx, by;
@ -4219,7 +4181,7 @@ static inline void ceilspritehline(int32_t x2, int32_t y)
//
// ceilspritescan (internal)
//
static inline void ceilspritescan(int32_t x1, int32_t x2)
static void ceilspritescan(int32_t x1, int32_t x2)
{
int32_t x, y1, y2, twall, bwall;
@ -4257,9 +4219,19 @@ static inline void ceilspritescan(int32_t x1, int32_t x2)
static int32_t gglogx, gglogy, ggpinc;
static char *ggbuf, *ggpal;
static void setupslopevlin_alsotrans(int32_t logylogx, intptr_t bufplc, int32_t pinc)
#ifdef ENGINE_USING_A_C
extern int32_t glogx, glogy, gpinc;
extern char *gbuf;
#endif
static inline void setupslopevlin_alsotrans(int32_t logylogx, intptr_t bufplc, int32_t pinc)
{
#ifdef ENGINE_USING_A_C
glogx = (logylogx&255); glogy = (logylogx>>8);
gbuf = (char *) bufplc; gpinc = pinc;
#else
setupslopevlin(logylogx, bufplc, pinc);
#endif
gglogx = (logylogx&255); gglogy = (logylogx>>8);
ggbuf = (char *)bufplc; ggpinc = pinc;
ggpal = palookup[globalpal] + getpalookupsh(0);
@ -5310,8 +5282,8 @@ static void drawvox(int32_t dasprx, int32_t daspry, int32_t dasprz, int32_t dasp
daxscale = scale(daxscale,xdimenscale,xdimen<<8);
dayscale = scale(dayscale,mulscale16(xdimenscale,viewingrangerecip),xdimen<<8);
daxscalerecip = tabledivide32_noinline(1<<30, daxscale);
dayscalerecip = tabledivide32_noinline(1<<30, dayscale);
daxscalerecip = divideu32_noinline(1<<30, daxscale);
dayscalerecip = divideu32_noinline(1<<30, dayscale);
longptr = (int32_t *)davoxptr;
daxsiz = B_LITTLE32(longptr[0]); daysiz = B_LITTLE32(longptr[1]); //dazsiz = B_LITTLE32(longptr[2]);
@ -5772,21 +5744,21 @@ draw_as_face_sprite:
#ifdef CLASSIC_SLICE_BY_4
for (; x<=rx-4; x+=4)
{
uwall[x] = max(startumost[x+windowx1]-windowy1, (int16_t) startum);
uwall[x+1] = max(startumost[x+windowx1+1]-windowy1, (int16_t) startum);
uwall[x+2] = max(startumost[x+windowx1+2]-windowy1, (int16_t) startum);
uwall[x+3] = max(startumost[x+windowx1+3]-windowy1, (int16_t) startum);
uwall[x] = max(startumost[windowx1+x]-windowy1, (int16_t) startum);
uwall[x+1] = max(startumost[windowx1+x+1]-windowy1, (int16_t) startum);
uwall[x+2] = max(startumost[windowx1+x+2]-windowy1, (int16_t) startum);
uwall[x+3] = max(startumost[windowx1+x+3]-windowy1, (int16_t) startum);
dwall[x] = min(startdmost[x+windowx1]-windowy1, (int16_t) startdm);
dwall[x+1] = min(startdmost[x+windowx1+1]-windowy1, (int16_t) startdm);
dwall[x+2] = min(startdmost[x+windowx1+2]-windowy1, (int16_t) startdm);
dwall[x+3] = min(startdmost[x+windowx1+3]-windowy1, (int16_t) startdm);
dwall[x] = min(startdmost[windowx1+x]-windowy1, (int16_t) startdm);
dwall[x+1] = min(startdmost[windowx1+x+1]-windowy1, (int16_t) startdm);
dwall[x+2] = min(startdmost[windowx1+x+2]-windowy1, (int16_t) startdm);
dwall[x+3] = min(startdmost[windowx1+x+3]-windowy1, (int16_t) startdm);
}
#endif
for (; x<=rx; x++)
{
uwall[x] = max(startumost[x+windowx1]-windowy1,(int16_t)startum);
dwall[x] = min(startdmost[x+windowx1]-windowy1,(int16_t)startdm);
uwall[x] = max(startumost[windowx1+x]-windowy1,(int16_t)startum);
dwall[x] = min(startdmost[windowx1+x]-windowy1,(int16_t)startdm);
}
daclip = 0;
@ -5814,7 +5786,7 @@ draw_as_face_sprite:
k = smoststart[i] - xb1[j];
x = dalx2;
#ifdef CLASSIC_SLICE_BY_4 // ok, this one is really by 2 ;)
for (x=dalx2; x<=darx2-2; x+=2)
for (; x<=darx2-2; x+=2)
{
if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x];
if (smost[k+x+1] > uwall[x+1]) uwall[x+1] = smost[k+x+1];
@ -6014,22 +5986,54 @@ draw_as_face_sprite:
{
int32_t hplc = divscale19(xdimenscale,sy1);
const int32_t hplc2 = divscale19(xdimenscale,sy2);
int32_t hinc = sx2-sx1 ? (hplc2-hplc)/(sx2-sx1) : 0;
const int32_t idiv = sx2-sx1;
int32_t hinc[4] ={ idiv ? tabledivide32(hplc2-hplc, idiv) : 0 };
#ifdef HIGH_PRECISION_SPRITE
const float cc = ((1<<19)*(float)xdimen*yxaspect)/320.f;
float hplcf = cc/sy1;
const float hincf = sx2-sx1 ? (cc/sy2 - hplcf)/(sx2-sx1) : 0;
const float cc = ((1<<19)*fxdimen*(float)yxaspect) * (1.f/320.f);
const float loopcc = ((cstat&8) ? -1 : 1)*((float)(1<<30)*(1<<24))
/ (yspan*tspr->yrepeat);
float hplcf = cc/sy1;
float hincf[4] = {idiv ? (cc/sy2 - hplcf)/idiv : 0};
#ifdef CLASSIC_SLICE_BY_4
hincf[1] = hincf[0] * 2.f;
hincf[2] = hincf[0] * 3.f;
hincf[3] = hincf[0] * 4.f;
#endif // CLASSIC_SLICE_BY_4
#endif // HIGH_PRECISION_SPRITE
#ifdef CLASSIC_SLICE_BY_4
hinc[1] = hinc[0]<<1;
hinc[2] = hinc[0]*3;
hinc[3] = hinc[0]<<2;
#endif
for (i=sx1; i<=sx2; i++)
i = sx1;
#ifdef CLASSIC_SLICE_BY_4
for (; i<=sx2-4; i+=4)
{
swall[i] = (krecipasm(hplc)<<2);
hplc += hinc;
swall[i+1] = (krecipasm(hplc+hinc[0])<<2);
swall[i+2] = (krecipasm(hplc+hinc[1])<<2);
swall[i+3] = (krecipasm(hplc+hinc[2])<<2);
hplc += hinc[3];
#ifdef HIGH_PRECISION_SPRITE
swallf[i] = loopcc/hplcf;
hplcf += hincf;
swallf[i+1] = loopcc/(hplcf+hincf[0]);
swallf[i+2] = loopcc/(hplcf+hincf[1]);
swallf[i+3] = loopcc/(hplcf+hincf[2]);
hplcf += hincf[3];
#endif // HIGH_PRECISION_SPRITE
}
#endif // CLASSIC_SLICE_BY_4
for (; i<=sx2; i++)
{
swall[i] = (krecipasm(hplc)<<2);
hplc += hinc[0];
#ifdef HIGH_PRECISION_SPRITE
swallf[i] = loopcc/hplcf;
hplcf += hincf[0];
#endif
}
}
@ -6117,12 +6121,30 @@ draw_as_face_sprite:
break;
case 1:
k = smoststart[i] - xb1[j];
for (x=dalx2; x<=darx2; x++)
x = dalx2;
#ifdef CLASSIC_SLICE_BY_4
for (; x<=darx2-2; x+=2)
{
if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x];
if (smost[k+x+1] > uwall[x+1]) uwall[x+1] = smost[k+x+1];
}
#endif
for (; x<=darx2; x++)
if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x];
break;
case 2:
k = smoststart[i] - xb1[j];
for (x=dalx2; x<=darx2; x++)
x = dalx2;
#ifdef CLASSIC_SLICE_BY_4
for (; x<=darx2-4; x+=4)
{
if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x];
if (smost[k+x+1] < dwall[x+1]) dwall[x+1] = smost[k+x+1];
if (smost[k+x+2] < dwall[x+2]) dwall[x+2] = smost[k+x+2];
if (smost[k+x+3] < dwall[x+3]) dwall[x+3] = smost[k+x+3];
}
#endif
for (; x<=darx2; x++)
if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x];
break;
}
@ -6513,12 +6535,30 @@ draw_as_face_sprite:
break;
case 1:
k = smoststart[i] - xb1[j];
for (x=dalx2; x<=darx2; x++)
x = dalx2;
#ifdef CLASSIC_SLICE_BY_4
for (; x<=darx2-2; x+=2)
{
if (smost[k+x] > lwall[x]) lwall[x] = smost[k+x];
if (smost[k+x+1] > lwall[x+1]) lwall[x+1] = smost[k+x+1];
}
#endif
for (; x<=darx2; x++)
if (smost[k+x] > lwall[x]) lwall[x] = smost[k+x];
break;
case 2:
k = smoststart[i] - xb1[j];
for (x=dalx2; x<=darx2; x++)
x = dalx2;
#ifdef CLASSIC_SLICE_BY_4
for (; x<=darx2-4; x+=4)
{
if (smost[k+x] < swall[x]) swall[x] = smost[k+x];
if (smost[k+x+1] < swall[x+1]) swall[x+1] = smost[k+x+1];
if (smost[k+x+2] < swall[x+2]) swall[x+2] = smost[k+x+2];
if (smost[k+x+3] < swall[x+3]) swall[x+3] = smost[k+x+3];
}
#endif
for (; x<=darx2; x++)
if (smost[k+x] < swall[x]) swall[x] = smost[k+x];
break;
}
@ -7871,37 +7911,13 @@ static void dosetaspect(void)
oxyaspect = xyaspect;
j = xyaspect*320;
horizlookup2[horizycent-1] = divscale26(131072,j);
for (i=0; i < horizycent-1-4; i += 4)
{
horizlookup[i] = divscale28(1, i -(horizycent-1));
horizlookup[i+1] = divscale28(1, i+1-(horizycent-1));
horizlookup[i+2] = divscale28(1, i+2-(horizycent-1));
horizlookup[i+3] = divscale28(1, i+3-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
horizlookup2[i+1] = divscale14(klabs(horizlookup[i+1]), j);
horizlookup2[i+2] = divscale14(klabs(horizlookup[i+2]), j);
horizlookup2[i+3] = divscale14(klabs(horizlookup[i+3]), j);
}
for (; i < horizycent-1; i++)
for (i=0; i < horizycent-1; i++)
{
horizlookup[i] = divscale28(1, i-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
}
for (i=horizycent; i < ydim*4-1-4; i += 4)
{
horizlookup[i] = divscale28(1, i -(horizycent-1));
horizlookup[i+1] = divscale28(1, i+1-(horizycent-1));
horizlookup[i+2] = divscale28(1, i+2-(horizycent-1));
horizlookup[i+3] = divscale28(1, i+3-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
horizlookup2[i+1] = divscale14(klabs(horizlookup[i+1]), j);
horizlookup2[i+2] = divscale14(klabs(horizlookup[i+2]), j);
horizlookup2[i+3] = divscale14(klabs(horizlookup[i+3]), j);
}
for (; i < ydim*4-1; i++)
for (i=horizycent; i < ydim*4-1; i++)
{
horizlookup[i] = divscale28(1, i-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
@ -9331,21 +9347,23 @@ int32_t drawrooms(int32_t daposx, int32_t daposy, int32_t daposz,
// Builds the implicit-form line a*x + b*y + c = 0 through (x1,y1) and (x2,y2).
//  - x1 == x2 (vertical line): a = 1, b = 0, c = -x1, i.e. x - x1 = 0.
//  - otherwise: a is the slope, b = -1, c = y1 - a*x1, i.e. a*x - y + c = 0.
static inline _equation equation(float x1, float y1, float x2, float y2)
{
    _equation ret;
    const float f = x2-x1;

    // vertical: handled first so the division below never sees a zero divisor
    if (f == 0.f)
    {
        ret.a = 1;
        ret.b = 0;
        ret.c = -x1;

        return ret;
    }

    ret.a = (float) (y2 - y1)/f;
    ret.b = -1;
    ret.c = (y1 - (ret.a * x1));

    return ret;
}
int32_t wallvisible(int32_t x, int32_t y, int16_t wallnum)
@ -9436,21 +9454,11 @@ static inline void drawmaskleaf(_maskleaf* wall)
}
#endif
static inline int32_t sameside(const _equation *eq, const _point2d *p1, const _point2d *p2)
static inline int32_t sameside(const _equation *eq, const vec2f_t *p1, const vec2f_t *p2)
{
float sign1, sign2;
sign1 = eq->a * p1->x + eq->b * p1->y + eq->c;
sign2 = eq->a * p2->x + eq->b * p2->y + eq->c;
sign1 = sign1 * sign2;
if (sign1 > 0)
{
//OSD_Printf("SAME SIDE !\n");
return 1;
}
//OSD_Printf("OPPOSITE SIDE !\n");
return 0;
const float sign1 = (eq->a * p1->x) + (eq->b * p1->y) + eq->c;
const float sign2 = (eq->a * p2->x) + (eq->b * p2->y) + eq->c;
return (sign1 * sign2) > 0.f;
}
// x1, y1: in/out
@ -9621,7 +9629,7 @@ killsprite:
}
#endif
{
_point2d pos;
vec2f_t pos;
pos.x = fglobalposx;
pos.y = fglobalposy;
@ -9630,7 +9638,7 @@ killsprite:
// Writing e.g. "while (maskwallcnt--)" is wrong!
while (maskwallcnt)
{
_point2d dot, dot2, middle;
vec2f_t dot, dot2, middle;
// PLAG: sorting stuff
_equation maskeq, p1eq, p2eq;
@ -9658,7 +9666,7 @@ killsprite:
i--;
if (tspriteptr[i] != NULL)
{
_point2d spr;
vec2f_t spr;
const spritetype *tspr = tspriteptr[i];
spr.x = (float)tspr->x;
@ -11551,11 +11559,10 @@ static const char *E_GetArtFileName(int32_t tilefilei)
//<-1: per-map ART issue
static int32_t E_ReadArtFile(int32_t tilefilei)
{
int32_t fil;
const char *fn = E_GetArtFileName(tilefilei);
const int32_t permap = (tilefilei >= MAXARTFILES_BASE); // is it a per-map ART file?
int16_t *tilesizx, *tilesizy;
int32_t fil;
if ((fil = kopen4load(fn,0)) != -1)
{
@ -11742,28 +11749,6 @@ void loadtile(int16_t tilenume)
}
#endif
// dummy tiles for highres replacements and tilefromtexture definitions
if (faketilesiz[tilenume])
{
if (faketilesiz[tilenume] == -1)
{
walock[tilenume] = 255; // permanent tile
allocache(&waloff[tilenume], dasiz, &walock[tilenume]);
Bmemset((char *)waloff[tilenume],0,dasiz);
}
else if (faketiledata[tilenume] != NULL)
{
walock[tilenume] = 255;
allocache(&waloff[tilenume], dasiz, &walock[tilenume]);
LZ4_decompress_fast(faketiledata[tilenume], (char *)waloff[tilenume], dasiz);
Bfree(faketiledata[tilenume]);
faketiledata[tilenume] = NULL;
}
faketimerhandler();
return;
}
// Allocate storage if necessary.
if (waloff[tilenume] == 0)
{
@ -11771,6 +11756,16 @@ void loadtile(int16_t tilenume)
allocache(&waloff[tilenume],dasiz,&walock[tilenume]);
}
// dummy tiles for highres replacements and tilefromtexture definitions
if (faketilesiz[tilenume])
{
if (faketilesiz[tilenume] != -1 && faketiledata[tilenume] != NULL)
LZ4_decompress_fast(faketiledata[tilenume], (char *) waloff[tilenume], dasiz);
faketimerhandler();
return;
}
// Potentially switch open ART file.
if (i != artfilnum)
{
@ -12082,14 +12077,6 @@ int32_t ksqrt(uint32_t num)
return nsqrtasm(num);
}
//
// krecip
//
// Out-of-line wrapper: forwards num to krecipasm() and returns its result
// unchanged.
int32_t krecip(int32_t num)
{
return krecipasm(num);
}
#ifdef LUNATIC
int32_t Mulscale(int32_t a, int32_t b, int32_t sh)
{
@ -13250,14 +13237,14 @@ static int32_t clipsprite_try(const spritetype *spr, int32_t xmin, int32_t ymin,
if ((spr->cstat&48)!=32) // face/wall sprite
{
int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT;
maxcorrection = tabledivide32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1);
maxcorrection = divideu32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1);
}
else // floor sprite
{
int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT;
int32_t tempint2 = clipmapinfo.sector[k].CM_YREPEAT;
maxcorrection = max(tabledivide32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1),
tabledivide32_noinline(maxcorrection * (int32_t)spr->yrepeat, tempint2));
maxcorrection = max(divideu32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1),
divideu32_noinline(maxcorrection * (int32_t)spr->yrepeat, tempint2));
}
maxcorrection -= MAXCLIPDIST;
@ -15430,20 +15417,32 @@ void setviewback(void)
//
// Exchanges pixel pairs of a square tile in place, walking each index i from
// siz-1 down to 0 starting at offset i*(siz+1) (the diagonal entry) and
// pairing bytes one step apart on one side with bytes siz apart on the other.
// Tiles whose x and y sizes differ are left untouched.
//
// Every index is visited individually. A variant unrolled four rows at a
// time (i>=3; i-=4) skips the remaining rows whenever siz is not a multiple
// of 4 and performs the single-byte swap on rows where i is even.
// NOTE(review): swapchar2(a, b, s) is presumably a two-byte swap with a
// stride of s on the b side -- its body is not visible here.
void squarerotatetile(int16_t tilenume)
{
    const int32_t siz = tilesiz[tilenume].x;
    int32_t i;

    //supports square tiles only for rotation part
    if (siz != tilesiz[tilenume].y)
        return;

    for (i=siz-1; i>=0; i--)
    {
        char *ptr1 = (char *) (waloff[tilenume]+i*(siz+1));
        char *ptr2 = ptr1;
        int32_t j;

        // an odd index leaves one unpaired byte next to the diagonal
        if (i&1)
            swapchar(--ptr1, (ptr2 -= siz));

        // the remaining bytes are handled two at a time
        for (j=(i>>1)-1; j>=0; j--)
            swapchar2((ptr1 -= 2), (ptr2 -= (siz<<1)), siz);
    }
}

View file

@ -828,7 +828,7 @@ static int32_t kpngrend(const char *kfilebuf, int32_t kfilength,
}
else if (i == (int32_t)LSWAPIB(0x45544c50)) //PLTE (must be before IDAT)
{
paleng = tabledivide32(leng, 3);
paleng = leng/3;
for (i=paleng-1; i>=0; i--) palcol[i] = LSWAPIB((LSWAPIL(*(int32_t *)&filptr[i*3])>>8)|0xff000000);
}
else if (i == (int32_t)LSWAPIB(0x44474b62)) //bKGD (must be after PLTE and before IDAT)

View file

@ -29,10 +29,8 @@ static const char *texcache_errorstr[TEXCACHEERRORS] = {
static pthtyp *texcache_tryart(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int32_t dameth)
{
pthtyp *pth;
const int32_t j = dapicnum&(GLTEXCACHEADSIZ-1);
if (hicprecaching) return NULL;
pthtyp *pth;
// load from art
for (pth=texcache.list[j]; pth; pth=pth->next)
@ -62,10 +60,10 @@ static pthtyp *texcache_tryart(int32_t dapicnum, int32_t dapalnum, int32_t dasha
pthtyp *texcache_fetchmulti(pthtyp *pth, hicreplctyp *si, int32_t dapicnum, int32_t dameth)
{
int32_t i;
const int32_t j = dapicnum&(GLTEXCACHEADSIZ-1);
int32_t i;
for (i = (GLTEXCACHEADSIZ - 1); i >= 0; i--)
for (i = 0; i <= (GLTEXCACHEADSIZ - 1); i++)
{
const pthtyp *pth2;
@ -110,7 +108,7 @@ pthtyp *texcache_fetch(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int3
if (!si)
{
if (dapalnum >= (MAXPALOOKUPS - RESERVEDPALS)) return NULL;
return texcache_tryart(dapicnum, dapalnum, dashade, dameth);
return hicprecaching ? NULL : texcache_tryart(dapicnum, dapalnum, dashade, dameth);
}
/* if palette > 0 && replacement found
@ -141,7 +139,7 @@ pthtyp *texcache_fetch(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int3
if (tilestat == -2) // bad filename
hicclearsubst(dapicnum, dapalnum);
if (drawingskybox) return NULL;
return texcache_tryart(dapicnum, dapalnum, dashade, dameth);
return hicprecaching ? NULL : texcache_tryart(dapicnum, dapalnum, dashade, dameth);
}
}
@ -163,7 +161,7 @@ pthtyp *texcache_fetch(int32_t dapicnum, int32_t dapalnum, int32_t dashade, int3
hicclearsubst(dapicnum, dapalnum);
Bfree(pth);
if (drawingskybox) return NULL;
return texcache_tryart(dapicnum, dapalnum, dashade, dameth);
return hicprecaching ? NULL : texcache_tryart(dapicnum, dapalnum, dashade, dameth);
}
pth->palnum = si->palnum;
@ -180,12 +178,7 @@ static void texcache_closefiles(void)
Bclose(texcache.filehandle);
texcache.filehandle = -1;
}
if (texcache.index)
{
Bfclose(texcache.index);
texcache.index = NULL;
}
MAYBE_FCLOSE_AND_NULL(texcache.index);
}
void texcache_freeptrs(void)
@ -208,10 +201,9 @@ void texcache_freeptrs(void)
}
}
void texcache_clearmemcache(void)
static inline void texcache_clearmemcache(void)
{
Bfree(texcache.memcache.ptr);
texcache.memcache.ptr = NULL;
DO_FREE_AND_NULL(texcache.memcache.ptr);
texcache.memcache.size = -1;
}