Various additional optimizations: add CLASSIC_SLICE_BY_4 mode to unroll some of the loops in a-c, replace integer divisions by a divisor unknown at compile time with usage of libdivide, clean up pragmas further by removing more old stuff that wasn't used anywhere. This is another one of those nasty commits that make people cry. DONT_BUILD.

git-svn-id: https://svn.eduke32.com/eduke32@4658 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2014-10-25 03:29:21 +00:00
parent 6f332e645d
commit 6c6b68d534
37 changed files with 1903 additions and 914 deletions

View file

@ -5,7 +5,7 @@ $(ENGINE_OBJ)/a.$o: $(ENGINE_SRC)/a.$(asm)
$(ENGINE_OBJ)/baselayer.$o: $(ENGINE_SRC)/baselayer.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/baselayer.h $(ENGINE_INC)/build.h $(ENGINE_INC)/osd.h $(ENGINE_OBJ)/baselayer.$o: $(ENGINE_SRC)/baselayer.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/baselayer.h $(ENGINE_INC)/build.h $(ENGINE_INC)/osd.h
$(ENGINE_OBJ)/build.$o: $(ENGINE_SRC)/build.c $(ENGINE_INC)/build.h $(ENGINE_INC)/pragmas.h $(ENGINE_INC)/compat.h $(ENGINE_INC)/baselayer.h $(ENGINE_INC)/editor.h $(ENGINE_OBJ)/build.$o: $(ENGINE_SRC)/build.c $(ENGINE_INC)/build.h $(ENGINE_INC)/pragmas.h $(ENGINE_INC)/compat.h $(ENGINE_INC)/baselayer.h $(ENGINE_INC)/editor.h
$(ENGINE_OBJ)/cache1d.$o: $(ENGINE_SRC)/cache1d.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/cache1d.h $(ENGINE_INC)/pragmas.h $(ENGINE_INC)/baselayer.h $(ENGINE_OBJ)/cache1d.$o: $(ENGINE_SRC)/cache1d.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/cache1d.h $(ENGINE_INC)/pragmas.h $(ENGINE_INC)/baselayer.h
$(ENGINE_OBJ)/compat.$o: $(ENGINE_SRC)/compat.c $(ENGINE_INC)/compat.h $(ENGINE_OBJ)/compat.$o: $(ENGINE_SRC)/compat.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/libdivide.h
$(ENGINE_OBJ)/config.$o: $(ENGINE_SRC)/config.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/osd.h $(ENGINE_INC)/editor.h $(ENGINE_OBJ)/config.$o: $(ENGINE_SRC)/config.c $(ENGINE_INC)/compat.h $(ENGINE_INC)/osd.h $(ENGINE_INC)/editor.h
$(ENGINE_OBJ)/crc32.$o: $(ENGINE_SRC)/crc32.c $(ENGINE_INC)/crc32.h $(ENGINE_OBJ)/crc32.$o: $(ENGINE_SRC)/crc32.c $(ENGINE_INC)/crc32.h
$(ENGINE_OBJ)/defs.$o: $(ENGINE_SRC)/defs.c $(ENGINE_INC)/build.h $(ENGINE_INC)/baselayer.h $(ENGINE_INC)/scriptfile.h $(ENGINE_INC)/compat.h $(ENGINE_OBJ)/defs.$o: $(ENGINE_SRC)/defs.c $(ENGINE_INC)/build.h $(ENGINE_INC)/baselayer.h $(ENGINE_INC)/scriptfile.h $(ENGINE_INC)/compat.h

View file

@ -8,6 +8,8 @@
#include "compat.h" #include "compat.h"
#define CLASSIC_SLICE_BY_4
/** Definitions of high-precision integer types. **/ /** Definitions of high-precision integer types. **/
// Should be used for values that represent coordinates with which calculations // Should be used for values that represent coordinates with which calculations
// like dot product are carried out. Substituting 32-bit ints for these will // like dot product are carried out. Substituting 32-bit ints for these will

View file

@ -92,6 +92,8 @@
#define WITHKPLIB #define WITHKPLIB
#include "libdivide.h"
// Define this to rewrite all 'B' versions to library functions. This // Define this to rewrite all 'B' versions to library functions. This
// is for platforms which give us a standard sort of C library so we // is for platforms which give us a standard sort of C library so we
// link directly. Platforms like PalmOS which don't have a standard C // link directly. Platforms like PalmOS which don't have a standard C
@ -146,31 +148,6 @@
#if _MSC_VER < 1800 #if _MSC_VER < 1800
# define inline __inline # define inline __inline
# ifndef _WIN64
static inline float nearbyintf(float x)
{
uint32_t w1, w2;
__asm fnstcw w1
w2 = w1 | 0x00000020;
__asm
{
fldcw w2
fld x
frndint
fclex
fldcw w1
}
}
# else
#include <math.h>
static inline float nearbyintf(float x)
{
if (x >= 0.0)
return floorf(x + 0.5);
else
return floorf(x - 0.5);
}
# endif
#endif #endif
#include <math.h> #include <math.h>
@ -178,31 +155,8 @@ static inline long lround(double num)
{ {
return (long) (num > 0 ? num + 0.5 : ceil(num - 0.5)); return (long) (num > 0 ? num + 0.5 : ceil(num - 0.5));
} }
#if defined(_WIN64)
#include <emmintrin.h>
static inline int32_t Blrintf(const float x)
{
__m128 xx = _mm_load_ss(&x);
return _mm_cvtss_si32(xx);
}
#else
static inline int32_t Blrintf(const float x)
{
int n;
__asm fld x;
__asm fistp n;
return n;
}
#endif
#else #else
# define longlong(x) x##ll # define longlong(x) x##ll
#define Blrintf lrintf
#endif
#if defined __OPENDINGUX__
//ugly hack
#define nearbyintf rintf
#endif #endif
#if defined(__arm__) #if defined(__arm__)
@ -398,60 +352,23 @@ static inline uint16_t B_UNBUF16(const uint8_t *buf) { return (buf[1] << 8) | (b
static inline uint32_t B_UNBUF32(const uint8_t *buf) { return (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | (buf[0]); } static inline uint32_t B_UNBUF32(const uint8_t *buf) { return (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | (buf[0]); }
static inline uint64_t B_UNBUF64(const uint8_t *buf) { return ((uint64_t)buf[7] << 56) | ((uint64_t)buf[6] << 48) | ((uint64_t)buf[5] << 40) | ((uint64_t)buf[4] << 32) | (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | (buf[0]); } static inline uint64_t B_UNBUF64(const uint8_t *buf) { return ((uint64_t)buf[7] << 56) | ((uint64_t)buf[6] << 48) | ((uint64_t)buf[5] << 40) | ((uint64_t)buf[4] << 32) | (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | (buf[0]); }
#if defined(USE_MSC_PRAGMAS) #if defined(BITNESS64)
static inline void ftol(float f, int32_t *a) #include <emmintrin.h>
static inline int32_t Blrintf(const float x)
{ {
_asm __m128 xx = _mm_load_ss(&x);
{ return _mm_cvtss_si32(xx);
mov eax, a
fld f
fistp dword ptr [eax]
}
} }
#elif defined (_MSC_VER)
static inline void dtol(double d, int32_t *a) static inline int32_t Blrintf(const float x)
{ {
_asm int n;
{ __asm fld x;
mov eax, a __asm fistp n;
fld d return n;
fistp dword ptr [eax]
}
} }
#elif defined(USE_GCC_PRAGMAS)
static inline void ftol(float f, int32_t *a)
{
__asm__ __volatile__(
#if 0 //(__GNUC__ >= 3)
"flds %1; fistpl %0;"
#else #else
"flds %1; fistpl (%0);" #define Blrintf lrintf
#endif
: "=r"(a) : "m"(f) : "memory","cc");
}
static inline void dtol(double d, int32_t *a)
{
__asm__ __volatile__(
#if 0 //(__GNUC__ >= 3)
"fldl %1; fistpl %0;"
#else
"fldl %1; fistpl (%0);"
#endif
: "=r"(a) : "m"(d) : "memory","cc");
}
#else
static inline void ftol(float f, int32_t *a)
{
*a = (int32_t)f;
}
static inline void dtol(double d, int32_t *a)
{
*a = (int32_t)d;
}
#endif #endif
#if B_LITTLE_ENDIAN == 1 #if B_LITTLE_ENDIAN == 1
@ -792,7 +709,7 @@ char *Bgetsystemdrives(void);
int32_t Bfilelength(int32_t fd); int32_t Bfilelength(int32_t fd);
char *Bstrtoken(char *s, const char *delim, char **ptrptr, int32_t chop); char *Bstrtoken(char *s, const char *delim, char **ptrptr, int32_t chop);
char *Bstrtolower(char *str); char *Bstrtolower(char *str);
int32_t Bwildmatch (const char *i, const char *j); #define Bwildmatch wildmatch
#if !defined(_WIN32) #if !defined(_WIN32)
char *Bstrlwr(char *); char *Bstrlwr(char *);

View file

@ -46,16 +46,16 @@ static inline int have_basepal_tint(void)
static inline void hictinting_apply(float *color, int32_t palnum) static inline void hictinting_apply(float *color, int32_t palnum)
{ {
color[0] *= (float)hictinting[palnum].r / 255.f; color[0] *= (float)hictinting[palnum].r * (1.f/255.f);
color[1] *= (float)hictinting[palnum].g / 255.f; color[1] *= (float)hictinting[palnum].g * (1.f/255.f);
color[2] *= (float)hictinting[palnum].b / 255.f; color[2] *= (float)hictinting[palnum].b * (1.f/255.f);
} }
static inline void hictinting_apply_ub(uint8_t *color, int32_t palnum) static inline void hictinting_apply_ub(uint8_t *color, int32_t palnum)
{ {
color[0] = (uint8_t)(color[0] * (float)hictinting[palnum].r / 255.f); color[0] = (uint8_t)(color[0] * (float)hictinting[palnum].r * (1.f/255.f));
color[1] = (uint8_t)(color[1] * (float)hictinting[palnum].g / 255.f); color[1] = (uint8_t)(color[1] * (float)hictinting[palnum].g * (1.f/255.f));
color[2] = (uint8_t)(color[2] * (float)hictinting[palnum].b / 255.f); color[2] = (uint8_t)(color[2] * (float)hictinting[palnum].b * (1.f/255.f));
} }
// texcacheheader cachead.flags bits // texcacheheader cachead.flags bits

View file

@ -2,6 +2,10 @@
extern "C" { extern "C" {
#endif #endif
#ifndef __compat_h__
#include "compat.h"
#endif
typedef struct typedef struct
{ {
FILE *fil; //0:no file open, !=0:open file (either stand-alone or zip) FILE *fil; //0:no file open, !=0:open file (either stand-alone or zip)
@ -52,6 +56,7 @@ static inline int32_t filnamcmp(const char *j, const char *i)
i++, j++; i++, j++;
return *i != '\0'; return *i != '\0';
} }
extern int32_t wildmatch(const char *match, const char *wild);
#ifdef EXTERNC #ifdef EXTERNC
} }

File diff suppressed because it is too large Load diff

View file

@ -30,39 +30,93 @@ extern int32_t dmval;
#define wo(x) ((int16_t)(x)) // word cast #define wo(x) ((int16_t)(x)) // word cast
#define by(x) ((uint8_t)(x)) // byte cast #define by(x) ((uint8_t)(x)) // byte cast
// XXX: Only for testing on x86. Don't use from outside; it doesn't account for #define LIBDIVIDE_ALWAYS
// whether we're compiling for e.g. x86_64 which will never use asm anyway. #define DIVTABLESIZE 16384
//#define USE_ASM_DIVSCALE
extern libdivide_s64pad_t divtable64[DIVTABLESIZE];
extern libdivide_s32pad_t divtable32[DIVTABLESIZE];
#if defined(__arm__) || defined(LIBDIVIDE_ALWAYS)
// Unsigned 32-bit division of n by d performed through libdivide.
// The divider built for the most recently seen divisor is kept in
// function-local statics and is regenerated only when d changes.
static inline uint32_t divideu32(uint32_t n, uint32_t d)
{
    static libdivide_u32_t cached_div;
    static uint32_t cached_d;

    if (d != cached_d)
    {
        // New divisor: remember it and rebuild the divider.
        cached_d = d;
        cached_div = libdivide_u32_gen(d);
    }

    return libdivide_u32_do(n, &cached_div);
}
// Signed 64-bit by 32-bit division through libdivide, result narrowed to int32_t.
// Divisors in [0, DIVTABLESIZE) use their entry in divtable64; any other
// divisor uses a single cached divider that is rebuilt when d changes.
static inline int32_t tabledivide64(int64_t n, int32_t d)
{
    static libdivide_s64_t cached_div;
    static int32_t cached_d;

    // The unsigned compare also sends negative divisors to the cached path.
    if ((unsigned) d < DIVTABLESIZE)
        return libdivide_s64_do(n, (libdivide_s64_t *)&divtable64[d]);

    if (d != cached_d)
    {
        cached_d = d;
        cached_div = libdivide_s64_gen(d);
    }

    return libdivide_s64_do(n, &cached_div);
}
// Signed 32-bit division through libdivide.
// Divisors in [0, DIVTABLESIZE) use their entry in divtable32; any other
// divisor uses a single cached divider that is rebuilt when d changes.
static inline int32_t tabledivide32(int32_t n, int32_t d)
{
    static libdivide_s32_t cached_div;
    static int32_t cached_d;

    // The unsigned compare also sends negative divisors to the cached path.
    if ((unsigned) d < DIVTABLESIZE)
        return libdivide_s32_do(n, (libdivide_s32_t *)&divtable32[d]);

    if (d != cached_d)
    {
        cached_d = d;
        cached_div = libdivide_s32_gen(d);
    }

    return libdivide_s32_do(n, &cached_div);
}
#else
// Plain unsigned 32-bit division, used when the libdivide path above is compiled out.
static inline uint32_t divideu32(uint32_t n, uint32_t d)
{
    const uint32_t quotient = n / d;
    return quotient;
}
// Signed 64-bit by 32-bit division, result narrowed to int32_t.
// Divisors in [0, DIVTABLESIZE) go through their divtable64 entry;
// everything else (including negative d) uses the native divide.
static inline int32_t tabledivide64(int64_t n, int32_t d)
{
    if ((unsigned) d < DIVTABLESIZE)
        return libdivide_s64_do(n, (libdivide_s64_t *) &divtable64[d]);

    return n / d;
}
// Signed 32-bit division.
// Divisors in [0, DIVTABLESIZE) go through their divtable32 entry;
// everything else (including negative d) uses the native divide.
static inline int32_t tabledivide32(int32_t n, int32_t d)
{
    if ((unsigned) d < DIVTABLESIZE)
        return libdivide_s32_do(n, (libdivide_s32_t *) &divtable32[d]);

    return n / d;
}
#endif
extern uint32_t divideu32_noinline(uint32_t n, uint32_t d);
extern int32_t tabledivide32_noinline(int32_t n, int32_t d);
extern int32_t tabledivide64_noinline(int64_t n, int32_t d);
#if !defined USE_ASM_DIVSCALE
#ifdef GEKKO #ifdef GEKKO
#include <math.h> #include <math.h>
static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx) static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx)
{ {
return ldexp(eax, ecx) / ebx; return tabledivide64(ldexp(eax, ecx), ebx);
} }
# define _scaler(a) \
static inline int32_t divscale##a(int32_t eax, int32_t ebx) \
{ \
return divscale(eax, ebx, a); \
} \
#else #else
static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx) { return dw((qw(eax) << by(ecx)) / ebx); } static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx)
{
# define _scaler(a) \ const int64_t numer = qw(eax) << by(ecx);
static inline int32_t divscale##a(int32_t eax, int32_t ebx) \ return dw(tabledivide64(numer, ebx));
{ \ }
return dw((qw(eax) << by(a)) / ebx); \
} \
#endif #endif
# define _scaler(a) static inline int32_t divscale##a(int32_t eax, int32_t ebx) { return divscale(eax, ebx, a); }
PRAGMA_FUNCS _scaler(32) PRAGMA_FUNCS _scaler(32)
#undef _scaler #undef _scaler
#endif // !defined USE_ASM_DIVSCALE
// Computes eax * edx / ecx: the product is formed after widening eax with qw(),
// divided by ecx via tabledivide64(), and the quotient is passed through dw().
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx)
{
    return dw(tabledivide64(qw(eax) * edx, ecx));
}
#if defined(__GNUC__) && defined(GEKKO) #if defined(__GNUC__) && defined(GEKKO)
@ -115,11 +169,6 @@ static inline void swap64bit(void* a, void* b) { int64_t t = *((int64_t*)b); *((
static inline char readpixel(void* s) { return (*((char*)(s))); } static inline char readpixel(void* s) { return (*((char*)(s))); }
static inline void drawpixel(void* s, char a) { *((char*)(s)) = a; } static inline void drawpixel(void* s, char a) { *((char*)(s)) = a; }
static inline void drawpixels(void* s, int16_t a) { *((int16_t*)(s)) = a; }
static inline void drawpixelses(void* s, int32_t a) { *((int32_t*)(s)) = a; }
static inline int32_t divmod(int32_t a, int32_t b) { uint32_t _a=(uint32_t)a, _b=(uint32_t)b; dmval = _a%_b; return _a/_b; }
static inline int32_t moddiv(int32_t a, int32_t b) { uint32_t _a=(uint32_t)a, _b=(uint32_t)b; dmval = _a/_b; return _a%_b; }
static inline int32_t klabs(int32_t a) { const uint32_t m = a >> (sizeof(int) * CHAR_BIT - 1); return (a ^ m) - m; } static inline int32_t klabs(int32_t a) { const uint32_t m = a >> (sizeof(int) * CHAR_BIT - 1); return (a ^ m) - m; }
static inline int32_t ksgn(int32_t a) { return (a>0)-(a<0); } static inline int32_t ksgn(int32_t a) { return (a>0)-(a<0); }
@ -130,7 +179,6 @@ static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b)
static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b) return b; return a; } static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b) return b; return a; }
static inline int32_t sqr(int32_t eax) { return (eax) * (eax); } static inline int32_t sqr(int32_t eax) { return (eax) * (eax); }
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) / ecx); }
static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) >> by(ecx)); } static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) >> by(ecx)); }
static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(ecx)); } static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(ecx)); }
@ -155,9 +203,15 @@ void copybufreverse(const void *S, void *D, int32_t c);
// Swaps c bytes between two buffers using swapchar() on each pair:
// d is walked forward from its first byte while s is walked backward
// from the byte it points at.
//   s - points at the last byte of the range that is read in reverse
//   d - points at the first byte of the forward range
//   c - number of bytes to swap; values <= 0 swap nothing
static inline void swapbufreverse(void *s, void *d, int32_t c)
{
    uint8_t *src = (uint8_t*)s, *dst = (uint8_t*)d;

    // Unrolled by four. The count must drop by four per pass and the pass
    // must not run at all when fewer than four bytes remain; otherwise more
    // than c bytes get swapped and both buffers are overrun.
    for (; c >= 4; c -= 4)
    {
        swapchar(dst, src);
        swapchar(dst+1, src-1);
        swapchar(dst+2, src-2);
        swapchar(dst+3, src-3);
        dst += 4, src -= 4;
    }

    // Remaining 0..3 bytes.
    while (c-- > 0)
        swapchar(dst++, src--);
}
#ifdef EXTERNC #ifdef EXTERNC

View file

@ -5,7 +5,6 @@
#ifndef __pragmas_arm_h__ #ifndef __pragmas_arm_h__
#define __pragmas_arm_h__ #define __pragmas_arm_h__
// TODO: implement libdivide.h
#define _scaler(a) \ #define _scaler(a) \
static inline int32_t mulscale##a(int32_t eax, int32_t edx) \ static inline int32_t mulscale##a(int32_t eax, int32_t edx) \
{ \ { \
@ -30,11 +29,6 @@ static inline void swap64bit(void* a, void* b) { int64_t t = *((int64_t*) b); *(
static inline char readpixel(void* s) { return (*((char*) (s))); } static inline char readpixel(void* s) { return (*((char*) (s))); }
static inline void drawpixel(void* s, char a) { *((char*) (s)) = a; } static inline void drawpixel(void* s, char a) { *((char*) (s)) = a; }
static inline void drawpixels(void* s, int16_t a) { *((int16_t*) (s)) = a; }
static inline void drawpixelses(void* s, int32_t a) { *((int32_t*) (s)) = a; }
static inline int32_t divmod(int32_t a, int32_t b) { uint32_t _a=(uint32_t) a, _b=(uint32_t) b; dmval = _a%_b; return _a/_b; }
static inline int32_t moddiv(int32_t a, int32_t b) { uint32_t _a=(uint32_t) a, _b=(uint32_t) b; dmval = _a/_b; return _a%_b; }
static inline int32_t klabs(int32_t a) { const uint32_t m = a >> (sizeof(int) * CHAR_BIT - 1); return (a ^ m) - m; } static inline int32_t klabs(int32_t a) { const uint32_t m = a >> (sizeof(int) * CHAR_BIT - 1); return (a ^ m) - m; }
static inline int32_t ksgn(int32_t a) { return (a>0)-(a<0); } static inline int32_t ksgn(int32_t a) { return (a>0)-(a<0); }
@ -45,7 +39,6 @@ static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b
static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return b; return a; } static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return b; return a; }
static inline int32_t sqr(int32_t eax) { return (eax) * (eax); } static inline int32_t sqr(int32_t eax) { return (eax) * (eax); }
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) / qw(ecx)); }
static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) >> by(ecx)); } static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) >> by(ecx)); }
static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(ecx)); } static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(ecx)); }

View file

@ -6,8 +6,6 @@
#define sqr(a) ((a)*(a)) #define sqr(a) ((a)*(a))
int32_t scale(int32_t a, int32_t d, int32_t c);
#define _scaler(x) \ #define _scaler(x) \
static inline int32_t mulscale##x(int32_t a, int32_t d) \ static inline int32_t mulscale##x(int32_t a, int32_t d) \
{ \ { \
@ -118,26 +116,6 @@ static inline void drawpixel(void *d, char a)
*(char*) d = a; *(char*) d = a;
} }
static inline void drawpixels(void *d, int16_t a)
{
__asm__(
" sthbrx %0, 0, %1\n"
:
: "r"(&a), "r"(d)
: "memory"
);
}
static inline void drawpixelses(void *d, int32_t a)
{
__asm__(
" stwbrx %0, 0, %1\n"
:
: "r"(&a), "r"(d)
: "memory"
);
}
void clearbufbyte(void *d, int32_t c, int32_t a); void clearbufbyte(void *d, int32_t c, int32_t a);
static inline void clearbuf(void *d, int32_t c, int32_t a) static inline void clearbuf(void *d, int32_t c, int32_t a)
@ -266,32 +244,6 @@ static inline void swap64bit(void *a, void *b)
*(double*) b = t; *(double*) b = t;
} }
static inline int32_t divmod(int32_t a, int32_t b)
{
int32_t div;
__asm__(
" divwu %0, %2, %3\n"
" mullw %1, %0, %3\n"
" subf %1, %1, %2\n"
: "=&r"(div), "=&r"(dmval)
: "r"(a), "r"(b)
);
return div;
}
static inline int32_t moddiv(int32_t a, int32_t b)
{
int32_t mod;
__asm__(
" divwu %0, %2, %3\n"
" mullw %1, %0, %3\n"
" subf %1, %1, %2\n"
: "=&r"(dmval), "=&r"(mod)
: "r"(a), "r"(b)
);
return mod;
}
static inline int32_t umin(int32_t a, int32_t b) { if ((uint32_t) a < (uint32_t) b) return a; return b; } static inline int32_t umin(int32_t a, int32_t b) { if ((uint32_t) a < (uint32_t) b) return a; return b; }
static inline int32_t umax(int32_t a, int32_t b) { if ((uint32_t) a < (uint32_t) b) return b; return a; } static inline int32_t umax(int32_t a, int32_t b) { if ((uint32_t) a < (uint32_t) b) return b; return a; }
static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return a; return b; } static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t) a < (int32_t) b) return a; return b; }

View file

@ -33,13 +33,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
#define sqr(a) __builtin_sqr(a) #define sqr(a) __builtin_sqr(a)
#endif #endif
#define scale(a,d,c) \
({ int32_t __a=(a), __d=(d), __c=(c); \
__asm__ __volatile__ ("imull %%edx; idivl %%ecx" \
: "=a" (__a), "=d" (__d) \
: "0" (__a), "1" (__d), "c" (__c) : "cc"); \
__a; })
#define mulscale(a,d,c) \ #define mulscale(a,d,c) \
({ int32_t __a=(a), __d=(d), __c=(c); \ ({ int32_t __a=(a), __d=(d), __c=(c); \
__asm__ __volatile__ ("imull %%edx; shrdl %%cl, %%edx, %%eax" \ __asm__ __volatile__ ("imull %%edx; shrdl %%cl, %%edx, %%eax" \
@ -471,174 +464,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__d; }) __d; })
#ifdef USE_ASM_DIVSCALE
#define divscale(a,b,c) \
({ int32_t __a=(a), __b=(b), __c=(c); \
__asm__ __volatile__ ("movl %%eax, %%edx; shll %%cl, %%eax; negb %%cl; sarl %%cl, %%edx; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "c" (__c), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale1(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("addl %%eax, %%eax; sbbl %%edx, %%edx; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale2(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $30, %%edx; leal (,%%eax,4), %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale3(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $29, %%edx; leal (,%%eax,8), %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale4(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $28, %%edx; shll $4, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale5(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $27, %%edx; shll $5, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale6(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $26, %%edx; shll $6, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale7(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $25, %%edx; shll $7, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale8(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $24, %%edx; shll $8, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale9(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $23, %%edx; shll $9, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale10(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $22, %%edx; shll $10, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale11(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $21, %%edx; shll $11, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale12(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $20, %%edx; shll $12, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale13(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $19, %%edx; shll $13, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale14(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $18, %%edx; shll $14, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale15(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $17, %%edx; shll $15, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale16(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $16, %%edx; shll $16, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale17(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $15, %%edx; shll $17, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale18(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $14, %%edx; shll $18, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale19(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $13, %%edx; shll $19, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale20(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $12, %%edx; shll $20, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale21(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $11, %%edx; shll $21, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale22(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $10, %%edx; shll $22, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale23(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $9, %%edx; shll $23, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale24(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $8, %%edx; shll $24, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale25(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $7, %%edx; shll $25, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale26(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $6, %%edx; shll $26, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale27(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $5, %%edx; shll $27, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale28(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $4, %%edx; shll $28, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale29(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $3, %%edx; shll $29, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale30(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $2, %%edx; shll $30, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale31(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("movl %%eax, %%edx; sarl $1, %%edx; shll $31, %%eax; idivl %%ebx" \
: "=a" (__a) : "a" (__a), "b" (__b) : "edx", "cc"); \
__a; })
#define divscale32(d,b) \
({ int32_t __d=(d), __b=(b), __r; \
__asm__ __volatile__ ("xorl %%eax, %%eax; idivl %%ebx" \
: "=a" (__r), "=d" (__d) : "d" (__d), "b" (__b) : "cc"); \
__r; })
#endif // defined USE_ASM_DIVSCALE
#define readpixel(D) \ #define readpixel(D) \
({ void *__D=(D); int32_t __a; \ ({ void *__D=(D); int32_t __a; \
__asm__ __volatile__ ("movb (%%edi), %%al" \ __asm__ __volatile__ ("movb (%%edi), %%al" \
@ -649,16 +474,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
__asm__ __volatile__ ("movb %%al, (%%edi)" \ __asm__ __volatile__ ("movb %%al, (%%edi)" \
: : "D" (__D), "a" (__a) : "memory", "cc"); \ : : "D" (__D), "a" (__a) : "memory", "cc"); \
0; }) 0; })
#define drawpixels(D,a) \
({ void *__D=(D); int32_t __a=(a); \
__asm__ __volatile__ ("movw %%ax, (%%edi)" \
: : "D" (__D), "a" (__a) : "memory", "cc"); \
0; })
#define drawpixelses(D,a) \
({ void *__D=(D); int32_t __a=(a); \
__asm__ __volatile__ ("movl %%eax, (%%edi)" \
: : "D" (__D), "a" (__a) : "memory", "cc"); \
0; })
#define clearbuf(D,c,a) \ #define clearbuf(D,c,a) \
({ void *__D=(D); int32_t __c=(c), __a=(a); \ ({ void *__D=(D); int32_t __c=(c), __a=(a); \
__asm__ __volatile__ ("rep; stosl" \ __asm__ __volatile__ ("rep; stosl" \
@ -670,19 +485,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
: "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \ : "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \
0; }) 0; })
//returns eax/ebx, dmval = eax%edx;
#define divmod(a,b) \
({ int32_t __a=(a), __b=(b); \
__asm__ __volatile__ ("xorl %%edx, %%edx; divl %%ebx; movl %%edx, "_DMVAL \
: "+a" (__a) : "b" (__b) : "edx", "memory", "cc"); \
__a; })
//returns eax%ebx, dmval = eax/edx;
#define moddiv(a,b) \
({ int32_t __a=(a), __b=(b), __d; \
__asm__ __volatile__ ("xorl %%edx, %%edx; divl %%ebx; movl %%eax, "_DMVAL \
: "=d" (__d) : "a" (__a), "b" (__b) : "eax", "memory", "cc"); \
__d; })
#define klabs(a) \ #define klabs(a) \
({ int32_t __a=(a); \ ({ int32_t __a=(a); \
__asm__ __volatile__ ("testl %%eax, %%eax; jns 0f; negl %%eax; 0:" \ __asm__ __volatile__ ("testl %%eax, %%eax; jns 0f; negl %%eax; 0:" \

View file

@ -16,15 +16,6 @@ static __inline int32_t sqr(int32_t a)
} }
} }
static __inline int32_t scale(int32_t a, int32_t d, int32_t c)
{
_asm {
mov eax, a
imul d
idiv c
}
}
static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c) static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{ {
_asm { _asm {
@ -99,80 +90,6 @@ static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
} }
} }
#ifdef USE_ASM_DIVSCALE
static __inline int32_t divscale(int32_t a, int32_t b, int32_t c)
{
_asm {
mov eax, a
mov ecx, c
mov edx, eax
shl eax, cl
neg cl
sar edx, cl
idiv b
}
}
static __inline int32_t divscale1(int32_t a, int32_t b)
{
_asm {
mov eax, a
add eax, eax
sbb edx, edx
idiv b
}
}
static __inline int32_t divscale2(int32_t a, int32_t b)
{
_asm {
mov eax, a
mov edx, eax
sar edx, 30
lea eax, [eax*4]
idiv b
}
}
static __inline int32_t divscale3(int32_t a, int32_t b)
{
_asm {
mov eax, a
mov edx, eax
sar edx, 29
lea eax, [eax*8]
idiv b
}
}
#define DIVSCALE(x,y) \
static __inline int32_t divscale##y(int32_t a, int32_t b) \
{ \
_asm mov eax, a \
_asm mov edx, eax \
_asm sar edx, x \
_asm shl eax, y \
_asm idiv b \
}
DIVSCALE(28, 4) DIVSCALE(27, 5) DIVSCALE(26, 6) DIVSCALE(25, 7)
DIVSCALE(24, 8) DIVSCALE(23, 9) DIVSCALE(22, 10) DIVSCALE(21, 11)
DIVSCALE(20, 12) DIVSCALE(19, 13) DIVSCALE(18, 14) DIVSCALE(17, 15)
DIVSCALE(16, 16) DIVSCALE(15, 17) DIVSCALE(14, 18) DIVSCALE(13, 19)
DIVSCALE(12, 20) DIVSCALE(11, 21) DIVSCALE(10, 22) DIVSCALE(9, 23)
DIVSCALE(8, 24) DIVSCALE(7, 25) DIVSCALE(6, 26) DIVSCALE(5, 27)
DIVSCALE(4, 28) DIVSCALE(3, 29) DIVSCALE(2, 30) DIVSCALE(1, 31)
static __inline int32_t divscale32(int32_t d, int32_t b)
{
_asm {
mov edx, d
xor eax, eax
idiv b
}
}
#endif // defined USE_ASM_DIVSCALE
static __inline char readpixel(void *d) static __inline char readpixel(void *d)
{ {
_asm { _asm {
@ -190,24 +107,6 @@ static __inline void drawpixel(void *d, char a)
} }
} }
static __inline void drawpixels(void *d, int16_t a)
{
_asm {
mov edx, d
mov ax, a
mov word ptr[edx], ax
}
}
static __inline void drawpixelses(void *d, int32_t a)
{
_asm {
mov edx, d
mov eax, a
mov dword ptr[edx], eax
}
}
static __inline void clearbuf(void *d, int32_t c, int32_t a) static __inline void clearbuf(void *d, int32_t c, int32_t a)
{ {
_asm { _asm {
@ -415,29 +314,6 @@ static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, in
} }
} }
//returns eax/ebx, dmval = eax%edx;
static __inline int32_t divmod(int32_t a, int32_t b)
{
_asm {
mov eax, a
xor edx, edx
div b
mov dmval, edx
}
}
//returns eax%ebx, dmval = eax/edx;
static __inline int32_t moddiv(int32_t a, int32_t b)
{
_asm {
mov eax, a
xor edx, edx
div b
mov dmval, eax
mov eax, edx
}
}
static __inline int32_t klabs(int32_t a) static __inline int32_t klabs(int32_t a)
{ {
_asm { _asm {

View file

@ -7,6 +7,7 @@
// See the included license file "BUILDLIC.TXT" for license info. // See the included license file "BUILDLIC.TXT" for license info.
#include "a.h" #include "a.h"
#include "pragmas.h"
#ifdef ENGINE_USING_A_C #ifdef ENGINE_USING_A_C
@ -71,6 +72,17 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by,
const int32_t logx = glogx, logy = glogy; const int32_t logx = glogx, logy = glogy;
char *pp = (char *)p; char *pp = (char *)p;
for (; cnt>=4; cnt -= 4)
{
*pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]]; pp--;
*pp = palptr[buf[(((bx-bxinc)>>(32-logx))<<logy)+((by-byinc)>>(32-logy))]]; pp--;
*pp = palptr[buf[(((bx-(bxinc<<1))>>(32-logx))<<logy)+((by-(byinc<<1))>>(32-logy))]]; pp--;
*pp = palptr[buf[(((bx-(bxinc*3))>>(32-logx))<<logy)+((by-(byinc*3))>>(32-logy))]]; pp--;
bx -= bxinc<<2;
by -= byinc<<2;
}
for (; cnt>=0; cnt--) for (; cnt>=0; cnt--)
{ {
*pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]]; *pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]];
@ -121,10 +133,7 @@ static inline uint32_t ourmulscale32(uint32_t a, uint32_t b)
static inline int32_t getpix(int32_t logy, const char *buf, uint32_t vplc) static inline int32_t getpix(int32_t logy, const char *buf, uint32_t vplc)
{ {
if (logy != 0) return logy ? buf[vplc>>logy] : buf[ourmulscale32(vplc,globaltilesizy)];
return buf[vplc>>logy];
else
return buf[ourmulscale32(vplc,globaltilesizy)];
} }
void setupvlineasm(int32_t neglogy) { glogy = neglogy; } void setupvlineasm(int32_t neglogy) { glogy = neglogy; }
@ -138,18 +147,45 @@ int32_t vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, in
cnt++; cnt++;
do if (logy)
{
#ifdef CLASSIC_SLICE_BY_4
for (; cnt>=4; cnt-=4)
{
*pp = pal[buf[vplc>>logy]];
*(pp+ourbpl) = pal[buf[(vplc+vinc)>>logy]];
*(pp+(ourbpl<<1)) = pal[buf[(vplc+(vinc<<1))>>logy]];
*(pp+(ourbpl*3)) = pal[buf[(vplc+(vinc*3))>>logy ]];
pp += ourbpl<<2;
vplc += vinc<<2;
}
#endif
while (cnt--)
{ {
if (logy != 0)
*pp = pal[buf[vplc>>logy]]; *pp = pal[buf[vplc>>logy]];
else
*pp = pal[buf[ourmulscale32(vplc,globaltilesizy)]];
pp += ourbpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
} }
while (--cnt); }
else
{
#ifdef CLASSIC_SLICE_BY_4
for (; cnt>=4; cnt-=4)
{
*pp = pal[buf[ourmulscale32(vplc, globaltilesizy)]];
*(pp+ourbpl) = pal[buf[ourmulscale32((vplc+vinc),globaltilesizy)]];
*(pp+(ourbpl<<1)) = pal[buf[ourmulscale32((vplc+(vinc<<1)), globaltilesizy)]];
*(pp+(ourbpl*3)) = pal[buf[ourmulscale32((vplc+(vinc*3)), globaltilesizy)]];
pp += ourbpl<<2;
vplc += vinc<<2;
}
#endif
while (cnt--)
{
*pp = pal[buf[ourmulscale32(vplc,globaltilesizy)]], pp += ourbpl;
vplc += vinc;
}
}
return vplc; return vplc;
} }
@ -191,9 +227,6 @@ typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
// cnt >= 1 // cnt >= 1
void vlineasm4(int32_t cnt, char *p) void vlineasm4(int32_t cnt, char *p)
{ {
char ch;
int32_t i;
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]}; char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]}; char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT #ifdef USE_VECTOR_EXT
@ -205,25 +238,86 @@ void vlineasm4(int32_t cnt, char *p)
#endif #endif
const int32_t logy = glogy, ourbpl = bpl; const int32_t logy = glogy, ourbpl = bpl;
if (!logy)
{
do do
{ {
for (i=0; i<4; i++) p[0] = pal[0][buf[0][ourmulscale32(vplc[0],globaltilesizy)]];
{ p[1] = pal[1][buf[1][ourmulscale32(vplc[1],globaltilesizy)]];
ch = getpix(logy, buf[i], vplc[i]); p[2] = pal[2][buf[2][ourmulscale32(vplc[2],globaltilesizy)]];
p[i] = pal[i][ch]; p[3] = pal[3][buf[3][ourmulscale32(vplc[3],globaltilesizy)]];
#if !defined USE_VECTOR_EXT
vplc[i] += vinc[i]; #if defined USE_VECTOR_EXT
#endif
}
#ifdef USE_VECTOR_EXT
vplc += vinc; vplc += vinc;
#else
vplc[0] += vinc[0];
vplc[1] += vinc[1];
vplc[2] += vinc[2];
vplc[3] += vinc[3];
#endif #endif
p += ourbpl; p += ourbpl;
} }
while (--cnt); while (--cnt);
for (i=0; i<4; i++) goto skip;
vplce[i] = vplc[i]; }
// just fucking shoot me
#ifdef CLASSIC_SLICE_BY_4
for (; cnt>=4;cnt-=4)
{
p[0] = pal[0][buf[0][ vplc[0]>>logy ]];
p[1] = pal[1][buf[1][ vplc[1]>>logy ]];
p[2] = pal[2][buf[2][ vplc[2]>>logy ]];
p[3] = pal[3][buf[3][ vplc[3]>>logy ]];
(p+ourbpl)[0] = pal[0][buf[0][ (vplc[0]+vinc[0])>>logy ]];
(p+ourbpl)[1] = pal[1][buf[1][ (vplc[1]+vinc[1])>>logy ]];
(p+ourbpl)[2] = pal[2][buf[2][ (vplc[2]+vinc[2])>>logy ]];
(p+ourbpl)[3] = pal[3][buf[3][ (vplc[3]+vinc[3])>>logy ]];
(p+(ourbpl<<1))[0] = pal[0][buf[0][ (vplc[0]+(vinc[0]<<1))>>logy ]];
(p+(ourbpl<<1))[1] = pal[1][buf[1][ (vplc[1]+(vinc[1]<<1))>>logy ]];
(p+(ourbpl<<1))[2] = pal[2][buf[2][ (vplc[2]+(vinc[2]<<1))>>logy ]];
(p+(ourbpl<<1))[3] = pal[3][buf[3][ (vplc[3]+(vinc[3]<<1))>>logy ]];
(p+(ourbpl*3))[0] = pal[0][buf[0][ (vplc[0]+(vinc[0]*3))>>logy ]];
(p+(ourbpl*3))[1] = pal[1][buf[1][ (vplc[1]+(vinc[1]*3))>>logy ]];
(p+(ourbpl*3))[2] = pal[2][buf[2][ (vplc[2]+(vinc[2]*3))>>logy ]];
(p+(ourbpl*3))[3] = pal[3][buf[3][ (vplc[3]+(vinc[3]*3))>>logy ]];
#if defined USE_VECTOR_EXT
vplc += vinc<<2;
#else
vplc[0] += vinc[0]<<2;
vplc[1] += vinc[1]<<2;
vplc[2] += vinc[2]<<2;
vplc[3] += vinc[3]<<2;
#endif
p += ourbpl<<2;
}
#endif
while (cnt--)
{
p[0] = pal[0][buf[0][vplc[0]>>logy]];
p[1] = pal[1][buf[1][vplc[1]>>logy]];
p[2] = pal[2][buf[2][vplc[2]>>logy]];
p[3] = pal[3][buf[3][vplc[3]>>logy]];
#if defined USE_VECTOR_EXT
vplc += vinc;
#else
vplc[0] += vinc[0];
vplc[1] += vinc[1];
vplc[2] += vinc[2];
vplc[3] += vinc[3];
#endif
p += ourbpl;
}
skip:
vplce[0] = vplc[0];
vplce[1] = vplc[1];
vplce[2] = vplc[2];
vplce[3] = vplc[3];
} }
#ifdef USE_SATURATE_VPLC #ifdef USE_SATURATE_VPLC
@ -251,9 +345,11 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
cnt++; cnt++;
if (!logy)
{
do do
{ {
ch = getpix(logy, buf, vplc); ch = buf[ourmulscale32(vplc,globaltilesizy)];
if (ch != 255) *pp = pal[ch]; if (ch != 255) *pp = pal[ch];
pp += ourbpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
@ -261,15 +357,26 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
} }
while (--cnt); while (--cnt);
return vplc;
}
do
{
if (buf[vplc>>logy] != 255)
*pp = pal[buf[vplc>>logy]];
pp += ourbpl;
vplc += vinc;
saturate_vplc(vplc, vinc);
}
while (--cnt);
return vplc; return vplc;
} }
// cnt >= 1 // cnt >= 1
void mvlineasm4(int32_t cnt, char *p) void mvlineasm4(int32_t cnt, char *p)
{ {
char ch;
int32_t i;
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]}; char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]}; char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT #ifdef USE_VECTOR_EXT
@ -280,28 +387,73 @@ void mvlineasm4(int32_t cnt, char *p)
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]}; uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
#endif #endif
const int32_t logy = glogy, ourbpl = bpl; const int32_t logy = glogy, ourbpl = bpl;
char ch;
if (logy)
{
do do
{ {
for (i=0; i<4; i++) ch = buf[0][vplc[0]>>logy];
{ if (ch != 255) p[0] = pal[0][ch];
ch = getpix(logy, buf[i], vplc[i]); ch = buf[1][vplc[1]>>logy];
if (ch != 255) p[i] = pal[i][ch]; if (ch != 255) p[1] = pal[1][ch];
ch = buf[2][vplc[2]>>logy];
if (ch != 255) p[2] = pal[2][ch];
ch = buf[3][vplc[3]>>logy];
if (ch != 255) p[3] = pal[3][ch];
#if !defined USE_VECTOR_EXT #if !defined USE_VECTOR_EXT
vplc[i] += vinc[i]; vplc[0] += vinc[0];
saturate_vplc(vplc[i], vinc[i]); vplc[1] += vinc[1];
#endif vplc[2] += vinc[2];
} vplc[3] += vinc[3];
#ifdef USE_VECTOR_EXT saturate_vplc(vplc[0], vinc[0]);
saturate_vplc(vplc[1], vinc[1]);
saturate_vplc(vplc[2], vinc[2]);
saturate_vplc(vplc[3], vinc[3]);
#else
vplc += vinc; vplc += vinc;
saturate_vplc_vec(vplc, vinc); saturate_vplc_vec(vplc, vinc);
#endif #endif
p += ourbpl; p += ourbpl;
} }
while (--cnt); while (--cnt);
}
else
{
do
{
ch = buf[0][ourmulscale32(vplc[0],globaltilesizy)];
if (ch != 255) p[0] = pal[0][ch];
ch = buf[1][ourmulscale32(vplc[1],globaltilesizy)];
if (ch != 255) p[1] = pal[1][ch];
ch = buf[2][ourmulscale32(vplc[2],globaltilesizy)];
if (ch != 255) p[2] = pal[2][ch];
ch = buf[3][ourmulscale32(vplc[3],globaltilesizy)];
if (ch != 255) p[3] = pal[3][ch];
for (i=0; i<4; i++) #if !defined USE_VECTOR_EXT
vplce[i] = vplc[i]; vplc[0] += vinc[0];
vplc[1] += vinc[1];
vplc[2] += vinc[2];
vplc[3] += vinc[3];
saturate_vplc(vplc[0], vinc[0]);
saturate_vplc(vplc[1], vinc[1]);
saturate_vplc(vplc[2], vinc[2]);
saturate_vplc(vplc[3], vinc[3]);
#else
vplc += vinc;
saturate_vplc_vec(vplc, vinc);
#endif
p += ourbpl;
}
while (--cnt);
}
vplce[0] = vplc[0];
vplce[1] = vplc[1];
vplce[2] = vplc[2];
vplce[3] = vplc[3];
} }
#ifdef USE_ASM64 #ifdef USE_ASM64
@ -335,7 +487,8 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
do do
{ {
ch = getpix(logy, buf, vplc); ch = getpix(logy, buf, vplc);
if (ch != 255) *pp = trans[(*pp)|(pal[ch]<<8)]; if (ch != 255)
*pp = trans[(*pp)|(pal[ch]<<8)];
pp += ourbpl; pp += ourbpl;
vplc += vinc; vplc += vinc;
saturate_vplc_trans(vplc, vinc); saturate_vplc_trans(vplc, vinc);
@ -374,7 +527,7 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc
{ {
char ch; char ch;
int32_t cnt = (asm2-p-1)/bpl; // >= 1 int32_t cnt = tabledivide32(asm2-p-1, bpl); // >= 1
const int32_t vinc2 = asm1; const int32_t vinc2 = asm1;
const char *const buf1 = (char *)bufplc1; const char *const buf1 = (char *)bufplc1;
@ -533,7 +686,7 @@ void mspritevline(int32_t bx, int32_t by, int32_t cnt, intptr_t bufplc, intptr_t
for (; cnt>1; cnt--) for (; cnt>1; cnt--)
{ {
ch = gbuf[(bx>>16)*glogy+(by>>16)]; ch = gbuf[(bx>>16)*glogy+(by>>16)];
if (ch != 255)(*(char *)p) = gpal[ch]; if (ch != 255) (*(char *)p) = gpal[ch];
bx += gbxinc; bx += gbxinc;
by += gbyinc; by += gbyinc;
p += bpl; p += bpl;

View file

@ -128,7 +128,7 @@ void initcache(intptr_t dacachestart, int32_t dacachesize)
int32_t i; int32_t i;
for (i=1; i<200; i++) for (i=1; i<200; i++)
lockrecip[i] = (1<<28)/(200-i); lockrecip[i] = tabledivide32_noinline(1<<28, 200-i);
// The following code was relocated here from engine.c, since this // The following code was relocated here from engine.c, since this
// function is only ever called once (from there), and it seems to // function is only ever called once (from there), and it seems to

View file

@ -797,33 +797,32 @@ char *Bstrtolower(char *str)
//Brute-force case-insensitive, slash-insensitive, * and ? wildcard matcher //Brute-force case-insensitive, slash-insensitive, * and ? wildcard matcher
//Given: string i and string j. string j can have wildcards //Given: string i and string j. string j can have wildcards
//Returns: 1:matches, 0:doesn't match //Returns: 1:matches, 0:doesn't match
int32_t Bwildmatch(const char *i, const char *j) #ifndef WITHKPLIB
{ extern char toupperlookup[256];
const char *k;
char c0, c1;
if (!*j) return(1); static int32_t wildmatch(const char *match, const char *wild)
{
do do
{ {
if (*j == '*') if (*match && (toupperlookup[*wild] == toupperlookup[*match] || *wild == '?'))
{ {
for (k=i,j++; *k; k++) if (Bwildmatch(k,j)) return(1); wild++, match++;
continue; continue;
} }
if (!*i) return(0); else if ((*match|*wild) == '\0')
if (*j == '?') { i++; j++; continue; } return 1;
c0 = *i; if ((c0 >= 'a') && (c0 <= 'z')) c0 -= 32; else if (*wild == '*')
c1 = *j; if ((c1 >= 'a') && (c1 <= 'z')) c1 -= 32; {
#ifdef _WIN32 while (*wild == '*') wild++;
if (c0 == '/') c0 = '\\'; if (*wild == '\0') return 1;
if (c1 == '/') c1 = '\\'; while (*match && toupperlookup[*match] != toupperlookup[*wild]) match++;
#endif if (toupperlookup[*match] == toupperlookup[*wild])
if (c0 != c1) return(0); continue;
i++; j++;
} }
while (*j); return 0;
return(!*i); } while (1);
} }
#endif
#if !defined(_WIN32) #if !defined(_WIN32)
char *Bstrlwr(char *s) char *Bstrlwr(char *s)
@ -917,3 +916,6 @@ int access(const char *pathname, int mode)
} }
#endif #endif
#define LIBDIVIDE_BODY
#include "libdivide.h"

View file

@ -153,7 +153,7 @@ int32_t dxtfilter(int32_t fil, const texcachepicture *pict, const char *pic, voi
for (j=stride; (unsigned)j<miplen; j+=stride) for (j=stride; (unsigned)j<miplen; j+=stride)
for (k=0; k<8; k++) *cptr++ = pic[j+k]; for (k=0; k<8; k++) *cptr++ = pic[j+k];
dxt_handle_io(fil, (miplen/stride)<<3, midbuf, packbuf); dxt_handle_io(fil, tabledivide32(miplen, stride)<<3, midbuf, packbuf);
} }
//rgb0,rgb1 //rgb0,rgb1
@ -162,7 +162,7 @@ int32_t dxtfilter(int32_t fil, const texcachepicture *pict, const char *pic, voi
for (j=0; (unsigned)j<miplen; j+=stride) for (j=0; (unsigned)j<miplen; j+=stride)
{ *(int16_t *)cptr = dxt_hicosub(*(int16_t *)(&pic[offs+j+k])); cptr += 2; } { *(int16_t *)cptr = dxt_hicosub(*(int16_t *)(&pic[offs+j+k])); cptr += 2; }
dxt_handle_io(fil, (miplen/stride)<<2, midbuf, packbuf); dxt_handle_io(fil, tabledivide32(miplen, stride)<<2, midbuf, packbuf);
//index_4x4 //index_4x4
cptr = (char *)midbuf; cptr = (char *)midbuf;
@ -176,7 +176,7 @@ int32_t dxtfilter(int32_t fil, const texcachepicture *pict, const char *pic, voi
cptr += 4; cptr += 4;
} }
dxt_handle_io(fil, (miplen/stride)<<2, midbuf, packbuf); dxt_handle_io(fil, tabledivide32(miplen, stride)<<2, midbuf, packbuf);
return 0; return 0;
} }
@ -196,7 +196,7 @@ int32_t dedxtfilter(int32_t fil, const texcachepicture *pict, char *pic, void *m
if (stride == 16) //If DXT3... if (stride == 16) //If DXT3...
{ {
//alpha_4x4 //alpha_4x4
if (dedxt_handle_io(fil, (pict->size/stride)*8, midbuf, pict->size, packbuf, ispacked)) if (dedxt_handle_io(fil, tabledivide32(pict->size, stride)*8, midbuf, pict->size, packbuf, ispacked))
return -1; return -1;
cptr = (char *)midbuf; cptr = (char *)midbuf;
@ -206,7 +206,7 @@ int32_t dedxtfilter(int32_t fil, const texcachepicture *pict, char *pic, void *m
} }
//rgb0,rgb1 //rgb0,rgb1
if (dedxt_handle_io(fil, (pict->size/stride)*4, midbuf, pict->size, packbuf, ispacked)) if (dedxt_handle_io(fil, tabledivide32(pict->size, stride)*4, midbuf, pict->size, packbuf, ispacked))
return -1; return -1;
cptr = (char *)midbuf; cptr = (char *)midbuf;
@ -220,7 +220,7 @@ int32_t dedxtfilter(int32_t fil, const texcachepicture *pict, char *pic, void *m
} }
//index_4x4: //index_4x4:
if (dedxt_handle_io(fil, (pict->size/stride)*4, midbuf, pict->size, packbuf, ispacked)) if (dedxt_handle_io(fil, tabledivide32(pict->size, stride)*4, midbuf, pict->size, packbuf, ispacked))
return -1; return -1;
cptr = (char *)midbuf; cptr = (char *)midbuf;

View file

@ -144,7 +144,8 @@ static char voxlock[MAXVOXELS][MAXVOXMIPS];
int32_t voxscale[MAXVOXELS]; int32_t voxscale[MAXVOXELS];
static int32_t ggxinc[MAXXSIZ+1], ggyinc[MAXXSIZ+1]; static int32_t ggxinc[MAXXSIZ+1], ggyinc[MAXXSIZ+1];
static int32_t lowrecip[1024], nytooclose, nytoofar; static int32_t lowrecip[1024], nytooclose;
static const int32_t nytoofar = 65536*16384-1048576;
static uint32_t distrecip[65536+256]; static uint32_t distrecip[65536+256];
static int32_t *lookups = NULL; static int32_t *lookups = NULL;
@ -3549,7 +3550,7 @@ static int32_t setup_globals_cf1(const sectortype *sec, int32_t pal, int32_t zd,
j = sec->wallptr; j = sec->wallptr;
ox = wall[wall[j].point2].x - wall[j].x; ox = wall[wall[j].point2].x - wall[j].x;
oy = wall[wall[j].point2].y - wall[j].y; oy = wall[wall[j].point2].y - wall[j].y;
i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) i = 1024; else i = 1048576/i; i = nsqrtasm(uhypsq(ox,oy)); if (i == 0) i = 1024; else i = tabledivide32(1048576, i);
globalx1 = mulscale10(dmulscale10(ox,singlobalang,-oy,cosglobalang),i); globalx1 = mulscale10(dmulscale10(ox,singlobalang,-oy,cosglobalang),i);
globaly1 = mulscale10(dmulscale10(ox,cosglobalang,oy,singlobalang),i); globaly1 = mulscale10(dmulscale10(ox,cosglobalang,oy,singlobalang),i);
globalx2 = -globalx1; globalx2 = -globalx1;
@ -4110,6 +4111,7 @@ static void transmaskwallscan(int32_t x1, int32_t x2, int32_t saturatevplc)
#endif #endif
// cntup16>>16 iterations // cntup16>>16 iterations
static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, char *p) static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, char *p)
{ {
char ch; char ch;
@ -4126,7 +4128,7 @@ static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_
for (cntup16>>=16; cntup16>0; cntup16--) for (cntup16>>=16; cntup16>0; cntup16--)
{ {
ch = buf[(bx/xdiv)*yspan + by/ydiv]; ch = buf[(divideu32(bx, xdiv))*yspan + divideu32(by, ydiv)];
if (ch != 255) *p = pal[ch]; if (ch != 255) *p = pal[ch];
bx += xinc; bx += xinc;
@ -4155,7 +4157,7 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_
{ {
for (cntup16>>=16; cntup16>0; cntup16--) for (cntup16>>=16; cntup16>0; cntup16--)
{ {
ch = buf[(bx/xdiv)*yspan + by/ydiv]; ch = buf[divideu32(bx, xdiv)*yspan + divideu32(by, ydiv)];
if (ch != 255) *p = trans[(*p)|(pal[ch]<<8)]; if (ch != 255) *p = trans[(*p)|(pal[ch]<<8)];
bx += xinc; bx += xinc;
by += yinc; by += yinc;
@ -4166,7 +4168,7 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_
{ {
for (cntup16>>=16; cntup16>0; cntup16--) for (cntup16>>=16; cntup16>0; cntup16--)
{ {
ch = buf[(bx/xdiv)*yspan + by/ydiv]; ch = buf[divideu32(bx, xdiv)*yspan + divideu32(by, ydiv)];
if (ch != 255) *p = trans[((*p)<<8)|pal[ch]]; if (ch != 255) *p = trans[((*p)<<8)|pal[ch]];
bx += xinc; bx += xinc;
by += yinc; by += yinc;
@ -4287,14 +4289,8 @@ static void tslopevlin(uint8_t *p, int32_t i, const intptr_t *slopalptr, int32_t
v = by + ytov*i; v = by + ytov*i;
ch = *(uint8_t *)(slopalptr[0] + buf[((u>>(32-logx))<<logy)+(v>>(32-logy))]); ch = *(uint8_t *)(slopalptr[0] + buf[((u>>(32-logx))<<logy)+(v>>(32-logy))]);
if (transmode) if (ch != 255)
{ *p = trans[transmode ? *p|(pal[ch]<<8) : (*p<<8)|pal[ch]];
if (ch != 255) *p = trans[*p|(pal[ch]<<8)];
}
else
{
if (ch != 255) *p = trans[(*p<<8)|pal[ch]];
}
slopalptr--; slopalptr--;
p += pinc; p += pinc;
@ -5314,8 +5310,8 @@ static void drawvox(int32_t dasprx, int32_t daspry, int32_t dasprz, int32_t dasp
daxscale = scale(daxscale,xdimenscale,xdimen<<8); daxscale = scale(daxscale,xdimenscale,xdimen<<8);
dayscale = scale(dayscale,mulscale16(xdimenscale,viewingrangerecip),xdimen<<8); dayscale = scale(dayscale,mulscale16(xdimenscale,viewingrangerecip),xdimen<<8);
daxscalerecip = (1<<30)/daxscale; daxscalerecip = tabledivide32_noinline(1<<30, daxscale);
dayscalerecip = (1<<30)/dayscale; dayscalerecip = tabledivide32_noinline(1<<30, dayscale);
longptr = (int32_t *)davoxptr; longptr = (int32_t *)davoxptr;
daxsiz = B_LITTLE32(longptr[0]); daysiz = B_LITTLE32(longptr[1]); //dazsiz = B_LITTLE32(longptr[2]); daxsiz = B_LITTLE32(longptr[0]); daysiz = B_LITTLE32(longptr[1]); //dazsiz = B_LITTLE32(longptr[2]);
@ -5772,7 +5768,22 @@ draw_as_face_sprite:
if ((cstat&8) > 0) if ((cstat&8) > 0)
swaplong(&y1, &y2); swaplong(&y1, &y2);
for (x=lx; x<=rx; x++) x = lx;
#ifdef CLASSIC_SLICE_BY_4
for (; x<=rx-4; x+=4)
{
uwall[x] = max(startumost[x+windowx1]-windowy1, (int16_t) startum);
uwall[x+1] = max(startumost[x+windowx1+1]-windowy1, (int16_t) startum);
uwall[x+2] = max(startumost[x+windowx1+2]-windowy1, (int16_t) startum);
uwall[x+3] = max(startumost[x+windowx1+3]-windowy1, (int16_t) startum);
dwall[x] = min(startdmost[x+windowx1]-windowy1, (int16_t) startdm);
dwall[x+1] = min(startdmost[x+windowx1+1]-windowy1, (int16_t) startdm);
dwall[x+2] = min(startdmost[x+windowx1+2]-windowy1, (int16_t) startdm);
dwall[x+3] = min(startdmost[x+windowx1+3]-windowy1, (int16_t) startdm);
}
#endif
for (; x<=rx; x++)
{ {
uwall[x] = max(startumost[x+windowx1]-windowy1,(int16_t)startum); uwall[x] = max(startumost[x+windowx1]-windowy1,(int16_t)startum);
dwall[x] = min(startdmost[x+windowx1]-windowy1,(int16_t)startdm); dwall[x] = min(startdmost[x+windowx1]-windowy1,(int16_t)startdm);
@ -5801,13 +5812,31 @@ draw_as_face_sprite:
break; break;
case 1: case 1:
k = smoststart[i] - xb1[j]; k = smoststart[i] - xb1[j];
for (x=dalx2; x<=darx2; x++) x = dalx2;
#ifdef CLASSIC_SLICE_BY_4 // ok, this one is really by 2 ;)
for (x=dalx2; x<=darx2-2; x+=2)
{
if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x];
if (smost[k+x+1] > uwall[x+1]) uwall[x+1] = smost[k+x+1];
}
#endif
for (; x<=darx2; x++)
if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x]; if (smost[k+x] > uwall[x]) uwall[x] = smost[k+x];
if ((dalx2 == lx) && (darx2 == rx)) daclip |= 1; if ((dalx2 == lx) && (darx2 == rx)) daclip |= 1;
break; break;
case 2: case 2:
k = smoststart[i] - xb1[j]; k = smoststart[i] - xb1[j];
for (x=dalx2; x<=darx2; x++) x = dalx2;
#ifdef CLASSIC_SLICE_BY_4
for (; x<=darx2-4; x+=4)
{
if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x];
if (smost[k+x+1] < dwall[x+1]) dwall[x+1] = smost[k+x+1];
if (smost[k+x+2] < dwall[x+2]) dwall[x+2] = smost[k+x+2];
if (smost[k+x+3] < dwall[x+3]) dwall[x+3] = smost[k+x+3];
}
#endif
for (; x<=darx2; x++)
if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x]; if (smost[k+x] < dwall[x]) dwall[x] = smost[k+x];
if ((dalx2 == lx) && (darx2 == rx)) daclip |= 2; if ((dalx2 == lx) && (darx2 == rx)) daclip |= 2;
break; break;
@ -7842,11 +7871,40 @@ static void dosetaspect(void)
oxyaspect = xyaspect; oxyaspect = xyaspect;
j = xyaspect*320; j = xyaspect*320;
horizlookup2[horizycent-1] = divscale26(131072,j); horizlookup2[horizycent-1] = divscale26(131072,j);
for (i=ydim*4-1; i>=0; i--) for (i=0; i < horizycent-1-4; i += 4)
if (i != (horizycent-1))
{ {
horizlookup[i] = divscale28(1,i-(horizycent-1)); horizlookup[i] = divscale28(1, i -(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]),j); horizlookup[i+1] = divscale28(1, i+1-(horizycent-1));
horizlookup[i+2] = divscale28(1, i+2-(horizycent-1));
horizlookup[i+3] = divscale28(1, i+3-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
horizlookup2[i+1] = divscale14(klabs(horizlookup[i+1]), j);
horizlookup2[i+2] = divscale14(klabs(horizlookup[i+2]), j);
horizlookup2[i+3] = divscale14(klabs(horizlookup[i+3]), j);
}
for (; i < horizycent-1; i++)
{
horizlookup[i] = divscale28(1, i-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
}
for (i=horizycent; i < ydim*4-1-4; i += 4)
{
horizlookup[i] = divscale28(1, i -(horizycent-1));
horizlookup[i+1] = divscale28(1, i+1-(horizycent-1));
horizlookup[i+2] = divscale28(1, i+2-(horizycent-1));
horizlookup[i+3] = divscale28(1, i+3-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
horizlookup2[i+1] = divscale14(klabs(horizlookup[i+1]), j);
horizlookup2[i+2] = divscale14(klabs(horizlookup[i+2]), j);
horizlookup2[i+3] = divscale14(klabs(horizlookup[i+3]), j);
}
for (; i < ydim*4-1; i++)
{
horizlookup[i] = divscale28(1, i-(horizycent-1));
horizlookup2[i] = divscale14(klabs(horizlookup[i]), j);
} }
} }
@ -7856,7 +7914,6 @@ static void dosetaspect(void)
no_radarang2 = 0; no_radarang2 = 0;
oviewingrange = viewingrange; oviewingrange = viewingrange;
oxdimen = xdimen;
xinc = mulscale32(viewingrange*320,xdimenrecip); xinc = mulscale32(viewingrange*320,xdimenrecip);
x = (640<<16)-mulscale1(xinc,xdimen); x = (640<<16)-mulscale1(xinc,xdimen);
@ -7880,15 +7937,28 @@ static void dosetaspect(void)
radarang2[i] = (int16_t)((radarang[k]+j)>>6); radarang2[i] = (int16_t)((radarang[k]+j)>>6);
} }
if (xdimen != oxdimen)
{ {
EDUKE32_STATIC_ASSERT((uint64_t) MAXXDIM*(ARRAY_SIZE(distrecip)-1) <= INT32_MAX); EDUKE32_STATIC_ASSERT((uint64_t) MAXXDIM*(ARRAY_SIZE(distrecip)-1) <= INT32_MAX);
for (i=1; i<(int32_t) ARRAY_SIZE(distrecip); i++) i = 1;
#ifdef CLASSIC_SLICE_BY_4
for (; i<(int32_t) ARRAY_SIZE(distrecip)-4; i+=4)
{
distrecip[i] = (xdimen * i)>>20; distrecip[i] = (xdimen * i)>>20;
distrecip[i+1] = (xdimen * (i+1))>>20;
distrecip[i+2] = (xdimen * (i+2))>>20;
distrecip[i+3] = (xdimen * (i+3))>>20;
} }
#endif
for (; i<(int32_t) ARRAY_SIZE(distrecip); i++)
distrecip[i] = (xdimen * i)>>20;
nytooclose = xdimen*2100; nytooclose = xdimen*2100;
nytoofar = 65536*16384-1048576; }
oxdimen = xdimen;
} }
} }
@ -7920,9 +7990,19 @@ static int32_t loadtables(void)
if (tablesloaded == 0) if (tablesloaded == 0)
{ {
int32_t i; int32_t i;
libdivide_s64_t d;
libdivide_s32_t d32;
initksqrt(); initksqrt();
for (i=1; i<DIVTABLESIZE; i++)
{
d = libdivide_s64_gen(i);
divtable64[i].magic = d.magic, divtable64[i].more = d.more;
d32 = libdivide_s32_gen(i);
divtable32[i].magic = d32.magic, divtable32[i].more = d32.more;
}
for (i=0; i<2048; i++) for (i=0; i<2048; i++)
reciptable[i] = divscale30(2048, i+2048); reciptable[i] = divscale30(2048, i+2048);
@ -9569,8 +9649,8 @@ killsprite:
p1eq = equation(pos.x, pos.y, dot.x, dot.y); p1eq = equation(pos.x, pos.y, dot.x, dot.y);
p2eq = equation(pos.x, pos.y, dot2.x, dot2.y); p2eq = equation(pos.x, pos.y, dot2.x, dot2.y);
middle.x = (dot.x + dot2.x) / 2; middle.x = (dot.x + dot2.x) * .5f;
middle.y = (dot.y + dot2.y) / 2; middle.y = (dot.y + dot2.y) * .5f;
i = spritesortcnt; i = spritesortcnt;
while (i) while (i)
@ -9963,11 +10043,11 @@ void drawmapview(int32_t dax, int32_t day, int32_t zoome, int16_t ang)
//relative alignment stuff //relative alignment stuff
ox = x2-x1; oy = y2-y1; ox = x2-x1; oy = y2-y1;
i = ox*ox+oy*oy; if (i == 0) continue; i = (65536*16384)/i; i = ox*ox+oy*oy; if (i == 0) continue; i = tabledivide32_noinline(65536*16384, i);
globalx1 = mulscale10(dmulscale10(ox,bakgxvect,oy,bakgyvect),i); globalx1 = mulscale10(dmulscale10(ox,bakgxvect,oy,bakgyvect),i);
globaly1 = mulscale10(dmulscale10(ox,bakgyvect,-oy,bakgxvect),i); globaly1 = mulscale10(dmulscale10(ox,bakgyvect,-oy,bakgxvect),i);
ox = y1-y4; oy = x4-x1; ox = y1-y4; oy = x4-x1;
i = ox*ox+oy*oy; if (i == 0) continue; i = (65536*16384)/i; i = ox*ox+oy*oy; if (i == 0) continue; i = tabledivide32_noinline(65536*16384, i);
globalx2 = mulscale10(dmulscale10(ox,bakgxvect,oy,bakgyvect),i); globalx2 = mulscale10(dmulscale10(ox,bakgxvect,oy,bakgyvect),i);
globaly2 = mulscale10(dmulscale10(ox,bakgyvect,-oy,bakgxvect),i); globaly2 = mulscale10(dmulscale10(ox,bakgyvect,-oy,bakgxvect),i);
@ -13170,14 +13250,14 @@ static int32_t clipsprite_try(const spritetype *spr, int32_t xmin, int32_t ymin,
if ((spr->cstat&48)!=32) // face/wall sprite if ((spr->cstat&48)!=32) // face/wall sprite
{ {
int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT; int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT;
maxcorrection = (maxcorrection * (int32_t)spr->xrepeat)/tempint1; maxcorrection = tabledivide32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1);
} }
else // floor sprite else // floor sprite
{ {
int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT; int32_t tempint1 = clipmapinfo.sector[k].CM_XREPEAT;
int32_t tempint2 = clipmapinfo.sector[k].CM_YREPEAT; int32_t tempint2 = clipmapinfo.sector[k].CM_YREPEAT;
maxcorrection = max((maxcorrection * (int32_t)spr->xrepeat)/tempint1, maxcorrection = max(tabledivide32_noinline(maxcorrection * (int32_t)spr->xrepeat, tempint1),
(maxcorrection * (int32_t)spr->yrepeat)/tempint2); tabledivide32_noinline(maxcorrection * (int32_t)spr->yrepeat, tempint2));
} }
maxcorrection -= MAXCLIPDIST; maxcorrection -= MAXCLIPDIST;
@ -15140,9 +15220,9 @@ void clearview(int32_t dacol)
{ {
palette_t p = getpal(dacol); palette_t p = getpal(dacol);
bglClearColor(((float)p.r)/255.0, bglClearColor((float)p.r * (1.f/255.f),
((float)p.g)/255.0, (float)p.g * (1.f/255.f),
((float)p.b)/255.0, (float)p.b * (1.f/255.f),
0); 0);
bglClear(GL_COLOR_BUFFER_BIT); bglClear(GL_COLOR_BUFFER_BIT);
return; return;
@ -15179,9 +15259,9 @@ void clearallviews(int32_t dacol)
palette_t p = getpal(dacol); palette_t p = getpal(dacol);
bglViewport(0,0,xdim,ydim); glox1 = -1; bglViewport(0,0,xdim,ydim); glox1 = -1;
bglClearColor(((float)p.r)/255.0, bglClearColor((float)p.r * (1.f/255.f),
((float)p.g)/255.0, (float)p.g * (1.f/255.f),
((float)p.b)/255.0, (float)p.b * (1.f/255.f),
0); 0);
bglClear(GL_COLOR_BUFFER_BIT); bglClear(GL_COLOR_BUFFER_BIT);
return; return;
@ -15740,8 +15820,8 @@ void drawline256(int32_t x1, int32_t y1, int32_t x2, int32_t y2, char col)
//bglEnable(GL_BLEND); // When using line antialiasing, this is needed //bglEnable(GL_BLEND); // When using line antialiasing, this is needed
bglBegin(GL_LINES); bglBegin(GL_LINES);
bglColor4ub(p.r,p.g,p.b,255); bglColor4ub(p.r,p.g,p.b,255);
bglVertex2f((float)x1/4096.0,(float)y1/4096.0); bglVertex2f((float)x1 * (1.f/4096.f), (float)y1 * (1.f/4096.f));
bglVertex2f((float)x2/4096.0,(float)y2/4096.0); bglVertex2f((float)x2 * (1.f/4096.f), (float)y2 * (1.f/4096.f));
bglEnd(); bglEnd();
//bglDisable(GL_BLEND); //bglDisable(GL_BLEND);

View file

@ -37,6 +37,7 @@ credits.
#include <sys/stat.h> #include <sys/stat.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "pragmas.h"
#if defined(__POWERPC__) || defined(GEKKO) #if defined(__POWERPC__) || defined(GEKKO)
#define BIGENDIAN 1 #define BIGENDIAN 1
@ -820,14 +821,14 @@ static int32_t kpngrend(const char *kfilebuf, int32_t kfilength,
//Save code by making grayscale look like a palette color scheme //Save code by making grayscale look like a palette color scheme
if ((!kcoltype) || (kcoltype == 4)) if ((!kcoltype) || (kcoltype == 4))
{ {
j = 0xff000000; k = (255 / ((1<<bitdepth)-1))*0x10101; j = 0xff000000; k = (tabledivide32(255, ((1<<bitdepth)-1)))*0x10101;
paleng = (1<<bitdepth); paleng = (1<<bitdepth);
for (i=0; i<paleng; i++,j+=k) palcol[i] = LSWAPIB(j); for (i=0; i<paleng; i++,j+=k) palcol[i] = LSWAPIB(j);
} }
} }
else if (i == (int32_t)LSWAPIB(0x45544c50)) //PLTE (must be before IDAT) else if (i == (int32_t)LSWAPIB(0x45544c50)) //PLTE (must be before IDAT)
{ {
paleng = leng/3; paleng = tabledivide32(leng, 3);
for (i=paleng-1; i>=0; i--) palcol[i] = LSWAPIB((LSWAPIL(*(int32_t *)&filptr[i*3])>>8)|0xff000000); for (i=paleng-1; i>=0; i--) palcol[i] = LSWAPIB((LSWAPIL(*(int32_t *)&filptr[i*3])>>8)|0xff000000);
} }
else if (i == (int32_t)LSWAPIB(0x44474b62)) //bKGD (must be after PLTE and before IDAT) else if (i == (int32_t)LSWAPIB(0x44474b62)) //bKGD (must be after PLTE and before IDAT)
@ -835,7 +836,7 @@ static int32_t kpngrend(const char *kfilebuf, int32_t kfilength,
switch (kcoltype) switch (kcoltype)
{ {
case 0: case 4: case 0: case 4:
bakcol = (((int32_t)filptr[0]<<8)+(int32_t)filptr[1])*255/((1<<bitdepth)-1); bakcol = (((int32_t)filptr[0]<<8)+(int32_t)filptr[1])*tabledivide32(255, ((1<<bitdepth)-1));
bakcol = bakcol*0x10101+0xff000000; break; bakcol = bakcol*0x10101+0xff000000; break;
case 2: case 6: case 2: case 6:
if (bitdepth == 8) if (bitdepth == 8)
@ -843,7 +844,7 @@ static int32_t kpngrend(const char *kfilebuf, int32_t kfilength,
else else
{ {
for (i=0,bakcol=0xff000000; i<3; i++) for (i=0,bakcol=0xff000000; i<3; i++)
bakcol += ((((((int32_t)filptr[i<<1])<<8)+((int32_t)filptr[(i<<1)+1]))/257)<<(16-(i<<3))); bakcol += tabledivide32(((((int32_t)filptr[i<<1])<<8)+((int32_t)filptr[(i<<1)+1])), 257)<<(16-(i<<3));
} }
break; break;
case 3: case 3:
@ -2202,7 +2203,7 @@ static int32_t kddsrend(const char *buf, int32_t leng,
if (!(dxt&1)) if (!(dxt&1))
{ {
for (z=256-1; z>0; z--) lut[z] = (255<<16)/z; for (z=256-1; z>0; z--) lut[z] = tabledivide32_noinline(255<<16, z);
lut[0] = (1<<16); lut[0] = (1<<16);
} }
if (dxt == 1) stride = (xsiz<<1); else stride = (xsiz<<2); if (dxt == 1) stride = (xsiz<<1); else stride = (xsiz<<2);
@ -2419,7 +2420,7 @@ int32_t kprender(const char *buf, int32_t leng, intptr_t frameptr, int32_t bpl,
extern char toupperlookup[256]; extern char toupperlookup[256];
static int32_t wildmatch(const char *match, const char *wild) int32_t wildmatch(const char *match, const char *wild)
{ {
do do
{ {
@ -2428,13 +2429,13 @@ static int32_t wildmatch(const char *match, const char *wild)
wild++, match++; wild++, match++;
continue; continue;
} }
else if (*match + *wild == '\0') else if ((*match|*wild) == '\0')
return 1; return 1;
else if (*wild == '*') else if (*wild == '*')
{ {
while (*wild == '*') wild++; while (*wild == '*') wild++;
if (*wild == '\0') return 1; if (*wild == '\0') return 1;
while (toupperlookup[*match] != toupperlookup[*wild] && *match) match++; while (*match && toupperlookup[*match] != toupperlookup[*wild]) match++;
if (toupperlookup[*match] == toupperlookup[*wild]) if (toupperlookup[*match] == toupperlookup[*wild])
continue; continue;
} }

View file

@ -1004,10 +1004,7 @@ void updateanimation(md2model_t *m, const spritetype *tspr, uint8_t lpal)
return; return;
} }
if (smooth->mdsmooth) // VERIFY: (smooth->mdsmooth) implies (tile2model[tile].smoothduration!=0) ? fps = smooth->mdsmooth ? Blrintf((1.0f / (float) (tile2model[tile].smoothduration)) * 66.f) : anim->fpssc;
ftol((1.0f / (float)(tile2model[tile].smoothduration)) * 66.f, &fps);
else
fps = anim->fpssc;
i = (mdtims - sprext->mdanimtims)*((fps*timerticspersec)/120); i = (mdtims - sprext->mdanimtims)*((fps*timerticspersec)/120);

View file

@ -1459,7 +1459,7 @@ void OSD_Draw(void)
while (j > -1) while (j > -1)
{ {
osdrowscur++; osdrowscur++;
j -= 200/osd->draw.rows; j -= tabledivide32_noinline(200, osd->draw.rows);
if (osdrowscur > osd->draw.rows-1) if (osdrowscur > osd->draw.rows-1)
break; break;
} }
@ -1470,7 +1470,7 @@ void OSD_Draw(void)
while (j > -1) while (j > -1)
{ {
osdrowscur--; osdrowscur--;
j -= 200/osd->draw.rows; j -= tabledivide32_noinline(200, osd->draw.rows);
if (osdrowscur < 1) if (osdrowscur < 1)
break; break;
} }

View file

@ -2830,9 +2830,7 @@ static float calc_ypancoef(char curypanning, int16_t curpicnum, int32_t dopancor
if (dopancor) if (dopancor)
{ {
int32_t yoffs; int32_t yoffs = Blrintf((ypancoef - tilesiz[curpicnum].y) * (255.0f / ypancoef));
ftol((ypancoef - tilesiz[curpicnum].y) * (255.0f / ypancoef), &yoffs);
if (curypanning > 256 - yoffs) if (curypanning > 256 - yoffs)
curypanning -= yoffs; curypanning -= yoffs;
} }

View file

@ -558,8 +558,9 @@ static inline void fogcalc(int32_t tile, int32_t shade, int32_t vis, int32_t pal
} }
else else
{ {
fogresult = (r_usenewshading == 3 && shade > 0) ? 0 : -(FOGDISTCONST * shade)/combvis; combvis = 1.f/combvis;
fogresult2 = (FOGDISTCONST * (numshades-1-shade))/combvis; fogresult = (r_usenewshading == 3 && shade > 0) ? 0 : -(FOGDISTCONST * shade) * combvis;
fogresult2 = (FOGDISTCONST * (numshades-1-shade)) * combvis;
} }
} }
} }
@ -663,7 +664,7 @@ static void resizeglcheck(void)
if ((glox1 != windowx1) || (gloy1 != windowy1) || (glox2 != windowx2) || (gloy2 != windowy2)) if ((glox1 != windowx1) || (gloy1 != windowy1) || (glox2 != windowx2) || (gloy2 != windowy2))
{ {
const int32_t ourxdimen = (windowx2-windowx1+1); const int32_t ourxdimen = (windowx2-windowx1+1);
const float ratio = get_projhack_ratio(); float ratio = get_projhack_ratio();
const int32_t fovcorrect = (ratio==0) ? 0 : (int32_t)(ourxdimen*ratio - ourxdimen); const int32_t fovcorrect = (ratio==0) ? 0 : (int32_t)(ourxdimen*ratio - ourxdimen);
float m[4][4]; float m[4][4];
@ -675,9 +676,10 @@ static void resizeglcheck(void)
bglMatrixMode(GL_PROJECTION); bglMatrixMode(GL_PROJECTION);
memset(m,0,sizeof(m)); memset(m,0,sizeof(m));
m[0][0] = fydimen / ratio; m[0][2] = 1.f; ratio = 1.f/ratio;
m[0][0] = fydimen * ratio; m[0][2] = 1.f;
m[1][1] = fxdimen; m[1][2] = 1.f; m[1][1] = fxdimen; m[1][2] = 1.f;
m[2][2] = 1.f; m[2][3] = fydimen / ratio; m[2][2] = 1.f; m[2][3] = fydimen * ratio;
m[3][2] =-1.f; m[3][2] =-1.f;
bglLoadMatrixf(&m[0][0]); bglLoadMatrixf(&m[0][0]);
@ -2380,9 +2382,7 @@ static void calc_ypanning(int32_t refposz, float ryp0, float ryp1,
{ {
// Carry out panning "correction" to make it look like classic in some // Carry out panning "correction" to make it look like classic in some
// cases, but failing in the general case. // cases, but failing in the general case.
int32_t yoffs; int32_t yoffs = Blrintf((i-tilesiz[globalpicnum].y)*(255.f/i));
ftol((i-tilesiz[globalpicnum].y)*(255.f/i), &yoffs);
if (ypan > 256-yoffs) if (ypan > 256-yoffs)
ypan -= yoffs; ypan -= yoffs;
@ -2543,11 +2543,11 @@ static void polymost_drawalls(int32_t bunch)
else domost(x0,fy0,x1,fy1); else domost(x0,fy0,x1,fy1);
if (r_parallaxskypanning) if (r_parallaxskypanning)
vv[0] += dd[0]*((float)sec->floorypanning)*((float)i)/256.0; vv[0] += dd[0]*((float)sec->floorypanning)*((float)i)*(1.f/256.f);
gdx = 0; gdy = 0; gdo = dd[0]; gdx = 0; gdy = 0; gdo = dd[0];
gux = gdo * gux = gdo *
(t * (float) ((uint64_t) (xdimscale * yxaspect) * viewingrange)) / (16384.0*65536.0*65536.0*5.0*1024.0); (t * (float) ((uint64_t) (xdimscale * yxaspect) * viewingrange)) * (1.f/(16384.0*65536.0*65536.0*5.0*1024.0));
guy = 0; //guo calculated later guy = 0; //guo calculated later
gvx = 0; gvy = vv[1]; gvo = vv[0]; gvx = 0; gvy = vv[1]; gvo = vv[0];
@ -2559,7 +2559,7 @@ static void polymost_drawalls(int32_t bunch)
do do
{ {
globalpicnum = dapskyoff[y&((1<<dapskybits)-1)]+i; globalpicnum = dapskyoff[y&((1<<dapskybits)-1)]+i;
guo = gdo*(t*((float)(globalang-(y<<(11-dapskybits))))/2048.0 + (float)((r_parallaxskypanning)?sec->floorxpanning:0)) - gux*ghalfx; guo = gdo*(t*((float)(globalang-(y<<(11-dapskybits)))) * (1.f/2048.f) + (float)((r_parallaxskypanning)?sec->floorxpanning:0)) - gux*ghalfx;
y++; y++;
ox = fx; fx = ((float)((y<<(11-dapskybits))-globalang))*oz+ghalfx; ox = fx; fx = ((float)((y<<(11-dapskybits))-globalang))*oz+ghalfx;
if (fx > x1) { fx = x1; i = -1; } if (fx > x1) { fx = x1; i = -1; }
@ -2798,7 +2798,7 @@ static void polymost_drawalls(int32_t bunch)
i = (1<<(picsiz[globalpicnum]>>4)); if (i != tilesiz[globalpicnum].y) i += i; i = (1<<(picsiz[globalpicnum]>>4)); if (i != tilesiz[globalpicnum].y) i += i;
//Hack to draw black rectangle below sky when looking down... //Hack to draw black rectangle below sky when looking down...
gdx = 0; gdy = gxyaspect / -262144.f; gdo = -ghoriz*gdy; gdx = 0; gdy = gxyaspect * (1.f/-262144.f); gdo = -ghoriz*gdy;
gux = 0; guy = 0; guo = 0; gux = 0; guy = 0; guo = 0;
gvx = 0; gvy = 0; gvo = 0; gvx = 0; gvy = 0; gvo = 0;
oy = -vv[0]/vv[1]; oy = -vv[0]/vv[1];
@ -2819,7 +2819,7 @@ static void polymost_drawalls(int32_t bunch)
else domost(x1,cy1,x0,cy0); else domost(x1,cy1,x0,cy0);
if (r_parallaxskypanning) if (r_parallaxskypanning)
vv[0] += dd[0]*((float)sec->ceilingypanning)*((float)i)/256.f; vv[0] += dd[0]*(float)sec->ceilingypanning*(float)i/256.f;
gdx = 0; gdy = 0; gdo = dd[0]; gdx = 0; gdy = 0; gdo = dd[0];
gux = gdo * gux = gdo *
@ -2835,7 +2835,7 @@ static void polymost_drawalls(int32_t bunch)
do do
{ {
globalpicnum = dapskyoff[y&((1<<dapskybits)-1)]+i; globalpicnum = dapskyoff[y&((1<<dapskybits)-1)]+i;
guo = gdo*(t*((float)(globalang-(y<<(11-dapskybits))))/2048.0 + (float)((r_parallaxskypanning)?sec->ceilingxpanning:0)) - gux*ghalfx; guo = gdo*(t*((float)(globalang-(y<<(11-dapskybits)))) * 1.f/2048.f + (float)((r_parallaxskypanning)?sec->ceilingxpanning:0)) - gux*ghalfx;
y++; y++;
ox = fx; fx = ((float)((y<<(11-dapskybits))-globalang))*oz+ghalfx; ox = fx; fx = ((float)((y<<(11-dapskybits))-globalang))*oz+ghalfx;
if (fx > x1) { fx = x1; i = -1; } if (fx > x1) { fx = x1; i = -1; }

View file

@ -7,167 +7,19 @@
// inline versions. I'll eventually convert these to macro-inline // inline versions. I'll eventually convert these to macro-inline
// equivalents. --Jonathon // equivalents. --Jonathon
//#include "pragmas.h"
#include "compat.h" #include "compat.h"
#include "pragmas.h"
// Tables of DIVTABLESIZE libdivide divider structs, one table for 64-bit and one for 32-bit signed division.
// presumably entry [d] holds the divider for divisor d and is filled in elsewhere at startup — TODO confirm.
libdivide_s64pad_t divtable64[DIVTABLESIZE];
libdivide_s32pad_t divtable32[DIVTABLESIZE];
// Out-of-line wrappers: each simply forwards to the same-named function without the _noinline suffix,
// giving callers a real function to call instead of expanding the divide helper at every call site.
uint32_t divideu32_noinline(uint32_t n, uint32_t d) { return divideu32(n, d); }
int32_t tabledivide32_noinline(int32_t n, int32_t d) { return tabledivide32(n, d); }
// NOTE(review): takes a 64-bit numerator but a 32-bit divisor and returns int32_t, so a quotient wider
// than 32 bits would be truncated here — confirm against the prototype and tabledivide64's own types.
int32_t tabledivide64_noinline(int64_t n, int32_t d) { return tabledivide64(n, d); }
int32_t dmval; int32_t dmval;
#if defined(__GNUC__) && defined(GEKKO) #if defined(__GNUC__) && defined(__i386__) && !defined(NOASM) // NOASM
// naked function (no prolog/epilog)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wreturn-type"
int32_t scale(int32_t a, int32_t d, int32_t c) ATTRIBUTE((naked));
int32_t scale(int32_t a, int32_t d, int32_t c)
{
// return ((int64_t)a * d) / c;
__asm__ __volatile__ (
" mullw 6, 3, 4\n"
" mulhw 4, 3, 4\n"
" mr 3, 6\n"
" srawi. 0, 5, 31\n"
" cmpwi cr1, 4, 0\n"
" crxor 7, 0, 4\n"
" xor 5, 0, 5\n"
" subf. 5, 0, 5\n"
" beq DivByZero\n"
" bge cr1, Div64Common\n"
" subfic 3, 3, 0\n"
" subfze 4, 4\n"
"Div64Common:\n"
" cmplw 4, 5\n"
" cntlzw 6, 5\n"
" xor 4, 4, 3\n"
" slw 5, 5, 6\n"
" rotlw 4, 4, 6\n"
" slw 3, 3, 6\n"
" li 7, 2\n"
" xor 4, 4, 3\n"
" bge DivOverflow\n"
" mtctr 7\n"
"Div64Compute:\n"
" srwi 6, 5, 16\n"
" divwu 7, 4, 6\n"
" mullw 6, 7, 6\n"
" subf 4, 6, 4\n"
" slwi 4, 4, 16\n"
" inslwi 4, 3, 16, 16\n"
" slwi 3, 3, 16\n"
" clrlwi 6, 5, 16\n"
" mullw 6, 7, 6\n"
" subfc 4, 6, 4\n"
" subfe. 6, 6, 6\n"
" add 3, 3, 7\n"
" bge Div64Done\n"
"Div64Correct:\n"
" addc 4, 4, 5\n"
" addze. 6, 6\n"
" subi 3, 3, 1\n"
" blt Div64Correct\n"
"Div64Done:\n"
" bdnz Div64Compute\n"
" cmpwi 3, 0\n"
" bso cr1, Div64QuotientNeg\n"
" blt DivOverflow\n"
" blr\n"
"Div64QuotientNeg:\n"
" neg. 3, 3\n"
" blelr\n"
"DivOverflow:\n"
" cror 4, 7, 7\n"
"DivByZero:\n"
" lis 3, 0x8000\n"
" bltlr cr1\n"
" subi 3, 3, 1\n"
" blr\n"
);
}
#pragma GCC diagnostic pop
void clearbufbyte(void *d, int32_t c, int32_t a)
{
if (a==0) {
uint8_t *dd = (uint8_t*)d;
int32_t align = (32 - (int32_t)d) & 31;
if (align && c >= align) {
uint32_t izero = 0;
double fzero = 0;
c -= align;
if (align&1) {
*dd = izero;
dd += 1;
}
if (align&2) {
*(uint16_t*)dd = izero;
dd += 2;
}
if (align&4) {
*(uint32_t*)dd = izero;
dd += 4;
}
if (align&8) {
*(double*)dd = fzero;
dd += 8;
}
if (align&16) {
*(double*)dd = fzero;
*(double*)(dd+8) = fzero;
dd += 16;
}
}
align = c >> 5;
while (align) {
__asm__ (
" dcbz 0, %0\n"
" addi %0, %0, 32\n"
: "+r"(dd)
:
: "memory"
);
align--;
}
if ((c &= 31)) {
while (c--) {
*dd++ = 0;
}
}
return;
}
__asm__ __volatile__ (
" add %1, %1, %2\n"
" neg. %2, %2\n"
" beq 2f\n"
"1:\n"
" stbx %0, %1, %2\n"
" addic. %2, %2, 1\n"
" rotrwi %0, %0, 8\n"
" bne 1b\n"
"2:\n"
: "+r"(a), "+b"(d), "+r"(c)
:
: "cc", "xer", "memory"
);
}
#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM) // NOASM
// //
// GCC Inline Assembler version // GCC Inline Assembler version
@ -297,7 +149,158 @@ void copybufreverse(const void *S, void *D, int32_t c)
// Microsoft C Inline Assembler version // Microsoft C Inline Assembler version
// //
#else // _MSC_VER #elif defined(__GNUC__) && defined(GEKKO)
// scale(a, d, c): intended result is ((int64_t)a * d) / c, implemented as hand-written PowerPC
// assembly for the GEKKO build.
//
// The asm statement has no operand lists: it reads and writes hard registers r3..r7 and r0 by
// number and returns with blr/blelr/bltlr itself, so control never reaches the closing brace.
// It relies on a, d, c arriving in r3, r4, r5 and the result being returned in r3
// (PowerPC calling convention — TODO confirm for this toolchain).
//
// Written as a naked function (no prolog/epilog).
// NOTE(review): no naked attribute is visible on this definition — confirm the compiler emits no
// prolog/epilog here (or that one is harmless), since the asm returns on its own.
// NOTE(review): the labels are ordinary global symbols (DivByZero, Div64Common, ...), so this body
// presumably must be emitted exactly once per object file (no inlining/cloning) — confirm.
// FIXME: this function produces unused parameter warnings and a missing return warning
int32_t scale(int32_t a, int32_t d, int32_t c)
{
// return ((int64_t)a * d) / c;
__asm__ __volatile__ (
// 64-bit product of r3*r4: low word ends up in r3, high word in r4.
" mullw 6, 3, 4\n"
" mulhw 4, 3, 4\n"
" mr 3, 6\n"
// r0 = sign mask of the divisor; r5 = |divisor| via (r5 ^ r0) - r0; a zero divisor branches to DivByZero.
// cr1 records the sign of the product; the crxor appears to fold the divisor's sign in to give the quotient's sign.
" srawi. 0, 5, 31\n"
" cmpwi cr1, 4, 0\n"
" crxor 7, 0, 4\n"
" xor 5, 0, 5\n"
" subf. 5, 0, 5\n"
" beq DivByZero\n"
" bge cr1, Div64Common\n"
// Product is negative: negate the 64-bit value held in r4:r3.
" subfic 3, 3, 0\n"
" subfze 4, 4\n"
"Div64Common:\n"
// Unsigned 64/32 division: the divisor is shifted left by its leading-zero count, the dividend is
// shifted to match, and the counter is loaded with 2 for two passes of the Div64Compute loop.
// If the high word is already >= the divisor the quotient cannot fit and DivOverflow is taken.
" cmplw 4, 5\n"
" cntlzw 6, 5\n"
" xor 4, 4, 3\n"
" slw 5, 5, 6\n"
" rotlw 4, 4, 6\n"
" slw 3, 3, 6\n"
" li 7, 2\n"
" xor 4, 4, 3\n"
" bge DivOverflow\n"
" mtctr 7\n"
"Div64Compute:\n"
// Each pass estimates 16 quotient bits with divwu against the top half of the divisor ...
" srwi 6, 5, 16\n"
" divwu 7, 4, 6\n"
" mullw 6, 7, 6\n"
" subf 4, 6, 4\n"
" slwi 4, 4, 16\n"
" inslwi 4, 3, 16, 16\n"
" slwi 3, 3, 16\n"
" clrlwi 6, 5, 16\n"
" mullw 6, 7, 6\n"
" subfc 4, 6, 4\n"
" subfe. 6, 6, 6\n"
" add 3, 3, 7\n"
" bge Div64Done\n"
"Div64Correct:\n"
// ... and steps the estimate down while the remainder is negative.
" addc 4, 4, 5\n"
" addze. 6, 6\n"
" subi 3, 3, 1\n"
" blt Div64Correct\n"
"Div64Done:\n"
" bdnz Div64Compute\n"
// Apply the quotient's sign; a magnitude that does not fit a signed 32-bit result goes to DivOverflow.
" cmpwi 3, 0\n"
" bso cr1, Div64QuotientNeg\n"
" blt DivOverflow\n"
" blr\n"
"Div64QuotientNeg:\n"
" neg. 3, 3\n"
" blelr\n"
"DivOverflow:\n"
" cror 4, 7, 7\n"
"DivByZero:\n"
// Error result: r3 = 0x80000000, returned as-is when cr1 says "less than", otherwise decremented
// to 0x7fffffff (i.e. the result appears to saturate rather than trap).
" lis 3, 0x8000\n"
" bltlr cr1\n"
" subi 3, 3, 1\n"
" blr\n"
);
}
// clearbufbyte(d, c, a): fills c bytes starting at d (GEKKO/PowerPC version).
//   d - destination buffer
//   c - number of bytes to write
//   a - fill pattern; a == 0 takes a fast zeroing path, any other value is written byte by byte
//       starting with its low byte and rotating right by 8 bits after each byte, so the four
//       bytes of a repeat through the buffer.
void clearbufbyte(void *d, int32_t c, int32_t a)
{
if (a==0) {
uint8_t *dd = (uint8_t*)d;
// Bytes needed to bring dd up to the next 32-byte boundary (0 if already aligned).
// The pointer-to-int32_t cast assumes 32-bit pointers on this target.
int32_t align = (32 - (int32_t)d) & 31;
// Only pre-align when the buffer reaches the boundary; otherwise c < 32 and the
// byte loop at the end handles everything.
if (align && c >= align) {
uint32_t izero = 0;
double fzero = 0;
c -= align;
// Write the 1/2/4/8/16-byte pieces selected by the bits of align, smallest first,
// so each wider store lands on an address that is a multiple of its own size.
if (align&1) {
*dd = izero;
dd += 1;
}
if (align&2) {
*(uint16_t*)dd = izero;
dd += 2;
}
if (align&4) {
*(uint32_t*)dd = izero;
dd += 4;
}
if (align&8) {
*(double*)dd = fzero;
dd += 8;
}
if (align&16) {
*(double*)dd = fzero;
*(double*)(dd+8) = fzero;
dd += 16;
}
}
// From here on 'align' is reused as the count of whole 32-byte chunks left.
align = c >> 5;
while (align) {
// dcbz zeroes the data-cache block containing dd; dd then advances 32 bytes.
// assumes a 32-byte cache block on this CPU — TODO confirm.
__asm__ (
" dcbz 0, %0\n"
" addi %0, %0, 32\n"
: "+r"(dd)
:
: "memory"
);
align--;
}
// Zero the remaining 0..31 tail bytes one at a time.
if ((c &= 31)) {
while (c--) {
*dd++ = 0;
}
}
return;
}
// Non-zero pattern: d is advanced to the end of the buffer and c negated, then the loop stores
// the low byte of a at d+c, increments c toward zero, and rotates a right by 8 bits.
// c == 0 skips the loop entirely (beq 2f).
__asm__ __volatile__(
" add %1, %1, %2\n"
" neg. %2, %2\n"
" beq 2f\n"
"1:\n"
" stbx %0, %1, %2\n"
" addic. %2, %2, 1\n"
" rotrwi %0, %0, 8\n"
" bne 1b\n"
"2:\n"
: "+r"(a), "+b"(d), "+r"(c)
:
: "cc", "xer", "memory"
);
}
#else
// //
// Generic C version // Generic C version

View file

@ -127,7 +127,7 @@
<NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nmake /f Makefile.msvc veryclean all DEBUG=1 WINBITS=64</NMakeReBuildCommandLine> <NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nmake /f Makefile.msvc veryclean all DEBUG=1 WINBITS=64</NMakeReBuildCommandLine>
<NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nmake /f Makefile.msvc veryclean WINBITS=64</NMakeCleanCommandLine> <NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nmake /f Makefile.msvc veryclean WINBITS=64</NMakeCleanCommandLine>
<NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">eduke32.exe</NMakeOutput> <NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">eduke32.exe</NMakeOutput>
<NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">USE_OPENGL;POLYMER</NMakePreprocessorDefinitions> <NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">USE_OPENGL;POLYMER;NOASM</NMakePreprocessorDefinitions>
<NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath> <NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath>
<NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes> <NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes>
<NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath> <NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath>
@ -138,7 +138,7 @@
<NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nmake /f Makefile.msvc veryclean all WINBITS=64</NMakeReBuildCommandLine> <NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nmake /f Makefile.msvc veryclean all WINBITS=64</NMakeReBuildCommandLine>
<NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nmake /f Makefile.msvc veryclean WINBITS=64</NMakeCleanCommandLine> <NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nmake /f Makefile.msvc veryclean WINBITS=64</NMakeCleanCommandLine>
<NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">eduke32.exe</NMakeOutput> <NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Release|x64'">eduke32.exe</NMakeOutput>
<NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">USE_OPENGL;POLYMER</NMakePreprocessorDefinitions> <NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">USE_OPENGL;POLYMER;NOASM</NMakePreprocessorDefinitions>
<NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath> <NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath>
<NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes> <NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes>
<NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath> <NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath>
@ -172,7 +172,7 @@
<NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">nmake /f Makefile.msvc veryclean all DEBUG=1 WINBITS=64 RENDERTYPE=SDL</NMakeReBuildCommandLine> <NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">nmake /f Makefile.msvc veryclean all DEBUG=1 WINBITS=64 RENDERTYPE=SDL</NMakeReBuildCommandLine>
<NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">nmake /f Makefile.msvc veryclean WINBITS=64 RENDERTYPE=SDL</NMakeCleanCommandLine> <NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">nmake /f Makefile.msvc veryclean WINBITS=64 RENDERTYPE=SDL</NMakeCleanCommandLine>
<NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">eduke32.exe</NMakeOutput> <NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">eduke32.exe</NMakeOutput>
<NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">USE_OPENGL;POLYMER</NMakePreprocessorDefinitions> <NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">USE_OPENGL;POLYMER;NOASM</NMakePreprocessorDefinitions>
<NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath> <NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath>
<NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes> <NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes>
<NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath> <NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Debug-SDL|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath>
@ -183,7 +183,7 @@
<NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">nmake /f Makefile.msvc veryclean all WINBITS=64 RENDERTYPE=SDL</NMakeReBuildCommandLine> <NMakeReBuildCommandLine Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">nmake /f Makefile.msvc veryclean all WINBITS=64 RENDERTYPE=SDL</NMakeReBuildCommandLine>
<NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">nmake /f Makefile.msvc veryclean WINBITS=64 RENDERTYPE=SDL</NMakeCleanCommandLine> <NMakeCleanCommandLine Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">nmake /f Makefile.msvc veryclean WINBITS=64 RENDERTYPE=SDL</NMakeCleanCommandLine>
<NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">eduke32.exe</NMakeOutput> <NMakeOutput Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">eduke32.exe</NMakeOutput>
<NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">USE_OPENGL;POLYMER</NMakePreprocessorDefinitions> <NMakePreprocessorDefinitions Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">USE_OPENGL;POLYMER;NOASM</NMakePreprocessorDefinitions>
<NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath> <NMakeIncludeSearchPath Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">$(NMakeIncludeSearchPath);build\include;source\jmact;source\jaudiolib\include;source\enet\include;</NMakeIncludeSearchPath>
<NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes> <NMakeForcedIncludes Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">$(NMakeForcedIncludes)</NMakeForcedIncludes>
<NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath> <NMakeAssemblySearchPath Condition="'$(Configuration)|$(Platform)'=='Release-SDL|x64'">$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath>

View file

@ -5029,7 +5029,7 @@ ACTOR_STATIC void G_MoveMisc(void) // STATNUM 5
case NEON5__STATIC: case NEON5__STATIC:
case NEON6__STATIC: case NEON6__STATIC:
// '/' binds tighter than '&': the original masks the QUOTIENT with 31. Keep that here; masking the divisor instead changes the result and can yield a zero divisor.
if ((g_globalRandom/(s->lotag+1)&31) > 4) s->shade = -127; if ((tabledivide32_noinline(g_globalRandom, s->lotag+1)&31) > 4) s->shade = -127;
else s->shade = 127; else s->shade = 127;
goto BOLT; goto BOLT;
@ -6300,7 +6300,7 @@ ACTOR_STATIC void G_MoveEffectors(void) //STATNUM 3
// if(t[5] > 0) { t[5]--; break; } // if(t[5] > 0) { t[5]--; break; }
// '/' binds tighter than '&': the original masks the QUOTIENT with 31. Keep that here; masking the divisor instead changes the result and can yield a zero divisor.
if ((g_globalRandom/(sh+1)&31) < 4 && !t[2]) if ((tabledivide32_noinline(g_globalRandom, sh+1)&31) < 4 && !t[2])
{ {
// t[5] = 4+(g_globalRandom&7); // t[5] = 4+(g_globalRandom&7);
sc->ceilingpal = s->owner>>8; sc->ceilingpal = s->owner>>8;
@ -6337,7 +6337,7 @@ ACTOR_STATIC void G_MoveEffectors(void) //STATNUM 3
case SE_4_RANDOM_LIGHTS: case SE_4_RANDOM_LIGHTS:
// '/' binds tighter than '&': the original masks the QUOTIENT with 31. Keep that here; masking the divisor instead changes the result and can yield a zero divisor.
if ((g_globalRandom/(sh+1)&31) < 4) if ((tabledivide32_noinline(g_globalRandom, sh+1)&31) < 4)
{ {
t[1] = s->shade + (g_globalRandom&15);//Got really bright t[1] = s->shade + (g_globalRandom&15);//Got really bright
t[0] = s->shade + (g_globalRandom&15); t[0] = s->shade + (g_globalRandom&15);

View file

@ -243,11 +243,11 @@ void CONTROL_Android_PollDevices(ControlInfo *info)
//LOGI("CONTROL_Android_PollDevices %f %f",forwardmove,sidemove); //LOGI("CONTROL_Android_PollDevices %f %f",forwardmove,sidemove);
//LOGI("CONTROL_Android_PollDevices %f %f",droidinput.pitch,droidinput.yaw); //LOGI("CONTROL_Android_PollDevices %f %f",droidinput.pitch,droidinput.yaw);
info->dz = (int32_t)nearbyintf(-droidinput.forwardmove * ANDROIDFORWARDMOVEFACTOR); info->dz = (int32_t)Blrintf(-droidinput.forwardmove * ANDROIDFORWARDMOVEFACTOR);
info->dx = (int32_t)nearbyintf(droidinput.sidemove * ANDROIDSIDEMOVEFACTOR); info->dx = (int32_t)Blrintf(droidinput.sidemove * ANDROIDSIDEMOVEFACTOR);
info->dpitch = (int32_t)nearbyint(droidinput.pitch * ANDROIDPITCHFACTOR + info->dpitch = (int32_t)Blrintf(droidinput.pitch * ANDROIDPITCHFACTOR +
droidinput.pitch_joystick * ANDROIDPITCHFACTORJOYSTICK); droidinput.pitch_joystick * ANDROIDPITCHFACTORJOYSTICK);
info->dyaw = (int32_t)nearbyint(-droidinput.yaw * ANDROIDYAWFACTOR - info->dyaw = (int32_t)Blrintf(-droidinput.yaw * ANDROIDYAWFACTOR -
droidinput.yaw_joystick * ANDROIDYAWFACTORJOYSTICK); droidinput.yaw_joystick * ANDROIDYAWFACTORJOYSTICK);
/* /*

View file

@ -886,7 +886,7 @@ nextdemo_nomenu:
rotatesprite(120<<16,16<<16,32768,0,SLIDEBAR,0,0,2+8+16+1024,(xdim*125)/320,0,(xdim*155)/320,ydim-1); rotatesprite(120<<16,16<<16,32768,0,SLIDEBAR,0,0,2+8+16+1024,(xdim*125)/320,0,(xdim*155)/320,ydim-1);
rotatesprite(150<<16,16<<16,32768,0,SLIDEBAR,0,0,2+8+16+1024,(xdim*155)/320,0,xdim-1,ydim-1); rotatesprite(150<<16,16<<16,32768,0,SLIDEBAR,0,0,2+8+16+1024,(xdim*155)/320,0,xdim-1,ydim-1);
j = (182<<16) - ((((120*(g_demo_totalCnt-g_demo_cnt))<<4)/g_demo_totalCnt)<<12); j = (182<<16) - (tabledivide32_noinline((120*(g_demo_totalCnt-g_demo_cnt))<<4, g_demo_totalCnt)<<12);
rotatesprite_fs(j,(16<<16)+(1<<15),32768,0,SLIDEBAR+1,0,0,2+8+16+1024); rotatesprite_fs(j,(16<<16)+(1<<15),32768,0,SLIDEBAR+1,0,0,2+8+16+1024);
j=(g_demo_totalCnt-g_demo_cnt)/REALGAMETICSPERSEC; j=(g_demo_totalCnt-g_demo_cnt)/REALGAMETICSPERSEC;

View file

@ -809,7 +809,7 @@ vec2_t G_ScreenText(const int32_t font,
{ {
size.x = xbetween; size.x = xbetween;
xbetween = (length == 1) ? 0 : ((xbetween - linewidth) / (length - 1)); xbetween = (length == 1) ? 0 : tabledivide32_noinline((xbetween - linewidth), (length - 1));
linewidth = size.x; linewidth = size.x;
} }
@ -823,7 +823,7 @@ vec2_t G_ScreenText(const int32_t font,
if (f & TEXT_YJUSTIFY) if (f & TEXT_YJUSTIFY)
{ {
const int32_t tempswap = ybetween; const int32_t tempswap = ybetween;
ybetween = (lines == 1) ? 0 : ((ybetween - size.y) / (lines - 1)); ybetween = (lines == 1) ? 0 : tabledivide32_noinline(ybetween - size.y, lines - 1);
size.y = tempswap; size.y = tempswap;
} }
@ -1001,7 +1001,7 @@ vec2_t G_ScreenText(const int32_t font,
if (f & TEXT_XJUSTIFY) if (f & TEXT_XJUSTIFY)
{ {
xbetween = (length == 1) ? 0 : ((xbetween - linewidth) / (length - 1)); xbetween = (length == 1) ? 0 : tabledivide32_noinline(xbetween - linewidth, length - 1);
linewidth = size.x; linewidth = size.x;
} }
@ -2489,7 +2489,7 @@ static void G_PrintFPS(void)
if (thisSec - LastSec) if (thisSec - LastSec)
{ {
g_currentFrameRate = LastCount = FrameCount / (thisSec - LastSec); g_currentFrameRate = LastCount = tabledivide32_noinline(FrameCount, thisSec - LastSec);
LastSec = thisSec; LastSec = thisSec;
FrameCount = 0; FrameCount = 0;
@ -3483,7 +3483,9 @@ static void palaccum_add(palaccum_t *pa, const palette_t *pal, int32_t f)
static void G_FadePalaccum(const palaccum_t *pa) static void G_FadePalaccum(const palaccum_t *pa)
{ {
setpalettefade(pa->r/pa->sumf, pa->g/pa->sumf, pa->b/pa->sumf, pa->maxf); setpalettefade(tabledivide32_noinline(pa->r, pa->sumf),
tabledivide32_noinline(pa->g, pa->sumf),
tabledivide32_noinline(pa->b, pa->sumf), pa->maxf);
} }
@ -4502,7 +4504,7 @@ void G_DrawRooms(int32_t snum, int32_t smoothratio)
else else
{ {
tmpvr = vr; tmpvr = vr;
tmpyx = (65536*ydim*8)/(xdim*5); tmpyx = tabledivide32_noinline(65536*ydim*8, xdim*5);
setaspect(mulscale16(tmpvr,viewingrange), yxaspect); setaspect(mulscale16(tmpvr,viewingrange), yxaspect);
} }
@ -4581,7 +4583,7 @@ void G_DrawRooms(int32_t snum, int32_t smoothratio)
setaspect(mulscale16(oviewingrange,i>>1), yxaspect); setaspect(mulscale16(oviewingrange,i>>1), yxaspect);
tmpvr = i>>1; tmpvr = i>>1;
tmpyx = (65536*ydim*8)/(xdim*5); tmpyx = tabledivide32_noinline(65536*ydim*8, xdim*5);
} }
} }
else if (getrendermode() >= REND_POLYMOST && (ud.screen_tilting else if (getrendermode() >= REND_POLYMOST && (ud.screen_tilting
@ -7646,7 +7648,7 @@ void G_DoSpriteAnimations(int32_t ourx, int32_t oury, int32_t oura, int32_t smoo
l = s->z-actor[g_player[p].ps->i].floorz+(3<<8); l = s->z-actor[g_player[p].ps->i].floorz+(3<<8);
// SET_SPRITE_NOT_TSPRITE // SET_SPRITE_NOT_TSPRITE
if (l > 1024 && s->yrepeat > 32 && s->extra > 0) if (l > 1024 && s->yrepeat > 32 && s->extra > 0)
s->yoffset = (int8_t)(l/(s->yrepeat<<2)); s->yoffset = (int8_t)tabledivide32_noinline(l, s->yrepeat<<2);
else s->yoffset=0; else s->yoffset=0;
} }
@ -12908,8 +12910,8 @@ void A_SpawnWallGlass(int32_t i,int32_t wallnum,int32_t n)
x1 -= ksgn(yv); x1 -= ksgn(yv);
y1 += ksgn(xv); y1 += ksgn(xv);
xv /= j; xv = tabledivide32_noinline(xv, j);
yv /= j; yv = tabledivide32_noinline(yv, j);
for (j=n; j>0; j--) for (j=n; j>0; j--)
{ {
@ -12949,8 +12951,8 @@ void A_SpawnCeilingGlass(int32_t i,int32_t sectnum,int32_t n)
x1 = wall[s].x; x1 = wall[s].x;
y1 = wall[s].y; y1 = wall[s].y;
xv = (wall[s+1].x-x1)/(n+1); xv = tabledivide32_noinline(wall[s+1].x-x1, n+1);
yv = (wall[s+1].y-y1)/(n+1); yv = tabledivide32_noinline(wall[s+1].y-y1, n+1);
for (j=n; j>0; j--) for (j=n; j>0; j--)
{ {
@ -12984,8 +12986,8 @@ void A_SpawnRandomGlass(int32_t i,int32_t wallnum,int32_t n)
x1 = wall[wallnum].x; x1 = wall[wallnum].x;
y1 = wall[wallnum].y; y1 = wall[wallnum].y;
xv = (wall[wall[wallnum].point2].x-wall[wallnum].x)/j; xv = tabledivide32_noinline(wall[wall[wallnum].point2].x-wall[wallnum].x, j);
yv = (wall[wall[wallnum].point2].y-wall[wallnum].y)/j; yv = tabledivide32_noinline(wall[wall[wallnum].point2].y-wall[wallnum].y, j);
for (j=n; j>0; j--) for (j=n; j>0; j--)
{ {

View file

@ -266,7 +266,7 @@ int32_t A_GetFurthestAngle(int32_t iActor, int32_t angs)
int32_t furthest_angle=0; int32_t furthest_angle=0;
int32_t d, j; int32_t d, j;
int32_t greatestd = INT32_MIN; int32_t greatestd = INT32_MIN;
int32_t angincs=2048/angs; int32_t angincs=tabledivide32_noinline(2048, angs);
hitdata_t hit; hitdata_t hit;
for (j=s->ang; j<(2048+s->ang); j+=angincs) for (j=s->ang; j<(2048+s->ang); j+=angincs)
@ -303,7 +303,7 @@ int32_t A_FurthestVisiblePoint(int32_t iActor, spritetype *ts, int32_t *dax, int
if ((!g_netServer && ud.multimode < 2) && ud.player_skill < 3) if ((!g_netServer && ud.multimode < 2) && ud.player_skill < 3)
angincs = 2048/2; angincs = 2048/2;
else angincs = 2048/(1+(krand()&1)); else angincs = tabledivide32_noinline(2048, 1+(krand()&1));
for (j=ts->ang; j<(2048+ts->ang); j+=(angincs-(krand()&511))) for (j=ts->ang; j<(2048+ts->ang); j+=(angincs-(krand()&511)))
{ {
@ -4447,7 +4447,7 @@ finish_qsprintf:
/*OSD_Printf(OSDTEXT_GREEN "CON_RESIZEARRAY: resizing array %s from %d to %d\n", /*OSD_Printf(OSDTEXT_GREEN "CON_RESIZEARRAY: resizing array %s from %d to %d\n",
aGameArrays[j].szLabel, aGameArrays[j].size, asize / GAR_ELTSZ);*/ aGameArrays[j].szLabel, aGameArrays[j].size, asize / GAR_ELTSZ);*/
aGameArrays[j].plValues = (intptr_t *)Xrealloc(aGameArrays[j].plValues, asize); aGameArrays[j].plValues = (intptr_t *)Xrealloc(aGameArrays[j].plValues, asize);
aGameArrays[j].size = asize / GAR_ELTSZ; aGameArrays[j].size = asize/GAR_ELTSZ;
kread(fil, aGameArrays[j].plValues, asize); kread(fil, aGameArrays[j].plValues, asize);
} }
@ -5513,7 +5513,11 @@ void A_Execute(int32_t iActor, int32_t iPlayer, int32_t lDist)
else if (actor[vm.g_i].timetosleep > 1) else if (actor[vm.g_i].timetosleep > 1)
actor[vm.g_i].timetosleep--; actor[vm.g_i].timetosleep--;
else if (actor[vm.g_i].timetosleep == 1) else if (actor[vm.g_i].timetosleep == 1)
{
if (g_scriptVersion == 13 && (vm.g_sp->picnum == FIRE || vm.g_sp->picnum == FIRE2))
return;
changespritestat(vm.g_i, STAT_ZOMBIEACTOR); changespritestat(vm.g_i, STAT_ZOMBIEACTOR);
}
} }
void G_SaveMapState(void) void G_SaveMapState(void)

View file

@ -127,35 +127,79 @@ void Gv_FinalizeWeaponDefaults(void);
{ \ { \
default: \ default: \
aGameVars[id].val.lValue operator lValue; \ aGameVars[id].val.lValue operator lValue; \
return; \ break; \
case GAMEVAR_PERPLAYER: \ case GAMEVAR_PERPLAYER: \
if ((unsigned)vm.g_p > MAXPLAYERS-1) return; \ if ((unsigned)vm.g_p > MAXPLAYERS-1) break; \
aGameVars[id].val.plValues[vm.g_p] operator lValue; \ aGameVars[id].val.plValues[vm.g_p] operator lValue; \
return; \ break; \
case GAMEVAR_PERACTOR: \ case GAMEVAR_PERACTOR: \
if ((unsigned)vm.g_i > MAXSPRITES-1) return; \ if ((unsigned)vm.g_i > MAXSPRITES-1) break; \
aGameVars[id].val.plValues[vm.g_i] operator lValue; \ aGameVars[id].val.plValues[vm.g_i] operator lValue; \
return; \ break; \
case GAMEVAR_INTPTR: \ case GAMEVAR_INTPTR: \
*((int32_t *)aGameVars[id].val.lValue) operator (int32_t)lValue; \ *((int32_t *)aGameVars[id].val.lValue) operator (int32_t)lValue; \
return; \ break; \
case GAMEVAR_SHORTPTR: \ case GAMEVAR_SHORTPTR: \
*((int16_t *)aGameVars[id].val.lValue) operator (int16_t)lValue; \ *((int16_t *)aGameVars[id].val.lValue) operator (int16_t)lValue; \
return; \ break; \
case GAMEVAR_CHARPTR: \ case GAMEVAR_CHARPTR: \
*((uint8_t *)aGameVars[id].val.lValue) operator (uint8_t)lValue; \ *((uint8_t *)aGameVars[id].val.lValue) operator (uint8_t)lValue; \
return; \ break; \
} \ } \
} }
// Even though libdivide is faster than straight division (when using the LUT), the overhead makes this slower on x86.
// ARM, however, has no hardware integer division, so the libdivide path is used there (or whenever LIBDIVIDE_ALWAYS is defined).
#if defined(__arm__) || defined(LIBDIVIDE_ALWAYS)
// Divide game variable `id` by `lValue` in place, using libdivide instead of
// a hardware/software integer division.
//
// id:     index into aGameVars[] of the variable to modify.
// lValue: the divisor.
//
// Per-player and per-actor variables are silently left untouched when the
// current vm.g_p / vm.g_i index is out of range.
static inline void __fastcall Gv_DivVar(int32_t id, int32_t lValue)
{
    // Single-entry cache holding the most recently generated divider for a
    // divisor that is not served by divtable32.
    static libdivide_s32_t cachedDivider;
    static int32_t cachedDivisor;

    libdivide_s32_t *divider = &cachedDivider;
    intptr_t *target = &aGameVars[id].val.lValue;

    // Bail out on invalid player/actor indices before the divider cache or
    // the variable itself is modified.
    if ((aGameVars[id].dwFlags & GAMEVAR_PERPLAYER) && (unsigned) vm.g_p > MAXPLAYERS-1)
        return;

    if ((aGameVars[id].dwFlags & GAMEVAR_PERACTOR) && (unsigned) vm.g_i > MAXSPRITES-1)
        return;

    // Divisors in [0, DIVTABLESIZE) come from the precomputed table; the
    // unsigned compare routes every other value (presumably including
    // negative divisors) to the generated divider.
    if ((unsigned) lValue < DIVTABLESIZE)
    {
        divider = (libdivide_s32_t *) &divtable32[lValue];
    }
    else if (lValue != cachedDivisor)
    {
        cachedDivider = libdivide_s32_gen(lValue);
        cachedDivisor = lValue;
    }

    switch (aGameVars[id].dwFlags & (GAMEVAR_USER_MASK|GAMEVAR_PTR_MASK))
    {
    // Pointer-backed variables: divide the pointee at its own width and leave.
    case GAMEVAR_INTPTR:
    {
        int32_t *const dest = (int32_t *) aGameVars[id].val.lValue;
        *dest = (int32_t) libdivide_s32_do(*dest, divider);
        return;
    }
    case GAMEVAR_SHORTPTR:
    {
        int16_t *const dest = (int16_t *) aGameVars[id].val.lValue;
        *dest = (int16_t) libdivide_s32_do(*dest, divider);
        return;
    }
    case GAMEVAR_CHARPTR:
    {
        uint8_t *const dest = (uint8_t *) aGameVars[id].val.lValue;
        *dest = (uint8_t) libdivide_s32_do(*dest, divider);
        return;
    }

    // Array-backed variables: redirect the target to the current slot.
    case GAMEVAR_PERPLAYER:
        target = &aGameVars[id].val.plValues[vm.g_p];
        break;
    case GAMEVAR_PERACTOR:
        target = &aGameVars[id].val.plValues[vm.g_i];
        break;

    // Plain global variable: target already points at val.lValue.
    default:
        break;
    }

    *target = libdivide_s32_do(*target, divider);
}
#else
GV_VAROP(Gv_DivVar, /=)
#endif
GV_VAROP(Gv_AddVar, +=) GV_VAROP(Gv_AddVar, +=)
GV_VAROP(Gv_SubVar, -=) GV_VAROP(Gv_SubVar, -=)
GV_VAROP(Gv_MulVar, *=) GV_VAROP(Gv_MulVar, *=)
GV_VAROP(Gv_DivVar, /=)
GV_VAROP(Gv_ModVar, %=) GV_VAROP(Gv_ModVar, %=)
GV_VAROP(Gv_AndVar, &=) GV_VAROP(Gv_AndVar, &=)
GV_VAROP(Gv_XorVar, ^=) GV_VAROP(Gv_XorVar, ^=)
GV_VAROP(Gv_OrVar, |=) GV_VAROP(Gv_OrVar, |=)
#endif #endif
#endif #endif

View file

@ -809,7 +809,7 @@ skip_check:
float fval = *((float *)&bits); float fval = *((float *)&bits);
// rounding must absolutely be! // rounding must absolutely be!
//OSD_Printf("ftoi: bits:%8x, scale=%d, fval=%f, (int32_t)(fval*scale)=%d\n", bits, scale, fval, (int32_t)(fval*scale)); //OSD_Printf("ftoi: bits:%8x, scale=%d, fval=%f, (int32_t)(fval*scale)=%d\n", bits, scale, fval, (int32_t)(fval*scale));
Gv_SetVarX(*insptr, (int32_t)nearbyintf(fval * scale)); Gv_SetVarX(*insptr, (int32_t)Blrintf(fval * scale));
} }
insptr += 2; insptr += 2;
continue; continue;

View file

@ -726,17 +726,17 @@ static MenuEntry_t *MEL_RENDERERSETUP_GL3[] = {
#endif #endif
#ifdef DROIDMENU #ifdef DROIDMENU
static MenuRangeFloat_t MEO_COLCORR_GAMMA = MAKE_MENURANGE( &MF_Bluefont, 1.f, 1.f, 2.5f, 39.f, 0.f, &vid_gamma ); static MenuRangeFloat_t MEO_COLCORR_GAMMA = MAKE_MENURANGE( &MF_Bluefont, 1, 1.f, 2.5f, 39.f, 0.f, &vid_gamma );
#else #else
static MenuRangeFloat_t MEO_COLCORR_GAMMA = MAKE_MENURANGE( &MF_Bluefont, 1.f, 0.2f, 4.f, 39.f, 0.f, &vid_gamma ); static MenuRangeFloat_t MEO_COLCORR_GAMMA = MAKE_MENURANGE( &MF_Bluefont, 1, 0.2f, 4.f, 39, 0.f, &vid_gamma );
#endif #endif
static MenuEntry_t ME_COLCORR_GAMMA = MAKE_MENUENTRY( &MF_Redfont, "Gamma:", RangeFloat, &MEO_COLCORR_GAMMA ); static MenuEntry_t ME_COLCORR_GAMMA = MAKE_MENUENTRY( &MF_Redfont, "Gamma:", RangeFloat, &MEO_COLCORR_GAMMA );
static MenuRangeFloat_t MEO_COLCORR_CONTRAST = MAKE_MENURANGE( &MF_Bluefont, 1.f, 0.1f, 2.7f, 53.f, 0.f, &vid_contrast ); static MenuRangeFloat_t MEO_COLCORR_CONTRAST = MAKE_MENURANGE( &MF_Bluefont, 1, 0.1f, 2.7f, 53, 0.f, &vid_contrast );
static MenuEntry_t ME_COLCORR_CONTRAST = MAKE_MENUENTRY( &MF_Redfont, "Contrast:", RangeFloat, &MEO_COLCORR_CONTRAST ); static MenuEntry_t ME_COLCORR_CONTRAST = MAKE_MENUENTRY( &MF_Redfont, "Contrast:", RangeFloat, &MEO_COLCORR_CONTRAST );
static MenuRangeFloat_t MEO_COLCORR_BRIGHTNESS = MAKE_MENURANGE( &MF_Bluefont, 1.f, -0.8f, 0.8f, 33.f, 0.f, &vid_brightness ); static MenuRangeFloat_t MEO_COLCORR_BRIGHTNESS = MAKE_MENURANGE( &MF_Bluefont, 1, -0.8f, 0.8f, 33, 0.f, &vid_brightness );
static MenuEntry_t ME_COLCORR_BRIGHTNESS = MAKE_MENUENTRY( &MF_Redfont, "Brightness:", RangeFloat, &MEO_COLCORR_BRIGHTNESS ); static MenuEntry_t ME_COLCORR_BRIGHTNESS = MAKE_MENUENTRY( &MF_Redfont, "Brightness:", RangeFloat, &MEO_COLCORR_BRIGHTNESS );
static MenuEntry_t ME_COLCORR_RESET = MAKE_MENUENTRY( &MF_Redfont, "Reset To Defaults", Link, &MEO_NULL ); static MenuEntry_t ME_COLCORR_RESET = MAKE_MENUENTRY( &MF_Redfont, "Reset To Defaults", Link, &MEO_NULL );
static MenuRangeFloat_t MEO_COLCORR_AMBIENT = MAKE_MENURANGE(&MF_Bluefont, 1.f, 0.125f, 4.f, 32.f, 0.f, &r_ambientlight); static MenuRangeFloat_t MEO_COLCORR_AMBIENT = MAKE_MENURANGE(&MF_Bluefont, 1, 0.125f, 4.f, 32, 0.f, &r_ambientlight);
static MenuEntry_t ME_COLCORR_AMBIENT = MAKE_MENUENTRY(&MF_Redfont, "Visibility:", RangeFloat, &MEO_COLCORR_AMBIENT); static MenuEntry_t ME_COLCORR_AMBIENT = MAKE_MENUENTRY(&MF_Redfont, "Visibility:", RangeFloat, &MEO_COLCORR_AMBIENT);
static MenuEntry_t *MEL_COLCORR[] = { static MenuEntry_t *MEL_COLCORR[] = {
@ -3575,7 +3575,7 @@ static int32_t M_RunMenu_MenuMenu(MenuMenu_t *menu, MenuEntry_t *currentry, int3
case 2: case 2:
{ {
int32_t v; int32_t v;
ftol(((float) *object->variable * 100.) / (float) object->onehundredpercent + 0.5, &v); v = Blrintf(((float) *object->variable * 100.f) / (float) object->onehundredpercent);
Bsprintf(tempbuf, "%d%%", v); Bsprintf(tempbuf, "%d%%", v);
break; break;
} }
@ -3602,7 +3602,7 @@ static int32_t M_RunMenu_MenuMenu(MenuMenu_t *menu, MenuEntry_t *currentry, int3
rotatesprite_fs(x, y - menu->scrollPos, z, 0, SLIDEBAR, s, p, 2|8|16|ROTATESPRITE_FULL16); rotatesprite_fs(x, y - menu->scrollPos, z, 0, SLIDEBAR, s, p, 2|8|16|ROTATESPRITE_FULL16);
rotatesprite_fs( rotatesprite_fs(
x + (1<<16) + ((float) scale((tilesiz[SLIDEBAR].x-2-tilesiz[SLIDEBAR+1].x)<<16, height, tilesiz[SLIDEBAR].y<<16) * (*object->variable - object->min) / (object->max - object->min)), x + (1<<16) + (int32_t)((float) scale((tilesiz[SLIDEBAR].x-2-tilesiz[SLIDEBAR+1].x)<<16, height, tilesiz[SLIDEBAR].y<<16) * (*object->variable - object->min) / (object->max - object->min)),
y + scale((tilesiz[SLIDEBAR].y-tilesiz[SLIDEBAR+1].y)<<15, height, tilesiz[SLIDEBAR].y<<16) - menu->scrollPos, y + scale((tilesiz[SLIDEBAR].y-tilesiz[SLIDEBAR+1].y)<<15, height, tilesiz[SLIDEBAR].y<<16) - menu->scrollPos,
z, 0, SLIDEBAR+1, s, p, 2|8|16|ROTATESPRITE_FULL16); z, 0, SLIDEBAR+1, s, p, 2|8|16|ROTATESPRITE_FULL16);
@ -3621,7 +3621,7 @@ static int32_t M_RunMenu_MenuMenu(MenuMenu_t *menu, MenuEntry_t *currentry, int3
case 2: case 2:
{ {
int32_t v; int32_t v;
ftol((*object->variable * 100.f) / object->onehundredpercent + 0.5f, &v); v = Blrintf((*object->variable * 100.f) / object->onehundredpercent);
Bsprintf(tempbuf, "%d%%", v); Bsprintf(tempbuf, "%d%%", v);
break; break;
} }
@ -3648,7 +3648,7 @@ static int32_t M_RunMenu_MenuMenu(MenuMenu_t *menu, MenuEntry_t *currentry, int3
rotatesprite_fs(x, y - menu->scrollPos, z, 0, SLIDEBAR, s, p, 2|8|16|ROTATESPRITE_FULL16); rotatesprite_fs(x, y - menu->scrollPos, z, 0, SLIDEBAR, s, p, 2|8|16|ROTATESPRITE_FULL16);
rotatesprite_fs( rotatesprite_fs(
x + (1<<16) + ((double) scale((tilesiz[SLIDEBAR].x-2-tilesiz[SLIDEBAR+1].x)<<16, height, tilesiz[SLIDEBAR].y<<16) * (*object->variable - object->min) / (object->max - object->min)), x + (1<<16) + (int32_t)((double) scale((tilesiz[SLIDEBAR].x-2-tilesiz[SLIDEBAR+1].x)<<16, height, tilesiz[SLIDEBAR].y<<16) * (*object->variable - object->min) / (object->max - object->min)),
y + scale((tilesiz[SLIDEBAR].y-tilesiz[SLIDEBAR+1].y)<<15, height, tilesiz[SLIDEBAR].y<<16) - menu->scrollPos, y + scale((tilesiz[SLIDEBAR].y-tilesiz[SLIDEBAR+1].y)<<15, height, tilesiz[SLIDEBAR].y<<16) - menu->scrollPos,
z, 0, SLIDEBAR+1, s, p, 2|8|16|ROTATESPRITE_FULL16); z, 0, SLIDEBAR+1, s, p, 2|8|16|ROTATESPRITE_FULL16);
@ -3667,7 +3667,7 @@ static int32_t M_RunMenu_MenuMenu(MenuMenu_t *menu, MenuEntry_t *currentry, int3
case 2: case 2:
{ {
int32_t v; int32_t v;
dtol((*object->variable * 100.) / object->onehundredpercent + 0.5, &v); v = Blrintf((*object->variable * 100.) / object->onehundredpercent);
Bsprintf(tempbuf, "%d%%", v); Bsprintf(tempbuf, "%d%%", v);
break; break;
} }
@ -4403,14 +4403,14 @@ static void M_RunMenuInput(Menu_t *cm)
case RangeInt32: case RangeInt32:
{ {
MenuRangeInt32_t *object = (MenuRangeInt32_t*)currentry->entry; MenuRangeInt32_t *object = (MenuRangeInt32_t*)currentry->entry;
const double interval = (double) (object->max - object->min) / (object->steps - 1); const float interval = (float) (object->max - object->min) / (float) (object->steps - 1);
int32_t step; int32_t step;
int32_t modification = 0; int32_t modification = 0;
if (currentry->disabled) if (currentry->disabled)
break; break;
dtol((double) (*object->variable - object->min) / interval + 0.5, &step); step = Blrintf((float) (*object->variable - object->min) / interval);
if (I_SliderLeft()) if (I_SliderLeft())
{ {
@ -4438,7 +4438,7 @@ static void M_RunMenuInput(Menu_t *cm)
else if (step >= object->steps) else if (step >= object->steps)
step = object->steps - 1; step = object->steps - 1;
dtol(interval * step + object->min + 0.5, &temp); temp = Blrintf(interval * step + (object->min));
if (!M_MenuEntryRangeInt32Modify(currentry, temp)) if (!M_MenuEntryRangeInt32Modify(currentry, temp))
*object->variable = temp; *object->variable = temp;
@ -4456,7 +4456,7 @@ static void M_RunMenuInput(Menu_t *cm)
if (currentry->disabled) if (currentry->disabled)
break; break;
ftol((*object->variable - object->min) / interval + 0.5, &step); step = Blrintf((*object->variable - object->min) / interval);
if (I_SliderLeft()) if (I_SliderLeft())
{ {
@ -4502,7 +4502,7 @@ static void M_RunMenuInput(Menu_t *cm)
if (currentry->disabled) if (currentry->disabled)
break; break;
dtol((*object->variable - object->min) / interval + 0.5, &step); step = Blrintf((*object->variable - object->min) / interval);
if (I_SliderLeft()) if (I_SliderLeft())
{ {

View file

@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include "midi.h" #include "midi.h"
#include "mpu401.h" #include "mpu401.h"
#include "compat.h" #include "compat.h"
#include "pragmas.h"
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
@ -297,7 +298,7 @@ static void _MIDI_MetaEvent
break; break;
case MIDI_TEMPO_CHANGE : case MIDI_TEMPO_CHANGE :
tempo = 60000000L / _MIDI_ReadNumber(Track->pos, 3); tempo = tabledivide32_noinline(60000000L, _MIDI_ReadNumber(Track->pos, 3));
MIDI_SetTempo(tempo); MIDI_SetTempo(tempo);
break; break;
@ -318,7 +319,7 @@ static void _MIDI_MetaEvent
_MIDI_TimeBase += _MIDI_TimeBase; _MIDI_TimeBase += _MIDI_TimeBase;
denominator--; denominator--;
} }
_MIDI_TicksPerBeat = (_MIDI_Division * 4) / _MIDI_TimeBase; _MIDI_TicksPerBeat = tabledivide32_noinline(_MIDI_Division * 4, _MIDI_TimeBase);
break; break;
} }
@ -794,7 +795,7 @@ static void _MIDI_SetChannelVolume
{ {
remotevolume = volume * _MIDI_TotalVolume; remotevolume = volume * _MIDI_TotalVolume;
remotevolume *= _MIDI_UserChannelVolume[ channel ]; remotevolume *= _MIDI_UserChannelVolume[ channel ];
remotevolume /= MIDI_MaxVolume; remotevolume = tabledivide32_noinline(remotevolume, MIDI_MaxVolume);
remotevolume >>= 8; remotevolume >>= 8;
status = _MIDI_RerouteFunctions[ channel ](0xB0 + channel, status = _MIDI_RerouteFunctions[ channel ](0xB0 + channel,
@ -821,7 +822,7 @@ static void _MIDI_SetChannelVolume
if (_MIDI_Funcs->SetVolume == NULL) if (_MIDI_Funcs->SetVolume == NULL)
{ {
volume *= _MIDI_TotalVolume; volume *= _MIDI_TotalVolume;
volume /= MIDI_MaxVolume; volume = tabledivide32_noinline(volume, MIDI_MaxVolume);
} }
// For user volume // For user volume
@ -1315,8 +1316,8 @@ void MIDI_SetTempo
int32_t tickspersecond; int32_t tickspersecond;
MIDI_Tempo = tempo; MIDI_Tempo = tempo;
tickspersecond = ((tempo) * _MIDI_Division) / 60; tickspersecond = ((tempo) * _MIDI_Division)/60;
_MIDI_FPSecondsPerTick = (1 << TIME_PRECISION) / tickspersecond; _MIDI_FPSecondsPerTick = tabledivide32_noinline(1 << TIME_PRECISION, tickspersecond);
MPU_SetTempo(tempo); MPU_SetTempo(tempo);
} }
@ -1562,8 +1563,8 @@ void MIDI_SetSongTime
MIDI_PauseSong(); MIDI_PauseSong();
mil = ((milliseconds % 1000) << TIME_PRECISION) / 1000; mil = tabledivide32_noinline((milliseconds % 1000) << TIME_PRECISION, 1000);
sec = (milliseconds / 1000) << TIME_PRECISION; sec = tabledivide32_noinline(milliseconds, 1000) << TIME_PRECISION;
newtime = sec + mil; newtime = sec + mil;
if (newtime < _MIDI_Time) if (newtime < _MIDI_Time)

View file

@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include "mpu401.h" #include "mpu401.h"
#include "compat.h" #include "compat.h"
#include "pragmas.h"
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
@ -441,7 +442,7 @@ void MPU_SetTempo(int32_t tempo)
{ {
MIDIPROPTEMPO prop; MIDIPROPTEMPO prop;
prop.cbStruct = sizeof(MIDIPROPTEMPO); prop.cbStruct = sizeof(MIDIPROPTEMPO);
prop.dwTempo = 60000000l/tempo; prop.dwTempo = tabledivide32_noinline(60000000l, tempo);
midiStreamProperty(hmido, (LPBYTE)&prop, MIDIPROP_SET|MIDIPROP_TEMPO); midiStreamProperty(hmido, (LPBYTE)&prop, MIDIPROP_SET|MIDIPROP_TEMPO);
} }

View file

@ -113,9 +113,9 @@ static void A_DoWaterTracers(int32_t x1,int32_t y1,int32_t z1,int32_t x2,int32_t
int16_t sect = -1; int16_t sect = -1;
i = n+1; i = n+1;
xv = (x2-x1)/i; xv = tabledivide32_noinline(x2-x1, i);
yv = (y2-y1)/i; yv = tabledivide32_noinline(y2-y1, i);
zv = (z2-z1)/i; zv = tabledivide32_noinline(z2-z1, i);
if ((klabs(x1-x2)+klabs(y1-y2)) < 3084) if ((klabs(x1-x2)+klabs(y1-y2)) < 3084)
return; return;
@ -147,15 +147,15 @@ static void A_HitscanProjTrail(const vec3_t *sv, const vec3_t *dv, int32_t ang,
Bmemcpy(&destvect, dv, sizeof(vec3_t)); Bmemcpy(&destvect, dv, sizeof(vec3_t));
srcvect.x = sv->x + (sintable[(348+ang+512)&2047]/proj->offset); srcvect.x = sv->x + tabledivide32_noinline(sintable[(348+ang+512)&2047], proj->offset);
srcvect.y = sv->y + (sintable[(ang+348)&2047]/proj->offset); srcvect.y = sv->y + tabledivide32_noinline(sintable[(ang+348)&2047], proj->offset);
srcvect.z = sv->z + 1024+(proj->toffset<<8); srcvect.z = sv->z + 1024+(proj->toffset<<8);
n = ((FindDistance2D(srcvect.x-destvect.x,srcvect.y-destvect.y))>>8)+1; n = ((FindDistance2D(srcvect.x-destvect.x,srcvect.y-destvect.y))>>8)+1;
destvect.x = ((destvect.x-srcvect.x)/n); destvect.x = tabledivide32_noinline((destvect.x-srcvect.x), n);
destvect.y = ((destvect.y-srcvect.y)/n); destvect.y = tabledivide32_noinline((destvect.y-srcvect.y), n);
destvect.z = ((destvect.z-srcvect.z)/n); destvect.z = tabledivide32_noinline((destvect.z-srcvect.z), n);
srcvect.x += destvect.x>>2; srcvect.x += destvect.x>>2;
srcvect.y += destvect.y>>2; srcvect.y += destvect.y>>2;
@ -379,7 +379,7 @@ static int32_t GetAutoAimAngle(int32_t i, int32_t p, int32_t atwith,
} }
dst = safeldist(g_player[p].ps->i, &sprite[j]); dst = safeldist(g_player[p].ps->i, &sprite[j]);
*zvel = ((spr->z - srcvect->z - cen)*vel) / dst; *zvel = tabledivide32_noinline((spr->z - srcvect->z - cen)*vel, dst);
if (!(flags&2) || sprite[j].picnum != RECON) if (!(flags&2) || sprite[j].picnum != RECON)
*sa = getangle(spr->x-srcvect->x, spr->y-srcvect->y); *sa = getangle(spr->x-srcvect->x, spr->y-srcvect->y);
@ -530,7 +530,7 @@ static void A_PreFireHitscan(const spritetype *s, vec3_t *srcvect, int32_t *zvel
const DukePlayer_t *targetps = g_player[j].ps; const DukePlayer_t *targetps = g_player[j].ps;
const int32_t d = safeldist(targetps->i, s); const int32_t d = safeldist(targetps->i, s);
*zvel = ((targetps->pos.z-srcvect->z)<<8) / d; *zvel = tabledivide32_noinline((targetps->pos.z-srcvect->z)<<8, d);
srcvect->z -= (4<<8); srcvect->z -= (4<<8);
@ -960,7 +960,7 @@ static int32_t A_ShootCustom(const int32_t i, const int32_t atwith, int16_t sa,
sa = getangle(g_player[j].ps->opos.x-srcvect->x, g_player[j].ps->opos.y-srcvect->y); sa = getangle(g_player[j].ps->opos.x-srcvect->x, g_player[j].ps->opos.y-srcvect->y);
l = safeldist(g_player[j].ps->i, s); l = safeldist(g_player[j].ps->i, s);
zvel = ((g_player[j].ps->opos.z - srcvect->z)*vel) / l; zvel = tabledivide32_noinline((g_player[j].ps->opos.z - srcvect->z)*vel, l);
if (A_CheckEnemySprite(s) && (AC_MOVFLAGS(s, &actor[i]) & face_player_smart)) if (A_CheckEnemySprite(s) && (AC_MOVFLAGS(s, &actor[i]) & face_player_smart))
sa = s->ang + (krand() & 31) - 16; sa = s->ang + (krand() & 31) - 16;
@ -974,8 +974,8 @@ static int32_t A_ShootCustom(const int32_t i, const int32_t atwith, int16_t sa,
zvel = A_GetShootZvel(zvel); zvel = A_GetShootZvel(zvel);
j = A_InsertSprite(sect, j = A_InsertSprite(sect,
srcvect->x + (sintable[(348 + sa + 512) & 2047] / proj->offset), srcvect->x + tabledivide32_noinline(sintable[(348 + sa + 512) & 2047], proj->offset),
srcvect->y + (sintable[(sa + 348) & 2047] / proj->offset), srcvect->y + tabledivide32_noinline(sintable[(sa + 348) & 2047], proj->offset),
srcvect->z - (1 << 8), atwith, 0, 14, 14, sa, vel, zvel, i, 4); srcvect->z - (1 << 8), atwith, 0, 14, 14, sa, vel, zvel, i, 4);
sprite[j].xrepeat = proj->xrepeat; sprite[j].xrepeat = proj->xrepeat;
@ -1017,7 +1017,7 @@ static int32_t A_ShootCustom(const int32_t i, const int32_t atwith, int16_t sa,
{ {
int32_t x; int32_t x;
j = g_player[A_FindPlayer(s, &x)].ps->i; j = g_player[A_FindPlayer(s, &x)].ps->i;
zvel = ((sprite[j].z - srcvect->z) << 8) / (x + 1); zvel = tabledivide32_noinline((sprite[j].z - srcvect->z) << 8, x + 1);
sa = getangle(sprite[j].x - srcvect->x, sprite[j].y - srcvect->y); sa = getangle(sprite[j].x - srcvect->x, sprite[j].y - srcvect->y);
} }
@ -1205,7 +1205,7 @@ int32_t A_ShootWithZvel(int32_t i, int32_t atwith, int32_t override_zvel)
{ {
int32_t x; int32_t x;
j = g_player[A_FindPlayer(s,&x)].ps->i; j = g_player[A_FindPlayer(s,&x)].ps->i;
zvel = ((sprite[j].z-srcvect.z)<<8) / (x+1); zvel = tabledivide32_noinline((sprite[j].z-srcvect.z)<<8, x+1);
sa = getangle(sprite[j].x-srcvect.x,sprite[j].y-srcvect.y); sa = getangle(sprite[j].x-srcvect.x,sprite[j].y-srcvect.y);
} }
} }
@ -1352,7 +1352,7 @@ int32_t A_ShootWithZvel(int32_t i, int32_t atwith, int32_t override_zvel)
// sa = getangle(g_player[j].ps->opos.x-sx,g_player[j].ps->opos.y-sy); // sa = getangle(g_player[j].ps->opos.x-sx,g_player[j].ps->opos.y-sy);
sa += 16-(krand()&31); sa += 16-(krand()&31);
hit.pos.x = safeldist(g_player[j].ps->i, s); hit.pos.x = safeldist(g_player[j].ps->i, s);
zvel = ((g_player[j].ps->opos.z - srcvect.z + (3<<8))*vel) / hit.pos.x; zvel = tabledivide32_noinline((g_player[j].ps->opos.z - srcvect.z + (3<<8))*vel, hit.pos.x);
} }
zvel = A_GetShootZvel(zvel); zvel = A_GetShootZvel(zvel);
@ -1438,7 +1438,7 @@ int32_t A_ShootWithZvel(int32_t i, int32_t atwith, int32_t override_zvel)
} }
l = safeldist(g_player[j].ps->i, s); l = safeldist(g_player[j].ps->i, s);
zvel = ((g_player[j].ps->opos.z - srcvect.z)*vel) / l; zvel = tabledivide32_noinline((g_player[j].ps->opos.z - srcvect.z)*vel, l);
if (A_CheckEnemySprite(s) && (AC_MOVFLAGS(s, &actor[i]) & face_player_smart)) if (A_CheckEnemySprite(s) && (AC_MOVFLAGS(s, &actor[i]) & face_player_smart))
sa = s->ang+(krand()&31)-16; sa = s->ang+(krand()&31)-16;
@ -1635,7 +1635,7 @@ int32_t A_ShootWithZvel(int32_t i, int32_t atwith, int32_t override_zvel)
{ {
j = A_FindPlayer(s, NULL); j = A_FindPlayer(s, NULL);
l = safeldist(g_player[j].ps->i, s); l = safeldist(g_player[j].ps->i, s);
zvel = ((g_player[j].ps->opos.z-srcvect.z)*512) / l ; zvel = tabledivide32_noinline((g_player[j].ps->opos.z-srcvect.z)*512, l);
} }
else zvel = 0; else zvel = 0;
@ -1830,7 +1830,7 @@ static void G_DrawWeaponTile(int32_t x, int32_t y, int32_t tilenum, int32_t shad
// HACK: Draw the upper part of the chaingun two screen // HACK: Draw the upper part of the chaingun two screen
// pixels (not texels; multiplied by weapon scale) lower // pixels (not texels; multiplied by weapon scale) lower
// first, preventing ugly horizontal seam. // first, preventing ugly horizontal seam.
g_dts_yadd = (65536*2*200)/ydim; g_dts_yadd = tabledivide32_noinline(65536*2*200, ydim);
G_DrawTileScaled(x,y,tilenum,shadef[slot],orientation,p); G_DrawTileScaled(x,y,tilenum,shadef[slot],orientation,p);
g_dts_yadd = 0; g_dts_yadd = 0;
} }
@ -2753,8 +2753,8 @@ void P_GetInput(int32_t snum)
if (ud.config.MouseBias) if (ud.config.MouseBias)
{ {
if (klabs(info[0].dyaw) > klabs(info[0].dpitch)) if (klabs(info[0].dyaw) > klabs(info[0].dpitch))
info[0].dpitch /= ud.config.MouseBias; info[0].dpitch = tabledivide32_noinline(info[0].dpitch, ud.config.MouseBias);
else info[0].dyaw /= ud.config.MouseBias; else info[0].dyaw = tabledivide32_noinline(info[0].dyaw, ud.config.MouseBias);
} }
tics = totalclock-lastcontroltime; tics = totalclock-lastcontroltime;

View file

@ -537,7 +537,7 @@ void G_CacheMapData(void)
if (bpp > 8 && totalclock - tc > TICRATE/4) if (bpp > 8 && totalclock - tc > TICRATE/4)
{ {
/*Bsprintf(tempbuf,"%d resources remaining\n",g_precacheCount-pc+1);*/ /*Bsprintf(tempbuf,"%d resources remaining\n",g_precacheCount-pc+1);*/
tc = min(100,100*pc/g_precacheCount); tc = min(100, tabledivide32_noinline(100 * pc, g_precacheCount));
Bsprintf(tempbuf,"Loaded %d%% (%d/%d textures)\n",tc,pc,g_precacheCount); Bsprintf(tempbuf,"Loaded %d%% (%d/%d textures)\n",tc,pc,g_precacheCount);
G_DoLoadScreen(tempbuf, tc); G_DoLoadScreen(tempbuf, tc);
tc = totalclock; tc = totalclock;

View file

@ -692,7 +692,7 @@ static void docmpsd(const void *ptr, void *dump, uint32_t size, uint32_t cnt, ui
{ \ { \
const UINT(Datbits) *p=(UINT(Datbits) *)ptr; \ const UINT(Datbits) *p=(UINT(Datbits) *)ptr; \
UINT(Datbits) *op=(UINT(Datbits) *)dump; \ UINT(Datbits) *op=(UINT(Datbits) *)dump; \
uint32_t i, nelts=(size*cnt)/BYTES(Datbits); \ uint32_t i, nelts=tabledivide32_noinline(size*cnt, BYTES(Datbits)); \
if (nelts>65536) \ if (nelts>65536) \
CPELTS(32,Datbits); \ CPELTS(32,Datbits); \
else if (nelts>256) \ else if (nelts>256) \
@ -831,7 +831,7 @@ readidx_##Idxbits##_##Datbits: \
#define CPDATA(Datbits) do \ #define CPDATA(Datbits) do \
{ \ { \
uint32_t nelts=(sp->size*cnt)/BYTES(Datbits); \ uint32_t nelts=tabledivide32_noinline(sp->size*cnt, BYTES(Datbits)); \
if (nelts>65536) \ if (nelts>65536) \
CPELTS(32,Datbits); \ CPELTS(32,Datbits); \
else if (nelts>256) \ else if (nelts>256) \

View file

@ -571,8 +571,9 @@ void G_OperateSectors(int32_t sn, int32_t ii)
dax += wall[i].x; dax += wall[i].x;
day += wall[i].y; day += wall[i].y;
} }
dax /= (endwall-startwall+1);
day /= (endwall-startwall+1); dax = tabledivide32_noinline(dax, (endwall-startwall+1));
day = tabledivide32_noinline(day, (endwall-startwall+1));
//find any points with either same x or same y coordinate //find any points with either same x or same y coordinate
// as center (dax, day) - should be 2 points found. // as center (dax, day) - should be 2 points found.