mirror of
https://github.com/ZDoom/Raze.git
synced 2025-01-22 08:22:38 +00:00
104bdb131b
git-svn-id: https://svn.eduke32.com/eduke32@6949 1a8010ca-5511-0410-912e-c29ae57300e0
343 lines
10 KiB
C++
343 lines
10 KiB
C++
// This file has been modified from Ken Silverman's original release
|
|
// by Jonathon Fowler (jf@jonof.id.au)
|
|
|
|
|
|
#ifndef pragmas_h_
|
|
#define pragmas_h_
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
#define EDUKE32_GENERATE_PRAGMAS \
|
|
EDUKE32_SCALER_PRAGMA(1) EDUKE32_SCALER_PRAGMA(2) EDUKE32_SCALER_PRAGMA(3) EDUKE32_SCALER_PRAGMA(4) \
|
|
EDUKE32_SCALER_PRAGMA(5) EDUKE32_SCALER_PRAGMA(6) EDUKE32_SCALER_PRAGMA(7) EDUKE32_SCALER_PRAGMA(8) \
|
|
EDUKE32_SCALER_PRAGMA(9) EDUKE32_SCALER_PRAGMA(10) EDUKE32_SCALER_PRAGMA(11) EDUKE32_SCALER_PRAGMA(12) \
|
|
EDUKE32_SCALER_PRAGMA(13) EDUKE32_SCALER_PRAGMA(14) EDUKE32_SCALER_PRAGMA(15) EDUKE32_SCALER_PRAGMA(16) \
|
|
EDUKE32_SCALER_PRAGMA(17) EDUKE32_SCALER_PRAGMA(18) EDUKE32_SCALER_PRAGMA(19) EDUKE32_SCALER_PRAGMA(20) \
|
|
EDUKE32_SCALER_PRAGMA(21) EDUKE32_SCALER_PRAGMA(22) EDUKE32_SCALER_PRAGMA(23) EDUKE32_SCALER_PRAGMA(24) \
|
|
EDUKE32_SCALER_PRAGMA(25) EDUKE32_SCALER_PRAGMA(26) EDUKE32_SCALER_PRAGMA(27) EDUKE32_SCALER_PRAGMA(28) \
|
|
EDUKE32_SCALER_PRAGMA(29) EDUKE32_SCALER_PRAGMA(30) EDUKE32_SCALER_PRAGMA(31)
|
|
|
|
extern int32_t dmval;
|
|
#if !defined(NOASM) && defined __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
extern int32_t reciptable[2048], fpuasm;
|
|
#if !defined(NOASM) && defined __cplusplus
|
|
}
|
|
#endif
|
|
|
|
// break the C version of divscale out from the others
|
|
// because asm version overflows in drawmapview()
|
|
|
|
#define qw(x) ((int64_t)(x)) // quadword cast
|
|
#define dw(x) ((int32_t)(x)) // doubleword cast
|
|
#define wo(x) ((int16_t)(x)) // word cast
|
|
#define by(x) ((uint8_t)(x)) // byte cast
|
|
|
|
#define LIBDIVIDE_ALWAYS
|
|
#define DIVTABLESIZE 16384
|
|
|
|
extern libdivide_s64_t divtable64[DIVTABLESIZE];
|
|
extern libdivide_s32_t divtable32[DIVTABLESIZE];
|
|
extern void initdivtables(void);
|
|
|
|
#if defined(__arm__) || defined(LIBDIVIDE_ALWAYS)
|
|
static inline uint32_t divideu32(uint32_t n, uint32_t d)
|
|
{
|
|
static libdivide_u32_t udiv;
|
|
static uint32_t lastd;
|
|
|
|
if (d == lastd)
|
|
goto skip;
|
|
|
|
udiv = libdivide_u32_gen((lastd = d));
|
|
skip:
|
|
return libdivide_u32_do(n, &udiv);
|
|
}
|
|
|
|
static inline int32_t tabledivide64(int64_t n, int32_t d)
|
|
{
|
|
static libdivide_s64_t sdiv;
|
|
static int32_t lastd;
|
|
libdivide_s64_t const * const dptr = ((unsigned)d < DIVTABLESIZE) ? (libdivide_s64_t *)&divtable64[d] : &sdiv;
|
|
|
|
if (d == lastd || dptr != &sdiv)
|
|
goto skip;
|
|
|
|
sdiv = libdivide_s64_gen((lastd = d));
|
|
skip:
|
|
return libdivide_s64_do(n, dptr);
|
|
}
|
|
|
|
static inline int32_t tabledivide32(int32_t n, int32_t d)
|
|
{
|
|
static libdivide_s32_t sdiv;
|
|
static int32_t lastd;
|
|
libdivide_s32_t const * const dptr = ((unsigned)d < DIVTABLESIZE) ? (libdivide_s32_t *)&divtable32[d] : &sdiv;
|
|
|
|
if (d == lastd || dptr != &sdiv)
|
|
goto skip;
|
|
|
|
sdiv = libdivide_s32_gen((lastd = d));
|
|
skip:
|
|
return libdivide_s32_do(n, dptr);
|
|
}
|
|
#else
|
|
static FORCE_INLINE uint32_t divideu32(uint32_t n, uint32_t d) { return n / d; }
|
|
|
|
static inline int32_t tabledivide64(int64_t n, int32_t d)
|
|
{
|
|
return ((unsigned)d < DIVTABLESIZE) ? libdivide_s64_do(n, (libdivide_s64_t *)&divtable64[d]) : n / d;
|
|
}
|
|
|
|
static inline int32_t tabledivide32(int32_t n, int32_t d)
|
|
{
|
|
return ((unsigned)d < DIVTABLESIZE) ? libdivide_s32_do(n, (libdivide_s32_t *)&divtable32[d]) : n / d;
|
|
}
|
|
#endif
|
|
|
|
extern uint32_t divideu32_noinline(uint32_t n, uint32_t d);
|
|
extern int32_t tabledivide32_noinline(int32_t n, int32_t d);
|
|
extern int32_t tabledivide64_noinline(int64_t n, int32_t d);
|
|
|
|
#ifdef GEKKO
|
|
static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx) { return tabledivide64(ldexp(eax, ecx), ebx); }
|
|
#else
|
|
static inline int32_t divscale(int32_t eax, int32_t ebx, int32_t ecx)
|
|
{
|
|
const int64_t numer = qw(eax) << by(ecx);
|
|
return dw(tabledivide64(numer, ebx));
|
|
}
|
|
#endif
|
|
|
|
#define EDUKE32_SCALER_PRAGMA(a) \
|
|
static FORCE_INLINE int32_t divscale##a(int32_t eax, int32_t ebx) { return divscale(eax, ebx, a); }
|
|
EDUKE32_GENERATE_PRAGMAS EDUKE32_SCALER_PRAGMA(32)
|
|
#undef EDUKE32_SCALER_PRAGMA
|
|
|
|
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx)
|
|
{
|
|
const int64_t numer = qw(eax) * edx;
|
|
return dw(tabledivide64(numer, ecx));
|
|
}
|
|
|
|
static inline int32_t scaleadd(int32_t eax, int32_t edx, int32_t addend, int32_t ecx)
|
|
{
|
|
const int64_t numer = qw(eax) * edx + addend;
|
|
return dw(tabledivide64(numer, ecx));
|
|
}
|
|
|
|
static FORCE_INLINE int32_t roundscale(int32_t eax, int32_t edx, int32_t ecx)
|
|
{
|
|
return scaleadd(eax, edx, ecx / 2, ecx);
|
|
}
|
|
|
|
static FORCE_INLINE int32_t sqr(int32_t a) { return a * a; }
|
|
|
|
#if defined(__GNUC__) && defined(GEKKO)
|
|
|
|
// GCC Inline Assembler version (PowerPC)
|
|
#include "pragmas_ppc.h"
|
|
|
|
#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM)
|
|
|
|
// GCC Inline Assembler version (x86)
|
|
#include "pragmas_x86_gcc.h"
|
|
|
|
#elif defined(_MSC_VER) && !defined(NOASM) // __GNUC__
|
|
|
|
// Microsoft C inline assembler
|
|
#include "pragmas_x86_msvc.h"
|
|
|
|
#elif defined(__arm__) // _MSC_VER
|
|
|
|
// GCC Inline Assembler version (ARM)
|
|
#include "pragmas_arm.h"
|
|
|
|
#endif
|
|
|
|
//
|
|
// Generic C
|
|
//
|
|
|
|
#ifndef pragmas_have_mulscale
|
|
|
|
#define EDUKE32_SCALER_PRAGMA(a) \
|
|
static FORCE_INLINE int32_t mulscale##a(int32_t eax, int32_t edx) { return dw((qw(eax) * edx) >> by(a)); } \
|
|
static FORCE_INLINE int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_t edi) \
|
|
{ \
|
|
return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(a)); \
|
|
} \
|
|
static FORCE_INLINE int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \
|
|
{ \
|
|
return dw(((qw(eax) * edx) + (qw(ebx) * ecx) + (qw(esi) * edi)) >> by(a)); \
|
|
} \
|
|
|
|
EDUKE32_GENERATE_PRAGMAS EDUKE32_SCALER_PRAGMA(32)
|
|
|
|
#undef EDUKE32_SCALER_PRAGMA
|
|
|
|
#endif
|
|
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
template <typename T>
|
|
static FORCE_INLINE void swap(T * const a, T * const b)
|
|
{
|
|
T const t = *a;
|
|
*a = *b;
|
|
*b = t;
|
|
}
|
|
#define swapptr swap
|
|
extern "C" {
|
|
#else
|
|
static FORCE_INLINE void swapptr(void *a, void *b)
|
|
{
|
|
intptr_t const t = *(intptr_t*) a;
|
|
*(intptr_t*) a = *(intptr_t*) b;
|
|
*(intptr_t*) b = t;
|
|
}
|
|
#endif
|
|
|
|
#ifndef pragmas_have_swaps
|
|
#ifdef __cplusplus
|
|
#define swapchar swap
|
|
#define swapshort swap
|
|
#define swaplong swap
|
|
#define swapfloat swap
|
|
#define swapdouble swap
|
|
#define swap64bit swap
|
|
#else
|
|
static FORCE_INLINE void swapchar(void *a, void *b)
|
|
{
|
|
char const t = *(char *)b;
|
|
*(char *)b = *(char *)a;
|
|
*(char *)a = t;
|
|
}
|
|
static FORCE_INLINE void swapshort(void *a, void *b)
|
|
{
|
|
int16_t const t = *(int16_t *)b;
|
|
*(int16_t *)b = *(int16_t *)a;
|
|
*(int16_t *)a = t;
|
|
}
|
|
static FORCE_INLINE void swaplong(void *a, void *b)
|
|
{
|
|
int32_t const t = *(int32_t *)b;
|
|
*(int32_t *)b = *(int32_t *)a;
|
|
*(int32_t *)a = t;
|
|
}
|
|
static FORCE_INLINE void swapfloat(void *a, void *b)
|
|
{
|
|
float const t = *(float *)b;
|
|
*(float *)b = *(float *)a;
|
|
*(float *)a = t;
|
|
}
|
|
static FORCE_INLINE void swapdouble(void *a, void *b)
|
|
{
|
|
double const t = *(double *)b;
|
|
*(double *)b = *(double *)a;
|
|
*(double *)a = t;
|
|
}
|
|
static FORCE_INLINE void swap64bit(void *a, void *b)
|
|
{
|
|
uint64_t const t = *(uint64_t *)b;
|
|
*(uint64_t *)b = *(uint64_t *)a;
|
|
*(uint64_t *)a = t;
|
|
}
|
|
#endif
|
|
static FORCE_INLINE void swapchar2(void *a, void *b, int32_t s)
|
|
{
|
|
swapchar((char *)a, (char *)b);
|
|
swapchar((char *)a + 1, (char *)b + s);
|
|
}
|
|
#endif
|
|
|
|
static FORCE_INLINE char readpixel(void *s) { return *(char *)s; }
|
|
static FORCE_INLINE void drawpixel(void *s, char a) { *(char *)s = a; }
|
|
|
|
#ifndef pragmas_have_klabs
|
|
static FORCE_INLINE int32_t klabs(int32_t a)
|
|
{
|
|
const uint32_t m = a >> (sizeof(uint32_t) * CHAR_BIT - 1);
|
|
return (a ^ m) - m;
|
|
}
|
|
#endif
|
|
#ifndef pragmas_have_ksgn
|
|
static FORCE_INLINE int32_t ksgn(int32_t a) { return (a > 0) - (a < 0); }
|
|
#endif
|
|
|
|
#ifndef pragmas_have_mulscale
|
|
static FORCE_INLINE int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) >> by(ecx)); }
|
|
static FORCE_INLINE int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx)
|
|
{
|
|
return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(ecx));
|
|
}
|
|
#endif
|
|
|
|
#ifndef pragmas_have_qinterpolatedown16
|
|
void qinterpolatedown16(intptr_t bufptr, int32_t num, int32_t val, int32_t add);
|
|
void qinterpolatedown16short(intptr_t bufptr, int32_t num, int32_t val, int32_t add);
|
|
#endif
|
|
|
|
#ifndef pragmas_have_clearbuf
|
|
void clearbuf(void *d, int32_t c, int32_t a);
|
|
#endif
|
|
#ifndef pragmas_have_copybuf
|
|
void copybuf(const void *s, void *d, int32_t c);
|
|
#endif
|
|
#ifndef pragmas_have_swaps
|
|
void swapbuf4(void *a, void *b, int32_t c);
|
|
#endif
|
|
|
|
#ifndef pragmas_have_clearbufbyte
|
|
void clearbufbyte(void *D, int32_t c, int32_t a);
|
|
#endif
|
|
#ifndef pragmas_have_copybufbyte
|
|
void copybufbyte(const void *S, void *D, int32_t c);
|
|
#endif
|
|
#ifndef pragmas_have_copybufreverse
|
|
void copybufreverse(const void *S, void *D, int32_t c);
|
|
#endif
|
|
|
|
#ifndef pragmas_have_krecipasm
|
|
static inline int32_t krecipasm(int32_t i)
|
|
{
|
|
// Ken did this
|
|
union { int32_t i; float f; } x;
|
|
x.f = (float)i;
|
|
i = x.i;
|
|
return ((reciptable[(i >> 12) & 2047] >> (((i - 0x3f800000) >> 23) & 31)) ^ (i >> 31));
|
|
}
|
|
#endif
|
|
|
|
#undef qw
|
|
#undef dw
|
|
#undef wo
|
|
#undef by
|
|
|
|
static inline void swapbufreverse(void *s, void *d, int32_t c)
|
|
{
|
|
uint8_t *src = (uint8_t *)s, *dst = (uint8_t *)d;
|
|
Bassert(c >= 4);
|
|
|
|
do
|
|
{
|
|
swapchar(dst, src);
|
|
swapchar(dst + 1, src - 1);
|
|
swapchar(dst + 2, src - 2);
|
|
swapchar(dst + 3, src - 3);
|
|
dst += 4, src -= 4;
|
|
} while ((c -= 4) > 4);
|
|
|
|
while (c--)
|
|
swapchar(dst++, src--);
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif // pragmas_h_
|