diff --git a/polymer/eduke32/build/include/pragmas.h b/polymer/eduke32/build/include/pragmas.h index e54e2f6c7..1dc8810cd 100644 --- a/polymer/eduke32/build/include/pragmas.h +++ b/polymer/eduke32/build/include/pragmas.h @@ -101,10 +101,6 @@ static inline int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_ return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(a)); \ } \ \ -static inline int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \ -{ \ - return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> by(a)); \ -} \ PRAGMA_FUNCS _scaler(32) @@ -114,6 +110,7 @@ static inline void swapchar(void* a, void* b) { char t = *((char*)b); *((char*) static inline void swapchar2(void* a, void* b, int32_t s) { swapchar(a,b); swapchar((char*)a+1,(char*)b+s); } static inline void swapshort(void* a, void* b) { int16_t t = *((int16_t*)b); *((int16_t*)b) = *((int16_t*)a); *((int16_t*)a) = t; } static inline void swaplong(void* a, void* b) { int32_t t = *((int32_t*)b); *((int32_t*)b) = *((int32_t*)a); *((int32_t*)a) = t; } +static inline void swapfloat(void* a, void* b) { float t = *((float*)b); *((float*)b) = *((float*)a); *((float*)a) = t; } static inline void swap64bit(void* a, void* b) { int64_t t = *((int64_t*)b); *((int64_t*)b) = *((int64_t*)a); *((int64_t*)a) = t; } static inline char readpixel(void* s) { return (*((char*)(s))); } @@ -121,10 +118,6 @@ static inline void drawpixel(void* s, char a) { *((char*)(s)) = a; } static inline void drawpixels(void* s, int16_t a) { *((int16_t*)(s)) = a; } static inline void drawpixelses(void* s, int32_t a) { *((int32_t*)(s)) = a; } -static inline int32_t mul3(int32_t a) { return (a<<1)+a; } -static inline int32_t mul5(int32_t a) { return (a<<2)+a; } -static inline int32_t mul9(int32_t a) { return (a<<3)+a; } - static inline int32_t divmod(int32_t a, int32_t b) { uint32_t _a=(uint32_t)a, _b=(uint32_t)b; dmval = _a%_b; return _a/_b; } static inline int32_t moddiv(int32_t a, int32_t b) { uint32_t _a=(uint32_t)a, _b=(uint32_t)b; dmval = _a/_b; return _a%_b; } @@ -137,9 +130,9 @@ static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b) static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b) return b; return a; } static inline int32_t sqr(int32_t eax) { return (eax) * (eax); } -static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) / qw(ecx)); } -static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) >> by(ecx)); } -static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(ecx)); } +static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) / ecx); } +static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) >> by(ecx)); } +static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(ecx)); } void qinterpolatedown16 (intptr_t bufptr, int32_t num, int32_t val, int32_t add); void qinterpolatedown16short (intptr_t bufptr, int32_t num, int32_t val, int32_t add); diff --git a/polymer/eduke32/build/include/pragmas_arm.h b/polymer/eduke32/build/include/pragmas_arm.h index df276874a..4714046ca 100644 --- a/polymer/eduke32/build/include/pragmas_arm.h +++ b/polymer/eduke32/build/include/pragmas_arm.h @@ -5,20 +5,16 @@ #ifndef __pragmas_arm_h__ #define __pragmas_arm_h__ +// TODO: implement libdivide.h #define _scaler(a) \ static inline int32_t mulscale##a(int32_t eax, int32_t edx) \ { \ - return dw((qw(eax) * qw(edx)) >> by(a)); \ + return dw((qw(eax) * edx) >> by(a)); \ } \ \ static inline int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_t edi) \ { \ - return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(a)); \ -} \ -\ -static inline int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \ -{ \ - return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> by(a)); \ + return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(a)); \ } \ PRAGMA_FUNCS _scaler(32) @@ -29,6 +25,7 @@ static inline void swapchar(void* a, void* b) { char t = *((char*) b); *((char* static inline void swapchar2(void* a, void* b, int32_t s) { swapchar(a, b); swapchar((char*) a+1, (char*) b+s); } static inline void swapshort(void* a, void* b) { int16_t t = *((int16_t*) b); *((int16_t*) b) = *((int16_t*) a); *((int16_t*) a) = t; } static inline void swaplong(void* a, void* b) { int32_t t = *((int32_t*) b); *((int32_t*) b) = *((int32_t*) a); *((int32_t*) a) = t; } +static inline void swapfloat(void* a, void* b) { float t = *((float*) b); *((float*) b) = *((float*) a); *((float*) a) = t; } static inline void swap64bit(void* a, void* b) { int64_t t = *((int64_t*) b); *((int64_t*) b) = *((int64_t*) a); *((int64_t*) a) = t; } static inline char readpixel(void* s) { return (*((char*) (s))); } @@ -36,10 +33,6 @@ static inline void drawpixel(void* s, char a) { *((char*) (s)) = a; } static inline void drawpixels(void* s, int16_t a) { *((int16_t*) (s)) = a; } static inline void drawpixelses(void* s, int32_t a) { *((int32_t*) (s)) = a; } -static inline int32_t mul3(int32_t a) { return (a<<1)+a; } -static inline int32_t mul5(int32_t a) { return (a<<2)+a; } -static inline int32_t mul9(int32_t a) { return (a<<3)+a; } - static inline int32_t divmod(int32_t a, int32_t b) { uint32_t _a=(uint32_t) a, _b=(uint32_t) b; dmval = _a%_b; return _a/_b; } static inline int32_t moddiv(int32_t a, int32_t b) { uint32_t _a=(uint32_t) a, _b=(uint32_t) b; dmval = _a/_b; return _a%_b; } diff --git a/polymer/eduke32/build/include/pragmas_ppc.h b/polymer/eduke32/build/include/pragmas_ppc.h index 5458489c4..5e7462510 100644 --- a/polymer/eduke32/build/include/pragmas_ppc.h +++ b/polymer/eduke32/build/include/pragmas_ppc.h @@ -8,6 +8,42 @@ int32_t scale(int32_t a, int32_t d, int32_t c); +#define _scaler(x) \ +static inline int32_t mulscale##x(int32_t a, int32_t d) \ +{ \ + int32_t mullo, mulhi; \ + __asm__ ( \ + " mullw %0, %2, %3\n" \ + " mulhw %1, %2, %3\n" \ + " srwi %0, %0, %4\n" \ + " insrwi %0, %1, %4, 0\n" \ + : "=&r"(mullo), "=r"(mulhi) \ + : "r"(a), "r"(d), "i"(x) \ + ); \ + return mullo; \ +} \ +static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \ +{ \ + int32_t mulhi, mullo, sumhi, sumlo; \ + __asm__ ( \ + " mullw %0, %4, %5\n" \ + " mulhw %1, %4, %5\n" \ + " mullw %2, %6, %7\n" \ + " mulhw %3, %6, %7\n" \ + " addc %0, %0, %2\n" \ + " adde %1, %1, %3\n" \ + " srwi %0, %0, %8\n" \ + " insrwi %0, %1, %8, 0\n" \ + : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \ + : "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \ + : "xer" \ + ); \ + return sumlo; \ +} + +PRAGMA_FUNCS +#undef _scaler + static inline int32_t mulscale(int32_t a, int32_t d, int32_t c) { int32_t mullo, mulhi; @@ -24,24 +60,6 @@ static inline int32_t mulscale(int32_t a, int32_t d, int32_t c) return mullo; } -#define _scaler(x) \ -static inline int32_t mulscale##x(int32_t a, int32_t d) \ -{ \ - int32_t mullo, mulhi; \ - __asm__ ( \ - " mullw %0, %2, %3\n" \ - " mulhw %1, %2, %3\n" \ - " srwi %0, %0, %4\n" \ - " insrwi %0, %1, %4, 0\n" \ - : "=&r"(mullo), "=r"(mulhi) \ - : "r"(a), "r"(d), "i"(x) \ - ); \ - return mullo; \ -} - -PRAGMA_FUNCS -#undef _scaler - static inline int32_t mulscale32(int32_t a, int32_t d) { int32_t mulhi; @@ -73,29 +91,6 @@ static inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int3 return sumlo; } -#define _scaler(x) \ -static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \ -{ \ - int32_t mulhi, mullo, sumhi, sumlo; \ - __asm__ ( \ - " mullw %0, %4, %5\n" \ - " mulhw %1, %4, %5\n" \ - " mullw %2, %6, %7\n" \ - " mulhw %3, %6, %7\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - " srwi %0, %0, %8\n" \ - " insrwi %0, %1, %8, 0\n" \ - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \ - : "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \ - : "xer" \ - ); \ - return sumlo; \ -} - -PRAGMA_FUNCS -#undef _scaler - static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) { int32_t mulhi, mullo, sumhi, sumlo; @@ -113,30 +108,6 @@ static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) return sumhi; } -// tmulscale only seems to be used in one place... -static inline int32_t tmulscale11(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) -{ - int32_t mulhi, mullo, sumhi, sumlo; - __asm__( - " mullw %0, %4, %5\n" \ - " mulhw %1, %4, %5\n" \ - " mullw %2, %6, %7\n" \ - " mulhw %3, %6, %7\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - " mullw %2, %8, %9\n" \ - " mulhw %3, %8, %9\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - " srwi %0, %0, 11\n" \ - " insrwi %0, %1, 11, 0\n" \ - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) - : "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D) - : "xer" - ); - return sumlo; -} - static inline char readpixel(void *d) { return *(char*) d; @@ -225,21 +196,6 @@ static inline void qinterpolatedown16short(intptr_t bufptr, int32_t num, int32_t } } -static inline int32_t mul3(int32_t a) -{ - return (a<<1)+a; -} - -static inline int32_t mul5(int32_t a) -{ - return (a<<2)+a; -} - -static inline int32_t mul9(int32_t a) -{ - return (a<<3)+a; -} - static inline int32_t klabs(int32_t a) { int32_t mask; @@ -296,6 +252,13 @@ static inline void swaplong(void *a, void *b) *(int32_t*) b = t; } +static inline void swapfloat(void *a, void *b) +{ + float t = *(float*) a; + *(float*) a = *(float*) b; + *(float*) b = t; +} + static inline void swap64bit(void *a, void *b) { double t = *(double*) a; diff --git a/polymer/eduke32/build/include/pragmas_x86_gcc.h b/polymer/eduke32/build/include/pragmas_x86_gcc.h index 0cbb45c77..3ea80bd62 100644 --- a/polymer/eduke32/build/include/pragmas_x86_gcc.h +++ b/polymer/eduke32/build/include/pragmas_x86_gcc.h @@ -471,263 +471,6 @@ void copybufreverse(const void *S, void *D, int32_t c); : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ __d; }) -#define tmulscale1(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale2(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale3(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale4(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale5(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale6(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale7(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale8(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale9(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale10(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale11(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale12(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale13(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale14(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale15(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale16(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale17(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale18(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale19(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale20(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale21(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale22(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale23(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale24(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale25(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale26(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale27(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale28(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale29(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale30(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale31(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale32(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __d; }) - #ifdef USE_ASM_DIVSCALE #define divscale(a,b,c) \ ({ int32_t __a=(a), __b=(b), __c=(c); \ @@ -927,22 +670,6 @@ void copybufreverse(const void *S, void *D, int32_t c); : "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \ 0; }) -#define mul3(a) \ - ({ int32_t __a=(a), __r; \ - __asm__ __volatile__ ("lea (%1,%1,2), %0" \ - : "=r" (__r) : "0" (__a) : "cc"); \ - __r; }) -#define mul5(a) \ - ({ int32_t __a=(a), __r; \ - __asm__ __volatile__ ("lea (%1,%1,4), %0" \ - : "=r" (__r) : "0" (__a) : "cc"); \ - __r; }) -#define mul9(a) \ - ({ int32_t __a=(a), __r; \ - __asm__ __volatile__ ("lea (%1,%1,8), %0" \ - : "=r" (__r) : "0" (__a) : "cc"); \ - __r; }) - //returns eax/ebx, dmval = eax%edx; #define divmod(a,b) \ ({ int32_t __a=(a), __b=(b); \ @@ -1004,6 +731,7 @@ void copybufreverse(const void *S, void *D, int32_t c); __asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \ : : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \ 0; }) +#define swapfloat swaplong #define swapbuf4(a,b,c) \ ({ void *__a=(a), *__b=(b); int32_t __c=(c); \ __asm__ __volatile__ ("0: movl (%%eax), %%esi; movl (%%ebx), %%edi; movl %%esi, (%%ebx); " \ diff --git a/polymer/eduke32/build/include/pragmas_x86_msvc.h b/polymer/eduke32/build/include/pragmas_x86_msvc.h index 43fd20a8c..1461e6116 100644 --- a/polymer/eduke32/build/include/pragmas_x86_msvc.h +++ b/polymer/eduke32/build/include/pragmas_x86_msvc.h @@ -35,23 +35,30 @@ static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c) } } -#define MULSCALE(x) \ +#define _scaler(x) \ static __inline int32_t mulscale##x (int32_t a, int32_t d) \ { \ _asm mov eax, a \ _asm imul d \ _asm shrd eax, edx, x \ -} +} \ +static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \ +{ \ + _asm mov eax, a \ + _asm imul d \ + _asm mov ebx, eax \ + _asm mov eax, S \ + _asm mov esi, edx \ + _asm imul D \ + _asm add eax, ebx \ + _asm adc edx, esi \ + _asm shrd eax, edx, x \ +} \ + + +PRAGMA_FUNCS +#undef _scaler -MULSCALE(1) MULSCALE(2) MULSCALE(3) MULSCALE(4) -MULSCALE(5) MULSCALE(6) MULSCALE(7) MULSCALE(8) -MULSCALE(9) MULSCALE(10) MULSCALE(11) MULSCALE(12) -MULSCALE(13) MULSCALE(14) MULSCALE(15) MULSCALE(16) -MULSCALE(17) MULSCALE(18) MULSCALE(19) MULSCALE(20) -MULSCALE(21) MULSCALE(22) MULSCALE(23) MULSCALE(24) -MULSCALE(25) MULSCALE(26) MULSCALE(27) MULSCALE(28) -MULSCALE(29) MULSCALE(30) MULSCALE(31) -#undef MULSCALE static __inline int32_t mulscale32(int32_t a, int32_t d) { _asm { @@ -77,29 +84,6 @@ static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, in } } -#define DMULSCALE(x) \ -static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \ -{ \ - _asm mov eax, a \ - _asm imul d \ - _asm mov ebx, eax \ - _asm mov eax, S \ - _asm mov esi, edx \ - _asm imul D \ - _asm add eax, ebx \ - _asm adc edx, esi \ - _asm shrd eax, edx, x \ -} - -DMULSCALE(1) DMULSCALE(2) DMULSCALE(3) DMULSCALE(4) -DMULSCALE(5) DMULSCALE(6) DMULSCALE(7) DMULSCALE(8) -DMULSCALE(9) DMULSCALE(10) DMULSCALE(11) DMULSCALE(12) -DMULSCALE(13) DMULSCALE(14) DMULSCALE(15) DMULSCALE(16) -DMULSCALE(17) DMULSCALE(18) DMULSCALE(19) DMULSCALE(20) -DMULSCALE(21) DMULSCALE(22) DMULSCALE(23) DMULSCALE(24) -DMULSCALE(25) DMULSCALE(26) DMULSCALE(27) DMULSCALE(28) -DMULSCALE(29) DMULSCALE(30) DMULSCALE(31) -#undef DMULSCALE static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) { _asm { @@ -115,54 +99,6 @@ static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) } } -#define TMULSCALE(x) \ -static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \ -{ \ - _asm mov eax, a \ - _asm mov ebx, b \ - _asm imul d \ - _asm xchg eax, ebx \ - _asm mov ecx, c \ - _asm xchg edx, ecx \ - _asm imul edx \ - _asm add ebx, eax \ - _asm adc ecx, edx \ - _asm mov eax, S \ - _asm imul D \ - _asm add eax, ebx \ - _asm adc edx, ecx \ - _asm shrd eax, edx, x \ -} - -TMULSCALE(1) TMULSCALE(2) TMULSCALE(3) TMULSCALE(4) -TMULSCALE(5) TMULSCALE(6) TMULSCALE(7) TMULSCALE(8) -TMULSCALE(9) TMULSCALE(10) TMULSCALE(11) TMULSCALE(12) -TMULSCALE(13) TMULSCALE(14) TMULSCALE(15) TMULSCALE(16) -TMULSCALE(17) TMULSCALE(18) TMULSCALE(19) TMULSCALE(20) -TMULSCALE(21) TMULSCALE(22) TMULSCALE(23) TMULSCALE(24) -TMULSCALE(25) TMULSCALE(26) TMULSCALE(27) TMULSCALE(28) -TMULSCALE(29) TMULSCALE(30) TMULSCALE(31) -#undef TMULSCALE -static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) -{ - _asm { - mov eax, a - mov ebx, b - imul d - xchg eax, ebx - mov ecx, c - xchg edx, ecx - imul edx - add ebx, eax - adc ecx, edx - mov eax, S - imul D - add eax, ebx - adc edx, ecx - mov eax, edx - } -} - #ifdef USE_ASM_DIVSCALE static __inline int32_t divscale(int32_t a, int32_t b, int32_t c) { @@ -479,30 +415,6 @@ static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, in } } -static __inline int32_t mul3(int32_t a) -{ - _asm { - mov eax, a - lea eax, [eax+eax*2] - } -} - -static __inline int32_t mul5(int32_t a) -{ - _asm { - mov eax, a - lea eax, [eax+eax*4] - } -} - -static __inline int32_t mul9(int32_t a) -{ - _asm { - mov eax, a - lea eax, [eax+eax*8] - } -} - //returns eax/ebx, dmval = eax%edx; static __inline int32_t divmod(int32_t a, int32_t b) { @@ -633,6 +545,8 @@ static __inline void swaplong(void *a, void *b) } } +#define swapfloat swaplong + static __inline void swapbuf4(void *a, void *b, int32_t c) { _asm {