diff --git a/source/build/include/pragmas.h b/source/build/include/pragmas.h index 935498426..eec0a730b 100644 --- a/source/build/include/pragmas.h +++ b/source/build/include/pragmas.h @@ -118,27 +118,6 @@ static inline int32_t roundscale(int32_t eax, int32_t edx, int32_t ecx) return scaleadd(eax, edx, ecx / 2, ecx); } -#if defined(__GNUC__) && defined(GEKKO) - -// GCC Inline Assembler version (PowerPC) -#include "pragmas_ppc.h" - -#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM) - -// GCC Inline Assembler version (x86) -#include "pragmas_x86_gcc.h" - -#elif defined(_MSC_VER) && !defined(NOASM) // __GNUC__ - -// Microsoft C inline assembler -#include "pragmas_x86_msvc.h" - -#elif defined(__arm__) // _MSC_VER - -// GCC Inline Assembler version (ARM) -#include "pragmas_arm.h" - -#endif // // Generic C @@ -185,51 +164,13 @@ static FORCE_INLINE void swapptr(void *a, void *b) #endif #ifndef pragmas_have_swaps -#ifdef __cplusplus #define swapchar swap #define swapshort swap #define swaplong swap #define swapfloat swap #define swapdouble swap #define swap64bit swap -#else -static FORCE_INLINE void swapchar(void *a, void *b) -{ - char const t = *(char *)b; - *(char *)b = *(char *)a; - *(char *)a = t; -} -static FORCE_INLINE void swapshort(void *a, void *b) -{ - int16_t const t = *(int16_t *)b; - *(int16_t *)b = *(int16_t *)a; - *(int16_t *)a = t; -} -static FORCE_INLINE void swaplong(void *a, void *b) -{ - int32_t const t = *(int32_t *)b; - *(int32_t *)b = *(int32_t *)a; - *(int32_t *)a = t; -} -static FORCE_INLINE void swapfloat(void *a, void *b) -{ - float const t = *(float *)b; - *(float *)b = *(float *)a; - *(float *)a = t; -} -static FORCE_INLINE void swapdouble(void *a, void *b) -{ - double const t = *(double *)b; - *(double *)b = *(double *)a; - *(double *)a = t; -} -static FORCE_INLINE void swap64bit(void *a, void *b) -{ - uint64_t const t = *(uint64_t *)b; - *(uint64_t *)b = *(uint64_t *)a; - *(uint64_t *)a = t; -} -#endif + static FORCE_INLINE void swapchar2(void *a, void *b, int32_t s) { swapchar((char *)a, (char *)b); diff --git a/source/build/include/pragmas_arm.h b/source/build/include/pragmas_arm.h deleted file mode 100644 index c052687b8..000000000 --- a/source/build/include/pragmas_arm.h +++ /dev/null @@ -1,10 +0,0 @@ -// -// GCC Inline Assembler version (ARM) -// - -#ifndef pragmas_arm_h_ -#define pragmas_arm_h_ - -// :( - -#endif diff --git a/source/build/include/pragmas_ppc.h b/source/build/include/pragmas_ppc.h deleted file mode 100644 index ac86158b2..000000000 --- a/source/build/include/pragmas_ppc.h +++ /dev/null @@ -1,186 +0,0 @@ -// GCC Inline Assembler version (PowerPC) - -#ifdef pragmas_h_ -#ifndef pragmas_ppc_h_ -#define pragmas_ppc_h_ - -#define pragmas_have_mulscale - -#define EDUKE32_SCALER_PRAGMA(x) \ -static inline int32_t mulscale##x(int32_t a, int32_t d) \ -{ \ - int32_t mullo, mulhi; \ - __asm__ ( \ - " mullw %0, %2, %3\n" \ - " mulhw %1, %2, %3\n" \ - " srwi %0, %0, %4\n" \ - " insrwi %0, %1, %4, 0\n" \ - : "=&r"(mullo), "=r"(mulhi) \ - : "r"(a), "r"(d), "i"(x) \ - ); \ - return mullo; \ -} \ -static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \ -{ \ - int32_t mulhi, mullo, sumhi, sumlo; \ - __asm__ ( \ - " mullw %0, %4, %5\n" \ - " mulhw %1, %4, %5\n" \ - " mullw %2, %6, %7\n" \ - " mulhw %3, %6, %7\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - " srwi %0, %0, %8\n" \ - " insrwi %0, %1, %8, 0\n" \ - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \ - : "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \ - : "xer" \ - ); \ - return sumlo; \ -} \ -static inline int32_t tmulscale##x(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \ -{ \ - int32_t mulhi, mullo, sumhi, sumlo; \ - __asm__( \ - " mullw %0, %4, %5\n" \ - " mulhw %1, %4, %5\n" \ - " mullw %2, %6, %7\n" \ - " mulhw %3, %6, %7\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - " mullw %2, %8, %9\n" \ - " mulhw %3, %8, %9\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - " srwi %0, %0, %10\n" \ - " insrwi %0, %1, %10, 0\n" \ - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) \ - : "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D), "i"(x) \ - : "xer" \ - ); \ - return sumlo; \ -} \ - -EDUKE32_GENERATE_PRAGMAS -#undef EDUKE32_SCALER_PRAGMA - -static inline int32_t mulscale(int32_t a, int32_t d, int32_t c) -{ - int32_t mullo, mulhi; - __asm__( - " mullw %0, %2, %3\n" - " mulhw %1, %2, %3\n" - " srw %0, %0, %4\n" - " slw %1, %1, %5\n" - " or %0, %0, %1\n" - : "=&r"(mullo), "=&r"(mulhi) - : "r"(a), "r"(d), "r"(c), "r"(32-c) - : "xer" - ); - return mullo; -} - -static inline int32_t mulscale32(int32_t a, int32_t d) -{ - int32_t mulhi; - __asm__( - " mulhw %0, %1, %2\n" - : "=r"(mulhi) - : "r"(a), "r"(d) - ); - return mulhi; -} - -static inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c) -{ - int32_t mulhi, mullo, sumhi, sumlo; - __asm__( - " mullw %0, %4, %5\n" - " mulhw %1, %4, %5\n" - " mullw %2, %6, %7\n" - " mulhw %3, %6, %7\n" - " addc %0, %0, %2\n" - " adde %1, %1, %3\n" - " srw %0, %0, %8\n" - " slw %1, %1, %9\n" - " or %0, %0, %1\n" - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) - : "r"(a), "r"(d), "r"(S), "r"(D), "r"(c), "r"(32-c) - : "xer" - ); - return sumlo; -} - -static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) -{ - int32_t mulhi, mullo, sumhi, sumlo; - __asm__(\ - " mullw %0, %4, %5\n" \ - " mulhw %1, %4, %5\n" \ - " mullw %2, %6, %7\n" \ - " mulhw %3, %6, %7\n" \ - " addc %0, %0, %2\n" \ - " adde %1, %1, %3\n" \ - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) - : "r"(a), "r"(d), "r"(S), "r"(D) - : "xer" - ); - return sumhi; -} - -static inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) -{ - int32_t mulhi, mullo, sumhi, sumlo; - __asm__( - " mullw %0, %4, %5\n" - " mulhw %1, %4, %5\n" - " mullw %2, %6, %7\n" - " mulhw %3, %6, %7\n" - " addc %0, %0, %2\n" - " adde %1, %1, %3\n" - " mullw %2, %8, %9\n" - " mulhw %3, %8, %9\n" - " addc %0, %0, %2\n" - " adde %1, %1, %3\n" - : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) - : "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D) - : "xer" - ); - return sumhi; -} - -#define pragmas_have_klabs - -static inline int32_t klabs(int32_t a) -{ - int32_t mask; - __asm__( - " srawi %0, %1, 31\n" - " xor %1, %0, %1\n" - " subf %1, %0, %1\n" - : "=&r"(mask), "+r"(a) - : - : "xer" - ); - return a; -} - -#define pragmas_have_ksgn - -static inline int ksgn(int32_t a) -{ - int32_t s, t; - __asm__( - " neg %1, %2\n" - " srawi %0, %2, 31\n" - " srwi %1, %1, 31\n" - " or %1, %1, %0\n" - : "=r"(t), "=&r"(s) - : "r"(a) - : "xer" - ); - return s; -} - -#endif // pragmas_ppc_h_ -#endif // pragmas_h_ diff --git a/source/build/include/pragmas_x86_gcc.h b/source/build/include/pragmas_x86_gcc.h deleted file mode 100644 index 7d7be1fb5..000000000 --- a/source/build/include/pragmas_x86_gcc.h +++ /dev/null @@ -1,821 +0,0 @@ -// -// GCC Inline Assembler version (x86) -// - -//{{{ - -#ifdef pragmas_h_ -#ifndef pragmas_x86_h_ -#define pragmas_x86_h_ - -#define pragmas_have_mulscale - -#define mulscale(a,d,c) \ - ({ int32_t __a=(a), __d=(d), __c=(c); \ - __asm__ __volatile__ ("imull %%edx; shrdl %%cl, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d), "c" (__c) : "cc"); \ - __a; }) -#define mulscale1(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $1, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale2(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $2, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale3(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $3, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale4(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $4, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale5(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $5, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale6(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $6, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale7(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $7, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale8(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $8, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale9(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $9, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale10(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $10, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale11(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $11, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale12(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $12, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale13(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $13, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale14(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $14, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale15(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $15, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale16(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $16, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale17(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $17, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale18(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $18, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale19(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $19, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale20(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $20, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale21(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $21, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale22(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $22, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale23(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $23, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale24(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $24, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale25(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $25, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale26(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $26, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale27(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $27, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale28(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $28, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale29(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $29, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale30(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $30, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale31(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx; shrdl $31, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __a; }) -#define mulscale32(a,d) \ - ({ int32_t __a=(a), __d=(d); \ - __asm__ __volatile__ ("imull %%edx" \ - : "=a" (__a), "=d" (__d) \ - : "a" (__a), "d" (__d) : "cc"); \ - __d; }) - -#define dmulscale(a,d,S,D,c) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D), __c=(c); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl %%cl, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D), "c" (__c) : "ebx", "cc"); \ - __a; }) -#define dmulscale1(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $1, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale2(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $2, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale3(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $3, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale4(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $4, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale5(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $5, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale6(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $6, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale7(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $7, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale8(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $8, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale9(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $9, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale10(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $10, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale11(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $11, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale12(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $12, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale13(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $13, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale14(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $14, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale15(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $15, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale16(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $16, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale17(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $17, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale18(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $18, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale19(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $19, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale20(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $20, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale21(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $21, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale22(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $22, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale23(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $23, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale24(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $24, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale25(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $25, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale26(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $26, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale27(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $27, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale28(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $28, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale29(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $29, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale30(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $30, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale31(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $31, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __a; }) -#define dmulscale32(a,d,S,D) \ - ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx" \ - : "=a" (__a), "=d" (__d), "=S" (__S) \ - : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ - __d; }) - -#define tmulscale1(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale2(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale3(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale4(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale5(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale6(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale7(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale8(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale9(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale10(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale11(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale12(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale13(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale14(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale15(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale16(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale17(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale18(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale19(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale20(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale21(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale22(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale23(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale24(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale25(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale26(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale27(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale28(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale29(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale30(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale31(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __a; }) -#define tmulscale32(a,d,b,c,S,D) \ - ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ - __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ - "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ - "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \ - : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ - : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ - __d; }) - -#define pragmas_have_clearbuf - -#define clearbuf(D,c,a) \ - ({ void *__D=(D); int32_t __c=(c), __a=(a); \ - __asm__ __volatile__ ("rep; stosl" \ - : "=&D" (__D), "=&c" (__c) : "0" (__D), "1" (__c), "a" (__a) : "memory", "cc"); \ - 0; }) - -#define pragmas_have_copybuf - -#define copybuf(S,D,c) \ - ({ const void *__S=(S), *__D=(D); int32_t __c=(c); \ - __asm__ __volatile__ ("rep; movsl" \ - : "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \ - 0; }) - -#define pragmas_have_klabs - -#define klabs(a) \ - ({ int32_t __a=(a); \ - __asm__ __volatile__ ("testl %%eax, %%eax; jns 0f; negl %%eax; 0:" \ - : "=a" (__a) : "a" (__a) : "cc"); \ - __a; }) - -#define pragmas_have_ksgn - -#define ksgn(b) \ - ({ int32_t __b=(b), __r; \ - __asm__ __volatile__ ("addl %%ebx, %%ebx; sbbl %%eax, %%eax; cmpl %%ebx, %%eax; adcb $0, %%al" \ - : "=a" (__r) : "b" (__b) : "cc"); \ - __r; }) - -#define pragmas_have_swaps - -#define swapchar(a,b) \ - ({ void *__a=(a), *__b=(b); \ - __asm__ __volatile__ ("movb (%%eax), %%cl; movb (%%ebx), %%ch; movb %%cl, (%%ebx); movb %%ch, (%%eax)" \ - : : "a" (__a), "b" (__b) : "ecx", "memory", "cc"); \ - 0; }) -#define swapshort(a,b) \ - ({ void *__a=(a), *__b=(b); \ - __asm__ __volatile__ ("movw (%%eax), %%cx; movw (%%ebx), %%dx; movw %%cx, (%%ebx); movw %%dx, (%%eax)" \ - : : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \ - 0; }) -#define swaplong(a,b) \ - ({ void *__a=(a), *__b=(b); \ - __asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \ - : : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \ - 0; }) -#define swapfloat swaplong -#define swapbuf4(a,b,c) \ - ({ void *__a=(a), *__b=(b); int32_t __c=(c); \ - __asm__ __volatile__ ("0: movl (%%eax), %%esi; movl (%%ebx), %%edi; movl %%esi, (%%ebx); " \ - "movl %%edi, (%%eax); addl $4, %%eax; addl $4, %%ebx; decl %%ecx; jnz 0b" \ - : : "a" (__a), "b" (__b), "c" (__c) : "esi", "edi", "memory", "cc"); \ - 0; }) -#define swap64bit(a,b) \ - ({ void *__a=(a), *__b=(b); \ - __asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); " \ - "movl 4(%%eax), %%ecx; movl %%edx, (%%eax); movl 4(%%ebx), %%edx; " \ - "movl %%ecx, 4(%%ebx); movl %%edx, 4(%%eax)" \ - : : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \ - 0; }) -#define swapdouble swap64bit -//swapchar2(ptr1,ptr2,xsiz); is the same as: -//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz); -#define swapchar2(a,b,S) \ - ({ void *__a=(a), *__b=(b); int32_t __S=(S); \ - __asm__ __volatile__ ("addl %%ebx, %%esi; movw (%%eax), %%cx; movb (%%ebx), %%dl; " \ - "movb %%cl, (%%ebx); movb (%%esi), %%dh; movb %%ch, (%%esi); " \ - "movw %%dx, (%%eax)" \ - : "=S" (__S) : "a" (__a), "b" (__b), "S" (__S) : "ecx", "edx", "memory", "cc"); \ - 0; }) - - -#define pragmas_have_qinterpolatedown16 - -#define qinterpolatedown16(a,c,d,S) \ - ({ void *__a=(void*)(a); int32_t __c=(c), __d=(d), __S=(S); \ - __asm__ __volatile__ ("movl %%ecx, %%ebx; shrl $1, %%ecx; jz 1f; " \ - "0: leal (%%edx,%%esi,), %%edi; sarl $16, %%edx; movl %%edx, (%%eax); " \ - "leal (%%edi,%%esi,), %%edx; sarl $16, %%edi; movl %%edi, 4(%%eax); " \ - "addl $8, %%eax; decl %%ecx; jnz 0b; testl $1, %%ebx; jz 2f; " \ - "1: sarl $16, %%edx; movl %%edx, (%%eax); 2:" \ - : "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \ - : "ebx", "edi", "memory", "cc"); \ - 0; }) - -#define qinterpolatedown16short(a,c,d,S) \ - ({ void *__a=(void*)(a); int32_t __c=(c), __d=(d), __S=(S); \ - __asm__ __volatile__ ("testl %%ecx, %%ecx; jz 3f; testb $2, %%al; jz 0f; movl %%edx, %%ebx; " \ - "sarl $16, %%ebx; movw %%bx, (%%eax); addl %%esi, %%edx; addl $2, %%eax; " \ - "decl %%ecx; jz 3f; " \ - "0: subl $2, %%ecx; jc 2f; " \ - "1: movl %%edx, %%ebx; addl %%esi, %%edx; sarl $16, %%ebx; movl %%edx, %%edi; " \ - "andl $0xffff0000, %%edi; addl %%esi, %%edx; addl %%edi, %%ebx; " \ - "movl %%ebx, (%%eax); addl $4, %%eax; subl $2, %%ecx; jnc 1b; testb $1, %%cl; " \ - "jz 3f; " \ - "2: movl %%edx, %%ebx; sarl $16, %%ebx; movw %%bx, (%%eax); 3:" \ - : "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \ - : "ebx", "edi", "memory", "cc"); \ - 0; }) - -#define pragmas_have_krecipasm - -#define krecipasm(a) \ - ({ int32_t __a=(a); \ - __asm__ __volatile__ ( \ - "movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \ - "addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \ - "movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \ - "andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \ - "shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \ - "sarl %%cl, %%eax; xorl %%ebx, %%eax" \ - : "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \ - __a; }) - -//}}} - -#endif // pragmas_x86_h_ -#endif // pragmas_h_ diff --git a/source/build/include/pragmas_x86_msvc.h b/source/build/include/pragmas_x86_msvc.h deleted file mode 100644 index 85b4c2cca..000000000 --- a/source/build/include/pragmas_x86_msvc.h +++ /dev/null @@ -1,492 +0,0 @@ -// -// Microsoft C inline assembler -// - -//{{{ - -#ifdef pragmas_h_ -#ifndef pragmas_x86_h_ -#define pragmas_x86_h_ - -#define pragmas_have_mulscale - -static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c) -{ - _asm { - mov ecx, c - mov eax, a - imul d - shrd eax, edx, cl - } -} - -#define EDUKE32_SCALER_PRAGMA(x) \ -static __inline int32_t mulscale##x (int32_t a, int32_t d) \ -{ \ - _asm mov eax, a \ - _asm imul d \ - _asm shrd eax, edx, x \ -} \ -static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \ -{ \ - _asm mov eax, a \ - _asm imul d \ - _asm mov ebx, eax \ - _asm mov eax, S \ - _asm mov esi, edx \ - _asm imul D \ - _asm add eax, ebx \ - _asm adc edx, esi \ - _asm shrd eax, edx, x \ -} \ -static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \ -{ \ - _asm mov eax, a \ - _asm mov ebx, b \ - _asm imul d \ - _asm xchg eax, ebx \ - _asm mov ecx, c \ - _asm xchg edx, ecx \ - _asm imul edx \ - _asm add ebx, eax \ - _asm adc ecx, edx \ - _asm mov eax, S \ - _asm imul D \ - _asm add eax, ebx \ - _asm adc edx, ecx \ - _asm shrd eax, edx, x \ -} \ - -EDUKE32_GENERATE_PRAGMAS -#undef EDUKE32_SCALER_PRAGMA - -static __inline int32_t mulscale32(int32_t a, int32_t d) -{ - _asm { - mov eax, a - imul d - mov eax, edx - } -} - -static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c) -{ - _asm { - mov ecx, c - mov eax, a - imul d - mov ebx, eax - mov eax, S - mov esi, edx - imul D - add eax, ebx - adc edx, esi - shrd eax, edx, cl - } -} - -static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) -{ - _asm { - mov eax, a - imul d - mov ebx, eax - mov eax, S - mov esi, edx - imul D - add eax, ebx - adc edx, esi - mov eax, edx - } -} - -static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) -{ - _asm { - mov eax, a - mov ebx, b - imul d - xchg eax, ebx - mov ecx, c - xchg edx, ecx - imul edx - add ebx, eax - adc ecx, edx - mov eax, S - imul D - add eax, ebx - adc edx, ecx - mov eax, edx - } -} - -#define pragmas_have_clearbuf - -static __inline void clearbuf(void *d, int32_t c, int32_t a) -{ - _asm { - mov edi, d - mov ecx, c - mov eax, a - rep stosd - } -} - -#define pragmas_have_clearbufbyte - -static __inline void clearbufbyte(void *d, int32_t c, int32_t a) -{ - _asm { - mov edi, d - mov ecx, c - mov eax, a - cmp ecx, 4 - jae longcopy - test cl, 1 - jz preskip - stosb - preskip : - shr ecx, 1 - rep stosw - jmp endit - longcopy : - test edi, 1 - jz skip1 - stosb - dec ecx - skip1 : - test edi, 2 - jz skip2 - stosw - sub ecx, 2 - skip2 : - mov ebx, ecx - shr ecx, 2 - rep stosd - test bl, 2 - jz skip3 - stosw - skip3 : - test bl, 1 - jz endit - stosb - endit : - } -} - -#define pragmas_have_copybuf - -static __inline void copybuf(const void *s, void *d, int32_t c) -{ - _asm { - mov esi, s - mov edi, d - mov ecx, c - rep movsd - } -} - -#define pragmas_have_copybufbyte - -static __inline void copybufbyte(const void *s, void *d, int32_t c) -{ - _asm { - mov esi, s - mov edi, d - mov ecx, c - cmp ecx, 4 - jae longcopy - test cl, 1 - jz preskip - movsb - preskip : - shr ecx, 1 - rep movsw - jmp endit - longcopy : - test edi, 1 - jz skip1 - movsb - dec ecx - skip1 : - test edi, 2 - jz skip2 - movsw - sub ecx, 2 - skip2 : - mov ebx, ecx - shr ecx, 2 - rep movsd - test bl, 2 - jz skip3 - movsw - skip3 : - test bl, 1 - jz endit - movsb - endit : - } -} - -#define pragmas_have_copybufreverse - -static __inline void copybufreverse(const void *s, void *d, int32_t c) -{ - _asm { - mov esi, s - mov edi, d - mov ecx, c - shr ecx, 1 - jnc skipit1 - mov al, byte ptr[esi] - dec esi - mov byte ptr[edi], al - inc edi - skipit1 : - shr ecx, 1 - jnc skipit2 - mov ax, word ptr[esi-1] - sub esi, 2 - ror ax, 8 - mov word ptr[edi], ax - add edi, 2 - skipit2: - test ecx, ecx - jz endloop - begloop : - mov eax, dword ptr[esi-3] - sub esi, 4 - bswap eax - mov dword ptr[edi], eax - add edi, 4 - dec ecx - jnz begloop - endloop : - } -} - -#define pragmas_have_qinterpolatedown16 - -static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s) -{ - _asm { - mov eax, a - mov ecx, c - mov edx, d - mov esi, s - mov ebx, ecx - shr ecx, 1 - jz skipbegcalc - begqcalc : - lea edi, [edx+esi] - sar edx, 16 - mov dword ptr[eax], edx - lea edx, [edi+esi] - sar edi, 16 - mov dword ptr[eax+4], edi - add eax, 8 - dec ecx - jnz begqcalc - test ebx, 1 - jz skipbegqcalc2 - skipbegcalc : - sar edx, 16 - mov dword ptr[eax], edx - skipbegqcalc2 : - } -} - -static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s) -{ - _asm { - mov eax, a - mov ecx, c - mov edx, d - mov esi, s - test ecx, ecx - jz endit - test al, 2 - jz skipalignit - mov ebx, edx - sar ebx, 16 - mov word ptr[eax], bx - add edx, esi - add eax, 2 - dec ecx - jz endit - skipalignit : - sub ecx, 2 - jc finishit - begqcalc : - mov ebx, edx - add edx, esi - sar ebx, 16 - mov edi, edx - and edi, 0ffff0000h - add edx, esi - add ebx, edi - mov dword ptr[eax], ebx - add eax, 4 - sub ecx, 2 - jnc begqcalc - test cl, 1 - jz endit - finishit : - mov ebx, edx - sar ebx, 16 - mov word ptr[eax], bx - endit : - } -} - -#define pragmas_have_klabs - -static __inline int32_t klabs(int32_t a) -{ - _asm { - mov eax, a - test eax, eax - jns skipnegate - neg eax - skipnegate : - } -} - -#define pragmas_have_ksgn - -static __inline int ksgn(int32_t b) -{ - _asm { - mov ebx, b - add ebx, ebx - sbb eax, eax - cmp eax, ebx - adc al, 0 - } -} - -#define pragmas_have_swaps - -static __inline void swapchar(void *a, void *b) -{ - _asm { - mov eax, a - mov ebx, b - mov cl, [eax] - mov ch, [ebx] - mov[ebx], cl - mov[eax], ch - } -} - -static __inline void swapshort(void *a, void *b) -{ - _asm { - mov eax, a - mov ebx, b - mov cx, [eax] - mov dx, [ebx] - mov[ebx], cx - mov[eax], dx - } -} - -static __inline void swaplong(void *a, void *b) -{ - _asm { - mov eax, a - mov ebx, b - mov ecx, [eax] - mov edx, [ebx] - mov[ebx], ecx - mov[eax], edx - } -} - -#define swapfloat swaplong - -static __inline void swapbuf4(void *a, void *b, int32_t c) -{ - _asm { - mov eax, a - mov ebx, b - mov ecx, c - begswap : - mov esi, [eax] - mov edi, [ebx] - mov[ebx], esi - mov[eax], edi - add eax, 4 - add ebx, 4 - dec ecx - jnz short begswap - } -} - -static __inline void swap64bit(void *a, void *b) -{ - _asm { - mov eax, a - mov ebx, b - mov ecx, [eax] - mov edx, [ebx] - mov[ebx], ecx - mov ecx, [eax+4] - mov[eax], edx - mov edx, [ebx+4] - mov[ebx+4], ecx - mov[eax+4], edx - } -} - -#define swapdouble swap64bit - -//swapchar2(ptr1,ptr2,xsiz); is the same as: -//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz); -static __inline void swapchar2(void *a, void *b, int32_t s) -{ - _asm { - mov eax, a - mov ebx, b - mov esi, s - add esi, ebx - mov cx, [eax] - mov dl, [ebx] - mov[ebx], cl - mov dh, [esi] - mov[esi], ch - mov[eax], dx - } -} - -#define pragmas_have_krecipasm - -//0x007ff000 is (11<<13), 0x3f800000 is (127<<23) -static inline int32_t krecipasm(int32_t a) -{ - _asm - { - push ebx - mov eax, a - mov fpuasm, eax - fild dword ptr fpuasm - add eax, eax - fstp dword ptr fpuasm - sbb ebx, ebx - mov eax, fpuasm - mov ecx, eax - and eax, 0x007ff000 - shr eax, 10 - sub ecx, 0x3f800000 - shr ecx, 23 - mov eax, dword ptr reciptable[eax] - sar eax, cl - xor eax, ebx - pop ebx - } -} - -//}}} - -#endif // pragmas_x86_h_ -#endif // pragmas_h_