/*
 * Patch overview (free-form preamble placed ahead of the first "diff --git"
 * header; no hunk below is altered).
 *
 * Each of the four file diffs below adds a three-product helper, tmulscaleN,
 * beside the existing two-product dmulscaleN helper. Operands are multiplied
 * pairwise and the products are summed before the scale shift:
 *
 *   pragmas.h          tmulscale##a(eax, edx, ebx, ecx, esi, edi) returns
 *                      dw((qw(eax)*edx + qw(ebx)*ecx + qw(esi)*edi) >> by(a)).
 *   pragmas_ppc.h      tmulscale##x(a, d, b, c, S, D): mullw/mulhw produce the
 *                      low/high word of a*d, b*c and S*D; addc/adde accumulate
 *                      them into sumlo/sumhi; srwi + insrwi apply the shift by
 *                      x and sumlo is returned. tmulscale32 has no shift and
 *                      returns sumhi.
 *   pragmas_x86_gcc.h  statement-expression macros tmulscale1..tmulscale32:
 *                      imull for a*d, b*c and S*D, addl/adcl to sum them in
 *                      edx:eax, then "shrdl $N, %%edx, %%eax" with a result of
 *                      __a. tmulscale32 omits the shrdl and yields __d.
 *   pragmas_x86_msvc.h _asm versions of the same sequence; tmulscale##x ends
 *                      with "shrd eax, edx, x" and tmulscale32 with
 *                      "mov eax, edx". Neither has a return statement --
 *                      presumably eax is relied on as the return register
 *                      (TODO confirm against the MSVC inline-asm rules).
 *
 * NOTE(review): the pragmas.h, pragmas_ppc.h and pragmas_x86_msvc.h hunks end
 * what appears to be the EDUKE32_SCALER_PRAGMA macro body with "} \". Line
 * breaks are not recoverable from this copy of the patch, so confirm that a
 * blank line still separates that backslash from EDUKE32_GENERATE_PRAGMAS in
 * each file; otherwise the invocation would be absorbed into the definition.
 */
diff --git a/polymer/eduke32/build/include/pragmas.h b/polymer/eduke32/build/include/pragmas.h index 66884954c..b646f5e27 100644 --- a/polymer/eduke32/build/include/pragmas.h +++ b/polymer/eduke32/build/include/pragmas.h @@ -168,7 +168,11 @@ FORCE_INLINE int32_t sqr(int32_t a) { return a * a; } FORCE_INLINE int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_t edi) \ { \ return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(a)); \ - } + } \ + FORCE_INLINE int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \ + { \ + return dw(((qw(eax) * edx) + (qw(ebx) * ecx) + (qw(esi) * edi)) >> by(a)); \ + } \ EDUKE32_GENERATE_PRAGMAS EDUKE32_SCALER_PRAGMA(32) diff --git a/polymer/eduke32/build/include/pragmas_ppc.h b/polymer/eduke32/build/include/pragmas_ppc.h index 4ac849c0a..e52bfba97 100644 --- a/polymer/eduke32/build/include/pragmas_ppc.h +++ b/polymer/eduke32/build/include/pragmas_ppc.h @@ -37,7 +37,29 @@ static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \ : "xer" \ ); \ return sumlo; \ -} +} \ +static inline int32_t tmulscale##x(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \ +{ \ + int32_t mulhi, mullo, sumhi, sumlo; \ + __asm__( \ + " mullw %0, %4, %5\n" \ + " mulhw %1, %4, %5\n" \ + " mullw %2, %6, %7\n" \ + " mulhw %3, %6, %7\n" \ + " addc %0, %0, %2\n" \ + " adde %1, %1, %3\n" \ + " mullw %2, %8, %9\n" \ + " mulhw %3, %8, %9\n" \ + " addc %0, %0, %2\n" \ + " adde %1, %1, %3\n" \ + " srwi %0, %0, %10\n" \ + " insrwi %0, %1, %10, 0\n" \ + : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) \ + : "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D), "i"(x) \ + : "xer" \ + ); \ + return sumlo; \ +} \ EDUKE32_GENERATE_PRAGMAS #undef EDUKE32_SCALER_PRAGMA @@ -106,6 +128,27 @@ static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) return sumhi; } +static inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) 
+{ + int32_t mulhi, mullo, sumhi, sumlo; + __asm__( + " mullw %0, %4, %5\n" + " mulhw %1, %4, %5\n" + " mullw %2, %6, %7\n" + " mulhw %3, %6, %7\n" + " addc %0, %0, %2\n" + " adde %1, %1, %3\n" + " mullw %2, %8, %9\n" + " mulhw %3, %8, %9\n" + " addc %0, %0, %2\n" + " adde %1, %1, %3\n" + : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) + : "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D) + : "xer" + ); + return sumhi; +} + #define pragmas_have_klabs static inline int32_t klabs(int32_t a) diff --git a/polymer/eduke32/build/include/pragmas_x86_gcc.h b/polymer/eduke32/build/include/pragmas_x86_gcc.h index ee0ebee84..f65cdee15 100644 --- a/polymer/eduke32/build/include/pragmas_x86_gcc.h +++ b/polymer/eduke32/build/include/pragmas_x86_gcc.h @@ -447,6 +447,263 @@ : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \ __d; }) +#define tmulscale1(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale2(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale3(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; 
" \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale4(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale5(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale6(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale7(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, 
%%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale8(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale9(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale10(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale11(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl 
%%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale12(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale13(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale14(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale15(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, 
%%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale16(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale17(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale18(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale19(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), 
"=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale20(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale21(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale22(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale23(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" 
(__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale24(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale25(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale26(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale27(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" 
(__D) : "cc"); \ + __a; }) +#define tmulscale28(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale29(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale30(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define tmulscale31(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __a; }) +#define 
tmulscale32(a,d,b,c,S,D) \ + ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \ + __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \ + "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \ + "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \ + : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \ + : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \ + __d; }) + #define pragmas_have_clearbuf #define clearbuf(D,c,a) \ diff --git a/polymer/eduke32/build/include/pragmas_x86_msvc.h b/polymer/eduke32/build/include/pragmas_x86_msvc.h index 6904b1c1b..4e7a32fbb 100644 --- a/polymer/eduke32/build/include/pragmas_x86_msvc.h +++ b/polymer/eduke32/build/include/pragmas_x86_msvc.h @@ -39,7 +39,23 @@ static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D _asm adc edx, esi \ _asm shrd eax, edx, x \ } \ - +static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \ +{ \ + _asm mov eax, a \ + _asm mov ebx, b \ + _asm imul d \ + _asm xchg eax, ebx \ + _asm mov ecx, c \ + _asm xchg edx, ecx \ + _asm imul edx \ + _asm add ebx, eax \ + _asm adc ecx, edx \ + _asm mov eax, S \ + _asm imul D \ + _asm add eax, ebx \ + _asm adc edx, ecx \ + _asm shrd eax, edx, x \ +} \ EDUKE32_GENERATE_PRAGMAS #undef EDUKE32_SCALER_PRAGMA @@ -84,6 +100,26 @@ static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) } } +static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) +{ + _asm { + mov eax, a + mov ebx, b + imul d + xchg eax, ebx + mov ecx, c + xchg edx, ecx + imul edx + add ebx, eax + adc ecx, edx + mov eax, S + imul D + add eax, ebx + adc edx, ecx + mov eax, edx + } +} + #define pragmas_have_clearbuf static __inline void clearbuf(void *d, int32_t c, int32_t a)