- removed all inline assembly crap.

Since no division function was handled here this is totally pointless on modern systems and will even on 32 bit cause performance degradation due to lack of optimizability.
The rest of that code in pragmas.h and pragmas.cpp should probably also be put under review. Much of it made sense 20 years ago but not anymore.
This commit is contained in:
Christoph Oelckers 2019-09-08 21:19:23 +02:00
parent b389c99bf5
commit 6299d2fe86
5 changed files with 1 additions and 1569 deletions

View file

@ -118,27 +118,6 @@ static inline int32_t roundscale(int32_t eax, int32_t edx, int32_t ecx)
return scaleadd(eax, edx, ecx / 2, ecx);
}
#if defined(__GNUC__) && defined(GEKKO)
// GCC Inline Assembler version (PowerPC)
#include "pragmas_ppc.h"
#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM)
// GCC Inline Assembler version (x86)
#include "pragmas_x86_gcc.h"
#elif defined(_MSC_VER) && !defined(NOASM) // __GNUC__
// Microsoft C inline assembler
#include "pragmas_x86_msvc.h"
#elif defined(__arm__) // _MSC_VER
// GCC Inline Assembler version (ARM)
#include "pragmas_arm.h"
#endif
//
// Generic C
@ -185,51 +164,13 @@ static FORCE_INLINE void swapptr(void *a, void *b)
#endif
#ifndef pragmas_have_swaps
#ifdef __cplusplus
#define swapchar swap
#define swapshort swap
#define swaplong swap
#define swapfloat swap
#define swapdouble swap
#define swap64bit swap
#else
static FORCE_INLINE void swapchar(void *a, void *b)
{
char const t = *(char *)b;
*(char *)b = *(char *)a;
*(char *)a = t;
}
static FORCE_INLINE void swapshort(void *a, void *b)
{
int16_t const t = *(int16_t *)b;
*(int16_t *)b = *(int16_t *)a;
*(int16_t *)a = t;
}
static FORCE_INLINE void swaplong(void *a, void *b)
{
int32_t const t = *(int32_t *)b;
*(int32_t *)b = *(int32_t *)a;
*(int32_t *)a = t;
}
static FORCE_INLINE void swapfloat(void *a, void *b)
{
float const t = *(float *)b;
*(float *)b = *(float *)a;
*(float *)a = t;
}
static FORCE_INLINE void swapdouble(void *a, void *b)
{
double const t = *(double *)b;
*(double *)b = *(double *)a;
*(double *)a = t;
}
static FORCE_INLINE void swap64bit(void *a, void *b)
{
uint64_t const t = *(uint64_t *)b;
*(uint64_t *)b = *(uint64_t *)a;
*(uint64_t *)a = t;
}
#endif
static FORCE_INLINE void swapchar2(void *a, void *b, int32_t s)
{
swapchar((char *)a, (char *)b);

View file

@ -1,10 +0,0 @@
//
// GCC Inline Assembler version (ARM)
//
#ifndef pragmas_arm_h_
#define pragmas_arm_h_
// :(
#endif

View file

@ -1,186 +0,0 @@
// GCC Inline Assembler version (PowerPC)
#ifdef pragmas_h_
#ifndef pragmas_ppc_h_
#define pragmas_ppc_h_
#define pragmas_have_mulscale
#define EDUKE32_SCALER_PRAGMA(x) \
static inline int32_t mulscale##x(int32_t a, int32_t d) \
{ \
int32_t mullo, mulhi; \
__asm__ ( \
" mullw %0, %2, %3\n" \
" mulhw %1, %2, %3\n" \
" srwi %0, %0, %4\n" \
" insrwi %0, %1, %4, 0\n" \
: "=&r"(mullo), "=r"(mulhi) \
: "r"(a), "r"(d), "i"(x) \
); \
return mullo; \
} \
static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
int32_t mulhi, mullo, sumhi, sumlo; \
__asm__ ( \
" mullw %0, %4, %5\n" \
" mulhw %1, %4, %5\n" \
" mullw %2, %6, %7\n" \
" mulhw %3, %6, %7\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" srwi %0, %0, %8\n" \
" insrwi %0, %1, %8, 0\n" \
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \
: "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \
: "xer" \
); \
return sumlo; \
} \
static inline int32_t tmulscale##x(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
{ \
int32_t mulhi, mullo, sumhi, sumlo; \
__asm__( \
" mullw %0, %4, %5\n" \
" mulhw %1, %4, %5\n" \
" mullw %2, %6, %7\n" \
" mulhw %3, %6, %7\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" mullw %2, %8, %9\n" \
" mulhw %3, %8, %9\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" srwi %0, %0, %10\n" \
" insrwi %0, %1, %10, 0\n" \
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) \
: "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D), "i"(x) \
: "xer" \
); \
return sumlo; \
} \
EDUKE32_GENERATE_PRAGMAS
#undef EDUKE32_SCALER_PRAGMA
static inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{
int32_t mullo, mulhi;
__asm__(
" mullw %0, %2, %3\n"
" mulhw %1, %2, %3\n"
" srw %0, %0, %4\n"
" slw %1, %1, %5\n"
" or %0, %0, %1\n"
: "=&r"(mullo), "=&r"(mulhi)
: "r"(a), "r"(d), "r"(c), "r"(32-c)
: "xer"
);
return mullo;
}
static inline int32_t mulscale32(int32_t a, int32_t d)
{
int32_t mulhi;
__asm__(
" mulhw %0, %1, %2\n"
: "=r"(mulhi)
: "r"(a), "r"(d)
);
return mulhi;
}
static inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
{
int32_t mulhi, mullo, sumhi, sumlo;
__asm__(
" mullw %0, %4, %5\n"
" mulhw %1, %4, %5\n"
" mullw %2, %6, %7\n"
" mulhw %3, %6, %7\n"
" addc %0, %0, %2\n"
" adde %1, %1, %3\n"
" srw %0, %0, %8\n"
" slw %1, %1, %9\n"
" or %0, %0, %1\n"
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi)
: "r"(a), "r"(d), "r"(S), "r"(D), "r"(c), "r"(32-c)
: "xer"
);
return sumlo;
}
static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
int32_t mulhi, mullo, sumhi, sumlo;
__asm__(\
" mullw %0, %4, %5\n" \
" mulhw %1, %4, %5\n" \
" mullw %2, %6, %7\n" \
" mulhw %3, %6, %7\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi)
: "r"(a), "r"(d), "r"(S), "r"(D)
: "xer"
);
return sumhi;
}
static inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
{
int32_t mulhi, mullo, sumhi, sumlo;
__asm__(
" mullw %0, %4, %5\n"
" mulhw %1, %4, %5\n"
" mullw %2, %6, %7\n"
" mulhw %3, %6, %7\n"
" addc %0, %0, %2\n"
" adde %1, %1, %3\n"
" mullw %2, %8, %9\n"
" mulhw %3, %8, %9\n"
" addc %0, %0, %2\n"
" adde %1, %1, %3\n"
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi)
: "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D)
: "xer"
);
return sumhi;
}
#define pragmas_have_klabs
static inline int32_t klabs(int32_t a)
{
int32_t mask;
__asm__(
" srawi %0, %1, 31\n"
" xor %1, %0, %1\n"
" subf %1, %0, %1\n"
: "=&r"(mask), "+r"(a)
:
: "xer"
);
return a;
}
#define pragmas_have_ksgn
static inline int ksgn(int32_t a)
{
int32_t s, t;
__asm__(
" neg %1, %2\n"
" srawi %0, %2, 31\n"
" srwi %1, %1, 31\n"
" or %1, %1, %0\n"
: "=r"(t), "=&r"(s)
: "r"(a)
: "xer"
);
return s;
}
#endif // pragmas_ppc_h_
#endif // pragmas_h_

View file

@ -1,821 +0,0 @@
//
// GCC Inline Assembler version (x86)
//
//{{{
#ifdef pragmas_h_
#ifndef pragmas_x86_h_
#define pragmas_x86_h_
#define pragmas_have_mulscale
#define mulscale(a,d,c) \
({ int32_t __a=(a), __d=(d), __c=(c); \
__asm__ __volatile__ ("imull %%edx; shrdl %%cl, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d), "c" (__c) : "cc"); \
__a; })
#define mulscale1(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $1, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale2(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $2, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale3(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $3, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale4(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $4, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale5(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $5, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale6(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $6, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale7(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $7, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale8(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $8, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale9(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $9, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale10(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $10, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale11(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $11, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale12(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $12, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale13(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $13, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale14(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $14, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale15(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $15, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale16(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $16, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale17(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $17, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale18(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $18, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale19(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $19, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale20(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $20, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale21(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $21, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale22(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $22, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale23(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $23, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale24(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $24, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale25(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $25, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale26(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $26, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale27(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $27, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale28(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $28, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale29(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $29, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale30(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $30, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale31(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx; shrdl $31, %%edx, %%eax" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__a; })
#define mulscale32(a,d) \
({ int32_t __a=(a), __d=(d); \
__asm__ __volatile__ ("imull %%edx" \
: "=a" (__a), "=d" (__d) \
: "a" (__a), "d" (__d) : "cc"); \
__d; })
#define dmulscale(a,d,S,D,c) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D), __c=(c); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl %%cl, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D), "c" (__c) : "ebx", "cc"); \
__a; })
#define dmulscale1(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $1, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale2(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $2, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale3(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $3, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale4(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $4, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale5(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $5, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale6(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $6, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale7(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $7, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale8(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $8, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale9(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $9, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale10(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $10, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale11(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $11, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale12(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $12, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale13(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $13, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale14(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $14, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale15(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $15, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale16(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $16, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale17(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $17, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale18(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $18, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale19(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $19, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale20(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $20, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale21(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $21, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale22(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $22, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale23(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $23, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale24(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $24, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale25(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $25, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale26(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $26, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale27(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $27, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale28(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $28, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale29(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $29, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale30(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $30, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale31(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $31, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__a; })
#define dmulscale32(a,d,S,D) \
({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx" \
: "=a" (__a), "=d" (__d), "=S" (__S) \
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__d; })
#define tmulscale1(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale2(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale3(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale4(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale5(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale6(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale7(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale8(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale9(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale10(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale11(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale12(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale13(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale14(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale15(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale16(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale17(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale18(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale19(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale20(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale21(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale22(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale23(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale24(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale25(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale26(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale27(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale28(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale29(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale30(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale31(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale32(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__d; })
#define pragmas_have_clearbuf
#define clearbuf(D,c,a) \
({ void *__D=(D); int32_t __c=(c), __a=(a); \
__asm__ __volatile__ ("rep; stosl" \
: "=&D" (__D), "=&c" (__c) : "0" (__D), "1" (__c), "a" (__a) : "memory", "cc"); \
0; })
#define pragmas_have_copybuf
#define copybuf(S,D,c) \
({ const void *__S=(S), *__D=(D); int32_t __c=(c); \
__asm__ __volatile__ ("rep; movsl" \
: "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \
0; })
#define pragmas_have_klabs
#define klabs(a) \
({ int32_t __a=(a); \
__asm__ __volatile__ ("testl %%eax, %%eax; jns 0f; negl %%eax; 0:" \
: "=a" (__a) : "a" (__a) : "cc"); \
__a; })
#define pragmas_have_ksgn
#define ksgn(b) \
({ int32_t __b=(b), __r; \
__asm__ __volatile__ ("addl %%ebx, %%ebx; sbbl %%eax, %%eax; cmpl %%ebx, %%eax; adcb $0, %%al" \
: "=a" (__r) : "b" (__b) : "cc"); \
__r; })
#define pragmas_have_swaps
#define swapchar(a,b) \
({ void *__a=(a), *__b=(b); \
__asm__ __volatile__ ("movb (%%eax), %%cl; movb (%%ebx), %%ch; movb %%cl, (%%ebx); movb %%ch, (%%eax)" \
: : "a" (__a), "b" (__b) : "ecx", "memory", "cc"); \
0; })
#define swapshort(a,b) \
({ void *__a=(a), *__b=(b); \
__asm__ __volatile__ ("movw (%%eax), %%cx; movw (%%ebx), %%dx; movw %%cx, (%%ebx); movw %%dx, (%%eax)" \
: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
0; })
#define swaplong(a,b) \
({ void *__a=(a), *__b=(b); \
__asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \
: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
0; })
#define swapfloat swaplong
#define swapbuf4(a,b,c) \
({ void *__a=(a), *__b=(b); int32_t __c=(c); \
__asm__ __volatile__ ("0: movl (%%eax), %%esi; movl (%%ebx), %%edi; movl %%esi, (%%ebx); " \
"movl %%edi, (%%eax); addl $4, %%eax; addl $4, %%ebx; decl %%ecx; jnz 0b" \
: : "a" (__a), "b" (__b), "c" (__c) : "esi", "edi", "memory", "cc"); \
0; })
#define swap64bit(a,b) \
({ void *__a=(a), *__b=(b); \
__asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); " \
"movl 4(%%eax), %%ecx; movl %%edx, (%%eax); movl 4(%%ebx), %%edx; " \
"movl %%ecx, 4(%%ebx); movl %%edx, 4(%%eax)" \
: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
0; })
#define swapdouble swap64bit
//swapchar2(ptr1,ptr2,xsiz); is the same as:
//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
#define swapchar2(a,b,S) \
({ void *__a=(a), *__b=(b); int32_t __S=(S); \
__asm__ __volatile__ ("addl %%ebx, %%esi; movw (%%eax), %%cx; movb (%%ebx), %%dl; " \
"movb %%cl, (%%ebx); movb (%%esi), %%dh; movb %%ch, (%%esi); " \
"movw %%dx, (%%eax)" \
: "=S" (__S) : "a" (__a), "b" (__b), "S" (__S) : "ecx", "edx", "memory", "cc"); \
0; })
#define pragmas_have_qinterpolatedown16
#define qinterpolatedown16(a,c,d,S) \
({ void *__a=(void*)(a); int32_t __c=(c), __d=(d), __S=(S); \
__asm__ __volatile__ ("movl %%ecx, %%ebx; shrl $1, %%ecx; jz 1f; " \
"0: leal (%%edx,%%esi,), %%edi; sarl $16, %%edx; movl %%edx, (%%eax); " \
"leal (%%edi,%%esi,), %%edx; sarl $16, %%edi; movl %%edi, 4(%%eax); " \
"addl $8, %%eax; decl %%ecx; jnz 0b; testl $1, %%ebx; jz 2f; " \
"1: sarl $16, %%edx; movl %%edx, (%%eax); 2:" \
: "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \
: "ebx", "edi", "memory", "cc"); \
0; })
#define qinterpolatedown16short(a,c,d,S) \
({ void *__a=(void*)(a); int32_t __c=(c), __d=(d), __S=(S); \
__asm__ __volatile__ ("testl %%ecx, %%ecx; jz 3f; testb $2, %%al; jz 0f; movl %%edx, %%ebx; " \
"sarl $16, %%ebx; movw %%bx, (%%eax); addl %%esi, %%edx; addl $2, %%eax; " \
"decl %%ecx; jz 3f; " \
"0: subl $2, %%ecx; jc 2f; " \
"1: movl %%edx, %%ebx; addl %%esi, %%edx; sarl $16, %%ebx; movl %%edx, %%edi; " \
"andl $0xffff0000, %%edi; addl %%esi, %%edx; addl %%edi, %%ebx; " \
"movl %%ebx, (%%eax); addl $4, %%eax; subl $2, %%ecx; jnc 1b; testb $1, %%cl; " \
"jz 3f; " \
"2: movl %%edx, %%ebx; sarl $16, %%ebx; movw %%bx, (%%eax); 3:" \
: "=a" (__a), "=c" (__c), "=d" (__d) : "a" (__a), "c" (__c), "d" (__d), "S" (__S) \
: "ebx", "edi", "memory", "cc"); \
0; })
#define pragmas_have_krecipasm
#define krecipasm(a) \
({ int32_t __a=(a); \
__asm__ __volatile__ ( \
"movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \
"addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \
"movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \
"andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \
"shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \
"sarl %%cl, %%eax; xorl %%ebx, %%eax" \
: "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \
__a; })
//}}}
#endif // pragmas_x86_h_
#endif // pragmas_h_

View file

@ -1,492 +0,0 @@
//
// Microsoft C inline assembler
//
//{{{
#ifdef pragmas_h_
#ifndef pragmas_x86_h_
#define pragmas_x86_h_
#define pragmas_have_mulscale
static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{
_asm {
mov ecx, c
mov eax, a
imul d
shrd eax, edx, cl
}
}
#define EDUKE32_SCALER_PRAGMA(x) \
static __inline int32_t mulscale##x (int32_t a, int32_t d) \
{ \
_asm mov eax, a \
_asm imul d \
_asm shrd eax, edx, x \
} \
static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
_asm mov eax, a \
_asm imul d \
_asm mov ebx, eax \
_asm mov eax, S \
_asm mov esi, edx \
_asm imul D \
_asm add eax, ebx \
_asm adc edx, esi \
_asm shrd eax, edx, x \
} \
static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
{ \
_asm mov eax, a \
_asm mov ebx, b \
_asm imul d \
_asm xchg eax, ebx \
_asm mov ecx, c \
_asm xchg edx, ecx \
_asm imul edx \
_asm add ebx, eax \
_asm adc ecx, edx \
_asm mov eax, S \
_asm imul D \
_asm add eax, ebx \
_asm adc edx, ecx \
_asm shrd eax, edx, x \
} \
EDUKE32_GENERATE_PRAGMAS
#undef EDUKE32_SCALER_PRAGMA
static __inline int32_t mulscale32(int32_t a, int32_t d)
{
_asm {
mov eax, a
imul d
mov eax, edx
}
}
static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
{
_asm {
mov ecx, c
mov eax, a
imul d
mov ebx, eax
mov eax, S
mov esi, edx
imul D
add eax, ebx
adc edx, esi
shrd eax, edx, cl
}
}
static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
_asm {
mov eax, a
imul d
mov ebx, eax
mov eax, S
mov esi, edx
imul D
add eax, ebx
adc edx, esi
mov eax, edx
}
}
static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
{
_asm {
mov eax, a
mov ebx, b
imul d
xchg eax, ebx
mov ecx, c
xchg edx, ecx
imul edx
add ebx, eax
adc ecx, edx
mov eax, S
imul D
add eax, ebx
adc edx, ecx
mov eax, edx
}
}
#define pragmas_have_clearbuf
static __inline void clearbuf(void *d, int32_t c, int32_t a)
{
_asm {
mov edi, d
mov ecx, c
mov eax, a
rep stosd
}
}
#define pragmas_have_clearbufbyte
static __inline void clearbufbyte(void *d, int32_t c, int32_t a)
{
_asm {
mov edi, d
mov ecx, c
mov eax, a
cmp ecx, 4
jae longcopy
test cl, 1
jz preskip
stosb
preskip :
shr ecx, 1
rep stosw
jmp endit
longcopy :
test edi, 1
jz skip1
stosb
dec ecx
skip1 :
test edi, 2
jz skip2
stosw
sub ecx, 2
skip2 :
mov ebx, ecx
shr ecx, 2
rep stosd
test bl, 2
jz skip3
stosw
skip3 :
test bl, 1
jz endit
stosb
endit :
}
}
#define pragmas_have_copybuf
static __inline void copybuf(const void *s, void *d, int32_t c)
{
_asm {
mov esi, s
mov edi, d
mov ecx, c
rep movsd
}
}
#define pragmas_have_copybufbyte
static __inline void copybufbyte(const void *s, void *d, int32_t c)
{
_asm {
mov esi, s
mov edi, d
mov ecx, c
cmp ecx, 4
jae longcopy
test cl, 1
jz preskip
movsb
preskip :
shr ecx, 1
rep movsw
jmp endit
longcopy :
test edi, 1
jz skip1
movsb
dec ecx
skip1 :
test edi, 2
jz skip2
movsw
sub ecx, 2
skip2 :
mov ebx, ecx
shr ecx, 2
rep movsd
test bl, 2
jz skip3
movsw
skip3 :
test bl, 1
jz endit
movsb
endit :
}
}
#define pragmas_have_copybufreverse
static __inline void copybufreverse(const void *s, void *d, int32_t c)
{
_asm {
mov esi, s
mov edi, d
mov ecx, c
shr ecx, 1
jnc skipit1
mov al, byte ptr[esi]
dec esi
mov byte ptr[edi], al
inc edi
skipit1 :
shr ecx, 1
jnc skipit2
mov ax, word ptr[esi-1]
sub esi, 2
ror ax, 8
mov word ptr[edi], ax
add edi, 2
skipit2:
test ecx, ecx
jz endloop
begloop :
mov eax, dword ptr[esi-3]
sub esi, 4
bswap eax
mov dword ptr[edi], eax
add edi, 4
dec ecx
jnz begloop
endloop :
}
}
#define pragmas_have_qinterpolatedown16
static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s)
{
_asm {
mov eax, a
mov ecx, c
mov edx, d
mov esi, s
mov ebx, ecx
shr ecx, 1
jz skipbegcalc
begqcalc :
lea edi, [edx+esi]
sar edx, 16
mov dword ptr[eax], edx
lea edx, [edi+esi]
sar edi, 16
mov dword ptr[eax+4], edi
add eax, 8
dec ecx
jnz begqcalc
test ebx, 1
jz skipbegqcalc2
skipbegcalc :
sar edx, 16
mov dword ptr[eax], edx
skipbegqcalc2 :
}
}
static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s)
{
_asm {
mov eax, a
mov ecx, c
mov edx, d
mov esi, s
test ecx, ecx
jz endit
test al, 2
jz skipalignit
mov ebx, edx
sar ebx, 16
mov word ptr[eax], bx
add edx, esi
add eax, 2
dec ecx
jz endit
skipalignit :
sub ecx, 2
jc finishit
begqcalc :
mov ebx, edx
add edx, esi
sar ebx, 16
mov edi, edx
and edi, 0ffff0000h
add edx, esi
add ebx, edi
mov dword ptr[eax], ebx
add eax, 4
sub ecx, 2
jnc begqcalc
test cl, 1
jz endit
finishit :
mov ebx, edx
sar ebx, 16
mov word ptr[eax], bx
endit :
}
}
#define pragmas_have_klabs
static __inline int32_t klabs(int32_t a)
{
_asm {
mov eax, a
test eax, eax
jns skipnegate
neg eax
skipnegate :
}
}
#define pragmas_have_ksgn
static __inline int ksgn(int32_t b)
{
_asm {
mov ebx, b
add ebx, ebx
sbb eax, eax
cmp eax, ebx
adc al, 0
}
}
#define pragmas_have_swaps
static __inline void swapchar(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov cl, [eax]
mov ch, [ebx]
mov[ebx], cl
mov[eax], ch
}
}
static __inline void swapshort(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov cx, [eax]
mov dx, [ebx]
mov[ebx], cx
mov[eax], dx
}
}
static __inline void swaplong(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov ecx, [eax]
mov edx, [ebx]
mov[ebx], ecx
mov[eax], edx
}
}
#define swapfloat swaplong
static __inline void swapbuf4(void *a, void *b, int32_t c)
{
_asm {
mov eax, a
mov ebx, b
mov ecx, c
begswap :
mov esi, [eax]
mov edi, [ebx]
mov[ebx], esi
mov[eax], edi
add eax, 4
add ebx, 4
dec ecx
jnz short begswap
}
}
static __inline void swap64bit(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov ecx, [eax]
mov edx, [ebx]
mov[ebx], ecx
mov ecx, [eax+4]
mov[eax], edx
mov edx, [ebx+4]
mov[ebx+4], ecx
mov[eax+4], edx
}
}
#define swapdouble swap64bit
//swapchar2(ptr1,ptr2,xsiz); is the same as:
//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
static __inline void swapchar2(void *a, void *b, int32_t s)
{
_asm {
mov eax, a
mov ebx, b
mov esi, s
add esi, ebx
mov cx, [eax]
mov dl, [ebx]
mov[ebx], cl
mov dh, [esi]
mov[esi], ch
mov[eax], dx
}
}
#define pragmas_have_krecipasm
//0x007ff000 is (11<<13), 0x3f800000 is (127<<23)
static inline int32_t krecipasm(int32_t a)
{
_asm
{
push ebx
mov eax, a
mov fpuasm, eax
fild dword ptr fpuasm
add eax, eax
fstp dword ptr fpuasm
sbb ebx, ebx
mov eax, fpuasm
mov ecx, eax
and eax, 0x007ff000
shr eax, 10
sub ecx, 0x3f800000
shr ecx, 23
mov eax, dword ptr reciptable[eax]
sar eax, cl
xor eax, ebx
pop ebx
}
}
//}}}
#endif // pragmas_x86_h_
#endif // pragmas_h_