mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-27 09:20:51 +00:00
- removed all inline assembly crap.
Since no division function was handled here, this is totally pointless on modern systems and will, even on 32-bit targets, cause performance degradation because the compiler cannot optimize through inline assembly. The rest of that code in pragmas.h and pragmas.cpp should probably also be put under review; much of it made sense 20 years ago, but not anymore.
This commit is contained in:
parent
b389c99bf5
commit
6299d2fe86
5 changed files with 1 additions and 1569 deletions
|
@ -118,27 +118,6 @@ static inline int32_t roundscale(int32_t eax, int32_t edx, int32_t ecx)
|
|||
return scaleadd(eax, edx, ecx / 2, ecx);
|
||||
}
|
||||
|
||||
#if defined(__GNUC__) && defined(GEKKO)
|
||||
|
||||
// GCC Inline Assembler version (PowerPC)
|
||||
#include "pragmas_ppc.h"
|
||||
|
||||
#elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM)
|
||||
|
||||
// GCC Inline Assembler version (x86)
|
||||
#include "pragmas_x86_gcc.h"
|
||||
|
||||
#elif defined(_MSC_VER) && !defined(NOASM) // __GNUC__
|
||||
|
||||
// Microsoft C inline assembler
|
||||
#include "pragmas_x86_msvc.h"
|
||||
|
||||
#elif defined(__arm__) // _MSC_VER
|
||||
|
||||
// GCC Inline Assembler version (ARM)
|
||||
#include "pragmas_arm.h"
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// Generic C
|
||||
|
@ -185,51 +164,13 @@ static FORCE_INLINE void swapptr(void *a, void *b)
|
|||
#endif
|
||||
|
||||
#ifndef pragmas_have_swaps
|
||||
#ifdef __cplusplus
|
||||
// C++ builds: each typed swap helper name is simply an alias for `swap`.
// NOTE(review): `swap` itself is not defined in the visible part of this
// file; presumably a generic overload/template declared elsewhere -- confirm.
#define swapchar swap
|
||||
#define swapshort swap
|
||||
#define swaplong swap
|
||||
#define swapfloat swap
|
||||
#define swapdouble swap
|
||||
#define swap64bit swap
|
||||
#else
|
||||
// Exchanges the two `char` objects that a and b point to.
// Both pointers are untyped; they are reinterpreted as `char *` here.
static FORCE_INLINE void swapchar(void *a, void *b)
{
    char *const first = (char *)a;
    char *const second = (char *)b;
    char const saved = *second;

    *second = *first;
    *first = saved;
}
|
||||
// Exchanges the two `int16_t` objects that a and b point to.
// Both pointers are untyped; they are reinterpreted as `int16_t *` here.
static FORCE_INLINE void swapshort(void *a, void *b)
{
    int16_t *const first = (int16_t *)a;
    int16_t *const second = (int16_t *)b;
    int16_t const saved = *second;

    *second = *first;
    *first = saved;
}
|
||||
// Exchanges the two `int32_t` objects that a and b point to.
// Both pointers are untyped; they are reinterpreted as `int32_t *` here.
static FORCE_INLINE void swaplong(void *a, void *b)
{
    int32_t *const first = (int32_t *)a;
    int32_t *const second = (int32_t *)b;
    int32_t const saved = *second;

    *second = *first;
    *first = saved;
}
|
||||
// Exchanges the two `float` objects that a and b point to.
// Both pointers are untyped; they are reinterpreted as `float *` here.
static FORCE_INLINE void swapfloat(void *a, void *b)
{
    float *const first = (float *)a;
    float *const second = (float *)b;
    float const saved = *second;

    *second = *first;
    *first = saved;
}
|
||||
// Exchanges the two `double` objects that a and b point to.
// Both pointers are untyped; they are reinterpreted as `double *` here.
static FORCE_INLINE void swapdouble(void *a, void *b)
{
    double *const first = (double *)a;
    double *const second = (double *)b;
    double const saved = *second;

    *second = *first;
    *first = saved;
}
|
||||
// Exchanges the two 64-bit objects that a and b point to.
// Both pointers are untyped; they are reinterpreted as `uint64_t *` here.
static FORCE_INLINE void swap64bit(void *a, void *b)
{
    uint64_t *const first = (uint64_t *)a;
    uint64_t *const second = (uint64_t *)b;
    uint64_t const saved = *second;

    *second = *first;
    *first = saved;
}
|
||||
#endif
|
||||
|
||||
static FORCE_INLINE void swapchar2(void *a, void *b, int32_t s)
|
||||
{
|
||||
swapchar((char *)a, (char *)b);
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
//
|
||||
// GCC Inline Assembler version (ARM)
|
||||
//
|
||||
|
||||
#ifndef pragmas_arm_h_
|
||||
#define pragmas_arm_h_
|
||||
|
||||
// :( -- no ARM inline-assembly versions have been written; this header is intentionally empty.
|
||||
|
||||
#endif
|
|
@ -1,186 +0,0 @@
|
|||
// GCC Inline Assembler version (PowerPC)
|
||||
|
||||
#ifdef pragmas_h_
|
||||
#ifndef pragmas_ppc_h_
|
||||
#define pragmas_ppc_h_
|
||||
|
||||
#define pragmas_have_mulscale
|
||||
|
||||
// Generator for the constant-shift PowerPC scalers.  For a shift count x the
// macro defines three functions:
//   mulscale##x(a, d)              -> (a*d) >> x
//   dmulscale##x(a, d, S, D)       -> (a*d + S*D) >> x
//   tmulscale##x(a, d, b, c, S, D) -> (a*d + b*c + S*D) >> x
// Each product is built as a 64-bit value from mullw (low word) and mulhw
// (signed high word); sums are carried across the two words with addc/adde.
// The final shift is done on the word pair: srwi shifts the low word right
// by x, and insrwi then inserts the low x bits of the high word into the top
// x bits of the result, so only the low 32 bits of the shifted value are
// returned.  x is passed with the "i" constraint, i.e. it must be a
// compile-time constant.
// Outputs that are written before the last input has been read are marked
// early-clobber ("=&r").  The d/t variants list "xer" as clobbered because
// addc/adde use the carry bit; mulscale##x has no clobber list.
// NOTE(review): EDUKE32_GENERATE_PRAGMAS is defined outside this header;
// presumably it instantiates EDUKE32_SCALER_PRAGMA once per supported shift
// count -- confirm against pragmas.h.
#define EDUKE32_SCALER_PRAGMA(x) \
|
||||
static inline int32_t mulscale##x(int32_t a, int32_t d) \
|
||||
{ \
|
||||
int32_t mullo, mulhi; \
|
||||
__asm__ ( \
|
||||
" mullw %0, %2, %3\n" \
|
||||
" mulhw %1, %2, %3\n" \
|
||||
" srwi %0, %0, %4\n" \
|
||||
" insrwi %0, %1, %4, 0\n" \
|
||||
: "=&r"(mullo), "=r"(mulhi) \
|
||||
: "r"(a), "r"(d), "i"(x) \
|
||||
); \
|
||||
return mullo; \
|
||||
} \
|
||||
static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \
|
||||
{ \
|
||||
int32_t mulhi, mullo, sumhi, sumlo; \
|
||||
__asm__ ( \
|
||||
" mullw %0, %4, %5\n" \
|
||||
" mulhw %1, %4, %5\n" \
|
||||
" mullw %2, %6, %7\n" \
|
||||
" mulhw %3, %6, %7\n" \
|
||||
" addc %0, %0, %2\n" \
|
||||
" adde %1, %1, %3\n" \
|
||||
" srwi %0, %0, %8\n" \
|
||||
" insrwi %0, %1, %8, 0\n" \
|
||||
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \
|
||||
: "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \
|
||||
: "xer" \
|
||||
); \
|
||||
return sumlo; \
|
||||
} \
|
||||
static inline int32_t tmulscale##x(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
|
||||
{ \
|
||||
int32_t mulhi, mullo, sumhi, sumlo; \
|
||||
__asm__( \
|
||||
" mullw %0, %4, %5\n" \
|
||||
" mulhw %1, %4, %5\n" \
|
||||
" mullw %2, %6, %7\n" \
|
||||
" mulhw %3, %6, %7\n" \
|
||||
" addc %0, %0, %2\n" \
|
||||
" adde %1, %1, %3\n" \
|
||||
" mullw %2, %8, %9\n" \
|
||||
" mulhw %3, %8, %9\n" \
|
||||
" addc %0, %0, %2\n" \
|
||||
" adde %1, %1, %3\n" \
|
||||
" srwi %0, %0, %10\n" \
|
||||
" insrwi %0, %1, %10, 0\n" \
|
||||
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi) \
|
||||
: "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D), "i"(x) \
|
||||
: "xer" \
|
||||
); \
|
||||
return sumlo; \
|
||||
} \
|
||||
|
||||
EDUKE32_GENERATE_PRAGMAS
|
||||
#undef EDUKE32_SCALER_PRAGMA
|
||||
|
||||
// Variable-shift scaler (PowerPC): returns the low 32 bits of (a*d) >> c.
// mullw/mulhw produce the low and signed high words of the product; the
// result is assembled as (low >> c) | (high << (32 - c)).  The value 32-c is
// computed in C and handed to the asm as its own register operand.
// Both outputs are early-clobber because they are written before the shift
// operands have been read.
// NOTE(review): c is presumably expected to lie in 0..31 -- confirm callers.
// NOTE(review): none of the instructions used here appears to modify XER, so
// the "xer" clobber looks merely conservative -- confirm.
static inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
|
||||
{
|
||||
int32_t mullo, mulhi;
|
||||
__asm__(
|
||||
" mullw %0, %2, %3\n"
|
||||
" mulhw %1, %2, %3\n"
|
||||
" srw %0, %0, %4\n"
|
||||
" slw %1, %1, %5\n"
|
||||
" or %0, %0, %1\n"
|
||||
: "=&r"(mullo), "=&r"(mulhi)
|
||||
: "r"(a), "r"(d), "r"(c), "r"(32-c)
|
||||
: "xer"
|
||||
);
|
||||
return mullo;
|
||||
}
|
||||
|
||||
// Returns the high 32 bits of the signed 64-bit product a*d, i.e.
// (a*d) >> 32, using a single mulhw instruction.
static inline int32_t mulscale32(int32_t a, int32_t d)
|
||||
{
|
||||
int32_t mulhi;
|
||||
__asm__(
|
||||
" mulhw %0, %1, %2\n"
|
||||
: "=r"(mulhi)
|
||||
: "r"(a), "r"(d)
|
||||
);
|
||||
return mulhi;
|
||||
}
|
||||
|
||||
// Variable-shift double scaler (PowerPC): returns the low 32 bits of
// (a*d + S*D) >> c.
// Each product is formed as a low/high word pair (mullw/mulhw); the pairs are
// added with addc/adde so the carry out of the low word reaches the high
// word.  The 64-bit sum is then shifted as (low >> c) | (high << (32 - c)),
// with 32-c computed in C and passed as a separate register operand.
// "xer" is listed as clobbered because addc/adde use the carry bit.
// NOTE(review): c is presumably expected to lie in 0..31 -- confirm callers.
static inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
|
||||
{
|
||||
int32_t mulhi, mullo, sumhi, sumlo;
|
||||
__asm__(
|
||||
" mullw %0, %4, %5\n"
|
||||
" mulhw %1, %4, %5\n"
|
||||
" mullw %2, %6, %7\n"
|
||||
" mulhw %3, %6, %7\n"
|
||||
" addc %0, %0, %2\n"
|
||||
" adde %1, %1, %3\n"
|
||||
" srw %0, %0, %8\n"
|
||||
" slw %1, %1, %9\n"
|
||||
" or %0, %0, %1\n"
|
||||
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi)
|
||||
: "r"(a), "r"(d), "r"(S), "r"(D), "r"(c), "r"(32-c)
|
||||
: "xer"
|
||||
);
|
||||
return sumlo;
|
||||
}
|
||||
|
||||
// Returns the high 32 bits of the 64-bit sum a*d + S*D, i.e.
// (a*d + S*D) >> 32.  The two products are built with mullw/mulhw and added
// with addc/adde; only the high word of the sum (sumhi) is returned, the low
// word is computed solely to generate the carry.
// The trailing backslashes inside the asm statement are plain line
// continuations (this is a function, not a macro) and do not change the code.
static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
|
||||
{
|
||||
int32_t mulhi, mullo, sumhi, sumlo;
|
||||
__asm__(\
|
||||
" mullw %0, %4, %5\n" \
|
||||
" mulhw %1, %4, %5\n" \
|
||||
" mullw %2, %6, %7\n" \
|
||||
" mulhw %3, %6, %7\n" \
|
||||
" addc %0, %0, %2\n" \
|
||||
" adde %1, %1, %3\n" \
|
||||
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi)
|
||||
: "r"(a), "r"(d), "r"(S), "r"(D)
|
||||
: "xer"
|
||||
);
|
||||
return sumhi;
|
||||
}
|
||||
|
||||
// Returns the high 32 bits of the 64-bit sum a*d + b*c + S*D, i.e.
// (a*d + b*c + S*D) >> 32.  Each product is formed with mullw/mulhw and
// accumulated into the sumlo/sumhi pair with addc/adde; mullo/mulhi are
// scratch registers reused for the second and third products.
// All outputs are early-clobber because inputs are still read after the
// first outputs have been written.
static inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
|
||||
{
|
||||
int32_t mulhi, mullo, sumhi, sumlo;
|
||||
__asm__(
|
||||
" mullw %0, %4, %5\n"
|
||||
" mulhw %1, %4, %5\n"
|
||||
" mullw %2, %6, %7\n"
|
||||
" mulhw %3, %6, %7\n"
|
||||
" addc %0, %0, %2\n"
|
||||
" adde %1, %1, %3\n"
|
||||
" mullw %2, %8, %9\n"
|
||||
" mulhw %3, %8, %9\n"
|
||||
" addc %0, %0, %2\n"
|
||||
" adde %1, %1, %3\n"
|
||||
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi)
|
||||
: "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D)
|
||||
: "xer"
|
||||
);
|
||||
return sumhi;
|
||||
}
|
||||
|
||||
#define pragmas_have_klabs
|
||||
|
||||
// Branch-free absolute value (PowerPC).
// srawi builds mask = a >> 31 (all ones when a is negative, zero otherwise);
// the value is then computed in place as (a ^ mask) - mask.  `a` is a
// read-write operand ("+r") and is returned after the asm has modified it.
// "xer" is clobbered because srawi sets the carry bit.
// NOTE(review): by this arithmetic INT32_MIN maps to itself -- confirm that
// callers do not rely on a non-negative result for that input.
static inline int32_t klabs(int32_t a)
|
||||
{
|
||||
int32_t mask;
|
||||
__asm__(
|
||||
" srawi %0, %1, 31\n"
|
||||
" xor %1, %0, %1\n"
|
||||
" subf %1, %0, %1\n"
|
||||
: "=&r"(mask), "+r"(a)
|
||||
:
|
||||
: "xer"
|
||||
);
|
||||
return a;
|
||||
}
|
||||
|
||||
#define pragmas_have_ksgn
|
||||
|
||||
// Branch-free sign function (PowerPC): returns -1, 0 or 1 for negative, zero
// and positive a respectively.
//   s = -a                 (neg)
//   t = a >> 31            (srawi: all ones if a < 0, else 0)
//   s = (unsigned)s >> 31  (srwi: 1 if -a has its sign bit set, else 0)
//   s = s | t              (or)
// s is early-clobber because it is written before `a` is read for the last
// time.  "xer" is clobbered because srawi sets the carry bit.
static inline int ksgn(int32_t a)
|
||||
{
|
||||
int32_t s, t;
|
||||
__asm__(
|
||||
" neg %1, %2\n"
|
||||
" srawi %0, %2, 31\n"
|
||||
" srwi %1, %1, 31\n"
|
||||
" or %1, %1, %0\n"
|
||||
: "=r"(t), "=&r"(s)
|
||||
: "r"(a)
|
||||
: "xer"
|
||||
);
|
||||
return s;
|
||||
}
|
||||
|
||||
#endif // pragmas_ppc_h_
|
||||
#endif // pragmas_h_
|
|
@ -1,821 +0,0 @@
|
|||
//
|
||||
// GCC Inline Assembler version (x86)
|
||||
//
|
||||
|
||||
//{{{
|
||||
|
||||
#ifdef pragmas_h_
|
||||
#ifndef pragmas_x86_h_
|
||||
#define pragmas_x86_h_
|
||||
|
||||
#define pragmas_have_mulscale
|
||||
|
||||
// x86 (GCC inline assembly) fixed-point multiply helpers.
//
// Every macro copies each argument once into an int32_t local, forms the
// signed 64-bit product a*d in EDX:EAX with the one-operand `imull`, and uses
// `shrdl` to shift EAX right while filling it from EDX.  The value of the
// statement expression is therefore the low 32 bits of (a*d) >> shift.

// mulscale(a,d,c): the shift count c is a run-time value passed in CL.
#define mulscale(a,d,c) \
    ({ int32_t __a=(a), __d=(d), __c=(c); \
       __asm__ __volatile__ ("imull %%edx; shrdl %%cl, %%edx, %%eax" \
        : "=a" (__a), "=d" (__d) \
        : "a" (__a), "d" (__d), "c" (__c) : "cc"); \
     __a; })

// Shared body of mulscale1..mulscale31.  `shift` must be a literal decimal
// constant: it is stringized directly into the `shrdl` immediate, so the
// instruction text is exactly what the former hand-expanded macros contained.
#define EDUKE32_X86_MULSCALE_IMM(a,d,shift) \
    ({ int32_t __a=(a), __d=(d); \
       __asm__ __volatile__ ("imull %%edx; shrdl $" #shift ", %%edx, %%eax" \
        : "=a" (__a), "=d" (__d) \
        : "a" (__a), "d" (__d) : "cc"); \
     __a; })

#define mulscale1(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),1)
#define mulscale2(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),2)
#define mulscale3(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),3)
#define mulscale4(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),4)
#define mulscale5(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),5)
#define mulscale6(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),6)
#define mulscale7(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),7)
#define mulscale8(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),8)
#define mulscale9(a,d)  EDUKE32_X86_MULSCALE_IMM((a),(d),9)
#define mulscale10(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),10)
#define mulscale11(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),11)
#define mulscale12(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),12)
#define mulscale13(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),13)
#define mulscale14(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),14)
#define mulscale15(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),15)
#define mulscale16(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),16)
#define mulscale17(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),17)
#define mulscale18(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),18)
#define mulscale19(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),19)
#define mulscale20(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),20)
#define mulscale21(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),21)
#define mulscale22(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),22)
#define mulscale23(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),23)
#define mulscale24(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),24)
#define mulscale25(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),25)
#define mulscale26(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),26)
#define mulscale27(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),27)
#define mulscale28(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),28)
#define mulscale29(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),29)
#define mulscale30(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),30)
#define mulscale31(a,d) EDUKE32_X86_MULSCALE_IMM((a),(d),31)

// mulscale32(a,d): no shift instruction is needed; the result is the high
// word of the product, which `imull` leaves in EDX.
#define mulscale32(a,d) \
    ({ int32_t __a=(a), __d=(d); \
       __asm__ __volatile__ ("imull %%edx" \
        : "=a" (__a), "=d" (__d) \
        : "a" (__a), "d" (__d) : "cc"); \
     __d; })
|
||||
|
||||
// x86 (GCC inline assembly) double fixed-point multiply helpers.
//
// Every macro copies each argument once into an int32_t local and computes
// the 64-bit sum a*d + S*D:
//   imull %edx        EDX:EAX = a*d
//   movl  ...         park that product in ESI:EBX and load S into EAX
//   imull %edi        EDX:EAX = S*D
//   addl/adcl         EDX:EAX += ESI:EBX (carry propagated into the high word)
// `shrdl` then shifts EAX right while filling it from EDX, so the value of
// the statement expression is the low 32 bits of (a*d + S*D) >> shift.
// EBX is used as scratch and is therefore listed as clobbered.

// dmulscale(a,d,S,D,c): the shift count c is a run-time value passed in CL.
#define dmulscale(a,d,S,D,c) \
    ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D), __c=(c); \
       __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
                             "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl %%cl, %%edx, %%eax" \
        : "=a" (__a), "=d" (__d), "=S" (__S) \
        : "a" (__a), "d" (__d), "S" (__S), "D" (__D), "c" (__c) : "ebx", "cc"); \
     __a; })

// Shared body of dmulscale1..dmulscale31.  `shift` must be a literal decimal
// constant: it is stringized directly into the `shrdl` immediate, so the
// instruction text is exactly what the former hand-expanded macros contained.
#define EDUKE32_X86_DMULSCALE_IMM(a,d,S,D,shift) \
    ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
       __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
                             "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx; shrdl $" #shift ", %%edx, %%eax" \
        : "=a" (__a), "=d" (__d), "=S" (__S) \
        : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
     __a; })

#define dmulscale1(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),1)
#define dmulscale2(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),2)
#define dmulscale3(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),3)
#define dmulscale4(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),4)
#define dmulscale5(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),5)
#define dmulscale6(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),6)
#define dmulscale7(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),7)
#define dmulscale8(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),8)
#define dmulscale9(a,d,S,D)  EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),9)
#define dmulscale10(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),10)
#define dmulscale11(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),11)
#define dmulscale12(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),12)
#define dmulscale13(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),13)
#define dmulscale14(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),14)
#define dmulscale15(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),15)
#define dmulscale16(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),16)
#define dmulscale17(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),17)
#define dmulscale18(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),18)
#define dmulscale19(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),19)
#define dmulscale20(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),20)
#define dmulscale21(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),21)
#define dmulscale22(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),22)
#define dmulscale23(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),23)
#define dmulscale24(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),24)
#define dmulscale25(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),25)
#define dmulscale26(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),26)
#define dmulscale27(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),27)
#define dmulscale28(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),28)
#define dmulscale29(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),29)
#define dmulscale30(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),30)
#define dmulscale31(a,d,S,D) EDUKE32_X86_DMULSCALE_IMM((a),(d),(S),(D),31)

// dmulscale32(a,d,S,D): no shift instruction is needed; the result is the
// high word of the 64-bit sum, which is left in EDX.
#define dmulscale32(a,d,S,D) \
    ({ int32_t __a=(a), __d=(d), __S=(S), __D=(D); \
       __asm__ __volatile__ ("imull %%edx; movl %%eax, %%ebx; movl %%esi, %%eax; movl %%edx, %%esi; " \
                             "imull %%edi; addl %%ebx, %%eax; adcl %%esi, %%edx" \
        : "=a" (__a), "=d" (__d), "=S" (__S) \
        : "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
     __d; })
|
||||
|
||||
// x86 (GCC inline assembly) triple fixed-point multiply helpers,
// tmulscale1..tmulscale7.
//
// Each macro copies every argument once into an int32_t local and computes
// the 64-bit sum a*d + b*c + S*D:
//   imull %edx        EDX:EAX = a*d
//   xchgl x2          move that product to ECX:EBX and bring b, c into EAX, EDX
//   imull %edx        EDX:EAX = b*c
//   addl/adcl         ECX:EBX += EDX:EAX
//   movl, imull %edi  EDX:EAX = S*D
//   addl/adcl         EDX:EAX += ECX:EBX
// `shrdl` then shifts EAX right while filling it from EDX, so the value of
// the statement expression is the low 32 bits of the sum >> shift.

// Shared body.  `shift` must be a literal decimal constant: it is stringized
// directly into the `shrdl` immediate, so the instruction text is exactly
// what the former hand-expanded macros contained.
#define EDUKE32_X86_TMULSCALE_IMM(a,d,b,c,S,D,shift) \
    ({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
       __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
                             "imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
                             "imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $" #shift ", %%edx, %%eax" \
        : "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
        : "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
     __a; })

#define tmulscale1(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),1)
#define tmulscale2(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),2)
#define tmulscale3(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),3)
#define tmulscale4(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),4)
#define tmulscale5(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),5)
#define tmulscale6(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),6)
#define tmulscale7(a,d,b,c,S,D) EDUKE32_X86_TMULSCALE_IMM((a),(d),(b),(c),(S),(D),7)
|
||||
#define tmulscale8(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale9(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale10(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale11(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale12(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale13(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale14(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale15(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale16(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale17(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale18(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale19(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale20(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale21(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale22(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale23(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale24(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale25(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale26(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale27(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale28(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale29(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale30(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale31(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__a; })
|
||||
#define tmulscale32(a,d,b,c,S,D) \
|
||||
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
|
||||
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
|
||||
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
|
||||
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \
|
||||
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
|
||||
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
|
||||
__d; })
|
||||
|
||||
#define pragmas_have_clearbuf
|
||||
|
||||
// clearbuf(D,c,a): store the 32-bit value `a` into `c` consecutive 32-bit
// slots starting at address D. The expression evaluates to 0.
//
// The stores are done bytewise from the value's native representation, so D
// needs no particular alignment (the former `rep stosl` did not either).
// A count <= 0 stores nothing.
static inline int pragmas_x86_clearbuf_(void *dst, int32_t count, int32_t value)
{
    unsigned char *out = (unsigned char *)dst;
    unsigned char const *pattern = (unsigned char const *)&value;

    for (int32_t i = 0; i < count; ++i)
        for (unsigned k = 0; k < sizeof(value); ++k)
            *out++ = pattern[k];

    return 0;
}

#define clearbuf(D,c,a) pragmas_x86_clearbuf_((D),(c),(a))
|
||||
|
||||
#define pragmas_have_copybuf
|
||||
|
||||
// copybuf(S,D,c): copy `c` 32-bit units from S to D in ascending address
// order. The expression evaluates to 0.
//
// Each unit is fully read before it is written, matching the granularity of
// the former `rep movsl`. A count <= 0 copies nothing.
static inline int pragmas_x86_copybuf_(const void *src, void *dst, int32_t count)
{
    unsigned char const *in = (unsigned char const *)src;
    unsigned char *out = (unsigned char *)dst;

    for (int32_t i = 0; i < count; ++i)
    {
        unsigned char unit[4];
        for (unsigned k = 0; k < sizeof(unit); ++k)
            unit[k] = *in++;
        for (unsigned k = 0; k < sizeof(unit); ++k)
            *out++ = unit[k];
    }

    return 0;
}

#define copybuf(S,D,c) pragmas_x86_copybuf_((S),(D),(c))
|
||||
|
||||
#define pragmas_have_klabs
|
||||
|
||||
// klabs(a): absolute value of a 32-bit integer.
// INT32_MIN has no positive counterpart and is returned unchanged, exactly
// as the former `negl` did.
static inline int32_t pragmas_x86_klabs_(int32_t a)
{
    // Negate in unsigned arithmetic to avoid signed-overflow UB on INT32_MIN.
    return (a < 0) ? (int32_t)(0u - (uint32_t)a) : a;
}

#define klabs(a) pragmas_x86_klabs_(a)
|
||||
|
||||
#define pragmas_have_ksgn
|
||||
|
||||
// ksgn(b): sign of a 32-bit integer: -1 if b < 0, 0 if b == 0, 1 if b > 0.
static inline int32_t pragmas_x86_ksgn_(int32_t b)
{
    return (b > 0) - (b < 0);
}

#define ksgn(b) pragmas_x86_ksgn_(b)
|
||||
|
||||
#define pragmas_have_swaps
|
||||
|
||||
// swapchar(a,b): exchange the single bytes addressed by a and b.
// Both bytes are read before either is written, so a == b is harmless.
// The expression evaluates to 0.
static inline int pragmas_x86_swapchar_(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char const old_a = *pa;
    unsigned char const old_b = *pb;

    *pb = old_a;
    *pa = old_b;
    return 0;
}

#define swapchar(a,b) pragmas_x86_swapchar_((a),(b))
|
||||
// swapshort(a,b): exchange the 2-byte objects addressed by a and b.
// Both objects are read in full before either is written. Access is bytewise,
// so no alignment is required. The expression evaluates to 0.
static inline int pragmas_x86_swapshort_(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char const a0 = pa[0], a1 = pa[1];
    unsigned char const b0 = pb[0], b1 = pb[1];

    pb[0] = a0; pb[1] = a1;
    pa[0] = b0; pa[1] = b1;
    return 0;
}

#define swapshort(a,b) pragmas_x86_swapshort_((a),(b))
|
||||
// swaplong(a,b): exchange the 4-byte objects addressed by a and b.
// Both objects are read in full before either is written. Access is bytewise,
// so no alignment is required. The expression evaluates to 0.
static inline int pragmas_x86_swaplong_(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char old_a[4], old_b[4];

    for (unsigned k = 0; k < 4; ++k)
    {
        old_a[k] = pa[k];
        old_b[k] = pb[k];
    }
    for (unsigned k = 0; k < 4; ++k)
        pb[k] = old_a[k];
    for (unsigned k = 0; k < 4; ++k)
        pa[k] = old_b[k];

    return 0;
}

#define swaplong(a,b) pragmas_x86_swaplong_((a),(b))
|
||||
#define swapfloat swaplong
|
||||
// swapbuf4(a,b,c): exchange `c` consecutive 4-byte units between the buffers
// at a and b, in ascending address order. The expression evaluates to 0.
//
// A count <= 0 swaps nothing. The former decl/jnz loop had no such guard and
// ran 2^32 iterations when handed a count of 0.
static inline int pragmas_x86_swapbuf4_(void *a, void *b, int32_t count)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;

    for (int32_t i = 0; i < count; ++i, pa += 4, pb += 4)
    {
        unsigned char old_a[4], old_b[4];

        // Read both units before writing either, like the paired loads did.
        for (unsigned k = 0; k < 4; ++k)
        {
            old_a[k] = pa[k];
            old_b[k] = pb[k];
        }
        for (unsigned k = 0; k < 4; ++k)
            pb[k] = old_a[k];
        for (unsigned k = 0; k < 4; ++k)
            pa[k] = old_b[k];
    }

    return 0;
}

#define swapbuf4(a,b,c) pragmas_x86_swapbuf4_((a),(b),(c))
|
||||
// swap64bit(a,b): exchange the 8-byte objects addressed by a and b.
// Access is bytewise, so no alignment is required. The expression evaluates
// to 0. a == b is harmless; partially overlapping operands are not supported.
static inline int pragmas_x86_swap64bit_(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char old_a[8], old_b[8];

    for (unsigned k = 0; k < 8; ++k)
    {
        old_a[k] = pa[k];
        old_b[k] = pb[k];
    }
    for (unsigned k = 0; k < 8; ++k)
        pb[k] = old_a[k];
    for (unsigned k = 0; k < 8; ++k)
        pa[k] = old_b[k];

    return 0;
}

#define swap64bit(a,b) pragmas_x86_swap64bit_((a),(b))
|
||||
#define swapdouble swap64bit
|
||||
//swapchar2(ptr1,ptr2,xsiz); is the same as:
|
||||
//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
|
||||
// swapchar2(a,b,S): exchange byte a[0] with b[0] and byte a[1] with b[S].
// The loads and stores happen in the same order as in the former asm
// sequence. The expression evaluates to 0.
static inline int pragmas_x86_swapchar2_(void *a, void *b, int32_t stride)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char *pb_far = pb + stride;

    unsigned char const a0 = pa[0], a1 = pa[1];
    unsigned char const b0 = pb[0];
    pb[0] = a0;

    // b[S] is read only after b[0] has been written, as in the original.
    unsigned char const b_far = *pb_far;
    *pb_far = a1;

    pa[0] = b0;
    pa[1] = b_far;
    return 0;
}

#define swapchar2(a,b,S) pragmas_x86_swapchar2_((a),(b),(S))
|
||||
|
||||
|
||||
#define pragmas_have_qinterpolatedown16
|
||||
|
||||
// qinterpolatedown16(a,c,d,S): fill `c` consecutive 32-bit slots at address
// `a` with (d + i*S) >> 16 for i = 0..c-1. The running value wraps modulo
// 2^32 and the shift is arithmetic. The expression evaluates to 0.
//
// A count <= 0 stores nothing. The former asm jumped straight to its
// single-element tail when c >> 1 was zero, so a count of 0 still wrote one
// element.
static inline int pragmas_x86_qinterpolatedown16_(void *buf, int32_t count, int32_t start, int32_t step)
{
    unsigned char *out = (unsigned char *)buf;
    uint32_t acc = (uint32_t)start;   // unsigned so the accumulation wraps

    for (int32_t i = 0; i < count; ++i)
    {
        int32_t const sample = (int32_t)acc >> 16;
        unsigned char const *bytes = (unsigned char const *)&sample;

        // Bytewise store: no alignment requirement on the destination.
        for (unsigned k = 0; k < sizeof(sample); ++k)
            *out++ = bytes[k];

        acc += (uint32_t)step;
    }

    return 0;
}

#define qinterpolatedown16(a,c,d,S) pragmas_x86_qinterpolatedown16_((void*)(a),(c),(d),(S))
|
||||
|
||||
// qinterpolatedown16short(a,c,d,S): fill `c` consecutive 16-bit slots at
// address `a` with the low 16 bits of (d + i*S) >> 16 for i = 0..c-1. The
// running value wraps modulo 2^32 and the shift is arithmetic. The expression
// evaluates to 0. A count <= 0 stores nothing.
//
// The former asm wrote two samples per 32-bit store by adding the
// sign-extended first sample to the masked second one. When the first sample
// was negative, that sign extension subtracted 1 from the second sample.
// Every sample is now stored on its own, so all are exact.
static inline int pragmas_x86_qinterpolatedown16short_(void *buf, int32_t count, int32_t start, int32_t step)
{
    unsigned char *out = (unsigned char *)buf;
    uint32_t acc = (uint32_t)start;   // unsigned so the accumulation wraps

    for (int32_t i = 0; i < count; ++i)
    {
        // (int32_t)acc >> 16 always lies in [-32768, 32767].
        int16_t const sample = (int16_t)((int32_t)acc >> 16);
        unsigned char const *bytes = (unsigned char const *)&sample;

        // Bytewise store: no alignment requirement on the destination.
        for (unsigned k = 0; k < sizeof(sample); ++k)
            *out++ = bytes[k];

        acc += (uint32_t)step;
    }

    return 0;
}

#define qinterpolatedown16short(a,c,d,S) pragmas_x86_qinterpolatedown16short_((void*)(a),(c),(d),(S))
|
||||
|
||||
#define pragmas_have_krecipasm
|
||||
|
||||
// krecipasm(a): table lookup driven by the single-precision float encoding
// of the integer `a`; evaluates to the resulting int32_t.
//
// Steps performed by the asm below:
//   1. `a` is stored to the fpuasm symbol, loaded as an integer (fildl) and
//      written back to fpuasm as a 32-bit float (fstps).
//   2. "addl %eax,%eax; sbbl %ebx,%ebx" leaves ebx = -1 when `a` is
//      negative and 0 otherwise.
//   3. Bits 12..22 of the float image (mask 0x007ff000) shifted right by 10
//      give a byte offset, a multiple of 4, into reciptable.
//   4. The float image minus 0x3f800000, shifted right by 23, supplies the
//      shift count in cl.
//   5. The table entry is arithmetically shifted right by cl and XORed with
//      the sign mask in ebx.
//
// fpuasm is used as scratch memory through its symbol name, so every
// expansion of this macro writes to the same location.
// NOTE(review): presumably a fast fixed-point reciprocal approximation; the
// scaling depends on the contents of reciptable, which are not visible here.
#define krecipasm(a) \
|
||||
({ int32_t __a=(a); \
|
||||
__asm__ __volatile__ ( \
|
||||
"movl %%eax, (" ASMSYM("fpuasm") "); fildl (" ASMSYM("fpuasm") "); " \
|
||||
"addl %%eax, %%eax; fstps (" ASMSYM("fpuasm") "); sbbl %%ebx, %%ebx; " \
|
||||
"movl (" ASMSYM("fpuasm") "), %%eax; movl %%eax, %%ecx; " \
|
||||
"andl $0x007ff000, %%eax; shrl $10, %%eax; subl $0x3f800000, %%ecx; " \
|
||||
"shrl $23, %%ecx; movl " ASMSYM("reciptable") "(%%eax), %%eax; " \
|
||||
"sarl %%cl, %%eax; xorl %%ebx, %%eax" \
|
||||
: "=a" (__a) : "a" (__a) : "ebx", "ecx", "memory", "cc"); \
|
||||
__a; })
|
||||
|
||||
//}}}
|
||||
|
||||
#endif // pragmas_x86_h_
|
||||
#endif // pragmas_h_
|
|
@ -1,492 +0,0 @@
|
|||
//
|
||||
// Microsoft C inline assembler
|
||||
//
|
||||
|
||||
//{{{
|
||||
|
||||
#ifdef pragmas_h_
|
||||
#ifndef pragmas_x86_h_
|
||||
#define pragmas_x86_h_
|
||||
|
||||
#define pragmas_have_mulscale
|
||||
|
||||
// mulscale(a, d, c): 64-bit product a*d shifted right by c, truncated to
// 32 bits. Only the low five bits of c are used as the shift count, as with
// the former `shrd eax, edx, cl`.
//
// Written in plain C: `_asm` blocks are rejected by MSVC when targeting
// x64/ARM, and the old body returned its result only implicitly through eax.
static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{
    uint64_t const product = (uint64_t)((int64_t)a * d);

    // Only the low 32 bits of the shifted product are kept, so a logical
    // shift gives the same answer as an arithmetic one.
    return (int32_t)(uint32_t)(product >> (c & 31));
}
|
||||
|
||||
// Helpers shared by the functions EDUKE32_SCALER_PRAGMA generates.

// Widening signed 32x32 -> 64 multiply. The result is returned as unsigned so
// that sums of several products wrap instead of overflowing.
static __inline uint64_t pragmas_msvc_mul64_(int32_t x, int32_t y)
{
    return (uint64_t)((int64_t)x * y);
}

// Bits shift..shift+31 of a 64-bit accumulator, as an int32_t.
static __inline int32_t pragmas_msvc_scale64_(uint64_t sum, int shift)
{
    return (int32_t)(uint32_t)(sum >> shift);
}

// EDUKE32_SCALER_PRAGMA(x) defines, for a constant shift x:
//   mulscale##x(a,d)            -> (a*d) >> x
//   dmulscale##x(a,d,S,D)       -> (a*d + S*D) >> x
//   tmulscale##x(a,d,b,c,S,D)   -> (a*d + b*c + S*D) >> x
// The sums are formed in 64 bits and the result is truncated to 32 bits.
// Plain C replaces the former `_asm` bodies, which MSVC rejects on x64/ARM
// and which returned their result only implicitly through eax.
#define EDUKE32_SCALER_PRAGMA(x) \
static __inline int32_t mulscale##x (int32_t a, int32_t d) \
{ \
    return pragmas_msvc_scale64_(pragmas_msvc_mul64_(a, d), x); \
} \
static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
    return pragmas_msvc_scale64_(pragmas_msvc_mul64_(a, d) + pragmas_msvc_mul64_(S, D), x); \
} \
static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
{ \
    return pragmas_msvc_scale64_(pragmas_msvc_mul64_(a, d) + pragmas_msvc_mul64_(b, c) \
                                 + pragmas_msvc_mul64_(S, D), x); \
}
|
||||
|
||||
EDUKE32_GENERATE_PRAGMAS
|
||||
#undef EDUKE32_SCALER_PRAGMA
|
||||
|
||||
// mulscale32(a, d): upper 32 bits of the signed 64-bit product a*d.
// Plain C replaces the former `_asm` body (unsupported by MSVC on x64/ARM,
// result returned only implicitly through eax).
static __inline int32_t mulscale32(int32_t a, int32_t d)
{
    uint64_t const product = (uint64_t)((int64_t)a * d);
    return (int32_t)(uint32_t)(product >> 32);
}
|
||||
|
||||
// dmulscale(a, d, S, D, c): 64-bit sum a*d + S*D shifted right by c,
// truncated to 32 bits. Only the low five bits of c are used as the shift
// count, as with the former `shrd eax, edx, cl`.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
{
    // Unsigned addition wraps modulo 2^64 like the add/adc pair did.
    uint64_t const sum = (uint64_t)((int64_t)a * d) + (uint64_t)((int64_t)S * D);
    return (int32_t)(uint32_t)(sum >> (c & 31));
}
|
||||
|
||||
// dmulscale32(a, d, S, D): upper 32 bits of the 64-bit sum a*d + S*D.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
    // Unsigned addition wraps modulo 2^64 like the add/adc pair did.
    uint64_t const sum = (uint64_t)((int64_t)a * d) + (uint64_t)((int64_t)S * D);
    return (int32_t)(uint32_t)(sum >> 32);
}
|
||||
|
||||
// tmulscale32(a, d, b, c, S, D): upper 32 bits of the 64-bit sum
// a*d + b*c + S*D.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
{
    // Unsigned additions wrap modulo 2^64 like the add/adc pairs did.
    uint64_t const sum = (uint64_t)((int64_t)a * d)
                       + (uint64_t)((int64_t)b * c)
                       + (uint64_t)((int64_t)S * D);
    return (int32_t)(uint32_t)(sum >> 32);
}
|
||||
|
||||
#define pragmas_have_clearbuf
|
||||
|
||||
// clearbuf(d, c, a): store the 32-bit value `a` into `c` consecutive 32-bit
// slots starting at d. Stores are bytewise from the value's native
// representation, so d needs no particular alignment. A count <= 0 stores
// nothing.
// Plain C replaces the `rep stosd` body (`_asm` is unsupported by MSVC on
// x64/ARM).
static __inline void clearbuf(void *d, int32_t c, int32_t a)
{
    unsigned char *out = (unsigned char *)d;
    unsigned char const *pattern = (unsigned char const *)&a;

    for (int32_t i = 0; i < c; ++i)
        for (unsigned k = 0; k < sizeof(a); ++k)
            *out++ = pattern[k];
}
|
||||
|
||||
#define pragmas_have_clearbufbyte
|
||||
|
||||
// clearbufbyte(d, c, a): fill `c` bytes at d from the 32-bit pattern `a`.
//
// The store sequence mirrors the former stosb/stosw/stosd code exactly:
//   - fewer than 4 bytes: an optional leading byte, then 2-byte units;
//   - otherwise: a byte and/or 2-byte unit to bring d to a 4-byte boundary,
//     then 4-byte units, then a trailing 2-byte unit and/or byte.
// A byte store writes the lowest byte of `a`, a 2-byte store its lowest two
// bytes (low byte first), a 4-byte store all four. When all four bytes of `a`
// are equal this is a plain byte fill. A count <= 0 stores nothing.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void clearbufbyte(void *d, int32_t c, int32_t a)
{
    if (c <= 0)
        return;

    uint32_t const value = (uint32_t)a;
    unsigned char const pat[4] = {
        (unsigned char)(value & 0xff),
        (unsigned char)((value >> 8) & 0xff),
        (unsigned char)((value >> 16) & 0xff),
        (unsigned char)((value >> 24) & 0xff),
    };
    unsigned char *out = (unsigned char *)d;
    uint32_t remaining = (uint32_t)c;

    if (remaining < 4)
    {
        if (remaining & 1)
            *out++ = pat[0];
        for (uint32_t words = remaining >> 1; words != 0; --words)
        {
            *out++ = pat[0];
            *out++ = pat[1];
        }
        return;
    }

    // Bring the destination up to a 4-byte boundary.
    if ((uintptr_t)out & 1)
    {
        *out++ = pat[0];
        remaining -= 1;
    }
    if ((uintptr_t)out & 2)
    {
        *out++ = pat[0];
        *out++ = pat[1];
        remaining -= 2;
    }

    for (uint32_t dwords = remaining >> 2; dwords != 0; --dwords)
        for (unsigned k = 0; k < 4; ++k)
            *out++ = pat[k];

    // Up to three bytes can be left over after the 4-byte units.
    if (remaining & 2)
    {
        *out++ = pat[0];
        *out++ = pat[1];
    }
    if (remaining & 1)
        *out++ = pat[0];
}
|
||||
|
||||
#define pragmas_have_copybuf
|
||||
|
||||
// copybuf(s, d, c): copy `c` 32-bit units from s to d in ascending address
// order. Each unit is fully read before it is written, matching the
// granularity of the former `rep movsd`. A count <= 0 copies nothing.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void copybuf(const void *s, void *d, int32_t c)
{
    unsigned char const *in = (unsigned char const *)s;
    unsigned char *out = (unsigned char *)d;

    for (int32_t i = 0; i < c; ++i)
    {
        unsigned char unit[4];
        for (unsigned k = 0; k < sizeof(unit); ++k)
            unit[k] = *in++;
        for (unsigned k = 0; k < sizeof(unit); ++k)
            *out++ = unit[k];
    }
}
|
||||
|
||||
#define pragmas_have_copybufbyte
|
||||
|
||||
// copybufbyte(s, d, c): copy `c` bytes from s to d in ascending address
// order. A count <= 0 copies nothing.
//
// The former asm moved the same bytes in 1/2/4-byte units chosen by the
// destination alignment. For non-overlapping buffers, and for d below s, the
// result is identical. Buffers overlapping with d above s are not supported.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void copybufbyte(const void *s, void *d, int32_t c)
{
    unsigned char const *in = (unsigned char const *)s;
    unsigned char *out = (unsigned char *)d;

    for (int32_t i = 0; i < c; ++i)
        out[i] = in[i];
}
|
||||
|
||||
#define pragmas_have_copybufreverse
|
||||
|
||||
// copybufreverse(s, d, c): copy `c` bytes, reading the source backwards:
// d[i] = *(s - i) for i = 0..c-1. `s` therefore points at the LAST byte of
// the source range, `d` at the first byte of the destination. A count <= 0
// copies nothing.
// Plain C replaces the byte/ror/bswap `_asm` body (unsupported by MSVC on
// x64/ARM).
static __inline void copybufreverse(const void *s, void *d, int32_t c)
{
    unsigned char const *in = (unsigned char const *)s;
    unsigned char *out = (unsigned char *)d;

    for (int32_t i = 0; i < c; ++i)
        *out++ = *in--;
}
|
||||
|
||||
#define pragmas_have_qinterpolatedown16
|
||||
|
||||
// qinterpolatedown16(a, c, d, s): fill `c` consecutive 32-bit slots at
// address `a` with (d + i*s) >> 16 for i = 0..c-1. The running value wraps
// modulo 2^32 and the shift is arithmetic.
//
// `a` is the destination address passed as an integer. It is declared
// intptr_t (the same as int32_t on x86-32) so that it can hold a pointer on
// 64-bit targets too.
//
// A count <= 0 stores nothing. The former asm jumped straight to its
// single-element tail when c >> 1 was zero, so a count of 0 still wrote one
// element. `_asm` is also unsupported by MSVC on x64/ARM.
static __inline void qinterpolatedown16(intptr_t a, int32_t c, int32_t d, int32_t s)
{
    unsigned char *out = (unsigned char *)a;
    uint32_t acc = (uint32_t)d;   // unsigned so the accumulation wraps

    for (int32_t i = 0; i < c; ++i)
    {
        int32_t const sample = (int32_t)acc >> 16;
        unsigned char const *bytes = (unsigned char const *)&sample;

        // Bytewise store: no alignment requirement on the destination.
        for (unsigned k = 0; k < sizeof(sample); ++k)
            *out++ = bytes[k];

        acc += (uint32_t)s;
    }
}
|
||||
|
||||
// qinterpolatedown16short(a, c, d, s): fill `c` consecutive 16-bit slots at
// address `a` with the low 16 bits of (d + i*s) >> 16 for i = 0..c-1. The
// running value wraps modulo 2^32 and the shift is arithmetic. A count <= 0
// stores nothing.
//
// `a` is the destination address passed as an integer. It is declared
// intptr_t (the same as int32_t on x86-32) so that it can hold a pointer on
// 64-bit targets too.
//
// The former asm wrote two samples per 32-bit store by adding the
// sign-extended first sample to the masked second one. When the first sample
// was negative, that sign extension subtracted 1 from the second sample.
// Every sample is now stored on its own, so all are exact. `_asm` is also
// unsupported by MSVC on x64/ARM.
static __inline void qinterpolatedown16short(intptr_t a, int32_t c, int32_t d, int32_t s)
{
    unsigned char *out = (unsigned char *)a;
    uint32_t acc = (uint32_t)d;   // unsigned so the accumulation wraps

    for (int32_t i = 0; i < c; ++i)
    {
        // (int32_t)acc >> 16 always lies in [-32768, 32767].
        int16_t const sample = (int16_t)((int32_t)acc >> 16);
        unsigned char const *bytes = (unsigned char const *)&sample;

        // Bytewise store: no alignment requirement on the destination.
        for (unsigned k = 0; k < sizeof(sample); ++k)
            *out++ = bytes[k];

        acc += (uint32_t)s;
    }
}
|
||||
|
||||
#define pragmas_have_klabs
|
||||
|
||||
// klabs(a): absolute value of a 32-bit integer. INT32_MIN has no positive
// counterpart and is returned unchanged, exactly as the former `neg` did.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM, result
// returned only implicitly through eax).
static __inline int32_t klabs(int32_t a)
{
    // Negate in unsigned arithmetic to avoid signed-overflow UB on INT32_MIN.
    return (a < 0) ? (int32_t)(0u - (uint32_t)a) : a;
}
|
||||
|
||||
#define pragmas_have_ksgn
|
||||
|
||||
// ksgn(b): sign of a 32-bit integer: -1 if b < 0, 0 if b == 0, 1 if b > 0.
// Plain C replaces the add/sbb/cmp/adc `_asm` body (unsupported by MSVC on
// x64/ARM, result returned only implicitly through eax).
static __inline int ksgn(int32_t b)
{
    return (b > 0) - (b < 0);
}
|
||||
|
||||
#define pragmas_have_swaps
|
||||
|
||||
// swapchar(a, b): exchange the single bytes addressed by a and b.
// Both bytes are read before either is written, so a == b is harmless.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void swapchar(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char const old_a = *pa;
    unsigned char const old_b = *pb;

    *pb = old_a;
    *pa = old_b;
}
|
||||
|
||||
// swapshort(a, b): exchange the 2-byte objects addressed by a and b.
// Both objects are read in full before either is written. Access is bytewise,
// so no alignment is required.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void swapshort(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char const a0 = pa[0], a1 = pa[1];
    unsigned char const b0 = pb[0], b1 = pb[1];

    pb[0] = a0; pb[1] = a1;
    pa[0] = b0; pa[1] = b1;
}
|
||||
|
||||
// swaplong(a, b): exchange the 4-byte objects addressed by a and b.
// Both objects are read in full before either is written. Access is bytewise,
// so no alignment is required.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void swaplong(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char old_a[4], old_b[4];

    for (unsigned k = 0; k < 4; ++k)
    {
        old_a[k] = pa[k];
        old_b[k] = pb[k];
    }
    for (unsigned k = 0; k < 4; ++k)
        pb[k] = old_a[k];
    for (unsigned k = 0; k < 4; ++k)
        pa[k] = old_b[k];
}
|
||||
|
||||
#define swapfloat swaplong
|
||||
|
||||
// swapbuf4(a, b, c): exchange `c` consecutive 4-byte units between the
// buffers at a and b, in ascending address order.
//
// A count <= 0 swaps nothing. The former dec/jnz loop had no such guard and
// ran 2^32 iterations when handed a count of 0. `_asm` is also unsupported by
// MSVC on x64/ARM.
static __inline void swapbuf4(void *a, void *b, int32_t c)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;

    for (int32_t i = 0; i < c; ++i, pa += 4, pb += 4)
    {
        unsigned char old_a[4], old_b[4];

        // Read both units before writing either, like the paired loads did.
        for (unsigned k = 0; k < 4; ++k)
        {
            old_a[k] = pa[k];
            old_b[k] = pb[k];
        }
        for (unsigned k = 0; k < 4; ++k)
            pb[k] = old_a[k];
        for (unsigned k = 0; k < 4; ++k)
            pa[k] = old_b[k];
    }
}
|
||||
|
||||
// swap64bit(a, b): exchange the 8-byte objects addressed by a and b.
// Access is bytewise, so no alignment is required. a == b is harmless;
// partially overlapping operands are not supported.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void swap64bit(void *a, void *b)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char old_a[8], old_b[8];

    for (unsigned k = 0; k < 8; ++k)
    {
        old_a[k] = pa[k];
        old_b[k] = pb[k];
    }
    for (unsigned k = 0; k < 8; ++k)
        pb[k] = old_a[k];
    for (unsigned k = 0; k < 8; ++k)
        pa[k] = old_b[k];
}
|
||||
|
||||
#define swapdouble swap64bit
|
||||
|
||||
//swapchar2(ptr1,ptr2,xsiz); is the same as:
|
||||
//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
|
||||
// swapchar2(a, b, s): exchange byte a[0] with b[0] and byte a[1] with b[s].
// The loads and stores happen in the same order as in the former asm
// sequence.
// Plain C replaces the `_asm` body (unsupported by MSVC on x64/ARM).
static __inline void swapchar2(void *a, void *b, int32_t s)
{
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    unsigned char *pb_far = pb + s;

    unsigned char const a0 = pa[0], a1 = pa[1];
    unsigned char const b0 = pb[0];
    pb[0] = a0;

    // b[s] is read only after b[0] has been written, as in the original.
    unsigned char const b_far = *pb_far;
    *pb_far = a1;

    pa[0] = b0;
    pa[1] = b_far;
}
|
||||
|
||||
#define pragmas_have_krecipasm
|
||||
|
||||
//0x007ff000 is (2047<<12), 0x3f800000 is (127<<23)
|
||||
// krecipasm(a): table lookup driven by the single-precision float encoding
// of the integer `a`. The result is left in eax; there is no C `return`.
//
// Steps performed by the asm below:
//   1. `a` is stored to fpuasm, loaded as an integer (fild) and written back
//      to fpuasm as a 32-bit float (fstp dword).
//   2. "add eax, eax / sbb ebx, ebx" leaves ebx = -1 when `a` is negative and
//      0 otherwise.
//   3. Bits 12..22 of the float image (mask 0x007ff000) shifted right by 10
//      give a byte offset, a multiple of 4, into reciptable.
//   4. The float image minus 0x3f800000, shifted right by 23, supplies the
//      shift count in cl.
//   5. The table entry is arithmetically shifted right by cl and XORed with
//      the sign mask in ebx.
// ebx is saved and restored around the sequence with push/pop.
//
// fpuasm is used as scratch memory, so every call writes to the same
// location.
// NOTE(review): presumably a fast fixed-point reciprocal approximation; the
// scaling depends on the contents of reciptable, which are not visible here.
static inline int32_t krecipasm(int32_t a)
|
||||
{
|
||||
_asm
|
||||
{
|
||||
push ebx
|
||||
mov eax, a
|
||||
mov fpuasm, eax
|
||||
fild dword ptr fpuasm
|
||||
add eax, eax
|
||||
fstp dword ptr fpuasm
|
||||
sbb ebx, ebx
|
||||
mov eax, fpuasm
|
||||
mov ecx, eax
|
||||
and eax, 0x007ff000
|
||||
shr eax, 10
|
||||
sub ecx, 0x3f800000
|
||||
shr ecx, 23
|
||||
mov eax, dword ptr reciptable[eax]
|
||||
sar eax, cl
|
||||
xor eax, ebx
|
||||
pop ebx
|
||||
}
|
||||
}
|
||||
|
||||
//}}}
|
||||
|
||||
#endif // pragmas_x86_h_
|
||||
#endif // pragmas_h_
|
Loading…
Reference in a new issue