Remove a few unused things (tmulscale, etc.) from the pragmas.h family of headers

git-svn-id: https://svn.eduke32.com/eduke32@4604 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2014-09-30 04:05:40 +00:00
parent 1a286d1e52
commit 8148ccf3c3
5 changed files with 72 additions and 481 deletions

View file

@ -101,10 +101,6 @@ static inline int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_
return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(a)); \
} \
\
static inline int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \
{ \
return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> by(a)); \
} \
PRAGMA_FUNCS _scaler(32)
@ -114,6 +110,7 @@ static inline void swapchar(void* a, void* b) { char t = *((char*)b); *((char*)
/* Swap the byte at a with the byte at b, then the byte at a+1 with the byte at b+s. */
static inline void swapchar2(void* a, void* b, int32_t s)
{
    char *const first = (char*)a;
    char *const second = (char*)b;

    swapchar(first, second);
    swapchar(first + 1, second + s);
}
/* Exchange the int16_t stored at a with the int16_t stored at b. */
static inline void swapshort(void* a, void* b)
{
    int16_t *const lhs = (int16_t*)a;
    int16_t *const rhs = (int16_t*)b;
    const int16_t saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Exchange the int32_t stored at a with the int32_t stored at b. */
static inline void swaplong(void* a, void* b)
{
    int32_t *const lhs = (int32_t*)a;
    int32_t *const rhs = (int32_t*)b;
    const int32_t saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Exchange the float stored at a with the float stored at b. */
static inline void swapfloat(void* a, void* b)
{
    float *const lhs = (float*)a;
    float *const rhs = (float*)b;
    const float saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Exchange the int64_t stored at a with the int64_t stored at b. */
static inline void swap64bit(void* a, void* b)
{
    int64_t *const lhs = (int64_t*)a;
    int64_t *const rhs = (int64_t*)b;
    const int64_t saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Return the single char stored at s. */
static inline char readpixel(void* s)
{
    const char *const src = (const char*)s;

    return src[0];
}
@ -121,10 +118,6 @@ static inline void drawpixel(void* s, char a) { *((char*)(s)) = a; }
/* Store the 16-bit value a at address s. */
static inline void drawpixels(void* s, int16_t a)
{
    int16_t *const dst = (int16_t*)s;

    dst[0] = a;
}
/* Store the 32-bit value a at address s. */
static inline void drawpixelses(void* s, int32_t a)
{
    int32_t *const dst = (int32_t*)s;

    dst[0] = a;
}
/*
 * Return a * 3.
 *
 * The product is formed in unsigned arithmetic: left-shifting a negative
 * signed value (as the shift-and-add form did) is undefined behaviour, and
 * the unsigned product wraps modulo 2^32 instead of overflowing.  Converting
 * the result back to int32_t is implementation-defined for values above
 * INT32_MAX; two's-complement compilers simply reinterpret the bits.
 */
static inline int32_t mul3(int32_t a)
{
    return (int32_t)((uint32_t)a * 3u);
}
/*
 * Return a * 5.
 *
 * The product is formed in unsigned arithmetic: left-shifting a negative
 * signed value (as the shift-and-add form did) is undefined behaviour, and
 * the unsigned product wraps modulo 2^32 instead of overflowing.  Converting
 * the result back to int32_t is implementation-defined for values above
 * INT32_MAX; two's-complement compilers simply reinterpret the bits.
 */
static inline int32_t mul5(int32_t a)
{
    return (int32_t)((uint32_t)a * 5u);
}
/*
 * Return a * 9.
 *
 * The product is formed in unsigned arithmetic: left-shifting a negative
 * signed value (as the shift-and-add form did) is undefined behaviour, and
 * the unsigned product wraps modulo 2^32 instead of overflowing.  Converting
 * the result back to int32_t is implementation-defined for values above
 * INT32_MAX; two's-complement compilers simply reinterpret the bits.
 */
static inline int32_t mul9(int32_t a)
{
    return (int32_t)((uint32_t)a * 9u);
}
/*
 * Divide a by b as unsigned 32-bit values: the remainder is stored in the
 * external variable dmval and the quotient is returned.
 */
static inline int32_t divmod(int32_t a, int32_t b)
{
    const uint32_t dividend = (uint32_t)a;
    const uint32_t divisor = (uint32_t)b;

    dmval = dividend % divisor;
    return dividend / divisor;
}
/*
 * Divide a by b as unsigned 32-bit values: the quotient is stored in the
 * external variable dmval and the remainder is returned.
 */
static inline int32_t moddiv(int32_t a, int32_t b)
{
    const uint32_t dividend = (uint32_t)a;
    const uint32_t divisor = (uint32_t)b;

    dmval = dividend / divisor;
    return dividend % divisor;
}
@ -137,9 +130,9 @@ static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b)
/* Return the larger of a and b (a when they are equal). */
static inline int32_t kmax(int32_t a, int32_t b)
{
    return (a < b) ? b : a;
}
/* Return eax multiplied by itself. */
static inline int32_t sqr(int32_t eax)
{
    const int32_t value = eax;

    return value * value;
}
/* scale: evaluates dw((qw(eax) * qw(edx)) / qw(ecx)). NOTE(review): dw()/qw() are defined outside this excerpt - presumably they narrow to 32 bits / widen to 64 bits; confirm. */
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) / qw(ecx)); }
/* mulscale: evaluates dw((qw(eax) * qw(edx)) >> by(ecx)), i.e. the product shifted right by a run-time count. NOTE(review): dw()/qw()/by() are defined outside this excerpt - presumably integer-width casts; confirm. */
static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) >> by(ecx)); }
/* dmulscale: evaluates dw((qw(eax)*qw(edx) + qw(esi)*qw(edi)) >> by(ecx)), i.e. the sum of two products shifted right by a run-time count. NOTE(review): dw()/qw()/by() are defined outside this excerpt - presumably integer-width casts; confirm. */
static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(ecx)); }
/* scale: evaluates dw((qw(eax) * edx) / ecx); only the first operand is passed through qw(). NOTE(review): dw()/qw() are defined outside this excerpt - presumably they narrow to 32 bits / widen to 64 bits, so the other operands would be promoted implicitly; confirm. */
static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) / ecx); }
/* mulscale: evaluates dw((qw(eax) * edx) >> by(ecx)), i.e. the product shifted right by a run-time count. NOTE(review): dw()/qw()/by() are defined outside this excerpt - presumably integer-width casts; confirm. */
static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) >> by(ecx)); }
/* dmulscale: evaluates dw((qw(eax)*edx + qw(esi)*edi) >> by(ecx)), i.e. the sum of two products shifted right by a run-time count. NOTE(review): dw()/qw()/by() are defined outside this excerpt - presumably integer-width casts; confirm. */
static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(ecx)); }
void qinterpolatedown16 (intptr_t bufptr, int32_t num, int32_t val, int32_t add);
void qinterpolatedown16short (intptr_t bufptr, int32_t num, int32_t val, int32_t add);

View file

@ -5,20 +5,16 @@
#ifndef __pragmas_arm_h__
#define __pragmas_arm_h__
// TODO: implement libdivide.h
#define _scaler(a) \
static inline int32_t mulscale##a(int32_t eax, int32_t edx) \
{ \
return dw((qw(eax) * qw(edx)) >> by(a)); \
return dw((qw(eax) * edx) >> by(a)); \
} \
\
static inline int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_t edi) \
{ \
return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(a)); \
} \
\
static inline int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \
{ \
return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> by(a)); \
return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(a)); \
} \
PRAGMA_FUNCS _scaler(32)
@ -29,6 +25,7 @@ static inline void swapchar(void* a, void* b) { char t = *((char*) b); *((char*
/* Swap the byte at a with the byte at b, then the byte at a+1 with the byte at b+s. */
static inline void swapchar2(void* a, void* b, int32_t s)
{
    char *const first = (char*) a;
    char *const second = (char*) b;

    swapchar(first, second);
    swapchar(first + 1, second + s);
}
/* Exchange the int16_t stored at a with the int16_t stored at b. */
static inline void swapshort(void* a, void* b)
{
    int16_t *const lhs = (int16_t*) a;
    int16_t *const rhs = (int16_t*) b;
    const int16_t saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Exchange the int32_t stored at a with the int32_t stored at b. */
static inline void swaplong(void* a, void* b)
{
    int32_t *const lhs = (int32_t*) a;
    int32_t *const rhs = (int32_t*) b;
    const int32_t saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Exchange the float stored at a with the float stored at b. */
static inline void swapfloat(void* a, void* b)
{
    float *const lhs = (float*) a;
    float *const rhs = (float*) b;
    const float saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Exchange the int64_t stored at a with the int64_t stored at b. */
static inline void swap64bit(void* a, void* b)
{
    int64_t *const lhs = (int64_t*) a;
    int64_t *const rhs = (int64_t*) b;
    const int64_t saved = *rhs;

    *rhs = *lhs;
    *lhs = saved;
}
/* Return the single char stored at s. */
static inline char readpixel(void* s)
{
    const char *const src = (const char*) s;

    return src[0];
}
@ -36,10 +33,6 @@ static inline void drawpixel(void* s, char a) { *((char*) (s)) = a; }
/* Store the 16-bit value a at address s. */
static inline void drawpixels(void* s, int16_t a)
{
    int16_t *const dst = (int16_t*) s;

    dst[0] = a;
}
/* Store the 32-bit value a at address s. */
static inline void drawpixelses(void* s, int32_t a)
{
    int32_t *const dst = (int32_t*) s;

    dst[0] = a;
}
/*
 * Return a * 3.
 *
 * Computed as an unsigned product so that negative inputs do not trigger the
 * undefined left shift of a negative value and large inputs wrap modulo 2^32
 * rather than overflowing a signed int.  The conversion back to int32_t is
 * implementation-defined above INT32_MAX (bit reinterpretation on
 * two's-complement compilers).
 */
static inline int32_t mul3(int32_t a)
{
    return (int32_t)((uint32_t)a * 3u);
}
/*
 * Return a * 5.
 *
 * Computed as an unsigned product so that negative inputs do not trigger the
 * undefined left shift of a negative value and large inputs wrap modulo 2^32
 * rather than overflowing a signed int.  The conversion back to int32_t is
 * implementation-defined above INT32_MAX (bit reinterpretation on
 * two's-complement compilers).
 */
static inline int32_t mul5(int32_t a)
{
    return (int32_t)((uint32_t)a * 5u);
}
/*
 * Return a * 9.
 *
 * Computed as an unsigned product so that negative inputs do not trigger the
 * undefined left shift of a negative value and large inputs wrap modulo 2^32
 * rather than overflowing a signed int.  The conversion back to int32_t is
 * implementation-defined above INT32_MAX (bit reinterpretation on
 * two's-complement compilers).
 */
static inline int32_t mul9(int32_t a)
{
    return (int32_t)((uint32_t)a * 9u);
}
/*
 * Divide a by b as unsigned 32-bit values: the remainder is stored in the
 * external variable dmval and the quotient is returned.
 */
static inline int32_t divmod(int32_t a, int32_t b)
{
    const uint32_t dividend = (uint32_t) a;
    const uint32_t divisor = (uint32_t) b;

    dmval = dividend % divisor;
    return dividend / divisor;
}
/*
 * Divide a by b as unsigned 32-bit values: the quotient is stored in the
 * external variable dmval and the remainder is returned.
 */
static inline int32_t moddiv(int32_t a, int32_t b)
{
    const uint32_t dividend = (uint32_t) a;
    const uint32_t divisor = (uint32_t) b;

    dmval = dividend / divisor;
    return dividend % divisor;
}

View file

@ -8,6 +8,42 @@
int32_t scale(int32_t a, int32_t d, int32_t c);
/*
 * _scaler(x): template that defines the fixed-shift helpers
 * mulscale##x(a, d) and dmulscale##x(a, d, S, D) in PowerPC inline assembly.
 *
 * mulscale##x:  mullw/mulhw form the low and high 32-bit words of the signed
 *               product a*d.  srwi shifts the low word right by x and insrwi
 *               places the low x bits of the high word into the top x bits
 *               of the result, so the value returned is the low 32 bits of
 *               (a*d) >> x.
 * dmulscale##x: same, applied to a*d + S*D; addc/adde add the two products
 *               as a lo/hi pair with carry, which is why "xer" is clobbered.
 *
 * x is passed with the "i" constraint, so it must be a compile-time constant.
 * NOTE(review): PRAGMA_FUNCS is defined outside this excerpt; presumably it
 * invokes _scaler once per supported shift count - confirm.
 */
#define _scaler(x) \
static inline int32_t mulscale##x(int32_t a, int32_t d) \
{ \
int32_t mullo, mulhi; \
__asm__ ( \
" mullw %0, %2, %3\n" \
" mulhw %1, %2, %3\n" \
" srwi %0, %0, %4\n" \
" insrwi %0, %1, %4, 0\n" \
: "=&r"(mullo), "=r"(mulhi) \
: "r"(a), "r"(d), "i"(x) \
); \
return mullo; \
} \
static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
int32_t mulhi, mullo, sumhi, sumlo; \
__asm__ ( \
" mullw %0, %4, %5\n" \
" mulhw %1, %4, %5\n" \
" mullw %2, %6, %7\n" \
" mulhw %3, %6, %7\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" srwi %0, %0, %8\n" \
" insrwi %0, %1, %8, 0\n" \
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \
: "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \
: "xer" \
); \
return sumlo; \
}
PRAGMA_FUNCS
#undef _scaler
static inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{
int32_t mullo, mulhi;
@ -24,24 +60,6 @@ static inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
return mullo;
}
/*
 * _scaler(x): template that defines mulscale##x(a, d) in PowerPC inline
 * assembly.  mullw/mulhw form the low and high 32-bit words of the signed
 * product a*d; srwi shifts the low word right by x and insrwi places the low
 * x bits of the high word into the top x bits of the result, so the value
 * returned is the low 32 bits of (a*d) >> x.
 *
 * x is passed with the "i" constraint, so it must be a compile-time constant.
 * NOTE(review): PRAGMA_FUNCS is defined outside this excerpt; presumably it
 * invokes _scaler once per supported shift count - confirm.
 */
#define _scaler(x) \
static inline int32_t mulscale##x(int32_t a, int32_t d) \
{ \
int32_t mullo, mulhi; \
__asm__ ( \
" mullw %0, %2, %3\n" \
" mulhw %1, %2, %3\n" \
" srwi %0, %0, %4\n" \
" insrwi %0, %1, %4, 0\n" \
: "=&r"(mullo), "=r"(mulhi) \
: "r"(a), "r"(d), "i"(x) \
); \
return mullo; \
}
PRAGMA_FUNCS
#undef _scaler
static inline int32_t mulscale32(int32_t a, int32_t d)
{
int32_t mulhi;
@ -73,29 +91,6 @@ static inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int3
return sumlo;
}
/*
 * _scaler(x): template that defines dmulscale##x(a, d, S, D) in PowerPC
 * inline assembly.  The two signed products a*d and S*D are each formed as a
 * lo/hi word pair (mullw/mulhw) and added with carry (addc/adde, hence the
 * "xer" clobber).  srwi/insrwi then extract the low 32 bits of the 64-bit
 * sum shifted right by x.
 *
 * x is passed with the "i" constraint, so it must be a compile-time constant.
 * NOTE(review): PRAGMA_FUNCS is defined outside this excerpt; presumably it
 * invokes _scaler once per supported shift count - confirm.
 */
#define _scaler(x) \
static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
int32_t mulhi, mullo, sumhi, sumlo; \
__asm__ ( \
" mullw %0, %4, %5\n" \
" mulhw %1, %4, %5\n" \
" mullw %2, %6, %7\n" \
" mulhw %3, %6, %7\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" srwi %0, %0, %8\n" \
" insrwi %0, %1, %8, 0\n" \
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \
: "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \
: "xer" \
); \
return sumlo; \
}
PRAGMA_FUNCS
#undef _scaler
static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
int32_t mulhi, mullo, sumhi, sumlo;
@ -113,30 +108,6 @@ static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
return sumhi;
}
// tmulscale only seems to be used in one place...
/*
 * Returns the low 32 bits of (a*d + b*c + S*D) >> 11.
 *
 * Each product is formed as a lo/hi word pair with mullw/mulhw and
 * accumulated into sumlo/sumhi with addc/adde (carry lives in XER, hence the
 * clobber).  srwi shifts the low word right by 11 and insrwi copies the low
 * 11 bits of the high word into the top 11 bits of the result.
 * All four outputs are early-clobber because they are written before the
 * last inputs are read.
 */
static inline int32_t tmulscale11(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
{
int32_t mulhi, mullo, sumhi, sumlo;
__asm__(
" mullw %0, %4, %5\n" \
" mulhw %1, %4, %5\n" \
" mullw %2, %6, %7\n" \
" mulhw %3, %6, %7\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" mullw %2, %8, %9\n" \
" mulhw %3, %8, %9\n" \
" addc %0, %0, %2\n" \
" adde %1, %1, %3\n" \
" srwi %0, %0, 11\n" \
" insrwi %0, %1, 11, 0\n" \
: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi)
: "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D)
: "xer"
);
return sumlo;
}
static inline char readpixel(void *d)
{
return *(char*) d;
@ -225,21 +196,6 @@ static inline void qinterpolatedown16short(intptr_t bufptr, int32_t num, int32_t
}
}
/*
 * Return a * 3.
 *
 * Computed as an unsigned product: the shift-and-add form left-shifted a
 * possibly negative signed value, which is undefined behaviour, and could
 * overflow a signed int.  The unsigned product wraps modulo 2^32; converting
 * it back to int32_t is implementation-defined above INT32_MAX (bit
 * reinterpretation on two's-complement compilers).
 */
static inline int32_t mul3(int32_t a)
{
    return (int32_t)((uint32_t)a * 3u);
}
/*
 * Return a * 5.
 *
 * Computed as an unsigned product: the shift-and-add form left-shifted a
 * possibly negative signed value, which is undefined behaviour, and could
 * overflow a signed int.  The unsigned product wraps modulo 2^32; converting
 * it back to int32_t is implementation-defined above INT32_MAX (bit
 * reinterpretation on two's-complement compilers).
 */
static inline int32_t mul5(int32_t a)
{
    return (int32_t)((uint32_t)a * 5u);
}
/*
 * Return a * 9.
 *
 * Computed as an unsigned product: the shift-and-add form left-shifted a
 * possibly negative signed value, which is undefined behaviour, and could
 * overflow a signed int.  The unsigned product wraps modulo 2^32; converting
 * it back to int32_t is implementation-defined above INT32_MAX (bit
 * reinterpretation on two's-complement compilers).
 */
static inline int32_t mul9(int32_t a)
{
    return (int32_t)((uint32_t)a * 9u);
}
static inline int32_t klabs(int32_t a)
{
int32_t mask;
@ -296,6 +252,13 @@ static inline void swaplong(void *a, void *b)
*(int32_t*) b = t;
}
/* Exchange the float stored at a with the float stored at b. */
static inline void swapfloat(void *a, void *b)
{
    float *const lhs = (float*) a;
    float *const rhs = (float*) b;
    const float saved = *lhs;

    *lhs = *rhs;
    *rhs = saved;
}
static inline void swap64bit(void *a, void *b)
{
double t = *(double*) a;

View file

@ -471,263 +471,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
__d; })
/*
 * tmulscale1(a,d,b,c,S,D): evaluates to the low 32 bits of
 * (a*d + b*c + S*D) >> 1.
 *
 * Each one-operand imull leaves a signed 64-bit product in edx:eax.  The
 * first product is moved to ecx:ebx with the xchgl pair, the second is added
 * to it (addl/adcl), the third is added on top in edx:eax, and shrdl shifts
 * that pair right by 1 into eax.  The arguments are copied into locals first,
 * so each one is evaluated exactly once.
 */
#define tmulscale1(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale2(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale3(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale4(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale5(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale6(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale7(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale8(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale9(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale10(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale11(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale12(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale13(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale14(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale15(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale16(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale17(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale18(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale19(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale20(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale21(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale22(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale23(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale24(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale25(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale26(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale27(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale28(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale29(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale30(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
#define tmulscale31(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__a; })
/*
 * tmulscale32(a,d,b,c,S,D): evaluates to the high 32 bits of the 64-bit sum
 * a*d + b*c + S*D, i.e. the sum shifted right by 32.
 *
 * The three signed products are accumulated as for the other tmulscale
 * macros, but no shrdl is needed: the result is simply the value left in edx
 * (__d) rather than eax.  The arguments are copied into locals first, so
 * each one is evaluated exactly once.
 */
#define tmulscale32(a,d,b,c,S,D) \
({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
__asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \
: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
__d; })
#ifdef USE_ASM_DIVSCALE
#define divscale(a,b,c) \
({ int32_t __a=(a), __b=(b), __c=(c); \
@ -927,22 +670,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
: "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \
0; })
/*
 * mul3(a): a * 3 as an int32_t, wrapping modulo 2^32.
 *
 * Written as a plain C expression instead of a volatile `lea` asm statement:
 * the compiler can now constant-fold and schedule it (it emits the same lea
 * where that is profitable), no bogus "cc" clobber is declared, and the GNU
 * statement-expression extension is no longer required.  The argument is
 * still evaluated exactly once and converted to int32_t first.
 */
#define mul3(a) ((int32_t)((uint32_t)(int32_t)(a) * 3u))
/*
 * mul5(a): a * 5 as an int32_t, wrapping modulo 2^32.
 *
 * Written as a plain C expression instead of a volatile `lea` asm statement:
 * the compiler can now constant-fold and schedule it (it emits the same lea
 * where that is profitable), no bogus "cc" clobber is declared, and the GNU
 * statement-expression extension is no longer required.  The argument is
 * still evaluated exactly once and converted to int32_t first.
 */
#define mul5(a) ((int32_t)((uint32_t)(int32_t)(a) * 5u))
/*
 * mul9(a): a * 9 as an int32_t, wrapping modulo 2^32.
 *
 * Written as a plain C expression instead of a volatile `lea` asm statement:
 * the compiler can now constant-fold and schedule it (it emits the same lea
 * where that is profitable), no bogus "cc" clobber is declared, and the GNU
 * statement-expression extension is no longer required.  The argument is
 * still evaluated exactly once and converted to int32_t first.
 */
#define mul9(a) ((int32_t)((uint32_t)(int32_t)(a) * 9u))
//returns eax/ebx, dmval = eax%edx;
#define divmod(a,b) \
({ int32_t __a=(a), __b=(b); \
@ -1004,6 +731,7 @@ void copybufreverse(const void *S, void *D, int32_t c);
__asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \
: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
0; })
// swapfloat is an alias for swaplong. NOTE(review): presumably relies on float having the same 32-bit size as the values swaplong exchanges - confirm.
#define swapfloat swaplong
#define swapbuf4(a,b,c) \
({ void *__a=(a), *__b=(b); int32_t __c=(c); \
__asm__ __volatile__ ("0: movl (%%eax), %%esi; movl (%%ebx), %%edi; movl %%esi, (%%ebx); " \

View file

@ -35,23 +35,30 @@ static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
}
}
#define MULSCALE(x) \
#define _scaler(x) \
static __inline int32_t mulscale##x (int32_t a, int32_t d) \
{ \
_asm mov eax, a \
_asm imul d \
_asm shrd eax, edx, x \
}
} \
static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
_asm mov eax, a \
_asm imul d \
_asm mov ebx, eax \
_asm mov eax, S \
_asm mov esi, edx \
_asm imul D \
_asm add eax, ebx \
_asm adc edx, esi \
_asm shrd eax, edx, x \
} \
PRAGMA_FUNCS
#undef _scaler
MULSCALE(1) MULSCALE(2) MULSCALE(3) MULSCALE(4)
MULSCALE(5) MULSCALE(6) MULSCALE(7) MULSCALE(8)
MULSCALE(9) MULSCALE(10) MULSCALE(11) MULSCALE(12)
MULSCALE(13) MULSCALE(14) MULSCALE(15) MULSCALE(16)
MULSCALE(17) MULSCALE(18) MULSCALE(19) MULSCALE(20)
MULSCALE(21) MULSCALE(22) MULSCALE(23) MULSCALE(24)
MULSCALE(25) MULSCALE(26) MULSCALE(27) MULSCALE(28)
MULSCALE(29) MULSCALE(30) MULSCALE(31)
#undef MULSCALE
static __inline int32_t mulscale32(int32_t a, int32_t d)
{
_asm {
@ -77,29 +84,6 @@ static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, in
}
}
/*
 * DMULSCALE(x): defines dmulscale##x(a, d, S, D) with MSVC inline assembly,
 * then instantiates it for x = 1..31.
 *
 * imul leaves the signed 64-bit product a*d in edx:eax; it is parked in
 * esi:ebx while S*D is computed, the two are added with add/adc, and shrd
 * shifts the edx:eax pair right by x into eax.  The function has no return
 * statement: the result is whatever the asm leaves in eax.
 */
#define DMULSCALE(x) \
static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
_asm mov eax, a \
_asm imul d \
_asm mov ebx, eax \
_asm mov eax, S \
_asm mov esi, edx \
_asm imul D \
_asm add eax, ebx \
_asm adc edx, esi \
_asm shrd eax, edx, x \
}
DMULSCALE(1) DMULSCALE(2) DMULSCALE(3) DMULSCALE(4)
DMULSCALE(5) DMULSCALE(6) DMULSCALE(7) DMULSCALE(8)
DMULSCALE(9) DMULSCALE(10) DMULSCALE(11) DMULSCALE(12)
DMULSCALE(13) DMULSCALE(14) DMULSCALE(15) DMULSCALE(16)
DMULSCALE(17) DMULSCALE(18) DMULSCALE(19) DMULSCALE(20)
DMULSCALE(21) DMULSCALE(22) DMULSCALE(23) DMULSCALE(24)
DMULSCALE(25) DMULSCALE(26) DMULSCALE(27) DMULSCALE(28)
DMULSCALE(29) DMULSCALE(30) DMULSCALE(31)
#undef DMULSCALE
static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
_asm {
@ -115,54 +99,6 @@ static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
}
}
/*
 * TMULSCALE(x): defines tmulscale##x(a, d, b, c, S, D) with MSVC inline
 * assembly, then instantiates it for x = 1..31.
 *
 * The three signed 64-bit products a*d, b*c and S*D are formed with imul
 * (result in edx:eax).  The first is moved to ecx:ebx by the xchg pair, the
 * second is added to it with add/adc, the third is added on top in edx:eax,
 * and shrd shifts that pair right by x into eax.  The function has no return
 * statement: the result is whatever the asm leaves in eax.
 */
#define TMULSCALE(x) \
static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
{ \
_asm mov eax, a \
_asm mov ebx, b \
_asm imul d \
_asm xchg eax, ebx \
_asm mov ecx, c \
_asm xchg edx, ecx \
_asm imul edx \
_asm add ebx, eax \
_asm adc ecx, edx \
_asm mov eax, S \
_asm imul D \
_asm add eax, ebx \
_asm adc edx, ecx \
_asm shrd eax, edx, x \
}
TMULSCALE(1) TMULSCALE(2) TMULSCALE(3) TMULSCALE(4)
TMULSCALE(5) TMULSCALE(6) TMULSCALE(7) TMULSCALE(8)
TMULSCALE(9) TMULSCALE(10) TMULSCALE(11) TMULSCALE(12)
TMULSCALE(13) TMULSCALE(14) TMULSCALE(15) TMULSCALE(16)
TMULSCALE(17) TMULSCALE(18) TMULSCALE(19) TMULSCALE(20)
TMULSCALE(21) TMULSCALE(22) TMULSCALE(23) TMULSCALE(24)
TMULSCALE(25) TMULSCALE(26) TMULSCALE(27) TMULSCALE(28)
TMULSCALE(29) TMULSCALE(30) TMULSCALE(31)
#undef TMULSCALE
/*
 * Computes the 64-bit sum a*d + b*c + S*D and yields its high 32 bits.
 *
 * The three signed products are accumulated in edx:eax exactly as in the
 * other tmulscale variants; instead of a shrd, the final `mov eax, edx`
 * copies the high word into eax.  There is no return statement: the result
 * is whatever the asm leaves in eax.
 */
static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
{
_asm {
mov eax, a
mov ebx, b
imul d
xchg eax, ebx
mov ecx, c
xchg edx, ecx
imul edx
add ebx, eax
adc ecx, edx
mov eax, S
imul D
add eax, ebx
adc edx, ecx
mov eax, edx
}
}
#ifdef USE_ASM_DIVSCALE
static __inline int32_t divscale(int32_t a, int32_t b, int32_t c)
{
@ -479,30 +415,6 @@ static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, in
}
}
/*
 * Return a * 3, wrapping modulo 2^32.
 *
 * Plain C replaces the former `_asm` block, which had no return statement
 * (it relied on the value left in eax) and was opaque to the optimizer.
 * The unsigned product gives the same bits the `lea` produced; converting it
 * back to int32_t is implementation-defined above INT32_MAX (bit
 * reinterpretation on two's-complement compilers).
 */
static __inline int32_t mul3(int32_t a)
{
    return (int32_t)((uint32_t)a * 3u);
}
/*
 * Return a * 5, wrapping modulo 2^32.
 *
 * Plain C replaces the former `_asm` block, which had no return statement
 * (it relied on the value left in eax) and was opaque to the optimizer.
 * The unsigned product gives the same bits the `lea` produced; converting it
 * back to int32_t is implementation-defined above INT32_MAX (bit
 * reinterpretation on two's-complement compilers).
 */
static __inline int32_t mul5(int32_t a)
{
    return (int32_t)((uint32_t)a * 5u);
}
/*
 * Return a * 9, wrapping modulo 2^32.
 *
 * Plain C replaces the former `_asm` block, which had no return statement
 * (it relied on the value left in eax) and was opaque to the optimizer.
 * The unsigned product gives the same bits the `lea` produced; converting it
 * back to int32_t is implementation-defined above INT32_MAX (bit
 * reinterpretation on two's-complement compilers).
 */
static __inline int32_t mul9(int32_t a)
{
    return (int32_t)((uint32_t)a * 9u);
}
//returns eax/ebx, dmval = eax%edx;
static __inline int32_t divmod(int32_t a, int32_t b)
{
@ -633,6 +545,8 @@ static __inline void swaplong(void *a, void *b)
}
}
// swapfloat is an alias for swaplong. NOTE(review): presumably relies on float having the same 32-bit size as the values swaplong exchanges - confirm.
#define swapfloat swaplong
static __inline void swapbuf4(void *a, void *b, int32_t c)
{
_asm {