Remove a few unused things (tmulscale, mul3/mul5/mul9, etc.) from the pragmas.h family of headers

git-svn-id: https://svn.eduke32.com/eduke32@4604 1a8010ca-5511-0410-912e-c29ae57300e0
2024-12-25 03:00:46 +00:00 · 2014-09-30 04:05:40 +00:00 · 2014-09-30 04:05:40 +00:00 · 8148ccf3c3
commit 8148ccf3c3
parent 1a286d1e52
5 changed files with 72 additions and 481 deletions
--- a/polymer/eduke32/build/include/pragmas.h
+++ b/polymer/eduke32/build/include/pragmas.h
@ -101,10 +101,6 @@ static inline int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_
 	return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(a)); \
 } \
 \
-static inline int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \
-{ \
-	return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> by(a)); \
-} \

 PRAGMA_FUNCS _scaler(32)

@ -114,6 +110,7 @@ static inline void swapchar(void* a, void* b)  { char t = *((char*)b); *((char*)
 static inline void swapchar2(void* a, void* b, int32_t s) { swapchar(a,b); swapchar((char*)a+1,(char*)b+s); }
 static inline void swapshort(void* a, void* b) { int16_t t = *((int16_t*)b); *((int16_t*)b) = *((int16_t*)a); *((int16_t*)a) = t; }
 static inline void swaplong(void* a, void* b)  { int32_t t = *((int32_t*)b); *((int32_t*)b) = *((int32_t*)a); *((int32_t*)a) = t; }
+static inline void swapfloat(void* a, void* b)  { float t = *((float*)b); *((float*)b) = *((float*)a); *((float*)a) = t; }
 static inline void swap64bit(void* a, void* b) { int64_t t = *((int64_t*)b); *((int64_t*)b) = *((int64_t*)a); *((int64_t*)a) = t; }

 static inline char readpixel(void* s)    { return (*((char*)(s))); }
@ -121,10 +118,6 @@ static inline void drawpixel(void* s, char a)    { *((char*)(s)) = a; }
 static inline void drawpixels(void* s, int16_t a)  { *((int16_t*)(s)) = a; }
 static inline void drawpixelses(void* s, int32_t a) { *((int32_t*)(s)) = a; }

-static inline int32_t mul3(int32_t a) { return (a<<1)+a; }
-static inline int32_t mul5(int32_t a) { return (a<<2)+a; }
-static inline int32_t mul9(int32_t a) { return (a<<3)+a; }
-
 static inline int32_t divmod(int32_t a, int32_t b) { uint32_t _a=(uint32_t)a, _b=(uint32_t)b; dmval = _a%_b; return _a/_b; }
 static inline int32_t moddiv(int32_t a, int32_t b) { uint32_t _a=(uint32_t)a, _b=(uint32_t)b; dmval = _a/_b; return _a%_b; }

@ -137,9 +130,9 @@ static inline int32_t kmin(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b)
 static inline int32_t kmax(int32_t a, int32_t b) { if ((int32_t)a < (int32_t)b) return b; return a; }

 static inline int32_t sqr(int32_t eax) { return (eax) * (eax); }
-static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) / qw(ecx)); }
-static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * qw(edx)) >> by(ecx)); }
-static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(ecx)); }
+static inline int32_t scale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) / ecx); }
+static inline int32_t mulscale(int32_t eax, int32_t edx, int32_t ecx) { return dw((qw(eax) * edx) >> by(ecx)); }
+static inline int32_t dmulscale(int32_t eax, int32_t edx, int32_t esi, int32_t edi, int32_t ecx) { return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(ecx)); }

 void qinterpolatedown16 (intptr_t bufptr, int32_t num, int32_t val, int32_t add);
 void qinterpolatedown16short (intptr_t bufptr, int32_t num, int32_t val, int32_t add);
--- a/polymer/eduke32/build/include/pragmas_arm.h
+++ b/polymer/eduke32/build/include/pragmas_arm.h
@ -5,20 +5,16 @@
 #ifndef __pragmas_arm_h__
 #define __pragmas_arm_h__

+// TODO: implement libdivide.h
 #define _scaler(a) \
 static inline int32_t mulscale##a(int32_t eax, int32_t edx) \
 { \
-	return dw((qw(eax) * qw(edx)) >> by(a)); \
+	return dw((qw(eax) * edx) >> by(a)); \
 } \
 \
 static inline int32_t dmulscale##a(int32_t eax, int32_t edx, int32_t esi, int32_t edi) \
 { \
-	return dw(((qw(eax) * qw(edx)) + (qw(esi) * qw(edi))) >> by(a)); \
-} \
-\
-static inline int32_t tmulscale##a(int32_t eax, int32_t edx, int32_t ebx, int32_t ecx, int32_t esi, int32_t edi) \
-{ \
-	return dw(((qw(eax) * qw(edx)) + (qw(ebx) * qw(ecx)) + (qw(esi) * qw(edi))) >> by(a)); \
+	return dw(((qw(eax) * edx) + (qw(esi) * edi)) >> by(a)); \
 } \

 PRAGMA_FUNCS _scaler(32)
@ -29,6 +25,7 @@ static inline void swapchar(void* a, void* b)  { char t = *((char*) b); *((char*
 static inline void swapchar2(void* a, void* b, int32_t s) { swapchar(a, b); swapchar((char*) a+1, (char*) b+s); }
 static inline void swapshort(void* a, void* b) { int16_t t = *((int16_t*) b); *((int16_t*) b) = *((int16_t*) a); *((int16_t*) a) = t; }
 static inline void swaplong(void* a, void* b)  { int32_t t = *((int32_t*) b); *((int32_t*) b) = *((int32_t*) a); *((int32_t*) a) = t; }
+static inline void swapfloat(void* a, void* b)  { float t = *((float*) b); *((float*) b) = *((float*) a); *((float*) a) = t; }
 static inline void swap64bit(void* a, void* b) { int64_t t = *((int64_t*) b); *((int64_t*) b) = *((int64_t*) a); *((int64_t*) a) = t; }

 static inline char readpixel(void* s)    { return (*((char*) (s))); }
@ -36,10 +33,6 @@ static inline void drawpixel(void* s, char a)    { *((char*) (s)) = a; }
 static inline void drawpixels(void* s, int16_t a)  { *((int16_t*) (s)) = a; }
 static inline void drawpixelses(void* s, int32_t a) { *((int32_t*) (s)) = a; }

-static inline int32_t mul3(int32_t a) { return (a<<1)+a; }
-static inline int32_t mul5(int32_t a) { return (a<<2)+a; }
-static inline int32_t mul9(int32_t a) { return (a<<3)+a; }
-
 static inline int32_t divmod(int32_t a, int32_t b) { uint32_t _a=(uint32_t) a, _b=(uint32_t) b; dmval = _a%_b; return _a/_b; }
 static inline int32_t moddiv(int32_t a, int32_t b) { uint32_t _a=(uint32_t) a, _b=(uint32_t) b; dmval = _a/_b; return _a%_b; }

--- a/polymer/eduke32/build/include/pragmas_ppc.h
+++ b/polymer/eduke32/build/include/pragmas_ppc.h
@ -8,6 +8,42 @@

 int32_t scale(int32_t a, int32_t d, int32_t c);

+#define _scaler(x) \
+static inline int32_t mulscale##x(int32_t a, int32_t d) \
+{ \
+	int32_t mullo, mulhi; \
+	__asm__ ( \
+		" mullw  %0, %2, %3\n" \
+		" mulhw  %1, %2, %3\n" \
+		" srwi   %0, %0, %4\n" \
+		" insrwi %0, %1, %4, 0\n" \
+		: "=&r"(mullo), "=r"(mulhi) \
+		: "r"(a), "r"(d), "i"(x) \
+	); \
+	return mullo; \
+} \
+static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \
+{ \
+	int32_t mulhi, mullo, sumhi, sumlo; \
+	__asm__ ( \
+		" mullw  %0, %4, %5\n" \
+		" mulhw  %1, %4, %5\n" \
+		" mullw  %2, %6, %7\n" \
+		" mulhw  %3, %6, %7\n" \
+		" addc   %0, %0, %2\n" \
+		" adde   %1, %1, %3\n" \
+		" srwi   %0, %0, %8\n" \
+		" insrwi %0, %1, %8, 0\n" \
+		: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \
+		: "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \
+		: "xer" \
+	); \
+	return sumlo; \
+}
+
+PRAGMA_FUNCS
+#undef _scaler
+
 static inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
 {
    int32_t mullo, mulhi;
@ -24,24 +60,6 @@ static inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
    return mullo;
 }

-#define _scaler(x) \
-static inline int32_t mulscale##x(int32_t a, int32_t d) \
-{ \
-	int32_t mullo, mulhi; \
-	__asm__ ( \
-		" mullw  %0, %2, %3\n" \
-		" mulhw  %1, %2, %3\n" \
-		" srwi   %0, %0, %4\n" \
-		" insrwi %0, %1, %4, 0\n" \
-		: "=&r"(mullo), "=r"(mulhi) \
-		: "r"(a), "r"(d), "i"(x) \
-	); \
-	return mullo; \
-}
-
-PRAGMA_FUNCS
-#undef _scaler
-
 static inline int32_t mulscale32(int32_t a, int32_t d)
 {
    int32_t mulhi;
@ -73,29 +91,6 @@ static inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int3
    return sumlo;
 }

-#define _scaler(x) \
-static inline int32_t dmulscale##x(int32_t a, int32_t d, int32_t S, int32_t D) \
-{ \
-	int32_t mulhi, mullo, sumhi, sumlo; \
-	__asm__ ( \
-		" mullw  %0, %4, %5\n" \
-		" mulhw  %1, %4, %5\n" \
-		" mullw  %2, %6, %7\n" \
-		" mulhw  %3, %6, %7\n" \
-		" addc   %0, %0, %2\n" \
-		" adde   %1, %1, %3\n" \
-		" srwi   %0, %0, %8\n" \
-		" insrwi %0, %1, %8, 0\n" \
-		: "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=r"(mulhi) \
-		: "r"(a), "r"(d), "r"(S), "r"(D), "i"(x) \
-		: "xer" \
-	); \
-	return sumlo; \
-}
-
-PRAGMA_FUNCS
-#undef _scaler
-
 static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
 {
    int32_t mulhi, mullo, sumhi, sumlo;
@ -113,30 +108,6 @@ static inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
    return sumhi;
 }

-// tmulscale only seems to be used in one place...
-static inline int32_t tmulscale11(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
-{
-    int32_t mulhi, mullo, sumhi, sumlo;
-    __asm__(
-        " mullw  %0, %4, %5\n" \
-        " mulhw  %1, %4, %5\n" \
-        " mullw  %2, %6, %7\n" \
-        " mulhw  %3, %6, %7\n" \
-        " addc   %0, %0, %2\n" \
-        " adde   %1, %1, %3\n" \
-        " mullw  %2, %8, %9\n" \
-        " mulhw  %3, %8, %9\n" \
-        " addc   %0, %0, %2\n" \
-        " adde   %1, %1, %3\n" \
-        " srwi   %0, %0, 11\n" \
-        " insrwi %0, %1, 11, 0\n" \
-        : "=&r"(sumlo), "=&r"(sumhi), "=&r"(mullo), "=&r"(mulhi)
-        : "r"(a), "r"(d), "r"(b), "r"(c), "r"(S), "r"(D)
-        : "xer"
-        );
-    return sumlo;
-}
-
 static inline char readpixel(void *d)
 {
    return *(char*) d;
@ -225,21 +196,6 @@ static inline void qinterpolatedown16short(intptr_t bufptr, int32_t num, int32_t
    }
 }

-static inline int32_t mul3(int32_t a)
-{
-    return (a<<1)+a;
-}
-
-static inline int32_t mul5(int32_t a)
-{
-    return (a<<2)+a;
-}
-
-static inline int32_t mul9(int32_t a)
-{
-    return (a<<3)+a;
-}
-
 static inline int32_t klabs(int32_t a)
 {
    int32_t mask;
@ -296,6 +252,13 @@ static inline void swaplong(void *a, void *b)
    *(int32_t*) b = t;
 }

+static inline void swapfloat(void *a, void *b)
+{
+    float t = *(float*) a;
+    *(float*) a = *(float*) b;
+    *(float*) b = t;
+}
+
 static inline void swap64bit(void *a, void *b)
 {
    double t = *(double*) a;
--- a/polymer/eduke32/build/include/pragmas_x86_gcc.h
+++ b/polymer/eduke32/build/include/pragmas_x86_gcc.h
@ -471,263 +471,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
 		: "a" (__a), "d" (__d), "S" (__S), "D" (__D) : "ebx", "cc"); \
 	 __d; })

-#define tmulscale1(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $1, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale2(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $2, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale3(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $3, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale4(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $4, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale5(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $5, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale6(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $6, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale7(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $7, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale8(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $8, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale9(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $9, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale10(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $10, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale11(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $11, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale12(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $12, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale13(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $13, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale14(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $14, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale15(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $15, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale16(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $16, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale17(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $17, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale18(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $18, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale19(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $19, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale20(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $20, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale21(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $21, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale22(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $22, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale23(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $23, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale24(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $24, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale25(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $25, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale26(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $26, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale27(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $27, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale28(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $28, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale29(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $29, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale30(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $30, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale31(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx; shrdl $31, %%edx, %%eax" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __a; })
-#define tmulscale32(a,d,b,c,S,D) \
-	({ int32_t __a=(a), __d=(d), __b=(b), __c=(c), __S=(S), __D=(D); \
-	   __asm__ __volatile__ ("imull %%edx; xchgl %%ebx, %%eax; xchgl %%ecx, %%edx; " \
-				"imull %%edx; addl %%eax, %%ebx; adcl %%edx, %%ecx; movl %%esi, %%eax; " \
-				"imull %%edi; addl %%ebx, %%eax; adcl %%ecx, %%edx" \
-		: "=a" (__a), "=d" (__d), "=b" (__b), "=c" (__c) \
-		: "a" (__a), "d" (__d), "b" (__b), "c" (__c), "S" (__S), "D" (__D) : "cc"); \
-	 __d; })
-
 #ifdef USE_ASM_DIVSCALE
 #define divscale(a,b,c) \
 	({ int32_t __a=(a), __b=(b), __c=(c); \
@ -927,22 +670,6 @@ void copybufreverse(const void *S, void *D, int32_t c);
 		: "=&S" (__S), "=&D" (__D), "=&c" (__c) : "0" (__S), "1" (__D), "2" (__c) : "memory", "cc"); \
 	 0; })

-#define mul3(a) \
-	({ int32_t __a=(a), __r; \
-	   __asm__ __volatile__ ("lea (%1,%1,2), %0" \
-		: "=r" (__r) : "0" (__a) : "cc"); \
-	 __r; })
-#define mul5(a) \
-	({ int32_t __a=(a), __r; \
-	   __asm__ __volatile__ ("lea (%1,%1,4), %0" \
-		: "=r" (__r) : "0" (__a) : "cc"); \
-	 __r; })
-#define mul9(a) \
-	({ int32_t __a=(a), __r; \
-	   __asm__ __volatile__ ("lea (%1,%1,8), %0" \
-		: "=r" (__r) : "0" (__a) : "cc"); \
-	 __r; })
-
 //returns eax/ebx, dmval = eax%edx;
 #define divmod(a,b) \
 	({ int32_t __a=(a), __b=(b); \
@ -1004,6 +731,7 @@ void copybufreverse(const void *S, void *D, int32_t c);
 	   __asm__ __volatile__ ("movl (%%eax), %%ecx; movl (%%ebx), %%edx; movl %%ecx, (%%ebx); movl %%edx, (%%eax)" \
 		: : "a" (__a), "b" (__b) : "ecx", "edx", "memory", "cc"); \
 	 0; })
+#define swapfloat swaplong
 #define swapbuf4(a,b,c) \
 	({ void *__a=(a), *__b=(b); int32_t __c=(c); \
 	   __asm__ __volatile__ ("0: movl (%%eax), %%esi; movl (%%ebx), %%edi; movl %%esi, (%%ebx); " \
--- a/polymer/eduke32/build/include/pragmas_x86_msvc.h
+++ b/polymer/eduke32/build/include/pragmas_x86_msvc.h
@ -35,23 +35,30 @@ static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
    }
 }

-#define MULSCALE(x) \
+#define _scaler(x) \
 static __inline int32_t mulscale##x (int32_t a, int32_t d) \
 { \
 	_asm mov eax, a \
 	_asm imul d \
 	_asm shrd eax, edx, x \
-}
+} \
+static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
+{ \
+	_asm mov eax, a \
+	_asm imul d \
+	_asm mov ebx, eax \
+	_asm mov eax, S \
+	_asm mov esi, edx \
+	_asm imul D \
+	_asm add eax, ebx \
+	_asm adc edx, esi \
+	_asm shrd eax, edx, x \
+} \
+
+
+PRAGMA_FUNCS 
+#undef _scaler

-MULSCALE(1)	MULSCALE(2)	MULSCALE(3)	MULSCALE(4)
-MULSCALE(5)	MULSCALE(6)	MULSCALE(7)	MULSCALE(8)
-MULSCALE(9)	MULSCALE(10)	MULSCALE(11)	MULSCALE(12)
-MULSCALE(13)	MULSCALE(14)	MULSCALE(15)	MULSCALE(16)
-MULSCALE(17)	MULSCALE(18)	MULSCALE(19)	MULSCALE(20)
-MULSCALE(21)	MULSCALE(22)	MULSCALE(23)	MULSCALE(24)
-MULSCALE(25)	MULSCALE(26)	MULSCALE(27)	MULSCALE(28)
-MULSCALE(29)	MULSCALE(30)	MULSCALE(31)
-#undef MULSCALE	
 static __inline int32_t mulscale32(int32_t a, int32_t d)
 {
    _asm {
@ -77,29 +84,6 @@ static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, in
    }
 }

-#define DMULSCALE(x) \
-static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
-{ \
-	_asm mov eax, a \
-	_asm imul d \
-	_asm mov ebx, eax \
-	_asm mov eax, S \
-	_asm mov esi, edx \
-	_asm imul D \
-	_asm add eax, ebx \
-	_asm adc edx, esi \
-	_asm shrd eax, edx, x \
-}
-
-DMULSCALE(1)	DMULSCALE(2)	DMULSCALE(3)	DMULSCALE(4)
-DMULSCALE(5)	DMULSCALE(6)	DMULSCALE(7)	DMULSCALE(8)
-DMULSCALE(9)	DMULSCALE(10)	DMULSCALE(11)	DMULSCALE(12)
-DMULSCALE(13)	DMULSCALE(14)	DMULSCALE(15)	DMULSCALE(16)
-DMULSCALE(17)	DMULSCALE(18)	DMULSCALE(19)	DMULSCALE(20)
-DMULSCALE(21)	DMULSCALE(22)	DMULSCALE(23)	DMULSCALE(24)
-DMULSCALE(25)	DMULSCALE(26)	DMULSCALE(27)	DMULSCALE(28)
-DMULSCALE(29)	DMULSCALE(30)	DMULSCALE(31)
-#undef DMULSCALE	
 static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
 {
    _asm {
@ -115,54 +99,6 @@ static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
    }
 }

-#define TMULSCALE(x) \
-static __inline int32_t tmulscale##x (int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D) \
-{ \
-	_asm mov eax, a \
-	_asm mov ebx, b \
-	_asm imul d \
-	_asm xchg eax, ebx \
-	_asm mov ecx, c \
-	_asm xchg edx, ecx \
-	_asm imul edx \
-	_asm add ebx, eax \
-	_asm adc ecx, edx \
-	_asm mov eax, S \
-	_asm imul D \
-	_asm add eax, ebx \
-	_asm adc edx, ecx \
-	_asm shrd eax, edx, x \
-}
-
-TMULSCALE(1)	TMULSCALE(2)	TMULSCALE(3)	TMULSCALE(4)
-TMULSCALE(5)	TMULSCALE(6)	TMULSCALE(7)	TMULSCALE(8)
-TMULSCALE(9)	TMULSCALE(10)	TMULSCALE(11)	TMULSCALE(12)
-TMULSCALE(13)	TMULSCALE(14)	TMULSCALE(15)	TMULSCALE(16)
-TMULSCALE(17)	TMULSCALE(18)	TMULSCALE(19)	TMULSCALE(20)
-TMULSCALE(21)	TMULSCALE(22)	TMULSCALE(23)	TMULSCALE(24)
-TMULSCALE(25)	TMULSCALE(26)	TMULSCALE(27)	TMULSCALE(28)
-TMULSCALE(29)	TMULSCALE(30)	TMULSCALE(31)
-#undef TMULSCALE	
-static __inline int32_t tmulscale32(int32_t a, int32_t d, int32_t b, int32_t c, int32_t S, int32_t D)
-{
-    _asm {
-        mov eax, a
-            mov ebx, b
-            imul d
-            xchg eax, ebx
-            mov ecx, c
-            xchg edx, ecx
-            imul edx
-            add ebx, eax
-            adc ecx, edx
-            mov eax, S
-            imul D
-            add eax, ebx
-            adc edx, ecx
-            mov eax, edx
-    }
-}
-
 #ifdef USE_ASM_DIVSCALE
 static __inline int32_t divscale(int32_t a, int32_t b, int32_t c)
 {
@ -479,30 +415,6 @@ static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, in
    }
 }

-static __inline int32_t mul3(int32_t a)
-{
-    _asm {
-        mov eax, a
-            lea eax, [eax+eax*2]
-    }
-}
-
-static __inline int32_t mul5(int32_t a)
-{
-    _asm {
-        mov eax, a
-            lea eax, [eax+eax*4]
-    }
-}
-
-static __inline int32_t mul9(int32_t a)
-{
-    _asm {
-        mov eax, a
-            lea eax, [eax+eax*8]
-    }
-}
-
 //returns eax/ebx, dmval = eax%edx;
 static __inline int32_t divmod(int32_t a, int32_t b)
 {
@ -633,6 +545,8 @@ static __inline void swaplong(void *a, void *b)
    }
 }

+#define swapfloat swaplong
+
 static __inline void swapbuf4(void *a, void *b, int32_t c)
 {
    _asm {