From 664f8e578d04c4e4a4b1d84a51c8b057f2f99381 Mon Sep 17 00:00:00 2001 From: Thilo Schulz Date: Wed, 22 Jun 2011 14:36:11 +0000 Subject: [PATCH] - align sse control word storage space to 16 byte boundary for snapvector - replace some whitespace with tabs in snapvector.c - Give gcc a bit more freedom in choice of registers --- code/asm/ftola.c | 24 +++++++++++++++-------- code/asm/snapvector.asm | 10 +++++----- code/asm/snapvector.c | 43 +++++++++++++++++++++++------------------ code/qcommon/common.c | 4 ++-- code/qcommon/q_shared.h | 10 +++++----- 5 files changed, 52 insertions(+), 39 deletions(-) diff --git a/code/asm/ftola.c b/code/asm/ftola.c index e0298e8e..ad197836 100644 --- a/code/asm/ftola.c +++ b/code/asm/ftola.c @@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA long qftolsse(float f) { - register long retval; + long retval; __asm__ volatile ( @@ -40,21 +40,25 @@ long qftolsse(float f) return retval; } -void qvmftolsse(void) +int qvmftolsse(void) { + int retval; + __asm__ volatile ( "movss (" EDI ", " EBX ", 4), %%xmm0\n" - "cvttss2si %%xmm0, " EAX "\n" - : + "cvttss2si %%xmm0, %0\n" + : "=r" (retval) : : "%xmm0" ); + + return retval; } long qftolx87(float f) { - register long retval; + long retval; __asm__ volatile ( @@ -68,13 +72,17 @@ long qftolx87(float f) return retval; } -void qvmftolx87(void) +int qvmftolx87(void) { + int retval; + __asm__ volatile ( "flds (" EDI ", " EBX ", 4)\n" "fistpl (" EDI ", " EBX ", 4)\n" - "mov (" EDI ", " EBX ", 4), " EAX "\n" - : + "mov (" EDI ", " EBX ", 4), %0\n" + : "=r" (retval) ); + + return retval; } diff --git a/code/asm/snapvector.asm b/code/asm/snapvector.asm index 87c77372..eca40fe1 100644 --- a/code/asm/snapvector.asm +++ b/code/asm/snapvector.asm @@ -44,7 +44,7 @@ IFDEF idx64 ; qsnapvector using SSE qsnapvectorsse PROC - sub rsp, 4 + sub rsp, 8 stmxcsr [rsp] ; save SSE control word ldmxcsr ssecw ; set to round nearest @@ -58,19 +58,19 @@ IFDEF idx64 pop rdi ldmxcsr [rsp] ; restore sse control word to old value - add rsp, 4 + add rsp, 8 ret qsnapvectorsse ENDP ELSE qsnapvectorsse PROC - sub esp, 4 + sub esp, 8 stmxcsr [esp] ; save SSE control word ldmxcsr ssecw ; set to round nearest push edi - mov edi, dword ptr 12[esp] ; maskmovdqu uses edi as implicit memory operand + mov edi, dword ptr 16[esp] ; maskmovdqu uses edi as implicit memory operand movaps xmm1, ssemask ; initialize the mask register for maskmovdqu movups xmm0, [edi] ; here is stored our vector. Read 4 values in one go cvtps2dq xmm0, xmm0 ; convert 4 single fp to int @@ -79,7 +79,7 @@ ELSE pop edi ldmxcsr [esp] ; restore sse control word to old value - add esp, 4 + add esp, 8 ret qsnapvectorsse ENDP diff --git a/code/asm/snapvector.c b/code/asm/snapvector.c index 402b3925..8e9b2868 100644 --- a/code/asm/snapvector.c +++ b/code/asm/snapvector.c @@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA /* * GNU inline asm version of qsnapvector + * See MASM snapvector.asm for commentary */ static unsigned char ssemask[16] __attribute__((aligned(16))) = @@ -32,29 +33,33 @@ static unsigned char ssemask[16] __attribute__((aligned(16))) = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00" }; -static unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80; -static unsigned short fpucw = 0x037F; +static const unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80; +static const unsigned short fpucw = 0x037F; void qsnapvectorsse(vec3_t vec) { + uint32_t oldcw __attribute__((aligned(16))); + __asm__ volatile ( - "sub $4, " ESP "\n" - "stmxcsr (" ESP ")\n" + "stmxcsr %3\n" "ldmxcsr %1\n" "movaps (%0), %%xmm1\n" - "movups (" EDI "), %%xmm0\n" + "movups (%2), %%xmm0\n" "cvtps2dq %%xmm0, %%xmm0\n" "cvtdq2ps %%xmm0, %%xmm0\n" + // vec MUST reside in register rdi as maskmovdqu uses + // it as an implicit operand. The "D" constraint makes + // sure of that. "maskmovdqu %%xmm1, %%xmm0\n" - "ldmxcsr (" ESP ")\n" - "add $4, " ESP "\n" + "ldmxcsr %3\n" : - : "r" (ssemask), "m" (ssecw), "D" (vec) + : "r" (ssemask), "m" (ssecw), "D" (vec), "m" (oldcw) : "memory", "%xmm0", "%xmm1" ); + } #define QROUNDX87(src) \ @@ -67,16 +72,16 @@ void qsnapvectorx87(vec3_t vec) { __asm__ volatile ( - "sub $2, " ESP "\n" - "fnstcw (" ESP ")\n" - "fldcw %0\n" - QROUNDX87("(%1)") - QROUNDX87("4(%1)") - QROUNDX87("8(%1)") - "fldcw (" ESP ")\n" - "add $2, " ESP "\n" - : - : "m" (fpucw), "r" (vec) - : "memory" + "sub $2, " ESP "\n" + "fnstcw (" ESP ")\n" + "fldcw %0\n" + QROUNDX87("(%1)") + QROUNDX87("4(%1)") + QROUNDX87("8(%1)") + "fldcw (" ESP ")\n" + "add $2, " ESP "\n" + : + : "m" (fpucw), "r" (vec) + : "memory" ); } diff --git a/code/qcommon/common.c b/code/qcommon/common.c index 68189a81..c5ff163f 100644 --- a/code/qcommon/common.c +++ b/code/qcommon/common.c @@ -91,10 +91,10 @@ cvar_t *com_homepath; cvar_t *com_busyWait; #if idx64 - void (*Q_VMftol)(void); + int (*Q_VMftol)(void); #elif id386 long (QDECL *Q_ftol)(float f); - void (QDECL *Q_VMftol)(void); + int (QDECL *Q_VMftol)(void); void (QDECL *Q_SnapVector)(vec3_t vec); #endif diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index 6ce754af..7c1af62d 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -423,23 +423,23 @@ int Q_isnan(float x); #if idx64 extern long qftolsse(float f); - extern void qvmftolsse(void); + extern int qvmftolsse(void); extern void qsnapvectorsse(vec3_t vec); #define Q_ftol qftolsse #define Q_SnapVector qsnapvectorsse - extern void (*Q_VMftol)(void); + extern int (*Q_VMftol)(void); #elif id386 extern long QDECL qftolx87(float f); extern long QDECL qftolsse(float f); - extern void QDECL qvmftolx87(void); - extern void QDECL qvmftolsse(void); + extern int QDECL qvmftolx87(void); + extern int QDECL qvmftolsse(void); extern void QDECL qsnapvectorx87(vec3_t vec); extern void QDECL qsnapvectorsse(vec3_t vec); extern long (QDECL *Q_ftol)(float f); - extern void (QDECL *Q_VMftol)(void); + extern int (QDECL *Q_VMftol)(void); extern void (QDECL *Q_SnapVector)(vec3_t vec); #else #define Q_ftol(f) lrintf((f))