mirror of
https://github.com/UberGames/lilium-voyager.git
synced 2025-01-18 13:41:37 +00:00
Set default rounding mode to FE_TONEAREST again. Thanks to Matthias Bentrup for providing some explanations.
This commit is contained in:
parent
ebec84c55d
commit
8a500d71da
6 changed files with 34 additions and 59 deletions
|
@ -25,11 +25,11 @@ IFNDEF idx64
|
|||
.model flat, c
|
||||
ENDIF
|
||||
|
||||
; .data
|
||||
.data
|
||||
|
||||
; ifndef idx64
|
||||
; fpucw WORD 0F7Fh
|
||||
; endif
|
||||
ifndef idx64
|
||||
fpucw WORD 0F7Fh
|
||||
endif
|
||||
|
||||
.code
|
||||
|
||||
|
@ -51,22 +51,19 @@ ELSE
|
|||
; qftol using FPU
|
||||
|
||||
qftolx87m macro src
|
||||
; not necessary, fpucw is set with _controlfp at startup
|
||||
; sub esp, 2
|
||||
; fnstcw word ptr [esp]
|
||||
; fldcw fpucw
|
||||
sub esp, 2
|
||||
fnstcw word ptr [esp]
|
||||
fldcw fpucw
|
||||
fld dword ptr src
|
||||
fistp dword ptr src
|
||||
; fldcw [esp]
|
||||
fldcw [esp]
|
||||
mov eax, src
|
||||
; add esp, 2
|
||||
add esp, 2
|
||||
ret
|
||||
endm
|
||||
|
||||
qftolx87 PROC
|
||||
; need this line when storing FPU control word on stack
|
||||
; qftolx87m [esp + 6]
|
||||
qftolx87m [esp + 4]
|
||||
qftolx87m [esp + 6]
|
||||
qftolx87 ENDP
|
||||
|
||||
qvmftolx87 PROC
|
||||
|
|
|
@ -22,6 +22,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|||
|
||||
#include "qasm-inline.h"
|
||||
|
||||
static const unsigned short fpucw = 0x0C7F;
|
||||
|
||||
/*
|
||||
* GNU inline asm ftol conversion functions using SSE or FPU
|
||||
*/
|
||||
|
@ -59,14 +61,18 @@ int qvmftolsse(void)
|
|||
long qftolx87(float f)
|
||||
{
|
||||
long retval;
|
||||
unsigned short oldcw;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
"fnstcw %2\n"
|
||||
"fldcw %3\n"
|
||||
"flds %1\n"
|
||||
"fistpl %1\n"
|
||||
"fldcw %2\n"
|
||||
"mov %1, %0\n"
|
||||
: "=r" (retval)
|
||||
: "m" (f)
|
||||
: "m" (f), "m" (oldcw), "m" (fpucw)
|
||||
);
|
||||
|
||||
return retval;
|
||||
|
@ -75,13 +81,18 @@ long qftolx87(float f)
|
|||
int qvmftolx87(void)
|
||||
{
|
||||
int retval;
|
||||
unsigned short oldcw;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
"fnstcw %1\n"
|
||||
"fldcw %2\n"
|
||||
"flds (" EDI ", " EBX ", 4)\n"
|
||||
"fistpl (" EDI ", " EBX ", 4)\n"
|
||||
"fldcw %2\n"
|
||||
"mov (" EDI ", " EBX ", 4), %0\n"
|
||||
: "=r" (retval)
|
||||
: "m" (oldcw), "m" (fpucw)
|
||||
);
|
||||
|
||||
return retval;
|
||||
|
|
|
@ -45,9 +45,6 @@ IFDEF idx64
|
|||
|
||||
qsnapvectorsse PROC
|
||||
sub rsp, 8
|
||||
stmxcsr [rsp] ; save SSE control word
|
||||
ldmxcsr ssecw ; set to round nearest
|
||||
|
||||
movaps xmm1, ssemask ; initialize the mask register
|
||||
movups xmm0, [rcx] ; here is stored our vector. Read 4 values in one go
|
||||
movaps xmm2, xmm0 ; keep a copy of the original data
|
||||
|
@ -57,20 +54,13 @@ IFDEF idx64
|
|||
cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
|
||||
orps xmm0, xmm1 ; combine all 4 values again
|
||||
movups [rcx], xmm0 ; write 3 rounded and 1 unchanged values back to memory
|
||||
|
||||
ldmxcsr [rsp] ; restore sse control word to old value
|
||||
add rsp, 8
|
||||
ret
|
||||
qsnapvectorsse ENDP
|
||||
|
||||
ELSE
|
||||
|
||||
qsnapvectorsse PROC
|
||||
sub esp, 8
|
||||
stmxcsr [esp] ; save SSE control word
|
||||
ldmxcsr ssecw ; set to round nearest
|
||||
|
||||
mov eax, dword ptr 12[esp] ; store address of vector in eax
|
||||
mov eax, dword ptr 4[esp] ; store address of vector in eax
|
||||
movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
|
||||
movups xmm0, [eax] ; here is stored our vector. Read 4 values in one go
|
||||
movaps xmm2, xmm0 ; keep a copy of the original data
|
||||
|
@ -80,9 +70,6 @@ ELSE
|
|||
cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
|
||||
orps xmm0, xmm1 ; combine all 4 values again
|
||||
movups [eax], xmm0 ; write 3 rounded and 1 unchanged values back to memory
|
||||
|
||||
ldmxcsr [esp] ; restore sse control word to old value
|
||||
add esp, 8
|
||||
ret
|
||||
qsnapvectorsse ENDP
|
||||
|
||||
|
@ -95,14 +82,9 @@ ELSE
|
|||
|
||||
qsnapvectorx87 PROC
|
||||
mov eax, dword ptr 4[esp]
|
||||
sub esp, 2
|
||||
fnstcw word ptr [esp]
|
||||
fldcw fpucw
|
||||
qroundx87 [eax]
|
||||
qroundx87 4[eax]
|
||||
qroundx87 8[eax]
|
||||
fldcw [esp]
|
||||
add esp, 2
|
||||
ret
|
||||
qsnapvectorx87 ENDP
|
||||
|
||||
|
|
|
@ -33,31 +33,21 @@ static unsigned char ssemask[16] __attribute__((aligned(16))) =
|
|||
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00"
|
||||
};
|
||||
|
||||
static const unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80;
|
||||
static const unsigned short fpucw = 0x037F;
|
||||
|
||||
void qsnapvectorsse(vec3_t vec)
|
||||
{
|
||||
uint32_t oldcw __attribute__((aligned(16)));
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
"stmxcsr %3\n"
|
||||
"ldmxcsr %1\n"
|
||||
|
||||
"movaps (%0), %%xmm1\n"
|
||||
"movups (%2), %%xmm0\n"
|
||||
"movups (%1), %%xmm0\n"
|
||||
"movaps %%xmm0, %%xmm2\n"
|
||||
"andps %%xmm1, %%xmm0\n"
|
||||
"andnps %%xmm2, %%xmm1\n"
|
||||
"cvtps2dq %%xmm0, %%xmm0\n"
|
||||
"cvtdq2ps %%xmm0, %%xmm0\n"
|
||||
"orps %%xmm1, %%xmm0\n"
|
||||
"movups %%xmm0, (%2)\n"
|
||||
|
||||
"ldmxcsr %3\n"
|
||||
"movups %%xmm0, (%1)\n"
|
||||
:
|
||||
: "r" (ssemask), "m" (ssecw), "r" (vec), "m" (oldcw)
|
||||
: "r" (ssemask), "r" (vec)
|
||||
: "memory", "%xmm0", "%xmm1", "%xmm2"
|
||||
);
|
||||
|
||||
|
@ -73,16 +63,11 @@ void qsnapvectorx87(vec3_t vec)
|
|||
{
|
||||
__asm__ volatile
|
||||
(
|
||||
"sub $2, " ESP "\n"
|
||||
"fnstcw (" ESP ")\n"
|
||||
"fldcw %0\n"
|
||||
QROUNDX87("(%1)")
|
||||
QROUNDX87("4(%1)")
|
||||
QROUNDX87("8(%1)")
|
||||
"fldcw (" ESP ")\n"
|
||||
"add $2, " ESP "\n"
|
||||
QROUNDX87("(%0)")
|
||||
QROUNDX87("4(%0)")
|
||||
QROUNDX87("8(%0)")
|
||||
:
|
||||
: "m" (fpucw), "r" (vec)
|
||||
: "r" (vec)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -810,8 +810,8 @@ void Sys_GLimpInit( void )
|
|||
|
||||
void Sys_SetFloatEnv(void)
|
||||
{
|
||||
// rounding towards 0
|
||||
fesetround(FE_TOWARDZERO);
|
||||
// rounding toward nearest
|
||||
fesetround(FE_TONEAREST);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -59,14 +59,14 @@ Set FPU control word to default value
|
|||
#define _MCW_EM 0x0008001fU
|
||||
#define _MCW_RC 0x00000300U
|
||||
#define _MCW_PC 0x00030000U
|
||||
#define _RC_CHOP 0x00000300U
|
||||
#define _RC_NEAR 0x00000000U
|
||||
#define _PC_53 0x00010000U
|
||||
|
||||
unsigned int _controlfp(unsigned int new, unsigned int mask);
|
||||
#endif
|
||||
|
||||
#define FPUCWMASK1 (_MCW_RC | _MCW_EM)
|
||||
#define FPUCW (_RC_CHOP | _MCW_EM | _PC_53)
|
||||
#define FPUCW (_RC_NEAR | _MCW_EM | _PC_53)
|
||||
|
||||
#if idx64
|
||||
#define FPUCWMASK (FPUCWMASK1)
|
||||
|
|
Loading…
Reference in a new issue