mirror of
https://github.com/UberGames/lilium-voyager.git
synced 2025-01-23 07:40:38 +00:00
03201aff22
- Clean up ftol()/snapvector() mess - Make use of SSE instructions for ftol()/snapvector() if available - move ftol/snapvector pure assembler to inline assembler, this will add x86_64 and improve support for different calling conventions - Set FPU control word at program startup to get consistent behaviour on all platforms
107 lines
2.9 KiB
NASM
107 lines
2.9 KiB
NASM
; ===========================================================================
|
|
; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
|
|
;
|
|
; This file is part of Quake III Arena source code.
|
|
;
|
|
; Quake III Arena source code is free software; you can redistribute it
|
|
; and/or modify it under the terms of the GNU General Public License as
|
|
; published by the Free Software Foundation; either version 2 of the License,
|
|
; or (at your option) any later version.
|
|
;
|
|
; Quake III Arena source code is distributed in the hope that it will be
|
|
; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
; GNU General Public License for more details.
|
|
;
|
|
; You should have received a copy of the GNU General Public License
|
|
; along with Quake III Arena source code; if not, write to the Free Software
|
|
; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
; ===========================================================================
|
|
|
|
; MASM version of snapvector conversion function using SSE or FPU
|
|
; assumes the __cdecl calling convention on x86 (argument on the stack) and the
; Microsoft x64 calling convention on x64 (argument in rcx)
|
|
;
|
|
; function prototype:
|
|
; void qsnapvector(vec3_t vec)
|
|
|
|
IFNDEF idx64
; 32-bit build only: flat memory model, C naming/calling convention.
.model flat, c
ENDIF

.data

ALIGN 16				; ssemask is loaded with movaps, which needs 16-byte alignment
; Byte mask for maskmovdqu: the first three dwords are selected for the store,
; the fourth is left untouched in memory.
ssemask	DWORD 3 DUP (0FFFFFFFFh), 00000000h
; MXCSR value loaded while converting: round to nearest.
ssecw	DWORD 00001F80h

IFNDEF idx64
; x87 control word loaded by qsnapvectorx87 while rounding
; (037Fh is the FINIT default: round to nearest, all exceptions masked).
fpucw	WORD 037Fh
ENDIF

.code
|
|
|
|
IFDEF idx64
|
|
; qsnapvector using SSE
|
|
|
|
; ---------------------------------------------------------------------------
; void qsnapvectorsse(vec3_t vec)				(x64 build)
;
; Rounds the first three floats stored at vec to integral values, in place,
; using SSE with the rounding mode taken from ssecw.
;
; ABI:   Microsoft x64
; In:    rcx = vec
; Out:   nothing; the 12 bytes at [vec] are rewritten
; Clobb: xmm0, xmm1. rdi and MXCSR are saved and restored.
; Note:  movups loads 16 bytes from vec; only the first 12 are written back.
;
; Declared as a FRAME procedure: it changes rsp and a non-volatile register,
; so it must publish unwind data to stay unwindable if the access to vec
; faults.
; ---------------------------------------------------------------------------
qsnapvectorsse PROC FRAME
	push rdi			; non-volatile, but maskmovdqu addresses through rdi
	.pushreg rdi
	sub rsp, 8			; scratch slot for the caller's MXCSR (keeps rsp 8-byte aligned)
	.allocstack 8
	.endprolog

	stmxcsr dword ptr [rsp]		; save SSE control word
	ldmxcsr ssecw			; set to round nearest

	mov rdi, rcx			; maskmovdqu uses rdi as implicit memory operand
	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu
	movups xmm0, [rdi]		; here is stored our vector. Read 4 values in one go
	cvtps2dq xmm0, xmm0		; convert 4 single fp to int
	cvtdq2ps xmm0, xmm0		; convert 4 int to single fp
	maskmovdqu xmm0, xmm1		; write 3 values back to memory

	ldmxcsr dword ptr [rsp]		; restore sse control word to old value

	add rsp, 8
	pop rdi
	ret
qsnapvectorsse ENDP
|
|
|
|
ELSE
|
|
|
|
; ---------------------------------------------------------------------------
; void qsnapvectorsse(vec3_t vec)				(x86 build)
;
; Rounds the first three floats stored at vec to integral values, in place,
; using SSE with the rounding mode taken from ssecw.
;
; ABI:   __cdecl; vec is the first stack argument
; Out:   nothing; the 12 bytes at [vec] are rewritten
; Clobb: xmm0, xmm1. edi and MXCSR are saved and restored.
; Note:  movups loads 16 bytes from vec; only the first 12 are written back.
;
; Stack after the two adjustments below:
;   [esp]    saved MXCSR
;   [esp+4]  saved edi
;   [esp+8]  return address
;   [esp+12] vec
; ---------------------------------------------------------------------------
qsnapvectorsse PROC
	push edi			; callee-saved; maskmovdqu addresses through edi
	sub esp, 4			; scratch slot for the caller's MXCSR

	stmxcsr dword ptr [esp]		; remember the caller's SSE control word
	ldmxcsr ssecw			; switch to round-to-nearest

	movaps xmm1, ssemask		; store mask: lanes 0-2 on, lane 3 off
	mov edi, dword ptr [esp+12]	; edi = vec
	movups xmm0, [edi]		; load four floats at once
	cvtps2dq xmm0, xmm0		; float -> int32, rounded per MXCSR
	cvtdq2ps xmm0, xmm0		; int32 -> float
	maskmovdqu xmm0, xmm1		; store only the three masked lanes to [edi]

	ldmxcsr dword ptr [esp]		; put the caller's SSE control word back

	add esp, 4
	pop edi
	ret
qsnapvectorsse ENDP
|
|
|
|
; qroundx87 <memop>
; Rounds the single-precision float at <memop> to an integral value in place:
; the value is stored as a 32-bit integer (rounded according to the current
; x87 control word) and then converted back to float over the same dword.
; Uses one x87 stack slot temporarily; the stack depth is unchanged on exit.
qroundx87 MACRO memop
	fld	dword ptr memop		; st(0) = float at memop
	fistp	dword ptr memop		; write it back as int32 and pop
	fild	dword ptr memop		; st(0) = that integer
	fstp	dword ptr memop		; write it back as float and pop
ENDM
|
|
|
|
; ---------------------------------------------------------------------------
; void qsnapvectorx87(vec3_t vec)				(x86 build only)
;
; Rounds the three floats stored at vec to integral values, in place, using
; the x87 FPU with the control word taken from fpucw.
;
; ABI:   __cdecl; vec is the first stack argument
; Out:   nothing; the 12 bytes at [vec] are rewritten
; Clobb: eax. The x87 control word is saved and restored.
; ---------------------------------------------------------------------------
qsnapvectorx87 PROC
	mov eax, dword ptr 4[esp]	; eax = vec
	sub esp, 4			; scratch slot for the caller's control word (keeps esp dword aligned)
	fnstcw word ptr [esp]		; save FPU control word
	fldcw fpucw			; load the rounding configuration used for the conversion
	qroundx87 [eax]
	qroundx87 4[eax]
	qroundx87 8[eax]
	fldcw word ptr [esp]		; restore FPU control word to old value
	add esp, 4
	ret				; was missing: execution fell through past ENDP
qsnapvectorx87 ENDP
|
|
|
|
ENDIF
|
|
|
|
end
|