lilium-voyager/code/asm/snapvector.asm
2011-09-19 21:57:15 +00:00

111 lines
3.3 KiB
NASM

; ===========================================================================
; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
;
; This file is part of Quake III Arena source code.
;
; Quake III Arena source code is free software; you can redistribute it
; and/or modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 2 of the License,
; or (at your option) any later version.
;
; Quake III Arena source code is distributed in the hope that it will be
; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Quake III Arena source code; if not, write to the Free Software
; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
; ===========================================================================
; MASM version of snapvector conversion function using SSE or FPU
; assume __cdecl calling convention is being used for x86, __fastcall for x64
;
; function prototype:
; void qsnapvector(vec3_t vec)
IFNDEF idx64
.model flat, c
ENDIF
.data
ALIGN 16
ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h
ssecw DWORD 00001F80h
IFNDEF idx64
fpucw WORD 037Fh
ENDIF
.code
IFDEF idx64
; qsnapvector using SSE
qsnapvectorsse PROC
sub rsp, 8
stmxcsr [rsp] ; save SSE control word
ldmxcsr ssecw ; set to round nearest
movaps xmm1, ssemask ; initialize the mask register
movups xmm0, [rcx] ; here is stored our vector. Read 4 values in one go
movaps xmm2, xmm0 ; keep a copy of the original data
andps xmm0, xmm1 ; set the fourth value to zero in xmm0
andnps xmm1, xmm2 ; copy fourth value to xmm1 and set rest to zero
cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
orps xmm0, xmm1 ; combine all 4 values again
movups [rcx], xmm0 ; write 3 rounded and 1 unchanged values back to memory
ldmxcsr [rsp] ; restore sse control word to old value
add rsp, 8
ret
qsnapvectorsse ENDP
ELSE
qsnapvectorsse PROC
sub esp, 8
stmxcsr [esp] ; save SSE control word
ldmxcsr ssecw ; set to round nearest
mov eax, dword ptr 12[esp] ; store address of vector in eax
movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
movups xmm0, [eax] ; here is stored our vector. Read 4 values in one go
movaps xmm2, xmm0 ; keep a copy of the original data
andps xmm0, xmm1 ; set the fourth value to zero in xmm0
andnps xmm1, xmm2 ; copy fourth value to xmm1 and set rest to zero
cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
orps xmm0, xmm1 ; combine all 4 values again
movups [eax], xmm0 ; write 3 rounded and 1 unchanged values back to memory
ldmxcsr [esp] ; restore sse control word to old value
add esp, 8
ret
qsnapvectorsse ENDP
qroundx87 macro src
fld dword ptr src
fistp dword ptr src
fild dword ptr src
fstp dword ptr src
endm
qsnapvectorx87 PROC
mov eax, dword ptr 4[esp]
sub esp, 2
fnstcw word ptr [esp]
fldcw fpucw
qroundx87 [eax]
qroundx87 4[eax]
qroundx87 8[eax]
fldcw [esp]
add esp, 2
ret
qsnapvectorx87 ENDP
ENDIF
end