- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode

- Clean up ftol()/snapvector() mess
- Make use of SSE instructions for ftol()/snapvector() if available
- Move ftol/snapvector from pure assembler to inline assembler; this adds x86_64 support and improves support for different calling conventions
- Set FPU control word at program startup to get consistent behaviour on all platforms
This commit is contained in:
Thilo Schulz 2011-06-13 09:56:39 +00:00
parent 471182cba0
commit 03201aff22
22 changed files with 540 additions and 460 deletions

View file

@ -1504,13 +1504,30 @@ ifeq ($(ARCH),i386)
Q3OBJ += \ Q3OBJ += \
$(B)/client/snd_mixa.o \ $(B)/client/snd_mixa.o \
$(B)/client/matha.o \ $(B)/client/matha.o \
$(B)/client/snapvectora.o $(B)/client/snapvector.o \
$(B)/client/ftola.o
endif endif
ifeq ($(ARCH),x86) ifeq ($(ARCH),x86)
Q3OBJ += \ Q3OBJ += \
$(B)/client/snd_mixa.o \ $(B)/client/snd_mixa.o \
$(B)/client/matha.o \ $(B)/client/matha.o \
$(B)/client/snapvectora.o $(B)/client/snapvector.o \
$(B)/client/ftola.o
endif
ifeq ($(ARCH),x86_64)
Q3OBJ += \
$(B)/client/snapvector.o \
$(B)/client/ftola.o
endif
ifeq ($(ARCH),amd64)
Q3OBJ += \
$(B)/client/snapvector.o \
$(B)/client/ftola.o
endif
ifeq ($(ARCH),x64)
Q3OBJ += \
$(B)/client/snapvector.o \
$(B)/client/ftola.o
endif endif
ifeq ($(USE_VOIP),1) ifeq ($(USE_VOIP),1)
@ -1572,13 +1589,11 @@ endif
ifeq ($(HAVE_VM_COMPILED),true) ifeq ($(HAVE_VM_COMPILED),true)
ifeq ($(ARCH),i386) ifeq ($(ARCH),i386)
Q3OBJ += \ Q3OBJ += \
$(B)/client/vm_x86.o \ $(B)/client/vm_x86.o
$(B)/client/ftola.o
endif endif
ifeq ($(ARCH),x86) ifeq ($(ARCH),x86)
Q3OBJ += \ Q3OBJ += \
$(B)/client/vm_x86.o \ $(B)/client/vm_x86.o
$(B)/client/ftola.o
endif endif
ifeq ($(ARCH),x86_64) ifeq ($(ARCH),x86_64)
ifeq ($(USE_OLD_VM64),1) ifeq ($(USE_OLD_VM64),1)
@ -1587,8 +1602,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
$(B)/client/vm_x86_64_assembler.o $(B)/client/vm_x86_64_assembler.o
else else
Q3OBJ += \ Q3OBJ += \
$(B)/client/vm_x86.o \ $(B)/client/vm_x86.o
$(B)/client/ftola.o
endif endif
endif endif
ifeq ($(ARCH),amd64) ifeq ($(ARCH),amd64)
@ -1598,8 +1612,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
$(B)/client/vm_x86_64_assembler.o $(B)/client/vm_x86_64_assembler.o
else else
Q3OBJ += \ Q3OBJ += \
$(B)/client/vm_x86.o \ $(B)/client/vm_x86.o
$(B)/client/ftola.o
endif endif
endif endif
ifeq ($(ARCH),x64) ifeq ($(ARCH),x64)
@ -1609,8 +1622,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
$(B)/client/vm_x86_64_assembler.o $(B)/client/vm_x86_64_assembler.o
else else
Q3OBJ += \ Q3OBJ += \
$(B)/client/vm_x86.o \ $(B)/client/vm_x86.o
$(B)/client/ftola.o
endif endif
endif endif
ifeq ($(ARCH),ppc) ifeq ($(ARCH),ppc)
@ -1747,13 +1759,30 @@ Q3DOBJ = \
ifeq ($(ARCH),i386) ifeq ($(ARCH),i386)
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/snapvectora.o \ $(B)/ded/matha.o \
$(B)/ded/matha.o $(B)/ded/snapvector.o \
$(B)/ded/ftola.o
endif endif
ifeq ($(ARCH),x86) ifeq ($(ARCH),x86)
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/snapvectora.o \ $(B)/ded/matha.o \
$(B)/ded/matha.o $(B)/ded/snapvector.o \
$(B)/ded/ftola.o
endif
ifeq ($(ARCH),x86_64)
Q3DOBJ += \
$(B)/ded/snapvector.o \
$(B)/ded/ftola.o
endif
ifeq ($(ARCH),amd64)
Q3DOBJ += \
$(B)/ded/snapvector.o \
$(B)/ded/ftola.o
endif
ifeq ($(ARCH),x64)
Q3DOBJ += \
$(B)/ded/snapvector.o \
$(B)/ded/ftola.o
endif endif
ifeq ($(USE_INTERNAL_ZLIB),1) ifeq ($(USE_INTERNAL_ZLIB),1)
@ -1769,13 +1798,11 @@ endif
ifeq ($(HAVE_VM_COMPILED),true) ifeq ($(HAVE_VM_COMPILED),true)
ifeq ($(ARCH),i386) ifeq ($(ARCH),i386)
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/vm_x86.o \ $(B)/ded/vm_x86.o
$(B)/ded/ftola.o
endif endif
ifeq ($(ARCH),x86) ifeq ($(ARCH),x86)
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/vm_x86.o \ $(B)/ded/vm_x86.o
$(B)/ded/ftola.o
endif endif
ifeq ($(ARCH),x86_64) ifeq ($(ARCH),x86_64)
ifeq ($(USE_OLD_VM64),1) ifeq ($(USE_OLD_VM64),1)
@ -1784,8 +1811,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
$(B)/ded/vm_x86_64_assembler.o $(B)/ded/vm_x86_64_assembler.o
else else
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/vm_x86.o \ $(B)/ded/vm_x86.o
$(B)/ded/ftola.o
endif endif
endif endif
ifeq ($(ARCH),amd64) ifeq ($(ARCH),amd64)
@ -1795,8 +1821,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
$(B)/ded/vm_x86_64_assembler.o $(B)/ded/vm_x86_64_assembler.o
else else
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/vm_x86.o \ $(B)/ded/vm_x86.o
$(B)/ded/ftola.o
endif endif
endif endif
ifeq ($(ARCH),x64) ifeq ($(ARCH),x64)
@ -1806,8 +1831,7 @@ ifeq ($(HAVE_VM_COMPILED),true)
$(B)/ded/vm_x86_64_assembler.o $(B)/ded/vm_x86_64_assembler.o
else else
Q3DOBJ += \ Q3DOBJ += \
$(B)/ded/vm_x86.o \ $(B)/ded/vm_x86.o
$(B)/ded/ftola.o
endif endif
endif endif
ifeq ($(ARCH),ppc) ifeq ($(ARCH),ppc)
@ -2133,6 +2157,10 @@ $(B)/missionpack/vm/ui.qvm: $(MPUIVMOBJ) $(UIDIR)/ui_syscalls.asm $(Q3ASM)
$(B)/client/%.o: $(ASMDIR)/%.s $(B)/client/%.o: $(ASMDIR)/%.s
$(DO_AS) $(DO_AS)
# k8 so inline assembler knows about SSE
$(B)/client/%.o: $(ASMDIR)/%.c
$(DO_CC) -march=k8
$(B)/client/%.o: $(CDIR)/%.c $(B)/client/%.o: $(CDIR)/%.c
$(DO_CC) $(DO_CC)
@ -2176,6 +2204,10 @@ $(B)/client/%.o: $(SYSDIR)/%.rc
$(B)/ded/%.o: $(ASMDIR)/%.s $(B)/ded/%.o: $(ASMDIR)/%.s
$(DO_AS) $(DO_AS)
# k8 so inline assembler knows about SSE
$(B)/ded/%.o: $(ASMDIR)/%.c
$(DO_CC) -march=k8
$(B)/ded/%.o: $(SDIR)/%.c $(B)/ded/%.o: $(SDIR)/%.c
$(DO_DED_CC) $(DO_DED_CC)

90
code/asm/ftola.asm Normal file
View file

@ -0,0 +1,90 @@
; ===========================================================================
; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
;
; This file is part of Quake III Arena source code.
;
; Quake III Arena source code is free software; you can redistribute it
; and/or modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 2 of the License,
; or (at your option) any later version.
;
; Quake III Arena source code is distributed in the hope that it will be
; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Quake III Arena source code; if not, write to the Free Software
; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
; ===========================================================================
; MASM ftol conversion functions using SSE or FPU
; assume __cdecl calling convention is being used for x86, __fastcall for x64
IFNDEF idx64
.model flat, c
ENDIF
; .data
; ifndef idx64
; fpucw WORD 0F7Fh
; endif
.code
IFDEF idx64
; qftol using SSE
; long qftolsse(float f) -- x64 build.
; Takes the float from xmm0 and returns it converted to a 32-bit integer in
; eax. cvttss2si always truncates (rounds toward zero), independent of the
; rounding mode in MXCSR.
qftolsse PROC
cvttss2si eax, xmm0
ret
qftolsse ENDP
; void qvmftolsse(void) -- x64 build.
; Takes no C arguments: converts the float stored at [rdi + rbx * 4] and
; returns the truncated integer in eax. The memory operand is only read.
; NOTE(review): rdi/rbx presumably hold the VM opStack base and offset set up
; by the compiled-VM call wrapper -- confirm against the code generator.
qvmftolsse PROC
movss xmm0, dword ptr [rdi + rbx * 4]
cvttss2si eax, xmm0
ret
qvmftolsse ENDP
ELSE
; qftol using FPU
; qftolx87m: shared body of the x87 conversion routines.
; Loads the float at src, stores it back to the same location as a 32-bit
; integer and returns that integer in eax. fistp rounds according to the
; current FPU control word, which this macro no longer touches (see the
; commented-out lines below).
; The macro ends in ret, so a PROC that expands it needs no ret of its own.
qftolx87m macro src
; not necessary, fpucw is set with _controlfp at startup
; sub esp, 2
; fnstcw word ptr [esp]
; fldcw fpucw
fld dword ptr src
fistp dword ptr src ; overwrites the float at src with its integer value
; fldcw [esp]
mov eax, src
; add esp, 2
ret
endm
; long qftolx87(float f) -- x86 cdecl, f is at [esp + 4] on entry.
; The argument slot on the stack is used as scratch space for the conversion.
; The ret comes from the qftolx87m expansion.
qftolx87 PROC
; need this line when storing FPU control word on stack
; qftolx87m [esp + 6]
qftolx87m [esp + 4]
qftolx87 ENDP
; void qvmftolx87(void) -- x87 variant without C arguments.
; Converts the float at [edi + ebx * 4] in place (the slot is overwritten
; with the integer) and also returns the integer in eax.
; The ret comes from the qftolx87m expansion.
qvmftolx87 PROC
qftolx87m [edi + ebx * 4]
qvmftolx87 ENDP
; qftol using SSE
; long qftolsse(float f) -- x86 cdecl, f is at [esp + 4] on entry.
; Returns f truncated toward zero in eax; unlike the x87 version the
; argument slot on the stack is left unmodified.
qftolsse PROC
movss xmm0, dword ptr [esp + 4]
cvttss2si eax, xmm0
ret
qftolsse ENDP
; void qvmftolsse(void) -- x86 SSE variant without C arguments.
; Converts the float at [edi + ebx * 4] and returns the truncated integer in
; eax. The memory operand is only read, never written.
qvmftolsse PROC
movss xmm0, dword ptr [edi + ebx * 4]
cvttss2si eax, xmm0
ret
qvmftolsse ENDP
ENDIF
end

View file

@ -1,157 +0,0 @@
/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
This file is part of Quake III Arena source code.
Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
//
// qftol -- fast floating point to long conversion.
//
// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
// <tim@ngus.net>
#include "qasm.h"
.data
temp: .single 0.0
fpucw: .long 0
// Precision Control Field , 2 bits / 0x0300
// PC24 0x0000 Single precision (24 bits).
// PC53 0x0200 Double precision (53 bits).
// PC64 0x0300 Extended precision (64 bits).
// Rounding Control Field, 2 bits / 0x0C00
// RCN 0x0000 Rounding to nearest (even).
// RCD 0x0400 Rounding down (directed, minus).
// RCU 0x0800 Rounding up (directed plus).
// RC0 0x0C00 Rounding towards zero (chop mode).
// rounding towards nearest (even)
cw027F: .long 0x027F
cw037F: .long 0x037F
// rounding towards zero (chop mode)
cw0E7F: .long 0x0E7F
cw0F7F: .long 0x0F7F
.text
//
// int qftol( void ) - default control word
//
.globl C(qftol)
C(qftol):
fistpl temp
movl temp,%eax
ret
//
// int qftol027F( void ) - DirectX FPU
//
.globl C(qftol027F)
C(qftol027F):
fnstcw fpucw
fldcw cw027F
fistpl temp
fldcw fpucw
movl temp,%eax
ret
//
// int qftol037F( void ) - Linux FPU
//
.globl C(qftol037F)
C(qftol037F):
fnstcw fpucw
fldcw cw037F
fistpl temp
fldcw fpucw
movl temp,%eax
ret
//
// int qftol0F7F( void ) - ANSI
//
.globl C(qftol0F7F)
C(qftol0F7F):
fnstcw fpucw
fldcw cw0F7F
fistpl temp
fldcw fpucw
movl temp,%eax
ret
//
// int qftol0E7F( void )
//
.globl C(qftol0E7F)
C(qftol0E7F):
fnstcw fpucw
fldcw cw0E7F
fistpl temp
fldcw fpucw
movl temp,%eax
ret
//
// long Q_ftol( float q )
//
.globl C(Q_ftol)
C(Q_ftol):
flds 4(%esp)
fistpl temp
movl temp,%eax
ret
//
// long qftol0F7F( float q ) - Linux FPU
//
.globl C(Q_ftol0F7F)
C(Q_ftol0F7F):
fnstcw fpucw
flds 4(%esp)
fldcw cw0F7F
fistpl temp
fldcw fpucw
movl temp,%eax
ret

107
code/asm/snapvector.asm Normal file
View file

@ -0,0 +1,107 @@
; ===========================================================================
; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
;
; This file is part of Quake III Arena source code.
;
; Quake III Arena source code is free software; you can redistribute it
; and/or modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 2 of the License,
; or (at your option) any later version.
;
; Quake III Arena source code is distributed in the hope that it will be
; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Quake III Arena source code; if not, write to the Free Software
; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
; ===========================================================================
; MASM version of snapvector conversion function using SSE or FPU
; assume __cdecl calling convention is being used for x86, __fastcall for x64
;
; function prototype:
; void qsnapvector(vec3_t vec)
IFNDEF idx64
.model flat, c
ENDIF
.data
; ssemask is loaded with movaps, which requires a 16-byte aligned operand
ALIGN 16
; byte mask for maskmovdqu: the first three dwords are written, the fourth is not
ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h
; MXCSR value loaded while snapping (round to nearest)
ssecw DWORD 00001F80h
IFNDEF idx64
; x87 control word loaded by qsnapvectorx87 while snapping
fpucw WORD 037Fh
ENDIF
.code
IFDEF idx64
; qsnapvector using SSE
; void qsnapvectorsse(vec3_t vec) -- x64 build, the vec pointer arrives in rcx.
; Rounds the floats of vec in place: MXCSR is temporarily replaced by ssecw,
; four lanes are converted float -> int -> float, and only the first three
; dwords are written back (selected by ssemask).
qsnapvectorsse PROC
sub rsp, 4 ; scratch slot for the caller's MXCSR
stmxcsr [rsp] ; save SSE control word
ldmxcsr ssecw ; set to round nearest
push rdi ; rdi is overwritten below, preserve the caller's value
mov rdi, rcx ; maskmovdqu uses rdi as implicit memory operand
movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
; NOTE(review): the load below reads 16 bytes, i.e. one dword more than the
; three that are written back -- confirm that reading past vec[2] is always safe
movups xmm0, [rdi] ; here is stored our vector. Read 4 values in one go
cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
maskmovdqu xmm0, xmm1 ; write 3 values back to memory
pop rdi
ldmxcsr [rsp] ; restore sse control word to old value
add rsp, 4
ret
qsnapvectorsse ENDP
ELSE
; void qsnapvectorsse(vec3_t vec) -- x86 cdecl build.
; Rounds the floats of vec in place: MXCSR is temporarily replaced by ssecw,
; four lanes are converted float -> int -> float, and only the first three
; dwords are written back (selected by ssemask).
qsnapvectorsse PROC
sub esp, 4 ; scratch slot for the caller's MXCSR
stmxcsr [esp] ; save SSE control word
ldmxcsr ssecw ; set to round nearest
push edi ; edi is overwritten below, preserve the caller's value
; vec was at [esp + 4] on entry; the 4-byte MXCSR slot and the saved edi
; moved it to [esp + 12]
mov edi, dword ptr 12[esp] ; maskmovdqu uses edi as implicit memory operand
movaps xmm1, ssemask ; initialize the mask register for maskmovdqu
; NOTE(review): the load below reads 16 bytes, i.e. one dword more than the
; three that are written back -- confirm that reading past vec[2] is always safe
movups xmm0, [edi] ; here is stored our vector. Read 4 values in one go
cvtps2dq xmm0, xmm0 ; convert 4 single fp to int
cvtdq2ps xmm0, xmm0 ; convert 4 int to single fp
maskmovdqu xmm0, xmm1 ; write 3 values back to memory
pop edi
ldmxcsr [esp] ; restore sse control word to old value
add esp, 4
ret
qsnapvectorsse ENDP
; qroundx87: round the float at src to an integral value, in place.
; The value is stored as a 32-bit integer (fistp rounds according to the
; current FPU control word), then reloaded and stored back as a float.
qroundx87 macro src
fld dword ptr src
fistp dword ptr src ; src temporarily holds the integer representation
fild dword ptr src
fstp dword ptr src ; src is a float again, now with an integral value
endm
; void qsnapvectorx87(vec3_t vec) -- x86 cdecl, vec pointer at [esp + 4].
; Rounds the three floats of vec in place using the x87 FPU. The caller's FPU
; control word is saved on the stack, replaced by fpucw while the values are
; converted, and restored before returning.
qsnapvectorx87 PROC
mov eax, dword ptr 4[esp] ; eax = vec; read before esp is adjusted below
sub esp, 2 ; reserve 2 bytes for the saved control word
fnstcw word ptr [esp] ; save the caller's FPU control word
fldcw fpucw ; switch to the control word used for snapping
qroundx87 [eax]
qroundx87 4[eax]
qroundx87 8[eax]
fldcw word ptr [esp] ; restore the caller's FPU control word
add esp, 2
; MASM does not emit a return on its own; without this ret execution would
; run past the end of the procedure into whatever follows it
ret
qsnapvectorx87 ENDP
ENDIF
end

View file

@ -1,103 +0,0 @@
/*
===========================================================================
Copyright (C) 1999-2005 Id Software, Inc.
This file is part of Quake III Arena source code.
Quake III Arena source code is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.
Quake III Arena source code is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
===========================================================================
*/
//
// Sys_SnapVector NASM code (Andrew Henderson)
// See win32/win_shared.c for the Win32 equivalent
// This code is provided to ensure that the
// rounding behavior (and, if necessary, the
// precision) of DLL and QVM code are identical
// e.g. for network-visible operations.
// See ftol.nasm for operations on a single float,
// as used in compiled VM and DLL code that does
// not use this system trap.
//
// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
// <tim@ngus.net>
#include "qasm.h"
#if id386
.data
fpucw: .long 0
cw037F: .long 0x037F
.text
// void Sys_SnapVector( float *v )
.globl C(Sys_SnapVector)
C(Sys_SnapVector):
pushl %eax
pushl %ebp
movl %esp,%ebp
fnstcw fpucw
movl 12(%ebp),%eax
fldcw cw037F
flds (%eax)
fistpl (%eax)
fildl (%eax)
fstps (%eax)
flds 4(%eax)
fistpl 4(%eax)
fildl 4(%eax)
fstps 4(%eax)
flds 8(%eax)
fistpl 8(%eax)
fildl 8(%eax)
fstps 8(%eax)
fldcw fpucw
popl %ebp
popl %eax
ret
// void Sys_SnapVectorCW( float *v, unsigned short int cw )
.globl C(Sys_SnapVectorCW)
C(Sys_SnapVectorCW):
pushl %eax
pushl %ebp
movl %esp,%ebp
fnstcw fpucw
movl 12(%ebp),%eax
fldcw 16(%ebp)
flds (%eax)
fistpl (%eax)
fildl (%eax)
fstps (%eax)
flds 4(%eax)
fistpl 4(%eax)
fildl 4(%eax)
fstps 4(%eax)
flds 8(%eax)
fistpl 8(%eax)
fildl 8(%eax)
fstps 8(%eax)
fldcw fpucw
popl %ebp
popl %eax
ret
#endif

76
code/asm/vm_x86_64.asm Normal file
View file

@ -0,0 +1,76 @@
; ===========================================================================
; Copyright (C) 2011 Thilo Schulz <thilo@tjps.eu>
;
; This file is part of Quake III Arena source code.
;
; Quake III Arena source code is free software; you can redistribute it
; and/or modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 2 of the License,
; or (at your option) any later version.
;
; Quake III Arena source code is distributed in the hope that it will be
; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Quake III Arena source code; if not, write to the Free Software
; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
; ===========================================================================
; Call wrapper for vm_x86 when built with MSVC in 64 bit mode,
; since MSVC does not support inline x64 assembler code anymore.
;
; assumes __fastcall calling convention
DoSyscall PROTO
.code
; Call to static void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
;
; Trampoline that moves the values found in the VM registers into the
; argument locations DoSyscall expects:
;   eax -> ecx  (syscallNum)
;   esi -> edx  (programStack)
;   rdi -> r8   (opStackBase)
;   bl  -> r9b  (opStackOfs)
;   rcx -> [rsp + 20h] (arg)
; rcx must be stored before it is overwritten with syscallNum.
qsyscall64 PROC
sub rsp, 28h ; after this rsp will be aligned to 16 byte boundary
mov qword ptr [rsp + 20h], rcx ; 5th parameter "arg" is passed on stack
mov r9b, bl ; opStackOfs
mov r8, rdi ; opStackBase
mov edx, esi ; programStack
mov ecx, eax ; syscallNum
mov rax, DoSyscall ; store call address of DoSyscall in rax
call rax
add rsp, 28h
ret
qsyscall64 ENDP
; Call to compiled code after setting up the register environment for the VM
; prototype:
; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
;
; Register environment established before the call:
;   esi = *programStack, rdi = opStack, rbx = 0 (opStackOfs),
;   r8 = instructionPointers, r9 = dataBase (both left as passed in).
; On return *programStack is updated from esi and the low byte of rbx is
; returned as the opStack offset.
qvmcall64 PROC
push rsi ; push non-volatile registers to stack
push rdi
push rbx
; need to save pointer in rcx so we can write back the programStack value to caller
push rcx
; registers r8 and r9 have correct value already thanx to __fastcall
xor rbx, rbx ; opStackOfs starts out being 0
mov rdi, rdx ; opStack
mov esi, dword ptr [rcx] ; programStack
call qword ptr [r8] ; instructionPointers[0] is also the entry point
pop rcx
mov dword ptr [rcx], esi ; write back the programStack value
mov al, bl ; return opStack offset
pop rbx
pop rdi
pop rsi
ret
qvmcall64 ENDP
end

View file

@ -660,7 +660,7 @@ intptr_t CL_CgameSystemCalls( intptr_t *args ) {
case CG_REAL_TIME: case CG_REAL_TIME:
return Com_RealTime( VMA(1) ); return Com_RealTime( VMA(1) );
case CG_SNAPVECTOR: case CG_SNAPVECTOR:
Sys_SnapVector( VMA(1) ); Q_SnapVector(VMA(1));
return 0; return 0;
case CG_CIN_PLAYCINEMATIC: case CG_CIN_PLAYCINEMATIC:

View file

@ -22,8 +22,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include "snd_local.h" #include "snd_local.h"
long myftol( float f );
#define C0 0.4829629131445341 #define C0 0.4829629131445341
#define C1 0.8365163037378079 #define C1 0.8365163037378079
#define C2 0.2241438680420134 #define C2 0.2241438680420134

View file

@ -90,6 +90,14 @@ cvar_t *com_basegame;
cvar_t *com_homepath; cvar_t *com_homepath;
cvar_t *com_busyWait; cvar_t *com_busyWait;
// Float conversion / vector snapping entry points chosen at runtime.
// They are assigned in Com_DetectSSE(). On idx64 only Q_VMftol is a pointer.
#if defined(idx64)
void (*Q_VMftol)(void);
#elif defined(id386)
long (QDECL *Q_ftol)(float f);
void (QDECL *Q_VMftol)(void);
void (QDECL *Q_SnapVector)(vec3_t vec);
#endif
// com_speeds times // com_speeds times
int time_game; int time_game;
int time_frontend; // renderer frontend time int time_frontend; // renderer frontend time
@ -2565,6 +2573,53 @@ static void Com_DetectAltivec(void)
} }
} }
/*
=================
Com_DetectSSE
Find out whether we have SSE support for Q_ftol function
=================
*/
#if defined(idx64)
// On x86_64 no CPU feature check is made: the SSE VM conversion routine is
// installed unconditionally.
static void Com_DetectSSE(void)
{
	Q_VMftol = qvmftolsse;
	Com_Printf("Have SSE support\n");
}
#elif defined(id386)
// On 32 bit x86 pick the SSE or x87 implementations depending on what the
// processor reports.
static void Com_DetectSSE(void)
{
	const cpuFeatures_t features = Sys_GetProcessorFeatures();

	if(!(features & CF_SSE))
	{
		// no SSE at all: everything goes through the x87 FPU
		Q_ftol = qftolx87;
		Q_VMftol = qvmftolx87;
		Q_SnapVector = qsnapvectorx87;

		Com_Printf("No SSE support on this machine\n");
		return;
	}

	// the SSE snapvector routine is only selected when SSE2 is reported too
	Q_SnapVector = (features & CF_SSE2) ? qsnapvectorsse : qsnapvectorx87;
	Q_ftol = qftolsse;
	Q_VMftol = qvmftolsse;

	Com_Printf("Have SSE support\n");
}
#else
// other architectures have nothing to detect
#define Com_DetectSSE()
#endif
/* /*
================= =================
Com_InitRand Com_InitRand
@ -2615,6 +2670,8 @@ void Com_Init( char *commandLine ) {
// Swap_Init (); // Swap_Init ();
Cbuf_Init (); Cbuf_Init ();
Com_DetectSSE();
// override anything from the config files with command line args // override anything from the config files with command line args
Com_StartupVariable( NULL ); Com_StartupVariable( NULL );

View file

@ -87,7 +87,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define OS_STRING "win_mingw64" #define OS_STRING "win_mingw64"
#endif #endif
#define ID_INLINE inline #define ID_INLINE __inline
#define PATH_SEP '\\' #define PATH_SEP '\\'
#if defined( __WIN64__ ) #if defined( __WIN64__ )

View file

@ -419,6 +419,58 @@ extern vec3_t axisDefault[3];
#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask) #define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
int Q_isnan(float x);
// Q_ftol / Q_VMftol / Q_SnapVector: float -> integer conversion and in-place
// rounding of a vec3_t. How they are provided depends on the architecture.
#ifdef idx64
// x86_64: Q_ftol and Q_SnapVector map directly onto the SSE routines;
// only Q_VMftol is a function pointer.
extern long qftolsse(float f);
extern void qvmftolsse(void);
extern void qsnapvectorsse(vec3_t vec);
#define Q_ftol qftolsse
#define Q_SnapVector qsnapvectorsse
extern void (*Q_VMftol)(void);
#elif defined(id386)
// 32 bit x86: both x87 and SSE implementations are declared and all three
// entry points are function pointers.
extern long QDECL qftolx87(float f);
extern long QDECL qftolsse(float f);
extern void QDECL qvmftolx87(void);
extern void QDECL qvmftolsse(void);
extern void QDECL qsnapvectorx87(vec3_t vec);
extern void QDECL qsnapvectorsse(vec3_t vec);
extern long (QDECL *Q_ftol)(float f);
extern void (QDECL *Q_VMftol)(void);
extern void (QDECL *Q_SnapVector)(vec3_t vec);
#else
// everything else: fall back to the C library's lrintf() and round().
// Q_VMftol is not provided in this branch.
#define Q_ftol(f) lrintf((f))
#define Q_SnapVector(vec)\
do\
{\
vec3_t *temp = (vec);\
\
(*temp)[0] = round((*temp)[0]);\
(*temp)[1] = round((*temp)[1]);\
(*temp)[2] = round((*temp)[2]);\
} while(0)
#endif
/*
// if your system does not have lrintf() and round() you can try this block. Please also open a bug report at bugzilla.icculus.org
// or write a mail to the ioq3 mailing list.
#else
#define Q_ftol(f) ((long) (f))
#define Q_round(f) do { if((f) < 0) (f) -= 0.5f; else (f) += 0.5f; (f) = Q_ftol((f)); } while(0)
#define Q_SnapVector(vec) \
do\
{\
vec3_t *temp = (vec);\
\
Q_round((*temp)[0]);\
Q_round((*temp)[1]);\
Q_round((*temp)[2]);\
} while(0)
#endif
*/
#if idppc #if idppc
static ID_INLINE float Q_rsqrt( float number ) { static ID_INLINE float Q_rsqrt( float number ) {
@ -637,8 +689,6 @@ void MakeNormalVectors( const vec3_t forward, vec3_t right, vec3_t up );
void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]); void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]);
void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up); void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up);
void PerpendicularVector( vec3_t dst, const vec3_t src ); void PerpendicularVector( vec3_t dst, const vec3_t src );
int Q_isnan( float x );
#ifndef MAX #ifndef MAX
#define MAX(x,y) ((x)>(y)?(x):(y)) #define MAX(x,y) ((x)>(y)?(x):(y))

View file

@ -67,29 +67,6 @@ static int pc = 0;
#define FTOL_PTR #define FTOL_PTR
#ifdef _MSC_VER
#if defined( FTOL_PTR )
int _ftol( float );
static void *ftolPtr = _ftol;
#endif
#else // _MSC_VER
#if defined( FTOL_PTR )
int qftol( void );
int qftol027F( void );
int qftol037F( void );
int qftol0E7F( void );
int qftol0F7F( void );
static void *ftolPtr = qftol0F7F;
#endif // FTOL_PTR
#endif
static int instruction, pass; static int instruction, pass;
static int lastConst = 0; static int lastConst = 0;
static int oc0, oc1, pop0, pop1; static int oc0, oc1, pop0, pop1;
@ -111,15 +88,17 @@ typedef enum
static ELastCommand LastCommand; static ELastCommand LastCommand;
static inline int iss8(int32_t v) static int iss8(int32_t v)
{ {
return (SCHAR_MIN <= v && v <= SCHAR_MAX); return (SCHAR_MIN <= v && v <= SCHAR_MAX);
} }
static inline int isu8(uint32_t v) #if 0
static int isu8(uint32_t v)
{ {
return (v <= UCHAR_MAX); return (v <= UCHAR_MAX);
} }
#endif
static int NextConstant4(void) static int NextConstant4(void)
{ {
@ -436,30 +415,37 @@ Uses asm to retrieve arguments from registers to work around different calling c
================= =================
*/ */
#if defined(_MSC_VER) && defined(idx64)
extern void qsyscall64(void);
extern uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
// Microsoft does not support inline assembler on x64 platforms. Meh.
void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
{
#else
static void DoSyscall(void) static void DoSyscall(void)
{ {
vm_t *savedVM;
int syscallNum; int syscallNum;
int programStack; int programStack;
int *opStackBase; int *opStackBase;
int opStackOfs; uint8_t opStackOfs;
intptr_t arg; intptr_t arg;
#endif
vm_t *savedVM;
#ifdef _MSC_VER #ifdef _MSC_VER
#ifndef idx64
__asm __asm
{ {
mov dword ptr syscallNum, eax mov dword ptr syscallNum, eax
mov dword ptr programStack, esi mov dword ptr programStack, esi
mov dword ptr opStackOfs, ebx mov byte ptr opStackOfs, bl
#ifdef idx64
mov qword ptr opStackBase, rdi
mov qword ptr arg, rcx
#else
mov dword ptr opStackBase, edi mov dword ptr opStackBase, edi
mov dword ptr arg, ecx mov dword ptr arg, ecx
#endif
} }
#endif
#else #else
__asm__ volatile( __asm__ volatile(
"" ""
@ -539,8 +525,13 @@ Call to DoSyscall()
int EmitCallDoSyscall(vm_t *vm) int EmitCallDoSyscall(vm_t *vm)
{ {
// use edx register to store DoSyscall address // use edx register to store DoSyscall address
#if defined(_MSC_VER) && defined(idx64)
EmitRexString(0x48, "BA"); // mov edx, qsyscall64
EmitPtr(qsyscall64);
#else
EmitRexString(0x48, "BA"); // mov edx, DoSyscall EmitRexString(0x48, "BA"); // mov edx, DoSyscall
EmitPtr(DoSyscall); EmitPtr(DoSyscall);
#endif
// Push important registers to stack as we can't really make // Push important registers to stack as we can't really make
// any assumptions about calling conventions. // any assumptions about calling conventions.
@ -1629,9 +1620,8 @@ void VM_Compile(vm_t *vm, vmHeader_t *header)
EmitString("DB 1C 9F"); // fistp dword ptr [edi + ebx * 4] EmitString("DB 1C 9F"); // fistp dword ptr [edi + ebx * 4]
#else // FTOL_PTR #else // FTOL_PTR
// call the library conversion function // call the library conversion function
EmitString("D9 04 9F"); // fld dword ptr [edi + ebx * 4] EmitRexString(0x48, "BA"); // mov edx, Q_VMftol
EmitRexString(0x48, "BA"); // mov edx, ftolPtr EmitPtr(Q_VMftol);
EmitPtr(ftolPtr);
EmitRexString(0x48, "FF D2"); // call edx EmitRexString(0x48, "FF D2"); // call edx
EmitCommand(LAST_COMMAND_MOV_STACK_EAX); // mov dword ptr [edi + ebx * 4], eax EmitCommand(LAST_COMMAND_MOV_STACK_EAX); // mov dword ptr [edi + ebx * 4], eax
#endif #endif
@ -1746,12 +1736,12 @@ This function is called directly by the generated code
int VM_CallCompiled(vm_t *vm, int *args) int VM_CallCompiled(vm_t *vm, int *args)
{ {
int stack[OPSTACK_SIZE + 7]; byte stack[OPSTACK_SIZE * 4 + 15];
void *entryPoint; void *entryPoint;
int programCounter; int programCounter;
int programStack, stackOnEntry; int programStack, stackOnEntry;
byte *image; byte *image;
int *opStack, *opStackOnEntry; int *opStack;
int opStackOfs; int opStackOfs;
currentVM = vm; currentVM = vm;
@ -1784,35 +1774,16 @@ int VM_CallCompiled(vm_t *vm, int *args)
// off we go into generated code... // off we go into generated code...
entryPoint = vm->codeBase + vm->entryOfs; entryPoint = vm->codeBase + vm->entryOfs;
opStack = opStackOnEntry = PADP(stack, 8); opStack = PADP(stack, 16);
*opStack = 0xDEADBEEF; *opStack = 0xDEADBEEF;
opStackOfs = 0; opStackOfs = 0;
#ifdef _MSC_VER #ifdef _MSC_VER
#ifdef idx64
opStackOfs = qvmcall64(&programStack, opStack, vm->instructionPointers, vm->dataBase);
#else
__asm __asm
{ {
#ifdef idx64
// non-volatile registers according to x64 calling convention
push rsi
push rdi
push rbx
mov esi, dword ptr programStack
mov rdi, qword ptr opStack
mov ebx, dword ptr opStackOfs
mov r8, qword ptr vm->instructionPointers
mov r9, qword ptr vm->dataBase
call entryPoint
mov dword ptr opStackOfs, ebx
mov qword ptr opStack, rdi
mov dword ptr programStack, esi
pop rbx
pop rdi
pop rsi
#else
pushad pushad
mov esi, dword ptr programStack mov esi, dword ptr programStack
@ -1826,8 +1797,8 @@ int VM_CallCompiled(vm_t *vm, int *args)
mov dword ptr programStack, esi mov dword ptr programStack, esi
popad popad
#endif
} }
#endif
#elif defined(idx64) #elif defined(idx64)
__asm__ volatile( __asm__ volatile(
"movq %5, %%rax\r\n" "movq %5, %%rax\r\n"
@ -1855,7 +1826,7 @@ int VM_CallCompiled(vm_t *vm, int *args)
); );
#endif #endif
if(opStack != opStackOnEntry || opStackOfs != 1 || *opStack != 0xDEADBEEF) if(opStackOfs != 1 || *opStack != 0xDEADBEEF)
{ {
Com_Error(ERR_DROP, "opStack corrupted in compiled code"); Com_Error(ERR_DROP, "opStack corrupted in compiled code");
} }

View file

@ -359,9 +359,9 @@ void R_SetupEntityLighting( const trRefdef_t *refdef, trRefEntity_t *ent ) {
} }
// save out the byte packet version // save out the byte packet version
((byte *)&ent->ambientLightInt)[0] = myftol( ent->ambientLight[0] ); ((byte *)&ent->ambientLightInt)[0] = Q_ftol(ent->ambientLight[0]);
((byte *)&ent->ambientLightInt)[1] = myftol( ent->ambientLight[1] ); ((byte *)&ent->ambientLightInt)[1] = Q_ftol(ent->ambientLight[1]);
((byte *)&ent->ambientLightInt)[2] = myftol( ent->ambientLight[2] ); ((byte *)&ent->ambientLightInt)[2] = Q_ftol(ent->ambientLight[2]);
((byte *)&ent->ambientLightInt)[3] = 0xff; ((byte *)&ent->ambientLightInt)[3] = 0xff;
// transform the direction to local space // transform the direction to local space

View file

@ -34,14 +34,6 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#define GL_INDEX_TYPE GL_UNSIGNED_INT #define GL_INDEX_TYPE GL_UNSIGNED_INT
typedef unsigned int glIndex_t; typedef unsigned int glIndex_t;
// fast float to int conversion
#if id386 && !defined(__GNUC__)
long myftol( float f );
#else
#define myftol(x) ((int)(x))
#endif
// everything that is needed by the backend needs // everything that is needed by the backend needs
// to be double buffered to allow it to run in // to be double buffered to allow it to run in
// parallel on a dual cpu machine // parallel on a dual cpu machine

View file

@ -218,7 +218,7 @@ int R_ComputeLOD( trRefEntity_t *ent ) {
} }
flod *= tr.currentModel->numLods; flod *= tr.currentModel->numLods;
lod = myftol( flod ); lod = Q_ftol(flod);
if ( lod < 0 ) if ( lod < 0 )
{ {

View file

@ -233,7 +233,7 @@ static void R_BindAnimatedImage( textureBundle_t *bundle ) {
// it is necessary to do this messy calc to make sure animations line up // it is necessary to do this messy calc to make sure animations line up
// exactly with waveforms of the same frequency // exactly with waveforms of the same frequency
index = myftol( tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE ); index = Q_ftol(tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE);
index >>= FUNCTABLE_SIZE2; index >>= FUNCTABLE_SIZE2;
if ( index < 0 ) { if ( index < 0 ) {
@ -689,9 +689,9 @@ static void ProjectDlightTexture_scalar( void ) {
} }
} }
clipBits[i] = clip; clipBits[i] = clip;
colors[0] = myftol(floatColor[0] * modulate); colors[0] = Q_ftol(floatColor[0] * modulate);
colors[1] = myftol(floatColor[1] * modulate); colors[1] = Q_ftol(floatColor[1] * modulate);
colors[2] = myftol(floatColor[2] * modulate); colors[2] = Q_ftol(floatColor[2] * modulate);
colors[3] = 255; colors[3] = 255;
} }

View file

@ -27,7 +27,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#endif #endif
#define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ myftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude)) #define WAVEVALUE( table, base, amplitude, phase, freq ) ((base) + table[ Q_ftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude))
static float *TableForFunc( genFunc_t func ) static float *TableForFunc( genFunc_t func )
{ {
@ -699,7 +699,7 @@ void RB_CalcWaveColor( const waveForm_t *wf, unsigned char *dstColors )
glow = 1; glow = 1;
} }
v = myftol( 255 * glow ); v = Q_ftol(255 * glow);
color[0] = color[1] = color[2] = v; color[0] = color[1] = color[2] = v;
color[3] = 255; color[3] = 255;
v = *(int *)color; v = *(int *)color;
@ -1018,21 +1018,6 @@ void RB_CalcRotateTexCoords( float degsPerSecond, float *st )
} }
#if id386 && !defined(__GNUC__)
// Float -> integer conversion performed on the x87 FPU with MSVC-style __asm
// statements; only compiled when id386 is set and the compiler is not GCC.
long myftol( float f ) {
// scratch slot that fistp stores into; being static, it is shared by every call
static int tmp;
// push f onto the FPU register stack
__asm fld f
// pop it as an integer; fistp rounds according to the FPU's current rounding
// mode rather than truncating the way a C cast does
__asm fistp tmp
// there is no C return statement: the result is left in eax, which this
// function relies on as its return register
__asm mov eax, tmp
}
#endif
/* /*
** RB_CalcSpecularAlpha ** RB_CalcSpecularAlpha
** **
@ -1195,19 +1180,19 @@ static void RB_CalcDiffuseColor_scalar( unsigned char *colors )
*(int *)&colors[i*4] = ambientLightInt; *(int *)&colors[i*4] = ambientLightInt;
continue; continue;
} }
j = myftol( ambientLight[0] + incoming * directedLight[0] ); j = Q_ftol(ambientLight[0] + incoming * directedLight[0]);
if ( j > 255 ) { if ( j > 255 ) {
j = 255; j = 255;
} }
colors[i*4+0] = j; colors[i*4+0] = j;
j = myftol( ambientLight[1] + incoming * directedLight[1] ); j = Q_ftol(ambientLight[1] + incoming * directedLight[1]);
if ( j > 255 ) { if ( j > 255 ) {
j = 255; j = 255;
} }
colors[i*4+1] = j; colors[i*4+1] = j;
j = myftol( ambientLight[2] + incoming * directedLight[2] ); j = Q_ftol(ambientLight[2] + incoming * directedLight[2]);
if ( j > 255 ) { if ( j > 255 ) {
j = 255; j = 255;
} }

View file

@ -553,10 +553,10 @@ static void FillCloudBox( const shader_t *shader, int stage )
continue; continue;
} }
sky_mins_subd[0] = myftol( sky_mins[0][i] * HALF_SKY_SUBDIVISIONS ); sky_mins_subd[0] = Q_ftol(sky_mins[0][i] * HALF_SKY_SUBDIVISIONS);
sky_mins_subd[1] = myftol( sky_mins[1][i] * HALF_SKY_SUBDIVISIONS ); sky_mins_subd[1] = Q_ftol(sky_mins[1][i] * HALF_SKY_SUBDIVISIONS);
sky_maxs_subd[0] = myftol( sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS ); sky_maxs_subd[0] = Q_ftol(sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS);
sky_maxs_subd[1] = myftol( sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS ); sky_maxs_subd[1] = Q_ftol(sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS);
if ( sky_mins_subd[0] < -HALF_SKY_SUBDIVISIONS ) if ( sky_mins_subd[0] < -HALF_SKY_SUBDIVISIONS )
sky_mins_subd[0] = -HALF_SKY_SUBDIVISIONS; sky_mins_subd[0] = -HALF_SKY_SUBDIVISIONS;

View file

@ -426,7 +426,7 @@ intptr_t SV_GameSystemCalls( intptr_t *args ) {
case G_REAL_TIME: case G_REAL_TIME:
return Com_RealTime( VMA(1) ); return Com_RealTime( VMA(1) );
case G_SNAPVECTOR: case G_SNAPVECTOR:
Sys_SnapVector( VMA(1) ); Q_SnapVector(VMA(1));
return 0; return 0;
//==================================== //====================================

View file

@ -416,8 +416,8 @@ Used to load a development dll instead of a virtual machine
#2 look in fs_basepath #2 look in fs_basepath
================= =================
*/ */
void *Sys_LoadDll( const char *name, void * QDECL Sys_LoadDll( const char *name,
intptr_t (**entryPoint)(int, ...), intptr_t (QDECL **entryPoint)(int, ...),
intptr_t (*systemcalls)(intptr_t, ...) ) intptr_t (*systemcalls)(intptr_t, ...) )
{ {
void *libHandle; void *libHandle;

View file

@ -36,6 +36,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <pwd.h> #include <pwd.h>
#include <libgen.h> #include <libgen.h>
#include <fcntl.h> #include <fcntl.h>
#include <fenv.h>
qboolean stdinIsATTY; qboolean stdinIsATTY;
@ -125,31 +126,6 @@ int Sys_Milliseconds (void)
return curtime; return curtime;
} }
#if !id386
/*
==================
fastftol

Plain C float -> long conversion: the cast discards the fractional part,
so the value is truncated towards zero.
==================
*/
long fastftol( float f )
{
	const long truncated = (long)f;

	return truncated;
}
/*
==================
Sys_SnapVector
==================
*/
void Sys_SnapVector( float *v )
{
v[0] = rint(v[0]);
v[1] = rint(v[1]);
v[2] = rint(v[2]);
}
#endif
/* /*
================== ==================
Sys_RandomBytes Sys_RandomBytes
@ -756,6 +732,12 @@ void Sys_GLimpInit( void )
// NOP // NOP
} }
void Sys_SetFloatEnv(void)
{
// rounding towards 0
fesetround(FE_TOWARDZERO);
}
/* /*
============== ==============
Sys_PlatformInit Sys_PlatformInit

View file

@ -37,6 +37,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include <wincrypt.h> #include <wincrypt.h>
#include <shlobj.h> #include <shlobj.h>
#include <psapi.h> #include <psapi.h>
#include <float.h>
// Used to determine where to store user-specific files // Used to determine where to store user-specific files
static char homePath[ MAX_OSPATH ] = { 0 }; static char homePath[ MAX_OSPATH ] = { 0 };
@ -45,15 +46,39 @@ static char homePath[ MAX_OSPATH ] = { 0 };
static UINT timerResolution = 0; static UINT timerResolution = 0;
#endif #endif
#ifdef __WIN64__ /*
void Sys_SnapVector( float *v ) ================
{ Sys_SetFPUCW
v[0] = rint(v[0]); Set FPU control word to default value
v[1] = rint(v[1]); ================
v[2] = rint(v[2]); */
}
#ifndef _RC_CHOP
// mingw doesn't seem to have these defined :(
#define _MCW_EM 0x0008001fU
#define _MCW_RC 0x00000300U
#define _MCW_PC 0x00030000U
#define _RC_CHOP 0x00000300U
#define _PC_53 0x00010000U
unsigned int _controlfp(unsigned int new, unsigned int mask);
#endif #endif
// Control-word fields that are rewritten on every build: the rounding-control
// field and the exception-mask bits.
#define FPUCWMASK1 (_MCW_RC | _MCW_EM)
// Value to install: chop rounding (_RC_CHOP), every bit of the exception-mask
// field set (_MCW_EM is used here as a value, not as a mask), 53-bit precision.
#define FPUCW (_RC_CHOP | _MCW_EM | _PC_53)
#ifdef idx64
// x64: the precision-control field is left out of the mask, so the _PC_53 bit
// in FPUCW is not applied.
// NOTE(review): presumably because the x64 CRT does not support changing the
// precision control -- confirm against the _controlfp documentation.
#define FPUCWMASK (FPUCWMASK1)
#else
// otherwise the precision-control field is updated as well
#define FPUCWMASK (FPUCWMASK1 | _MCW_PC)
#endif
// Installs the FPUCW settings into the floating-point control word; only the
// fields selected by FPUCWMASK are changed.
void Sys_SetFloatEnv(void)
{
_controlfp(FPUCW, FPUCWMASK);
}
/* /*
================ ================
Sys_DefaultHomePath Sys_DefaultHomePath
@ -140,34 +165,6 @@ int Sys_Milliseconds (void)
return sys_curtime; return sys_curtime;
} }
#ifndef __GNUC__ //see snapvectora.s
/*
================
Sys_SnapVector

Converts the three consecutive floats starting at v to integral values, in
place, using the x87 FPU through MSVC-style __asm statements. Only compiled
when the compiler is not GCC.
================
*/
void Sys_SnapVector( float *v )
{
// i receives the integer written by fistp; f is a local copy of the current
// component so the asm can name it as an operand
int i;
float f;
// first component
f = *v;
__asm fld f;
// fistp pops the value as an integer, rounding with the FPU's current
// rounding mode
__asm fistp i;
*v = i;
v++;
// second component, same sequence
f = *v;
__asm fld f;
__asm fistp i;
*v = i;
v++;
// third component, same sequence
f = *v;
__asm fld f;
__asm fistp i;
*v = i;
}
#endif
/* /*
================ ================
Sys_RandomBytes Sys_RandomBytes
@ -719,9 +716,12 @@ void Sys_PlatformInit( void )
{ {
#ifndef DEDICATED #ifndef DEDICATED
TIMECAPS ptc; TIMECAPS ptc;
const char *SDL_VIDEODRIVER = getenv( "SDL_VIDEODRIVER" ); const char *SDL_VIDEODRIVER = getenv( "SDL_VIDEODRIVER" );
#endif
Sys_SetFloatEnv();
#ifndef DEDICATED
if( SDL_VIDEODRIVER ) if( SDL_VIDEODRIVER )
{ {
Com_Printf( "SDL_VIDEODRIVER is externally set to \"%s\", " Com_Printf( "SDL_VIDEODRIVER is externally set to \"%s\", "