mirror of
https://github.com/UberGames/lilium-voyager.git
synced 2024-11-10 14:41:42 +00:00
* Fix the MinGW and (hopefully) OS X builds
* Remove custom memcpy/memset code
This commit is contained in:
parent
2d9d10772f
commit
6e24cfe7d3
5 changed files with 22 additions and 684 deletions
|
@ -2857,316 +2857,6 @@ void Com_Shutdown (void) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if I_WANT_A_CUSTOM_MEMCPY && !defined(_WIN32)
|
|
||||||
/*
 * Com_Memcpy
 *
 * Copies count bytes from src to dest by forwarding directly to the
 * C library memcpy().
 */
void Com_Memcpy (void* dest, const void* src, const size_t count)
{
	(void)memcpy(dest, src, count);
}
|
|
||||||
|
|
||||||
/*
 * Com_Memset
 *
 * Fills count bytes at dest with val by forwarding directly to the
 * C library memset().
 */
void Com_Memset (void* dest, const int val, const size_t count)
{
	(void)memset(dest, val, count);
}
|
|
||||||
|
|
||||||
#elif I_WANT_A_CUSTOM_MEMCPY && defined(_WIN32)
|
|
||||||
|
|
||||||
/* Intended access pattern of a buffer handed to Com_Prefetch(). */
typedef enum {
	PRE_READ = 0,       /* buffer will only be read */
	PRE_WRITE = 1,      /* buffer will only be written */
	PRE_READ_WRITE = 2  /* buffer will be both read and written */
} e_prefetch;

void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);

#define EMMS_INSTRUCTION __asm emms
|
|
||||||
|
|
||||||
/*
 * _copyDWord
 *
 * Stores `constant` into `count` consecutive 32-bit words starting at `dest`.
 * A count of zero writes nothing.
 *
 * Written in plain C rather than MSVC-only inline assembly so that this
 * branch also builds with compilers that do not understand `__asm { }`
 * blocks. The set of words written is the same as the assembly version:
 * exactly dest[0] .. dest[count-1].
 */
void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
	unsigned int i;

	for (i = 0; i < count; i++) {
		dest[i] = constant;
	}
}
|
|
||||||
|
|
||||||
/*
 * Com_Memcpy
 *
 * Copies `count` bytes from `src` to `dest`. The regions must not overlap.
 * A count of zero is a no-op and touches neither pointer.
 *
 * Delegates to the C library memcpy() instead of hand-written MSVC inline
 * assembly: the library routine handles every alignment and size, treats
 * `count` as unsigned (the assembly compared it as a signed value), and
 * builds with any compiler.
 */
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	if (count == 0) {
		return;
	}

	memcpy(dest, src, count);
}
|
|
||||||
|
|
||||||
/*
 * Com_Memset
 *
 * Sets `count` bytes starting at `dest` to the low 8 bits of `val`.
 * A count of zero is a no-op.
 *
 * Delegates to the C library memset(), which converts `val` to
 * unsigned char. The previous implementation built its 32-bit fill
 * pattern from the full int for counts of 8 or more, so a `val` outside
 * 0..255 produced a different pattern than the short-count path, which
 * used only the low byte.
 */
void Com_Memset (void* dest, const int val, const size_t count)
{
	if (count == 0) {
		return;
	}

	memset(dest, val, count);
}
|
|
||||||
|
|
||||||
qboolean Com_Memcmp (const void *src0, const void *src1, const unsigned int count)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
// MMX version anyone?
|
|
||||||
|
|
||||||
if (count >= 16)
|
|
||||||
{
|
|
||||||
unsigned int *dw = (unsigned int*)(src0);
|
|
||||||
unsigned int *sw = (unsigned int*)(src1);
|
|
||||||
|
|
||||||
unsigned int nm2 = count/16;
|
|
||||||
for (i = 0; i < nm2; i+=4)
|
|
||||||
{
|
|
||||||
unsigned int tmp = (dw[i+0]-sw[i+0])|(dw[i+1]-sw[i+1])|
|
|
||||||
(dw[i+2]-sw[i+2])|(dw[i+3]-sw[i+3]);
|
|
||||||
if (tmp)
|
|
||||||
return qfalse;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count & 15)
|
|
||||||
{
|
|
||||||
byte *d = (byte*)src0;
|
|
||||||
byte *s = (byte*)src1;
|
|
||||||
for (i = count & 0xfffffff0; i < count; i++)
|
|
||||||
if (d[i]!=s[i])
|
|
||||||
return qfalse;
|
|
||||||
}
|
|
||||||
|
|
||||||
return qtrue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Com_Prefetch
 *
 * Touches the buffer at `s` so that it is pulled into the cache before use.
 *
 *   s     - start of the buffer
 *   bytes - size of the buffer; at most the first 4096 bytes are touched
 *   type  - PRE_WRITE does nothing; PRE_READ and PRE_READ_WRITE read one
 *           byte at every 32-byte step across the (clamped) range
 *
 * The 4 kB clamp is applied as an unsigned comparison. The earlier
 * assembly used a signed compare, so sizes of 0x80000000 and above were
 * not clamped and the touch loop ran far past the buffer.
 */
void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
{
	const volatile unsigned char *p = (const volatile unsigned char *)s;
	unsigned int span;
	unsigned int off;

	switch (type)
	{
	case PRE_WRITE:
		// write-only buffers are not prefetched
		break;

	case PRE_READ:
	case PRE_READ_WRITE:
		span = (bytes > 4096u) ? 4096u : bytes;	// clamp to 4kB

		// one volatile read per 32-byte step; the value itself is unused
		for (off = 0; off < span; off += 32u) {
			(void)p[off];
		}
		break;

	default:
		break;
	}
}
|
|
||||||
#endif
|
|
||||||
//------------------------------------------------------------------------
|
//------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -38,13 +38,8 @@ void MD4Init (MD4_CTX *);
|
||||||
void MD4Update (MD4_CTX *, const unsigned char *, unsigned int);
|
void MD4Update (MD4_CTX *, const unsigned char *, unsigned int);
|
||||||
void MD4Final (unsigned char [16], MD4_CTX *);
|
void MD4Final (unsigned char [16], MD4_CTX *);
|
||||||
|
|
||||||
#if I_WANT_A_CUSTOM_MEMCPY
|
|
||||||
void Com_Memset (void* dest, const int val, const size_t count);
|
|
||||||
void Com_Memcpy (void* dest, const void* src, const size_t count);
|
|
||||||
#else
|
|
||||||
#define Com_Memset memset
|
#define Com_Memset memset
|
||||||
#define Com_Memcpy memcpy
|
#define Com_Memcpy memcpy
|
||||||
#endif
|
|
||||||
|
|
||||||
/* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm */
|
/* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm */
|
||||||
/* Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
|
/* Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
|
||||||
|
|
|
@ -243,13 +243,8 @@ void Snd_Memset (void* dest, const int val, const size_t count);
|
||||||
#define Snd_Memset Com_Memset
|
#define Snd_Memset Com_Memset
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if I_WANT_A_CUSTOM_MEMCPY
|
|
||||||
void Com_Memset (void* dest, const int val, const size_t count);
|
|
||||||
void Com_Memcpy (void* dest, const void* src, const size_t count);
|
|
||||||
#else
|
|
||||||
#define Com_Memset memset
|
#define Com_Memset memset
|
||||||
#define Com_Memcpy memcpy
|
#define Com_Memcpy memcpy
|
||||||
#endif
|
|
||||||
|
|
||||||
#define CIN_system 1
|
#define CIN_system 1
|
||||||
#define CIN_loop 2
|
#define CIN_loop 2
|
||||||
|
|
|
@ -350,6 +350,9 @@ ifeq ($(PLATFORM),mingw32)
|
||||||
LDFLAGS+=-m32
|
LDFLAGS+=-m32
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
BUILD_SERVER = 0
|
||||||
|
BUILD_CLIENT_SMP = 0
|
||||||
|
|
||||||
else # ifeq mingw32
|
else # ifeq mingw32
|
||||||
|
|
||||||
#############################################################################
|
#############################################################################
|
||||||
|
@ -823,9 +826,19 @@ Q3OBJ = \
|
||||||
|
|
||||||
ifeq ($(ARCH),i386)
|
ifeq ($(ARCH),i386)
|
||||||
Q3OBJ += $(B)/client/vm_x86.o
|
Q3OBJ += $(B)/client/vm_x86.o
|
||||||
|
Q3OBJ += \
|
||||||
|
$(B)/client/snd_mixa.o \
|
||||||
|
$(B)/client/matha.o \
|
||||||
|
$(B)/client/ftola.o \
|
||||||
|
$(B)/client/snapvectora.o
|
||||||
endif
|
endif
|
||||||
ifeq ($(ARCH),x86)
|
ifeq ($(ARCH),x86)
|
||||||
Q3OBJ += $(B)/client/vm_x86.o
|
Q3OBJ += $(B)/client/vm_x86.o
|
||||||
|
Q3OBJ += \
|
||||||
|
$(B)/client/snd_mixa.o \
|
||||||
|
$(B)/client/matha.o \
|
||||||
|
$(B)/client/ftola.o \
|
||||||
|
$(B)/client/snapvectora.o
|
||||||
endif
|
endif
|
||||||
ifeq ($(ARCH),x86_64)
|
ifeq ($(ARCH),x86_64)
|
||||||
Q3OBJ += $(B)/client/vm_x86_64.o
|
Q3OBJ += $(B)/client/vm_x86_64.o
|
||||||
|
@ -837,21 +850,6 @@ ifeq ($(ARCH),ppc)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
Q3OBJ += \
|
|
||||||
$(B)/client/linux_common.o \
|
|
||||||
\
|
|
||||||
$(B)/client/snd_mixa.o \
|
|
||||||
$(B)/client/matha.o \
|
|
||||||
$(B)/client/ftola.o \
|
|
||||||
$(B)/client/snapvectora.o \
|
|
||||||
\
|
|
||||||
$(B)/client/unix_main.o \
|
|
||||||
$(B)/client/unix_net.o \
|
|
||||||
$(B)/client/unix_shared.o \
|
|
||||||
$(B)/client/linux_signals.o \
|
|
||||||
$(B)/client/linux_qgl.o \
|
|
||||||
$(B)/client/linux_snd.o \
|
|
||||||
$(B)/client/sdl_snd.o
|
|
||||||
|
|
||||||
ifeq ($(PLATFORM),mingw32)
|
ifeq ($(PLATFORM),mingw32)
|
||||||
Q3OBJ += \
|
Q3OBJ += \
|
||||||
|
@ -867,6 +865,15 @@ ifeq ($(PLATFORM),mingw32)
|
||||||
$(B)/client/win_wndproc.o \
|
$(B)/client/win_wndproc.o \
|
||||||
$(B)/client/win_resource.o
|
$(B)/client/win_resource.o
|
||||||
else
|
else
|
||||||
|
Q3OBJ += \
|
||||||
|
$(B)/client/unix_main.o \
|
||||||
|
$(B)/client/unix_net.o \
|
||||||
|
$(B)/client/unix_shared.o \
|
||||||
|
$(B)/client/linux_signals.o \
|
||||||
|
$(B)/client/linux_qgl.o \
|
||||||
|
$(B)/client/linux_snd.o \
|
||||||
|
$(B)/client/sdl_snd.o
|
||||||
|
|
||||||
ifeq ($(PLATFORM),linux)
|
ifeq ($(PLATFORM),linux)
|
||||||
Q3OBJ += $(B)/client/linux_joystick.o
|
Q3OBJ += $(B)/client/linux_joystick.o
|
||||||
endif
|
endif
|
||||||
|
@ -1051,7 +1058,6 @@ $(B)/client/irix_glimp_smp.o : $(UDIR)/irix_glimp.c; $(DO_SMP_CC)
|
||||||
$(B)/client/irix_snd.o : $(UDIR)/irix_snd.c; $(DO_CC)
|
$(B)/client/irix_snd.o : $(UDIR)/irix_snd.c; $(DO_CC)
|
||||||
$(B)/client/irix_input.o : $(UDIR)/irix_input.c; $(DO_CC)
|
$(B)/client/irix_input.o : $(UDIR)/irix_input.c; $(DO_CC)
|
||||||
$(B)/client/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_CC) $(GL_CFLAGS)
|
$(B)/client/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_CC) $(GL_CFLAGS)
|
||||||
$(B)/client/linux_common.o : $(UDIR)/linux_common.c; $(DO_CC)
|
|
||||||
$(B)/client/linux_glimp.o : $(UDIR)/linux_glimp.c; $(DO_CC) $(GL_CFLAGS)
|
$(B)/client/linux_glimp.o : $(UDIR)/linux_glimp.c; $(DO_CC) $(GL_CFLAGS)
|
||||||
$(B)/client/sdl_glimp.o : $(UDIR)/sdl_glimp.c; $(DO_CC) $(GL_CFLAGS)
|
$(B)/client/sdl_glimp.o : $(UDIR)/sdl_glimp.c; $(DO_CC) $(GL_CFLAGS)
|
||||||
$(B)/client/linux_glimp_smp.o : $(UDIR)/linux_glimp.c; $(DO_SMP_CC) $(GL_CFLAGS)
|
$(B)/client/linux_glimp_smp.o : $(UDIR)/linux_glimp.c; $(DO_SMP_CC) $(GL_CFLAGS)
|
||||||
|
@ -1154,7 +1160,6 @@ Q3DOBJ = \
|
||||||
$(B)/ded/l_struct.o \
|
$(B)/ded/l_struct.o \
|
||||||
\
|
\
|
||||||
$(B)/ded/linux_signals.o \
|
$(B)/ded/linux_signals.o \
|
||||||
$(B)/ded/linux_common.o \
|
|
||||||
$(B)/ded/unix_main.o \
|
$(B)/ded/unix_main.o \
|
||||||
$(B)/ded/unix_net.o \
|
$(B)/ded/unix_net.o \
|
||||||
$(B)/ded/unix_shared.o \
|
$(B)/ded/unix_shared.o \
|
||||||
|
@ -1236,7 +1241,6 @@ $(B)/ded/l_script.o : $(BLIBDIR)/l_script.c; $(DO_BOT_CC)
|
||||||
$(B)/ded/l_struct.o : $(BLIBDIR)/l_struct.c; $(DO_BOT_CC)
|
$(B)/ded/l_struct.o : $(BLIBDIR)/l_struct.c; $(DO_BOT_CC)
|
||||||
|
|
||||||
$(B)/ded/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_DED_CC)
|
$(B)/ded/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_DED_CC)
|
||||||
$(B)/ded/linux_common.o : $(UDIR)/linux_common.c; $(DO_DED_CC)
|
|
||||||
$(B)/ded/unix_main.o : $(UDIR)/unix_main.c; $(DO_DED_CC)
|
$(B)/ded/unix_main.o : $(UDIR)/unix_main.c; $(DO_DED_CC)
|
||||||
$(B)/ded/unix_net.o : $(UDIR)/unix_net.c; $(DO_DED_CC)
|
$(B)/ded/unix_net.o : $(UDIR)/unix_net.c; $(DO_DED_CC)
|
||||||
$(B)/ded/unix_shared.o : $(UDIR)/unix_shared.c; $(DO_DED_CC)
|
$(B)/ded/unix_shared.o : $(UDIR)/unix_shared.c; $(DO_DED_CC)
|
||||||
|
|
|
@ -1,346 +0,0 @@
|
||||||
#if 0 // not used anymore
|
|
||||||
/*
|
|
||||||
===========================================================================
|
|
||||||
Copyright (C) 1999-2005 Id Software, Inc.
|
|
||||||
|
|
||||||
This file is part of Quake III Arena source code.
|
|
||||||
|
|
||||||
Quake III Arena source code is free software; you can redistribute it
|
|
||||||
and/or modify it under the terms of the GNU General Public License as
|
|
||||||
published by the Free Software Foundation; either version 2 of the License,
|
|
||||||
or (at your option) any later version.
|
|
||||||
|
|
||||||
Quake III Arena source code is distributed in the hope that it will be
|
|
||||||
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with Quake III Arena source code; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
===========================================================================
|
|
||||||
*/
|
|
||||||
/**
|
|
||||||
* GAS syntax equivalents of the MSVC asm memory calls in common.c
|
|
||||||
*
|
|
||||||
* The following changes have been made to the asm:
|
|
||||||
* 1. Registers are loaded by the inline asm arguments when possible
|
|
||||||
* 2. Labels have been changed to local label format (0,1,etc.) to allow inlining
|
|
||||||
*
|
|
||||||
* HISTORY:
|
|
||||||
* AH - Created on 08 Dec 2000
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <unistd.h> // AH - for size_t
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
// bk001207 - we need something under Linux, too. Mac?
|
|
||||||
#if 1 // defined(C_ONLY) // bk010102 - dedicated?
|
|
||||||
/* Copies count bytes from src to dest via the C library memcpy(). */
void Com_Memcpy (void* dest, const void* src, const size_t count)
{
	(void)memcpy(dest, src, count);
}
|
|
||||||
|
|
||||||
/* Fills count bytes at dest with val via the C library memset(). */
void Com_Memset (void* dest, const int val, const size_t count)
{
	(void)memset(dest, val, count);
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
/* Intended access pattern of a buffer handed to Com_Prefetch(). */
typedef enum
{
	PRE_READ = 0,       /* buffer will only be read */
	PRE_WRITE = 1,      /* buffer will only be written */
	PRE_READ_WRITE = 2  /* buffer will be both read and written */
} e_prefetch;

void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);
|
|
||||||
|
|
||||||
/*
 * _copyDWord
 *
 * Stores `constant` into `count` consecutive 32-bit words starting at `dest`.
 * A count of zero writes nothing.
 *
 * Plain C replacement for the MMX inline assembly. That version wrote one
 * word and decremented the counter whenever `dest` was not 8-byte aligned,
 * without first checking that count was non-zero, and it modified registers
 * that were passed to the asm statement as inputs only.
 */
void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
	unsigned int i;

	for (i = 0; i < count; i++) {
		dest[i] = constant;
	}
}
|
|
||||||
|
|
||||||
/*
 * Com_Memcpy
 *
 * Copies `count` bytes from `src` to `dest`. The regions must not overlap.
 * A count of zero is a no-op.
 *
 * Delegates to the C library memcpy(). The inline assembly it replaces
 * masked the count with 0xfffffe00 where the accompanying comment says
 * count&~31 (0xffffffe0), so for sizes from 32 to 511 bytes the block
 * loop started at offset -32 and copied memory located before both buffers.
 */
void Com_Memcpy (void* dest, const void* src, const size_t count) {
	if (count == 0) {
		return;
	}

	memcpy(dest, src, count);
}
|
|
||||||
|
|
||||||
/*
 * Com_Memset
 *
 * Sets `count` bytes starting at `dest` to the low 8 bits of `val`.
 * A count of zero is a no-op.
 *
 * Delegates to the C library memset(). The inline assembly it replaces
 * masked the count with 0xffffff00 instead of ~3 when locating the 0-3
 * trailing bytes, so they were written at the wrong offset, and for
 * counts of 8 or more it built the fill pattern from the whole int
 * rather than from its low byte.
 */
void Com_Memset (void* dest, const int val, const size_t count)
{
	if (count == 0) {
		return;
	}

	memset(dest, val, count);
}
|
|
||||||
|
|
||||||
/*
 * Com_Prefetch
 *
 * Touches the buffer at `s` so that it is pulled into the cache before use.
 *
 *   s     - start of the buffer
 *   bytes - size of the buffer; at most the first 4096 bytes are touched
 *   type  - PRE_WRITE does nothing; PRE_READ and PRE_READ_WRITE read one
 *           byte at every 32-byte step across the (clamped) range
 *
 * Plain C replacement for the inline assembly, which clamped with a
 * signed compare (sizes of 0x80000000 and above were not clamped) and
 * modified registers that were passed to the asm statement as inputs only.
 */
void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
{
	const volatile unsigned char *p = (const volatile unsigned char *)s;
	unsigned int span;
	unsigned int off;

	switch (type)
	{
	case PRE_WRITE:
		// write-only buffers are not prefetched
		break;

	case PRE_READ:
	case PRE_READ_WRITE:
		span = (bytes > 4096u) ? 4096u : bytes;	// clamp to 4kB

		// one volatile read per 32-byte step; the value itself is unused
		for (off = 0; off < span; off += 32u) {
			(void)p[off];
		}
		break;

	default:
		break;
	}
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
|
Loading…
Reference in a new issue