* Fix the MinGW and (hopefully) OS X builds

* Remove custom memcpy/memset code
2006-01-04 03:40:49 +00:00 · 2006-01-04 03:40:49 +00:00 · 6e24cfe7d3
parent 2d9d10772f
commit 6e24cfe7d3
5 changed files with 22 additions and 684 deletions
--- a/code/qcommon/common.c
+++ b/code/qcommon/common.c
@@ -2857,316 +2857,6 @@ void Com_Shutdown (void) {

 }

-#if I_WANT_A_CUSTOM_MEMCPY && !defined(_WIN32)
-void Com_Memcpy (void* dest, const void* src, const size_t count)
-{
-	memcpy(dest, src, count);
-}
-
-void Com_Memset (void* dest, const int val, const size_t count)
-{
-	memset(dest, val, count);
-}
-
-#elif I_WANT_A_CUSTOM_MEMCPY && defined(_WIN32)
-
-typedef enum
-{
-	PRE_READ,									// prefetch assuming that buffer is used for reading only
-	PRE_WRITE,									// prefetch assuming that buffer is used for writing only
-	PRE_READ_WRITE								// prefetch assuming that buffer is used for both reading and writing
-} e_prefetch;
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);
-
-#define EMMS_INSTRUCTION	__asm emms
-
-void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
-	__asm
-	{
-			mov		edx,dest
-			mov		eax,constant
-			mov		ecx,count
-			and		ecx,~7
-			jz		padding
-			sub		ecx,8
-			jmp		loopu
-			align	16
-loopu:		
-			test	[edx+ecx*4 + 28],ebx		// fetch next block destination to L1 cache
-			mov		[edx+ecx*4 + 0],eax
-			mov		[edx+ecx*4 + 4],eax
-			mov		[edx+ecx*4 + 8],eax
-			mov		[edx+ecx*4 + 12],eax
-			mov		[edx+ecx*4 + 16],eax
-			mov		[edx+ecx*4 + 20],eax
-			mov		[edx+ecx*4 + 24],eax
-			mov		[edx+ecx*4 + 28],eax
-			sub		ecx,8
-			jge		loopu
-padding:	mov		ecx,count
-			mov		ebx,ecx
-			and		ecx,7
-			jz		outta
-			and		ebx,~7
-			lea		edx,[edx+ebx*4]				// advance dest pointer
-			test	[edx+0],eax					// fetch destination to L1 cache
-			cmp		ecx,4
-			jl		skip4
-			mov		[edx+0],eax
-			mov		[edx+4],eax
-			mov		[edx+8],eax
-			mov		[edx+12],eax
-			add		edx,16
-			sub		ecx,4
-skip4:		cmp		ecx,2
-			jl		skip2
-			mov		[edx+0],eax
-			mov		[edx+4],eax
-			add		edx,8
-			sub		ecx,2
-skip2:		cmp		ecx,1
-			jl		outta
-			mov		[edx+0],eax
-outta:
-	}
-}
-
-// optimized memory copy routine that handles all alignment
-// cases and block sizes efficiently
-void Com_Memcpy (void* dest, const void* src, const size_t count) {
-	Com_Prefetch (src, count, PRE_READ);
-	__asm
-	{
-		push	edi
-		push	esi
-		mov		ecx,count
-		cmp		ecx,0						// count = 0 check (just to be on the safe side)
-		je		outta
-		mov		edx,dest
-		mov		ebx,src
-		cmp		ecx,32						// padding only?
-		jl		padding
-
-		mov		edi,ecx					
-		and		edi,~31					// edi = count&~31
-		sub		edi,32
-
-		align 16
-loopMisAligned:
-		mov		eax,[ebx + edi + 0 + 0*8]
-		mov		esi,[ebx + edi + 4 + 0*8]
-		mov		[edx+edi+0 + 0*8],eax
-		mov		[edx+edi+4 + 0*8],esi
-		mov		eax,[ebx + edi + 0 + 1*8]
-		mov		esi,[ebx + edi + 4 + 1*8]
-		mov		[edx+edi+0 + 1*8],eax
-		mov		[edx+edi+4 + 1*8],esi
-		mov		eax,[ebx + edi + 0 + 2*8]
-		mov		esi,[ebx + edi + 4 + 2*8]
-		mov		[edx+edi+0 + 2*8],eax
-		mov		[edx+edi+4 + 2*8],esi
-		mov		eax,[ebx + edi + 0 + 3*8]
-		mov		esi,[ebx + edi + 4 + 3*8]
-		mov		[edx+edi+0 + 3*8],eax
-		mov		[edx+edi+4 + 3*8],esi
-		sub		edi,32
-		jge		loopMisAligned
-		
-		mov		edi,ecx
-		and		edi,~31
-		add		ebx,edi					// increase src pointer
-		add		edx,edi					// increase dst pointer
-		and		ecx,31					// new count
-		jz		outta					// if count = 0, get outta here
-
-padding:
-		cmp		ecx,16
-		jl		skip16
-		mov		eax,dword ptr [ebx]
-		mov		dword ptr [edx],eax
-		mov		eax,dword ptr [ebx+4]
-		mov		dword ptr [edx+4],eax
-		mov		eax,dword ptr [ebx+8]
-		mov		dword ptr [edx+8],eax
-		mov		eax,dword ptr [ebx+12]
-		mov		dword ptr [edx+12],eax
-		sub		ecx,16
-		add		ebx,16
-		add		edx,16
-skip16:
-		cmp		ecx,8
-		jl		skip8
-		mov		eax,dword ptr [ebx]
-		mov		dword ptr [edx],eax
-		mov		eax,dword ptr [ebx+4]
-		sub		ecx,8
-		mov		dword ptr [edx+4],eax
-		add		ebx,8
-		add		edx,8
-skip8:
-		cmp		ecx,4
-		jl		skip4
-		mov		eax,dword ptr [ebx]	// here 4-7 bytes
-		add		ebx,4
-		sub		ecx,4
-		mov		dword ptr [edx],eax
-		add		edx,4
-skip4:							// 0-3 remaining bytes
-		cmp		ecx,2
-		jl		skip2
-		mov		ax,word ptr [ebx]	// two bytes
-		cmp		ecx,3				// less than 3?
-		mov		word ptr [edx],ax
-		jl		outta
-		mov		al,byte ptr [ebx+2]	// last byte
-		mov		byte ptr [edx+2],al
-		jmp		outta
-skip2:
-		cmp		ecx,1
-		jl		outta
-		mov		al,byte ptr [ebx]
-		mov		byte ptr [edx],al
-outta:
-		pop		esi
-		pop		edi
-	}
-}
-
-void Com_Memset (void* dest, const int val, const size_t count)
-{
-	unsigned int fillval;
-
-	if (count < 8)
-	{
-		__asm
-		{
-			mov		edx,dest
-			mov		eax, val
-			mov		ah,al
-			mov		ebx,eax
-			and		ebx, 0xffff
-			shl		eax,16
-			add		eax,ebx				// eax now contains pattern
-			mov		ecx,count
-			cmp		ecx,4
-			jl		skip4
-			mov		[edx],eax			// copy first dword
-			add		edx,4
-			sub		ecx,4
-	skip4:	cmp		ecx,2
-			jl		skip2
-			mov		word ptr [edx],ax	// copy 2 bytes
-			add		edx,2
-			sub		ecx,2
-	skip2:	cmp		ecx,0
-			je		skip1
-			mov		byte ptr [edx],al	// copy single byte
-	skip1:
-		}
-		return;
-	}
-
-	fillval = val;
-	
-	fillval = fillval|(fillval<<8);
-	fillval = fillval|(fillval<<16);		// fill dword with 8-bit pattern
-
-	_copyDWord ((unsigned int*)(dest),fillval, count/4);
-	
-	__asm									// padding of 0-3 bytes
-	{
-		mov		ecx,count
-		mov		eax,ecx
-		and		ecx,3
-		jz		skipA
-		and		eax,~3
-		mov		ebx,dest
-		add		ebx,eax
-		mov		eax,fillval
-		cmp		ecx,2
-		jl		skipB
-		mov		word ptr [ebx],ax
-		cmp		ecx,2
-		je		skipA					
-		mov		byte ptr [ebx+2],al		
-		jmp		skipA
-skipB:		
-		cmp		ecx,0
-		je		skipA
-		mov		byte ptr [ebx],al
-skipA:
-	}
-}
-
-qboolean Com_Memcmp (const void *src0, const void *src1, const unsigned int count)
-{
-	unsigned int i;
-	// MMX version anyone?
-
-	if (count >= 16)
-	{
-		unsigned int *dw = (unsigned int*)(src0);
-		unsigned int *sw = (unsigned int*)(src1);
-
-		unsigned int nm2 = count/16;
-		for (i = 0; i < nm2; i+=4)
-		{
-			unsigned int tmp = (dw[i+0]-sw[i+0])|(dw[i+1]-sw[i+1])|
-						  (dw[i+2]-sw[i+2])|(dw[i+3]-sw[i+3]);
-			if (tmp)
-				return qfalse;
-		}
-	}
-	if (count & 15)
-	{
-		byte *d = (byte*)src0;
-		byte *s = (byte*)src1;
-		for (i = count & 0xfffffff0; i < count; i++)
-		if (d[i]!=s[i])
-			return qfalse;
-	}
-
-	return qtrue;
-}
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
-{
-	// write buffer prefetching is performed only if
-	// the processor benefits from it. Read and read/write
-	// prefetching is always performed.
-
-	switch (type)
-	{
-		case PRE_WRITE : break;
-		case PRE_READ:
-		case PRE_READ_WRITE:
-
-		__asm
-		{
-			mov		ebx,s
-			mov		ecx,bytes
-			cmp		ecx,4096				// clamp to 4kB
-			jle		skipClamp
-			mov		ecx,4096
-skipClamp:
-			add		ecx,0x1f
-			shr		ecx,5					// number of cache lines
-			jz		skip
-			jmp		loopie
-
-			align 16
-	loopie:	test	byte ptr [ebx],al
-			add		ebx,32
-			dec		ecx
-			jnz		loopie
-	skip:
-		}
-
-		break;
-	}
-}
-#endif
 //------------------------------------------------------------------------


--- a/code/qcommon/md4.c
+++ b/code/qcommon/md4.c
@@ -38,13 +38,8 @@ void MD4Init (MD4_CTX *);
 void MD4Update (MD4_CTX *, const unsigned char *, unsigned int);
 void MD4Final (unsigned char [16], MD4_CTX *);

-#if I_WANT_A_CUSTOM_MEMCPY
-void Com_Memset (void* dest, const int val, const size_t count);
-void Com_Memcpy (void* dest, const void* src, const size_t count);
-#else
 #define Com_Memset memset
 #define Com_Memcpy memcpy
-#endif

 /* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm */
 /* Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved.
--- a/code/qcommon/q_shared.h
+++ b/code/qcommon/q_shared.h
@@ -243,13 +243,8 @@ void Snd_Memset (void* dest, const int val, const size_t count);
 #define Snd_Memset Com_Memset
 #endif

-#if I_WANT_A_CUSTOM_MEMCPY
-void Com_Memset (void* dest, const int val, const size_t count);
-void Com_Memcpy (void* dest, const void* src, const size_t count);
-#else
 #define Com_Memset memset
 #define Com_Memcpy memcpy
-#endif

 #define CIN_system	1
 #define CIN_loop	2
--- a/code/unix/Makefile
+++ b/code/unix/Makefile
@@ -350,6 +350,9 @@ ifeq ($(PLATFORM),mingw32)
    LDFLAGS+=-m32
  endif

+  BUILD_SERVER = 0
+  BUILD_CLIENT_SMP = 0
+
 else # ifeq mingw32

 #############################################################################
@@ -823,9 +826,19 @@ Q3OBJ = \

 ifeq ($(ARCH),i386)
  Q3OBJ += $(B)/client/vm_x86.o
+  Q3OBJ += \
+    $(B)/client/snd_mixa.o \
+    $(B)/client/matha.o \
+    $(B)/client/ftola.o \
+    $(B)/client/snapvectora.o
 endif
 ifeq ($(ARCH),x86)
  Q3OBJ += $(B)/client/vm_x86.o
+  Q3OBJ += \
+    $(B)/client/snd_mixa.o \
+    $(B)/client/matha.o \
+    $(B)/client/ftola.o \
+    $(B)/client/snapvectora.o
 endif
 ifeq ($(ARCH),x86_64)
  Q3OBJ += $(B)/client/vm_x86_64.o
@@ -837,21 +850,6 @@ ifeq ($(ARCH),ppc)
  endif
 endif

-Q3OBJ += \
-  $(B)/client/linux_common.o \
-  \
-  $(B)/client/snd_mixa.o \
-  $(B)/client/matha.o \
-  $(B)/client/ftola.o \
-  $(B)/client/snapvectora.o \
-  \
-  $(B)/client/unix_main.o \
-  $(B)/client/unix_net.o \
-  $(B)/client/unix_shared.o \
-  $(B)/client/linux_signals.o \
-  $(B)/client/linux_qgl.o \
-  $(B)/client/linux_snd.o \
-  $(B)/client/sdl_snd.o

 ifeq ($(PLATFORM),mingw32)
  Q3OBJ += \
@@ -867,6 +865,15 @@ ifeq ($(PLATFORM),mingw32)
    $(B)/client/win_wndproc.o \
    $(B)/client/win_resource.o
 else
+  Q3OBJ += \
+    $(B)/client/unix_main.o \
+    $(B)/client/unix_net.o \
+    $(B)/client/unix_shared.o \
+    $(B)/client/linux_signals.o \
+    $(B)/client/linux_qgl.o \
+    $(B)/client/linux_snd.o \
+    $(B)/client/sdl_snd.o
+
  ifeq ($(PLATFORM),linux)
    Q3OBJ += $(B)/client/linux_joystick.o
  endif
@@ -1051,7 +1058,6 @@ $(B)/client/irix_glimp_smp.o : $(UDIR)/irix_glimp.c; $(DO_SMP_CC)
 $(B)/client/irix_snd.o : $(UDIR)/irix_snd.c; $(DO_CC)
 $(B)/client/irix_input.o : $(UDIR)/irix_input.c; $(DO_CC)
 $(B)/client/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_CC) $(GL_CFLAGS)
-$(B)/client/linux_common.o : $(UDIR)/linux_common.c; $(DO_CC)
 $(B)/client/linux_glimp.o : $(UDIR)/linux_glimp.c; $(DO_CC)  $(GL_CFLAGS)
 $(B)/client/sdl_glimp.o : $(UDIR)/sdl_glimp.c; $(DO_CC)  $(GL_CFLAGS)
 $(B)/client/linux_glimp_smp.o : $(UDIR)/linux_glimp.c; $(DO_SMP_CC)  $(GL_CFLAGS)
@@ -1154,7 +1160,6 @@ Q3DOBJ = \
  $(B)/ded/l_struct.o \
  \
  $(B)/ded/linux_signals.o \
-  $(B)/ded/linux_common.o \
  $(B)/ded/unix_main.o \
  $(B)/ded/unix_net.o \
  $(B)/ded/unix_shared.o \
@@ -1236,7 +1241,6 @@ $(B)/ded/l_script.o : $(BLIBDIR)/l_script.c; $(DO_BOT_CC)
 $(B)/ded/l_struct.o : $(BLIBDIR)/l_struct.c; $(DO_BOT_CC)

 $(B)/ded/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_DED_CC)
-$(B)/ded/linux_common.o : $(UDIR)/linux_common.c; $(DO_DED_CC)
 $(B)/ded/unix_main.o : $(UDIR)/unix_main.c; $(DO_DED_CC)
 $(B)/ded/unix_net.o : $(UDIR)/unix_net.c; $(DO_DED_CC)
 $(B)/ded/unix_shared.o : $(UDIR)/unix_shared.c; $(DO_DED_CC)
--- a/code/unix/linux_common.c
+++ b/code/unix/linux_common.c
@@ -1,346 +0,0 @@
-#if 0 // not used anymore
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-===========================================================================
-*/
-/** 
- * GAS syntax equivalents of the MSVC asm memory calls in common.c
- *
- * The following changes have been made to the asm:
- * 1. Registers are loaded by the inline asm arguments when possible
- * 2. Labels have been changed to local label format (0,1,etc.) to allow inlining
- *
- * HISTORY:
- *	AH - Created on 08 Dec 2000
- */
-
-#include <unistd.h>   // AH - for size_t
-#include <string.h>
-
-// bk001207 - we need something under Linux, too. Mac?
-#if 1 // defined(C_ONLY) // bk010102 - dedicated?
-void Com_Memcpy (void* dest, const void* src, const size_t count) {
-  memcpy(dest, src, count);
-}
-
-void Com_Memset (void* dest, const int val, const size_t count) {
-  memset(dest, val, count);
-}
-
-#else
-
-typedef enum {
-  PRE_READ,         // prefetch assuming that buffer is used for reading only
-  PRE_WRITE,        // prefetch assuming that buffer is used for writing only
-  PRE_READ_WRITE    // prefetch assuming that buffer is used for both reading and writing
-} e_prefetch;
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type);
-
-void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) {
-	// MMX version not used on standard Pentium MMX
-	// because the dword version is faster (with
-	// proper destination prefetching)
-		__asm__ __volatile__ (" \
-			//mov			eax,constant		// eax = val \
-			//mov			edx,dest			// dest \
-			//mov			ecx,count \
-			movd		%%eax, %%mm0 \
-			punpckldq	%%mm0, %%mm0 \
-\
-			// ensure that destination is qword aligned \
-\
-			testl		$7, %%edx				// qword padding?\
-			jz		0f	\
-			movl		%%eax, (%%edx) \
-			decl		%%ecx \
-			addl		$4, %%edx \
-\
-0:			movl		%%ecx, %%ebx				\
-			andl		$0xfffffff0, %%ecx	\
-			jz		2f \
-			jmp		1f \
-			.align 		16 \
-\
-			// funny ordering here to avoid commands \
-			// that cross 32-byte boundaries (the \
-			// [edx+0] version has a special 3-byte opcode... \
-1:			movq		%%mm0, 8(%%edx) \
-			movq		%%mm0, 16(%%edx) \
-			movq		%%mm0, 24(%%edx) \
-			movq		%%mm0, 32(%%edx) \
-			movq		%%mm0, 40(%%edx) \
-			movq		%%mm0, 48(%%edx) \
-			movq		%%mm0, 56(%%edx) \
-			movq		%%mm0, (%%edx)\
-			addl		$64, %%edx \
-			subl		$16, %%ecx \
-			jnz		1b \
-2:	\
-			movl		%%ebx, %%ecx				// ebx = cnt \
-			andl		$0xfffffff0, %%ecx				// ecx = cnt&~15 \
-			subl		%%ecx, %%ebx \
-			jz		6f \
-			cmpl		$8, %%ebx \
-			jl		3f \
-\
-			movq		%%mm0, (%%edx) \
-			movq		%%mm0, 8(%%edx) \
-			movq		%%mm0, 16(%%edx) \
-			movq		%%mm0, 24(%%edx) \
-			addl		$32, %%edx \
-			subl		$8, %%ebx \
-			jz		6f \
-\
-3:			cmpl		$4, %%ebx \
-			jl		4f \
-			\
-			movq		%%mm0, (%%edx) \
-			movq		%%mm0, 8(%%edx) \
-			addl		$16, %%edx \
-			subl		$4, %%ebx \
-\
-4:			cmpl		$2, %%ebx \
-			jl		5f \
-			movq		%%mm0, (%%edx) \
-			addl		$8, %%edx \
-			subl		$2, %%ebx \
-\
-5:			cmpl		$1, %%ebx \
-			jl		6f \
-			movl		%%eax, (%%edx) \
-6: \
-			emms \
-	"
-	: : "a" (constant), "c" (count), "d" (dest)
-	: "%ebx", "%edi", "%esi", "cc", "memory");
-}
-
-// optimized memory copy routine that handles all alignment
-// cases and block sizes efficiently
-void Com_Memcpy (void* dest, const void* src, const size_t count) {
-	Com_Prefetch (src, count, PRE_READ);
-	__asm__ __volatile__ (" \
-		pushl		%%edi \
-		pushl		%%esi \
-		//mov		ecx,count \
-		cmpl		$0, %%ecx						// count = 0 check (just to be on the safe side) \
-		je		6f \
-		//mov		edx,dest \
-		movl		%0, %%ebx \
-		cmpl		$32, %%ecx						// padding only? \
-		jl		1f \
-\
-		movl		%%ecx, %%edi					\
-		andl		$0xfffffe00, %%edi					// edi = count&~31 \
-		subl		$32, %%edi \
-\
-		.align 16 \
-0: \
-		movl		(%%ebx, %%edi, 1), %%eax \
-		movl		4(%%ebx, %%edi, 1), %%esi \
-		movl		%%eax, (%%edx, %%edi, 1) \
-		movl		%%esi, 4(%%edx, %%edi, 1) \
-		movl		8(%%ebx, %%edi, 1), %%eax \
-		movl		12(%%ebx, %%edi, 1), %%esi \
-		movl		%%eax, 8(%%edx, %%edi, 1) \
-		movl		%%esi, 12(%%edx, %%edi, 1) \
-		movl		16(%%ebx, %%edi, 1), %%eax \
-		movl		20(%%ebx, %%edi, 1), %%esi \
-		movl		%%eax, 16(%%edx, %%edi, 1) \
-		movl		%%esi, 20(%%edx, %%edi, 1) \
-		movl		24(%%ebx, %%edi, 1), %%eax \
-		movl		28(%%ebx, %%edi, 1), %%esi \
-		movl		%%eax, 24(%%edx, %%edi, 1) \
-		movl		%%esi, 28(%%edx, %%edi, 1) \
-		subl		$32, %%edi \
-		jge		0b \
-		\
-		movl		%%ecx, %%edi \
-		andl		$0xfffffe00, %%edi \
-		addl		%%edi, %%ebx					// increase src pointer \
-		addl		%%edi, %%edx					// increase dst pointer \
-		andl		$31, %%ecx					// new count \
-		jz		6f					// if count = 0, get outta here \
-\
-1: \
-		cmpl		$16, %%ecx \
-		jl		2f \
-		movl		(%%ebx), %%eax \
-		movl		%%eax, (%%edx) \
-		movl		4(%%ebx), %%eax \
-		movl		%%eax, 4(%%edx) \
-		movl		8(%%ebx), %%eax \
-		movl		%%eax, 8(%%edx) \
-		movl		12(%%ebx), %%eax \
-		movl		%%eax, 12(%%edx) \
-		subl		$16, %%ecx \
-		addl		$16, %%ebx \
-		addl		$16, %%edx \
-2: \
-		cmpl		$8, %%ecx \
-		jl		3f \
-		movl		(%%ebx), %%eax \
-		movl		%%eax, (%%edx) \
-		movl		4(%%ebx), %%eax \
-		subl		$8, %%ecx \
-		movl		%%eax, 4(%%edx) \
-		addl		$8, %%ebx \
-		addl		$8, %%edx \
-3: \
-		cmpl		$4, %%ecx \
-		jl		4f \
-		movl		(%%ebx), %%eax	// here 4-7 bytes \
-		addl		$4, %%ebx \
-		subl		$4, %%ecx \
-		movl		%%eax, (%%edx) \
-		addl		$4, %%edx \
-4:							// 0-3 remaining bytes \
-		cmpl		$2, %%ecx \
-		jl		5f \
-		movw		(%%ebx), %%ax	// two bytes \
-		cmpl		$3, %%ecx				// less than 3? \
-		movw		%%ax, (%%edx) \
-		jl		6f \
-		movb		2(%%ebx), %%al	// last byte \
-		movb		%%al, 2(%%edx) \
-		jmp		6f \
-5: \
-		cmpl		$1, %%ecx \
-		jl		6f \
-		movb		(%%ebx), %%al \
-		movb		%%al, (%%edx) \
-6: \
-		popl		%%esi \
-		popl		%%edi \
-	"
-	: : "m" (src), "d" (dest), "c" (count)
-	: "%eax", "%ebx", "%edi", "%esi", "cc", "memory");
-}
-
-void Com_Memset (void* dest, const int val, const size_t count)
-{
-	unsigned int fillval;
-
-	if (count < 8)
-	{
-		__asm__ __volatile__ (" \
-			//mov		edx,dest \
-			//mov		eax, val \
-			movb		%%al, %%ah \
-			movl		%%eax, %%ebx \
-			andl		$0xffff, %%ebx \
-			shll		$16, %%eax \
-			addl		%%ebx, %%eax	// eax now contains pattern \
-			//mov		ecx,count \
-			cmpl		$4, %%ecx \
-			jl		0f \
-			movl		%%eax, (%%edx)	// copy first dword \
-			addl		$4, %%edx \
-			subl		$4, %%ecx \
-	0:		cmpl		$2, %%ecx \
-			jl		1f \
-			movw		%%ax, (%%edx)	// copy 2 bytes \
-			addl		$2, %%edx \
-			subl		$2, %%ecx \
-	1:		cmpl		$0, %%ecx \
-			je		2f \
-			movb		%%al, (%%edx)	// copy single byte \
-	2:		 \
-		"
-		: : "d" (dest), "a" (val), "c" (count)
-		: "%ebx", "%edi", "%esi", "cc", "memory");
-		
-		return;
-	}
-
-	fillval = val;
-	
-	fillval = fillval|(fillval<<8);
-	fillval = fillval|(fillval<<16);		// fill dword with 8-bit pattern
-
-	_copyDWord ((unsigned int*)(dest),fillval, count/4);
-	
-	__asm__ __volatile__ ("     		// padding of 0-3 bytes \
-		//mov		ecx,count \
-		movl		%%ecx, %%eax \
-		andl		$3, %%ecx \
-		jz		1f \
-		andl		$0xffffff00, %%eax \
-		//mov		ebx,dest \
-		addl		%%eax, %%edx \
-		movl		%0, %%eax \
-		cmpl		$2, %%ecx \
-		jl		0f \
-		movw		%%ax, (%%edx) \
-		cmpl		$2, %%ecx \
-		je		1f					\
-		movb		%%al, 2(%%edx)		\
-		jmp		1f \
-0:		\
-		cmpl		$0, %%ecx\
-		je		1f\
-		movb		%%al, (%%edx)\
-1:	\
-	"
-	: : "m" (fillval), "c" (count), "d" (dest)
-	: "%eax", "%ebx", "%edi", "%esi", "cc", "memory");	
-}
-
-void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type)
-{
-	// write buffer prefetching is performed only if
-	// the processor benefits from it. Read and read/write
-	// prefetching is always performed.
-
-	switch (type)
-	{
-		case PRE_WRITE : break;
-		case PRE_READ:
-		case PRE_READ_WRITE:
-
-		__asm__ __volatile__ ("\
-			//mov		ebx,s\
-			//mov		ecx,bytes\
-			cmpl		$4096, %%ecx				// clamp to 4kB\
-			jle		0f\
-			movl		$4096, %%ecx\
-	0:\
-			addl		$0x1f, %%ecx\
-			shrl		$5, %%ecx					// number of cache lines\
-			jz		2f\
-			jmp		1f\
-\
-			.align 16\
-	1:		testb		%%al, (%%edx)\
-			addl		$32, %%edx\
-			decl		%%ecx\
-			jnz		1b\
-	2:\
-		"
-		: : "d" (s), "c" (bytes)
-		: "%eax", "%ebx", "%edi", "%esi", "memory", "cc");
-		
-		break;
-	}
-}
-
-#endif
-#endif