diff --git a/code/qcommon/common.c b/code/qcommon/common.c index b9de0679..74f75ea0 100644 --- a/code/qcommon/common.c +++ b/code/qcommon/common.c @@ -2857,316 +2857,6 @@ void Com_Shutdown (void) { } -#if I_WANT_A_CUSTOM_MEMCPY && !defined(_WIN32) -void Com_Memcpy (void* dest, const void* src, const size_t count) -{ - memcpy(dest, src, count); -} - -void Com_Memset (void* dest, const int val, const size_t count) -{ - memset(dest, val, count); -} - -#elif I_WANT_A_CUSTOM_MEMCPY && defined(_WIN32) - -typedef enum -{ - PRE_READ, // prefetch assuming that buffer is used for reading only - PRE_WRITE, // prefetch assuming that buffer is used for writing only - PRE_READ_WRITE // prefetch assuming that buffer is used for both reading and writing -} e_prefetch; - -void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type); - -#define EMMS_INSTRUCTION __asm emms - -void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) { - __asm - { - mov edx,dest - mov eax,constant - mov ecx,count - and ecx,~7 - jz padding - sub ecx,8 - jmp loopu - align 16 -loopu: - test [edx+ecx*4 + 28],ebx // fetch next block destination to L1 cache - mov [edx+ecx*4 + 0],eax - mov [edx+ecx*4 + 4],eax - mov [edx+ecx*4 + 8],eax - mov [edx+ecx*4 + 12],eax - mov [edx+ecx*4 + 16],eax - mov [edx+ecx*4 + 20],eax - mov [edx+ecx*4 + 24],eax - mov [edx+ecx*4 + 28],eax - sub ecx,8 - jge loopu -padding: mov ecx,count - mov ebx,ecx - and ecx,7 - jz outta - and ebx,~7 - lea edx,[edx+ebx*4] // advance dest pointer - test [edx+0],eax // fetch destination to L1 cache - cmp ecx,4 - jl skip4 - mov [edx+0],eax - mov [edx+4],eax - mov [edx+8],eax - mov [edx+12],eax - add edx,16 - sub ecx,4 -skip4: cmp ecx,2 - jl skip2 - mov [edx+0],eax - mov [edx+4],eax - add edx,8 - sub ecx,2 -skip2: cmp ecx,1 - jl outta - mov [edx+0],eax -outta: - } -} - -// optimized memory copy routine that handles all alignment -// cases and block sizes efficiently -void Com_Memcpy (void* dest, const void* src, const size_t count) { - Com_Prefetch (src, count, PRE_READ); - __asm - { - push edi - push esi - mov ecx,count - cmp ecx,0 // count = 0 check (just to be on the safe side) - je outta - mov edx,dest - mov ebx,src - cmp ecx,32 // padding only? - jl padding - - mov edi,ecx - and edi,~31 // edi = count&~31 - sub edi,32 - - align 16 -loopMisAligned: - mov eax,[ebx + edi + 0 + 0*8] - mov esi,[ebx + edi + 4 + 0*8] - mov [edx+edi+0 + 0*8],eax - mov [edx+edi+4 + 0*8],esi - mov eax,[ebx + edi + 0 + 1*8] - mov esi,[ebx + edi + 4 + 1*8] - mov [edx+edi+0 + 1*8],eax - mov [edx+edi+4 + 1*8],esi - mov eax,[ebx + edi + 0 + 2*8] - mov esi,[ebx + edi + 4 + 2*8] - mov [edx+edi+0 + 2*8],eax - mov [edx+edi+4 + 2*8],esi - mov eax,[ebx + edi + 0 + 3*8] - mov esi,[ebx + edi + 4 + 3*8] - mov [edx+edi+0 + 3*8],eax - mov [edx+edi+4 + 3*8],esi - sub edi,32 - jge loopMisAligned - - mov edi,ecx - and edi,~31 - add ebx,edi // increase src pointer - add edx,edi // increase dst pointer - and ecx,31 // new count - jz outta // if count = 0, get outta here - -padding: - cmp ecx,16 - jl skip16 - mov eax,dword ptr [ebx] - mov dword ptr [edx],eax - mov eax,dword ptr [ebx+4] - mov dword ptr [edx+4],eax - mov eax,dword ptr [ebx+8] - mov dword ptr [edx+8],eax - mov eax,dword ptr [ebx+12] - mov dword ptr [edx+12],eax - sub ecx,16 - add ebx,16 - add edx,16 -skip16: - cmp ecx,8 - jl skip8 - mov eax,dword ptr [ebx] - mov dword ptr [edx],eax - mov eax,dword ptr [ebx+4] - sub ecx,8 - mov dword ptr [edx+4],eax - add ebx,8 - add edx,8 -skip8: - cmp ecx,4 - jl skip4 - mov eax,dword ptr [ebx] // here 4-7 bytes - add ebx,4 - sub ecx,4 - mov dword ptr [edx],eax - add edx,4 -skip4: // 0-3 remaining bytes - cmp ecx,2 - jl skip2 - mov ax,word ptr [ebx] // two bytes - cmp ecx,3 // less than 3? - mov word ptr [edx],ax - jl outta - mov al,byte ptr [ebx+2] // last byte - mov byte ptr [edx+2],al - jmp outta -skip2: - cmp ecx,1 - jl outta - mov al,byte ptr [ebx] - mov byte ptr [edx],al -outta: - pop esi - pop edi - } -} - -void Com_Memset (void* dest, const int val, const size_t count) -{ - unsigned int fillval; - - if (count < 8) - { - __asm - { - mov edx,dest - mov eax, val - mov ah,al - mov ebx,eax - and ebx, 0xffff - shl eax,16 - add eax,ebx // eax now contains pattern - mov ecx,count - cmp ecx,4 - jl skip4 - mov [edx],eax // copy first dword - add edx,4 - sub ecx,4 - skip4: cmp ecx,2 - jl skip2 - mov word ptr [edx],ax // copy 2 bytes - add edx,2 - sub ecx,2 - skip2: cmp ecx,0 - je skip1 - mov byte ptr [edx],al // copy single byte - skip1: - } - return; - } - - fillval = val; - - fillval = fillval|(fillval<<8); - fillval = fillval|(fillval<<16); // fill dword with 8-bit pattern - - _copyDWord ((unsigned int*)(dest),fillval, count/4); - - __asm // padding of 0-3 bytes - { - mov ecx,count - mov eax,ecx - and ecx,3 - jz skipA - and eax,~3 - mov ebx,dest - add ebx,eax - mov eax,fillval - cmp ecx,2 - jl skipB - mov word ptr [ebx],ax - cmp ecx,2 - je skipA - mov byte ptr [ebx+2],al - jmp skipA -skipB: - cmp ecx,0 - je skipA - mov byte ptr [ebx],al -skipA: - } -} - -qboolean Com_Memcmp (const void *src0, const void *src1, const unsigned int count) -{ - unsigned int i; - // MMX version anyone? - - if (count >= 16) - { - unsigned int *dw = (unsigned int*)(src0); - unsigned int *sw = (unsigned int*)(src1); - - unsigned int nm2 = count/16; - for (i = 0; i < nm2; i+=4) - { - unsigned int tmp = (dw[i+0]-sw[i+0])|(dw[i+1]-sw[i+1])| - (dw[i+2]-sw[i+2])|(dw[i+3]-sw[i+3]); - if (tmp) - return qfalse; - } - } - if (count & 15) - { - byte *d = (byte*)src0; - byte *s = (byte*)src1; - for (i = count & 0xfffffff0; i < count; i++) - if (d[i]!=s[i]) - return qfalse; - } - - return qtrue; -} - -void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type) -{ - // write buffer prefetching is performed only if - // the processor benefits from it. Read and read/write - // prefetching is always performed. - - switch (type) - { - case PRE_WRITE : break; - case PRE_READ: - case PRE_READ_WRITE: - - __asm - { - mov ebx,s - mov ecx,bytes - cmp ecx,4096 // clamp to 4kB - jle skipClamp - mov ecx,4096 -skipClamp: - add ecx,0x1f - shr ecx,5 // number of cache lines - jz skip - jmp loopie - - align 16 - loopie: test byte ptr [ebx],al - add ebx,32 - dec ecx - jnz loopie - skip: - } - - break; - } -} -#endif //------------------------------------------------------------------------ diff --git a/code/qcommon/md4.c b/code/qcommon/md4.c index 24b79610..82c4b0d8 100644 --- a/code/qcommon/md4.c +++ b/code/qcommon/md4.c @@ -38,13 +38,8 @@ void MD4Init (MD4_CTX *); void MD4Update (MD4_CTX *, const unsigned char *, unsigned int); void MD4Final (unsigned char [16], MD4_CTX *); -#if I_WANT_A_CUSTOM_MEMCPY -void Com_Memset (void* dest, const int val, const size_t count); -void Com_Memcpy (void* dest, const void* src, const size_t count); -#else #define Com_Memset memset #define Com_Memcpy memcpy -#endif /* MD4C.C - RSA Data Security, Inc., MD4 message-digest algorithm */ /* Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved. diff --git a/code/qcommon/q_shared.h b/code/qcommon/q_shared.h index d6c69dc8..a9388e7f 100644 --- a/code/qcommon/q_shared.h +++ b/code/qcommon/q_shared.h @@ -243,13 +243,8 @@ void Snd_Memset (void* dest, const int val, const size_t count); #define Snd_Memset Com_Memset #endif -#if I_WANT_A_CUSTOM_MEMCPY -void Com_Memset (void* dest, const int val, const size_t count); -void Com_Memcpy (void* dest, const void* src, const size_t count); -#else #define Com_Memset memset #define Com_Memcpy memcpy -#endif #define CIN_system 1 #define CIN_loop 2 diff --git a/code/unix/Makefile b/code/unix/Makefile index 3aaff03b..82bbe0a2 100644 --- a/code/unix/Makefile +++ b/code/unix/Makefile @@ -350,6 +350,9 @@ ifeq ($(PLATFORM),mingw32) LDFLAGS+=-m32 endif + BUILD_SERVER = 0 + BUILD_CLIENT_SMP = 0 + else # ifeq mingw32 ############################################################################# @@ -823,9 +826,19 @@ Q3OBJ = \ ifeq ($(ARCH),i386) Q3OBJ += $(B)/client/vm_x86.o + Q3OBJ += \ + $(B)/client/snd_mixa.o \ + $(B)/client/matha.o \ + $(B)/client/ftola.o \ + $(B)/client/snapvectora.o endif ifeq ($(ARCH),x86) Q3OBJ += $(B)/client/vm_x86.o + Q3OBJ += \ + $(B)/client/snd_mixa.o \ + $(B)/client/matha.o \ + $(B)/client/ftola.o \ + $(B)/client/snapvectora.o endif ifeq ($(ARCH),x86_64) Q3OBJ += $(B)/client/vm_x86_64.o @@ -837,21 +850,6 @@ ifeq ($(ARCH),ppc) endif endif -Q3OBJ += \ - $(B)/client/linux_common.o \ - \ - $(B)/client/snd_mixa.o \ - $(B)/client/matha.o \ - $(B)/client/ftola.o \ - $(B)/client/snapvectora.o \ - \ - $(B)/client/unix_main.o \ - $(B)/client/unix_net.o \ - $(B)/client/unix_shared.o \ - $(B)/client/linux_signals.o \ - $(B)/client/linux_qgl.o \ - $(B)/client/linux_snd.o \ - $(B)/client/sdl_snd.o ifeq ($(PLATFORM),mingw32) Q3OBJ += \ @@ -867,6 +865,15 @@ ifeq ($(PLATFORM),mingw32) $(B)/client/win_wndproc.o \ $(B)/client/win_resource.o else + Q3OBJ += \ + $(B)/client/unix_main.o \ + $(B)/client/unix_net.o \ + $(B)/client/unix_shared.o \ + $(B)/client/linux_signals.o \ + $(B)/client/linux_qgl.o \ + $(B)/client/linux_snd.o \ + $(B)/client/sdl_snd.o + ifeq ($(PLATFORM),linux) Q3OBJ += $(B)/client/linux_joystick.o endif @@ -1051,7 +1058,6 @@ $(B)/client/irix_glimp_smp.o : $(UDIR)/irix_glimp.c; $(DO_SMP_CC) $(B)/client/irix_snd.o : $(UDIR)/irix_snd.c; $(DO_CC) $(B)/client/irix_input.o : $(UDIR)/irix_input.c; $(DO_CC) $(B)/client/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_CC) $(GL_CFLAGS) -$(B)/client/linux_common.o : $(UDIR)/linux_common.c; $(DO_CC) $(B)/client/linux_glimp.o : $(UDIR)/linux_glimp.c; $(DO_CC) $(GL_CFLAGS) $(B)/client/sdl_glimp.o : $(UDIR)/sdl_glimp.c; $(DO_CC) $(GL_CFLAGS) $(B)/client/linux_glimp_smp.o : $(UDIR)/linux_glimp.c; $(DO_SMP_CC) $(GL_CFLAGS) @@ -1154,7 +1160,6 @@ Q3DOBJ = \ $(B)/ded/l_struct.o \ \ $(B)/ded/linux_signals.o \ - $(B)/ded/linux_common.o \ $(B)/ded/unix_main.o \ $(B)/ded/unix_net.o \ $(B)/ded/unix_shared.o \ @@ -1236,7 +1241,6 @@ $(B)/ded/l_script.o : $(BLIBDIR)/l_script.c; $(DO_BOT_CC) $(B)/ded/l_struct.o : $(BLIBDIR)/l_struct.c; $(DO_BOT_CC) $(B)/ded/linux_signals.o : $(UDIR)/linux_signals.c; $(DO_DED_CC) -$(B)/ded/linux_common.o : $(UDIR)/linux_common.c; $(DO_DED_CC) $(B)/ded/unix_main.o : $(UDIR)/unix_main.c; $(DO_DED_CC) $(B)/ded/unix_net.o : $(UDIR)/unix_net.c; $(DO_DED_CC) $(B)/ded/unix_shared.o : $(UDIR)/unix_shared.c; $(DO_DED_CC) diff --git a/code/unix/linux_common.c b/code/unix/linux_common.c deleted file mode 100644 index ab1eafa6..00000000 --- a/code/unix/linux_common.c +++ /dev/null @@ -1,346 +0,0 @@ -#if 0 // not used anymore -/* -=========================================================================== -Copyright (C) 1999-2005 Id Software, Inc. - -This file is part of Quake III Arena source code. - -Quake III Arena source code is free software; you can redistribute it -and/or modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the License, -or (at your option) any later version. - -Quake III Arena source code is distributed in the hope that it will be -useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with Quake III Arena source code; if not, write to the Free Software -Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -=========================================================================== -*/ -/** - * GAS syntax equivalents of the MSVC asm memory calls in common.c - * - * The following changes have been made to the asm: - * 1. Registers are loaded by the inline asm arguments when possible - * 2. Labels have been changed to local label format (0,1,etc.) to allow inlining - * - * HISTORY: - * AH - Created on 08 Dec 2000 - */ - -#include // AH - for size_t -#include - -// bk001207 - we need something under Linux, too. Mac? -#if 1 // defined(C_ONLY) // bk010102 - dedicated? -void Com_Memcpy (void* dest, const void* src, const size_t count) { - memcpy(dest, src, count); -} - -void Com_Memset (void* dest, const int val, const size_t count) { - memset(dest, val, count); -} - -#else - -typedef enum { - PRE_READ, // prefetch assuming that buffer is used for reading only - PRE_WRITE, // prefetch assuming that buffer is used for writing only - PRE_READ_WRITE // prefetch assuming that buffer is used for both reading and writing -} e_prefetch; - -void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type); - -void _copyDWord (unsigned int* dest, const unsigned int constant, const unsigned int count) { - // MMX version not used on standard Pentium MMX - // because the dword version is faster (with - // proper destination prefetching) - __asm__ __volatile__ (" \ - //mov eax,constant // eax = val \ - //mov edx,dest // dest \ - //mov ecx,count \ - movd %%eax, %%mm0 \ - punpckldq %%mm0, %%mm0 \ -\ - // ensure that destination is qword aligned \ -\ - testl $7, %%edx // qword padding?\ - jz 0f \ - movl %%eax, (%%edx) \ - decl %%ecx \ - addl $4, %%edx \ -\ -0: movl %%ecx, %%ebx \ - andl $0xfffffff0, %%ecx \ - jz 2f \ - jmp 1f \ - .align 16 \ -\ - // funny ordering here to avoid commands \ - // that cross 32-byte boundaries (the \ - // [edx+0] version has a special 3-byte opcode... \ -1: movq %%mm0, 8(%%edx) \ - movq %%mm0, 16(%%edx) \ - movq %%mm0, 24(%%edx) \ - movq %%mm0, 32(%%edx) \ - movq %%mm0, 40(%%edx) \ - movq %%mm0, 48(%%edx) \ - movq %%mm0, 56(%%edx) \ - movq %%mm0, (%%edx)\ - addl $64, %%edx \ - subl $16, %%ecx \ - jnz 1b \ -2: \ - movl %%ebx, %%ecx // ebx = cnt \ - andl $0xfffffff0, %%ecx // ecx = cnt&~15 \ - subl %%ecx, %%ebx \ - jz 6f \ - cmpl $8, %%ebx \ - jl 3f \ -\ - movq %%mm0, (%%edx) \ - movq %%mm0, 8(%%edx) \ - movq %%mm0, 16(%%edx) \ - movq %%mm0, 24(%%edx) \ - addl $32, %%edx \ - subl $8, %%ebx \ - jz 6f \ -\ -3: cmpl $4, %%ebx \ - jl 4f \ - \ - movq %%mm0, (%%edx) \ - movq %%mm0, 8(%%edx) \ - addl $16, %%edx \ - subl $4, %%ebx \ -\ -4: cmpl $2, %%ebx \ - jl 5f \ - movq %%mm0, (%%edx) \ - addl $8, %%edx \ - subl $2, %%ebx \ -\ -5: cmpl $1, %%ebx \ - jl 6f \ - movl %%eax, (%%edx) \ -6: \ - emms \ - " - : : "a" (constant), "c" (count), "d" (dest) - : "%ebx", "%edi", "%esi", "cc", "memory"); -} - -// optimized memory copy routine that handles all alignment -// cases and block sizes efficiently -void Com_Memcpy (void* dest, const void* src, const size_t count) { - Com_Prefetch (src, count, PRE_READ); - __asm__ __volatile__ (" \ - pushl %%edi \ - pushl %%esi \ - //mov ecx,count \ - cmpl $0, %%ecx // count = 0 check (just to be on the safe side) \ - je 6f \ - //mov edx,dest \ - movl %0, %%ebx \ - cmpl $32, %%ecx // padding only? \ - jl 1f \ -\ - movl %%ecx, %%edi \ - andl $0xfffffe00, %%edi // edi = count&~31 \ - subl $32, %%edi \ -\ - .align 16 \ -0: \ - movl (%%ebx, %%edi, 1), %%eax \ - movl 4(%%ebx, %%edi, 1), %%esi \ - movl %%eax, (%%edx, %%edi, 1) \ - movl %%esi, 4(%%edx, %%edi, 1) \ - movl 8(%%ebx, %%edi, 1), %%eax \ - movl 12(%%ebx, %%edi, 1), %%esi \ - movl %%eax, 8(%%edx, %%edi, 1) \ - movl %%esi, 12(%%edx, %%edi, 1) \ - movl 16(%%ebx, %%edi, 1), %%eax \ - movl 20(%%ebx, %%edi, 1), %%esi \ - movl %%eax, 16(%%edx, %%edi, 1) \ - movl %%esi, 20(%%edx, %%edi, 1) \ - movl 24(%%ebx, %%edi, 1), %%eax \ - movl 28(%%ebx, %%edi, 1), %%esi \ - movl %%eax, 24(%%edx, %%edi, 1) \ - movl %%esi, 28(%%edx, %%edi, 1) \ - subl $32, %%edi \ - jge 0b \ - \ - movl %%ecx, %%edi \ - andl $0xfffffe00, %%edi \ - addl %%edi, %%ebx // increase src pointer \ - addl %%edi, %%edx // increase dst pointer \ - andl $31, %%ecx // new count \ - jz 6f // if count = 0, get outta here \ -\ -1: \ - cmpl $16, %%ecx \ - jl 2f \ - movl (%%ebx), %%eax \ - movl %%eax, (%%edx) \ - movl 4(%%ebx), %%eax \ - movl %%eax, 4(%%edx) \ - movl 8(%%ebx), %%eax \ - movl %%eax, 8(%%edx) \ - movl 12(%%ebx), %%eax \ - movl %%eax, 12(%%edx) \ - subl $16, %%ecx \ - addl $16, %%ebx \ - addl $16, %%edx \ -2: \ - cmpl $8, %%ecx \ - jl 3f \ - movl (%%ebx), %%eax \ - movl %%eax, (%%edx) \ - movl 4(%%ebx), %%eax \ - subl $8, %%ecx \ - movl %%eax, 4(%%edx) \ - addl $8, %%ebx \ - addl $8, %%edx \ -3: \ - cmpl $4, %%ecx \ - jl 4f \ - movl (%%ebx), %%eax // here 4-7 bytes \ - addl $4, %%ebx \ - subl $4, %%ecx \ - movl %%eax, (%%edx) \ - addl $4, %%edx \ -4: // 0-3 remaining bytes \ - cmpl $2, %%ecx \ - jl 5f \ - movw (%%ebx), %%ax // two bytes \ - cmpl $3, %%ecx // less than 3? \ - movw %%ax, (%%edx) \ - jl 6f \ - movb 2(%%ebx), %%al // last byte \ - movb %%al, 2(%%edx) \ - jmp 6f \ -5: \ - cmpl $1, %%ecx \ - jl 6f \ - movb (%%ebx), %%al \ - movb %%al, (%%edx) \ -6: \ - popl %%esi \ - popl %%edi \ - " - : : "m" (src), "d" (dest), "c" (count) - : "%eax", "%ebx", "%edi", "%esi", "cc", "memory"); -} - -void Com_Memset (void* dest, const int val, const size_t count) -{ - unsigned int fillval; - - if (count < 8) - { - __asm__ __volatile__ (" \ - //mov edx,dest \ - //mov eax, val \ - movb %%al, %%ah \ - movl %%eax, %%ebx \ - andl $0xffff, %%ebx \ - shll $16, %%eax \ - addl %%ebx, %%eax // eax now contains pattern \ - //mov ecx,count \ - cmpl $4, %%ecx \ - jl 0f \ - movl %%eax, (%%edx) // copy first dword \ - addl $4, %%edx \ - subl $4, %%ecx \ - 0: cmpl $2, %%ecx \ - jl 1f \ - movw %%ax, (%%edx) // copy 2 bytes \ - addl $2, %%edx \ - subl $2, %%ecx \ - 1: cmpl $0, %%ecx \ - je 2f \ - movb %%al, (%%edx) // copy single byte \ - 2: \ - " - : : "d" (dest), "a" (val), "c" (count) - : "%ebx", "%edi", "%esi", "cc", "memory"); - - return; - } - - fillval = val; - - fillval = fillval|(fillval<<8); - fillval = fillval|(fillval<<16); // fill dword with 8-bit pattern - - _copyDWord ((unsigned int*)(dest),fillval, count/4); - - __asm__ __volatile__ (" // padding of 0-3 bytes \ - //mov ecx,count \ - movl %%ecx, %%eax \ - andl $3, %%ecx \ - jz 1f \ - andl $0xffffff00, %%eax \ - //mov ebx,dest \ - addl %%eax, %%edx \ - movl %0, %%eax \ - cmpl $2, %%ecx \ - jl 0f \ - movw %%ax, (%%edx) \ - cmpl $2, %%ecx \ - je 1f \ - movb %%al, 2(%%edx) \ - jmp 1f \ -0: \ - cmpl $0, %%ecx\ - je 1f\ - movb %%al, (%%edx)\ -1: \ - " - : : "m" (fillval), "c" (count), "d" (dest) - : "%eax", "%ebx", "%edi", "%esi", "cc", "memory"); -} - -void Com_Prefetch (const void *s, const unsigned int bytes, e_prefetch type) -{ - // write buffer prefetching is performed only if - // the processor benefits from it. Read and read/write - // prefetching is always performed. - - switch (type) - { - case PRE_WRITE : break; - case PRE_READ: - case PRE_READ_WRITE: - - __asm__ __volatile__ ("\ - //mov ebx,s\ - //mov ecx,bytes\ - cmpl $4096, %%ecx // clamp to 4kB\ - jle 0f\ - movl $4096, %%ecx\ - 0:\ - addl $0x1f, %%ecx\ - shrl $5, %%ecx // number of cache lines\ - jz 2f\ - jmp 1f\ -\ - .align 16\ - 1: testb %%al, (%%edx)\ - addl $32, %%edx\ - decl %%ecx\ - jnz 1b\ - 2:\ - " - : : "d" (s), "c" (bytes) - : "%eax", "%ebx", "%edi", "%esi", "memory", "cc"); - - break; - } -} - -#endif -#endif