diff --git a/src/doomdef.h b/src/doomdef.h index b382d0ecb..4e08b11bf 100644 --- a/src/doomdef.h +++ b/src/doomdef.h @@ -533,7 +533,7 @@ extern char liveeventbackup[256]; #define M_GetText(x) (x) #endif void M_StartupLocale(void); -extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL; +void *M_Memcpy(void* dest, const void* src, size_t n); char *va(const char *format, ...) FUNCPRINTF; char *M_GetToken(const char *inputString); void M_UnGetToken(void); diff --git a/src/m_misc.c b/src/m_misc.c index 923a188c6..ce332910d 100644 --- a/src/m_misc.c +++ b/src/m_misc.c @@ -2207,430 +2207,11 @@ char *sizeu5(size_t num) return sizeu5_buf; } -#if defined (__GNUC__) && defined (__i386__) // from libkwave, under GPL -// Alam: note libkwave memcpy code comes from mplayer's libvo/aclib_template.c, r699 - -/* for small memory blocks (<256 bytes) this version is faster */ -#define small_memcpy(dest,src,n)\ -{\ -register unsigned long int dummy;\ -__asm__ __volatile__(\ - "cld\n\t"\ - "rep; movsb"\ - :"=&D"(dest), "=&S"(src), "=&c"(dummy)\ - :"0" (dest), "1" (src),"2" (n)\ - : "memory", "cc");\ -} -/* linux kernel __memcpy (from: /include/asm/string.h) */ -ATTRINLINE static FUNCINLINE void *__memcpy (void *dest, const void * src, size_t n) +void *M_Memcpy(void *dest, const void *src, size_t n) { - int d0, d1, d2; - - if ( n < 4 ) - { - small_memcpy(dest, src, n); - } - else - { - __asm__ __volatile__ ( - "rep ; movsl;" - "testb $2,%b4;" - "je 1f;" - "movsw;" - "1:\ttestb $1,%b4;" - "je 2f;" - "movsb;" - "2:" - : "=&c" (d0), "=&D" (d1), "=&S" (d2) - :"0" (n/4), "q" (n),"1" ((long) dest),"2" ((long) src) - : "memory"); - } - - return dest; -} - -#define SSE_MMREG_SIZE 16 -#define MMX_MMREG_SIZE 8 - -#define MMX1_MIN_LEN 0x800 /* 2K blocks */ -#define MIN_LEN 0x40 /* 64-byte blocks */ - -/* SSE note: i tried to move 128 bytes a time instead of 64 but it -didn't make any measureable difference. i'm using 64 for the sake of -simplicity. [MF] */ -static /*FUNCTARGET("sse2")*/ void *sse_cpy(void * dest, const void * src, size_t n) -{ - void *retval = dest; - size_t i; - - /* PREFETCH has effect even for MOVSB instruction ;) */ - __asm__ __volatile__ ( - "prefetchnta (%0);" - "prefetchnta 32(%0);" - "prefetchnta 64(%0);" - "prefetchnta 96(%0);" - "prefetchnta 128(%0);" - "prefetchnta 160(%0);" - "prefetchnta 192(%0);" - "prefetchnta 224(%0);" - "prefetchnta 256(%0);" - "prefetchnta 288(%0);" - : : "r" (src) ); - - if (n >= MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to MMREG_SIZE -boundary */ - delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1); - if (delta) - { - delta=SSE_MMREG_SIZE-delta; - n -= delta; - small_memcpy(dest, src, delta); - } - i = n >> 6; /* n/64 */ - n&=63; - if (((unsigned long)src) & 15) - /* if SRC is misaligned */ - for (; i>0; i--) - { - __asm__ __volatile__ ( - "prefetchnta 320(%0);" - "prefetchnta 352(%0);" - "movups (%0), %%xmm0;" - "movups 16(%0), %%xmm1;" - "movups 32(%0), %%xmm2;" - "movups 48(%0), %%xmm3;" - "movntps %%xmm0, (%1);" - "movntps %%xmm1, 16(%1);" - "movntps %%xmm2, 32(%1);" - "movntps %%xmm3, 48(%1);" - :: "r" (src), "r" (dest) : "memory"); - src = (const unsigned char *)src + 64; - dest = (unsigned char *)dest + 64; - } - else - /* - Only if SRC is aligned on 16-byte boundary. - It allows to use movaps instead of movups, which required data - to be aligned or a general-protection exception (#GP) is generated. - */ - for (; i>0; i--) - { - __asm__ __volatile__ ( - "prefetchnta 320(%0);" - "prefetchnta 352(%0);" - "movaps (%0), %%xmm0;" - "movaps 16(%0), %%xmm1;" - "movaps 32(%0), %%xmm2;" - "movaps 48(%0), %%xmm3;" - "movntps %%xmm0, (%1);" - "movntps %%xmm1, 16(%1);" - "movntps %%xmm2, 32(%1);" - "movntps %%xmm3, 48(%1);" - :: "r" (src), "r" (dest) : "memory"); - src = ((const unsigned char *)src) + 64; - dest = ((unsigned char *)dest) + 64; - } - /* since movntq is weakly-ordered, a "sfence" - * is needed to become ordered again. */ - __asm__ __volatile__ ("sfence":::"memory"); - /* enables to use FPU */ - __asm__ __volatile__ ("emms":::"memory"); - } - /* - * Now do the tail of the block - */ - if (n) __memcpy(dest, src, n); - return retval; -} - -static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n) -{ - void *retval = dest; - size_t i; - - /* PREFETCH has effect even for MOVSB instruction ;) */ - __asm__ __volatile__ ( - "prefetchnta (%0);" - "prefetchnta 32(%0);" - "prefetchnta 64(%0);" - "prefetchnta 96(%0);" - "prefetchnta 128(%0);" - "prefetchnta 160(%0);" - "prefetchnta 192(%0);" - "prefetchnta 224(%0);" - "prefetchnta 256(%0);" - "prefetchnta 288(%0);" - : : "r" (src)); - - if (n >= MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to MMREG_SIZE -boundary */ - delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1); - if (delta) - { - delta=MMX_MMREG_SIZE-delta; - n -= delta; - small_memcpy(dest, src, delta); - } - i = n >> 6; /* n/64 */ - n&=63; - for (; i>0; i--) - { - __asm__ __volatile__ ( - "prefetchnta 320(%0);" - "prefetchnta 352(%0);" - "movq (%0), %%mm0;" - "movq 8(%0), %%mm1;" - "movq 16(%0), %%mm2;" - "movq 24(%0), %%mm3;" - "movq 32(%0), %%mm4;" - "movq 40(%0), %%mm5;" - "movq 48(%0), %%mm6;" - "movq 56(%0), %%mm7;" - "movntq %%mm0, (%1);" - "movntq %%mm1, 8(%1);" - "movntq %%mm2, 16(%1);" - "movntq %%mm3, 24(%1);" - "movntq %%mm4, 32(%1);" - "movntq %%mm5, 40(%1);" - "movntq %%mm6, 48(%1);" - "movntq %%mm7, 56(%1);" - :: "r" (src), "r" (dest) : "memory"); - src = ((const unsigned char *)src) + 64; - dest = ((unsigned char *)dest) + 64; - } - /* since movntq is weakly-ordered, a "sfence" - * is needed to become ordered again. */ - __asm__ __volatile__ ("sfence":::"memory"); - __asm__ __volatile__ ("emms":::"memory"); - } - /* - * Now do the tail of the block - */ - if (n) __memcpy(dest, src, n); - return retval; -} - -static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW -{ - void *retval = dest; - size_t i; - - /* PREFETCH has effect even for MOVSB instruction ;) */ - __asm__ __volatile__ ( - "prefetch (%0);" - "prefetch 32(%0);" - "prefetch 64(%0);" - "prefetch 96(%0);" - "prefetch 128(%0);" - "prefetch 160(%0);" - "prefetch 192(%0);" - "prefetch 224(%0);" - "prefetch 256(%0);" - "prefetch 288(%0);" - : : "r" (src)); - - if (n >= MMX1_MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to MMREG_SIZE -boundary */ - delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1); - if (delta) - { - delta=MMX_MMREG_SIZE-delta; - n -= delta; - small_memcpy(dest, src, delta); - } - i = n >> 6; /* n/64 */ - n&=63; - for (; i>0; i--) - { - __asm__ __volatile__ ( - "prefetch 320(%0);" - "prefetch 352(%0);" - "movq (%0), %%mm0;" - "movq 8(%0), %%mm1;" - "movq 16(%0), %%mm2;" - "movq 24(%0), %%mm3;" - "movq 32(%0), %%mm4;" - "movq 40(%0), %%mm5;" - "movq 48(%0), %%mm6;" - "movq 56(%0), %%mm7;" - "movq %%mm0, (%1);" - "movq %%mm1, 8(%1);" - "movq %%mm2, 16(%1);" - "movq %%mm3, 24(%1);" - "movq %%mm4, 32(%1);" - "movq %%mm5, 40(%1);" - "movq %%mm6, 48(%1);" - "movq %%mm7, 56(%1);" - :: "r" (src), "r" (dest) : "memory"); - src = ((const unsigned char *)src) + 64; - dest = ((unsigned char *)dest) + 64; - } - __asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms - } - /* - * Now do the tail of the block - */ - if (n) __memcpy(dest, src, n); - return retval; -} -#endif - -// Alam: why? memcpy may be __cdecl/_System and our code may be not the same type -static void *cpu_cpy(void *dest, const void *src, size_t n) -{ - if (src == NULL) - { - CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n)); - return dest; - } - - if(dest == NULL) - { - CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n)); - return dest; - } - return memcpy(dest, src, n); } -static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n) -{ -#if defined (_MSC_VER) && defined (_X86_) - _asm - { - mov ecx, [n] - mov esi, [src] - mov edi, [dest] - shr ecx, 6 // mit mmx: 64bytes per iteration - jz lower_64 // if lower than 64 bytes - loop_64: // MMX transfers multiples of 64bytes - movq mm0, 0[ESI] // read sources - movq mm1, 8[ESI] - movq mm2, 16[ESI] - movq mm3, 24[ESI] - movq mm4, 32[ESI] - movq mm5, 40[ESI] - movq mm6, 48[ESI] - movq mm7, 56[ESI] - - movq 0[EDI], mm0 // write destination - movq 8[EDI], mm1 - movq 16[EDI], mm2 - movq 24[EDI], mm3 - movq 32[EDI], mm4 - movq 40[EDI], mm5 - movq 48[EDI], mm6 - movq 56[EDI], mm7 - - add esi, 64 - add edi, 64 - dec ecx - jnz loop_64 - emms // close mmx operation - lower_64:// transfer rest of buffer - mov ebx,esi - sub ebx,src - mov ecx,[n] - sub ecx,ebx - shr ecx, 3 // multiples of 8 bytes - jz lower_8 - loop_8: - movq mm0, [esi] // read source - movq [edi], mm0 // write destination - add esi, 8 - add edi, 8 - dec ecx - jnz loop_8 - emms // close mmx operation - lower_8: - mov ebx,esi - sub ebx,src - mov ecx,[n] - sub ecx,ebx - rep movsb - mov eax, [dest] // return dest - } -#elif defined (__GNUC__) && defined (__i386__) - void *retval = dest; - size_t i; - - if (n >= MMX1_MIN_LEN) - { - register unsigned long int delta; - /* Align destinition to MMREG_SIZE -boundary */ - delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1); - if (delta) - { - delta=MMX_MMREG_SIZE-delta; - n -= delta; - small_memcpy(dest, src, delta); - } - i = n >> 6; /* n/64 */ - n&=63; - for (; i>0; i--) - { - __asm__ __volatile__ ( - "movq (%0), %%mm0;" - "movq 8(%0), %%mm1;" - "movq 16(%0), %%mm2;" - "movq 24(%0), %%mm3;" - "movq 32(%0), %%mm4;" - "movq 40(%0), %%mm5;" - "movq 48(%0), %%mm6;" - "movq 56(%0), %%mm7;" - "movq %%mm0, (%1);" - "movq %%mm1, 8(%1);" - "movq %%mm2, 16(%1);" - "movq %%mm3, 24(%1);" - "movq %%mm4, 32(%1);" - "movq %%mm5, 40(%1);" - "movq %%mm6, 48(%1);" - "movq %%mm7, 56(%1);" - :: "r" (src), "r" (dest) : "memory"); - src = ((const unsigned char *)src) + 64; - dest = ((unsigned char *)dest) + 64; - } - __asm__ __volatile__ ("emms":::"memory"); - } - /* - * Now do the tail of the block - */ - if (n) __memcpy(dest, src, n); - return retval; -#else - return cpu_cpy(dest, src, n); -#endif -} - -void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy; - -/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE - * Do not use on overlapped memory, use memmove for that - */ -void M_SetupMemcpy(void) -{ -#if defined (__GNUC__) && defined (__i386__) - if (R_SSE2) - M_Memcpy = sse_cpy; - else if (R_MMXExt) - M_Memcpy = mmx2_cpy; - else if (R_3DNow) - M_Memcpy = mmx1_cpy; - else -#endif - if (R_MMX) - M_Memcpy = mmx_cpy; -#if 0 - M_Memcpy = cpu_cpy; -#endif -} - /** Return the appropriate message for a file error or end of file. */ const char *M_FileError(FILE *fp) diff --git a/src/screen.c b/src/screen.c index 3c50ec67e..ca59b251d 100644 --- a/src/screen.c +++ b/src/screen.c @@ -98,14 +98,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_ // Short and Tall sky drawer, for the current color mode void (*walldrawerfunc)(void); -boolean R_486 = false; -boolean R_586 = false; -boolean R_MMX = false; -boolean R_SSE = false; -boolean R_3DNow = false; -boolean R_MMXExt = false; -boolean R_SSE2 = false; - void SCR_SetDrawFuncs(void) { // @@ -225,48 +217,6 @@ void SCR_SetMode(void) // void SCR_Startup(void) { - const CPUInfoFlags *RCpuInfo = I_CPUInfo(); - if (!M_CheckParm("-NOCPUID") && RCpuInfo) - { -#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__) - R_486 = true; -#endif - if (RCpuInfo->RDTSC) - R_586 = true; - if (RCpuInfo->MMX) - R_MMX = true; - if (RCpuInfo->AMD3DNow) - R_3DNow = true; - if (RCpuInfo->MMXExt) - R_MMXExt = true; - if (RCpuInfo->SSE) - R_SSE = true; - if (RCpuInfo->SSE2) - R_SSE2 = true; - CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2); - } - - if (M_CheckParm("-486")) - R_486 = true; - if (M_CheckParm("-586")) - R_586 = true; - if (M_CheckParm("-MMX")) - R_MMX = true; - if (M_CheckParm("-3DNow")) - R_3DNow = true; - if (M_CheckParm("-MMXExt")) - R_MMXExt = true; - - if (M_CheckParm("-SSE")) - R_SSE = true; - if (M_CheckParm("-noSSE")) - R_SSE = false; - - if (M_CheckParm("-SSE2")) - R_SSE2 = true; - - M_SetupMemcpy(); - if (dedicated) { V_Init(); diff --git a/src/screen.h b/src/screen.h index 46c1b99c6..e4c1006c3 100644 --- a/src/screen.h +++ b/src/screen.h @@ -172,17 +172,6 @@ extern void (*spanfunc)(void); extern void (*spanfuncs[SPANDRAWFUNC_MAX])(void); extern void (*spanfuncs_npo2[SPANDRAWFUNC_MAX])(void); -// ----- -// CPUID -// ----- -extern boolean R_ASM; -extern boolean R_486; -extern boolean R_586; -extern boolean R_MMX; -extern boolean R_3DNow; -extern boolean R_MMXExt; -extern boolean R_SSE2; - // ---------------- // screen variables // ----------------