0
0
Fork 0
mirror of https://git.do.srb2.org/KartKrew/Kart-Public.git synced 2025-03-26 21:01:19 +00:00

Remove ASM memcpy and CPUID code

This commit is contained in:
Alug 2024-03-30 03:02:19 +01:00
parent 44a02ca494
commit 9d9572af8f
9 changed files with 2 additions and 592 deletions

View file

@ -233,11 +233,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
return -1;
}
const CPUInfoFlags *I_CPUInfo(void)
{
return NULL;
}
const char *I_LocateWad(void)
{
return "/sdcard/srb2";

View file

@ -492,7 +492,7 @@ void M_StartupLocale(void);
// M_GetText function that just returns the string.
#define M_GetText(x) (x)
#endif
extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
void *M_Memcpy(void *dest, const void *src, size_t n);
char *va(const char *format, ...) FUNCPRINTF;
char *M_GetToken(const char *inputString);
char *sizeu1(size_t num);

View file

@ -137,11 +137,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
return -1;
}
const CPUInfoFlags *I_CPUInfo(void)
{
return NULL;
}
const char *I_LocateWad(void)
{
return NULL;

View file

@ -296,40 +296,6 @@ char *I_GetUserName(void);
*/
INT32 I_mkdir(const char *dirname, INT32 unixright);
typedef struct {
int FPU : 1; ///< FPU availabile
int CPUID : 1; ///< CPUID instruction
int RDTSC : 1; ///< RDTSC instruction
int MMX : 1; ///< MMX features
int MMXExt : 1; ///< MMX Ext. features
int CMOV : 1; ///< Pentium Pro's "cmov"
int AMD3DNow : 1; ///< 3DNow features
int AMD3DNowExt: 1; ///< 3DNow! Ext. features
int SSE : 1; ///< SSE features
int SSE2 : 1; ///< SSE2 features
int SSE3 : 1; ///< SSE3 features
int IA64 : 1; ///< Running on IA64
int AMD64 : 1; ///< Running on AMD64
int AltiVec : 1; ///< AltiVec features
int FPPE : 1; ///< floating-point precision error
int PFC : 1; ///< TBD?
int cmpxchg : 1; ///< ?
int cmpxchg16b : 1; ///< ?
int cmp8xchg16 : 1; ///< ?
int FPE : 1; ///< FPU Emu
int DEP : 1; ///< Data excution prevent
int PPCMM64 : 1; ///< PowerPC Movemem 64bit ok?
int ALPHAbyte : 1; ///< ?
int PAE : 1; ///< Physical Address Extension
int CPUs : 8;
} CPUInfoFlags;
/** \brief Info about CPU
\return CPUInfo in bits
*/
const CPUInfoFlags *I_CPUInfo(void);
/** \brief Find main WAD
\return path to main WAD
*/

View file

@ -1943,430 +1943,11 @@ char *sizeu5(size_t num)
return sizeu5_buf;
}
#if defined (__GNUC__) && defined (__i386__) // from libkwave, under GPL
// Alam: note libkwave memcpy code comes from mplayer's libvo/aclib_template.c, r699
/* for small memory blocks (<256 bytes) this version is faster */
#define small_memcpy(dest,src,n)\
{\
register unsigned long int dummy;\
__asm__ __volatile__(\
"cld\n\t"\
"rep; movsb"\
:"=&D"(dest), "=&S"(src), "=&c"(dummy)\
:"0" (dest), "1" (src),"2" (n)\
: "memory", "cc");\
}
/* linux kernel __memcpy (from: /include/asm/string.h) */
ATTRINLINE static FUNCINLINE void *__memcpy (void *dest, const void * src, size_t n)
void *M_Memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
if ( n < 4 )
{
small_memcpy(dest, src, n);
}
else
{
__asm__ __volatile__ (
"rep ; movsl;"
"testb $2,%b4;"
"je 1f;"
"movsw;"
"1:\ttestb $1,%b4;"
"je 2f;"
"movsb;"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) dest),"2" ((long) src)
: "memory");
}
return dest;
}
#define SSE_MMREG_SIZE 16
#define MMX_MMREG_SIZE 8
#define MMX1_MIN_LEN 0x800 /* 2K blocks */
#define MIN_LEN 0x40 /* 64-byte blocks */
/* SSE note: i tried to move 128 bytes a time instead of 64 but it
didn't make any measureable difference. i'm using 64 for the sake of
simplicity. [MF] */
static /*FUNCTARGET("sse2")*/ void *sse_cpy(void * dest, const void * src, size_t n)
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetchnta (%0);"
"prefetchnta 32(%0);"
"prefetchnta 64(%0);"
"prefetchnta 96(%0);"
"prefetchnta 128(%0);"
"prefetchnta 160(%0);"
"prefetchnta 192(%0);"
"prefetchnta 224(%0);"
"prefetchnta 256(%0);"
"prefetchnta 288(%0);"
: : "r" (src) );
if (n >= MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1);
if (delta)
{
delta=SSE_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
if (((unsigned long)src) & 15)
/* if SRC is misaligned */
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movups (%0), %%xmm0;"
"movups 16(%0), %%xmm1;"
"movups 32(%0), %%xmm2;"
"movups 48(%0), %%xmm3;"
"movntps %%xmm0, (%1);"
"movntps %%xmm1, 16(%1);"
"movntps %%xmm2, 32(%1);"
"movntps %%xmm3, 48(%1);"
:: "r" (src), "r" (dest) : "memory");
src = (const unsigned char *)src + 64;
dest = (unsigned char *)dest + 64;
}
else
/*
Only if SRC is aligned on 16-byte boundary.
It allows to use movaps instead of movups, which required data
to be aligned or a general-protection exception (#GP) is generated.
*/
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movaps (%0), %%xmm0;"
"movaps 16(%0), %%xmm1;"
"movaps 32(%0), %%xmm2;"
"movaps 48(%0), %%xmm3;"
"movntps %%xmm0, (%1);"
"movntps %%xmm1, 16(%1);"
"movntps %%xmm2, 32(%1);"
"movntps %%xmm3, 48(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
/* since movntq is weakly-ordered, a "sfence"
* is needed to become ordered again. */
__asm__ __volatile__ ("sfence":::"memory");
/* enables to use FPU */
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n)
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetchnta (%0);"
"prefetchnta 32(%0);"
"prefetchnta 64(%0);"
"prefetchnta 96(%0);"
"prefetchnta 128(%0);"
"prefetchnta 160(%0);"
"prefetchnta 192(%0);"
"prefetchnta 224(%0);"
"prefetchnta 256(%0);"
"prefetchnta 288(%0);"
: : "r" (src));
if (n >= MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movntq %%mm0, (%1);"
"movntq %%mm1, 8(%1);"
"movntq %%mm2, 16(%1);"
"movntq %%mm3, 24(%1);"
"movntq %%mm4, 32(%1);"
"movntq %%mm5, 40(%1);"
"movntq %%mm6, 48(%1);"
"movntq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
/* since movntq is weakly-ordered, a "sfence"
* is needed to become ordered again. */
__asm__ __volatile__ ("sfence":::"memory");
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetch (%0);"
"prefetch 32(%0);"
"prefetch 64(%0);"
"prefetch 96(%0);"
"prefetch 128(%0);"
"prefetch 160(%0);"
"prefetch 192(%0);"
"prefetch 224(%0);"
"prefetch 256(%0);"
"prefetch 288(%0);"
: : "r" (src));
if (n >= MMX1_MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetch 320(%0);"
"prefetch 352(%0);"
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movq %%mm0, (%1);"
"movq %%mm1, 8(%1);"
"movq %%mm2, 16(%1);"
"movq %%mm3, 24(%1);"
"movq %%mm4, 32(%1);"
"movq %%mm5, 40(%1);"
"movq %%mm6, 48(%1);"
"movq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
__asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
#endif
// Alam: why? memcpy may be __cdecl/_System and our code may be not the same type
static void *cpu_cpy(void *dest, const void *src, size_t n)
{
if (src == NULL)
{
CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
return dest;
}
if(dest == NULL)
{
CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
return dest;
}
return memcpy(dest, src, n);
}
static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n)
{
#if defined (_MSC_VER) && defined (_X86_)
_asm
{
mov ecx, [n]
mov esi, [src]
mov edi, [dest]
shr ecx, 6 // mit mmx: 64bytes per iteration
jz lower_64 // if lower than 64 bytes
loop_64: // MMX transfers multiples of 64bytes
movq mm0, 0[ESI] // read sources
movq mm1, 8[ESI]
movq mm2, 16[ESI]
movq mm3, 24[ESI]
movq mm4, 32[ESI]
movq mm5, 40[ESI]
movq mm6, 48[ESI]
movq mm7, 56[ESI]
movq 0[EDI], mm0 // write destination
movq 8[EDI], mm1
movq 16[EDI], mm2
movq 24[EDI], mm3
movq 32[EDI], mm4
movq 40[EDI], mm5
movq 48[EDI], mm6
movq 56[EDI], mm7
add esi, 64
add edi, 64
dec ecx
jnz loop_64
emms // close mmx operation
lower_64:// transfer rest of buffer
mov ebx,esi
sub ebx,src
mov ecx,[n]
sub ecx,ebx
shr ecx, 3 // multiples of 8 bytes
jz lower_8
loop_8:
movq mm0, [esi] // read source
movq [edi], mm0 // write destination
add esi, 8
add edi, 8
dec ecx
jnz loop_8
emms // close mmx operation
lower_8:
mov ebx,esi
sub ebx,src
mov ecx,[n]
sub ecx,ebx
rep movsb
mov eax, [dest] // return dest
}
#elif defined (__GNUC__) && defined (__i386__)
void *retval = dest;
size_t i;
if (n >= MMX1_MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movq %%mm0, (%1);"
"movq %%mm1, 8(%1);"
"movq %%mm2, 16(%1);"
"movq %%mm3, 24(%1);"
"movq %%mm4, 32(%1);"
"movq %%mm5, 40(%1);"
"movq %%mm6, 48(%1);"
"movq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
#else
return cpu_cpy(dest, src, n);
#endif
}
void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy;
/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE
* Do not use on overlapped memory, use memmove for that
*/
void M_SetupMemcpy(void)
{
#if defined (__GNUC__) && defined (__i386__)
if (R_SSE2)
M_Memcpy = sse_cpy;
else if (R_MMXExt)
M_Memcpy = mmx2_cpy;
else if (R_3DNow)
M_Memcpy = mmx1_cpy;
else
#endif
if (R_MMX)
M_Memcpy = mmx_cpy;
#if 0
M_Memcpy = cpu_cpy;
#endif
}
/** Return the appropriate message for a file error or end of file.
*/
const char *M_FileError(FILE *fp)

View file

@ -98,8 +98,6 @@ TMatrix *RotateZMatrix(angle_t rad);
// s1 = s2+s3+s1 (1024 lenghtmax)
void strcatbf(char *s1, const char *s2, const char *s3);
void M_SetupMemcpy(void);
const char *M_FileError(FILE *handle);
// counting bits, for weapon ammo code, usually

View file

@ -90,15 +90,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
// Short and Tall sky drawer, for the current color mode
void (*walldrawerfunc)(void);
boolean R_486 = false;
boolean R_586 = false;
boolean R_MMX = false;
boolean R_SSE = false;
boolean R_3DNow = false;
boolean R_MMXExt = false;
boolean R_SSE2 = false;
void SCR_SetMode(void)
{
if (dedicated)
@ -154,48 +145,6 @@ void SCR_SetMode(void)
//
void SCR_Startup(void)
{
const CPUInfoFlags *RCpuInfo = I_CPUInfo();
if (!M_CheckParm("-NOCPUID") && RCpuInfo)
{
#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__)
R_486 = true;
#endif
if (RCpuInfo->RDTSC)
R_586 = true;
if (RCpuInfo->MMX)
R_MMX = true;
if (RCpuInfo->AMD3DNow)
R_3DNow = true;
if (RCpuInfo->MMXExt)
R_MMXExt = true;
if (RCpuInfo->SSE)
R_SSE = true;
if (RCpuInfo->SSE2)
R_SSE2 = true;
CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
}
if (M_CheckParm("-486"))
R_486 = true;
if (M_CheckParm("-586"))
R_586 = true;
if (M_CheckParm("-MMX"))
R_MMX = true;
if (M_CheckParm("-3DNow"))
R_3DNow = true;
if (M_CheckParm("-MMXExt"))
R_MMXExt = true;
if (M_CheckParm("-SSE"))
R_SSE = true;
if (M_CheckParm("-noSSE"))
R_SSE = false;
if (M_CheckParm("-SSE2"))
R_SSE2 = true;
M_SetupMemcpy();
if (dedicated)
{
V_Init();

View file

@ -138,17 +138,6 @@ extern void (*transtransfunc)(void);
extern void (*twosmultipatchfunc)(void);
extern void (*twosmultipatchtransfunc)(void);
// -----
// CPUID
// -----
extern boolean R_ASM;
extern boolean R_486;
extern boolean R_586;
extern boolean R_MMX;
extern boolean R_3DNow;
extern boolean R_MMXExt;
extern boolean R_SSE2;
// ----------------
// screen variables
// ----------------

View file

@ -3910,69 +3910,6 @@ UINT32 I_GetFreeMem(UINT32 *total)
#endif
}
const CPUInfoFlags *I_CPUInfo(void)
{
#if defined (_WIN32)
static CPUInfoFlags WIN_CPUInfo;
SYSTEM_INFO SI;
p_IsProcessorFeaturePresent pfnCPUID = (p_IsProcessorFeaturePresent)(LPVOID)GetProcAddress(GetModuleHandleA("kernel32.dll"), "IsProcessorFeaturePresent");
ZeroMemory(&WIN_CPUInfo,sizeof (WIN_CPUInfo));
if (pfnCPUID)
{
WIN_CPUInfo.FPPE = pfnCPUID( 0); //PF_FLOATING_POINT_PRECISION_ERRATA
WIN_CPUInfo.FPE = pfnCPUID( 1); //PF_FLOATING_POINT_EMULATED
WIN_CPUInfo.cmpxchg = pfnCPUID( 2); //PF_COMPARE_EXCHANGE_DOUBLE
WIN_CPUInfo.MMX = pfnCPUID( 3); //PF_MMX_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.PPCMM64 = pfnCPUID( 4); //PF_PPC_MOVEMEM_64BIT_OK
WIN_CPUInfo.ALPHAbyte = pfnCPUID( 5); //PF_ALPHA_BYTE_INSTRUCTIONS
WIN_CPUInfo.SSE = pfnCPUID( 6); //PF_XMMI_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.AMD3DNow = pfnCPUID( 7); //PF_3DNOW_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.RDTSC = pfnCPUID( 8); //PF_RDTSC_INSTRUCTION_AVAILABLE
WIN_CPUInfo.PAE = pfnCPUID( 9); //PF_PAE_ENABLED
WIN_CPUInfo.SSE2 = pfnCPUID(10); //PF_XMMI64_INSTRUCTIONS_AVAILABLE
//WIN_CPUInfo.blank = pfnCPUID(11); //PF_SSE_DAZ_MODE_AVAILABLE
WIN_CPUInfo.DEP = pfnCPUID(12); //PF_NX_ENABLED
WIN_CPUInfo.SSE3 = pfnCPUID(13); //PF_SSE3_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.cmpxchg16b = pfnCPUID(14); //PF_COMPARE_EXCHANGE128
WIN_CPUInfo.cmp8xchg16 = pfnCPUID(15); //PF_COMPARE64_EXCHANGE128
WIN_CPUInfo.PFC = pfnCPUID(16); //PF_CHANNELS_ENABLED
}
#ifdef HAVE_SDLCPUINFO
else
{
WIN_CPUInfo.RDTSC = SDL_HasRDTSC();
WIN_CPUInfo.MMX = SDL_HasMMX();
WIN_CPUInfo.AMD3DNow = SDL_Has3DNow();
WIN_CPUInfo.SSE = SDL_HasSSE();
WIN_CPUInfo.SSE2 = SDL_HasSSE2();
WIN_CPUInfo.AltiVec = SDL_HasAltiVec();
}
WIN_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
WIN_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
#endif
GetSystemInfo(&SI);
WIN_CPUInfo.CPUs = SI.dwNumberOfProcessors;
WIN_CPUInfo.IA64 = (SI.dwProcessorType == 2200); // PROCESSOR_INTEL_IA64
WIN_CPUInfo.AMD64 = (SI.dwProcessorType == 8664); // PROCESSOR_AMD_X8664
return &WIN_CPUInfo;
#elif defined (HAVE_SDLCPUINFO)
static CPUInfoFlags SDL_CPUInfo;
memset(&SDL_CPUInfo,0,sizeof (CPUInfoFlags));
SDL_CPUInfo.RDTSC = SDL_HasRDTSC();
SDL_CPUInfo.MMX = SDL_HasMMX();
SDL_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
SDL_CPUInfo.AMD3DNow = SDL_Has3DNow();
SDL_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
SDL_CPUInfo.SSE = SDL_HasSSE();
SDL_CPUInfo.SSE2 = SDL_HasSSE2();
SDL_CPUInfo.AltiVec = SDL_HasAltiVec();
return &SDL_CPUInfo;
#else
return NULL; /// \todo CPUID asm
#endif
}
// note CPUAFFINITY code used to reside here
void I_RegisterSysCommands(void) {}
#endif