From c405a13bbd6dc2bfac91588b0cead4aa7d363167 Mon Sep 17 00:00:00 2001 From: helixhorned Date: Wed, 5 Mar 2014 21:12:58 +0000 Subject: [PATCH] Adapt x86 GCC assembly copybufreverse() to x86_64 and enable unconditionally. For a 1680x1050 scene standing directly in front of a mirror, this increases FPS from ~90 to ~95 for me. git-svn-id: https://svn.eduke32.com/eduke32@4363 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/src/pragmas.c | 46 ++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/polymer/eduke32/build/src/pragmas.c b/polymer/eduke32/build/src/pragmas.c index 09bf1da42..8336508c5 100644 --- a/polymer/eduke32/build/src/pragmas.c +++ b/polymer/eduke32/build/src/pragmas.c @@ -390,6 +390,50 @@ void copybufbyte(const void *S, void *D, int32_t c) while ((c--) > 0) *(q++) = *(p++); } + +// copybufreverse() is a special case: use the assembly version for GCC on x86 +// *and* x86_64, and the C version otherwise. +// XXX: we don't honor NOASM in the x86_64 case. 
#if defined(__GNUC__) && defined(__x86_64__)
// NOTE: Almost CODEDUP from x86 GCC assembly version, except that
//  - %%esi -> %%rsi
//  - %%edi -> %%rdi
//  - (dec,inc,sub,add)l suffix removed where necessary
//
// copybufreverse(): copy <c> bytes while reversing their order.
//  S: address of the FIRST byte to read; reading proceeds towards lower
//     addresses, i.e. S[0], S[-1], ..., S[-(c-1)].
//  D: address of the first byte to write; writing proceeds towards higher
//     addresses, i.e. D[0], D[1], ..., D[c-1].
//  c: number of bytes to copy. Values <= 0 copy nothing.
void copybufreverse(const void *S, void *D, int32_t c)
{
    // The assembly below halves the count with 'shrl', a logical (unsigned)
    // shift: a negative <c> would turn into a huge positive count and run
    // far past both buffers. Bail out early so that this variant agrees with
    // the C one, whose 'while ((c--) > 0)' loop does nothing in that case.
    if (c <= 0)
        return;

    __asm__ __volatile__(
        // Bit 0 of the count set: copy a single byte.
        "shrl $1, %%ecx\n\t"
        "jnc 0f\n\t"                   // jnc skipit1
        "movb (%%rsi), %%al\n\t"
        "dec %%rsi\n\t"
        "movb %%al, (%%rdi)\n\t"
        "inc %%rdi\n\t"
        "0:\n\t"                       // skipit1:
        // Bit 1 of the count set: copy two bytes, swapped via 'rorw $8'.
        "shrl $1, %%ecx\n\t"
        "jnc 1f\n\t"                   // jnc skipit2
        "movw -1(%%rsi), %%ax\n\t"
        "sub $2, %%rsi\n\t"
        "rorw $8, %%ax\n\t"
        "movw %%ax, (%%rdi)\n\t"
        "add $2, %%rdi\n\t"
        "1:\n\t"                       // skipit2:
        // %ecx now holds c/4: copy that many dwords, each byte-reversed
        // with 'bswapl'.
        "testl %%ecx, %%ecx\n\t"
        "jz 3f\n\t"                    // jz endloop
        "2:\n\t"                       // begloop:
        "movl -3(%%rsi), %%eax\n\t"
        "sub $4, %%rsi\n\t"
        "bswapl %%eax\n\t"
        "movl %%eax, (%%rdi)\n\t"
        "add $4, %%rdi\n\t"
        "decl %%ecx\n\t"
        "jnz 2b\n\t"                   // jnz begloop
        "3:"                           // endloop:
        // All three operands are read-write: the code advances both pointers
        // and counts %ecx down to zero.
        : "+S"(S), "+D"(D), "+c"(c) :
        : "eax", "memory", "cc"
    );
}
#else
// copybufreverse(): portable C version.
// Copies <c> bytes, reading downwards from S (S[0], S[-1], ...) and writing
// upwards from D (D[0], D[1], ...). Values of <c> that are <= 0 copy nothing.
void copybufreverse(const void *S, void *D, int32_t c)
{
    const char *p = (const char *)S;
    char *q = (char *)D;
    int32_t i;

    // Index instead of post-decrementing <p>, so that no pointer below the
    // lowest source byte is ever formed.
    for (i = 0; i < c; i++)
        q[i] = p[-i];
}
#endif  // defined(__GNUC__) && defined(__x86_64__)
// NOTE: in pragmas.c, the '#endif' of the enclosing conditional (opened
// earlier in that file) follows directly after this block.