mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-12 19:20:38 +00:00
Adapt x86 GCC assembly copybufreverse() to x86_64 and enable unconditionally.
For a 1680x1050 scene standing directly in front of a mirror, this increases FPS from ~90 to ~95 for me.

git-svn-id: https://svn.eduke32.com/eduke32@4363 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
acc4d12f4b
commit
c405a13bbd
1 changed files with 45 additions and 1 deletions
|
@ -390,6 +390,50 @@ void copybufbyte(const void *S, void *D, int32_t c)
|
|||
while ((c--) > 0) *(q++) = *(p++);
|
||||
}
|
||||
|
||||
|
||||
// copybufreverse() is a special case: use the assembly version for GCC on x86
|
||||
// *and* x86_64, and the C version otherwise.
|
||||
// XXX: we don't honor NOASM in the x86_64 case.
|
||||
|
||||
#if defined(__GNUC__) && defined(__x86_64__)
|
||||
// NOTE: Almost CODEDUP from x86 GCC assembly version, except that
|
||||
// - %%esi -> %%rsi
|
||||
// - %%edi -> %%rdi
|
||||
// - (dec,inc,sub,add)l suffix removed where necessary
|
||||
// Copies 'c' bytes from S to D while reversing their order: the source is
// read downwards starting at S (S[0], S[-1], S[-2], ...) and the destination
// is written upwards starting at D (D[0], D[1], D[2], ...).
//
//  S: address of the first byte to read, i.e. the HIGHEST address of the
//     source range; bytes S-(c-1) .. S are read.
//  D: address of the first byte to write; bytes D .. D+(c-1) are written.
//  c: number of bytes to copy. Values <= 0 copy nothing, which matches the
//     portable C version's 'while ((c--) > 0)' loop.
void copybufreverse(const void *S, void *D, int32_t c)
{
    // The assembly below shifts the count with 'shrl' (a logical shift), so
    // a negative 'c' would be interpreted as a huge positive length and run
    // far outside both buffers. Bail out early instead.
    if (c <= 0)
        return;

    __asm__ __volatile__(
        // Bit 0 of the count: copy a single odd byte first.
        "shrl $1, %%ecx\n\t"
        "jnc 0f\n\t"		// jnc skipit1
        "movb (%%rsi), %%al\n\t"
        "dec %%rsi\n\t"
        "movb %%al, (%%rdi)\n\t"
        "inc %%rdi\n\t"
        "0:\n\t"		// skipit1:
        // Bit 1 of the count: copy one 16-bit word, swapping its two bytes.
        "shrl $1, %%ecx\n\t"
        "jnc 1f\n\t"		// jnc skipit2
        "movw -1(%%rsi), %%ax\n\t"
        "sub $2, %%rsi\n\t"
        "rorw $8, %%ax\n\t"
        "movw %%ax, (%%rdi)\n\t"
        "add $2, %%rdi\n\t"
        "1:\n\t"		// skipit2
        // %ecx now holds the number of remaining 32-bit words (c / 4).
        "testl %%ecx, %%ecx\n\t"
        "jz 3f\n\t"		// jz endloop
        "2:\n\t"		// begloop
        // Load the 4 bytes ending at (%rsi), reverse them with bswap, store.
        "movl -3(%%rsi), %%eax\n\t"
        "sub $4, %%rsi\n\t"
        "bswapl %%eax\n\t"
        "movl %%eax, (%%rdi)\n\t"
        "add $4, %%rdi\n\t"
        "decl %%ecx\n\t"
        "jnz 2b\n\t"		// jnz begloop
        "3:"
        // S, D and c are pinned to %rsi, %rdi and %ecx and are modified by
        // the code above, hence read-write ("+") operands.
        : "+S"(S), "+D"(D), "+c"(c) :
        : "eax", "memory", "cc"
    );
}
|
||||
#else
|
||||
void copybufreverse(const void *S, void *D, int32_t c)
|
||||
{
|
||||
const char *p = (const char *)S;
|
||||
|
@ -397,7 +441,7 @@ void copybufreverse(const void *S, void *D, int32_t c)
|
|||
|
||||
while ((c--) > 0) *(q++) = *(p--);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue