mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-13 11:30:44 +00:00
Adapt x86 GCC assembly copybufreverse() to x86_64 and enable unconditionally.
For a 1680x1050 scene standing directly in front of a mirror, this increases FPS from ~90 to ~95 for me.
git-svn-id: https://svn.eduke32.com/eduke32@4363 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
acc4d12f4b
commit
c405a13bbd
1 changed file with 45 additions and 1 deletion
|
@ -390,6 +390,50 @@ void copybufbyte(const void *S, void *D, int32_t c)
|
||||||
while ((c--) > 0) *(q++) = *(p++);
|
while ((c--) > 0) *(q++) = *(p++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// copybufreverse() is a special case: use the assembly version for GCC on x86
|
||||||
|
// *and* x86_64, and the C version otherwise.
|
||||||
|
// XXX: we don't honor NOASM in the x86_64 case.
|
||||||
|
|
||||||
|
#if defined(__GNUC__) && defined(__x86_64__)
|
||||||
|
// NOTE: Almost CODEDUP from x86 GCC assembly version, except that
|
||||||
|
// - %%esi -> %%rsi
|
||||||
|
// - %%edi -> %%rdi
|
||||||
|
// - (dec,inc,sub,add)l suffix removed where necessary
|
||||||
|
// Copies c bytes from S to D while reversing their order: the source is read
// at descending addresses (S[0], S[-1], ..., S[-(c-1)]) and the destination is
// written at ascending addresses (D[0], D[1], ..., D[c-1]).
//
//  S: pointer to the LAST byte of the source range (the first byte read).
//  D: pointer to the first byte of the destination range.
//  c: number of bytes to copy; a value <= 0 copies nothing, which is the same
//     behavior as the portable C fallback (`while ((c--) > 0)`).
void copybufreverse(const void *S, void *D, int32_t c)
{
    // The assembly below handles the count as an unsigned quantity
    // (shrl/decl), so a negative c would be taken as a huge length and run
    // far past both buffers. Bail out early instead.
    if (c <= 0) return;

    __asm__ __volatile__(
        // Bit 0 of the count: copy a single odd byte.
        "shrl $1, %%ecx\n\t"
        "jnc 0f\n\t"                // jnc skipit1
        "movb (%%rsi), %%al\n\t"
        "dec %%rsi\n\t"
        "movb %%al, (%%rdi)\n\t"
        "inc %%rdi\n\t"
        "0:\n\t"                    // skipit1:
        // Bit 1 of the count: copy two bytes, swapped via a 16-bit rotate.
        "shrl $1, %%ecx\n\t"
        "jnc 1f\n\t"                // jnc skipit2
        "movw -1(%%rsi), %%ax\n\t"
        "sub $2, %%rsi\n\t"
        "rorw $8, %%ax\n\t"
        "movw %%ax, (%%rdi)\n\t"
        "add $2, %%rdi\n\t"
        "1:\n\t"                    // skipit2
        // ecx now holds the number of remaining 4-byte groups.
        "testl %%ecx, %%ecx\n\t"
        "jz 3f\n\t"                 // jz endloop
        "2:\n\t"                    // begloop
        // Load four source bytes ending at (%rsi), byte-reverse them with
        // bswap and store them at the destination.
        "movl -3(%%rsi), %%eax\n\t"
        "sub $4, %%rsi\n\t"
        "bswapl %%eax\n\t"
        "movl %%eax, (%%rdi)\n\t"
        "add $4, %%rdi\n\t"
        "decl %%ecx\n\t"
        "jnz 2b\n\t"                // jnz begloop
        "3:"
        // S, D and c are pinned to rsi, rdi and ecx and are modified in place.
        : "+S"(S), "+D"(D), "+c"(c) :
        : "eax", "memory", "cc"
    );
}
|
||||||
|
#else
|
||||||
void copybufreverse(const void *S, void *D, int32_t c)
|
void copybufreverse(const void *S, void *D, int32_t c)
|
||||||
{
|
{
|
||||||
const char *p = (const char *)S;
|
const char *p = (const char *)S;
|
||||||
|
@ -397,7 +441,7 @@ void copybufreverse(const void *S, void *D, int32_t c)
|
||||||
|
|
||||||
while ((c--) > 0) *(q++) = *(p--);
|
while ((c--) > 0) *(q++) = *(p--);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue