mirror of
https://github.com/ZDoom/raze-gles.git
synced 2024-11-14 08:30:58 +00:00
a-c.c: in {v,mv}lineasm4, use vector types for vplc/vinc with GCC >=4.7.
For a full 1680x1050 screen drawing a solid/masked wall, the FPS increases from 118 to 133 and from 114 to 116 (respectively) for me. Guarded by the macro USE_VECTOR_EXT in the source. git-svn-id: https://svn.eduke32.com/eduke32@4160 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
02507d76e4
commit
ca22bf8185
1 changed files with 32 additions and 17 deletions
|
@ -153,30 +153,29 @@ extern uint32_t vplce[4];
|
||||||
extern int32_t vince[4];
|
extern int32_t vince[4];
|
||||||
extern intptr_t bufplce[4];
|
extern intptr_t bufplce[4];
|
||||||
|
|
||||||
|
// Enable the GCC vector-extension code path for vplc/vinc in {v,mv}lineasm4
// on GCC >= 4.7.
// The major and minor version are tested together: checking __GNUC_MINOR__
// alone would wrongly reject any later major release whose minor number is
// below 7 (e.g. GCC 5.1 or 11.4), silently disabling the vector path there.
#if defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
# define USE_VECTOR_EXT
#endif
|
||||||
|
|
||||||
|
// Four uint32_t lanes packed into one 16-byte GCC vector. Used for the
// vplc/vinc locals in vlineasm4 and mvlineasm4 so that all four lanes are
// advanced with a single `vplc += vinc`.
#ifdef USE_VECTOR_EXT
|
||||||
|
typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
|
||||||
|
#endif
|
||||||
|
|
||||||
// cnt >= 1
|
// cnt >= 1
|
||||||
void vlineasm4(int32_t cnt, char *p)
|
void vlineasm4(int32_t cnt, char *p)
|
||||||
{
|
{
|
||||||
char ch;
|
char ch;
|
||||||
int32_t i;
|
int32_t i;
|
||||||
#if 1
|
|
||||||
// this gives slightly more stuff in registers in the loop
|
|
||||||
// (on x86_64 at least)
|
|
||||||
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
|
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
|
||||||
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
|
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
|
||||||
|
#ifdef USE_VECTOR_EXT
|
||||||
|
uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
|
||||||
|
uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
|
||||||
|
#else
|
||||||
const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
|
const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
|
||||||
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
|
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
|
||||||
#else
|
|
||||||
char *pal[4];
|
|
||||||
char *buf[4];
|
|
||||||
int32_t vinc[4];
|
|
||||||
uint32_t vplc[4];
|
|
||||||
|
|
||||||
Bmemcpy(pal, palookupoffse, sizeof(pal));
|
|
||||||
Bmemcpy(buf, bufplce, sizeof(buf));
|
|
||||||
Bmemcpy(vinc, vince, sizeof(vinc));
|
|
||||||
Bmemcpy(vplc, vplce, sizeof(vplc));
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const int32_t logy = glogy, ourbpl = bpl;
|
const int32_t logy = glogy, ourbpl = bpl;
|
||||||
|
|
||||||
do
|
do
|
||||||
|
@ -185,13 +184,19 @@ void vlineasm4(int32_t cnt, char *p)
|
||||||
{
|
{
|
||||||
ch = getpix(logy, buf[i], vplc[i]);
|
ch = getpix(logy, buf[i], vplc[i]);
|
||||||
p[i] = pal[i][ch];
|
p[i] = pal[i][ch];
|
||||||
|
#if !defined USE_VECTOR_EXT
|
||||||
vplc[i] += vinc[i];
|
vplc[i] += vinc[i];
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef USE_VECTOR_EXT
|
||||||
|
vplc += vinc;
|
||||||
|
#endif
|
||||||
p += ourbpl;
|
p += ourbpl;
|
||||||
}
|
}
|
||||||
while (--cnt);
|
while (--cnt);
|
||||||
|
|
||||||
Bmemcpy(vplce, vplc, sizeof(vplce));
|
for (i=0; i<4; i++)
|
||||||
|
vplce[i] = vplc[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
void setupmvlineasm(int32_t neglogy) { glogy = neglogy; }
|
void setupmvlineasm(int32_t neglogy) { glogy = neglogy; }
|
||||||
|
@ -227,9 +232,13 @@ void mvlineasm4(int32_t cnt, char *p)
|
||||||
|
|
||||||
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
|
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
|
||||||
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
|
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
|
||||||
|
#ifdef USE_VECTOR_EXT
|
||||||
|
uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
|
||||||
|
uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
|
||||||
|
#else
|
||||||
const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
|
const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
|
||||||
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
|
uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
|
||||||
|
#endif
|
||||||
const int32_t logy = glogy, ourbpl = bpl;
|
const int32_t logy = glogy, ourbpl = bpl;
|
||||||
|
|
||||||
do
|
do
|
||||||
|
@ -238,13 +247,19 @@ void mvlineasm4(int32_t cnt, char *p)
|
||||||
{
|
{
|
||||||
ch = getpix(logy, buf[i], vplc[i]);
|
ch = getpix(logy, buf[i], vplc[i]);
|
||||||
if (ch != 255) p[i] = pal[i][ch];
|
if (ch != 255) p[i] = pal[i][ch];
|
||||||
|
#if !defined USE_VECTOR_EXT
|
||||||
vplc[i] += vinc[i];
|
vplc[i] += vinc[i];
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef USE_VECTOR_EXT
|
||||||
|
vplc += vinc;
|
||||||
|
#endif
|
||||||
p += ourbpl;
|
p += ourbpl;
|
||||||
}
|
}
|
||||||
while (--cnt);
|
while (--cnt);
|
||||||
|
|
||||||
Bmemcpy(vplce, vplc, sizeof(vplce));
|
for (i=0; i<4; i++)
|
||||||
|
vplce[i] = vplc[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef USE_ASM64
|
#ifdef USE_ASM64
|
||||||
|
|
Loading…
Reference in a new issue