Try out __restrict in the classic renderer. The qualifier is wrapped in a #define (A_C_RESTRICT) so that it can be removed easily if it causes problems or turns out to be useless.

git-svn-id: https://svn.eduke32.com/eduke32@4702 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
terminx 2014-10-29 17:06:43 +00:00
parent 9601554320
commit 43a759ec07
4 changed files with 30 additions and 29 deletions

View file

@ -9,6 +9,7 @@
#include "compat.h"
#define CLASSIC_SLICE_BY_4
#define A_C_RESTRICT __restrict
/** Definitions of high-precision integer types. **/
// Should be used for values that represent coordinates with which calculations

View file

@ -65,8 +65,8 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by,
if (!skiploadincs) { gbxinc = asm1; gbyinc = asm2; }
{
const char *const palptr = &ghlinepal[paloffs];
const char *const buf = gbuf;
const char *const A_C_RESTRICT palptr = &ghlinepal[paloffs];
const char *const A_C_RESTRICT buf = gbuf;
const int32_t bxinc = gbxinc, byinc = gbyinc;
const int32_t logx = glogx, logy = glogy;
char *pp = (char *)p;
@ -99,7 +99,7 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by,
void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t bx, int32_t by)
{
intptr_t *slopalptr;
intptr_t * A_C_RESTRICT slopalptr;
int32_t bz, bzinc;
uint32_t u, v;
@ -136,8 +136,8 @@ void setupvlineasm(int32_t neglogy) { glogy = neglogy; }
// cnt+1 loop iterations!
int32_t vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
const char *const buf = (char *)bufplc;
const char *const pal = (char *)paloffs;
const char *const A_C_RESTRICT buf = (char *)bufplc;
const char *const A_C_RESTRICT pal = (char *)paloffs;
const int32_t logy = glogy, ourbpl = bpl;
char *pp = (char *)p;
@ -215,7 +215,7 @@ typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
#endif
// cnt >= 1
void vlineasm4nlogy(int32_t cnt, char *p, char *const *pal, char *const *buf,
void vlineasm4nlogy(int32_t cnt, char *p, char *const A_C_RESTRICT * pal, char *const A_C_RESTRICT * buf,
#ifdef USE_VECTOR_EXT
uint32_vec4 vplc, const uint32_vec4 vinc)
#else
@ -248,8 +248,8 @@ void vlineasm4nlogy(int32_t cnt, char *p, char *const *pal, char *const *buf,
// cnt >= 1
void vlineasm4(int32_t cnt, char *p)
{
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
char * const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char * const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT
uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
@ -337,8 +337,8 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
{
char ch;
const char *const buf = (char *)bufplc;
const char *const pal = (char *)paloffs;
const char *const A_C_RESTRICT buf = (char *)bufplc;
const char *const A_C_RESTRICT pal = (char *)paloffs;
const int32_t logy = glogy, ourbpl = bpl;
char *pp = (char *)p;
@ -376,8 +376,8 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
// cnt >= 1
void mvlineasm4(int32_t cnt, char *p)
{
char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
char *const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
char *const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT
uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
@ -470,9 +470,9 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
{
char ch;
const char *const buf = (char *)bufplc;
const char *const pal = (char *)paloffs;
const char *const trans = (char *)gtrans;
const char *const A_C_RESTRICT buf = (char *)bufplc;
const char *const A_C_RESTRICT pal = (char *)paloffs;
const char *const A_C_RESTRICT trans = (char *)gtrans;
const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
char *pp = (char *)p;
@ -526,8 +526,8 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc
int32_t cnt = tabledivide32(asm2-p-1, bpl); // >= 1
const int32_t vinc2 = asm1;
const char *const buf1 = (char *)bufplc1;
const char *const buf2 = (char *)bufplc2;
const char *const A_C_RESTRICT buf1 = (char *)bufplc1;
const char *const A_C_RESTRICT buf2 = (char *)bufplc2;
const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
char *pp = (char *)p;

View file

@ -3195,7 +3195,7 @@ static inline void wallmosts_finish(int16_t *mostbuf, int32_t z1, int32_t z2,
{
// PK 20110423: a bit of consistency checking is a good thing:
int32_t tmp = (ix2-ix1 >= 0) ? (ix2-ix1+1) : 1;
int32_t yinc = ((scale(z2, xdimenscale, iy2)<<4) - y) / tmp;
int32_t yinc = tabledivide32((scale(z2, xdimenscale, iy2)<<4) - y, tmp);
qinterpolatedown16short((intptr_t)&mostbuf[ix1], tmp, y+(globalhoriz<<16), yinc);
}
@ -4061,8 +4061,8 @@ static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_
{
char ch;
const char *const buf = (char *)bufplc;
const char *const pal = (char *)asm3;
const char *const A_C_RESTRICT buf = (char *)bufplc;
const char *const A_C_RESTRICT pal = (char *)asm3;
const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
@ -4087,9 +4087,9 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_
{
char ch;
const char *const buf = (char *)bufplc;
const char *const pal = (char *)asm3;
const char *const trans = getblendtab(globalblend);
const char *const A_C_RESTRICT buf = (char *)bufplc;
const char *const A_C_RESTRICT pal = (char *)asm3;
const char *const A_C_RESTRICT trans = getblendtab(globalblend);
const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX;
const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX;
@ -4223,9 +4223,9 @@ static inline void setupslopevlin_alsotrans(int32_t logylogx, intptr_t bufplc, i
// cnt iterations
static void tslopevlin(uint8_t *p, int32_t i, const intptr_t *slopalptr, int32_t cnt, int32_t bx, int32_t by)
{
const char *const buf = ggbuf;
const char *const pal = ggpal;
const char *const trans = getblendtab(0);
const char *const A_C_RESTRICT buf = ggbuf;
const char *const A_C_RESTRICT pal = ggpal;
const char *const A_C_RESTRICT trans = getblendtab(0);
const int32_t bzinc = (asm1>>3), pinc = ggpinc;
const int32_t transmode = (globalorientation&128);

View file

@ -154,7 +154,7 @@ static int32_t OSD_CvarModified(const osdcvar_t *cvar)
case CVAR_DOUBLE:
return (cvar->dval.d != *(double *)cvar->c.vptr);
default:
return 0;
EDUKE32_UNREACHABLE_SECTION(return 0);
}
}
@ -2154,7 +2154,7 @@ int32_t osdcmd_cvar_set(const osdfuncparm_t *parm)
}
break;
default:
break;
EDUKE32_UNREACHABLE_SECTION(break);
}
#ifdef USE_OPENGL
@ -2220,7 +2220,7 @@ void OSD_WriteCvars(FILE *fp)
fprintf(fp,"%s \"%s\"\n",osd->cvars[i].c.name,(char *)osd->cvars[i].c.vptr);
break;
default:
break;
EDUKE32_UNREACHABLE_SECTION(break);
}
}
}