From 43a759ec071311f34a7ccdb921c75977572b9b26 Mon Sep 17 00:00:00 2001 From: terminx Date: Wed, 29 Oct 2014 17:06:43 +0000 Subject: [PATCH] Try out __restrict in the classic renderer. #define'd for easy removal in case it causes problems or is useless. git-svn-id: https://svn.eduke32.com/eduke32@4702 1a8010ca-5511-0410-912e-c29ae57300e0 --- polymer/eduke32/build/include/a.h | 1 + polymer/eduke32/build/src/a-c.c | 34 +++++++++++++++--------------- polymer/eduke32/build/src/engine.c | 18 ++++++++-------- polymer/eduke32/build/src/osd.c | 6 +++--- 4 files changed, 30 insertions(+), 29 deletions(-) diff --git a/polymer/eduke32/build/include/a.h b/polymer/eduke32/build/include/a.h index 8f5e51242..8412b6ef0 100644 --- a/polymer/eduke32/build/include/a.h +++ b/polymer/eduke32/build/include/a.h @@ -9,6 +9,7 @@ #include "compat.h" #define CLASSIC_SLICE_BY_4 +#define A_C_RESTRICT __restrict /** Definitions of high-precision integer types. **/ // Should be used for values that represent coordinates with which calculations diff --git a/polymer/eduke32/build/src/a-c.c b/polymer/eduke32/build/src/a-c.c index a87fcb782..2529d066d 100644 --- a/polymer/eduke32/build/src/a-c.c +++ b/polymer/eduke32/build/src/a-c.c @@ -65,8 +65,8 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, if (!skiploadincs) { gbxinc = asm1; gbyinc = asm2; } { - const char *const palptr = &ghlinepal[paloffs]; - const char *const buf = gbuf; + const char *const A_C_RESTRICT palptr = &ghlinepal[paloffs]; + const char *const A_C_RESTRICT buf = gbuf; const int32_t bxinc = gbxinc, byinc = gbyinc; const int32_t logx = glogx, logy = glogy; char *pp = (char *)p; @@ -99,7 +99,7 @@ void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t bx, int32_t by) { - intptr_t *slopalptr; + intptr_t * A_C_RESTRICT slopalptr; int32_t bz, bzinc; uint32_t u, v; @@ -136,8 +136,8 @@ void setupvlineasm(int32_t neglogy) { glogy = neglogy; } // cnt+1 loop iterations! int32_t vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p) { - const char *const buf = (char *)bufplc; - const char *const pal = (char *)paloffs; + const char *const A_C_RESTRICT buf = (char *)bufplc; + const char *const A_C_RESTRICT pal = (char *)paloffs; const int32_t logy = glogy, ourbpl = bpl; char *pp = (char *)p; @@ -215,7 +215,7 @@ typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16))); #endif // cnt >= 1 -void vlineasm4nlogy(int32_t cnt, char *p, char *const *pal, char *const *buf, +void vlineasm4nlogy(int32_t cnt, char *p, char *const A_C_RESTRICT * pal, char *const A_C_RESTRICT * buf, #ifdef USE_VECTOR_EXT uint32_vec4 vplc, const uint32_vec4 vinc) #else @@ -248,8 +248,8 @@ void vlineasm4nlogy(int32_t cnt, char *p, char *const *pal, char *const *buf, // cnt >= 1 void vlineasm4(int32_t cnt, char *p) { - char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]}; - char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]}; + char * const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]}; + char * const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]}; #ifdef USE_VECTOR_EXT uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]}; uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]}; @@ -337,8 +337,8 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i { char ch; - const char *const buf = (char *)bufplc; - const char *const pal = (char *)paloffs; + const char *const A_C_RESTRICT buf = (char *)bufplc; + const char *const A_C_RESTRICT pal = (char *)paloffs; const int32_t logy = glogy, ourbpl = bpl; char *pp = (char *)p; @@ -376,8 +376,8 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i // cnt >= 1 void mvlineasm4(int32_t cnt, char *p) { - char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]}; - char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]}; + char *const A_C_RESTRICT pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]}; + char *const A_C_RESTRICT buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]}; #ifdef USE_VECTOR_EXT uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]}; uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]}; @@ -470,9 +470,9 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i { char ch; - const char *const buf = (char *)bufplc; - const char *const pal = (char *)paloffs; - const char *const trans = (char *)gtrans; + const char *const A_C_RESTRICT buf = (char *)bufplc; + const char *const A_C_RESTRICT pal = (char *)paloffs; + const char *const A_C_RESTRICT trans = (char *)gtrans; const int32_t logy = glogy, ourbpl = bpl, transm = transmode; char *pp = (char *)p; @@ -526,8 +526,8 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc int32_t cnt = tabledivide32(asm2-p-1, bpl); // >= 1 const int32_t vinc2 = asm1; - const char *const buf1 = (char *)bufplc1; - const char *const buf2 = (char *)bufplc2; + const char *const A_C_RESTRICT buf1 = (char *)bufplc1; + const char *const A_C_RESTRICT buf2 = (char *)bufplc2; const int32_t logy = glogy, ourbpl = bpl, transm = transmode; char *pp = (char *)p; diff --git a/polymer/eduke32/build/src/engine.c b/polymer/eduke32/build/src/engine.c index 80a53dd48..9eff744ec 100644 --- a/polymer/eduke32/build/src/engine.c +++ b/polymer/eduke32/build/src/engine.c @@ -3195,7 +3195,7 @@ static inline void wallmosts_finish(int16_t *mostbuf, int32_t z1, int32_t z2, { // PK 20110423: a bit consistency checking is a good thing: int32_t tmp = (ix2-ix1 >= 0) ? (ix2-ix1+1) : 1; - int32_t yinc = ((scale(z2, xdimenscale, iy2)<<4) - y) / tmp; + int32_t yinc = tabledivide32((scale(z2, xdimenscale, iy2)<<4) - y, tmp); qinterpolatedown16short((intptr_t)&mostbuf[ix1], tmp, y+(globalhoriz<<16), yinc); } @@ -4061,8 +4061,8 @@ static void nonpow2_mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_ { char ch; - const char *const buf = (char *)bufplc; - const char *const pal = (char *)asm3; + const char *const A_C_RESTRICT buf = (char *)bufplc; + const char *const A_C_RESTRICT pal = (char *)asm3; const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX; const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX; @@ -4087,9 +4087,9 @@ static void nonpow2_thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_ { char ch; - const char *const buf = (char *)bufplc; - const char *const pal = (char *)asm3; - const char *const trans = getblendtab(globalblend); + const char *const A_C_RESTRICT buf = (char *)bufplc; + const char *const A_C_RESTRICT pal = (char *)asm3; + const char *const A_C_RESTRICT trans = getblendtab(globalblend); const uint32_t xdiv = globalxspan > 1 ? (uint32_t)ourdivscale32(1, globalxspan) : UINT32_MAX; const uint32_t ydiv = globalyspan > 1 ? (uint32_t)ourdivscale32(1, globalyspan) : UINT32_MAX; @@ -4223,9 +4223,9 @@ static inline void setupslopevlin_alsotrans(int32_t logylogx, intptr_t bufplc, i // cnt iterations static void tslopevlin(uint8_t *p, int32_t i, const intptr_t *slopalptr, int32_t cnt, int32_t bx, int32_t by) { - const char *const buf = ggbuf; - const char *const pal = ggpal; - const char *const trans = getblendtab(0); + const char *const A_C_RESTRICT buf = ggbuf; + const char *const A_C_RESTRICT pal = ggpal; + const char *const A_C_RESTRICT trans = getblendtab(0); const int32_t bzinc = (asm1>>3), pinc = ggpinc; const int32_t transmode = (globalorientation&128); diff --git a/polymer/eduke32/build/src/osd.c b/polymer/eduke32/build/src/osd.c index 81fa6fd11..0c9bcba34 100644 --- a/polymer/eduke32/build/src/osd.c +++ b/polymer/eduke32/build/src/osd.c @@ -154,7 +154,7 @@ static int32_t OSD_CvarModified(const osdcvar_t *cvar) case CVAR_DOUBLE: return (cvar->dval.d != *(double *)cvar->c.vptr); default: - return 0; + EDUKE32_UNREACHABLE_SECTION(return 0); } } @@ -2154,7 +2154,7 @@ int32_t osdcmd_cvar_set(const osdfuncparm_t *parm) } break; default: - break; + EDUKE32_UNREACHABLE_SECTION(break); } #ifdef USE_OPENGL @@ -2220,7 +2220,7 @@ void OSD_WriteCvars(FILE *fp) fprintf(fp,"%s \"%s\"\n",osd->cvars[i].c.name,(char *)osd->cvars[i].c.vptr); break; default: - break; + EDUKE32_UNREACHABLE_SECTION(break); } } }