raze/polymer/eduke32/build/src/a-c.c
helixhorned a68ef20182 a-c.c: Make vector extensions also be compiled with Clang.
I was checking for GCC >= 4.7, while Clang pretends to be GCC 4.2. Use a
feature test macro instead. The comment I made in r4161 regarding GCC vs.
Clang code was wrong. Now, Clang generates slightly faster code for these cases
(solid and masked 4-pixel wide vlines).

git-svn-id: https://svn.eduke32.com/eduke32@4182 1a8010ca-5511-0410-912e-c29ae57300e0
2013-11-28 21:18:07 +00:00

616 lines
15 KiB
C

// A.ASM replacement using C
// Mainly by Ken Silverman, with things melded with my port by
// Jonathon Fowler (jf@jonof.id.au)
//
// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
// Ken Silverman's official web site: "http://www.advsys.net/ken"
// See the included license file "BUILDLIC.TXT" for license info.
#include "a.h"
#ifdef ENGINE_USING_A_C
// Defined in engine.c.
int32_t krecip(int32_t num);

#define BITSOFPRECISION 3
#define BITSOFPRECISIONPOW 8

// Compile code to saturate vplc for sprites to prevent stray lines at the
// bottom of non-y-flipped ones?
#define USE_SATURATE_VPLC
// Also for translucent masks?
//#define USE_SATURATE_VPLC_TRANS

// Variables defined in other translation units.
extern intptr_t asm1;
extern intptr_t asm2;
extern intptr_t asm3;
extern intptr_t asm4;
extern int32_t fpuasm;
extern int32_t globalx3;
extern int32_t globaly3;
extern void *reciptable;

#ifdef USE_ASM64
// variables for a64.yasm
int32_t a64_bpl;
int32_t a64_transmode;
int32_t a64_glogy;
intptr_t a64_paloffs;
char *a64_gtrans;
// Stores val into one of the a64_* variables above.
# define A64_ASSIGN(var, val) var=val
#else
// Without USE_ASM64 there is nothing to mirror: expands to nothing.
# define A64_ASSIGN(var, val)
#endif

// File-local drawing state, written by the set*/setup* functions below.
static int32_t bpl;
static int32_t transmode = 0;
static int32_t glogx;
static int32_t glogy;
static int32_t gbxinc;
static int32_t gbyinc;
static int32_t gpinc;
static char *gbuf;
static char *gpal;
static char *ghlinepal;
static char *gtrans;
static char *gpal2;
//Global variable functions
// Sets the per-row stride that the vertical-line drawers add to their
// destination pointer (also stored in a64_bpl when USE_ASM64 is defined).
void setvlinebpl(int32_t dabpl)
{
    bpl = dabpl;
    A64_ASSIGN(a64_bpl, dabpl);
}
// Sets the translucency lookup table used by the t* drawers; datransoff is
// the table's address passed as an integer.
void fixtransluscence(intptr_t datransoff)
{
    char *const table = (char *)datransoff;

    gtrans = table;
    A64_ASSIGN(a64_gtrans, table);
}
// Selects translucency mode 0 (the non-"reverse" table index order).
void settransnormal(void)
{
    transmode = 0;
    A64_ASSIGN(a64_transmode, 0);
}
// Selects translucency mode 1 (the "reverse" table index order).
void settransreverse(void)
{
    transmode = 1;
    A64_ASSIGN(a64_transmode, 1);
}
///// Ceiling/floor horizontal line functions /////
// Stores the texture shift counts and texture address used by hlineasm4().
void sethlinesizes(int32_t logx, int32_t logy, intptr_t bufplc)
{
    gbuf = (char *)bufplc;
    glogy = logy;
    glogx = logx;
}
// Sets the base of the palette lookup table that hlineasm4() indexes.
void setpalookupaddress(char *paladdr)
{
    ghlinepal = paladdr;
}
// Stores the per-pixel texture coordinate steps used by hlineasm4().
void setuphlineasm4(int32_t bxinc, int32_t byinc)
{
    gbyinc = byinc;
    gbxinc = bxinc;
}
// Draws a horizontal ceiling/floor span of cnt+1 pixels, walking backwards in
// memory from p.
//
//  cnt:          number of pixels minus one (the loop runs while cnt >= 0)
//  skiploadincs: if zero, the coordinate steps are reloaded from asm1/asm2
//  paloffs:      offset into the table set by setpalookupaddress()
//  by, bx:       starting texture coordinates; their top bits select the texel
//  p:            address of the first pixel written
void hlineasm4(int32_t cnt, int32_t skiploadincs, int32_t paloffs, uint32_t by, uint32_t bx, intptr_t p)
{
    if (!skiploadincs) { gbxinc = asm1; gbyinc = asm2; }

    {
        const char *const palptr = &ghlinepal[paloffs];
        // Texels are used as table indices: read them as unsigned so that
        // values >= 128 do not become negative where plain char is signed.
        const unsigned char *const buf = (const unsigned char *)gbuf;
        const int32_t bxinc = gbxinc, byinc = gbyinc;
        const int32_t logx = glogx, logy = glogy;
        char *pp = (char *)p;

        for (; cnt>=0; cnt--)
        {
            *pp = palptr[buf[((bx>>(32-logx))<<logy)+(by>>(32-logy))]];
            bx -= bxinc;
            by -= byinc;
            pp--;
        }
    }
}
///// Sloped ceiling/floor vertical line functions /////
// Prepares slopevlin(): logylogx packs glogx in its low 8 bits and glogy in
// the bits above; bufplc is the texture address and pinc the destination step.
void setupslopevlin(int32_t logylogx, intptr_t bufplc, int32_t pinc)
{
    glogy = (logylogx>>8);
    glogx = (logylogx&255);
    gpinc = pinc;
    gbuf = (char *)bufplc;
}
// Draws cnt pixels of a sloped ceiling/floor line.
//
//  p:          destination address; advanced by the pinc given to
//              setupslopevlin() after every pixel
//  i:          incoming value is ignored; reused as scratch for krecip()
//  slopaloffs: address of an array of palette table addresses, walked
//              downwards one entry per pixel
//  bx, by:     base texture coordinates
// Also reads asm3 (initial bz) and asm1 (bz step before the >>3).
void slopevlin(intptr_t p, int32_t i, intptr_t slopaloffs, int32_t cnt, int32_t bx, int32_t by)
{
    // Texels are added to a table address: read them as unsigned so that
    // values >= 128 do not become negative where plain char is signed.
    const unsigned char *const tex = (const unsigned char *)gbuf;
    const intptr_t *slopalptr = (const intptr_t *)slopaloffs;
    const int32_t bzinc = (asm1>>3);
    int32_t bz = asm3;

    for (; cnt>0; cnt--)
    {
        uint32_t u, v;

        i = krecip(bz>>6); bz += bzinc;
        u = bx+(int64_t)globalx3*i;
        v = by+(int64_t)globaly3*i;

        (*(char *)p) = *(const char *)(slopalptr[0]+tex[((u>>(32-glogx))<<glogy)+(v>>(32-glogy))]);

        slopalptr--;
        p += gpinc;
    }
}
///// Wall,face sprite/wall sprite vertical line functions /////
extern int32_t globaltilesizy;

// Returns the upper 32 bits of the unsigned 64-bit product a*b.
static inline uint32_t ourmulscale32(uint32_t a, uint32_t b)
{
    const uint64_t product = (uint64_t)a * b;

    return (uint32_t)(product >> 32);
}
// Fetches the texel for texture position vplc from buf.
// With logy != 0 the index is vplc>>logy; with logy == 0 it is the upper 32
// bits of vplc*globaltilesizy.
// The result is always in 0..255: the byte is read as unsigned, so texels
// >= 128 are not sign-extended where plain char is signed.
static inline int32_t getpix(int32_t logy, const char *buf, uint32_t vplc)
{
    const unsigned char *const texels = (const unsigned char *)buf;

    if (logy != 0)
        return texels[vplc>>logy];
    else
        return texels[ourmulscale32(vplc,globaltilesizy)];
}
// Stores the shift count used by the vlineasm* drawers to turn vplc into a
// texel index.
void setupvlineasm(int32_t neglogy)
{
    glogy = neglogy;
}
// Draws one solid vertical line.
// cnt+1 loop iterations!
//
//  vinc:    texture position step per pixel
//  paloffs: address of the palette lookup table
//  vplc:    starting texture position
//  bufplc:  address of the texture column
//  p:       address of the first destination pixel; advances by bpl per pixel
// Returns the texture position after the last pixel.
int32_t vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
    // Texels index the palette table: read them as unsigned so that values
    // >= 128 do not become negative where plain char is signed.
    const unsigned char *const buf = (const unsigned char *)bufplc;
    const char *const pal = (const char *)paloffs;
    const int32_t logy = glogy, ourbpl = bpl;
    char *pp = (char *)p;

    cnt++;

    do
    {
        if (logy != 0)
            *pp = pal[buf[vplc>>logy]];
        else
            *pp = pal[buf[ourmulscale32(vplc,globaltilesizy)]];

        pp += ourbpl;
        vplc += vinc;
    }
    while (--cnt);

    return vplc;
}
// Per-column inputs of the 4-pixel-wide vertical line drawers.
extern intptr_t palookupoffse[4];
extern uint32_t vplce[4];
extern int32_t vince[4];
extern intptr_t bufplce[4];

// Compilers without __has_extension get a stub that always answers "no".
#if !defined __has_extension
# define __has_extension(x) 0
#endif

// Use vector extensions with GCC >= 4.7 or with Clang when it reports the
// ext_vector_type extension (Clang identifies itself as GCC 4.2, hence the
// feature test). The GCC check compares major and minor version together so
// that e.g. GCC 5.1 is not rejected because of its minor version.
#if (defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) || (defined __clang__ && __has_extension(attribute_ext_vector_type))
# if defined _WIN64 || defined __amd64 || defined __x86_64 || defined __x86_64__
# define USE_VECTOR_EXT
# endif
#endif

#ifdef USE_VECTOR_EXT
// Four packed 32-bit unsigned lanes.
typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
#endif
// saturate_vplc*(vplc, vinc): to be used right after "vplc += vinc". If the
// addition wrapped around (vplc < vinc as unsigned) and g_saturate is -1, all
// bits of vplc are set; otherwise vplc is left unchanged.
// Macro parameters are parenthesized so that any expression may be passed.
#ifdef USE_SATURATE_VPLC
# define saturate_vplc(vplc, vinc) (vplc) |= g_saturate & -((vplc) < (uint32_t)(vinc))
// NOTE: the vector types yield -1 for logical "true":
# define saturate_vplc_vec(vplc, vinc) (vplc) |= g_saturate & ((vplc) < (vinc))
# ifdef USE_SATURATE_VPLC_TRANS
# define saturate_vplc_trans(vplc, vinc) saturate_vplc(vplc, vinc)
# else
# define saturate_vplc_trans(vplc, vinc)
# endif
#else
# define saturate_vplc(vplc, vinc)
# define saturate_vplc_vec(vplc, vinc)
# define saturate_vplc_trans(vplc, vinc)
#endif
// Draws four adjacent solid vertical lines of cnt pixels each.
// cnt >= 1
// Column i takes its palette table, texture, start position and step from
// palookupoffse[i], bufplce[i], vplce[i] and vince[i]; the final texture
// positions are written back to vplce[].
void vlineasm4(int32_t cnt, char *p)
{
    // Unsigned: the texel is a palette index and must not go negative where
    // plain char is signed.
    unsigned char ch;
    int32_t i;
    char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
    char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT
    uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
    uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
#else
    const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
    uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
#endif
    const int32_t logy = glogy, ourbpl = bpl;

    do
    {
        for (i=0; i<4; i++)
        {
            ch = getpix(logy, buf[i], vplc[i]);
            p[i] = pal[i][ch];
#if !defined USE_VECTOR_EXT
            vplc[i] += vinc[i];
#endif
        }
#ifdef USE_VECTOR_EXT
        // All four positions advance with a single vector add.
        vplc += vinc;
#endif
        p += ourbpl;
    }
    while (--cnt);

    for (i=0; i<4; i++)
        vplce[i] = vplc[i];
}
// set_saturate(x): records whether vplc saturation is requested. The argument
// is parenthesized so that an expression such as "a & b" is normalized as a
// whole instead of being parsed as "(-!!a) & b".
#ifdef USE_SATURATE_VPLC
static int32_t g_saturate; // -1 if saturating vplc is requested, 0 else
# define set_saturate(dosaturate) (g_saturate = -!!(dosaturate))
#else
# define set_saturate(dosaturate) UNREFERENCED_PARAMETER(dosaturate)
#endif
// Prepares the mvlineasm* drawers: stores the texture shift count and the
// saturation request.
void setupmvlineasm(int32_t neglogy, int32_t dosaturate)
{
    set_saturate(dosaturate);
    glogy = neglogy;
}
// Draws one masked vertical line: texels equal to 255 leave the destination
// untouched.
// cnt+1 loop iterations!
//
//  vinc:    texture position step per pixel
//  paloffs: address of the palette lookup table
//  vplc:    starting texture position
//  bufplc:  address of the texture column
//  p:       address of the first destination pixel; advances by bpl per pixel
// Returns the texture position after the last pixel.
int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
    // Unsigned: with a signed plain char the comparison against 255 could
    // never be false and pal[] would be indexed with negative values.
    unsigned char ch;
    const char *const buf = (const char *)bufplc;
    const char *const pal = (const char *)paloffs;
    const int32_t logy = glogy, ourbpl = bpl;
    char *pp = (char *)p;

    cnt++;

    do
    {
        ch = getpix(logy, buf, vplc);
        if (ch != 255) *pp = pal[ch];

        pp += ourbpl;
        vplc += vinc;
        saturate_vplc(vplc, vinc);
    }
    while (--cnt);

    return vplc;
}
// Draws four adjacent masked vertical lines of cnt pixels each: texels equal
// to 255 leave the destination untouched.
// cnt >= 1
// Column i takes its palette table, texture, start position and step from
// palookupoffse[i], bufplce[i], vplce[i] and vince[i]; the final texture
// positions are written back to vplce[].
void mvlineasm4(int32_t cnt, char *p)
{
    // Unsigned: with a signed plain char the comparison against 255 could
    // never be false and pal[][] would be indexed with negative values.
    unsigned char ch;
    int32_t i;
    char *const pal[4] = {(char *)palookupoffse[0], (char *)palookupoffse[1], (char *)palookupoffse[2], (char *)palookupoffse[3]};
    char *const buf[4] = {(char *)bufplce[0], (char *)bufplce[1], (char *)bufplce[2], (char *)bufplce[3]};
#ifdef USE_VECTOR_EXT
    uint32_vec4 vinc = {vince[0], vince[1], vince[2], vince[3]};
    uint32_vec4 vplc = {vplce[0], vplce[1], vplce[2], vplce[3]};
#else
    const int32_t vinc[4] = {vince[0], vince[1], vince[2], vince[3]};
    uint32_t vplc[4] = {vplce[0], vplce[1], vplce[2], vplce[3]};
#endif
    const int32_t logy = glogy, ourbpl = bpl;

    do
    {
        for (i=0; i<4; i++)
        {
            ch = getpix(logy, buf[i], vplc[i]);
            if (ch != 255) p[i] = pal[i][ch];
#if !defined USE_VECTOR_EXT
            vplc[i] += vinc[i];
            saturate_vplc(vplc[i], vinc[i]);
#endif
        }
#ifdef USE_VECTOR_EXT
        // All four positions advance (and saturate) with vector operations.
        vplc += vinc;
        saturate_vplc_vec(vplc, vinc);
#endif
        p += ourbpl;
    }
    while (--cnt);

    for (i=0; i<4; i++)
        vplce[i] = vplc[i];
}
// Variable that receives the shift count in the setuptvlineasm* functions:
// the C-side glogy normally, a64_glogy when USE_ASM64 is defined.
#ifndef USE_ASM64
# define GLOGY glogy
#else
# define GLOGY a64_glogy
#endif
// Prepares tvlineasm1(): stores the texture shift count (in GLOGY) and the
// saturation request.
void setuptvlineasm(int32_t neglogy, int32_t dosaturate)
{
    set_saturate(dosaturate);
    GLOGY = neglogy;
}
#if !defined USE_ASM64
// Draws one masked, translucent vertical line: texels equal to 255 are
// skipped, all others are blended with the destination through the table set
// by fixtransluscence(). transmode selects which of the two bytes forms the
// high half of the table index.
// cnt+1 loop iterations!
//
//  vinc:    texture position step per pixel
//  paloffs: address of the palette lookup table
//  vplc:    starting texture position
//  bufplc:  address of the texture column
//  p:       address of the first destination pixel; advances by bpl per pixel
// Returns the texture position after the last pixel.
int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
    // All pixel bytes are handled as unsigned: with a signed plain char the
    // 255 test could never be false and the table indices would go negative.
    unsigned char ch;
    const char *const buf = (const char *)bufplc;
    const unsigned char *const pal = (const unsigned char *)paloffs;
    const unsigned char *const trans = (const unsigned char *)gtrans;
    const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
    unsigned char *pp = (unsigned char *)p;

    cnt++;

    if (transm)
    {
        do
        {
            ch = getpix(logy, buf, vplc);
            if (ch != 255) *pp = trans[(*pp)|(pal[ch]<<8)];

            pp += ourbpl;
            vplc += vinc;
            saturate_vplc_trans(vplc, vinc);
        }
        while (--cnt);
    }
    else
    {
        do
        {
            ch = getpix(logy, buf, vplc);
            if (ch != 255) *pp = trans[((*pp)<<8)|pal[ch]];

            pp += ourbpl;
            vplc += vinc;
            saturate_vplc_trans(vplc, vinc);
        }
        while (--cnt);
    }

    return vplc;
}
#endif
// Prepares tvlineasm2(): stores the texture shift count (in GLOGY) and the
// palette tables for the first and second column.
void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2)
{
    gpal2 = (char *)paloffs2;
    gpal = (char *)paloffs1;
    A64_ASSIGN(a64_paloffs, paloffs1);
    GLOGY = neglogy;
}
#if !defined USE_ASM64
// Pass: asm1=vinc2, asm2=pend
// Return: asm1=vplc1, asm2=vplc2
void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc2, uint32_t vplc1, intptr_t p)
{
char ch;
int32_t cnt = (asm2-p-1)/bpl; // >= 1
const int32_t vinc2 = asm1;
const char *const buf1 = (char *)bufplc1;
const char *const buf2 = (char *)bufplc2;
const int32_t logy = glogy, ourbpl = bpl, transm = transmode;
char *pp = (char *)p;
cnt++;
if (transm)
{
do
{
ch = getpix(logy, buf1, vplc1);
if (ch != 255) pp[0] = gtrans[pp[0]|(gpal[ch]<<8)];
vplc1 += vinc1;
saturate_vplc_trans(vplc1, vinc1);
ch = getpix(logy, buf2, vplc2);
if (ch != 255) pp[1] = gtrans[pp[1]|(gpal2[ch]<<8)];
vplc2 += vinc2;
saturate_vplc_trans(vplc2, vinc2);
pp += ourbpl;
}
while (--cnt > 0);
}
else
{
do
{
ch = getpix(logy, buf1, vplc1);
if (ch != 255) pp[0] = gtrans[(pp[0]<<8)|gpal[ch]];
vplc1 += vinc1;
saturate_vplc_trans(vplc1, vinc1);
ch = getpix(logy, buf2, vplc2);
if (ch != 255) pp[1] = gtrans[(pp[1]<<8)|gpal2[ch]];
vplc2 += vinc2;
saturate_vplc_trans(vplc2, vinc2);
pp += ourbpl;
}
while (--cnt);
}
asm1 = vplc1;
asm2 = vplc2;
}
#endif
//Floor sprite horizontal line functions
// Stores the texture shift counts used by mhline().
void msethlineshift(int32_t logx, int32_t logy)
{
    glogy = logy;
    glogx = logx;
}
// Draws a masked horizontal span, left to right: texels equal to 255 leave
// the destination untouched.
// cntup16>>16 + 1 iterations
//
//  bufplc: texture address (stored in gbuf)
//  bx, by: starting texture coordinates; stepped by asm1 and asm2
//  junk:   unused
//  p:      address of the first destination pixel
// The palette table address is taken from asm3 (stored in gpal).
void mhline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p)
{
    // Unsigned: with a signed plain char the comparison against 255 could
    // never be false and gpal[] would be indexed with negative values.
    unsigned char ch;
    const int32_t xinc = asm1, yinc = asm2;

    UNREFERENCED_PARAMETER(junk);

    gbuf = (char *)bufplc;
    gpal = (char *)asm3;

    cntup16>>=16;
    cntup16++;

    do
    {
        ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
        if (ch != 255) *((char *)p) = gpal[ch];

        bx += xinc;
        by += yinc;
        p++;
    }
    while (--cntup16);
}
// Stores the texture shift counts used by thline().
void tsethlineshift(int32_t logx, int32_t logy)
{
    glogy = logy;
    glogx = logx;
}
// cntup16>>16 + 1 iterations
void thline(intptr_t bufplc, uint32_t bx, int32_t cntup16, int32_t junk, uint32_t by, intptr_t p)
{
char ch;
const int32_t xinc = asm1, yinc = asm2;
UNREFERENCED_PARAMETER(junk);
gbuf = (char *)bufplc;
gpal = (char *)asm3;
cntup16>>=16;
cntup16++;
if (transmode)
{
do
{
ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
if (ch != 255) *((char *)p) = gtrans[(*((char *)p))|(gpal[ch]<<8)];
bx += xinc;
by += yinc;
p++;
}
while (--cntup16);
}
else
{
do
{
ch = gbuf[((bx>>(32-glogx))<<glogy)+(by>>(32-glogy))];
if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<8)|gpal[ch]];
bx += xinc;
by += yinc;
p++;
}
while (--cntup16);
}
}
//Rotatesprite vertical line functions
// Prepares spritevline(): palette table address, per-pixel coordinate steps,
// and the multiplier (ysiz) applied to bx>>16 when forming the texel index.
void setupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
{
    glogy = ysiz;
    gbyinc = byinc;
    gbxinc = bxinc;
    gpal = (char *)paloffs;
}
// Draws a solid vertical line for a rotated sprite. The loop runs while
// cnt > 1, i.e. cnt-1 pixels are written.
//
//  bx, by: 16.16 texture coordinates, stepped by the increments given to
//          setupspritevline()
//  bufplc: texture address (stored in gbuf)
//  p:      address of the first destination pixel; advances by bpl per pixel
void spritevline(int32_t bx, int32_t by, int32_t cnt, intptr_t bufplc, intptr_t p)
{
    // Texels index the palette table: read them as unsigned so that values
    // >= 128 do not become negative where plain char is signed.
    const unsigned char *tex;

    gbuf = (char *)bufplc;
    tex = (const unsigned char *)gbuf;

    for (; cnt>1; cnt--)
    {
        (*(char *)p) = gpal[tex[(bx>>16)*glogy+(by>>16)]];

        bx += gbxinc;
        by += gbyinc;
        p += bpl;
    }
}
//Rotatesprite vertical line functions
// Prepares mspritevline(): palette table address, per-pixel coordinate steps,
// and the multiplier (ysiz) applied to bx>>16 when forming the texel index.
void msetupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
{
    glogy = ysiz;
    gbyinc = byinc;
    gbxinc = bxinc;
    gpal = (char *)paloffs;
}
// Draws a masked vertical line for a rotated sprite: texels equal to 255
// leave the destination untouched. The loop runs while cnt > 1, i.e. cnt-1
// pixels are processed.
//
//  bx, by: 16.16 texture coordinates, stepped by the increments given to
//          msetupspritevline()
//  bufplc: texture address (stored in gbuf)
//  p:      address of the first destination pixel; advances by bpl per pixel
void mspritevline(int32_t bx, int32_t by, int32_t cnt, intptr_t bufplc, intptr_t p)
{
    // Unsigned: with a signed plain char the comparison against 255 could
    // never be false and gpal[] would be indexed with negative values.
    unsigned char ch;

    gbuf = (char *)bufplc;

    for (; cnt>1; cnt--)
    {
        ch = gbuf[(bx>>16)*glogy+(by>>16)];
        if (ch != 255) (*(char *)p) = gpal[ch];

        bx += gbxinc;
        by += gbyinc;
        p += bpl;
    }
}
// Prepares tspritevline(): palette table address, per-pixel coordinate steps,
// and the multiplier (ysiz) applied to bx>>16 when forming the texel index.
void tsetupspritevline(intptr_t paloffs, int32_t bxinc, int32_t byinc, int32_t ysiz)
{
    glogy = ysiz;
    gbyinc = byinc;
    gbxinc = bxinc;
    gpal = (char *)paloffs;
}
void tspritevline(int32_t bx, int32_t by, int32_t cnt, intptr_t bufplc, intptr_t p)
{
char ch;
gbuf = (char *)bufplc;
if (transmode)
{
for (; cnt>1; cnt--)
{
ch = gbuf[(bx>>16)*glogy+(by>>16)];
if (ch != 255) *((char *)p) = gtrans[(*((char *)p))+(gpal[ch]<<8)];
bx += gbxinc;
by += gbyinc;
p += bpl;
}
}
else
{
for (; cnt>1; cnt--)
{
ch = gbuf[(bx>>16)*glogy+(by>>16)];
if (ch != 255) *((char *)p) = gtrans[((*((char *)p))<<8)+gpal[ch]];
bx += gbxinc;
by += gbyinc;
p += bpl;
}
}
}
// Prepares drawslab(): destination row stride and palette table address.
void setupdrawslab(int32_t dabpl, intptr_t pal)
{
    gpal = (char *)pal;
    bpl = dabpl;
}
// Fills dy rows of dx pixels each. Every row gets a single color: the byte at
// vptr+(v>>16) looked up in the palette given to setupdrawslab().
//
//  dx:   row width in pixels
//  v:    16.16 source position, stepped by vi after each row
//  dy:   number of rows
//  vptr: source address
//  p:    address of the first destination row; advances by bpl per row
void drawslab(int32_t dx, int32_t v, int32_t dy, int32_t vi, intptr_t vptr, intptr_t p)
{
    int32_t x;

    while (dy > 0)
    {
        // Read the source byte as unsigned so that values >= 128 do not
        // become negative palette indices where plain char is signed.
        const unsigned char texel = *(const unsigned char *)((v>>16)+vptr);
        const char c = gpal[texel];

        for (x=0; x < dx; x++)
            ((char *)p)[x] = c;

        p += bpl;
        v += vi;
        dy--;
    }
}
// Disabled: everything inside this "#if 0" is excluded from compilation.
#if 0
// Copies source bytes to the destination, walking backwards from p down to
// p-(cnt<<2). The incoming p0 is ignored; it is recomputed from p and cnt.
void stretchhline(intptr_t p0, int32_t u, int32_t cnt, int32_t uinc, intptr_t rptr, intptr_t p)
{
p0 = p-(cnt<<2);
do
{
p--;
// Source byte at rptr+(u>>16); u is a 16.16 position stepped down by uinc.
*(char *)p = *(char *)((u>>16)+rptr); u -= uinc;
}
while (p > p0);
}
#endif
// Does nothing in the C implementation. Declared with (void) so that the
// definition is a real prototype instead of an old-style empty parameter list.
void mmxoverlay(void) { }
#endif
/*
* vim:ts=4:
*/