a-c.c: prevent stray lines on bottom of sprites by saturating v. tex coords.

The functions mvlineasm1, mvlineasm4 and tvlineasm2 can now be set to clamp
the vertical texture coordinate (vplc), preventing the unsightly stray lines
on the bottom of non-y-flipped sprites. (The first part of this effort, r3483,
handled their top.)

However, this is only enabled for the mvlineasm ones: the vectorized variants
suffered almost no slowdown (even though a PADDUSD SSE instruction would be a
nice thing to have), while the slowdown was pretty significant for the
sequential translucent ones.

Summarizing, this leaves two cases where stray lines may appear in the non-ASM
build (the saturation is not yet implemented for a.nasm):
- at the bottom of y-flipped sprites
- at the bottom of translucent sprites (can be toggled by #define)

Another observation is that recent GCC generates much faster code for this
stuff than Clang from SVN.

git-svn-id: https://svn.eduke32.com/eduke32@4161 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
helixhorned 2013-11-16 18:47:11 +00:00
parent ca22bf8185
commit 3753d45601
4 changed files with 65 additions and 19 deletions

View file

@ -35,7 +35,7 @@ extern int32_t __cdecl fixtransluscence(int32_t);
extern int32_t __cdecl prevlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl vlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl vlineasm1nonpow2(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl setuptvlineasm(int32_t);
extern int32_t __cdecl setuptvlineasm(int32_t,int32_t);
extern int32_t __cdecl tvlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl tvlineasm1nonpow2(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl setuptvlineasm2(int32_t,int32_t,int32_t);
@ -44,7 +44,7 @@ extern int32_t __cdecl mvlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_
extern int32_t __cdecl mvlineasm1nonpow2(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl setupvlineasm(int32_t);
extern int32_t __cdecl vlineasm4(int32_t,char *);
extern int32_t __cdecl setupmvlineasm(int32_t);
extern int32_t __cdecl setupmvlineasm(int32_t,int32_t);
extern int32_t __cdecl mvlineasm4(int32_t,char *);
extern int32_t __cdecl setupspritevline(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t __cdecl spritevline(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
@ -93,7 +93,7 @@ extern int32_t _cdecl fixtransluscence(int32_t);
extern int32_t _cdecl prevlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl vlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl vlineasm1nonpow2(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl setuptvlineasm(int32_t);
extern int32_t _cdecl setuptvlineasm(int32_t,int32_t);
extern int32_t _cdecl tvlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl tvlineasm1nonpow2(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl setuptvlineasm2(int32_t,int32_t,int32_t);
@ -102,7 +102,7 @@ extern int32_t _cdecl mvlineasm1(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t
extern int32_t _cdecl mvlineasm1nonpow2(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl setupvlineasm(int32_t);
extern int32_t _cdecl vlineasm4(int32_t,char *);
extern int32_t _cdecl setupmvlineasm(int32_t);
extern int32_t _cdecl setupmvlineasm(int32_t,int32_t);
extern int32_t _cdecl mvlineasm4(int32_t,char *);
extern int32_t _cdecl setupspritevline(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
extern int32_t _cdecl spritevline(int32_t,int32_t,int32_t,int32_t,int32_t,int32_t);
@ -152,11 +152,11 @@ void setupvlineasm(int32_t neglogy);
int32_t vlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p);
void vlineasm4(int32_t cnt, char *p);
void setupmvlineasm(int32_t neglogy);
void setupmvlineasm(int32_t neglogy, int32_t dosaturate);
int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p);
void mvlineasm4(int32_t cnt, char *p);
void setuptvlineasm(int32_t neglogy);
void setuptvlineasm(int32_t neglogy, int32_t dosaturate);
int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p);
void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2);

View file

@ -15,6 +15,12 @@ int32_t krecip(int32_t num); // from engine.c
#define BITSOFPRECISION 3
#define BITSOFPRECISIONPOW 8
// Compile code to saturate vplc for sprites to prevent stray lines at the
// bottom of non-y-flipped ones?
#define USE_SATURATE_VPLC
// Also for translucent masks?
//#define USE_SATURATE_VPLC_TRANS
extern intptr_t asm1, asm2, asm3, asm4;
extern int32_t fpuasm, globalx3, globaly3;
extern void *reciptable;
@ -161,6 +167,21 @@ extern intptr_t bufplce[4];
typedef uint32_t uint32_vec4 __attribute__ ((vector_size (16)));
#endif
#ifdef USE_SATURATE_VPLC
// Clamp <vplc> to all ones if the preceding 'vplc += vinc' wrapped around
// (in unsigned arithmetic, the sum is then less than the addend). This only
// has an effect while g_saturate is -1; with g_saturate == 0, the mask is
// zero and vplc is left as is.
// The arguments and the whole expansion are parenthesized so that the macros
// stay correct when passed arbitrary expressions.
# define saturate_vplc(vplc, vinc) ((vplc) |= g_saturate & -((vplc) < (uint32_t)(vinc)))
// NOTE: the vector types yield -1 for logical "true":
# define saturate_vplc_vec(vplc, vinc) ((vplc) |= g_saturate & ((vplc) < (vinc)))
// Saturation for the translucent drawers is compiled in separately, since it
// is toggled by its own #define.
# ifdef USE_SATURATE_VPLC_TRANS
# define saturate_vplc_trans(vplc, vinc) saturate_vplc(vplc, vinc)
# else
# define saturate_vplc_trans(vplc, vinc)
# endif
#else
// Saturation not compiled in: all variants expand to nothing.
# define saturate_vplc(vplc, vinc)
# define saturate_vplc_vec(vplc, vinc)
# define saturate_vplc_trans(vplc, vinc)
#endif
// cnt >= 1
void vlineasm4(int32_t cnt, char *p)
{
@ -199,7 +220,19 @@ void vlineasm4(int32_t cnt, char *p)
vplce[i] = vplc[i];
}
void setupmvlineasm(int32_t neglogy) { glogy = neglogy; }
#ifdef USE_SATURATE_VPLC
static int32_t g_saturate;  // -1 if saturating vplc is requested, 0 else
// Normalizes <dosaturate> to a bit mask: any nonzero value becomes -1 (all
// bits set), zero stays 0. The argument is parenthesized so that expression
// arguments (e.g. 'a & 8') are negated as a whole.
# define set_saturate(dosaturate) (g_saturate = -!!(dosaturate))
#else
# define set_saturate(dosaturate) UNREFERENCED_PARAMETER(dosaturate)
#endif

// Setup for the masked vertical line drawers.
//  neglogy: stored to 'glogy'.
//  dosaturate: nonzero requests saturating vplc in the drawing loops; it is
//   ignored if USE_SATURATE_VPLC is not defined.
void setupmvlineasm(int32_t neglogy, int32_t dosaturate)
{
    glogy = neglogy;
    set_saturate(dosaturate);
}
// cnt+1 loop iterations!
int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
{
@ -218,6 +251,7 @@ int32_t mvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
if (ch != 255) *pp = pal[ch];
pp += ourbpl;
vplc += vinc;
saturate_vplc(vplc, vinc);
}
while (--cnt);
@ -249,10 +283,12 @@ void mvlineasm4(int32_t cnt, char *p)
if (ch != 255) p[i] = pal[i][ch];
#if !defined USE_VECTOR_EXT
vplc[i] += vinc[i];
saturate_vplc(vplc[i], vinc[i]);
#endif
}
#ifdef USE_VECTOR_EXT
vplc += vinc;
saturate_vplc_vec(vplc, vinc);
#endif
p += ourbpl;
}
@ -268,9 +304,10 @@ void mvlineasm4(int32_t cnt, char *p)
# define GLOGY glogy
#endif
void setuptvlineasm(int32_t neglogy)
// Setup for the translucent masked vertical line drawers.
//  neglogy: stored to GLOGY.
//  dosaturate: forwarded to set_saturate().
void setuptvlineasm(int32_t neglogy, int32_t dosaturate)
{
    // The two steps are independent of each other.
    set_saturate(dosaturate);
    GLOGY = neglogy;
}
#if !defined USE_ASM64
@ -295,6 +332,7 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
if (ch != 255) *pp = trans[(*pp)|(pal[ch]<<8)];
pp += ourbpl;
vplc += vinc;
saturate_vplc_trans(vplc, vinc);
}
while (--cnt);
}
@ -306,6 +344,7 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
if (ch != 255) *pp = trans[((*pp)<<8)|pal[ch]];
pp += ourbpl;
vplc += vinc;
saturate_vplc_trans(vplc, vinc);
}
while (--cnt);
}
@ -347,10 +386,12 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc
ch = getpix(logy, buf1, vplc1);
if (ch != 255) pp[0] = gtrans[pp[0]|(gpal[ch]<<8)];
vplc1 += vinc1;
saturate_vplc_trans(vplc1, vinc1);
ch = getpix(logy, buf2, vplc2);
if (ch != 255) pp[1] = gtrans[pp[1]|(gpal2[ch]<<8)];
vplc2 += vinc2;
saturate_vplc_trans(vplc2, vinc2);
pp += ourbpl;
}
@ -363,10 +404,12 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc
ch = getpix(logy, buf1, vplc1);
if (ch != 255) pp[0] = gtrans[(pp[0]<<8)|gpal[ch]];
vplc1 += vinc1;
saturate_vplc_trans(vplc1, vinc1);
ch = getpix(logy, buf2, vplc2);
if (ch != 255) pp[1] = gtrans[(pp[1]<<8)|gpal2[ch]];
vplc2 += vinc2;
saturate_vplc_trans(vplc2, vinc2);
pp += ourbpl;
}

View file

@ -656,6 +656,8 @@ CDECLENDSET 1
ALIGN 16
setupmvlineasm:
;; NOTE: We actually receive two args (second one: saturate vplc?), but the
;; second one is currently unused here. See the others: UNUSED_DO_SATURATE.
CDECLBEGINSET 1
mov byte [maskmach3a+2], al
mov byte [machmv13+2], al
@ -668,6 +670,7 @@ CDECLENDSET 1
ALIGN 16
setuptvlineasm:
;; UNUSED_DO_SATURATE
CDECLBEGINSET 1
mov byte [transmach3a+2], al
CDECLENDSET 1

View file

@ -2790,7 +2790,7 @@ static WSHELPER_DECL void calc_vplcinc(uint32_t *vplc, int32_t *vinc, const int3
//
// maskwallscan (internal)
//
static void maskwallscan(int32_t x1, int32_t x2)
static void maskwallscan(int32_t x1, int32_t x2, int32_t saturatevplc)
{
int32_t x;
intptr_t p, fpalookup;
@ -2818,7 +2818,7 @@ static void maskwallscan(int32_t x1, int32_t x2)
fpalookup = FP_OFF(palookup[globalpal]);
setupmvlineasm(globalshiftval);
setupmvlineasm(globalshiftval, saturatevplc);
x = x1;
@ -4021,7 +4021,7 @@ static void transmaskvline2(int32_t x)
//
// transmaskwallscan (internal)
//
static void transmaskwallscan(int32_t x1, int32_t x2)
static void transmaskwallscan(int32_t x1, int32_t x2, int32_t saturatevplc)
{
int32_t x;
@ -4034,7 +4034,7 @@ static void transmaskwallscan(int32_t x1, int32_t x2)
if (waloff[globalpicnum] == 0) loadtile(globalpicnum);
setuptvlineasm(globalshiftval);
setuptvlineasm(globalshiftval, saturatevplc);
x = x1;
while ((startumost[x+windowx1] > startdmost[x+windowx1]) && (x <= x2)) x++;
@ -5808,9 +5808,9 @@ draw_as_face_sprite:
drawing_sprite = 1;
if ((cstat&2) == 0)
maskwallscan(lx,rx);
maskwallscan(lx,rx, (cstat&8)==0);
else
transmaskwallscan(lx,rx);
transmaskwallscan(lx,rx, (cstat&8)==0);
drawing_sprite = 0;
}
@ -6070,9 +6070,9 @@ draw_as_face_sprite:
drawing_sprite = 1;
if ((cstat&2) == 0)
maskwallscan(sx1,sx2);
maskwallscan(sx1,sx2, (cstat&8)==0);
else
transmaskwallscan(sx1,sx2);
transmaskwallscan(sx1,sx2, (cstat&8)==0);
drawing_sprite = 0;
}
@ -6653,7 +6653,7 @@ static void drawmaskwall(int16_t damaskwallcnt)
if ((globalorientation&128) == 0)
{
maskwallscan(xb1[z],xb2[z]);
maskwallscan(xb1[z],xb2[z], 0);
}
else
{
@ -6662,7 +6662,7 @@ static void drawmaskwall(int16_t damaskwallcnt)
if (globalorientation&512) settransreverse(); else settransnormal();
}
transmaskwallscan(xb1[z],xb2[z]);
transmaskwallscan(xb1[z],xb2[z], 0);
}
}
@ -7417,7 +7417,7 @@ static void dorotatesprite(int32_t sx, int32_t sy, int32_t z, int16_t a, int16_t
if (dastat&64)
setupvlineasm(24L);
else
setupmvlineasm(24L);
setupmvlineasm(24L, 0);
by <<= 8; yv <<= 8; yv2 <<= 8;