Classic: fix drawing tilesizy=512 walls, use non-pow2 routines with pskies.

The former is really only a workaround. Walls/vertical sprites/pskies with
ysize 512 (and presumably greater, but this was not tested) are rendered
with one shade higher at the borders (1 pixel vlines) because of a certain
assumption in the ASM (see comments there). With very dark shades, the
palookup[] buffer is accessed out of bounds. We simply allocate 256 more bytes
at the end of each palookup[] buffer.

The latter is only for CLASSIC_NONPOW2_YSIZE_WALLS builds, which are not enabled
yet. It seems to matter only for the uncommon case where such pskies repeat
vertically. A 1680x1050 window fully covered with such a sky is then rendered
at about 60/85 of the FPS for me (mostly due to not using the 4 pixel vline
routines), so it may be leaning a bit too much on the side of correctness.

A compilation switch DEBUG_TILESIZY_512 is introduced in engine.c for
demonstration purposes.

git-svn-id: https://svn.eduke32.com/eduke32@3310 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
helixhorned 2012-12-23 13:59:52 +00:00
parent 0805060c74
commit 9dc1d212a7
2 changed files with 85 additions and 27 deletions

View file

@ -699,8 +699,12 @@ vlineasm1_nosetup:
fixchain1a: sub edi, 320 fixchain1a: sub edi, 320
beginvline: beginvline:
mov ebx, edx mov ebx, edx
; Here, the right shift value is supposed to be >= 24,
; translating into tiles with y sizes <= 256 ...
; (ebx is what is called "vplc" in the C replacement code.)
mach3a: shr ebx, 32 mach3a: shr ebx, 32
fixchain1b: add edi, 320 fixchain1b: add edi, 320
; ... so that the upper 24 bits of ebx are clear here:
mov bl, byte [esi+ebx] mov bl, byte [esi+ebx]
add edx, eax add edx, eax
dec ecx dec ecx

View file

@ -47,6 +47,8 @@
//#define CLASSIC_NONPOW2_YSIZE_WALLS //#define CLASSIC_NONPOW2_YSIZE_WALLS
#define CLASSIC_NONPOW2_YSIZE_SPRITES #define CLASSIC_NONPOW2_YSIZE_SPRITES
//#define DEBUG_TILESIZY_512
#if !defined DEBUG_MAIN_ARRAYS #if !defined DEBUG_MAIN_ARRAYS
const int32_t engine_main_arrays_are_static = 0; // for Lunatic const int32_t engine_main_arrays_are_static = 0; // for Lunatic
#else #else
@ -4382,6 +4384,8 @@ static void parascan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat, i
static const int16_t zeropskyoff[MAXPSKYTILES] = { 0 }; static const int16_t zeropskyoff[MAXPSKYTILES] = { 0 };
const int16_t *dapskyoff; const int16_t *dapskyoff;
int32_t logtilesizy, tsizy;
UNREFERENCED_PARAMETER(dax1); UNREFERENCED_PARAMETER(dax1);
UNREFERENCED_PARAMETER(dax2); UNREFERENCED_PARAMETER(dax2);
@ -4390,7 +4394,8 @@ static void parascan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat, i
globalhorizbak = globalhoriz; globalhorizbak = globalhoriz;
globvis = globalpisibility; globvis = globalpisibility;
//globalorientation = 0L; //globalorientation = 0L;
if (sec->visibility != 0) globvis = mulscale4(globvis, (uint8_t)(sec->visibility+16)); if (sec->visibility != 0)
globvis = mulscale4(globvis, (uint8_t)(sec->visibility+16));
if (dastat == 0) if (dastat == 0)
{ {
@ -4415,11 +4420,32 @@ static void parascan(int32_t dax1, int32_t dax2, int32_t sectnum, char dastat, i
if ((unsigned)globalpicnum >= MAXTILES) globalpicnum = 0; if ((unsigned)globalpicnum >= MAXTILES) globalpicnum = 0;
DO_TILE_ANIM(globalpicnum, sectnum); DO_TILE_ANIM(globalpicnum, sectnum);
globalshiftval = (picsiz[globalpicnum]>>4);
if (pow2long[globalshiftval] != tilesizy[globalpicnum]) globalshiftval++; logtilesizy = (picsiz[globalpicnum]>>4);
globalshiftval = 32-globalshiftval; tsizy = tilesizy[globalpicnum];
globalzd = (((tilesizy[globalpicnum]>>1)+parallaxyoffs)<<globalshiftval)+((uint32_t)globalypanning<<24);
globalyscale = (8<<(globalshiftval-19)); globalshiftval = logtilesizy;
#if !defined CLASSIC_NONPOW2_YSIZE_WALLS
// before proper non-power-of-two tilesizy drawing
if (pow2long[logtilesizy] != tsizy)
globalshiftval++;
#else
// non power-of-two y size textures!
if (pow2long[logtilesizy] != tsizy || tsizy >= 512)
{
globaltilesizy = tsizy;
globalyscale = 65536 / tsizy;
globalshiftval = 0;
globalzd = divscale32(((tsizy>>1)+parallaxyoffs), tsizy) + ((uint32_t)globalypanning<<24);
}
else
#endif
{
globalshiftval = 32-globalshiftval;
globalyscale = (8<<(globalshiftval-19));
globalzd = (((tsizy>>1)+parallaxyoffs)<<globalshiftval) + ((uint32_t)globalypanning<<24);
}
//if (globalorientation&256) globalyscale = -globalyscale, globalzd = -globalzd; //if (globalorientation&256) globalyscale = -globalyscale, globalzd = -globalzd;
dapskyoff = zeropskyoff; dapskyoff = zeropskyoff;
@ -4582,11 +4608,17 @@ static void setup_globals_wall2(const walltype *wal, uint8_t secvisibility, int3
globalshiftval = logtilesizy; globalshiftval = logtilesizy;
#if !defined CLASSIC_NONPOW2_YSIZE_WALLS #if !defined CLASSIC_NONPOW2_YSIZE_WALLS
// before proper non-power-of-two tilesizy drawing // before proper non-power-of-two tilesizy drawing
if (pow2long[logtilesizy] != tilesizy[globalpicnum]) if (pow2long[logtilesizy] != tsizy)
globalshiftval++; globalshiftval++;
#else #else
// non power-of-two y size textures! // non power-of-two y size textures!
if (pow2long[logtilesizy] == tsizy) if (pow2long[logtilesizy] != tsizy || tsizy >= 512)
{
globaltilesizy = tsizy;
globalyscale = divscale13(wal->yrepeat, tsizy);
globalshiftval = 0;
}
else
#endif #endif
{ {
// globalshiftval==13 --> globalshiftval==19 // globalshiftval==13 --> globalshiftval==19
@ -4594,14 +4626,6 @@ static void setup_globals_wall2(const walltype *wal, uint8_t secvisibility, int3
globalshiftval = 32-globalshiftval; globalshiftval = 32-globalshiftval;
globalyscale = wal->yrepeat<<(globalshiftval-19); globalyscale = wal->yrepeat<<(globalshiftval-19);
} }
#if defined CLASSIC_NONPOW2_YSIZE_WALLS
else
{
globaltilesizy = tsizy;
globalyscale = divscale13(wal->yrepeat, tsizy);
globalshiftval = 0;
}
#endif
if ((globalorientation&4) == 0) if ((globalorientation&4) == 0)
globalzd = (((int64_t)(globalposz-topzref)*globalyscale)<<8); globalzd = (((int64_t)(globalposz-topzref)*globalyscale)<<8);
@ -5441,20 +5465,18 @@ static void setup_globals_sprite1(const spritetype *tspr, const sectortype *sec,
globalshiftval++; globalshiftval++;
#else #else
// non power-of-two y size textures! // non power-of-two y size textures!
if (pow2long[logtilesizy] == tsizy) if (pow2long[logtilesizy] != tsizy || tsizy >= 512)
#endif
{
globalshiftval = 32-globalshiftval;
globalyscale = divscale(512,tspr->yrepeat,globalshiftval-19);
}
#if defined CLASSIC_NONPOW2_YSIZE_SPRITES
else
{ {
globaltilesizy = tsizy; globaltilesizy = tsizy;
globalyscale = (1<<22)/(tsizy*tspr->yrepeat); globalyscale = (1<<22)/(tsizy*tspr->yrepeat);
globalshiftval = 0; globalshiftval = 0;
} }
else
#endif #endif
{
globalshiftval = 32-globalshiftval;
globalyscale = divscale(512,tspr->yrepeat,globalshiftval-19);
}
globalzd = ((int64_t)(globalposz-z1)*globalyscale)<<8; globalzd = ((int64_t)(globalposz-z1)*globalyscale)<<8;
if ((cstat&8) > 0) if ((cstat&8) > 0)
@ -7695,6 +7717,18 @@ static void initfastcolorlookup(int32_t rscale, int32_t gscale, int32_t bscale)
} }
// Allocates the shade lookup table for palette number <pal> and stores the
// resulting pointer in palookup[pal].
//
// No failure check is done here: the result may be NULL, and the callers
// test palookup[pal] themselves after calling this function.
static void alloc_palookup(int32_t pal)
{
    char *shadetab;

#if !defined ENGINE_USING_A_C && !(defined CLASSIC_NONPOW2_YSIZE_WALLS && defined CLASSIC_NONPOW2_YSIZE_SPRITES)
    // Workaround for the asm vline routines: vlineasm1, mvlineasm1 (maybe
    // others?) access the next palookup[...] shade entry for tilesizy==512
    // tiles, so reserve room for one additional 256-byte shade entry past
    // the last real one.
    // See DEBUG_TILESIZY_512 and the comment in a.nasm: vlineasm1.
    shadetab = (char *)Bcalloc(numshades+1, 256);
#else
    // C drawing routines, or non-pow2 routines for both walls and sprites:
    // exactly numshades entries of 256 bytes each.
    shadetab = (char *)Bmalloc(numshades*256);
#endif

    palookup[pal] = shadetab;
}
// //
// loadpalette (internal) // loadpalette (internal)
// //
@ -7713,7 +7747,8 @@ static int32_t loadpalette(void)
kread(fil,palette,768); kread(fil,palette,768);
kread(fil,&numshades,2); numshades = B_LITTLE16(numshades); kread(fil,&numshades,2); numshades = B_LITTLE16(numshades);
palookup[0] = (char *)Bmalloc(numshades<<8); alloc_palookup(0);
transluc = (char *)Bmalloc(65536); transluc = (char *)Bmalloc(65536);
if (palookup[0] == NULL || transluc == NULL) if (palookup[0] == NULL || transluc == NULL)
exit(1); exit(1);
@ -7728,6 +7763,15 @@ static int32_t loadpalette(void)
kclose(fil); kclose(fil);
#ifdef DEBUG_TILESIZY_512
{
int32_t i;
// Bump shade 1 by 16.
for (i=256; i<512; i++)
palookup[0][i] = palookup[0][i+(16<<8)];
}
#endif
if (crc32once((uint8_t *)transluc, 65536)==0x94a1fac6) // Duke3D 1.5 GRP if (crc32once((uint8_t *)transluc, 65536)==0x94a1fac6) // Duke3D 1.5 GRP
{ {
int32_t i; int32_t i;
@ -10642,6 +10686,7 @@ void loadtile(int16_t tilenume)
waloff[tilenume] = (intptr_t)(artptrs[i]) + tilefileoffs[tilenume]; waloff[tilenume] = (intptr_t)(artptrs[i]) + tilefileoffs[tilenume];
faketimerhandler(); faketimerhandler();
// OSD_Printf("loaded tile %d from zip\n", tilenume); // OSD_Printf("loaded tile %d from zip\n", tilenume);
return; return;
} }
#endif #endif
@ -10698,6 +10743,16 @@ void loadtile(int16_t tilenume)
kread(artfil, (char *)waloff[tilenume], dasiz); kread(artfil, (char *)waloff[tilenume], dasiz);
faketimerhandler(); faketimerhandler();
artfilplc = tilefileoffs[tilenume]+dasiz; artfilplc = tilefileoffs[tilenume]+dasiz;
#ifdef DEBUG_TILESIZY_512
if (tilesizy[tilenume] >= 512)
{
int32_t i;
char *p = (char *)waloff[tilenume];
for (i=0; i<tilesizx[tilenume]*tilesizy[tilenume]; i++)
p[i] = i;
}
#endif
} }
// //
@ -13680,8 +13735,7 @@ void makepalookup(int32_t palnum, const char *remapbuf, int8_t r, int8_t g, int8
if (palookup[palnum] == NULL || (palnum!=0 && palookup[palnum] == palookup[0])) if (palookup[palnum] == NULL || (palnum!=0 && palookup[palnum] == palookup[0]))
{ {
//Allocate palookup buffer alloc_palookup(palnum);
palookup[palnum] = (char *)Bmalloc(numshades<<8);
if (palookup[palnum] == NULL) if (palookup[palnum] == NULL)
exit(1); exit(1);
} }