quakeforge/libs/video/renderer/sw32/d_scan.c
Bill Currie e167300a84 Fix a segfault in the sw32 renderer on entering e1m7
While this particular trigger of the real bug was caused by 659d95221e
(hopefully fix both the "get stuck waiting for 3d" bug and the null
worldmode bug.), the real bug was lurking in the code since the dawn of
time (from sw32's perspective). This fix is as per LordHavoc's suggestion
(heh, despite the years, he knows his code), but I spent the time hunting
down the trigger to understand just what was going on.

It turns out that (0,0,0) is too close to a wall (probably on, but the
slight default offset is too close) and the above commit changed the first
rendered frame to be before the player origin was set rather than after.
This fix feels correct to me because noclipping around with the sw32
renderer would probably hit the same bug with a bit of bad luck. Thus
ensure the index resulting from zi never exceeds 65535.
2012-04-24 22:02:20 +09:00

884 lines
25 KiB
C

/*
d_scan.c
Portable C scan-level rasterization code, all pixel depths.
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#define NH_DEFINE
#include "namehack.h"
#include "QF/qendian.h"
#include "QF/render.h"
#include "QF/sys.h"
#include "compat.h"
#include "d_local.h"
#include "r_internal.h"
#include "vid_internal.h"
// State shared between sw32_Turbulent (which sets it up for each sub-span)
// and D_DrawTurbulentSpan (which consumes and advances it).

// base of the source texels; sw32_Turbulent points it at sw32_cacheblock
static byte *r_turb_pbase;
// next destination pixel; its real type depends on sw32_r_pixbytes
static void *r_turb_pdest;
// current 16.16 fixed-point texture coordinates and their per-pixel steps
static fixed16_t r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
// time-dependent starting point inside sw32_sintable
static int *r_turb_turb;
// number of pixels D_DrawTurbulentSpan has to draw for the current sub-span
static int r_turb_spancount;
/*
D_WarpScreen
this performs a slight compression of the screen at the same time as
the sine warp, to keep the edges from wrapping
*/
void
sw32_D_WarpScreen (void)
{
switch(sw32_r_pixbytes) {
case 1:
{
int w, h;
int u, v;
byte *dest;
int *turb;
int *col;
byte **row;
byte *rowptr[MAXHEIGHT];
int column[MAXWIDTH];
float wratio, hratio;
w = r_refdef.vrect.width;
h = r_refdef.vrect.height;
wratio = w / (float) scr_vrect.width;
hratio = h / (float) scr_vrect.height;
for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
rowptr[v] = (byte *) sw32_d_viewbuffer + (r_refdef.vrect.y *
sw32_screenwidth) +
(sw32_screenwidth * (int) ((float) v * hratio * h /
(h + AMP2 * 2)));
}
for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
column[u] = r_refdef.vrect.x +
(int) ((float) u * wratio * w / (w + AMP2 * 2));
}
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
dest = (byte *)vid.buffer + scr_vrect.y * vid.rowbytes +
scr_vrect.x;
for (v = 0; v < scr_vrect.height; v++, dest += vid.rowbytes) {
col = &column[turb[v]];
row = &rowptr[v];
for (u = 0; u < scr_vrect.width; u += 4) {
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
}
}
}
break;
case 2:
{
int w, h;
int u, v;
short *dest;
int *turb;
int *col;
short **row;
short *rowptr[MAXHEIGHT];
int column[MAXWIDTH];
float wratio, hratio;
w = r_refdef.vrect.width;
h = r_refdef.vrect.height;
wratio = w / (float) scr_vrect.width;
hratio = h / (float) scr_vrect.height;
for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
rowptr[v] = (short *) sw32_d_viewbuffer +
(r_refdef.vrect.y * sw32_screenwidth) +
(sw32_screenwidth * (int) ((float) v * hratio * h /
(h + AMP2 * 2)));
}
for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
column[u] = r_refdef.vrect.x +
(int) ((float) u * wratio * w / (w + AMP2 * 2));
}
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
dest = (short *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 1) +
scr_vrect.x;
for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 1)) {
col = &column[turb[v]];
row = &rowptr[v];
for (u = 0; u < scr_vrect.width; u += 4) {
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
}
}
}
break;
case 4:
{
int w, h;
int u, v;
int *dest;
int *turb;
int *col;
int **row;
int *rowptr[MAXHEIGHT];
int column[MAXWIDTH];
float wratio, hratio;
w = r_refdef.vrect.width;
h = r_refdef.vrect.height;
wratio = w / (float) scr_vrect.width;
hratio = h / (float) scr_vrect.height;
for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
rowptr[v] = (int *) sw32_d_viewbuffer +
(r_refdef.vrect.y * sw32_screenwidth) +
(sw32_screenwidth * (int) ((float) v * hratio * h /
(h + AMP2 * 2)));
}
for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
column[u] = r_refdef.vrect.x +
(int) ((float) u * wratio * w / (w + AMP2 * 2));
}
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
dest = (int *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 2) +
scr_vrect.x;
for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 2)) {
col = &column[turb[v]];
row = &rowptr[v];
for (u = 0; u < scr_vrect.width; u += 4) {
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
}
}
}
break;
default:
Sys_Error("D_WarpScreen: unsupported r_pixbytes %i", sw32_r_pixbytes);
}
}
/*
	turb_next_texel

	Return the 8-bit texel for the current turbulent position and advance
	r_turb_s / r_turb_t by one pixel step.  Each coordinate is displaced by
	the sine-table entry selected by the *other* coordinate, then wrapped to
	0..63; the texel is read from row tturb, column sturb with a row pitch
	of 64 (tturb << 6).
*/
static byte
turb_next_texel (void)
{
	int         s_offset = r_turb_turb[(r_turb_t >> 16) & (CYCLE - 1)];
	int         t_offset = r_turb_turb[(r_turb_s >> 16) & (CYCLE - 1)];
	int         sturb = ((r_turb_s + s_offset) >> 16) & 63;
	int         tturb = ((r_turb_t + t_offset) >> 16) & 63;

	r_turb_s += r_turb_sstep;
	r_turb_t += r_turb_tstep;
	return r_turb_pbase[(tturb << 6) + sturb];
}

/*
	D_DrawTurbulentSpan

	Draw r_turb_spancount pixels (always at least one) at r_turb_pdest,
	stepping the texture coordinates linearly.  8-bit output stores the
	texel as is; 16- and 32-bit output translate it through
	sw32_8to16table / d_8to24table.  On return r_turb_pdest points just past
	the last pixel written.
*/
static void
D_DrawTurbulentSpan (void)
{
	byte        texel;

	switch (sw32_r_pixbytes) {
	case 1:
		{
			byte       *out = (byte *) r_turb_pdest;

			do {
				texel = turb_next_texel ();
				*out++ = texel;
			} while (--r_turb_spancount > 0);
			r_turb_pdest = out;
		}
		break;
	case 2:
		{
			short      *out = (short *) r_turb_pdest;

			do {
				texel = turb_next_texel ();
				*out++ = sw32_8to16table[texel];
			} while (--r_turb_spancount > 0);
			r_turb_pdest = out;
		}
		break;
	case 4:
		{
			int        *out = (int *) r_turb_pdest;

			do {
				texel = turb_next_texel ();
				*out++ = d_8to24table[texel];
			} while (--r_turb_spancount > 0);
			r_turb_pdest = out;
		}
		break;
	default:
		Sys_Error("D_DrawTurbulentSpan: unsupported r_pixbytes %i",
				  sw32_r_pixbytes);
	}
}
/*
	sw32_Turbulent

	Draw every span in the list starting at pspan (which must not be NULL)
	with the sine-warped texture in sw32_cacheblock.

	Each span is walked in sub-spans of up to 16 pixels.  Perspective-correct
	s and t are computed only at the ends of a sub-span (via the 1/z lookup
	table sw32_d_zitable); D_DrawTurbulentSpan interpolates linearly in
	between.

	The table index is derived from zi, which can leave the 0..65535 range.
	Converting an out-of-range float straight to unsigned short is undefined
	behaviour, so the value is converted to int first and the well-defined
	int -> unsigned short conversion wraps it back into 0..65535.
*/
void
sw32_Turbulent (espan_t *pspan)
{
	int         count;
	fixed16_t   snext, tnext;
	float       sdivz, tdivz, zi, z, du, dv, spancountminus1;
	float       sdivz16stepu, tdivz16stepu, zi16stepu;

	// time-dependent starting point in the sine table
	r_turb_turb = sw32_sintable
		+ ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));

	r_turb_sstep = 0;			// keep compiler happy
	r_turb_tstep = 0;			// ditto

	r_turb_pbase = (byte *) sw32_cacheblock;

	// gradients across one full 16 pixel sub-span
	sdivz16stepu = sw32_d_sdivzstepu * 16;
	tdivz16stepu = sw32_d_tdivzstepu * 16;
	zi16stepu = d_zistepu * 16 * 65536;

	do {
		r_turb_pdest = (byte *) sw32_d_viewbuffer
			+ ((sw32_screenwidth * pspan->v) + pspan->u) * sw32_r_pixbytes;

		count = pspan->count;

		// calculate the initial s/z, t/z, 1/z, s, and t and clamp
		du = (float) pspan->u;
		dv = (float) pspan->v;

		sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv
			+ du * sw32_d_sdivzstepu;
		tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv
			+ du * sw32_d_tdivzstepu;
		zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
		// via int: see the note on the table index in the function header
		z = sw32_d_zitable[(unsigned short) (int) zi];

		r_turb_s = (int) (sdivz * z) + sw32_sadjust;
		if (r_turb_s > sw32_bbextents)
			r_turb_s = sw32_bbextents;
		else if (r_turb_s < 0)
			r_turb_s = 0;

		r_turb_t = (int) (tdivz * z) + sw32_tadjust;
		if (r_turb_t > sw32_bbextentt)
			r_turb_t = sw32_bbextentt;
		else if (r_turb_t < 0)
			r_turb_t = 0;

		do {
			// calculate s and t at the far end of the span
			if (count >= 16)
				r_turb_spancount = 16;
			else
				r_turb_spancount = count;

			count -= r_turb_spancount;

			if (count) {
				// calculate s/z, t/z, zi->fixed s and t at far end of span,
				// calculate s and t steps across span by shifting
				sdivz += sdivz16stepu;
				tdivz += tdivz16stepu;
				zi += zi16stepu;
				z = sw32_d_zitable[(unsigned short) (int) zi];

				// the lower bound of 16 prevents round-off error on <0
				// steps from causing overstepping & running off the edge
				// of the texture
				snext = (int) (sdivz * z) + sw32_sadjust;
				if (snext > sw32_bbextents)
					snext = sw32_bbextents;
				else if (snext < 16)
					snext = 16;

				tnext = (int) (tdivz * z) + sw32_tadjust;
				if (tnext > sw32_bbextentt)
					tnext = sw32_bbextentt;
				else if (tnext < 16)
					tnext = 16;

				r_turb_sstep = (snext - r_turb_s) >> 4;
				r_turb_tstep = (tnext - r_turb_t) >> 4;
			} else {
				// calculate s/z, t/z, zi->fixed s and t at last pixel in
				// span (so can't step off polygon), clamp, calculate s and t
				// steps across span by division, biasing steps low so we
				// don't run off the texture
				spancountminus1 = (float) (r_turb_spancount - 1);
				sdivz += sw32_d_sdivzstepu * spancountminus1;
				tdivz += sw32_d_tdivzstepu * spancountminus1;
				zi += d_zistepu * 65536.0f * spancountminus1;
				z = sw32_d_zitable[(unsigned short) (int) zi];

				// same round-off guard as above
				snext = (int) (sdivz * z) + sw32_sadjust;
				if (snext > sw32_bbextents)
					snext = sw32_bbextents;
				else if (snext < 16)
					snext = 16;

				tnext = (int) (tdivz * z) + sw32_tadjust;
				if (tnext > sw32_bbextentt)
					tnext = sw32_bbextentt;
				else if (tnext < 16)
					tnext = 16;

				// a single pixel needs no step; the previous one is kept
				if (r_turb_spancount > 1) {
					r_turb_sstep = (snext - r_turb_s)
						/ (r_turb_spancount - 1);
					r_turb_tstep = (tnext - r_turb_t)
						/ (r_turb_spancount - 1);
				}
			}

			// wrap the coordinates to CYCLE texels (16.16 fixed point)
			r_turb_s = r_turb_s & ((CYCLE << 16) - 1);
			r_turb_t = r_turb_t & ((CYCLE << 16) - 1);

			D_DrawTurbulentSpan ();

			// the far end of this sub-span is the start of the next
			r_turb_s = snext;
			r_turb_t = tnext;
		} while (count > 0);
	} while ((pspan = pspan->pnext) != NULL);
}
/*
	D_DrawSpans

	Draw every span in the list starting at pspan (which must not be NULL)
	with the texture in sw32_cacheblock, sw32_cachewidth texels per row.

	Spans are walked in runs of 8 pixels.  Perspective-correct s and t are
	computed only at the ends of a run (via the 1/z lookup table
	sw32_d_zitable) and interpolated linearly in between; a final run of
	1..7 pixels divides by its pixel count instead of shifting.

	The table index is derived from zi, which can leave the 0..65535 range.
	Converting an out-of-range float straight to unsigned short is undefined
	behaviour, so the value is converted to int first and the well-defined
	int -> unsigned short conversion wraps it back into 0..65535.

	The worker is identical for every pixel size except for the texel/pixel
	type, so the three variants are generated from one template.
*/
#define SW32_DRAWSPANS_FUNC(func, pixel_t) \
static void \
func (espan_t *pspan) \
{ \
	pixel_t    *pbase = (pixel_t *) sw32_cacheblock, *pdest; \
	int         count, i; \
	fixed16_t   s, t, snext, tnext, sstep, tstep; \
	float       sdivz, tdivz, zi, z, du, dv; \
	float       sdivz8stepu, tdivz8stepu, zi8stepu; \
 \
	sstep = 0;	/* keep compiler happy */ \
	tstep = 0;	/* ditto */ \
 \
	/* gradients across one full 8 pixel run */ \
	sdivz8stepu = sw32_d_sdivzstepu * 8; \
	tdivz8stepu = sw32_d_tdivzstepu * 8; \
	zi8stepu = d_zistepu * 8 * 65536; \
 \
	do { \
		pdest = (pixel_t *) sw32_d_viewbuffer + \
			(sw32_screenwidth * pspan->v) + pspan->u; \
 \
		count = pspan->count; \
 \
		/* calculate the initial s/z, t/z, 1/z, s, and t and clamp */ \
		du = (float) pspan->u; \
		dv = (float) pspan->v; \
 \
		sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + \
			du * sw32_d_sdivzstepu; \
		tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + \
			du * sw32_d_tdivzstepu; \
		zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f; \
		z = sw32_d_zitable[(unsigned short) (int) zi]; \
 \
		s = (int) (sdivz * z) + sw32_sadjust; \
		s = bound(0, s, sw32_bbextents); \
		t = (int) (tdivz * z) + sw32_tadjust; \
		t = bound(0, t, sw32_bbextentt); \
 \
		while (count >= 8) { \
			count -= 8; \
 \
			/* calculate s/z, t/z, zi->fixed s and t at far end of run, */ \
			/* calculate s and t steps across run by shifting */ \
			sdivz += sdivz8stepu; \
			tdivz += tdivz8stepu; \
			zi += zi8stepu; \
			z = sw32_d_zitable[(unsigned short) (int) zi]; \
 \
			/* the lower bound prevents round-off error on <0 steps from */ \
			/* causing overstepping & running off the edge of the texture */ \
			snext = (int) (sdivz * z) + sw32_sadjust; \
			snext = bound(8, snext, sw32_bbextents); \
			tnext = (int) (tdivz * z) + sw32_tadjust; \
			tnext = bound(8, tnext, sw32_bbextentt); \
 \
			sstep = (snext - s) >> 3; \
			tstep = (tnext - t) >> 3; \
 \
			for (i = 0; i < 8; i++) { \
				pdest[i] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)]; \
				s += sstep; \
				t += tstep; \
			} \
 \
			/* restart from the exact end point to avoid drift */ \
			s = snext; \
			t = tnext; \
			pdest += 8; \
		} \
 \
		if (count) { \
			/* 1..7 pixels left: calculate s/z, t/z, zi->fixed s and t */ \
			/* `count` pixels further on, clamp, and calculate the s and */ \
			/* t steps by division */ \
			sdivz += sw32_d_sdivzstepu * count; \
			tdivz += sw32_d_tdivzstepu * count; \
			zi += d_zistepu * 65536.0f * count; \
			z = sw32_d_zitable[(unsigned short) (int) zi]; \
 \
			/* same round-off guard as in the 8 pixel runs */ \
			snext = (int) (sdivz * z) + sw32_sadjust; \
			snext = bound(count, snext, sw32_bbextents); \
			tnext = (int) (tdivz * z) + sw32_tadjust; \
			tnext = bound(count, tnext, sw32_bbextentt); \
 \
			/* a single pixel is drawn at the current s/t; no step needed */ \
			if (count > 1) { \
				sstep = (snext - s) / count; \
				tstep = (tnext - t) / count; \
			} \
 \
			for (i = 0; i < count; i++) { \
				pdest[i] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)]; \
				s += sstep; \
				t += tstep; \
			} \
		} \
	} while ((pspan = pspan->pnext) != NULL); \
}

SW32_DRAWSPANS_FUNC (draw_spans_8, byte)
SW32_DRAWSPANS_FUNC (draw_spans_16, short)
SW32_DRAWSPANS_FUNC (draw_spans_32, int)

#undef SW32_DRAWSPANS_FUNC

void
sw32_D_DrawSpans (espan_t *pspan)
{
	switch (sw32_r_pixbytes) {
	case 1:
		draw_spans_8 (pspan);
		break;
	case 2:
		draw_spans_16 (pspan);
		break;
	case 4:
		draw_spans_32 (pspan);
		break;
	default:
		Sys_Error("D_DrawSpans: unsupported r_pixbytes %i", sw32_r_pixbytes);
	}
}
/*
	D_DrawZSpans

	Fill the 16-bit z-buffer (sw32_d_pzbuffer, sw32_d_zwidth entries per row)
	for every span in the list starting at pspan (which must not be NULL).

	1/z is interpolated linearly along each span as a fixed-point integer
	scaled by 0x8000 * 0x10000; the upper 16 bits of that integer are what
	gets stored for each pixel.
*/
void
sw32_D_DrawZSpans (espan_t *pspan)
{
	// FIXME: check for clamping/range problems
	// we count on FP exceptions being turned off to avoid range problems
	int         izistep = (int) (d_zistepu * 0x8000 * 0x10000);

	do {
		short      *zdest = sw32_d_pzbuffer + (sw32_d_zwidth * pspan->v)
			+ pspan->u;
		int         remaining = pspan->count;

		// calculate the initial 1/z
		float       du = (float) pspan->u;
		float       dv = (float) pspan->v;
		double      zi = d_ziorigin + dv * d_zistepv + du * d_zistepu;

		// we count on FP exceptions being turned off to avoid range problems
		int         izi = (int) (zi * 0x8000 * 0x10000);

		// LordHavoc: added big endian case, the old code is not correct on
		// big-endian (results in swapped depth pairs), and is tuned more for
		// x86, PowerPC compilers can probably do a good job with raw loop
		// unrolling if it is even necessary...
		if (bigendien) {
			// plain one-entry-at-a-time fill
			do {
				*zdest++ = (short) (izi >> 16);
				izi += izistep;
			} while (--remaining);
		} else {
			int         pairs;

			// write a single entry first if the destination is not on a
			// 4 byte boundary, so the paired stores below are aligned
			if ((intptr_t) zdest & 0x02) {
				*zdest++ = (short) (izi >> 16);
				izi += izistep;
				remaining--;
			}

			// two entries per store: the first depth goes in the low half
			// of the word, the second in the high half
			for (pairs = remaining >> 1; pairs > 0; pairs--) {
				unsigned int packed = izi >> 16;

				izi += izistep;
				packed |= izi & 0xFFFF0000;
				izi += izistep;
				*(int *) zdest = packed;
				zdest += 2;
			}

			// odd entry left over at the end
			if (remaining & 1)
				*zdest = (short) (izi >> 16);
		}
	} while ((pspan = pspan->pnext) != NULL);
}