mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2024-11-23 21:02:50 +00:00
e167300a84
While this particular tigger of the real bug was caused by 659d95221e
(hopefully fix both the "get stuck waiting for 3d" bug and the null
worldmode bug.), the real bug was lurking in the code since the dawn of
time (from sw32's perspective). This fix is as per LordHavoc's suggestion
(heh, despite the years, he knows his code), but I spent the time hunting
down the trigger to understand just what was going on.
It turns out that (0,0,0) is too close to a wall (probably on, but the
slight default offset is too close) and the above commit changed the first
rendered frame to be before the player origin was set rather than after.
This fix feels correct to me because noclipping around with the sw32
renderer would probably hit the same bug with a bit of bad luck. Thus
ensure the index resulting from zi never exceeds 65535.
884 lines
25 KiB
C
884 lines
25 KiB
C
/*
|
|
d_scan.c
|
|
|
|
Portable C scan-level rasterization code, all pixel depths.
|
|
|
|
Copyright (C) 1996-1997 Id Software, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to:
|
|
|
|
Free Software Foundation, Inc.
|
|
59 Temple Place - Suite 330
|
|
Boston, MA 02111-1307, USA
|
|
|
|
*/
|
|
#ifdef HAVE_CONFIG_H
|
|
# include "config.h"
|
|
#endif
|
|
|
|
#define NH_DEFINE
|
|
#include "namehack.h"
|
|
|
|
#include "QF/qendian.h"
|
|
#include "QF/render.h"
|
|
#include "QF/sys.h"
|
|
|
|
#include "compat.h"
|
|
#include "d_local.h"
|
|
#include "r_internal.h"
|
|
#include "vid_internal.h"
|
|
|
|
static byte *r_turb_pbase;
|
|
static void *r_turb_pdest;
|
|
static fixed16_t r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
|
|
static int *r_turb_turb;
|
|
static int r_turb_spancount;
|
|
|
|
/*
|
|
D_WarpScreen
|
|
|
|
this performs a slight compression of the screen at the same time as
|
|
the sine warp, to keep the edges from wrapping
|
|
*/
|
|
void
|
|
sw32_D_WarpScreen (void)
|
|
{
|
|
switch(sw32_r_pixbytes) {
|
|
case 1:
|
|
{
|
|
int w, h;
|
|
int u, v;
|
|
byte *dest;
|
|
int *turb;
|
|
int *col;
|
|
byte **row;
|
|
byte *rowptr[MAXHEIGHT];
|
|
int column[MAXWIDTH];
|
|
float wratio, hratio;
|
|
|
|
w = r_refdef.vrect.width;
|
|
h = r_refdef.vrect.height;
|
|
|
|
wratio = w / (float) scr_vrect.width;
|
|
hratio = h / (float) scr_vrect.height;
|
|
|
|
for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
|
|
rowptr[v] = (byte *) sw32_d_viewbuffer + (r_refdef.vrect.y *
|
|
sw32_screenwidth) +
|
|
(sw32_screenwidth * (int) ((float) v * hratio * h /
|
|
(h + AMP2 * 2)));
|
|
}
|
|
|
|
for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
|
|
column[u] = r_refdef.vrect.x +
|
|
(int) ((float) u * wratio * w / (w + AMP2 * 2));
|
|
}
|
|
|
|
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
dest = (byte *)vid.buffer + scr_vrect.y * vid.rowbytes +
|
|
scr_vrect.x;
|
|
|
|
for (v = 0; v < scr_vrect.height; v++, dest += vid.rowbytes) {
|
|
col = &column[turb[v]];
|
|
row = &rowptr[v];
|
|
for (u = 0; u < scr_vrect.width; u += 4) {
|
|
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
|
|
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
|
|
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
|
|
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
int w, h;
|
|
int u, v;
|
|
short *dest;
|
|
int *turb;
|
|
int *col;
|
|
short **row;
|
|
short *rowptr[MAXHEIGHT];
|
|
int column[MAXWIDTH];
|
|
float wratio, hratio;
|
|
|
|
w = r_refdef.vrect.width;
|
|
h = r_refdef.vrect.height;
|
|
|
|
wratio = w / (float) scr_vrect.width;
|
|
hratio = h / (float) scr_vrect.height;
|
|
|
|
for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
|
|
rowptr[v] = (short *) sw32_d_viewbuffer +
|
|
(r_refdef.vrect.y * sw32_screenwidth) +
|
|
(sw32_screenwidth * (int) ((float) v * hratio * h /
|
|
(h + AMP2 * 2)));
|
|
}
|
|
|
|
for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
|
|
column[u] = r_refdef.vrect.x +
|
|
(int) ((float) u * wratio * w / (w + AMP2 * 2));
|
|
}
|
|
|
|
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
dest = (short *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 1) +
|
|
scr_vrect.x;
|
|
|
|
for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 1)) {
|
|
col = &column[turb[v]];
|
|
row = &rowptr[v];
|
|
for (u = 0; u < scr_vrect.width; u += 4) {
|
|
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
|
|
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
|
|
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
|
|
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
int w, h;
|
|
int u, v;
|
|
int *dest;
|
|
int *turb;
|
|
int *col;
|
|
int **row;
|
|
int *rowptr[MAXHEIGHT];
|
|
int column[MAXWIDTH];
|
|
float wratio, hratio;
|
|
|
|
w = r_refdef.vrect.width;
|
|
h = r_refdef.vrect.height;
|
|
|
|
wratio = w / (float) scr_vrect.width;
|
|
hratio = h / (float) scr_vrect.height;
|
|
|
|
for (v = 0; v < scr_vrect.height + AMP2 * 2; v++) {
|
|
rowptr[v] = (int *) sw32_d_viewbuffer +
|
|
(r_refdef.vrect.y * sw32_screenwidth) +
|
|
(sw32_screenwidth * (int) ((float) v * hratio * h /
|
|
(h + AMP2 * 2)));
|
|
}
|
|
|
|
for (u = 0; u < scr_vrect.width + AMP2 * 2; u++) {
|
|
column[u] = r_refdef.vrect.x +
|
|
(int) ((float) u * wratio * w / (w + AMP2 * 2));
|
|
}
|
|
|
|
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
dest = (int *) vid.buffer + scr_vrect.y * (vid.rowbytes >> 2) +
|
|
scr_vrect.x;
|
|
|
|
for (v = 0; v < scr_vrect.height; v++, dest += (vid.rowbytes >> 2)) {
|
|
col = &column[turb[v]];
|
|
row = &rowptr[v];
|
|
for (u = 0; u < scr_vrect.width; u += 4) {
|
|
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
|
|
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
|
|
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
|
|
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
Sys_Error("D_WarpScreen: unsupported r_pixbytes %i", sw32_r_pixbytes);
|
|
}
|
|
}
|
|
|
|
static void
|
|
D_DrawTurbulentSpan (void)
|
|
{
|
|
int sturb, tturb;
|
|
|
|
switch (sw32_r_pixbytes) {
|
|
case 1:
|
|
{
|
|
byte *pdest = (byte *) r_turb_pdest;
|
|
do {
|
|
sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
*pdest++ = r_turb_pbase[(tturb << 6) + sturb];
|
|
r_turb_s += r_turb_sstep;
|
|
r_turb_t += r_turb_tstep;
|
|
} while (--r_turb_spancount > 0);
|
|
r_turb_pdest = (byte *)pdest;
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
short *pdest = (short *) r_turb_pdest;
|
|
do {
|
|
sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
*pdest++ = sw32_8to16table[r_turb_pbase[(tturb << 6) + sturb]];
|
|
r_turb_s += r_turb_sstep;
|
|
r_turb_t += r_turb_tstep;
|
|
} while (--r_turb_spancount > 0);
|
|
r_turb_pdest = (byte *)pdest;
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
int *pdest = (int *) r_turb_pdest;
|
|
do {
|
|
sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
*pdest++ = d_8to24table[r_turb_pbase[(tturb << 6) + sturb]];
|
|
r_turb_s += r_turb_sstep;
|
|
r_turb_t += r_turb_tstep;
|
|
} while (--r_turb_spancount > 0);
|
|
r_turb_pdest = (byte *)pdest;
|
|
}
|
|
break;
|
|
default:
|
|
Sys_Error("D_DrawTurbulentSpan: unsupported r_pixbytes %i",
|
|
sw32_r_pixbytes);
|
|
}
|
|
}
|
|
|
|
void
|
|
sw32_Turbulent (espan_t *pspan)
|
|
{
|
|
int count;
|
|
fixed16_t snext, tnext;
|
|
float sdivz, tdivz, zi, z, du, dv, spancountminus1;
|
|
float sdivz16stepu, tdivz16stepu, zi16stepu;
|
|
|
|
r_turb_turb = sw32_sintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
|
|
r_turb_sstep = 0; // keep compiler happy
|
|
r_turb_tstep = 0; // ditto
|
|
|
|
r_turb_pbase = (byte *) sw32_cacheblock;
|
|
|
|
sdivz16stepu = sw32_d_sdivzstepu * 16;
|
|
tdivz16stepu = sw32_d_tdivzstepu * 16;
|
|
zi16stepu = d_zistepu * 16 * 65536;
|
|
|
|
do {
|
|
r_turb_pdest = (byte *) sw32_d_viewbuffer + ((sw32_screenwidth * pspan->v) +
|
|
pspan->u) * sw32_r_pixbytes;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
r_turb_s = (int) (sdivz * z) + sw32_sadjust;
|
|
if (r_turb_s > sw32_bbextents)
|
|
r_turb_s = sw32_bbextents;
|
|
else if (r_turb_s < 0)
|
|
r_turb_s = 0;
|
|
|
|
r_turb_t = (int) (tdivz * z) + sw32_tadjust;
|
|
if (r_turb_t > sw32_bbextentt)
|
|
r_turb_t = sw32_bbextentt;
|
|
else if (r_turb_t < 0)
|
|
r_turb_t = 0;
|
|
|
|
do {
|
|
// calculate s and t at the far end of the span
|
|
if (count >= 16)
|
|
r_turb_spancount = 16;
|
|
else
|
|
r_turb_spancount = count;
|
|
|
|
count -= r_turb_spancount;
|
|
|
|
if (count) {
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz16stepu;
|
|
tdivz += tdivz16stepu;
|
|
zi += zi16stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
if (snext > sw32_bbextents)
|
|
snext = sw32_bbextents;
|
|
else if (snext < 16)
|
|
snext = 16; // prevent round-off error on <0
|
|
// steps from
|
|
// from causing overstepping & running off the
|
|
// edge of the texture
|
|
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
if (tnext > sw32_bbextentt)
|
|
tnext = sw32_bbextentt;
|
|
else if (tnext < 16)
|
|
tnext = 16; // guard against round-off error on
|
|
// <0 steps
|
|
|
|
r_turb_sstep = (snext - r_turb_s) >> 4;
|
|
r_turb_tstep = (tnext - r_turb_t) >> 4;
|
|
} else {
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in
|
|
// span (so can't step off polygon), clamp, calculate s and t
|
|
// steps across span by division, biasing steps low so we
|
|
// don't run off the texture
|
|
spancountminus1 = (float) (r_turb_spancount - 1);
|
|
sdivz += sw32_d_sdivzstepu * spancountminus1;
|
|
tdivz += sw32_d_tdivzstepu * spancountminus1;
|
|
zi += d_zistepu * 65536.0f * spancountminus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
if (snext > sw32_bbextents)
|
|
snext = sw32_bbextents;
|
|
else if (snext < 16)
|
|
snext = 16; // prevent round-off error on <0 steps
|
|
// from causing overstepping & running
|
|
// off the edge of the texture
|
|
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
if (tnext > sw32_bbextentt)
|
|
tnext = sw32_bbextentt;
|
|
else if (tnext < 16)
|
|
tnext = 16; // guard against round-off error on
|
|
// <0 steps
|
|
|
|
if (r_turb_spancount > 1) {
|
|
r_turb_sstep = (snext - r_turb_s) / (r_turb_spancount - 1);
|
|
r_turb_tstep = (tnext - r_turb_t) / (r_turb_spancount - 1);
|
|
}
|
|
}
|
|
|
|
r_turb_s = r_turb_s & ((CYCLE << 16) - 1);
|
|
r_turb_t = r_turb_t & ((CYCLE << 16) - 1);
|
|
|
|
D_DrawTurbulentSpan ();
|
|
|
|
r_turb_s = snext;
|
|
r_turb_t = tnext;
|
|
|
|
} while (count > 0);
|
|
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
|
|
void
|
|
sw32_D_DrawSpans (espan_t *pspan)
|
|
{
|
|
switch(sw32_r_pixbytes) {
|
|
case 1:
|
|
{
|
|
byte *pbase = (byte *) sw32_cacheblock, *pdest;
|
|
int count;
|
|
fixed16_t s, t, snext, tnext, sstep, tstep;
|
|
float sdivz, tdivz, zi, z, du, dv;
|
|
float sdivz8stepu, tdivz8stepu, zi8stepu;
|
|
|
|
sstep = 0; // keep compiler happy
|
|
tstep = 0; // ditto
|
|
|
|
sdivz8stepu = sw32_d_sdivzstepu * 8;
|
|
tdivz8stepu = sw32_d_tdivzstepu * 8;
|
|
zi8stepu = d_zistepu * 8 * 65536;
|
|
|
|
do {
|
|
pdest = (byte *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
|
|
pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
s = (int) (sdivz * z) + sw32_sadjust;
|
|
s = bound(0, s, sw32_bbextents);
|
|
t = (int) (tdivz * z) + sw32_tadjust;
|
|
t = bound(0, t, sw32_bbextentt);
|
|
|
|
while(count >= 8) {
|
|
count -= 8;
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz8stepu;
|
|
tdivz += tdivz8stepu;
|
|
zi += zi8stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(8, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(8, tnext, sw32_bbextentt);
|
|
|
|
sstep = (snext - s) >> 3;
|
|
tstep = (tnext - t) >> 3;
|
|
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s = snext;
|
|
t = tnext;
|
|
pdest += 8;
|
|
}
|
|
if (count)
|
|
{
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in span
|
|
// (so can't step off polygon), clamp, calculate s and t steps
|
|
// across span by division, biasing steps low so we don't run
|
|
// off the texture
|
|
//countminus1 = (float) (count - 1);
|
|
sdivz += sw32_d_sdivzstepu * count; //minus1;
|
|
tdivz += sw32_d_tdivzstepu * count; //minus1;
|
|
zi += d_zistepu * 65536.0f * count; //minus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(count, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(count, tnext, sw32_bbextentt);
|
|
|
|
if (count > 1) {
|
|
sstep = (snext - s) / count; //(count - 1);
|
|
tstep = (tnext - t) / count; //(count - 1);
|
|
|
|
if (count & 4)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 4;
|
|
}
|
|
if (count & 2)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 2;
|
|
}
|
|
if (count & 1)
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
}
|
|
else
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
}
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
short *pbase = (short *) sw32_cacheblock, *pdest;
|
|
int count;
|
|
fixed16_t s, t, snext, tnext, sstep, tstep;
|
|
float sdivz, tdivz, zi, z, du, dv;
|
|
float sdivz8stepu, tdivz8stepu, zi8stepu;
|
|
|
|
sstep = 0; // keep compiler happy
|
|
tstep = 0; // ditto
|
|
|
|
sdivz8stepu = sw32_d_sdivzstepu * 8;
|
|
tdivz8stepu = sw32_d_tdivzstepu * 8;
|
|
zi8stepu = d_zistepu * 8 * 65536;
|
|
|
|
do {
|
|
pdest = (short *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
|
|
pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
s = (int) (sdivz * z) + sw32_sadjust;
|
|
s = bound(0, s, sw32_bbextents);
|
|
t = (int) (tdivz * z) + sw32_tadjust;
|
|
t = bound(0, t, sw32_bbextentt);
|
|
|
|
while(count >= 8) {
|
|
count -= 8;
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz8stepu;
|
|
tdivz += tdivz8stepu;
|
|
zi += zi8stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(8, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(8, tnext, sw32_bbextentt);
|
|
|
|
sstep = (snext - s) >> 3;
|
|
tstep = (tnext - t) >> 3;
|
|
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s = snext;t = tnext;
|
|
pdest += 8;
|
|
}
|
|
if (count)
|
|
{
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in span
|
|
// (so can't step off polygon), clamp, calculate s and t steps
|
|
// across span by division, biasing steps low so we don't run
|
|
// off the texture
|
|
//countminus1 = (float) (count - 1);
|
|
sdivz += sw32_d_sdivzstepu * count; //minus1;
|
|
tdivz += sw32_d_tdivzstepu * count; //minus1;
|
|
zi += d_zistepu * 65536.0f * count; //minus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(count, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(count, tnext, sw32_bbextentt);
|
|
|
|
if (count > 1) {
|
|
sstep = (snext - s) / count; //(count - 1);
|
|
tstep = (tnext - t) / count; //(count - 1);
|
|
|
|
if (count & 4)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;t += tstep;
|
|
pdest += 4;
|
|
}
|
|
if (count & 2)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 2;
|
|
}
|
|
if (count & 1)
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
}
|
|
else
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
}
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
int *pbase = (int *) sw32_cacheblock, *pdest;
|
|
int count;
|
|
fixed16_t s, t, snext, tnext, sstep, tstep;
|
|
float sdivz, tdivz, zi, z, du, dv;
|
|
float sdivz8stepu, tdivz8stepu, zi8stepu;
|
|
|
|
sstep = 0; // keep compiler happy
|
|
tstep = 0; // ditto
|
|
|
|
sdivz8stepu = sw32_d_sdivzstepu * 8;
|
|
tdivz8stepu = sw32_d_tdivzstepu * 8;
|
|
zi8stepu = d_zistepu * 8 * 65536;
|
|
|
|
do {
|
|
pdest = (int *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) + pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
s = (int) (sdivz * z) + sw32_sadjust;
|
|
s = bound(0, s, sw32_bbextents);
|
|
t = (int) (tdivz * z) + sw32_tadjust;
|
|
t = bound(0, t, sw32_bbextentt);
|
|
|
|
while(count >= 8) {
|
|
count -= 8;
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz8stepu;
|
|
tdivz += tdivz8stepu;
|
|
zi += zi8stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(8, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(8, tnext, sw32_bbextentt);
|
|
|
|
sstep = (snext - s) >> 3;
|
|
tstep = (tnext - t) >> 3;
|
|
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s = snext;
|
|
t = tnext;
|
|
pdest += 8;
|
|
}
|
|
if (count)
|
|
{
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in span
|
|
// (so can't step off polygon), clamp, calculate s and t steps
|
|
// across span by division, biasing steps low so we don't run
|
|
// off the texture
|
|
//countminus1 = (float) (count - 1);
|
|
sdivz += sw32_d_sdivzstepu * count; //minus1;
|
|
tdivz += sw32_d_tdivzstepu * count; //minus1;
|
|
zi += d_zistepu * 65536.0f * count; //minus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(count, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(count, tnext, sw32_bbextentt);
|
|
|
|
if (count > 1) {
|
|
sstep = (snext - s) / count; //(count - 1);
|
|
tstep = (tnext - t) / count; //(count - 1);
|
|
|
|
if (count & 4)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 4;
|
|
}
|
|
if (count & 2)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 2;
|
|
}
|
|
if (count & 1)
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
}
|
|
else
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
}
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
break;
|
|
default:
|
|
Sys_Error("D_DrawSpans: unsupported r_pixbytes %i", sw32_r_pixbytes);
|
|
}
|
|
}
|
|
|
|
void
|
|
sw32_D_DrawZSpans (espan_t *pspan)
|
|
{
|
|
int count, doublecount, izistep;
|
|
int izi;
|
|
short *pdest;
|
|
unsigned int ltemp;
|
|
double zi;
|
|
float du, dv;
|
|
|
|
// FIXME: check for clamping/range problems
|
|
// we count on FP exceptions being turned off to avoid range problems
|
|
izistep = (int) (d_zistepu * 0x8000 * 0x10000);
|
|
|
|
do {
|
|
pdest = sw32_d_pzbuffer + (sw32_d_zwidth * pspan->v) + pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial 1/z
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
zi = d_ziorigin + dv * d_zistepv + du * d_zistepu;
|
|
// we count on FP exceptions being turned off to avoid range problems
|
|
izi = (int) (zi * 0x8000 * 0x10000);
|
|
|
|
// LordHavoc: added big endian case, the old code is not correct on
|
|
// big-endian (results in swapped depth pairs), and is tuned more for
|
|
// x86, PowerPC compilers can probably do a good job with raw loop
|
|
// unrolling if it is even necessary...
|
|
if (bigendien)
|
|
{
|
|
do
|
|
{
|
|
*pdest++ = (short) (izi >> 16);
|
|
izi += izistep;
|
|
}
|
|
while(--count);
|
|
}
|
|
else
|
|
{
|
|
if ((intptr_t) pdest & 0x02) {
|
|
*pdest++ = (short) (izi >> 16);
|
|
izi += izistep;
|
|
count--;
|
|
}
|
|
|
|
if ((doublecount = count >> 1) > 0) {
|
|
do {
|
|
ltemp = izi >> 16;
|
|
izi += izistep;
|
|
ltemp |= izi & 0xFFFF0000;
|
|
izi += izistep;
|
|
*(int *) pdest = ltemp;
|
|
pdest += 2;
|
|
} while (--doublecount > 0);
|
|
}
|
|
|
|
if (count & 1)
|
|
*pdest = (short) (izi >> 16);
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|