mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-01-18 23:11:38 +00:00
a05210d864
Lighting on alias models seems to be borked, and skies are borked in both sw renderers (seems to be nothing to do with this commit, though).
883 lines
25 KiB
C
883 lines
25 KiB
C
/*
|
|
d_scan.c
|
|
|
|
Portable C scan-level rasterization code, all pixel depths.
|
|
|
|
Copyright (C) 1996-1997 Id Software, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to:
|
|
|
|
Free Software Foundation, Inc.
|
|
59 Temple Place - Suite 330
|
|
Boston, MA 02111-1307, USA
|
|
|
|
*/
|
|
#ifdef HAVE_CONFIG_H
|
|
# include "config.h"
|
|
#endif
|
|
|
|
#define NH_DEFINE
|
|
#include "namehack.h"
|
|
|
|
#include "QF/qendian.h"
|
|
#include "QF/render.h"
|
|
#include "QF/sys.h"
|
|
#include "QF/ui/view.h"
|
|
|
|
#include "compat.h"
|
|
#include "d_local.h"
|
|
#include "r_internal.h"
|
|
#include "vid_internal.h"
|
|
#include "vid_sw.h"
|
|
|
|
static byte *r_turb_pbase;
|
|
static void *r_turb_pdest;
|
|
static fixed16_t r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
|
|
static int *r_turb_turb;
|
|
static int r_turb_spancount;
|
|
|
|
/*
|
|
D_WarpScreen
|
|
|
|
this performs a slight compression of the screen at the same time as
|
|
the sine warp, to keep the edges from wrapping
|
|
*/
|
|
void
|
|
sw32_D_WarpScreen (void)
|
|
{
|
|
switch(sw32_ctx->pixbytes) {
|
|
case 1:
|
|
{
|
|
int w, h;
|
|
int u, v;
|
|
int scr_x = vr_data.scr_view->xpos;
|
|
int scr_y = vr_data.scr_view->ylen;
|
|
int scr_w = vr_data.scr_view->xpos;
|
|
int scr_h = vr_data.scr_view->ylen;
|
|
byte *dest;
|
|
int *turb;
|
|
int *col;
|
|
byte **row;
|
|
byte *rowptr[MAXHEIGHT];
|
|
int column[MAXWIDTH];
|
|
float wratio, hratio;
|
|
|
|
w = r_refdef.vrect.width;
|
|
h = r_refdef.vrect.height;
|
|
|
|
wratio = w / (float) scr_w;
|
|
hratio = h / (float) scr_h;
|
|
|
|
for (v = 0; v < scr_h + AMP2 * 2; v++) {
|
|
rowptr[v] = (byte *) sw32_d_viewbuffer + (r_refdef.vrect.y *
|
|
sw32_screenwidth) +
|
|
(sw32_screenwidth * (int) ((float) v * hratio * h /
|
|
(h + AMP2 * 2)));
|
|
}
|
|
|
|
for (u = 0; u < scr_w + AMP2 * 2; u++) {
|
|
column[u] = r_refdef.vrect.x +
|
|
(int) ((float) u * wratio * w / (w + AMP2 * 2));
|
|
}
|
|
|
|
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
dest = (byte *)vid.buffer + scr_y * vid.rowbytes + scr_x;
|
|
|
|
for (v = 0; v < scr_h; v++, dest += vid.rowbytes) {
|
|
col = &column[turb[v]];
|
|
row = &rowptr[v];
|
|
for (u = 0; u < scr_w; u += 4) {
|
|
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
|
|
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
|
|
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
|
|
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
int w, h;
|
|
int u, v;
|
|
int scr_x = vr_data.scr_view->xpos;
|
|
int scr_y = vr_data.scr_view->ylen;
|
|
int scr_w = vr_data.scr_view->xpos;
|
|
int scr_h = vr_data.scr_view->ylen;
|
|
short *dest;
|
|
int *turb;
|
|
int *col;
|
|
short **row;
|
|
short *rowptr[MAXHEIGHT];
|
|
int column[MAXWIDTH];
|
|
float wratio, hratio;
|
|
|
|
w = r_refdef.vrect.width;
|
|
h = r_refdef.vrect.height;
|
|
|
|
wratio = w / (float) scr_w;
|
|
hratio = h / (float) scr_h;
|
|
|
|
for (v = 0; v < scr_h + AMP2 * 2; v++) {
|
|
rowptr[v] = (short *) sw32_d_viewbuffer +
|
|
(r_refdef.vrect.y * sw32_screenwidth) +
|
|
(sw32_screenwidth * (int) ((float) v * hratio * h /
|
|
(h + AMP2 * 2)));
|
|
}
|
|
|
|
for (u = 0; u < scr_w + AMP2 * 2; u++) {
|
|
column[u] = r_refdef.vrect.x +
|
|
(int) ((float) u * wratio * w / (w + AMP2 * 2));
|
|
}
|
|
|
|
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
dest = (short *) vid.buffer + scr_y * (vid.rowbytes >> 1) + scr_x;
|
|
|
|
for (v = 0; v < scr_h; v++, dest += (vid.rowbytes >> 1)) {
|
|
col = &column[turb[v]];
|
|
row = &rowptr[v];
|
|
for (u = 0; u < scr_w; u += 4) {
|
|
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
|
|
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
|
|
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
|
|
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
int w, h;
|
|
int u, v;
|
|
int scr_x = vr_data.scr_view->xpos;
|
|
int scr_y = vr_data.scr_view->ylen;
|
|
int scr_w = vr_data.scr_view->xpos;
|
|
int scr_h = vr_data.scr_view->ylen;
|
|
int *dest;
|
|
int *turb;
|
|
int *col;
|
|
int **row;
|
|
int *rowptr[MAXHEIGHT];
|
|
int column[MAXWIDTH];
|
|
float wratio, hratio;
|
|
|
|
w = r_refdef.vrect.width;
|
|
h = r_refdef.vrect.height;
|
|
|
|
wratio = w / (float) scr_w;
|
|
hratio = h / (float) scr_h;
|
|
|
|
for (v = 0; v < scr_h + AMP2 * 2; v++) {
|
|
rowptr[v] = (int *) sw32_d_viewbuffer +
|
|
(r_refdef.vrect.y * sw32_screenwidth) +
|
|
(sw32_screenwidth * (int) ((float) v * hratio * h /
|
|
(h + AMP2 * 2)));
|
|
}
|
|
|
|
for (u = 0; u < scr_w + AMP2 * 2; u++) {
|
|
column[u] = r_refdef.vrect.x +
|
|
(int) ((float) u * wratio * w / (w + AMP2 * 2));
|
|
}
|
|
|
|
turb = sw32_intsintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
dest = (int *) vid.buffer + scr_y * (vid.rowbytes >> 2) + scr_x;
|
|
|
|
for (v = 0; v < scr_h; v++, dest += (vid.rowbytes >> 2)) {
|
|
col = &column[turb[v]];
|
|
row = &rowptr[v];
|
|
for (u = 0; u < scr_w; u += 4) {
|
|
dest[u + 0] = row[turb[u + 0]][col[u + 0]];
|
|
dest[u + 1] = row[turb[u + 1]][col[u + 1]];
|
|
dest[u + 2] = row[turb[u + 2]][col[u + 2]];
|
|
dest[u + 3] = row[turb[u + 3]][col[u + 3]];
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
Sys_Error("D_WarpScreen: unsupported r_pixbytes %i", sw32_ctx->pixbytes);
|
|
}
|
|
}
|
|
|
|
static void
|
|
D_DrawTurbulentSpan (void)
|
|
{
|
|
int sturb, tturb;
|
|
|
|
switch (sw32_ctx->pixbytes) {
|
|
case 1:
|
|
{
|
|
byte *pdest = (byte *) r_turb_pdest;
|
|
do {
|
|
sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
*pdest++ = r_turb_pbase[(tturb << 6) + sturb];
|
|
r_turb_s += r_turb_sstep;
|
|
r_turb_t += r_turb_tstep;
|
|
} while (--r_turb_spancount > 0);
|
|
r_turb_pdest = (byte *)pdest;
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
short *pdest = (short *) r_turb_pdest;
|
|
do {
|
|
sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
*pdest++ = sw32_8to16table[r_turb_pbase[(tturb << 6) + sturb]];
|
|
r_turb_s += r_turb_sstep;
|
|
r_turb_t += r_turb_tstep;
|
|
} while (--r_turb_spancount > 0);
|
|
r_turb_pdest = (byte *)pdest;
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
int *pdest = (int *) r_turb_pdest;
|
|
do {
|
|
sturb = ((r_turb_s + r_turb_turb[(r_turb_t >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
tturb = ((r_turb_t + r_turb_turb[(r_turb_s >> 16) &
|
|
(CYCLE - 1)]) >> 16) & 63;
|
|
*pdest++ = d_8to24table[r_turb_pbase[(tturb << 6) + sturb]];
|
|
r_turb_s += r_turb_sstep;
|
|
r_turb_t += r_turb_tstep;
|
|
} while (--r_turb_spancount > 0);
|
|
r_turb_pdest = (byte *)pdest;
|
|
}
|
|
break;
|
|
default:
|
|
Sys_Error("D_DrawTurbulentSpan: unsupported r_pixbytes %i",
|
|
sw32_ctx->pixbytes);
|
|
}
|
|
}
|
|
|
|
void
|
|
sw32_Turbulent (espan_t *pspan)
|
|
{
|
|
int count;
|
|
fixed16_t snext, tnext;
|
|
float sdivz, tdivz, zi, z, du, dv, spancountminus1;
|
|
float sdivz16stepu, tdivz16stepu, zi16stepu;
|
|
|
|
r_turb_turb = sw32_sintable + ((int) (vr_data.realtime * SPEED) & (CYCLE - 1));
|
|
|
|
r_turb_sstep = 0; // keep compiler happy
|
|
r_turb_tstep = 0; // ditto
|
|
|
|
r_turb_pbase = (byte *) sw32_cacheblock;
|
|
|
|
sdivz16stepu = sw32_d_sdivzstepu * 16;
|
|
tdivz16stepu = sw32_d_tdivzstepu * 16;
|
|
zi16stepu = d_zistepu * 16 * 65536;
|
|
|
|
do {
|
|
r_turb_pdest = (byte *) sw32_d_viewbuffer + ((sw32_screenwidth * pspan->v) +
|
|
pspan->u) * sw32_ctx->pixbytes;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
r_turb_s = (int) (sdivz * z) + sw32_sadjust;
|
|
if (r_turb_s > sw32_bbextents)
|
|
r_turb_s = sw32_bbextents;
|
|
else if (r_turb_s < 0)
|
|
r_turb_s = 0;
|
|
|
|
r_turb_t = (int) (tdivz * z) + sw32_tadjust;
|
|
if (r_turb_t > sw32_bbextentt)
|
|
r_turb_t = sw32_bbextentt;
|
|
else if (r_turb_t < 0)
|
|
r_turb_t = 0;
|
|
|
|
do {
|
|
// calculate s and t at the far end of the span
|
|
if (count >= 16)
|
|
r_turb_spancount = 16;
|
|
else
|
|
r_turb_spancount = count;
|
|
|
|
count -= r_turb_spancount;
|
|
|
|
if (count) {
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz16stepu;
|
|
tdivz += tdivz16stepu;
|
|
zi += zi16stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
if (snext > sw32_bbextents)
|
|
snext = sw32_bbextents;
|
|
else if (snext < 16)
|
|
snext = 16; // prevent round-off error on <0
|
|
// steps from
|
|
// from causing overstepping & running off the
|
|
// edge of the texture
|
|
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
if (tnext > sw32_bbextentt)
|
|
tnext = sw32_bbextentt;
|
|
else if (tnext < 16)
|
|
tnext = 16; // guard against round-off error on
|
|
// <0 steps
|
|
|
|
r_turb_sstep = (snext - r_turb_s) >> 4;
|
|
r_turb_tstep = (tnext - r_turb_t) >> 4;
|
|
} else {
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in
|
|
// span (so can't step off polygon), clamp, calculate s and t
|
|
// steps across span by division, biasing steps low so we
|
|
// don't run off the texture
|
|
spancountminus1 = (float) (r_turb_spancount - 1);
|
|
sdivz += sw32_d_sdivzstepu * spancountminus1;
|
|
tdivz += sw32_d_tdivzstepu * spancountminus1;
|
|
zi += d_zistepu * 65536.0f * spancountminus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
if (snext > sw32_bbextents)
|
|
snext = sw32_bbextents;
|
|
else if (snext < 16)
|
|
snext = 16; // prevent round-off error on <0 steps
|
|
// from causing overstepping & running
|
|
// off the edge of the texture
|
|
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
if (tnext > sw32_bbextentt)
|
|
tnext = sw32_bbextentt;
|
|
else if (tnext < 16)
|
|
tnext = 16; // guard against round-off error on
|
|
// <0 steps
|
|
|
|
if (r_turb_spancount > 1) {
|
|
r_turb_sstep = (snext - r_turb_s) / (r_turb_spancount - 1);
|
|
r_turb_tstep = (tnext - r_turb_t) / (r_turb_spancount - 1);
|
|
}
|
|
}
|
|
|
|
r_turb_s = r_turb_s & ((CYCLE << 16) - 1);
|
|
r_turb_t = r_turb_t & ((CYCLE << 16) - 1);
|
|
|
|
D_DrawTurbulentSpan ();
|
|
|
|
r_turb_s = snext;
|
|
r_turb_t = tnext;
|
|
|
|
} while (count > 0);
|
|
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
|
|
void
|
|
sw32_D_DrawSpans (espan_t *pspan)
|
|
{
|
|
switch(sw32_ctx->pixbytes) {
|
|
case 1:
|
|
{
|
|
byte *pbase = (byte *) sw32_cacheblock, *pdest;
|
|
int count;
|
|
fixed16_t s, t, snext, tnext, sstep, tstep;
|
|
float sdivz, tdivz, zi, z, du, dv;
|
|
float sdivz8stepu, tdivz8stepu, zi8stepu;
|
|
|
|
sstep = 0; // keep compiler happy
|
|
tstep = 0; // ditto
|
|
|
|
sdivz8stepu = sw32_d_sdivzstepu * 8;
|
|
tdivz8stepu = sw32_d_tdivzstepu * 8;
|
|
zi8stepu = d_zistepu * 8 * 65536;
|
|
|
|
do {
|
|
pdest = (byte *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
|
|
pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
s = (int) (sdivz * z) + sw32_sadjust;
|
|
s = bound(0, s, sw32_bbextents);
|
|
t = (int) (tdivz * z) + sw32_tadjust;
|
|
t = bound(0, t, sw32_bbextentt);
|
|
|
|
while(count >= 8) {
|
|
count -= 8;
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz8stepu;
|
|
tdivz += tdivz8stepu;
|
|
zi += zi8stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(8, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(8, tnext, sw32_bbextentt);
|
|
|
|
sstep = (snext - s) >> 3;
|
|
tstep = (tnext - t) >> 3;
|
|
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s = snext;
|
|
t = tnext;
|
|
pdest += 8;
|
|
}
|
|
if (count)
|
|
{
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in span
|
|
// (so can't step off polygon), clamp, calculate s and t steps
|
|
// across span by division, biasing steps low so we don't run
|
|
// off the texture
|
|
//countminus1 = (float) (count - 1);
|
|
sdivz += sw32_d_sdivzstepu * count; //minus1;
|
|
tdivz += sw32_d_tdivzstepu * count; //minus1;
|
|
zi += d_zistepu * 65536.0f * count; //minus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(count, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(count, tnext, sw32_bbextentt);
|
|
|
|
if (count > 1) {
|
|
sstep = (snext - s) / count; //(count - 1);
|
|
tstep = (tnext - t) / count; //(count - 1);
|
|
|
|
if (count & 4)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 4;
|
|
}
|
|
if (count & 2)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 2;
|
|
}
|
|
if (count & 1)
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
}
|
|
else
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
}
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
short *pbase = (short *) sw32_cacheblock, *pdest;
|
|
int count;
|
|
fixed16_t s, t, snext, tnext, sstep, tstep;
|
|
float sdivz, tdivz, zi, z, du, dv;
|
|
float sdivz8stepu, tdivz8stepu, zi8stepu;
|
|
|
|
sstep = 0; // keep compiler happy
|
|
tstep = 0; // ditto
|
|
|
|
sdivz8stepu = sw32_d_sdivzstepu * 8;
|
|
tdivz8stepu = sw32_d_tdivzstepu * 8;
|
|
zi8stepu = d_zistepu * 8 * 65536;
|
|
|
|
do {
|
|
pdest = (short *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) +
|
|
pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
s = (int) (sdivz * z) + sw32_sadjust;
|
|
s = bound(0, s, sw32_bbextents);
|
|
t = (int) (tdivz * z) + sw32_tadjust;
|
|
t = bound(0, t, sw32_bbextentt);
|
|
|
|
while(count >= 8) {
|
|
count -= 8;
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz8stepu;
|
|
tdivz += tdivz8stepu;
|
|
zi += zi8stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(8, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(8, tnext, sw32_bbextentt);
|
|
|
|
sstep = (snext - s) >> 3;
|
|
tstep = (tnext - t) >> 3;
|
|
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s = snext;t = tnext;
|
|
pdest += 8;
|
|
}
|
|
if (count)
|
|
{
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in span
|
|
// (so can't step off polygon), clamp, calculate s and t steps
|
|
// across span by division, biasing steps low so we don't run
|
|
// off the texture
|
|
//countminus1 = (float) (count - 1);
|
|
sdivz += sw32_d_sdivzstepu * count; //minus1;
|
|
tdivz += sw32_d_tdivzstepu * count; //minus1;
|
|
zi += d_zistepu * 65536.0f * count; //minus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(count, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(count, tnext, sw32_bbextentt);
|
|
|
|
if (count > 1) {
|
|
sstep = (snext - s) / count; //(count - 1);
|
|
tstep = (tnext - t) / count; //(count - 1);
|
|
|
|
if (count & 4)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;t += tstep;
|
|
pdest += 4;
|
|
}
|
|
if (count & 2)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 2;
|
|
}
|
|
if (count & 1)
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
}
|
|
else
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
}
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
int *pbase = (int *) sw32_cacheblock, *pdest;
|
|
int count;
|
|
fixed16_t s, t, snext, tnext, sstep, tstep;
|
|
float sdivz, tdivz, zi, z, du, dv;
|
|
float sdivz8stepu, tdivz8stepu, zi8stepu;
|
|
|
|
sstep = 0; // keep compiler happy
|
|
tstep = 0; // ditto
|
|
|
|
sdivz8stepu = sw32_d_sdivzstepu * 8;
|
|
tdivz8stepu = sw32_d_tdivzstepu * 8;
|
|
zi8stepu = d_zistepu * 8 * 65536;
|
|
|
|
do {
|
|
pdest = (int *) sw32_d_viewbuffer + (sw32_screenwidth * pspan->v) + pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
sdivz = sw32_d_sdivzorigin + dv * sw32_d_sdivzstepv + du * sw32_d_sdivzstepu;
|
|
tdivz = sw32_d_tdivzorigin + dv * sw32_d_tdivzstepv + du * sw32_d_tdivzstepu;
|
|
zi = (d_ziorigin + dv * d_zistepv + du * d_zistepu) * 65536.0f;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
s = (int) (sdivz * z) + sw32_sadjust;
|
|
s = bound(0, s, sw32_bbextents);
|
|
t = (int) (tdivz * z) + sw32_tadjust;
|
|
t = bound(0, t, sw32_bbextentt);
|
|
|
|
while(count >= 8) {
|
|
count -= 8;
|
|
// calculate s/z, t/z, zi->fixed s and t at far end of span,
|
|
// calculate s and t steps across span by shifting
|
|
sdivz += sdivz8stepu;
|
|
tdivz += tdivz8stepu;
|
|
zi += zi8stepu;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(8, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(8, tnext, sw32_bbextentt);
|
|
|
|
sstep = (snext - s) >> 3;
|
|
tstep = (tnext - t) >> 3;
|
|
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[4] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[5] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[6] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[7] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s = snext;
|
|
t = tnext;
|
|
pdest += 8;
|
|
}
|
|
if (count)
|
|
{
|
|
// calculate s/z, t/z, zi->fixed s and t at last pixel in span
|
|
// (so can't step off polygon), clamp, calculate s and t steps
|
|
// across span by division, biasing steps low so we don't run
|
|
// off the texture
|
|
//countminus1 = (float) (count - 1);
|
|
sdivz += sw32_d_sdivzstepu * count; //minus1;
|
|
tdivz += sw32_d_tdivzstepu * count; //minus1;
|
|
zi += d_zistepu * 65536.0f * count; //minus1;
|
|
z = sw32_d_zitable[(unsigned short) zi];
|
|
|
|
// prevent round-off error on <0 steps from from causing
|
|
// overstepping & running off the edge of the texture
|
|
snext = (int) (sdivz * z) + sw32_sadjust;
|
|
snext = bound(count, snext, sw32_bbextents);
|
|
tnext = (int) (tdivz * z) + sw32_tadjust;
|
|
tnext = bound(count, tnext, sw32_bbextentt);
|
|
|
|
if (count > 1) {
|
|
sstep = (snext - s) / count; //(count - 1);
|
|
tstep = (tnext - t) / count; //(count - 1);
|
|
|
|
if (count & 4)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[2] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[3] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 4;
|
|
}
|
|
if (count & 2)
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest[1] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
s += sstep;
|
|
t += tstep;
|
|
pdest += 2;
|
|
}
|
|
if (count & 1)
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
}
|
|
else
|
|
{
|
|
pdest[0] = pbase[(t >> 16) * sw32_cachewidth + (s >> 16)];
|
|
}
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|
|
break;
|
|
default:
|
|
Sys_Error("D_DrawSpans: unsupported r_pixbytes %i", sw32_ctx->pixbytes);
|
|
}
|
|
}
|
|
|
|
void
|
|
sw32_D_DrawZSpans (espan_t *pspan)
|
|
{
|
|
int count, doublecount, izistep;
|
|
int izi;
|
|
short *pdest;
|
|
unsigned int ltemp;
|
|
double zi;
|
|
float du, dv;
|
|
|
|
// FIXME: check for clamping/range problems
|
|
// we count on FP exceptions being turned off to avoid range problems
|
|
izistep = (int) (d_zistepu * 0x8000 * 0x10000);
|
|
|
|
do {
|
|
pdest = sw32_d_pzbuffer + (sw32_d_zwidth * pspan->v) + pspan->u;
|
|
|
|
count = pspan->count;
|
|
|
|
// calculate the initial 1/z
|
|
du = (float) pspan->u;
|
|
dv = (float) pspan->v;
|
|
|
|
zi = d_ziorigin + dv * d_zistepv + du * d_zistepu;
|
|
// we count on FP exceptions being turned off to avoid range problems
|
|
izi = (int) (zi * 0x8000 * 0x10000);
|
|
|
|
// LordHavoc: added big endian case, the old code is not correct on
|
|
// big-endian (results in swapped depth pairs), and is tuned more for
|
|
// x86, PowerPC compilers can probably do a good job with raw loop
|
|
// unrolling if it is even necessary...
|
|
if (bigendien)
|
|
{
|
|
do
|
|
{
|
|
*pdest++ = (short) (izi >> 16);
|
|
izi += izistep;
|
|
}
|
|
while(--count);
|
|
}
|
|
else
|
|
{
|
|
if ((intptr_t) pdest & 0x02) {
|
|
*pdest++ = (short) (izi >> 16);
|
|
izi += izistep;
|
|
count--;
|
|
}
|
|
|
|
if ((doublecount = count >> 1) > 0) {
|
|
do {
|
|
ltemp = izi >> 16;
|
|
izi += izistep;
|
|
ltemp |= izi & 0xFFFF0000;
|
|
izi += izistep;
|
|
*(int *) pdest = ltemp;
|
|
pdest += 2;
|
|
} while (--doublecount > 0);
|
|
}
|
|
|
|
if (count & 1)
|
|
*pdest = (short) (izi >> 16);
|
|
}
|
|
} while ((pspan = pspan->pnext) != NULL);
|
|
}
|