mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2024-11-27 14:42:07 +00:00
6377734e32
I got tired of having to maintain two separate software renderers, but didn't want to just nuke sw32, so its core changes are merged into sw. Alias model rendering is broken, but I know exactly what's wrong and how to fix it, just need to take care due to asm.
1758 lines
45 KiB
ArmAsm
1758 lines
45 KiB
ArmAsm
/*
|
|
d_polysa.S
|
|
|
|
x86 assembly-language polygon model drawing code
|
|
|
|
Copyright (C) 1996-1997 Id Software, Inc.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to:
|
|
|
|
Free Software Foundation, Inc.
|
|
59 Temple Place - Suite 330
|
|
Boston, MA 02111-1307, USA
|
|
|
|
$Id$
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
# include <config.h>
|
|
#endif
|
|
#include "asm_i386.h"
|
|
#include "quakeasm.h"
|
|
#include "asm_draw.h"
|
|
#include "d_ifacea.h"
|
|
|
|
#ifdef PIC
|
|
#undef USE_INTEL_ASM //XXX asm pic hack
|
|
#endif
|
|
|
|
#ifdef USE_INTEL_ASM
|
|
|
|
// !!! if this is changed, it must be changed in d_polyse.c too !!!
|
|
#define DPS_MAXSPANS MAXHEIGHT+1
|
|
// 1 extra for spanpackage that marks end
|
|
|
|
//#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
|
|
#define SPAN_SIZE (1024+1+1+1)*32
|
|
|
|
|
|
.data
|
|
|
|
.align 4
|
|
p10_minus_p20: .single 0
|
|
p01_minus_p21: .single 0
|
|
temp0: .single 0
|
|
temp1: .single 0
|
|
Ltemp: .single 0
|
|
|
|
aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5
|
|
.long LDraw4, LDraw3, LDraw2, LDraw1
|
|
|
|
lzistepx: .long 0
|
|
|
|
|
|
.text
|
|
|
|
#ifndef NeXT
|
|
.extern C(D_PolysetSetEdgeTable)
|
|
.extern C(D_RasterizeAliasPolySmooth)
|
|
#endif
|
|
|
|
//----------------------------------------------------------------------
|
|
// affine triangle gradient calculation code
|
|
//----------------------------------------------------------------------
|
|
|
|
#define skinwidth 4+0
|
|
|
|
.globl C(D_PolysetCalcGradients)
|
|
C(D_PolysetCalcGradients):
|
|
|
|
// p00_minus_p20 = r_p0[0] - r_p2[0];
|
|
// p01_minus_p21 = r_p0[1] - r_p2[1];
|
|
// p10_minus_p20 = r_p1[0] - r_p2[0];
|
|
// p11_minus_p21 = r_p1[1] - r_p2[1];
|
|
//
|
|
// xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
|
|
// p00_minus_p20 * p11_minus_p21);
|
|
//
|
|
// ystepdenominv = -xstepdenominv;
|
|
|
|
fildl C(r_p0)+0 // r_p0[0]
|
|
fildl C(r_p2)+0 // r_p2[0] | r_p0[0]
|
|
fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0]
|
|
fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
|
|
fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
|
|
fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
|
|
// r_p2[0] | r_p0[0]
|
|
fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
|
|
// r_p2[0] | r_p0[0]
|
|
fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
|
|
// r_p2[0] | r_p0[0]
|
|
fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
|
|
// r_p2[0] | r_p0[0]
|
|
fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] |
|
|
// r_p1[1] | r_p2[0] | r_p0[0]
|
|
fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] |
|
|
// r_p1[1] | r_p2[0] | p10_minus_p20
|
|
fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] |
|
|
// p00_minus_p20 | p10_minus_p20
|
|
fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 |
|
|
// p00_minus_p20 | p10_minus_p20
|
|
fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 |
|
|
// p00_minus_p20 | p10_minus_p20
|
|
fxch %st(1) // p01_minus_p21 | p11_minus_p21 |
|
|
// p00_minus_p20 | p10_minus_p20
|
|
flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 |
|
|
// p00_minus_p20 | p10_minus_p20
|
|
fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
|
|
// p00_minus_p20 | d_xdenom
|
|
fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 |
|
|
// p00_minus_p20 | d_xdenom
|
|
fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv
|
|
fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
|
|
//// ceil () for light so positive steps are exaggerated, negative steps
|
|
//// diminished, pushing us away from underflow toward overflow. Underflow is
|
|
//// very visible, overflow is very unlikely, because of ambient lighting
|
|
// t0 = r_p0[4] - r_p2[4];
|
|
// t1 = r_p1[4] - r_p2[4];
|
|
|
|
fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
|
|
// r_lstepx = (int)
|
|
// ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
|
|
// r_lstepy = (int)
|
|
// ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
|
|
|
|
fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
|
|
// t0*p11_minus_p21 | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
|
|
// t0*p11_minus_p21 | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 | t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
|
|
// t1*p00_minus_p20 | t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fld %st(2) // xstepdenominv |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmuls C(float_minus_1) // ystepdenominv |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv |
|
|
// (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fldcw C(r_ceil_cw)
|
|
fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fldcw C(r_single_cw)
|
|
|
|
// t0 = r_p0[2] - r_p2[2];
|
|
// t1 = r_p1[2] - r_p2[2];
|
|
|
|
fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
|
|
// r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
|
|
// xstepdenominv);
|
|
// r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
|
|
// ystepdenominv);
|
|
|
|
fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv
|
|
fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
|
|
// t0*p11_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
|
|
// t0*p11_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
|
|
// t1*p00_minus_p20 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv |
|
|
// (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv |
|
|
// (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
|
|
// t0 = r_p0[3] - r_p2[3];
|
|
// t1 = r_p1[3] - r_p2[3];
|
|
|
|
fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
|
|
// r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
|
|
// xstepdenominv);
|
|
// r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
|
|
// ystepdenominv);
|
|
|
|
fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
|
|
// t0*p11_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
|
|
// t0*p11_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 |
|
|
// t1*p00_minus_p20 | t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv |
|
|
// (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)*
|
|
// ystepdenominv |
|
|
// (t1*p01_minus_p21 - t0*p11_minus_p21)*
|
|
// xstepdenominv | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
|
|
// t0 = r_p0[5] - r_p2[5];
|
|
// t1 = r_p1[5] - r_p2[5];
|
|
|
|
fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// p11_minus_p21
|
|
fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 | p11_minus_p21
|
|
fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
|
|
// r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
|
|
// xstepdenominv);
|
|
// r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
|
|
// ystepdenominv);
|
|
|
|
fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | p11_minus_p21
|
|
fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | t0*p11_minus_p21
|
|
fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | t0*p11_minus_p21
|
|
fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv |
|
|
// p00_minus_p20 | t0*p11_minus_p21
|
|
fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 |
|
|
// t0*p11_minus_p21
|
|
fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
|
|
// xstepdenominv | p00_minus_p20 |
|
|
// t0*p11_minus_p21
|
|
fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// t0*p11_minus_p21
|
|
fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
|
|
// ystepdenominv | xstepdenominv | p00_minus_p20 |
|
|
// t0*p11_minus_p21
|
|
fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 |
|
|
// ystepdenominv | xstepdenominv |
|
|
// t1*p00_minus_p20 | t0*p11_minus_p21
|
|
fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 |
|
|
// ystepdenominv | xstepdenominv |
|
|
// t1*p00_minus_p20 | t0*p10_minus_p20
|
|
fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// ystepdenominv | xstepdenominv |
|
|
// t1*p00_minus_p20 | t0*p10_minus_p20
|
|
fxch %st(3) // t1*p00_minus_p20 | ystepdenominv |
|
|
// xstepdenominv |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// t0*p10_minus_p20
|
|
fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20
|
|
fxch %st(1) // xstepdenominv | ystepdenominv |
|
|
// t1*p01_minus_p21 - t0*p11_minus_p21 |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20
|
|
fmulp %st(0),%st(2) // ystepdenominv |
|
|
// (t1*p01_minus_p21 - t0*p11_minus_p21) *
|
|
// xstepdenominv |
|
|
// t1*p00_minus_p20 - t0*p10_minus_p20
|
|
fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) *
|
|
// xstepdenominv |
|
|
// (t1*p00_minus_p20 - t0*p10_minus_p20) *
|
|
// ystepdenominv
|
|
fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) *
|
|
// ystepdenominv
|
|
fistpl C(r_zistepy)
|
|
|
|
// a_sstepxfrac = r_sstepx << 16;
|
|
// a_tstepxfrac = r_tstepx << 16;
|
|
//
|
|
// a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
|
|
// (r_sstepx >> 16);
|
|
|
|
movl C(r_sstepx),%eax
|
|
movl C(r_tstepx),%edx
|
|
shll $16,%eax
|
|
shll $16,%edx
|
|
movl %eax,C(a_sstepxfrac)
|
|
movl %edx,C(a_tstepxfrac)
|
|
|
|
movl C(r_sstepx),%ecx
|
|
movl C(r_tstepx),%eax
|
|
sarl $16,%ecx
|
|
sarl $16,%eax
|
|
imull skinwidth(%esp)
|
|
addl %ecx,%eax
|
|
movl %eax,C(a_ststepxwhole)
|
|
|
|
ret
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
// recursive subdivision affine triangle drawing code
|
|
//
|
|
// not C-callable because of stdcall return
|
|
//----------------------------------------------------------------------
|
|
|
|
#define lp1 4+16
|
|
#define lp2 8+16
|
|
#define lp3 12+16
|
|
|
|
.globl C(D_PolysetRecursiveTriangle)
|
|
C(D_PolysetRecursiveTriangle):
|
|
pushl %ebp // preserve caller stack frame pointer
|
|
pushl %esi // preserve register variables
|
|
pushl %edi
|
|
pushl %ebx
|
|
|
|
// int *temp;
|
|
// int d;
|
|
// int new[6];
|
|
// int i;
|
|
// int z;
|
|
// short *zbuf;
|
|
movl lp2(%esp),%esi
|
|
movl lp1(%esp),%ebx
|
|
movl lp3(%esp),%edi
|
|
|
|
// d = lp2[0] - lp1[0];
|
|
// if (d < -1 || d > 1)
|
|
// goto split;
|
|
movl 0(%esi),%eax
|
|
|
|
movl 0(%ebx),%edx
|
|
movl 4(%esi),%ebp
|
|
|
|
subl %edx,%eax
|
|
movl 4(%ebx),%ecx
|
|
|
|
subl %ecx,%ebp
|
|
incl %eax
|
|
|
|
cmpl $2,%eax
|
|
ja LSplit
|
|
|
|
// d = lp2[1] - lp1[1];
|
|
// if (d < -1 || d > 1)
|
|
// goto split;
|
|
movl 0(%edi),%eax
|
|
incl %ebp
|
|
|
|
cmpl $2,%ebp
|
|
ja LSplit
|
|
|
|
// d = lp3[0] - lp2[0];
|
|
// if (d < -1 || d > 1)
|
|
// goto split2;
|
|
movl 0(%esi),%edx
|
|
movl 4(%edi),%ebp
|
|
|
|
subl %edx,%eax
|
|
movl 4(%esi),%ecx
|
|
|
|
subl %ecx,%ebp
|
|
incl %eax
|
|
|
|
cmpl $2,%eax
|
|
ja LSplit2
|
|
|
|
// d = lp3[1] - lp2[1];
|
|
// if (d < -1 || d > 1)
|
|
// goto split2;
|
|
movl 0(%ebx),%eax
|
|
incl %ebp
|
|
|
|
cmpl $2,%ebp
|
|
ja LSplit2
|
|
|
|
// d = lp1[0] - lp3[0];
|
|
// if (d < -1 || d > 1)
|
|
// goto split3;
|
|
movl 0(%edi),%edx
|
|
movl 4(%ebx),%ebp
|
|
|
|
subl %edx,%eax
|
|
movl 4(%edi),%ecx
|
|
|
|
subl %ecx,%ebp
|
|
incl %eax
|
|
|
|
incl %ebp
|
|
movl %ebx,%edx
|
|
|
|
cmpl $2,%eax
|
|
ja LSplit3
|
|
|
|
// d = lp1[1] - lp3[1];
|
|
// if (d < -1 || d > 1)
|
|
// {
|
|
//split3:
|
|
// temp = lp1;
|
|
// lp3 = lp2;
|
|
// lp1 = lp3;
|
|
// lp2 = temp;
|
|
// goto split;
|
|
// }
|
|
//
|
|
// return; // entire tri is filled
|
|
//
|
|
cmpl $2,%ebp
|
|
jna LDone
|
|
|
|
LSplit3:
|
|
movl %edi,%ebx
|
|
movl %esi,%edi
|
|
movl %edx,%esi
|
|
jmp LSplit
|
|
|
|
//split2:
|
|
LSplit2:
|
|
|
|
// temp = lp1;
|
|
// lp1 = lp2;
|
|
// lp2 = lp3;
|
|
// lp3 = temp;
|
|
movl %ebx,%eax
|
|
movl %esi,%ebx
|
|
movl %edi,%esi
|
|
movl %eax,%edi
|
|
|
|
//split:
|
|
LSplit:
|
|
|
|
subl $24,%esp // allocate space for a new vertex
|
|
|
|
//// split this edge
|
|
// new[0] = (lp1[0] + lp2[0]) >> 1;
|
|
// new[1] = (lp1[1] + lp2[1]) >> 1;
|
|
// new[2] = (lp1[2] + lp2[2]) >> 1;
|
|
// new[3] = (lp1[3] + lp2[3]) >> 1;
|
|
// new[5] = (lp1[5] + lp2[5]) >> 1;
|
|
movl 8(%ebx),%eax
|
|
|
|
movl 8(%esi),%edx
|
|
movl 12(%ebx),%ecx
|
|
|
|
addl %edx,%eax
|
|
movl 12(%esi),%edx
|
|
|
|
sarl $1,%eax
|
|
addl %edx,%ecx
|
|
|
|
movl %eax,8(%esp)
|
|
movl 20(%ebx),%eax
|
|
|
|
sarl $1,%ecx
|
|
movl 20(%esi),%edx
|
|
|
|
movl %ecx,12(%esp)
|
|
addl %edx,%eax
|
|
|
|
movl 0(%ebx),%ecx
|
|
movl 0(%esi),%edx
|
|
|
|
sarl $1,%eax
|
|
addl %ecx,%edx
|
|
|
|
movl %eax,20(%esp)
|
|
movl 4(%ebx),%eax
|
|
|
|
sarl $1,%edx
|
|
movl 4(%esi),%ebp
|
|
|
|
movl %edx,0(%esp)
|
|
addl %eax,%ebp
|
|
|
|
sarl $1,%ebp
|
|
movl %ebp,4(%esp)
|
|
|
|
//// draw the point if splitting a leading edge
|
|
// if (lp2[1] > lp1[1])
|
|
// goto nodraw;
|
|
cmpl %eax,4(%esi)
|
|
jg LNoDraw
|
|
|
|
// if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
|
|
// goto nodraw;
|
|
movl 0(%esi),%edx
|
|
jnz LDraw
|
|
|
|
cmpl %ecx,%edx
|
|
jl LNoDraw
|
|
|
|
LDraw:
|
|
|
|
// z = new[5] >> 16;
|
|
movl 20(%esp),%edx
|
|
movl 4(%esp),%ecx
|
|
|
|
sarl $16,%edx
|
|
movl 0(%esp),%ebp
|
|
|
|
// zbuf = zspantable[new[1]] + new[0];
|
|
movl C(zspantable)(,%ecx,4),%eax
|
|
|
|
// if (z >= *zbuf)
|
|
// {
|
|
cmpw (%eax,%ebp,2),%dx
|
|
jnge LNoDraw
|
|
|
|
// int pix;
|
|
//
|
|
// *zbuf = z;
|
|
movw %dx,(%eax,%ebp,2)
|
|
|
|
// pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
|
|
movl 12(%esp),%eax
|
|
|
|
sarl $16,%eax
|
|
movl 8(%esp),%edx
|
|
|
|
sarl $16,%edx
|
|
subl %ecx,%ecx
|
|
|
|
movl C(skintable)(,%eax,4),%eax
|
|
movl 4(%esp),%ebp
|
|
|
|
movb (%eax,%edx,),%cl
|
|
movl C(d_pcolormap),%edx
|
|
|
|
movb (%edx,%ecx,),%dl
|
|
movl 0(%esp),%ecx
|
|
|
|
// d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
|
|
movl C(d_scantable)(,%ebp,4),%eax
|
|
addl %eax,%ecx
|
|
movl C(d_viewbuffer),%eax
|
|
movb %dl,(%eax,%ecx,1)
|
|
|
|
// }
|
|
//
|
|
//nodraw:
|
|
LNoDraw:
|
|
|
|
//// recursively continue
|
|
// D_PolysetRecursiveTriangle (lp3, lp1, new);
|
|
pushl %esp
|
|
pushl %ebx
|
|
pushl %edi
|
|
call C(D_PolysetRecursiveTriangle)
|
|
|
|
// D_PolysetRecursiveTriangle (lp3, new, lp2);
|
|
movl %esp,%ebx
|
|
pushl %esi
|
|
pushl %ebx
|
|
pushl %edi
|
|
call C(D_PolysetRecursiveTriangle)
|
|
addl $24,%esp
|
|
|
|
LDone:
|
|
popl %ebx // restore register variables
|
|
popl %edi
|
|
popl %esi
|
|
popl %ebp // restore caller stack frame pointer
|
|
ret $12
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
// 8-bpp horizontal span drawing code for affine polygons, with smooth
|
|
// shading and no transparency
|
|
//----------------------------------------------------------------------
|
|
|
|
#define pspans 4+8
|
|
|
|
.globl C(D_PolysetAff8Start)
|
|
C(D_PolysetAff8Start):
|
|
|
|
.globl C(polyset_draw_spans_8)
|
|
C(polyset_draw_spans_8):
|
|
pushl %esi // preserve register variables
|
|
pushl %ebx
|
|
|
|
movl pspans(%esp),%esi // point to the first span descriptor
|
|
movl C(r_zistepx),%ecx
|
|
|
|
pushl %ebp // preserve caller's stack frame
|
|
pushl %edi
|
|
|
|
rorl $16,%ecx // put high 16 bits of 1/z step in low word
|
|
movl spanpackage_t_count(%esi),%edx
|
|
|
|
movl %ecx,lzistepx
|
|
|
|
LSpanLoop:
|
|
|
|
// lcount = d_aspancount - pspanpackage->count;
|
|
//
|
|
// errorterm += erroradjustup;
|
|
// if (errorterm >= 0)
|
|
// {
|
|
// d_aspancount += d_countextrastep;
|
|
// errorterm -= erroradjustdown;
|
|
// }
|
|
// else
|
|
// {
|
|
// d_aspancount += ubasestep;
|
|
// }
|
|
movl C(d_aspancount),%eax
|
|
subl %edx,%eax
|
|
|
|
movl C(erroradjustup),%edx
|
|
movl C(errorterm),%ebx
|
|
addl %edx,%ebx
|
|
js LNoTurnover
|
|
|
|
movl C(erroradjustdown),%edx
|
|
movl C(d_countextrastep),%edi
|
|
subl %edx,%ebx
|
|
movl C(d_aspancount),%ebp
|
|
movl %ebx,C(errorterm)
|
|
addl %edi,%ebp
|
|
movl %ebp,C(d_aspancount)
|
|
jmp LRightEdgeStepped
|
|
|
|
LNoTurnover:
|
|
movl C(d_aspancount),%edi
|
|
movl C(ubasestep),%edx
|
|
movl %ebx,C(errorterm)
|
|
addl %edx,%edi
|
|
movl %edi,C(d_aspancount)
|
|
|
|
LRightEdgeStepped:
|
|
cmpl $1,%eax
|
|
|
|
jl LNextSpan
|
|
jz LExactlyOneLong
|
|
|
|
//
|
|
// set up advancetable
|
|
//
|
|
movl C(a_ststepxwhole),%ecx
|
|
movl C(r_affinetridesc)+atd_skinwidth,%edx
|
|
|
|
movl %ecx,C(advancetable)+4 // advance base in t
|
|
addl %edx,%ecx
|
|
|
|
movl %ecx,C(advancetable) // advance extra in t
|
|
movl C(a_tstepxfrac),%ecx
|
|
|
|
movw C(r_lstepx),%cx
|
|
movl %eax,%edx // count
|
|
|
|
movl %ecx,C(tstep)
|
|
addl $7,%edx
|
|
|
|
shrl $3,%edx // count of full and partial loops
|
|
movl spanpackage_t_sfrac(%esi),%ebx
|
|
|
|
movw %dx,%bx
|
|
movl spanpackage_t_pz(%esi),%ecx
|
|
|
|
negl %eax
|
|
|
|
movl spanpackage_t_pdest(%esi),%edi
|
|
andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1
|
|
|
|
subl %eax,%edi // compensate for hardwired offsets
|
|
subl %eax,%ecx
|
|
|
|
subl %eax,%ecx
|
|
movl spanpackage_t_tfrac(%esi),%edx
|
|
|
|
movw spanpackage_t_light(%esi),%dx
|
|
movl spanpackage_t_zi(%esi),%ebp
|
|
|
|
rorl $16,%ebp // put high 16 bits of 1/z in low word
|
|
pushl %esi
|
|
|
|
movl spanpackage_t_ptex(%esi),%esi
|
|
jmp *aff8entryvec_table(,%eax,4)
|
|
|
|
// %bx = count of full and partial loops
|
|
// %ebx high word = sfrac
|
|
// %ecx = pz
|
|
// %dx = light
|
|
// %edx high word = tfrac
|
|
// %esi = ptex
|
|
// %edi = pdest
|
|
// %ebp = 1/z
|
|
// tstep low word = C(r_lstepx)
|
|
// tstep high word = C(a_tstepxfrac)
|
|
// C(a_sstepxfrac) low word = 0
|
|
// C(a_sstepxfrac) high word = C(a_sstepxfrac)
|
|
|
|
LDrawLoop:
|
|
|
|
// FIXME: do we need to clamp light? We may need at least a buffer bit to
|
|
// keep it from poking into tfrac and causing problems
|
|
|
|
LDraw8:
|
|
cmpw (%ecx),%bp
|
|
jl Lp1
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch8:
|
|
movb %al,(%edi)
|
|
Lp1:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw7:
|
|
cmpw 2(%ecx),%bp
|
|
jl Lp2
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,2(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch7:
|
|
movb %al,1(%edi)
|
|
Lp2:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw6:
|
|
cmpw 4(%ecx),%bp
|
|
jl Lp3
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,4(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch6:
|
|
movb %al,2(%edi)
|
|
Lp3:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw5:
|
|
cmpw 6(%ecx),%bp
|
|
jl Lp4
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,6(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch5:
|
|
movb %al,3(%edi)
|
|
Lp4:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw4:
|
|
cmpw 8(%ecx),%bp
|
|
jl Lp5
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,8(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch4:
|
|
movb %al,4(%edi)
|
|
Lp5:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw3:
|
|
cmpw 10(%ecx),%bp
|
|
jl Lp6
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,10(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch3:
|
|
movb %al,5(%edi)
|
|
Lp6:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw2:
|
|
cmpw 12(%ecx),%bp
|
|
jl Lp7
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,12(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch2:
|
|
movb %al,6(%edi)
|
|
Lp7:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
LDraw1:
|
|
cmpw 14(%ecx),%bp
|
|
jl Lp8
|
|
xorl %eax,%eax
|
|
movb %dh,%ah
|
|
movb (%esi),%al
|
|
movw %bp,14(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch1:
|
|
movb %al,7(%edi)
|
|
Lp8:
|
|
addl C(tstep),%edx
|
|
sbbl %eax,%eax
|
|
addl lzistepx,%ebp
|
|
adcl $0,%ebp
|
|
addl C(a_sstepxfrac),%ebx
|
|
adcl C(advancetable)+4(,%eax,4),%esi
|
|
|
|
addl $8,%edi
|
|
addl $16,%ecx
|
|
|
|
decw %bx
|
|
jnz LDrawLoop
|
|
|
|
popl %esi // restore spans pointer
|
|
LNextSpan:
|
|
addl $(spanpackage_t_size),%esi // point to next span
|
|
LNextSpanESISet:
|
|
movl spanpackage_t_count(%esi),%edx
|
|
cmpl $-999999,%edx // any more spans?
|
|
jnz LSpanLoop // yes
|
|
|
|
popl %edi
|
|
popl %ebp // restore the caller's stack frame
|
|
popl %ebx // restore register variables
|
|
popl %esi
|
|
ret
|
|
|
|
|
|
// draw a one-long span
|
|
|
|
LExactlyOneLong:
|
|
|
|
movl spanpackage_t_pz(%esi),%ecx
|
|
movl spanpackage_t_zi(%esi),%ebp
|
|
|
|
rorl $16,%ebp // put high 16 bits of 1/z in low word
|
|
movl spanpackage_t_ptex(%esi),%ebx
|
|
|
|
cmpw (%ecx),%bp
|
|
jl LNextSpan
|
|
xorl %eax,%eax
|
|
movl spanpackage_t_pdest(%esi),%edi
|
|
movb spanpackage_t_light+1(%esi),%ah
|
|
addl $(spanpackage_t_size),%esi // point to next span
|
|
movb (%ebx),%al
|
|
movw %bp,(%ecx)
|
|
movb 0x12345678(%eax),%al
|
|
LPatch9:
|
|
movb %al,(%edi)
|
|
|
|
jmp LNextSpanESISet
|
|
|
|
.globl C(D_PolysetAff8End)
|
|
C(D_PolysetAff8End):
|
|
|
|
|
|
#define pcolormap 4
|
|
|
|
.globl C(D_Aff8Patch)
|
|
C(D_Aff8Patch):
|
|
movl pcolormap(%esp),%eax
|
|
movl %eax,LPatch1-4
|
|
movl %eax,LPatch2-4
|
|
movl %eax,LPatch3-4
|
|
movl %eax,LPatch4-4
|
|
movl %eax,LPatch5-4
|
|
movl %eax,LPatch6-4
|
|
movl %eax,LPatch7-4
|
|
movl %eax,LPatch8-4
|
|
movl %eax,LPatch9-4
|
|
|
|
ret
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
// Alias model polygon dispatching code, combined with subdivided affine
|
|
// triangle drawing code
|
|
//----------------------------------------------------------------------
|
|
|
|
.globl C(D_PolysetDraw)
|
|
C(D_PolysetDraw):
|
|
|
|
// spanpackage_t spans[DPS_MAXSPANS + 1 +
|
|
// ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
|
|
// // one extra because of cache line pretouching
|
|
//
|
|
// a_spans = (spanpackage_t *)
|
|
// (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
|
|
subl $(SPAN_SIZE),%esp
|
|
movl %esp,%eax
|
|
addl $(CACHE_SIZE - 1),%eax
|
|
andl $(~(CACHE_SIZE - 1)),%eax
|
|
movl %eax,C(a_spans)
|
|
|
|
// if (r_affinetridesc.drawtype)
|
|
// D_DrawSubdiv ();
|
|
// else
|
|
// D_DrawNonSubdiv ();
|
|
movl C(r_affinetridesc)+atd_drawtype,%eax
|
|
testl %eax,%eax
|
|
jz C(D_DrawNonSubdiv)
|
|
|
|
pushl %ebp // preserve caller stack frame pointer
|
|
|
|
// lnumtriangles = r_affinetridesc.numtriangles;
|
|
movl C(r_affinetridesc)+atd_numtriangles,%ebp
|
|
|
|
pushl %esi // preserve register variables
|
|
shll $mtri_shift,%ebp
|
|
|
|
pushl %ebx
|
|
// ptri = r_affinetridesc.ptriangles;
|
|
movl C(r_affinetridesc)+atd_ptriangles,%ebx
|
|
|
|
pushl %edi
|
|
|
|
// mtriangle_t *ptri;
|
|
// finalvert_t *pfv, *index0, *index1, *index2;
|
|
// int i;
|
|
// int lnumtriangles;
|
|
// int s0, s1, s2;
|
|
|
|
// pfv = r_affinetridesc.pfinalverts;
|
|
movl C(r_affinetridesc)+atd_pfinalverts,%edi
|
|
|
|
// for (i=0 ; i<lnumtriangles ; i++)
|
|
// {
|
|
|
|
Llooptop:
|
|
|
|
// index0 = pfv + ptri[i].vertindex[0];
|
|
// index1 = pfv + ptri[i].vertindex[1];
|
|
// index2 = pfv + ptri[i].vertindex[2];
|
|
movl mtri_vertindex-mtri_size+0(%ebx,%ebp,),%ecx
|
|
movl mtri_vertindex-mtri_size+4(%ebx,%ebp,),%esi
|
|
|
|
shll $(fv_shift),%ecx
|
|
movl mtri_vertindex-mtri_size+8(%ebx,%ebp,),%edx
|
|
|
|
shll $(fv_shift),%esi
|
|
addl %edi,%ecx
|
|
|
|
shll $(fv_shift),%edx
|
|
addl %edi,%esi
|
|
|
|
addl %edi,%edx
|
|
|
|
// if (((index0->v[1]-index1->v[1]) *
|
|
// (index0->v[0]-index2->v[0]) -
|
|
// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
|
|
// {
|
|
// continue;
|
|
// }
|
|
//
|
|
// d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
|
|
fildl fv_v+4(%ecx) // i0v1
|
|
fildl fv_v+4(%esi) // i1v1 | i0v1
|
|
fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1
|
|
fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1
|
|
fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1
|
|
fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1
|
|
fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
|
|
fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
|
|
fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
|
|
fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
|
|
fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
|
|
fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
|
|
fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
|
|
fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
|
|
fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
|
|
movl fv_v+16(%ecx),%eax
|
|
andl $0xFF00,%eax
|
|
fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
|
|
addl C(acolormap),%eax
|
|
fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
|
|
movl %eax,C(d_pcolormap)
|
|
fstps Ltemp
|
|
movl Ltemp,%eax
|
|
subl $0x80000001,%eax
|
|
jc Lskip
|
|
|
|
// if (ptri[i].facesfront)
|
|
// {
|
|
// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
|
|
movl mtri_facesfront-mtri_size(%ebx,%ebp,),%eax
|
|
testl %eax,%eax
|
|
jz Lfacesback
|
|
|
|
pushl %edx
|
|
pushl %esi
|
|
pushl %ecx
|
|
call C(D_PolysetRecursiveTriangle)
|
|
|
|
subl $mtri_size,%ebp
|
|
jnz Llooptop
|
|
jmp Ldone2
|
|
|
|
// }
|
|
// else
|
|
// {
|
|
Lfacesback:
|
|
|
|
// s0 = index0->v[2];
|
|
// s1 = index1->v[2];
|
|
// s2 = index2->v[2];
|
|
movl fv_v+8(%ecx),%eax
|
|
pushl %eax
|
|
movl fv_v+8(%esi),%eax
|
|
pushl %eax
|
|
movl fv_v+8(%edx),%eax
|
|
pushl %eax
|
|
pushl %ecx
|
|
pushl %edx
|
|
|
|
// if (index0->flags & ALIAS_ONSEAM)
|
|
// index0->v[2] += r_affinetridesc.seamfixupX16;
|
|
movl C(r_affinetridesc)+atd_seamfixupX16,%eax
|
|
testl $(ALIAS_ONSEAM),fv_flags(%ecx)
|
|
jz Lp11
|
|
addl %eax,fv_v+8(%ecx)
|
|
Lp11:
|
|
|
|
// if (index1->flags & ALIAS_ONSEAM)
|
|
// index1->v[2] += r_affinetridesc.seamfixupX16;
|
|
testl $(ALIAS_ONSEAM),fv_flags(%esi)
|
|
jz Lp12
|
|
addl %eax,fv_v+8(%esi)
|
|
Lp12:
|
|
|
|
// if (index2->flags & ALIAS_ONSEAM)
|
|
// index2->v[2] += r_affinetridesc.seamfixupX16;
|
|
testl $(ALIAS_ONSEAM),fv_flags(%edx)
|
|
jz Lp13
|
|
addl %eax,fv_v+8(%edx)
|
|
Lp13:
|
|
|
|
// D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
|
|
pushl %edx
|
|
pushl %esi
|
|
pushl %ecx
|
|
call C(D_PolysetRecursiveTriangle)
|
|
|
|
// index0->v[2] = s0;
|
|
// index1->v[2] = s1;
|
|
// index2->v[2] = s2;
|
|
popl %edx
|
|
popl %ecx
|
|
popl %eax
|
|
movl %eax,fv_v+8(%edx)
|
|
popl %eax
|
|
movl %eax,fv_v+8(%esi)
|
|
popl %eax
|
|
movl %eax,fv_v+8(%ecx)
|
|
|
|
// }
|
|
// }
|
|
Lskip:
|
|
subl $mtri_size,%ebp
|
|
jnz Llooptop
|
|
|
|
Ldone2:
|
|
popl %edi // restore the caller's stack frame
|
|
popl %ebx
|
|
popl %esi // restore register variables
|
|
popl %ebp
|
|
|
|
addl $(SPAN_SIZE),%esp
|
|
|
|
ret
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
// Alias model triangle left-edge scanning code
|
|
//----------------------------------------------------------------------
|
|
|
|
#define height 4+16
|
|
|
|
.globl C(D_PolysetScanLeftEdge)
|
|
C(D_PolysetScanLeftEdge):
|
|
pushl %ebp // preserve caller stack frame pointer
|
|
pushl %esi // preserve register variables
|
|
pushl %edi
|
|
pushl %ebx
|
|
|
|
movl height(%esp),%eax
|
|
movl C(d_sfrac),%ecx
|
|
andl $0xFFFF,%eax
|
|
movl C(d_ptex),%ebx
|
|
orl %eax,%ecx
|
|
movl C(d_pedgespanpackage),%esi
|
|
movl C(d_tfrac),%edx
|
|
movl C(d_light),%edi
|
|
movl C(d_zi),%ebp
|
|
|
|
// %eax: scratch
|
|
// %ebx: d_ptex
|
|
// %ecx: d_sfrac in high word, count in low word
|
|
// %edx: d_tfrac
|
|
// %esi: d_pedgespanpackage, errorterm, scratch alternately
|
|
// %edi: d_light
|
|
// %ebp: d_zi
|
|
|
|
// do
|
|
// {
|
|
|
|
LScanLoop:
|
|
|
|
// d_pedgespanpackage->ptex = ptex;
|
|
// d_pedgespanpackage->pdest = d_pdest;
|
|
// d_pedgespanpackage->pz = d_pz;
|
|
// d_pedgespanpackage->count = d_aspancount;
|
|
// d_pedgespanpackage->light = d_light;
|
|
// d_pedgespanpackage->zi = d_zi;
|
|
// d_pedgespanpackage->sfrac = d_sfrac << 16;
|
|
// d_pedgespanpackage->tfrac = d_tfrac << 16;
|
|
movl %ebx,spanpackage_t_ptex(%esi)
|
|
movl C(d_pdest),%eax
|
|
movl %eax,spanpackage_t_pdest(%esi)
|
|
movl C(d_pz),%eax
|
|
movl %eax,spanpackage_t_pz(%esi)
|
|
movl C(d_aspancount),%eax
|
|
movl %eax,spanpackage_t_count(%esi)
|
|
movl %edi,spanpackage_t_light(%esi)
|
|
movl %ebp,spanpackage_t_zi(%esi)
|
|
movl %ecx,spanpackage_t_sfrac(%esi)
|
|
movl %edx,spanpackage_t_tfrac(%esi)
|
|
|
|
// pretouch the next cache line
|
|
movb spanpackage_t_size(%esi),%al
|
|
|
|
// d_pedgespanpackage++;
|
|
addl $(spanpackage_t_size),%esi
|
|
movl C(erroradjustup),%eax
|
|
movl %esi,C(d_pedgespanpackage)
|
|
|
|
// errorterm += erroradjustup;
|
|
movl C(errorterm),%esi
|
|
addl %eax,%esi
|
|
movl C(d_pdest),%eax
|
|
|
|
// if (errorterm >= 0)
|
|
// {
|
|
js LNoLeftEdgeTurnover
|
|
|
|
// errorterm -= erroradjustdown;
|
|
// d_pdest += d_pdestextrastep;
|
|
subl C(erroradjustdown),%esi
|
|
addl C(d_pdestextrastep),%eax
|
|
movl %esi,C(errorterm)
|
|
movl %eax,C(d_pdest)
|
|
|
|
// d_pz += d_pzextrastep;
|
|
// d_aspancount += d_countextrastep;
|
|
// d_ptex += d_ptexextrastep;
|
|
// d_sfrac += d_sfracextrastep;
|
|
// d_ptex += d_sfrac >> 16;
|
|
// d_sfrac &= 0xFFFF;
|
|
// d_tfrac += d_tfracextrastep;
|
|
movl C(d_pz),%eax
|
|
movl C(d_aspancount),%esi
|
|
addl C(d_pzextrastep),%eax
|
|
addl C(d_sfracextrastep),%ecx
|
|
adcl C(d_ptexextrastep),%ebx
|
|
addl C(d_countextrastep),%esi
|
|
movl %eax,C(d_pz)
|
|
movl C(d_tfracextrastep),%eax
|
|
movl %esi,C(d_aspancount)
|
|
addl %eax,%edx
|
|
|
|
// if (d_tfrac & 0x10000)
|
|
// {
|
|
jnc LSkip1
|
|
|
|
// d_ptex += r_affinetridesc.skinwidth;
|
|
// d_tfrac &= 0xFFFF;
|
|
addl C(r_affinetridesc)+atd_skinwidth,%ebx
|
|
|
|
// }
|
|
|
|
LSkip1:
|
|
|
|
// d_light += d_lightextrastep;
|
|
// d_zi += d_ziextrastep;
|
|
addl C(d_lightextrastep),%edi
|
|
addl C(d_ziextrastep),%ebp
|
|
|
|
// }
|
|
movl C(d_pedgespanpackage),%esi
|
|
decl %ecx
|
|
testl $0xFFFF,%ecx
|
|
jnz LScanLoop
|
|
|
|
popl %ebx
|
|
popl %edi
|
|
popl %esi
|
|
popl %ebp
|
|
ret
|
|
|
|
// else
|
|
// {
|
|
|
|
LNoLeftEdgeTurnover:
|
|
movl %esi,C(errorterm)
|
|
|
|
// d_pdest += d_pdestbasestep;
|
|
addl C(d_pdestbasestep),%eax
|
|
movl %eax,C(d_pdest)
|
|
|
|
// d_pz += d_pzbasestep;
|
|
// d_aspancount += ubasestep;
|
|
// d_ptex += d_ptexbasestep;
|
|
// d_sfrac += d_sfracbasestep;
|
|
// d_ptex += d_sfrac >> 16;
|
|
// d_sfrac &= 0xFFFF;
|
|
movl C(d_pz),%eax
|
|
movl C(d_aspancount),%esi
|
|
addl C(d_pzbasestep),%eax
|
|
addl C(d_sfracbasestep),%ecx
|
|
adcl C(d_ptexbasestep),%ebx
|
|
addl C(ubasestep),%esi
|
|
movl %eax,C(d_pz)
|
|
movl %esi,C(d_aspancount)
|
|
|
|
// d_tfrac += d_tfracbasestep;
|
|
movl C(d_tfracbasestep),%esi
|
|
addl %esi,%edx
|
|
|
|
// if (d_tfrac & 0x10000)
|
|
// {
|
|
jnc LSkip2
|
|
|
|
// d_ptex += r_affinetridesc.skinwidth;
|
|
// d_tfrac &= 0xFFFF;
|
|
addl C(r_affinetridesc)+atd_skinwidth,%ebx
|
|
|
|
// }
|
|
|
|
LSkip2:
|
|
|
|
// d_light += d_lightbasestep;
|
|
// d_zi += d_zibasestep;
|
|
addl C(d_lightbasestep),%edi
|
|
addl C(d_zibasestep),%ebp
|
|
|
|
// }
|
|
// } while (--height);
|
|
movl C(d_pedgespanpackage),%esi
|
|
decl %ecx
|
|
testl $0xFFFF,%ecx
|
|
jnz LScanLoop
|
|
|
|
popl %ebx
|
|
popl %edi
|
|
popl %esi
|
|
popl %ebp
|
|
ret
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
// Alias model vertex drawing code
|
|
//----------------------------------------------------------------------
|
|
|
|
#define fv 4+8
|
|
#define numverts 8+8
|
|
|
|
.globl C(D_PolysetDrawFinalVerts)
|
|
C(D_PolysetDrawFinalVerts):
|
|
pushl %ebp // preserve caller stack frame pointer
|
|
pushl %ebx
|
|
|
|
// int i, z;
|
|
// short *zbuf;
|
|
|
|
movl numverts(%esp),%ecx
|
|
movl fv(%esp),%ebx
|
|
|
|
pushl %esi // preserve register variables
|
|
pushl %edi
|
|
|
|
LFVLoop:
|
|
|
|
// for (i=0 ; i<numverts ; i++, fv++)
|
|
// {
|
|
// // valid triangle coordinates for filling can include the bottom and
|
|
// // right clip edges, due to the fill rule; these shouldn't be drawn
|
|
// if ((fv->v[0] < r_refdef.vrectright) &&
|
|
// (fv->v[1] < r_refdef.vrectbottom))
|
|
// {
|
|
movl fv_v+0(%ebx),%eax
|
|
movl C(r_refdef)+rd_vrectright,%edx
|
|
cmpl %edx,%eax
|
|
jge LNextVert
|
|
movl fv_v+4(%ebx),%esi
|
|
movl C(r_refdef)+rd_vrectbottom,%edx
|
|
cmpl %edx,%esi
|
|
jge LNextVert
|
|
|
|
// zbuf = zspantable[fv->v[1]] + fv->v[0];
|
|
movl C(zspantable)(,%esi,4),%edi
|
|
|
|
// z = fv->v[5]>>16;
|
|
movl fv_v+20(%ebx),%edx
|
|
shrl $16,%edx
|
|
|
|
// if (z >= *zbuf)
|
|
// {
|
|
// int pix;
|
|
cmpw (%edi,%eax,2),%dx
|
|
jl LNextVert
|
|
|
|
// *zbuf = z;
|
|
movw %dx,(%edi,%eax,2)
|
|
|
|
// pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
|
|
movl fv_v+12(%ebx),%edi
|
|
shrl $16,%edi
|
|
movl C(skintable)(,%edi,4),%edi
|
|
movl fv_v+8(%ebx),%edx
|
|
shrl $16,%edx
|
|
movb (%edi,%edx),%dl
|
|
|
|
// pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
|
|
movl fv_v+16(%ebx),%edi
|
|
andl $0xFF00,%edi
|
|
andl $0x00FF,%edx
|
|
addl %edx,%edi
|
|
movl C(acolormap),%edx
|
|
movb (%edx,%edi,1),%dl
|
|
|
|
// d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
|
|
movl C(d_scantable)(,%esi,4),%edi
|
|
movl C(d_viewbuffer),%esi
|
|
addl %eax,%edi
|
|
movb %dl,(%esi,%edi)
|
|
|
|
// }
|
|
// }
|
|
// }
|
|
LNextVert:
|
|
addl $(fv_size),%ebx
|
|
decl %ecx
|
|
jnz LFVLoop
|
|
|
|
popl %edi
|
|
popl %esi
|
|
popl %ebx
|
|
popl %ebp
|
|
ret
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
// Alias model non-subdivided polygon dispatching code
|
|
//
|
|
// not C-callable because of stack buffer cleanup
|
|
//----------------------------------------------------------------------
|
|
|
|
.globl C(D_DrawNonSubdiv)
|
|
C(D_DrawNonSubdiv):
|
|
pushl %ebp // preserve caller stack frame pointer
|
|
movl C(r_affinetridesc)+atd_numtriangles,%ebp
|
|
pushl %ebx
|
|
shll $(mtri_shift),%ebp
|
|
pushl %esi // preserve register variables
|
|
movl C(r_affinetridesc)+atd_ptriangles,%esi
|
|
pushl %edi
|
|
|
|
// mtriangle_t *ptri;
|
|
// finalvert_t *pfv, *index0, *index1, *index2;
|
|
// int i;
|
|
// int lnumtriangles;
|
|
|
|
// pfv = r_affinetridesc.pfinalverts;
|
|
// ptri = r_affinetridesc.ptriangles;
|
|
// lnumtriangles = r_affinetridesc.numtriangles;
|
|
|
|
LNDLoop:
|
|
|
|
// for (i=0 ; i<lnumtriangles ; i++, ptri++)
|
|
// {
|
|
// index0 = pfv + ptri->vertindex[0];
|
|
// index1 = pfv + ptri->vertindex[1];
|
|
// index2 = pfv + ptri->vertindex[2];
|
|
movl C(r_affinetridesc)+atd_pfinalverts,%edi
|
|
movl mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
|
|
shll $(fv_shift),%ecx
|
|
movl mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
|
|
shll $(fv_shift),%edx
|
|
movl mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
|
|
shll $(fv_shift),%ebx
|
|
addl %edi,%ecx
|
|
addl %edi,%edx
|
|
addl %edi,%ebx
|
|
|
|
// d_xdenom = (index0->v[1]-index1->v[1]) *
|
|
// (index0->v[0]-index2->v[0]) -
|
|
// (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
|
|
movl fv_v+4(%ecx),%eax
|
|
movl fv_v+0(%ecx),%esi
|
|
subl fv_v+4(%edx),%eax
|
|
subl fv_v+0(%ebx),%esi
|
|
imull %esi,%eax
|
|
movl fv_v+0(%ecx),%esi
|
|
movl fv_v+4(%ecx),%edi
|
|
subl fv_v+0(%edx),%esi
|
|
subl fv_v+4(%ebx),%edi
|
|
imull %esi,%edi
|
|
subl %edi,%eax
|
|
|
|
// if (d_xdenom >= 0)
|
|
// {
|
|
// continue;
|
|
jns LNextTri
|
|
|
|
// }
|
|
|
|
movl %eax,C(d_xdenom)
|
|
fildl C(d_xdenom)
|
|
|
|
// r_p0[0] = index0->v[0]; // u
|
|
// r_p0[1] = index0->v[1]; // v
|
|
// r_p0[2] = index0->v[2]; // s
|
|
// r_p0[3] = index0->v[3]; // t
|
|
// r_p0[4] = index0->v[4]; // light
|
|
// r_p0[5] = index0->v[5]; // iz
|
|
movl fv_v+0(%ecx),%eax
|
|
movl fv_v+4(%ecx),%esi
|
|
movl %eax,C(r_p0)+0
|
|
movl %esi,C(r_p0)+4
|
|
movl fv_v+8(%ecx),%eax
|
|
movl fv_v+12(%ecx),%esi
|
|
movl %eax,C(r_p0)+8
|
|
movl %esi,C(r_p0)+12
|
|
movl fv_v+16(%ecx),%eax
|
|
movl fv_v+20(%ecx),%esi
|
|
movl %eax,C(r_p0)+16
|
|
movl %esi,C(r_p0)+20
|
|
|
|
fdivrs C(float_1)
|
|
|
|
// r_p1[0] = index1->v[0];
|
|
// r_p1[1] = index1->v[1];
|
|
// r_p1[2] = index1->v[2];
|
|
// r_p1[3] = index1->v[3];
|
|
// r_p1[4] = index1->v[4];
|
|
// r_p1[5] = index1->v[5];
|
|
movl fv_v+0(%edx),%eax
|
|
movl fv_v+4(%edx),%esi
|
|
movl %eax,C(r_p1)+0
|
|
movl %esi,C(r_p1)+4
|
|
movl fv_v+8(%edx),%eax
|
|
movl fv_v+12(%edx),%esi
|
|
movl %eax,C(r_p1)+8
|
|
movl %esi,C(r_p1)+12
|
|
movl fv_v+16(%edx),%eax
|
|
movl fv_v+20(%edx),%esi
|
|
movl %eax,C(r_p1)+16
|
|
movl %esi,C(r_p1)+20
|
|
|
|
// r_p2[0] = index2->v[0];
|
|
// r_p2[1] = index2->v[1];
|
|
// r_p2[2] = index2->v[2];
|
|
// r_p2[3] = index2->v[3];
|
|
// r_p2[4] = index2->v[4];
|
|
// r_p2[5] = index2->v[5];
|
|
movl fv_v+0(%ebx),%eax
|
|
movl fv_v+4(%ebx),%esi
|
|
movl %eax,C(r_p2)+0
|
|
movl %esi,C(r_p2)+4
|
|
movl fv_v+8(%ebx),%eax
|
|
movl fv_v+12(%ebx),%esi
|
|
movl %eax,C(r_p2)+8
|
|
movl %esi,C(r_p2)+12
|
|
movl fv_v+16(%ebx),%eax
|
|
movl fv_v+20(%ebx),%esi
|
|
movl %eax,C(r_p2)+16
|
|
movl C(r_affinetridesc)+atd_ptriangles,%edi
|
|
movl %esi,C(r_p2)+20
|
|
movl mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
|
|
|
|
// if (!ptri->facesfront)
|
|
// {
|
|
testl %eax,%eax
|
|
jnz LFacesFront
|
|
|
|
// if (index0->flags & ALIAS_ONSEAM)
|
|
// r_p0[2] += r_affinetridesc.seamfixupX16;
|
|
movl fv_flags(%ecx),%eax
|
|
movl fv_flags(%edx),%esi
|
|
movl fv_flags(%ebx),%edi
|
|
testl $(ALIAS_ONSEAM),%eax
|
|
movl C(r_affinetridesc)+atd_seamfixupX16,%eax
|
|
jz LOnseamDone0
|
|
addl %eax,C(r_p0)+8
|
|
LOnseamDone0:
|
|
|
|
// if (index1->flags & ALIAS_ONSEAM)
|
|
// r_p1[2] += r_affinetridesc.seamfixupX16;
|
|
testl $(ALIAS_ONSEAM),%esi
|
|
jz LOnseamDone1
|
|
addl %eax,C(r_p1)+8
|
|
LOnseamDone1:
|
|
|
|
// if (index2->flags & ALIAS_ONSEAM)
|
|
// r_p2[2] += r_affinetridesc.seamfixupX16;
|
|
testl $(ALIAS_ONSEAM),%edi
|
|
jz LOnseamDone2
|
|
addl %eax,C(r_p2)+8
|
|
LOnseamDone2:
|
|
|
|
// }
|
|
|
|
LFacesFront:
|
|
|
|
fstps C(d_xdenom)
|
|
|
|
// D_PolysetSetEdgeTable ();
|
|
// D_RasterizeAliasPolySmooth ();
|
|
call C(D_PolysetSetEdgeTable)
|
|
call C(D_RasterizeAliasPolySmooth)
|
|
|
|
LNextTri:
|
|
movl C(r_affinetridesc)+atd_ptriangles,%esi
|
|
subl $mtri_size,%ebp
|
|
jnz LNDLoop
|
|
// }
|
|
|
|
popl %edi
|
|
popl %esi
|
|
popl %ebx
|
|
popl %ebp
|
|
|
|
addl $(SPAN_SIZE),%esp
|
|
|
|
ret
|
|
|
|
|
|
#endif // USE_INTEL_ASM
|
|
|
|
#if defined(__linux__) && defined(__ELF__)
|
|
.section .note.GNU-stack,"",%progbits
|
|
#endif
|