mirror of
https://github.com/nzp-team/fteqw.git
synced 2024-11-26 22:01:50 +00:00
6dd7ddd353
minor change to qclib for hexen2 git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@1735 fc73d0e0-1445-4013-8a0c-d673dee63da5
1786 lines
45 KiB
ArmAsm
1786 lines
45 KiB
ArmAsm
/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

*/
|
|
|
|
//
// d_polysa.s
// x86 assembly-language polygon model drawing code
//
|
|
#define SWQUAKE
|
|
#include "asm_i386.h"
|
|
#include "quakeasm.h"
|
|
#include "asm_draw.h"
|
|
#include "d_ifacea.h"
|
|
|
|
#if id386
|
|
|
|
// !!! if this is changed, it must be changed in d_polyse.c too !!!
#define DPS_MAXSPANS			MAXHEIGHT+1
									// 1 extra for spanpackage that marks end

// SPAN_SIZE is the number of bytes D_PolysetDrawAsm reserves on the stack
// for its span array (subl $(SPAN_SIZE),%esp).  The symbolic formula is kept
// below for reference only; the literal that is actually used equals it only
// if MAXHEIGHT is 1024, spanpackage_t_size is 32 and CACHE_SIZE is at most 32
// -- TODO confirm against the headers before changing any of those.
//#define SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
#define SPAN_SIZE (1024+1+1+1)*32
|
|
|
|
|
|
// Module-private data for the polygon model drawing routines.
	.data

	.align	4
// Spill slots for the two edge deltas that D_PolysetCalcGradientsAsm pops
// off the x87 stack (fstps) and later re-reads as memory operands (fmuls).
p10_minus_p20:	.single		0
p01_minus_p21:	.single		0
// Scratch slots; not referenced in the part of the file reviewed here.
temp0:			.single		0
temp1:			.single		0
Ltemp:			.single		0

// Entry vectors into the unrolled 8-pixel loop of D_PolysetDrawSpans8,
// indexed by (-count) & 7: index 0 enters at LDraw8 (all eight pixels of the
// first pass), index 1 at LDraw7 (seven pixels), ... index 7 at LDraw1.
aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
				.long	LDraw4, LDraw3, LDraw2, LDraw1

// r_zistepx rotated by 16 bits; written once per D_PolysetDrawSpans8 call.
lzistepx:		.long	0
|
|
|
|
|
|
// Code section.  The two routines declared below are implemented outside
// this file and are not referenced in the part of the file reviewed here.
	.text

#ifndef NeXT
	.extern C(D_PolysetSetEdgeTable)
	.extern C(D_RasterizeAliasPolySmooth8Asm)
#endif
|
|
|
|
//----------------------------------------------------------------------
// affine triangle gradient calculation code
//----------------------------------------------------------------------

// D_PolysetCalcGradientsAsm
//
// Computes the x and y gradients of light, s, t and 1/z across the triangle
// r_p0/r_p1/r_p2, following the C reference quoted in the comments below,
// then derives the fixed-point stepping values used by the span drawer.
//
// In:    4(%esp)     = skinwidth (the only stack argument that is read)
//        C(r_p0), C(r_p1), C(r_p2) = arrays of 32-bit ints, elements 0..5
//        C(d_xdenom) = single-precision value used as xstepdenominv
// Out:   C(r_lstepx), C(r_lstepy), C(r_sstepx), C(r_sstepy),
//        C(r_tstepx), C(r_tstepy), C(r_zistepx), C(r_zistepy),
//        C(a_sstepxfrac), C(a_tstepxfrac), C(a_ststepxwhole)
// Clobb: %eax, %ecx, %edx, flags.  The x87 control word is left set to
//        single_cw.  The routine peaks at eight live x87 registers, so the
//        FPU stack must be empty on entry; it is empty again on return.
// Notes: ceil_cw, single_cw and float_minus_1 are defined outside the part
//        of the file reviewed here.  Trailing comments on FPU instructions
//        list the x87 stack after the instruction, st(0) first.

#define skinwidth	4+0

.globl C(D_PolysetCalcGradientsAsm)
C(D_PolysetCalcGradientsAsm):

//	p00_minus_p20 = r_p0[0] - r_p2[0];
//	p01_minus_p21 = r_p0[1] - r_p2[1];
//	p10_minus_p20 = r_p1[0] - r_p2[0];
//	p11_minus_p21 = r_p1[1] - r_p2[1];
//
//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
//			     p00_minus_p20 * p11_minus_p21);
//
//	ystepdenominv = -xstepdenominv;

	fildl	C(r_p0)+0		// r_p0[0]
	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
							//  r_p2[0] | r_p0[0]
	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fsub	%st(2),%st(0)	// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fsub	%st(4),%st(0)	// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
							//  r_p1[1] | r_p2[0] | r_p0[0]
	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
							//  r_p1[1] | r_p2[0] | p10_minus_p20
	fsubp	%st(0),%st(4)	// p01_minus_p21 | r_p2[1] | r_p1[1] |
							//  p00_minus_p20 | p10_minus_p20
	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
							//  p00_minus_p20 | p10_minus_p20
	fsubp	%st(0),%st(1)	// p11_minus_p21 | p01_minus_p21 |
							//  p00_minus_p20 | p10_minus_p20
	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
							//  p00_minus_p20 | p10_minus_p20
	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
							//  p00_minus_p20 | p10_minus_p20
	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
							//  p00_minus_p20 | d_xdenom
	fstps	p10_minus_p20	// p01_minus_p21 | p11_minus_p21 |
							//  p00_minus_p20 | d_xdenom
							// (d_xdenom is called xstepdenominv from here on)
	fstps	p01_minus_p21	// p11_minus_p21 | p00_minus_p20 | xstepdenominv
	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21

//// ceil () for light so positive steps are exaggerated, negative steps
//// diminished,  pushing us away from underflow toward overflow. Underflow is
//// very visible, overflow is very unlikely, because of ambient lighting
//	t0 = r_p0[4] - r_p2[4];
//	t1 = r_p1[4] - r_p2[4];

	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)	// t0 | t1 | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21

//	r_lstepx = (int)
//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
//	r_lstepy = (int)
//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmul	%st(5),%st(0)	// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
							//  t0*p11_minus_p21 | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
							//  t0*p11_minus_p21 | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fmul	%st(5),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld		%st(2)			// xstepdenominv |
							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmuls	float_minus_1	// ystepdenominv |
							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv |
							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv |
							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fldcw	ceil_cw			// the two light stores below run under ceil_cw
	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fldcw	single_cw		// remaining stores run under single_cw

//	t0 = r_p0[2] - r_p2[2];
//	t1 = r_p1[2] - r_p2[2];

	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21

//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
							//  t0*p11_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
							//  t0*p11_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv |
							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv |
							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21

//	t0 = r_p0[3] - r_p2[3];
//	t1 = r_p1[3] - r_p2[3];

	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21

//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
							//  t0*p11_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
							//  t0*p11_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv |
							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
							//   ystepdenominv |
							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
							//   xstepdenominv | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21

//	t0 = r_p0[5] - r_p2[5];
//	t1 = r_p1[5] - r_p2[5];

	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  p11_minus_p21
	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21

//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);
//
// This is the last gradient, so the popping multiplies below consume the
// saved deltas and denominators and leave the FPU stack empty at the end.

	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | p11_minus_p21
	fmulp	%st(0),%st(6)	// t0 | t1 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | t0*p11_minus_p21
	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | t0*p11_minus_p21
	fld		%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
							//  p00_minus_p20 | t0*p11_minus_p21
	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 |
							//  t0*p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
							//  xstepdenominv | p00_minus_p20 |
							//  t0*p11_minus_p21
	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  t0*p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
							//  t0*p11_minus_p21
	fmulp	%st(0),%st(5)	// t0*p10_minus_p20 | t1*p01_minus_p21 |
							//  ystepdenominv | xstepdenominv |
							//  t1*p00_minus_p20 | t0*p11_minus_p21
	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
							//  ystepdenominv | xstepdenominv |
							//  t1*p00_minus_p20 | t0*p10_minus_p20
	fsubrp	%st(0),%st(1)	// t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  ystepdenominv | xstepdenominv |
							//  t1*p00_minus_p20 | t0*p10_minus_p20
	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
							//  xstepdenominv |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  t0*p10_minus_p20
	fsubp	%st(0),%st(4)	// ystepdenominv | xstepdenominv |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  t1*p00_minus_p20 - t0*p10_minus_p20
	fxch	%st(1)			// xstepdenominv | ystepdenominv |
							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
							//  t1*p00_minus_p20 - t0*p10_minus_p20
	fmulp	%st(0),%st(2)	// ystepdenominv |
							//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
							//   xstepdenominv |
							//  t1*p00_minus_p20 - t0*p10_minus_p20
	fmulp	%st(0),%st(2)	// (t1*p01_minus_p21 - t0*p11_minus_p21) *
							//   xstepdenominv |
							//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
							//   ystepdenominv
	fistpl	C(r_zistepx)	// (t1*p00_minus_p20 - t0*p10_minus_p20) *
							//   ystepdenominv
	fistpl	C(r_zistepy)	// (FPU stack empty)

//	a_sstepxfrac = r_sstepx << 16;
//	a_tstepxfrac = r_tstepx << 16;
//
//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
//			(r_sstepx >> 16);
//
// The skinwidth factor is taken from the stack argument at skinwidth(%esp).

	movl	C(r_sstepx),%eax
	movl	C(r_tstepx),%edx
	shll	$16,%eax			// low word becomes 0, fraction in high word
	shll	$16,%edx
	movl	%eax,C(a_sstepxfrac)
	movl	%edx,C(a_tstepxfrac)

	movl	C(r_sstepx),%ecx
	movl	C(r_tstepx),%eax
	sarl	$16,%ecx			// whole part of the s step
	sarl	$16,%eax			// whole part of the t step
	imull	skinwidth(%esp)		// %edx:%eax = %eax * skinwidth (%edx clobbered)
	addl	%ecx,%eax
	movl	%eax,C(a_ststepxwhole)

	ret
|
|
|
|
|
|
//----------------------------------------------------------------------
// recursive subdivision affine triangle drawing code
//
// not C-callable because of stdcall return
//----------------------------------------------------------------------

// D_PolysetRecursiveTriangleAsm (lp1, lp2, lp3)
//
// In:    three dword stack arguments, each a pointer to an array of 32-bit
//        ints.  Elements used: [0] and [1] (edge tests, midpoint, and the
//        column/row of the plotted pixel), [2] and [3] (shifted right by 16
//        to index skintable), [5] (shifted right by 16 to give z).
// Does:  if every edge differs by at most 1 in both [0] and [1], returns.
//        Otherwise rotates the vertices so the long edge is lp1-lp2, builds
//        the midpoint vertex on the stack, plots it when it lies on a
//        leading edge and passes the z test, then recurses on both halves.
// Stack: the callee pops its 12 bytes of arguments (ret $12).
// Regs:  %ebp, %esi, %edi, %ebx are preserved; %eax, %ecx, %edx and flags
//        are clobbered.  While running: %ebx = lp1, %esi = lp2, %edi = lp3.
// Reads: C(zspantable), C(skintable), C(apalremap), C(d_pcolormap),
//        C(d_scantable), C(d_viewbuffer).
//
// Edge tests use "d+1 compared unsigned against 2": ja is taken exactly when
// d < -1 or d > 1.  The loads for the next test are interleaved with the
// current one, so statement order matters.

#define lp1	4+16
#define lp2	8+16
#define lp3	12+16

.globl C(D_PolysetRecursiveTriangleAsm)
C(D_PolysetRecursiveTriangleAsm):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

//	int		*temp;
//	int		d;
//	int		new[6];
//	int		i;
//	int		z;
//	short	*zbuf;
	movl	lp2(%esp),%esi
	movl	lp1(%esp),%ebx
	movl	lp3(%esp),%edi

//	d = lp2[0] - lp1[0];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%esi),%eax

	movl	0(%ebx),%edx
	movl	4(%esi),%ebp

	subl	%edx,%eax
	movl	4(%ebx),%ecx

	subl	%ecx,%ebp			// %ebp = lp2[1] - lp1[1], tested below
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit

//	d = lp2[1] - lp1[1];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%edi),%eax		// preload lp3[0] for the next test
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit

//	d = lp3[0] - lp2[0];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%esi),%edx
	movl	4(%edi),%ebp

	subl	%edx,%eax
	movl	4(%esi),%ecx

	subl	%ecx,%ebp			// %ebp = lp3[1] - lp2[1], tested below
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit2

//	d = lp3[1] - lp2[1];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%ebx),%eax		// preload lp1[0] for the next test
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit2

//	d = lp1[0] - lp3[0];
//	if (d < -1 || d > 1)
//		goto split3;
	movl	0(%edi),%edx
	movl	4(%ebx),%ebp

	subl	%edx,%eax
	movl	4(%edi),%ecx

	subl	%ecx,%ebp			// %ebp = lp1[1] - lp3[1], tested below
	incl	%eax

	incl	%ebp
	movl	%ebx,%edx			// temp = lp1, consumed at LSplit3

	cmpl	$2,%eax
	ja		LSplit3

//	d = lp1[1] - lp3[1];
//	if (d < -1 || d > 1)
//	{
//split3:
//		temp = lp1;
//		lp1 = lp3;
//		lp3 = lp2;
//		lp2 = temp;
//		goto split;
//	}
//
//	return;			// entire tri is filled
//
	cmpl	$2,%ebp
	jna		LDone

LSplit3:
	movl	%edi,%ebx			// lp1 = lp3
	movl	%esi,%edi			// lp3 = lp2
	movl	%edx,%esi			// lp2 = temp (old lp1, saved in %edx above)
	jmp		LSplit

//split2:
LSplit2:

//	temp = lp1;
//	lp1 = lp2;
//	lp2 = lp3;
//	lp3 = temp;
	movl	%ebx,%eax
	movl	%esi,%ebx
	movl	%edi,%esi
	movl	%eax,%edi

//split:
LSplit:

	subl	$24,%esp		// allocate space for a new vertex

//// split this edge
//	new[0] = (lp1[0] + lp2[0]) >> 1;
//	new[1] = (lp1[1] + lp2[1]) >> 1;
//	new[2] = (lp1[2] + lp2[2]) >> 1;
//	new[3] = (lp1[3] + lp2[3]) >> 1;
//	new[5] = (lp1[5] + lp2[5]) >> 1;
//
// new[4] (offset 16) is never written.  On exit from this group %eax holds
// lp1[1] and %ecx holds lp1[0]; the leading-edge test below relies on both.
	movl	8(%ebx),%eax

	movl	8(%esi),%edx
	movl	12(%ebx),%ecx

	addl	%edx,%eax
	movl	12(%esi),%edx

	sarl	$1,%eax
	addl	%edx,%ecx

	movl	%eax,8(%esp)		// new[2]
	movl	20(%ebx),%eax

	sarl	$1,%ecx
	movl	20(%esi),%edx

	movl	%ecx,12(%esp)		// new[3]
	addl	%edx,%eax

	movl	0(%ebx),%ecx		// %ecx = lp1[0]
	movl	0(%esi),%edx

	sarl	$1,%eax
	addl	%ecx,%edx

	movl	%eax,20(%esp)		// new[5]
	movl	4(%ebx),%eax		// %eax = lp1[1]

	sarl	$1,%edx
	movl	4(%esi),%ebp

	movl	%edx,0(%esp)		// new[0]
	addl	%eax,%ebp

	sarl	$1,%ebp
	movl	%ebp,4(%esp)		// new[1]

//// draw the point if splitting a leading edge
//	if (lp2[1] > lp1[1])
//		goto nodraw;
	cmpl	%eax,4(%esi)
	jg		LNoDraw

//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
//		goto nodraw;
	movl	0(%esi),%edx		// movl leaves the flags of the cmpl above intact
	jnz		LDraw

	cmpl	%ecx,%edx
	jl		LNoDraw

LDraw:

// z = new[5] >> 16;
	movl	20(%esp),%edx
	movl	4(%esp),%ecx

	sarl	$16,%edx
	movl	0(%esp),%ebp

//	zbuf = zspantable[new[1]] + new[0];
	movl	C(zspantable)(,%ecx,4),%eax

//	if (z >= *zbuf)
//	{
	cmpw	(%eax,%ebp,2),%dx	// signed 16-bit compare
	jnge	LNoDraw

//		int		pix;
//
//		*zbuf = z;
	movw	%dx,(%eax,%ebp,2)

//		pix = d_pcolormap[apalremap[skintable[new[3]>>16][new[2]>>16]]];
	movl	12(%esp),%eax

	sarl	$16,%eax
	movl	8(%esp),%edx

	sarl	$16,%edx
	subl	%ecx,%ecx			// zero %ecx so %cl can be used as an index

	movl	C(skintable)(,%eax,4),%eax
	movl	4(%esp),%ebp

	movb	(%eax,%edx,),%cl

	movb	C(apalremap)(%ecx),%cl
	movl	C(d_pcolormap),%edx

	movb	(%edx,%ecx,),%dl
	movl	0(%esp),%ecx

//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
	movl	C(d_scantable)(,%ebp,4),%eax
	addl	%eax,%ecx
	movl	C(d_viewbuffer),%eax
	movb	%dl,(%eax,%ecx,1)

//	}
//
//nodraw:
LNoDraw:

//// recursively continue
//	D_PolysetRecursiveTriangleAsm (lp3, lp1, new);
	pushl	%esp				// address of new
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangleAsm)	// pops its own arguments

//	D_PolysetRecursiveTriangleAsm (lp3, new, lp2);
	movl	%esp,%ebx			// %esp points at new again after the call
	pushl	%esi
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangleAsm)
	addl	$24,%esp			// release new

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller stack frame pointer
	ret		$12
|
|
|
|
|
|
//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for affine polygons, with smooth
// shading and no transparency
//----------------------------------------------------------------------

// D_PolysetDrawSpans8 (pspans)
//
// In:    4(%esp) = pointer to the first spanpackage_t record.  Records are
//        processed in order until one whose count field is -999999.
// Regs:  %esi, %ebx, %ebp, %edi are saved and restored; %eax, %ecx, %edx and
//        flags are clobbered.
// Reads: C(r_zistepx), C(d_aspancount), C(erroradjustup), C(errorterm),
//        C(erroradjustdown), C(d_countextrastep), C(ubasestep),
//        C(a_ststepxwhole), C(r_affinetridesc)+atd_skinwidth,
//        C(a_tstepxfrac), C(r_lstepx), C(a_sstepxfrac).
// Writes: C(errorterm), C(d_aspancount), advancetable, tstep, lzistepx, the
//        destination pixels and the 16-bit z values behind each span.
//
// SELF-MODIFYING CODE: every 0x12345678 displacement below is a placeholder.
// D_Aff8Patch stores a table address into the four bytes that end at each
// PPatchN label (first lookup, indexed by the texel) and at each LPatchN
// label (second lookup, indexed by light high byte << 8 | first result).
// Nothing may be inserted between a placeholder instruction and its label.
// D_PolysetAff8Start / D_PolysetAff8End bracket the patched range
// (presumably so the platform layer can make it writable -- verify).

#define pspans	4+8

.globl C(D_PolysetAff8Start)
C(D_PolysetAff8Start):

.globl C(D_PolysetDrawSpans8)
C(D_PolysetDrawSpans8):
	pushl	%esi				// preserve register variables
	pushl	%ebx

	movl	pspans(%esp),%esi	// point to the first span descriptor
	movl	C(r_zistepx),%ecx

	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi

	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
	movl	spanpackage_t_count(%esi),%edx

	movl	%ecx,lzistepx

LSpanLoop:

//		lcount = d_aspancount - pspanpackage->count;
//
//		errorterm += erroradjustup;
//		if (errorterm >= 0)
//		{
//			d_aspancount += d_countextrastep;
//			errorterm -= erroradjustdown;
//		}
//		else
//		{
//			d_aspancount += ubasestep;
//		}
	movl	C(d_aspancount),%eax
	subl	%edx,%eax			// %eax = lcount, tested at LRightEdgeStepped

	movl	C(erroradjustup),%edx
	movl	C(errorterm),%ebx
	addl	%edx,%ebx
	js		LNoTurnover

	movl	C(erroradjustdown),%edx
	movl	C(d_countextrastep),%edi
	subl	%edx,%ebx
	movl	C(d_aspancount),%ebp
	movl	%ebx,C(errorterm)
	addl	%edi,%ebp
	movl	%ebp,C(d_aspancount)
	jmp		LRightEdgeStepped

LNoTurnover:
	movl	C(d_aspancount),%edi
	movl	C(ubasestep),%edx
	movl	%ebx,C(errorterm)
	addl	%edx,%edi
	movl	%edi,C(d_aspancount)

LRightEdgeStepped:
	cmpl	$1,%eax

	jl		LNextSpan			// nothing to draw on this scan
	jz		LExactlyOneLong		// single pixel: dedicated short path

//
// set up advancetable
//
	movl	C(a_ststepxwhole),%ecx
	movl	C(r_affinetridesc)+atd_skinwidth,%edx

	movl	%ecx,advancetable+4	// advance base in t
	addl	%edx,%ecx

	movl	%ecx,advancetable	// advance extra in t
	movl	C(a_tstepxfrac),%ecx

	movw	C(r_lstepx),%cx		// low word = light step, high word = t frac step
	movl	%eax,%edx			// count

	movl	%ecx,tstep
	addl	$7,%edx

	shrl	$3,%edx				// count of full and partial loops
	movl	spanpackage_t_sfrac(%esi),%ebx

	movw	%dx,%bx
	movl	spanpackage_t_pz(%esi),%ecx

	negl	%eax

	movl	spanpackage_t_pdest(%esi),%edi
	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1

	subl	%eax,%edi	// compensate for hardwired offsets
	subl	%eax,%ecx

	subl	%eax,%ecx	// pz is stepped twice because z entries are 2 bytes
	movl	spanpackage_t_tfrac(%esi),%edx

	movw	spanpackage_t_light(%esi),%dx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
	pushl	%esi		// spans pointer, restored after the draw loop

	movl	spanpackage_t_ptex(%esi),%esi
// NOTE(review): this is an indirect jump through the table; newer GNU as
// versions warn unless the operand is written with a leading '*'.  Left as
// is because other tools in the build may consume this file -- verify.
	jmp		aff8entryvec_table(,%eax,4)

// %bx = count of full and partial loops
// %ebx high word = sfrac
// %ecx = pz
// %dx = light
// %edx high word = tfrac
// %esi = ptex
// %edi = pdest
// %ebp = 1/z
// tstep low word = C(r_lstepx)
// tstep high word = C(a_tstepxfrac)
// C(a_sstepxfrac) low word = 0
// C(a_sstepxfrac) high word = C(a_sstepxfrac)
//
// Each of the eight pixel stages below has the same shape:
//   - skip the pixel if the span 1/z (%bp) is less than the stored z;
//   - otherwise look the texel up through the two patched tables, store the
//     new z and store the pixel;
//   - step: adding tstep advances light and tfrac together, and sbbl turns
//     the tfrac carry into 0 or -1 in %eax; lzistepx is added with
//     end-around carry to match the rotated 1/z; adding a_sstepxfrac
//     produces the sfrac carry, which adcl folds into the ptex advance
//     chosen from advancetable (+4 = base, +0 = extra when tfrac carried).

LDrawLoop:

// FIXME: do we need to clamp light? We may need at least a buffer bit to
// keep it from poking into tfrac and causing problems

LDraw8:
	cmpw	(%ecx),%bp
	jl		Lp1
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch8:
	movb	%dh,%ah
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch8:
	movb	%al,(%edi)
Lp1:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw7:
	cmpw	2(%ecx),%bp
	jl		Lp2
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch7:
	movb	%dh,%ah
	movw	%bp,2(%ecx)
	movb	0x12345678(%eax),%al
LPatch7:
	movb	%al,1(%edi)
Lp2:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw6:
	cmpw	4(%ecx),%bp
	jl		Lp3
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch6:
	movb	%dh,%ah
	movw	%bp,4(%ecx)
	movb	0x12345678(%eax),%al
LPatch6:
	movb	%al,2(%edi)
Lp3:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw5:
	cmpw	6(%ecx),%bp
	jl		Lp4
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch5:
	movb	%dh,%ah
	movw	%bp,6(%ecx)
	movb	0x12345678(%eax),%al
LPatch5:
	movb	%al,3(%edi)
Lp4:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw4:
	cmpw	8(%ecx),%bp
	jl		Lp5
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch4:
	movb	%dh,%ah
	movw	%bp,8(%ecx)
	movb	0x12345678(%eax),%al
LPatch4:
	movb	%al,4(%edi)
Lp5:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw3:
	cmpw	10(%ecx),%bp
	jl		Lp6
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch3:
	movb	%dh,%ah
	movw	%bp,10(%ecx)
	movb	0x12345678(%eax),%al
LPatch3:
	movb	%al,5(%edi)
Lp6:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw2:
	cmpw	12(%ecx),%bp
	jl		Lp7
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch2:
	movb	%dh,%ah
	movw	%bp,12(%ecx)
	movb	0x12345678(%eax),%al
LPatch2:
	movb	%al,6(%edi)
Lp7:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw1:
	cmpw	14(%ecx),%bp
	jl		Lp8
	xorl	%eax,%eax
	movb	(%esi),%al
	movb	0x12345678(%eax),%al
PPatch1:
	movb	%dh,%ah
	movw	%bp,14(%ecx)
	movb	0x12345678(%eax),%al
LPatch1:
	movb	%al,7(%edi)
Lp8:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

	addl	$8,%edi				// next group of 8 pixels
	addl	$16,%ecx			// next group of 8 z values

	decw	%bx					// loop count lives in the low word of %ebx
	jnz		LDrawLoop

	popl	%esi				// restore spans pointer
LNextSpan:
	addl	$(spanpackage_t_size),%esi	// point to next span
LNextSpanESISet:
	movl	spanpackage_t_count(%esi),%edx
	cmpl	$-999999,%edx		// any more spans?
	jnz		LSpanLoop			// yes

	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	popl	%ebx				// restore register variables
	popl	%esi
	ret


// draw a one-long span
//
// Same z test and two-table lookup as a pixel stage above, but the inputs are
// read straight from the span record and no stepping is needed.  %esi is
// advanced inside the sequence, hence the exit through LNextSpanESISet.

LExactlyOneLong:

	movl	spanpackage_t_pz(%esi),%ecx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
	movl	spanpackage_t_ptex(%esi),%ebx

	cmpw	(%ecx),%bp
	jl		LNextSpan
	xorl	%eax,%eax
	movb	(%ebx),%al
	movb	0x12345678(%eax),%al
PPatch9:
	movl	spanpackage_t_pdest(%esi),%edi
	movb	spanpackage_t_light+1(%esi),%ah		// high byte of light
	addl	$(spanpackage_t_size),%esi	// point to next span
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch9:
	movb	%al,(%edi)

	jmp		LNextSpanESISet

.globl C(D_PolysetAff8End)
C(D_PolysetAff8End):
|
|
|
|
|
|
#define pcolormap	4
#define ppalremap	8

//----------------------------------------------------------------------
// D_Aff8Patch
//
// C-callable, two stack arguments (offsets are relative to %esp on entry,
// no registers are pushed here):
//	pcolormap(%esp)	value stored at LPatch1-4 .. LPatch9-4
//	ppalremap(%esp)	value stored at PPatch1-4 .. PPatch9-4
//
// Each LPatchN / PPatchN label sits directly after a
// "movb 0x12345678(%eax),%al" instruction in the span drawer, so label-4
// is the 32-bit displacement of that instruction; this routine overwrites
// the 0x12345678 placeholder with the real table address.
//
// Only %eax is modified.
// NOTE(review): this writes into the code stream, so the caller presumably
// has to make those pages writable first - confirm.
//----------------------------------------------------------------------
	.globl C(D_Aff8Patch)
C(D_Aff8Patch):

// second lookup of every pixel: placeholder <- pcolormap
	movl	pcolormap(%esp),%eax
	movl	%eax,LPatch1-4
	movl	%eax,LPatch2-4
	movl	%eax,LPatch3-4
	movl	%eax,LPatch4-4
	movl	%eax,LPatch5-4
	movl	%eax,LPatch6-4
	movl	%eax,LPatch7-4
	movl	%eax,LPatch8-4
	movl	%eax,LPatch9-4

// first lookup of every pixel: placeholder <- ppalremap
	movl	ppalremap(%esp),%eax
	movl	%eax,PPatch1-4
	movl	%eax,PPatch2-4
	movl	%eax,PPatch3-4
	movl	%eax,PPatch4-4
	movl	%eax,PPatch5-4
	movl	%eax,PPatch6-4
	movl	%eax,PPatch7-4
	movl	%eax,PPatch8-4
	movl	%eax,PPatch9-4

	ret
|
|
|
|
|
|
//----------------------------------------------------------------------
// Alias model polygon dispatching code, combined with subdivided affine
// triangle drawing code
//
// D_PolysetDrawAsm takes no stack arguments.
//
// It reserves SPAN_SIZE bytes of stack, stores the CACHE_SIZE-aligned
// start of that area in a_spans, and then:
//   - if r_affinetridesc.drawtype is zero, jumps to D_DrawNonSubdivAsm,
//     which releases the SPAN_SIZE bytes itself before returning;
//   - otherwise walks r_affinetridesc.ptriangles from the last triangle
//     down to the first, skips every triangle whose screen-space cross
//     product is >= 0, and hands the others to
//     D_PolysetRecursiveTriangleAsm.
//
// Registers inside the loop:
//	%ebx	r_affinetridesc.ptriangles
//	%edi	r_affinetridesc.pfinalverts
//	%ebp	byte offset just past the current triangle; steps down by
//		mtri_size and the loop ends when it reaches zero
//	%ecx	index0,  %esi  index1,  %edx  index2
//
// The triangle stride uses mtri_shift / mtri_size, the same constants
// D_DrawNonSubdivAsm uses, rather than a literal 16.
//----------------------------------------------------------------------

	.globl C(D_PolysetDrawAsm)
C(D_PolysetDrawAsm):

//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
//						// one extra because of cache line pretouching
//
//	a_spans = (spanpackage_t *)
//			(((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
	subl	$(SPAN_SIZE),%esp
	movl	%esp,%eax
	addl	$(CACHE_SIZE - 1),%eax
	andl	$(~(CACHE_SIZE - 1)),%eax
	movl	%eax,C(a_spans)

//	if (r_affinetridesc.drawtype)
//		D_DrawSubdiv ();
//	else
//		D_DrawNonSubdiv ();
	movl	C(r_affinetridesc)+atd_drawtype,%eax
	testl	%eax,%eax
	jz	C(D_DrawNonSubdivAsm)	// tail jump; the span buffer stays allocated

	pushl	%ebp			// preserve caller stack frame pointer
	pushl	%esi			// preserve register variables
	pushl	%ebx
	pushl	%edi

//	mtriangle_t		*ptri;
//	finalvert_t		*pfv, *index0, *index1, *index2;
//	int				i;
//	int				lnumtriangles;
//	int				s0, s1, s2;

//	lnumtriangles = r_affinetridesc.numtriangles;
//	ptri = r_affinetridesc.ptriangles;
//	pfv = r_affinetridesc.pfinalverts;
	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
	shll	$(mtri_shift),%ebp	// triangle count -> byte offset
	movl	C(r_affinetridesc)+atd_ptriangles,%ebx
	movl	C(r_affinetridesc)+atd_pfinalverts,%edi

// the loop below is do-while shaped; with no triangles it would read the
// record before the array and wrap the counter, so leave early
	testl	%ebp,%ebp
	jz	Ldone2

//	for (i=0 ; i<lnumtriangles ; i++)
//	{

Llooptop:

//		index0 = pfv + ptri[i].vertindex[0];
//		index1 = pfv + ptri[i].vertindex[1];
//		index2 = pfv + ptri[i].vertindex[2];
	movl	mtri_vertindex+0-mtri_size(%ebx,%ebp,1),%ecx
	movl	mtri_vertindex+4-mtri_size(%ebx,%ebp,1),%esi
	movl	mtri_vertindex+8-mtri_size(%ebx,%ebp,1),%edx

	shll	$(fv_shift),%ecx
	shll	$(fv_shift),%esi
	shll	$(fv_shift),%edx

	addl	%edi,%ecx
	addl	%edi,%esi
	addl	%edi,%edx

//		if (((index0->v[1]-index1->v[1]) *
//				(index0->v[0]-index2->v[0]) -
//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
//		{
//			continue;
//		}
//
//		d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
//
// The x87 sequence is left exactly as it was; the trailing comments track
// the register stack, top of stack first.
	fildl	fv_v+4(%ecx)		// i0v1
	fildl	fv_v+4(%esi)		// i1v1 | i0v1
	fildl	fv_v+0(%ecx)		// i0v0 | i1v1 | i0v1
	fildl	fv_v+0(%edx)		// i2v0 | i0v0 | i1v1 | i0v1
	fxch	%st(2)			// i1v1 | i0v0 | i2v0 | i0v1
	fsubr	%st(3),%st(0)		// i0v1-i1v1 | i0v0 | i2v0 | i0v1
	fildl	fv_v+0(%esi)		// i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
	fxch	%st(2)			// i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
	fsub	%st(0),%st(3)		// i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
	fildl	fv_v+4(%edx)		// i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
	fxch	%st(1)			// i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
	fsubp	%st(0),%st(3)		// i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
	fxch	%st(1)			// i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
	fmulp	%st(0),%st(3)		// i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
	fsubrp	%st(0),%st(3)		// i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
	movl	fv_v+16(%ecx),%eax
	andl	$0xFF00,%eax
	fmulp	%st(0),%st(2)		// i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
	addl	C(acolormap),%eax
	fsubp	%st(0),%st(1)		// (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
	movl	%eax,C(d_pcolormap)	// written even when the triangle is culled
	fstps	Ltemp

// Sign test on the raw float bits: any pattern below 0x80000001 (every
// value with the sign bit clear, plus negative zero) borrows and is
// skipped, so only strictly negative results are drawn.
	movl	Ltemp,%eax
	subl	$0x80000001,%eax
	jc	Lskip

#ifdef ONSEAMSTUFF
//		if (ptri[i].facesfront)
//		{
//			D_PolysetRecursiveTriangleAsm(index0->v, index1->v, index2->v);
	movl	mtri_facesfront-mtri_size(%ebx,%ebp,1),%eax
	testl	%eax,%eax
	jz	Lfacesback
#endif

// NOTE(review): nothing adjusts %esp after the call, so the callee
// presumably pops its three arguments itself - confirm against
// D_PolysetRecursiveTriangleAsm.
	pushl	%edx
	pushl	%esi
	pushl	%ecx
	call	C(D_PolysetRecursiveTriangleAsm)

	subl	$(mtri_size),%ebp
	jnz	Llooptop
	jmp	Ldone2

#ifdef ONSEAMSTUFF
//		}
//		else
//		{
Lfacesback:

//			s0 = index0->v[2];
//			s1 = index1->v[2];
//			s2 = index2->v[2];
	movl	fv_v+8(%ecx),%eax
	pushl	%eax
	movl	fv_v+8(%esi),%eax
	pushl	%eax
	movl	fv_v+8(%edx),%eax
	pushl	%eax
	pushl	%ecx			// index0 and index2 are reloaded after the call
	pushl	%edx

//			if (index0->flags & ALIAS_ONSEAM)
//				index0->v[2] += r_affinetridesc.seamfixupX16;
	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
	jz	Lp11
	addl	%eax,fv_v+8(%ecx)
Lp11:

//			if (index1->flags & ALIAS_ONSEAM)
//				index1->v[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
	jz	Lp12
	addl	%eax,fv_v+8(%esi)
Lp12:

//			if (index2->flags & ALIAS_ONSEAM)
//				index2->v[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
	jz	Lp13
	addl	%eax,fv_v+8(%edx)
Lp13:

//			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
	pushl	%edx
	pushl	%esi
	pushl	%ecx
	call	C(D_PolysetRecursiveTriangleAsm)

//			index0->v[2] = s0;
//			index1->v[2] = s1;
//			index2->v[2] = s2;
	popl	%edx
	popl	%ecx
	popl	%eax			// s2
	movl	%eax,fv_v+8(%edx)
	popl	%eax			// s1
	movl	%eax,fv_v+8(%esi)
	popl	%eax			// s0
	movl	%eax,fv_v+8(%ecx)

//		}
//	}
#endif

Lskip:
	subl	$(mtri_size),%ebp
	jnz	Llooptop

Ldone2:
	popl	%edi			// restore register variables
	popl	%ebx
	popl	%esi
	popl	%ebp			// restore the caller's stack frame

	addl	$(SPAN_SIZE),%esp	// release the span buffer

	ret
|
|
|
|
|
|
//----------------------------------------------------------------------
// Alias model triangle left-edge scanning code
//
// D_PolysetScanLeftEdge (height)
//
// Writes one spanpackage_t per scanline at d_pedgespanpackage, stepping
// the edge state after each one with either the "extra" step set (when
// errorterm + erroradjustup is non-negative) or the "base" step set.
//
// Only the low 16 bits of the height argument are used, and the loop body
// runs before the count is tested.
//
// d_pdest, d_pz, d_aspancount, errorterm and d_pedgespanpackage are
// updated in memory on every scanline.  d_ptex, d_sfrac, d_tfrac, d_light
// and d_zi are read once at entry and then live only in registers; this
// routine does not store them back.
//----------------------------------------------------------------------

// 4 bytes of return address + 16 bytes for the four registers saved below
#define height	4+16

	.globl C(D_PolysetScanLeftEdge)
C(D_PolysetScanLeftEdge):
	pushl	%ebp			// preserve caller stack frame pointer
	pushl	%esi			// preserve register variables
	pushl	%edi
	pushl	%ebx

// %eax: scratch
// %ebx: d_ptex
// %ecx: d_sfrac in high word, count in low word
// %edx: d_tfrac
// %esi: d_pedgespanpackage, errorterm, scratch alternately
// %edi: d_light
// %ebp: d_zi
	movl	height(%esp),%eax
	andl	$0xFFFF,%eax		// scanline count occupies the low word
	movl	C(d_sfrac),%ecx
	orl	%eax,%ecx
	movl	C(d_ptex),%ebx
	movl	C(d_tfrac),%edx
	movl	C(d_pedgespanpackage),%esi
	movl	C(d_light),%edi
	movl	C(d_zi),%ebp

//	do
//	{

LScanLoop:

//		d_pedgespanpackage->ptex = ptex;
//		d_pedgespanpackage->sfrac = d_sfrac << 16;
//		d_pedgespanpackage->tfrac = d_tfrac << 16;
//		d_pedgespanpackage->light = d_light;
//		d_pedgespanpackage->zi = d_zi;
	movl	%ebx,spanpackage_t_ptex(%esi)
	movl	%ecx,spanpackage_t_sfrac(%esi)
	movl	%edx,spanpackage_t_tfrac(%esi)
	movl	%edi,spanpackage_t_light(%esi)
	movl	%ebp,spanpackage_t_zi(%esi)

//		d_pedgespanpackage->pdest = d_pdest;
//		d_pedgespanpackage->pz = d_pz;
//		d_pedgespanpackage->count = d_aspancount;
	movl	C(d_pdest),%eax
	movl	%eax,spanpackage_t_pdest(%esi)
	movl	C(d_pz),%eax
	movl	%eax,spanpackage_t_pz(%esi)
	movl	C(d_aspancount),%eax
	movl	%eax,spanpackage_t_count(%esi)

// pretouch the next cache line
	movb	spanpackage_t_size(%esi),%al

//		d_pedgespanpackage++;
	addl	$(spanpackage_t_size),%esi
	movl	%esi,C(d_pedgespanpackage)

//		errorterm += erroradjustup;
	movl	C(errorterm),%esi
	addl	C(erroradjustup),%esi	// sign flag feeds the js below
	movl	C(d_pdest),%eax		// both branches start from d_pdest in %eax

//		if (errorterm >= 0)
//		{
	js	LNoLeftEdgeTurnover

//			errorterm -= erroradjustdown;
	subl	C(erroradjustdown),%esi
	movl	%esi,C(errorterm)

//			d_pdest += d_pdestextrastep;
	addl	C(d_pdestextrastep),%eax
	movl	%eax,C(d_pdest)

//			d_pz += d_pzextrastep;
	movl	C(d_pz),%eax
	addl	C(d_pzextrastep),%eax
	movl	%eax,C(d_pz)

//			d_aspancount += d_countextrastep;
	movl	C(d_aspancount),%esi
	addl	C(d_countextrastep),%esi
	movl	%esi,C(d_aspancount)

//			d_ptex += d_ptexextrastep;
//			d_sfrac += d_sfracextrastep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
// (the carry out of the s fraction is folded into the texture pointer)
	addl	C(d_sfracextrastep),%ecx
	adcl	C(d_ptexextrastep),%ebx

//			d_tfrac += d_tfracextrastep;
	movl	C(d_tfracextrastep),%eax
	addl	%eax,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc	LSkip1

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip1:

//			d_light += d_lightextrastep;
//			d_zi += d_ziextrastep;
	addl	C(d_lightextrastep),%edi
	addl	C(d_ziextrastep),%ebp

//		}
//	} while (--height);
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx
	testl	$0xFFFF,%ecx		// only the count half decides
	jnz	LScanLoop

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

//		else
//		{

LNoLeftEdgeTurnover:
	movl	%esi,C(errorterm)

//			d_pdest += d_pdestbasestep;
	addl	C(d_pdestbasestep),%eax
	movl	%eax,C(d_pdest)

//			d_aspancount += ubasestep;
	movl	C(d_aspancount),%esi
	addl	C(ubasestep),%esi
	movl	%esi,C(d_aspancount)

//			d_pz += d_pzbasestep;
	movl	C(d_pz),%eax
	addl	C(d_pzbasestep),%eax
	movl	%eax,C(d_pz)

//			d_ptex += d_ptexbasestep;
//			d_sfrac += d_sfracbasestep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
	addl	C(d_sfracbasestep),%ecx
	adcl	C(d_ptexbasestep),%ebx

//			d_tfrac += d_tfracbasestep;
	addl	C(d_tfracbasestep),%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc	LSkip2

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip2:

//			d_light += d_lightbasestep;
//			d_zi += d_zibasestep;
	addl	C(d_lightbasestep),%edi
	addl	C(d_zibasestep),%ebp

//		}
//	} while (--height);
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx
	testl	$0xFFFF,%ecx
	jnz	LScanLoop

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret
|
|
|
|
|
|
//----------------------------------------------------------------------
// Alias model vertex drawing code
//
// D_PolysetDrawFinalVertsAsm (fv, numverts)
//
// For each of numverts consecutive finalvert records starting at fv,
// plots a single pixel when the vertex lies left of
// r_refdef.vrectright, above r_refdef.vrectbottom, and its 16-bit z
// (v[5] >> 16) is not less than the value already in the z buffer.
// The z buffer entry is updated and the pixel colour is
//	acolormap[apalremap[skin texel] + (v[4] & 0xFF00)].
//
// The loop body runs before the count is tested.
//----------------------------------------------------------------------

// argument offsets as seen after the first two pushes below
#define fv			4+8
#define numverts	8+8

	.globl C(D_PolysetDrawFinalVertsAsm)
C(D_PolysetDrawFinalVertsAsm):
	pushl	%ebp			// preserve caller stack frame pointer
	pushl	%ebx

//	int		i, z;
//	short	*zbuf;

	movl	fv(%esp),%ebx		// %ebx walks the vertex array
	movl	numverts(%esp),%ecx	// %ecx counts the vertices left

	pushl	%esi			// preserve register variables
	pushl	%edi

LFVLoop:

//	for (i=0 ; i<numverts ; i++, fv++)
//	{
//	// valid triangle coordinates for filling can include the bottom and
//	// right clip edges, due to the fill rule; these shouldn't be drawn
//		if ((fv->v[0] < r_refdef.vrectright) &&
//			(fv->v[1] < r_refdef.vrectbottom))
//		{
	movl	C(r_refdef)+rd_vrectright,%edx
	movl	fv_v+0(%ebx),%eax	// %eax = x for the rest of the iteration
	cmpl	%edx,%eax
	jge	LNextVert
	movl	C(r_refdef)+rd_vrectbottom,%edx
	movl	fv_v+4(%ebx),%esi	// %esi = y until the final store
	cmpl	%edx,%esi
	jge	LNextVert

//			z = fv->v[5]>>16;
	movl	fv_v+20(%ebx),%edx
	shrl	$16,%edx

//			zbuf = zspantable[fv->v[1]] + fv->v[0];
	movl	C(zspantable)(,%esi,4),%edi

//			if (z >= *zbuf)
//			{
//				int		pix;
	cmpw	(%edi,%eax,2),%dx	// signed 16-bit compare
	jl	LNextVert

//				*zbuf = z;
	movw	%dx,(%edi,%eax,2)

//				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
	movl	fv_v+8(%ebx),%edx
	shrl	$16,%edx		// column within the skin row
	movl	fv_v+12(%ebx),%edi
	shrl	$16,%edi		// skin row number
	movl	C(skintable)(,%edi,4),%edi
	movb	(%edi,%edx),%dl

//				pix = apalremap[pix];
// NOTE(review): apalremap is indexed here as an array symbol, whereas
// acolormap below is loaded and dereferenced as a pointer - confirm that
// this matches how apalremap is declared.
	andl	$0x00FF,%edx
	movb	C(apalremap)(%edx),%dl

//				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
	movl	fv_v+16(%ebx),%edi
	andl	$0xFF00,%edi
	addl	%edx,%edi
	movl	C(acolormap),%edx
	movb	(%edx,%edi,1),%dl

//				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
	movl	C(d_scantable)(,%esi,4),%edi
	addl	%eax,%edi
	movl	C(d_viewbuffer),%esi
	movb	%dl,(%esi,%edi)

//			}
//		}
//	}
LNextVert:
	addl	$(fv_size),%ebx
	decl	%ecx
	jnz	LFVLoop

	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
|
|
|
|
|
|
//----------------------------------------------------------------------
// Alias model non-subdivided polygon dispatching code
//
// not C-callable because of stack buffer cleanup
//
// Entered by a jump from D_PolysetDrawAsm, which has already taken
// SPAN_SIZE bytes of stack; this routine gives those bytes back just
// before its ret.
//
// Walks r_affinetridesc.ptriangles from the last triangle down to the
// first.  For each triangle whose integer cross product d_xdenom is
// negative it fills r_p0 / r_p1 / r_p2, replaces d_xdenom with the float
// float_1 / d_xdenom, and calls D_PolysetSetEdgeTable followed by
// D_RasterizeAliasPolySmooth8Asm.
//
// %ebp holds the byte offset just past the current triangle and is
// expected to survive both calls; %esi is reloaded after them.
//----------------------------------------------------------------------

	.globl C(D_DrawNonSubdivAsm)
C(D_DrawNonSubdivAsm):
	pushl	%ebp			// preserve caller stack frame pointer
	pushl	%ebx
	pushl	%esi			// preserve register variables
	pushl	%edi

//	mtriangle_t		*ptri;
//	finalvert_t		*pfv, *index0, *index1, *index2;
//	int				i;
//	int				lnumtriangles;

//	pfv = r_affinetridesc.pfinalverts;
//	ptri = r_affinetridesc.ptriangles;
//	lnumtriangles = r_affinetridesc.numtriangles;
	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
	shll	$(mtri_shift),%ebp	// triangle count -> byte offset
	movl	C(r_affinetridesc)+atd_ptriangles,%esi

LNDLoop:

//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
//	{
//		index0 = pfv + ptri->vertindex[0];
//		index1 = pfv + ptri->vertindex[1];
//		index2 = pfv + ptri->vertindex[2];
	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
	shll	$(fv_shift),%ecx
	shll	$(fv_shift),%edx
	shll	$(fv_shift),%ebx
	addl	%edi,%ecx		// %ecx = index0
	addl	%edi,%edx		// %edx = index1
	addl	%edi,%ebx		// %ebx = index2

//		d_xdenom = (index0->v[1]-index1->v[1]) *
//				(index0->v[0]-index2->v[0]) -
//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
	movl	fv_v+4(%ecx),%eax
	subl	fv_v+4(%edx),%eax
	movl	fv_v+0(%ecx),%esi
	subl	fv_v+0(%ebx),%esi
	imull	%esi,%eax		// first product

	movl	fv_v+4(%ecx),%edi
	subl	fv_v+4(%ebx),%edi
	movl	fv_v+0(%ecx),%esi
	subl	fv_v+0(%edx),%esi
	imull	%esi,%edi		// second product

	subl	%edi,%eax

//		if (d_xdenom >= 0)
//		{
//			continue;
//		}
	jns	LNextTri

// d_xdenom is first stored as an integer and then overwritten with the
// float float_1 / d_xdenom (float_1 is presumably 1.0 - it is defined
// outside this routine).
	movl	%eax,C(d_xdenom)
	fildl	C(d_xdenom)
	fdivrs	float_1
	fstps	C(d_xdenom)

// From here on:
//	%ecx	index0, then r_affinetridesc.ptriangles
//	%edx	index1
//	%ebx	index2
//	%edi	r_affinetridesc.pstverts
//	%eax, %esi	copy temporaries
//	%ebp	byte offset just past the current triangle

//		r_p0[0] = index0->v[0];		// u
//		r_p0[1] = index0->v[1];		// v
//		r_p0[4] = index0->v[4];		// light
//		r_p0[5] = index0->v[5];		// iz
	movl	fv_v+0(%ecx),%eax
	movl	%eax,C(r_p0)+0
	movl	fv_v+4(%ecx),%esi
	movl	%esi,C(r_p0)+4
	movl	fv_v+16(%ecx),%eax
	movl	%eax,C(r_p0)+16
	movl	fv_v+20(%ecx),%esi
	movl	%esi,C(r_p0)+20

// index0 is finished with, so %ecx can take the triangle array base
	movl	C(r_affinetridesc)+atd_ptriangles,%ecx
	movl	C(r_affinetridesc)+atd_pstverts,%edi

//		st = pstverts + ptri->stindex[0];
//		r_p0[2] = st->s;
//		r_p0[3] = st->t;
	movl	mtri_stindex+0-mtri_size(%ecx,%ebp),%esi
	shll	$(stv_shift),%esi
	addl	%edi,%esi
	movl	stv_s(%esi),%eax
	movl	%eax,C(r_p0)+8
	movl	stv_t(%esi),%esi
	movl	%esi,C(r_p0)+12

//		r_p1[0] = index1->v[0];
//		r_p1[1] = index1->v[1];
//		r_p1[4] = index1->v[4];
//		r_p1[5] = index1->v[5];
	movl	fv_v+0(%edx),%eax
	movl	%eax,C(r_p1)+0
	movl	fv_v+4(%edx),%esi
	movl	%esi,C(r_p1)+4
	movl	fv_v+16(%edx),%eax
	movl	%eax,C(r_p1)+16
	movl	fv_v+20(%edx),%esi
	movl	%esi,C(r_p1)+20

//		st = pstverts + ptri->stindex[1];
//		r_p1[2] = st->s;
//		r_p1[3] = st->t;
	movl	mtri_stindex+4-mtri_size(%ecx,%ebp),%esi
	shll	$(stv_shift),%esi
	addl	%edi,%esi
	movl	stv_s(%esi),%eax
	movl	%eax,C(r_p1)+8
	movl	stv_t(%esi),%esi
	movl	%esi,C(r_p1)+12

//		r_p2[0] = index2->v[0];
//		r_p2[1] = index2->v[1];
//		r_p2[4] = index2->v[4];
//		r_p2[5] = index2->v[5];
	movl	fv_v+0(%ebx),%eax
	movl	%eax,C(r_p2)+0
	movl	fv_v+4(%ebx),%esi
	movl	%esi,C(r_p2)+4
	movl	fv_v+16(%ebx),%eax
	movl	%eax,C(r_p2)+16
	movl	fv_v+20(%ebx),%esi
	movl	%esi,C(r_p2)+20

//		st = pstverts + ptri->stindex[2];
//		r_p2[2] = st->s;
//		r_p2[3] = st->t;
	movl	mtri_stindex+8-mtri_size(%ecx,%ebp),%esi
	shll	$(stv_shift),%esi
	addl	%edi,%esi
	movl	stv_s(%esi),%eax
	movl	%eax,C(r_p2)+8
	movl	stv_t(%esi),%esi
	movl	%esi,C(r_p2)+12

//		D_PolysetSetEdgeTable ();
//		D_RasterizeAliasPolySmooth8Asm ();
	call	C(D_PolysetSetEdgeTable)
	call	C(D_RasterizeAliasPolySmooth8Asm)

LNextTri:
	movl	C(r_affinetridesc)+atd_ptriangles,%esi	// %esi was used as scratch
	subl	$(mtri_size),%ebp
	jnz	LNDLoop
//	}

	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp

	addl	$(SPAN_SIZE),%esp	// span buffer reserved by D_PolysetDrawAsm

	ret
|
|
|
|
|
|
#endif // id386
|
|
|