/*
	d_polysa.S

	x86 assembly-language polygon model drawing code

	Copyright (C) 1996-1997  Id Software, Inc.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

	See the GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to:

		Free Software Foundation, Inc.
		59 Temple Place - Suite 330
		Boston, MA  02111-1307, USA

	$Id$
*/

// Syntax: GNU as, AT&T operand order (source, destination), run through the
// C preprocessor.  Target: 32-bit x86; arguments are read from the stack.

#ifdef HAVE_CONFIG_H
// NOTE(review): the header name had been lost from this directive (an empty
// #include is a preprocessor error).  <config.h> is the usual autoconf
// spelling; confirm against the project's other sources.
# include <config.h>
#endif
#include "asm_ia32.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"

#ifdef USE_INTEL_ASM

// !!! if this is changed, it must be changed in d_polyse.c too !!!
// 1 extra for spanpackage that marks end
// (parenthesized so the macro expands safely inside larger expressions;
// the value is unchanged)
#define DPS_MAXSPANS	(MAXHEIGHT+1)

//#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
// Bytes of stack reserved by D_PolysetDraw for the span array
// (parenthesized; the value is unchanged)
#define SPAN_SIZE	((1024+1+1+1)*32)

	.data

	.align	4
// scratch floats shared by the gradient and backface-cull code
p10_minus_p20:	.single	0
p01_minus_p21:	.single	0
temp0:		.single	0
temp1:		.single	0
Ltemp:		.single	0

// entry points into the unrolled 8-pixel span loop, indexed by (-count) & 7
aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
			.long	LDraw4, LDraw3, LDraw2, LDraw1

// r_zistepx rotated by 16 bits, set up by D_PolysetDrawSpans8
lzistepx:	.long	0

	.text

#ifndef NeXT
	.extern C(D_PolysetSetEdgeTable)
	.extern C(D_RasterizeAliasPolySmooth)
#endif

//----------------------------------------------------------------------
// affine triangle gradient calculation code
//
// Reads r_p0, r_p1, r_p2 and d_xdenom; stores r_lstepx/y, r_sstepx/y,
// r_tstepx/y, r_zistepx/y, a_sstepxfrac, a_tstepxfrac, a_ststepxwhole.
// One stack argument: skinwidth at 4(%esp).
//----------------------------------------------------------------------

#define skinwidth	4+0

.globl C(D_PolysetCalcGradients)
C(D_PolysetCalcGradients):

//	p00_minus_p20 = r_p0[0] - r_p2[0];
//	p01_minus_p21 = r_p0[1] - r_p2[1];
//	p10_minus_p20 = r_p1[0] - r_p2[0];
//	p11_minus_p21 = r_p1[1] - r_p2[1];
//
//	xstepdenominv = 1.0 /
//			(p10_minus_p20 * p01_minus_p21 -
//			 p00_minus_p20 * p11_minus_p21);
//
//	ystepdenominv = -xstepdenominv;

// Comments to the right of each FPU instruction show the x87 register
// stack after that instruction, top of stack (%st(0)) first.

	fildl	C(r_p0)+0		// r_p0[0]
	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
					//  r_p2[0] | r_p0[0]
	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
					//  r_p2[0] | r_p0[0]
	fsub	%st(2),%st(0)		// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
					//  r_p2[0] | r_p0[0]
	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
					//  r_p2[0] | r_p0[0]
	fsub	%st(4),%st(0)		// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
					//  r_p1[1] | r_p2[0] | r_p0[0]
	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
					//  r_p1[1] | r_p2[0] | p10_minus_p20
	fsubp	%st(0),%st(4)		// p01_minus_p21 | r_p2[1] | r_p1[1] |
					//  p00_minus_p20 | p10_minus_p20
	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
					//  p00_minus_p20 | p10_minus_p20
	fsubp	%st(0),%st(1)		// p11_minus_p21 | p01_minus_p21 |
					//  p00_minus_p20 | p10_minus_p20
	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
					//  p00_minus_p20 | p10_minus_p20
	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
					//  p00_minus_p20 | p10_minus_p20
	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
					//  p00_minus_p20 | d_xdenom
	fstps	p10_minus_p20		// p01_minus_p21 | p11_minus_p21 |
					//  p00_minus_p20 | d_xdenom
	// from here on the loaded d_xdenom value is labelled xstepdenominv
	fstps	p01_minus_p21		// p11_minus_p21 | p00_minus_p20 | xstepdenominv
	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21

//// ceil () for light so positive steps are exaggerated, negative steps
//// diminished,  pushing us away from underflow toward overflow. Underflow is
//// very visible, overflow is very unlikely, because of ambient lighting
//	t0 = r_p0[4] - r_p2[4];
//	t1 = r_p1[4] - r_p2[4];
	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)		// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)		// t0 | t1 | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21

//	r_lstepx = (int)
//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
//	r_lstepy = (int)
//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
	fld	%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmul	%st(5),%st(0)		// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmuls	p01_minus_p21		// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmuls	p10_minus_p20		// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
					//  t0*p11_minus_p21 | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
					//  t0*p11_minus_p21 | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fmul	%st(5),%st(0)		// t1*p00_minus_p20 | t0*p10_minus_p20 |
					//  t1*p01_minus_p21 | t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
					//  t1*p00_minus_p20 | t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubp	%st(0),%st(3)		// t0*p10_minus_p20 | t1*p00_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(1)		// t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld	%st(2)			// xstepdenominv |
					//  t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmuls	float_minus_1		// ystepdenominv |
					//  t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmul	%st(3),%st(0)		// (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//   xstepdenominv |
					//  t1*p00_minus_p20 - t0*p10_minus_p20 |
					//   | ystepdenominv | xstepdenominv |
					//   p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//   xstepdenominv | ystepdenominv |
					//   xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(2),%st(0)		// (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//  ystepdenominv |
					//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//  xstepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	// NOTE(review): ceil_cw / single_cw are defined outside this chunk;
	// presumably control words selecting round-up and the normal mode,
	// so only the two light steps are stored with ceil() - confirm
	fldcw	ceil_cw
	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fldcw	single_cw

//	t0 = r_p0[2] - r_p2[2];
//	t1 = r_p1[2] - r_p2[2];
	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubrp	%st(0),%st(2)		// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)		// t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21

//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);
	fld	%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)		// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmuls	p01_minus_p21		// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmuls	p10_minus_p20		// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
					//  t0*p11_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
					//  t0*p11_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)		// t1*p00_minus_p20 | t0*p10_minus_p20 |
					//  t1*p01_minus_p21 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
					//  t1*p00_minus_p20 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubp	%st(0),%st(3)		// t0*p10_minus_p20 | t1*p00_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubrp	%st(0),%st(1)		// t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmul	%st(2),%st(0)		// (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//   ystepdenominv |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//   ystepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(3),%st(0)		// (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//   xstepdenominv |
					//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//   ystepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//   ystepdenominv |
					//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//   xstepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21

//	t0 = r_p0[3] - r_p2[3];
//	t1 = r_p1[3] - r_p2[3];
	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubrp	%st(0),%st(2)		// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)		// t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21

//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);
	fld	%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)		// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmuls	p01_minus_p21		// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmuls	p10_minus_p20		// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
					//  t0*p11_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
					//  t0*p11_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(6),%st(0)		// t1*p00_minus_p20 | t0*p10_minus_p20 |
					//  t1*p01_minus_p21 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
					//  t1*p00_minus_p20 | t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubp	%st(0),%st(3)		// t0*p10_minus_p20 | t1*p00_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubrp	%st(0),%st(1)		// t1*p00_minus_p20 - t0*p10_minus_p20 |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fmul	%st(2),%st(0)		// (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//   ystepdenominv |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//  ystepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fmul	%st(3),%st(0)		// (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//  xstepdenominv |
					//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//  ystepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
					//  ystepdenominv |
					//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
					//  xstepdenominv | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21

//	t0 = r_p0[5] - r_p2[5];
//	t1 = r_p1[5] - r_p2[5];
	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fld	%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  p11_minus_p21
	fsubrp	%st(0),%st(2)		// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 | p11_minus_p21
	fsubrp	%st(0),%st(2)		// t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21

//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);
// (last gradient: the popping forms consume the shared terms, so the
// FPU stack is empty after the two stores)
	fld	%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | p11_minus_p21
	fmulp	%st(0),%st(6)		// t0 | t1 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | t0*p11_minus_p21
	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | t0*p11_minus_p21
	fld	%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
					//  p00_minus_p20 | t0*p11_minus_p21
	fmuls	p01_minus_p21		// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 |
					//  t0*p11_minus_p21
	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
					//  xstepdenominv | p00_minus_p20 |
					//  t0*p11_minus_p21
	fmuls	p10_minus_p20		// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  t0*p11_minus_p21
	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
					//  ystepdenominv | xstepdenominv | p00_minus_p20 |
					//  t0*p11_minus_p21
	fmulp	%st(0),%st(5)		// t0*p10_minus_p20 | t1*p01_minus_p21 |
					//  ystepdenominv | xstepdenominv |
					//  t1*p00_minus_p20 | t0*p11_minus_p21
	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
					//  ystepdenominv | xstepdenominv |
					//  t1*p00_minus_p20 | t0*p10_minus_p20
	fsubrp	%st(0),%st(1)		// t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  ystepdenominv | xstepdenominv |
					//  t1*p00_minus_p20 | t0*p10_minus_p20
	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
					//  xstepdenominv |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  t0*p10_minus_p20
	fsubp	%st(0),%st(4)		// ystepdenominv | xstepdenominv |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  t1*p00_minus_p20 - t0*p10_minus_p20
	fxch	%st(1)			// xstepdenominv | ystepdenominv |
					//  t1*p01_minus_p21 - t0*p11_minus_p21 |
					//  t1*p00_minus_p20 - t0*p10_minus_p20
	fmulp	%st(0),%st(2)		// ystepdenominv |
					//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
					//   xstepdenominv |
					//  t1*p00_minus_p20 - t0*p10_minus_p20
	fmulp	%st(0),%st(2)		// (t1*p01_minus_p21 - t0*p11_minus_p21) *
					//  xstepdenominv |
					//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
					//  ystepdenominv
	fistpl	C(r_zistepx)		// (t1*p00_minus_p20 - t0*p10_minus_p20) *
					//  ystepdenominv
	fistpl	C(r_zistepy)

//	a_sstepxfrac = r_sstepx << 16;
//	a_tstepxfrac = r_tstepx << 16;
//
//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
//			(r_sstepx >> 16);
	movl	C(r_sstepx),%eax
	movl	C(r_tstepx),%edx
	shll	$16,%eax
	shll	$16,%edx
	movl	%eax,C(a_sstepxfrac)
	movl	%edx,C(a_tstepxfrac)

	movl	C(r_sstepx),%ecx
	movl	C(r_tstepx),%eax
	sarl	$16,%ecx
	sarl	$16,%eax
	// one-operand imull: %edx:%eax = %eax * the skinwidth stack argument
	imull	skinwidth(%esp)
	addl	%ecx,%eax
	movl	%eax,C(a_ststepxwhole)

	ret


//----------------------------------------------------------------------
// recursive subdivision affine triangle drawing code
//
// not C-callable because of stdcall return
//
// Takes three pointers (lp1, lp2, lp3) on the stack, each to an int
// array indexed [0]..[5] below.  The routine pops its own 12 bytes of
// arguments on return.  %ebx, %esi, %edi and %ebp are saved and
// restored; %eax, %ecx and %edx are overwritten.
//----------------------------------------------------------------------

// argument offsets, valid after the four register pushes below
#define lp1	4+16
#define lp2	8+16
#define lp3	12+16

.globl C(D_PolysetRecursiveTriangle)
C(D_PolysetRecursiveTriangle):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

//	int		*temp;
//	int		d;
//	int		new[6];
//	int		i;
//	int		z;
//	short	*zbuf;
	movl	lp2(%esp),%esi
	movl	lp1(%esp),%ebx
	movl	lp3(%esp),%edi

// Each range test below computes d+1 and does one unsigned compare
// against 2: "ja" is taken exactly when d < -1 or d > 1.

//	d = lp2[0] - lp1[0];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%esi),%eax

	movl	0(%ebx),%edx
	movl	4(%esi),%ebp

	subl	%edx,%eax
	movl	4(%ebx),%ecx

	subl	%ecx,%ebp
	incl	%eax

	cmpl	$2,%eax
	ja	LSplit

//	d = lp2[1] - lp1[1];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%edi),%eax
	incl	%ebp

	cmpl	$2,%ebp
	ja	LSplit

//	d = lp3[0] - lp2[0];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%esi),%edx
	movl	4(%edi),%ebp

	subl	%edx,%eax
	movl	4(%esi),%ecx

	subl	%ecx,%ebp
	incl	%eax

	cmpl	$2,%eax
	ja	LSplit2

//	d = lp3[1] - lp2[1];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%ebx),%eax
	incl	%ebp

	cmpl	$2,%ebp
	ja	LSplit2

//	d = lp1[0] - lp3[0];
//	if (d < -1 || d > 1)
//		goto split3;
	movl	0(%edi),%edx
	movl	4(%ebx),%ebp

	subl	%edx,%eax
	movl	4(%edi),%ecx

	subl	%ecx,%ebp
	incl	%eax

	incl	%ebp
	movl	%ebx,%edx			// temp = lp1, used by LSplit3

	cmpl	$2,%eax
	ja	LSplit3

//	d = lp1[1] - lp3[1];
//	if (d < -1 || d > 1)
//	{
//split3:
//		temp = lp1;
//		lp1 = lp3;
//		lp3 = lp2;
//		lp2 = temp;
//		goto split;
//	}
//
//	return;			// entire tri is filled
//
	cmpl	$2,%ebp				// %ebp = (lp1[1] - lp3[1]) + 1
	jna	LDone

// rotate the vertex pointers: lp1 = lp3, lp3 = lp2, lp2 = old lp1 (%edx)
LSplit3:
	movl	%edi,%ebx
	movl	%esi,%edi
	movl	%edx,%esi
	jmp	LSplit

//split2:
LSplit2:

//	temp = lp1;
//	lp1 = lp2;
//	lp2 = lp3;
//	lp3 = temp;
	movl	%ebx,%eax
	movl	%esi,%ebx
	movl	%edi,%esi
	movl	%eax,%edi

//split:
LSplit:

	subl	$24,%esp			// allocate space for a new vertex

//// split this edge
//	new[0] = (lp1[0] + lp2[0]) >> 1;
//	new[1] = (lp1[1] + lp2[1]) >> 1;
//	new[2] = (lp1[2] + lp2[2]) >> 1;
//	new[3] = (lp1[3] + lp2[3]) >> 1;
//	new[5] = (lp1[5] + lp2[5]) >> 1;
// (new[4] is not written)
	movl	8(%ebx),%eax

	movl	8(%esi),%edx
	movl	12(%ebx),%ecx

	addl	%edx,%eax
	movl	12(%esi),%edx

	sarl	$1,%eax
	addl	%edx,%ecx

	movl	%eax,8(%esp)
	movl	20(%ebx),%eax

	sarl	$1,%ecx
	movl	20(%esi),%edx

	movl	%ecx,12(%esp)
	addl	%edx,%eax

	movl	0(%ebx),%ecx			// %ecx = lp1[0], reused by the tie-break below
	movl	0(%esi),%edx

	sarl	$1,%eax
	addl	%ecx,%edx

	movl	%eax,20(%esp)
	movl	4(%ebx),%eax			// %eax = lp1[1], reused by the compare below

	sarl	$1,%edx
	movl	4(%esi),%ebp

	movl	%edx,0(%esp)
	addl	%eax,%ebp

	sarl	$1,%ebp
	movl	%ebp,4(%esp)

//// draw the point if splitting a leading edge
//	if (lp2[1] > lp1[1])
//		goto nodraw;
	cmpl	%eax,4(%esi)
	jg	LNoDraw

//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
//		goto nodraw;
	movl	0(%esi),%edx
	jnz	LDraw				// flags still from the cmpl above; movl leaves them alone

	cmpl	%ecx,%edx
	jl	LNoDraw

LDraw:

// z = new[5] >> 16;
	movl	20(%esp),%edx
	movl	4(%esp),%ecx

	sarl	$16,%edx
	movl	0(%esp),%ebp

//	zbuf = zspantable[new[1]] + new[0];
	movl	C(zspantable)(,%ecx,4),%eax

//	if (z >= *zbuf)
//	{
	cmpw	(%eax,%ebp,2),%dx
	jnge	LNoDraw

//		int		pix;
//
//		*zbuf = z;
	movw	%dx,(%eax,%ebp,2)

//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
	movl	12(%esp),%eax

	sarl	$16,%eax
	movl	8(%esp),%edx

	sarl	$16,%edx
	subl	%ecx,%ecx			// clear %ecx so %cl can serve as a byte index

	movl	C(skintable)(,%eax,4),%eax
	movl	4(%esp),%ebp

	movb	(%eax,%edx,),%cl
	movl	C(d_pcolormap),%edx

	movb	(%edx,%ecx,),%dl
	movl	0(%esp),%ecx

//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
	movl	C(d_scantable)(,%ebp,4),%eax
	addl	%eax,%ecx
	movl	C(d_viewbuffer),%eax
	movb	%dl,(%eax,%ecx,1)

//	}
//
//nodraw:
LNoDraw:

//// recursively continue
//	D_PolysetRecursiveTriangle (lp3, lp1, new);
	pushl	%esp				// pushes the pre-push %esp, i.e. the address of new[]
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)

//	D_PolysetRecursiveTriangle (lp3, new, lp2);
// (the callee popped its own arguments, so %esp points at new[] again)
	movl	%esp,%ebx
	pushl	%esi
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)
	addl	$24,%esp			// release new[]

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller stack frame pointer
	ret	$12				// pop the three pointer arguments


//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for affine polygons, with smooth
// shading and no transparency
//
// One stack argument: pspans, pointer to the first span descriptor.
// Walks the descriptors until one whose count field is -999999.
// The 0x12345678 displacements before the LPatchN labels are
// placeholders; D_Aff8Patch stores the real table address at LPatchN-4.
//----------------------------------------------------------------------

// argument offset, valid after the first two pushes below
#define pspans	4+8

.globl C(D_PolysetAff8Start)
C(D_PolysetAff8Start):

.globl C(D_PolysetDrawSpans8)
C(D_PolysetDrawSpans8):
	pushl	%esi				// preserve register variables
	pushl	%ebx

	movl	pspans(%esp),%esi		// point to the first span descriptor
	movl	C(r_zistepx),%ecx

	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi

	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
	movl	spanpackage_t_count(%esi),%edx

	movl	%ecx,lzistepx

LSpanLoop:

//		lcount = d_aspancount - pspanpackage->count;
//
//		errorterm += erroradjustup;
//		if (errorterm >= 0)
//		{
//			d_aspancount += d_countextrastep;
//			errorterm -= erroradjustdown;
//		}
//		else
//		{
//			d_aspancount += ubasestep;
//		}
	movl	C(d_aspancount),%eax
	subl	%edx,%eax			// %eax = lcount

	movl	C(erroradjustup),%edx
	movl	C(errorterm),%ebx
	addl	%edx,%ebx
	js	LNoTurnover

	movl	C(erroradjustdown),%edx
	movl	C(d_countextrastep),%edi
	subl	%edx,%ebx
	movl	C(d_aspancount),%ebp
	movl	%ebx,C(errorterm)
	addl	%edi,%ebp
	movl	%ebp,C(d_aspancount)
	jmp	LRightEdgeStepped

LNoTurnover:
	movl	C(d_aspancount),%edi
	movl	C(ubasestep),%edx
	movl	%ebx,C(errorterm)
	addl	%edx,%edi
	movl	%edi,C(d_aspancount)

LRightEdgeStepped:
	cmpl	$1,%eax				// lcount < 1: nothing to draw; == 1: single-pixel path

	jl	LNextSpan
	jz	LExactlyOneLong

//
// set up advancetable
//
	movl	C(a_ststepxwhole),%ecx
	movl	C(r_affinetridesc)+atd_skinwidth,%edx

	movl	%ecx,advancetable+4		// advance base in t
	addl	%edx,%ecx

	movl	%ecx,advancetable		// advance extra in t
	movl	C(a_tstepxfrac),%ecx

	movw	C(r_lstepx),%cx
	movl	%eax,%edx			// count

	movl	%ecx,tstep
	addl	$7,%edx

	shrl	$3,%edx				// count of full and partial loops
	movl	spanpackage_t_sfrac(%esi),%ebx

	movw	%dx,%bx
	movl	spanpackage_t_pz(%esi),%ecx

	negl	%eax

	movl	spanpackage_t_pdest(%esi),%edi
	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1

	subl	%eax,%edi	// compensate for hardwired offsets
	subl	%eax,%ecx	// pz is adjusted twice: z entries are 2 bytes wide

	subl	%eax,%ecx
	movl	spanpackage_t_tfrac(%esi),%edx

	movw	spanpackage_t_light(%esi),%dx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
	pushl	%esi		// spans pointer; popped after the draw loop

	movl	spanpackage_t_ptex(%esi),%esi
	jmp	*aff8entryvec_table(,%eax,4)	// enter the unrolled loop part-way for partial groups

// %bx = count of full and partial loops
// %ebx high word = sfrac
// %ecx = pz
// %dx = light
// %edx high word = tfrac
// %esi = ptex
// %edi = pdest
// %ebp = 1/z
// tstep low word = C(r_lstepx)
// tstep high word = C(a_tstepxfrac)
// C(a_sstepxfrac) low word = 0
// C(a_sstepxfrac) high word = C(a_sstepxfrac)

// Each of the eight stages below: compare 1/z (%bp) against the z-buffer
// word and skip the pixel if it is less; otherwise look up the byte at
// [light high byte : texel] in the patched table, store it and update the
// z-buffer.  Then step: tstep is added to %edx and "sbbl" turns the carry
// into 0 or -1, which selects advancetable+4 (base) or advancetable
// (extra) for the "adcl" that advances ptex together with the sfrac carry.

LDrawLoop:

// FIXME: do we need to clamp light? We may need at least a buffer bit to
// keep it from poking into tfrac and causing problems

LDraw8:
	cmpw	(%ecx),%bp
	jl	Lp1
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al
LPatch8:
	movb	%al,(%edi)
Lp1:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw7:
	cmpw	2(%ecx),%bp
	jl	Lp2
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,2(%ecx)
	movb	0x12345678(%eax),%al
LPatch7:
	movb	%al,1(%edi)
Lp2:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw6:
	cmpw	4(%ecx),%bp
	jl	Lp3
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,4(%ecx)
	movb	0x12345678(%eax),%al
LPatch6:
	movb	%al,2(%edi)
Lp3:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw5:
	cmpw	6(%ecx),%bp
	jl	Lp4
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,6(%ecx)
	movb	0x12345678(%eax),%al
LPatch5:
	movb	%al,3(%edi)
Lp4:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw4:
	cmpw	8(%ecx),%bp
	jl	Lp5
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,8(%ecx)
	movb	0x12345678(%eax),%al
LPatch4:
	movb	%al,4(%edi)
Lp5:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw3:
	cmpw	10(%ecx),%bp
	jl	Lp6
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,10(%ecx)
	movb	0x12345678(%eax),%al
LPatch3:
	movb	%al,5(%edi)
Lp6:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw2:
	cmpw	12(%ecx),%bp
	jl	Lp7
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,12(%ecx)
	movb	0x12345678(%eax),%al
LPatch2:
	movb	%al,6(%edi)
Lp7:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw1:
	cmpw	14(%ecx),%bp
	jl	Lp8
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,14(%ecx)
	movb	0x12345678(%eax),%al
LPatch1:
	movb	%al,7(%edi)
Lp8:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

	addl	$8,%edi				// next group of 8 pixels
	addl	$16,%ecx			// next group of 8 z-buffer words

	decw	%bx
	jnz	LDrawLoop

	popl	%esi				// restore spans pointer
LNextSpan:
	addl	$(spanpackage_t_size),%esi	// point to next span
LNextSpanESISet:
	movl	spanpackage_t_count(%esi),%edx
	cmpl	$-999999,%edx			// any more spans?
jnz LSpanLoop // yes popl %edi popl %ebp // restore the caller's stack frame popl %ebx // restore register variables popl %esi ret // draw a one-long span LExactlyOneLong: movl spanpackage_t_pz(%esi),%ecx movl spanpackage_t_zi(%esi),%ebp rorl $16,%ebp // put high 16 bits of 1/z in low word movl spanpackage_t_ptex(%esi),%ebx cmpw (%ecx),%bp jl LNextSpan xorl %eax,%eax movl spanpackage_t_pdest(%esi),%edi movb spanpackage_t_light+1(%esi),%ah addl $(spanpackage_t_size),%esi // point to next span movb (%ebx),%al movw %bp,(%ecx) movb 0x12345678(%eax),%al LPatch9: movb %al,(%edi) jmp LNextSpanESISet .globl C(D_PolysetAff8End) C(D_PolysetAff8End): #define pcolormap 4 .globl C(D_Aff8Patch) C(D_Aff8Patch): movl pcolormap(%esp),%eax movl %eax,LPatch1-4 movl %eax,LPatch2-4 movl %eax,LPatch3-4 movl %eax,LPatch4-4 movl %eax,LPatch5-4 movl %eax,LPatch6-4 movl %eax,LPatch7-4 movl %eax,LPatch8-4 movl %eax,LPatch9-4 ret //---------------------------------------------------------------------- // Alias model polygon dispatching code, combined with subdivided affine // triangle drawing code //---------------------------------------------------------------------- .globl C(D_PolysetDraw) C(D_PolysetDraw): // spanpackage_t spans[DPS_MAXSPANS + 1 + // ((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1]; // // one extra because of cache line pretouching // // a_spans = (spanpackage_t *) // (((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1)); subl $(SPAN_SIZE),%esp movl %esp,%eax addl $(CACHE_SIZE - 1),%eax andl $(~(CACHE_SIZE - 1)),%eax movl %eax,C(a_spans) // if (r_affinetridesc.drawtype) // D_DrawSubdiv (); // else // D_DrawNonSubdiv (); movl C(r_affinetridesc)+atd_drawtype,%eax testl %eax,%eax jz C(D_DrawNonSubdiv) pushl %ebp // preserve caller stack frame pointer // lnumtriangles = r_affinetridesc.numtriangles; movl C(r_affinetridesc)+atd_numtriangles,%ebp pushl %esi // preserve register variables shll $4,%ebp pushl %ebx // ptri = r_affinetridesc.ptriangles; movl 
C(r_affinetridesc)+atd_ptriangles,%ebx pushl %edi // mtriangle_t *ptri; // finalvert_t *pfv, *index0, *index1, *index2; // int i; // int lnumtriangles; // int s0, s1, s2; // pfv = r_affinetridesc.pfinalverts; movl C(r_affinetridesc)+atd_pfinalverts,%edi // for (i=0 ; iv[1]-index1->v[1]) * // (index0->v[0]-index2->v[0]) - // (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0) // { // continue; // } // // d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00]; fildl fv_v+4(%ecx) // i0v1 fildl fv_v+4(%esi) // i1v1 | i0v1 fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1 fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1 fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1 fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1 fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1 fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1 fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1 fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1 fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1 movl fv_v+16(%ecx),%eax andl $0xFF00,%eax fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1 addl C(acolormap),%eax fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1) movl %eax,C(d_pcolormap) fstps Ltemp movl Ltemp,%eax subl $0x80000001,%eax jc Lskip // if (ptri[i].facesfront) // { // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); movl mtri_facesfront-16(%ebx,%ebp,),%eax testl %eax,%eax jz Lfacesback pushl %edx pushl %esi pushl %ecx call C(D_PolysetRecursiveTriangle) subl $16,%ebp jnz Llooptop jmp Ldone2 // } // else // { Lfacesback: // s0 = index0->v[2]; // s1 = index1->v[2]; // s2 = index2->v[2]; movl fv_v+8(%ecx),%eax pushl %eax movl 
fv_v+8(%esi),%eax			// operands of the "movl" that ends the preceding line
	pushl	%eax
	movl	fv_v+8(%edx),%eax
	pushl	%eax

	pushl	%ecx
	pushl	%edx

//		if (index0->flags & ALIAS_ONSEAM)
//			index0->v[2] += r_affinetridesc.seamfixupX16;
	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
	jz		Lp11
	addl	%eax,fv_v+8(%ecx)
Lp11:

//		if (index1->flags & ALIAS_ONSEAM)
//			index1->v[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
	jz		Lp12
	addl	%eax,fv_v+8(%esi)
Lp12:

//		if (index2->flags & ALIAS_ONSEAM)
//			index2->v[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
	jz		Lp13
	addl	%eax,fv_v+8(%edx)
Lp13:

//		D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
	pushl	%edx
	pushl	%esi
	pushl	%ecx
	call	C(D_PolysetRecursiveTriangle)

//		index0->v[2] = s0;
//		index1->v[2] = s1;
//		index2->v[2] = s2;
	popl	%edx
	popl	%ecx
	popl	%eax
	movl	%eax,fv_v+8(%edx)
	popl	%eax
	movl	%eax,fv_v+8(%esi)
	popl	%eax
	movl	%eax,fv_v+8(%ecx)

//		}
//	}
Lskip:
	subl	$16,%ebp
	jnz		Llooptop

Ldone2:
	popl	%edi				// restore the caller's stack frame
	popl	%ebx
	popl	%esi				// restore register variables
	popl	%ebp

	addl	$(SPAN_SIZE),%esp

	ret


//----------------------------------------------------------------------
// Alias model triangle left-edge scanning code
//
// D_PolysetScanLeftEdge takes one stack argument, height, and writes one
// spanpackage_t record per iteration starting at d_pedgespanpackage.
//
// Only the low 16 bits of height are used: they are merged into the low
// word of the register that carries d_sfrac and act as the loop counter.
// The loop is bottom-tested, so it always runs at least once.
// NOTE(review): a height whose low word is 0 would borrow from the sfrac
// word on the first decrement; callers presumably never pass 0.
//
// Written back to memory here: d_pedgespanpackage, errorterm, d_pdest,
// d_pz and d_aspancount.  d_ptex, d_sfrac, d_tfrac, d_light and d_zi are
// stepped in registers only; their memory copies are not updated.
//----------------------------------------------------------------------

#define height	4+16			// return address + four saved registers

.globl C(D_PolysetScanLeftEdge)
C(D_PolysetScanLeftEdge):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

	movl	height(%esp),%eax
	movl	C(d_sfrac),%ecx
	andl	$0xFFFF,%eax		// keep only the low word of height
	movl	C(d_ptex),%ebx
	orl		%eax,%ecx			// count goes in the low word next to sfrac
								// (assumes d_sfrac has a clear low word --
								// TODO confirm against the caller)
	movl	C(d_pedgespanpackage),%esi
	movl	C(d_tfrac),%edx
	movl	C(d_light),%edi
	movl	C(d_zi),%ebp

// %eax: scratch
// %ebx: d_ptex
// %ecx: d_sfrac in high word, count in low word
// %edx: d_tfrac
// %esi: d_pedgespanpackage, errorterm, scratch alternately
// %edi: d_light
// %ebp: d_zi

//	do
//	{

LScanLoop:

//		d_pedgespanpackage->ptex = ptex;
//		d_pedgespanpackage->pdest = d_pdest;
//		d_pedgespanpackage->pz = d_pz;
//		d_pedgespanpackage->count = d_aspancount;
//		d_pedgespanpackage->light = d_light;
//		d_pedgespanpackage->zi = d_zi;
//		d_pedgespanpackage->sfrac = d_sfrac << 16;
//		d_pedgespanpackage->tfrac = d_tfrac << 16;
	movl	%ebx,spanpackage_t_ptex(%esi)
	movl	C(d_pdest),%eax
	movl	%eax,spanpackage_t_pdest(%esi)
	movl	C(d_pz),%eax
	movl	%eax,spanpackage_t_pz(%esi)
	movl	C(d_aspancount),%eax
	movl	%eax,spanpackage_t_count(%esi)
	movl	%edi,spanpackage_t_light(%esi)
	movl	%ebp,spanpackage_t_zi(%esi)
	movl	%ecx,spanpackage_t_sfrac(%esi)	// low word still holds the count
	movl	%edx,spanpackage_t_tfrac(%esi)

// pretouch the next cache line
	movb	spanpackage_t_size(%esi),%al

//		d_pedgespanpackage++;
	addl	$(spanpackage_t_size),%esi
	movl	C(erroradjustup),%eax
	movl	%esi,C(d_pedgespanpackage)

//		errorterm += erroradjustup;
	movl	C(errorterm),%esi
	addl	%eax,%esi
	movl	C(d_pdest),%eax		// movl leaves the sign flag from addl intact

//		if (errorterm >= 0)
//		{
	js		LNoLeftEdgeTurnover

//			errorterm -= erroradjustdown;
//			d_pdest += d_pdestextrastep;
	subl	C(erroradjustdown),%esi
	addl	C(d_pdestextrastep),%eax
	movl	%esi,C(errorterm)
	movl	%eax,C(d_pdest)

//			d_pz += d_pzextrastep;
//			d_aspancount += d_countextrastep;
//			d_ptex += d_ptexextrastep;
//			d_sfrac += d_sfracextrastep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
//			d_tfrac += d_tfracextrastep;
	movl	C(d_pz),%eax
	movl	C(d_aspancount),%esi
	addl	C(d_pzextrastep),%eax
	addl	C(d_sfracextrastep),%ecx
	adcl	C(d_ptexextrastep),%ebx	// carry out of sfrac advances ptex
	addl	C(d_countextrastep),%esi
	movl	%eax,C(d_pz)
	movl	C(d_tfracextrastep),%eax
	movl	%esi,C(d_aspancount)
	addl	%eax,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc		LSkip1				// carry out of tfrac means one more skin row

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip1:

//			d_light += d_lightextrastep;
//			d_zi += d_ziextrastep;
	addl	C(d_lightextrastep),%edi
	addl	C(d_ziextrastep),%ebp

//		}
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx
	testl	$0xFFFF,%ecx		// loop until the count word reaches zero
	jnz		LScanLoop

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret

//		else
//		{

LNoLeftEdgeTurnover:
	movl	%esi,C(errorterm)

//			d_pdest += d_pdestbasestep;
	addl	C(d_pdestbasestep),%eax
	movl	%eax,C(d_pdest)

//			d_pz += d_pzbasestep;
//			d_aspancount += ubasestep;
//			d_ptex += d_ptexbasestep;
//			d_sfrac += d_sfracbasestep;
//			d_ptex += d_sfrac >> 16;
//			d_sfrac &= 0xFFFF;
	movl	C(d_pz),%eax
	movl	C(d_aspancount),%esi
	addl	C(d_pzbasestep),%eax
	addl	C(d_sfracbasestep),%ecx
	adcl	C(d_ptexbasestep),%ebx	// carry out of sfrac advances ptex
	addl	C(ubasestep),%esi
	movl	%eax,C(d_pz)
	movl	%esi,C(d_aspancount)

//			d_tfrac += d_tfracbasestep;
	movl	C(d_tfracbasestep),%esi
	addl	%esi,%edx

//			if (d_tfrac & 0x10000)
//			{
	jnc		LSkip2				// carry out of tfrac means one more skin row

//				d_ptex += r_affinetridesc.skinwidth;
//				d_tfrac &= 0xFFFF;
	addl	C(r_affinetridesc)+atd_skinwidth,%ebx

//			}

LSkip2:

//			d_light += d_lightbasestep;
//			d_zi += d_zibasestep;
	addl	C(d_lightbasestep),%edi
	addl	C(d_zibasestep),%ebp

//		}
//	} while (--height);
	movl	C(d_pedgespanpackage),%esi
	decl	%ecx
	testl	$0xFFFF,%ecx		// loop until the count word reaches zero
	jnz		LScanLoop

	popl	%ebx
	popl	%edi
	popl	%esi
	popl	%ebp
	ret


//----------------------------------------------------------------------
// Alias model vertex drawing code
//
// D_PolysetDrawFinalVerts takes two stack arguments: fv, a pointer to
// the first vertex record, and numverts, the number of records to visit
// (records are fv_size bytes apart).
//
// For every vertex whose v[0] is below r_refdef.vrectright and whose
// v[1] is below r_refdef.vrectbottom, the 16-bit depth taken from the
// high word of v[5] is compared with the z-buffer entry for that pixel.
// If it is greater or equal, the z-buffer entry is replaced and one
// colormapped skin texel is written to d_viewbuffer.  Only the right and
// bottom bounds are tested here.
//
// A numverts of zero or less draws nothing, matching the C loop below.
//----------------------------------------------------------------------

#define fv			4+8			// return address + two saved registers
#define	numverts	8+8

.globl C(D_PolysetDrawFinalVerts)
C(D_PolysetDrawFinalVerts):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%ebx

//	int		i, z;
//	short	*zbuf;

// the argument offsets above are only valid while exactly two registers
// have been pushed, so both arguments are fetched before the next pushes
	movl	numverts(%esp),%ecx
	movl	fv(%esp),%ebx

	pushl	%esi				// preserve register variables
	pushl	%edi

// the loop below is bottom-tested; without this check a count of zero
// would wrap %ecx and walk far beyond the vertex array
	testl	%ecx,%ecx
	jle		LFVDone

LFVLoop:

//	for (i=0 ; i<numverts ; i++, fv++)
//	{
//		if ((fv->v[0] < r_refdef.vrectright) &&
//			(fv->v[1] < r_refdef.vrectbottom))
//		{
	movl	fv_v+0(%ebx),%eax
	movl	C(r_refdef)+rd_vrectright,%edx
	cmpl	%edx,%eax
	jge		LNextVert
	movl	fv_v+4(%ebx),%esi
	movl	C(r_refdef)+rd_vrectbottom,%edx
	cmpl	%edx,%esi
	jge		LNextVert

//			zbuf = zspantable[fv->v[1]] + fv->v[0];
	movl	C(zspantable)(,%esi,4),%edi

//			z = fv->v[5]>>16;
	movl	fv_v+20(%ebx),%edx
	shrl	$16,%edx

//			if (z >= *zbuf)
//			{
//				int		pix;
	cmpw	(%edi,%eax,2),%dx
	jl		LNextVert

//				*zbuf = z;
	movw	%dx,(%edi,%eax,2)

//				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
	movl	fv_v+12(%ebx),%edi
	shrl	$16,%edi
	movl	C(skintable)(,%edi,4),%edi
	movl	fv_v+8(%ebx),%edx
	shrl	$16,%edx
	movb	(%edi,%edx),%dl

//				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
	movl	fv_v+16(%ebx),%edi
	andl	$0xFF00,%edi
	andl	$0x00FF,%edx		// drop stale upper bits, keep the texel byte
	addl	%edx,%edi
	movl	C(acolormap),%edx
	movb	(%edx,%edi,1),%dl

//				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
	movl	C(d_scantable)(,%esi,4),%edi
	movl	C(d_viewbuffer),%esi
	addl	%eax,%edi
	movb	%dl,(%esi,%edi)

//			}
//		}
//	}
LNextVert:
	addl	$(fv_size),%ebx
	decl	%ecx
	jnz		LFVLoop

LFVDone:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret


//----------------------------------------------------------------------
// Alias model non-subdivided polygon dispatching code
//
// not C-callable because of stack buffer cleanup
//
// The epilogue adds SPAN_SIZE to %esp before returning, so this routine
// must be entered with a SPAN_SIZE-byte area on the stack between %esp
// and the return address (presumably it is reached by a jump from the
// code that reserved that area -- confirm against the caller).
//
// Triangles are visited from the last one to the first.  For each one
// the signed integer determinant d_xdenom is computed; triangles with
// d_xdenom >= 0 are skipped.  Otherwise the three vertices are copied to
// r_p0, r_p1 and r_p2, the seam fixup is applied to triangles that do not
// face front, 1.0/d_xdenom is stored back into d_xdenom as a float, and
// D_PolysetSetEdgeTable and D_RasterizeAliasPolySmooth are called.
//
// %ebp holds the byte offset just past the current triangle and is live
// across both calls (assumes the callees preserve it -- TODO confirm).
//----------------------------------------------------------------------

.globl C(D_DrawNonSubdiv)
C(D_DrawNonSubdiv):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%ebx
	pushl	%esi				// preserve register variables
	pushl	%edi

//	mtriangle_t		*ptri;
//	finalvert_t		*pfv, *index0, *index1, *index2;
//	int				i;
//	int				lnumtriangles;

//	pfv = r_affinetridesc.pfinalverts;
//	ptri = r_affinetridesc.ptriangles;
//	lnumtriangles = r_affinetridesc.numtriangles;
	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
	movl	C(r_affinetridesc)+atd_ptriangles,%esi

// the loop below is bottom-tested and indexes triangle (offset - mtri_size);
// with no triangles it would read in front of the array and wrap the
// counter, so bail out the way the C for-loop would
	testl	%ebp,%ebp
	jle		LNDDone

	shll	$(mtri_shift),%ebp	// triangle count -> byte offset past the last one

LNDLoop:

//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
//	{
//		index0 = pfv + ptri->vertindex[0];
//		index1 = pfv + ptri->vertindex[1];
//		index2 = pfv + ptri->vertindex[2];
	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
	shll	$(fv_shift),%ecx
	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
	shll	$(fv_shift),%edx
	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
	shll	$(fv_shift),%ebx
	addl	%edi,%ecx			// %ecx = index0
	addl	%edi,%edx			// %edx = index1
	addl	%edi,%ebx			// %ebx = index2

//		d_xdenom = (index0->v[1]-index1->v[1]) *
//				(index0->v[0]-index2->v[0]) -
//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
	movl	fv_v+4(%ecx),%eax
	movl	fv_v+0(%ecx),%esi
	subl	fv_v+4(%edx),%eax
	subl	fv_v+0(%ebx),%esi
	imull	%esi,%eax
	movl	fv_v+0(%ecx),%esi
	movl	fv_v+4(%ecx),%edi
	subl	fv_v+0(%edx),%esi
	subl	fv_v+4(%ebx),%edi
	imull	%esi,%edi
	subl	%edi,%eax

//		if (d_xdenom >= 0)
//		{
//			continue;
	jns		LNextTri

//		}

	movl	%eax,C(d_xdenom)
	fildl	C(d_xdenom)			// integer determinant onto the FPU stack

//		r_p0[0] = index0->v[0];		// u
//		r_p0[1] = index0->v[1];		// v
//		r_p0[2] = index0->v[2];		// s
//		r_p0[3] = index0->v[3];		// t
//		r_p0[4] = index0->v[4];		// light
//		r_p0[5] = index0->v[5];		// iz
	movl	fv_v+0(%ecx),%eax
	movl	fv_v+4(%ecx),%esi
	movl	%eax,C(r_p0)+0
	movl	%esi,C(r_p0)+4
	movl	fv_v+8(%ecx),%eax
	movl	fv_v+12(%ecx),%esi
	movl	%eax,C(r_p0)+8
	movl	%esi,C(r_p0)+12
	movl	fv_v+16(%ecx),%eax
	movl	fv_v+20(%ecx),%esi
	movl	%eax,C(r_p0)+16
	movl	%esi,C(r_p0)+20

	fdivrs	float_1				// st(0) = float_1 / d_xdenom

//		r_p1[0] = index1->v[0];
//		r_p1[1] = index1->v[1];
//		r_p1[2] = index1->v[2];
//		r_p1[3] = index1->v[3];
//		r_p1[4] = index1->v[4];
//		r_p1[5] = index1->v[5];
	movl	fv_v+0(%edx),%eax
	movl	fv_v+4(%edx),%esi
	movl	%eax,C(r_p1)+0
	movl	%esi,C(r_p1)+4
	movl	fv_v+8(%edx),%eax
	movl	fv_v+12(%edx),%esi
	movl	%eax,C(r_p1)+8
	movl	%esi,C(r_p1)+12
	movl	fv_v+16(%edx),%eax
	movl	fv_v+20(%edx),%esi
	movl	%eax,C(r_p1)+16
	movl	%esi,C(r_p1)+20

//		r_p2[0] = index2->v[0];
//		r_p2[1] = index2->v[1];
//		r_p2[2] = index2->v[2];
//		r_p2[3] = index2->v[3];
//		r_p2[4] = index2->v[4];
//		r_p2[5] = index2->v[5];
	movl	fv_v+0(%ebx),%eax
	movl	fv_v+4(%ebx),%esi
	movl	%eax,C(r_p2)+0
	movl	%esi,C(r_p2)+4
	movl	fv_v+8(%ebx),%eax
	movl	fv_v+12(%ebx),%esi
	movl	%eax,C(r_p2)+8
	movl	%esi,C(r_p2)+12
	movl	fv_v+16(%ebx),%eax
	movl	fv_v+20(%ebx),%esi
	movl	%eax,C(r_p2)+16
	movl	C(r_affinetridesc)+atd_ptriangles,%edi
	movl	%esi,C(r_p2)+20
	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax

//		if (!ptri->facesfront)
//		{
	testl	%eax,%eax
	jnz		LFacesFront

//			if (index0->flags & ALIAS_ONSEAM)
//				r_p0[2] += r_affinetridesc.seamfixupX16;
	movl	fv_flags(%ecx),%eax
	movl	fv_flags(%edx),%esi
	movl	fv_flags(%ebx),%edi
	testl	$(ALIAS_ONSEAM),%eax
	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax	// movl keeps the flags from testl
	jz		LOnseamDone0
	addl	%eax,C(r_p0)+8
LOnseamDone0:

//			if (index1->flags & ALIAS_ONSEAM)
//				r_p1[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),%esi
	jz		LOnseamDone1
	addl	%eax,C(r_p1)+8
LOnseamDone1:

//			if (index2->flags & ALIAS_ONSEAM)
//				r_p2[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),%edi
	jz		LOnseamDone2
	addl	%eax,C(r_p2)+8
LOnseamDone2:

//		}

LFacesFront:

	fstps	C(d_xdenom)			// d_xdenom now holds the reciprocal as a float

//		D_PolysetSetEdgeTable ();
//		D_RasterizeAliasPolySmooth ();
	call	C(D_PolysetSetEdgeTable)
	call	C(D_RasterizeAliasPolySmooth)

LNextTri:
	movl	C(r_affinetridesc)+atd_ptriangles,%esi	// %esi was used as scratch above
	subl	$(mtri_size),%ebp	// step back one triangle; same stride as the indexing above
	jnz		LNDLoop
//	}

LNDDone:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp

	addl	$(SPAN_SIZE),%esp	// release the span buffer this routine was entered with

	ret


#endif	// USE_INTEL_ASM