// // d_polysa.s // x86 assembly-language polygon model drawing code // #include "../src/qasm.h" #include "d_ifacea.h" #if id386 // !!! if this is changed, it must be changed in d_polyse.c too !!! #define DPS_MAXSPANS MAXHEIGHT+1 // 1 extra for spanpackage that marks end //#define SPAN_SIZE (((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size) #define SPAN_SIZE (1024+1+1+1)*32 .data .align 4 p10_minus_p20: .single 0 p01_minus_p21: .single 0 temp0: .single 0 temp1: .single 0 Ltemp: .single 0 aff8entryvec_table: .long LDraw8, LDraw7, LDraw6, LDraw5 .long LDraw4, LDraw3, LDraw2, LDraw1 lzistepx: .long 0 .text #ifndef NeXT .extern C(D_PolysetSetEdgeTable) .extern C(D_RasterizeAliasPolySmooth) #endif //---------------------------------------------------------------------- // affine triangle gradient calculation code //---------------------------------------------------------------------- #if 0 #define skinwidth 4+0 .globl C(R_PolysetCalcGradients) C(R_PolysetCalcGradients): // p00_minus_p20 = r_p0[0] - r_p2[0]; // p01_minus_p21 = r_p0[1] - r_p2[1]; // p10_minus_p20 = r_p1[0] - r_p2[0]; // p11_minus_p21 = r_p1[1] - r_p2[1]; // // xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 - // p00_minus_p20 * p11_minus_p21); // // ystepdenominv = -xstepdenominv; fildl C(r_p0)+0 // r_p0[0] fildl C(r_p2)+0 // r_p2[0] | r_p0[0] fildl C(r_p0)+4 // r_p0[1] | r_p2[0] | r_p0[0] fildl C(r_p2)+4 // r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0] fildl C(r_p1)+0 // r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0] fildl C(r_p1)+4 // r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] | // r_p2[0] | r_p0[0] fxch %st(3) // r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] | // r_p2[0] | r_p0[0] fsub %st(2),%st(0) // p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] | // r_p2[0] | r_p0[0] fxch %st(1) // r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] | // r_p2[0] | r_p0[0] fsub %st(4),%st(0) // p10_minus_p20 | p01_minus_p21 | r_p2[1] | // r_p1[1] | r_p2[0] | r_p0[0] fxch %st(5) // r_p0[0] | p01_minus_p21 | r_p2[1] | // r_p1[1] | r_p2[0] | p10_minus_p20 fsubp %st(0),%st(4) // p01_minus_p21 | r_p2[1] | r_p1[1] | // p00_minus_p20 | p10_minus_p20 fxch %st(2) // r_p1[1] | r_p2[1] | p01_minus_p21 | // p00_minus_p20 | p10_minus_p20 fsubp %st(0),%st(1) // p11_minus_p21 | p01_minus_p21 | // p00_minus_p20 | p10_minus_p20 fxch %st(1) // p01_minus_p21 | p11_minus_p21 | // p00_minus_p20 | p10_minus_p20 flds C(d_xdenom) // d_xdenom | p01_minus_p21 | p11_minus_p21 | // p00_minus_p20 | p10_minus_p20 fxch %st(4) // p10_minus_p20 | p01_minus_p21 | p11_minus_p21 | // p00_minus_p20 | d_xdenom fstps p10_minus_p20 // p01_minus_p21 | p11_minus_p21 | // p00_minus_p20 | d_xdenom fstps p01_minus_p21 // p11_minus_p21 | p00_minus_p20 | xstepdenominv fxch %st(2) // xstepdenominv | p00_minus_p20 | p11_minus_p21 //// ceil () for light so positive steps are exaggerated, negative steps //// diminished, pushing us away from underflow toward overflow. Underflow is //// very visible, overflow is very unlikely, because of ambient lighting // t0 = r_p0[4] - r_p2[4]; // t1 = r_p1[4] - r_p2[4]; fildl C(r_p2)+16 // r_p2[4] | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fildl C(r_p0)+16 // r_p0[4] | r_p2[4] | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fildl C(r_p1)+16 // r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fxch %st(2) // r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fld %st(0) // r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fsubrp %st(0),%st(2) // r_p2[4] | t0 | r_p1[4] | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fsubrp %st(0),%st(2) // t0 | t1 | xstepdenominv | p00_minus_p20 | // p11_minus_p21 // r_lstepx = (int) // ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv); // r_lstepy = (int) // ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv); fld %st(0) // t0 | t0 | t1 | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmul %st(5),%st(0) // t0*p11_minus_p21 | t0 | t1 | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | // t0*p11_minus_p21 | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | // t0*p11_minus_p21 | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fmul %st(5),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | // t1*p01_minus_p21 | t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | // t1*p00_minus_p20 | t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fld %st(2) // xstepdenominv | // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmuls float_minus_1 // ystepdenominv | // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // t1*p01_minus_p21 - t0*p11_minus_p21 | // t1*p00_minus_p20 - t0*p10_minus_p20 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | // t1*p00_minus_p20 - t0*p10_minus_p20 | // | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fxch %st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fldcw ceil_cw fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fldcw single_cw // t0 = r_p0[2] - r_p2[2]; // t1 = r_p1[2] - r_p2[2]; fildl C(r_p2)+8 // r_p2[2] | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fildl C(r_p0)+8 // r_p0[2] | r_p2[2] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fildl C(r_p1)+8 // r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fld %st(0) // r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubrp %st(0),%st(2) // r_p2[2] | t0 | r_p1[2] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 // r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * // xstepdenominv); // r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * // ystepdenominv); fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | // t0*p11_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | // t0*p11_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | // t1*p01_minus_p21 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | // t1*p00_minus_p20 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fistpl C(r_sstepy) // r_sstepx | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fistpl C(r_sstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 // t0 = r_p0[3] - r_p2[3]; // t1 = r_p1[3] - r_p2[3]; fildl C(r_p2)+12 // r_p2[3] | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fildl C(r_p0)+12 // r_p0[3] | r_p2[3] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fildl C(r_p1)+12 // r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fld %st(0) // r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubrp %st(0),%st(2) // r_p2[3] | t0 | r_p1[3] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 // r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * // xstepdenominv); // r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * // ystepdenominv); fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fmul %st(6),%st(0) // t0*p11_minus_p21 | t0 | t1 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // t1 | t0 | t0*p11_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fld %st(0) // t1 | t1 | t0 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | // t0*p11_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | // t0*p11_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmul %st(6),%st(0) // t1*p00_minus_p20 | t0*p10_minus_p20 | // t1*p01_minus_p21 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fxch %st(2) // t1*p01_minus_p21 | t0*p10_minus_p20 | // t1*p00_minus_p20 | t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubp %st(0),%st(3) // t0*p10_minus_p20 | t1*p00_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubrp %st(0),%st(1) // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fmul %st(2),%st(0) // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fxch %st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fmul %st(3),%st(0) // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(1) // (t1*p00_minus_p20 - t0*p10_minus_p20)* // ystepdenominv | // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fistpl C(r_tstepy) // r_tstepx | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fistpl C(r_tstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 // t0 = r_p0[5] - r_p2[5]; // t1 = r_p1[5] - r_p2[5]; fildl C(r_p2)+20 // r_p2[5] | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fildl C(r_p0)+20 // r_p0[5] | r_p2[5] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fildl C(r_p1)+20 // r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fxch %st(2) // r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fld %st(0) // r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] | // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 fsubrp %st(0),%st(2) // r_p2[5] | t0 | r_p1[5] | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 fsubrp %st(0),%st(2) // t0 | t1 | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 // r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) * // xstepdenominv); // r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) * // ystepdenominv); fld %st(0) // t0 | t0 | t1 | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fmulp %st(0),%st(6) // t0 | t1 | ystepdenominv | xstepdenominv | // p00_minus_p20 | t0*p11_minus_p21 fxch %st(1) // t1 | t0 | ystepdenominv | xstepdenominv | // p00_minus_p20 | t0*p11_minus_p21 fld %st(0) // t1 | t1 | t0 | ystepdenominv | xstepdenominv | // p00_minus_p20 | t0*p11_minus_p21 fmuls p01_minus_p21 // t1*p01_minus_p21 | t1 | t0 | ystepdenominv | // xstepdenominv | p00_minus_p20 | // t0*p11_minus_p21 fxch %st(2) // t0 | t1 | t1*p01_minus_p21 | ystepdenominv | // xstepdenominv | p00_minus_p20 | // t0*p11_minus_p21 fmuls p10_minus_p20 // t0*p10_minus_p20 | t1 | t1*p01_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // t0*p11_minus_p21 fxch %st(1) // t1 | t0*p10_minus_p20 | t1*p01_minus_p21 | // ystepdenominv | xstepdenominv | p00_minus_p20 | // t0*p11_minus_p21 fmulp %st(0),%st(5) // t0*p10_minus_p20 | t1*p01_minus_p21 | // ystepdenominv | xstepdenominv | // t1*p00_minus_p20 | t0*p11_minus_p21 fxch %st(5) // t0*p11_minus_p21 | t1*p01_minus_p21 | // ystepdenominv | xstepdenominv | // t1*p00_minus_p20 | t0*p10_minus_p20 fsubrp %st(0),%st(1) // t1*p01_minus_p21 - t0*p11_minus_p21 | // ystepdenominv | xstepdenominv | // t1*p00_minus_p20 | t0*p10_minus_p20 fxch %st(3) // t1*p00_minus_p20 | ystepdenominv | // xstepdenominv | // t1*p01_minus_p21 - t0*p11_minus_p21 | // t0*p10_minus_p20 fsubp %st(0),%st(4) // ystepdenominv | xstepdenominv | // t1*p01_minus_p21 - t0*p11_minus_p21 | // t1*p00_minus_p20 - t0*p10_minus_p20 fxch %st(1) // xstepdenominv | ystepdenominv | // t1*p01_minus_p21 - t0*p11_minus_p21 | // t1*p00_minus_p20 - t0*p10_minus_p20 fmulp %st(0),%st(2) // ystepdenominv | // (t1*p01_minus_p21 - t0*p11_minus_p21) * // xstepdenominv | // t1*p00_minus_p20 - t0*p10_minus_p20 fmulp %st(0),%st(2) // (t1*p01_minus_p21 - t0*p11_minus_p21) * // xstepdenominv | // (t1*p00_minus_p20 - t0*p10_minus_p20) * // ystepdenominv fistpl C(r_zistepx) // (t1*p00_minus_p20 - t0*p10_minus_p20) * // ystepdenominv fistpl C(r_zistepy) // a_sstepxfrac = r_sstepx << 16; // a_tstepxfrac = r_tstepx << 16; // // a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) + // (r_sstepx >> 16); movl C(r_sstepx),%eax movl C(r_tstepx),%edx shll $16,%eax shll $16,%edx movl %eax,C(a_sstepxfrac) movl %edx,C(a_tstepxfrac) movl C(r_sstepx),%ecx movl C(r_tstepx),%eax sarl $16,%ecx sarl $16,%eax imull skinwidth(%esp) addl %ecx,%eax movl %eax,C(a_ststepxwhole) ret #endif //---------------------------------------------------------------------- // recursive subdivision affine triangle drawing code // // not C-callable because of stdcall return //---------------------------------------------------------------------- #define lp1 4+16 #define lp2 8+16 #define lp3 12+16 .globl C(D_PolysetRecursiveTriangle) C(D_PolysetRecursiveTriangle): pushl %ebp // preserve caller stack frame pointer pushl %esi // preserve register variables pushl %edi pushl %ebx // int *temp; // int d; // int new[6]; // int i; // int z; // short *zbuf; movl lp2(%esp),%esi movl lp1(%esp),%ebx movl lp3(%esp),%edi // d = lp2[0] - lp1[0]; // if (d < -1 || d > 1) // goto split; movl 0(%esi),%eax movl 0(%ebx),%edx movl 4(%esi),%ebp subl %edx,%eax movl 4(%ebx),%ecx subl %ecx,%ebp incl %eax cmpl $2,%eax ja LSplit // d = lp2[1] - lp1[1]; // if (d < -1 || d > 1) // goto split; movl 0(%edi),%eax incl %ebp cmpl $2,%ebp ja LSplit // d = lp3[0] - lp2[0]; // if (d < -1 || d > 1) // goto split2; movl 0(%esi),%edx movl 4(%edi),%ebp subl %edx,%eax movl 4(%esi),%ecx subl %ecx,%ebp incl %eax cmpl $2,%eax ja LSplit2 // d = lp3[1] - lp2[1]; // if (d < -1 || d > 1) // goto split2; movl 0(%ebx),%eax incl %ebp cmpl $2,%ebp ja LSplit2 // d = lp1[0] - lp3[0]; // if (d < -1 || d > 1) // goto split3; movl 0(%edi),%edx movl 4(%ebx),%ebp subl %edx,%eax movl 4(%edi),%ecx subl %ecx,%ebp incl %eax incl %ebp movl %ebx,%edx cmpl $2,%eax ja LSplit3 // d = lp1[1] - lp3[1]; // if (d < -1 || d > 1) // { //split3: // temp = lp1; // lp3 = lp2; // lp1 = lp3; // lp2 = temp; // goto split; // } // // return; // entire tri is filled // cmpl $2,%ebp jna LDone LSplit3: movl %edi,%ebx movl %esi,%edi movl %edx,%esi jmp LSplit //split2: LSplit2: // temp = lp1; // lp1 = lp2; // lp2 = lp3; // lp3 = temp; movl %ebx,%eax movl %esi,%ebx movl %edi,%esi movl %eax,%edi //split: LSplit: subl $24,%esp // allocate space for a new vertex //// split this edge // new[0] = (lp1[0] + lp2[0]) >> 1; // new[1] = (lp1[1] + lp2[1]) >> 1; // new[2] = (lp1[2] + lp2[2]) >> 1; // new[3] = (lp1[3] + lp2[3]) >> 1; // new[5] = (lp1[5] + lp2[5]) >> 1; movl 8(%ebx),%eax movl 8(%esi),%edx movl 12(%ebx),%ecx addl %edx,%eax movl 12(%esi),%edx sarl $1,%eax addl %edx,%ecx movl %eax,8(%esp) movl 20(%ebx),%eax sarl $1,%ecx movl 20(%esi),%edx movl %ecx,12(%esp) addl %edx,%eax movl 0(%ebx),%ecx movl 0(%esi),%edx sarl $1,%eax addl %ecx,%edx movl %eax,20(%esp) movl 4(%ebx),%eax sarl $1,%edx movl 4(%esi),%ebp movl %edx,0(%esp) addl %eax,%ebp sarl $1,%ebp movl %ebp,4(%esp) //// draw the point if splitting a leading edge // if (lp2[1] > lp1[1]) // goto nodraw; cmpl %eax,4(%esi) jg LNoDraw // if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0])) // goto nodraw; movl 0(%esi),%edx jnz LDraw cmpl %ecx,%edx jl LNoDraw LDraw: // z = new[5] >> 16; movl 20(%esp),%edx movl 4(%esp),%ecx sarl $16,%edx movl 0(%esp),%ebp // zbuf = zspantable[new[1]] + new[0]; movl C(zspantable)(,%ecx,4),%eax // if (z >= *zbuf) // { cmpw (%eax,%ebp,2),%dx jnge LNoDraw // int pix; // // *zbuf = z; movw %dx,(%eax,%ebp,2) // pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]]; movl 12(%esp),%eax sarl $16,%eax movl 8(%esp),%edx sarl $16,%edx subl %ecx,%ecx movl C(skintable)(,%eax,4),%eax movl 4(%esp),%ebp movb (%eax,%edx,),%cl movl C(d_pcolormap),%edx movb (%edx,%ecx,),%dl movl 0(%esp),%ecx // d_viewbuffer[d_scantable[new[1]] + new[0]] = pix; movl C(d_scantable)(,%ebp,4),%eax addl %eax,%ecx movl C(d_viewbuffer),%eax movb %dl,(%eax,%ecx,1) // } // //nodraw: LNoDraw: //// recursively continue // D_PolysetRecursiveTriangle (lp3, lp1, new); pushl %esp pushl %ebx pushl %edi call C(D_PolysetRecursiveTriangle) // D_PolysetRecursiveTriangle (lp3, new, lp2); movl %esp,%ebx pushl %esi pushl %ebx pushl %edi call C(D_PolysetRecursiveTriangle) addl $24,%esp LDone: popl %ebx // restore register variables popl %edi popl %esi popl %ebp // restore caller stack frame pointer ret $12 //---------------------------------------------------------------------- // 8-bpp horizontal span drawing code for affine polygons, with smooth // shading and no transparency //---------------------------------------------------------------------- #define pspans 4+8 .globl C(D_PolysetAff8Start) C(D_PolysetAff8Start): .globl C(R_PolysetDrawSpans8_Opaque) C(R_PolysetDrawSpans8_Opaque): pushl %esi // preserve register variables pushl %ebx movl pspans(%esp),%esi // point to the first span descriptor movl C(r_zistepx),%ecx pushl %ebp // preserve caller's stack frame pushl %edi rorl $16,%ecx // put high 16 bits of 1/z step in low word movl spanpackage_t_count(%esi),%edx movl %ecx,lzistepx LSpanLoop: // lcount = d_aspancount - pspanpackage->count; // // errorterm += erroradjustup; // if (errorterm >= 0) // { // d_aspancount += d_countextrastep; // errorterm -= erroradjustdown; // } // else // { // d_aspancount += ubasestep; // } movl C(d_aspancount),%eax subl %edx,%eax movl C(erroradjustup),%edx movl C(errorterm),%ebx addl %edx,%ebx js LNoTurnover movl C(erroradjustdown),%edx movl C(d_countextrastep),%edi subl %edx,%ebx movl C(d_aspancount),%ebp movl %ebx,C(errorterm) addl %edi,%ebp movl %ebp,C(d_aspancount) jmp LRightEdgeStepped LNoTurnover: movl C(d_aspancount),%edi movl C(ubasestep),%edx movl %ebx,C(errorterm) addl %edx,%edi movl %edi,C(d_aspancount) LRightEdgeStepped: cmpl $1,%eax jl LNextSpan jz LExactlyOneLong // // set up advancetable // movl C(a_ststepxwhole),%ecx movl C(r_affinetridesc)+atd_skinwidth,%edx movl %ecx,advancetable+4 // advance base in t addl %edx,%ecx movl %ecx,advancetable // advance extra in t movl C(a_tstepxfrac),%ecx movw C(r_lstepx),%cx movl %eax,%edx // count movl %ecx,tstep addl $7,%edx shrl $3,%edx // count of full and partial loops movl spanpackage_t_sfrac(%esi),%ebx movw %dx,%bx movl spanpackage_t_pz(%esi),%ecx negl %eax movl spanpackage_t_pdest(%esi),%edi andl $7,%eax // 0->0, 1->7, 2->6, ... , 7->1 subl %eax,%edi // compensate for hardwired offsets subl %eax,%ecx subl %eax,%ecx movl spanpackage_t_tfrac(%esi),%edx movw spanpackage_t_light(%esi),%dx movl spanpackage_t_zi(%esi),%ebp rorl $16,%ebp // put high 16 bits of 1/z in low word pushl %esi movl spanpackage_t_ptex(%esi),%esi jmp *aff8entryvec_table(,%eax,4) // %bx = count of full and partial loops // %ebx high word = sfrac // %ecx = pz // %dx = light // %edx high word = tfrac // %esi = ptex // %edi = pdest // %ebp = 1/z // tstep low word = C(r_lstepx) // tstep high word = C(a_tstepxfrac) // C(a_sstepxfrac) low word = 0 // C(a_sstepxfrac) high word = C(a_sstepxfrac) LDrawLoop: // FIXME: do we need to clamp light? We may need at least a buffer bit to // keep it from poking into tfrac and causing problems LDraw8: cmpw (%ecx),%bp jl Lp1 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,(%ecx) movb 0x12345678(%eax),%al LPatch8: movb %al,(%edi) Lp1: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw7: cmpw 2(%ecx),%bp jl Lp2 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,2(%ecx) movb 0x12345678(%eax),%al LPatch7: movb %al,1(%edi) Lp2: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw6: cmpw 4(%ecx),%bp jl Lp3 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,4(%ecx) movb 0x12345678(%eax),%al LPatch6: movb %al,2(%edi) Lp3: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw5: cmpw 6(%ecx),%bp jl Lp4 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,6(%ecx) movb 0x12345678(%eax),%al LPatch5: movb %al,3(%edi) Lp4: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw4: cmpw 8(%ecx),%bp jl Lp5 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,8(%ecx) movb 0x12345678(%eax),%al LPatch4: movb %al,4(%edi) Lp5: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw3: cmpw 10(%ecx),%bp jl Lp6 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,10(%ecx) movb 0x12345678(%eax),%al LPatch3: movb %al,5(%edi) Lp6: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw2: cmpw 12(%ecx),%bp jl Lp7 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,12(%ecx) movb 0x12345678(%eax),%al LPatch2: movb %al,6(%edi) Lp7: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi LDraw1: cmpw 14(%ecx),%bp jl Lp8 xorl %eax,%eax movb %dh,%ah movb (%esi),%al movw %bp,14(%ecx) movb 0x12345678(%eax),%al LPatch1: movb %al,7(%edi) Lp8: addl tstep,%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx adcl advancetable+4(,%eax,4),%esi addl $8,%edi addl $16,%ecx decw %bx jnz LDrawLoop popl %esi // restore spans pointer LNextSpan: addl $(spanpackage_t_size),%esi // point to next span LNextSpanESISet: movl spanpackage_t_count(%esi),%edx cmpl $-999999,%edx // any more spans? jnz LSpanLoop // yes popl %edi popl %ebp // restore the caller's stack frame popl %ebx // restore register variables popl %esi ret // draw a one-long span LExactlyOneLong: movl spanpackage_t_pz(%esi),%ecx movl spanpackage_t_zi(%esi),%ebp rorl $16,%ebp // put high 16 bits of 1/z in low word movl spanpackage_t_ptex(%esi),%ebx cmpw (%ecx),%bp jl LNextSpan xorl %eax,%eax movl spanpackage_t_pdest(%esi),%edi movb spanpackage_t_light+1(%esi),%ah addl $(spanpackage_t_size),%esi // point to next span movb (%ebx),%al movw %bp,(%ecx) movb 0x12345678(%eax),%al LPatch9: movb %al,(%edi) jmp LNextSpanESISet .globl C(D_PolysetAff8End) C(D_PolysetAff8End): .extern C(alias_colormap) // #define pcolormap 4 .globl C(D_Aff8Patch) C(D_Aff8Patch): movl C(alias_colormap),%eax movl %eax,LPatch1-4 movl %eax,LPatch2-4 movl %eax,LPatch3-4 movl %eax,LPatch4-4 movl %eax,LPatch5-4 movl %eax,LPatch6-4 movl %eax,LPatch7-4 movl %eax,LPatch8-4 movl %eax,LPatch9-4 ret //---------------------------------------------------------------------- // Alias model triangle left-edge scanning code //---------------------------------------------------------------------- #define height 4+16 .globl C(R_PolysetScanLeftEdge) C(R_PolysetScanLeftEdge): pushl %ebp // preserve caller stack frame pointer pushl %esi // preserve register variables pushl %edi pushl %ebx movl height(%esp),%eax movl C(d_sfrac),%ecx andl $0xFFFF,%eax movl C(d_ptex),%ebx orl %eax,%ecx movl C(d_pedgespanpackage),%esi movl C(d_tfrac),%edx movl C(d_light),%edi movl C(d_zi),%ebp // %eax: scratch // %ebx: d_ptex // %ecx: d_sfrac in high word, count in low word // %edx: d_tfrac // %esi: d_pedgespanpackage, errorterm, scratch alternately // %edi: d_light // %ebp: d_zi // do // { LScanLoop: // d_pedgespanpackage->ptex = ptex; // d_pedgespanpackage->pdest = d_pdest; // d_pedgespanpackage->pz = d_pz; // d_pedgespanpackage->count = d_aspancount; // d_pedgespanpackage->light = d_light; // d_pedgespanpackage->zi = d_zi; // d_pedgespanpackage->sfrac = d_sfrac << 16; // d_pedgespanpackage->tfrac = d_tfrac << 16; movl %ebx,spanpackage_t_ptex(%esi) movl C(d_pdest),%eax movl %eax,spanpackage_t_pdest(%esi) movl C(d_pz),%eax movl %eax,spanpackage_t_pz(%esi) movl C(d_aspancount),%eax movl %eax,spanpackage_t_count(%esi) movl %edi,spanpackage_t_light(%esi) movl %ebp,spanpackage_t_zi(%esi) movl %ecx,spanpackage_t_sfrac(%esi) movl %edx,spanpackage_t_tfrac(%esi) // pretouch the next cache line movb spanpackage_t_size(%esi),%al // d_pedgespanpackage++; addl $(spanpackage_t_size),%esi movl C(erroradjustup),%eax movl %esi,C(d_pedgespanpackage) // errorterm += erroradjustup; movl C(errorterm),%esi addl %eax,%esi movl C(d_pdest),%eax // if (errorterm >= 0) // { js LNoLeftEdgeTurnover // errorterm -= erroradjustdown; // d_pdest += d_pdestextrastep; subl C(erroradjustdown),%esi addl C(d_pdestextrastep),%eax movl %esi,C(errorterm) movl %eax,C(d_pdest) // d_pz += d_pzextrastep; // d_aspancount += d_countextrastep; // d_ptex += d_ptexextrastep; // d_sfrac += d_sfracextrastep; // d_ptex += d_sfrac >> 16; // d_sfrac &= 0xFFFF; // d_tfrac += d_tfracextrastep; movl C(d_pz),%eax movl C(d_aspancount),%esi addl C(d_pzextrastep),%eax addl C(d_sfracextrastep),%ecx adcl C(d_ptexextrastep),%ebx addl C(d_countextrastep),%esi movl %eax,C(d_pz) movl C(d_tfracextrastep),%eax movl %esi,C(d_aspancount) addl %eax,%edx // if (d_tfrac & 0x10000) // { jnc LSkip1 // d_ptex += r_affinetridesc.skinwidth; // d_tfrac &= 0xFFFF; addl C(r_affinetridesc)+atd_skinwidth,%ebx // } LSkip1: // d_light += d_lightextrastep; // d_zi += d_ziextrastep; addl C(d_lightextrastep),%edi addl C(d_ziextrastep),%ebp // } movl C(d_pedgespanpackage),%esi decl %ecx testl $0xFFFF,%ecx jnz LScanLoop popl %ebx popl %edi popl %esi popl %ebp ret // else // { LNoLeftEdgeTurnover: movl %esi,C(errorterm) // d_pdest += d_pdestbasestep; addl C(d_pdestbasestep),%eax movl %eax,C(d_pdest) // d_pz += d_pzbasestep; // d_aspancount += ubasestep; // d_ptex += d_ptexbasestep; // d_sfrac += d_sfracbasestep; // d_ptex += d_sfrac >> 16; // d_sfrac &= 0xFFFF; movl C(d_pz),%eax movl C(d_aspancount),%esi addl C(d_pzbasestep),%eax addl C(d_sfracbasestep),%ecx adcl C(d_ptexbasestep),%ebx addl C(ubasestep),%esi movl %eax,C(d_pz) movl %esi,C(d_aspancount) // d_tfrac += d_tfracbasestep; movl C(d_tfracbasestep),%esi addl %esi,%edx // if (d_tfrac & 0x10000) // { jnc LSkip2 // d_ptex += r_affinetridesc.skinwidth; // d_tfrac &= 0xFFFF; addl C(r_affinetridesc)+atd_skinwidth,%ebx // } LSkip2: // d_light += d_lightbasestep; // d_zi += d_zibasestep; addl C(d_lightbasestep),%edi addl C(d_zibasestep),%ebp // } // } while (--height); movl C(d_pedgespanpackage),%esi decl %ecx testl $0xFFFF,%ecx jnz LScanLoop popl %ebx popl %edi popl %esi popl %ebp ret #endif // id386