// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2016 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file  tmap.s
/// \brief optimised drawing routines for span/column rendering

// structures, must match the C structures!
#include "asm_defs.inc"

// Reminder: only EAX, ECX, EDX may be clobbered freely.
//           esi, edi, cd...gs (sic) must be saved.

/* Beware of comparisons!                                              */
/*                                                                     */
/*      Intel_compare:                                                 */
/*                                                                     */
/*              cmp     A,B                     // A-B , set flags     */
/*              jg      A_greater_than_B                               */
/*                                                                     */
/*      AT&T_compare:                                                  */
/*                                                                     */
/*              cmp     A,B                     // B-A , set flags     */
/*              jg      B_greater_than_A                               */
/*                                                                     */
/*        (subtracts the source operand FROM the destination operand,  */
/*         as on Motorola! )                                           */

// REMINDER: Intel
//              SECTION:[BASE+INDEX*SCALE+DISP]
// becomes      SECTION:DISP(BASE,INDEX,SCALE)

//----------------------------------------------------------------------
//
// R_DrawColumn
//
//   New optimised version 10-01-1998 by D.Fabrice and P.Boris
//   TO DO: optimise it much farther... should take at most 3 cycles/pix
//      once it's fixed, add code to patch the offsets so that it
//      works in every screen width.
//
//----------------------------------------------------------------------

    .data
#ifdef LINUX
    .align 2
#else
    .align 4
#endif
// Scratch variables shared by the drawers in this file.
C(loopcount):    .long   0       // written and read by R_DrawSpan_8 (pixelcount >> 1)
C(pixelcount):   .long   0       // pixel count of the column/span being drawn
C(tystep):       .long   0       // fractional y step saved by the translucent/shade quad loops

C(vidwidth):     .long   0       // set by ASM_PatchRowBytes from its first argument;
                                 //use this one out of the inner loops
                                 //so you don't need to patch everywhere...
#ifdef USEASM
#if !defined( LINUX)
    .text
#endif

//----------------------------------------------------------------------
// ASM_PatchRowBytes
//
// Self-modifying-code helper: stores its first argument (ARG1) in
// vidwidth and writes 1x, 2x, 3x and 4x that value into the 32-bit
// placeholder fields (0x12345678 multiples) of the labelled instructions
// in the column drawers of this file.
//
// Each patch target is "label+2": the patched instructions are written
// with a 32-bit displacement/immediate placeholder, and the store skips
// the two bytes that precede that field.
//
// Clobbers: edx, flags.  ebp is saved and restored.
//----------------------------------------------------------------------
.globl C(ASM_PatchRowBytes)
C(ASM_PatchRowBytes):
    pushl   %ebp
    movl    %esp, %ebp                  // ensures "stack addressability"

    movl    ARG1, %edx                  // read first arg
    movl    %edx, C(vidwidth)

    // 1 * vidwidth
    movl    %edx,p1+2
    movl    %edx,w1+2                   //water
    movl    %edx,p1b+2                  //sky

    movl    %edx,p5+2
    movl    %edx,sh5+2                  //smokie test

    // 2 * vidwidth
    addl    ARG1,%edx
    movl    %edx,p2+2
    movl    %edx,w2+2                   //water
    movl    %edx,p2b+2                  //sky

    movl    %edx,p6+2
    movl    %edx,p7+2
    movl    %edx,p8+2
    movl    %edx,p9+2
    movl    %edx,sh6+2                  //smokie test
    movl    %edx,sh7+2
    movl    %edx,sh8+2
    movl    %edx,sh9+2

    // 3 * vidwidth
    addl    ARG1,%edx
    movl    %edx,p3+2
    movl    %edx,w3+2                   //water
    movl    %edx,p3b+2                  //sky

    // 4 * vidwidth
    addl    ARG1,%edx
    movl    %edx,p4+2
    movl    %edx,w4+2                   //water
    movl    %edx,p4b+2                  //sky

    popl    %ebp
    ret


//----------------------------------------------------------------------
// R_DrawColumn_8
//
// Draws one vertical column of 8-bit pixels.  Takes no stack arguments;
// reads the globals dc_yl, dc_yh, dc_x, dc_iscale, dc_texturemid,
// centery, dc_source, dc_colormap, ylookup, columnofs and vidwidth.
// The texture index is masked to 0-127 after every step.
//
// Leftover (pixelcount & 3) pixels are drawn first, then the remainder
// in an unrolled loop of four pixels whose row offsets (p1..p4) are
// patched by ASM_PatchRowBytes.
//
// NOTE(review): the quad counter lives in ch (8 bits), so only the low
// byte of (pixelcount >> 2) is honoured.
//----------------------------------------------------------------------
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_DrawColumn_8)
C(R_DrawColumn_8):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl    C(dc_yl),%ebp
    movl    %ebp,%ebx
    movl    C(ylookup)(,%ebx,4),%edi
    movl    C(dc_x),%ebx
    addl    C(columnofs)(,%ebx,4),%edi  // edi = dest
//
// pixelcount = yh - yl + 1
//
    movl    C(dc_yh),%eax
    incl    %eax
    subl    %ebp,%eax                   // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     vdone                       // nothing to scale
                                        // (flags still come from the subl: movl leaves them alone)
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl    C(dc_iscale),%ecx           // fracstep
    movl    C(centery),%eax
    subl    %ebp,%eax
    imul    %ecx,%eax
    movl    C(dc_texturemid),%edx
    subl    %eax,%edx
    movl    %edx,%ebx
    shrl    $16,%ebx                    // frac int.
    andl    $0x0000007f,%ebx
    shll    $16,%edx                    // y frac up

    movl    %ecx,%ebp
    shll    $16,%ebp                    // fracstep f. up
    shrl    $16,%ecx                    // fracstep i. ->cl
    andb    $0x7f,%cl
    movl    C(dc_source),%esi
//
// lets rock :) !
//
    movl    C(pixelcount),%eax
    movb    %al,%dh                     // dh = low byte of pixelcount (tested for leftovers below)
    shrl    $2,%eax
    movb    %al,%ch                     // quad count
    movl    C(dc_colormap),%eax
    testb   $3,%dh
    jz      v4quadloop
//
//  do un-even pixel
//
    testb   $1,%dh
    jz      2f

    movb    (%esi,%ebx),%al             // prep un-even loops
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    andb    $0x7f,%bl                   // mask 0-127 texture index
    movb    %dl,(%edi)                  // output pixel
    addl    C(vidwidth),%edi
//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,%dh
    jz      3f

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    andb    $0x7f,%bl                   // mask 0-127 texture index
    movb    %dl,(%edi)                  // output pixel
    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    andb    $0x7f,%bl                   // mask 0-127 texture index
    addl    C(vidwidth),%edi
    movb    %dl,(%edi)                  // output pixel
    addl    C(vidwidth),%edi
//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch                   // test quad count
    jz      vdone
//
// ebp : ystep frac. upper 24 bits
// edx : y     frac. upper 24 bits
// ebx : y     i.    lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
v4quadloop:
    movb    $0x7f,%dh                   // prep mask
//    .align  4
vquadloop:
    movb    (%esi,%ebx),%al             // prep loop
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    movb    %dl,(%edi)                  // output pixel
    andb    $0x7f,%bl                   // mask 0-127 texture index

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
p1: movb    %dl,0x12345678(%edi)        // displacement patched to 1 * vidwidth
    andb    $0x7f,%bl

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
p2: movb    %dl,2*0x12345678(%edi)      // displacement patched to 2 * vidwidth
    andb    $0x7f,%bl

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
p3: movb    %dl,3*0x12345678(%edi)      // displacement patched to 3 * vidwidth
    andb    $0x7f,%bl

p4: addl    $4*0x12345678,%edi          // immediate patched to 4 * vidwidth

    decb    %ch
    jnz     vquadloop

vdone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret

#ifdef HORIZONTALDRAW
// --------------------------------------------------------------------------
// Horizontal Column Drawer Optimisation
//
// Same stepping as R_DrawColumn_8, but the destination comes from
// yhlookup[dc_x] + hcolumnofs[dc_yl] and advances one byte per pixel.
//
// NOTE(review): only whole groups of four pixels are drawn; when
// (pixelcount >> 2) is zero, or for the pixelcount & 3 remainder, nothing
// is written (vhnearlydone is empty).
// --------------------------------------------------------------------------
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_DrawHColumn_8)
C(R_DrawHColumn_8):
    pushl   %ebp
    pushl   %esi
    pushl   %edi
    pushl   %ebx
//
// dest = yhlookup[dc_x] + hcolumnofs[dc_yl];
//
    movl    C(dc_x),%ebx
    movl    C(yhlookup)(,%ebx,4),%edi
    movl    C(dc_yl),%ebp
    movl    %ebp,%ebx
    addl    C(hcolumnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
    movl    C(dc_yh),%eax
    incl    %eax
    subl    %ebp,%eax                   // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     vhdone                      // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl    C(dc_iscale),%ecx           // fracstep
    movl    C(centery),%eax
    subl    %ebp,%eax
    imul    %ecx,%eax
    movl    C(dc_texturemid),%edx
    subl    %eax,%edx
    movl    %edx,%ebx
    shrl    $16,%ebx                    // frac int.
    andl    $0x0000007f,%ebx
    shll    $16,%edx                    // y frac up

    movl    %ecx,%ebp
    shll    $16,%ebp                    // fracstep f. up
    shrl    $16,%ecx                    // fracstep i. ->cl
    andb    $0x7f,%cl
    movl    C(dc_source),%esi
//
// lets rock :) !
//
    movl    C(pixelcount),%eax
    movb    %al,%dh
    shrl    $2,%eax
    movb    %al,%ch                     // quad count

    testb   %ch, %ch
    jz      vhnearlydone

    movl    C(dc_colormap),%eax
    decl    %edi                        // pre-decrement: each pixel does incl before its store
//-----
vhloop:
    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    andb    $0x7f,%bl
    incl    %edi
//-----
    movb    (%eax),%dh
    movb    %dh,(%edi)
//-----
    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    incl    %edi                        // incl leaves CF untouched, so the adcb below still sees the addl carry
//-----
    adcb    %cl,%bl
    movb    (%eax),%dl
    andb    $0x7f,%bl
    movb    %dl,(%edi)
//-----
    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
//    shll    $16,%edx
    andb    $0x7f,%bl
    incl    %edi
//-----
    movb    (%eax),%dh
    movb    %dh,(%edi)
//-----
    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    incl    %edi
//-----
    adcb    %cl,%bl
    movb    (%eax),%dl
    andb    $0x7f,%bl
    movb    %dl,(%edi)
//    movl    %edx,(%edi)
//    addl    $4,%edi

    decb    %ch
    jnz     vhloop

vhnearlydone:
//    movl    C(pixelcount)

vhdone:
    popl    %ebx
    popl    %edi
    popl    %esi
    popl    %ebp
    ret


// --------------------------------------------------------------------------
// Rotate a buffer 90 degree in clockwise order after horiz.col. draws
//
// As written this gathers four bytes (two through esi, two through ebx,
// each advanced by 200 per read) into eax and stores one dword at the
// address loaded from dc_colormap.
//
// NOTE(review): %ebx is dereferenced and advanced but is never loaded in
// this routine, so it still holds whatever the caller left in it.  There
// is also no loop.  This looks unfinished -- confirm before enabling
// HORIZONTALDRAW.
// --------------------------------------------------------------------------
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_RotateBuffer)
C(R_RotateBuffer):
    pushl   %ebp
    pushl   %esi
    pushl   %edi
    pushl   %ebx

    movl    C(dc_source),%esi
    movl    C(dc_colormap),%edi

    movb    (%esi),%ah
    addl    $200,%esi
    movb    (%ebx),%al
    addl    $200,%ebx
    bswap   %eax
    movb    (%esi),%ah
    addl    $200,%esi
    movb    (%ebx),%al
    addl    $200,%ebx
    movl    %eax,(%edi)
    addl    $4,%edi

    popl    %ebx
    popl    %edi
    popl    %esi
    popl    %ebp
    ret
#endif

//----------------------------------------------------------------------
//13-02-98:
//   R_DrawSkyColumn : same as R_DrawColumn but:
//
//   - wrap around 256 instead of 127.
//   this is needed because we have a higher texture for mouselook,
//   we need at least 200 lines for the sky.
//
//   NOTE: the sky should never wrap, so it could use a faster method.
//         for the moment, we'll still use a wrapping method...
//
//   IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
//
//----------------------------------------------------------------------
// R_DrawSkyColumn_8 takes no stack arguments and reads the same dc_*
// globals as R_DrawColumn_8.  The texture index is the whole of bl
// (initial mask 0xff, no per-pixel masking), so it wraps at 256.
// Row offsets p1b..p4b are patched by ASM_PatchRowBytes.
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_DrawSkyColumn_8)
C(R_DrawSkyColumn_8):
    pushl   %ebp
    pushl   %esi
    pushl   %edi
    pushl   %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl    C(dc_yl),%ebp
    movl    %ebp,%ebx
    movl    C(ylookup)(,%ebx,4),%edi
    movl    C(dc_x),%ebx
    addl    C(columnofs)(,%ebx,4),%edi  // edi = dest
//
// pixelcount = yh - yl + 1
//
    movl    C(dc_yh),%eax
    incl    %eax
    subl    %ebp,%eax                   // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     vskydone                    // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl    C(dc_iscale),%ecx           // fracstep
    movl    C(centery),%eax
    subl    %ebp,%eax
    imul    %ecx,%eax
    movl    C(dc_texturemid),%edx
    subl    %eax,%edx
    movl    %edx,%ebx
    shrl    $16,%ebx                    // frac int.
    andl    $0x000000ff,%ebx
    shll    $16,%edx                    // y frac up

    movl    %ecx,%ebp
    shll    $16,%ebp                    // fracstep f. up
    shrl    $16,%ecx                    // fracstep i. ->cl
    movl    C(dc_source),%esi
//
// lets rock :) !
//
    movl    C(pixelcount),%eax
    movb    %al,%dh
    shrl    $2,%eax
    movb    %al,%ch                     // quad count
    movl    C(dc_colormap),%eax
    testb   $3,%dh
    jz      v4skyquadloop
//
//  do un-even pixel
//
    testb   $1,%dh
    jz      2f

    movb    (%esi,%ebx),%al             // prep un-even loops
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    movb    %dl,(%edi)                  // output pixel
    addl    C(vidwidth),%edi
//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,%dh
    jz      3f

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    movb    %dl,(%edi)                  // output pixel

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    addl    C(vidwidth),%edi
    movb    %dl,(%edi)                  // output pixel

    addl    C(vidwidth),%edi
//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch                   // test quad count
    jz      vskydone
//
// ebp : ystep frac. upper 24 bits
// edx : y     frac. upper 24 bits
// ebx : y     i.    low byte (bl) is the texture index; not re-masked here
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
v4skyquadloop:
//    .align  4
vskyquadloop:
    movb    (%esi,%ebx),%al             // prep loop
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    movb    %dl,(%edi)                  // output pixel

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
p1b: movb   %dl,0x12345678(%edi)        // displacement patched to 1 * vidwidth

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
p2b: movb   %dl,2*0x12345678(%edi)      // displacement patched to 2 * vidwidth

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
p3b: movb   %dl,3*0x12345678(%edi)      // displacement patched to 3 * vidwidth

p4b: addl   $4*0x12345678,%edi          // immediate patched to 4 * vidwidth

    decb    %ch
    jnz     vskyquadloop

vskydone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret


//----------------------------------------------------------------------
//
// R_DrawSpan
//
// Horizontal texture mapping
//
// Takes no stack arguments; reads ds_x1, ds_x2, ds_y, ds_xfrac, ds_yfrac,
// ds_xstep, ds_ystep, ds_source, ds_colormap, ylookup and columnofs.
// The composite step and the texture base address are written into the
// instruction stream at hpatch1..hpatch4 on every call.
// Pixels are produced two at a time and stored with one 16-bit write;
// a final odd pixel is written at hchecklast.
//
//----------------------------------------------------------------------

    .data

ystep:          .long   0
xstep:          .long   0
C(texwidth):    .long   64              // texture width
#if !defined( LINUX)
    .text
#endif
#ifdef LINUX
    .align 2
#else
    .align 4
#endif
.globl C(R_DrawSpan_8)
C(R_DrawSpan_8):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx
//
// find loop count
//
    movl    C(ds_x2),%eax
    incl    %eax
    subl    C(ds_x1),%eax               // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    js      hdone                       // nothing to scale
    shrl    $1,%eax                     // double pixel count
    movl    %eax,C(loopcount)
//
// build composite position
//
    movl    C(ds_xfrac),%ebp
    shll    $10,%ebp
    andl    $0x0ffff0000,%ebp
    movl    C(ds_yfrac),%eax
    shrl    $6,%eax
    andl    $0x0ffff,%eax
    movl    C(ds_y),%edi
    orl     %eax,%ebp

    movl    C(ds_source),%esi
//
// calculate screen dest
//
    movl    C(ylookup)(,%edi,4),%edi
    movl    C(ds_x1),%eax
    addl    C(columnofs)(,%eax,4),%edi
//
// build composite step
//
    movl    C(ds_xstep),%ebx
    shll    $10,%ebx
    andl    $0x0ffff0000,%ebx
    movl    C(ds_ystep),%eax
    shrl    $6,%eax
    andl    $0x0ffff,%eax
    orl     %eax,%ebx

    //movl        %eax,OFFSET hpatch1+2        // convince tasm to modify code...
    movl    %ebx,hpatch1+2              // composite step -> immediate of the addl at hpatch1
    //movl        %eax,OFFSET hpatch2+2        // convince tasm to modify code...
    movl    %ebx,hpatch2+2              // composite step -> immediate of the addl at hpatch2
    movl    %esi,hpatch3+2              // texture base -> displacement of the load at hpatch3
    movl    %esi,hpatch4+2              // texture base -> displacement of the load at hpatch4

// %eax      aligned colormap
// %ebx      aligned colormap
// %ecx,%edx scratch
// %esi      virtual source
// %edi      moving destination pointer
// %ebp      frac

    movl    C(ds_colormap),%eax
//    shld    $22,%ebp,%ecx           // begin calculating third pixel (y units)
//    shld    $6,%ebp,%ecx            // begin calculating third pixel (x units)
    movl    %ebp,%ecx
    addl    %ebx,%ebp                   // advance frac pointer
    shrw    $10,%cx
    roll    $6,%ecx
    andl    $4095,%ecx                  // finish calculation for third pixel
//    shld    $22,%ebp,%edx           // begin calculating fourth pixel (y units)
//    shld    $6,%ebp,%edx            // begin calculating fourth pixel (x units)
    movl    %ebp,%edx
    shrw    $10,%dx
    roll    $6,%edx
    addl    %ebx,%ebp                   // advance frac pointer
    andl    $4095,%edx                  // finish calculation for fourth pixel
    movl    %eax,%ebx
    movb    (%esi,%ecx),%al             // get first pixel
    movb    (%esi,%edx),%bl             // get second pixel
    testl   $0x0fffffffe,C(pixelcount)
    movb    (%eax),%dl                  // color translate first pixel

//    jnz     hdoubleloop             // at least two pixels to map
//    jmp     hchecklast

//    movw    $0xf0f0,%dx             //see visplanes start

    jz      hchecklast                  // flags are from the testl; the movb does not change them
    movb    (%ebx),%dh                  // color translate second pixel
    movl    C(loopcount),%esi           // esi becomes the loop counter (texture base now lives in the patched loads)
//    .align  4
hdoubleloop:
//    shld    $22,%ebp,%ecx           // begin calculating third pixel (y units)
//    shld    $6,%ebp,%ecx            // begin calculating third pixel (x units)
    movl    %ebp,%ecx
    shrw    $10,%cx
    roll    $6,%ecx
hpatch1:
    addl    $0x012345678,%ebp           // advance frac pointer
    movw    %dx,(%edi)                  // write first pixel (dl) and second pixel (dh) together
    andl    $4095,%ecx                  // finish calculation for third pixel
//    shld    $22,%ebp,%edx           // begin calculating fourth pixel (y units)
//    shld    $6,%ebp,%edx            // begin calculating fourth pixel (x units)
    movl    %ebp,%edx
    shrw    $10,%dx
    roll    $6,%edx
hpatch3:
    movb    0x012345678(%ecx),%al       // get third pixel
//    movb    %bl,1(%edi)             // write second pixel
    andl    $4095,%edx                  // finish calculation for fourth pixel
hpatch2:
    addl    $0x012345678,%ebp           // advance frac pointer
hpatch4:
    movb    0x012345678(%edx),%bl       // get fourth pixel
    movb    (%eax),%dl                  // color translate third pixel
    addl    $2,%edi                     // advance to third pixel destination
    decl    %esi                        // done with loop?
    movb    (%ebx),%dh                  // color translate fourth pixel
    jnz     hdoubleloop                 // flags are from the decl; the movb does not change them

// check for final pixel
hchecklast:
    testl   $1,C(pixelcount)
    jz      hdone
    movb    %dl,(%edi)                  // write final pixel

hdone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret
//.endif


//----------------------------------------------------------------------
// R_DrawTransColumn
//
//   Vertical column texture drawer, with transparency. Replaces Doom2's
//   'fuzz' effect, which was not so beautiful.
//   Transparency is always impressive in some way, don't know why...
//
// Per pixel: eax = dc_transmap with ah = texel and al = the byte already
// at the destination; the byte read at (eax) is then used as the low
// byte of edx (= dc_colormap) for a second lookup, and that result is
// stored.  Row steps p5..p9 are patched by ASM_PatchRowBytes.
//
// NOTE(review): on entry to the quad loop, p5 reads the background byte
// from edi + vidwidth, but the pixel computed from it is stored at edi
// (via ebp).  Every later pixel reads and writes the same row.  This
// looks like an off-by-one-row read for the first quad-loop pixel --
// confirm intent.
// NOTE(review): the last iteration also reads one destination byte past
// the final pixel (the value is unused).
//----------------------------------------------------------------------
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_DrawTranslucentColumn_8)
C(R_DrawTranslucentColumn_8):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl    C(dc_yl),%ebp
    movl    %ebp,%ebx
    movl    C(ylookup)(,%ebx,4),%edi
    movl    C(dc_x),%ebx
    addl    C(columnofs)(,%ebx,4),%edi  // edi = dest
//
// pixelcount = yh - yl + 1
//
    movl    C(dc_yh),%eax
    incl    %eax
    subl    %ebp,%eax                   // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     vtdone                      // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl    C(dc_iscale),%ecx           // fracstep
    movl    C(centery),%eax
    subl    %ebp,%eax
    imul    %ecx,%eax
    movl    C(dc_texturemid),%edx
    subl    %eax,%edx
    movl    %edx,%ebx

    shrl    $16,%ebx                    // frac int.
    andl    $0x0000007f,%ebx
    shll    $16,%edx                    // y frac up

    movl    %ecx,%ebp
    shll    $16,%ebp                    // fracstep f. up
    shrl    $16,%ecx                    // fracstep i. ->cl
    andb    $0x7f,%cl
    pushw   %cx                         // keep the step integer (cl) ...
    movl    %edx,%ecx                   // ... while ecx takes y frac in its upper half ...
    popw    %cx                         // ... then put the low word back
    movl    C(dc_colormap),%edx
    movl    C(dc_source),%esi
//
// lets rock :) !
//
    movl    C(pixelcount),%eax
    shrl    $2,%eax
    testb   $0x03,C(pixelcount)
    movb    %al,%ch                     // quad count
    movl    C(dc_transmap),%eax
    jz      vt4quadloop                 // flags are from the testb; the two movs do not change them
//
//  do un-even pixel
//
    testb   $1,C(pixelcount)
    jz      2f

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    addl    %ebp,%ecx
    adcb    %cl,%bl
    movb    (%edi),%al                  // fetch dest  : index into colormap
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%edi)
    addl    C(vidwidth),%edi
//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,C(pixelcount)
    jz      3f

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    addl    %ebp,%ecx
    adcb    %cl,%bl
    movb    (%edi),%al                  // fetch dest  : index into colormap
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%edi)
    addl    C(vidwidth),%edi

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    addl    %ebp,%ecx
    adcb    %cl,%bl
    movb    (%edi),%al                  // fetch dest  : index into colormap
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%edi)
    addl    C(vidwidth),%edi
//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch                   // test quad count
    jz      vtdone

//
// tystep : ystep frac. upper 24 bits
// edx : upper 24 bit : colomap
//  dl : tmp pixel to write
// ebx : y     i.    lower 7 bits, masked for index
// ecx : y     frac. upper 16 bits
// ecx : ch = counter, cl = y step i.
// eax : transmap aligned 65535 (upper 16 bit)
//  ah : foreground pixel (from the texture)
//  al : background pixel (from the screen buffer)
// esi : source texture column
// ebp,edi : dest screen (the two pointers alternate, each stepping 2 rows)
//
vt4quadloop:
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
p5: movb    0x12345678(%edi),%al        // fetch dest  : index into colormap
                                        // (displacement patched to 1 * vidwidth)

    movl    %ebp,C(tystep)              // free ebp: the frac step is added from memory in the loop
    movl    %edi,%ebp
    subl    C(vidwidth),%edi
    jmp     inloop
//    .align  4
vtquadloop:
    addl    C(tystep),%ecx
    adcb    %cl,%bl
p6: addl    $2*0x12345678,%ebp          // immediate patched to 2 * vidwidth
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%edi)
    movb    (%ebp),%al                  // fetch dest  : index into colormap
inloop:
    addl    C(tystep),%ecx
    adcb    %cl,%bl
p7: addl    $2*0x12345678,%edi          // immediate patched to 2 * vidwidth
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%ebp)
    movb    (%edi),%al                  // fetch dest  : index into colormap

    addl    C(tystep),%ecx
    adcb    %cl,%bl
p8: addl    $2*0x12345678,%ebp          // immediate patched to 2 * vidwidth
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%edi)
    movb    (%ebp),%al                  // fetch dest  : index into colormap

    addl    C(tystep),%ecx
    adcb    %cl,%bl
p9: addl    $2*0x12345678,%edi          // immediate patched to 2 * vidwidth
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    (%edx), %dl                 // use colormap now !
    movb    %dl,(%ebp)
    movb    (%edi),%al                  // fetch dest  : index into colormap

    decb    %ch
    jnz     vtquadloop

vtdone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret

// NOTE(review): the USEASM guard closes here, yet the routines that follow
// define labels (sh5..sh9, w1..w4) that ASM_PatchRowBytes, inside the
// guard, patches.  Confirm the guard placement is intended.
#endif // ifdef USEASM


//----------------------------------------------------------------------
// R_DrawShadeColumn
//
//   for smoke..etc.. test.
//
// Same structure as the translucent drawer, but with a single lookup:
// eax = colormaps with ah = texel and al = the byte already at the
// destination; the byte read at (eax) is stored directly.
// Row steps sh5..sh9 are patched by ASM_PatchRowBytes.
//
// NOTE(review): as with p5 in the translucent drawer, sh5 reads the
// first quad-loop background byte from edi + vidwidth while the result
// is stored at edi -- confirm intent.
//----------------------------------------------------------------------
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_DrawShadeColumn_8)
C(R_DrawShadeColumn_8):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl    C(dc_yl),%ebp
    movl    %ebp,%ebx
    movl    C(ylookup)(,%ebx,4),%edi
    movl    C(dc_x),%ebx
    addl    C(columnofs)(,%ebx,4),%edi  // edi = dest
//
// pixelcount = yh - yl + 1
//
    movl    C(dc_yh),%eax
    incl    %eax
    subl    %ebp,%eax                   // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     shdone                      // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl    C(dc_iscale),%ecx           // fracstep
    movl    C(centery),%eax
    subl    %ebp,%eax
    imul    %ecx,%eax
    movl    C(dc_texturemid),%edx
    subl    %eax,%edx
    movl    %edx,%ebx
    shrl    $16,%ebx                    // frac int.
    andl    $0x0000007f,%ebx
    shll    $16,%edx                    // y frac up

    movl    %ecx,%ebp
    shll    $16,%ebp                    // fracstep f. up
    shrl    $16,%ecx                    // fracstep i. ->cl
    andb    $0x7f,%cl

    movl    C(dc_source),%esi
//
// lets rock :) !
//
    movl    C(pixelcount),%eax
    movb    %al,%dh
    shrl    $2,%eax
    movb    %al,%ch                     // quad count
    movl    C(colormaps),%eax
    testb   $0x03,%dh
    jz      sh4quadloop
//
//  do un-even pixel
//
    testb   $1,%dh
    jz      2f

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%edi),%al                  // fetch dest  : index into colormap
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    %dl,(%edi)
    addl    C(vidwidth),%edi
//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,%dh
    jz      3f

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%edi),%al                  // fetch dest  : index into colormap
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    %dl,(%edi)
    addl    C(vidwidth),%edi

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%edi),%al                  // fetch dest  : index into colormap
    andb    $0x7f,%bl
    movb    (%eax),%dl
    movb    %dl,(%edi)
    addl    C(vidwidth),%edi
//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch                   // test quad count
    jz      shdone

//
// ebp : ystep frac. upper 24 bits
// edx : y     frac. upper 24 bits
// ebx : y     i.    lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
sh4quadloop:
    movb    $0x7f,%dh                   // prep mask

    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
sh5: movb   0x12345678(%edi),%al        // fetch dest  : index into colormap
                                        // (displacement patched to 1 * vidwidth)

    movl    %ebp,C(tystep)              // free ebp: the frac step is added from memory in the loop
    movl    %edi,%ebp
    subl    C(vidwidth),%edi
    jmp     shinloop
//    .align  4
shquadloop:
    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
sh6: addl   $2*0x12345678,%ebp          // immediate patched to 2 * vidwidth
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    %dl,(%edi)
    movb    (%ebp),%al                  // fetch dest  : index into colormap
shinloop:
    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
sh7: addl   $2*0x12345678,%edi          // immediate patched to 2 * vidwidth
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    %dl,(%ebp)
    movb    (%edi),%al                  // fetch dest  : index into colormap

    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
sh8: addl   $2*0x12345678,%ebp          // immediate patched to 2 * vidwidth
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    %dl,(%edi)
    movb    (%ebp),%al                  // fetch dest  : index into colormap

    addl    C(tystep),%edx
    adcb    %cl,%bl
    andb    %dh,%bl
sh9: addl   $2*0x12345678,%edi          // immediate patched to 2 * vidwidth
    movb    (%eax),%dl
    movb    (%esi,%ebx),%ah             // fetch texel : colormap number
    movb    %dl,(%ebp)
    movb    (%edi),%al                  // fetch dest  : index into colormap

    decb    %ch
    jnz     shquadloop

shdone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret


//----------------------------------------------------------------------
//
//  R_DrawWaterColumn : basically it's just a copy of R_DrawColumn,
//                      but it uses dc_colormap from dc_yl to dc_yw-1
//                      then it uses dc_wcolormap from dc_yw to dc_yh
//
//  Thus, the 'underwater' part of the walls is remapped to 'water-like'
//  colors.
//
//----------------------------------------------------------------------
// NOTE(review): the banner above describes a dc_colormap / dc_wcolormap
// split at dc_yw, but the code below loads only dc_wcolormap and never
// reads dc_yw or dc_colormap; the whole column goes through
// dc_wcolormap.  Confirm which behaviour is intended.
// Row offsets w1..w4 are patched by ASM_PatchRowBytes.
#ifdef LINUX
    .align 2
#else
    .align 5
#endif
.globl C(R_DrawWaterColumn)
C(R_DrawWaterColumn):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
    movl    C(dc_yl),%ebp
    movl    %ebp,%ebx
    movl    C(ylookup)(,%ebx,4),%edi
    movl    C(dc_x),%ebx
    addl    C(columnofs)(,%ebx,4),%edi  // edi = dest
//
// pixelcount = yh - yl + 1
//
    movl    C(dc_yh),%eax
    incl    %eax
    subl    %ebp,%eax                   // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     wdone                       // nothing to scale
                                        // (flags still come from the subl: movl leaves them alone)
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
    movl    C(dc_iscale),%ecx           // fracstep
    movl    C(centery),%eax
    subl    %ebp,%eax
    imul    %ecx,%eax
    movl    C(dc_texturemid),%edx
    subl    %eax,%edx
    movl    %edx,%ebx
    shrl    $16,%ebx                    // frac int.
    andl    $0x0000007f,%ebx
    shll    $16,%edx                    // y frac up

    movl    %ecx,%ebp
    shll    $16,%ebp                    // fracstep f. up
    shrl    $16,%ecx                    // fracstep i. ->cl
    andb    $0x7f,%cl
    movl    C(dc_source),%esi
//
// lets rock :) !
//
    movl    C(pixelcount),%eax
    movb    %al,%dh                     // dh = low byte of pixelcount (tested for leftovers below)
    shrl    $2,%eax
    movb    %al,%ch                     // quad count
    movl    C(dc_wcolormap),%eax
    testb   $3,%dh
    jz      w4quadloop
//
//  do un-even pixel
//
    testb   $1,%dh
    jz      2f

    movb    (%esi,%ebx),%al             // prep un-even loops
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    andb    $0x7f,%bl                   // mask 0-127 texture index
    movb    %dl,(%edi)                  // output pixel
    addl    C(vidwidth),%edi
//
//  do two non-quad-aligned pixels
//
2:
    testb   $2,%dh
    jz      3f

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    andb    $0x7f,%bl                   // mask 0-127 texture index
    movb    %dl,(%edi)                  // output pixel
    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    andb    $0x7f,%bl                   // mask 0-127 texture index
    addl    C(vidwidth),%edi
    movb    %dl,(%edi)                  // output pixel
    addl    C(vidwidth),%edi
//
//  test if there was at least 4 pixels
//
3:
    testb   $0xFF,%ch                   // test quad count
    jz      wdone
//
// ebp : ystep frac. upper 24 bits
// edx : y     frac. upper 24 bits
// ebx : y     i.    lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
w4quadloop:
    movb    $0x7f,%dh                   // prep mask
//    .align  4
wquadloop:
    movb    (%esi,%ebx),%al             // prep loop
    addl    %ebp,%edx                   // ypos f += ystep f
    adcb    %cl,%bl                     // ypos i += ystep i
    movb    (%eax),%dl                  // colormap texel
    movb    %dl,(%edi)                  // output pixel
    andb    $0x7f,%bl                   // mask 0-127 texture index

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
w1: movb    %dl,0x12345678(%edi)        // displacement patched to 1 * vidwidth
    andb    $0x7f,%bl

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
w2: movb    %dl,2*0x12345678(%edi)      // displacement patched to 2 * vidwidth
    andb    $0x7f,%bl

    movb    (%esi,%ebx),%al             // fetch source texel
    addl    %ebp,%edx
    adcb    %cl,%bl
    movb    (%eax),%dl
w3: movb    %dl,3*0x12345678(%edi)      // displacement patched to 3 * vidwidth
    andb    $0x7f,%bl

w4: addl    $4*0x12345678,%edi          // immediate patched to 4 * vidwidth

    decb    %ch
    jnz     wquadloop

wdone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret


//----------------------------------------------------------------------
//
// R_DrawSpanNoWrap
//
//   Horizontal texture mapping, does not remap colors,
//   neither needs to wrap around the source texture.
//
//   Thus, a special optimisation can be used...
//
//----------------------------------------------------------------------
// R_DrawSpanNoWrap takes no stack arguments.  It reads ds_x1, ds_x2,
// ds_y, ds_xstep, ds_ystep, ds_xfrac, ds_yfrac, ds_source, ds_colormap
// and vidwidth.  Here ds_y is used directly as the destination address
// (no ylookup/columnofs lookup).  Texels are copied unmodified: the
// colormap lookups are commented out.
//
// advancetable holds the two possible source advances per pixel:
//   advancetable+4 : base advance  = (ystep >> 16) * rowsize + (xstep >> 16)
//   advancetable   : extra advance = base advance + rowsize
// where rowsize is the value loaded from vidwidth.
// NOTE(review): the original comments call this factor "texwidth", but
// the code loads vidwidth, not texwidth -- confirm which is intended.
//
// Per pixel: "addl ystep" produces a carry, "sbbl %ecx,%ecx" turns it
// into 0 / -1, which selects the table entry; the carry of "addl xstep"
// is folded in by the adcl.

    .data

advancetable:   .long   0, 0
#if !defined( LINUX)
    .text
#endif
#ifdef LINUX
    .align 2
#else
    .align 4
#endif
.globl C(R_DrawSpanNoWrap)
C(R_DrawSpanNoWrap):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx
//
// find loop count
//
    movl    C(ds_x2),%eax
    incl    %eax
    subl    C(ds_x1),%eax               // pixel count
    movl    %eax,C(pixelcount)          // save for final pixel
    jle     htvdone                     // nothing to scale
//    shrl    $1,%eax                 // double pixel count
//    movl    %eax,C(loopcount)
//
// calculate screen dest
//
    movl    C(ds_y),%edi                //full destination start address
//
// set up advancetable
//
    movl    C(ds_xstep),%ebp
    movl    C(ds_ystep),%ecx
    movl    %ecx,%eax
    movl    %ebp,%edx
    sarl    $16,%edx                    // xstep >>= 16;
    movl    C(vidwidth),%ebx
    sarl    $16,%eax                    // ystep >>= 16;
    jz      0f
    imull   %ebx,%eax                   // (ystep >> 16) * texwidth;
0:
    addl    %edx,%eax                   // add in xstep
                                        // (ystep >> 16) * texwidth + (xstep >> 16);
    movl    %eax,advancetable+4         // advance base in y
    addl    %ebx,%eax                   // ((ystep >> 16) + 1) * texwidth +
                                        //  (xstep >> 16);
    movl    %eax,advancetable           // advance extra in y

    shll    $16,%ebp                    // left-justify xstep fractional part
    movl    %ebp,xstep
    shll    $16,%ecx                    // left-justify ystep fractional part
    movl    %ecx,ystep
//
// calculate the texture starting address
//
    movl    C(ds_source),%esi           // texture source

    movl    C(ds_yfrac),%eax
    movl    %eax,%edx
    sarl    $16,%eax
    movl    C(ds_xfrac),%ecx
    imull   %ebx,%eax                   // (yfrac >> 16) * texwidth
    movl    %ecx,%ebx
    sarl    $16,%ecx
    movl    %ecx,%ebp
    addl    %eax,%ebp                   // source = (xfrac >> 16) +
                                        //           ((yfrac >> 16) * texwidth);
//
//  esi : texture source
//  edi : screen dest
//  eax : colormap aligned on 256 boundary, hehehe...
//  ebx : xfrac << 16
//  ecx : used in loop, contains either 0 or -1, *4, offset into advancetable
//  edx : yfrac << 16
//  ebp : offset into texture
//
    shll    $16,%edx                    // yfrac upper word, lower byte will be used
    movl    C(ds_colormap),%eax
    shll    $16,%ebx                    // xfrac upper word, lower unused

    movl    C(pixelcount),%ecx
    shrl    $2,%ecx
    movb    %cl,%dh                     // quad pixels count

    movl    C(pixelcount),%ecx
    andl    $3,%ecx
    jz      htvquadloop                 // pixelcount is multiple of 4
    decl    %ecx
    jz      1f
    decl    %ecx
    jz      2f
//
//  do one to three pixels first
//
    addl    ystep,%edx                  // yfrac += ystep
    sbbl    %ecx,%ecx                   // turn carry into 0 or -1 if set
    movb    (%esi,%ebp),%al             // get texture pixel
    addl    xstep,%ebx                  // xfrac += xstep
//    movb    (%eax),%dl              // pixel goes through colormap
    adcl    advancetable+4(,%ecx,4),%ebp // advance source
    movb    %al,(%edi)                  // write pixel dest

    incl    %edi
2:
    addl    ystep,%edx                  // yfrac += ystep
    sbbl    %ecx,%ecx                   // turn carry into 0 or -1 if set
    movb    (%esi,%ebp),%al             // get texture pixel
    addl    xstep,%ebx                  // xfrac += xstep
//    movb    (%eax),%dl              // pixel goes through colormap
    adcl    advancetable+4(,%ecx,4),%ebp // advance source
    movb    %al,(%edi)                  // write pixel dest

    incl    %edi
1:
    addl    ystep,%edx                  // yfrac += ystep
    sbbl    %ecx,%ecx                   // turn carry into 0 or -1 if set
    movb    (%esi,%ebp),%al             // get texture pixel
    addl    xstep,%ebx                  // xfrac += xstep
//    movb    (%eax),%dl              // pixel goes through colormap
    adcl    advancetable+4(,%ecx,4),%ebp // advance source
    movb    %al,(%edi)                  // write pixel dest

    incl    %edi
//
//  test if there was at least 4 pixels
//
    testb   $0xFF,%dh
    jz      htvdone
//
//  four pixels per loop iteration
//  U
//  V
htvquadloop:
    addl    ystep,%edx                  // yfrac += ystep
    sbbl    %ecx,%ecx                   // turn carry into 0 or -1 if set
    movb    (%esi,%ebp),%al             // get texture pixel
    addl    xstep,%ebx                  // xfrac += xstep
//    movb    (%eax),%dl              // pixel goes through colormap
    adcl    advancetable+4(,%ecx,4),%ebp // advance source
    movb    %al,(%edi)                  // write pixel dest

    addl    ystep,%edx
    sbbl    %ecx,%ecx
    movb    (%esi,%ebp),%al
    addl    xstep,%ebx
//    movb    (%eax),%dl
    adcl    advancetable+4(,%ecx,4),%ebp
    movb    %al,1(%edi)

    addl    ystep,%edx
    sbbl    %ecx,%ecx
    movb    (%esi,%ebp),%al
    addl    xstep,%ebx
//    movb    (%eax),%dl
    adcl    advancetable+4(,%ecx,4),%ebp
    movb    %al,2(%edi)

    addl    ystep,%edx
    sbbl    %ecx,%ecx
    movb    (%esi,%ebp),%al
    addl    xstep,%ebx
//    movb    (%eax),%dl
    adcl    advancetable+4(,%ecx,4),%ebp
    movb    %al,3(%edi)

    addl    $4, %edi
    incl    %ecx                        //dummy

    decb    %dh
    jnz     htvquadloop                 // pairs in the V-pipe
htvdone:
    popl    %ebx                        // restore register variables
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret
//.endif


#ifdef HORIZONTALDRAW
//----------------------------------------------------------------------
// void R_RotateBufferasm (void)
//
// Reads bytes through esi (loaded from dc_source) at offsets 0, -200,
// ... -7*200, packs them into two dwords and stores 8 bytes per inner
// iteration through edi (loaded from dc_colormap).  The inner loop runs
// 40 times, the outer loop 200 times.
//
// FIX: the epilogue used to pop ebp, esi, edi, ebx -- the same order as
// the pushes -- so every callee-saved register was handed back to the
// caller holding another register's saved value.  The pops now mirror
// the pushes in reverse (ebx, edi, esi, ebp).
//----------------------------------------------------------------------
#ifdef LINUX
    .align 2
#else
    .align 4
#endif
.globl C(R_RotateBufferasm)
C(R_RotateBufferasm):
    pushl   %ebp                        // preserve caller's stack frame pointer
    pushl   %esi                        // preserve register variables
    pushl   %edi
    pushl   %ebx

    movl    C(dc_source),%esi
    movl    C(dc_colormap),%edi

    movl    $200,%edx                   // outer loop count
ra2:
    movl    $40,%ecx                    // inner loop count (8 bytes stored per pass)
ra:
    movb    -2*200(%esi),%al
    movb    -6*200(%esi),%bl
    movb    -3*200(%esi),%ah
    movb    -7*200(%esi),%bh
    shll    $16,%eax                    // move the two bytes just read to the upper half
    shll    $16,%ebx
    movb    (%esi),%al
    movb    -4*200(%esi),%bl
    movb    -1*200(%esi),%ah
    movb    -5*200(%esi),%bh
    movl    %eax,(%edi)
    subl    $8*200,%esi
    movl    %ebx,4(%edi)
    addl    $8,%edi
    decl    %ecx
    jnz     ra

    addl    $320*200+1,%esi             // undo the 40 * 8*200 walk and move on to the next line
//    addl    320-32,%edi

    decl    %edx
    jnz     ra2

    popl    %ebx                        // restore register variables (reverse of push order)
    popl    %edi
    popl    %esi
    popl    %ebp                        // restore caller's stack frame pointer
    ret
#endif