SRB2/src/tmap.s
2018-11-25 07:35:38 -05:00

1587 lines
42 KiB
ArmAsm
Raw Blame History

// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap.s
/// \brief optimised drawing routines for span/column rendering
// structures, must match the C structures!
#include "asm_defs.inc"
// Reminder: only EAX, ECX and EDX may be clobbered freely.
// esi, edi and cs...gs must be saved
/* Attention aux comparaisons! */
/* */
/* Intel_compare: */
/* */
/* cmp A,B // A-B , set flags */
/* jg A_greater_than_B */
/* */
/* AT&T_compare: */
/* */
/* cmp A,B // B-A , set flags */
/* jg B_greater_than_A */
/* */
/* (subtracts the source operand FROM the destination operand, */
/* as on Motorola! ) */
// RAPPEL: Intel
// SECTION:[BASE+INDEX*SCALE+DISP]
// devient SECTION:DISP(BASE,INDEX,SCALE)
//----------------------------------------------------------------------
//
// R_DrawColumn
//
// New optimised version 10-01-1998 by D.Fabrice and P.Boris
// TO DO: optimise it much farther... should take at most 3 cycles/pix
// once it's fixed, add code to patch the offsets so that it
// works in every screen width.
//
//----------------------------------------------------------------------
// Scratch variables shared by the drawing routines in this file.
.data
#ifdef LINUX
.align 2
#else
.align 4
#endif
C(loopcount): .long 0 // R_DrawSpan_8: number of two-pixel loop iterations
C(pixelcount): .long 0 // pixel count of the column/span currently being drawn
C(tystep): .long 0 // fractional texture step saved by the translucent/shade quad loops
C(vidwidth): .long 0 // row stride stored by ASM_PatchRowBytes; use this one outside the inner loops
// so you don't need to patch everywhere...
#ifdef USEASM
// NOTE(review): on LINUX and __OS2__ builds no .text directive is emitted here, so the
// routines that follow stay in .data -- presumably so that the instructions rewritten by
// ASM_PatchRowBytes live in writable memory; confirm.
#if !defined( LINUX) && !defined( __OS2__)
.text
#endif
//
// ASM_PatchRowBytes
//
// Takes one argument (read through ARG1), records it in vidwidth and writes
// 1x, 2x, 3x and 4x that value over the 0x12345678-based placeholder of every
// row-offset instruction labelled in the column drawers. The placeholder is
// the 32-bit field that starts two bytes into each labelled instruction,
// which is why every store targets "label+2".
// Clobbers edx and the flags only.
//
.globl C(ASM_PatchRowBytes)
C(ASM_PatchRowBytes):
pushl %ebp
movl %esp, %ebp // make the stack addressable through ebp
movl ARG1, %edx // edx = the argument = 1 * row stride
movl %edx, C(vidwidth)
// ---- offsets of one row ----
movl %edx,p1+2 // R_DrawColumn_8
movl %edx,p1b+2 // R_DrawSkyColumn_8
movl %edx,w1+2 // R_DrawWaterColumn
movl %edx,p5+2 // R_DrawTranslucentColumn_8
movl %edx,sh5+2 // R_DrawShadeColumn_8
// ---- offsets of two rows ----
addl C(vidwidth),%edx // vidwidth holds the argument stored above
movl %edx,p2+2 // R_DrawColumn_8
movl %edx,p2b+2 // R_DrawSkyColumn_8
movl %edx,w2+2 // R_DrawWaterColumn
movl %edx,p6+2 // R_DrawTranslucentColumn_8: pointer advances
movl %edx,p7+2
movl %edx,p8+2
movl %edx,p9+2
movl %edx,sh6+2 // R_DrawShadeColumn_8: pointer advances
movl %edx,sh7+2
movl %edx,sh8+2
movl %edx,sh9+2
// ---- offsets of three rows ----
addl C(vidwidth),%edx
movl %edx,p3+2 // R_DrawColumn_8
movl %edx,p3b+2 // R_DrawSkyColumn_8
movl %edx,w3+2 // R_DrawWaterColumn
// ---- offsets of four rows ----
addl C(vidwidth),%edx
movl %edx,p4+2 // R_DrawColumn_8
movl %edx,p4b+2 // R_DrawSkyColumn_8
movl %edx,w4+2 // R_DrawWaterColumn
popl %ebp
ret
#ifdef LINUX
.align 2
#else
.align 5
#endif
//
// R_DrawColumn_8
//
// Draws rows dc_yl..dc_yh of screen column dc_x: each texel is fetched from
// dc_source and translated through dc_colormap before being stored.
// No stack arguments are read; the inputs are the dc_* globals, centery,
// ylookup and columnofs. pixelcount is overwritten.
// The texture row index is masked with 0x7f after every step, so it wraps
// at 128 texels.
// The colormap lookup overwrites the low byte of the dc_colormap pointer with
// the texel, so the table has to start on a 256-byte boundary.
// The stores at p1..p3 and the add at p4 carry placeholder offsets that
// ASM_PatchRowBytes rewrites; it has to run before this routine is used.
//
.globl C(R_DrawColumn_8)
C(R_DrawColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel (mov leaves the flags of the subl intact)
jle vdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up: fraction now in bits 16-31, dx is free
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up: fraction in bits 16-31, low word zero
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixelcount, tested below for the 1-3 leading pixels
shrl $2,%eax
movb %al,%ch // quad count (only the low 8 bits of pixelcount/4 fit in ch)
movl C(dc_colormap),%eax
testb $3,%dh
jz v4quadloop // count is a multiple of 4: no leading pixels
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i (plus the carry out of the fraction)
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vdone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256, al = texel
// esi : source texture column
// edi : dest screen
//
v4quadloop:
movb $0x7f,%dh // prep mask (the loop below masks with the immediate 0x7f instead)
// .align 4
vquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
andb $0x7f,%bl // mask 0-127 texture index
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p1: movb %dl,0x12345678(%edi) // offset patched to 1 * vidwidth
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p2: movb %dl,2*0x12345678(%edi) // offset patched to 2 * vidwidth
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p3: movb %dl,3*0x12345678(%edi) // offset patched to 3 * vidwidth
andb $0x7f,%bl
p4: addl $4*0x12345678,%edi // immediate patched to 4 * vidwidth: next group of rows
decb %ch
jnz vquadloop
vdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#ifdef HORIZONTALDRAW
// --------------------------------------------------------------------------
// Horizontal Column Drawer Optimisation
//
// R_DrawHColumn_8: same texture stepping as R_DrawColumn_8, but the
// destination comes from yhlookup[dc_x] + hcolumnofs[dc_yl] and consecutive
// pixels are stored in consecutive bytes.
// NOTE(review): only whole groups of four pixels are drawn. The code at
// vhnearlydone is empty, so the remaining pixelcount % 4 pixels are never
// written -- this looks unfinished; confirm before enabling HORIZONTALDRAW.
// --------------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawHColumn_8)
C(R_DrawHColumn_8):
pushl %ebp // save the registers the routine overwrites
pushl %esi
pushl %edi
pushl %ebx
//
// dest = yhlookup[dc_x] + hcolumnofs[dc_yl];
//
movl C(dc_x),%ebx
movl C(yhlookup)(,%ebx,4),%edi
movl C(dc_yl),%ebp
movl %ebp,%ebx
addl C(hcolumnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vhdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh
shrl $2,%eax
movb %al,%ch // quad count
testb %ch, %ch
jz vhnearlydone // fewer than four pixels: nothing is drawn
movl C(dc_colormap),%eax
decl %edi // pre-decrement: every store below is preceded by an incl
vhloop:
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
andb $0x7f,%bl
incl %edi //-----
movb (%eax),%dh // colormap texel (dh and dl alternate as the pixel temporary)
movb %dh,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
incl %edi // incl leaves the carry flag alone, so the adcb below still sees the carry of the addl
adcb %cl,%bl
movb (%eax),%dl
andb $0x7f,%bl
movb %dl,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
// shll $16,%edx
andb $0x7f,%bl
incl %edi //-----
movb (%eax),%dh
movb %dh,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
incl %edi //-----
adcb %cl,%bl
movb (%eax),%dl
andb $0x7f,%bl
movb %dl,(%edi)
// movl %edx,(%edi)
// addl $4,%edi
decb %ch
jnz vhloop
vhnearlydone:
// movl C(pixelcount)
vhdone:
popl %ebx // restore the saved registers
popl %edi
popl %esi
popl %ebp
ret
// --------------------------------------------------------------------------
// Rotate a buffer 90 degree in clockwise order after horiz.col. draws
//
// As written, the routine assembles a single 32-bit word from four bytes and
// stores it at the address taken from dc_colormap; there is no loop.
// NOTE(review): %ebx is dereferenced below without ever being loaded in this
// routine (it still holds whatever the caller left in it), while %esi is
// loaded from dc_source. This looks like unfinished code -- confirm the
// intended second source pointer before enabling HORIZONTALDRAW.
// --------------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_RotateBuffer)
C(R_RotateBuffer):
pushl %ebp // save the registers the routine overwrites
pushl %esi
pushl %edi
pushl %ebx
movl C(dc_source),%esi // esi = first read pointer
movl C(dc_colormap),%edi // edi = write pointer
movb (%esi),%ah
addl $200,%esi // both read pointers advance 200 bytes per fetch
movb (%ebx),%al // NOTE(review): ebx was never initialised here
addl $200,%ebx
bswap %eax // move the two bytes just read to the top of eax
movb (%esi),%ah
addl $200,%esi
movb (%ebx),%al
addl $200,%ebx
movl %eax,(%edi) // store the four collected bytes
addl $4,%edi
popl %ebx // restore the saved registers
popl %edi
popl %esi
popl %ebp
ret
#endif
//----------------------------------------------------------------------
//13-02-98:
// R_DrawSkyColumn : same as R_DrawColumn but:
//
// - the texture row index wraps at 256 instead of 128 (it is an
// unmasked 8-bit value here).
// this is needed because we have a higher texture for mouselook,
// we need at least 200 lines for the sky.
//
// NOTE: the sky should never wrap, so it could use a faster method.
// for the moment, we'll still use a wrapping method...
//
// IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
//
// Inputs are the same globals as R_DrawColumn_8 (dc_yl, dc_yh, dc_x,
// dc_iscale, dc_texturemid, dc_source, dc_colormap, centery, ylookup,
// columnofs); pixelcount is overwritten. p1b..p4b are patched by
// ASM_PatchRowBytes.
//
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawSkyColumn_8)
C(R_DrawSkyColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vskydone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x000000ff,%ebx // 8-bit index: 0-255
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl (not masked, unlike R_DrawColumn_8)
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixelcount
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_colormap),%eax
testb $3,%dh
jz v4skyquadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i (8-bit add: wraps at 256 by itself)
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vskydone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 8 bits, used unmasked as index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256, al = texel
// esi : source texture column
// edi : dest screen
//
v4skyquadloop:
// .align 4
vskyquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p1b: movb %dl,0x12345678(%edi) // offset patched to 1 * vidwidth
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p2b: movb %dl,2*0x12345678(%edi) // offset patched to 2 * vidwidth
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p3b: movb %dl,3*0x12345678(%edi) // offset patched to 3 * vidwidth
p4b: addl $4*0x12345678,%edi // immediate patched to 4 * vidwidth
decb %ch
jnz vskyquadloop
vskydone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawSpan
//
// Horizontal texture mapping
//
//----------------------------------------------------------------------
.data
ystep: .long 0 // R_DrawSpanNoWrap: fractional part of ds_ystep, left-justified
xstep: .long 0 // R_DrawSpanNoWrap: fractional part of ds_xstep, left-justified
C(texwidth): .long 64 // texture width (not referenced by the routines visible in this file)
// Return to the code section; see the matching directive near the top of the
// file for the LINUX / __OS2__ exception.
#if !defined( LINUX) && !defined( __OS2__)
.text
#endif
#ifdef LINUX
.align 2
#else
.align 4
#endif
//
// R_DrawSpan_8
//
// Draws the horizontal run ds_x1..ds_x2 of row ds_y, two pixels per loop
// iteration. Inputs are the ds_* globals, ylookup and columnofs; pixelcount
// and loopcount are overwritten.
// The texture position is kept as one 32-bit composite: six integer bits of
// x at the top of the high word and six integer bits of y at the top of the
// low word. Each texel offset is (y << 6) | x masked to 0..4095, i.e. the
// source is addressed as a 64x64 block.
// The routine patches its own inner loop on every call: the composite step
// goes into hpatch1/hpatch2 and the ds_source address into hpatch3/hpatch4.
//
.globl C(R_DrawSpan_8)
C(R_DrawSpan_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// find loop count
//
movl C(ds_x2),%eax
incl %eax
subl C(ds_x1),%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
js hdone // negative count: nothing to scale (a zero count falls out through hchecklast)
shrl $1,%eax // double pixel count
movl %eax,C(loopcount)
//
// build composite position
//
movl C(ds_xfrac),%ebp
shll $10,%ebp
andl $0x0ffff0000,%ebp // x: integer bits 0-5 now sit in bits 26-31
movl C(ds_yfrac),%eax
shrl $6,%eax
andl $0x0ffff,%eax // y: integer bits 0-5 now sit in bits 10-15
movl C(ds_y),%edi
orl %eax,%ebp
movl C(ds_source),%esi
//
// calculate screen dest
//
movl C(ylookup)(,%edi,4),%edi
movl C(ds_x1),%eax
addl C(columnofs)(,%eax,4),%edi
//
// build composite step
//
movl C(ds_xstep),%ebx
shll $10,%ebx
andl $0x0ffff0000,%ebx
movl C(ds_ystep),%eax
shrl $6,%eax
andl $0x0ffff,%eax
orl %eax,%ebx
//movl %eax,OFFSET hpatch1+2 // convice tasm to modify code...
movl %ebx,hpatch1+2 // composite step -> immediate of the first addl in the loop
//movl %eax,OFFSET hpatch2+2 // convice tasm to modify code...
movl %ebx,hpatch2+2 // composite step -> immediate of the second addl
movl %esi,hpatch3+2 // texture base -> displacement of the two texel loads
movl %esi,hpatch4+2
// %eax aligned colormap, al = texel of the even pixel
// %ebx aligned colormap, bl = texel of the odd pixel (set up below)
// %ecx,%edx scratch
// %esi virtual source
// %edi moving destination pointer
// %ebp frac
movl C(ds_colormap),%eax
// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
movl %ebp,%ecx
addl %ebx,%ebp // advance frac pointer
shrw $10,%cx // cx = y integer bits
roll $6,%ecx // ecx low 12 bits = (y << 6) | x
andl $4095,%ecx // finish calculation for first pixel
// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
movl %ebp,%edx
shrw $10,%dx
roll $6,%edx
addl %ebx,%ebp // advance frac pointer
andl $4095,%edx // finish calculation for second pixel
movl %eax,%ebx // ebx = second copy of the colormap pointer
movb (%esi,%ecx),%al // get first pixel
movb (%esi,%edx),%bl // get second pixel
testl $0x0fffffffe,C(pixelcount) // fewer than two pixels?
movb (%eax),%dl // color translate first pixel (mov keeps the flags of the testl)
// jnz hdoubleloop // at least two pixels to map
// jmp hchecklast
// movw $0xf0f0,%dx //see visplanes start
jz hchecklast
movb (%ebx),%dh // color translate second pixel
movl C(loopcount),%esi // esi becomes the loop counter; texels are read through hpatch3/4 from here on
// .align 4
hdoubleloop:
// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
movl %ebp,%ecx
shrw $10,%cx
roll $6,%ecx
hpatch1:
addl $0x012345678,%ebp // advance frac pointer
movw %dx,(%edi) // write first and second pixel together (dl, dh)
andl $4095,%ecx // finish calculation for third pixel
// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
movl %ebp,%edx
shrw $10,%dx
roll $6,%edx
hpatch3:
movb 0x012345678(%ecx),%al // get third pixel
// movb %bl,1(%edi) // write second pixel
andl $4095,%edx // finish calculation for fourth pixel
hpatch2:
addl $0x012345678,%ebp // advance frac pointer
hpatch4:
movb 0x012345678(%edx),%bl // get fourth pixel
movb (%eax),%dl // color translate third pixel
addl $2,%edi // advance to third pixel destination
decl %esi // done with loop?
movb (%ebx),%dh // color translate fourth pixel (mov keeps the flags of the decl)
jnz hdoubleloop
// check for final pixel
hchecklast:
testl $1,C(pixelcount)
jz hdone
movb %dl,(%edi) // write final pixel
hdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//.endif
//----------------------------------------------------------------------
// R_DrawTransColumn
//
// Vertical column texture drawer, with transparency. Replaces Doom2's
// 'fuzz' effect, which was not so beautiful.
// Transparency is always impressive in some way, don't know why...
//
// Draws rows dc_yl..dc_yh of screen column dc_x. For every pixel the
// texel (from dc_source) and the pixel already on screen form a 16-bit
// index into dc_transmap; the result is then translated through
// dc_colormap and stored. The lookups overwrite the low 16 bits of the
// dc_transmap pointer and the low 8 bits of the dc_colormap pointer, so
// those tables have to be aligned on 64K and 256 bytes respectively.
// Other inputs: dc_iscale, dc_texturemid, centery, ylookup, columnofs.
// pixelcount and tystep are overwritten. p5..p9 are patched by
// ASM_PatchRowBytes, which has to run before this routine is used.
//
// The quad loop is software-pipelined: each stage blends the texel and
// background fetched by the previous stage. Two things follow from that:
// - the background fetched before entering the loop must be the one of
// the first row (the instruction at p5 reads vidwidth(%edi), so edi is
// moved one row back before it executes);
// - the pixel still in flight when the counter reaches zero must be
// written after the loop, otherwise each run of quads is one pixel short.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawTranslucentColumn_8)
C(R_DrawTranslucentColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vtdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
pushw %cx // merge: ecx = y frac (upper 16 bits) | step int (cl)
movl %edx,%ecx
popw %cx
movl C(dc_colormap),%edx // edx is free now that the fraction lives in ecx
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
shrl $2,%eax
testb $0x03,C(pixelcount)
movb %al,%ch // quad count (mov keeps the flags of the testb)
movl C(dc_transmap),%eax
jz vt4quadloop
//
// do un-even pixel
//
testb $1,C(pixelcount)
jz 2f
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : low byte of the transmap index
andb $0x7f,%bl
movb (%eax),%dl // blend
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,C(pixelcount)
jz 3f
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : low byte of the transmap index
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : low byte of the transmap index
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vtdone
//
// tystep : ystep frac. in the upper 16 bits
// edx : upper 24 bit : colormap
// dl : tmp pixel to write
// ebx : y i. lower 7 bits, masked for index
// ecx : y frac. upper 16 bits
// ecx : ch = counter, cl = y step i.
// eax : transmap aligned 65536 (upper 16 bit)
// ah : foreground pixel (from the texture)
// al : background pixel (from the screen buffer)
// esi : source texture column
// ebp,edi : dest screen, used alternately, two rows apart
//
vt4quadloop:
movb (%esi,%ebx),%ah // texel of the first row
subl C(vidwidth),%edi // edi = one row above the first row; inloop adds two rows before using it
p5: movb 0x12345678(%edi),%al // offset patched to vidwidth: background of the first row
movl %ebp,C(tystep) // ebp is needed as a second pointer, park the step in memory
movl %edi,%ebp
addl C(vidwidth),%ebp // ebp = first row
jmp inloop
// .align 4
vtquadloop:
addl C(tystep),%ecx
adcb %cl,%bl
p6: addl $2*0x12345678,%ebp // immediate patched to 2 * vidwidth
andb $0x7f,%bl
movb (%eax),%dl // blend the pixel fetched by the previous stage
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
movb (%ebp),%al // fetch background for the next stage
inloop:
addl C(tystep),%ecx
adcb %cl,%bl
p7: addl $2*0x12345678,%edi // immediate patched to 2 * vidwidth
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb (%edx), %dl // use colormap now !
movb %dl,(%ebp)
movb (%edi),%al // fetch background for the next stage
addl C(tystep),%ecx
adcb %cl,%bl
p8: addl $2*0x12345678,%ebp // immediate patched to 2 * vidwidth
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
movb (%ebp),%al // fetch background for the next stage
addl C(tystep),%ecx
adcb %cl,%bl
p9: addl $2*0x12345678,%edi // immediate patched to 2 * vidwidth
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb (%edx), %dl // use colormap now !
movb %dl,(%ebp)
movb (%edi),%al // fetch background for the next stage
decb %ch
jnz vtquadloop
//
// drain the pipeline: ah/al hold the texel and background of the last
// row (edi points at it), which no loop stage has written yet
//
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
vtdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif // ifdef USEASM
//----------------------------------------------------------------------
// R_DrawShadeColumn
//
// for smoke..etc.. test.
//
// Draws rows dc_yl..dc_yh of screen column dc_x. For every pixel the
// texel (from dc_source) is used as the high byte and the pixel already
// on screen as the low byte of an index into the table at colormaps; the
// byte found there is stored. The lookup overwrites the low 16 bits of
// the colormaps pointer, so the table has to be aligned on 64K.
// Other inputs: dc_iscale, dc_texturemid, centery, ylookup, columnofs.
// pixelcount and tystep are overwritten. sh5..sh9 are patched by
// ASM_PatchRowBytes, which has to run before this routine is used.
//
// The quad loop is software-pipelined: each stage writes the pixel whose
// texel and background were fetched by the previous stage. Two things
// follow from that:
// - the background fetched before entering the loop must be the one of
// the first row (the instruction at sh5 reads vidwidth(%edi), so edi is
// moved one row back before it executes);
// - the pixel still in flight when the counter reaches zero must be
// written after the loop, otherwise each run of quads is one pixel short.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawShadeColumn_8)
C(R_DrawShadeColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle shdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixelcount
shrl $2,%eax
movb %al,%ch // quad count
movl C(colormaps),%eax
testb $0x03,%dh
jz sh4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz shdone
//
// tystep : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dh = index mask, dl = tmp pixel
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormaps aligned 65536, ah = texel, al = background pixel
// esi : source texture column
// ebp,edi : dest screen, used alternately, two rows apart
//
sh4quadloop:
movb $0x7f,%dh // prep mask
movb (%esi,%ebx),%ah // texel of the first row
subl C(vidwidth),%edi // edi = one row above the first row; shinloop adds two rows before using it
sh5: movb 0x12345678(%edi),%al // offset patched to vidwidth: background of the first row
movl %ebp,C(tystep) // ebp is needed as a second pointer, park the step in memory
movl %edi,%ebp
addl C(vidwidth),%ebp // ebp = first row
jmp shinloop
// .align 4
shquadloop:
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh6: addl $2*0x12345678,%ebp // immediate patched to 2 * vidwidth
movb (%eax),%dl // look up the pixel fetched by the previous stage
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb %dl,(%edi)
movb (%ebp),%al // fetch background for the next stage
shinloop:
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh7: addl $2*0x12345678,%edi // immediate patched to 2 * vidwidth
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb %dl,(%ebp)
movb (%edi),%al // fetch background for the next stage
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh8: addl $2*0x12345678,%ebp // immediate patched to 2 * vidwidth
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb %dl,(%edi)
movb (%ebp),%al // fetch background for the next stage
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh9: addl $2*0x12345678,%edi // immediate patched to 2 * vidwidth
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel for the next stage
movb %dl,(%ebp)
movb (%edi),%al // fetch background for the next stage
decb %ch
jnz shquadloop
//
// drain the pipeline: ah/al hold the texel and background of the last
// row (edi points at it), which no loop stage has written yet
//
movb (%eax),%dl
movb %dl,(%edi)
shdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawWaterColumn : basically it's just a copy of R_DrawColumn,
// but it uses dc_colormap from dc_yl to dc_yw-1
// then it uses dc_wcolormap from dc_yw to dc_yh
//
// Thus, the 'underwater' part of the walls is remapped to 'water-like'
// colors.
//
// NOTE(review): the description above does not match the code below, which
// never reads dc_colormap or dc_yw: the whole run dc_yl..dc_yh is translated
// through dc_wcolormap. Confirm which behaviour is intended.
//
// Inputs are otherwise the same globals as R_DrawColumn_8; pixelcount is
// overwritten. w1..w4 are patched by ASM_PatchRowBytes.
//
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawWaterColumn)
C(R_DrawWaterColumn):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle wdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixelcount
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_wcolormap),%eax // the water colormap is used for every pixel
testb $3,%dh
jz w4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz wdone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256, al = texel
// esi : source texture column
// edi : dest screen
//
w4quadloop:
movb $0x7f,%dh // prep mask (the loop below masks with the immediate 0x7f instead)
// .align 4
wquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
andb $0x7f,%bl // mask 0-127 texture index
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w1: movb %dl,0x12345678(%edi) // offset patched to 1 * vidwidth
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w2: movb %dl,2*0x12345678(%edi) // offset patched to 2 * vidwidth
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w3: movb %dl,3*0x12345678(%edi) // offset patched to 3 * vidwidth
andb $0x7f,%bl
w4: addl $4*0x12345678,%edi // immediate patched to 4 * vidwidth
decb %ch
jnz wquadloop
wdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawSpanNoWrap
//
// Horizontal texture mapping, does not remap colors,
// neither needs to wrap around the source texture.
//
// Thus, a special optimisation can be used...
//
//----------------------------------------------------------------------
.data
// Two source-offset increments used by R_DrawSpanNoWrap:
// advancetable+0 : step taken when the y fraction overflows ("extra")
// advancetable+4 : step taken otherwise ("base")
advancetable: .long 0, 0
// Return to the code section; see the matching directive near the top of the
// file for the LINUX / __OS2__ exception.
#if !defined( LINUX) && !defined( __OS2__)
.text
#endif
#ifdef LINUX
.align 2
#else
.align 4
#endif
//
// R_DrawSpanNoWrap
//
// Copies ds_x2 - ds_x1 + 1 texels to consecutive destination bytes without
// colormap translation and without wrapping the source offset.
// Inputs: ds_x1, ds_x2, ds_y, ds_xstep, ds_ystep, ds_xfrac, ds_yfrac,
// ds_source, vidwidth. pixelcount, xstep, ystep and advancetable are
// overwritten.
// ds_y is used directly as the destination ADDRESS here, not as a row number.
// NOTE(review): the comments below talk about "texwidth", but the row stride
// actually loaded into ebx is vidwidth -- confirm that the source is meant to
// be a screen-width buffer.
// ds_colormap is loaded into eax but every lookup through it is commented
// out; only al is used, as the texel temporary.
//
.globl C(R_DrawSpanNoWrap)
C(R_DrawSpanNoWrap):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// find loop count
//
movl C(ds_x2),%eax
incl %eax
subl C(ds_x1),%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle htvdone // nothing to scale
// shrl $1,%eax // double pixel count
// movl %eax,C(loopcount)
//
// calculate screen dest
//
movl C(ds_y),%edi //full destination start address
//
// set up advancetable
//
movl C(ds_xstep),%ebp
movl C(ds_ystep),%ecx
movl %ecx,%eax
movl %ebp,%edx
sarl $16,%edx // xstep >>= 16;
movl C(vidwidth),%ebx // ebx = source row stride
sarl $16,%eax // ystep >>= 16;
jz 0f // skip the multiply when the integer y step is zero
imull %ebx,%eax // (ystep >> 16) * texwidth;
0:
addl %edx,%eax // add in xstep
// (ystep >> 16) * texwidth + (xstep >> 16);
movl %eax,advancetable+4 // advance base in y
addl %ebx,%eax // ((ystep >> 16) + 1) * texwidth +
// (xstep >> 16);
movl %eax,advancetable // advance extra in y
shll $16,%ebp // left-justify xstep fractional part
movl %ebp,xstep
shll $16,%ecx // left-justify ystep fractional part
movl %ecx,ystep
//
// calculate the texture starting address
//
movl C(ds_source),%esi // texture source
movl C(ds_yfrac),%eax
movl %eax,%edx
sarl $16,%eax
movl C(ds_xfrac),%ecx
imull %ebx,%eax // (yfrac >> 16) * texwidth
movl %ecx,%ebx
sarl $16,%ecx
movl %ecx,%ebp
addl %eax,%ebp // source = (xfrac >> 16) +
// ((yfrac >> 16) * texwidth);
//
// esi : texture source
// edi : screen dest
// eax : colormap aligned on 256 boundary, hehehe...
// ebx : xfrac << 16
// ecx : used in loop, contains either 0 or -1, *4, offset into advancetable
// edx : yfrac << 16, dh = quad counter
// ebp : offset into texture
//
shll $16,%edx // yfrac upper word, lower byte will be used
movl C(ds_colormap),%eax
shll $16,%ebx // xfrac upper word, lower unused
movl C(pixelcount),%ecx
shrl $2,%ecx
movb %cl,%dh // quad pixels count (low 8 bits of pixelcount/4)
movl C(pixelcount),%ecx
andl $3,%ecx
jz htvquadloop // pixelcount is multiple of 4
decl %ecx
jz 1f // one leading pixel
decl %ecx
jz 2f // two leading pixels
//
// do one to three pixels first (falling through here draws three)
//
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source (base or extra step, plus the x carry)
movb %al,(%edi) // write pixel dest
incl %edi
2:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
1:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
//
// test if there was at least 4 pixels
//
testb $0xFF,%dh
jz htvdone
//
// four pixels per loop
// U
// V
htvquadloop:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,1(%edi)
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,2(%edi)
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,3(%edi)
addl $4, %edi
incl %ecx //dummy
decb %dh
jnz htvquadloop // pairs in the V pipe
htvdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//.endif
#ifdef HORIZONTALDRAW
// void R_RotateBufferasm (void)
//
// Copies a buffer stored as columns of 200 bytes into rows of 320 bytes.
// The read pointer comes from dc_source and the write pointer from
// dc_colormap. For every output row, byte k (k = 0..319) is read from
// 200*k bytes BELOW the current read pointer; after each row the read
// pointer moves one byte forward. 200 rows are produced.
// Takes no arguments; clobbers eax, ecx, edx and the flags.
#ifdef LINUX
.align 2
#else
.align 4
#endif
.globl C(R_RotateBufferasm)
C(R_RotateBufferasm):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
movl C(dc_source),%esi
movl C(dc_colormap),%edi
movl $200,%edx // edx = rows left
ra2:
movl $40,%ecx // ecx = groups of 8 bytes left in this row (40 * 8 = 320)
ra:
movb -2*200(%esi),%al // gather output bytes 2,3 (eax) and 6,7 (ebx) first...
movb -6*200(%esi),%bl
movb -3*200(%esi),%ah
movb -7*200(%esi),%bh
shll $16,%eax // ...move them to the upper halves...
shll $16,%ebx
movb (%esi),%al // ...then bytes 0,1 (eax) and 4,5 (ebx)
movb -4*200(%esi),%bl
movb -1*200(%esi),%ah
movb -5*200(%esi),%bh
movl %eax,(%edi) // output bytes 0..3
subl $8*200,%esi // eight source columns consumed
movl %ebx,4(%edi) // output bytes 4..7
addl $8,%edi
decl %ecx
jnz ra
addl $320*200+1,%esi // undo the 320 column steps and move on to the next line
// addl 320-32,%edi
decl %edx
jnz ra2
// The saved registers must come off the stack in the reverse of the order
// in which they were pushed.
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif