SRB2/src/tmap.s
2014-03-15 13:11:35 -04:00

1587 lines
42 KiB
ArmAsm
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2014 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap.s
/// \brief optimised drawing routines for span/column rendering
// structures, must match the C structures!
#include "asm_defs.inc"
// Reminder: only EAX, ECX and EDX may be clobbered freely.
// esi, edi, cs...gs must be saved
/* Beware of comparisons! */
/* */
/* Intel_compare: */
/* */
/* cmp A,B // A-B , set flags */
/* jg A_greater_than_B */
/* */
/* AT&T_compare: */
/* */
/* cmp A,B // B-A , set flags */
/* jg B_greater_than_A */
/* */
/* (subtracts the source operand FROM the destination operand, */
/* as on Motorola! ) */
// REMINDER: Intel
// SECTION:[BASE+INDEX*SCALE+DISP]
// becomes SECTION:DISP(BASE,INDEX,SCALE)
//----------------------------------------------------------------------
//
// R_DrawColumn
//
// New optimised version 10-01-1998 by D.Fabrice and P.Boris
// TO DO: optimise it much farther... should take at most 3 cycles/pix
// once it's fixed, add code to patch the offsets so that it
// works in every screen width.
//
//----------------------------------------------------------------------
// Scratch variables shared by the drawing routines in this file.
// The routines store to them at run time, so they are placed in .data.
.data
#ifdef LINUX
.align 2
#else
.align 4
#endif
C(loopcount): .long 0 // R_DrawSpan_8: pixel count / 2 (two pixels per loop pass)
C(pixelcount): .long 0 // pixel count of the column or span currently being drawn
C(tystep): .long 0 // fractional y step, spilled by the translucent/shade column loops
C(vidwidth): .long 0 // value given to ASM_PatchRowBytes; added to the destination to step one row
// use this one out of the inner loops
//so you don't need to patch everywhere...
#ifdef USEASM
// (the conditional opened above is closed by an #endif further down the file)
#if !defined( LINUX) && !defined( __OS2__)
// NOTE(review): on LINUX / __OS2__ the code below stays in the section selected
// by the preceding .data - presumably so the self-patched instructions are
// writable; confirm against the build setup.
.text
#endif
//----------------------------------------------------------------------
// ASM_PatchRowBytes
//
// Takes one argument (ARG1, defined in asm_defs.inc): the row pitch.
// Stores it in vidwidth, then overwrites the 32-bit placeholder operand
// (0x12345678 and its multiples) found two bytes into each labelled
// instruction of the unrolled column loops with 1x, 2x, 3x or 4x that value.
//
// Only %edx is used as scratch; %ebp is saved and restored.
//----------------------------------------------------------------------
.globl C(ASM_PatchRowBytes)
C(ASM_PatchRowBytes):
pushl %ebp
movl %esp,%ebp // frame pointer, so ARG1 can be addressed
movl ARG1,%edx // edx = rowbytes
movl %edx,C(vidwidth)
//
// sites that need 1 * rowbytes
//
movl %edx,p1+2 // R_DrawColumn_8
movl %edx,p1b+2 // R_DrawSkyColumn_8
movl %edx,w1+2 // R_DrawWaterColumn
movl %edx,p5+2 // R_DrawTranslucentColumn_8
movl %edx,sh5+2 // R_DrawShadeColumn_8
//
// sites that need 2 * rowbytes
//
addl ARG1,%edx
movl %edx,p2+2 // R_DrawColumn_8
movl %edx,p2b+2 // R_DrawSkyColumn_8
movl %edx,w2+2 // R_DrawWaterColumn
movl %edx,p6+2 // R_DrawTranslucentColumn_8
movl %edx,sh6+2 // R_DrawShadeColumn_8
movl %edx,p7+2
movl %edx,sh7+2
movl %edx,p8+2
movl %edx,sh8+2
movl %edx,p9+2
movl %edx,sh9+2
//
// sites that need 3 * rowbytes
//
addl ARG1,%edx
movl %edx,p3+2 // R_DrawColumn_8
movl %edx,p3b+2 // R_DrawSkyColumn_8
movl %edx,w3+2 // R_DrawWaterColumn
//
// sites that need 4 * rowbytes
//
addl ARG1,%edx
movl %edx,p4+2 // R_DrawColumn_8
movl %edx,p4b+2 // R_DrawSkyColumn_8
movl %edx,w4+2 // R_DrawWaterColumn
popl %ebp
ret
//----------------------------------------------------------------------
// R_DrawColumn_8
//
// Draws one vertical column of 8-bit pixels. Takes no stack arguments;
// everything is read from globals: dc_yl, dc_yh, dc_x, dc_iscale, centery,
// dc_texturemid, dc_source, dc_colormap, ylookup[], columnofs[].
//
// The texture index is masked with 0x7f after every step, so the source
// column wraps every 128 texels.
// Only AL of the colormap pointer is replaced for each lookup, so the
// colormap base is expected to have a zero low byte.
// The operands at p1..p4 are placeholders rewritten by ASM_PatchRowBytes.
// The quad counter is kept in CH, i.e. only the low 8 bits of pixelcount/4.
//
// Saves and restores ebp, esi, edi, ebx; uses eax, ecx, edx as scratch.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawColumn_8)
C(R_DrawColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vdone // nothing to scale (flags still come from the subl above)
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixel count, used for the odd/pair tests below
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_colormap),%eax
testb $3,%dh
jz v4quadloop // count is a multiple of 4: go straight to the unrolled loop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vdone
//
// ebp : ystep frac. in the upper 16 bits (lower 16 bits are zero)
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
v4quadloop:
movb $0x7f,%dh // prep mask (the loop below uses the immediate $0x7f instead)
// .align 4
vquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
andb $0x7f,%bl // mask 0-127 texture index
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p1: movb %dl,0x12345678(%edi) // displacement patched to 1 * rowbytes
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p2: movb %dl,2*0x12345678(%edi) // displacement patched to 2 * rowbytes
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p3: movb %dl,3*0x12345678(%edi) // displacement patched to 3 * rowbytes
andb $0x7f,%bl
p4: addl $4*0x12345678,%edi // immediate patched to 4 * rowbytes
decb %ch
jnz vquadloop
vdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#ifdef HORIZONTALDRAW
// (this conditional is closed after R_RotateBuffer below)
// --------------------------------------------------------------------------
// Horizontal Column Drawer Optimisation
//
// R_DrawHColumn_8: same texture stepping as R_DrawColumn_8, but consecutive
// pixels are written to consecutive bytes (dest advances by 1, not by a row).
// Reads globals dc_x, dc_yl, dc_yh, dc_iscale, centery, dc_texturemid,
// dc_source, dc_colormap, yhlookup[], hcolumnofs[].
//
// NOTE(review): only pixelcount/4 groups of four pixels are drawn; the
// remaining (pixelcount & 3) pixels are never written because vhnearlydone
// is empty. Looks unfinished - confirm before enabling HORIZONTALDRAW.
// --------------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawHColumn_8)
C(R_DrawHColumn_8):
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
//
// dest = yhlookup[dc_x] + hcolumnofs[dc_yl];
//
movl C(dc_x),%ebx
movl C(yhlookup)(,%ebx,4),%edi
movl C(dc_yl),%ebp
movl %ebp,%ebx
addl C(hcolumnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vhdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh
shrl $2,%eax
movb %al,%ch // quad count
testb %ch, %ch
jz vhnearlydone // fewer than four pixels: nothing is drawn at all
movl C(dc_colormap),%eax
decl %edi //----- pre-decrement: the loop increments before each store
// Four pixels per pass; dh and dl alternate as the temporary pixel register.
vhloop:
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
andb $0x7f,%bl
incl %edi //-----
movb (%eax),%dh
movb %dh,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
incl %edi //----- (incl leaves CF alone, so the adcb below still sees the addl carry)
adcb %cl,%bl
movb (%eax),%dl
andb $0x7f,%bl
movb %dl,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
// shll $16,%edx
andb $0x7f,%bl
incl %edi //-----
movb (%eax),%dh
movb %dh,(%edi) //-----
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
incl %edi //-----
adcb %cl,%bl
movb (%eax),%dl
andb $0x7f,%bl
movb %dl,(%edi)
// movl %edx,(%edi)
// addl $4,%edi
decb %ch
jnz vhloop
vhnearlydone:
// movl C(pixelcount)
vhdone:
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
// --------------------------------------------------------------------------
// Rotate a buffer 90 degree in clockwise order after horiz.col. draws
//
// As written, this routine packs four bytes into eax and stores a single
// dword at the address held in dc_colormap; there is no loop.
//
// NOTE(review): %ebx is dereferenced below but is never loaded in this
// routine (it still holds the caller's value after the pushl). The routine
// looks like an unfinished draft - confirm before use.
// --------------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_RotateBuffer)
C(R_RotateBuffer):
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
movl C(dc_source),%esi // esi = read pointer
movl C(dc_colormap),%edi // edi = write pointer
movb (%esi),%ah
addl $200,%esi // step the read pointer by 200 bytes
movb (%ebx),%al // NOTE(review): ebx uninitialised here
addl $200,%ebx
bswap %eax // move the two bytes just loaded into the upper half of eax
movb (%esi),%ah
addl $200,%esi
movb (%ebx),%al
addl $200,%ebx
movl %eax,(%edi) // store the four packed bytes
addl $4,%edi
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
#endif
//----------------------------------------------------------------------
//13-02-98:
// R_DrawSkyColumn : same as R_DrawColumn but:
//
// - wrap around 256 instead of 127.
// this is needed because we have a higher texture for mouselook,
// we need at least 200 lines for the sky.
//
// NOTE: the sky should never wrap, so it could use a faster method.
// for the moment, we'll still use a wrapping method...
//
// IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
//
// Reads the same globals as R_DrawColumn_8. No mask is applied to the
// texture index: it lives in BL and wraps on 8-bit overflow.
// The operands at p1b..p4b are placeholders rewritten by ASM_PatchRowBytes.
//
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawSkyColumn_8)
C(R_DrawSkyColumn_8):
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vskydone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x000000ff,%ebx // keep 8 bits of index (0-255)
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl (not masked in this routine)
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixel count, for the odd/pair tests
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_colormap),%eax
testb $3,%dh
jz v4skyquadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vskydone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. in bl, 8 bits, wraps on overflow (no mask here)
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
v4skyquadloop:
// .align 4
vskyquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p1b: movb %dl,0x12345678(%edi) // displacement patched to 1 * rowbytes
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p2b: movb %dl,2*0x12345678(%edi) // displacement patched to 2 * rowbytes
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
p3b: movb %dl,3*0x12345678(%edi) // displacement patched to 3 * rowbytes
p4b: addl $4*0x12345678,%edi // immediate patched to 4 * rowbytes
decb %ch
jnz vskyquadloop
vskydone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawSpan
//
// Horizontal texture mapping
//
// Draws pixels ds_x1..ds_x2 of row ds_y, two pixels per loop pass.
// Reads globals ds_x1, ds_x2, ds_y, ds_xfrac, ds_yfrac, ds_xstep, ds_ystep,
// ds_source, ds_colormap, ylookup[], columnofs[].
//
// x and y positions are packed into one register: the upper 16 bits come
// from xfrac << 10, the lower 16 bits from yfrac >> 6. The texel offset
// is then (y int * 64 + x int) & 4095, built with shrw $10 / roll $6.
//
// Self-modifying: the composite step is written into hpatch1/hpatch2 and
// the texture base into hpatch3/hpatch4 on every call.
//
//----------------------------------------------------------------------
.data
ystep: .long 0 // used by R_DrawSpanNoWrap
xstep: .long 0 // used by R_DrawSpanNoWrap
C(texwidth): .long 64 // texture width
#if !defined( LINUX) && !defined( __OS2__)
.text
#endif
#ifdef LINUX
.align 2
#else
.align 4
#endif
.globl C(R_DrawSpan_8)
C(R_DrawSpan_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// find loop count
//
movl C(ds_x2),%eax
incl %eax
subl C(ds_x1),%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
js hdone // nothing to scale (negative count; a zero count exits via hchecklast)
shrl $1,%eax // double pixel count
movl %eax,C(loopcount)
//
// build composite position
//
movl C(ds_xfrac),%ebp
shll $10,%ebp
andl $0x0ffff0000,%ebp
movl C(ds_yfrac),%eax
shrl $6,%eax
andl $0x0ffff,%eax
movl C(ds_y),%edi
orl %eax,%ebp // ebp = x position (upper 16) | y position (lower 16)
movl C(ds_source),%esi
//
// calculate screen dest
//
movl C(ylookup)(,%edi,4),%edi
movl C(ds_x1),%eax
addl C(columnofs)(,%eax,4),%edi
//
// build composite step
//
movl C(ds_xstep),%ebx
shll $10,%ebx
andl $0x0ffff0000,%ebx
movl C(ds_ystep),%eax
shrl $6,%eax
andl $0x0ffff,%eax
orl %eax,%ebx
//movl %eax,OFFSET hpatch1+2 // convice tasm to modify code...
movl %ebx,hpatch1+2 // patch the step immediate of the first addl
//movl %eax,OFFSET hpatch2+2 // convice tasm to modify code...
movl %ebx,hpatch2+2 // patch the step immediate of the second addl
movl %esi,hpatch3+2 // patch the texture base into both texel loads
movl %esi,hpatch4+2
// %eax aligned colormap
// %ebx aligned colormap
// %ecx,%edx scratch
// %esi virtual source
// %edi moving destination pointer
// %ebp frac
movl C(ds_colormap),%eax
// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
movl %ebp,%ecx
addl %ebx,%ebp // advance frac pointer
shrw $10,%cx // cx = y int part
roll $6,%ecx // x int part rotates into the low 6 bits, y moves up by 6
andl $4095,%ecx // finish calculation for third pixel
// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
movl %ebp,%edx
shrw $10,%dx
roll $6,%edx
addl %ebx,%ebp // advance frac pointer
andl $4095,%edx // finish calculation for fourth pixel
movl %eax,%ebx // ebx = second copy of the colormap pointer
movb (%esi,%ecx),%al // get first pixel
movb (%esi,%edx),%bl // get second pixel
testl $0x0fffffffe,C(pixelcount)
movb (%eax),%dl // color translate first pixel
// jnz hdoubleloop // at least two pixels to map
// jmp hchecklast
// movw $0xf0f0,%dx //see visplanes start
jz hchecklast // fewer than two pixels (movb leaves the testl flags intact)
movb (%ebx),%dh // color translate second pixel
movl C(loopcount),%esi // esi becomes the loop counter; the source lives in the patched loads
// .align 4
hdoubleloop:
// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
movl %ebp,%ecx
shrw $10,%cx
roll $6,%ecx
hpatch1:
addl $0x012345678,%ebp // advance frac pointer
movw %dx,(%edi) // write first pixel (dl) and second pixel (dh) together
andl $4095,%ecx // finish calculation for third pixel
// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
movl %ebp,%edx
shrw $10,%dx
roll $6,%edx
hpatch3:
movb 0x012345678(%ecx),%al // get third pixel
// movb %bl,1(%edi) // write second pixel
andl $4095,%edx // finish calculation for fourth pixel
hpatch2:
addl $0x012345678,%ebp // advance frac pointer
hpatch4:
movb 0x012345678(%edx),%bl // get fourth pixel
movb (%eax),%dl // color translate third pixel
addl $2,%edi // advance to third pixel destination
decl %esi // done with loop?
movb (%ebx),%dh // color translate fourth pixel
jnz hdoubleloop
// check for final pixel
hchecklast:
testl $1,C(pixelcount)
jz hdone
movb %dl,(%edi) // write final pixel
hdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//.endif
//----------------------------------------------------------------------
// R_DrawTransColumn
//
// Vertical column texture drawer, with transparency. Replaces Doom2's
// 'fuzz' effect, which was not so beautiful.
// Transparency is always impressive in some way, don't know why...
//
// Reads globals dc_yl, dc_yh, dc_x, dc_iscale, centery, dc_texturemid,
// dc_source, dc_colormap, dc_transmap, ylookup[], columnofs[].
// Per pixel: index = (texel << 8) | screen pixel; the byte fetched from
// dc_transmap at that index is then passed through dc_colormap and written.
// Only AX of the transmap pointer and DL of the colormap pointer are
// replaced, so both bases are expected to have those low bits zero.
// The operands at p5..p9 are placeholders rewritten by ASM_PatchRowBytes.
//
// Fixes relative to the previous version of the quad loop:
// - the first pixel was blended with the screen pixel one row BELOW it
//   (p5 preloads row + rowbytes); AL is now reloaded from the pixel's own row.
// - the loop is entered at 'inloop', skipping one of its four stages, so
//   4*n-1 pixels were written and the last pixel of the column was never
//   drawn; the pending pixel is now flushed after the loop.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawTranslucentColumn_8)
C(R_DrawTranslucentColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle vtdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
// Move the y fraction into the upper half of ecx while keeping cx
// (cl = integer step); edx is then free to hold the colormap pointer.
pushw %cx
movl %edx,%ecx
popw %cx
movl C(dc_colormap),%edx
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
shrl $2,%eax
testb $0x03,C(pixelcount)
movb %al,%ch // quad count
movl C(dc_transmap),%eax
jz vt4quadloop // flags still come from the testb above
//
// do un-even pixel
//
testb $1,C(pixelcount)
jz 2f
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : low byte of the transmap index
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,C(pixelcount)
jz 3f
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : low byte of the transmap index
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
addl %ebp,%ecx
adcb %cl,%bl
movb (%edi),%al // fetch dest : low byte of the transmap index
andb $0x7f,%bl
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz vtdone
//
// tystep : ystep frac. in the upper 16 bits
// edx : upper 24 bit : colormap
// dl : tmp pixel to write
// ebx : y i. lower 7 bits, masked for index
// ecx : y frac. upper 16 bits
// ecx : ch = counter, cl = y step i.
// eax : transmap aligned 65536 (upper 16 bit)
// ah : foreground pixel (from the texture)
// al : background pixel (from the screen buffer)
// esi : source texture column
// ebp,edi : dest screen, leap-frogging each other two rows at a time
//
// Each stage of the loop writes the pixel whose texel/background were
// preloaded by the previous stage, then preloads the next one.
//
vt4quadloop:
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
p5: movb 0x12345678(%edi),%al // patch site (ASM_PatchRowBytes writes p5+2); result replaced below
movb (%edi),%al // background of the first pixel comes from its own row
movl %ebp,C(tystep) // spill the step: ebp is needed as second dest pointer
movl %edi,%ebp
subl C(vidwidth),%edi
jmp inloop
// .align 4
vtquadloop:
addl C(tystep),%ecx
adcb %cl,%bl
p6: addl $2*0x12345678,%ebp
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : low byte of the transmap index
inloop:
addl C(tystep),%ecx
adcb %cl,%bl
p7: addl $2*0x12345678,%edi
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
movb (%edx), %dl // use colormap now !
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : low byte of the transmap index
addl C(tystep),%ecx
adcb %cl,%bl
p8: addl $2*0x12345678,%ebp
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : low byte of the transmap index
addl C(tystep),%ecx
adcb %cl,%bl
p9: addl $2*0x12345678,%edi
andb $0x7f,%bl
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : high byte of the transmap index
movb (%edx), %dl // use colormap now !
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : low byte of the transmap index
decb %ch
jnz vtquadloop
//
// flush the pixel preloaded by the last stage (texel in ah, background in
// al, destination in edi): it is the last pixel of the column.
//
movb (%eax),%dl
movb (%edx), %dl // use colormap now !
movb %dl,(%edi)
vtdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif // ifdef USEASM
//----------------------------------------------------------------------
// R_DrawShadeColumn
//
// for smoke..etc.. test.
//
// Reads globals dc_yl, dc_yh, dc_x, dc_iscale, centery, dc_texturemid,
// dc_source, colormaps, ylookup[], columnofs[].
// Per pixel: the texel goes into AH and the current screen pixel into AL of
// the 'colormaps' pointer; the byte found there is written back to the screen.
// The operands at sh5..sh9 are placeholders rewritten by ASM_PatchRowBytes.
// NOTE(review): this routine sits after the '#endif // ifdef USEASM' line
// while ASM_PatchRowBytes is inside that conditional - confirm intended.
//
// Fixes relative to the previous version of the quad loop:
// - the first pixel used the screen pixel one row BELOW it (sh5 preloads
//   row + rowbytes); AL is now reloaded from the pixel's own row.
// - the loop is entered at 'shinloop', skipping one of its four stages, so
//   4*n-1 pixels were written and the last pixel of the column was never
//   drawn; the pending pixel is now flushed after the loop.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawShadeColumn_8)
C(R_DrawShadeColumn_8):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle shdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixel count, for the odd/pair tests
shrl $2,%eax
movb %al,%ch // quad count
movl C(colormaps),%eax
testb $0x03,%dh
jz sh4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
movb (%esi,%ebx),%ah // fetch texel : colormap number
addl %ebp,%edx
adcb %cl,%bl
movb (%edi),%al // fetch dest : index into colormap
andb $0x7f,%bl
movb (%eax),%dl
movb %dl,(%edi)
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz shdone
//
// tystep : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dh = index mask, dl = tmp pixel
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormaps base; ah = texel, al = screen pixel
// esi : source texture column
// ebp,edi : dest screen, leap-frogging each other two rows at a time
//
// Each stage of the loop writes the pixel whose texel/background were
// preloaded by the previous stage, then preloads the next one.
//
sh4quadloop:
movb $0x7f,%dh // prep mask
movb (%esi,%ebx),%ah // fetch texel : colormap number
sh5: movb 0x12345678(%edi),%al // patch site (ASM_PatchRowBytes writes sh5+2); result replaced below
movb (%edi),%al // background of the first pixel comes from its own row
movl %ebp,C(tystep) // spill the step: ebp is needed as second dest pointer
movl %edi,%ebp
subl C(vidwidth),%edi
jmp shinloop
// .align 4
shquadloop:
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh6: addl $2*0x12345678,%ebp
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : index into colormap
shinloop:
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh7: addl $2*0x12345678,%edi
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : index into colormap
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh8: addl $2*0x12345678,%ebp
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%edi)
movb (%ebp),%al // fetch dest : index into colormap
addl C(tystep),%edx
adcb %cl,%bl
andb %dh,%bl
sh9: addl $2*0x12345678,%edi
movb (%eax),%dl
movb (%esi,%ebx),%ah // fetch texel : colormap number
movb %dl,(%ebp)
movb (%edi),%al // fetch dest : index into colormap
decb %ch
jnz shquadloop
//
// flush the pixel preloaded by the last stage (texel in ah, background in
// al, destination in edi): it is the last pixel of the column.
//
movb (%eax),%dl
movb %dl,(%edi)
shdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawWaterColumn : basically it's just a copy of R_DrawColumn,
// but it uses dc_colormap from dc_yl to dc_yw-1
// then it uses dc_wcolormap from dc_yw to dc_yh
//
// Thus, the 'underwater' part of the walls is remapped to 'water-like'
// colors.
//
// NOTE(review): the code below never reads dc_colormap or dc_yw; the whole
// column dc_yl..dc_yh is drawn through dc_wcolormap. Confirm whether the
// two-colormap split described above is done by the caller.
//
// The operands at w1..w4 are placeholders rewritten by ASM_PatchRowBytes.
//
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 5
#endif
.globl C(R_DrawWaterColumn)
C(R_DrawWaterColumn):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// dest = ylookup[dc_yl] + columnofs[dc_x];
//
movl C(dc_yl),%ebp
movl %ebp,%ebx
movl C(ylookup)(,%ebx,4),%edi
movl C(dc_x),%ebx
addl C(columnofs)(,%ebx,4),%edi // edi = dest
//
// pixelcount = yh - yl + 1
//
movl C(dc_yh),%eax
incl %eax
subl %ebp,%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle wdone // nothing to scale
//
// frac = dc_texturemid - (centery-dc_yl)*fracstep;
//
movl C(dc_iscale),%ecx // fracstep
movl C(centery),%eax
subl %ebp,%eax
imul %ecx,%eax
movl C(dc_texturemid),%edx
subl %eax,%edx
movl %edx,%ebx
shrl $16,%ebx // frac int.
andl $0x0000007f,%ebx
shll $16,%edx // y frac up
movl %ecx,%ebp
shll $16,%ebp // fracstep f. up
shrl $16,%ecx // fracstep i. ->cl
andb $0x7f,%cl
movl C(dc_source),%esi
//
// lets rock :) !
//
movl C(pixelcount),%eax
movb %al,%dh // dh = low byte of pixel count, for the odd/pair tests
shrl $2,%eax
movb %al,%ch // quad count
movl C(dc_wcolormap),%eax
testb $3,%dh
jz w4quadloop
//
// do un-even pixel
//
testb $1,%dh
jz 2f
movb (%esi,%ebx),%al // prep un-even loops
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// do two non-quad-aligned pixels
//
2:
testb $2,%dh
jz 3f
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
movb %dl,(%edi) // output pixel
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
andb $0x7f,%bl // mask 0-127 texture index
addl C(vidwidth),%edi
movb %dl,(%edi) // output pixel
addl C(vidwidth),%edi
//
// test if there was at least 4 pixels
//
3:
testb $0xFF,%ch // test quad count
jz wdone
//
// ebp : ystep frac. in the upper 16 bits
// edx : y frac. in the upper 16 bits, dl = pixel being written
// ebx : y i. lower 7 bits, masked for index
// ecx : ch = counter, cl = y step i.
// eax : colormap aligned 256
// esi : source texture column
// edi : dest screen
//
w4quadloop:
movb $0x7f,%dh // prep mask (the loop below uses the immediate $0x7f instead)
// .align 4
wquadloop:
movb (%esi,%ebx),%al // prep loop
addl %ebp,%edx // ypos f += ystep f
adcb %cl,%bl // ypos i += ystep i
movb (%eax),%dl // colormap texel
movb %dl,(%edi) // output pixel
andb $0x7f,%bl // mask 0-127 texture index
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w1: movb %dl,0x12345678(%edi) // displacement patched to 1 * rowbytes
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w2: movb %dl,2*0x12345678(%edi) // displacement patched to 2 * rowbytes
andb $0x7f,%bl
movb (%esi,%ebx),%al // fetch source texel
addl %ebp,%edx
adcb %cl,%bl
movb (%eax),%dl
w3: movb %dl,3*0x12345678(%edi) // displacement patched to 3 * rowbytes
andb $0x7f,%bl
w4: addl $4*0x12345678,%edi // immediate patched to 4 * rowbytes
decb %ch
jnz wquadloop
wdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//----------------------------------------------------------------------
//
// R_DrawSpanNoWrap
//
// Horizontal texture mapping, does not remap colors,
// neither needs to wrap around the source texture.
//
// Thus, a special optimisation can be used...
//
// Reads globals ds_x1, ds_x2, ds_y, ds_xstep, ds_ystep, ds_xfrac, ds_yfrac,
// ds_source, ds_colormap, vidwidth. The texel is written to the screen
// as-is (the colormap lookups are commented out).
//
// The source offset is advanced with a two-entry table:
//   advancetable+4 : step used when the y fraction does not overflow
//   advancetable+0 : step used when it does (one extra row)
// 'sbbl %ecx,%ecx' turns the y carry into 0 / -1 to pick the entry and
// 'adcl' adds the x carry on top.
//
// NOTE(review): the row multiplier loaded into ebx is vidwidth although the
// original comments speak of texwidth - confirm which pitch the source uses.
//
//----------------------------------------------------------------------
.data
advancetable: .long 0, 0
#if !defined( LINUX) && !defined( __OS2__)
.text
#endif
#ifdef LINUX
.align 2
#else
.align 4
#endif
.globl C(R_DrawSpanNoWrap)
C(R_DrawSpanNoWrap):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
//
// find loop count
//
movl C(ds_x2),%eax
incl %eax
subl C(ds_x1),%eax // pixel count
movl %eax,C(pixelcount) // save for final pixel
jle htvdone // nothing to scale
// shrl $1,%eax // double pixel count
// movl %eax,C(loopcount)
//
// calculate screen dest
//
movl C(ds_y),%edi //full destination start address
//
// set up advancetable
//
movl C(ds_xstep),%ebp
movl C(ds_ystep),%ecx
movl %ecx,%eax
movl %ebp,%edx
sarl $16,%edx // xstep >>= 16;
movl C(vidwidth),%ebx
sarl $16,%eax // ystep >>= 16;
jz 0f // integer y step is zero: skip the multiply
imull %ebx,%eax // (ystep >> 16) * texwidth;
0:
addl %edx,%eax // add in xstep
// (ystep >> 16) * texwidth + (xstep >> 16);
movl %eax,advancetable+4 // advance base in y
addl %ebx,%eax // ((ystep >> 16) + 1) * texwidth +
// (xstep >> 16);
movl %eax,advancetable // advance extra in y
shll $16,%ebp // left-justify xstep fractional part
movl %ebp,xstep
shll $16,%ecx // left-justify ystep fractional part
movl %ecx,ystep
//
// calculate the texture starting address
//
movl C(ds_source),%esi // texture source
movl C(ds_yfrac),%eax
movl %eax,%edx
sarl $16,%eax
movl C(ds_xfrac),%ecx
imull %ebx,%eax // (yfrac >> 16) * texwidth
movl %ecx,%ebx
sarl $16,%ecx
movl %ecx,%ebp
addl %eax,%ebp // source = (xfrac >> 16) +
// ((yfrac >> 16) * texwidth);
//
// esi : texture source
// edi : screen dest
// eax : colormap aligned on 256 boundary, hehehe...
// ebx : xfrac << 16
// ecx : used in loop, contains either 0 or -1, *4, offset into advancetable
// edx : yfrac << 16
// ebp : offset into texture
//
shll $16,%edx // yfrac upper word, lower byte will be used
movl C(ds_colormap),%eax
shll $16,%ebx // xfrac upper word, lower unused
movl C(pixelcount),%ecx
shrl $2,%ecx
movb %cl,%dh // quad pixels count
movl C(pixelcount),%ecx
andl $3,%ecx
jz htvquadloop // pixelcount is multiple of 4
decl %ecx
jz 1f // one leftover pixel
decl %ecx
jz 2f // two leftover pixels
//
// do one to three pixels first
//
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
2:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
1:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
incl %edi
//
// test if there was at least 4 pixels
//
testb $0xFF,%dh
jz htvdone
//
// four pixels per loop pass
// U
// V
htvquadloop:
addl ystep,%edx // yfrac += ystep
sbbl %ecx,%ecx // turn carry into 0 or -1 if set
movb (%esi,%ebp),%al // get texture pixel
addl xstep,%ebx // xfrac += xstep
// movb (%eax),%dl // pixel goes through colormap
adcl advancetable+4(,%ecx,4),%ebp // advance source
movb %al,(%edi) // write pixel dest
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,1(%edi)
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,2(%edi)
addl ystep,%edx
sbbl %ecx,%ecx
movb (%esi,%ebp),%al
addl xstep,%ebx
// movb (%eax),%dl
adcl advancetable+4(,%ecx,4),%ebp
movb %al,3(%edi)
addl $4, %edi
incl %ecx //dummy
decb %dh
jnz htvquadloop // pairs in the V-pipe
htvdone:
popl %ebx // restore register variables
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
//.endif
#ifdef HORIZONTALDRAW
//----------------------------------------------------------------------
// void R_RotateBufferasm (void)
//
// Copies bytes from the buffer at dc_source to the buffer at dc_colormap.
// Each inner pass gathers 8 bytes that lie 200 bytes apart in the source
// (offsets 0, -200, ... -7*200 from esi) and stores them as 8 consecutive
// destination bytes. 40 inner passes form one outer pass; there are
// 200 outer passes, the source pointer moving on by one byte after each.
//
// Fix: the saved registers are now popped in the reverse of the push
// order. The previous epilogue popped ebp, esi, edi, ebx in push order,
// which handed every callee-saved register back with another one's value.
//----------------------------------------------------------------------
#ifdef LINUX
.align 2
#else
.align 4
#endif
.globl C(R_RotateBufferasm)
C(R_RotateBufferasm):
pushl %ebp // preserve caller's stack frame pointer
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
movl C(dc_source),%esi // esi = source read pointer
movl C(dc_colormap),%edi // edi = destination write pointer
movl $200,%edx // outer pass counter
ra2:
movl $40,%ecx // inner pass counter (40 * 8 = 320 bytes written)
ra:
// eax collects source offsets 0,-1,-2,-3 (*200); ebx collects -4..-7 (*200)
movb -2*200(%esi),%al
movb -6*200(%esi),%bl
movb -3*200(%esi),%ah
movb -7*200(%esi),%bh
shll $16,%eax // move the two bytes into the upper half
shll $16,%ebx
movb (%esi),%al
movb -4*200(%esi),%bl
movb -1*200(%esi),%ah
movb -5*200(%esi),%bh
movl %eax,(%edi)
subl $8*200,%esi
movl %ebx,4(%edi)
addl $8,%edi
decl %ecx
jnz ra
addl $320*200+1,%esi // undo the 40 * 8*200 walk back and move on one byte
// addl 320-32,%edi
decl %edx
jnz ra2
popl %ebx // restore register variables (reverse of the push order)
popl %edi
popl %esi
popl %ebp // restore caller's stack frame pointer
ret
#endif