mirror of
https://git.do.srb2.org/STJr/SRB2.git
synced 2024-11-18 18:41:57 +00:00
323 lines
8 KiB
ArmAsm
323 lines
8 KiB
ArmAsm
|
// SONIC ROBO BLAST 2
|
||
|
//-----------------------------------------------------------------------------
|
||
|
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||
|
// Copyright (C) 1999-2014 by Sonic Team Junior.
|
||
|
//
|
||
|
// This program is free software distributed under the
|
||
|
// terms of the GNU General Public License, version 2.
|
||
|
// See the 'LICENSE' file for more details.
|
||
|
//-----------------------------------------------------------------------------
|
||
|
/// \file tmap_asm.s
|
||
|
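/// \brief 8-bit column drawers in x86 assembly (AT&T syntax): a plain version, a Pentium-scheduled version and a K6 MMX version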
|
||
|
|
||
|
//.comm _dc_colormap,4
|
||
|
//.comm _dc_x,4
|
||
|
//.comm _dc_yl,4
|
||
|
//.comm _dc_yh,4
|
||
|
//.comm _dc_iscale,4
|
||
|
//.comm _dc_texturemid,4
|
||
|
//.comm _dc_source,4
|
||
|
//.comm _ylookup,4
|
||
|
//.comm _columnofs,4
|
||
|
//.comm _loopcount,4
|
||
|
//.comm _pixelcount,4
|
||
|
.data

// Two zero-initialised 32-bit words. No instruction in this file
// references either of them.
_pixelcount:
        .long   0
_loopcount:
        .long   0

        .align  8

// Scratch word used by _R_DrawColumn_8_K6_MMX: the routine stores the
// number of bytes it subtracts from esp here on entry and adds the same
// value back to esp before returning.
_mmxcomm:
        .long   0
|
||
|
.text

        .align  4
.globl _R_DrawColumn8_NOMMX
//
// _R_DrawColumn8_NOMMX
//
// Draws one vertical run of 8-bit pixels, one byte per row, for rows
// _dc_yl through _dc_yh inclusive. Plain integer code, no MMX.
//
// Inputs (all read from globals, nothing is taken from the stack):
//   _dc_x, _dc_yl, _dc_yh    column and first/last row
//   _ylookup, _columnofs     tables indexed by row and by column
//   _dc_iscale               texture step per row
//   _dc_texturemid           texture coordinate offset
//   _dc_source               texture column bytes
//   _dc_colormap             byte lookup table applied to every texel
//
// Register use inside the loop:
//   ebx = pixels left          ecx = running texture coordinate
//   edi = step per row         ebp = _dc_source
//   esi = destination pointer  edx = texel (upper 24 bits kept zero)
//
// ebp, esi, edi and ebx are saved on entry and restored before ret.
//
// The two 0x12345678 immediates are placeholders. The exported labels
// rdc8nwidth1 and rdc8nwidth2 sit directly behind the instructions that
// carry them, presumably so that code outside this file can overwrite
// them with the real row stride (the patching code is not in this file).
// Do not move those labels or re-encode those instructions.
//
_R_DrawColumn8_NOMMX:
        pushl   %ebp
        pushl   %esi
        pushl   %edi
        pushl   %ebx

        movl    _dc_yl,%edx                 // edx = first row
        movl    _dc_yh,%eax
        subl    %edx,%eax
        leal    1(%eax),%ebx                // ebx = dc_yh - dc_yl + 1 = pixel count
        testl   %ebx,%ebx
        jle     rdc8ndone                   // count <= 0: nothing to draw

        movl    _dc_x,%eax
        movl    _ylookup, %edi
        movl    (%edi,%edx,4),%esi          // esi = ylookup[dc_yl]
        movl    _columnofs, %edi
        addl    (%edi,%eax,4),%esi          // esi += columnofs[dc_x]

        movl    _dc_iscale,%edi             // edi = step per row
        movl    %edx,%eax
        imull   %edi,%eax                   // eax = dc_yl * dc_iscale
        movl    _dc_texturemid,%ecx
        addl    %eax,%ecx                   // ecx = dc_texturemid + dc_yl * dc_iscale

        movl    _dc_source,%ebp             // ebp = texture column
        xorl    %edx, %edx                  // only dl is written below, keep the rest zero
        subl    $0x12345678, %esi           // back up by one placeholder step, the loop adds it first
.globl rdc8nwidth1
rdc8nwidth1:

        .align  4,0x90
rdc8nloop:
        movl    %ecx,%eax
        shrl    $16,%eax                    // drop the low 16 bits of the coordinate
        addl    %edi,%ecx                   // advance the coordinate for the next row
        andl    $127,%eax                   // wrap the texel index to 0..127
        addl    $0x12345678,%esi            // advance the destination by the placeholder step
.globl rdc8nwidth2
rdc8nwidth2:
        movb    (%eax,%ebp),%dl             // dl = dc_source[index]
        movl    _dc_colormap,%eax
        movb    (%eax,%edx),%al             // al = dc_colormap[texel]
        movb    %al,(%esi)                  // store the pixel
        decl    %ebx
        jne     rdc8nloop

rdc8ndone:
        popl    %ebx
        popl    %edi
        popl    %esi
        popl    %ebp
        ret
|
||
|
|
||
|
//
|
||
|
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
|
||
|
// By ES 1998/08/01
|
||
|
//
|
||
|
|
||
|
.globl _R_DrawColumn_8_Pentium
//
// _R_DrawColumn_8_Pentium
//
// Draws one vertical run of 8-bit pixels for rows _dc_yl through _dc_yh
// inclusive, two pixels per loop iteration.
//
// Inputs are the same globals the other column drawers in this file
// read: _dc_x, _dc_yl, _dc_yh, _ylookup, _columnofs, _dc_iscale,
// _dc_texturemid, _dc_source and _dc_colormap.
// ebp, ebx, esi and edi are saved on entry and restored before ret.
//
// Register use on the multi-pixel path (from rdc8pmany on):
//   eax = _dc_colormap with the current texel in al
//   edx = dc_iscale << 9, with dl reused to hold a finished pixel
//   ecx, ebp = alternating texture coordinates (shifted left by 9), each
//              turned into a 7-bit index by a right shift of 25
//   esi = _dc_source
//   edi = destination base    ebx = negative offset, counts up to zero
//
// The colormap lookups here write the texel into the low byte of the
// colormap pointer and then load through that pointer. That addresses
// colormap + texel only when the table starts on a 256-byte boundary.
// Nothing in this routine checks that.
//
// Every 0x12345678 is a placeholder followed immediately by an exported
// rdc8pwidthN label, presumably so that code outside this file can
// overwrite it. The values noted next to each label are the ones given
// by the original annotations. Do not move the labels or re-encode the
// instructions in front of them.
//
_R_DrawColumn_8_Pentium:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi

        movl    _dc_yl,%eax                 // eax = top row
        movl    _dc_yh,%ebx                 // ebx = bottom row
        movl    _ylookup, %edi
        movl    (%edi,%ebx,4),%ecx          // ecx = ylookup[dc_yh]
        subl    %eax,%ebx                   // ebx = pixel count - 1
        jl      rdc8pdone                   // negative: nothing to draw
        jnz     rdc8pmany                   // more than one pixel

        // Exactly one pixel.
        movl    _dc_x,%edx
        movl    _columnofs, %edi
        addl    (%edi,%edx,4),%ecx          // ecx = destination address
        movl    _dc_iscale,%esi
        imull   %esi,%eax                   // eax = dc_yl * dc_iscale
        movl    _dc_texturemid,%edi
        addl    %eax,%edi                   // edi = texture coordinate
        movl    _dc_colormap,%edx
        shrl    $16, %edi
        movl    _dc_source,%ebp
        andl    $127,%edi                   // texel index 0..127
        movb    (%edi,%ebp),%dl             // texel into the low byte of the colormap pointer
        movb    (%edx),%al                  // al = colormap entry
        movb    %al,0(%ecx)                 // store the pixel
        jmp     rdc8pdone

        .align  4, 0x90
rdc8pmany:
        // Two or more pixels.
        movl    _dc_x,%edx
        movl    _columnofs, %edi
        movl    (%edi,%edx,4),%edx          // edx = columnofs[dc_x]
        leal    0x12345678(%edx, %ecx), %edi // edi = bottom pixel address + placeholder
.globl rdc8pwidth5
rdc8pwidth5:                                // placeholder = -2*SCREENWIDTH
        movl    _dc_iscale,%edx             // edx = step per row
        imull   %edx,%eax                   // eax = dc_yl * dc_iscale
        shll    $9, %edx                    // original fixme: input should already be 7.25 fixed point
        movl    _dc_texturemid,%ecx
        addl    %eax,%ecx                   // ecx = texture coordinate of the first pixel
        movl    _dc_colormap,%eax           // eax = lookup table
        shll    $9, %ecx
        movl    _dc_source,%esi             // esi = texture column

        imull   $0x12345678, %ebx           // ebx = (count - 1) * placeholder
.globl rdc8pwidth6
rdc8pwidth6:                                // placeholder = -SCREENWIDTH

        // Prime the pipeline with the first two pixels.
        leal    (%ecx, %edx), %ebp          // ebp = coordinate of pixel 1
        shrl    $25, %ecx                   // ecx = index of pixel 0
        movb    (%esi, %ecx), %al           // al = texel 0
        leal    (%edx, %ebp), %ecx          // ecx = coordinate of pixel 2
        shrl    $25, %ebp                   // ebp = index of pixel 1
        movb    (%eax), %dl                 // dl = finished pixel 0

rdc8ploop:
        movb    (%esi,%ebp), %al            // texel of the odd pixel
        leal    (%ecx, %edx), %ebp          // coordinate of the next odd pixel

        shrl    $25, %ecx                   // index of the next even pixel
        movb    %dl, 0x12345678(%edi, %ebx) // store the even pixel
.globl rdc8pwidth1
rdc8pwidth1:                                // placeholder = 2*SCREENWIDTH

        movb    (%eax), %al                 // finished odd pixel

        movb    %al, 0x12345678(%edi, %ebx) // store the odd pixel
.globl rdc8pwidth2
rdc8pwidth2:                                // placeholder = 3*SCREENWIDTH
        movb    (%esi, %ecx), %al           // texel of the next even pixel

        leal    (%ebp, %edx), %ecx          // coordinate of the even pixel after that

        shrl    $25, %ebp                   // index of the next odd pixel
        movb    (%eax), %dl                 // finished next even pixel

        addl    $0x12345678, %ebx           // advance the offset, sets the flags tested below
.globl rdc8pwidth3
rdc8pwidth3:                                // placeholder = 2*SCREENWIDTH
        jl      rdc8ploop                   // offset still negative: another pair

        // Offset is zero when one pixel is still pending in dl.
        jnz     rdc8pdone
        movb    %dl, 0x12345678(%edi, %ebx) // store the last pixel
.globl rdc8pwidth4
rdc8pwidth4:                                // placeholder = 2*SCREENWIDTH

rdc8pdone:

        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
|
||
|
|
||
|
//
|
||
|
// MMX asm version, optimised for K6
|
||
|
// By ES 1998/07/05
|
||
|
//
|
||
|
|
||
|
.globl _R_DrawColumn_8_K6_MMX
//
// _R_DrawColumn_8_K6_MMX
//
// Draws one vertical run of 8-bit pixels for rows _dc_yl through _dc_yh
// inclusive, two pixels per loop iteration. The texture coordinates of
// each pixel pair are advanced together in one MMX register.
//
// Inputs are the same globals the other column drawers in this file
// read: _dc_x, _dc_yl, _dc_yh, _ylookup, _columnofs, _dc_iscale,
// _dc_texturemid, _dc_source and _dc_colormap.
// ebp, ebx, esi and edi are saved on entry and restored before ret.
// emms is executed before returning.
//
// Stack: after the pushes, esp is lowered by (esp & 7) + 8 bytes so that
// 0(%esp) is 8-byte aligned and can be used with movq. The amount is
// kept in the global _mmxcomm and added back at rdc8mdone, so a second
// invocation running before the first one returns would overwrite it.
//
// Register use on the multi-pixel path (from rdc8mmany on):
//   mm0 = coordinates of the next pixel pair, mm1 = 2 * step in both halves
//   mm2 = scratch for the pair of 7-bit indices, spilled to 0(%esp)
//   eax, edi = index, then texel, of the even and odd pixel
//   edx = _dc_colormap        ebp = _dc_source
//   esi = destination base    ebx = negative offset, counts up to zero
//   cl, ch = finished even and odd pixel
//
// Every 0x12345678 is a placeholder followed immediately by an exported
// label (rdc8mwidthN, rdc8moffs1), and rdc8moffs2 is exported directly
// behind the jmp to rdc8mdone, presumably so that code outside this file
// can overwrite those operands. The values noted next to the width labels
// are the ones given by the original annotations. Do not move the labels
// or re-encode the instructions in front of them.
//
_R_DrawColumn_8_K6_MMX:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi

        movl    %esp, %eax                  // eax = (esp & 7) + 8, original note: 8 or 12
        andl    $7,%eax
        addl    $8,%eax
        movl    %eax, _mmxcomm              // remember the adjustment for the exit path
        subl    %eax,%esp                   // 0(%esp) is now 8-byte aligned scratch

        movl    _dc_yl,%edx                 // edx = top row
        movl    _dc_yh,%ebx                 // ebx = bottom row
        movl    _ylookup, %edi
        movl    (%edi,%ebx,4),%ecx          // ecx = ylookup[dc_yh]
        subl    %edx,%ebx                   // ebx = pixel count - 1
        jl      0x12345678                  // negative: nothing to draw, target is a placeholder
.globl rdc8moffs1
rdc8moffs1:
        jnz     rdc8mmany                   // more than one pixel

        // Exactly one pixel.
        movl    _dc_x,%eax
        movl    _columnofs, %edi
        addl    (%edi,%eax,4),%ecx          // ecx = destination address
        movl    _dc_iscale,%esi
        imull   %esi,%edx                   // edx = dc_yl * dc_iscale
        movl    _dc_texturemid,%edi
        addl    %edx,%edi                   // edi = texture coordinate
        movl    _dc_colormap,%edx
        shrl    $16, %edi
        movl    _dc_source,%ebp
        andl    $127,%edi                   // texel index 0..127
        movb    (%edi,%ebp),%dl             // texel into the low byte of the colormap pointer
        movb    (%edx),%al                  // al = colormap entry, needs a 256-byte aligned table
        movb    %al,0(%ecx)                 // store the pixel
        jmp     rdc8mdone
.globl rdc8moffs2
rdc8moffs2:

        .align  4, 0x90
rdc8mmany:
        // Two or more pixels.
        movl    _dc_x,%eax
        movl    _columnofs, %edi
        movl    (%edi,%eax,4),%eax          // eax = columnofs[dc_x]
        leal    0x12345678(%eax, %ecx), %esi // esi = bottom pixel address + placeholder
.globl rdc8mwidth3
rdc8mwidth3:                                // placeholder = -2*SCREENWIDTH
        movl    _dc_iscale,%ecx             // ecx = step per row
        imull   %ecx,%edx                   // edx = dc_yl * dc_iscale
        shll    $9, %ecx                    // original fixme: input should already be 7.25 fixed point
        movl    _dc_texturemid,%eax
        addl    %edx,%eax                   // eax = texture coordinate of pixel 0
        movl    _dc_colormap,%edx           // edx = lookup table
        shll    $9, %eax
        leal    (%ecx, %ecx), %edi          // edi = 2 * step, the advance per pixel pair
        movl    _dc_source,%ebp             // ebp = texture column
        movl    %edi, 0(%esp)               // low half of the step pair

        imull   $0x12345678, %ebx           // ebx = (count - 1) * placeholder
.globl rdc8mwidth5
rdc8mwidth5:                                // placeholder = -SCREENWIDTH

        movl    %edi, 4(%esp)               // high half of the step pair
        leal    (%eax, %ecx), %edi          // edi = coordinate of pixel 1
        movq    0(%esp), %mm1               // mm1 = 2*step in both halves
        movl    %eax, 0(%esp)               // coordinate of pixel 0
        shrl    $25, %eax                   // eax = index of pixel 0
        movl    %edi, 4(%esp)               // coordinate of pixel 1
        movzbl  (%ebp, %eax), %eax          // eax = texel 0
        movq    0(%esp), %mm0               // mm0 = coordinates of pixels 0 and 1

        paddd   %mm1, %mm0                  // mm0 = coordinates of pixels 2 and 3
        shrl    $25, %edi                   // edi = index of pixel 1
        movq    %mm0, %mm2
        psrld   $25, %mm2                   // mm2 = indices of pixels 2 and 3
        paddd   %mm1, %mm0                  // mm0 = coordinates of pixels 4 and 5
        movq    %mm2, 0(%esp)               // spill the indices of pixels 2 and 3

.globl rdc8mloop
rdc8mloop:
        movq    %mm0, %mm2                  // copy the coordinates of the pair after next
        movzbl  (%ebp, %edi), %edi          // edi = texel of the odd pixel

        psrld   $25, %mm2                   // turn that copy into indices
        movb    (%edx,%eax), %cl            // cl = finished even pixel

        movl    0(%esp), %eax               // eax = index of the next even pixel
        addl    $0x12345678, %ebx           // advance the offset, sets the flags tested at the loop end
.globl rdc8mwidth2
rdc8mwidth2:                                // placeholder = 2*SCREENWIDTH

        movb    %cl, (%esi, %ebx)           // store the even pixel
        movb    (%edx,%edi), %ch            // ch = finished odd pixel

        movb    %ch, 0x12345678(%esi, %ebx) // store the odd pixel
.globl rdc8mwidth1
rdc8mwidth1:                                // placeholder = SCREENWIDTH
        movl    4(%esp), %edi               // edi = index of the next odd pixel

        paddd   %mm1, %mm0                  // advance the coordinates by one more pair
        movzbl  (%ebp, %eax), %eax          // eax = texel of the next even pixel

        movq    %mm2, 0(%esp)               // spill the indices computed above
        jl      rdc8mloop                   // offset still negative: another pair

        // Offset is zero when one pixel is still pending in eax.
        jnz     rdc8mno_odd
        movb    (%edx,%eax), %cl            // cl = finished last pixel
        movb    %cl, 0x12345678(%esi)       // store it
.globl rdc8mwidth4
rdc8mwidth4:                                // placeholder = 2*SCREENWIDTH
rdc8mno_odd:

.globl rdc8mdone
rdc8mdone:
        emms                                // leave MMX state

        addl    _mmxcomm, %esp              // undo the alignment adjustment made on entry
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
|
||
|
|
||
|
// Need some extra space to align run-time
|
||
|
.globl R_DrawColumn_8_K6_MMX_end
// Exported marker placed directly behind _R_DrawColumn_8_K6_MMX,
// followed by 31 single NOP instructions of slack.
R_DrawColumn_8_K6_MMX_end:
        .rept   31
        nop
        .endr
|