; Mirror of https://github.com/ZDoom/gzdoom.git (synced 2024-11-08 14:00:43 +00:00)
; Commit 1063a98c47 - SVN r2537 (trunk)
; 640 lines, 16 KiB, NASM
;*
;* tmap2.nas
;* The tilted plane inner loop.
;*
;*---------------------------------------------------------------------------
;* Copyright 1998-2006 Randy Heit
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* 1. Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;* 2. Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in the
;* documentation and/or other materials provided with the distribution.
;* 3. The name of the author may not be used to endorse or promote products
;* derived from this software without specific prior written permission.
;*
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;*---------------------------------------------------------------------------
;*
;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was
;* actually slightly slower than the more straight-forward approach
;* used here, probably because the trick requires too much setup time.
;*
|
|
|
|
; ---------------------------------------------------------------------------
; File preamble: target mode, includes, symbol decoration, imports/exports.
; Syntax: NASM (Intel operand order), 32-bit x86.
; ---------------------------------------------------------------------------

BITS 32

; NOTE(review): the `selfmod` macro used below is not defined in this file;
; presumably it comes from valgrind.inc - confirm.
%include "valgrind.inc"

; Dummy 32-bit displacement assembled into the texel-fetch instructions.
; R_SetTiltedSpanSource_ASM overwrites it at run time with the real address.
%define SPACEFILLER4    (0x44444444)

; On every target except Linux, each shared symbol is mapped to its
; underscore-prefixed name (presumably to match C symbol decoration there).
%ifndef M_TARGET_LINUX

%define plane_sz                _plane_sz
%define plane_su                _plane_su
%define plane_sv                _plane_sv
%define plane_shade             _plane_shade
%define planelightfloat         _planelightfloat
%define spanend                 _spanend
%define ylookup                 _ylookup
%define dc_destorg              _dc_destorg
%define ds_colormap             _ds_colormap
%define ds_source               _ds_source
%define centery                 _centery
%define centerx                 _centerx
%define ds_curtiltedsource      _ds_curtiltedsource
%define pviewx                  _pviewx
%define pviewy                  _pviewy
%define tiltlighting            _tiltlighting

%define R_DrawTiltedPlane_ASM           _R_DrawTiltedPlane_ASM
%define R_SetTiltedSpanSource_ASM       _R_SetTiltedSpanSource_ASM
%define R_CalcTiltedLighting            _R_CalcTiltedLighting

%endif

; Symbols defined outside this file.
EXTERN plane_sz
EXTERN plane_su
EXTERN plane_sv
EXTERN planelightfloat
EXTERN spanend
EXTERN ylookup
EXTERN dc_destorg
EXTERN ds_colormap
EXTERN centery
EXTERN centerx
EXTERN ds_source
EXTERN plane_shade
EXTERN pviewx
EXTERN pviewy
EXTERN tiltlighting
EXTERN R_CalcTiltedLighting

; Defined in this file's .data section and exported.
GLOBAL ds_curtiltedsource

; plane_sv / plane_su / plane_sz are each accessed as three consecutive
; dwords (loaded with `fld dword`), named i, j and k here.
%define sv_i    plane_sv
%define sv_j    plane_sv+4
%define sv_k    plane_sv+8

%define su_i    plane_su
%define su_j    plane_su+4
%define su_k    plane_su+8

%define sz_i    plane_sz
%define sz_j    plane_sz+4
%define sz_k    plane_sz+8

; Not referenced anywhere else in this file; the span length of 8 is
; hard-coded in the drawing code (fp_8, fp_8recip, `sub ebx,8`).
%define SPANBITS        3
|
|
|
|
section .bss

; Scratch storage private to this file (none of these labels are exported).

; Written with `fistp qword`; the drawing code reads back only the low dword.
start_u:        resq    1
start_v:        resq    1
step_u:         resq    1
step_v:         resq    1

; Per-span increments, stored with `fstp/fst qword` and reloaded with
; `fld qword`.
step_iz:        resq    1
step_uz:        resq    1
step_vz:        resq    1

; Written with `fst dword` in several places; never read in this file.
end_z:          resd    1
|
|
|
|
section .data

; Last pointer handed to R_SetTiltedSpanSource_ASM (exported).
ds_curtiltedsource:     dd      SPACEFILLER4

; Single-precision reciprocals, indexed as [spanrecips + n*4] = 1/(n+1).
; fp_1 aliases the first entry (1.0); fp_8recip aliases the 1/8 entry.
fp_1:
spanrecips:     dd      0x3f800000      ; 1/1
                dd      0x3f000000      ; 1/2
                dd      0x3eaaaaab      ; 1/3
                dd      0x3e800000      ; 1/4
                dd      0x3e4ccccd      ; 1/5
                dd      0x3e2aaaab      ; 1/6
                dd      0x3e124925      ; 1/7
fp_8recip:      dd      0x3e000000      ; 1/8
                dd      0x3de38e39      ; 1/9
                dd      0x3dcccccd      ; 1/10
                dd      0x3dba2e8c      ; 1/11
                dd      0x3daaaaab      ; 1/12
                dd      0x3d9d89d9      ; 1/13
                dd      0x3d924925      ; 1/14
                dd      0x3d888889      ; 1/15

; Small integers as single-precision floats, indexed as
; [fp_quickint + n*4] = n+1.  fp_8 aliases the last entry (8.0).
fp_quickint:    dd      0x3f800000      ; 1
                dd      0x40000000      ; 2
                dd      0x40400000      ; 3
                dd      0x40800000      ; 4
                dd      0x40a00000      ; 5
                dd      0x40c00000      ; 6
                dd      0x40e00000      ; 7
fp_8:           dd      0x41000000      ; 8
|
|
|
|
section .text
|
|
|
|
section .text

;-----------------------------------------------------------------------
; R_SetTiltedSpanSource_ASM
;
; Patches the texture base address into every texel-fetch instruction
; (fetch1..fetch10) of R_DrawTiltedPlane_ASM and records it in
; ds_curtiltedsource.
;
; Entry points:
;   R_SetTiltedSpanSource_ASM      pointer read from [esp+4]
;   @R_SetTiltedSpanSource_ASM@4   pointer already in ecx
; Out:    nothing
; Clobb:  ecx on the stack-argument entry, plus whatever the `selfmod`
;         macro touches (defined outside this file)
; Note:   returns with a plain `ret`; no stack arguments are removed.
;-----------------------------------------------------------------------
GLOBAL R_SetTiltedSpanSource_ASM
GLOBAL @R_SetTiltedSpanSource_ASM@4

R_SetTiltedSpanSource_ASM:
        mov     ecx,[esp+4]             ; stack-argument entry falls through

@R_SetTiltedSpanSource_ASM@4:
        ; Byte offset 3 of each fetchN instruction is its 32-bit
        ; displacement (assembled as SPACEFILLER4).
        mov     [fetch1+3],ecx
        mov     [fetch2+3],ecx
        mov     [fetch3+3],ecx
        mov     [fetch4+3],ecx
        mov     [fetch5+3],ecx
        mov     [fetch6+3],ecx
        mov     [fetch7+3],ecx
        mov     [fetch8+3],ecx
        mov     [fetch9+3],ecx
        mov     [fetch10+3],ecx
        mov     [ds_curtiltedsource],ecx
        selfmod rtext_start, rtext_end  ; announce the patched code range
        ret
|
|
|
|
;-----------------------------------------------------------------------
; SetTiltedSpanSize
;
; Patches the shift counts and the AND mask used by the texel address
; calculation in R_DrawTiltedPlane_ASM.
;
; In:     cl, dl = two bit counts
;         NOTE(review): presumably the log2 texture dimensions - confirm
;         against the caller.
; Patched values:
;   x1..x10  shift count byte = -cl          (as assembled: 26)
;   y1..y10  shift count byte = -cl - dl     (as assembled: 20)
;   m1..m10  mask dword       = ~((1 << (-dl & 31)) - 1)
;                                             (as assembled: 0xfc000000)
; The negative counts are stored as-is; x86 shifts use only the low
; 5 bits of an immediate count.
; Out:    nothing
; Clobb:  eax, flags (ecx is saved and restored), plus whatever the
;         `selfmod` macro touches
;-----------------------------------------------------------------------
GLOBAL SetTiltedSpanSize

SetTiltedSpanSize:
        push    ecx                     ; keep the caller's ecx; cl is reused below
        mov     cl,dl
        neg     cl
        mov     eax,1
        shl     eax,cl                  ; eax = 1 << (-dl & 31)
        mov     cl,[esp]                ; reload the original cl from the saved ecx
        neg     cl                      ; cl = -cl(original)
        mov     [x1+2],cl               ; byte 2 of `shr r32,imm8` is the count
        mov     [x2+2],cl
        mov     [x3+2],cl
        mov     [x4+2],cl
        mov     [x5+2],cl
        mov     [x6+2],cl
        mov     [x7+2],cl
        mov     [x8+2],cl
        mov     [x9+2],cl
        mov     [x10+2],cl

        sub     cl,dl                   ; cl = -cl(original) - dl
        dec     eax                     ; eax = (1 << n) - 1
        mov     [y1+2],cl
        mov     [y2+2],cl
        mov     [y3+2],cl
        mov     [y4+2],cl
        mov     [y5+2],cl
        mov     [y6+2],cl
        mov     [y7+2],cl
        mov     [y8+2],cl
        mov     [y9+2],cl
        mov     [y10+2],cl
        not     eax                     ; eax = mask that keeps only the top bits
        pop     ecx

        mov     [m1+2],eax              ; byte 2 of `and r32,imm32` starts the mask
        mov     [m2+2],eax
        mov     [m3+2],eax
        mov     [m4+2],eax
        mov     [m5+2],eax
        mov     [m6+2],eax
        mov     [m7+2],eax
        mov     [m8+2],eax
        mov     [m9+2],eax
        mov     [m10+2],eax

        selfmod rtext_start, rtext_end  ; announce the patched code range

        ret
|
|
|
|
; The code from rtext_start to rtext_end is patched at run time by
; R_SetTiltedSpanSource_ASM and SetTiltedSpanSize.  Outside Mach-O it is
; placed in a dedicated section declared both executable and writable;
; for Mach-O it stays in .text and the range start is exported under an
; extra name.
%ifndef M_TARGET_MACHO
        SECTION .rtext  progbits alloc exec write align=64
%else
        SECTION .text align=64
GLOBAL _rtext_tmap2_start
_rtext_tmap2_start:
%endif

rtext_start:
|
|
|
|
;-----------------------------------------------------------------------
; R_DrawTiltedPlane_ASM
;
; Draws one horizontal span of a tilted plane.  Texture coordinates are
; evaluated exactly every 8 pixels and stepped linearly in between.
;
; Entry points:
;   R_DrawTiltedPlane_ASM      y read from [esp+4], x from [esp+8]
;   @R_DrawTiltedPlane_ASM@8   ecx = y, edx = x
; Out:    nothing
; Saves:  ebx, esi, edi, ebp (pushed on entry, popped before each ret)
; Clobb:  eax, ecx, edx, flags; the x87 stack is used heavily and every
;         value pushed here is popped again before returning
; Calls:  R_CalcTiltedLighting (three dword stack arguments, removed here
;         by the caller) when [plane_shade] is non-zero
;
; Register roles once setup is complete:
;   ebx = span length - 1 (pixels still to draw, minus one)
;   edi = destination pointer in the frame buffer
;   eax = zero-extended byte scratch for the lighting-table lookups
;
; x87 stack layout is shown in the trailing comments, st0 first.
;-----------------------------------------------------------------------
GLOBAL R_DrawTiltedPlane_ASM
GLOBAL @R_DrawTiltedPlane_ASM@8

R_DrawTiltedPlane_ASM:
        mov     ecx,[esp+4]
        mov     edx,[esp+8]

        ; ecx = y
        ; edx = x

@R_DrawTiltedPlane_ASM@8:
        push    ebx
        push    esi
        push    edi
        push    ebp

        mov     eax,[centery]
        movzx   ebx,word [spanend+ecx*2]
        sub     eax,ecx                 ; eax = centery-y
        sub     ebx,edx                 ; ebx = span length - 1
        mov     edi,[ylookup+ecx*4]
        push    eax                     ; [esp+4] below = centery-y (ymul)
        add     edi,[dc_destorg]
        add     edi,edx                 ; edi = frame buffer pointer
        sub     edx,[centerx]           ; edx = x-centerx
        push    edx                     ; [esp] below = x-centerx (xmul)
        xor     eax,eax

        fild    dword [esp+4]           ; ymul
        fild    dword [esp]             ; xmul | ymul
        fld     dword [sv_j]            ; sv.j | xmul | ymul
        fmul    st0,st2                 ; sv.j*ymul | xmul | ymul
        fld     dword [su_j]            ; su.j | sv.j*ymul | xmul | ymul
        fmul    st0,st3                 ; su.j*ymul | sv.j*ymul | xmul | ymul
        fld     dword [sz_j]            ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul
        fmulp   st4,st0                 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
        fld     dword [sv_i]            ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
        fmul    st0,st3                 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
        fld     dword [su_i]            ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
        fmul    st0,st4                 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
        fld     dword [sz_i]            ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
        fmulp   st5,st0                 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
        fxch    st1                     ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
        faddp   st3,st0                 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
        faddp   st1,st0                 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
        fxch    st3                     ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul
        faddp   st2,st0                 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
        fadd    dword [sv_k]            ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
        fxch    st1                     ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul
        fadd    dword [sz_k]            ; 1/z | v/z | su.i*xmul+su.j*ymul
        fxch    st2                     ; su.i*xmul+su.j*ymul | v/z | 1/z
        fadd    dword [su_k]            ; u/z | v/z | 1/z
        fxch    st2                     ; 1/z | v/z | u/z
        fxch    st1                     ; v/z | 1/z | u/z

        ; if lighting is on, fill out the light table
        mov     al,[plane_shade]
        test    al,al
        jz      .litup                  ; eax is already 0 on this path

        push    ebx                     ; third argument: width (span length - 1)
        fild    dword [esp]             ; width | v/z | 1/z | u/z
        fmul    dword [sz_i]            ; width*sz.i | v/z | 1/z | u/z
        fadd    st0,st2                 ; 1/endz | v/z | 1/z | u/z
        fld     st2                     ; 1/z | 1/endz | v/z | 1/z | u/z
        fmul    dword [planelightfloat] ; light*1/z | 1/endz | v/z | 1/z | u/z
        fxch    st1                     ; 1/endz | light*1/z | v/z | 1/z | u/z
        fmul    dword [planelightfloat] ; light*1/endz | light*1/z | v/z | 1/z | u/z
        sub     esp,8                   ; room for the two integer arguments
        fistp   dword [esp]             ; [esp]   = light at span end
        fistp   dword [esp+4]           ; [esp+4] = light at span start
        call    R_CalcTiltedLighting
        add     esp, 12                 ; drop the three arguments
        xor     eax, eax                ; the call may have changed eax; re-zero it

.litup: add     esp, 8                  ; discard the ymul/xmul integers

        ; calculate initial z, u, and v values
        fld     st1                     ; 1/z | v/z | 1/z | u/z
        fdivr   dword [fp_1]            ; z | v/z | 1/z | u/z

        fld     st3                     ; u/z | z | v/z | 1/z | u/z
        fmul    st0,st1                 ; u | z | v/z | 1/z | u/z
        fld     st2                     ; v/z | u | z | v/z | 1/z | u/z
        fmulp   st2,st0                 ; u | v | v/z | 1/z | u/z
        fld     st0
        fistp   qword [start_u]
        fld     st1
        fistp   qword [start_v]

        cmp     ebx,7                   ; Do we have at least 8 pixels to plot?
        jl      near ShortStrip

        ; yes, we do, so figure out tex coords at end of this span

        ; multiply i values by span length (8)
        fld     dword [su_i]            ; su.i
        fmul    dword [fp_8]            ; su.i*8
        fld     dword [sv_i]            ; sv.i | su.i*8
        fmul    dword [fp_8]            ; sv.i*8 | su.i*8
        fld     dword [sz_i]            ; sz.i | sv.i*8 | su.i*8
        fmul    dword [fp_8]            ; sz.i*8 | sv.i*8 | su.i*8
        fxch    st2                     ; su.i*8 | sv.i*8 | sz.i*8
        fstp    qword [step_uz]         ; sv.i*8 | sz.i*8
        fstp    qword [step_vz]         ; sz.i*8
        fst     qword [step_iz]         ; sz.i*8 | u | v | v/z | 1/z | u/z

        ; find tex coords at start of next span
        faddp   st4,st0                 ; u | v | v/z | 1/z+step | u/z
        fld     qword [step_vz]
        faddp   st3,st0                 ; u | v | v/z+step | 1/z | u/z
        fld     qword [step_uz]
        faddp   st5,st0                 ; u | v | v/z | 1/z | u/z+step

        fld     st3                     ; 1/z | u | v | v/z | 1/z | u/z
        fdivr   dword [fp_1]            ; z | u | v | v/z | 1/z | u/z
        fst     dword [end_z]           ; (end_z is never read in this file)
        fld     st5                     ; u/z | z | u | v | v/z | 1/z | u/z
        fmul    st0,st1                 ; u' | z | u | v | v/z | 1/z | u/z
        fxch    st1                     ; z | u' | u | v | v/z | 1/z | u/z
        fmul    st0,st4                 ; v' | u' | u | v | v/z | 1/z | u/z
        fxch    st3                     ; v | u' | u | v' | v/z | 1/z | u/z

        ; now subtract to get stepping values for this span
        fsubr   st0,st3                 ; v'-v | u' | u | v' | v/z | 1/z | u/z
        fxch    st2                     ; u | u' | v'-v | v' | v/z | 1/z | u/z
        fsubr   st0,st1                 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
        fxch    st2                     ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
        fmul    dword [fp_8recip]       ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
        fxch    st1                     ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
        fxch    st2                     ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
        fmul    dword [fp_8recip]       ; ustep | vstep | u' | v' | v/z | 1/z | u/z
        fxch    st1                     ; vstep | ustep | u' | v' | v/z | 1/z | u/z
        fistp   qword [step_v]          ; ustep | u' | v' | v/z | 1/z | u/z
        fistp   qword [step_u]          ; u | v | v/z | 1/z | u/z
|
|
|
|
; FullSpan: an 8-pixel span is about to be drawn with the steps already in
; step_u/step_v.  First advance u/z, v/z and 1/z to the end of the span
; that follows it, and start the reciprocal for that point.
; On entry the x87 stack is: u | v | v/z | 1/z | u/z
FullSpan:
        xor     eax,eax
        cmp     ebx,15                  ; is there another complete span after this one?
        jl      NextIsShort

        ; there is a complete span after this one
        fld     qword [step_iz]
        faddp   st4,st0                 ; 1/z += 8*sz.i
        fld     qword [step_vz]
        faddp   st3,st0                 ; v/z += 8*sv.i
        fld     qword [step_uz]
        faddp   st5,st0                 ; u/z += 8*su.i
        jmp     StartDiv

NextIsShort:
        cmp     ebx,8                   ; if next span is no more than 1 pixel, then we already
        jle     DrawFullSpan            ; know everything we need to draw it

        ; ebx is 9..14 here, so the table entry below is (ebx-8)+1 = 2.0..7.0
        fld     dword [sz_i]            ; sz.i | u | v | v/z | 1/z | u/z
        fmul    dword [fp_quickint-8*4+ebx*4]
        fld     dword [sv_i]            ; sv.i | sz.i | u | v | v/z | 1/z | u/z
        fmul    dword [fp_quickint-8*4+ebx*4]
        fld     dword [su_i]            ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
        fmul    dword [fp_quickint-8*4+ebx*4]
        fxch    st2                     ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
        faddp   st6,st0                 ; sv.i | su.i | u | v | v/z | 1/z | u/z
        faddp   st4,st0                 ; su.i | u | v | v/z | 1/z | u/z
        faddp   st5,st0                 ; u | v | v/z | 1/z | u/z

StartDiv:
        ; The quotient is not consumed until after the 8 pixels below have
        ; been drawn.
        fld     st3                     ; 1/z | u | v | v/z | 1/z | u/z
        fdivr   dword [fp_1]            ; z | u | v | v/z | 1/z | u/z
|
|
|
|
; DrawFullSpan: draw 8 pixels, fully unrolled.
;
; x87 stack on entry:  z | u | v | v/z | 1/z | u/z   (from StartDiv), or
;                      u | v | v/z | 1/z | u/z       (from NextIsShort, ebx <= 8)
;
; Per pixel:  texel = [source + (v >> 26) + ((u & 0xfc000000) >> 20)]
;             pixel = [tiltlighting[n] + texel]
; where the shift counts, the mask and `source` are the values patched by
; SetTiltedSpanSize / R_SetTiltedSpanSource_ASM.  The labels xN, mN, yN and
; fetchN mark those patch sites: the instruction that follows each label
; must keep its exact encoding (immediate at byte 2, displacement at
; byte 3).
;
;   ecx = v, edx = u (pviewy/pviewx already added), stepped once per pixel
;   ebp/esi = per-pixel address scratch
DrawFullSpan:
        mov     ecx,[start_v]
        mov     edx,[start_u]

        add     ecx,[pviewy]
        add     edx,[pviewx]

        mov     esi,edx
        mov     ebp,ecx

        ; pixel 0
x1:     shr     ebp,26
m1:     and     esi,0xfc000000
y1:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch1: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4]
        mov     esi,edx
        mov     al,[ebp+eax]
        mov     ebp,ecx
        mov     [edi+0],al

        ; pixel 1
x2:     shr     ebp,26
m2:     and     esi,0xfc000000
y2:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch2: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4-4]
        mov     esi,edx
        mov     al,[ebp+eax]
        mov     ebp,ecx
        mov     [edi+1],al

        ; pixel 2
x3:     shr     ebp,26
m3:     and     esi,0xfc000000
y3:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch3: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4-8]
        mov     esi,edx
        mov     al,[ebp+eax]
        mov     ebp,ecx
        mov     [edi+2],al

        ; pixel 3
x4:     shr     ebp,26
m4:     and     esi,0xfc000000
y4:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch4: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4-12]
        mov     esi,edx
        mov     al,[ebp+eax]
        mov     ebp,ecx
        mov     [edi+3],al

        ; pixel 4
x5:     shr     ebp,26
m5:     and     esi,0xfc000000
y5:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch5: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4-16]
        mov     esi,edx
        mov     al,[ebp+eax]
        mov     ebp,ecx
        mov     [edi+4],al

        ; pixel 5
x6:     shr     ebp,26
m6:     and     esi,0xfc000000
y6:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch6: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4-20]
        mov     esi,edx
        mov     al,[ebp+eax]
        mov     ebp,ecx
        mov     [edi+5],al

        ; pixels 6 and 7 are interleaved; pixel 7 works directly on ecx/edx
        ; because u and v are not needed afterwards
x7:     shr     ebp,26
m7:     and     esi,0xfc000000
y7:     shr     esi,20
        add     ecx,[step_v]
        add     edx,[step_u]
fetch7: mov     al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4-24]
x8:     shr     ecx,26
        mov     al,[ebp+eax]
m8:     and     edx,0xfc000000
        mov     [edi+6],al

y8:     shr     edx,20
        mov     ebp,[tiltlighting+ebx*4-28]
fetch8: mov     al,[edx+ecx+SPACEFILLER4]
        mov     al,[ebp+eax]
        mov     [edi+7],al
        add     edi,8

        sub     ebx,8
        jl      near Done               ; that was the last pixel of the span

        ; Store the next start point.  With z on top of the stack st1/st2
        ; are u/v.  When ebx is now 0 there is no z on the stack and these
        ; stores hold other values; OnlyOnePixelAtEnd overwrites them.
        fld     st1
        fistp   qword [start_u]
        fld     st2
        fistp   qword [start_v]

        cmp     ebx,7
        jl      near EndIsShort

        ; at least 8 more pixels: derive the steps for the next full span
        fst     dword [end_z]           ; (end_z is never read in this file)
        fld     st5                     ; u/z | z | u | v | v/z | 1/z | u/z
        fmul    st0,st1                 ; u' | z | u | v | v/z | 1/z | u/z
        fxch    st1                     ; z | u' | u | v | v/z | 1/z | u/z
        fmul    st0,st4                 ; v' | u' | u | v | v/z | 1/z | u/z
        fxch    st3                     ; v | u' | u | v' | v/z | 1/z | u/z
        fsubr   st0,st3                 ; v'-v | u' | u | v' | v/z | 1/z | u/z
        fxch    st2                     ; u | u' | v'-v | v' | v/z | 1/z | u/z
        fsubr   st0,st1                 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
        fxch    st2                     ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
        fmul    dword [fp_8recip]       ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
        fxch    st1                     ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
        fxch    st2                     ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
        fmul    dword [fp_8recip]       ; ustep | vstep | u' | v' | v/z | 1/z | u/z
        fxch    st1                     ; vstep | ustep | u' | v' | v/z | 1/z | u/z
        fistp   qword [step_v]          ; ustep | u' | v' | v/z | 1/z | u/z
        fistp   qword [step_u]          ; u | v | v/z | 1/z | u/z
        jmp     FullSpan
|
|
|
|
; OnlyOnePixelAtEnd: exactly one pixel remains after a full span.
; x87 stack here is u | v | v/z | 1/z | u/z, so st0/st1 are the current
; u and v; store them as the start point and fall through.
OnlyOnePixelAtEnd:
        fld     st0
        fistp   qword [start_u]
        fld     st1
        fistp   qword [start_v]

; OnlyOnePixel: draw a single pixel from start_u/start_v using
; tiltlighting[0].  x9/m9/y9/fetch9 are run-time patch sites; the
; instruction after each label must keep its exact encoding.
OnlyOnePixel:
        mov     edx,[start_v]
        mov     ecx,[start_u]
        add     edx,[pviewy]
        add     ecx,[pviewx]
x9:     shr     edx,26
m9:     and     ecx,0xfc000000
y9:     shr     ecx,20
        mov     ebp,[tiltlighting]
fetch9: mov     al,[ecx+edx+SPACEFILLER4]
        mov     al,[ebp+eax]
        mov     [edi],al

; Done: pop the five values still on the x87 stack
; (u | v | v/z | 1/z | u/z), restore the saved registers and return.
Done:
        fcompp                          ; pops two (the comparison result is unused)
        fcompp                          ; pops two more
        fstp    st0                     ; pops the fifth

        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret
|
|
|
|
; ShortStrip: the whole span is shorter than 8 pixels (ebx < 7).
; x87 stack on entry: u | v | v/z | 1/z | u/z; start_u/start_v are
; already stored.
ShortStrip:
        test    ebx,ebx                 ; same flags as `cmp ebx,0` for jle
        jle     near OnlyOnePixel

; Advance u/z, v/z and 1/z by (ebx+1) pixels, then compute z at that point.
MoreThanOnePixel:
        fld     dword [sz_i]            ; sz.i | u | v | v/z | 1/z | u/z
        fmul    dword [fp_quickint+ebx*4]
        fld     dword [sv_i]            ; sv.i | sz.i | u | v | v/z | 1/z | u/z
        fmul    dword [fp_quickint+ebx*4]
        fld     dword [su_i]            ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
        fmul    dword [fp_quickint+ebx*4]
        fxch    st2                     ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
        faddp   st6,st0                 ; sv.i | su.i | u | v | v/z | 1/z | u/z
        faddp   st4,st0                 ; su.i | u | v | v/z | 1/z | u/z
        faddp   st5,st0                 ; u | v | v/z | 1/z | u/z
        fld     st3                     ; 1/z | u | v | v/z | 1/z | u/z
        fdivr   dword [fp_1]            ; z | u | v | v/z | 1/z | u/z
        jmp     CalcPartialSteps

; EndIsShort: fewer than 8 pixels remain after a full span (0 <= ebx < 7).
; Falls through into CalcPartialSteps when ebx is non-zero.
EndIsShort:
        test    ebx,ebx                 ; same flags as `cmp ebx,0` for je
        je      near OnlyOnePixelAtEnd
|
|
|
|
; CalcPartialSteps: draw the final (ebx+1) pixels of the span, ebx = 1..6.
; x87 stack on entry: z | u | v | v/z | 1/z | u/z, where z, v/z, 1/z and
; u/z describe the point (ebx+1) pixels ahead of u, v.
CalcPartialSteps:
        fst     dword [end_z]           ; (end_z is never read in this file)
        fld     st5                     ; u/z | z | u | v | v/z | 1/z | u/z
        fmul    st0,st1                 ; u' | z | u | v | v/z | 1/z | u/z
        fxch    st1                     ; z | u' | u | v | v/z | 1/z | u/z
        fmul    st0,st4                 ; v' | u' | u | v | v/z | 1/z | u/z
        fxch    st1                     ; u' | v' | u | v | v/z | 1/z | u/z
        fsubrp  st2,st0                 ; v' | u'-u | v | v/z | 1/z | u/z
        fsubrp  st2,st0                 ; u'-u | v'-v | v/z | 1/z | u/z
        fmul    dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z
        fxch    st1                     ; v'-v | ustep | v/z | 1/z | u/z
        fmul    dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z
        fxch    st1                     ; ustep | vstep | v/z | 1/z | u/z
        fistp   qword [step_u]          ; vstep | v/z | 1/z | u/z
        fistp   qword [step_v]          ; v/z | 1/z | u/z

        mov     ecx,[start_v]
        mov     edx,[start_u]

        add     ecx,[pviewy]
        add     edx,[pviewx]

        mov     esi,edx
        mov     ebp,ecx

; One pixel per iteration; ebx counts down and indexes tiltlighting.
; x10/m10/y10/fetch10 are run-time patch sites; the instruction after each
; label must keep its exact encoding.
endloop:
x10:    shr     ebp,26
m10:    and     esi,0xfc000000

y10:    shr     esi,20
        inc     edi

        add     ecx,[step_v]
        add     edx,[step_u]

fetch10: mov    al,[ebp+esi+SPACEFILLER4]
        mov     ebp,[tiltlighting+ebx*4]

        mov     esi,edx
        dec     ebx                     ; sets the flags tested by jge below

        mov     al,[ebp+eax]
        mov     ebp,ecx

        mov     [edi-1],al              ; edi was already advanced above
        jge     endloop                 ; the movs above leave the flags untouched

        ; pop the three values left on the x87 stack (v/z | 1/z | u/z)
        fcompp
        fstp    st0

        pop     ebp
        pop     edi
        pop     esi
        pop     ebx
        ret
|
|
|
|
; End of the run-time patched code range (see rtext_start).  For Mach-O
; the end of the range is also exported under an extra name.
rtext_end:
%ifdef M_TARGET_MACHO
GLOBAL _rtext_tmap2_end
_rtext_tmap2_end:
%endif
|