qzdoom-gpl/src/asm_ia32/tmap2.asm

631 lines
16 KiB
NASM
Raw Normal View History

;*
;* tmap2.nas
;* The tilted plane inner loop.
;*
;*---------------------------------------------------------------------------
;* Copyright 1998-2006 Randy Heit
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* 1. Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;* 2. Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in the
;* documentation and/or other materials provided with the distribution.
;* 3. The name of the author may not be used to endorse or promote products
;* derived from this software without specific prior written permission.
;*
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;*---------------------------------------------------------------------------
;*
;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was
;* actually slightly slower than the more straight-forward approach
;* used here, probably because the trick requires too much setup time.
;*
BITS 32
About a week's worth of changes here. As a heads-up, I wouldn't be surprised if this doesn't build in Linux right now. The CMakeLists.txt were checked with MinGW and NMake, but how they fare under Linux is an unknown to me at this time. - Converted most sprintf (and all wsprintf) calls to either mysnprintf or FStrings, depending on the situation. - Changed the strings in the wbstartstruct to be FStrings. - Changed myvsnprintf() to output nothing if count is greater than INT_MAX. This is so that I can use a series of mysnprintf() calls and advance the pointer for each one. Once the pointer goes beyond the end of the buffer, the count will go negative, but since it's an unsigned type it will be seen as excessively huge instead. This should not be a problem, as there's no reason for ZDoom to be using text buffers larger than 2 GB anywhere. - Ripped out the disabled bit from FGameConfigFile::MigrateOldConfig(). - Changed CalcMapName() to return an FString instead of a pointer to a static buffer. - Changed startmap in d_main.cpp into an FString. - Changed CheckWarpTransMap() to take an FString& as the first argument. - Changed d_mapname in g_level.cpp into an FString. - Changed DoSubstitution() in ct_chat.cpp to place the substitutions in an FString. - Fixed: The MAPINFO parser wrote into the string buffer to construct a map name when given a Hexen map number. This was fine with the old scanner code, but only a happy coincidence prevents it from crashing with the new code. - Added the 'B' conversion specifier to StringFormat::VWorker() for printing binary numbers. - Added CMake support for building with MinGW, MSYS, and NMake. Linux support is probably broken until I get around to booting into Linux again. Niceties provided over the existing Makefiles they're replacing: * All command-line builds can use the same build system, rather than having a separate one for MinGW and another for Linux. * Microsoft's NMake tool is supported as a target. * Progress meters. 
* Parallel makes work from a fresh checkout without needing to be primed first with a single-threaded make. * Porting to other architectures should be simplified, whenever that day comes. - Replaced the makewad tool with zipdir. This handles the dependency tracking itself instead of generating an external makefile to do it, since I couldn't figure out how to generate a makefile with an external tool and include it with a CMake-generated makefile. Where makewad used a master list of files to generate the package file, zipdir just zips the entire contents of one or more directories. - Added the gdtoa package from netlib's fp library so that ZDoom's printf-style formatting can be entirely independent of the CRT. SVN r1082 (trunk)
2008-07-23 04:57:26 +00:00
; NOTE(review): presumably supplies the `selfmod` macro that is invoked after
; each code-patching routine further down -- confirm against valgrind.inc.
%include "valgrind.inc"
; Placeholder dword. It is the assembled displacement of every fetchN
; instruction and the initial value of ds_curtiltedsource; the copies inside
; the fetchN instructions are overwritten by R_SetTiltedSpanSource_ASM.
%define SPACEFILLER4 (0x44444444)
; Unless M_TARGET_LINUX is defined, every shared symbol below is referred to
; by its underscore-prefixed name.
; NOTE(review): SetTiltedSpanSize is exported later in this file but has no
; entry in this list -- confirm that is intentional.
%ifndef M_TARGET_LINUX
%define plane_sz _plane_sz
%define plane_su _plane_su
%define plane_sv _plane_sv
%define plane_shade _plane_shade
%define planelightfloat _planelightfloat
%define spanend _spanend
%define ylookup _ylookup
%define dc_destorg _dc_destorg
%define ds_colormap _ds_colormap
%define ds_source _ds_source
%define centery _centery
%define centerx _centerx
%define ds_curtiltedsource _ds_curtiltedsource
%define pviewx _pviewx
%define pviewy _pviewy
%define tiltlighting _tiltlighting
%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM
%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM
%define R_CalcTiltedLighting _R_CalcTiltedLighting
%endif
; Symbols defined outside this file. The notes describe only how the code in
; this file accesses each one.
EXTERN plane_sz ; three consecutive dword floats (see the sz_i/sz_j/sz_k aliases)
EXTERN plane_su ; three consecutive dword floats (su_i/su_j/su_k)
EXTERN plane_sv ; three consecutive dword floats (sv_i/sv_j/sv_k)
EXTERN planelightfloat ; dword float; scales 1/z before the values go to R_CalcTiltedLighting
EXTERN spanend ; table of 16-bit words indexed by y
EXTERN ylookup ; table of dwords indexed by y
EXTERN dc_destorg ; dword added to the ylookup entry to form the destination pointer
EXTERN ds_colormap ; declared, but no instruction visible in this file references it
EXTERN centery ; dword integer
EXTERN centerx ; dword integer
EXTERN ds_source ; declared, but no instruction visible in this file references it
EXTERN plane_shade ; byte; non-zero means the light table is rebuilt for the span
EXTERN pviewx ; dword added to the integer u coordinate
EXTERN pviewy ; dword added to the integer v coordinate
EXTERN tiltlighting ; table of dword pointers, indexed by remaining pixel count
EXTERN R_CalcTiltedLighting ; called with three dword stack arguments; caller pops them
; Defined in this file's .data section and exported.
GLOBAL ds_curtiltedsource
; Names for the three dword components of each plane vector, at byte offsets
; 0, 4 and 8. In the setup code the _i component multiplies (x - centerx),
; the _j component multiplies (centery - y), and the _k component is added
; as a constant term.
%define sv_i plane_sv
%define sv_j plane_sv+4
%define sv_k plane_sv+8
%define su_i plane_su
%define su_j plane_su+4
%define su_k plane_su+8
%define sz_i plane_sz
%define sz_j plane_sz+4
%define sz_k plane_sz+8
; Not referenced by any line visible in this file. Presumably log2 of the
; 8-pixel span length that the drawing code hard-codes -- TODO confirm.
%define SPANBITS 3
; Scratch storage used by R_DrawTiltedPlane_ASM. These are static, so the
; routine keeps its per-span state here rather than on the stack.
section .bss
; Texture coordinates at the start of the current span. Written as 64-bit
; integers with fistp; only the low dword is read back.
start_u: resq 1
start_v: resq 1
; Per-pixel coordinate increments. Written as 64-bit integers with fistp;
; only the low dword is read back.
step_u: resq 1
step_v: resq 1
; sz.i*8, su.i*8 and sv.i*8 stored as 64-bit floats: the amount 1/z, u/z and
; v/z change across one 8-pixel span.
step_iz: resq 1
step_uz: resq 1
step_vz: resq 1
; Receives z (as a 32-bit float) each time steps are computed. No instruction
; visible in this file reads it back.
end_z: resd 1
section .data
; Texture base most recently passed to R_SetTiltedSpanSource_ASM; holds the
; placeholder value until that routine has been called.
ds_curtiltedsource: dd SPACEFILLER4
; All constants below are IEEE-754 single-precision values given as raw bit
; patterns.
;
; fp_1 and spanrecips label the same address. fp_1 is used as the constant
; 1.0; spanrecips[n] = 1/(n+1) is indexed with ebx (pixels remaining - 1).
; fp_8recip labels the 1/8 entry inside the same table.
fp_1:
spanrecips: dd 0x3f800000 ; 1/1
dd 0x3f000000 ; 1/2
dd 0x3eaaaaab ; 1/3
dd 0x3e800000 ; 1/4
dd 0x3e4ccccd ; 1/5
dd 0x3e2aaaab ; 1/6
dd 0x3e124925 ; 1/7
fp_8recip: dd 0x3e000000 ; 1/8
dd 0x3de38e39 ; 1/9
dd 0x3dcccccd ; 1/10
dd 0x3dba2e8c ; 1/11
dd 0x3daaaaab ; 1/12
dd 0x3d9d89d9 ; 1/13
dd 0x3d924925 ; 1/14
dd 0x3d888889 ; 1/15
; fp_quickint[n] = n+1 as a float, used to scale the per-pixel gradients by a
; partial span length. fp_8 labels the last entry (8.0).
fp_quickint: dd 0x3f800000 ; 1
dd 0x40000000 ; 2
dd 0x40400000 ; 3
dd 0x40800000 ; 4
dd 0x40a00000 ; 5
dd 0x40c00000 ; 6
dd 0x40e00000 ; 7
fp_8: dd 0x41000000 ; 8
section .text

;-----------------------------------------------------------------------
; R_SetTiltedSpanSource_ASM -- install a new texture base address.
;
; Entry points:
;   R_SetTiltedSpanSource_ASM      argument read from the stack, [esp+4]
;   @R_SetTiltedSpanSource_ASM@4   argument already in ecx
; Effect:
;   Overwrites the 32-bit displacement of the ten fetchN instructions
;   (the dword 3 bytes into each one) with the new address and records
;   the address in ds_curtiltedsource.
; Clobbers: ecx on the stack-argument entry, plus whatever the selfmod
;   macro touches (defined outside this file).
;-----------------------------------------------------------------------

; Emit "mov [label+3],ecx" once for every label in the argument list.
%macro STORE_TEXTURE_BASE 1-*
%rep %0
	mov	[%1 + 3],ecx
%rotate 1
%endrep
%endmacro

GLOBAL	R_SetTiltedSpanSource_ASM
GLOBAL	@R_SetTiltedSpanSource_ASM@4

R_SetTiltedSpanSource_ASM:
	mov	ecx,[esp+4]		; stack-argument entry falls through

@R_SetTiltedSpanSource_ASM@4:
	STORE_TEXTURE_BASE fetch1, fetch2, fetch3, fetch4, fetch5, fetch6, fetch7, fetch8, fetch9, fetch10
	mov	[ds_curtiltedsource],ecx
	selfmod	rtext_start, rtext_end	; report the modified code range
	ret
;-----------------------------------------------------------------------
; SetTiltedSpanSize -- patch the texture-size constants in the drawer.
;
; In:   cl (low byte of ecx) and dl; nothing is read from the caller's
;       stack frame.
; Patches, for N = 1..10:
;   xN  shift count  = -cl          (the CPU uses only the low 5 bits)
;   yN  shift count  = -cl - dl
;   mN  AND mask     = ~((1 << (-dl & 31)) - 1)
;       For cl = dl = 6 these are 26, 20 and 0xfc000000, the values the
;       instructions are assembled with.
; Out:  ecx restored; eax holds the mask; edx untouched.
; NOTE(review): the label carries no underscore or @N decoration, so the
;       intended caller and calling convention should be confirmed.
;-----------------------------------------------------------------------

; Emit "mov [label+2],cl" for every label listed (8-bit shift counts).
%macro STORE_SHIFT_COUNT 1-*
%rep %0
	mov	[%1 + 2],cl
%rotate 1
%endrep
%endmacro

; Emit "mov [label+2],eax" for every label listed (32-bit AND masks).
%macro STORE_COORD_MASK 1-*
%rep %0
	mov	[%1 + 2],eax
%rotate 1
%endrep
%endmacro

GLOBAL	SetTiltedSpanSize

SetTiltedSpanSize:
	push	ecx			; keep the caller's ecx; cl is reused below
	mov	cl,dl
	neg	cl
	mov	eax,1
	shl	eax,cl			; eax = 1 << (-dl & 31)
	mov	cl,[esp]		; reload the original cl from the saved ecx
	neg	cl			; cl = -cl
	STORE_SHIFT_COUNT x1, x2, x3, x4, x5, x6, x7, x8, x9, x10
	sub	cl,dl			; cl = -(original cl) - dl
	dec	eax
	STORE_SHIFT_COUNT y1, y2, y3, y4, y5, y6, y7, y8, y9, y10
	not	eax			; eax = mask of the bits above the shift
	pop	ecx
	STORE_COORD_MASK m1, m2, m3, m4, m5, m6, m7, m8, m9, m10
	selfmod	rtext_start, rtext_end	; report the modified code range
	ret
; Section that is both writable and executable: the instructions labelled
; xN / mN / yN / fetchN further down are patched in place by
; R_SetTiltedSpanSource_ASM and SetTiltedSpanSize.
SECTION .rtext progbits alloc exec write align=64
; Start of the code range handed to the selfmod macro.
rtext_start:
;-----------------------------------------------------------------------
; R_DrawTiltedPlane_ASM -- draw one horizontal span of a tilted plane.
;
; Entry points:
;   R_DrawTiltedPlane_ASM      stack args: [esp+4] = y, [esp+8] = x
;   @R_DrawTiltedPlane_ASM@8   register args: ecx = y, edx = x
; Saves and restores ebx, esi, edi, ebp; eax, ecx, edx are scratch.
; Up to eight x87 registers are in use at once, so the x87 stack must be
; empty on entry; everything pushed is popped again before returning.
;
; Register roles once setup is complete:
;   ebx = spanend[y] - x (pixels remaining - 1)
;   edi = destination pointer
;   eax = texel byte in al; the upper 24 bits are kept zero so that eax
;         can index a light table directly
;
; u/z, v/z and 1/z are linear across the span. A true divide is done once
; per 8 pixels (or once for a shorter tail) and u, v are interpolated
; linearly in between.
;-----------------------------------------------------------------------
GLOBAL R_DrawTiltedPlane_ASM
GLOBAL @R_DrawTiltedPlane_ASM@8
R_DrawTiltedPlane_ASM:
mov ecx,[esp+4]
mov edx,[esp+8]
; ecx = y
; edx = x
@R_DrawTiltedPlane_ASM@8:
push ebx
push esi
push edi
push ebp
mov eax,[centery]
movzx ebx,word [spanend+ecx*2]
sub eax,ecx ; eax = centery-y
sub ebx,edx ; ebx = span length - 1
mov edi,[ylookup+ecx*4]
; The two pushes below park (centery-y) and (x-centerx) on the stack so that
; fild can load them; they are discarded again at .litup.
push eax
add edi,[dc_destorg]
add edi,edx ; edi = frame buffer pointer
sub edx,[centerx] ; edx = x-centerx
push edx
xor eax,eax
; Evaluate  u/z = su.i*xmul + su.j*ymul + su.k  (likewise v/z and 1/z),
; with the three computations interleaved.
fild dword [esp+4] ; ymul
fild dword [esp] ; xmul | ymul
fld dword [sv_j] ; sv.j | xmul | ymul
fmul st0,st2 ; sv.j*ymul | xmul | ymul
fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul
fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul
fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul
fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
faddp st1,st0 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul
faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul
fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul
fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z
fadd dword [su_k] ; u/z | v/z | 1/z
fxch st2 ; 1/z | v/z | u/z
fxch st1 ; v/z | 1/z | u/z
; if lighting is on, fill out the light table
mov al,[plane_shade]
test al,al
jz .litup
; Build three dword stack arguments for R_CalcTiltedLighting:
;   [esp]   = (1/z at the span end)   * planelightfloat, as an integer
;   [esp+4] = (1/z at the span start) * planelightfloat, as an integer
;   [esp+8] = ebx (span length - 1)
; The end value comes first; the drawing code correspondingly walks
; tiltlighting from index ebx down to 0.
push ebx
fild dword [esp] ; width | v/z | 1/z | u/z
fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z
fadd st0,st2 ; 1/endz | v/z | 1/z | u/z
fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z
fmul dword [planelightfloat]
fxch st1
fmul dword [planelightfloat]
sub esp,8
fistp dword [esp]
fistp dword [esp+4]
call R_CalcTiltedLighting
add esp, 12
; eax may have been changed by the call; clear it again for texel indexing.
xor eax, eax
; Discard the two integers pushed for the fild instructions above.
.litup add esp, 8
; calculate initial z, u, and v values
fld st1 ; 1/z | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | v/z | 1/z | u/z
fld st3 ; u/z | z | v/z | 1/z | u/z
fmul st0,st1 ; u | z | v/z | 1/z | u/z
fld st2 ; v/z | u | z | v/z | 1/z | u/z
fmulp st2,st0 ; u | v | v/z | 1/z | u/z
; Store the starting coordinates as integers; u and v remain on the x87
; stack for the step computation.
fld st0
fistp qword [start_u]
fld st1
fistp qword [start_v]
cmp ebx,7 ; Do we have at least 8 pixels to plot?
jl near ShortStrip
; yes, we do, so figure out tex coords at end of this span
; multiply i values by span length (8)
fld dword [su_i] ; su.i
fmul dword [fp_8] ; su.i*8
fld dword [sv_i] ; sv.i | su.i*8
fmul dword [fp_8] ; sv.i*8 | su.i*8
fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8
fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8
fxch st2 ; su.i*8 | sv.i*8 | sz.i*8
fstp qword [step_uz] ; sv.i*8 | sz.i*8
fstp qword [step_vz] ; sz.i*8
fst qword [step_iz] ; sz.i*8
; find tex coords at start of next span
; (the single-operand faddp forms below add st0 into the named register and pop)
faddp st4
fld qword [step_vz]
faddp st3
fld qword [step_uz]
faddp st5
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
fst dword [end_z]
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
; now subtract to get stepping values for this span
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
; FullSpan: an 8-pixel span is ready to draw (start_u/start_v and
; step_u/step_v are set, ebx >= 7).
; x87 stack on entry: u | v | v/z | 1/z | u/z, where u and v are the
; coordinates at the end of the span about to be drawn.
; Before drawing, u/z, v/z and 1/z are advanced to the end of the FOLLOWING
; span and the reciprocal is started -- presumably so that the divide
; overlaps the integer pixel code in DrawFullSpan.
;   ebx >= 15       another full span follows: advance by 8 pixels
;   9 <= ebx <= 14  a partial span follows: advance by ebx-7 pixels
;   ebx <= 8        at most one pixel follows: no divide is started
FullSpan:
xor eax,eax
cmp ebx,15 ; is there another complete span after this one?
jl NextIsShort
; there is a complete span after this one
fld qword [step_iz]
faddp st4,st0
fld qword [step_vz]
faddp st3,st0
fld qword [step_uz]
faddp st5,st0
jmp StartDiv
NextIsShort:
cmp ebx,8 ; if next span is no more than 1 pixel, then we already
jle DrawFullSpan ; know everything we need to draw it
; fp_quickint-8*4+ebx*4 addresses fp_quickint[ebx-8], i.e. the float ebx-7.
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint-8*4+ebx*4]
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint-8*4+ebx*4]
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint-8*4+ebx*4]
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
faddp st5,st0 ; u | v | v/z | 1/z | u/z
; Leaves z for the end of the following span on top of the x87 stack.
StartDiv:
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
; DrawFullSpan: draw eight pixels, fully unrolled.
;   ecx = v and edx = u as 32-bit integers (start value plus pviewy/pviewx),
;   stepped by step_v/step_u after each pixel.
; For pixel i (0..7):
;   ebp   = v >> 26                      (xN; count patched at run time)
;   esi   = (u & 0xfc000000) >> 20       (mN, yN; mask and count patched)
;   al    = [ebp + esi + texture base]   (fetchN; displacement patched)
;   al    = [tiltlighting[ebx-i] + eax]  (light table lookup)
;   [edi+i] = al
; The labels x1..x8, m1..m8, y1..y8 and fetch1..fetch8 mark the patched
; instructions: the patch routines write at label+2 (shift count, mask) and
; label+3 (displacement), so the encodings here must not change.
; The x87 stack holds either z | u | v | v/z | 1/z | u/z (a divide was
; started) or u | v | v/z | 1/z | u/z (it was skipped); it is not touched
; while the pixels are drawn.
DrawFullSpan:
mov ecx,[start_v]
mov edx,[start_u]
add ecx,[pviewy]
add edx,[pviewx]
mov esi,edx
mov ebp,ecx
; pixel 0
x1 shr ebp,26
m1 and esi,0xfc000000
y1 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch1 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+0],al
; pixel 1
x2 shr ebp,26
m2 and esi,0xfc000000
y2 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch2 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-4]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+1],al
; pixel 2
x3 shr ebp,26
m3 and esi,0xfc000000
y3 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch3 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-8]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+2],al
; pixel 3
x4 shr ebp,26
m4 and esi,0xfc000000
y4 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch4 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-12]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+3],al
; pixel 4
x5 shr ebp,26
m5 and esi,0xfc000000
y5 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch5 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-16]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+4],al
; pixel 5
x6 shr ebp,26
m6 and esi,0xfc000000
y6 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch6 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-20]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+5],al
; pixel 6
x7 shr ebp,26
m7 and esi,0xfc000000
y7 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch7 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-24]
; pixel 7: ecx/edx are not needed afterwards, so they are shifted and masked
; in place instead of being copied to ebp/esi first.
x8 shr ecx,26
mov al,[ebp+eax]
m8 and edx,0xfc000000
mov [edi+6],al
y8 shr edx,20
mov ebp,[tiltlighting+ebx*4-28]
fetch8 mov al,[edx+ecx+SPACEFILLER4]
mov al,[ebp+eax]
mov [edi+7],al
add edi,8
sub ebx,8
jl near Done
; The end of the span just drawn becomes the start of the next one.
; NOTE: st1/st2 are u/v only when z is on top of the stack (six entries).
; When the divide was skipped (ebx was 8 and is now 0) these two stores pick
; up other stack entries; that case always continues to OnlyOnePixelAtEnd,
; which rewrites start_u/start_v from st0/st1.
fld st1
fistp qword [start_u]
fld st2
fistp qword [start_v]
cmp ebx,7
jl near EndIsShort
; Another full span follows: turn z (on top of the stack) into the
; coordinates at its end and derive the per-pixel steps.
fst dword [end_z]
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
jmp FullSpan
; OnlyOnePixelAtEnd: exactly one pixel is left after a full span and no
; divide was started, so the x87 stack is u | v | v/z | 1/z | u/z.
; Take the final coordinates straight from st0/st1.
OnlyOnePixelAtEnd:
fld st0
fistp qword [start_u]
fld st1
fistp qword [start_v]
; OnlyOnePixel: draw a single pixel at start_u/start_v.
; Here edx carries v and ecx carries u (the reverse of DrawFullSpan), and the
; light table is tiltlighting[0]. x9/m9/y9/fetch9 are patched in the same way
; as the instructions in the unrolled loop.
OnlyOnePixel:
mov edx,[start_v]
mov ecx,[start_u]
add edx,[pviewy]
add ecx,[pviewx]
x9 shr edx,26
m9 and ecx,0xfc000000
y9 shr ecx,20
mov ebp,[tiltlighting]
fetch9 mov al,[ecx+edx+SPACEFILLER4]
mov al,[ebp+eax]
mov [edi],al
; Done: common exit for paths that leave five values on the x87 stack.
; fcompp is used only for its two pops (its comparison result is ignored),
; so fcompp, fcompp, fstp st0 discards all five.
Done:
fcompp
fcompp
fstp st0
pop ebp
pop edi
pop esi
pop ebx
ret
; ShortStrip: the whole span is shorter than 8 pixels (ebx < 7).
; x87 stack: u | v | v/z | 1/z | u/z; start_u/start_v are already stored.
ShortStrip:
cmp ebx,0
jle near OnlyOnePixel
; ebx is 1..6 here. Advance u/z, v/z and 1/z by ebx+1 pixels
; (fp_quickint[ebx] = ebx+1) and compute z at that point.
MoreThanOnePixel:
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint+ebx*4]
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint+ebx*4]
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint+ebx*4]
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
faddp st5,st0 ; u | v | v/z | 1/z | u/z
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
jmp CalcPartialSteps
; EndIsShort: fewer than 8 pixels remain after a full span (0 <= ebx <= 6).
; With ebx = 0 no divide was started; otherwise z is already on top of the
; x87 stack.
EndIsShort:
cmp ebx,0
je near OnlyOnePixelAtEnd
; CalcPartialSteps: x87 stack is z | u | v | v/z | 1/z | u/z.
; Compute the end coordinates u', v' and divide the differences by ebx+1
; (spanrecips[ebx] = 1/(ebx+1)) to get the per-pixel steps.
CalcPartialSteps:
fst dword [end_z]
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z
fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z
fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z
fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z
fxch st1 ; v'-v | ustep | v/z | 1/z | u/z
fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z
fxch st1 ; ustep | vstep | v/z | 1/z | u/z
fistp qword [step_u] ; vstep | v/z | 1/z | u/z
fistp qword [step_v] ; v/z | 1/z | u/z
mov ecx,[start_v]
mov edx,[start_u]
add ecx,[pviewy]
add edx,[pviewx]
mov esi,edx
mov ebp,ecx
; endloop: one pixel per iteration, ebx+1 pixels in total.
; ecx = v, edx = u; ebp/esi hold working copies for the current pixel.
; x10/m10/y10/fetch10 are patched at run time like their numbered siblings.
; The jge at the bottom tests the flags from "dec ebx": the mov instructions
; in between do not modify flags.
endloop:
x10 shr ebp,26
m10 and esi,0xfc000000
y10 shr esi,20
inc edi
add ecx,[step_v]
add edx,[step_u]
fetch10 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4]
mov esi,edx
dec ebx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi-1],al
jge endloop
; Three values (v/z | 1/z | u/z) remain on the x87 stack; discard them.
fcompp
fstp st0
pop ebp
pop edi
pop esi
pop ebx
ret
; End of the code range handed to the selfmod macro.
rtext_end: