mirror of
https://git.code.sf.net/p/quake/quake2forge
synced 2024-12-12 13:42:21 +00:00
1234 lines
31 KiB
NASM
1234 lines
31 KiB
NASM
.386P
|
|
.model FLAT
|
|
;
|
|
; d_draw16.s
|
|
; x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel
|
|
; subdivision.
|
|
;
|
|
|
|
include qasm.inc
|
|
include d_if.inc
|
|
|
|
if id386
|
|
|
|
;----------------------------------------------------------------------
|
|
; 8-bpp horizontal span drawing code for polygons, with no transparency and
|
|
; 16-pixel subdivision.
|
|
;
|
|
; Assumes there is at least one span in pspans, and that every span
|
|
; contains at least one pixel
|
|
;----------------------------------------------------------------------
|
|
|
|
_DATA SEGMENT
|
|
|
|
_DATA ENDS
|
|
_TEXT SEGMENT
|
|
|
|
; out-of-line, rarely-needed clamping code
|
|
|
|
; ---------------------------------------------------------------------
; Clamp stubs.  Each one loads the clamped value into the register that
; was being range-checked and jumps back to the matching LClampReentryN
; label in the main code path.
; ---------------------------------------------------------------------

; Stubs 0/1: s (esi) and t (edx) at the first pixel of a span.
; LClampHighOrLowN is reached through a JA, so the flags of the CMP
; against the extent are still live: signed-greater means "past the
; extent", anything else that was unsigned-above is negative -> 0.
LClampHigh0:
        mov     esi,ds:dword ptr[_bbextents]
        jmp     LClampReentry0
LClampHighOrLow0:
        jg      LClampHigh0
        xor     esi,esi
        jmp     LClampReentry0

LClampHigh1:
        mov     edx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry1
LClampHighOrLow1:
        jg      LClampHigh1
        xor     edx,edx
        jmp     LClampReentry1

; Stubs 2/3: s (ebp) and t (ecx) at the end of a full 16-pixel segment.
; The low bound here is 4096 rather than 0.
LClampLow2:
        mov     ebp,4096
        jmp     LClampReentry2
LClampHigh2:
        mov     ebp,ds:dword ptr[_bbextents]
        jmp     LClampReentry2

LClampLow3:
        mov     ecx,4096
        jmp     LClampReentry3
LClampHigh3:
        mov     ecx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry3

; Stubs 4/5: s (eax) and t (ebx) at the end of the final, partial segment.
LClampLow4:
        mov     eax,4096
        jmp     LClampReentry4
LClampHigh4:
        mov     eax,ds:dword ptr[_bbextents]
        jmp     LClampReentry4

LClampLow5:
        mov     ebx,4096
        jmp     LClampReentry5
LClampHigh5:
        mov     ebx,ds:dword ptr[_bbextentt]
        jmp     LClampReentry5
|
|
|
|
|
|
pspans  equ     4+16                    ; esp offset of the span-list argument once
                                        ;  the four registers below are pushed
                                        ;  (4 = return address, 16 = 4 pushes)

        align   4
        public  _D_DrawSpans16
_D_DrawSpans16:
        push    ebp                     ; caller's frame pointer
        push    edi
        push    esi                     ; register variables
        push    ebx

;
; One-time setup for the whole span list:
;   sdivz16stepu / tdivz16stepu / zi16stepu = the per-pixel u steps * fp_16
;   pbase = cacheblock
;   ebx   = first span descriptor
; The integer moves are interleaved with the FP multiplies.
;
; TODO: any overlap from rearranging?
        fld     ds:dword ptr[_d_sdivzstepu]
        fmul    ds:dword ptr[fp_16]
        mov     edx,ds:dword ptr[_cacheblock]
        fld     ds:dword ptr[_d_tdivzstepu]
        fmul    ds:dword ptr[fp_16]
        mov     ebx,ds:dword ptr[pspans+esp]    ; ebx -> first span descriptor
        fld     ds:dword ptr[_d_zistepu]
        fmul    ds:dword ptr[fp_16]
        mov     ds:dword ptr[pbase],edx         ; pbase = cacheblock
        fstp    ds:dword ptr[zi16stepu]         ; pop in reverse order of the loads
        fstp    ds:dword ptr[tdivz16stepu]
        fstp    ds:dword ptr[sdivz16stepu]
|
|
|
|
LSpanLoop:
;
; Per-span setup.  ebx -> current span descriptor.
; Evaluate the three gradients at the span's first pixel,
;       value = origin + v*stepv + u*stepu
; leaving  1/z | t/z | s/z  on the FP stack, then turn them into the
; 16.16 s and t of the first pixel.  The three evaluations are
; interleaved; the trailing comments track the FP stack, top first.
;
; FIXME: pipeline FILD?
        fild    ds:dword ptr[espan_t_v+ebx]     ; dv
        fild    ds:dword ptr[espan_t_u+ebx]     ; du | dv

        fld     st(1)                           ; dv | du | dv
        fmul    ds:dword ptr[_d_sdivzstepv]     ; dv*sdivzstepv | du | dv
        fld     st(1)                           ; du | dv*sdivzstepv | du | dv
        fmul    ds:dword ptr[_d_sdivzstepu]     ; du*sdivzstepu | dv*sdivzstepv | du | dv
        fld     st(2)                           ; du | du*sdivzstepu | dv*sdivzstepv | du | dv
        fmul    ds:dword ptr[_d_tdivzstepu]     ; du*tdivzstepu | du*sdivzstepu |
                                                ;  dv*sdivzstepv | du | dv
        fxch    st(1)                           ; du*sdivzstepu | du*tdivzstepu |
                                                ;  dv*sdivzstepv | du | dv
        faddp   st(2),st(0)                     ; du*tdivzstepu |
                                                ;  du*sdivzstepu + dv*sdivzstepv | du | dv
        fxch    st(1)                           ; du*sdivzstepu + dv*sdivzstepv |
                                                ;  du*tdivzstepu | du | dv
        fld     st(3)                           ; dv | du*sdivzstepu + dv*sdivzstepv |
                                                ;  du*tdivzstepu | du | dv
        fmul    ds:dword ptr[_d_tdivzstepv]     ; dv*tdivzstepv |
                                                ;  du*sdivzstepu + dv*sdivzstepv |
                                                ;  du*tdivzstepu | du | dv
        fxch    st(1)                           ; du*sdivzstepu + dv*sdivzstepv |
                                                ;  dv*tdivzstepv | du*tdivzstepu | du | dv
        fadd    ds:dword ptr[_d_sdivzorigin]    ; s/z complete; ends up in st(2)
        fxch    st(4)                           ; dv | dv*tdivzstepv | du*tdivzstepu | du | s/z
        fmul    ds:dword ptr[_d_zistepv]        ; dv*zistepv | dv*tdivzstepv |
                                                ;  du*tdivzstepu | du | s/z
        fxch    st(1)                           ; dv*tdivzstepv | dv*zistepv |
                                                ;  du*tdivzstepu | du | s/z
        faddp   st(2),st(0)                     ; dv*zistepv |
                                                ;  dv*tdivzstepv + du*tdivzstepu | du | s/z
        fxch    st(2)                           ; du | dv*tdivzstepv + du*tdivzstepu |
                                                ;  dv*zistepv | s/z
        fmul    ds:dword ptr[_d_zistepu]        ; du*zistepu |
                                                ;  dv*tdivzstepv + du*tdivzstepu |
                                                ;  dv*zistepv | s/z
        fxch    st(1)                           ; dv*tdivzstepv + du*tdivzstepu |
                                                ;  du*zistepu | dv*zistepv | s/z
        fadd    ds:dword ptr[_d_tdivzorigin]    ; t/z complete; ends up in st(1)
        fxch    st(2)                           ; dv*zistepv | du*zistepu | t/z | s/z
        faddp   st(1),st(0)                     ; dv*zistepv + du*zistepu | t/z | s/z

        fld     ds:dword ptr[fp_64k]            ; fp_64k | dv*zistepv + du*zistepu | t/z | s/z
        fxch    st(1)                           ; dv*zistepv + du*zistepu | fp_64k | t/z | s/z
        fadd    ds:dword ptr[_d_ziorigin]       ; 1/z complete; stays in st(0)
                                                ; 1/z | fp_64k | t/z | s/z

; Start the divide that yields z*64k for the first pixel; the integer
; code below runs while it is in progress.
        fdiv    st(1),st(0)                     ; 1/z | z*64k | t/z | s/z

; edi = address of the span's first destination pixel,
; esi/edx = sadjust/tadjust, ecx = pixel count.
        mov     ecx,ds:dword ptr[_d_viewbuffer]
        mov     eax,ds:dword ptr[espan_t_v+ebx]
        mov     ds:dword ptr[pspantemp],ebx     ; span pointer is reloaded at end of span

        mov     edx,ds:dword ptr[_tadjust]
        mov     esi,ds:dword ptr[_sadjust]
        mov     edi,ds:dword ptr[_d_scantable+eax*4]    ; v * screenwidth
        add     edi,ecx
        mov     ecx,ds:dword ptr[espan_t_u+ebx]
        add     edi,ecx                         ; pdest = &pdestspan[scans->u];
        mov     ecx,ds:dword ptr[espan_t_count+ebx]

; Choose how to start the FDIV for the far end of the first segment:
;   count > 16  -> LSetupNotLast1 (step the gradients by 16 pixels)
;   count == 1  -> LCleanup1      (no second divide at all)
;   otherwise   -> fall through   (step by count-1 pixels)
; On the two short paths ecx is left holding count-1.
        cmp     ecx,16
        ja      LSetupNotLast1

        dec     ecx
        jz      LCleanup1                       ; single pixel: no FDIV needed
        mov     ds:dword ptr[spancountminus1],ecx

; convert the finished divide into s and t for the first pixel
        fxch    st(1)                           ; z*64k | 1/z | t/z | s/z

        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[s]                 ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[t]                 ; 1/z | t/z | s/z

; advance 1/z, t/z and s/z by (count-1) u steps
        fild    ds:dword ptr[spancountminus1]   ; scm1 | 1/z | t/z | s/z

        fld     ds:dword ptr[_d_tdivzstepu]     ; tdivzstepu | scm1
        fld     ds:dword ptr[_d_zistepu]        ; zistepu | tdivzstepu | scm1
        fmul    st(0),st(2)                     ; zistepu*scm1 | tdivzstepu | scm1
        fxch    st(1)                           ; tdivzstepu | zistepu*scm1 | scm1
        fmul    st(0),st(2)                     ; tdivzstepu*scm1 | zistepu*scm1 | scm1
        fxch    st(2)                           ; scm1 | zistepu*scm1 | tdivzstepu*scm1
        fmul    ds:dword ptr[_d_sdivzstepu]     ; sdivzstepu*scm1 | zistepu*scm1 |
                                                ;  tdivzstepu*scm1
        fxch    st(1)                           ; zistepu*scm1 | sdivzstepu*scm1 |
                                                ;  tdivzstepu*scm1
        faddp   st(3),st(0)                     ; sdivzstepu*scm1 | tdivzstepu*scm1
        fxch    st(1)                           ; tdivzstepu*scm1 | sdivzstepu*scm1
        faddp   st(3),st(0)                     ; sdivzstepu*scm1
        faddp   st(3),st(0)                     ; 1/z | t/z | s/z, all at the last pixel

        fld     ds:dword ptr[fp_64k]
        fdiv    st(0),st(1)                     ; the divide all this scheduling exists
                                                ;  to overlap
        jmp     LFDIVInFlight1

LCleanup1:
; single-pixel span: convert the finished divide into s and t, nothing more
        fxch    st(1)                           ; z*64k | 1/z | t/z | s/z

        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[s]                 ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[t]                 ; 1/z | t/z | s/z
        jmp     LFDIVInFlight1

        align   4
LSetupNotLast1:
; more than 16 pixels: convert the finished divide into s and t ...
        fxch    st(1)                           ; z*64k | 1/z | t/z | s/z

        fld     st(0)                           ; z*64k | z*64k | 1/z | t/z | s/z
        fmul    st(0),st(4)                     ; s | z*64k | 1/z | t/z | s/z
        fxch    st(1)                           ; z*64k | s | 1/z | t/z | s/z
        fmul    st(0),st(3)                     ; t | s | 1/z | t/z | s/z
        fxch    st(1)                           ; s | t | 1/z | t/z | s/z
        fistp   ds:dword ptr[s]                 ; t | 1/z | t/z | s/z
        fistp   ds:dword ptr[t]                 ; 1/z | t/z | s/z

; ... then advance all three gradients by the precomputed 16-pixel steps
        fadd    ds:dword ptr[zi16stepu]
        fxch    st(2)
        fadd    ds:dword ptr[sdivz16stepu]
        fxch    st(2)
        fld     ds:dword ptr[tdivz16stepu]
        faddp   st(2),st(0)
        fld     ds:dword ptr[fp_64k]
        fdiv    st(0),st(1)                     ; z = 1/(1/z), scaled by 64k
                                                ; the divide all this scheduling exists
                                                ;  to overlap
|
|
LFDIVInFlight1:
; Runs while the end-of-segment FDIV (if one was started) is executing.
; On entry esi = sadjust and edx = tadjust (loaded during span setup),
; ecx = pixel count (count-1 on the short-span paths).

; s = clamp(s + sadjust), t = clamp(t + tadjust); the unsigned JA catches
; both "past the extent" and "negative" in a single test
        add     esi,ds:dword ptr[s]
        add     edx,ds:dword ptr[t]
        mov     ebx,ds:dword ptr[_bbextents]
        mov     ebp,ds:dword ptr[_bbextentt]
        cmp     esi,ebx
        ja      LClampHighOrLow0
LClampReentry0:
        mov     ds:dword ptr[s],esi
        mov     ebx,ds:dword ptr[pbase]
        shl     esi,16                          ; fraction of s, left-justified
        cmp     edx,ebp
        mov     ds:dword ptr[sfracf],esi        ; MOV leaves the CMP flags intact
        ja      LClampHighOrLow1
LClampReentry1:
        mov     ds:dword ptr[t],edx
        mov     esi,ds:dword ptr[s]             ; sfrac = scans->sfrac;
        shl     edx,16                          ; fraction of t, left-justified
        mov     eax,ds:dword ptr[t]             ; tfrac = scans->tfrac;
        sar     esi,16                          ; integer part of s
        mov     ds:dword ptr[tfracf],edx

; psource = pbase + (sfrac >> 16) + (tfrac >> 16) * cachewidth
        sar     eax,16
        mov     edx,ds:dword ptr[_cachewidth]
        imul    eax,edx                         ; (tfrac >> 16) * cachewidth
        add     esi,ebx
        add     esi,eax                         ; esi -> first source texel

; 16 or fewer to go -> partial-segment code; otherwise fall into the
; full-segment loop
        cmp     ecx,16
        jna     LLastSegment
|
|
|
|
;
|
|
; not the last segment; do full 16-wide segment
|
|
;
|
|
LNotLastSegment:
; Draw one full 16-pixel segment.
;   in:  ecx = pixels remaining (> 16), esi -> source texel, edi -> dest,
;        FP stack = z*64k at segment end | 1/z | t/z | s/z
; Pixels are stored at fixed offsets 0..15 from edi.  Half-way through,
; the FDIV for the following segment is started so it overlaps the
; remaining stores.

; collect the divide left in flight: s and t at the end of this segment
        fld     st(0)                           ; duplicate z
        fmul    st(0),st(4)                     ; s = s/z * z
        fxch    st(1)
        fmul    st(0),st(3)                     ; t = t/z * z
        fxch    st(1)
        fistp   ds:dword ptr[snext]
        fistp   ds:dword ptr[tnext]
        mov     eax,ds:dword ptr[snext]
        mov     edx,ds:dword ptr[tnext]

        mov     bl,ds:byte ptr[esi]             ; first source texel
        sub     ecx,16                          ; this segment's pixels are accounted for
        mov     ebp,ds:dword ptr[_sadjust]
        mov     ds:dword ptr[counttemp],ecx     ; pixels left after this segment

        mov     ecx,ds:dword ptr[_tadjust]
        mov     ds:byte ptr[edi],bl             ; pixel 0

        add     ebp,eax                         ; ebp = snext + sadjust
        add     ecx,edx                         ; ecx = tnext + tadjust

        mov     eax,ds:dword ptr[_bbextents]
        mov     edx,ds:dword ptr[_bbextentt]

; clamp both to [4096, extent]
        cmp     ebp,4096
        jl      LClampLow2
        cmp     ebp,eax
        ja      LClampHigh2
LClampReentry2:

        cmp     ecx,4096
        jl      LClampLow3
        cmp     ecx,edx
        ja      LClampHigh3
LClampReentry3:

        mov     ds:dword ptr[snext],ebp
        mov     ds:dword ptr[tnext],ecx

        sub     ebp,ds:dword ptr[s]             ; s delta across the segment
        sub     ecx,ds:dword ptr[t]             ; t delta across the segment

; advancetable: whole-texel source advance per pixel.
;   [advancetable+4] = (tstep >> 16) * cachewidth + (sstep >> 16)
;   [advancetable]   = the same plus one extra row (cachewidth)
; The deltas span 16 pixels, so >> 20 gives the per-pixel integer step and
; << 12 left-justifies the per-pixel fraction.
        mov     eax,ecx
        mov     edx,ebp
        sar     eax,20                          ; per-pixel tstep >> 16
        jz      LZero                           ; no whole rows: skip the multiply
        sar     edx,20                          ; per-pixel sstep >> 16
        mov     ebx,ds:dword ptr[_cachewidth]
        imul    eax,ebx
        jmp     LSetUp1

LZero:
        sar     edx,20                          ; per-pixel sstep >> 16
        mov     ebx,ds:dword ptr[_cachewidth]

LSetUp1:

        add     eax,edx                         ; (tstep >> 16) * cachewidth + (sstep >> 16)
        mov     edx,ds:dword ptr[tfracf]
        mov     ds:dword ptr[advancetable+4],eax        ; advance without t carry
        add     eax,ebx                         ; ((tstep >> 16) + 1) * cachewidth +
                                                ;  (sstep >> 16)
        shl     ebp,12                          ; sstep fraction, left-justified
        mov     ebx,ds:dword ptr[sfracf]
        shl     ecx,12                          ; tstep fraction, left-justified
        mov     ds:dword ptr[advancetable],eax  ; advance with t carry

        mov     ds:dword ptr[tstep],ecx
        add     edx,ecx                         ; tfrac += tstep fraction

; Per-pixel step, repeated below:
;   add edx,tstep   carry out of the t fraction
;   sbb ecx,ecx     ecx = -1 on carry, else 0 (selects the advancetable entry)
;   add ebx,ebp     carry out of the s fraction
;   adc esi,[...]   source pointer += table entry + s carry
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]  ; -> texel for pixel 1

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     al,ds:byte ptr[esi]
        add     ebx,ebp
        mov     ds:byte ptr[1+edi],al           ; pixel 1
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]             ; texel for pixel 2, stored next step
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[2+edi],al           ; pixel 2
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[3+edi],al           ; pixel 3
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[4+edi],al           ; pixel 4
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[5+edi],al           ; pixel 5
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[6+edi],al           ; pixel 6
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[7+edi],al           ; pixel 7
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

; Mid-segment: start the FDIV for the end of the NEXT segment so that it
; overlaps pixels 8..15.
        mov     ecx,ds:dword ptr[counttemp]
        cmp     ecx,16                          ; more than one segment after this?
        ja      LSetupNotLast2                  ; yes

        dec     ecx
        jz      LFDIVInFlight2                  ; one pixel left: no FDIV needed
        mov     ds:dword ptr[spancountminus1],ecx
        fild    ds:dword ptr[spancountminus1]   ; scm1 | 1/z | t/z | s/z

; advance 1/z, t/z and s/z by (remaining-1) u steps
        fld     ds:dword ptr[_d_zistepu]        ; zistepu | scm1
        fmul    st(0),st(1)                     ; zistepu*scm1 | scm1
        fld     ds:dword ptr[_d_tdivzstepu]     ; tdivzstepu | zistepu*scm1 | scm1
        fmul    st(0),st(2)                     ; tdivzstepu*scm1 | zistepu*scm1 | scm1
        fxch    st(1)                           ; zistepu*scm1 | tdivzstepu*scm1 | scm1
        faddp   st(3),st(0)                     ; tdivzstepu*scm1 | scm1
        fxch    st(1)                           ; scm1 | tdivzstepu*scm1
        fmul    ds:dword ptr[_d_sdivzstepu]     ; sdivzstepu*scm1 | tdivzstepu*scm1
        fxch    st(1)                           ; tdivzstepu*scm1 | sdivzstepu*scm1
        faddp   st(3),st(0)                     ; sdivzstepu*scm1
        fld     ds:dword ptr[fp_64k]            ; 64k | sdivzstepu*scm1
        fxch    st(1)                           ; sdivzstepu*scm1 | 64k
        faddp   st(4),st(0)                     ; 64k

        fdiv    st(0),st(1)                     ; the divide all this scheduling exists
                                                ;  to overlap
        jmp     LFDIVInFlight2

        align   4
LSetupNotLast2:
; another full segment follows: advance by the precomputed 16-pixel steps
        fadd    ds:dword ptr[zi16stepu]
        fxch    st(2)
        fadd    ds:dword ptr[sdivz16stepu]
        fxch    st(2)
        fld     ds:dword ptr[tdivz16stepu]
        faddp   st(2),st(0)
        fld     ds:dword ptr[fp_64k]
        fdiv    st(0),st(1)                     ; z = 1/(1/z), scaled by 64k
                                                ; the divide all this scheduling exists
                                                ;  to overlap
LFDIVInFlight2:
        mov     ds:dword ptr[counttemp],ecx     ; remaining count (count-1 if <= 16)

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[8+edi],al           ; pixel 8
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[9+edi],al           ; pixel 9
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[10+edi],al          ; pixel 10
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[11+edi],al          ; pixel 11
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[12+edi],al          ; pixel 12
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[13+edi],al          ; pixel 13
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[14+edi],al          ; pixel 14
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]             ; texel for pixel 15, stored below
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

; end of segment: bump the destination and make the segment-end s/t the
; start values of the next segment
        add     edi,16
        mov     ds:dword ptr[tfracf],edx
        mov     edx,ds:dword ptr[snext]
        mov     ds:dword ptr[sfracf],ebx
        mov     ebx,ds:dword ptr[tnext]
        mov     ds:dword ptr[s],edx
        mov     ds:dword ptr[t],ebx

        mov     ecx,ds:dword ptr[counttemp]     ; remaining count

        cmp     ecx,16                          ; another full segment to draw?
        mov     ds:byte ptr[-1+edi],al          ; pixel 15 (edi already advanced)
        ja      LNotLastSegment                 ; yes
|
|
|
|
;
|
|
; last segment of scan
|
|
;
|
|
LLastSegment:
; Final segment of the span.  ecx = number of steps still to take, i.e.
; pixels in this segment minus one (0..15).  The gradients were advanced
; to land exactly on the last pixel, not one past it.
        test    ecx,ecx
        jz      LNoSteps                        ; a single pixel: just draw it

; collect the divide left in flight: s and t at the last pixel
        fld     st(0)                           ; duplicate z
        fmul    st(0),st(4)                     ; s = s/z * z
        fxch    st(1)
        fmul    st(0),st(3)                     ; t = t/z * z
        fxch    st(1)
        fistp   ds:dword ptr[snext]
        fistp   ds:dword ptr[tnext]

        mov     al,ds:byte ptr[esi]             ; first texel of the segment
        mov     ebx,ds:dword ptr[_tadjust]
        mov     ds:byte ptr[edi],al             ; first pixel of the segment
        mov     eax,ds:dword ptr[_sadjust]

        add     eax,ds:dword ptr[snext]         ; eax = snext + sadjust
        add     ebx,ds:dword ptr[tnext]         ; ebx = tnext + tadjust

        mov     ebp,ds:dword ptr[_bbextents]
        mov     edx,ds:dword ptr[_bbextentt]

; clamp both to [4096, extent]
        cmp     eax,4096
        jl      LClampLow4
        cmp     eax,ebp
        ja      LClampHigh4
LClampReentry4:
        mov     ds:dword ptr[snext],eax

        cmp     ebx,4096
        jl      LClampLow5
        cmp     ebx,edx
        ja      LClampHigh5
LClampReentry5:

        cmp     ecx,1                           ; exactly one step (two pixels)?
        je      LOnlyOneStep                    ; then the step is the whole delta:
                                                ;  skip the reciprocal multiply
        sub     eax,ds:dword ptr[s]             ; s delta
        sub     ebx,ds:dword ptr[t]             ; t delta

        add     eax,eax                         ; to 15.17, so that multiplying by the
        add     ebx,ebx                         ;  1.31 reciprocal yields 16.48

; one-operand IMUL: edx:eax = eax * table entry; the step is taken from edx
        imul    ds:dword ptr[reciprocal_table_16-8+ecx*4]       ; sstep = (snext - s) /
                                                                ;  (spancount-1)
        mov     ebp,edx                         ; ebp = sstep

        mov     eax,ebx
        imul    ds:dword ptr[reciprocal_table_16-8+ecx*4]       ; tstep = (tnext - t) /
                                                                ;  (spancount-1)
LSetEntryvec:
; in: ebp = sstep, edx = tstep, ecx = number of steps.
; Build advancetable as in the full-segment code, then jump into the
; unrolled pixel chain at the entry point matching the pixel count.
        mov     ebx,ds:dword ptr[entryvec_table_16+ecx*4]
        mov     eax,edx
        mov     ds:dword ptr[jumptemp],ebx      ; target of the indirect JMP below
        mov     ecx,ebp
        sar     edx,16                          ; tstep >>= 16;
        mov     ebx,ds:dword ptr[_cachewidth]
        sar     ecx,16                          ; sstep >>= 16;
        imul    edx,ebx

        add     edx,ecx                         ; (tstep >> 16) * cachewidth + (sstep >> 16)
        mov     ecx,ds:dword ptr[tfracf]
        mov     ds:dword ptr[advancetable+4],edx        ; advance without t carry
        add     edx,ebx                         ; ((tstep >> 16) + 1) * cachewidth +
                                                ;  (sstep >> 16)
        shl     ebp,16                          ; sstep fraction, left-justified
        mov     ebx,ds:dword ptr[sfracf]
        shl     eax,16                          ; tstep fraction, left-justified
        mov     ds:dword ptr[advancetable],edx  ; advance with t carry

        mov     ds:dword ptr[tstep],eax
        mov     edx,ecx
        add     edx,eax                         ; tfrac += tstep fraction
        sbb     ecx,ecx                         ; -1 on t carry, else 0
        add     ebx,ebp                         ; sfrac += sstep fraction
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]  ; -> second texel

        jmp     dword ptr[jumptemp]             ; to the handler for this pixel count

;----------------------------------------

LNoSteps:
; single pixel left: fetch it and let LEndSpan store it at 15+edi
        mov     al,ds:byte ptr[esi]             ; the only texel of the segment
        sub     edi,15                          ; compensate for the hardwired offset
        jmp     LEndSpan


LOnlyOneStep:
; two pixels: the step is simply the full delta
        sub     eax,ds:dword ptr[s]
        sub     ebx,ds:dword ptr[t]
        mov     ebp,eax                         ; sstep
        mov     edx,ebx                         ; tstep
        jmp     LSetEntryvec
|
|
|
|
;----------------------------------------
|
|
|
|
; Entry points for a final segment of N pixels (EntryN_16), reached by the
; indirect jump through jumptemp.  The first pixel has already been stored
; and esi already points at the second texel.  Each entry biases edi so
; that the shared chain's hardwired offsets (1..15) finish on the last
; pixel, performs the part of the first step the chain expects to be done,
; and jumps into the chain at LEntryN_16.
        public  Entry2_16, Entry3_16, Entry4_16, Entry5_16
        public  Entry6_16, Entry7_16, Entry8_16, Entry9_16
        public  Entry10_16, Entry11_16, Entry12_16, Entry13_16
        public  Entry14_16, Entry15_16, Entry16_16

Entry2_16:
        sub     edi,14                          ; bias for hardwired offsets
        mov     al,ds:byte ptr[esi]
        jmp     LEntry2_16

;----------------------------------------

Entry3_16:
        sub     edi,13                          ; bias for hardwired offsets
        add     edx,eax                         ; eax = tstep fraction on entry
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        jmp     LEntry3_16

;----------------------------------------

Entry4_16:
        sub     edi,12                          ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry4_16

;----------------------------------------

Entry5_16:
        sub     edi,11                          ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry5_16

;----------------------------------------

Entry6_16:
        sub     edi,10                          ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry6_16

;----------------------------------------

Entry7_16:
        sub     edi,9                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry7_16

;----------------------------------------

Entry8_16:
        sub     edi,8                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry8_16

;----------------------------------------

Entry9_16:
        sub     edi,7                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry9_16

;----------------------------------------

Entry10_16:
        sub     edi,6                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry10_16

;----------------------------------------

Entry11_16:
        sub     edi,5                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry11_16

;----------------------------------------

Entry12_16:
        sub     edi,4                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry12_16

;----------------------------------------

Entry13_16:
        sub     edi,3                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry13_16

;----------------------------------------

Entry14_16:
        sub     edi,2                           ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry14_16

;----------------------------------------

Entry15_16:
        dec     edi                             ; bias for hardwired offsets
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
        jmp     LEntry15_16

;----------------------------------------

; 16 pixels: no bias needed, fall straight through the whole chain.
; Each link: finish the t carry (sbb), store the previous texel, step the
; s fraction, fetch the next texel, advance esi, start the next t step.
Entry16_16:
        add     edx,eax
        mov     al,ds:byte ptr[esi]
        sbb     ecx,ecx
        add     ebx,ebp
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]

        add     edx,ds:dword ptr[tstep]
        sbb     ecx,ecx
        mov     ds:byte ptr[1+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry15_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[2+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry14_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[3+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry13_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[4+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry12_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[5+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry11_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[6+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry10_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[7+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry9_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[8+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry8_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[9+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry7_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[10+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry6_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[11+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry5_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[12+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
        add     edx,ds:dword ptr[tstep]
LEntry4_16:
        sbb     ecx,ecx
        mov     ds:byte ptr[13+edi],al
        add     ebx,ebp
        mov     al,ds:byte ptr[esi]
        adc     esi,ds:dword ptr[advancetable+4+ecx*4]
LEntry3_16:
        mov     ds:byte ptr[14+edi],al
        mov     al,ds:byte ptr[esi]
LEntry2_16:

LEndSpan:
; al = last texel of the span, still to be stored at 15+edi.
; Pop 1/z, t/z and s/z off the FP stack.
        fstp    st(0)
        fstp    st(0)
        fstp    st(0)

        mov     ebx,ds:dword ptr[pspantemp]             ; current span descriptor
        mov     ebx,ds:dword ptr[espan_t_pnext+ebx]     ; next span, or 0
        test    ebx,ebx                                 ; any more spans?
        mov     ds:byte ptr[15+edi],al                  ; last pixel (MOV keeps the flags)
        jnz     LSpanLoop                               ; more spans

        pop     ebx                             ; restore register variables
        pop     esi
        pop     edi
        pop     ebp                             ; restore the caller's stack frame
        ret
|
|
|
|
|
|
;----------------------------------------------------------------------
|
|
; 8-bpp horizontal span z drawing code for polygons, with no transparency.
|
|
;
|
|
; Assumes there is at least one span in pzspans, and that every span
|
|
; contains at least one pixel
|
|
;----------------------------------------------------------------------
|
|
|
|
|
|
|
|
; z-clamp on a non-negative gradient span
|
|
LClamp:
; Out-of-line clamp for the adding loop: draw the whole span with a
; constant z.  edx = 040000000h (the fixed-point image of 0.5 if
; Float2ToThe31nd is 2^31, as its name suggests) and ebx = 0 so that the
; value does not change across the span.  ebx is zeroed for THIS span
; only; the span loop reloads it from izistep for every span.
        mov     edx,040000000h
        xor     ebx,ebx
        fstp    st(0)                           ; discard the unclamped 1/z
        jmp     LZDraw

; z-clamp on a negative gradient span
LClampNeg:
; Same as LClamp, for the subtracting loop.
        mov     edx,040000000h
        xor     ebx,ebx
        fstp    st(0)                           ; discard the unclamped 1/z
        jmp     LZDrawNeg


pzspans equ     4+16                    ; esp offset of the span-list argument once
                                        ;  the four registers below are pushed

;----------------------------------------------------------------------
; _D_DrawZSpans
;   In:   [esp+4] = pointer to the first span descriptor (espan_t list,
;                   linked through espan_t_pnext, terminated by 0)
;   Does: for every span, writes one 16-bit z value per pixel (the high
;         word of a 32-bit fixed-point 1/z) into the buffer at
;         _d_pzbuffer, row stride _d_zrowbytes.
;   Preserves ebp, edi, esi, ebx; leaves the FP stack as it found it.
;   Relies on FP exceptions being masked (see the FISTP notes below).
;----------------------------------------------------------------------
        public  _D_DrawZSpans
_D_DrawZSpans:
        push    ebp                     ; preserve caller's stack frame
        push    edi
        push    esi                     ; preserve register variables
        push    ebx

        fld     ds:dword ptr[_d_zistepu]
        mov     eax,ds:dword ptr[_d_zistepu]    ; raw float bits, for the test below
        mov     esi,ds:dword ptr[pzspans+esp]   ; esi -> first span descriptor
        test    eax,eax
        jz      LFNegSpan               ; NOTE(review): taken only when the bits are
                                        ;  all zero, not when the step is negative.
                                        ;  Both loops look sign-agnostic (one adds
                                        ;  izistep, the other subtracts a step scaled
                                        ;  by FloatMinus2ToThe31nd) -- confirm intent.

        fmul    ds:dword ptr[Float2ToThe31nd]
        fistp   ds:dword ptr[izistep]   ; note: we are relying on FP exceptions being
                                        ;  turned off here to avoid range problems

LFSpanLoop:
; Reload the step for every span.  LClamp zeroes ebx to draw a clamped
; span; without this reload every span after the first clamped one would
; also be drawn with a zero step.
        mov     ebx,ds:dword ptr[izistep]

; set up the initial 1/z value: zi = d_ziorigin + v*d_zistepv + u*d_zistepu
        fild    ds:dword ptr[espan_t_v+esi]
        fild    ds:dword ptr[espan_t_u+esi]
        mov     ecx,ds:dword ptr[espan_t_v+esi]
        mov     edi,ds:dword ptr[_d_pzbuffer]
        fmul    ds:dword ptr[_d_zistepu]
        fxch    st(1)
        fmul    ds:dword ptr[_d_zistepv]
        fxch    st(1)
        fadd    ds:dword ptr[_d_ziorigin]
        imul    ecx,ds:dword ptr[_d_zrowbytes]  ; byte offset of row v
        faddp   st(1),st(0)

; clamp if z is nearer than 2 (1/z > 0.5)
        fcom    ds:dword ptr[float_point5]
        add     edi,ecx
        mov     edx,ds:dword ptr[espan_t_u+esi]
        add     edx,edx                         ; u * 2: entries are words
        mov     ecx,ds:dword ptr[espan_t_count+esi]
        add     edi,edx                         ; pdest = &pdestspan[scans->u];
        push    esi                             ; preserve spans pointer
        fnstsw  ax
        test    ah,045h                         ; C3|C2|C0 all clear <=> st(0) > 0.5
        jz      LClamp

        fmul    ds:dword ptr[Float2ToThe31nd]
        fistp   ds:dword ptr[izi]       ; note: we are relying on FP exceptions being
                                        ;  turned off here to avoid problems when the
                                        ;  span is closer than 1/(2**31)
        mov     edx,ds:dword ptr[izi]

; at this point:
;   ebx = izistep (0 when clamped)
;   ecx = count
;   edx = izi
;   edi = pdest

LZDraw:

; do a single pixel up front, if necessary to dword align the destination
        test    edi,2
        jz      LFMiddle
        mov     eax,edx
        add     edx,ebx
        shr     eax,16                          ; z = high word of izi
        dec     ecx
        mov     ds:word ptr[edi],ax
        add     edi,2

; do middle a pair of aligned dwords at a time
LFMiddle:
        push    ecx                             ; remaining count, for the odd-pixel test
        shr     ecx,1                           ; count / 2 = number of dwords
        jz      LFLast                          ; no aligned dwords to do
        shr     ecx,1                           ; (count / 2) / 2 = number of dword pairs
        jnc     LFMiddleLoop                    ; even number of aligned dwords to do

; odd number of dwords: do one on its own so the loop can work in pairs
        mov     eax,edx
        add     edx,ebx
        shr     eax,16                          ; low word  = z of first pixel
        mov     esi,edx
        add     edx,ebx
        and     esi,0FFFF0000h                  ; high word = z of second pixel
        or      eax,esi
        mov     ds:dword ptr[edi],eax
        add     edi,4
        and     ecx,ecx
        jz      LFLast

LFMiddleLoop:
; four pixels (two dwords) per iteration
        mov     eax,edx
        add     edx,ebx
        shr     eax,16
        mov     esi,edx
        add     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ebp,edx
        mov     ds:dword ptr[edi],eax
        add     edx,ebx
        shr     ebp,16
        mov     esi,edx
        add     edx,ebx
        and     esi,0FFFF0000h
        or      ebp,esi
        mov     ds:dword ptr[4+edi],ebp ; FIXME: eliminate register contention
        add     edi,8

        dec     ecx
        jnz     LFMiddleLoop

LFLast:
        pop     ecx                     ; retrieve count
        pop     esi                     ; retrieve span pointer

; do the last, unaligned pixel, if there is one
        and     ecx,1                   ; is there an odd pixel left to do?
        jz      LFSpanDone              ; no
        shr     edx,16
        mov     ds:word ptr[edi],dx     ; do the final pixel's z

LFSpanDone:
        mov     esi,ds:dword ptr[espan_t_pnext+esi]
        test    esi,esi
        jnz     LFSpanLoop

        jmp     LFDone

LFNegSpan:
; Same algorithm with the step scaled by FloatMinus2ToThe31nd and
; subtracted instead of added.
        fmul    ds:dword ptr[FloatMinus2ToThe31nd]
        fistp   ds:dword ptr[izistep]   ; note: we are relying on FP exceptions being
                                        ;  turned off here to avoid range problems

LFNegSpanLoop:
; Reload the step for every span; LClampNeg zeroes ebx for the span it
; clamps (see LFSpanLoop).
        mov     ebx,ds:dword ptr[izistep]

; set up the initial 1/z value
        fild    ds:dword ptr[espan_t_v+esi]
        fild    ds:dword ptr[espan_t_u+esi]
        mov     ecx,ds:dword ptr[espan_t_v+esi]
        mov     edi,ds:dword ptr[_d_pzbuffer]
        fmul    ds:dword ptr[_d_zistepu]
        fxch    st(1)
        fmul    ds:dword ptr[_d_zistepv]
        fxch    st(1)
        fadd    ds:dword ptr[_d_ziorigin]
        imul    ecx,ds:dword ptr[_d_zrowbytes]  ; byte offset of row v
        faddp   st(1),st(0)

; clamp if z is nearer than 2 (1/z > 0.5)
        fcom    ds:dword ptr[float_point5]
        add     edi,ecx
        mov     edx,ds:dword ptr[espan_t_u+esi]
        add     edx,edx                         ; u * 2: entries are words
        mov     ecx,ds:dword ptr[espan_t_count+esi]
        add     edi,edx                         ; pdest = &pdestspan[scans->u];
        push    esi                             ; preserve spans pointer
        fnstsw  ax
        test    ah,045h                         ; C3|C2|C0 all clear <=> st(0) > 0.5
        jz      LClampNeg

        fmul    ds:dword ptr[Float2ToThe31nd]
        fistp   ds:dword ptr[izi]       ; note: we are relying on FP exceptions being
                                        ;  turned off here to avoid problems when the
                                        ;  span is closer than 1/(2**31)
        mov     edx,ds:dword ptr[izi]

; at this point:
;   ebx = izistep (0 when clamped)
;   ecx = count
;   edx = izi
;   edi = pdest

LZDrawNeg:

; do a single pixel up front, if necessary to dword align the destination
        test    edi,2
        jz      LFNegMiddle
        mov     eax,edx
        sub     edx,ebx
        shr     eax,16                          ; z = high word of izi
        dec     ecx
        mov     ds:word ptr[edi],ax
        add     edi,2

; do middle a pair of aligned dwords at a time
LFNegMiddle:
        push    ecx                             ; remaining count, for the odd-pixel test
        shr     ecx,1                           ; count / 2 = number of dwords
        jz      LFNegLast                       ; no aligned dwords to do
        shr     ecx,1                           ; (count / 2) / 2 = number of dword pairs
        jnc     LFNegMiddleLoop                 ; even number of aligned dwords to do

; odd number of dwords: do one on its own so the loop can work in pairs
        mov     eax,edx
        sub     edx,ebx
        shr     eax,16
        mov     esi,edx
        sub     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ds:dword ptr[edi],eax
        add     edi,4
        and     ecx,ecx
        jz      LFNegLast

LFNegMiddleLoop:
; four pixels (two dwords) per iteration
        mov     eax,edx
        sub     edx,ebx
        shr     eax,16
        mov     esi,edx
        sub     edx,ebx
        and     esi,0FFFF0000h
        or      eax,esi
        mov     ebp,edx
        mov     ds:dword ptr[edi],eax
        sub     edx,ebx
        shr     ebp,16
        mov     esi,edx
        sub     edx,ebx
        and     esi,0FFFF0000h
        or      ebp,esi
        mov     ds:dword ptr[4+edi],ebp ; FIXME: eliminate register contention
        add     edi,8

        dec     ecx
        jnz     LFNegMiddleLoop

LFNegLast:
        pop     ecx                     ; retrieve count
        pop     esi                     ; retrieve span pointer

; do the last, unaligned pixel, if there is one
        and     ecx,1                   ; is there an odd pixel left to do?
        jz      LFNegSpanDone           ; no
        shr     edx,16
        mov     ds:word ptr[edi],dx     ; do the final pixel's z

LFNegSpanDone:
        mov     esi,ds:dword ptr[espan_t_pnext+esi]
        test    esi,esi
        jnz     LFNegSpanLoop

LFDone:
        pop     ebx                     ; restore register variables
        pop     esi
        pop     edi
        pop     ebp                     ; restore the caller's stack frame
        ret
|
|
|
|
|
|
|
|
_TEXT ENDS
|
|
endif ;id386
|
|
END
|