884 lines
22 KiB
NASM
884 lines
22 KiB
NASM
.386P
|
|
.model FLAT
|
|
;
|
|
; d_spr8.s
|
|
; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
|
|
;
|
|
|
|
include qasm.inc
|
|
include d_if.inc
|
|
|
|
if id386
|
|
|
|
;----------------------------------------------------------------------
|
|
; 8-bpp horizontal span drawing code for polygons, with transparency.
|
|
;----------------------------------------------------------------------
|
|
|
|
_TEXT SEGMENT
|
|
|
|
; out-of-line, rarely-needed clamping code
|
|
|
|
LClampHigh0:
|
|
mov esi,ds:dword ptr[_bbextents]
|
|
jmp LClampReentry0
|
|
LClampHighOrLow0:
|
|
jg LClampHigh0
|
|
xor esi,esi
|
|
jmp LClampReentry0
|
|
|
|
LClampHigh1:
|
|
mov edx,ds:dword ptr[_bbextentt]
|
|
jmp LClampReentry1
|
|
LClampHighOrLow1:
|
|
jg LClampHigh1
|
|
xor edx,edx
|
|
jmp LClampReentry1
|
|
|
|
LClampLow2:
|
|
mov ebp,2048
|
|
jmp LClampReentry2
|
|
LClampHigh2:
|
|
mov ebp,ds:dword ptr[_bbextents]
|
|
jmp LClampReentry2
|
|
|
|
LClampLow3:
|
|
mov ecx,2048
|
|
jmp LClampReentry3
|
|
LClampHigh3:
|
|
mov ecx,ds:dword ptr[_bbextentt]
|
|
jmp LClampReentry3
|
|
|
|
LClampLow4:
|
|
mov eax,2048
|
|
jmp LClampReentry4
|
|
LClampHigh4:
|
|
mov eax,ds:dword ptr[_bbextents]
|
|
jmp LClampReentry4
|
|
|
|
LClampLow5:
|
|
mov ebx,2048
|
|
jmp LClampReentry5
|
|
LClampHigh5:
|
|
mov ebx,ds:dword ptr[_bbextentt]
|
|
jmp LClampReentry5
|
|
|
|
|
|
pspans equ 4+16
|
|
|
|
align 4
|
|
public _D_SpriteDrawSpansXXX
|
|
_D_SpriteDrawSpansXXX:
|
|
push ebp ; preserve caller's stack frame
|
|
push edi
|
|
push esi ; preserve register variables
|
|
push ebx
|
|
|
|
;
|
|
; set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
|
|
; and span list pointers, and 1/z step in 0.32 fixed-point
|
|
;
|
|
; FIXME: any overlap from rearranging?
|
|
fld ds:dword ptr[_d_sdivzstepu]
|
|
fmul ds:dword ptr[fp_8]
|
|
mov edx,ds:dword ptr[_cacheblock]
|
|
fld ds:dword ptr[_d_tdivzstepu]
|
|
fmul ds:dword ptr[fp_8]
|
|
mov ebx,ds:dword ptr[pspans+esp] ; point to the first span descriptor
|
|
fld ds:dword ptr[_d_zistepu]
|
|
fmul ds:dword ptr[fp_8]
|
|
mov ds:dword ptr[pbase],edx ; pbase = cacheblock
|
|
fld ds:dword ptr[_d_zistepu]
|
|
fmul ds:dword ptr[fp_64kx64k]
|
|
fxch st(3)
|
|
fstp ds:dword ptr[sdivz8stepu]
|
|
fstp ds:dword ptr[zi8stepu]
|
|
fstp ds:dword ptr[tdivz8stepu]
|
|
fistp ds:dword ptr[izistep]
|
|
mov eax,ds:dword ptr[izistep]
|
|
ror eax,16 ; put upper 16 bits in low word
|
|
mov ecx,ds:dword ptr[sspan_t_count+ebx]
|
|
mov ds:dword ptr[izistep],eax
|
|
|
|
cmp ecx,0
|
|
jle LNextSpan
|
|
|
|
LSpanLoop:
|
|
|
|
;
|
|
; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
|
|
; initial s and t values
|
|
;
|
|
; FIXME: pipeline FILD?
|
|
fild ds:dword ptr[sspan_t_v+ebx]
|
|
fild ds:dword ptr[sspan_t_u+ebx]
|
|
|
|
fld st(1) ; dv | du | dv
|
|
fmul ds:dword ptr[_d_sdivzstepv] ; dv*d_sdivzstepv | du | dv
|
|
fld st(1) ; du | dv*d_sdivzstepv | du | dv
|
|
fmul ds:dword ptr[_d_sdivzstepu] ; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
|
|
fld st(2) ; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
|
|
fmul ds:dword ptr[_d_tdivzstepu] ; du*d_tdivzstepu | du*d_sdivzstepu |
|
|
; dv*d_sdivzstepv | du | dv
|
|
fxch st(1) ; du*d_sdivzstepu | du*d_tdivzstepu |
|
|
; dv*d_sdivzstepv | du | dv
|
|
faddp st(2),st(0) ; du*d_tdivzstepu |
|
|
; du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
|
|
fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv |
|
|
; du*d_tdivzstepu | du | dv
|
|
fld st(3) ; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
|
|
; du*d_tdivzstepu | du | dv
|
|
fmul ds:dword ptr[_d_tdivzstepv] ; dv*d_tdivzstepv |
|
|
; du*d_sdivzstepu + dv*d_sdivzstepv |
|
|
; du*d_tdivzstepu | du | dv
|
|
fxch st(1) ; du*d_sdivzstepu + dv*d_sdivzstepv |
|
|
; dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
|
|
fadd ds:dword ptr[_d_sdivzorigin] ; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
|
|
; du*d_sdivzstepu; stays in %st(2) at end
|
|
fxch st(4) ; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
|
|
; s/z
|
|
fmul ds:dword ptr[_d_zistepv] ; dv*d_zistepv | dv*d_tdivzstepv |
|
|
; du*d_tdivzstepu | du | s/z
|
|
fxch st(1) ; dv*d_tdivzstepv | dv*d_zistepv |
|
|
; du*d_tdivzstepu | du | s/z
|
|
faddp st(2),st(0) ; dv*d_zistepv |
|
|
; dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
|
|
fxch st(2) ; du | dv*d_tdivzstepv + du*d_tdivzstepu |
|
|
; dv*d_zistepv | s/z
|
|
fmul ds:dword ptr[_d_zistepu] ; du*d_zistepu |
|
|
; dv*d_tdivzstepv + du*d_tdivzstepu |
|
|
; dv*d_zistepv | s/z
|
|
fxch st(1) ; dv*d_tdivzstepv + du*d_tdivzstepu |
|
|
; du*d_zistepu | dv*d_zistepv | s/z
|
|
fadd ds:dword ptr[_d_tdivzorigin] ; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
|
|
; du*d_tdivzstepu; stays in %st(1) at end
|
|
fxch st(2) ; dv*d_zistepv | du*d_zistepu | t/z | s/z
|
|
faddp st(1),st(0) ; dv*d_zistepv + du*d_zistepu | t/z | s/z
|
|
|
|
fld ds:dword ptr[fp_64k] ; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
|
|
fxch st(1) ; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
|
|
fadd ds:dword ptr[_d_ziorigin] ; zi = d_ziorigin + dv*d_zistepv +
|
|
; du*d_zistepu; stays in %st(0) at end
|
|
; 1/z | fp_64k | t/z | s/z
|
|
|
|
fld st(0) ; FIXME: get rid of stall on FMUL?
|
|
fmul ds:dword ptr[fp_64kx64k]
|
|
fxch st(1)
|
|
|
|
;
|
|
; calculate and clamp s & t
|
|
;
|
|
fdiv st(2),st(0) ; 1/z | z*64k | t/z | s/z
|
|
fxch st(1)
|
|
|
|
fistp ds:dword ptr[izi] ; 0.32 fixed-point 1/z
|
|
mov ebp,ds:dword ptr[izi]
|
|
|
|
;
|
|
; set pz to point to the first z-buffer pixel in the span
|
|
;
|
|
ror ebp,16 ; put upper 16 bits in low word
|
|
mov eax,ds:dword ptr[sspan_t_v+ebx]
|
|
mov ds:dword ptr[izi],ebp
|
|
mov ebp,ds:dword ptr[sspan_t_u+ebx]
|
|
imul ds:dword ptr[_d_zrowbytes]
|
|
shl ebp,1 ; a word per pixel
|
|
add eax,ds:dword ptr[_d_pzbuffer]
|
|
add eax,ebp
|
|
mov ds:dword ptr[pz],eax
|
|
|
|
;
|
|
; point %edi to the first pixel in the span
|
|
;
|
|
mov ebp,ds:dword ptr[_d_viewbuffer]
|
|
mov eax,ds:dword ptr[sspan_t_v+ebx]
|
|
push ebx ; preserve spans pointer
|
|
mov edx,ds:dword ptr[_tadjust]
|
|
mov esi,ds:dword ptr[_sadjust]
|
|
mov edi,ds:dword ptr[_d_scantable+eax*4] ; v * screenwidth
|
|
add edi,ebp
|
|
mov ebp,ds:dword ptr[sspan_t_u+ebx]
|
|
add edi,ebp ; pdest = &pdestspan[scans->u];
|
|
|
|
;
|
|
; now start the FDIV for the end of the span
|
|
;
|
|
cmp ecx,8
|
|
ja LSetupNotLast1
|
|
|
|
dec ecx
|
|
jz LCleanup1 ; if only one pixel, no need to start an FDIV
|
|
mov ds:dword ptr[spancountminus1],ecx
|
|
|
|
; finish up the s and t calcs
|
|
fxch st(1) ; z*64k | 1/z | t/z | s/z
|
|
|
|
fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
|
|
fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
|
|
fxch st(1) ; z*64k | s | 1/z | t/z | s/z
|
|
fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
|
|
fxch st(1) ; s | t | 1/z | t/z | s/z
|
|
fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z
|
|
fistp ds:dword ptr[t] ; 1/z | t/z | s/z
|
|
|
|
fild ds:dword ptr[spancountminus1]
|
|
|
|
fld ds:dword ptr[_d_tdivzstepu] ; _d_tdivzstepu | spancountminus1
|
|
fld ds:dword ptr[_d_zistepu] ; _d_zistepu | _d_tdivzstepu | spancountminus1
|
|
fmul st(0),st(2) ; _d_zistepu*scm1 | _d_tdivzstepu | scm1
|
|
fxch st(1) ; _d_tdivzstepu | _d_zistepu*scm1 | scm1
|
|
fmul st(0),st(2) ; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
|
|
fxch st(2) ; scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
|
|
fmul ds:dword ptr[_d_sdivzstepu] ; _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
|
|
; _d_tdivzstepu*scm1
|
|
fxch st(1) ; _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
|
|
; _d_tdivzstepu*scm1
|
|
faddp st(3),st(0) ; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
|
|
fxch st(1) ; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
|
|
faddp st(3),st(0) ; _d_sdivzstepu*scm1
|
|
faddp st(3),st(0)
|
|
|
|
fld ds:dword ptr[fp_64k]
|
|
fdiv st(0),st(1) ; this is what we've gone to all this trouble to
|
|
; overlap
|
|
jmp LFDIVInFlight1
|
|
|
|
LCleanup1:
|
|
; finish up the s and t calcs
|
|
fxch st(1) ; z*64k | 1/z | t/z | s/z
|
|
|
|
fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
|
|
fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
|
|
fxch st(1) ; z*64k | s | 1/z | t/z | s/z
|
|
fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
|
|
fxch st(1) ; s | t | 1/z | t/z | s/z
|
|
fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z
|
|
fistp ds:dword ptr[t] ; 1/z | t/z | s/z
|
|
jmp LFDIVInFlight1
|
|
|
|
align 4
|
|
LSetupNotLast1:
|
|
; finish up the s and t calcs
|
|
fxch st(1) ; z*64k | 1/z | t/z | s/z
|
|
|
|
fld st(0) ; z*64k | z*64k | 1/z | t/z | s/z
|
|
fmul st(0),st(4) ; s | z*64k | 1/z | t/z | s/z
|
|
fxch st(1) ; z*64k | s | 1/z | t/z | s/z
|
|
fmul st(0),st(3) ; t | s | 1/z | t/z | s/z
|
|
fxch st(1) ; s | t | 1/z | t/z | s/z
|
|
fistp ds:dword ptr[s] ; 1/z | t | t/z | s/z
|
|
fistp ds:dword ptr[t] ; 1/z | t/z | s/z
|
|
|
|
fadd ds:dword ptr[zi8stepu]
|
|
fxch st(2)
|
|
fadd ds:dword ptr[sdivz8stepu]
|
|
fxch st(2)
|
|
fld ds:dword ptr[tdivz8stepu]
|
|
faddp st(2),st(0)
|
|
fld ds:dword ptr[fp_64k]
|
|
fdiv st(0),st(1) ; z = 1/1/z
|
|
; this is what we've gone to all this trouble to
|
|
; overlap
|
|
LFDIVInFlight1:
|
|
|
|
add esi,ds:dword ptr[s]
|
|
add edx,ds:dword ptr[t]
|
|
mov ebx,ds:dword ptr[_bbextents]
|
|
mov ebp,ds:dword ptr[_bbextentt]
|
|
cmp esi,ebx
|
|
ja LClampHighOrLow0
|
|
LClampReentry0:
|
|
mov ds:dword ptr[s],esi
|
|
mov ebx,ds:dword ptr[pbase]
|
|
shl esi,16
|
|
cmp edx,ebp
|
|
mov ds:dword ptr[sfracf],esi
|
|
ja LClampHighOrLow1
|
|
LClampReentry1:
|
|
mov ds:dword ptr[t],edx
|
|
mov esi,ds:dword ptr[s] ; sfrac = scans->sfrac;
|
|
shl edx,16
|
|
mov eax,ds:dword ptr[t] ; tfrac = scans->tfrac;
|
|
sar esi,16
|
|
mov ds:dword ptr[tfracf],edx
|
|
|
|
;
|
|
; calculate the texture starting address
|
|
;
|
|
sar eax,16
|
|
add esi,ebx
|
|
imul eax,ds:dword ptr[_cachewidth] ; (tfrac >> 16) * cachewidth
|
|
add esi,eax ; psource = pbase + (sfrac >> 16) +
|
|
; ((tfrac >> 16) * cachewidth);
|
|
|
|
;
|
|
; determine whether last span or not
|
|
;
|
|
cmp ecx,8
|
|
jna LLastSegment
|
|
|
|
;
|
|
; not the last segment; do full 8-wide segment
|
|
;
|
|
LNotLastSegment:
|
|
|
|
;
|
|
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
|
|
; get there
|
|
;
|
|
|
|
; pick up after the FDIV that was left in flight previously
|
|
|
|
fld st(0) ; duplicate it
|
|
fmul st(0),st(4) ; s = s/z * z
|
|
fxch st(1)
|
|
fmul st(0),st(3) ; t = t/z * z
|
|
fxch st(1)
|
|
fistp ds:dword ptr[snext]
|
|
fistp ds:dword ptr[tnext]
|
|
mov eax,ds:dword ptr[snext]
|
|
mov edx,ds:dword ptr[tnext]
|
|
|
|
sub ecx,8 ; count off this segments' pixels
|
|
mov ebp,ds:dword ptr[_sadjust]
|
|
push ecx ; remember count of remaining pixels
|
|
mov ecx,ds:dword ptr[_tadjust]
|
|
|
|
add ebp,eax
|
|
add ecx,edx
|
|
|
|
mov eax,ds:dword ptr[_bbextents]
|
|
mov edx,ds:dword ptr[_bbextentt]
|
|
|
|
cmp ebp,2048
|
|
jl LClampLow2
|
|
cmp ebp,eax
|
|
ja LClampHigh2
|
|
LClampReentry2:
|
|
|
|
cmp ecx,2048
|
|
jl LClampLow3
|
|
cmp ecx,edx
|
|
ja LClampHigh3
|
|
LClampReentry3:
|
|
|
|
mov ds:dword ptr[snext],ebp
|
|
mov ds:dword ptr[tnext],ecx
|
|
|
|
sub ebp,ds:dword ptr[s]
|
|
sub ecx,ds:dword ptr[t]
|
|
|
|
;
|
|
; set up advancetable
|
|
;
|
|
mov eax,ecx
|
|
mov edx,ebp
|
|
sar edx,19 ; sstep >>= 16;
|
|
mov ebx,ds:dword ptr[_cachewidth]
|
|
sar eax,19 ; tstep >>= 16;
|
|
jz LIsZero
|
|
imul eax,ebx ; (tstep >> 16) * cachewidth;
|
|
LIsZero:
|
|
add eax,edx ; add in sstep
|
|
; (tstep >> 16) * cachewidth + (sstep >> 16);
|
|
mov edx,ds:dword ptr[tfracf]
|
|
mov ds:dword ptr[advancetable+4],eax ; advance base in t
|
|
add eax,ebx ; ((tstep >> 16) + 1) * cachewidth +
|
|
; (sstep >> 16);
|
|
shl ebp,13 ; left-justify sstep fractional part
|
|
mov ds:dword ptr[sstep],ebp
|
|
mov ebx,ds:dword ptr[sfracf]
|
|
shl ecx,13 ; left-justify tstep fractional part
|
|
mov ds:dword ptr[advancetable],eax ; advance extra in t
|
|
mov ds:dword ptr[tstep],ecx
|
|
|
|
mov ecx,ds:dword ptr[pz]
|
|
mov ebp,ds:dword ptr[izi]
|
|
|
|
cmp bp,ds:word ptr[ecx]
|
|
jl Lp1
|
|
mov al,ds:byte ptr[esi] ; get first source texel
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp1
|
|
mov ds:word ptr[ecx],bp
|
|
mov ds:byte ptr[edi],al ; store first dest pixel
|
|
Lp1:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep] ; advance tfrac fractional part by tstep frac
|
|
|
|
sbb eax,eax ; turn tstep carry into -1 (0 if none)
|
|
add ebx,ds:dword ptr[sstep] ; advance sfrac fractional part by sstep frac
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4] ; point to next source texel
|
|
|
|
cmp bp,ds:word ptr[2+ecx]
|
|
jl Lp2
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp2
|
|
mov ds:word ptr[2+ecx],bp
|
|
mov ds:byte ptr[1+edi],al
|
|
Lp2:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
cmp bp,ds:word ptr[4+ecx]
|
|
jl Lp3
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp3
|
|
mov ds:word ptr[4+ecx],bp
|
|
mov ds:byte ptr[2+edi],al
|
|
Lp3:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
cmp bp,ds:word ptr[6+ecx]
|
|
jl Lp4
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp4
|
|
mov ds:word ptr[6+ecx],bp
|
|
mov ds:byte ptr[3+edi],al
|
|
Lp4:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
cmp bp,ds:word ptr[8+ecx]
|
|
jl Lp5
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp5
|
|
mov ds:word ptr[8+ecx],bp
|
|
mov ds:byte ptr[4+edi],al
|
|
Lp5:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
;
|
|
; start FDIV for end of next segment in flight, so it can overlap
|
|
;
|
|
pop eax
|
|
cmp eax,8 ; more than one segment after this?
|
|
ja LSetupNotLast2 ; yes
|
|
|
|
dec eax
|
|
jz LFDIVInFlight2 ; if only one pixel, no need to start an FDIV
|
|
mov ds:dword ptr[spancountminus1],eax
|
|
fild ds:dword ptr[spancountminus1]
|
|
|
|
fld ds:dword ptr[_d_zistepu] ; _d_zistepu | spancountminus1
|
|
fmul st(0),st(1) ; _d_zistepu*scm1 | scm1
|
|
fld ds:dword ptr[_d_tdivzstepu] ; _d_tdivzstepu | _d_zistepu*scm1 | scm1
|
|
fmul st(0),st(2) ; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
|
|
fxch st(1) ; _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
|
|
faddp st(3),st(0) ; _d_tdivzstepu*scm1 | scm1
|
|
fxch st(1) ; scm1 | _d_tdivzstepu*scm1
|
|
fmul ds:dword ptr[_d_sdivzstepu] ; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
|
|
fxch st(1) ; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
|
|
faddp st(3),st(0) ; _d_sdivzstepu*scm1
|
|
fld ds:dword ptr[fp_64k] ; 64k | _d_sdivzstepu*scm1
|
|
fxch st(1) ; _d_sdivzstepu*scm1 | 64k
|
|
faddp st(4),st(0) ; 64k
|
|
|
|
fdiv st(0),st(1) ; this is what we've gone to all this trouble to
|
|
; overlap
|
|
jmp LFDIVInFlight2
|
|
|
|
align 4
|
|
LSetupNotLast2:
|
|
fadd ds:dword ptr[zi8stepu]
|
|
fxch st(2)
|
|
fadd ds:dword ptr[sdivz8stepu]
|
|
fxch st(2)
|
|
fld ds:dword ptr[tdivz8stepu]
|
|
faddp st(2),st(0)
|
|
fld ds:dword ptr[fp_64k]
|
|
fdiv st(0),st(1) ; z = 1/1/z
|
|
; this is what we've gone to all this trouble to
|
|
; overlap
|
|
LFDIVInFlight2:
|
|
push eax
|
|
|
|
cmp bp,ds:word ptr[10+ecx]
|
|
jl Lp6
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp6
|
|
mov ds:word ptr[10+ecx],bp
|
|
mov ds:byte ptr[5+edi],al
|
|
Lp6:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
cmp bp,ds:word ptr[12+ecx]
|
|
jl Lp7
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp7
|
|
mov ds:word ptr[12+ecx],bp
|
|
mov ds:byte ptr[6+edi],al
|
|
Lp7:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
cmp bp,ds:word ptr[14+ecx]
|
|
jl Lp8
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp8
|
|
mov ds:word ptr[14+ecx],bp
|
|
mov ds:byte ptr[7+edi],al
|
|
Lp8:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
add edi,8
|
|
add ecx,16
|
|
mov ds:dword ptr[tfracf],edx
|
|
mov edx,ds:dword ptr[snext]
|
|
mov ds:dword ptr[sfracf],ebx
|
|
mov ebx,ds:dword ptr[tnext]
|
|
mov ds:dword ptr[s],edx
|
|
mov ds:dword ptr[t],ebx
|
|
|
|
mov ds:dword ptr[pz],ecx
|
|
mov ds:dword ptr[izi],ebp
|
|
|
|
pop ecx ; retrieve count
|
|
|
|
;
|
|
; determine whether last span or not
|
|
;
|
|
cmp ecx,8 ; are there multiple segments remaining?
|
|
ja LNotLastSegment ; yes
|
|
|
|
;
|
|
; last segment of scan
|
|
;
|
|
LLastSegment:
|
|
|
|
;
|
|
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
|
|
; get there. The number of pixels left is variable, and we want to land on the
|
|
; last pixel, not step one past it, so we can't run into arithmetic problems
|
|
;
|
|
test ecx,ecx
|
|
jz LNoSteps ; just draw the last pixel and we're done
|
|
|
|
; pick up after the FDIV that was left in flight previously
|
|
|
|
|
|
fld st(0) ; duplicate it
|
|
fmul st(0),st(4) ; s = s/z * z
|
|
fxch st(1)
|
|
fmul st(0),st(3) ; t = t/z * z
|
|
fxch st(1)
|
|
fistp ds:dword ptr[snext]
|
|
fistp ds:dword ptr[tnext]
|
|
|
|
mov ebx,ds:dword ptr[_tadjust]
|
|
mov eax,ds:dword ptr[_sadjust]
|
|
|
|
add eax,ds:dword ptr[snext]
|
|
add ebx,ds:dword ptr[tnext]
|
|
|
|
mov ebp,ds:dword ptr[_bbextents]
|
|
mov edx,ds:dword ptr[_bbextentt]
|
|
|
|
cmp eax,2048
|
|
jl LClampLow4
|
|
cmp eax,ebp
|
|
ja LClampHigh4
|
|
LClampReentry4:
|
|
mov ds:dword ptr[snext],eax
|
|
|
|
cmp ebx,2048
|
|
jl LClampLow5
|
|
cmp ebx,edx
|
|
ja LClampHigh5
|
|
LClampReentry5:
|
|
|
|
cmp ecx,1 ; don't bother
|
|
je LOnlyOneStep ; if two pixels in segment, there's only one step,
|
|
; of the segment length
|
|
sub eax,ds:dword ptr[s]
|
|
sub ebx,ds:dword ptr[t]
|
|
|
|
add eax,eax ; convert to 15.17 format so multiply by 1.31
|
|
add ebx,ebx ; reciprocal yields 16.48
|
|
imul ds:dword ptr[reciprocal_table-8+ecx*4] ; sstep = (snext - s) / (spancount-1)
|
|
mov ebp,edx
|
|
|
|
mov eax,ebx
|
|
imul ds:dword ptr[reciprocal_table-8+ecx*4] ; tstep = (tnext - t) / (spancount-1)
|
|
|
|
LSetEntryvec:
|
|
;
|
|
; set up advancetable
|
|
;
|
|
mov ebx,ds:dword ptr[spr8entryvec_table+ecx*4]
|
|
mov eax,edx
|
|
push ebx ; entry point into code for RET later
|
|
mov ecx,ebp
|
|
sar ecx,16 ; sstep >>= 16;
|
|
mov ebx,ds:dword ptr[_cachewidth]
|
|
sar edx,16 ; tstep >>= 16;
|
|
jz LIsZeroLast
|
|
imul edx,ebx ; (tstep >> 16) * cachewidth;
|
|
LIsZeroLast:
|
|
add edx,ecx ; add in sstep
|
|
; (tstep >> 16) * cachewidth + (sstep >> 16);
|
|
mov ecx,ds:dword ptr[tfracf]
|
|
mov ds:dword ptr[advancetable+4],edx ; advance base in t
|
|
add edx,ebx ; ((tstep >> 16) + 1) * cachewidth +
|
|
; (sstep >> 16);
|
|
shl ebp,16 ; left-justify sstep fractional part
|
|
mov ebx,ds:dword ptr[sfracf]
|
|
shl eax,16 ; left-justify tstep fractional part
|
|
mov ds:dword ptr[advancetable],edx ; advance extra in t
|
|
|
|
mov ds:dword ptr[tstep],eax
|
|
mov ds:dword ptr[sstep],ebp
|
|
mov edx,ecx
|
|
|
|
mov ecx,ds:dword ptr[pz]
|
|
mov ebp,ds:dword ptr[izi]
|
|
|
|
ret ; jump to the number-of-pixels handler
|
|
|
|
;----------------------------------------
|
|
|
|
LNoSteps:
|
|
mov ecx,ds:dword ptr[pz]
|
|
sub edi,7 ; adjust for hardwired offset
|
|
sub ecx,14
|
|
jmp LEndSpan
|
|
|
|
|
|
LOnlyOneStep:
|
|
sub eax,ds:dword ptr[s]
|
|
sub ebx,ds:dword ptr[t]
|
|
mov ebp,eax
|
|
mov edx,ebx
|
|
jmp LSetEntryvec
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry2_8
|
|
Spr8Entry2_8:
|
|
sub edi,6 ; adjust for hardwired offsets
|
|
sub ecx,12
|
|
mov al,ds:byte ptr[esi]
|
|
jmp LLEntry2_8
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry3_8
|
|
Spr8Entry3_8:
|
|
sub edi,5 ; adjust for hardwired offsets
|
|
sub ecx,10
|
|
jmp LLEntry3_8
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry4_8
|
|
Spr8Entry4_8:
|
|
sub edi,4 ; adjust for hardwired offsets
|
|
sub ecx,8
|
|
jmp LLEntry4_8
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry5_8
|
|
Spr8Entry5_8:
|
|
sub edi,3 ; adjust for hardwired offsets
|
|
sub ecx,6
|
|
jmp LLEntry5_8
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry6_8
|
|
Spr8Entry6_8:
|
|
sub edi,2 ; adjust for hardwired offsets
|
|
sub ecx,4
|
|
jmp LLEntry6_8
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry7_8
|
|
Spr8Entry7_8:
|
|
dec edi ; adjust for hardwired offsets
|
|
sub ecx,2
|
|
jmp LLEntry7_8
|
|
|
|
;----------------------------------------
|
|
|
|
public Spr8Entry8_8
|
|
Spr8Entry8_8:
|
|
cmp bp,ds:word ptr[ecx]
|
|
jl Lp9
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp9
|
|
mov ds:word ptr[ecx],bp
|
|
mov ds:byte ptr[edi],al
|
|
Lp9:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
LLEntry7_8:
|
|
cmp bp,ds:word ptr[2+ecx]
|
|
jl Lp10
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp10
|
|
mov ds:word ptr[2+ecx],bp
|
|
mov ds:byte ptr[1+edi],al
|
|
Lp10:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
LLEntry6_8:
|
|
cmp bp,ds:word ptr[4+ecx]
|
|
jl Lp11
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp11
|
|
mov ds:word ptr[4+ecx],bp
|
|
mov ds:byte ptr[2+edi],al
|
|
Lp11:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
LLEntry5_8:
|
|
cmp bp,ds:word ptr[6+ecx]
|
|
jl Lp12
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp12
|
|
mov ds:word ptr[6+ecx],bp
|
|
mov ds:byte ptr[3+edi],al
|
|
Lp12:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
LLEntry4_8:
|
|
cmp bp,ds:word ptr[8+ecx]
|
|
jl Lp13
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp13
|
|
mov ds:word ptr[8+ecx],bp
|
|
mov ds:byte ptr[4+edi],al
|
|
Lp13:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
LLEntry3_8:
|
|
cmp bp,ds:word ptr[10+ecx]
|
|
jl Lp14
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp14
|
|
mov ds:word ptr[10+ecx],bp
|
|
mov ds:byte ptr[5+edi],al
|
|
Lp14:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
LLEntry2_8:
|
|
cmp bp,ds:word ptr[12+ecx]
|
|
jl Lp15
|
|
mov al,ds:byte ptr[esi]
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp15
|
|
mov ds:word ptr[12+ecx],bp
|
|
mov ds:byte ptr[6+edi],al
|
|
Lp15:
|
|
add ebp,ds:dword ptr[izistep]
|
|
adc ebp,0
|
|
add edx,ds:dword ptr[tstep]
|
|
sbb eax,eax
|
|
add ebx,ds:dword ptr[sstep]
|
|
adc esi,ds:dword ptr[advancetable+4+eax*4]
|
|
|
|
LEndSpan:
|
|
cmp bp,ds:word ptr[14+ecx]
|
|
jl Lp16
|
|
mov al,ds:byte ptr[esi] ; load first texel in segment
|
|
cmp al,offset TRANSPARENT_COLOR
|
|
jz Lp16
|
|
mov ds:word ptr[14+ecx],bp
|
|
mov ds:byte ptr[7+edi],al
|
|
Lp16:
|
|
|
|
;
|
|
; clear s/z, t/z, 1/z from FP stack
|
|
;
|
|
fstp st(0)
|
|
fstp st(0)
|
|
fstp st(0)
|
|
|
|
pop ebx ; restore spans pointer
|
|
LNextSpan:
|
|
add ebx,offset sspan_t_size ; point to next span
|
|
mov ecx,ds:dword ptr[sspan_t_count+ebx]
|
|
cmp ecx,0 ; any more spans?
|
|
jg LSpanLoop ; yes
|
|
jz LNextSpan ; yes, but this one's empty
|
|
|
|
pop ebx ; restore register variables
|
|
pop esi
|
|
pop edi
|
|
pop ebp ; restore the caller's stack frame
|
|
ret
|
|
|
|
_TEXT ENDS
|
|
endif ; id386
|
|
END
|