This commit is contained in:
Rachael Alexanderson 2016-12-04 09:34:49 -05:00
commit 4196a4d055

View file

@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM:
ret
;*----------------------------------------------------------------------
;*
;* R_DrawColumnHorizP_ASM
;*
;* Samples one column of texels from the byte array at [dc_source] and
;* stores them, one byte per pixel, into the buffer pointed to by
;* [dc_temp]. Destination pixels are 4 bytes apart; the first one is at
;*     [dc_temp] + (dc_x & 3) + dc_yl*4
;* The source index for each pixel is the high 16 bits of a 16.16
;* fixed-point position that starts at dc_texturefrac and advances by
;* dc_iscale per pixel.
;*
;* Before drawing, the pair (dc_yl, dc_yh) is stored at the pointer held
;* in dc_ctspan[dc_x & 3], and that pointer is advanced by 8 bytes.
;*
;* Inputs  (all read from memory, none from registers or the stack):
;*     dc_yl, dc_yh, dc_x, dc_temp, dc_ctspan, dc_source, dc_iscale,
;*     dc_texturefrac
;* Output: none in registers. If dc_yh < dc_yl the routine returns at
;*     once without touching memory.
;* Saved/restored: ebp, ebx, edi, esi.
;* Modified and not restored: eax, ecx, edx, flags.
;*
;* Stack layout while drawing (after the five pushes below):
;*     [esp]    remaining pixel count (reused as a scratch counter)
;*     [esp+4]  saved esi, then edi, ebx, ebp, return address
;*
;* NOTE(review): the symbol is exported under three spellings
;* (@name@0, _name, name), presumably so it links under different
;* compilers' name-decoration schemes -- confirm against the callers.
;*
;*----------------------------------------------------------------------
GLOBAL @R_DrawColumnHorizP_ASM@0
GLOBAL _R_DrawColumnHorizP_ASM
GLOBAL R_DrawColumnHorizP_ASM
align 16
@R_DrawColumnHorizP_ASM@0:
_R_DrawColumnHorizP_ASM:
R_DrawColumnHorizP_ASM:
; count = dc_yh - dc_yl;
mov eax,[dc_yh]
mov ecx,[dc_yl]
sub eax,ecx
mov edx,[dc_x] ; mov leaves the flags from the sub intact for the jl below
jl near .leave ; count < 0: nothing to do, so leave
push ebp ; save registers
push ebx
push edi
push esi
inc eax ; eax = number of pixels = dc_yh - dc_yl + 1 (always >= 1 here)
and edx,3 ; edx = dc_x & 3: selects the dc_ctspan entry and the byte offset in dc_temp
push eax ; [esp] = pixel count
mov eax,[dc_temp]
mov esi,[dc_ctspan+edx*4] ; esi = current span pointer for this dc_x & 3
add eax,edx
lea eax,[eax+ecx*4] ; eax = top of column in buffer
mov ebp,[dc_yh]
mov [esi],ecx ; record span start (dc_yl)
mov [esi+4],ebp ; record span end (dc_yh)
add esi,8 ; advance past the two dwords just written
mov edi,[dc_source]
mov [dc_ctspan+edx*4],esi ; store the advanced span pointer back
mov esi,[dc_iscale] ; esi = fracstep
mov ecx,[dc_texturefrac] ; ecx = frac
mov dl,[edi] ; load cache (touch first source byte; dl is overwritten before use)
mov ebx,[esp]
and ebx,0xfffffff8 ; non-zero iff count >= 8
jnz .mthan8
; Register usage in the following code is:
;
; eax: dest
; edi: source
; ecx: frac (16.16)
; esi: fracstep (16.16)
; ebx: source index for texel1 (frac >> 16); also holds the loop counter
; ebp: source index for texel2 (frac >> 16)
; dl: texel1
; dh: texel2
;[esp] count
; there are fewer than 8 pixels to draw
mov ebx,[esp]
; .lthan8 is also entered from the end of the 8-pixel loop with
; ebx = leftover pixel count (1..7). Bit 0 is shifted into carry to
; decide whether a single odd pixel is needed; ebx becomes the number
; of pixel pairs.
.lthan8 shr ebx,1
jnc .even
; do one pixel before loop (little opportunity for pairing)
mov ebp,ecx ; copy frac to ebp
add ecx,esi ; increment frac
shr ebp,16 ; shift frac over to low end
add eax,4
mov dl,[edi+ebp]
mov [eax-4],dl
.even test ebx,ebx ; any pixel pairs left?
jz near .done
; two pixels per iteration; ebx is needed as an index register, so the
; pair counter is parked in [esp] (the original count is no longer needed)
.loop2 mov [esp],ebx ; save counter
mov ebx,ecx ; copy frac for texel1 to ebx
shr ebx,16 ; shift frac for texel1 to low end
add ecx,esi ; increment frac
mov ebp,ecx ; copy frac for texel2 to ebp
shr ebp,16 ; shift frac for texel2 to low end
add ecx,esi ; increment frac
mov dl,[edi+ebx] ; read texel1
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; read texel2
mov [eax],dl ; write texel1
mov [eax+4],dh ; write texel2
add eax,8 ; increment dest
dec ebx ; decrement counter
jnz .loop2 ; loop until it hits 0
jmp .done
; there are 8 or more pixels to draw. position eax as close to a 32 byte
; boundary as possible, then do whatever is left.
; Each step below tests one bit of the dest address (4, 8, 16) and, if it
; is set, draws 1, 2 or 4 pixels (4 bytes apart) to clear it, updating
; the count in [esp] as it goes.
.mthan8 test eax,4
jz .try2
mov ebp,ecx ; frac: in ebp
add ecx,esi ; step
shr ebp,16 ; frac: shift
add eax,4 ; increment dest
mov ebx,[esp] ; fetch counter
mov dl,[edi+ebp] ; tex: read
dec ebx ; decrement counter
mov [eax-4],dl ; tex: write
mov [esp],ebx ; store counter
.try2 test eax,8
jz .try4
mov ebx,ecx ; frac1: in ebx
add ecx,esi ; step
shr ebx,16 ; frac1: shift
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; tex2: read
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
sub ebx,2 ; decrement counter
add eax,8 ; increment dest
mov [esp],ebx ; store counter
.try4 test eax,16
jz .try8
mov ebx,ecx ; frac1: in ebx
add ecx,esi ; step
shr ebx,16 ; frac1: shift
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,ecx ; frac3: in ebx
shr ebx,16 ; frac3: shift
mov dh,[edi+ebp] ; tex2: read
add ecx,esi ; step
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
mov ebp,ecx ; frac4: in ebp
shr ebp,16 ; frac4: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex3: read
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; tex4: read
sub ebx,4 ; decrement counter
mov [esp],ebx ; store counter
mov [eax+8],dl ; tex3: write
mov [eax+12],dh ; tex4: write
add eax,16 ; increment dest
; Main loop setup. A second stack slot is reserved for the group-of-8
; counter so the remaining pixel count stays intact one slot above it.
; The loop is entered at its condition test: neither sub esp nor jmp
; runs after the shr, so .tail8 sees ZF from "shr ebx,3" and skips the
; loop entirely when fewer than 8 pixels remain.
.try8 mov ebx,[esp] ; make counter count groups of 8
sub esp,4 ; reserve scratch slot for the group counter
shr ebx,3
jmp .tail8
align 16
.loop8 mov [esp],ebx ; save counter
mov ebx,ecx ; frac1: in ebx
shr ebx,16 ; frac1: shift
add ecx,esi ; step
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,ecx ; frac3: in ebx
mov dh,[edi+ebp] ; tex2: read
shr ebx,16 ; frac3: shift
add ecx,esi ; step
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
mov ebp,ecx ; frac4: in ebp
shr ebp,16 ; frac4: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex3: read
mov ebx,ecx ; frac5: in ebx
mov dh,[edi+ebp] ; tex4: read
shr ebx,16 ; frac5: shift
mov [eax+8],dl ; tex3: write
mov [eax+12],dh ; tex4: write
add ecx,esi ; step
mov ebp,ecx ; frac6: in ebp
shr ebp,16 ; frac6: shift
mov dl,[edi+ebx] ; tex5: read
add ecx,esi ; step
mov ebx,ecx ; frac7: in ebx
mov [eax+16],dl ; tex5: write
shr ebx,16 ; frac7: shift
mov dh,[edi+ebp] ; tex6: read
add ecx,esi ; step
mov ebp,ecx ; frac8: in ebp
mov [eax+20],dh ; tex6: write
shr ebp,16 ; frac8: shift
add eax,32 ; increment dest pointer (tex7/tex8 below use negative offsets)
mov dl,[edi+ebx] ; tex7: read
mov ebx,[esp] ; fetch counter
mov [eax-8],dl ; tex7: write
mov dh,[edi+ebp] ; tex8: read
add ecx,esi ; step
mov [eax-4],dh ; tex8: write
mov dl,[eax] ; load cache (touch the next dest address; the value read is not used)
dec ebx ; decrement counter
.tail8 jnz near .loop8 ; loop if more to do
pop ebp ; release the scratch slot (value is discarded; ebp is reloaded later)
mov ebx,[esp] ; remaining pixel count as it was before the 8-pixel loop
and ebx,7 ; pixels left over after the groups of 8
jnz near .lthan8
.done pop eax ; discard the count slot
pop esi ; restore registers
pop edi
pop ebx
pop ebp
.leave ret
;*----------------------------------------------------------------------
;*
;* rt_copy1col_asm