- removed R_DrawColumnHorizP_ASM completely after discovering that the compiler generated code isn't really anything worse than the old assembly code. This looks like something that may have been relevant 10 years ago but today it looks like there's no need for hand optimization here anymore. And since it appears to be broken anyway, off this goes.

This commit is contained in:
Christoph Oelckers 2016-12-04 15:31:08 +01:00
parent d0cf34890c
commit 93163d12f1

View file

@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM:
ret ret
;*----------------------------------------------------------------------
;*
;* R_DrawColumnHorizP_ASM
;*
;*----------------------------------------------------------------------
GLOBAL @R_DrawColumnHorizP_ASM@0
GLOBAL _R_DrawColumnHorizP_ASM
GLOBAL R_DrawColumnHorizP_ASM
align 16
@R_DrawColumnHorizP_ASM@0:
_R_DrawColumnHorizP_ASM:
R_DrawColumnHorizP_ASM:
; count = dc_yh - dc_yl;
mov eax,[dc_yh]
mov ecx,[dc_yl]
sub eax,ecx
mov edx,[dc_x]
jl near .leave ; count < 0: nothing to do, so leave
push ebp ; save registers
push ebx
push edi
push esi
inc eax ; make 0 count mean 0 pixels
and edx,3
push eax
mov eax,[dc_temp]
mov esi,[dc_ctspan+edx*4]
add eax,edx
lea eax,[eax+ecx*4] ; eax = top of column in buffer
mov ebp,[dc_yh]
mov [esi],ecx
mov [esi+4],ebp
add esi,8
mov edi,[dc_source]
mov [dc_ctspan+edx*4],esi
mov esi,[dc_iscale]
mov ecx,[dc_texturefrac] ; ecx = frac
mov dl,[edi] ; load cache
mov ebx,[esp]
and ebx,0xfffffff8
jnz .mthan8
; Register usage in the following code is:
;
; eax: dest
; edi: source
; ecx: frac (16.16)
; esi: fracstep (16.16)
; ebx: add1
; ebp: add2
; dl: texel1
; dh: texel2
;[esp] count
; there are fewer than 8 pixels to draw
mov ebx,[esp]
.lthan8 shr ebx,1
jnc .even
; do one pixel before loop (little opportunity for pairing)
mov ebp,ecx ; copy frac to ebx
add ecx,esi ; increment frac
shr ebp,16 ; shift frac over to low end
add eax,4
mov dl,[edi+ebp]
mov [eax-4],dl
.even test ebx,ebx
jz near .done
.loop2 mov [esp],ebx ; save counter
mov ebx,ecx ; copy frac for texel1 to ebx
shr ebx,16 ; shift frac for texel1 to low end
add ecx,esi ; increment frac
mov ebp,ecx ; copy frac for texel2 to ebp
shr ebp,16 ; shift frac for texel2 to low end
add ecx,esi ; increment frac
mov dl,[edi+ebx] ; read texel1
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; read texel2
mov [eax],dl ; write texel1
mov [eax+4],dh ; write texel2
add eax,8 ; increment dest
dec ebx ; decrement counter
jnz .loop2 ; loop until it hits 0
jmp .done
; there are more than 8 pixels to draw. position eax as close to a 32 byte
; boundary as possible, then do whatever is left.
.mthan8 test eax,4
jz .try2
mov ebp,ecx ; frac: in ebp
add ecx,esi ; step
shr ebp,16 ; frac: shift
add eax,4 ; increment dest
mov ebx,[esp] ; fetch counter
mov dl,[edi+ebp] ; tex: read
dec ebx ; decrement counter
mov [eax-4],dl ; tex: write
mov [esp],ebx ; store counter
.try2 test eax,8
jz .try4
mov ebx,ecx ; frac1: in ebx
add ecx,esi ; step
shr ebx,16 ; frac1: shift
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; tex2: read
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
sub ebx,2 ; decrement counter
add eax,8 ; increment dest
mov [esp],ebx ; store counter
.try4 test eax,16
jz .try8
mov ebx,ecx ; frac1: in ebx
add ecx,esi ; step
shr ebx,16 ; frac1: shift
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,ecx ; frac3: in ebx
shr ebx,16 ; frac3: shift
mov dh,[edi+ebp] ; tex2: read
add ecx,esi ; step
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
mov ebp,ecx ; frac4: in ebp
shr ebp,16 ; frac4: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex3: read
mov ebx,[esp] ; fetch counter
mov dh,[edi+ebp] ; tex4: read
sub ebx,4 ; decrement counter
mov [esp],ebx ; store counter
mov [eax+8],dl ; tex3: write
mov [eax+12],dh ; tex4: write
add eax,16 ; increment dest
.try8 mov ebx,[esp] ; make counter count groups of 8
sub esp,4
shr ebx,3
jmp .tail8
align 16
.loop8 mov [esp],ebx ; save counter
mov ebx,ecx ; frac1: in ebx
shr ebx,16 ; frac1: shift
add ecx,esi ; step
mov ebp,ecx ; frac2: in ebp
shr ebp,16 ; frac2: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex1: read
mov ebx,ecx ; frac3: in ebx
mov dh,[edi+ebp] ; tex2: read
shr ebx,16 ; frac3: shift
add ecx,esi ; step
mov [eax],dl ; tex1: write
mov [eax+4],dh ; tex2: write
mov ebp,ecx ; frac4: in ebp
shr ebp,16 ; frac4: shift
add ecx,esi ; step
mov dl,[edi+ebx] ; tex3: read
mov ebx,ecx ; frac5: in ebx
mov dh,[edi+ebp] ; tex4: read
shr ebx,16 ; frac5: shift
mov [eax+8],dl ; tex3: write
mov [eax+12],dh ; tex4: write
add ecx,esi ; step
mov ebp,ecx ; frac6: in ebp
shr ebp,16 ; frac6: shift
mov dl,[edi+ebx] ; tex5: read
add ecx,esi ; step
mov ebx,ecx ; frac7: in ebx
mov [eax+16],dl ; tex5: write
shr ebx,16 ; frac7: shift
mov dh,[edi+ebp] ; tex6: read
add ecx,esi ; step
mov ebp,ecx ; frac8: in ebp
mov [eax+20],dh ; tex6: write
shr ebp,16 ; frac8: shift
add eax,32 ; increment dest pointer
mov dl,[edi+ebx] ; tex7: read
mov ebx,[esp] ; fetch counter
mov [eax-8],dl ; tex7: write
mov dh,[edi+ebp] ; tex8: read
add ecx,esi ; step
mov [eax-4],dh ; tex8: write
mov dl,[eax] ; load cache
dec ebx ; decrement counter
.tail8 jnz near .loop8 ; loop if more to do
pop ebp
mov ebx,[esp]
and ebx,7
jnz near .lthan8
.done pop eax
pop esi
pop edi
pop ebx
pop ebp
.leave ret
;*---------------------------------------------------------------------- ;*----------------------------------------------------------------------
;* ;*
;* rt_copy1col_asm ;* rt_copy1col_asm