mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-25 05:31:00 +00:00
- removed R_DrawColumnHorizP_ASM completely after discovering that the compiler-generated code isn't really any worse than the old assembly code. This looks like something that may have been relevant 10 years ago, but today there is no need for hand optimization here anymore. And since it appears to be broken anyway, off it goes.
This commit is contained in:
parent
d0cf34890c
commit
93163d12f1
1 changed files with 0 additions and 227 deletions
|
@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM:
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
;*----------------------------------------------------------------------
|
|
||||||
;*
|
|
||||||
;* R_DrawColumnHorizP_ASM
;*
;* Copies dc_yh - dc_yl + 1 texels from dc_source into the dc_temp buffer.
;* Each texel is dc_source[frac >> 16]; frac starts at dc_texturefrac and
;* advances by dc_iscale per pixel (16.16 fixed point).
;* The destination starts at dc_temp + (dc_x & 3) + dc_yl*4 and advances
;* 4 bytes per pixel.
;* Also stores the pair (dc_yl, dc_yh) at the pointer held in
;* dc_ctspan[dc_x & 3] and advances that pointer by 8 bytes.
;* Returns immediately when dc_yh - dc_yl is negative.
;*
;* Reads no register or stack arguments; all inputs are the globals above.
;* Preserves ebp, ebx, edi, esi. Overwrites eax, ecx, edx and the flags.
|
|
||||||
;*
|
|
||||||
;*----------------------------------------------------------------------
|
|
||||||
|
|
||||||
; The three labels below are aliases for the same entry point.
GLOBAL @R_DrawColumnHorizP_ASM@0
|
|
||||||
GLOBAL _R_DrawColumnHorizP_ASM
|
|
||||||
GLOBAL R_DrawColumnHorizP_ASM
|
|
||||||
|
|
||||||
align 16
|
|
||||||
|
|
||||||
@R_DrawColumnHorizP_ASM@0:
|
|
||||||
_R_DrawColumnHorizP_ASM:
|
|
||||||
R_DrawColumnHorizP_ASM:
|
|
||||||
|
|
||||||
; count = dc_yh - dc_yl;
|
|
||||||
|
|
||||||
mov eax,[dc_yh]
|
|
||||||
mov ecx,[dc_yl] ; ecx = dc_yl (reused for the dest address and the span record)
|
|
||||||
sub eax,ecx ; eax = count, sets the flags tested by the jl below
|
|
||||||
mov edx,[dc_x] ; mov leaves the flags from sub intact
|
|
||||||
|
|
||||||
jl near .leave ; count < 0: nothing to do, so leave
|
|
||||||
|
|
||||||
push ebp ; save registers
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
push esi
|
|
||||||
|
|
||||||
inc eax ; eax = number of pixels (dc_yh - dc_yl + 1)
|
|
||||||
and edx,3 ; edx = dc_x & 3 (column slot 0..3)
|
|
||||||
push eax ; [esp] = pixel count
|
|
||||||
mov eax,[dc_temp]
|
|
||||||
mov esi,[dc_ctspan+edx*4] ; esi = span pointer for this column slot
|
|
||||||
add eax,edx
|
|
||||||
lea eax,[eax+ecx*4] ; eax = top of column in buffer
|
|
||||||
mov ebp,[dc_yh]
|
|
||||||
mov [esi],ecx ; record span start (dc_yl)
|
|
||||||
mov [esi+4],ebp ; record span end (dc_yh)
|
|
||||||
add esi,8 ; step past the two dwords just written
|
|
||||||
mov edi,[dc_source] ; edi = source
|
|
||||||
mov [dc_ctspan+edx*4],esi ; store the advanced span pointer back
|
|
||||||
mov esi,[dc_iscale] ; esi = fracstep
|
|
||||||
mov ecx,[dc_texturefrac] ; ecx = frac
|
|
||||||
mov dl,[edi] ; load cache (dl is overwritten before it is used)
|
|
||||||
mov ebx,[esp] ; ebx = pixel count
|
|
||||||
and ebx,0xfffffff8 ; nonzero only when count >= 8
|
|
||||||
jnz .mthan8
|
|
||||||
|
|
||||||
; Register usage in the following code is:
|
|
||||||
;
|
|
||||||
; eax: dest
|
|
||||||
; edi: source
|
|
||||||
; ecx: frac (16.16)
|
|
||||||
; esi: fracstep (16.16)
|
|
||||||
; ebx: add1
|
|
||||||
; ebp: add2
|
|
||||||
; dl: texel1
|
|
||||||
; dh: texel2
|
|
||||||
;[esp] count
|
|
||||||
|
|
||||||
; there are fewer than 8 pixels to draw
|
|
||||||
|
|
||||||
mov ebx,[esp] ; ebx = pixel count (< 8 on this path)
|
|
||||||
.lthan8 shr ebx,1 ; ebx = number of pixel pairs, CF = 1 if count is odd
|
|
||||||
jnc .even
|
|
||||||
|
|
||||||
; do one pixel before loop (little opportunity for pairing)
|
|
||||||
|
|
||||||
mov ebp,ecx ; copy frac to ebp
|
|
||||||
add ecx,esi ; increment frac
|
|
||||||
shr ebp,16 ; shift frac over to low end
|
|
||||||
add eax,4 ; advance dest one pixel (4-byte stride)
|
|
||||||
mov dl,[edi+ebp] ; read texel
|
|
||||||
mov [eax-4],dl ; write texel
|
|
||||||
|
|
||||||
.even test ebx,ebx ; any pairs left?
|
|
||||||
jz near .done
|
|
||||||
|
|
||||||
.loop2 mov [esp],ebx ; save counter
|
|
||||||
mov ebx,ecx ; copy frac for texel1 to ebx
|
|
||||||
shr ebx,16 ; shift frac for texel1 to low end
|
|
||||||
add ecx,esi ; increment frac
|
|
||||||
mov ebp,ecx ; copy frac for texel2 to ebp
|
|
||||||
shr ebp,16 ; shift frac for texel2 to low end
|
|
||||||
add ecx,esi ; increment frac
|
|
||||||
mov dl,[edi+ebx] ; read texel1
|
|
||||||
mov ebx,[esp] ; fetch counter
|
|
||||||
mov dh,[edi+ebp] ; read texel2
|
|
||||||
mov [eax],dl ; write texel1
|
|
||||||
mov [eax+4],dh ; write texel2
|
|
||||||
add eax,8 ; increment dest
|
|
||||||
dec ebx ; decrement counter
|
|
||||||
jnz .loop2 ; loop until it hits 0
|
|
||||||
|
|
||||||
jmp .done
|
|
||||||
|
|
||||||
; there are at least 8 pixels to draw. position eax as close to a 32 byte
|
|
||||||
; boundary as possible, then do whatever is left.
|
|
||||||
|
|
||||||
.mthan8 test eax,4 ; bit 2 of dest set: draw 1 pixel
|
|
||||||
jz .try2
|
|
||||||
|
|
||||||
mov ebp,ecx ; frac: in ebp
|
|
||||||
add ecx,esi ; step
|
|
||||||
shr ebp,16 ; frac: shift
|
|
||||||
add eax,4 ; increment dest
|
|
||||||
mov ebx,[esp] ; fetch counter
|
|
||||||
mov dl,[edi+ebp] ; tex: read
|
|
||||||
dec ebx ; decrement counter
|
|
||||||
mov [eax-4],dl ; tex: write
|
|
||||||
mov [esp],ebx ; store counter
|
|
||||||
|
|
||||||
.try2 test eax,8 ; bit 3 of dest set: draw 2 pixels
|
|
||||||
jz .try4
|
|
||||||
|
|
||||||
mov ebx,ecx ; frac1: in ebx
|
|
||||||
add ecx,esi ; step
|
|
||||||
shr ebx,16 ; frac1: shift
|
|
||||||
mov ebp,ecx ; frac2: in ebp
|
|
||||||
shr ebp,16 ; frac2: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov dl,[edi+ebx] ; tex1: read
|
|
||||||
mov ebx,[esp] ; fetch counter
|
|
||||||
mov dh,[edi+ebp] ; tex2: read
|
|
||||||
mov [eax],dl ; tex1: write
|
|
||||||
mov [eax+4],dh ; tex2: write
|
|
||||||
sub ebx,2 ; decrement counter
|
|
||||||
add eax,8 ; increment dest
|
|
||||||
mov [esp],ebx ; store counter
|
|
||||||
|
|
||||||
.try4 test eax,16 ; bit 4 of dest set: draw 4 pixels
|
|
||||||
jz .try8
|
|
||||||
|
|
||||||
mov ebx,ecx ; frac1: in ebx
|
|
||||||
add ecx,esi ; step
|
|
||||||
shr ebx,16 ; frac1: shift
|
|
||||||
mov ebp,ecx ; frac2: in ebp
|
|
||||||
shr ebp,16 ; frac2: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov dl,[edi+ebx] ; tex1: read
|
|
||||||
mov ebx,ecx ; frac3: in ebx
|
|
||||||
shr ebx,16 ; frac3: shift
|
|
||||||
mov dh,[edi+ebp] ; tex2: read
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov [eax],dl ; tex1: write
|
|
||||||
mov [eax+4],dh ; tex2: write
|
|
||||||
mov ebp,ecx ; frac4: in ebp
|
|
||||||
shr ebp,16 ; frac4: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov dl,[edi+ebx] ; tex3: read
|
|
||||||
mov ebx,[esp] ; fetch counter
|
|
||||||
mov dh,[edi+ebp] ; tex4: read
|
|
||||||
sub ebx,4 ; decrement counter
|
|
||||||
mov [esp],ebx ; store counter
|
|
||||||
mov [eax+8],dl ; tex3: write
|
|
||||||
mov [eax+12],dh ; tex4: write
|
|
||||||
add eax,16 ; increment dest
|
|
||||||
|
|
||||||
.try8 mov ebx,[esp] ; make counter count groups of 8
|
|
||||||
sub esp,4 ; reserve a scratch slot for the group counter; pixel count is now at [esp+4]
|
|
||||||
shr ebx,3 ; ebx = groups of 8, ZF set if there are none
|
|
||||||
jmp .tail8 ; enter the loop at its test, using ZF from the shr
|
|
||||||
|
|
||||||
align 16
|
|
||||||
|
|
||||||
.loop8 mov [esp],ebx ; save counter
|
|
||||||
mov ebx,ecx ; frac1: in ebx
|
|
||||||
shr ebx,16 ; frac1: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov ebp,ecx ; frac2: in ebp
|
|
||||||
shr ebp,16 ; frac2: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov dl,[edi+ebx] ; tex1: read
|
|
||||||
mov ebx,ecx ; frac3: in ebx
|
|
||||||
mov dh,[edi+ebp] ; tex2: read
|
|
||||||
shr ebx,16 ; frac3: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov [eax],dl ; tex1: write
|
|
||||||
mov [eax+4],dh ; tex2: write
|
|
||||||
mov ebp,ecx ; frac4: in ebp
|
|
||||||
shr ebp,16 ; frac4: shift
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov dl,[edi+ebx] ; tex3: read
|
|
||||||
mov ebx,ecx ; frac5: in ebx
|
|
||||||
mov dh,[edi+ebp] ; tex4: read
|
|
||||||
shr ebx,16 ; frac5: shift
|
|
||||||
mov [eax+8],dl ; tex3: write
|
|
||||||
mov [eax+12],dh ; tex4: write
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov ebp,ecx ; frac6: in ebp
|
|
||||||
shr ebp,16 ; frac6: shift
|
|
||||||
mov dl,[edi+ebx] ; tex5: read
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov ebx,ecx ; frac7: in ebx
|
|
||||||
mov [eax+16],dl ; tex5: write
|
|
||||||
shr ebx,16 ; frac7: shift
|
|
||||||
mov dh,[edi+ebp] ; tex6: read
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov ebp,ecx ; frac8: in ebp
|
|
||||||
mov [eax+20],dh ; tex6: write
|
|
||||||
shr ebp,16 ; frac8: shift
|
|
||||||
add eax,32 ; increment dest pointer
|
|
||||||
mov dl,[edi+ebx] ; tex7: read
|
|
||||||
mov ebx,[esp] ; fetch counter
|
|
||||||
mov [eax-8],dl ; tex7: write
|
|
||||||
mov dh,[edi+ebp] ; tex8: read
|
|
||||||
add ecx,esi ; step
|
|
||||||
mov [eax-4],dh ; tex8: write
|
|
||||||
mov dl,[eax] ; load cache (reads the next dest group; dl is overwritten before use)
|
|
||||||
dec ebx ; decrement counter
|
|
||||||
.tail8 jnz near .loop8 ; loop if more to do
|
|
||||||
|
|
||||||
pop ebp ; discard the scratch slot; [esp] is the pixel count again
|
|
||||||
mov ebx,[esp] ; ebx = pixel count left after the alignment steps
|
|
||||||
and ebx,7 ; pixels left over after the groups of 8
|
|
||||||
jnz near .lthan8 ; draw the leftovers with the short path
|
|
||||||
|
|
||||||
.done pop eax ; discard the count slot
|
|
||||||
pop esi ; restore registers
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
pop ebp
|
|
||||||
|
|
||||||
.leave ret
|
|
||||||
|
|
||||||
|
|
||||||
;*----------------------------------------------------------------------
|
;*----------------------------------------------------------------------
|
||||||
;*
|
;*
|
||||||
;* rt_copy1col_asm
|
;* rt_copy1col_asm
|
||||||
|
|
Loading…
Reference in a new issue