From 93163d12f121704a0c70a915e5c56d500524fcad Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Sun, 4 Dec 2016 15:31:08 +0100 Subject: [PATCH] - removed R_DrawColumnHorizP_ASM completely after discovering that the compiler-generated code isn't really any worse than the old assembly code. This looks like something that may have been relevant 10 years ago, but today it looks like there's no need for hand optimization here anymore. And since it appears to be broken anyway, off this goes. --- src/asm_ia32/tmap.asm | 227 ------------------------------------------ 1 file changed, 227 deletions(-) diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm index fb372d488..d9e689ee1 100644 --- a/src/asm_ia32/tmap.asm +++ b/src/asm_ia32/tmap.asm @@ -837,233 +837,6 @@ _R_DrawFuzzColumnP_ASM: ret -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnHorizP_ASM -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnHorizP_ASM@0 -GLOBAL _R_DrawColumnHorizP_ASM -GLOBAL R_DrawColumnHorizP_ASM - - align 16 - -@R_DrawColumnHorizP_ASM@0: -_R_DrawColumnHorizP_ASM: -R_DrawColumnHorizP_ASM: - -; count = dc_yh - dc_yl; - - mov eax,[dc_yh] - mov ecx,[dc_yl] - sub eax,ecx - mov edx,[dc_x] - - jl near .leave ; count < 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - - inc eax ; make 0 count mean 0 pixels - and edx,3 - push eax - mov eax,[dc_temp] - mov esi,[dc_ctspan+edx*4] - add eax,edx - lea eax,[eax+ecx*4] ; eax = top of column in buffer - mov ebp,[dc_yh] - mov [esi],ecx - mov [esi+4],ebp - add esi,8 - mov edi,[dc_source] - mov [dc_ctspan+edx*4],esi - mov esi,[dc_iscale] - mov ecx,[dc_texturefrac] ; ecx = frac - mov dl,[edi] ; load cache - mov ebx,[esp] - and ebx,0xfffffff8 - jnz .mthan8 - -; Register usage in the following code is: -; -; eax: dest -; edi: source -; ecx: frac (16.16) -; esi: fracstep (16.16) -; ebx: add1 -; ebp: add2 -; dl: texel1 
-; dh: texel2 -;[esp] count - -; there are fewer than 8 pixels to draw - - mov ebx,[esp] -.lthan8 shr ebx,1 - jnc .even - -; do one pixel before loop (little opportunity for pairing) - - mov ebp,ecx ; copy frac to ebx - add ecx,esi ; increment frac - shr ebp,16 ; shift frac over to low end - add eax,4 - mov dl,[edi+ebp] - mov [eax-4],dl - -.even test ebx,ebx - jz near .done - -.loop2 mov [esp],ebx ; save counter - mov ebx,ecx ; copy frac for texel1 to ebx - shr ebx,16 ; shift frac for texel1 to low end - add ecx,esi ; increment frac - mov ebp,ecx ; copy frac for texel2 to ebp - shr ebp,16 ; shift frac for texel2 to low end - add ecx,esi ; increment frac - mov dl,[edi+ebx] ; read texel1 - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; read texel2 - mov [eax],dl ; write texel1 - mov [eax+4],dh ; write texel2 - add eax,8 ; increment dest - dec ebx ; decrement counter - jnz .loop2 ; loop until it hits 0 - - jmp .done - -; there are more than 8 pixels to draw. position eax as close to a 32 byte -; boundary as possible, then do whatever is left. 
- -.mthan8 test eax,4 - jz .try2 - - mov ebp,ecx ; frac: in ebp - add ecx,esi ; step - shr ebp,16 ; frac: shift - add eax,4 ; increment dest - mov ebx,[esp] ; fetch counter - mov dl,[edi+ebp] ; tex: read - dec ebx ; decrement counter - mov [eax-4],dl ; tex: write - mov [esp],ebx ; store counter - -.try2 test eax,8 - jz .try4 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex2: read - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - sub ebx,2 ; decrement counter - add eax,8 ; increment dest - mov [esp],ebx ; store counter - -.try4 test eax,16 - jz .try8 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - shr ebx,16 ; frac3: shift - mov dh,[edi+ebp] ; tex2: read - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex4: read - sub ebx,4 ; decrement counter - mov [esp],ebx ; store counter - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add eax,16 ; increment dest - -.try8 mov ebx,[esp] ; make counter count groups of 8 - sub esp,4 - shr ebx,3 - jmp .tail8 - - align 16 - -.loop8 mov [esp],ebx ; save counter - mov ebx,ecx ; frac1: in ebx - shr ebx,16 ; frac1: shift - add ecx,esi ; step - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - mov dh,[edi+ebp] ; tex2: read - shr ebx,16 ; frac3: shift - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp 
- shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,ecx ; frac5: in ebx - mov dh,[edi+ebp] ; tex4: read - shr ebx,16 ; frac5: shift - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add ecx,esi ; step - mov ebp,ecx ; frac6: in ebp - shr ebp,16 ; frac6: shift - mov dl,[edi+ebx] ; tex5: read - add ecx,esi ; step - mov ebx,ecx ; frac7: in ebx - mov [eax+16],dl ; tex5: write - shr ebx,16 ; frac7: shift - mov dh,[edi+ebp] ; tex6: read - add ecx,esi ; step - mov ebp,ecx ; frac8: in ebp - mov [eax+20],dh ; tex6: write - shr ebp,16 ; frac8: shift - add eax,32 ; increment dest pointer - mov dl,[edi+ebx] ; tex7: read - mov ebx,[esp] ; fetch counter - mov [eax-8],dl ; tex7: write - mov dh,[edi+ebp] ; tex8: read - add ecx,esi ; step - mov [eax-4],dh ; tex8: write - mov dl,[eax] ; load cache - dec ebx ; decrement counter -.tail8 jnz near .loop8 ; loop if more to do - - pop ebp - mov ebx,[esp] - and ebx,7 - jnz near .lthan8 - -.done pop eax - pop esi - pop edi - pop ebx - pop ebp -.leave ret - - ;*---------------------------------------------------------------------- ;* ;* rt_copy1col_asm