- removed the asm version of R_DrawColumn because it doesn't provide any significant benefit.

On modern systems it is actually slower than the C version; only on old systems is it marginally faster. However, the overall execution time of this function is so low that, even in the worst case, the minor loss of performance on older systems is not relevant.
This commit is contained in:
Christoph Oelckers 2016-12-04 18:05:34 +01:00
parent 8fd03bc4a3
commit 13efb34964
3 changed files with 13 additions and 132 deletions

View file

@ -598,106 +598,6 @@ dmsdone add esp,8
;*----------------------------------------------------------------------
;*
;* R_DrawColumnP
;*
;* Draws one colormapped texture column, one byte per destination row.
;* 32-bit x86 code, NASM syntax. Takes no register or stack arguments;
;* every input is read from a global variable:
;*
;*   dc_count        number of pixels to draw (returns at once if <= 0)
;*   dc_dest         address of the first destination pixel
;*   dc_texturefrac  starting texture coordinate, 16.16 fixed point
;*   dc_iscale       texture coordinate step per pixel, 16.16 fixed point
;*   dc_source       base address of the source texture column
;*   dc_colormap     base address of the byte table applied to each texel
;*
;* Per pixel:  *dest = dc_colormap[dc_source[frac >> 16]];
;*             dest += pitch;  frac += dc_iscale;
;*
;* frac >> 16 is used as the texture index without any masking or
;* range check.
;* NOTE(review): presumably the caller guarantees the index stays inside
;* the column - confirm against the callers.
;*
;* Preserves: ebp, ebx, edi, esi (pushed on entry, popped on exit)
;* Clobbers:  eax, ecx, edx, flags
;*
;* Self-modifying code: the SPACEFILLER4 operands below appear to be
;* placeholders only. ASM_PatchPitch stores dc_pitch at rdcp1+2, rdcp2+2
;* and rdcp3+2, i.e. over the 32-bit immediates of the instructions that
;* carry those labels. The labels must therefore stay on exactly these
;* instructions and the instruction forms must not be changed.
;*
;*----------------------------------------------------------------------
; The same entry point is exported under three spellings.
; NOTE(review): presumably to match the symbol decoration used by
; different compilers/calling conventions - confirm.
GLOBAL @R_DrawColumnP_ASM@0
GLOBAL _R_DrawColumnP_ASM
GLOBAL R_DrawColumnP_ASM
align 16
R_DrawColumnP_ASM:
_R_DrawColumnP_ASM:
@R_DrawColumnP_ASM@0:
; ecx = dc_count (loop iteration count)
mov ecx,[dc_count]
test ecx,ecx
; Signed test. This exit is taken before anything is pushed, which is
; why rdcpret consists of a bare ret.
jle near rdcpret ; count <= 0: nothing to do, so leave
push ebp ; save registers
push ebx
push edi
push esi
; edi = dc_dest (destination pointer for the first pixel)
mov edi,[dc_dest]
mov ebp,ecx ; ebp = loop counter, frees ecx for use as scratch
mov ebx,[dc_texturefrac] ; ebx = frac
; Step the destination back by one pitch: both loops add the pitch
; *before* storing, so the first store lands on dc_dest itself.
rdcp1: sub edi,SPACEFILLER4
mov ecx,ebx
shr ecx,16 ; ecx = integer part of frac = first texture index
mov esi,[dc_source]
mov edx,[dc_iscale]
mov eax,[dc_colormap]
; Pick the loop variant from the byte at CPU+66: values above 5 take the
; movzx-based loop (rdcploop2), everything else falls through into
; rdcploop.
; NOTE(review): presumably this byte is the detected CPU family - confirm.
cmp BYTE [CPU+66],byte 5
jg rdcploop2
align 16
; The registers should now look like this:
;
; [31 .. 16][15 .. 8][7 .. 0]
; eax [colormap ]
; ebx [yi ][yf ]
; ecx [scratch ]
; edx [dyi ][dyf ]
; esi [source texture column ]
; edi [destination screen pointer ]
; ebp [counter ]
;
; Note the partial register stalls on anything better than a Pentium
; That's why there are two versions of this loop.
;
; Variant 1: byte loads into cl plus an explicit clear of ch.
rdcploop:
mov cl,[esi+ecx] ; Fetch texel
; ecx held frac >> 16, so bits 8-15 may still carry index bits; clear
; them so that ecx is exactly the texel value for the colormap lookup.
xor ch,ch
add ebx,edx ; increment frac
rdcp2: add edi,SPACEFILLER4 ; increment destination pointer
mov cl,[eax+ecx] ; colormap texel
mov [edi],cl ; Store texel
mov ecx,ebx
shr ecx,16 ; ecx = texture index for the next pixel
dec ebp
jnz rdcploop ; loop
pop esi ; restore registers in reverse push order
pop edi
pop ebx
pop ebp
rdcpret:
ret
align 16
; Variant 2: same algorithm, but the texel is fetched with movzx so the
; whole of ecx is written at once instead of cl followed by xor ch,ch.
rdcploop2:
movzx ecx,byte [esi+ecx] ; Fetch texel
add ebx,edx ; increment frac
mov cl,[eax+ecx] ; colormap texel
rdcp3: add edi,SPACEFILLER4 ; increment destination pointer
mov [edi],cl ; Store texel
mov ecx,ebx
shr ecx,16 ; ecx = texture index for the next pixel
dec ebp
jnz rdcploop2 ; loop
pop esi ; restore registers in reverse push order
pop edi
pop ebx
pop ebp
ret
;*----------------------------------------------------------------------
;*
;* R_DrawFuzzColumnP
@ -1648,9 +1548,6 @@ ASM_PatchPitch:
_ASM_PatchPitch:
@ASM_PatchPitch@0:
mov eax,[dc_pitch]
mov [rdcp1+2],eax
mov [rdcp2+2],eax
mov [rdcp3+2],eax
mov [s4p+1],eax
mov [a4p+1],eax
mov [ac4p+1],eax

View file

@ -69,16 +69,11 @@ int scaledviewwidth;
// These get changed depending on the current
// screen depth and asm/no asm.
void (*R_DrawColumnHoriz)(void);
void (*R_DrawColumn)(void);
void (*R_DrawFuzzColumn)(void);
void (*R_DrawTranslatedColumn)(void);
void (*R_DrawShadedColumn)(void);
void (*R_DrawSpan)(void);
void (*R_DrawSpanMasked)(void);
void (*R_DrawSpanTranslucent)(void);
void (*R_DrawSpanMaskedTranslucent)(void);
void (*R_DrawSpanAddClamp)(void);
void (*R_DrawSpanMaskedAddClamp)(void);
void (*rt_map4cols)(int,int,int);
//
@ -171,7 +166,6 @@ void R_InitShadeMaps()
/* */
/************************************/
#ifndef X86_ASM
//
// A column is a vertical slice/span from a wall texture that,
// given the DOOM style restrictions on the view orientation,
@ -179,7 +173,7 @@ void R_InitShadeMaps()
// Thus a special case loop for very fast rendering can
// be used. It has also been used with Wolfenstein 3D.
//
void R_DrawColumnP_C (void)
void R_DrawColumn (void)
{
int count;
BYTE* dest;
@ -222,7 +216,7 @@ void R_DrawColumnP_C (void)
} while (--count);
}
}
#endif
// [RH] Just fills a column with a color
void R_FillColumnP (void)
@ -1192,7 +1186,7 @@ void R_DrawSpanMaskedP_C (void)
}
#endif
void R_DrawSpanTranslucentP_C (void)
void R_DrawSpanTranslucent (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1252,7 +1246,7 @@ void R_DrawSpanTranslucentP_C (void)
}
}
void R_DrawSpanMaskedTranslucentP_C (void)
void R_DrawSpanMaskedTranslucent (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1326,7 +1320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void)
}
}
void R_DrawSpanAddClampP_C (void)
void R_DrawSpanAddClamp (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -1392,7 +1386,7 @@ void R_DrawSpanAddClampP_C (void)
}
}
void R_DrawSpanMaskedAddClampP_C (void)
void R_DrawSpanMaskedAddClamp (void)
{
dsfixed_t xfrac;
dsfixed_t yfrac;
@ -2550,12 +2544,10 @@ const BYTE *R_GetColumn (FTexture *tex, int col)
return tex->GetColumn (col, NULL);
}
// [RH] Initialize the column drawer pointers
void R_InitColumnDrawers ()
{
#ifdef X86_ASM
R_DrawColumn = R_DrawColumnP_ASM;
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
@ -2572,7 +2564,6 @@ void R_InitColumnDrawers ()
}
#else
R_DrawColumnHoriz = R_DrawColumnHorizP_C;
R_DrawColumn = R_DrawColumnP_C;
R_DrawFuzzColumn = R_DrawFuzzColumnP_C;
R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C;
R_DrawShadedColumn = R_DrawShadedColumnP_C;
@ -2580,10 +2571,6 @@ void R_InitColumnDrawers ()
R_DrawSpanMasked = R_DrawSpanMaskedP_C;
rt_map4cols = rt_map4cols_c;
#endif
R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C;
R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
R_DrawSpanAddClamp = R_DrawSpanAddClampP_C;
R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C;
}
// [RH] Choose column drawers in a single place

View file

@ -65,7 +65,6 @@ extern "C" unsigned int horizspans[4];
// The span blitting interface.
// Hook in assembler or system specific BLT here.
extern void (*R_DrawColumn)(void);
extern DWORD (*dovline1) ();
extern DWORD (*doprevline1) ();
@ -103,16 +102,16 @@ void R_SetSpanSource(const BYTE *pixels);
extern void (*R_DrawSpanMasked)(void);
// Span drawing for translucent textures.
extern void (*R_DrawSpanTranslucent)(void);
void R_DrawSpanTranslucent(void);
// Span drawing for masked, translucent textures.
extern void (*R_DrawSpanMaskedTranslucent)(void);
void R_DrawSpanMaskedTranslucent(void);
// Span drawing for translucent, additive textures.
extern void (*R_DrawSpanAddClamp)(void);
void R_DrawSpanAddClamp(void);
// Span drawing for masked, translucent, additive textures.
extern void (*R_DrawSpanMaskedAddClamp)(void);
void R_DrawSpanMaskedAddClamp(void);
// [RH] Span blit into an interleaved intermediate buffer
extern void (*R_DrawColumnHoriz)(void);
@ -193,8 +192,6 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
#ifdef X86_ASM
extern "C" void R_DrawColumnP_Unrolled (void);
extern "C" void R_DrawColumnP_ASM (void);
extern "C" void R_DrawFuzzColumnP_ASM (void);
void R_DrawShadedColumnP_C (void);
extern "C" void R_DrawSpanP_ASM (void);
@ -204,7 +201,6 @@ void R_DrawColumnHorizP_C(void);
#else
void R_DrawColumnP_C (void);
void R_DrawFuzzColumnP_C (void);
void R_DrawShadedColumnP_C (void);
void R_DrawSpanP_C (void);
@ -212,10 +208,11 @@ void R_DrawSpanMaskedP_C (void);
#endif
void R_DrawColumn();
void R_DrawColumnHorizP_C(void);
void R_DrawTranslatedColumnP_C(void);
void R_DrawSpanTranslucentP_C (void);
void R_DrawSpanMaskedTranslucentP_C (void);
void R_DrawSpanTranslucent (void);
void R_DrawSpanMaskedTranslucent (void);
void R_DrawTlatedLucentColumnP_C (void);
#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C