From 13efb349646e8de132b78e58d9b703c94758113b Mon Sep 17 00:00:00 2001
From: Christoph Oelckers <c.oelckers@users.noreply.github.com>
Date: Sun, 4 Dec 2016 18:05:34 +0100
Subject: [PATCH] - removed the asm version of R_DrawColumn because it doesn't
 provide any significant benefit.

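On modern systems it is actually slower than the C version; only on old ones is it marginally faster. However, the overall execution time for this function is so low that, even in the worst-case scenario, the minor loss of performance on older systems is not relevant.

R_DrawColumn is now a plain C function instead of a function pointer. The same applies to R_DrawSpanTranslucent, R_DrawSpanMaskedTranslucent, R_DrawSpanAddClamp and R_DrawSpanMaskedAddClamp, which only ever pointed to their C implementations.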
---
 src/asm_ia32/tmap.asm | 103 ------------------------------------------
 src/r_draw.cpp        |  25 +++-------
 src/r_draw.h          |  17 +++----
 3 files changed, 13 insertions(+), 132 deletions(-)

diff --git a/src/asm_ia32/tmap.asm b/src/asm_ia32/tmap.asm
index d9e689ee1..4ffcc370d 100644
--- a/src/asm_ia32/tmap.asm
+++ b/src/asm_ia32/tmap.asm
@@ -598,106 +598,6 @@ dmsdone	add esp,8
 
 
 
-;*----------------------------------------------------------------------
-;*
-;* R_DrawColumnP
-;*
-;*----------------------------------------------------------------------
-
-GLOBAL	@R_DrawColumnP_ASM@0
-GLOBAL	_R_DrawColumnP_ASM
-GLOBAL	R_DrawColumnP_ASM
-
-	align 16
-
-R_DrawColumnP_ASM:
-_R_DrawColumnP_ASM:
-@R_DrawColumnP_ASM@0:
-
-; count = dc_yh - dc_yl;
-
-	mov	ecx,[dc_count]
-	test	ecx,ecx
-	jle	near rdcpret		; count <= 0: nothing to do, so leave
-
-	push	ebp			; save registers
-	 push	ebx
-	push	edi
-	 push	esi
-
-; dest = ylookup[dc_yl] + dc_x + dc_destorg;
-
-	mov	edi,[dc_dest]
-	mov	ebp,ecx
-	mov	ebx,[dc_texturefrac]	; ebx = frac
-rdcp1:	sub	edi,SPACEFILLER4
-	mov	ecx,ebx
-	shr	ecx,16
-	mov	esi,[dc_source]
-	mov	edx,[dc_iscale]
-	mov	eax,[dc_colormap]
-
-	cmp	BYTE [CPU+66],byte 5
-	jg	rdcploop2
-
-	align 16
-
-; The registers should now look like this:
-;
-;	[31  ..  16][15 .. 8][7 .. 0]
-; eax	[colormap		    ]
-; ebx	[yi	   ][yf		    ]
-; ecx	[scratch		    ]
-; edx	[dyi	   ][dyf	    ]
-; esi	[source texture column	    ]
-; edi	[destination screen pointer ]
-; ebp	[counter		    ]
-;
-
-
-; Note the partial register stalls on anything better than a Pentium
-; That's why there are two versions of this loop.
-
-rdcploop:
-	mov	cl,[esi+ecx]		; Fetch texel
-	 xor	ch,ch
-	add	ebx,edx			; increment frac
-rdcp2:	 add	edi,SPACEFILLER4	; increment destination pointer
-	mov	cl,[eax+ecx]		; colormap texel
-	mov	[edi],cl		; Store texel
-	 mov	ecx,ebx
-	shr	ecx,16
-	 dec	ebp
-	jnz	rdcploop		; loop
-
-	pop	esi
-	 pop	edi
-	pop	ebx
-	 pop	ebp
-rdcpret:
-	ret
-	
-	align 16
-
-rdcploop2:
-	movzx	ecx,byte [esi+ecx]	; Fetch texel
-	add	ebx,edx			; increment frac
-	mov	cl,[eax+ecx]		; colormap texel
-rdcp3:	add	edi,SPACEFILLER4	; increment destination pointer
-	mov	[edi],cl		; Store texel
-	mov	ecx,ebx
-	shr	ecx,16
-	dec	ebp
-	jnz	rdcploop2		; loop
-
-	pop	esi
-	pop	edi
-	pop	ebx
-	pop	ebp
-	ret
-	
-
-
 ;*----------------------------------------------------------------------
 ;*
 ;* R_DrawFuzzColumnP
@@ -1648,9 +1548,6 @@ ASM_PatchPitch:
 _ASM_PatchPitch:
 @ASM_PatchPitch@0:
 		mov		eax,[dc_pitch]
-		mov		[rdcp1+2],eax
-		mov		[rdcp2+2],eax
-		mov		[rdcp3+2],eax
 		mov		[s4p+1],eax
 		mov		[a4p+1],eax
 		mov		[ac4p+1],eax
diff --git a/src/r_draw.cpp b/src/r_draw.cpp
index 099904f25..7d8ebe036 100644
--- a/src/r_draw.cpp
+++ b/src/r_draw.cpp
@@ -69,16 +69,11 @@ int 			scaledviewwidth;
 //		These get changed depending on the current
 //		screen depth and asm/no asm.
 void (*R_DrawColumnHoriz)(void);
-void (*R_DrawColumn)(void);
 void (*R_DrawFuzzColumn)(void);
 void (*R_DrawTranslatedColumn)(void);
 void (*R_DrawShadedColumn)(void);
 void (*R_DrawSpan)(void);
 void (*R_DrawSpanMasked)(void);
-void (*R_DrawSpanTranslucent)(void);
-void (*R_DrawSpanMaskedTranslucent)(void);
-void (*R_DrawSpanAddClamp)(void);
-void (*R_DrawSpanMaskedAddClamp)(void);
 void (*rt_map4cols)(int,int,int);
 
 //
@@ -171,7 +166,6 @@ void R_InitShadeMaps()
 /*									*/
 /************************************/
 
-#ifndef	X86_ASM
 //
 // A column is a vertical slice/span from a wall texture that,
 //	given the DOOM style restrictions on the view orientation,
@@ -179,7 +173,7 @@ void R_InitShadeMaps()
 // Thus a special case loop for very fast rendering can
 //	be used. It has also been used with Wolfenstein 3D.
 // 
-void R_DrawColumnP_C (void)
+void R_DrawColumn (void)
 {
 	int 				count;
 	BYTE*				dest;
@@ -222,7 +216,7 @@ void R_DrawColumnP_C (void)
 		} while (--count);
 	}
 } 
-#endif
+
 
 // [RH] Just fills a column with a color
 void R_FillColumnP (void)
@@ -1192,7 +1186,7 @@ void R_DrawSpanMaskedP_C (void)
 }
 #endif
 
-void R_DrawSpanTranslucentP_C (void)
+void R_DrawSpanTranslucent (void)
 {
 	dsfixed_t			xfrac;
 	dsfixed_t			yfrac;
@@ -1252,7 +1246,7 @@ void R_DrawSpanTranslucentP_C (void)
 	}
 }
 
-void R_DrawSpanMaskedTranslucentP_C (void)
+void R_DrawSpanMaskedTranslucent (void)
 {
 	dsfixed_t			xfrac;
 	dsfixed_t			yfrac;
@@ -1326,7 +1320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void)
 	}
 }
 
-void R_DrawSpanAddClampP_C (void)
+void R_DrawSpanAddClamp (void)
 {
 	dsfixed_t			xfrac;
 	dsfixed_t			yfrac;
@@ -1392,7 +1386,7 @@ void R_DrawSpanAddClampP_C (void)
 	}
 }
 
-void R_DrawSpanMaskedAddClampP_C (void)
+void R_DrawSpanMaskedAddClamp (void)
 {
 	dsfixed_t			xfrac;
 	dsfixed_t			yfrac;
@@ -2550,12 +2544,10 @@ const BYTE *R_GetColumn (FTexture *tex, int col)
 	return tex->GetColumn (col, NULL);
 }
 
-
 // [RH] Initialize the column drawer pointers
 void R_InitColumnDrawers ()
 {
 #ifdef X86_ASM
-	R_DrawColumn				= R_DrawColumnP_ASM;
 	R_DrawColumnHoriz			= R_DrawColumnHorizP_C;
 	R_DrawFuzzColumn			= R_DrawFuzzColumnP_ASM;
 	R_DrawTranslatedColumn		= R_DrawTranslatedColumnP_C;
@@ -2572,7 +2564,6 @@ void R_InitColumnDrawers ()
 	}
 #else
 	R_DrawColumnHoriz			= R_DrawColumnHorizP_C;
-	R_DrawColumn				= R_DrawColumnP_C;
 	R_DrawFuzzColumn			= R_DrawFuzzColumnP_C;
 	R_DrawTranslatedColumn		= R_DrawTranslatedColumnP_C;
 	R_DrawShadedColumn			= R_DrawShadedColumnP_C;
@@ -2580,10 +2571,6 @@ void R_InitColumnDrawers ()
 	R_DrawSpanMasked			= R_DrawSpanMaskedP_C;
 	rt_map4cols					= rt_map4cols_c;
 #endif
-	R_DrawSpanTranslucent		= R_DrawSpanTranslucentP_C;
-	R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C;
-	R_DrawSpanAddClamp			= R_DrawSpanAddClampP_C;
-	R_DrawSpanMaskedAddClamp	= R_DrawSpanMaskedAddClampP_C;
 }
 
 // [RH] Choose column drawers in a single place
diff --git a/src/r_draw.h b/src/r_draw.h
index 7d574c1c0..c5e99e610 100644
--- a/src/r_draw.h
+++ b/src/r_draw.h
@@ -65,7 +65,6 @@ extern "C" unsigned int	horizspans[4];
 
 // The span blitting interface.
 // Hook in assembler or system specific BLT here.
-extern void (*R_DrawColumn)(void);
 
 extern DWORD (*dovline1) ();
 extern DWORD (*doprevline1) ();
@@ -103,16 +102,16 @@ void R_SetSpanSource(const BYTE *pixels);
 extern void (*R_DrawSpanMasked)(void);
 
 // Span drawing for translucent textures.
-extern void (*R_DrawSpanTranslucent)(void);
+void R_DrawSpanTranslucent(void);
 
 // Span drawing for masked, translucent textures.
-extern void (*R_DrawSpanMaskedTranslucent)(void);
+void R_DrawSpanMaskedTranslucent(void);
 
 // Span drawing for translucent, additive textures.
-extern void (*R_DrawSpanAddClamp)(void);
+void R_DrawSpanAddClamp(void);
 
 // Span drawing for masked, translucent, additive textures.
-extern void (*R_DrawSpanMaskedAddClamp)(void);
+void R_DrawSpanMaskedAddClamp(void);
 
 // [RH] Span blit into an interleaved intermediate buffer
 extern void (*R_DrawColumnHoriz)(void);
@@ -193,8 +192,6 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
 
 #ifdef X86_ASM
 
-extern "C" void	R_DrawColumnP_Unrolled (void);
-extern "C" void	R_DrawColumnP_ASM (void);
 extern "C" void	R_DrawFuzzColumnP_ASM (void);
 		   void R_DrawShadedColumnP_C (void);
 extern "C" void	R_DrawSpanP_ASM (void);
@@ -204,7 +201,6 @@ void	R_DrawColumnHorizP_C(void);
 
 #else
 
-void	R_DrawColumnP_C (void);
 void	R_DrawFuzzColumnP_C (void);
 void	R_DrawShadedColumnP_C (void);
 void	R_DrawSpanP_C (void);
@@ -212,10 +208,11 @@ void	R_DrawSpanMaskedP_C (void);
 
 #endif
 
+void	R_DrawColumn();
 void	R_DrawColumnHorizP_C(void);
 void	R_DrawTranslatedColumnP_C(void);
-void	R_DrawSpanTranslucentP_C (void);
-void	R_DrawSpanMaskedTranslucentP_C (void);
+void	R_DrawSpanTranslucent (void);
+void	R_DrawSpanMaskedTranslucent (void);
 
 void	R_DrawTlatedLucentColumnP_C (void);
 #define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C