diff --git a/docs/rh-log.txt b/docs/rh-log.txt
index 0a6ac35dba..28ae704cac 100644
--- a/docs/rh-log.txt
+++ b/docs/rh-log.txt
@@ -1,3 +1,11 @@
+August 11, 2008
+- Ported asm_x86_64/tmap3.asm to AT&T syntax so it can be compiled with gas.
+  After finding out that gas does have directives to describe the .eh_frame
+  metadata, I figured that would be significantly easier and quicker than
+  trying to locate all the scattered docs needed to construct it by hand.
+  Unfortunately, this now means I have to maintain two versions of exactly
+  the same code. :(
+
 August 11, 2008 (Changes by Graf Zahl)
 - Removed 'eval' modifier from DECORATE. All int, float and bool parameters are
   'eval' now by default.
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 804f91f9e8..52aabec8bd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -168,24 +168,36 @@ endif( FMOD_LIBRARY )
 # Search for NASM
 
 if( NOT NO_ASM )
-	find_program( NASM_PATH NAMES ${NASM_NAMES} )
-	find_program( YASM_PATH yasm )
-
-	if( YASM_PATH )
-		set( ASSEMBLER ${YASM_PATH} )
-	else( YASM_PATH )
-		if( X64 )
-			message( STATUS "Could not find YASM. Disabling assembly code." )
+	if( UNIX AND X64 )
+		# 64-bit Unix builds assemble the AT&T-syntax sources with the GNU assembler (as).
+		find_program( GAS_PATH as )
+		
+		if( GAS_PATH )
+			set( ASSEMBLER ${GAS_PATH} )
+		else( GAS_PATH )
+			message( STATUS "Could not find as. Disabling assembly code." )
 			set( NO_ASM ON )
-		else( X64 )
-			if( NOT NASM_PATH )
-				message( STATUS "Could not find YASM or NASM. Disabling assembly code." )
+		endif( GAS_PATH )
+	else( UNIX AND X64 )
+		find_program( NASM_PATH NAMES ${NASM_NAMES} )
+		find_program( YASM_PATH yasm )
+
+		if( YASM_PATH )
+			set( ASSEMBLER ${YASM_PATH} )
+		else( YASM_PATH )
+			if( X64 )
+				message( STATUS "Could not find YASM. Disabling assembly code." )
 				set( NO_ASM ON )
-			else( NOT NASM_PATH )
-				set( ASSEMBLER ${NASM_PATH} )
-			endif( NOT NASM_PATH )
-		endif( X64 )
-	endif( YASM_PATH )
+			else( X64 )
+				if( NOT NASM_PATH )
+					message( STATUS "Could not find YASM or NASM. Disabling assembly code." )
+					set( NO_ASM ON )
+				else( NOT NASM_PATH )
+					set( ASSEMBLER ${NASM_PATH} )
+				endif( NOT NASM_PATH )
+			endif( X64 )
+		endif( YASM_PATH )
+	endif( UNIX AND X64 )
 
 	# I think the only reason there was a version requirement was because the
 	# executable name for Windows changed from 0.x to 2.0, right? This is
@@ -211,16 +223,20 @@ if( NOT NO_ASM )
 	if( UNIX )
 		set( ASM_OUTPUT_EXTENSION .o )
 		if( X64 )
-			set( ASM_FLAGS -f elf64 -DM_TARGET_LINUX )
+			# The GNU assembler is run without extra flags on the .s sources.
+			set( ASM_FLAGS )
+			set( ASM_SOURCE_EXTENSION .s )
 		else( X64 )
-			set( ASM_FLAGS -f elf -DM_TARGET_LINUX )
+			set( ASM_FLAGS -f elf -DM_TARGET_LINUX -i${CMAKE_CURRENT_SOURCE_DIR}/ )
+			set( ASM_SOURCE_EXTENSION .asm )
 		endif( X64 )
 	else( UNIX )
 		set( ASM_OUTPUT_EXTENSION .obj )
+		set( ASM_SOURCE_EXTENSION .asm )
 		if( X64 )
 			set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
 		else( X64 )
-			set( ASM_FLAGS -f win32 -DWIN32 )
+			set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
 		endif( X64 )
 	endif( UNIX )
 	if( WIN32 )
@@ -234,7 +250,8 @@ if( NOT NO_ASM )
 		endif( WIN32 )
+		# The dependency must name the same source file that is handed to the assembler.
 		add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
 			COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
-			COMMAND ${ASSEMBLER} ${ASM_FLAGS} -i${CMAKE_CURRENT_SOURCE_DIR}/ -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}.asm"
+			COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
 			${FIXRTEXT_${infile}}
-			DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} )
+			DEPENDS ${indir}/${infile}${ASM_SOURCE_EXTENSION} ${FIXRTEXT} )
 		set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
diff --git a/src/asm_x86_64/tmap3.asm b/src/asm_x86_64/tmap3.asm
index f1b8a31bea..120bf5ae9f 100644
--- a/src/asm_x86_64/tmap3.asm
+++ b/src/asm_x86_64/tmap3.asm
@@ -1,36 +1,10 @@
-%include "valgrind.inc"
-
+; Win64-only build of these routines; other x86-64 targets assemble tmap3.s.
+%ifnidn __OUTPUT_FORMAT__,win64
+%error tmap3.asm is for Win64 output. You should use tmap3.s for other systems.
+%endif
+
 BITS 64
-DEFAULT REL
-
-%ifnidn __OUTPUT_FORMAT__,win64
-
-%macro PROC_FRAME 1
-%1:
-%endmacro
-
-%macro rex_push_reg 1
-	push %1
-%endmacro
-
-%macro push_reg 1
-	push %1
-%endmacro
-
-%macro alloc_stack 1
-	sub rsp,%1
-%endmacro
-
-%define parm1lo		dil
-
-%else
-
-%define parm1lo		cl
-
-%endif
-
-SECTION .data
-
+DEFAULT REL
 EXTERN vplce
 EXTERN vince
 EXTERN palookupoffse
@@ -42,34 +16,26 @@ EXTERN dc_pitch
 
 SECTION .text
 
-ALIGN 16
 GLOBAL ASM_PatchPitch
 ASM_PatchPitch:
 	mov ecx, [dc_pitch]
 	mov [pm+3], ecx
 	mov	[vltpitch+3], ecx
-	selfmod pm, vltpitch+6
-	ret
+	ret
+	align 16
 
-ALIGN 16
 GLOBAL setupvlinetallasm
 setupvlinetallasm:
-	mov	[shifter1+2], parm1lo
-	mov	[shifter2+2], parm1lo
-	mov	[shifter3+2], parm1lo
-	mov	[shifter4+2], parm1lo
+	mov	[shifter1+2], cl
+	mov	[shifter2+2], cl
+	mov	[shifter3+2], cl
+	mov	[shifter4+2], cl
-	selfmod shifter1, shifter4+3
-	ret
+	ret
+	align 16
 
-%ifidn __OUTPUT_FORMAT__,win64
 ; Yasm can't do progbits alloc exec for win64?
 ; Hmm, looks like it's automatic. No worries, then.
-	SECTION .rtext	write ;progbits alloc exec
-%else
-	SECTION .rtext	progbits alloc exec write
-%endif
-
-ALIGN 16
+SECTION .rtext	write ;progbits alloc exec
 
 GLOBAL vlinetallasm4
 PROC_FRAME vlinetallasm4
@@ -138,7 +104,7 @@ loopit:
 			mov		edx, r8d
 shifter1:	shr		edx, 24
 step1:		add		r8d, 0x88888888
-			movzx	rdx, BYTE [rax+rdx]
+			movzx	edx, BYTE [rax+rdx]
 			mov		ebx, r9d
 			mov		dl, [r12+rdx]
 shifter2:	shr		ebx, 24
@@ -178,5 +144,8 @@ vltepilog:
 	pop		r15
 	pop		rdi
 	pop		rbx
-	ret
-ENDPROC_FRAME
+	ret
+vlinetallasm4_end:
+ENDPROC_FRAME
+	ALIGN 16
+
diff --git a/src/asm_x86_64/tmap3.s b/src/asm_x86_64/tmap3.s
new file mode 100644
index 0000000000..8a9b52e48f
--- /dev/null
+++ b/src/asm_x86_64/tmap3.s
@@ -0,0 +1,173 @@
+#%include "valgrind.inc"
+
+		.section	.text
+
+.globl ASM_PatchPitch
+ASM_PatchPitch:
+		movl 		dc_pitch(%rip), %ecx
+		movl 		%ecx, pm+3(%rip)
+		movl 		%ecx, vltpitch+3(%rip)
+#		selfmod pm, vltpitch+6
+		ret
+		.align 16
+
+.globl setupvlinetallasm
+setupvlinetallasm:
+		movb		%dil, shifter1+2(%rip)
+		movb		%dil, shifter2+2(%rip)
+		movb		%dil, shifter3+2(%rip)
+		movb		%dil, shifter4+2(%rip)
+#		selfmod shifter1, shifter4+3
+		ret
+		.align 16
+
+		.section .rtext,"awx"
+
+.globl vlinetallasm4
+		.type		vlinetallasm4,@function
+vlinetallasm4:
+		.cfi_startproc
+# Each push moves %rsp, so the CFA offset is adjusted after every one, and the
+# callee-saved registers get their save slots recorded for the unwinder.
+		push		%rbx
+		.cfi_adjust_cfa_offset	8
+		.cfi_rel_offset		%rbx, 0
+		push		%rdi
+		.cfi_adjust_cfa_offset	8
+		push		%r15
+		.cfi_adjust_cfa_offset	8
+		.cfi_rel_offset		%r15, 0
+		push		%r14
+		.cfi_adjust_cfa_offset	8
+		.cfi_rel_offset		%r14, 0
+		push		%r13
+		.cfi_adjust_cfa_offset	8
+		.cfi_rel_offset		%r13, 0
+		push		%r12
+		.cfi_adjust_cfa_offset	8
+		.cfi_rel_offset		%r12, 0
+		push		%rbp
+		.cfi_adjust_cfa_offset	8
+		.cfi_rel_offset		%rbp, 0
+		push		%rsi
+		.cfi_adjust_cfa_offset	8
+		subq		$8, %rsp	# Does the stack need to be 16-byte aligned for Linux?
+		.cfi_adjust_cfa_offset	8
+
+# rax =	bufplce base address
+# rbx = 
+# rcx = offset from rdi/count (negative)
+# edx/rdx = scratch
+# rdi = bottom of columns to write to
+# r8d-r11d = column offsets
+# r12-r15 = palookupoffse[0] - palookupoffse[3]
+
+		movl		dc_count(%rip), %ecx
+		movq		dc_dest(%rip), %rdi
+		testl		%ecx, %ecx
+		jle			vltepilog	# count must be positive
+
+		movq		bufplce(%rip), %rax
+		movq		bufplce+8(%rip), %r8
+		subq		%rax, %r8
+		movq		bufplce+16(%rip), %r9
+		subq		%rax, %r9
+		movq		bufplce+24(%rip), %r10
+		subq		%rax, %r10
+		movl		%r8d, source2+4(%rip)
+		movl		%r9d, source3+4(%rip)
+		movl		%r10d, source4+4(%rip)
+
+pm:		imulq		$320, %rcx
+
+		movq		palookupoffse(%rip), %r12
+		movq		palookupoffse+8(%rip), %r13
+		movq		palookupoffse+16(%rip), %r14
+		movq		palookupoffse+24(%rip), %r15
+
+		movl		vince(%rip), %r8d
+		movl		vince+4(%rip), %r9d
+		movl		vince+8(%rip), %r10d
+		movl		vince+12(%rip), %r11d
+		movl		%r8d, step1+3(%rip)
+		movl		%r9d, step2+3(%rip)
+		movl		%r10d, step3+3(%rip)
+		movl		%r11d, step4+3(%rip)
+
+		addq		%rcx, %rdi
+		negq		%rcx
+
+		movl		vplce(%rip), %r8d
+		movl		vplce+4(%rip), %r9d
+		movl		vplce+8(%rip), %r10d
+		movl		vplce+12(%rip), %r11d
+#		selfmod loopit, vltepilog
+		jmp			loopit
+
+		.align 16
+loopit:
+			movl	%r8d, %edx
+shifter1:	shrl	$24, %edx
+step1:		addl	$0x88888888, %r8d
+			movzbl	(%rax,%rdx), %edx
+			movl	%r9d, %ebx
+			movb	(%r12,%rdx), %dl
+shifter2:	shrl	$24, %ebx
+step2:		addl	$0x88888888, %r9d
+source2:	movzbl	0x88888888(%rax,%rbx), %ebx
+			movl	%r10d, %ebp
+			movb	(%r13,%rbx), %bl
+shifter3:	shr		$24, %ebp
+step3:		addl	$0x88888888, %r10d
+source3:	movzbl	0x88888888(%rax,%rbp), %ebp
+			movl	%r11d, %esi
+			movb	(%r14,%rbp), %bpl
+shifter4:	shr		$24, %esi
+step4:		add		$0x88888888, %r11d
+source4:	movzbl	0x88888888(%rax,%rsi), %esi
+			movb	%dl, (%rdi,%rcx)
+			movb	%bl, 1(%rdi,%rcx)
+			movb	(%r15,%rsi), %sil
+			movb	%bpl, 2(%rdi,%rcx)
+			movb	%sil, 3(%rdi,%rcx)
+
+vltpitch:	addq	$320, %rcx
+			jl		loopit
+
+		movl		%r8d, vplce(%rip)
+		movl		%r9d, vplce+4(%rip)
+		movl		%r10d, vplce+8(%rip)
+		movl		%r11d, vplce+12(%rip)
+
+vltepilog:
+		addq		$8, %rsp
+		.cfi_adjust_cfa_offset	-8
+		pop			%rsi
+		.cfi_adjust_cfa_offset	-8
+		pop			%rbp
+		.cfi_adjust_cfa_offset	-8
+		.cfi_restore		%rbp
+		pop			%r12
+		.cfi_adjust_cfa_offset	-8
+		.cfi_restore		%r12
+		pop			%r13
+		.cfi_adjust_cfa_offset	-8
+		.cfi_restore		%r13
+		pop			%r14
+		.cfi_adjust_cfa_offset	-8
+		.cfi_restore		%r14
+		pop			%r15
+		.cfi_adjust_cfa_offset	-8
+		.cfi_restore		%r15
+		pop			%rdi
+		.cfi_adjust_cfa_offset	-8
+		pop			%rbx
+		.cfi_adjust_cfa_offset	-8
+		.cfi_restore		%rbx
+		ret
+		.cfi_endproc
+		.align 16
+
+# Tell the linker this object does not need an executable stack.
+		.section	.note.GNU-stack,"",@progbits
+