- Ported asm_x86_64/tmap3.nas to AT&T syntax so it can be compiled with gas.

After finding out that gas does have directives to describe the .eh_frame
  metadata, I figured that would be significantly easier and quicker than
  trying to locate all the scattered docs I would need to read to figure out
  how to construct it by hand. Unfortunately, this now means I have to
  maintain two versions of exactly the same code. :( (But unless I add 32-bit
  color rendering in the future, the chances that I will have to touch it
  again are quite slim.)



SVN r1159 (trunk)
This commit is contained in:
Randy Heit 2008-08-12 02:49:00 +00:00
parent 9e4b2fb3ec
commit c9187a0e09
4 changed files with 203 additions and 70 deletions

View file

@ -1,3 +1,11 @@
August 11, 2008
- Ported asm_x86_64/tmap3.nas to AT&T syntax so it can be compiled with gas.
After finding out that gas does have directives to describe the .eh_frame
metadata, I figured that would be significantly easier and quicker than
trying to locate all the scattered docs needed to construct it by hand.
Unfortunately, this now means I have to maintain two versions of exactly
the same code. :(
August 11, 2008 (Changes by Graf Zahl)
- Removed 'eval' modifier from DECORATE. All int, float and bool parameters are
'eval' now by default.

View file

@ -168,6 +168,16 @@ endif( FMOD_LIBRARY )
# Search for NASM
if( NOT NO_ASM )
if( UNIX AND X64 )
find_program( GAS_PATH as )
if( GAS_PATH )
set( ASSEMBLER ${GAS_PATH} )
else( GAS_PATH )
message( STATUS "Could not find as. Disabling assembly code." )
set( NO_ASM ON )
endif( GAS_PATH )
else( UNIX AND X64 )
find_program( NASM_PATH NAMES ${NASM_NAMES} )
find_program( YASM_PATH yasm )
@ -186,6 +196,7 @@ if( NOT NO_ASM )
endif( NOT NASM_PATH )
endif( X64 )
endif( YASM_PATH )
endif( UNIX AND X64 )
# I think the only reason there was a version requirement was because the
# executable name for Windows changed from 0.x to 2.0, right? This is
@ -211,16 +222,19 @@ if( NOT NO_ASM )
if( UNIX )
set( ASM_OUTPUT_EXTENSION .o )
if( X64 )
set( ASM_FLAGS -f elf64 -DM_TARGET_LINUX )
set( ASM_FLAGS )
set( ASM_SOURCE_EXTENSION .s )
else( X64 )
set( ASM_FLAGS -f elf -DM_TARGET_LINUX )
set( ASM_FLAGS -f elf -DM_TARGET_LINUX -i${CMAKE_CURRENT_SOURCE_DIR}/ )
set( ASM_SOURCE_EXTENSION .asm )
endif( X64 )
else( UNIX )
set( ASM_OUTPUT_EXTENSION .obj )
set( ASM_SOURCE_EXTENSION .asm )
if( X64 )
set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
else( X64 )
set( ASM_FLAGS -f win32 -DWIN32 )
set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
endif( X64 )
endif( UNIX )
if( WIN32 )
@ -234,7 +248,7 @@ if( NOT NO_ASM )
endif( WIN32 )
add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -i${CMAKE_CURRENT_SOURCE_DIR}/ -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}.asm"
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
${FIXRTEXT_${infile}}
DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} )
set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )

View file

@ -1,36 +1,9 @@
%include "valgrind.inc"
%ifnidn __OUTPUT_FORMAT__,win64
%error tmap3.asm is for Win64 output. You should use tmap.s for other systems.
%endif
BITS 64
DEFAULT REL
%ifnidn __OUTPUT_FORMAT__,win64
%macro PROC_FRAME 1
%1:
%endmacro
%macro rex_push_reg 1
push %1
%endmacro
%macro push_reg 1
push %1
%endmacro
%macro alloc_stack 1
sub rsp,%1
%endmacro
%define parm1lo dil
%else
%define parm1lo cl
%endif
SECTION .data
EXTERN vplce
EXTERN vince
EXTERN palookupoffse
@ -42,7 +15,6 @@ EXTERN dc_pitch
SECTION .text
ALIGN 16
GLOBAL ASM_PatchPitch
ASM_PatchPitch:
mov ecx, [dc_pitch]
@ -50,26 +22,21 @@ ASM_PatchPitch:
mov [vltpitch+3], ecx
selfmod pm, vltpitch+6
ret
align 16
ALIGN 16
GLOBAL setupvlinetallasm
setupvlinetallasm:
mov [shifter1+2], parm1lo
mov [shifter2+2], parm1lo
mov [shifter3+2], parm1lo
mov [shifter4+2], parm1lo
mov [shifter1+2], cl
mov [shifter2+2], cl
mov [shifter3+2], cl
mov [shifter4+2], cl
selfmod shifter1, shifter4+3
ret
align 16
%ifidn __OUTPUT_FORMAT__,win64
; Yasm can't do progbits alloc exec for win64?
; Hmm, looks like it's automatic. No worries, then.
SECTION .rtext write ;progbits alloc exec
%else
SECTION .rtext progbits alloc exec write
%endif
ALIGN 16
GLOBAL vlinetallasm4
PROC_FRAME vlinetallasm4
@ -138,7 +105,7 @@ loopit:
mov edx, r8d
shifter1: shr edx, 24
step1: add r8d, 0x88888888
movzx rdx, BYTE [rax+rdx]
movzx edx, BYTE [rax+rdx]
mov ebx, r9d
mov dl, [r12+rdx]
shifter2: shr ebx, 24
@ -179,4 +146,7 @@ vltepilog:
pop rdi
pop rbx
ret
vlinetallasm4_end:
ENDPROC_FRAME
ALIGN 16

141
src/asm_x86_64/tmap3.s Normal file
View file

@ -0,0 +1,141 @@
#%include "valgrind.inc"
.section .text
.globl ASM_PatchPitch
ASM_PatchPitch:
movl dc_pitch(%rip), %ecx
movl %ecx, pm+3(%rip)
movl %ecx, vltpitch+3(%rip)
# selfmod pm, vltpitch+6
ret
.align 16
.globl setupvlinetallasm
setupvlinetallasm:
movb %dil, shifter1+2(%rip)
movb %dil, shifter2+2(%rip)
movb %dil, shifter3+2(%rip)
movb %dil, shifter4+2(%rip)
# selfmod shifter1, shifter4+3
ret
.align 16
.section .rtext,"awx"
.globl vlinetallasm4
.type vlinetallasm4,@function
vlinetallasm4:
.cfi_startproc
push %rbx
push %rdi
push %r15
push %r14
push %r13
push %r12
push %rbp
push %rsi
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
.cfi_adjust_cfa_offset 8
# rax = bufplce base address
# rbx =
# rcx = offset from rdi/count (negative)
# edx/rdx = scratch
# rdi = bottom of columns to write to
# r8d-r11d = column offsets
# r12-r15 = palookupoffse[0] - palookupoffse[4]
movl dc_count(%rip), %ecx
movq dc_dest(%rip), %rdi
testl %ecx, %ecx
jle vltepilog # count must be positive
movq bufplce(%rip), %rax
movq bufplce+8(%rip), %r8
subq %rax, %r8
movq bufplce+16(%rip), %r9
subq %rax, %r9
movq bufplce+24(%rip), %r10
subq %rax, %r10
movl %r8d, source2+4(%rip)
movl %r9d, source3+4(%rip)
movl %r10d, source4+4(%rip)
pm: imulq $320, %rcx
movq palookupoffse(%rip), %r12
movq palookupoffse+8(%rip), %r13
movq palookupoffse+16(%rip), %r14
movq palookupoffse+24(%rip), %r15
movl vince(%rip), %r8d
movl vince+4(%rip), %r9d
movl vince+8(%rip), %r10d
movl vince+12(%rip), %r11d
movl %r8d, step1+3(%rip)
movl %r9d, step2+3(%rip)
movl %r10d, step3+3(%rip)
movl %r11d, step4+3(%rip)
addq %rcx, %rdi
negq %rcx
movl vplce(%rip), %r8d
movl vplce+4(%rip), %r9d
movl vplce+8(%rip), %r10d
movl vplce+12(%rip), %r11d
# selfmod loopit, vltepilog
jmp loopit
.align 16
loopit:
movl %r8d, %edx
shifter1: shrl $24, %edx
step1: addl $0x88888888, %r8d
movzbl (%rax,%rdx), %edx
movl %r9d, %ebx
movb (%r12,%rdx), %dl
shifter2: shrl $24, %ebx
step2: addl $0x88888888, %r9d
source2: movzbl 0x88888888(%rax,%rbx), %ebx
movl %r10d, %ebp
movb (%r13,%rbx), %bl
shifter3: shr $24, %ebp
step3: addl $0x88888888, %r10d
source3: movzbl 0x88888888(%rax,%rbp), %ebp
movl %r11d, %esi
movb (%r14,%rbp), %bpl
shifter4: shr $24, %esi
step4: add $0x88888888, %r11d
source4: movzbl 0x88888888(%rax,%rsi), %esi
movb %dl, (%rdi,%rcx)
movb %bl, 1(%rdi,%rcx)
movb (%r15,%rsi), %sil
movb %bpl, 2(%rdi,%rcx)
movb %sil, 3(%rdi,%rcx)
vltpitch: addq $320, %rcx
jl loopit
movl %r8d, vplce(%rip)
movl %r9d, vplce+4(%rip)
movl %r10d, vplce+8(%rip)
movl %r11d, vplce+12(%rip)
vltepilog:
addq $8, %rsp
.cfi_adjust_cfa_offset -8
pop %rsi
pop %rbp
pop %r12
pop %r13
pop %r14
pop %r15
pop %rdi
pop %rbx
ret
.cfi_endproc
.align 16