mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2025-01-18 21:21:36 +00:00
- Ported asm_x86_64/tmap3.nas to AT&T syntax so it can be compiled with gas.
After finding out that gas does have directives to describe the .eh_frame metadata, I figured that would be significantly easier and quicker than trying to locate all the scattered docs I would need to read to figure out how to construct it by hand. Unfortunately, this now means I have to maintain two versions of exactly the same code. :( (But unless I add 32-bit color rendering in the future, the chances that I will have to touch it again are quite slim.) SVN r1159 (trunk)
This commit is contained in:
parent
9e4b2fb3ec
commit
c9187a0e09
4 changed files with 203 additions and 70 deletions
|
@ -1,3 +1,11 @@
|
||||||
|
August 11, 2008
|
||||||
|
- Ported asm_x86_64/tmap3.nas to AT&T syntax so it can be compiled with gas.
|
||||||
|
After finding out that gas does have directives to describe the .eh_frame
|
||||||
|
metadata, I figured that would be significantly easier and quicker than
|
||||||
|
trying to locate all the scattered docs needed to construct it by hand.
|
||||||
|
Unfortunately, this now means I have to maintain two versions of exactly
|
||||||
|
the same code. :(
|
||||||
|
|
||||||
August 11, 2008 (Changes by Graf Zahl)
|
August 11, 2008 (Changes by Graf Zahl)
|
||||||
- Removed 'eval' modifier from DECORATE. All int, float and bool parameters are
|
- Removed 'eval' modifier from DECORATE. All int, float and bool parameters are
|
||||||
'eval' now by default.
|
'eval' now by default.
|
||||||
|
|
|
@ -168,24 +168,35 @@ endif( FMOD_LIBRARY )
|
||||||
# Search for NASM
|
# Search for NASM
|
||||||
|
|
||||||
if( NOT NO_ASM )
|
if( NOT NO_ASM )
|
||||||
find_program( NASM_PATH NAMES ${NASM_NAMES} )
|
if( UNIX AND X64 )
|
||||||
find_program( YASM_PATH yasm )
|
find_program( GAS_PATH as )
|
||||||
|
|
||||||
if( YASM_PATH )
|
if( GAS_PATH )
|
||||||
set( ASSEMBLER ${YASM_PATH} )
|
set( ASSEMBLER ${GAS_PATH} )
|
||||||
else( YASM_PATH )
|
else( GAS_PATH )
|
||||||
if( X64 )
|
message( STATUS "Could not find as. Disabling assembly code." )
|
||||||
message( STATUS "Could not find YASM. Disabling assembly code." )
|
|
||||||
set( NO_ASM ON )
|
set( NO_ASM ON )
|
||||||
else( X64 )
|
endif( GAS_PATH )
|
||||||
if( NOT NASM_PATH )
|
else( UNIX AND X64 )
|
||||||
message( STATUS "Could not find YASM or NASM. Disabling assembly code." )
|
find_program( NASM_PATH NAMES ${NASM_NAMES} )
|
||||||
|
find_program( YASM_PATH yasm )
|
||||||
|
|
||||||
|
if( YASM_PATH )
|
||||||
|
set( ASSEMBLER ${YASM_PATH} )
|
||||||
|
else( YASM_PATH )
|
||||||
|
if( X64 )
|
||||||
|
message( STATUS "Could not find YASM. Disabling assembly code." )
|
||||||
set( NO_ASM ON )
|
set( NO_ASM ON )
|
||||||
else( NOT NASM_PATH )
|
else( X64 )
|
||||||
set( ASSEMBLER ${NASM_PATH} )
|
if( NOT NASM_PATH )
|
||||||
endif( NOT NASM_PATH )
|
message( STATUS "Could not find YASM or NASM. Disabling assembly code." )
|
||||||
endif( X64 )
|
set( NO_ASM ON )
|
||||||
endif( YASM_PATH )
|
else( NOT NASM_PATH )
|
||||||
|
set( ASSEMBLER ${NASM_PATH} )
|
||||||
|
endif( NOT NASM_PATH )
|
||||||
|
endif( X64 )
|
||||||
|
endif( YASM_PATH )
|
||||||
|
endif( UNIX AND X64 )
|
||||||
|
|
||||||
# I think the only reason there was a version requirement was because the
|
# I think the only reason there was a version requirement was because the
|
||||||
# executable name for Windows changed from 0.x to 2.0, right? This is
|
# executable name for Windows changed from 0.x to 2.0, right? This is
|
||||||
|
@ -211,16 +222,19 @@ if( NOT NO_ASM )
|
||||||
if( UNIX )
|
if( UNIX )
|
||||||
set( ASM_OUTPUT_EXTENSION .o )
|
set( ASM_OUTPUT_EXTENSION .o )
|
||||||
if( X64 )
|
if( X64 )
|
||||||
set( ASM_FLAGS -f elf64 -DM_TARGET_LINUX )
|
set( ASM_FLAGS )
|
||||||
|
set( ASM_SOURCE_EXTENSION .s )
|
||||||
else( X64 )
|
else( X64 )
|
||||||
set( ASM_FLAGS -f elf -DM_TARGET_LINUX )
|
set( ASM_FLAGS -f elf -DM_TARGET_LINUX -i${CMAKE_CURRENT_SOURCE_DIR}/ )
|
||||||
|
set( ASM_SOURCE_EXTENSION .asm )
|
||||||
endif( X64 )
|
endif( X64 )
|
||||||
else( UNIX )
|
else( UNIX )
|
||||||
set( ASM_OUTPUT_EXTENSION .obj )
|
set( ASM_OUTPUT_EXTENSION .obj )
|
||||||
|
set( ASM_SOURCE_EXTENSION .asm )
|
||||||
if( X64 )
|
if( X64 )
|
||||||
set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
|
set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
|
||||||
else( X64 )
|
else( X64 )
|
||||||
set( ASM_FLAGS -f win32 -DWIN32 )
|
set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
|
||||||
endif( X64 )
|
endif( X64 )
|
||||||
endif( UNIX )
|
endif( UNIX )
|
||||||
if( WIN32 )
|
if( WIN32 )
|
||||||
|
@ -234,7 +248,7 @@ if( NOT NO_ASM )
|
||||||
endif( WIN32 )
|
endif( WIN32 )
|
||||||
add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
|
add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
|
||||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
|
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
|
||||||
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -i${CMAKE_CURRENT_SOURCE_DIR}/ -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}.asm"
|
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
|
||||||
${FIXRTEXT_${infile}}
|
${FIXRTEXT_${infile}}
|
||||||
DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} )
|
DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} )
|
||||||
set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
|
set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
|
||||||
|
|
|
@ -1,36 +1,9 @@
|
||||||
%include "valgrind.inc"
|
%ifnidn __OUTPUT_FORMAT__,win64
|
||||||
|
%error tmap3.asm is for Win64 output. You should use tmap.s for other systems.
|
||||||
|
%endif
|
||||||
|
|
||||||
BITS 64
|
BITS 64
|
||||||
DEFAULT REL
|
DEFAULT REL
|
||||||
|
|
||||||
%ifnidn __OUTPUT_FORMAT__,win64
|
|
||||||
|
|
||||||
%macro PROC_FRAME 1
|
|
||||||
%1:
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro rex_push_reg 1
|
|
||||||
push %1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro push_reg 1
|
|
||||||
push %1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro alloc_stack 1
|
|
||||||
sub rsp,%1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%define parm1lo dil
|
|
||||||
|
|
||||||
%else
|
|
||||||
|
|
||||||
%define parm1lo cl
|
|
||||||
|
|
||||||
%endif
|
|
||||||
|
|
||||||
SECTION .data
|
|
||||||
|
|
||||||
EXTERN vplce
|
EXTERN vplce
|
||||||
EXTERN vince
|
EXTERN vince
|
||||||
EXTERN palookupoffse
|
EXTERN palookupoffse
|
||||||
|
@ -42,34 +15,28 @@ EXTERN dc_pitch
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL ASM_PatchPitch
|
GLOBAL ASM_PatchPitch
|
||||||
ASM_PatchPitch:
|
ASM_PatchPitch:
|
||||||
mov ecx, [dc_pitch]
|
mov ecx, [dc_pitch]
|
||||||
mov [pm+3], ecx
|
mov [pm+3], ecx
|
||||||
mov [vltpitch+3], ecx
|
mov [vltpitch+3], ecx
|
||||||
selfmod pm, vltpitch+6
|
selfmod pm, vltpitch+6
|
||||||
ret
|
ret
|
||||||
|
align 16
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL setupvlinetallasm
|
GLOBAL setupvlinetallasm
|
||||||
setupvlinetallasm:
|
setupvlinetallasm:
|
||||||
mov [shifter1+2], parm1lo
|
mov [shifter1+2], cl
|
||||||
mov [shifter2+2], parm1lo
|
mov [shifter2+2], cl
|
||||||
mov [shifter3+2], parm1lo
|
mov [shifter3+2], cl
|
||||||
mov [shifter4+2], parm1lo
|
mov [shifter4+2], cl
|
||||||
selfmod shifter1, shifter4+3
|
selfmod shifter1, shifter4+3
|
||||||
ret
|
ret
|
||||||
|
align 16
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__,win64
|
|
||||||
; Yasm can't do progbits alloc exec for win64?
|
; Yasm can't do progbits alloc exec for win64?
|
||||||
; Hmm, looks like it's automatic. No worries, then.
|
; Hmm, looks like it's automatic. No worries, then.
|
||||||
SECTION .rtext write ;progbits alloc exec
|
SECTION .rtext write ;progbits alloc exec
|
||||||
%else
|
|
||||||
SECTION .rtext progbits alloc exec write
|
|
||||||
%endif
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
|
|
||||||
GLOBAL vlinetallasm4
|
GLOBAL vlinetallasm4
|
||||||
PROC_FRAME vlinetallasm4
|
PROC_FRAME vlinetallasm4
|
||||||
|
@ -138,7 +105,7 @@ loopit:
|
||||||
mov edx, r8d
|
mov edx, r8d
|
||||||
shifter1: shr edx, 24
|
shifter1: shr edx, 24
|
||||||
step1: add r8d, 0x88888888
|
step1: add r8d, 0x88888888
|
||||||
movzx rdx, BYTE [rax+rdx]
|
movzx edx, BYTE [rax+rdx]
|
||||||
mov ebx, r9d
|
mov ebx, r9d
|
||||||
mov dl, [r12+rdx]
|
mov dl, [r12+rdx]
|
||||||
shifter2: shr ebx, 24
|
shifter2: shr ebx, 24
|
||||||
|
@ -178,5 +145,8 @@ vltepilog:
|
||||||
pop r15
|
pop r15
|
||||||
pop rdi
|
pop rdi
|
||||||
pop rbx
|
pop rbx
|
||||||
ret
|
ret
|
||||||
ENDPROC_FRAME
|
vlinetallasm4_end:
|
||||||
|
ENDPROC_FRAME
|
||||||
|
ALIGN 16
|
||||||
|
|
||||||
|
|
141
src/asm_x86_64/tmap3.s
Normal file
141
src/asm_x86_64/tmap3.s
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
#%include "valgrind.inc"
|
||||||
|
|
||||||
|
.section .text
|
||||||
|
|
||||||
|
.globl ASM_PatchPitch
|
||||||
|
ASM_PatchPitch:
|
||||||
|
movl dc_pitch(%rip), %ecx
|
||||||
|
movl %ecx, pm+3(%rip)
|
||||||
|
movl %ecx, vltpitch+3(%rip)
|
||||||
|
# selfmod pm, vltpitch+6
|
||||||
|
ret
|
||||||
|
.align 16
|
||||||
|
|
||||||
|
.globl setupvlinetallasm
|
||||||
|
setupvlinetallasm:
|
||||||
|
movb %dil, shifter1+2(%rip)
|
||||||
|
movb %dil, shifter2+2(%rip)
|
||||||
|
movb %dil, shifter3+2(%rip)
|
||||||
|
movb %dil, shifter4+2(%rip)
|
||||||
|
# selfmod shifter1, shifter4+3
|
||||||
|
ret
|
||||||
|
.align 16
|
||||||
|
|
||||||
|
.section .rtext,"awx"
|
||||||
|
|
||||||
|
.globl vlinetallasm4
|
||||||
|
.type vlinetallasm4,@function
|
||||||
|
vlinetallasm4:
|
||||||
|
.cfi_startproc
|
||||||
|
push %rbx
|
||||||
|
push %rdi
|
||||||
|
push %r15
|
||||||
|
push %r14
|
||||||
|
push %r13
|
||||||
|
push %r12
|
||||||
|
push %rbp
|
||||||
|
push %rsi
|
||||||
|
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
|
||||||
|
.cfi_adjust_cfa_offset 8
|
||||||
|
|
||||||
|
# rax = bufplce base address
|
||||||
|
# ebx/rbx = scratch (second column)
|
||||||
|
# rcx = offset from rdi/count (negative)
|
||||||
|
# edx/rdx = scratch
|
||||||
|
# rdi = bottom of columns to write to
|
||||||
|
# r8d-r11d = column offsets
|
||||||
|
# r12-r15 = palookupoffse[0] - palookupoffse[3]
|
||||||
|
|
||||||
|
movl dc_count(%rip), %ecx
|
||||||
|
movq dc_dest(%rip), %rdi
|
||||||
|
testl %ecx, %ecx
|
||||||
|
jle vltepilog # count must be positive
|
||||||
|
|
||||||
|
movq bufplce(%rip), %rax
|
||||||
|
movq bufplce+8(%rip), %r8
|
||||||
|
subq %rax, %r8
|
||||||
|
movq bufplce+16(%rip), %r9
|
||||||
|
subq %rax, %r9
|
||||||
|
movq bufplce+24(%rip), %r10
|
||||||
|
subq %rax, %r10
|
||||||
|
movl %r8d, source2+4(%rip)
|
||||||
|
movl %r9d, source3+4(%rip)
|
||||||
|
movl %r10d, source4+4(%rip)
|
||||||
|
|
||||||
|
pm: imulq $320, %rcx
|
||||||
|
|
||||||
|
movq palookupoffse(%rip), %r12
|
||||||
|
movq palookupoffse+8(%rip), %r13
|
||||||
|
movq palookupoffse+16(%rip), %r14
|
||||||
|
movq palookupoffse+24(%rip), %r15
|
||||||
|
|
||||||
|
movl vince(%rip), %r8d
|
||||||
|
movl vince+4(%rip), %r9d
|
||||||
|
movl vince+8(%rip), %r10d
|
||||||
|
movl vince+12(%rip), %r11d
|
||||||
|
movl %r8d, step1+3(%rip)
|
||||||
|
movl %r9d, step2+3(%rip)
|
||||||
|
movl %r10d, step3+3(%rip)
|
||||||
|
movl %r11d, step4+3(%rip)
|
||||||
|
|
||||||
|
addq %rcx, %rdi
|
||||||
|
negq %rcx
|
||||||
|
|
||||||
|
movl vplce(%rip), %r8d
|
||||||
|
movl vplce+4(%rip), %r9d
|
||||||
|
movl vplce+8(%rip), %r10d
|
||||||
|
movl vplce+12(%rip), %r11d
|
||||||
|
# selfmod loopit, vltepilog
|
||||||
|
jmp loopit
|
||||||
|
|
||||||
|
.align 16
|
||||||
|
loopit:
|
||||||
|
movl %r8d, %edx
|
||||||
|
shifter1: shrl $24, %edx
|
||||||
|
step1: addl $0x88888888, %r8d
|
||||||
|
movzbl (%rax,%rdx), %edx
|
||||||
|
movl %r9d, %ebx
|
||||||
|
movb (%r12,%rdx), %dl
|
||||||
|
shifter2: shrl $24, %ebx
|
||||||
|
step2: addl $0x88888888, %r9d
|
||||||
|
source2: movzbl 0x88888888(%rax,%rbx), %ebx
|
||||||
|
movl %r10d, %ebp
|
||||||
|
movb (%r13,%rbx), %bl
|
||||||
|
shifter3: shr $24, %ebp
|
||||||
|
step3: addl $0x88888888, %r10d
|
||||||
|
source3: movzbl 0x88888888(%rax,%rbp), %ebp
|
||||||
|
movl %r11d, %esi
|
||||||
|
movb (%r14,%rbp), %bpl
|
||||||
|
shifter4: shr $24, %esi
|
||||||
|
step4: add $0x88888888, %r11d
|
||||||
|
source4: movzbl 0x88888888(%rax,%rsi), %esi
|
||||||
|
movb %dl, (%rdi,%rcx)
|
||||||
|
movb %bl, 1(%rdi,%rcx)
|
||||||
|
movb (%r15,%rsi), %sil
|
||||||
|
movb %bpl, 2(%rdi,%rcx)
|
||||||
|
movb %sil, 3(%rdi,%rcx)
|
||||||
|
|
||||||
|
vltpitch: addq $320, %rcx
|
||||||
|
jl loopit
|
||||||
|
|
||||||
|
movl %r8d, vplce(%rip)
|
||||||
|
movl %r9d, vplce+4(%rip)
|
||||||
|
movl %r10d, vplce+8(%rip)
|
||||||
|
movl %r11d, vplce+12(%rip)
|
||||||
|
|
||||||
|
vltepilog:
|
||||||
|
addq $8, %rsp
|
||||||
|
.cfi_adjust_cfa_offset -8
|
||||||
|
pop %rsi
|
||||||
|
pop %rbp
|
||||||
|
pop %r12
|
||||||
|
pop %r13
|
||||||
|
pop %r14
|
||||||
|
pop %r15
|
||||||
|
pop %rdi
|
||||||
|
pop %rbx
|
||||||
|
ret
|
||||||
|
.cfi_endproc
|
||||||
|
.align 16
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue