mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-10 14:51:51 +00:00
- Ported asm_x86_64/tmap3.nas to AT&T syntax so it can be compiled with gas.
After finding out that gas does have directives to describe the .eh_frame metadata, I figured that would be significantly easier and quicker than trying to locate all the scattered docs I would need to read to figure out how to construct it by hand. Unfortunately, this now means I have to maintain two versions of exactly the same code. :( (But unless I add 32-bit color rendering in the future, the chances that I will have to touch it again are quite slim.) SVN r1159 (trunk)
This commit is contained in:
parent
9e4b2fb3ec
commit
c9187a0e09
4 changed files with 203 additions and 70 deletions
|
@ -1,3 +1,11 @@
|
|||
August 11, 2008
|
||||
- Ported asm_x86_64/tmap3.nas to AT&T syntax so it can be compiled with gas.
|
||||
After finding out that gas does have directives to describe the .eh_frame
|
||||
metadata, I figured that would be significantly easier and quicker than
|
||||
trying to locate all the scattered docs needed to construct it by hand.
|
||||
Unfortunately, this now means I have to maintain two versions of exactly
|
||||
the same code. :(
|
||||
|
||||
August 11, 2008 (Changes by Graf Zahl)
|
||||
- Removed 'eval' modifier from DECORATE. All int, float and bool parameters are
|
||||
'eval' now by default.
|
||||
|
|
|
@ -168,24 +168,35 @@ endif( FMOD_LIBRARY )
|
|||
# Search for NASM
|
||||
|
||||
if( NOT NO_ASM )
|
||||
# Pick the assembler used for the hand-written assembly sources.
#
# - 64-bit Unix builds use the GNU assembler (as) for the AT&T-syntax sources.
# - Every other configuration prefers YASM and falls back to NASM; the NASM
#   fallback is only accepted for 32-bit targets.
#
# On success ASSEMBLER holds the path of the chosen program. If nothing
# suitable is found, NO_ASM is turned on and a status message says why.
if( UNIX AND X64 )
	find_program( GAS_PATH as )

	if( GAS_PATH )
		set( ASSEMBLER ${GAS_PATH} )
	else( GAS_PATH )
		message( STATUS "Could not find as. Disabling assembly code." )
		set( NO_ASM ON )
	endif( GAS_PATH )
else( UNIX AND X64 )
	find_program( NASM_PATH NAMES ${NASM_NAMES} )
	find_program( YASM_PATH yasm )

	if( YASM_PATH )
		set( ASSEMBLER ${YASM_PATH} )
	else( YASM_PATH )
		if( X64 )
			# No NASM fallback is attempted for 64-bit targets.
			message( STATUS "Could not find YASM. Disabling assembly code." )
			set( NO_ASM ON )
		else( X64 )
			if( NOT NASM_PATH )
				message( STATUS "Could not find YASM or NASM. Disabling assembly code." )
				set( NO_ASM ON )
			else( NOT NASM_PATH )
				set( ASSEMBLER ${NASM_PATH} )
			endif( NOT NASM_PATH )
		endif( X64 )
	endif( YASM_PATH )
endif( UNIX AND X64 )
|
||||
|
||||
# I think the only reason there was a version requirement was because the
|
||||
# executable name for Windows changed from 0.x to 2.0, right? This is
|
||||
|
@ -211,16 +222,19 @@ if( NOT NO_ASM )
|
|||
# Choose the per-platform assembler settings:
#   ASM_OUTPUT_EXTENSION - suffix of the object file the assembler writes
#   ASM_SOURCE_EXTENSION - suffix of the assembly source handed to it
#   ASM_FLAGS            - options placed before -o on the assembler command line
if( UNIX )
	set( ASM_OUTPUT_EXTENSION .o )
	if( X64 )
		# 64-bit Unix assembles the AT&T-syntax .s sources with gas, so none of
		# the NASM/YASM -f/-D/-i options are passed.
		set( ASM_FLAGS )
		set( ASM_SOURCE_EXTENSION .s )
	else( X64 )
		set( ASM_FLAGS -f elf -DM_TARGET_LINUX -i${CMAKE_CURRENT_SOURCE_DIR}/ )
		set( ASM_SOURCE_EXTENSION .asm )
	endif( X64 )
else( UNIX )
	set( ASM_OUTPUT_EXTENSION .obj )
	set( ASM_SOURCE_EXTENSION .asm )
	if( X64 )
		# The include path has to be part of the flags here as well: the Win64
		# source still does %include "valgrind.inc", and the path is no longer
		# appended on the assembler command line itself.
		set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
	else( X64 )
		set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
	endif( X64 )
endif( UNIX )
|
||||
if( WIN32 )
|
||||
|
@ -234,7 +248,7 @@ if( NOT NO_ASM )
|
|||
endif( WIN32 )
|
||||
# Build rule that assembles ${indir}/${infile}${ASM_SOURCE_EXTENSION} into
# ${ASM_OUTPUT_${infile}}.
#
# The object directory is created first because the assembler will not create
# it. ${FIXRTEXT_${infile}} is expanded in place after the assembler command.
# NOTE(review): it is presumably an extra post-processing COMMAND set up in the
# WIN32 branch above and empty elsewhere - confirm.
#
# DEPENDS names the same source file the assembler reads, so editing a .s
# source triggers a rebuild just like editing a .asm source does.
add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
	COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
	COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
	${FIXRTEXT_${infile}}
	DEPENDS ${indir}/${infile}${ASM_SOURCE_EXTENSION} ${FIXRTEXT} )
|
||||
set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
|
||||
|
|
|
@ -1,36 +1,9 @@
|
|||
%include "valgrind.inc"
|
||||
|
||||
; Guard: this NASM/YASM source is only meant to be assembled to Win64 objects.
; Other 64-bit targets are served by the gas port of the same code, tmap3.s,
; so stop with a message that names that file.
%ifnidn __OUTPUT_FORMAT__,win64
%error tmap3.asm is for Win64 output. You should use tmap3.s for other systems.
%endif
|
||||
|
||||
BITS 64
|
||||
DEFAULT REL
|
||||
|
||||
%ifnidn __OUTPUT_FORMAT__,win64
|
||||
|
||||
%macro PROC_FRAME 1
|
||||
%1:
|
||||
%endmacro
|
||||
|
||||
%macro rex_push_reg 1
|
||||
push %1
|
||||
%endmacro
|
||||
|
||||
%macro push_reg 1
|
||||
push %1
|
||||
%endmacro
|
||||
|
||||
%macro alloc_stack 1
|
||||
sub rsp,%1
|
||||
%endmacro
|
||||
|
||||
%define parm1lo dil
|
||||
|
||||
%else
|
||||
|
||||
%define parm1lo cl
|
||||
|
||||
%endif
|
||||
|
||||
SECTION .data
|
||||
|
||||
DEFAULT REL
|
||||
EXTERN vplce
|
||||
EXTERN vince
|
||||
EXTERN palookupoffse
|
||||
|
@ -42,34 +15,28 @@ EXTERN dc_pitch
|
|||
|
||||
SECTION .text
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL ASM_PatchPitch
|
||||
ASM_PatchPitch:
|
||||
mov ecx, [dc_pitch]
|
||||
mov [pm+3], ecx
|
||||
mov [vltpitch+3], ecx
|
||||
selfmod pm, vltpitch+6
|
||||
ret
|
||||
ret
|
||||
align 16
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
mov [shifter1+2], parm1lo
|
||||
mov [shifter2+2], parm1lo
|
||||
mov [shifter3+2], parm1lo
|
||||
mov [shifter4+2], parm1lo
|
||||
mov [shifter1+2], cl
|
||||
mov [shifter2+2], cl
|
||||
mov [shifter3+2], cl
|
||||
mov [shifter4+2], cl
|
||||
selfmod shifter1, shifter4+3
|
||||
ret
|
||||
ret
|
||||
align 16
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,win64
|
||||
; Yasm can't do progbits alloc exec for win64?
|
||||
; Hmm, looks like it's automatic. No worries, then.
|
||||
SECTION .rtext write ;progbits alloc exec
|
||||
%else
|
||||
SECTION .rtext progbits alloc exec write
|
||||
%endif
|
||||
|
||||
ALIGN 16
|
||||
SECTION .rtext write ;progbits alloc exec
|
||||
|
||||
GLOBAL vlinetallasm4
|
||||
PROC_FRAME vlinetallasm4
|
||||
|
@ -138,7 +105,7 @@ loopit:
|
|||
mov edx, r8d
|
||||
shifter1: shr edx, 24
|
||||
step1: add r8d, 0x88888888
|
||||
movzx rdx, BYTE [rax+rdx]
|
||||
movzx edx, BYTE [rax+rdx]
|
||||
mov ebx, r9d
|
||||
mov dl, [r12+rdx]
|
||||
shifter2: shr ebx, 24
|
||||
|
@ -178,5 +145,8 @@ vltepilog:
|
|||
pop r15
|
||||
pop rdi
|
||||
pop rbx
|
||||
ret
|
||||
ENDPROC_FRAME
|
||||
ret
|
||||
vlinetallasm4_end:
|
||||
ENDPROC_FRAME
|
||||
ALIGN 16
|
||||
|
||||
|
|
141
src/asm_x86_64/tmap3.s
Normal file
141
src/asm_x86_64/tmap3.s
Normal file
|
@ -0,0 +1,141 @@
|
|||
#%include "valgrind.inc"
|
||||
|
||||
.section .text
|
||||
|
||||
# ASM_PatchPitch: self-modifying-code helper. Reads the 32-bit value of
# dc_pitch and writes it into the instructions at labels pm and vltpitch
# inside vlinetallasm4, both of which are assembled with a literal 320.
# Takes no arguments; overwrites %ecx.
.globl ASM_PatchPitch
|
||||
ASM_PatchPitch:
|
||||
movl dc_pitch(%rip), %ecx
|
||||
# pm is "imulq $320, %rcx"; offset 3 is presumably the start of its 32-bit
# immediate (after REX prefix, opcode and ModRM) - confirm against the
# assembled bytes if the instruction is ever changed.
movl %ecx, pm+3(%rip)
|
||||
# vltpitch is "addq $320, %rcx"; same assumption about the immediate offset.
movl %ecx, vltpitch+3(%rip)
|
||||
# The NASM version invokes the selfmod macro here; it is left commented out in
# this port, as is the valgrind.inc include at the top of the file.
# selfmod pm, vltpitch+6
|
||||
ret
|
||||
.align 16
|
||||
|
||||
# setupvlinetallasm: self-modifying-code helper. Stores the low byte of %rdi
# (%dil) into byte 2 of the four instructions labelled shifter1..shifter4 in
# vlinetallasm4, each of which is assembled as a shift right by a literal 24.
# NOTE(review): %dil is the low byte of the first integer argument under the
# System V AMD64 calling convention - the Win64 NASM version uses cl instead.
.globl setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
# Offset 2 is presumably the 8-bit shift count of each "shr $24, reg"
# (opcode and ModRM come first) - confirm if the shift instructions change.
movb %dil, shifter1+2(%rip)
|
||||
movb %dil, shifter2+2(%rip)
|
||||
movb %dil, shifter3+2(%rip)
|
||||
movb %dil, shifter4+2(%rip)
|
||||
# The NASM version invokes the selfmod macro here; it is left commented out in
# this port.
# selfmod shifter1, shifter4+3
|
||||
ret
|
||||
.align 16
|
||||
|
||||
.section .rtext,"awx"
|
||||
|
||||
.globl vlinetallasm4
|
||||
.type vlinetallasm4,@function
|
||||
vlinetallasm4:
|
||||
.cfi_startproc
|
||||
push %rbx
|
||||
push %rdi
|
||||
push %r15
|
||||
push %r14
|
||||
push %r13
|
||||
push %r12
|
||||
push %rbp
|
||||
push %rsi
|
||||
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
|
||||
.cfi_adjust_cfa_offset 8
|
||||
|
||||
# rax = bufplce base address
|
||||
# ebx/rbx = scratch for column 2 (ebp and esi likewise serve columns 3 and 4)
|
||||
# rcx = offset from rdi/count (negative)
|
||||
# edx/rdx = scratch
|
||||
# rdi = bottom of columns to write to
|
||||
# r8d-r11d = column offsets
|
||||
# r12-r15 = palookupoffse[0] - palookupoffse[3]
|
||||
|
||||
movl dc_count(%rip), %ecx
|
||||
movq dc_dest(%rip), %rdi
|
||||
testl %ecx, %ecx
|
||||
jle vltepilog # count must be positive
|
||||
|
||||
movq bufplce(%rip), %rax
|
||||
movq bufplce+8(%rip), %r8
|
||||
subq %rax, %r8
|
||||
movq bufplce+16(%rip), %r9
|
||||
subq %rax, %r9
|
||||
movq bufplce+24(%rip), %r10
|
||||
subq %rax, %r10
|
||||
movl %r8d, source2+4(%rip)
|
||||
movl %r9d, source3+4(%rip)
|
||||
movl %r10d, source4+4(%rip)
|
||||
|
||||
pm: imulq $320, %rcx
|
||||
|
||||
movq palookupoffse(%rip), %r12
|
||||
movq palookupoffse+8(%rip), %r13
|
||||
movq palookupoffse+16(%rip), %r14
|
||||
movq palookupoffse+24(%rip), %r15
|
||||
|
||||
movl vince(%rip), %r8d
|
||||
movl vince+4(%rip), %r9d
|
||||
movl vince+8(%rip), %r10d
|
||||
movl vince+12(%rip), %r11d
|
||||
movl %r8d, step1+3(%rip)
|
||||
movl %r9d, step2+3(%rip)
|
||||
movl %r10d, step3+3(%rip)
|
||||
movl %r11d, step4+3(%rip)
|
||||
|
||||
addq %rcx, %rdi
|
||||
negq %rcx
|
||||
|
||||
movl vplce(%rip), %r8d
|
||||
movl vplce+4(%rip), %r9d
|
||||
movl vplce+8(%rip), %r10d
|
||||
movl vplce+12(%rip), %r11d
|
||||
# selfmod loopit, vltepilog
|
||||
jmp loopit
|
||||
|
||||
.align 16
|
||||
loopit:
|
||||
movl %r8d, %edx
|
||||
shifter1: shrl $24, %edx
|
||||
step1: addl $0x88888888, %r8d
|
||||
movzbl (%rax,%rdx), %edx
|
||||
movl %r9d, %ebx
|
||||
movb (%r12,%rdx), %dl
|
||||
shifter2: shrl $24, %ebx
|
||||
step2: addl $0x88888888, %r9d
|
||||
source2: movzbl 0x88888888(%rax,%rbx), %ebx
|
||||
movl %r10d, %ebp
|
||||
movb (%r13,%rbx), %bl
|
||||
shifter3: shr $24, %ebp
|
||||
step3: addl $0x88888888, %r10d
|
||||
source3: movzbl 0x88888888(%rax,%rbp), %ebp
|
||||
movl %r11d, %esi
|
||||
movb (%r14,%rbp), %bpl
|
||||
shifter4: shr $24, %esi
|
||||
step4: add $0x88888888, %r11d
|
||||
source4: movzbl 0x88888888(%rax,%rsi), %esi
|
||||
movb %dl, (%rdi,%rcx)
|
||||
movb %bl, 1(%rdi,%rcx)
|
||||
movb (%r15,%rsi), %sil
|
||||
movb %bpl, 2(%rdi,%rcx)
|
||||
movb %sil, 3(%rdi,%rcx)
|
||||
|
||||
vltpitch: addq $320, %rcx
|
||||
jl loopit
|
||||
|
||||
movl %r8d, vplce(%rip)
|
||||
movl %r9d, vplce+4(%rip)
|
||||
movl %r10d, vplce+8(%rip)
|
||||
movl %r11d, vplce+12(%rip)
|
||||
|
||||
vltepilog:
|
||||
addq $8, %rsp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
pop %rsi
|
||||
pop %rbp
|
||||
pop %r12
|
||||
pop %r13
|
||||
pop %r14
|
||||
pop %r15
|
||||
pop %rdi
|
||||
pop %rbx
|
||||
ret
|
||||
.cfi_endproc
|
||||
.align 16
|
||||
|
||||
|
Loading…
Reference in a new issue