- Changed the four-byte fillers in asm_x86_64/tmap3.s from 0x88888888 to 0x44444444 because a newer version of gas complained about it.

SVN r2481 (trunk)
Randy Heit 2010-08-01 04:31:18 +00:00
parent 677d07f837
commit 8ce9e178d4
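
For context on why a seemingly harmless constant needed changing (this note and the sketch below are not part of the commit): the 0x88888888 / 0x44444444 values in tmap3.s are fillers for self-modifying code. ASM_PatchPitch, setupvlinetallasm, and the prologue of vlinetallasm4 store the real values over them at run time (the writes to pm+3, shifterN+2, stepN+3, sourceN+4, and vltpitch+3), which is why the routine lives in the writable, executable .rtext section. The complaint from newer gas most likely concerns the sourceN lines, where the filler is a memory displacement: x86-64 displacements are sign-extended 32-bit fields, so 0x88888888 is above the +0x7fffffff limit while 0x44444444 still fits. A minimal sketch of the patching pattern, using hypothetical labels (demo_patched, step_demo, set_demo_step) that do not exist in the ZDoom source:

	.section .rtext,"awx"			# writable + executable, as in tmap3.s, so the store below is legal
	.globl demo_patched
demo_patched:
step_demo:	addl	$0x44444444, %r8d	# filler immediate; encodes as 41 81 C0 imm32,
	ret					# so the four patchable bytes start at step_demo+3

	.section .text
	.globl set_demo_step
set_demo_step:					# SysV AMD64 ABI: first integer argument arrives in %edi
	movl	%edi, step_demo+3(%rip)		# overwrite the filler with the real value
	ret

setupvlinetallasm in the diff below does the same thing one byte at a time (movb %dil, shifterN+2) to patch shift counts instead of full 32-bit immediates.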


--- a/asm_x86_64/tmap3.s
+++ b/asm_x86_64/tmap3.s
@@ -1,28 +1,28 @@
 #%include "valgrind.inc"
 	.section .text
 	.globl ASM_PatchPitch
 ASM_PatchPitch:
 	movl	dc_pitch(%rip), %ecx
 	movl	%ecx, pm+3(%rip)
 	movl	%ecx, vltpitch+3(%rip)
 #	selfmod pm, vltpitch+6
 	ret
 	.align 16
 	.globl setupvlinetallasm
 setupvlinetallasm:
 	movb	%dil, shifter1+2(%rip)
 	movb	%dil, shifter2+2(%rip)
 	movb	%dil, shifter3+2(%rip)
 	movb	%dil, shifter4+2(%rip)
 #	selfmod shifter1, shifter4+3
 	ret
 	.align 16
 	.section .rtext,"awx"
 	.globl vlinetallasm4
 	.type vlinetallasm4,@function
 vlinetallasm4:
@@ -38,18 +38,18 @@ vlinetallasm4:
 	subq	$8, %rsp		# Does the stack need to be 16-byte aligned for Linux?
 	.cfi_adjust_cfa_offset 8
 # rax = bufplce base address
 # rbx =
 # rcx = offset from rdi/count (negative)
 # edx/rdx = scratch
 # rdi = bottom of columns to write to
 # r8d-r11d = column offsets
 # r12-r15 = palookupoffse[0] - palookupoffse[4]
 	movl	dc_count(%rip), %ecx
 	movq	dc_dest(%rip), %rdi
 	testl	%ecx, %ecx
 	jle	vltepilog		# count must be positive
 	movq	bufplce(%rip), %rax
 	movq	bufplce+8(%rip), %r8
@@ -60,14 +60,14 @@ vlinetallasm4:
 	subq	%rax, %r10
 	movl	%r8d, source2+4(%rip)
 	movl	%r9d, source3+4(%rip)
 	movl	%r10d, source4+4(%rip)
 pm:	imulq	$320, %rcx
 	movq	palookupoffse(%rip), %r12
 	movq	palookupoffse+8(%rip), %r13
 	movq	palookupoffse+16(%rip), %r14
 	movq	palookupoffse+24(%rip), %r15
 	movl	vince(%rip), %r8d
 	movl	vince+4(%rip), %r9d
@@ -76,53 +76,53 @@ pm:	imulq	$320, %rcx
 	movl	%r8d, step1+3(%rip)
 	movl	%r9d, step2+3(%rip)
 	movl	%r10d, step3+3(%rip)
 	movl	%r11d, step4+3(%rip)
 	addq	%rcx, %rdi
 	negq	%rcx
 	movl	vplce(%rip), %r8d
 	movl	vplce+4(%rip), %r9d
 	movl	vplce+8(%rip), %r10d
 	movl	vplce+12(%rip), %r11d
 #	selfmod loopit, vltepilog
 	jmp	loopit
 	.align 16
 loopit:
 	movl	%r8d, %edx
 shifter1:	shrl	$24, %edx
-step1:	addl	$0x88888888, %r8d
+step1:	addl	$0x44444444, %r8d
 	movzbl	(%rax,%rdx), %edx
 	movl	%r9d, %ebx
 	movb	(%r12,%rdx), %dl
 shifter2:	shrl	$24, %ebx
-step2:	addl	$0x88888888, %r9d
-source2:	movzbl	0x88888888(%rax,%rbx), %ebx
+step2:	addl	$0x44444444, %r9d
+source2:	movzbl	0x44444444(%rax,%rbx), %ebx
 	movl	%r10d, %ebp
 	movb	(%r13,%rbx), %bl
 shifter3:	shr	$24, %ebp
-step3:	addl	$0x88888888, %r10d
-source3:	movzbl	0x88888888(%rax,%rbp), %ebp
+step3:	addl	$0x44444444, %r10d
+source3:	movzbl	0x44444444(%rax,%rbp), %ebp
 	movl	%r11d, %esi
 	movb	(%r14,%rbp), %bpl
 shifter4:	shr	$24, %esi
-step4:	add	$0x88888888, %r11d
-source4:	movzbl	0x88888888(%rax,%rsi), %esi
+step4:	add	$0x44444444, %r11d
+source4:	movzbl	0x44444444(%rax,%rsi), %esi
 	movb	%dl, (%rdi,%rcx)
 	movb	%bl, 1(%rdi,%rcx)
 	movb	(%r15,%rsi), %sil
 	movb	%bpl, 2(%rdi,%rcx)
 	movb	%sil, 3(%rdi,%rcx)
 vltpitch:	addq	$320, %rcx
 	jl	loopit
 	movl	%r8d, vplce(%rip)
 	movl	%r9d, vplce+4(%rip)
 	movl	%r10d, vplce+8(%rip)
 	movl	%r11d, vplce+12(%rip)
 vltepilog:
 	addq	$8, %rsp
 	.cfi_adjust_cfa_offset -8
@@ -137,5 +137,5 @@ vltepilog:
 	ret
 	.cfi_endproc
 	.align 16