etlegacy-libs/jpegturbo/simd/jsimdext.inc

376 lines
12 KiB
PHP
Raw Normal View History

;
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
; Copyright (C) 2010, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
;
; Copyright (C) 1999-2006, MIYASAKA Masaru.
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; [TAB8]
; ==========================================================================
;  System-dependent configurations
;
;  The object format is selected by a symbol given on the assembler command
;  line (-DWIN32, -DWIN64, -DOBJ32, -DELF, -DAOUT or -DMACHO). The first
;  matching branch is used; the final %else covers every other case.
;
;  Every branch defines:
;    SEG_TEXT    section name and attributes for code
;    SEG_CONST   section name and attributes for constant data
;  Some branches also define:
;    EXTN(name)  external symbol naming (WIN64, ELF: the name is used as-is)
;    GOT_SYMBOL  marker used by the PIC support macros (ELF, AOUT, MACHO)
;    PIC         defined unconditionally for MACHO
;

%ifdef WIN32    ; ----(nasm -fwin32 -DWIN32 ...)--------
; * Microsoft Visual C++
; * MinGW (Minimalist GNU for Windows)
; * CygWin
; * LCC-Win32

; -- segment definition --
;
  %ifdef __YASM_VER__
    %define SEG_TEXT    .text  align=16
    %define SEG_CONST   .rdata align=16
  %else
    %define SEG_TEXT    .text  align=16 public use32 class=CODE
    %define SEG_CONST   .rdata align=16 public use32 class=CONST
  %endif

%elifdef WIN64  ; ----(nasm -fwin64 -DWIN64 ...)--------
; * Microsoft Visual C++

; -- segment definition --
;
  %ifdef __YASM_VER__
    %define SEG_TEXT    .text  align=16
    %define SEG_CONST   .rdata align=16
  %else
    %define SEG_TEXT    .text  align=16 public use64 class=CODE
    %define SEG_CONST   .rdata align=16 public use64 class=CONST
  %endif
  %define EXTN(name)    name                    ; foo() -> foo

%elifdef OBJ32  ; ----(nasm -fobj -DOBJ32 ...)----------
; * Borland C++ (Win32)

; -- segment definition --
;
  %define SEG_TEXT      _text align=16 public use32 class=CODE
  %define SEG_CONST     _data align=16 public use32 class=DATA

%elifdef ELF    ; ----(nasm -felf[64] -DELF ...)------------
; * Linux
; * *BSD family Unix using elf format
; * Unix System V, including Solaris x86, UnixWare and SCO Unix

; mark stack as non-executable
section .note.GNU-stack noalloc noexec nowrite progbits

; -- segment definition --
;
  %ifdef __x86_64__
    %define SEG_TEXT    .text   progbits align=16
    %define SEG_CONST   .rodata progbits align=16
  %else
    %define SEG_TEXT    .text   progbits alloc exec   nowrite align=16
    %define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
  %endif

; To make the code position-independent, append -DPIC to the commandline
;
  %define GOT_SYMBOL    _GLOBAL_OFFSET_TABLE_   ; ELF supports PIC
  %define EXTN(name)    name                    ; foo() -> foo

%elifdef AOUT   ; ----(nasm -faoutb/aout -DAOUT ...)----
; * Older Linux using a.out format  (nasm -f aout -DAOUT ...)
; * *BSD family Unix using a.out format  (nasm -f aoutb -DAOUT ...)

; -- segment definition --
;
  %define SEG_TEXT      .text
  %define SEG_CONST     .data

; To make the code position-independent, append -DPIC to the commandline
;
  %define GOT_SYMBOL    __GLOBAL_OFFSET_TABLE_  ; BSD-style a.out supports PIC

%elifdef MACHO  ; ----(nasm -fmacho -DMACHO ...)--------
; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format)

; -- segment definition --
;
  %define SEG_TEXT      .text  ;align=16        ; nasm doesn't accept align=16. why?
  %define SEG_CONST     .rodata align=16

; The generation of position-independent code (PIC) is the default on Darwin.
;
  %define PIC
  %define GOT_SYMBOL    _MACHO_PIC_             ; Mach-O style code-relative addressing

%else           ; ----(Other case)----------------------

; -- segment definition --
;
  %define SEG_TEXT      .text
  %define SEG_CONST     .data

%endif  ; ----------------------------------------------
; ==========================================================================
; --------------------------------------------------------------------------
;  Common types
;
;  Every name below is a single-line macro that is expanded where it is
;  used, so the order of the definitions in this section is not significant.
;

; -- storage sizes, in bytes --
%define SIZEOF_BYTE     1               ; sizeof(BYTE)
%define SIZEOF_WORD     2               ; sizeof(WORD)
%define SIZEOF_DWORD    4               ; sizeof(DWORD)
%define SIZEOF_QWORD    8               ; sizeof(QWORD)
%define SIZEOF_OWORD    16              ; sizeof(OWORD)

; -- storage sizes, in bits --
%define BYTE_BIT        8               ; CHAR_BIT in C
%define WORD_BIT        16              ; sizeof(WORD)*BYTE_BIT
%define DWORD_BIT       32              ; sizeof(DWORD)*BYTE_BIT
%define QWORD_BIT       64              ; sizeof(QWORD)*BYTE_BIT
%define OWORD_BIT       128             ; sizeof(OWORD)*BYTE_BIT

; -- pointer: qword when __x86_64__ is defined, dword otherwise --
%ifdef __x86_64__
  %define POINTER         qword           ; general pointer type
  %define SIZEOF_POINTER  SIZEOF_QWORD    ; sizeof(POINTER)
  %define POINTER_BIT     QWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%else
  %define POINTER         dword           ; general pointer type
  %define SIZEOF_POINTER  SIZEOF_DWORD    ; sizeof(POINTER)
  %define POINTER_BIT     DWORD_BIT       ; sizeof(POINTER)*BYTE_BIT
%endif

; -- signed integer --
%define INT             dword           ; signed integer type
%define SIZEOF_INT      SIZEOF_DWORD    ; sizeof(INT)
%define INT_BIT         DWORD_BIT       ; sizeof(INT)*BYTE_BIT

; -- single-precision float --
%define FP32            dword           ; IEEE754 single
%define SIZEOF_FP32     SIZEOF_DWORD    ; sizeof(FP32)
%define FP32_BIT        DWORD_BIT       ; sizeof(FP32)*BYTE_BIT

; -- MMX register width --
%define MMWORD          qword           ; int64  (MMX register)
%define SIZEOF_MMWORD   SIZEOF_QWORD    ; sizeof(MMWORD)
%define MMWORD_BIT      QWORD_BIT       ; sizeof(MMWORD)*BYTE_BIT

; -- SSE register width --
; NASM is buggy and doesn't properly handle operand sizes for SSE
; instructions, so for now XMMWORD has to expand to nothing.
%define XMMWORD                         ; int128 (SSE register)
%define SIZEOF_XMMWORD  SIZEOF_OWORD    ; sizeof(XMMWORD)
%define XMMWORD_BIT     OWORD_BIT       ; sizeof(XMMWORD)*BYTE_BIT

; Same workaround for loading a dword or an MMWORD into an xmm# register:
; both size overrides expand to nothing.
%define XMM_DWORD
%define XMM_MMWORD
; --------------------------------------------------------------------------
;  External Symbol Name
;
;  Default used when the system-dependent section did not define EXTN
;  itself: the C name gets one leading underscore.
;
%ifndef EXTN
%define EXTN(name)   _ %+ name          ; foo() -> _foo
%endif
; --------------------------------------------------------------------------
;  Macros for position-independent code (PIC) support
;
;  All variants define the same five names:
;    GOTOFF(got,sym)   address expression for sym relative to the base in got
;    get_GOT reg       load the PIC base into reg
;    pushpic reg       push reg      (expands to nothing without PIC)
;    poppic  reg       pop  reg      (expands to nothing without PIC)
;    movpic  dst,src   mov  dst,src  (expands to nothing without PIC)
;
;  Without PIC, GOTOFF(got,sym) is just (sym) and get_GOT emits nothing.
;

; PIC is honoured only when the platform configuration defined GOT_SYMBOL.
%ifndef GOT_SYMBOL
%undef PIC
%endif

%ifdef PIC ; -------------------------------------------

%ifidn GOT_SYMBOL,_MACHO_PIC_ ; --------------------

; At present, nasm doesn't seem to support PIC generation for Mach-O.
; The PIC support code below is a little tricky.

; Addresses are expressed relative to const_base, a label placed in the
; constant section at the point where this file is included.
        SECTION SEG_CONST
const_base:

%define GOTOFF(got,sym) (got) + (sym) - const_base

; get_GOT reg  (Mach-O variant)
;   Leaves the run-time address of const_base in reg.
;   NOTE: this macro destroys ecx register.
;   ebp is saved and restored around its temporary use as a zero index.
%imacro get_GOT 1
        call    %%geteip                ; ecx = return address (the add below)
        add     ecx, byte (%%ref - $)   ; ecx = run-time address of the ref label
        jmp     short %%adjust
%%geteip:
        mov     ecx, POINTER [esp]
        ret
%%adjust:
        push    ebp
        xor     ebp,ebp                 ; ebp = 0
%ifidni %1,ebx  ; (%1 == ebx)
        ; db 0x8D,0x9C + jmp near const_base =
        ;   lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32)
        ; The assembler computes the jmp displacement (const_base-%%ref);
        ; the two leading bytes turn opcode+displacement into the lea above.
        db      0x8D,0x9C               ; 8D,9C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:
%else  ; (%1 != ebx)
        ; db 0x8D,0x8C + jmp near const_base =
        ;   lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32)
        db      0x8D,0x8C               ; 8D,8C
        jmp     near const_base         ; E9,(const_base-%%ref)
%%ref:  mov     %1, ecx
%endif ; (%1 == ebx)
        pop     ebp
%endmacro

%else   ; GOT_SYMBOL != _MACHO_PIC_ ----------------

%define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff

; get_GOT reg  (GOT variant)
;   The call/ret pair puts the address of the add instruction into reg;
;   the add then applies the ..gotpc-relative offset of GOT_SYMBOL.
%imacro get_GOT 1
        extern  GOT_SYMBOL
        call    %%geteip
        add     %1, GOT_SYMBOL + $$ - $ wrt ..gotpc
        jmp     short %%done
%%geteip:
        mov     %1, POINTER [esp]
        ret
%%done:
%endmacro

%endif  ; GOT_SYMBOL == _MACHO_PIC_ ----------------

; With PIC enabled these are plain push / pop / mov.
%imacro pushpic 1.nolist
        push    %1
%endmacro
%imacro poppic  1.nolist
        pop     %1
%endmacro
%imacro movpic  2.nolist
        mov     %1,%2
%endmacro

%else   ; !PIC -----------------------------------------

; Without PIC the symbol is addressed directly and the helper macros
; expand to nothing.
%define GOTOFF(got,sym) (sym)

%imacro get_GOT 1.nolist
%endmacro
%imacro pushpic 1.nolist
%endmacro
%imacro poppic  1.nolist
%endmacro
%imacro movpic  2.nolist
%endmacro

%endif  ;  PIC -----------------------------------------
; --------------------------------------------------------------------------
;  Align the next instruction on {2,4,8,16,..}-byte boundary.
;  ".balign n,,m" in GNU as
;
;  alignx n [,m]
;    n   alignment boundary
;    m   optional limit (default 0xFFFF): when more than m bytes of padding
;        would be needed, nothing is emitted at all
;
;  Helpers:
;    MSKLE(x,y)   non-zero mask (low 16 bits all set) when x <= y, else 0;
;                 both operands are truncated to 16 bits
;    FILLB(b,n)   number of bytes from position b up to the next n-byte
;                 boundary, counted from the section start ($$)
;
;  A gap of 16 bytes or more is filled entirely with one-byte nops.
;  A smaller gap is filled with the fixed no-op encodings below, longest
;  first; each "times" count is recomputed from the current position ($).
;
;  NOTE(review): the padding bytes are 32-bit encodings that write ebx/ebp.
;  If the padding is executed in 64-bit code, such writes clear the upper
;  halves of rbx/rbp - confirm that 64-bit callers never fall through it.
;
%define MSKLE(x,y)  (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16)
%define FILLB(b,n)  (($$-(b)) & ((n)-1))

%imacro alignx 1-2.nolist 0xFFFF
%%bs:   times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \
               db 0x90                               ; nop
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \
               db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \
               db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \
               db 0x8D,0xAD,0x00,0x00,0x00,0x00      ; lea ebp,[ebp+0x00000000]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \
               db 0x8D,0x6C,0x25,0x00                ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \
               db 0x8D,0x6D,0x00                     ; lea ebp,[ebp+0x00]
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \
               db 0x8B,0xED                          ; mov ebp,ebp
        times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \
               db 0x90                               ; nop
%endmacro
; Align the next data on {2,4,8,16,..}-byte boundary.
;
;  alignz n
;    n   alignment boundary; the gap is filled with zero bytes
;
%imacro alignz 1.nolist
        align %1, db 0          ; filling zeros
%endmacro
; collect_args / uncollect_args  (defined only when __x86_64__ is defined)
;
;  collect_args copies up to six incoming arguments into r10..r15, so the
;  code that follows can use the same registers under either calling
;  convention. uncollect_args restores, in reverse order, everything that
;  collect_args pushed. Between the two, the stack pointer must be back at
;  the value collect_args left it at.
;
%ifdef __x86_64__

%ifdef WIN64

; Win64 variant.
;   Register arguments: rcx -> r10, rdx -> r11, r8 -> r12, r9 -> r13.
;   Two further values are loaded from [rax+48] and [rax+56] into r14/r15.
;   Saved on the stack: r12-r15, rsi, rdi, xmm6, xmm7.
;   NOTE(review): rax must be set up by the caller before this macro -
;   presumably a copy of rsp taken right after "push rbp", which would make
;   these loads the 5th and 6th stack arguments. Verify against callers.
;   NOTE(review): movaps needs rsp to be 16-byte aligned at the point of
;   each store - confirm the caller's stack state guarantees this.
%imacro collect_args 0
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rcx
        mov     r11, rdx
        mov     r12, r8
        mov     r13, r9
        mov     r14, [rax+48]
        mov     r15, [rax+56]
        push    rsi
        push    rdi
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm6
        sub     rsp, SIZEOF_XMMWORD
        movaps  XMMWORD [rsp], xmm7
%endmacro

; Restores xmm7, xmm6, rdi, rsi, r15..r12 (reverse of the saves above).
%imacro uncollect_args 0
        movaps  xmm7, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        movaps  xmm6, XMMWORD [rsp]
        add     rsp, SIZEOF_XMMWORD
        pop     rdi
        pop     rsi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
%endmacro

%else

; Non-Win64 variant.
;   Register arguments: rdi -> r10, rsi -> r11, rdx -> r12, rcx -> r13,
;   r8 -> r14, r9 -> r15.
;   Saved on the stack: r10-r15.
%imacro collect_args 0
        push    r10
        push    r11
        push    r12
        push    r13
        push    r14
        push    r15
        mov     r10, rdi
        mov     r11, rsi
        mov     r12, rdx
        mov     r13, rcx
        mov     r14, r8
        mov     r15, r9
%endmacro

; Restores r15..r10 (reverse of the pushes above).
%imacro uncollect_args 0
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     r11
        pop     r10
%endmacro

%endif

%endif
; --------------------------------------------------------------------------
; Defines picked up from the C headers
;
%include "jsimdcfg.inc"
; --------------------------------------------------------------------------