mirror of
https://github.com/ZDoom/raze-gles.git
synced 2024-11-07 21:40:27 +00:00
221 lines
4.6 KiB
Text
221 lines
4.6 KiB
Text
|
;;; x86_64 assembly for basic texture mapping functions, based on a.nasm.
|
||
|
;;; For use with System V AMD64 calling convention (rdi rsi rdx rcx r8 r9)
|
||
|
;;; See the included license file "BUILDLIC.TXT" for license info.
|
||
|
|
||
|
SECTION .text
|
||
|
|
||
|
%ifdef UNDERSCORES
    ; When UNDERSCORES is defined, every imported and exported symbol name
    ; used in this file is remapped to its underscore-prefixed form.

    ; Exported functions
    %define tvlineasm1      _tvlineasm1
    %define tvlineasm2      _tvlineasm2

    ; Imported variables
    %define asm1            _asm1
    %define asm2            _asm2
    %define a64_bpl         _a64_bpl
    %define a64_transmode   _a64_transmode
    %define a64_glogy       _a64_glogy
    %define a64_gtrans      _a64_gtrans
    %define a64_paloffs     _a64_paloffs
%endif
|
||
|
|
||
|
;;; Imports
|
||
|
EXTERN asm1 ; intptr_t
|
||
|
EXTERN asm2 ; intptr_t
|
||
|
|
||
|
EXTERN a64_bpl ; int32_t
|
||
|
EXTERN a64_transmode ; int32_t
|
||
|
EXTERN a64_glogy ; int32_t
|
||
|
EXTERN a64_gtrans ; char *
|
||
|
EXTERN a64_paloffs ; intptr_t
|
||
|
|
||
|
;;; Exports (entry points made visible to the linker)
GLOBAL tvlineasm1               ; 1-pixel wide vline, masked & translucent
GLOBAL tvlineasm2               ; 2-pixel wide vline, masked & translucent
|
||
|
|
||
|
|
||
|
;;; ========== MACROS ==========
|
||
|
|
||
|
; Register-name builders. Each one pastes a prefix or suffix onto the
; register name it is given, using the %+ token-concatenation operator.
;
;   REGd(gpr)   -> <gpr>d   low doubleword of a GPR; only valid for r8-r15
;   rREG(wreg)  -> r<wreg>  whole 64-bit GPR from its word-sized name; ax-bp only
;   eREG(wreg)  -> e<wreg>  low doubleword from the word-sized name; ax-bp only
%define REGd(gpr)       gpr %+ d
%define rREG(wreg)      r %+ wreg
%define eREG(wreg)      e %+ wreg
|
||
|
|
||
|
;;; Multi-line macros for code shared between functions
|
||
|
|
||
|
;; Prologue getters

; PGET_rBpl: save the register aliased by rBpl on the stack, then load the
; row pitch a64_bpl into it.
;
; a64_bpl is a signed 32-bit value (int32_t) and rBpl is later added to
; 64-bit pointers, so it is sign-extended to 64 bits here. A plain 32-bit
; move would zero-extend and turn a negative pitch into a jump of almost
; 4 GiB. For non-negative pitches the result is identical.
;
; Every use must be paired with a matching "pop rBpl" in the epilogue.
%macro PGET_rBpl 0
        push    rBpl
        movsxd  rBpl, dword [rel a64_bpl]
%endmacro
|
||
|
|
||
|
; PGET_rATransluc: save the register aliased by rATransluc on the stack, then
; load the 64-bit pointer stored in a64_gtrans (char *) into it.
; Every use must be paired with a matching "pop rATransluc" in the epilogue.
%macro PGET_rATransluc 0
        push    rATransluc
        mov     rATransluc, qword [rel a64_gtrans]
%endmacro
|
||
|
|
||
|
; GET_cl: load the low byte of a64_glogy into cl, where it serves as the
; right-shift count for gettexel. Overwrites cl, so anything passed in
; rcx/ecx must be copied elsewhere before this macro is used.
%macro GET_cl 0
        mov     cl, [rel a64_glogy]
%endmacro
|
||
|
|
||
|
; gettexel <dest_reg_word>, <bufplc_reg>, <vplc_reg>
;
; Texel lookup:  <dest_reg_lobyte> := bufplc[vplc >> glogy]
;
;   %1  destination, given by its word-sized name (e.g. bx); the e- and
;       r-prefixed forms are derived with eREG/rREG
;   %2  64-bit register holding the texture column base address
;   %3  32-bit register holding the texture coordinate
;
; Expects the shift count in cl (see GET_cl). The texel is zero-extended,
; so all higher bytes of the destination register are cleared.
%macro gettexel 3
        mov     eREG(%1), %3                        ; copy the coordinate
        shr     eREG(%1), cl                        ; coordinate -> texel offset
        movzx   eREG(%1), byte [rREG(%1) + %2]      ; fetch texel, clear upper bytes
%endmacro
|
||
|
|
||
|
; dotranspal <tmp_reg>, <tmp_reg_lobyte>, <texel_reg>, <dst_ofs>
;
; Palette/shade lookup followed by translucency lookup for one pixel.
;
;   %1  64-bit scratch register; receives the table index
;   %2  low-byte name of that same scratch register; receives the result
;   %3  64-bit register holding the texel value
;   %4  byte offset of the destination pixel relative to rDst
;
; Uses rDst, rAPalookup and rATransluc as defined at the expansion site.
; The translucency index is built as
;       (framebuffer byte << 8) | palookup[texel]
; and the final colour is left in %2. Nothing is stored to the frame
; buffer here; that is up to the caller.
%macro dotranspal 4
        ; Fetch the pixel already in the frame buffer and move it into
        ; bits 8..15 of the index. This goes through movzx + shl because
        ; e.g. "mov ah, byte [reg]" is not encodable here.
        movzx   %1, byte [rDst + %4]
        shl     %1, 8

        mov     %2, [rAPalookup + %3]           ; low index byte: shaded texel
        mov     %2, [rATransluc + %1]           ; translucency table lookup
%endmacro
|
||
|
|
||
|
|
||
|
;;; ========== TVLINEASM1 ==========
|
||
|
;;; TODO: transmode, nonpow2
|
||
|
|
||
|
;;; Register aliases for tvlineasm1

;;; Incoming arguments that are used in place inside the loop
%define rVinc           edi     ; arg 1: vinc
%define rAPalookup      rsi     ; arg 2: paloffs
%define rCnt            edx     ; arg 3: cnt
%define rABufplc        r8      ; arg 5: bufplc
%define rDst            r9      ; arg 6: p

;;; Registers set up by the prologue
%define rVplc           r11d    ; copy of arg 4 (vplc), which arrives in ecx
%define rBpl            r10     ; loaded by PGET_rBpl
%define rATransluc      r12     ; loaded by PGET_rATransluc
|
||
|
|
||
|
;;; int32_t tvlineasm1(vinc, *paloffs, cnt, vplc, *bufplc, *p)
;;;     eax            edi   rsi       edx  ecx   r8       r9
;;;
;;; Draws cnt+1 pixels of a vertical line, stepping the destination by
;;; a64_bpl per pixel. Texels equal to 255 are skipped; all others go through
;;; the palette/shade table and the translucency table before being stored.
;;; Returns the final texture coordinate (vplc) in eax.
;;; a64_transmode is not consulted by this routine.
ALIGN 16
tvlineasm1:
        ;; Save the registers the loop borrows and load the loop state.
        PGET_rBpl
        mov     rVplc, ecx              ; must happen before GET_cl overwrites cl
        GET_cl
        PGET_rATransluc
        push    rbx
        inc     rCnt                    ; the loop body runs cnt+1 times
        jmp     short .next_pixel

ALIGN 16
.next_pixel:
        gettexel bx, rABufplc, rVplc
        cmp     bl, 255
        jz      short .advance          ; 255 marks a masked texel: leave pixel alone

        dotranspal rax, al, rbx, 0
        mov     [rDst], al

.advance:
        add     rVplc, rVinc
        add     rDst, rBpl
        dec     rCnt
        jne     short .next_pixel

        mov     eax, rVplc              ; return vplc

        ;; Restore in reverse order of the pushes above.
        pop     rbx
        pop     rATransluc
        pop     rBpl
        ret
|
||
|
|
||
|
;;; Drop the input-argument register aliases of tvlineasm1
%undef rDst
%undef rABufplc
%undef rCnt
%undef rAPalookup
%undef rVinc

;;; Undefine the rVplc register alias as well, but keep rBpl and rATransluc
;;; (r10 and r12). Take care not to clash with them in the following.
%undef rVplc
|
||
|
|
||
|
|
||
|
;;; ========== TVLINEASM2 ==========
|
||
|
|
||
|
;;; Register aliases for tvlineasm2
;;; (rBpl = r10 and rATransluc = r12 are still defined from tvlineasm1.)

;;; Incoming arguments that are used in place inside the loop
%define rVplc2          edi     ; arg 1: vplc2
%define rVinc1          esi     ; arg 2: vinc1
%define rABufplc1       rdx     ; arg 3: bufplc1
; arg 4 (bufplc2) arrives in rcx and is moved to r13, see rABufplc2
%define rVplc1          r8d     ; arg 5: vplc1
%define rDst            r9      ; arg 6: p

;;; Registers set up by the prologue
%define rVinc2          r11d
%define rABufplc2       r13
%define rDstEnd         r14
%define rAPalookup      r15     ; TODO: second paloffs!
|
||
|
|
||
|
;;; void tvlineasm2(vplc2, vinc1, *bufplc1, *bufplc2, vplc1, *p)
;;;                 edi    esi    rdx       rcx       r8d    r9
;;;
;;; Masked & translucent vline, two adjacent pixel columns per row.
;;; Column 1 (bufplc1/vplc1/vinc1) is drawn at [p], column 2
;;; (bufplc2/vplc2/vinc2) at [p+1]; p advances by a64_bpl after each row.
;;;
;;; Pass:   asm1=vinc2, asm2=pend
;;; Return: asm1=vplc1, asm2=vplc2
;;;
;;; Each column is masked on its own: a texel of 255 leaves that column's
;;; destination pixel untouched, whatever the other column holds.
;;; Both columns use the palette table from a64_paloffs (see the TODO on the
;;; rAPalookup alias).
;;;
;;; The loop ends when p becomes exactly equal to pend - 1 + a64_bpl.

ALIGN 16
tvlineasm2:
        ;; First, back up callee-saved registers and set up those used in the loop.
        mov     rVinc2, dword [rel asm1]        ; read vinc2 before asm1 is reused for output
        PGET_rBpl
        push    rABufplc2
        mov     rABufplc2, rcx                  ; must happen before GET_cl overwrites cl
        GET_cl
        PGET_rATransluc

        push    rDstEnd
        mov     rDstEnd, [rel asm2]
        dec     rDstEnd
        add     rDstEnd, rBpl   ; one more: compare with a-c.c's tvlineasm2()

        push    rbx
        push    rAPalookup
        mov     rAPalookup, [rel a64_paloffs]
        jmp     short .next_row

ALIGN 16
.next_row:
        ;; Column 1 -> [rDst]
        gettexel bx, rABufplc1, rVplc1
        cmp     bl, 255
        je      .skip_col1                      ; masked texel: keep destination pixel
        dotranspal rax, al, rbx, 0
        mov     byte [rDst], al
.skip_col1:

        ;; Column 2 -> [rDst + 1]
        gettexel bx, rABufplc2, rVplc2
        cmp     bl, 255
        je      .skip_col2                      ; masked texel: keep destination pixel
        dotranspal rax, al, rbx, 1
        mov     byte [rDst + 1], al
.skip_col2:

        add     rVplc1, rVinc1
        add     rVplc2, rVinc2
        add     rDst, rBpl
        cmp     rDst, rDstEnd
        jne     .next_row

        ;; Hand the advanced texture coordinates back through asm1/asm2.
        ;; They are intptr_t, so widen the 32-bit values via eax
        ;; (a 32-bit mov clears the upper half of rax).
        mov     eax, rVplc1
        mov     [rel asm1], rax
        mov     eax, rVplc2
        mov     [rel asm2], rax

        ;; Restore in reverse order of the pushes above.
        pop     rAPalookup
        pop     rbx
        pop     rDstEnd
        pop     rATransluc
        pop     rABufplc2
        pop     rBpl
        ret
|