;;; x86_64 assembly for basic texture mapping functions, based on a.nasm.
;;; For use with System V AMD64 calling convention (rdi rsi rdx rcx r8 r9)
;;; See the included license file "BUILDLIC.TXT" for license info.
;;;
;;; Syntax: NASM/YASM (Intel operand order: op dst, src).
;;; Both exported routines are leaf functions: they call nothing, so the
;;; stack only has to be balanced, not 16-byte aligned.

SECTION .text

;;; Platforms whose C symbols carry a leading underscore define UNDERSCORES.
%ifdef UNDERSCORES
%define asm1            _asm1
%define asm2            _asm2
%define a64_bpl         _a64_bpl
%define a64_transmode   _a64_transmode
%define a64_glogy       _a64_glogy
%define a64_gtrans      _a64_gtrans
%define a64_paloffs     _a64_paloffs
%define tvlineasm1      _tvlineasm1
%define tvlineasm2      _tvlineasm2
%endif

;;; Imports
EXTERN asm1             ; intptr_t
EXTERN asm2             ; intptr_t
EXTERN a64_bpl          ; int32_t
EXTERN a64_transmode    ; int32_t  (not used yet, see TODO at tvlineasm1)
EXTERN a64_glogy        ; int32_t
EXTERN a64_gtrans       ; char *
EXTERN a64_paloffs      ; intptr_t

;;; Exports
GLOBAL tvlineasm1       ; Masked & translucent 1-pixel wide vline
GLOBAL tvlineasm2       ; Masked & translucent 2-pixel wide vline

;;; Texel value that marks a transparent (masked) pixel: never drawn.
MASKED_TEXEL    equ 255

;;; ========== MACROS ==========

; Construct name referring to low doubleword of GPR, only for r8-r15.
%define REGd(reg)   reg %+ d
; Construct name of whole GPR from word-sized register, only for ax-bp.
%define rREG(regw)  r %+ regw
; Construct name of low doubleword of GPR from word-sized register, only for ax-bp.
%define eREG(regw)  e %+ regw

;;; Multi-line macros for code shared between functions.
;;; The r* names used inside them (rBpl, rATransluc, rDst, rAPalookup) are
;;; resolved when a macro is expanded, i.e. against whatever %define is
;;; active in the function that uses it.

;; Prologue getters: save the register, then load it from the global.

; rBpl := (int64_t)a64_bpl
; a64_bpl is a signed 32-bit value that is later added to 64-bit pointers,
; so it is sign-extended rather than zero-extended.
; The register currently chosen for rBpl (r10) is volatile in the System V
; ABI, so the save is merely conservative.
%macro PGET_rBpl 0
        push    rBpl
        movsxd  rBpl, dword [rel a64_bpl]
%endmacro

; rATransluc := a64_gtrans (base address of the translucency table)
%macro PGET_rATransluc 0
        push    rATransluc
        mov     rATransluc, [rel a64_gtrans]
%endmacro

; Get right shift count 'glogy' into cl.
; cl must stay intact for as long as gettexel is used.
%macro GET_cl 0
        mov     cl, byte [rel a64_glogy]
%endmacro

; Look up texel:  reg := bufplc[vplc >> glogy]
;   %1  word-sized name (ax..bp) of the destination register; its e- and
;       r- forms are derived from it
;   %2  64-bit register holding the texture column base address (bufplc)
;   %3  32-bit register holding the texture coordinate (vplc)
; Expects the shift count in cl (see GET_cl).
; High bytes of the destination are cleared to zero.
%macro gettexel 3
        mov     eREG(%1), %3
        shr     eREG(%1), cl
        movzx   eREG(%1), byte [%2 + rREG(%1)]
%endmacro

; Do palette/shade and translucency lookup:
;   %2 := transluc[(framebuffer pixel << 8) | palookup[texel]]
;   %1  64-bit scratch register
;   %2  low byte register of %1, receives the resulting pixel
;   %3  64-bit register holding the zero-extended texel
;   %4  byte offset from rDst of the frame buffer pixel to blend with
; On exit bits 8-15 of %1 still hold the old frame buffer pixel.
%macro dotranspal 4
        ; Get palette index of the pixel in the frame buffer.
        ; NOTE: e.g. "mov ah, byte [reg]" is not encodeable.
        movzx   %1, byte [rDst + %4]
        shl     %1, 8
        mov     %2, byte [rAPalookup + %3]      ; palette/shade
        mov     %2, byte [rATransluc + %1]      ; translucency
%endmacro


;;; ========== TVLINEASM1 ==========
;;; TODO: transmode, nonpow2

;;; Registers used in the loop
%define rBpl        r10
%define rATransluc  r12
%define rVplc       r11d

;;; Registers of input args also used in the loop
%define rVinc       edi
%define rAPalookup  rsi
%define rCnt        edx
%define rABufplc    r8
%define rDst        r9

;;; int32_t tvlineasm1(vinc, *paloffs, cnt, vplc, *bufplc, *p)
;;;  eax               edi    rsi      edx   ecx    r8      r9
;;;
;;; Draws cnt+1 pixels of one masked, translucent column, stepping the
;;; destination by a64_bpl bytes per pixel and vplc by vinc.
;;; Returns the final vplc in eax.
;;; Reads globals: a64_bpl, a64_glogy, a64_gtrans.
;;; Clobbers: rax, rcx, rdx, r9, r11, flags.
        ALIGN 16
tvlineasm1:
        inc     rCnt                    ; the loop runs cnt+1 times

        ;; First, back up callee-saved registers and set up those used in the loop.
        PGET_rBpl
        mov     rVplc, ecx              ; free ecx: cl becomes the shift count
        GET_cl
        PGET_rATransluc
        push    rbx

        jmp     short .loop             ; hop over the alignment padding

        ALIGN 16
.loop:
        gettexel bx, rABufplc, rVplc
        cmp     bl, MASKED_TEXEL
        je      short .next             ; transparent texel: leave pixel alone

        dotranspal rax, al, rbx, 0
        mov     byte [rDst], al

.next:
        add     rVplc, rVinc
        add     rDst, rBpl
        dec     rCnt
        jnz     short .loop

        mov     eax, rVplc              ; return vplc

        pop     rbx
        pop     rATransluc
        pop     rBpl
        ret

;;; Undefine input arg macros
%undef rVinc
%undef rAPalookup
%undef rCnt
%undef rABufplc
%undef rDst

;;; Undefine rVplc register macro, but keep rBpl and rATransluc (r10 and r12).
;;; Take care not to clash with them in the following.
%undef rVplc


;;; ========== TVLINEASM2 ==========

;;; Registers used in the loop
%define rVinc2      r11d
%define rABufplc2   r13
%define rDstEnd     r14
%define rAPalookup  r15         ; TODO: second paloffs!

;;; Registers of input args also used in the loop
%define rVplc2      edi
%define rVinc1      esi
%define rABufplc1   rdx
;                   bufplc2: rcx -> r13
%define rVplc1      r8d
%define rDst        r9

;;; Pass:   asm1=vinc2, asm2=pend
;;; Return: asm1=vplc1, asm2=vplc2
;;; void tvlineasm2(vplc2, vinc1, *bufplc1, *bufplc2, vplc1, *p)
;;;                 edi    esi    rdx       rcx       r8d    r9
;;;
;;; Draws two adjacent masked, translucent columns: column 1 goes to p[0],
;;; column 2 to p[1]. Each texel is masked on its own, so a transparent
;;; texel in one column never touches its frame buffer pixel, whatever the
;;; other column holds.
;;; The loop ends when p reaches (pend - 1 + a64_bpl).
;;; Reads globals: asm1, asm2, a64_bpl, a64_glogy, a64_gtrans, a64_paloffs.
;;; Clobbers: rax, rcx, rdi, r8, r9, r11, flags.
        ALIGN 16
tvlineasm2:
        ;; First, back up callee-saved registers and set up those used in the loop.
        mov     rVinc2, dword [rel asm1]
        PGET_rBpl
        push    rABufplc2
        mov     rABufplc2, rcx          ; free rcx: cl becomes the shift count
        GET_cl
        PGET_rATransluc
        push    rDstEnd
        mov     rDstEnd, [rel asm2]
        dec     rDstEnd
        add     rDstEnd, rBpl           ; one more: compare with a-c.c's tvlineasm2()
        push    rbx
        push    rbp
        push    rAPalookup
        mov     rAPalookup, [rel a64_paloffs]

        jmp     short .loop             ; hop over the alignment padding

        ALIGN 16
.loop:
        gettexel bx, rABufplc1, rVplc1
        gettexel bp, rABufplc2, rVplc2

        ;; Column 1 -> p[0]. gettexel zero-extends, so the whole dword can
        ;; be compared against the mask value.
        cmp     ebx, MASKED_TEXEL
        je      short .second
        dotranspal rax, al, rbx, 0
        mov     byte [rDst], al

.second:
        ;; Column 2 -> p[1]
        cmp     ebp, MASKED_TEXEL
        je      short .next
        dotranspal rax, al, rbp, 1
        mov     byte [rDst + 1], al

.next:
        add     rVplc1, rVinc1
        add     rVplc2, rVinc2
        add     rDst, rBpl
        cmp     rDst, rDstEnd
        jnz     .loop

        ;; Hand the final texture coordinates back through asm1/asm2, as the
        ;; contract above promises. Both are intptr_t, hence the 64-bit
        ;; stores; the 32-bit moves zero-extend into rax first.
        mov     eax, rVplc1
        mov     qword [rel asm1], rax
        mov     eax, rVplc2
        mov     qword [rel asm2], rax

        ;; Restore in exact reverse order of the pushes above.
        pop     rAPalookup
        pop     rbp
        pop     rbx
        pop     rDstEnd
        pop     rATransluc
        pop     rABufplc2
        pop     rBpl
        ret