mirror of
https://github.com/ZDoom/raze-gles.git
synced 2024-11-10 23:02:03 +00:00
Add WIP x86_64 assembly texture mapping routines.
- Currently: only tvlineasm1 and tvlineasm2, but incomplete (no reverse translucency, nonpow2 textures will crash) - For System V AMD64 calling conventions; requires YASM git-svn-id: https://svn.eduke32.com/eduke32@4066 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
d5b6496ddb
commit
eac9dd04bf
5 changed files with 272 additions and 7 deletions
|
@ -146,6 +146,9 @@ USE_LIBVPX ?= 1
|
|||
NETCODE ?= 1
|
||||
LUNATIC ?= 0
|
||||
|
||||
# EXPERIMENTAL, unfinished x86_64 assembly routines. DO NOT ENABLE.
|
||||
USE_ASM64 ?= 0
|
||||
|
||||
ifeq (0,$(USE_OPENGL))
|
||||
POLYMER = 0
|
||||
USE_LIBVPX = 0
|
||||
|
|
|
@ -52,6 +52,9 @@ ifeq (0,$(NOASM))
|
|||
ENGINEOBJS+= $(OBJ)/a.$o
|
||||
else
|
||||
ENGINEOBJS+= $(OBJ)/a-c.$o
|
||||
ifneq (0,$(USE_ASM64))
|
||||
ENGINEOBJS+= $(OBJ)/a64.$o
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
@ -288,6 +291,11 @@ $(OBJ)/%.$o: $(SRC)/%.nasm
|
|||
$(COMPILE_STATUS)
|
||||
if $(AS) $(OURASFLAGS) $< -o $@; then $(COMPILE_OK); else $(COMPILE_FAILED); fi
|
||||
|
||||
# TODO: use Makefile variables for the assembler name and output format instead of hard-coding "yasm -f elf64".
|
||||
$(OBJ)/%.$o: $(SRC)/%.yasm
|
||||
$(COMPILE_STATUS)
|
||||
if yasm -f elf64 $< -o $@; then $(COMPILE_OK); else $(COMPILE_FAILED); fi
|
||||
|
||||
# Comment out the following rule to debug a-c.o
|
||||
$(OBJ)/a-c.$o: $(SRC)/a-c.c
|
||||
$(COMPILE_STATUS)
|
||||
|
|
|
@ -244,6 +244,9 @@ endif
|
|||
ifneq (0,$(NOASM))
|
||||
BUILDCOMMONFLAGS+= -DNOASM
|
||||
endif
|
||||
ifneq (0,$(USE_ASM64))
|
||||
BUILDCOMMONFLAGS+= -DUSE_ASM64
|
||||
endif
|
||||
ifneq (0,$(LINKED_GTK))
|
||||
BUILDCOMMONFLAGS+= -DLINKED_GTK
|
||||
endif
|
||||
|
|
|
@ -19,16 +19,33 @@ extern intptr_t asm1, asm2, asm3, asm4;
|
|||
extern int32_t fpuasm, globalx3, globaly3;
|
||||
extern void *reciptable;
|
||||
|
||||
// A64_ASSIGN(var, val): mirror a value into one of the a64_* variables that
// a64.yasm imports.  With USE_ASM64 defined it expands to a fully
// parenthesized assignment expression; otherwise it expands to a no-op
// expression (and <val> is not evaluated).  Either way it forms a complete
// statement when followed by ';', so it is safe in unbraced if/else bodies.
#ifdef USE_ASM64
# define A64_ASSIGN(var, val) ((var) = (val))
#else
# define A64_ASSIGN(var, val) ((void)0)
#endif
|
||||
|
||||
#ifdef USE_ASM64
|
||||
// Variables for a64.yasm, which imports them with EXTERN. They are written from this file through A64_ASSIGN() and GLOGY.
|
||||
int32_t a64_bpl, a64_transmode, a64_glogy;  // written by setvlinebpl(), settransnormal()/settransreverse(), setuptvlineasm()/setuptvlineasm2()
|
||||
intptr_t a64_paloffs;  // written by setuptvlineasm2() (paloffs1 only)
|
||||
char *a64_gtrans;  // written by fixtransluscence()
|
||||
#endif
|
||||
|
||||
static int32_t bpl, transmode = 0;
|
||||
static int32_t glogx, glogy, gbxinc, gbyinc, gpinc;
|
||||
static char *gbuf, *gpal, *ghlinepal, *gtrans;
|
||||
static char *gpal2;
|
||||
|
||||
//Global variable functions
|
||||
void setvlinebpl(int32_t dabpl) { bpl = dabpl; }
|
||||
void fixtransluscence(intptr_t datransoff) { gtrans = (char *)datransoff; }
|
||||
void settransnormal(void) { transmode = 0; }
|
||||
void settransreverse(void) { transmode = 1; }
|
||||
void setvlinebpl(int32_t dabpl) { A64_ASSIGN(a64_bpl, dabpl); bpl = dabpl;}
|
||||
void fixtransluscence(intptr_t datransoff)
|
||||
{
|
||||
A64_ASSIGN(a64_gtrans, (char *)datransoff);
|
||||
gtrans = (char *)datransoff;
|
||||
}
|
||||
void settransnormal(void) { A64_ASSIGN(a64_transmode, 0); transmode = 0; }
|
||||
void settransreverse(void) { A64_ASSIGN(a64_transmode, 1); transmode = 1; }
|
||||
|
||||
|
||||
///// Ceiling/floor horizontal line functions /////
|
||||
|
@ -230,8 +247,18 @@ void mvlineasm4(int32_t cnt, char *p)
|
|||
Bmemcpy(vplce, vplc, sizeof(vplce));
|
||||
}
|
||||
|
||||
// GLOGY names the variable that receives the tvline shift count: the copy
// read by a64.yasm when USE_ASM64 is defined, the C-side 'glogy' otherwise.
#ifndef USE_ASM64
# define GLOGY glogy
#else
# define GLOGY a64_glogy
#endif
|
||||
|
||||
void setuptvlineasm(int32_t neglogy) { glogy = neglogy; }
|
||||
void setuptvlineasm(int32_t neglogy)
|
||||
{
|
||||
GLOGY = neglogy;
|
||||
}
|
||||
|
||||
#if !defined USE_ASM64
|
||||
// cnt+1 loop iterations!
|
||||
int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
|
||||
{
|
||||
|
@ -270,13 +297,17 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
|
|||
|
||||
return vplc;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Set up state for tvlineasm2(): the shift count and two palette lookup table addresses (presumably one per pixel column -- TODO confirm).
void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2)
|
||||
{
|
||||
glogy = neglogy;  // NOTE(review): diff-view residue -- this looks like the pre-change line that the GLOGY assignment below replaces
|
||||
GLOGY = neglogy;  // a64_glogy when USE_ASM64 is defined, glogy otherwise
|
||||
A64_ASSIGN(a64_paloffs, paloffs1);  // only paloffs1 is mirrored for a64.yasm; paloffs2 is not passed to it
|
||||
gpal = (char *)paloffs1;
|
||||
gpal2 = (char *)paloffs2;
|
||||
}
|
||||
|
||||
#if !defined USE_ASM64
|
||||
// Pass: asm1=vinc2, asm2=pend
|
||||
// Return: asm1=vplc1, asm2=vplc2
|
||||
void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc2, uint32_t vplc1, intptr_t p)
|
||||
|
@ -330,7 +361,7 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc
|
|||
asm1 = vplc1;
|
||||
asm2 = vplc2;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//Floor sprite horizontal line functions
|
||||
// Store the x and y shift counts for the floor sprite horizontal line functions.
void msethlineshift(int32_t logx, int32_t logy)
{
    glogy = logy;
    glogx = logx;
}
|
||||
|
|
220
polymer/eduke32/build/src/a64.yasm
Normal file
220
polymer/eduke32/build/src/a64.yasm
Normal file
|
@ -0,0 +1,220 @@
|
|||
;;; x86_64 assembly for basic texture mapping functions, based on a.nasm.
|
||||
;;; For use with System V AMD64 calling convention (rdi rsi rdx rcx r8 r9)
|
||||
;;; See the included license file "BUILDLIC.TXT" for license info.
|
||||
|
||||
SECTION .text
|
||||
|
||||
%ifdef UNDERSCORES
|
||||
%define asm1 _asm1
|
||||
%define asm2 _asm2
|
||||
|
||||
%define a64_bpl _a64_bpl
|
||||
%define a64_transmode _a64_transmode
|
||||
%define a64_glogy _a64_glogy
|
||||
%define a64_gtrans _a64_gtrans
|
||||
%define a64_paloffs _a64_paloffs
|
||||
|
||||
%define tvlineasm1 _tvlineasm1
|
||||
%define tvlineasm2 _tvlineasm2
|
||||
%endif
|
||||
|
||||
;;; Imports
|
||||
EXTERN asm1 ; intptr_t
|
||||
EXTERN asm2 ; intptr_t
|
||||
|
||||
EXTERN a64_bpl ; int32_t
|
||||
EXTERN a64_transmode ; int32_t
|
||||
EXTERN a64_glogy ; int32_t
|
||||
EXTERN a64_gtrans ; char *
|
||||
EXTERN a64_paloffs ; intptr_t
|
||||
|
||||
;;; Exports
|
||||
GLOBAL tvlineasm1 ; Masked & translucent 1-pixel wide vline
|
||||
GLOBAL tvlineasm2 ; Masked & translucent 2-pixel wide vline
|
||||
|
||||
|
||||
;;; ========== MACROS ==========
|
||||
|
||||
; Construct name referring to low doubleword of GPR, only for r8-r15 (e.g. REGd(r10) -> r10d).
|
||||
%define REGd(reg) reg %+ d
|
||||
; Construct name of whole GPR from word-sized register, only for ax-bp (e.g. rREG(bx) -> rbx).
|
||||
%define rREG(regw) r %+ regw
|
||||
; Construct name of low doubleword of GPR from word-sized register, only for ax-bp (e.g. eREG(bx) -> ebx).
|
||||
%define eREG(regw) e %+ regw
|
||||
|
||||
;;; Multi-line macros for code shared between functions
|
||||
|
||||
;; Prologue getters
;; Each one saves a loop register on the stack and then loads it from the
;; matching a64_* variable.  The function using them must pop the register
;; again before returning (in reverse order of the pushes).
|
||||
%macro PGET_rBpl 0
|
||||
push rBpl
|
||||
mov REGd(rBpl), dword [rel a64_bpl] ; 32-bit load; writing the dword register zeroes the upper half of rBpl
|
||||
%endmacro
|
||||
|
||||
%macro PGET_rATransluc 0
|
||||
push rATransluc
|
||||
mov rATransluc, [rel a64_gtrans] ; full 64-bit pointer load
|
||||
%endmacro
|
||||
|
||||
; Get right shift count 'glogy' into cl
; This overwrites the low byte of rcx, so an argument that arrives in rcx/ecx
; must be copied to another register first (both functions below do so).
|
||||
%macro GET_cl 0
|
||||
mov cl, byte [rel a64_glogy]
|
||||
%endmacro
|
||||
|
||||
; Look up texel
|
||||
; <dest_reg_lobyte> := bufplc[vplc>>glogy]
|
||||
; High bytes are cleared to zero.
; Expects the shift count in cl (see GET_cl).  <dest_reg_word> is the 16-bit
; name of the destination (ax-bp) so that eREG/rREG can build the wider names;
; <vplc_reg> is a 32-bit register.
|
||||
%macro gettexel 3 ; <dest_reg_word> <bufplc_reg> <vplc_reg>
|
||||
mov eREG(%1), %3
|
||||
shr eREG(%1), cl ; texture index = vplc >> glogy
|
||||
movzx eREG(%1), byte [%2 + rREG(%1)] ; fetch the texel, zero-extended
|
||||
%endmacro
|
||||
|
||||
; Do palette/shade and translucency lookup
; <tmp_reg_lobyte> := rATransluc[(rDst[<dst_ofs>] << 8) | rAPalookup[<texel_reg>]]
; <tmp_reg_lobyte> must be the low byte of <tmp_reg>.  Uses the rDst,
; rAPalookup and rATransluc register macros that are in effect at the point
; of expansion.  Only this one index order is implemented (see the transmode TODO).
|
||||
%macro dotranspal 4 ; <tmp_reg> <tmp_reg_lobyte> <texel_reg> <dst_ofs>
|
||||
; Get palette index of the pixel in the frame buffer.
|
||||
; NOTE: e.g. "mov ah, byte [reg]" is not encodeable when <reg> needs a REX prefix (r8-r15, such as rDst = r9).
|
||||
movzx %1, byte [rDst + %4]
|
||||
shl %1, 8 ; frame buffer pixel now in bits 8-15, low byte free
|
||||
|
||||
mov %2, byte [rAPalookup + %3] ; palette/shade
|
||||
mov %2, byte [rATransluc + %1] ; translucency
|
||||
%endmacro
|
||||
|
||||
|
||||
;;; ========== TVLINEASM1 ==========
|
||||
;;; TODO: transmode, nonpow2
|
||||
|
||||
;;; Registers used in the loop
|
||||
%define rBpl r10
|
||||
%define rATransluc r12
|
||||
%define rVplc r11d
|
||||
|
||||
;;; Registers of input args also used in the loop
|
||||
%define rVinc edi
|
||||
%define rAPalookup rsi
|
||||
%define rCnt edx
|
||||
%define rABufplc r8
|
||||
%define rDst r9
|
||||
|
||||
;;; int32_t tvlineasm1(vinc, *paloffs, cnt, vplc, *bufplc, *p)
|
||||
;;; eax edi rsi edx ecx r8 r9
;;; Draws cnt+1 pixels downwards from p, one every a64_bpl bytes, and returns
;;; the advanced vplc.  Texels equal to 255 leave the destination untouched.
|
||||
ALIGN 16
|
||||
tvlineasm1:
|
||||
inc rCnt ; the loop below runs cnt+1 times
|
||||
;; First, back up callee-saved registers and set up those used in the loop.
;; NOTE(review): r10 (rBpl) is caller-saved under the System V AMD64 ABI, so
;; saving it is harmless but not required; r12 and rbx are callee-saved.
|
||||
PGET_rBpl
|
||||
mov rVplc, ecx ; copy vplc out of ecx before GET_cl overwrites cl
|
||||
GET_cl
|
||||
PGET_rATransluc
|
||||
push rbx
|
||||
jmp short tv1_loop
|
||||
|
||||
ALIGN 16
|
||||
tv1_loop:
|
||||
gettexel bx, rABufplc, rVplc
|
||||
cmp bl, 255 ; texel 255: skip the write
|
||||
je short tv1_skiptrans
|
||||
|
||||
dotranspal rax, al, rbx, 0
|
||||
mov byte [rDst], al
|
||||
tv1_skiptrans:
|
||||
add rVplc, rVinc ; step the texture coordinate
|
||||
add rDst, rBpl ; step the destination by a64_bpl bytes
|
||||
dec rCnt
|
||||
jnz short tv1_loop
|
||||
|
||||
mov eax, rVplc ; return vplc
|
||||
pop rbx
|
||||
pop rATransluc
|
||||
pop rBpl
|
||||
ret
|
||||
|
||||
;;; Undefine input arg macros
|
||||
%undef rVinc
|
||||
%undef rAPalookup
|
||||
%undef rCnt
|
||||
%undef rABufplc
|
||||
%undef rDst
|
||||
|
||||
;;; Undefine rVplc register macro, but keep rBpl and rATransluc (r10 and r12).
|
||||
;;; Take care not to clash with them in the following.
|
||||
%undef rVplc
|
||||
|
||||
|
||||
;;; ========== TVLINEASM2 ==========
|
||||
|
||||
;;; Registers used in the loop
|
||||
%define rVinc2 r11d
|
||||
%define rABufplc2 r13
|
||||
%define rDstEnd r14
|
||||
%define rAPalookup r15 ; TODO: second paloffs!
|
||||
|
||||
;;; Registers of input args also used in the loop
|
||||
%define rVplc2 edi
|
||||
%define rVinc1 esi
|
||||
%define rABufplc1 rdx
|
||||
; bufplc2: rcx -> r13
|
||||
%define rVplc1 r8d
|
||||
%define rDst r9
|
||||
|
||||
;;; Pass: asm1=vinc2, asm2=pend
|
||||
;;; Return: asm1=vplc1, asm2=vplc2
|
||||
;;; void tvlineasm2(vplc2, vinc1, *bufplc1, *bufplc2, vplc1, *p)
|
||||
;;; edi esi rdx rcx r8d r9
|
||||
|
||||
;;; tvlineasm2: masked & translucent 2-pixel wide vline.
;;; In:  edi=vplc2, esi=vinc1, rdx=bufplc1, rcx=bufplc2, r8d=vplc1, r9=p,
;;;      asm1=vinc2, asm2=pend
;;; Out: asm1=vplc1, asm2=vplc2 (the advanced texture coordinates)
;;; Each iteration looks up one texel per column and writes them to [p] and
;;; [p+1], then steps p by a64_bpl until p == pend - 1 + a64_bpl.
;;; A texel equal to 255 leaves its destination byte untouched; the two
;;; columns are tested independently, as in tvlineasm1.
;;; TODO: transmode, nonpow2, second paloffs
    ALIGN 16
tvlineasm2:
    ;; Back up the registers used in the loop and load them.
    mov rVinc2, dword [rel asm1]
    PGET_rBpl
    push rABufplc2
    mov rABufplc2, rcx          ; copy bufplc2 out of rcx before GET_cl overwrites cl
    GET_cl
    PGET_rATransluc

    push rDstEnd
    mov rDstEnd, [rel asm2]
    dec rDstEnd
    add rDstEnd, rBpl           ; one more: compare with a-c.c's tvlineasm2()

    push rbx
    push rbp
    push rAPalookup
    mov rAPalookup, [rel a64_paloffs]
    jmp short tv2_loop

    ALIGN 16
tv2_loop:
    gettexel bx, rABufplc1, rVplc1
    gettexel bp, rABufplc2, rVplc2

    ;; gettexel zero-extends, so the dword compares test just the texel byte.
    cmp ebx, 255
    je tv2_skipfirst
    dotranspal rax, al, rbx, 0
    mov byte [rDst], al
tv2_skipfirst:
    cmp ebp, 255
    je tv2_skipsecond
    dotranspal rax, al, rbp, 1
    mov byte [rDst + 1], al
tv2_skipsecond:
    add rVplc1, rVinc1
    add rVplc2, rVinc2
    add rDst, rBpl
    cmp rDst, rDstEnd
    jnz tv2_loop

    ;; Return values.  asm1/asm2 are intptr_t; writing eax zero-extends the
    ;; 32-bit coordinate into rax, matching the C version's assignments.
    mov eax, rVplc1
    mov [rel asm1], rax
    mov eax, rVplc2
    mov [rel asm2], rax

    ;; Restore in reverse order of the pushes above.
    pop rAPalookup
    pop rbp
    pop rbx
    pop rDstEnd
    pop rATransluc
    pop rABufplc2
    pop rBpl
    ret
|
Loading…
Reference in a new issue