mirror of
https://github.com/ZDoom/raze-gles.git
synced 2025-01-26 00:40:56 +00:00
Add WIP x86_64 assembly texture mapping routines.
- Currently only tvlineasm1 and tvlineasm2 are implemented, and both are incomplete (no reverse translucency; non-power-of-two textures will crash). - For the System V AMD64 calling convention; requires YASM. git-svn-id: https://svn.eduke32.com/eduke32@4066 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
d5b6496ddb
commit
eac9dd04bf
5 changed files with 272 additions and 7 deletions
|
@ -146,6 +146,9 @@ USE_LIBVPX ?= 1
|
||||||
NETCODE ?= 1
|
NETCODE ?= 1
|
||||||
LUNATIC ?= 0
|
LUNATIC ?= 0
|
||||||
|
|
||||||
|
# EXPERIMENTAL, unfinished x86_64 assembly routines. DO NOT ENABLE.
|
||||||
|
USE_ASM64 ?= 0
|
||||||
|
|
||||||
ifeq (0,$(USE_OPENGL))
|
ifeq (0,$(USE_OPENGL))
|
||||||
POLYMER = 0
|
POLYMER = 0
|
||||||
USE_LIBVPX = 0
|
USE_LIBVPX = 0
|
||||||
|
|
|
@ -52,6 +52,9 @@ ifeq (0,$(NOASM))
|
||||||
ENGINEOBJS+= $(OBJ)/a.$o
|
ENGINEOBJS+= $(OBJ)/a.$o
|
||||||
else
|
else
|
||||||
ENGINEOBJS+= $(OBJ)/a-c.$o
|
ENGINEOBJS+= $(OBJ)/a-c.$o
|
||||||
|
ifneq (0,$(USE_ASM64))
|
||||||
|
ENGINEOBJS+= $(OBJ)/a64.$o
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -288,6 +291,11 @@ $(OBJ)/%.$o: $(SRC)/%.nasm
|
||||||
$(COMPILE_STATUS)
|
$(COMPILE_STATUS)
|
||||||
if $(AS) $(OURASFLAGS) $< -o $@; then $(COMPILE_OK); else $(COMPILE_FAILED); fi
|
if $(AS) $(OURASFLAGS) $< -o $@; then $(COMPILE_OK); else $(COMPILE_FAILED); fi
|
||||||
|
|
||||||
|
# TODO: use Makefile variables for the assembler and its flags instead of hard-coding "yasm -f elf64".
|
||||||
|
$(OBJ)/%.$o: $(SRC)/%.yasm
|
||||||
|
$(COMPILE_STATUS)
|
||||||
|
if yasm -f elf64 $< -o $@; then $(COMPILE_OK); else $(COMPILE_FAILED); fi
|
||||||
|
|
||||||
# Comment out the following rule to debug a-c.o
|
# Comment out the following rule to debug a-c.o
|
||||||
$(OBJ)/a-c.$o: $(SRC)/a-c.c
|
$(OBJ)/a-c.$o: $(SRC)/a-c.c
|
||||||
$(COMPILE_STATUS)
|
$(COMPILE_STATUS)
|
||||||
|
|
|
@ -244,6 +244,9 @@ endif
|
||||||
ifneq (0,$(NOASM))
|
ifneq (0,$(NOASM))
|
||||||
BUILDCOMMONFLAGS+= -DNOASM
|
BUILDCOMMONFLAGS+= -DNOASM
|
||||||
endif
|
endif
|
||||||
|
ifneq (0,$(USE_ASM64))
|
||||||
|
BUILDCOMMONFLAGS+= -DUSE_ASM64
|
||||||
|
endif
|
||||||
ifneq (0,$(LINKED_GTK))
|
ifneq (0,$(LINKED_GTK))
|
||||||
BUILDCOMMONFLAGS+= -DLINKED_GTK
|
BUILDCOMMONFLAGS+= -DLINKED_GTK
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -19,16 +19,33 @@ extern intptr_t asm1, asm2, asm3, asm4;
|
||||||
extern int32_t fpuasm, globalx3, globaly3;
|
extern int32_t fpuasm, globalx3, globaly3;
|
||||||
extern void *reciptable;
|
extern void *reciptable;
|
||||||
|
|
||||||
|
#ifdef USE_ASM64
|
||||||
|
# define A64_ASSIGN(var, val) var=val
|
||||||
|
#else
|
||||||
|
# define A64_ASSIGN(var, val)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_ASM64
|
||||||
|
// variables for a64.yasm
|
||||||
|
int32_t a64_bpl, a64_transmode, a64_glogy;
|
||||||
|
intptr_t a64_paloffs;
|
||||||
|
char *a64_gtrans;
|
||||||
|
#endif
|
||||||
|
|
||||||
static int32_t bpl, transmode = 0;
|
static int32_t bpl, transmode = 0;
|
||||||
static int32_t glogx, glogy, gbxinc, gbyinc, gpinc;
|
static int32_t glogx, glogy, gbxinc, gbyinc, gpinc;
|
||||||
static char *gbuf, *gpal, *ghlinepal, *gtrans;
|
static char *gbuf, *gpal, *ghlinepal, *gtrans;
|
||||||
static char *gpal2;
|
static char *gpal2;
|
||||||
|
|
||||||
//Global variable functions
|
//Global variable functions
|
||||||
void setvlinebpl(int32_t dabpl) { bpl = dabpl; }
|
void setvlinebpl(int32_t dabpl) { A64_ASSIGN(a64_bpl, dabpl); bpl = dabpl;}
|
||||||
void fixtransluscence(intptr_t datransoff) { gtrans = (char *)datransoff; }
|
void fixtransluscence(intptr_t datransoff)
|
||||||
void settransnormal(void) { transmode = 0; }
|
{
|
||||||
void settransreverse(void) { transmode = 1; }
|
A64_ASSIGN(a64_gtrans, (char *)datransoff);
|
||||||
|
gtrans = (char *)datransoff;
|
||||||
|
}
|
||||||
|
void settransnormal(void) { A64_ASSIGN(a64_transmode, 0); transmode = 0; }
|
||||||
|
void settransreverse(void) { A64_ASSIGN(a64_transmode, 1); transmode = 1; }
|
||||||
|
|
||||||
|
|
||||||
///// Ceiling/floor horizontal line functions /////
|
///// Ceiling/floor horizontal line functions /////
|
||||||
|
@ -230,8 +247,18 @@ void mvlineasm4(int32_t cnt, char *p)
|
||||||
Bmemcpy(vplce, vplc, sizeof(vplce));
|
Bmemcpy(vplce, vplc, sizeof(vplce));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_ASM64
|
||||||
|
# define GLOGY a64_glogy
|
||||||
|
#else
|
||||||
|
# define GLOGY glogy
|
||||||
|
#endif
|
||||||
|
|
||||||
void setuptvlineasm(int32_t neglogy) { glogy = neglogy; }
|
void setuptvlineasm(int32_t neglogy)
|
||||||
|
{
|
||||||
|
GLOGY = neglogy;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if !defined USE_ASM64
|
||||||
// cnt+1 loop iterations!
|
// cnt+1 loop iterations!
|
||||||
int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
|
int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, intptr_t bufplc, intptr_t p)
|
||||||
{
|
{
|
||||||
|
@ -270,13 +297,17 @@ int32_t tvlineasm1(int32_t vinc, intptr_t paloffs, int32_t cnt, uint32_t vplc, i
|
||||||
|
|
||||||
return vplc;
|
return vplc;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2)
|
void setuptvlineasm2(int32_t neglogy, intptr_t paloffs1, intptr_t paloffs2)
|
||||||
{
|
{
|
||||||
glogy = neglogy;
|
GLOGY = neglogy;
|
||||||
|
A64_ASSIGN(a64_paloffs, paloffs1);
|
||||||
gpal = (char *)paloffs1;
|
gpal = (char *)paloffs1;
|
||||||
gpal2 = (char *)paloffs2;
|
gpal2 = (char *)paloffs2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined USE_ASM64
|
||||||
// Pass: asm1=vinc2, asm2=pend
|
// Pass: asm1=vinc2, asm2=pend
|
||||||
// Return: asm1=vplc1, asm2=vplc2
|
// Return: asm1=vplc1, asm2=vplc2
|
||||||
void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc2, uint32_t vplc1, intptr_t p)
|
void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc2, uint32_t vplc1, intptr_t p)
|
||||||
|
@ -330,7 +361,7 @@ void tvlineasm2(uint32_t vplc2, int32_t vinc1, intptr_t bufplc1, intptr_t bufplc
|
||||||
asm1 = vplc1;
|
asm1 = vplc1;
|
||||||
asm2 = vplc2;
|
asm2 = vplc2;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//Floor sprite horizontal line functions
|
//Floor sprite horizontal line functions
|
||||||
void msethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; }
|
void msethlineshift(int32_t logx, int32_t logy) { glogx = logx; glogy = logy; }
|
||||||
|
|
220
polymer/eduke32/build/src/a64.yasm
Normal file
220
polymer/eduke32/build/src/a64.yasm
Normal file
|
@ -0,0 +1,220 @@
|
||||||
|
;;; x86_64 assembly for basic texture mapping functions, based on a.nasm.
|
||||||
|
;;; For use with System V AMD64 calling convention (rdi rsi rdx rcx r8 r9)
|
||||||
|
;;; See the included license file "BUILDLIC.TXT" for license info.
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
%ifdef UNDERSCORES
|
||||||
|
%define asm1 _asm1
|
||||||
|
%define asm2 _asm2
|
||||||
|
|
||||||
|
%define a64_bpl _a64_bpl
|
||||||
|
%define a64_transmode _a64_transmode
|
||||||
|
%define a64_glogy _a64_glogy
|
||||||
|
%define a64_gtrans _a64_gtrans
|
||||||
|
%define a64_paloffs _a64_paloffs
|
||||||
|
|
||||||
|
%define tvlineasm1 _tvlineasm1
|
||||||
|
%define tvlineasm2 _tvlineasm2
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;;; Imports
|
||||||
|
EXTERN asm1 ; intptr_t
|
||||||
|
EXTERN asm2 ; intptr_t
|
||||||
|
|
||||||
|
EXTERN a64_bpl ; int32_t
|
||||||
|
EXTERN a64_transmode ; int32_t
|
||||||
|
EXTERN a64_glogy ; int32_t
|
||||||
|
EXTERN a64_gtrans ; char *
|
||||||
|
EXTERN a64_paloffs ; intptr_t
|
||||||
|
|
||||||
|
;;; Exports
|
||||||
|
GLOBAL tvlineasm1 ; Masked & transluscent 1-pixel wide vline
|
||||||
|
GLOBAL tvlineasm2 ; Masked & transluscent 2-pixel wide vline
|
||||||
|
|
||||||
|
|
||||||
|
;;; ========== MACROS ==========
|
||||||
|
|
||||||
|
; Construct name referring to low doubleword of GPR, only for r8-r15.
|
||||||
|
%define REGd(reg) reg %+ d
|
||||||
|
; Construct name of whole GPR from word-sized register, only for ax-bp.
|
||||||
|
%define rREG(regw) r %+ regw
|
||||||
|
; Construct name of low doubleword of GPR from word-sized register, only for ax-bp.
|
||||||
|
%define eREG(regw) e %+ regw
|
||||||
|
|
||||||
|
;;; Multi-line macros for code shared between functions
|
||||||
|
|
||||||
|
;; Prologue getters
|
||||||
|
; Prologue helper: save the register aliased by rBpl on the stack, then load
; the 32-bit value of a64_bpl into its low doubleword.
; rBpl must be %define'd (as one of r8-r15, see REGd) before this is expanded.
; The matching 'pop rBpl' is the responsibility of the function epilogue.
%macro PGET_rBpl 0
|
||||||
|
push rBpl
|
||||||
|
; A 32-bit destination write clears the upper half of the 64-bit register.
mov REGd(rBpl), dword [rel a64_bpl]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Prologue helper: save the register aliased by rATransluc on the stack, then
; load the full 64-bit pointer stored in a64_gtrans into it.
; The matching 'pop rATransluc' is the responsibility of the function epilogue.
%macro PGET_rATransluc 0
|
||||||
|
push rATransluc
|
||||||
|
mov rATransluc, [rel a64_gtrans]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Get right shift count 'glogy' into cl.
; Only the lowest byte of the int32_t a64_glogy is read. Overwrites cl, so any
; argument arriving in rcx/ecx must be copied elsewhere before this is expanded.
%macro GET_cl 0
|
||||||
|
mov cl, byte [rel a64_glogy]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Look up a texel.
|
||||||
|
; <dest_reg_lobyte> := bufplc[vplc>>glogy]
|
||||||
|
; High bytes of the destination register are cleared to zero.
; Expects the shift count to be in cl already (see GET_cl).
; %1 is given as a word-sized register name (e.g. bx); the macro derives the
; 32-bit and 64-bit names from it via eREG()/rREG(). %3 must be a 32-bit register.
|
||||||
|
%macro gettexel 3 ; <dest_reg_word> <bufplc_reg> <vplc_reg>
|
||||||
|
mov eREG(%1), %3
|
||||||
|
shr eREG(%1), cl
|
||||||
|
; Index the texture column with the shifted coordinate; movzx zero-extends the byte.
movzx eREG(%1), byte [%2 + rREG(%1)]
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; Do palette/shade and translucency lookup.
; Result, left in <tmp_reg_lobyte>:
;   transluc[(framebuffer[rDst + <dst_ofs>] << 8) | palookup[<texel_reg>]]
; <tmp_reg_lobyte> must be the low byte of <tmp_reg> (e.g. rax/al): the
; palette/shade byte is written into the low byte of the already shifted
; frame buffer value, forming the 16-bit translucency table index in place.
; Uses the rDst, rAPalookup and rATransluc aliases in effect where the macro
; is expanded. Only this one index order is implemented (see the transmode TODO).
|
||||||
|
%macro dotranspal 4 ; <tmp_reg> <tmp_reg_lobyte> <texel_reg> <dst_ofs>
|
||||||
|
; Get palette index of the pixel in the frame buffer.
|
||||||
|
; NOTE: e.g. "mov ah, byte [reg]" is not encodable.
|
||||||
|
movzx %1, byte [rDst + %4]
|
||||||
|
shl %1, 8
|
||||||
|
|
|
||||||
|
mov %2, byte [rAPalookup + %3] ; palette/shade
|
||||||
|
mov %2, byte [rATransluc + %1] ; translucency
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
|
;;; ========== TVLINEASM1 ==========
|
||||||
|
;;; TODO: transmode, nonpow2
|
||||||
|
|
||||||
|
;;; Registers used in the loop
|
||||||
|
%define rBpl r10
|
||||||
|
%define rATransluc r12
|
||||||
|
%define rVplc r11d
|
||||||
|
|
||||||
|
;;; Registers of input args also used in the loop
|
||||||
|
%define rVinc edi
|
||||||
|
%define rAPalookup rsi
|
||||||
|
%define rCnt edx
|
||||||
|
%define rABufplc r8
|
||||||
|
%define rDst r9
|
||||||
|
|
||||||
|
;;; Masked & translucent 1-pixel wide vertical line.
;;; Walks down one frame buffer column starting at p, advancing by a64_bpl bytes
;;; per step. Texels equal to 255 are skipped (frame buffer byte left untouched);
;;; all others are shaded through paloffs and blended through a64_gtrans.
;;; Returns the final vplc in eax.
;;; int32_t tvlineasm1(vinc, *paloffs, cnt, vplc, *bufplc, *p)
|
||||||
|
;;; eax edi rsi edx ecx r8 r9
|
||||||
|
ALIGN 16
|
||||||
|
tvlineasm1:
|
||||||
|
; The loop body runs cnt+1 times.
inc rCnt
|
||||||
|
;; First, back up callee-saved registers and set up those used in the loop.
;; NOTE(review): r10 (rBpl) is a scratch register in the System V AMD64 ABI,
;; so saving it looks unnecessary but harmless -- confirm.
|
||||||
|
PGET_rBpl
|
||||||
|
; Move vplc out of ecx before GET_cl overwrites cl with the shift count.
mov rVplc, ecx
|
||||||
|
GET_cl
|
||||||
|
PGET_rATransluc
|
||||||
|
; rbx holds the current texel inside the loop.
push rbx
|
||||||
|
jmp short tv1_loop
|
||||||
|
|
|
||||||
|
ALIGN 16
|
||||||
|
tv1_loop:
|
||||||
|
gettexel bx, rABufplc, rVplc
|
||||||
|
; Texel value 255 marks a masked (transparent) pixel: skip the write.
cmp bl, 255
|
||||||
|
je short tv1_skiptrans
|
||||||
|
|
|
||||||
|
dotranspal rax, al, rbx, 0
|
||||||
|
mov byte [rDst], al
|
||||||
|
tv1_skiptrans:
|
||||||
|
; Advance the texture coordinate and step to the next frame buffer row.
add rVplc, rVinc
|
||||||
|
add rDst, rBpl
|
||||||
|
dec rCnt
|
||||||
|
jnz short tv1_loop
|
||||||
|
|
|
||||||
|
mov eax, rVplc ; return vplc
|
||||||
|
; Restore saved registers in reverse order of the pushes above.
pop rbx
|
||||||
|
pop rATransluc
|
||||||
|
pop rBpl
|
||||||
|
ret
|
||||||
|
|
||||||
|
;;; Undefine input arg macros
|
||||||
|
%undef rVinc
|
||||||
|
%undef rAPalookup
|
||||||
|
%undef rCnt
|
||||||
|
%undef rABufplc
|
||||||
|
%undef rDst
|
||||||
|
|
||||||
|
;;; Undefine rVplc register macro, but keep rBpl and rATransluc (r10 and r12).
|
||||||
|
;;; Take care not to clash with them in the following.
|
||||||
|
%undef rVplc
|
||||||
|
|
||||||
|
|
||||||
|
;;; ========== TVLINEASM2 ==========
|
||||||
|
|
||||||
|
;;; Registers used in the loop
|
||||||
|
%define rVinc2 r11d
|
||||||
|
%define rABufplc2 r13
|
||||||
|
%define rDstEnd r14
|
||||||
|
%define rAPalookup r15 ; TODO: second paloffs!
|
||||||
|
|
||||||
|
;;; Registers of input args also used in the loop
|
||||||
|
%define rVplc2 edi
|
||||||
|
%define rVinc1 esi
|
||||||
|
%define rABufplc1 rdx
|
||||||
|
; bufplc2: rcx -> r13
|
||||||
|
%define rVplc1 r8d
|
||||||
|
%define rDst r9
|
||||||
|
|
||||||
|
;;; Pass: asm1=vinc2, asm2=pend
|
||||||
|
;;; Return: asm1=vplc1, asm2=vplc2
|
||||||
|
;;; void tvlineasm2(vplc2, vinc1, *bufplc1, *bufplc2, vplc1, *p)
|
||||||
|
;;; edi esi rdx rcx r8d r9
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
tvlineasm2:
        ;; Masked & translucent 2-pixel wide vertical line.
        ;;
        ;; In:  edi=vplc2, esi=vinc1, rdx=bufplc1, rcx=bufplc2, r8d=vplc1, r9=p,
        ;;      [asm1]=vinc2, [asm2]=pend
        ;; Out: [asm1]=vplc1, [asm2]=vplc2 (zero-extended to 64 bits), matching
        ;;      what a-c.c's tvlineasm2() stores on return.
        ;;
        ;; The two pixels are masked independently: a texel of 255 leaves its
        ;; frame buffer byte untouched, consistent with tvlineasm1.
        ;; TODO: transmode, nonpow2, second paloffs.

        ;; First, back up callee-saved registers and set up those used in the loop.
        mov rVinc2, dword [rel asm1]    ; vinc2 is passed in through asm1
        PGET_rBpl
        push rABufplc2
        mov rABufplc2, rcx              ; free up rcx: cl becomes the shift count
        GET_cl
        PGET_rATransluc

        push rDstEnd
        mov rDstEnd, [rel asm2]         ; pend is passed in through asm2
        dec rDstEnd
        add rDstEnd, rBpl               ; one more: compare with a-c.c's tvlineasm2()

        push rbx                        ; rbx holds the current texel in the loop
        push rAPalookup
        mov rAPalookup, [rel a64_paloffs]
        jmp short tv2_loop

        ALIGN 16
tv2_loop:
        ;; Left pixel: frame buffer byte at [rDst].
        gettexel bx, rABufplc1, rVplc1
        cmp bl, 255
        je short tv2_skip1
        dotranspal rax, al, rbx, 0
        mov byte [rDst], al
tv2_skip1:

        ;; Right pixel: frame buffer byte at [rDst + 1].
        gettexel bx, rABufplc2, rVplc2
        cmp bl, 255
        je short tv2_skip2
        dotranspal rax, al, rbx, 1
        mov byte [rDst + 1], al
tv2_skip2:

        ;; Advance both texture coordinates and step to the next row.
        add rVplc1, rVinc1
        add rVplc2, rVinc2
        add rDst, rBpl
        cmp rDst, rDstEnd
        jnz tv2_loop

        ;; Hand the final texture coordinates back through asm1/asm2.
        ;; Writing eax clears the upper half of rax, so the stored intptr_t
        ;; values are the zero-extended 32-bit coordinates.
        mov eax, rVplc1
        mov [rel asm1], rax
        mov eax, rVplc2
        mov [rel asm2], rax

        ;; Restore saved registers in reverse order of the pushes above.
        pop rAPalookup
        pop rbx
        pop rDstEnd
        pop rATransluc
        pop rABufplc2
        pop rBpl
        ret
|
Loading…
Reference in a new issue