# Conflicts:
#	src/CMakeLists.txt
This commit is contained in:
Christoph Oelckers 2016-12-07 22:51:05 +01:00
commit f547daccc8
38 changed files with 6032 additions and 8114 deletions

View file

@ -15,12 +15,6 @@ include( CheckLibraryExists )
include( FindPkgConfig )
include( FindOpenGL )
# Assembly code is enabled by default everywhere except OS X, where it does
# not currently work and is therefore disabled unless the user overrides it.
if( APPLE )
	option( NO_ASM "Disable assembly code" ON )
else()
	option( NO_ASM "Disable assembly code" OFF )
endif()
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
option( NO_STRIP "Do not strip Release or MinSizeRel builds" )
# At least some versions of Xcode fail if you strip with the linker
@ -115,7 +109,6 @@ if( WIN32 )
)
set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc )
set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib )
set( NASM_NAMES nasmw nasm )
find_path( D3D_INCLUDE_DIR d3d9.h
PATHS ENV DXSDK_DIR
@ -240,7 +233,6 @@ else()
endif()
endif()
endif()
set( NASM_NAMES nasm )
if( NO_GTK )
add_definitions( -DNO_GTK )
@ -388,105 +380,6 @@ endif()
find_package( FluidSynth )
# Search for NASM
if( NOT NO_ASM )
if( UNIX AND X64 )
find_program( GAS_PATH as )
if( GAS_PATH )
set( ASSEMBLER ${GAS_PATH} )
else()
message( STATUS "Could not find as. Disabling assembly code." )
set( NO_ASM ON )
endif()
else()
find_program( NASM_PATH NAMES ${NASM_NAMES} )
find_program( YASM_PATH yasm )
if( X64 )
if( YASM_PATH )
set( ASSEMBLER ${YASM_PATH} )
else()
message( STATUS "Could not find YASM. Disabling assembly code." )
set( NO_ASM ON )
endif()
else()
if( NASM_PATH )
set( ASSEMBLER ${NASM_PATH} )
else()
message( STATUS "Could not find NASM. Disabling assembly code." )
set( NO_ASM ON )
endif()
endif()
endif()
# I think the only reason there was a version requirement was because the
# executable name for Windows changed from 0.x to 2.0, right? This is
# how to do it in case I need to do something similar later.
# execute_process( COMMAND ${NASM_PATH} -v
# OUTPUT_VARIABLE NASM_VER_STRING )
# string( REGEX REPLACE ".*version ([0-9]+[.][0-9]+).*" "\\1" NASM_VER "${NASM_VER_STRING}" )
# if( NASM_VER VERSION_LESS 2 )
# message( SEND_ERROR "NASM version should be 2 or later. (Installed version is ${NASM_VER}.)" )
# endif()
endif()
if( NOT NO_ASM )
# Valgrind support is meaningless without assembly code.
if( VALGRIND )
add_definitions( -DVALGRIND_AWARE=1 )
# If you're Valgrinding, you probably want to keep symbols around.
set( NO_STRIP ON )
endif()
# Tell CMake how to assemble our files
if( UNIX )
set( ASM_OUTPUT_EXTENSION .o )
if( X64 )
set( ASM_FLAGS )
set( ASM_SOURCE_EXTENSION .s )
else()
if( APPLE )
set( ASM_FLAGS -fmacho -DM_TARGET_MACHO )
else()
set( ASM_FLAGS -felf -DM_TARGET_LINUX )
endif()
set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ )
set( ASM_SOURCE_EXTENSION .asm )
endif()
else()
set( ASM_OUTPUT_EXTENSION .obj )
set( ASM_SOURCE_EXTENSION .asm )
if( X64 )
set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
else()
set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
endif()
endif()
if( WIN32 AND NOT X64 )
set( FIXRTEXT fixrtext )
else()
set( FIXRTEXT "" )
endif()
message( STATUS "Selected assembler: ${ASSEMBLER}" )
# ADD_ASM_FILE( indir infile )
#
# Adds a build rule that assembles
#   ${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}
# with ${ASSEMBLER} ${ASM_FLAGS} into an object file below
# CMakeFiles/zdoom.dir/${indir}/ and appends that object file to ASM_SOURCES.
# On 32-bit Windows the object is additionally run through ${FIXRTEXT}.
#
# This has to stay a macro: it sets ASM_SOURCES, ASM_OUTPUT_<infile> and
# FIXRTEXT_<infile> in the caller's scope.
#
# Example: ADD_ASM_FILE( asm_ia32 tmap )
macro( ADD_ASM_FILE indir infile )
	set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" )
	if( WIN32 AND NOT X64 )
		set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" )
	else()
		set( FIXRTEXT_${infile} COMMAND "" )
	endif()
	# The dependency must name the file that is actually assembled. The source
	# extension is .s for GAS builds and .asm otherwise, so a hard-coded .asm
	# would leave the .s source untracked and the object file stale.
	add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
		COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
		COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
		${FIXRTEXT_${infile}}
		DEPENDS ${indir}/${infile}${ASM_SOURCE_EXTENSION} ${FIXRTEXT} )
	set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
endmacro()
endif()
# Decide on SSE setup
set( SSE_MATTERS NO )
@ -801,25 +694,6 @@ if( HAVE_MMX )
PROPERTIES COMPILE_FLAGS "-mmmx" )
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
endif( HAVE_MMX )
# Guarantee that ASM_SOURCES expands to something defined even when no
# assembly file is registered below.
if( NOT ASM_SOURCES )
	set( ASM_SOURCES "" )
endif()

# With assembly enabled, register the files for the target architecture;
# otherwise define NOASM for the compiled sources.
if( NOT NO_ASM )
	if( NOT X64 )
		ADD_ASM_FILE( asm_ia32 a )
		ADD_ASM_FILE( asm_ia32 misc )
		ADD_ASM_FILE( asm_ia32 tmap )
		ADD_ASM_FILE( asm_ia32 tmap2 )
		ADD_ASM_FILE( asm_ia32 tmap3 )
	else()
		ADD_ASM_FILE( asm_x86_64 tmap3 )
	endif()
else()
	add_definitions( -DNOASM )
endif()
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
@ -927,16 +801,6 @@ set( NOT_COMPILED_SOURCE_FILES
scripting/zscript/zcc-parse.lemon
zcc-parse.c
zcc-parse.h
# We could have the ASM macro add these files, but it wouldn't add all
# platforms.
asm_ia32/a.asm
asm_ia32/misc.asm
asm_ia32/tmap.asm
asm_ia32/tmap2.asm
asm_ia32/tmap3.asm
asm_x86_64/tmap3.asm
asm_x86_64/tmap3.s
)
set( FASTMATH_PCH_SOURCES
@ -944,7 +808,9 @@ set( FASTMATH_PCH_SOURCES
r_3dfloors.cpp
r_bsp.cpp
r_draw.cpp
r_drawt.cpp
r_draw_pal.cpp
r_drawt_pal.cpp
r_thread.cpp
r_main.cpp
r_plane.cpp
r_segs.cpp
@ -1351,7 +1217,6 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE
${HEADER_FILES}
${NOT_COMPILED_SOURCE_FILES}
__autostart.cpp
${ASM_SOURCES}
${SYSTEM_SOURCES}
${X86_SOURCES}
${FASTMATH_SOURCES}
@ -1514,8 +1379,6 @@ install(TARGETS zdoom
DESTINATION ${INSTALL_PATH}
COMPONENT "Game executable")
source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+")
source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+")
source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+")
source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+")
source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h)

View file

@ -1,812 +0,0 @@
; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
; Ken Silverman's official web site: "http://www.advsys.net/ken"
; See the included license file "BUILDLIC.TXT" for license info.
; This file has been modified from Ken Silverman's original release
%include "valgrind.inc"
SECTION .data
%ifndef M_TARGET_LINUX
%define ylookup _ylookup
%define vince _vince
%define vplce _vplce
%define palookupoffse _palookupoffse
%define bufplce _bufplce
%define dc_iscale _dc_iscale
%define dc_colormap _dc_colormap
%define dc_count _dc_count
%define dc_dest _dc_dest
%define dc_source _dc_source
%define dc_texturefrac _dc_texturefrac
%define setupvlineasm _setupvlineasm
%define prevlineasm1 _prevlineasm1
%define vlineasm1 _vlineasm1
%define vlineasm4 _vlineasm4
%define setupmvlineasm _setupmvlineasm
%define mvlineasm1 _mvlineasm1
%define mvlineasm4 _mvlineasm4
%define R_SetupDrawSlabA _R_SetupDrawSlabA
%define R_DrawSlabA _R_DrawSlabA
%endif
EXTERN ylookup ; near
EXTERN vplce ; near
EXTERN vince ; near
EXTERN palookupoffse ; near
EXTERN bufplce ; near
EXTERN dc_iscale
EXTERN dc_colormap
EXTERN dc_count
EXTERN dc_dest
EXTERN dc_source
EXTERN dc_texturefrac
SECTION .text
ALIGN 16
; setvlinebpl_ / setdrawslabbpl
;
; Input:  eax = value to patch into the drawers.
;         NOTE(review): the "bpl" in the label names suggests this is the
;         frame buffer pitch (bytes per line) -- confirm against the caller.
;
; setvlinebpl_ stores eax into the 32-bit operand located at byte offset +2
; of each fixchain* instruction and then continues into setdrawslabbpl,
; which does the same for the voxbpl1..voxbpl8 instructions.
; selfmod is a macro that is not defined in this file (presumably it comes
; from valgrind.inc); it is handed the first and last byte of each patched
; range.
GLOBAL setvlinebpl_
setvlinebpl_:
mov [fixchain1a+2], eax
mov [fixchain1b+2], eax
mov [fixchain2a+2], eax
mov [fixchain1m+2], eax
mov [fixchain2ma+2], eax
mov [fixchain2mb+2], eax
selfmod fixchain1a, fixchain2mb+6
; There is no ret here: execution continues into setdrawslabbpl.
setdrawslabbpl:
mov dword [voxbpl1+2], eax
mov dword [voxbpl2+2], eax
mov dword [voxbpl3+2], eax
mov dword [voxbpl4+2], eax
mov dword [voxbpl5+2], eax
mov dword [voxbpl6+2], eax
mov dword [voxbpl7+2], eax
mov dword [voxbpl8+2], eax
; The range ends at voxbpl8, the last instruction patched above. The label
; "voxpl8" is not defined anywhere in this file and would fail to assemble
; as soon as selfmod expands to real code.
selfmod voxbpl1, voxbpl8+6
ret
SECTION .data
lastslabcolormap:
dd 4
SECTION .text
; R_SetupDrawSlabA / @R_SetupDrawSlabA@4
;
; Two entry points for the same routine: R_SetupDrawSlabA loads its argument
; from the stack ([esp+4]) into ecx, while @R_SetupDrawSlabA@4 expects the
; argument to be in ecx already.
; If ecx equals the value cached in lastslabcolormap nothing is done;
; otherwise the cache is updated and ecx is written into the dword at offset
; +2 of the voxpal1..voxpal8 instructions.
; NOTE(review): presumably ecx is the colormap pointer used by R_DrawSlabA --
; confirm against the caller. No selfmod call follows the patching here.
GLOBAL R_SetupDrawSlabA
GLOBAL @R_SetupDrawSlabA@4
R_SetupDrawSlabA:
mov ecx, [esp+4]
@R_SetupDrawSlabA@4:
cmp [lastslabcolormap], ecx
je .done
mov [lastslabcolormap], ecx
mov dword [voxpal1+2], ecx
mov dword [voxpal2+2], ecx
mov dword [voxpal3+2], ecx
mov dword [voxpal4+2], ecx
mov dword [voxpal5+2], ecx
mov dword [voxpal6+2], ecx
mov dword [voxpal7+2], ecx
mov dword [voxpal8+2], ecx
.done ret
; setupvlineasm
;
; Takes one stack argument at [esp+4] (original note: "pass it
; log2(texheight)") and patches it into the shift counts and masks of
; prevlineasm1 (premach3a), vlineasm1 (mach3a) and vlineasm4 (machvsh*).
; NOTE(review): the inline comments below label the incoming value "32-shy",
; and it is written unchanged into the shr shift counts, so the caller
; appears to pass 32-log2(texheight) rather than log2(texheight) -- confirm.
ALIGN 16
GLOBAL setupvlineasm
setupvlineasm:
mov ecx, [esp+4]
;First 2 lines for VLINEASM1, rest for VLINEASM4
mov byte [premach3a+2], cl
mov byte [mach3a+2], cl
mov byte [machvsh1+2], cl ;32-shy
mov byte [machvsh3+2], cl ;32-shy
mov byte [machvsh5+2], cl ;32-shy
mov byte [machvsh6+2], cl ;32-shy
; ch = cl - 16
mov ch, cl
sub ch, 16
mov byte [machvsh8+2], ch ;16-shy
; cl = -cl; the stores below and the shl use this negated value
neg cl
mov byte [machvsh7+2], cl ;shy
mov byte [machvsh9+2], cl ;shy
mov byte [machvsh10+2], cl ;shy
mov byte [machvsh11+2], cl ;shy
mov byte [machvsh12+2], cl ;shy
; eax = (1 << cl) - 1, patched in as the and-mask of machvsh2/machvsh4
mov eax, 1
shl eax, cl
dec eax
mov dword [machvsh2+2], eax ;(1<<shy)-1
mov dword [machvsh4+2], eax ;(1<<shy)-1
selfmod premach3a, machvsh8+6
ret
%ifdef M_TARGET_MACHO
SECTION .text align=64
%else
SECTION .rtext progbits alloc exec write align=64
%endif
%ifdef M_TARGET_MACHO
GLOBAL _rtext_a_start
_rtext_a_start:
%endif
;eax = xscale
;ebx = palookupoffse
;ecx = # pixels to draw-1
;edx = texturefrac
;esi = texturecolumn
;edi = buffer pointer
; prevlineasm1
;
; Reads dc_count; when it is greater than 1, control transfers to vlineasm1.
; Otherwise one pixel is drawn: the byte at dc_source[dc_texturefrac >> n] is
; looked up in dc_colormap and stored at dc_dest.
; The shift count n (placeholder 32) is patched by setupvlineasm through
; premach3a+2.
; Returns eax = dc_iscale + dc_texturefrac. ebx and edi are saved and
; restored.
ALIGN 16
GLOBAL prevlineasm1
prevlineasm1:
mov ecx, [dc_count]
cmp ecx, 1
ja vlineasm1
mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
add eax, edx
mov ecx, [dc_source]
premach3a: shr edx, 32
push ebx
push edi
mov edi, [dc_colormap]
xor ebx, ebx
mov bl, byte [ecx+edx]
mov ecx, [dc_dest]
mov bl, byte [edi+ebx]
pop edi
mov byte [ecx], bl
pop ebx
ret
; vlineasm1
;
; Draws dc_count pixels. Register use inside the loop:
;   eax = dc_iscale (added to edx every iteration)
;   ecx = remaining pixel count
;   edx = running texture fraction, starts at dc_texturefrac
;   esi = dc_source, ebp = dc_colormap, edi = destination pointer
; Each iteration reads dc_source[edx >> n], maps it through dc_colormap and
; stores the result at [edi].
; Patched operands: mach3a+2 (shift count, by setupvlineasm) and the
; immediates of fixchain1a/fixchain1b (placeholder 320, by setvlinebpl_).
; Returns eax = edx after the last iteration. ebx, edi, esi and ebp are
; saved and restored.
GLOBAL vlineasm1
ALIGN 16
vlineasm1:
push ebx
push edi
push esi
push ebp
mov ecx, [dc_count]
mov ebp, [dc_colormap]
mov edi, [dc_dest]
mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
mov esi, [dc_source]
; Step back once here; the loop adds the same patched amount before each store.
fixchain1a: sub edi, 320
nop
nop
nop
beginvline:
mov ebx, edx
mach3a: shr ebx, 32
fixchain1b: add edi, 320
mov bl, byte [esi+ebx]
add edx, eax
dec ecx
; The two mov instructions below leave the flags alone, so jnz still tests
; the result of dec ecx.
mov bl, byte [ebp+ebx]
mov byte [edi], bl
jnz short beginvline
pop ebp
pop esi
pop edi
pop ebx
mov eax, edx
ret
;eax: -------temp1-------
;ebx: -------temp2-------
;ecx: dat dat dat dat
;edx: ylo2 ylo4
;esi: yhi1 yhi2
;edi: ---videoplc/cnt----
;ebp: yhi3 yhi4
;esp:
ALIGN 16
GLOBAL vlineasm4
vlineasm4:
mov ecx, [dc_count]
push ebp
push ebx
push esi
push edi
mov edi, [dc_dest]
mov eax, dword [ylookup+ecx*4-4]
add eax, edi
mov dword [machvline4end+2], eax
sub edi, eax
mov eax, dword [bufplce+0]
mov ebx, dword [bufplce+4]
mov ecx, dword [bufplce+8]
mov edx, dword [bufplce+12]
mov dword [machvbuf1+2], ecx
mov dword [machvbuf2+2], edx
mov dword [machvbuf3+2], eax
mov dword [machvbuf4+2], ebx
mov eax, dword [palookupoffse+0]
mov ebx, dword [palookupoffse+4]
mov ecx, dword [palookupoffse+8]
mov edx, dword [palookupoffse+12]
mov dword [machvpal1+2], ecx
mov dword [machvpal2+2], edx
mov dword [machvpal3+2], eax
mov dword [machvpal4+2], ebx
;     +---------------+---------------+
;edx: |v3lo           |v1lo           |
;     +---------------+-------+-------+
;esi: |v2hi  v2lo             |   v3hi|
;     +-----------------------+-------+
;ebp: |v0hi  v0lo             |   v1hi|
;     +-----------------------+-------+
mov ebp, dword [vince+0]
mov ebx, dword [vince+4]
mov esi, dword [vince+8]
mov eax, dword [vince+12]
and esi, 0fffffe00h
and ebp, 0fffffe00h
machvsh9: rol eax, 88h ;sh
machvsh10: rol ebx, 88h ;sh
mov edx, eax
mov ecx, ebx
shr ecx, 16
and edx, 0ffff0000h
add edx, ecx
and eax, 000001ffh
and ebx, 000001ffh
add esi, eax
add ebp, ebx
;
mov eax, edx
and eax, 0ffff0000h
mov dword [machvinc1+2], eax
mov dword [machvinc2+2], esi
mov byte [machvinc3+2], dl
mov byte [machvinc4+2], dh
mov dword [machvinc5+2], ebp
mov ebp, dword [vplce+0]
mov ebx, dword [vplce+4]
mov esi, dword [vplce+8]
mov eax, dword [vplce+12]
and esi, 0fffffe00h
and ebp, 0fffffe00h
machvsh11: rol eax, 88h ;sh
machvsh12: rol ebx, 88h ;sh
mov edx, eax
mov ecx, ebx
shr ecx, 16
and edx, 0ffff0000h
add edx, ecx
and eax, 000001ffh
and ebx, 000001ffh
add esi, eax
add ebp, ebx
mov ecx, esi
selfmod beginvlineasm4, machvline4end+6
jmp short beginvlineasm4
ALIGN 16
beginvlineasm4:
machvsh1: shr ecx, 88h ;32-sh
mov ebx, esi
machvsh2: and ebx, 00000088h ;(1<<sh)-1
machvinc1: add edx, 88880000h
machvinc2: adc esi, 88888088h
machvbuf1: mov cl, byte [ecx+88888888h]
machvbuf2: mov bl, byte [ebx+88888888h]
mov eax, ebp
machvsh3: shr eax, 88h ;32-sh
machvpal1: mov cl, byte [ecx+88888888h]
machvpal2: mov ch, byte [ebx+88888888h]
mov ebx, ebp
shl ecx, 16
machvsh4: and ebx, 00000088h ;(1<<sh)-1
machvinc3: add dl, 88h
machvbuf3: mov al, byte [eax+88888888h]
machvinc4: adc dh, 88h
machvbuf4: mov bl, byte [ebx+88888888h]
machvinc5: adc ebp, 88888088h
machvpal3: mov cl, byte [eax+88888888h]
machvpal4: mov ch, byte [ebx+88888888h]
machvline4end: mov dword [edi+88888888h], ecx
fixchain2a: add edi, 88888888h
mov ecx, esi
jle short beginvlineasm4
;     +---------------+---------------+
;edx: |v3lo           |v1lo           |
;     +---------------+-------+-------+
;esi: |v2hi  v2lo             |   v3hi|
;     +-----------------------+-------+
;ebp: |v0hi  v0lo             |   v1hi|
;     +-----------------------+-------+
mov dword [vplce+8], esi
mov dword [vplce+0], ebp
;vplc2 = (esi<<(32-sh))+(edx>>sh)
;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh))
machvsh5: shl esi, 88h ;32-sh
mov eax, edx
machvsh6: shl ebp, 88h ;32-sh
and edx, 0000ffffh
machvsh7: shr eax, 88h ;sh
add esi, eax
machvsh8: shl edx, 88h ;16-sh
add ebp, edx
mov dword [vplce+12], esi
mov dword [vplce+4], ebp
pop edi
pop esi
pop ebx
pop ebp
ret
;*************************************************************************
;************************* Masked Vertical Lines *************************
;*************************************************************************
; setupmvlineasm
;
; Takes one stack argument at [esp+4] (original note: "pass it
; log2(texheight)") and writes its low byte into the shift-count operand
; (offset +2) of maskmach3a in mvlineasm1 and of machmv13..machmv16 in
; mvlineasm4.
; NOTE(review): the byte is used directly as the shr count applied to the
; texture fraction, so the caller presumably passes 32-log2(texheight) --
; confirm.
ALIGN 16
GLOBAL setupmvlineasm
setupmvlineasm:
mov ecx, dword [esp+4]
mov byte [maskmach3a+2], cl
mov byte [machmv13+2], cl
mov byte [machmv14+2], cl
mov byte [machmv15+2], cl
mov byte [machmv16+2], cl
selfmod maskmach3a, machmv13+6
ret
; mvlineasm1
;
; Masked variant of vlineasm1: draws dc_count pixels but skips the store
; whenever the fetched source byte is 0.
;   eax = dc_iscale, ecx = remaining count, edx = running texture fraction,
;   esi = dc_source, ebp = dc_colormap, edi = destination pointer
; Patched operands: maskmach3a+2 (shift count, by setupmvlineasm) and the
; immediate of fixchain1m (placeholder 320, by setvlinebpl_).
; Returns eax = edx after the last iteration. ebx, edi, esi and ebp are
; saved and restored.
ALIGN 16
GLOBAL mvlineasm1 ;Masked vline
mvlineasm1:
push ebx
push edi
push esi
push ebp
mov ecx, [dc_count]
mov ebp, [dc_colormap]
mov edi, [dc_dest]
mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
mov esi, [dc_source]
beginmvline:
mov ebx, edx
maskmach3a: shr ebx, 32
movzx ebx, byte [esi+ebx]
; A source byte of 0 means "do not draw this pixel".
cmp ebx, 0
je short skipmask1
maskmach3c: mov bl, byte [ebp+ebx]
mov [edi], bl
skipmask1: add edx, eax
fixchain1m: add edi, 320
dec ecx
jnz short beginmvline
pop ebp
pop esi
pop edi
pop ebx
mov eax, edx
ret
ALIGN 16
GLOBAL mvlineasm4
mvlineasm4:
push ebx
push esi
push edi
push ebp
mov ecx,[dc_count]
mov edi,[dc_dest]
mov eax, [bufplce+0]
mov ebx, [bufplce+4]
mov [machmv1+3], eax
mov [machmv4+3], ebx
mov eax, [bufplce+8]
mov ebx, [bufplce+12]
mov [machmv7+3], eax
mov [machmv10+3], ebx
mov eax, [palookupoffse]
mov ebx, [palookupoffse+4]
mov [machmv2+2], eax
mov [machmv5+2], ebx
mov eax, [palookupoffse+8]
mov ebx, [palookupoffse+12]
mov [machmv8+2], eax
mov [machmv11+2], ebx
mov eax, [vince] ;vince
mov ebx, [vince+4]
xor bl, bl
mov [machmv3+2], eax
mov [machmv6+2], ebx
mov eax, [vince+8]
mov ebx, [vince+12]
mov [machmv9+2], eax
mov [machmv12+2], ebx
inc ecx
push ecx
mov ecx, [vplce+0]
mov edx, [vplce+4]
mov esi, [vplce+8]
mov ebp, [vplce+12]
fixchain2ma: sub edi, 320
selfmod beginmvlineasm4, machmv2+6
jmp short beginmvlineasm4
ALIGN 16
beginmvlineasm4:
dec dword [esp]
jz near endmvlineasm4
mov eax, ebp
mov ebx, esi
machmv16: shr eax, 32
machmv12: add ebp, 0x88888888 ;vince[3]
machmv15: shr ebx, 32
machmv9: add esi, 0x88888888 ;vince[2]
machmv10: movzx eax, byte [eax+0x88888888];bufplce[3]
machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2]
cmp eax, 1
adc dl, dl
cmp ebx, 1
adc dl, dl
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
mov eax, edx
machmv6: add edx, 0x88888888 ;vince[1]
machmv14: shr eax, 32
shl ebx, 16
machmv4: movzx eax, byte [eax+0x88888888];bufplce[1]
cmp eax, 1
adc dl, dl
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
mov eax, ecx
machmv3: add ecx, 0x88888888 ;vince[0]
machmv13: shr eax, 32
machmv1: movzx eax, byte [eax+0x88888888];bufplce[0]
cmp eax, 1
adc dl, dl
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
xor eax, eax
shl dl, 4
fixchain2mb: add edi, 320
mov al, dl
add eax, mvcase15
jmp eax ;16 byte cases
ALIGN 16
endmvlineasm4:
mov [vplce], ecx
mov [vplce+4], edx
mov [vplce+8], esi
mov [vplce+12], ebp
pop ecx
pop ebp
pop edi
pop esi
pop ebx
ret
;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7
ALIGN 16
mvcase15: mov [edi], ebx
jmp beginmvlineasm4
ALIGN 16
mvcase14: mov [edi+1], bh
shr ebx, 16
mov [edi+2], bx
jmp beginmvlineasm4
ALIGN 16
mvcase13: mov [edi], bl
shr ebx, 16
mov [edi+2], bx
jmp beginmvlineasm4
ALIGN 16
mvcase12: shr ebx, 16
mov [edi+2], bx
jmp beginmvlineasm4
ALIGN 16
mvcase11: mov [edi], bx
shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4
ALIGN 16
mvcase10: mov [edi+1], bh
shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4
ALIGN 16
mvcase9: mov [edi], bl
shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4
ALIGN 16
mvcase8: shr ebx, 16
mov [edi+3], bh
jmp beginmvlineasm4
ALIGN 16
mvcase7: mov [edi], bx
shr ebx, 16
mov [edi+2], bl
jmp beginmvlineasm4
ALIGN 16
mvcase6: shr ebx, 8
mov [edi+1], bx
jmp beginmvlineasm4
ALIGN 16
mvcase5: mov [edi], bl
shr ebx, 16
mov [edi+2], bl
jmp beginmvlineasm4
ALIGN 16
mvcase4: shr ebx, 16
mov [edi+2], bl
jmp beginmvlineasm4
ALIGN 16
mvcase3: mov [edi], bx
jmp beginmvlineasm4
ALIGN 16
mvcase2: mov [edi+1], bh
jmp beginmvlineasm4
ALIGN 16
mvcase1: mov [edi], bl
jmp beginmvlineasm4
ALIGN 16
mvcase0: jmp beginmvlineasm4
align 16
;*************************************************************************
;***************************** Voxel Slabs *******************************
;*************************************************************************
GLOBAL R_DrawSlabA
R_DrawSlabA:
push ebx
push ebp
push esi
push edi
mov eax, [esp+5*4+0]
mov ebx, [esp+5*4+4]
mov ecx, [esp+5*4+8]
mov edx, [esp+5*4+12]
mov esi, [esp+5*4+16]
mov edi, [esp+5*4+20]
cmp eax, 2
je voxbegdraw2
ja voxskip2
xor eax, eax
voxbegdraw1:
mov ebp, ebx
shr ebp, 16
add ebx, edx
dec ecx
mov al, byte [esi+ebp]
voxpal1: mov al, byte [eax+88888888h]
mov byte [edi], al
voxbpl1: lea edi, [edi+88888888h]
jnz voxbegdraw1
jmp voxskipslab5
voxbegdraw2:
mov ebp, ebx
shr ebp, 16
add ebx, edx
xor eax, eax
dec ecx
mov al, byte [esi+ebp]
voxpal2: mov al, byte [eax+88888888h]
mov ah, al
mov word [edi], ax
voxbpl2: lea edi, [edi+88888888h]
jnz voxbegdraw2
jmp voxskipslab5
voxskip2:
cmp eax, 4
jne voxskip4
xor eax, eax
voxbegdraw4:
mov ebp, ebx
add ebx, edx
shr ebp, 16
xor eax, eax
mov al, byte [esi+ebp]
voxpal3: mov al, byte [eax+88888888h]
mov ah, al
shl eax, 8
mov al, ah
shl eax, 8
mov al, ah
mov dword [edi], eax
voxbpl3: add edi, 88888888h
dec ecx
jnz voxbegdraw4
jmp voxskipslab5
voxskip4:
add eax, edi
test edi, 1
jz voxskipslab1
cmp edi, eax
je voxskipslab1
push eax
push ebx
push ecx
push edi
voxbegslab1:
mov ebp, ebx
add ebx, edx
shr ebp, 16
xor eax, eax
mov al, byte [esi+ebp]
voxpal4: mov al, byte [eax+88888888h]
mov byte [edi], al
voxbpl4: add edi, 88888888h
dec ecx
jnz voxbegslab1
pop edi
pop ecx
pop ebx
pop eax
inc edi
voxskipslab1:
push eax
test edi, 2
jz voxskipslab2
dec eax
cmp edi, eax
jge voxskipslab2
push ebx
push ecx
push edi
voxbegslab2:
mov ebp, ebx
add ebx, edx
shr ebp, 16
xor eax, eax
mov al, byte [esi+ebp]
voxpal5: mov al, byte [eax+88888888h]
mov ah, al
mov word [edi], ax
voxbpl5: add edi, 88888888h
dec ecx
jnz voxbegslab2
pop edi
pop ecx
pop ebx
add edi, 2
voxskipslab2:
mov eax, [esp]
sub eax, 3
cmp edi, eax
jge voxskipslab3
voxprebegslab3:
push ebx
push ecx
push edi
voxbegslab3:
mov ebp, ebx
add ebx, edx
shr ebp, 16
xor eax, eax
mov al, byte [esi+ebp]
voxpal6: mov al, byte [eax+88888888h]
mov ah, al
shl eax, 8
mov al, ah
shl eax, 8
mov al, ah
mov dword [edi], eax
voxbpl6: add edi, 88888888h
dec ecx
jnz voxbegslab3
pop edi
pop ecx
pop ebx
add edi, 4
mov eax, [esp]
sub eax, 3
cmp edi, eax
jl voxprebegslab3
voxskipslab3:
mov eax, [esp]
dec eax
cmp edi, eax
jge voxskipslab4
push ebx
push ecx
push edi
voxbegslab4:
mov ebp, ebx
add ebx, edx
shr ebp, 16
xor eax, eax
mov al, byte [esi+ebp]
voxpal7: mov al, byte [eax+88888888h]
mov ah, al
mov word [edi], ax
voxbpl7: add edi, 88888888h
dec ecx
jnz voxbegslab4
pop edi
pop ecx
pop ebx
add edi, 2
voxskipslab4:
pop eax
cmp edi, eax
je voxskipslab5
voxbegslab5:
mov ebp, ebx
add ebx, edx
shr ebp, 16
xor eax, eax
mov al, byte [esi+ebp]
voxpal8: mov al, byte [eax+88888888h]
mov byte [edi], al
voxbpl8: add edi, 88888888h
dec ecx
jnz voxbegslab5
voxskipslab5:
pop edi
pop esi
pop ebp
pop ebx
ret
align 16
%ifdef M_TARGET_MACHO
GLOBAL _rtext_a_end
_rtext_a_end:
%endif

View file

@ -1,200 +0,0 @@
;*
;* misc.nas
;* Miscellaneous assembly functions
;*
;*---------------------------------------------------------------------------
;* Copyright 1998-2006 Randy Heit
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* 1. Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;* 2. Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in the
;* documentation and/or other materials provided with the distribution.
;* 3. The name of the author may not be used to endorse or promote products
;* derived from this software without specific prior written permission.
;*
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;*---------------------------------------------------------------------------
;*
BITS 32
%ifndef M_TARGET_LINUX
%define DoBlending_MMX _DoBlending_MMX
%define BestColor_MMX _BestColor_MMX
%endif
%ifdef M_TARGET_WATCOM
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
SEGMENT DATA
%else
SECTION .data
%endif
Blending256:
dd 0x01000100,0x00000100
%ifdef M_TARGET_WATCOM
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
SEGMENT CODE
%else
SECTION .text
%endif
;-----------------------------------------------------------
;
; DoBlending_MMX
;
; MMX version of DoBlending
;
; (DWORD *from, DWORD *to, count, tor, tog, tob, toa)
;-----------------------------------------------------------
; Stack arguments as read below (no registers are saved):
;   [esp+4*1] from   [esp+4*2] to    [esp+4*3] count
;   [esp+4*4] tor    [esp+4*5] tog   [esp+4*6] tob   [esp+4*7] toa
; Register use: eax = source pointer, edx = destination pointer,
; ecx = remaining count, mm0 = 0, mm1 = target color * alpha,
; mm7 = Blending256 - alpha (saturating), mm6 = alpha in three word lanes.
; NOTE(review): ecx is only compared against zero after 4 has been
; subtracted per pass, so a count of 0 or one that is not a multiple of four
; does not stop the loop where expected -- callers must honor the
; multiple-of-four requirement stated below.
GLOBAL DoBlending_MMX
DoBlending_MMX:
pxor mm0,mm0 ; mm0 = 0
mov eax,[esp+4*4]
shl eax,16
mov edx,[esp+4*5]
shl edx,8
or eax,[esp+4*6]
or eax,edx
mov ecx,[esp+4*3] ; ecx = count
movd mm1,eax ; mm1 = 00000000 00RRGGBB
mov eax,[esp+4*7]
shl eax,16
mov edx,[esp+4*7]
shl edx,8
or eax,[esp+4*7]
or eax,edx
mov edx,[esp+4*2] ; edx = dest
movd mm6,eax ; mm6 = 00000000 00AAAAAA
punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB
movq mm7,[Blending256]
punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA
mov eax,[esp+4*1] ; eax = source
pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha)
psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha)
nop ; Does this actually pair on a Pentium?
; Do four colors per iteration: Count must be a multiple of four.
; The loop body is the same two-color sequence written out twice.
.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
add eax,8
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
add edx,8
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
sub ecx,2
paddusw mm2,mm1
psrlw mm2,8
paddusw mm3,mm1
psrlw mm3,8
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
movq [edx-8],mm2
movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
add eax,8
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
add edx,8
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
sub ecx,2
paddusw mm2,mm1
psrlw mm2,8
paddusw mm3,mm1
psrlw mm3,8
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
movq [edx-8],mm2
; The MMX instructions after the last sub ecx,2 do not modify EFLAGS, so
; jnz tests the result of that subtraction.
jnz .loop
emms
ret
;-----------------------------------------------------------
;
; BestColor_MMX
;
; Picks the closest matching color from a palette
;
; Passed FFRRGGBB and palette array in same format
; FF is the index of the first palette entry to consider
;
;-----------------------------------------------------------
; Two entry points: BestColor_MMX loads ecx and edx from [esp+4] and [esp+8];
; @BestColor_MMX@8 expects them in ecx and edx already.
;   ecx = FFRRGGBB (FF = first palette index to test), edx = palette pointer
; The loop walks indices FF..255, tracking the smallest sum of squared
; component differences in eax and the matching index in ebx.
; Returns eax = index of the best entry. A distance of zero returns
; immediately through .perf; on equal distances the later index wins (the
; update is skipped only when eax is strictly below the new distance).
; ebx, esi and ebp are saved and restored.
; NOTE(review): psubsw/pmullw act on all four word lanes, so the top word
; (FF minus the palette entry's top byte, squared) is also added into ebp via
; the second movd, shifted left by 16. That only contributes nothing when
; those two bytes are equal -- verify against the palette format and caller.
GLOBAL BestColor_MMX
GLOBAL @BestColor_MMX@8
BestColor_MMX:
mov ecx,[esp+4]
mov edx,[esp+8]
@BestColor_MMX@8:
pxor mm0,mm0
movd mm1,ecx ; mm1 = color searching for
mov eax,257*257+257*257+257*257 ;eax = bestdist
push ebx
punpcklbw mm1,mm0
mov ebx,ecx ; ebx = best color
shr ecx,24 ; ecx = count
and ebx,0xffffff
push esi
push ebp
.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now
inc ecx
punpcklbw mm2,mm0
movq mm3,mm1
psubsw mm3,mm2
pmullw mm3,mm3 ; mm3 = color distance squared
movd ebp,mm3 ; add the three components
psrlq mm3,32 ; into ebp to get the real
mov esi,ebp ; (squared) distance
shr esi,16
and ebp,0xffff
add ebp,esi
movd esi,mm3
add ebp,esi
jz .perf ; found a perfect match
cmp eax,ebp
jb .skip
mov eax,ebp
; ecx was already incremented above, so the current index is ecx-1
lea ebx,[ecx-1]
.skip cmp ecx,256
jne .loop
mov eax,ebx
pop ebp
pop esi
pop ebx
emms
ret
.perf lea eax,[ecx-1]
pop ebp
pop esi
pop ebx
emms
ret

File diff suppressed because it is too large Load diff

View file

@ -1,643 +0,0 @@
;*
;* tmap2.nas
;* The tilted plane inner loop.
;*
;*---------------------------------------------------------------------------
;* Copyright 1998-2006 Randy Heit
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* 1. Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;* 2. Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in the
;* documentation and/or other materials provided with the distribution.
;* 3. The name of the author may not be used to endorse or promote products
;* derived from this software without specific prior written permission.
;*
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;*---------------------------------------------------------------------------
;*
;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was
;* actually slightly slower than the more straight-forward approach
;* used here, probably because the trick requires too much setup time.
;*
BITS 32
%include "valgrind.inc"
%define SPACEFILLER4 (0x44444444)
%ifndef M_TARGET_LINUX
%define plane_sz _plane_sz
%define plane_su _plane_su
%define plane_sv _plane_sv
%define plane_shade _plane_shade
%define planelightfloat _planelightfloat
%define spanend _spanend
%define ylookup _ylookup
%define dc_destorg _dc_destorg
%define ds_colormap _ds_colormap
%define ds_source _ds_source
%define centery _centery
%define centerx _centerx
%define ds_curtiltedsource _ds_curtiltedsource
%define pviewx _pviewx
%define pviewy _pviewy
%define tiltlighting _tiltlighting
%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM
%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM
%define R_CalcTiltedLighting _R_CalcTiltedLighting
%endif
EXTERN plane_sz
EXTERN plane_su
EXTERN plane_sv
EXTERN planelightfloat
EXTERN spanend
EXTERN ylookup
EXTERN dc_destorg
EXTERN ds_colormap
EXTERN centery
EXTERN centerx
EXTERN ds_source
EXTERN plane_shade
EXTERN pviewx
EXTERN pviewy
EXTERN tiltlighting
EXTERN R_CalcTiltedLighting
GLOBAL ds_curtiltedsource
%define sv_i plane_sv
%define sv_j plane_sv+4
%define sv_k plane_sv+8
%define su_i plane_su
%define su_j plane_su+4
%define su_k plane_su+8
%define sz_i plane_sz
%define sz_j plane_sz+4
%define sz_k plane_sz+8
%define SPANBITS 3
section .bss
start_u: resq 1
start_v: resq 1
step_u: resq 1
step_v: resq 1
step_iz: resq 1
step_uz: resq 1
step_vz: resq 1
end_z: resd 1
section .data
ds_curtiltedsource: dd SPACEFILLER4
fp_1:
spanrecips: dd 0x3f800000 ; 1/1
dd 0x3f000000 ; 1/2
dd 0x3eaaaaab ; 1/3
dd 0x3e800000 ; 1/4
dd 0x3e4ccccd ; 1/5
dd 0x3e2aaaab ; 1/6
dd 0x3e124925 ; 1/7
fp_8recip: dd 0x3e000000 ; 1/8
dd 0x3de38e39 ; 1/9
dd 0x3dcccccd ; 1/10
dd 0x3dba2e8c ; 1/11
dd 0x3daaaaab ; 1/12
dd 0x3d9d89d9 ; 1/13
dd 0x3d924925 ; 1/14
dd 0x3d888889 ; 1/15
fp_quickint: dd 0x3f800000 ; 1
dd 0x40000000 ; 2
dd 0x40400000 ; 3
dd 0x40800000 ; 4
dd 0x40a00000 ; 5
dd 0x40c00000 ; 6
dd 0x40e00000 ; 7
fp_8: dd 0x41000000 ; 8
section .text
; R_SetTiltedSpanSource_ASM / @R_SetTiltedSpanSource_ASM@4
;
; Two entry points: R_SetTiltedSpanSource_ASM loads its argument from
; [esp+4] into ecx; @R_SetTiltedSpanSource_ASM@4 expects it in ecx already.
; Writes ecx into the dword at offset +3 of the fetch1..fetch10 instructions
; (defined outside this routine) and records it in ds_curtiltedsource.
; NOTE(review): presumably ecx is the texture source pointer for the tilted
; plane drawer -- confirm against the caller.
GLOBAL R_SetTiltedSpanSource_ASM
GLOBAL @R_SetTiltedSpanSource_ASM@4
R_SetTiltedSpanSource_ASM:
mov ecx,[esp+4]
@R_SetTiltedSpanSource_ASM@4:
mov [fetch1+3],ecx
mov [fetch2+3],ecx
mov [fetch3+3],ecx
mov [fetch4+3],ecx
mov [fetch5+3],ecx
mov [fetch6+3],ecx
mov [fetch7+3],ecx
mov [fetch8+3],ecx
mov [fetch9+3],ecx
mov [fetch10+3],ecx
mov [ds_curtiltedsource],ecx
selfmod rtext_start, rtext_end
ret
; SetTiltedSpanSize
;
; Inputs are taken from registers: cl and dl (no stack arguments are read).
; NOTE(review): the comment near the end speaks of "x bits", so cl/dl are
; presumably the texture's x and y size in bits -- confirm which is which
; against the caller.
; Patches three groups of instructions defined outside this routine:
;   x1..x10 +2 : byte = -cl
;   y1..y10 +2 : byte = -cl - dl
;   m1..m10 +2 : dword = 0 if (1 << -dl) - 1 is 0, otherwise its complement
; ecx is preserved; eax is clobbered.
GLOBAL SetTiltedSpanSize
SetTiltedSpanSize:
push ecx
; eax = 1 << (-dl); the decrement to (1 << -dl) - 1 happens further down
mov cl,dl
neg cl
mov eax,1
shl eax,cl
; reload the caller's cl from the saved ecx on the stack
mov cl,[esp]
neg cl
mov [x1+2],cl
mov [x2+2],cl
mov [x3+2],cl
mov [x4+2],cl
mov [x5+2],cl
mov [x6+2],cl
mov [x7+2],cl
mov [x8+2],cl
mov [x9+2],cl
mov [x10+2],cl
sub cl,dl
dec eax
mov [y1+2],cl
mov [y2+2],cl
mov [y3+2],cl
mov [y4+2],cl
mov [y5+2],cl
mov [y6+2],cl
mov [y7+2],cl
mov [y8+2],cl
mov [y9+2],cl
mov [y10+2],cl
cmp eax,0 ; if x bits is 0, mask must be 0 too.
jz .notted
not eax
.notted:
pop ecx
mov [m1+2],eax
mov [m2+2],eax
mov [m3+2],eax
mov [m4+2],eax
mov [m5+2],eax
mov [m6+2],eax
mov [m7+2],eax
mov [m8+2],eax
mov [m9+2],eax
mov [m10+2],eax
selfmod rtext_start, rtext_end
ret
%ifndef M_TARGET_MACHO
SECTION .rtext progbits alloc exec write align=64
%else
SECTION .text align=64
GLOBAL _rtext_tmap2_start
_rtext_tmap2_start:
%endif
rtext_start:
; R_DrawTiltedPlane_ASM
; Span drawer for tilted planes (per the routine name). Two entry points:
;   R_DrawTiltedPlane_ASM    - arguments on the stack: [esp+4] = y, [esp+8] = x
;   @R_DrawTiltedPlane_ASM@8 - arguments already in ecx (y) and edx (x)
;
; This first part saves the callee-saved registers, computes the span length
; and destination pointer, and evaluates u/z, v/z and 1/z at the span start
; from the su/sv/sz i, j and k coefficients. The trailing comments track the
; x87 register stack, st0 first.
GLOBAL R_DrawTiltedPlane_ASM
GLOBAL @R_DrawTiltedPlane_ASM@8
R_DrawTiltedPlane_ASM:
mov ecx,[esp+4]
mov edx,[esp+8]
; ecx = y
; edx = x
@R_DrawTiltedPlane_ASM@8:
push ebx
push esi
push edi
push ebp
mov eax,[centery]
movzx ebx,word [spanend+ecx*2]
sub eax,ecx ; eax = centery-y
sub ebx,edx ; ebx = span length - 1
mov edi,[ylookup+ecx*4]
; (centery-y) and (x-centerx) are pushed so they can be loaded as integers
; by the fild instructions below; the two slots are released at .litup.
push eax
add edi,[dc_destorg]
add edi,edx ; edi = frame buffer pointer
sub edx,[centerx] ; edx = x-centerx
push edx
; eax is cleared because only al is loaded later while the full eax is used
; as an index into the light tables.
xor eax,eax
fild dword [esp+4] ; ymul
fild dword [esp] ; xmul | ymul
fld dword [sv_j] ; sv.j | xmul | ymul
fmul st0,st2 ; sv.j*ymul | xmul | ymul
fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul
fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul
fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul
fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
faddp st1,st0 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul
faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul
fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul
fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z
fadd dword [su_k] ; u/z | v/z | 1/z
fxch st2 ; 1/z | v/z | u/z
fxch st1 ; v/z | 1/z | u/z
; if lighting is on, fill out the light table
; When plane_shade is zero the call is skipped entirely. Otherwise
; R_CalcTiltedLighting is called with, from the lowest stack address up:
;   [esp]    double: (1/z + width*sz.i) * planelightfloat   (span end)
;   [esp+8]  double: (1/z) * planelightfloat                (span start)
;   [esp+16] dword:  ebx (span length - 1)
mov al,[plane_shade]
test al,al
jz .litup
push ebx
fild dword [esp] ; width | v/z | 1/z | u/z
fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z
fadd st0,st2 ; 1/endz | v/z | 1/z | u/z
fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z
fmul dword [planelightfloat]
fxch st1
fmul dword [planelightfloat]
sub esp,16
fstp qword [esp]
fstp qword [esp+8]
call R_CalcTiltedLighting
; Release the two doubles (16 bytes) plus the pushed ebx (4 bytes).
add esp, 20
; Re-zero eax after the call: later code loads only al but indexes with eax.
xor eax, eax
; Drop the (x-centerx) and (centery-y) dwords pushed during setup.
.litup add esp, 8
; calculate initial z, u, and v values
; u and v at the span start are obtained by multiplying u/z and v/z by
; z = 1/(1/z); they are stored as 64-bit integers in start_u / start_v.
fld st1 ; 1/z | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | v/z | 1/z | u/z
fld st3 ; u/z | z | v/z | 1/z | u/z
fmul st0,st1 ; u | z | v/z | 1/z | u/z
fld st2 ; v/z | u | z | v/z | 1/z | u/z
fmulp st2,st0 ; u | v | v/z | 1/z | u/z
fld st0
fistp qword [start_u]
fld st1
fistp qword [start_v]
; ebx holds span length - 1, so ebx >= 7 means at least 8 pixels.
cmp ebx,7 ; Do we have at least 8 pixels to plot?
jl near ShortStrip
; yes, we do, so figure out tex coords at end of this span
; multiply i values by span length (8)
fld dword [su_i] ; su.i
fmul dword [fp_8] ; su.i*8
fld dword [sv_i] ; sv.i | su.i*8
fmul dword [fp_8] ; sv.i*8 | su.i*8
fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8
fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8
fxch st2 ; su.i*8 | sv.i*8 | sz.i*8
fstp qword [step_uz] ; sv.i*8 | sz.i*8
fstp qword [step_vz] ; sz.i*8
fst qword [step_iz] ; sz.i*8
; find tex coords at start of next span
; The three 8-pixel gradient steps are added onto 1/z, v/z and u/z, which
; still sit deeper on the x87 stack below u and v.
faddp st4
fld qword [step_vz]
faddp st3
fld qword [step_uz]
faddp st5
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
fst dword [end_z]
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
; now subtract to get stepping values for this span
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
; FullSpan: an 8-pixel span is about to be drawn. Before drawing it, start
; the perspective divide for the span that follows so the result is ready
; when the pixel loop finishes. ebx = remaining pixel count - 1.
FullSpan:
xor eax,eax
cmp ebx,15 ; is there another complete span after this one?
jl NextIsShort
; there is a complete span after this one
; Advance 1/z, v/z and u/z by the precomputed 8-pixel gradient steps.
fld qword [step_iz]
faddp st4,st0
fld qword [step_vz]
faddp st3,st0
fld qword [step_uz]
faddp st5,st0
jmp StartDiv
NextIsShort:
cmp ebx,8 ; if next span is no more than 1 pixel, then we already
jle DrawFullSpan ; know everything we need to draw it
; The following span is partial: scale the per-pixel gradients by table
; entry (ebx-8) of fp_quickint, whose value is ebx-7.
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint-8*4+ebx*4]
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint-8*4+ebx*4]
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint-8*4+ebx*4]
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
faddp st5,st0 ; u | v | v/z | 1/z | u/z
; Leave z = 1/(1/z) for the next span on top of the x87 stack; it is
; consumed after the pixel loop below.
StartDiv:
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
; DrawFullSpan: eight unrolled pixels.
; ecx / edx hold the running v / u coordinates (start_v+pviewy and
; start_u+pviewx), advanced by step_v / step_u after each pixel.
; Per pixel:
;   ebp = v >> xN shift, esi = (u & mN mask) >> yN shift
;   al  = byte at [ebp+esi+patched base]        (fetchN)
;   al  = byte at [table+eax], where table is the pointer read from
;         tiltlighting[ebx - n] for pixel n
;   the result is stored at [edi+n]
; The xN/mN/yN/fetchN labels mark instructions whose immediates are
; rewritten at run time by SetTiltedSpanSize / R_SetTiltedSpanSource_ASM;
; the values assembled here are only placeholders.
DrawFullSpan:
mov ecx,[start_v]
mov edx,[start_u]
add ecx,[pviewy]
add edx,[pviewx]
mov esi,edx
mov ebp,ecx
; pixel 0
x1 shr ebp,26
m1 and esi,0xfc000000
y1 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch1 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+0],al
; pixel 1
x2 shr ebp,26
m2 and esi,0xfc000000
y2 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch2 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-4]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+1],al
; pixel 2
x3 shr ebp,26
m3 and esi,0xfc000000
y3 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch3 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-8]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+2],al
; pixel 3
x4 shr ebp,26
m4 and esi,0xfc000000
y4 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch4 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-12]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+3],al
; pixel 4
x5 shr ebp,26
m5 and esi,0xfc000000
y5 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch5 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-16]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+4],al
; pixel 5
x6 shr ebp,26
m6 and esi,0xfc000000
y6 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch6 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-20]
mov esi,edx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi+5],al
; pixel 6
x7 shr ebp,26
m7 and esi,0xfc000000
y7 shr esi,20
add ecx,[step_v]
add edx,[step_u]
fetch7 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4-24]
; pixel 7 is interleaved with the end of pixel 6 and works directly on
; ecx/edx, since the coordinates are not needed after the last pixel.
x8 shr ecx,26
mov al,[ebp+eax]
m8 and edx,0xfc000000
mov [edi+6],al
y8 shr edx,20
mov ebp,[tiltlighting+ebx*4-28]
fetch8 mov al,[edx+ecx+SPACEFILLER4]
mov al,[ebp+eax]
mov [edi+7],al
add edi,8
sub ebx,8
; ebx went negative: the span just drawn was the last one.
jl near Done
; Store the next span's starting u and v (st1 and st2) as integers.
fld st1
fistp qword [start_u]
fld st2
fistp qword [start_v]
; Fewer than 8 pixels left: finish with the partial-span code.
cmp ebx,7
jl near EndIsShort
; Otherwise derive the per-pixel steps for the next full span from the z
; value left on the x87 stack, then loop back to FullSpan.
fst dword [end_z]
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
jmp FullSpan
; OnlyOnePixelAtEnd: reached from EndIsShort when exactly one pixel remains
; after a full span. Here the x87 stack is u | v | v/z | 1/z | u/z, so
; start_u / start_v are rewritten from st0 / st1 before drawing.
OnlyOnePixelAtEnd:
fld st0
fistp qword [start_u]
fld st1
fistp qword [start_v]
; OnlyOnePixel: draw a single texel at start_u/start_v using the first
; tiltlighting entry, then fall through to the common exit.
OnlyOnePixel:
mov edx,[start_v]
mov ecx,[start_u]
add edx,[pviewy]
add ecx,[pviewx]
x9 shr edx,26
m9 and ecx,0xfc000000
y9 shr ecx,20
mov ebp,[tiltlighting]
fetch9 mov al,[ecx+edx+SPACEFILLER4]
mov al,[ebp+eax]
mov [edi],al
; Done: discard the five values still on the x87 stack (two fcompp pop two
; each, fstp st0 pops one), restore the saved registers and return.
Done:
fcompp
fcompp
fstp st0
pop ebp
pop edi
pop esi
pop ebx
ret
; ShortStrip: the whole span is shorter than 8 pixels (ebx < 7 on entry).
; ebx <= 0 means a single pixel, handled by OnlyOnePixel.
ShortStrip:
cmp ebx,0
jle near OnlyOnePixel
; Advance 1/z, v/z, u/z to the end of the strip: the per-pixel gradients are
; scaled by fp_quickint[ebx], whose value is ebx+1.
MoreThanOnePixel:
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint+ebx*4]
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint+ebx*4]
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
fmul dword [fp_quickint+ebx*4]
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
faddp st5,st0 ; u | v | v/z | 1/z | u/z
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
jmp CalcPartialSteps
; EndIsShort: fewer than 8 pixels remain after a full span. The z for the
; end of the strip is already on the x87 stack unless only one pixel is left.
EndIsShort:
cmp ebx,0
je near OnlyOnePixelAtEnd
; CalcPartialSteps: per-pixel u/v steps for a partial strip, using
; spanrecips[ebx] = 1/(ebx+1) in place of the fixed 1/8.
CalcPartialSteps:
fst dword [end_z]
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z
fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z
fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z
fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z
fxch st1 ; v'-v | ustep | v/z | 1/z | u/z
fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z
fxch st1 ; ustep | vstep | v/z | 1/z | u/z
fistp qword [step_u] ; vstep | v/z | 1/z | u/z
fistp qword [step_v] ; v/z | 1/z | u/z
mov ecx,[start_v]
mov edx,[start_u]
add ecx,[pviewy]
add edx,[pviewx]
mov esi,edx
mov ebp,ecx
; endloop: one pixel per iteration, ebx counting down from (pixels - 1).
endloop:
x10 shr ebp,26
m10 and esi,0xfc000000
y10 shr esi,20
inc edi
add ecx,[step_v]
add edx,[step_u]
fetch10 mov al,[ebp+esi+SPACEFILLER4]
mov ebp,[tiltlighting+ebx*4]
mov esi,edx
dec ebx
mov al,[ebp+eax]
mov ebp,ecx
mov [edi-1],al
; The mov instructions above leave the flags alone, so jge still tests the
; result of "dec ebx": loop while ebx >= 0.
jge endloop
; Three values (v/z | 1/z | u/z) remain on the x87 stack; pop them.
fcompp
fstp st0
pop ebp
pop edi
pop esi
pop ebx
ret
rtext_end:
%ifdef M_TARGET_MACHO
GLOBAL _rtext_tmap2_end
_rtext_tmap2_end:
%endif

View file

@ -1,344 +0,0 @@
%include "valgrind.inc"
%ifdef M_TARGET_WATCOM
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
SEGMENT DATA
%else
SECTION .data
%endif
%ifndef M_TARGET_LINUX
%define ylookup _ylookup
%define vplce _vplce
%define vince _vince
%define palookupoffse _palookupoffse
%define bufplce _bufplce
%define dc_iscale _dc_iscale
%define dc_colormap _dc_colormap
%define dc_count _dc_count
%define dc_dest _dc_dest
%define dc_source _dc_source
%define dc_texturefrac _dc_texturefrac
%define dc_pitch _dc_pitch
%define setupvlinetallasm _setupvlinetallasm
%define vlinetallasm4 _vlinetallasm4
%define vlinetallasmathlon4 _vlinetallasmathlon4
%define vlinetallasm1 _vlinetallasm1
%define prevlinetallasm1 _prevlinetallasm1
%endif
EXTERN vplce
EXTERN vince
EXTERN palookupoffse
EXTERN bufplce
EXTERN ylookup
EXTERN dc_iscale
EXTERN dc_colormap
EXTERN dc_count
EXTERN dc_dest
EXTERN dc_source
EXTERN dc_texturefrac
EXTERN dc_pitch
GLOBAL vlt4pitch
GLOBAL vlt1pitch
%ifdef M_TARGET_WATCOM
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
SEGMENT CODE
%else
SECTION .text
%endif
; setpitch3
; Patches the value in eax into the immediate operands of the four
; pitch-dependent instructions of the column drawers below (vltpitch,
; vltpitcha, vlt1pitch1, vlt1pitch2). The value is taken from eax as-is;
; nothing is read from the stack.
ALIGN 16
GLOBAL setpitch3
setpitch3:
mov [vltpitch+2], eax
mov [vltpitcha+2],eax
mov [vlt1pitch1+2], eax
mov [vlt1pitch2+2], eax
; selfmod is defined in valgrind.inc (not shown); presumably announces the
; modified code range to Valgrind - TODO confirm.
selfmod vltpitch, vlt1pitch2+6
ret
; setupvlinetallasm
; Loads the dword argument at [esp+4] and writes its low byte into the
; imm8 shift count (offset +2) of every patched "shr reg, N" instruction
; used by vlinetallasm4, vlinetallasmathlon4, prevlinetallasm1 and
; vlinetallasm1.
ALIGN 16
GLOBAL setupvlinetallasm
setupvlinetallasm:
mov ecx, [esp+4]
mov [shifter1+2], cl
mov [shifter2+2], cl
mov [shifter3+2], cl
mov [shifter4+2], cl
mov [shifter1a+2], cl
mov [shifter2a+2], cl
mov [shifter3a+2], cl
mov [shifter4a+2], cl
mov [preshift+2], cl
mov [shift11+2], cl
mov [shift12+2], cl
; selfmod comes from valgrind.inc (not shown); presumably flags the patched
; range for Valgrind - TODO confirm.
selfmod shifter1, shift12+6
ret
%ifdef M_TARGET_MACHO
SECTION .text align=64
GLOBAL _rtext_tmap3_start
_rtext_tmap3_start:
%else
SECTION .rtext progbits alloc exec write align=64
%endif
; vlinetallasm4
; Draws four adjacent columns at once. All per-column parameters are patched
; straight into the inner loop before it runs:
;   bufplce[0..3]       -> sourceN displacement (texture column base)
;   palookupoffse[0..3] -> lookupN displacement (remap table base)
;   vince[0..3]         -> stepN immediate      (per-row coordinate step)
; The running coordinates are read from vplce[0..3] and written back on exit.
; Takes no stack arguments; reads dc_count and dc_dest.
ALIGN 16
GLOBAL vlinetallasm4
vlinetallasm4:
push ebx
mov eax, [bufplce+0]
mov ebx, [bufplce+4]
mov ecx, [bufplce+8]
mov edx, [bufplce+12]
mov [source1+3], eax
mov [source2+3], ebx
mov [source3+3], ecx
mov [source4+3], edx
mov eax, [palookupoffse+0]
mov ebx, [palookupoffse+4]
mov ecx, [palookupoffse+8]
mov edx, [palookupoffse+12]
mov [lookup1+2], eax
mov [lookup2+2], ebx
mov [lookup3+2], ecx
mov [lookup4+2], edx
mov eax, [vince+0]
mov ebx, [vince+4]
mov ecx, [vince+8]
mov edx, [vince+12]
mov [step1+2], eax
mov [step2+2], ebx
mov [step3+2], ecx
; step4 is patched at +1 rather than +2; presumably "add eax, imm32"
; assembles to the shorter accumulator form - confirm against the listing.
mov [step4+1], edx
push ebp
push esi
push edi
mov ecx, [dc_count]
mov edi, [dc_dest]
; eax = dc_dest + ylookup[dc_count-1]; edi becomes -ylookup[dc_count-1].
; The write instructions are patched with eax+0..3, so [edi+writeN] starts
; at dc_dest and edi counts upward toward zero.
mov eax, dword [ylookup+ecx*4-4]
add eax, edi
sub edi, eax
mov [write1+2],eax
inc eax
mov [write2+2],eax
inc eax
mov [write3+2],eax
inc eax
mov [write4+2],eax
mov ebx, [vplce]
mov ecx, [vplce+4]
mov esi, [vplce+8]
mov eax, [vplce+12]
selfmod loopit, vltpitch
jmp loopit
ALIGN 16
; Inner loop: for each column, shift the coordinate down to a texel index,
; fetch the texel, remap it through the lookup table, store it, and step.
; Every 0x88888888 / 0x8888888N value and the shift count 24 are
; placeholders overwritten before the loop executes.
loopit:
mov edx, ebx
shifter1: shr edx, 24
source1: movzx edx, BYTE [edx+0x88888888]
lookup1: mov dl, [edx+0x88888888]
write1: mov [edi+0x88888880], dl
step1: add ebx, 0x88888888
mov edx, ecx
shifter2: shr edx, 24
source2: movzx edx, BYTE [edx+0x88888888]
lookup2: mov dl, [edx+0x88888888]
write2: mov [edi+0x88888881], dl
step2: add ecx, 0x88888888
mov edx, esi
shifter3: shr edx, 24
source3: movzx edx, BYTE [edx+0x88888888]
lookup3: mov dl, BYTE [edx+0x88888888]
write3: mov [edi+0x88888882], dl
step3: add esi, 0x88888888
mov edx, eax
shifter4: shr edx, 24
source4: movzx edx, BYTE [edx+0x88888888]
lookup4: mov dl, [edx+0x88888888]
write4: mov [edi+0x88888883], dl
step4: add eax, 0x88888888
; Advance one row (320 is a placeholder patched by setpitch3) and continue
; while edi has not passed zero.
vltpitch: add edi, 320
jle near loopit
; Save the updated coordinates for the caller.
mov [vplce], ebx
mov [vplce+4], ecx
mov [vplce+8], esi
mov [vplce+12], eax
pop edi
pop esi
pop ebp
pop ebx
ret
; vlinetallasmathlon4
; Alternative four-column drawer with a differently scheduled inner loop
; (tuned for Athlon, per the name). Setup mirrors vlinetallasm4 but patches
; the "...a" labelled instructions and keeps the first column's coordinate
; in ebp instead of ebx. Reads bufplce, palookupoffse, vince, vplce,
; dc_count and dc_dest; writes the updated coordinates back to vplce.
ALIGN 16
GLOBAL vlinetallasmathlon4
vlinetallasmathlon4:
push ebx
mov eax, [bufplce+0]
mov ebx, [bufplce+4]
mov ecx, [bufplce+8]
mov edx, [bufplce+12]
mov [source1a+3], eax
mov [source2a+3], ebx
mov [source3a+3], ecx
mov [source4a+3], edx
mov eax, [palookupoffse+0]
mov ebx, [palookupoffse+4]
mov ecx, [palookupoffse+8]
mov edx, [palookupoffse+12]
mov [lookup1a+2], eax
mov [lookup2a+2], ebx
mov [lookup3a+2], ecx
mov [lookup4a+2], edx
mov eax, [vince+0]
mov ebx, [vince+4]
mov ecx, [vince+8]
mov edx, [vince+12]
mov [step1a+2], eax
mov [step2a+2], ebx
mov [step3a+2], ecx
; step4a is patched at +1 rather than +2; presumably "add eax, imm32" uses
; the shorter accumulator encoding - confirm against the listing.
mov [step4a+1], edx
push ebp
push esi
push edi
mov ecx, [dc_count]
mov edi, [dc_dest]
; eax = dc_dest + ylookup[dc_count-1]; edi becomes the negative offset that
; counts up toward zero while the patched write addresses stay fixed.
mov eax, dword [ylookup+ecx*4-4]
add eax, edi
sub edi, eax
mov [write1a+2],eax
inc eax
mov [write2a+2],eax
inc eax
mov [write3a+2],eax
inc eax
mov [write4a+2],eax
mov ebp, [vplce]
mov ecx, [vplce+4]
mov esi, [vplce+8]
mov eax, [vplce+12]
selfmod loopita, vltpitcha
jmp loopita
; Unfortunately, this code has not been carefully analyzed to determine
; how well it utilizes the processor's instruction units. Instead, I just
; kept rearranging code, seeing what sped it up and what slowed it down
; until I arrived at this. This is the fastest version I was able to
; manage, but that does not mean it cannot be made faster with careful
; instruction shuffling.
ALIGN 64
; Columns 1 and 2 are processed as a pair (results in dl/dh), then columns
; 3 and 4 (results in bl/dl). All 0x8888888N values and the shift count 24
; are placeholders patched before the loop runs.
loopita: mov edx, ebp
mov ebx, ecx
shifter1a: shr edx, 24
shifter2a: shr ebx, 24
source1a: movzx edx, BYTE [edx+0x88888888]
source2a: movzx ebx, BYTE [ebx+0x88888888]
step1a: add ebp, 0x88888888
step2a: add ecx, 0x88888888
lookup1a: mov dl, [edx+0x88888888]
lookup2a: mov dh, [ebx+0x88888888]
mov ebx, esi
write1a: mov [edi+0x88888880], dl
write2a: mov [edi+0x88888881], dh
shifter3a: shr ebx, 24
mov edx, eax
source3a: movzx ebx, BYTE [ebx+0x88888888]
shifter4a: shr edx, 24
step3a: add esi, 0x88888888
source4a: movzx edx, BYTE [edx+0x88888888]
step4a: add eax, 0x88888888
lookup3a: mov bl, [ebx+0x88888888]
lookup4a: mov dl, [edx+0x88888888]
write3a: mov [edi+0x88888882], bl
write4a: mov [edi+0x88888883], dl
; Advance one row (placeholder pitch patched by setpitch3); loop while edi
; has not passed zero.
vltpitcha: add edi, 320
jle near loopita
; Save the updated coordinates for the caller.
mov [vplce], ebp
mov [vplce+4], ecx
mov [vplce+8], esi
mov [vplce+12], eax
pop edi
pop esi
pop ebp
pop ebx
ret
; prevlinetallasm1
; Single-column front end. When dc_count is above 1 (unsigned compare) it
; jumps straight to vlinetallasm1. Otherwise it draws exactly one pixel
; inline: the texel at dc_source[dc_texturefrac >> shift] is remapped
; through dc_colormap and stored at dc_dest.
; Out: eax = dc_iscale + dc_texturefrac.
ALIGN 16
GLOBAL prevlinetallasm1
prevlinetallasm1:
mov ecx, [dc_count]
cmp ecx, 1
ja vlinetallasm1
mov eax, [dc_iscale]
mov edx, [dc_texturefrac]
add eax, edx
mov ecx, [dc_source]
; The shift count 16 is a placeholder patched by setupvlinetallasm.
preshift: shr edx, 16
push ebx
push edi
mov edi, [dc_colormap]
movzx ebx, byte [ecx+edx]
mov ecx, [dc_dest]
mov bl, byte [edi+ebx]
pop edi
mov byte [ecx], bl
pop ebx
ret
; vlinetallasm1
; Draws one column of dc_count pixels. Each row takes the texel at
; dc_source[frac >> shift], remaps it through dc_colormap and stores it,
; then advances frac by dc_iscale and the destination by the patched pitch.
; Out: eax = the coordinate (frac) after the last row.
; The loop is do-while style: it decrements ebp after drawing, so it assumes
; dc_count is at least 1 on entry.
ALIGN 16
GLOBAL vlinetallasm1
vlinetallasm1:
push ebp
push ebx
push edi
push esi
mov ebp, [dc_count]
mov ebx, [dc_texturefrac] ; ebx = frac
mov edi, [dc_dest]
mov ecx, ebx
; Shift counts (16) and pitch immediates (0x88888888) below are placeholders
; patched by setupvlinetallasm and setpitch3.
shift11: shr ecx, 16
mov esi, [dc_source]
mov edx, [dc_iscale]
; Back up one row so the add at the top of the loop lands on dc_dest.
vlt1pitch1: sub edi, 0x88888888
mov eax, [dc_colormap]
loop2:
movzx ecx, BYTE [esi+ecx]
add ebx, edx
vlt1pitch2: add edi, 0x88888888
mov cl,[eax+ecx]
mov [edi],cl
mov ecx,ebx
shift12: shr ecx,16
dec ebp
jnz loop2
mov eax,ebx
pop esi
pop edi
pop ebx
pop ebp
ret
%ifdef M_TARGET_MACHO
GLOBAL _rtext_tmap3_end
_rtext_tmap3_end:
%endif

View file

@ -1,150 +0,0 @@
%ifnidn __OUTPUT_FORMAT__,win64
%error tmap3.asm is for Win64 output. You should use tmap.s for other systems.
%endif
BITS 64
DEFAULT REL
EXTERN vplce
EXTERN vince
EXTERN palookupoffse
EXTERN bufplce
EXTERN dc_count
EXTERN dc_dest
EXTERN dc_pitch
SECTION .text
; ASM_PatchPitch (Win64)
; Copies dc_pitch into the 32-bit immediates of the two pitch-dependent
; instructions of vlinetallasm4: the "imul rcx, 320" at pm and the
; "add rcx, 320" at vltpitch. Clobbers ecx.
GLOBAL ASM_PatchPitch
ASM_PatchPitch:
mov ecx, [dc_pitch]
mov [pm+3], ecx
mov [vltpitch+3], ecx
ret
; setupvlinetallasm (Win64)
; Writes cl into the imm8 shift count (offset +2) of the four
; "shr reg, 24" instructions in vlinetallasm4. The value is used as it
; arrives in cl; nothing is loaded from memory.
align 16
GLOBAL setupvlinetallasm
setupvlinetallasm:
mov [shifter1+2], cl
mov [shifter2+2], cl
mov [shifter3+2], cl
mov [shifter4+2], cl
ret
align 16
; Yasm can't do progbits alloc exec for win64?
; Hmm, looks like it's automatic. No worries, then.
SECTION .rtext write ;progbits alloc exec
; vlinetallasm4 (Win64)
; Draws four adjacent columns of dc_count rows each. Reads dc_count,
; dc_dest, bufplce[0..3], palookupoffse[0..3], vince[0..3] and vplce[0..3];
; writes the updated vplce[0..3] back on exit. Returns immediately when
; dc_count is not positive. The inner loop is self-modifying: texture
; offsets and per-row steps are patched into it before it runs.
GLOBAL vlinetallasm4
PROC_FRAME vlinetallasm4
rex_push_reg rbx
push_reg rdi
push_reg r15
push_reg r14
push_reg r13
push_reg r12
push_reg rbp
push_reg rsi
alloc_stack 8 ; Stack must be 16-byte aligned
END_PROLOGUE
; rax = bufplce base address
; rbx/rbp/rsi = per-column scratch (columns 2-4)
; rcx = offset from rdi/count (negative)
; edx/rdx = scratch
; rdi = bottom of columns to write to
; r8d-r11d = column offsets
; r12-r15 = palookupoffse[0] - palookupoffse[3]
mov ecx, [dc_count]
mov rdi, [dc_dest]
test ecx, ecx
jle vltepilog ; count must be positive
; Columns 2-4 are addressed relative to column 1's base (rax); only the low
; 32 bits of each difference are patched into the sourceN displacement.
mov rax, [bufplce]
mov r8, [bufplce+8]
sub r8, rax
mov r9, [bufplce+16]
sub r9, rax
mov r10, [bufplce+24]
sub r10, rax
mov [source2+4], r8d
mov [source3+4], r9d
mov [source4+4], r10d
; rcx = dc_count * pitch (320 is a placeholder patched by ASM_PatchPitch).
pm: imul rcx, 320
mov r12, [palookupoffse]
mov r13, [palookupoffse+8]
mov r14, [palookupoffse+16]
mov r15, [palookupoffse+24]
mov r8d, [vince]
mov r9d, [vince+4]
mov r10d, [vince+8]
mov r11d, [vince+12]
mov [step1+3], r8d
mov [step2+3], r9d
mov [step3+3], r10d
mov [step4+3], r11d
; rdi points just past the last row; rcx becomes the negative offset that
; counts up to zero.
add rdi, rcx
neg rcx
mov r8d, [vplce]
mov r9d, [vplce+4]
mov r10d, [vplce+8]
mov r11d, [vplce+12]
jmp loopit
ALIGN 16
; Inner loop: the four columns are interleaved. Each column shifts its
; coordinate down to a texel index, fetches the texel, remaps it through its
; lookup table and stores the result. Shift counts (24) and 0x88888888
; values are placeholders overwritten at run time.
loopit:
mov edx, r8d
shifter1: shr edx, 24
step1: add r8d, 0x88888888
movzx edx, BYTE [rax+rdx]
mov ebx, r9d
mov dl, [r12+rdx]
shifter2: shr ebx, 24
step2: add r9d, 0x88888888
source2: movzx ebx, BYTE [rax+rbx+0x88888888]
mov ebp, r10d
mov bl, [r13+rbx]
shifter3: shr ebp, 24
step3: add r10d, 0x88888888
source3: movzx ebp, BYTE [rax+rbp+0x88888888]
mov esi, r11d
mov bpl, BYTE [r14+rbp]
shifter4: shr esi, 24
step4: add r11d, 0x88888888
source4: movzx esi, BYTE [rax+rsi+0x88888888]
mov [rdi+rcx], dl
mov [rdi+rcx+1], bl
mov sil, BYTE [r15+rsi]
mov [rdi+rcx+2], bpl
mov [rdi+rcx+3], sil
; Advance one row; loop while the offset is still negative.
vltpitch: add rcx, 320
jl loopit
; Save the updated coordinates for the caller.
mov [vplce], r8d
mov [vplce+4], r9d
mov [vplce+8], r10d
mov [vplce+12], r11d
; Epilogue: release the alignment slot and pop in reverse push order.
vltepilog:
add rsp, 8
pop rsi
pop rbp
pop r12
pop r13
pop r14
pop r15
pop rdi
pop rbx
ret
vlinetallasm4_end:
ENDPROC_FRAME
ALIGN 16

View file

@ -1,141 +0,0 @@
#%include "valgrind.inc"
.section .text
# ASM_PatchPitch (GAS, x86-64)
# Copies dc_pitch into the 32-bit immediates of the two pitch-dependent
# instructions of vlinetallasm4: the imulq at pm and the addq at vltpitch.
# Clobbers %ecx.
.globl ASM_PatchPitch
ASM_PatchPitch:
movl dc_pitch(%rip), %ecx
movl %ecx, pm+3(%rip)
movl %ecx, vltpitch+3(%rip)
# selfmod pm, vltpitch+6
ret
# setupvlinetallasm (GAS, x86-64)
# Writes %dil into the imm8 shift count (offset +2) of the four
# "shr $24, reg" instructions in vlinetallasm4. The value is used as it
# arrives in %dil; nothing is loaded from memory.
.align 16
.globl setupvlinetallasm
setupvlinetallasm:
movb %dil, shifter1+2(%rip)
movb %dil, shifter2+2(%rip)
movb %dil, shifter3+2(%rip)
movb %dil, shifter4+2(%rip)
# selfmod shifter1, shifter4+3
ret
.align 16
.section .rtext,"awx"
# vlinetallasm4 (GAS, x86-64)
# Draws four adjacent columns of dc_count rows each. Reads dc_count,
# dc_dest, bufplce[0..3], palookupoffse[0..3], vince[0..3] and vplce[0..3];
# writes the updated vplce[0..3] back on exit. Returns immediately when
# dc_count is not positive. The inner loop is self-modifying: texture
# offsets and per-row steps are patched into it before it runs.
.globl vlinetallasm4
.type vlinetallasm4,@function
vlinetallasm4:
.cfi_startproc
push %rbx
push %rdi
push %r15
push %r14
push %r13
push %r12
push %rbp
push %rsi
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
.cfi_adjust_cfa_offset 8
# rax = bufplce base address
# rbx/rbp/rsi = per-column scratch (columns 2-4)
# rcx = offset from rdi/count (negative)
# edx/rdx = scratch
# rdi = bottom of columns to write to
# r8d-r11d = column offsets
# r12-r15 = palookupoffse[0] - palookupoffse[3]
movl dc_count(%rip), %ecx
movq dc_dest(%rip), %rdi
testl %ecx, %ecx
jle vltepilog # count must be positive
# Columns 2-4 are addressed relative to column 1's base (%rax); only the low
# 32 bits of each difference are patched into the sourceN displacement.
movq bufplce(%rip), %rax
movq bufplce+8(%rip), %r8
subq %rax, %r8
movq bufplce+16(%rip), %r9
subq %rax, %r9
movq bufplce+24(%rip), %r10
subq %rax, %r10
movl %r8d, source2+4(%rip)
movl %r9d, source3+4(%rip)
movl %r10d, source4+4(%rip)
# rcx = dc_count * pitch ($320 is a placeholder patched by ASM_PatchPitch).
pm: imulq $320, %rcx
movq palookupoffse(%rip), %r12
movq palookupoffse+8(%rip), %r13
movq palookupoffse+16(%rip), %r14
movq palookupoffse+24(%rip), %r15
movl vince(%rip), %r8d
movl vince+4(%rip), %r9d
movl vince+8(%rip), %r10d
movl vince+12(%rip), %r11d
movl %r8d, step1+3(%rip)
movl %r9d, step2+3(%rip)
movl %r10d, step3+3(%rip)
movl %r11d, step4+3(%rip)
# rdi points just past the last row; rcx becomes the negative offset that
# counts up to zero.
addq %rcx, %rdi
negq %rcx
movl vplce(%rip), %r8d
movl vplce+4(%rip), %r9d
movl vplce+8(%rip), %r10d
movl vplce+12(%rip), %r11d
# selfmod loopit, vltepilog
jmp loopit
.align 16
# Inner loop: the four columns are interleaved. Each column shifts its
# coordinate down to a texel index, fetches the texel, remaps it through its
# lookup table and stores the result. Shift counts ($24) and $0x44444444
# values are placeholders overwritten at run time.
loopit:
movl %r8d, %edx
shifter1: shrl $24, %edx
step1: addl $0x44444444, %r8d
movzbl (%rax,%rdx), %edx
movl %r9d, %ebx
movb (%r12,%rdx), %dl
shifter2: shrl $24, %ebx
step2: addl $0x44444444, %r9d
source2: movzbl 0x44444444(%rax,%rbx), %ebx
movl %r10d, %ebp
movb (%r13,%rbx), %bl
shifter3: shr $24, %ebp
step3: addl $0x44444444, %r10d
source3: movzbl 0x44444444(%rax,%rbp), %ebp
movl %r11d, %esi
movb (%r14,%rbp), %bpl
shifter4: shr $24, %esi
step4: add $0x44444444, %r11d
source4: movzbl 0x44444444(%rax,%rsi), %esi
movb %dl, (%rdi,%rcx)
movb %bl, 1(%rdi,%rcx)
movb (%r15,%rsi), %sil
movb %bpl, 2(%rdi,%rcx)
movb %sil, 3(%rdi,%rcx)
# Advance one row; loop while the offset is still negative.
vltpitch: addq $320, %rcx
jl loopit
# Save the updated coordinates for the caller.
movl %r8d, vplce(%rip)
movl %r9d, vplce+4(%rip)
movl %r10d, vplce+8(%rip)
movl %r11d, vplce+12(%rip)
# Epilogue: release the alignment slot and pop in reverse push order.
vltepilog:
addq $8, %rsp
.cfi_adjust_cfa_offset -8
pop %rsi
pop %rbp
pop %r12
pop %r13
pop %r14
pop %r15
pop %rdi
pop %rbx
ret
.cfi_endproc
.align 16

View file

@ -1030,7 +1030,7 @@ void D_DoomLoop ()
catch (CVMAbortException &error)
{
error.MaybePrintMessage();
Printf("%s", error.stacktrace);
Printf("%s", error.stacktrace.GetChars());
D_ErrorCleanup();
}
}

View file

@ -48,57 +48,6 @@
class PClassActor;
typedef TMap<int, PClassActor *> FClassMap;
// Since this file is included by everything, it seems an appropriate place
// to check the NOASM/USEASM macros.
// There are three assembly-related macros:
//
// NOASM - Assembly code is disabled
// X86_ASM - Using ia32 assembly code
// X64_ASM - Using amd64 assembly code
//
// Note that these relate only to using the pure assembly code. Inline
// assembly may still be used without respect to these macros, as
// deemed appropriate.
#ifndef NOASM
// Select the appropriate type of assembly code to use.
#if defined(_M_IX86) || defined(__i386__)
#define X86_ASM
#ifdef X64_ASM
#undef X64_ASM
#endif
#elif defined(_M_X64) || defined(__amd64__)
#define X64_ASM
#ifdef X86_ASM
#undef X86_ASM
#endif
#else
#define NOASM
#endif
#endif
#ifdef NOASM
// Ensure no assembly macros are defined if NOASM is defined.
#ifdef X86_ASM
#undef X86_ASM
#endif
#ifdef X64_ASM
#undef X64_ASM
#endif
#endif
#if defined(_MSC_VER)
#define NOVTABLE __declspec(novtable)

View file

@ -15,6 +15,11 @@
#include "c_cvars.h"
#include "r_3dfloors.h"
CVAR(Int, r_3dfloors, true, 0);
namespace swrenderer
{
// external variables
int fake3D;
F3DFloor *fakeFloor;
@ -28,8 +33,6 @@ HeightLevel *height_cur = NULL;
int CurrentMirror = 0;
int CurrentSkybox = 0;
CVAR(Int, r_3dfloors, true, 0);
// private variables
int height_max = -1;
TArray<HeightStack> toplist;
@ -160,3 +163,4 @@ void R_3D_LeaveSkybox()
CurrentSkybox--;
}
}

View file

@ -3,6 +3,11 @@
#include "p_3dfloors.h"
EXTERN_CVAR(Int, r_3dfloors);
namespace swrenderer
{
// special types
struct HeightLevel
@ -57,7 +62,6 @@ extern HeightLevel *height_top;
extern HeightLevel *height_cur;
extern int CurrentMirror;
extern int CurrentSkybox;
EXTERN_CVAR(Int, r_3dfloors);
// functions
void R_3D_DeleteHeights();
@ -67,4 +71,6 @@ void R_3D_ResetClip();
void R_3D_EnterSkybox();
void R_3D_LeaveSkybox();
}
#endif

View file

@ -58,6 +58,13 @@
#include "po_man.h"
#include "r_data/colormaps.h"
CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs?
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor);
namespace swrenderer
{
using namespace drawerargs;
seg_t* curline;
side_t* sidedef;
line_t* linedef;
@ -104,8 +111,6 @@ TArray<PortalDrawseg> WallPortals(1000); // note: this array needs to go away as
subsector_t *InSubsector;
CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs?
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor);
void R_StoreWallRange (int start, int stop);
@ -1396,3 +1401,5 @@ void R_RenderBSPNode (void *node)
}
R_Subsector ((subsector_t *)((BYTE *)node - 1));
}
}

View file

@ -27,6 +27,11 @@
#include <stddef.h>
#include "r_defs.h"
EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs?
namespace swrenderer
{
// The 3072 below is just an arbitrary value picked to avoid
// drawing lines the player is too close to that would overflow
// the texture calculations.
@ -109,8 +114,6 @@ extern WORD MirrorFlags;
typedef void (*drawfunc_t) (int start, int stop);
EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs?
// BSP?
void R_ClearClipSegs (short left, short right);
void R_ClearDrawSegs ();
@ -119,5 +122,6 @@ void R_RenderBSPNode (void *node);
// killough 4/13/98: fake floors/ceilings for deep water / fake ceilings:
sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool);
}
#endif

View file

@ -59,7 +59,7 @@ enum
SIL_BOTH
};
extern size_t MaxDrawSegs;
namespace swrenderer { extern size_t MaxDrawSegs; }
struct FDisplacement;
//

File diff suppressed because it is too large Load diff

View file

@ -1,287 +1,208 @@
// Emacs style mode select -*- C++ -*-
//-----------------------------------------------------------------------------
//
// $Id:$
//
// Copyright (C) 1993-1996 by id Software, Inc.
//
// This source is available for distribution and/or modification
// only under the terms of the DOOM Source Code License as
// published by id Software. All rights reserved.
//
// The source is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
// for more details.
//
// DESCRIPTION:
// System specific interface stuff.
//
//-----------------------------------------------------------------------------
#ifndef __R_DRAW__
#define __R_DRAW__
#pragma once
#include "r_defs.h"
extern "C" int ylookup[MAXHEIGHT];
EXTERN_CVAR(Bool, r_multithreaded);
EXTERN_CVAR(Int, r_drawfuzz);
EXTERN_CVAR(Bool, r_drawtrans);
EXTERN_CVAR(Float, transsouls);
EXTERN_CVAR(Int, r_columnmethod);
extern "C" int dc_pitch; // [RH] Distance between rows
extern "C" lighttable_t*dc_colormap;
extern "C" int dc_x;
extern "C" int dc_yl;
extern "C" int dc_yh;
extern "C" fixed_t dc_iscale;
extern double dc_texturemid;
extern "C" fixed_t dc_texturefrac;
extern "C" int dc_color; // [RH] For flat colors (no texturing)
extern "C" DWORD dc_srccolor;
extern "C" DWORD *dc_srcblend;
extern "C" DWORD *dc_destblend;
// first pixel in a column
extern "C" const BYTE* dc_source;
extern "C" BYTE *dc_dest, *dc_destorg;
extern "C" int dc_count;
extern "C" DWORD vplce[4];
extern "C" DWORD vince[4];
extern "C" BYTE* palookupoffse[4];
extern "C" const BYTE* bufplce[4];
extern "C" const BYTE* bufplce2[4];
extern "C" uint32_t bufheight[4];
// [RH] Temporary buffer for column drawing
extern "C" BYTE *dc_temp;
extern "C" unsigned int dc_tspans[4][MAXHEIGHT];
extern "C" unsigned int *dc_ctspan[4];
extern "C" unsigned int horizspans[4];
// [RH] Pointers to the different column and span drawers...
// The span blitting interface.
// Hook in assembler or system specific BLT here.
extern DWORD (*dovline1) ();
extern DWORD (*doprevline1) ();
#ifdef X64_ASM
#define dovline4 vlinetallasm4
extern "C" void vlinetallasm4();
#else
extern void (*dovline4) ();
#endif
extern void setupvline (int);
extern DWORD (*domvline1) ();
extern void (*domvline4) ();
extern void setupmvline (int);
extern void setuptmvline (int);
// The Spectre/Invisibility effect.
extern void R_DrawFuzzColumn(void);
// [RH] Draw shaded column
extern void (*R_DrawShadedColumn)(void);
// Draw with color translation tables, for player sprite rendering,
// Green/Red/Blue/Indigo shirts.
extern void (*R_DrawTranslatedColumn)(void);
// Span drawing for rows, floor/ceiling. No Spectre effect needed.
extern void (*R_DrawSpan)(void);
void R_SetupSpanBits(FTexture *tex);
void R_SetSpanColormap(BYTE *colormap);
void R_SetSpanSource(const BYTE *pixels);
// Span drawing for masked textures.
extern void (*R_DrawSpanMasked)(void);
// Span drawing for translucent textures.
void R_DrawSpanTranslucent(void);
// Span drawing for masked, translucent textures.
void R_DrawSpanMaskedTranslucent(void);
// Span drawing for translucent, additive textures.
void R_DrawSpanAddClamp(void);
// Span drawing for masked, translucent, additive textures.
void R_DrawSpanMaskedAddClamp(void);
// [RH] Span blit into an interleaved intermediate buffer
extern void (*R_DrawColumnHoriz)(void);
// [RH] Initialize the above pointers
void R_InitColumnDrawers ();
// [RH] Moves data from the temporary buffer to the screen.
void rt_copy1col(int hx, int sx, int yl, int yh);
void rt_copy4cols(int sx, int yl, int yh);
void rt_map4cols(int sx, int yl, int yh);
extern "C"
namespace swrenderer
{
struct vissprite_t;
void rt_shaded1col (int hx, int sx, int yl, int yh);
void rt_shaded4cols_c (int sx, int yl, int yh);
void rt_shaded4cols_asm (int sx, int yl, int yh);
extern double dc_texturemid;
void rt_map1col (int hx, int sx, int yl, int yh);
void rt_add1col (int hx, int sx, int yl, int yh);
void rt_addclamp1col (int hx, int sx, int yl, int yh);
void rt_subclamp1col (int hx, int sx, int yl, int yh);
void rt_revsubclamp1col (int hx, int sx, int yl, int yh);
namespace drawerargs
{
extern int dc_pitch;
extern lighttable_t *dc_colormap;
extern int dc_x;
extern int dc_yl;
extern int dc_yh;
extern fixed_t dc_iscale;
extern fixed_t dc_texturefrac;
extern uint32_t dc_textureheight;
extern int dc_color;
extern uint32_t dc_srccolor;
extern uint32_t dc_srccolor_bgra;
extern uint32_t *dc_srcblend;
extern uint32_t *dc_destblend;
extern fixed_t dc_srcalpha;
extern fixed_t dc_destalpha;
extern const uint8_t *dc_source;
extern const uint8_t *dc_source2;
extern uint32_t dc_texturefracx;
extern uint8_t *dc_translation;
extern uint8_t *dc_dest;
extern uint8_t *dc_destorg;
extern int dc_destheight;
extern int dc_count;
void rt_tlate1col (int hx, int sx, int yl, int yh);
void rt_tlateadd1col (int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh);
extern uint32_t vplce[4];
extern uint32_t vince[4];
extern uint8_t *palookupoffse[4];
extern fixed_t palookuplight[4];
extern const uint8_t *bufplce[4];
extern const uint8_t *bufplce2[4];
extern uint32_t buftexturefracx[4];
extern uint32_t bufheight[4];
extern int vlinebits;
extern int mvlinebits;
extern int tmvlinebits;
void rt_add4cols_c (int sx, int yl, int yh);
void rt_addclamp4cols_c (int sx, int yl, int yh);
void rt_subclamp4cols (int sx, int yl, int yh);
void rt_revsubclamp4cols (int sx, int yl, int yh);
extern int ds_y;
extern int ds_x1;
extern int ds_x2;
extern lighttable_t * ds_colormap;
extern dsfixed_t ds_light;
extern dsfixed_t ds_xfrac;
extern dsfixed_t ds_yfrac;
extern dsfixed_t ds_xstep;
extern dsfixed_t ds_ystep;
extern int ds_xbits;
extern int ds_ybits;
extern fixed_t ds_alpha;
extern double ds_lod;
extern const uint8_t *ds_source;
extern int ds_color;
void rt_tlate4cols (int sx, int yl, int yh);
void rt_tlateadd4cols (int sx, int yl, int yh);
void rt_tlateaddclamp4cols (int sx, int yl, int yh);
void rt_tlatesubclamp4cols (int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh);
extern unsigned int dc_tspans[4][MAXHEIGHT];
extern unsigned int *dc_ctspan[4];
extern unsigned int *horizspan[4];
}
void rt_add4cols_asm (int sx, int yl, int yh);
void rt_addclamp4cols_asm (int sx, int yl, int yh);
extern int ylookup[MAXHEIGHT];
extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/];
extern FDynamicColormap ShadeFakeColormap[16];
extern uint8_t identitymap[256];
extern FDynamicColormap identitycolormap;
// Spectre/Invisibility.
#define FUZZTABLE 50
extern int fuzzoffset[FUZZTABLE + 1];
extern int fuzzpos;
extern int fuzzviewheight;
void R_InitColumnDrawers();
void R_InitShadeMaps();
void R_InitFuzzTable(int fuzzoff);
// Result type returned by R_SetPatchStyle: tells the caller whether the
// current thing should be drawn at all and, if so, which column method
// the drawing should follow.
enum ESPSResult
{
DontDraw, // not useful to draw this
DoDraw0, // draw this as if r_columnmethod is 0
DoDraw1, // draw this as if r_columnmethod is 1
};
ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color);
ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color);
void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade
bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)());
const uint8_t *R_GetColumn(FTexture *tex, int col);
void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn);
void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn);
void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn);
void rt_initcols(uint8_t *buffer = nullptr);
void rt_span_coverage(int x, int start, int stop);
void rt_draw4cols(int sx);
void rt_flip_posts();
void rt_copy1col(int hx, int sx, int yl, int yh);
void rt_copy4cols(int sx, int yl, int yh);
void rt_shaded1col(int hx, int sx, int yl, int yh);
void rt_shaded4cols(int sx, int yl, int yh);
void rt_map1col(int hx, int sx, int yl, int yh);
void rt_add1col(int hx, int sx, int yl, int yh);
void rt_addclamp1col(int hx, int sx, int yl, int yh);
void rt_subclamp1col(int hx, int sx, int yl, int yh);
void rt_revsubclamp1col(int hx, int sx, int yl, int yh);
void rt_tlate1col(int hx, int sx, int yl, int yh);
void rt_tlateadd1col(int hx, int sx, int yl, int yh);
void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh);
void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh);
void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh);
void rt_map4cols(int sx, int yl, int yh);
void rt_add4cols(int sx, int yl, int yh);
void rt_addclamp4cols(int sx, int yl, int yh);
void rt_subclamp4cols(int sx, int yl, int yh);
void rt_revsubclamp4cols(int sx, int yl, int yh);
void rt_tlate4cols(int sx, int yl, int yh);
void rt_tlateadd4cols(int sx, int yl, int yh);
void rt_tlateaddclamp4cols(int sx, int yl, int yh);
void rt_tlatesubclamp4cols(int sx, int yl, int yh);
void rt_tlaterevsubclamp4cols(int sx, int yl, int yh);
void R_DrawColumnHoriz();
void R_DrawColumn();
void R_DrawFuzzColumn();
void R_DrawTranslatedColumn();
void R_DrawShadedColumn();
void R_FillColumn();
void R_FillAddColumn();
void R_FillAddClampColumn();
void R_FillSubClampColumn();
void R_FillRevSubClampColumn();
void R_DrawAddColumn();
void R_DrawTlatedAddColumn();
void R_DrawAddClampColumn();
void R_DrawAddClampTranslatedColumn();
void R_DrawSubClampColumn();
void R_DrawSubClampTranslatedColumn();
void R_DrawRevSubClampColumn();
void R_DrawRevSubClampTranslatedColumn();
void R_DrawSpan();
void R_DrawSpanMasked();
void R_DrawSpanTranslucent();
void R_DrawSpanMaskedTranslucent();
void R_DrawSpanAddClamp();
void R_DrawSpanMaskedAddClamp();
void R_FillSpan();
void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy);
void R_DrawColoredSpan(int y, int x1, int x2);
void R_SetupDrawSlab(uint8_t *colormap);
void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p);
void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip);
uint32_t vlinec1();
void vlinec4();
uint32_t mvlinec1();
void mvlinec4();
fixed_t tmvline1_add();
void tmvline4_add();
fixed_t tmvline1_addclamp();
void tmvline4_addclamp();
fixed_t tmvline1_subclamp();
void tmvline4_subclamp();
fixed_t tmvline1_revsubclamp();
void tmvline4_revsubclamp();
void R_FillColumnHoriz();
void R_FillSpan();
// Forwarding wrappers: each do*vline* entry point simply calls the
// corresponding C implementation (vlinec*/mvlinec*) and passes its
// result, if any, straight back to the caller.
inline uint32_t dovline1()
{
	return vlinec1();
}

inline void dovline4()
{
	vlinec4();
}

inline uint32_t domvline1()
{
	return mvlinec1();
}

inline void domvline4()
{
	mvlinec4();
}
void setupvline(int fracbits);
void setupmvline(int fracbits);
void setuptmvline(int fracbits);
void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade);
void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade);
void R_SetTranslationMap(lighttable_t *translation);
void R_SetupSpanBits(FTexture *tex);
void R_SetSpanColormap(lighttable_t *colormap);
void R_SetSpanSource(FTexture *tex);
void R_MapTiltedPlane(int y, int x1);
void R_MapColoredPlane(int y, int x1);
void R_DrawParticle(vissprite_t *);
}
#ifdef X86_ASM
#define rt_shaded4cols rt_shaded4cols_asm
#define rt_add4cols rt_add4cols_asm
#define rt_addclamp4cols rt_addclamp4cols_asm
#else
#define rt_shaded4cols rt_shaded4cols_c
#define rt_add4cols rt_add4cols_c
#define rt_addclamp4cols rt_addclamp4cols_c
#endif
void rt_flip_posts();
void rt_draw4cols (int sx);
// [RH] Preps the temporary horizontal buffer.
void rt_initcols (BYTE *buffer=NULL);
void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
#ifdef X86_ASM
void R_DrawShadedColumnP_C (void);
extern "C" void R_DrawSpanP_ASM (void);
extern "C" void R_DrawSpanMaskedP_ASM (void);
void R_DrawColumnHorizP_C(void);
#else
void R_DrawShadedColumnP_C (void);
void R_DrawSpanP_C (void);
void R_DrawSpanMaskedP_C (void);
#endif
void R_DrawColumn();
void R_DrawColumnHorizP_C(void);
void R_DrawTranslatedColumnP_C(void);
void R_DrawSpanTranslucent (void);
void R_DrawSpanMaskedTranslucent (void);
void R_DrawTlatedLucentColumnP_C (void);
#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C
void R_FillColumnP (void);
void R_FillColumnHorizP (void);
void R_FillSpan (void);
#ifdef X86_ASM
#define R_SetupDrawSlab R_SetupDrawSlabA
#define R_DrawSlab R_DrawSlabA
#else
#define R_SetupDrawSlab R_SetupDrawSlabC
#define R_DrawSlab R_DrawSlabC
#endif
extern "C" void R_SetupDrawSlab(const BYTE *colormap);
extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
extern "C" int ds_y;
extern "C" int ds_x1;
extern "C" int ds_x2;
extern "C" lighttable_t* ds_colormap;
extern "C" dsfixed_t ds_xfrac;
extern "C" dsfixed_t ds_yfrac;
extern "C" dsfixed_t ds_xstep;
extern "C" dsfixed_t ds_ystep;
extern "C" int ds_xbits;
extern "C" int ds_ybits;
extern "C" fixed_t ds_alpha;
// start of a 64*64 tile image
extern "C" const BYTE* ds_source;
extern "C" int ds_color; // [RH] For flat color (no texturing)
extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/];
extern FDynamicColormap ShadeFakeColormap[16];
extern BYTE identitymap[256];
extern BYTE *dc_translation;
// [RH] Added for multiresolution support
void R_InitShadeMaps();
void R_InitFuzzTable (int fuzzoff);
// [RH] Consolidate column drawer selection
// Result type returned by R_SetPatchStyle: says whether drawing is
// worthwhile and which column method the caller should behave as.
enum ESPSResult
{
DontDraw, // not useful to draw this
DoDraw0, // draw this as if r_columnmethod is 0
DoDraw1, // draw this as if r_columnmethod is 1
};
ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color);
// Floating-point convenience overload: converts alpha to fixed point with
// FLOAT2FIXED and defers to the fixed_t overload of R_SetPatchStyle.
inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color)
{
	const fixed_t fixedAlpha = FLOAT2FIXED(alpha);
	return R_SetPatchStyle(style, fixedAlpha, translation, color);
}
// Call this after finished drawing the current thing, in case its
// style was STYLE_Shade
void R_FinishSetPatchStyle ();
// transmaskwallscan calls this to find out what column drawers to use
bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)());
// Retrieve column data for wallscan. Should probably be removed
// to just use the texture's GetColumn() method. It just exists
// for double-layer skies.
const BYTE *R_GetColumn (FTexture *tex, int col);
void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0.
void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
// transmaskwallscan is like maskwallscan, but it can also blend to the background
void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
#endif

2626
src/r_draw_pal.cpp Normal file

File diff suppressed because it is too large Load diff

333
src/r_draw_pal.h Normal file
View file

@ -0,0 +1,333 @@
#pragma once
#include "r_draw.h"
#include "v_palette.h"
#include "r_thread.h"
namespace swrenderer
{
// Common base for the drawer commands that render one wall column at a
// time (the DrawWall*1PalCommand classes). It only stores the arguments;
// each subclass provides its own Execute().
// NOTE(review): the constructor is defined outside this header; the fields
// are presumably a snapshot of the global wall-drawer state - confirm there.
class PalWall1Command : public DrawerCommand
{
public:
	PalWall1Command();

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "PalWallCommand";
	}

protected:
	// Texture stepping and sampling state.
	uint32_t _iscale;
	uint32_t _texturefrac;
	uint8_t *_colormap;
	int _count;
	const uint8_t *_source;

	// Output location.
	uint8_t *_dest;

	// Bit counts for the plain, masked and translucent-masked variants.
	int _vlinebits;
	int _mvlinebits;
	int _tmvlinebits;

	int _pitch;

	// Blend tables used by the translucent variants.
	uint32_t *_srcblend;
	uint32_t *_destblend;
};
// Common base for the drawer commands that render four wall columns at
// once (the DrawWall*4PalCommand classes). Per-column inputs are kept in
// four-element arrays; each subclass provides its own Execute().
// NOTE(review): the constructor is defined outside this header; the fields
// are presumably a snapshot of the global wall-drawer state - confirm there.
class PalWall4Command : public DrawerCommand
{
public:
	PalWall4Command();

	// Identifying string for this command (same text as PalWall1Command).
	FString DebugInfo() override
	{
		return "PalWallCommand";
	}

protected:
	// Output location and extent.
	uint8_t *_dest;
	int _count;
	int _pitch;

	// Bit counts for the plain, masked and translucent-masked variants.
	int _vlinebits;
	int _mvlinebits;
	int _tmvlinebits;

	// One entry per column.
	uint8_t *_palookupoffse[4];
	const uint8_t *_bufplce[4];
	uint32_t _vince[4];
	uint32_t _vplce[4];

	// Blend tables used by the translucent variants.
	uint32_t *_srcblend;
	uint32_t *_destblend;
};
// Concrete wall drawers. Each class adds only an Execute() override to its
// base: the *1* classes derive from PalWall1Command and the *4* classes
// from PalWall4Command. The Execute() bodies are defined outside this header.
class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
// Common base for the sky column drawers (single/double layer, one or
// four columns). Stores the two solid colors passed by the caller plus
// per-column texture state; subclasses provide Execute().
// NOTE(review): the constructor is defined outside this header - how the
// underscore-prefixed fields are filled in should be confirmed there.
class PalSkyCommand : public DrawerCommand
{
public:
	PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom);

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "PalSkyCommand";
	}

protected:
	// Constructor arguments.
	uint32_t solid_top;
	uint32_t solid_bottom;

	// Output location and extent.
	uint8_t *_dest;
	int _count;
	int _pitch;

	// One entry per column; the "2" array serves the double-sky drawers.
	const uint8_t *_bufplce[4];
	const uint8_t *_bufplce2[4];
	int _bufheight[4];
	uint32_t _vince[4];
	uint32_t _vplce[4];
};
// Concrete sky drawers. Each inherits PalSkyCommand's constructor via a
// using-declaration and adds only an Execute() override.
class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
// Common base for the direct-to-screen column drawers (draw, fill,
// translated, shaded and blended variants). It only stores the arguments;
// each subclass provides its own Execute().
// NOTE(review): the constructor is defined outside this header; the fields
// are presumably a snapshot of the global column-drawer state - confirm there.
class PalColumnCommand : public DrawerCommand
{
public:
	PalColumnCommand();

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "PalColumnCommand";
	}

protected:
	// Output location and extent.
	int _count;
	uint8_t *_dest;
	int _pitch;

	// Texture stepping and lookup tables.
	fixed_t _iscale;
	fixed_t _texturefrac;
	const uint8_t *_colormap;
	const uint8_t *_source;
	const uint8_t *_translation;

	// Solid color and blend state.
	int _color;
	uint32_t *_srcblend;
	uint32_t *_destblend;
	uint32_t _srccolor;
};
// Concrete column drawers. Each class derives from PalColumnCommand and
// adds only an Execute() override; the bodies are defined outside this header.
class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class FillColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
// Drawer command for the fuzz column effect. Unlike the PalColumnCommand
// family it keeps its own private set of arguments.
// NOTE(review): constructor and Execute() are defined outside this header;
// the fields presumably mirror the global fuzz/column state - confirm there.
class DrawFuzzColumnPalCommand : public DrawerCommand
{
public:
	DrawFuzzColumnPalCommand();

	void Execute(DrawerThread *thread) override;

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "DrawFuzzColumnPalCommand";
	}

private:
	// Vertical range and column.
	int _yl;
	int _yh;
	int _x;

	// Output buffer origin and row pitch.
	uint8_t *_destorg;
	int _pitch;

	// Fuzz table state.
	int _fuzzpos;
	int _fuzzviewheight;
};
// Common base for the horizontal span drawers. It only stores the
// arguments; each subclass provides its own Execute().
// NOTE(review): the constructor is defined outside this header; the fields
// are presumably a snapshot of the global ds_* span state - confirm there.
class PalSpanCommand : public DrawerCommand
{
public:
	PalSpanCommand();

	// Identifying string for this command; subclasses that do not override
	// DebugInfo() report this same text.
	FString DebugInfo() override
	{
		return "PalSpanCommand";
	}

protected:
	// Texture data and lookup table.
	const uint8_t *_source;
	const uint8_t *_colormap;

	// Starting texture coordinates.
	dsfixed_t _xfrac;
	dsfixed_t _yfrac;

	// Row and horizontal range.
	int _y;
	int _x1;
	int _x2;

	uint8_t *_destorg;

	// Texture coordinate steps and bit counts.
	dsfixed_t _xstep;
	dsfixed_t _ystep;
	int _xbits;
	int _ybits;

	// Blend tables and solid color.
	uint32_t *_srcblend;
	uint32_t *_destblend;
	int _color;
};
// Concrete span drawers. Each class derives from PalSpanCommand and adds
// only an Execute() override; the bodies are defined outside this header.
class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
// Drawer command for one span of a tilted (sloped) plane. The constructor
// parameters have matching members below; the underscore-prefixed members
// and basecolormapdata have no constructor parameter.
// NOTE(review): constructor, Execute() and CalcTiltedLighting() are defined
// outside this header - confirm there how the extra members are populated.
class DrawTiltedSpanPalCommand : public DrawerCommand
{
public:
	DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy);

	void Execute(DrawerThread *thread) override;

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "DrawTiltedSpanPalCommand";
	}

private:
	void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread);

	// Members named after the constructor parameters.
	int y;
	int x1;
	int x2;
	FVector3 plane_sz;
	FVector3 plane_su;
	FVector3 plane_sv;
	bool plane_shade;
	int planeshade;
	float planelightfloat;
	fixed_t pviewx;
	fixed_t pviewy;

	// Additional state without a constructor parameter.
	const uint8_t *_colormap;
	uint8_t *_destorg;
	int _ybits;
	int _xbits;
	const uint8_t *_source;
	uint8_t *basecolormapdata;
};
// Drawer command for a single-colored span on row y between x1 and x2.
// Derives from PalSpanCommand but keeps its own private copies of the
// values it needs.
// NOTE(review): constructor and Execute() are defined outside this header;
// color/destorg have no constructor parameter - confirm their source there.
class DrawColoredSpanPalCommand : public PalSpanCommand
{
public:
	DrawColoredSpanPalCommand(int y, int x1, int x2);

	void Execute(DrawerThread *thread) override;

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "DrawColoredSpanPalCommand";
	}

private:
	// Members named after the constructor parameters.
	int y;
	int x1;
	int x2;

	// Additional state without a constructor parameter.
	int color;
	uint8_t *destorg;
};
class DrawSlabPalCommand : public PalSpanCommand
{
public:
DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap);
void Execute(DrawerThread *thread) override;
private:
int _dx;
fixed_t _v;
int _dy;
fixed_t _vi;
const uint8_t *_vvptr;
uint8_t *_p;
const uint8_t *_colormap;
int _pitch;
int _start_y;
};
class DrawFogBoundaryLinePalCommand : public PalSpanCommand
{
public:
DrawFogBoundaryLinePalCommand(int y, int x1, int x2);
void Execute(DrawerThread *thread) override;
private:
int y, x1, x2;
const uint8_t *_colormap;
uint8_t *_destorg;
};
// Drawer command that selects the temporary column buffer (dc_temp) a
// drawer thread will use: the buffer given here, or the thread's own
// dc_temp_buff when the pointer is null.
class RtInitColsPalCommand : public DrawerCommand
{
public:
	RtInitColsPalCommand(uint8_t *buff);

	void Execute(DrawerThread *thread) override;

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "RtInitColsPalCommand";
	}

private:
	// Caller-supplied buffer; may be null.
	uint8_t *buff;
};
// Common base for the commands that write a column into the thread's
// temporary buffer (dc_temp). The constructor copies the current
// dc_source, dc_iscale, dc_texturefrac, dc_count, dc_color, dc_x and dc_yl
// values from drawerargs; subclasses provide Execute() and DebugInfo().
class PalColumnHorizCommand : public DrawerCommand
{
public:
	PalColumnHorizCommand();

protected:
	// Texture data and stepping.
	const uint8_t *_source;
	fixed_t _iscale;
	fixed_t _texturefrac;

	// Number of rows and fill color.
	int _count;
	int _color;

	// Column and first row.
	int _x;
	int _yl;
};
// Samples the source texture column into the thread's temporary buffer.
class DrawColumnHorizPalCommand : public PalColumnHorizCommand
{
public:
	void Execute(DrawerThread *thread) override;

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "DrawColumnHorizPalCommand";
	}
};
// Fills a column of the thread's temporary buffer with a single color.
class FillColumnHorizPalCommand : public PalColumnHorizCommand
{
public:
	void Execute(DrawerThread *thread) override;

	// Identifying string for this command.
	FString DebugInfo() override
	{
		return "FillColumnHorizPalCommand";
	}
};
// Common base for the commands that move columns from the thread's
// temporary buffer to the screen. hx/sx/yl/yh come from the constructor
// arguments; the remaining members are copied by the constructor from the
// current dc_destorg, dc_pitch, dc_colormap, dc_srcblend, dc_destblend,
// dc_translation and dc_color values. Subclasses provide Execute().
class PalRtCommand : public DrawerCommand
{
public:
	PalRtCommand(int hx, int sx, int yl, int yh);

	// Identifying string for this command; shared by all subclasses that
	// do not override it.
	FString DebugInfo() override
	{
		return "PalRtCommand";
	}

protected:
	// Constructor arguments.
	int hx;
	int sx;
	int yl;
	int yh;

	// Output buffer origin and row pitch.
	uint8_t *_destorg;
	int _pitch;

	// Lookup and blend tables.
	const uint8_t *_colormap;
	const uint32_t *_srcblend;
	const uint32_t *_destblend;
	const uint8_t *_translation;

	int _color;
};
// Concrete temporary-buffer-to-screen drawers. Each class inherits
// PalRtCommand's constructor via a using-declaration and adds only an
// Execute() override. The commented-out *Translated* blend variants below
// are not declared.
class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
//class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
}

File diff suppressed because it is too large Load diff

867
src/r_drawt_pal.cpp Normal file
View file

@ -0,0 +1,867 @@
/*
** r_drawt_pal.cpp
** Faster column drawers for modern processors
**
**---------------------------------------------------------------------------
** Copyright 1998-2006 Randy Heit
** All rights reserved.
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions
** are met:
**
** 1. Redistributions of source code must retain the above copyright
** notice, this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
** notice, this list of conditions and the following disclaimer in the
** documentation and/or other materials provided with the distribution.
** 3. The name of the author may not be used to endorse or promote products
** derived from this software without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**---------------------------------------------------------------------------
**
** These functions stretch columns into a temporary buffer and then
** map them to the screen. On modern machines, this is faster than drawing
** them directly to the screen.
**
** Will I be able to even understand any of this if I come back to it later?
** Let's hope so. :-)
*/
#include "templates.h"
#include "doomtype.h"
#include "doomdef.h"
#include "r_defs.h"
#include "r_draw.h"
#include "r_main.h"
#include "r_things.h"
#include "v_video.h"
#include "r_draw_pal.h"
// I should have commented this stuff better.
//
// dc_temp is the buffer R_DrawColumnHoriz writes into.
// dc_tspans points into it.
// dc_ctspan points into dc_tspans.
// horizspan also points into dc_tspans.
// dc_ctspan is advanced while drawing into dc_temp.
// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen.
namespace swrenderer
{
// Remembers the buffer to install; nullptr means "use the thread's own".
RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff)
	: buff(buff)
{
}

// Points the thread's dc_temp at the requested buffer, falling back to the
// thread's dc_temp_buff when no buffer was supplied.
void RtInitColsPalCommand::Execute(DrawerThread *thread)
{
	if (buff != nullptr)
	{
		thread->dc_temp = buff;
	}
	else
	{
		thread->dc_temp = thread->dc_temp_buff;
	}
}
/////////////////////////////////////////////////////////////////////
// Captures the current column-drawer arguments so that Execute() can run
// later without reading the shared drawerargs variables again.
PalColumnHorizCommand::PalColumnHorizCommand()
{
	using namespace drawerargs;

	// Where the column goes and how many rows it has.
	_x = dc_x;
	_yl = dc_yl;
	_count = dc_count;

	// Texture column and how to step through it.
	_source = dc_source;
	_texturefrac = dc_texturefrac;
	_iscale = dc_iscale;

	// Color used by the fill variant.
	_color = dc_color;
}
// Samples the texture column into the thread's temporary buffer. Only the
// rows assigned to this thread are written, so the texture position is
// advanced past the skipped rows first and then stepped num_cores rows at
// a time. Consecutive rows of one column are 4 bytes apart in dc_temp.
void DrawColumnHorizPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_yl, _count);
	if (rows <= 0)
		return;

	const uint8_t *texels = _source;

	// Column slot (0-3) within the row, plus this thread's first row.
	uint8_t *out = &thread->dc_temp[(_x & 3) + thread->temp_line_for_thread(_yl) * 4];

	fixed_t pos = _texturefrac;
	fixed_t step = _iscale;
	pos += step * thread->skipped_by_thread(_yl);
	step *= thread->num_cores;

	for (; rows > 0; --rows)
	{
		*out = texels[pos >> FRACBITS];
		out += 4;
		pos += step;
	}
}
// Writes the fill color into this thread's rows of one column of the
// temporary buffer. Consecutive rows of a column are 4 bytes apart.
void FillColumnHorizPalCommand::Execute(DrawerThread *thread)
{
	const int rows = thread->count_for_thread(_yl, _count);
	if (rows <= 0)
		return;

	uint8_t fill = _color;
	uint8_t *out = &thread->dc_temp[(_x & 3) + thread->temp_line_for_thread(_yl) * 4];

	for (int row = 0; row < rows; ++row)
	{
		out[row * 4] = fill;
	}
}
/////////////////////////////////////////////////////////////////////
// Stores the column/row arguments and captures the current drawer state
// (destination, pitch, lookup and blend tables, color) for later use by
// Execute().
PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh)
	: hx(hx), sx(sx), yl(yl), yh(yh)
{
	using namespace drawerargs;

	// Output buffer.
	_pitch = dc_pitch;
	_destorg = dc_destorg;

	// Lookup tables.
	_translation = dc_translation;
	_colormap = dc_colormap;

	// Blending.
	_destblend = dc_destblend;
	_srcblend = dc_srcblend;
	_color = dc_color;
}
// Copies one column (slot hx) from the thread's temporary buffer to screen
// column sx, unmodified. Only the rows assigned to this thread are copied,
// so the destination advances num_cores screen rows per step.
void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;

	for (; rows > 0; --rows)
	{
		*out = *in;
		in += 4;
		out += stride;
	}
}
// Copies all four columns from the thread's temporary buffer to screen
// columns sx..sx+3, unmodified. Only the rows assigned to this thread are
// copied, so the destination advances num_cores screen rows per step.
//
// The copy is done byte-wise rather than through int pointers: the buffers
// are uint8_t arrays, so accessing them as int violates the aliasing rules
// and requires 4-byte alignment of both addresses, and expressing the row
// stride in ints (pitch / sizeof(int)) truncates whenever the byte pitch is
// not a multiple of four.
void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread)
{
	int count = yh - yl + 1;
	count = thread->count_for_thread(yl, count);
	if (count <= 0)
		return;

	uint8_t *dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];

	// Stride in bytes between two rows handled by this thread.
	const int pitch = _pitch * thread->num_cores;

	for (; count > 0; --count)
	{
		dest[0] = source[0];
		dest[1] = source[1];
		dest[2] = source[2];
		dest[3] = source[3];
		source += 4;
		dest += pitch;
	}
}
// Maps one column (slot hx) of the thread's temporary buffer through the
// colormap and writes it to screen column sx. Only the rows assigned to
// this thread are drawn.
void DrawColumnRt1PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint8_t *shade = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx];
	const int stride = _pitch*thread->num_cores;

	for (; rows > 0; --rows)
	{
		*out = shade[*in];
		in += 4;
		out += stride;
	}
}
// Maps all four columns of the thread's temporary buffer through the
// colormap and writes them to screen columns sx..sx+3. Only the rows
// assigned to this thread are drawn.
void DrawColumnRt4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint8_t *shade = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch*thread->num_cores;

	for (; rows > 0; --rows)
	{
		// One buffer row holds the four column pixels side by side.
		for (int col = 0; col < 4; ++col)
		{
			out[col] = shade[in[col]];
		}
		in += 4;
		out += stride;
	}
}
// Applies the translation table in place to one column (slot hx) of the
// thread's temporary buffer. Nothing is written to the screen here; only
// this thread's rows of the buffer are rewritten.
void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint8_t *remap = _translation;
	uint8_t *pixel = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];

	// Consecutive rows of one column are 4 bytes apart.
	for (; rows > 0; --rows, pixel += 4)
	{
		*pixel = remap[*pixel];
	}
}
// Applies _translation in place to all four columns of the thread's
// temporary buffer (thread->dc_temp). Nothing is written to the screen
// here; the translated bytes are stored back into the same slots.
//
// Each temp-buffer row is 4 consecutive bytes, so two rows span source[0..7].
// The row count comes from count_for_thread().
void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread)
{
int count = yh - yl + 1;
count = thread->count_for_thread(yl, count);
if (count <= 0)
return;
uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
const uint8_t *translation = _translation;
// Two index temporaries and two byte-sized results are used throughout,
// the same statement pattern as the single-column translated drawer.
int c0, c1;
uint8_t b0, b1;
// Do 2 rows at a time.
// NOTE: despite its name, count8 counts row pairs (count >> 1).
for (int count8 = count >> 1; count8; --count8)
{
// First row, columns 0-1.
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
// First row, columns 2-3.
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
// Second row, columns 0-1.
c0 = source[4]; c1 = source[5];
b0 = translation[c0]; b1 = translation[c1];
source[4] = b0; source[5] = b1;
// Second row, columns 2-3.
c0 = source[6]; c1 = source[7];
b0 = translation[c0]; b1 = translation[c1];
source[6] = b0; source[7] = b1;
// 2 rows * 4 bytes per row.
source += 8;
}
// Do the final row if count was odd.
if (count & 1)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
}
}
// Blends column hx of the thread's temporary buffer (thread->dc_temp) onto
// screen column sx.
//
// Per pixel: the colormapped source byte is looked up in _srcblend, the
// existing screen byte in _destblend, the two table values are added, and
// the sum is folded into an index into RGB32k.All via
// "(sum | 0x1f07c1f)" and "v & (v >> 15)".
void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		const uint32_t sum = (srcTable[remap[*in]] + dstTable[*out]) | 0x1f07c1f;
		*out = RGB32k.All[sum & (sum >> 15)];
		in += 4;		// next row of the same temp column
		out += stride;
	}
}
// Blends all four columns of the thread's temporary buffer
// (thread->dc_temp) onto screen columns sx..sx+3.
//
// Per pixel: the colormapped source byte is looked up in _srcblend, the
// existing screen byte in _destblend, the two table values are added, and
// the sum is folded into an index into RGB32k.All via
// "(sum | 0x1f07c1f)" and "v & (v >> 15)".
void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		// One temp-buffer row = 4 adjacent pixels, left to right.
		for (int x = 0; x < 4; ++x)
		{
			const uint32_t sum = (srcTable[remap[in[x]]] + dstTable[out[x]]) | 0x1f07c1f;
			out[x] = RGB32k.All[sum & (sum >> 15)];
		}
		in += 4;
		out += stride;
	}
}
// Draws column hx of the thread's temporary buffer (thread->dc_temp) onto
// screen column sx using the single colour _color.
//
// The colormapped source byte is used as a level: the foreground term is
// Col2RGB8[level][_color] (read through a pointer based at
// &Col2RGB8[0][_color] with a level<<8 offset), the background term is
// Col2RGB8[64 - level][existing screen byte]. Their sum is folded into an
// index into RGB32k.All.
void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *tinted = &Col2RGB8[0][_color];
	const uint8_t *remap = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4 + hx];
	const int stride = _pitch * thread->num_cores;

	while (rows-- > 0)
	{
		const uint32_t level = remap[*in];
		const uint32_t foreground = tinted[level << 8];
		const uint32_t mix = (Col2RGB8[64 - level][*out] + foreground) | 0x1f07c1f;
		*out = RGB32k.All[mix & (mix >> 15)];
		in += 4;		// next row of the same temp column
		out += stride;
	}
}
// Draws all four columns of the thread's temporary buffer
// (thread->dc_temp) onto screen columns sx..sx+3 using the single colour
// _color.
//
// The colormapped source byte is used as a level: the foreground term is
// read through a pointer based at &Col2RGB8[0][_color] with a level<<8
// offset, the background term is Col2RGB8[64 - level][existing screen byte].
// Their sum is folded into an index into RGB32k.All.
void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *tinted = &Col2RGB8[0][_color];
	const uint8_t *remap = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4];
	const int stride = _pitch * thread->num_cores;

	for (; rows > 0; --rows)
	{
		// One temp-buffer row = 4 adjacent pixels, left to right.
		for (int x = 0; x < 4; ++x)
		{
			uint32_t mix = remap[in[x]];
			mix = (Col2RGB8[64 - mix][out[x]] + tinted[mix << 8]) | 0x1f07c1f;
			out[x] = RGB32k.All[mix & (mix >> 15)];
		}
		in += 4;
		out += stride;
	}
}
// Adds column hx of the thread's temporary buffer (thread->dc_temp) onto
// screen column sx with clamping.
//
// Per pixel the _srcblend and _destblend table values are added. For each
// of bits 30/20/10 (mask 0x40100400) set in the sum, "b - (b >> 5)" yields
// five 1-bits directly below it, which are OR-ed into the result --
// presumably saturating the matching colour channel on overflow. The value
// is then folded into an index into RGB32k.All.
void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		const uint32_t sum = srcTable[remap[*in]] + dstTable[*out];
		const uint32_t carry = sum & 0x40100400;
		const uint32_t fill = carry - (carry >> 5);
		const uint32_t packed = ((sum | 0x01f07c1f) & 0x3fffffff) | fill;
		*out = RGB32k.All[(packed >> 15) & packed];
		in += 4;		// next row of the same temp column
		out += stride;
	}
}
// Adds all four columns of the thread's temporary buffer (thread->dc_temp)
// onto screen columns sx..sx+3 with clamping.
//
// Per pixel the _srcblend and _destblend table values are added. For each
// of bits 30/20/10 (mask 0x40100400) set in the sum, "b - (b >> 5)" yields
// five 1-bits directly below it, which are OR-ed into the result --
// presumably saturating the matching colour channel on overflow. The value
// is then folded into an index into RGB32k.All.
void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;
	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;

	for (; rows > 0; --rows)
	{
		// One temp-buffer row = 4 adjacent pixels, left to right.
		for (int x = 0; x < 4; ++x)
		{
			const uint32_t sum = srcTable[remap[in[x]]] + dstTable[out[x]];
			const uint32_t carry = sum & 0x40100400;
			const uint32_t fill = carry - (carry >> 5);
			const uint32_t packed = ((sum | 0x01f07c1f) & 0x3fffffff) | fill;
			out[x] = RGB32k.All[(packed >> 15) & packed];
		}
		in += 4;
		out += stride;
	}
}
// Subtracts the screen from column hx of the thread's temporary buffer
// (thread->dc_temp), source minus destination, writing to screen column sx.
//
// Per pixel: bits 30/20/10 (0x40100400) are set in the _srcblend value
// before the _destblend value is subtracted. For each of those bits still
// set afterwards, "b - (b >> 5)" yields five 1-bits directly below it; the
// difference is AND-ed with that mask -- presumably zeroing any channel
// that went negative. The value is then folded into an index into
// RGB32k.All.
void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		const uint32_t diff = (srcTable[remap[*in]] | 0x40100400) - dstTable[*out];
		const uint32_t guard = diff & 0x40100400;
		const uint32_t keep = guard - (guard >> 5);
		const uint32_t packed = (diff & keep) | 0x01f07c1f;
		*out = RGB32k.All[(packed >> 15) & packed];
		in += 4;		// next row of the same temp column
		out += stride;
	}
}
// Subtracts the screen from all four columns of the thread's temporary
// buffer (thread->dc_temp), source minus destination, writing to screen
// columns sx..sx+3.
//
// Per pixel: bits 30/20/10 (0x40100400) are set in the _srcblend value
// before the _destblend value is subtracted. For each of those bits still
// set afterwards, "b - (b >> 5)" yields five 1-bits directly below it; the
// difference is AND-ed with that mask -- presumably zeroing any channel
// that went negative. The value is then folded into an index into
// RGB32k.All.
void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		// One temp-buffer row = 4 adjacent pixels, left to right.
		for (int x = 0; x < 4; ++x)
		{
			const uint32_t diff = (srcTable[remap[in[x]]] | 0x40100400) - dstTable[out[x]];
			const uint32_t guard = diff & 0x40100400;
			const uint32_t keep = guard - (guard >> 5);
			const uint32_t packed = (diff & keep) | 0x01f07c1f;
			out[x] = RGB32k.All[(packed >> 15) & packed];
		}
		in += 4;
		out += stride;
	}
}
// Reverse-subtracts column hx of the thread's temporary buffer
// (thread->dc_temp) from the screen, destination minus source, writing to
// screen column sx.
//
// Per pixel: bits 30/20/10 (0x40100400) are set in the _destblend value
// before the _srcblend value is subtracted. For each of those bits still
// set afterwards, "b - (b >> 5)" yields five 1-bits directly below it; the
// difference is AND-ed with that mask -- presumably zeroing any channel
// that went negative. The value is then folded into an index into
// RGB32k.All.
void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		const uint32_t diff = (dstTable[*out] | 0x40100400) - srcTable[remap[*in]];
		const uint32_t guard = diff & 0x40100400;
		const uint32_t keep = guard - (guard >> 5);
		const uint32_t packed = (diff & keep) | 0x01f07c1f;
		*out = RGB32k.All[(packed >> 15) & packed];
		in += 4;		// next row of the same temp column
		out += stride;
	}
}
// Reverse-subtracts all four columns of the thread's temporary buffer
// (thread->dc_temp) from the screen, destination minus source, writing to
// screen columns sx..sx+3.
//
// Per pixel: bits 30/20/10 (0x40100400) are set in the _destblend value
// before the _srcblend value is subtracted. For each of those bits still
// set afterwards, "b - (b >> 5)" yields five 1-bits directly below it; the
// difference is AND-ed with that mask -- presumably zeroing any channel
// that went negative. The value is then folded into an index into
// RGB32k.All.
void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl) * 4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *remap = _colormap;

	for (; rows > 0; --rows)
	{
		// One temp-buffer row = 4 adjacent pixels, left to right.
		for (int x = 0; x < 4; ++x)
		{
			const uint32_t diff = (dstTable[out[x]] | 0x40100400) - srcTable[remap[in[x]]];
			const uint32_t guard = diff & 0x40100400;
			const uint32_t keep = guard - (guard >> 5);
			const uint32_t packed = (diff & keep) | 0x01f07c1f;
			out[x] = RGB32k.All[(packed >> 15) & packed];
		}
		in += 4;
		out += stride;
	}
}
}

View file

@ -58,6 +58,38 @@
#include "v_font.h"
#include "r_data/colormaps.h"
#include "p_maputl.h"
#include "r_thread.h"
CVAR (String, r_viewsize, "", CVAR_NOSET)
CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE)
// CVAR r_columnmethod
//
// Selects which version of the seg renderers to use. Only 0 and 1 are
// accepted: any other value is replaced with 1, while an accepted value
// sets setsizeneeded so the change gets picked up.
CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
{
if (self != 0 && self != 1)
{
self = 1;
}
else
{ // Trigger the change
setsizeneeded = true;
}
}
CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE)
CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE)
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor)
extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
extern cycle_t FrameCycles;
extern bool r_showviewer;
cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
namespace swrenderer
{
using namespace drawerargs;
// MACROS ------------------------------------------------------------------
@ -86,9 +118,8 @@ static void R_ShutdownRenderer();
extern short *openings;
extern bool r_fakingunderwater;
extern "C" int fuzzviewheight;
extern int fuzzviewheight;
extern subsector_t *InSubsector;
extern bool r_showviewer;
// PRIVATE DATA DECLARATIONS -----------------------------------------------
@ -100,9 +131,6 @@ bool r_dontmaplines;
// PUBLIC DATA DEFINITIONS -------------------------------------------------
CVAR (String, r_viewsize, "", CVAR_NOSET)
CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE)
double r_BaseVisibility;
double r_WallVisibility;
double r_FloorVisibility;
@ -157,8 +185,6 @@ void (*hcolfunc_post1) (int hx, int sx, int yl, int yh);
void (*hcolfunc_post2) (int hx, int sx, int yl, int yh);
void (*hcolfunc_post4) (int sx, int yl, int yh);
cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
// PRIVATE DATA DEFINITIONS ------------------------------------------------
static int lastcenteryfrac;
@ -361,26 +387,6 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight,
R_SetVisibility(R_GetVisibility());
}
//==========================================================================
//
// CVAR r_columnmethod
//
// Selects which version of the seg renderers to use.
//
//==========================================================================
CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
{
if (self != 0 && self != 1)
{
self = 1;
}
else
{ // Trigger the change
setsizeneeded = true;
}
}
//==========================================================================
//
// R_Init
@ -455,8 +461,6 @@ void R_CopyStackedViewParameters()
//
//==========================================================================
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor)
void R_SetupColormap(player_t *player)
{
realfixedcolormap = NULL;
@ -574,9 +578,6 @@ void R_SetupFreelook()
//
//==========================================================================
CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE)
CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE)
void R_HighlightPortal (PortalDrawseg* pds)
{
// [ZZ] NO OVERFLOW CHECKS HERE
@ -811,9 +812,6 @@ void R_SetupBuffer ()
{
dc_pitch = pitch;
R_InitFuzzTable (pitch);
#if defined(X86_ASM) || defined(X64_ASM)
ASM_PatchPitch ();
#endif
}
dc_destorg = lineptr;
for (int i = 0; i < RenderTarget->GetHeight(); i++)
@ -853,10 +851,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
// [RH] Show off segs if r_drawflat is 1
if (r_drawflat)
{
hcolfunc_pre = R_FillColumnHorizP;
hcolfunc_pre = R_FillColumnHoriz;
hcolfunc_post1 = rt_copy1col;
hcolfunc_post4 = rt_copy4cols;
colfunc = R_FillColumnP;
colfunc = R_FillColumn;
spanfunc = R_FillSpan;
}
else
@ -950,6 +948,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
{
const bool savedviewactive = viewactive;
R_BeginDrawerCommands();
viewwidth = width;
RenderTarget = canvas;
bRenderingToCanvas = true;
@ -961,6 +961,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
R_RenderActorView (actor, dontmaplines);
R_EndDrawerCommands();
RenderTarget = screen;
bRenderingToCanvas = false;
R_ExecuteSetViewSize ();
@ -991,8 +993,6 @@ void R_MultiresInit ()
// Displays statistics about rendering times
//
//==========================================================================
extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
extern cycle_t FrameCycles;
ADD_STAT (fps)
{
@ -1072,3 +1072,5 @@ CCMD (clearscancycles)
bestscancycles = HUGE_VAL;
}
#endif
}

View file

@ -28,23 +28,26 @@
#include "v_palette.h"
#include "r_data/colormaps.h"
extern double ViewCos;
extern double ViewSin;
extern int viewwindowx;
extern int viewwindowy;
typedef BYTE lighttable_t; // This could be wider for >8 bit display.
namespace swrenderer
{
//
// POV related.
//
extern bool bRenderingToCanvas;
extern double ViewCos;
extern double ViewSin;
extern fixed_t viewingrangerecip;
extern double FocalLengthX, FocalLengthY;
extern double InvZtoScale;
extern double WallTMapScale2;
extern int viewwindowx;
extern int viewwindowy;
extern double CenterX;
extern double CenterY;
@ -142,5 +145,6 @@ extern DAngle stacked_angle;
extern void R_CopyStackedViewParameters();
}
#endif // __R_MAIN_H__

View file

@ -63,10 +63,14 @@
#pragma warning(disable:4244)
#endif
CVAR(Bool, tilt, false, 0);
CVAR(Bool, r_skyboxes, true, 0)
EXTERN_CVAR(Int, r_skymode)
//EXTERN_CVAR (Int, tx)
//EXTERN_CVAR (Int, ty)
namespace swrenderer
{
using namespace drawerargs;
extern subsector_t *InSubsector;
@ -132,15 +136,12 @@ extern "C" {
// spanend holds the end of a plane span in each screen row
//
short spanend[MAXHEIGHT];
BYTE *tiltlighting[MAXWIDTH];
int planeshade;
FVector3 plane_sz, plane_su, plane_sv;
float planelightfloat;
bool plane_shade;
fixed_t pviewx, pviewy;
void R_DrawTiltedPlane_ASM (int y, int x1);
}
float yslope[MAXHEIGHT];
@ -148,13 +149,6 @@ static fixed_t xscale, yscale;
static double xstepscale, ystepscale;
static double basexfrac, baseyfrac;
#ifdef X86_ASM
extern "C" void R_SetSpanSource_ASM (const BYTE *flat);
extern "C" void R_SetSpanSize_ASM (int xbits, int ybits);
extern "C" void R_SetSpanColormap_ASM (BYTE *colormap);
extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat);
extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource;
#endif
void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked);
//==========================================================================
@ -249,11 +243,6 @@ void R_MapPlane (int y, int x1)
GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT);
}
#ifdef X86_ASM
if (ds_colormap != ds_curcolormap)
R_SetSpanColormap_ASM (ds_colormap);
#endif
ds_y = y;
ds_x1 = x1;
ds_x2 = x2;
@ -261,239 +250,15 @@ void R_MapPlane (int y, int x1)
spanfunc ();
}
//==========================================================================
//
// R_CalcTiltedLighting
//
// Calculates the lighting for one row of a tilted plane. If the definition
// of GETPALOOKUP changes, this needs to change, too.
//
//==========================================================================
extern "C" {
void R_CalcTiltedLighting (double lval, double lend, int width)
{
double lstep;
BYTE *lightfiller;
BYTE *basecolormapdata = basecolormap->Maps;
int i = 0;
if (width == 0 || lval == lend)
{ // Constant lighting
lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT);
}
else
{
lstep = (lend - lval) / width;
if (lval >= MAXLIGHTVIS)
{ // lval starts "too bright".
lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT);
for (; i <= width && lval >= MAXLIGHTVIS; ++i)
{
tiltlighting[i] = lightfiller;
lval += lstep;
}
}
if (lend >= MAXLIGHTVIS)
{ // lend ends "too bright".
lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT);
for (; width > i && lend >= MAXLIGHTVIS; --width)
{
tiltlighting[width] = lightfiller;
lend -= lstep;
}
}
if (width > 0)
{
lval = FIXED2DBL(planeshade) - lval;
lend = FIXED2DBL(planeshade) - lend;
lstep = (lend - lval) / width;
if (lstep < 0)
{ // Going from dark to light
if (lval < 1.)
{ // All bright
lightfiller = basecolormapdata;
}
else
{
if (lval >= NUMCOLORMAPS)
{ // Starts beyond the dark end
BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT);
while (lval >= NUMCOLORMAPS && i <= width)
{
tiltlighting[i++] = clight;
lval += lstep;
}
if (i > width)
return;
}
while (i <= width && lval >= 0)
{
tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT);
lval += lstep;
}
lightfiller = basecolormapdata;
}
}
else
{ // Going from light to dark
if (lval >= (NUMCOLORMAPS-1))
{ // All dark
lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT);
}
else
{
while (lval < 0 && i <= width)
{
tiltlighting[i++] = basecolormapdata;
lval += lstep;
}
if (i > width)
return;
while (i <= width && lval < (NUMCOLORMAPS-1))
{
tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT);
lval += lstep;
}
lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT);
}
}
}
}
for (; i <= width; i++)
{
tiltlighting[i] = lightfiller;
}
}
} // extern "C"
//==========================================================================
//
// R_MapTiltedPlane
//
//==========================================================================
void R_MapTiltedPlane(int y, int x1)
void R_MapTiltedPlane (int y, int x1)
{
int x2 = spanend[y];
int width = x2 - x1;
double iz, uz, vz;
BYTE *fb;
DWORD u, v;
int i;
iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx);
// Lighting is simple. It's just linear interpolation from start to end
if (plane_shade)
{
uz = (iz + plane_sz[0] * width) * planelightfloat;
vz = iz * planelightfloat;
R_CalcTiltedLighting(vz, uz, width);
}
uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx);
vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx);
fb = ylookup[y] + x1 + dc_destorg;
BYTE vshift = 32 - ds_ybits;
BYTE ushift = vshift - ds_xbits;
int umask = ((1 << ds_xbits) - 1) << ds_ybits;
#if 0 // The "perfect" reference version of this routine. Pretty slow.
// Use it only to see how things are supposed to look.
i = 0;
do
{
double z = 1.f/iz;
u = SQWORD(uz*z) + pviewx;
v = SQWORD(vz*z) + pviewy;
ds_colormap = tiltlighting[i];
fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]];
iz += plane_sz[0];
uz += plane_su[0];
vz += plane_sv[0];
} while (--width >= 0);
#else
//#define SPANSIZE 32
//#define INVSPAN 0.03125f
//#define SPANSIZE 8
//#define INVSPAN 0.125f
#define SPANSIZE 16
#define INVSPAN 0.0625f
double startz = 1.f/iz;
double startu = uz*startz;
double startv = vz*startz;
double izstep, uzstep, vzstep;
izstep = plane_sz[0] * SPANSIZE;
uzstep = plane_su[0] * SPANSIZE;
vzstep = plane_sv[0] * SPANSIZE;
x1 = 0;
width++;
while (width >= SPANSIZE)
{
iz += izstep;
uz += uzstep;
vz += vzstep;
double endz = 1.f/iz;
double endu = uz*endz;
double endv = vz*endz;
DWORD stepu = SQWORD((endu - startu) * INVSPAN);
DWORD stepv = SQWORD((endv - startv) * INVSPAN);
u = SQWORD(startu) + pviewx;
v = SQWORD(startv) + pviewy;
for (i = SPANSIZE-1; i >= 0; i--)
{
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]);
x1++;
u += stepu;
v += stepv;
}
startu = endu;
startv = endv;
width -= SPANSIZE;
}
if (width > 0)
{
if (width == 1)
{
u = SQWORD(startu);
v = SQWORD(startv);
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]);
}
else
{
double left = width;
iz += plane_sz[0] * left;
uz += plane_su[0] * left;
vz += plane_sv[0] * left;
double endz = 1.f/iz;
double endu = uz*endz;
double endv = vz*endz;
left = 1.f/left;
DWORD stepu = SQWORD((endu - startu) * left);
DWORD stepv = SQWORD((endv - startv) * left);
u = SQWORD(startu) + pviewx;
v = SQWORD(startv) + pviewy;
for (; width != 0; width--)
{
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]);
x1++;
u += stepu;
v += stepv;
}
}
}
#endif
R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy);
}
//==========================================================================
@ -502,9 +267,9 @@ void R_MapTiltedPlane(int y, int x1)
//
//==========================================================================
void R_MapColoredPlane (int y, int x1)
void R_MapColoredPlane(int y, int x1)
{
memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1);
R_DrawColoredSpan(y, x1, spanend[y]);
}
//==========================================================================
@ -1179,9 +944,6 @@ static void R_DrawSkyStriped (visplane_t *pl)
//
//==========================================================================
CVAR (Bool, tilt, false, 0);
//CVAR (Int, pa, 0, 0)
int R_DrawPlanes ()
{
visplane_t *pl;
@ -1317,7 +1079,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske
// 9. Put the camera back where it was to begin with.
//
//==========================================================================
CVAR (Bool, r_skyboxes, true, 0)
static int numskyboxes;
void R_DrawPortals ()
@ -1665,13 +1426,6 @@ void R_DrawSkyPlane (visplane_t *pl)
void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked)
{
#ifdef X86_ASM
if (ds_source != ds_cursource)
{
R_SetSpanSource_ASM (ds_source);
}
#endif
if (alpha <= 0)
{
return;
@ -1896,14 +1650,6 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
else
ds_colormap = basecolormap->Maps, plane_shade = true;
if (!plane_shade)
{
for (int i = 0; i < viewwidth; ++i)
{
tiltlighting[i] = ds_colormap;
}
}
// Hack in support for 1 x Z and Z x 1 texture sizes
if (ds_ybits == 0)
{
@ -1913,13 +1659,8 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
{
plane_su[2] = plane_su[1] = plane_su[0] = 0;
}
#if defined(X86_ASM)
if (ds_source != ds_curtiltedsource)
R_SetTiltedSpanSource_ASM (ds_source);
R_MapVisPlane (pl, R_DrawTiltedPlane_ASM);
#else
R_MapVisPlane (pl, R_MapTiltedPlane);
#endif
}
//==========================================================================
@ -2023,3 +1764,5 @@ bool R_PlaneInitData ()
return true;
}
}

View file

@ -27,6 +27,9 @@
class ASkyViewpoint;
namespace swrenderer
{
//
// The infamous visplane
//
@ -113,4 +116,6 @@ bool R_PlaneInitData (void);
extern visplane_t* floorplane;
extern visplane_t* ceilingplane;
}
#endif // __R_PLANE_H__

View file

@ -57,10 +57,13 @@
CVAR(Bool, r_np2, true, 0)
CVAR(Bool, r_fogboundary, true, 0)
CVAR(Bool, r_drawmirrors, true, 0)
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor);
//CVAR (Int, ty, 8, 0)
//CVAR (Int, tx, 8, 0)
namespace swrenderer
{
using namespace drawerargs;
#define HEIGHTBITS 12
#define HEIGHTSHIFT (FRACBITS-HEIGHTBITS)
@ -141,16 +144,6 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t
static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat);
static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask);
//=============================================================================
//
// CVAR r_fogboundary
//
// If true, makes fog look more "real" by shading the walls separating two
// sectors with different fog.
//=============================================================================
CVAR(Bool, r_fogboundary, true, 0)
inline bool IsFogBoundary (sector_t *front, sector_t *back)
{
return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade &&
@ -158,14 +151,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back)
(front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum);
}
//=============================================================================
//
// CVAR r_drawmirrors
//
// Set to false to disable rendering of mirrors
//=============================================================================
CVAR(Bool, r_drawmirrors, true, 0)
//
// R_RenderMaskedSegRange
@ -2994,3 +2979,5 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper,
done:
WallC = savecoord;
}
}

View file

@ -23,6 +23,9 @@
#ifndef __R_SEGS_H__
#define __R_SEGS_H__
namespace swrenderer
{
struct drawseg_t;
void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2);
@ -70,4 +73,6 @@ extern int CurrentPortalUniq;
extern bool CurrentPortalInSkybox;
extern TArray<PortalDrawseg> WallPortals;
}
#endif

View file

@ -80,7 +80,7 @@ extern int numgamesubsectors;
extern AActor* camera; // [RH] camera instead of viewplayer
extern sector_t* viewsector; // [RH] keep track of sector viewing from
extern angle_t xtoviewangle[MAXWIDTH+1];
namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; }
extern DAngle FieldOfView;
int R_FindSkin (const char *name, int pclass); // [RH] Find a skin

View file

@ -42,13 +42,20 @@
#include "r_3dfloors.h"
#include "textures/textures.h"
#include "r_data/voxels.h"
#include "r_thread.h"
namespace swrenderer
{
void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio);
void R_SetupColormap(player_t *);
void R_SetupFreelook();
void R_InitRenderer();
}
using namespace swrenderer;
//==========================================================================
//
// DCanvas :: Init
@ -154,9 +161,11 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap<PClassActor*, bool> &act
void FSoftwareRenderer::RenderView(player_t *player)
{
R_BeginDrawerCommands();
R_RenderActorView (player->mo);
// [RH] Let cameras draw onto textures that were visible this frame.
FCanvasTextureInfo::UpdateAll ();
R_EndDrawerCommands();
}
//==========================================================================

View file

@ -64,6 +64,21 @@
#include "r_data/voxels.h"
#include "p_local.h"
#include "p_maputl.h"
#include "r_thread.h"
EXTERN_CVAR(Bool, st_scale)
EXTERN_CVAR(Bool, r_shadercolormaps)
EXTERN_CVAR(Int, r_drawfuzz)
EXTERN_CVAR(Bool, r_deathcamera);
EXTERN_CVAR(Bool, r_drawplayersprites)
EXTERN_CVAR(Bool, r_drawvoxels)
CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
//CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE)
namespace swrenderer
{
using namespace drawerargs;
// [RH] A c-buffer. Used for keeping track of offscreen voxel spans.
@ -95,12 +110,6 @@ extern float MaskedScaleY;
#define BASEXCENTER (160)
#define BASEYCENTER (100)
EXTERN_CVAR (Bool, st_scale)
EXTERN_CVAR(Bool, r_shadercolormaps)
EXTERN_CVAR(Int, r_drawfuzz)
EXTERN_CVAR(Bool, r_deathcamera);
CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
//
// Sprite rotation 0 is facing the viewer,
// rotation 1 is one angle turn CLOCKWISE around the axis.
@ -132,9 +141,6 @@ FTexture *WallSpriteTile;
short zeroarray[MAXWIDTH];
short screenheightarray[MAXWIDTH];
EXTERN_CVAR (Bool, r_drawplayersprites)
EXTERN_CVAR (Bool, r_drawvoxels)
//
// INITIALIZATION FUNCTIONS
//
@ -639,7 +645,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
{
return;
}
if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP)
if (colfunc == fuzzcolfunc || colfunc == R_FillColumn)
{
flags = DVF_OFFSCREEN | DVF_SPANSONLY;
}
@ -1758,8 +1764,6 @@ static int sd_comparex (const void *arg1, const void *arg2)
return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2;
}
CVAR (Bool, r_splitsprites, true, CVAR_ARCHIVE)
// Split up vissprites that intersect drawsegs
void R_SplitVisSprites ()
{
@ -2628,7 +2632,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis)
}
}
void R_DrawParticle (vissprite_t *vis)
void R_DrawParticle_C (vissprite_t *vis)
{
DWORD *bg2rgb;
int spacing;
@ -2642,6 +2646,8 @@ void R_DrawParticle (vissprite_t *vis)
R_DrawMaskedSegsBehindParticle (vis);
DrawerCommandQueue::WaitForWorkers();
// vis->renderflags holds translucency level (0-255)
{
fixed_t fglevel, bglevel;
@ -3237,3 +3243,5 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly)
OffscreenBufferWidth = width;
OffscreenBufferHeight = height;
}
}

View file

@ -25,6 +25,12 @@
#include "r_bsp.h"
struct particle_t;
struct FVoxel;
namespace swrenderer
{
// A vissprite_t is a thing
// that will be drawn during a refresh.
// I.e. a sprite object that is partly visible.
@ -95,9 +101,7 @@ struct vissprite_t
vissprite_t() {}
};
struct particle_t;
void R_DrawParticle (vissprite_t *);
void R_DrawParticle_C (vissprite_t *);
void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside);
extern int MaxVisSprites;
@ -146,5 +150,6 @@ void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle,
void R_ClipVisSprite (vissprite_t *vis, int xl, int xh);
}
#endif

297
src/r_thread.cpp Normal file
View file

@ -0,0 +1,297 @@
/*
** Renderer multithreading framework
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#include <stddef.h>
#include "templates.h"
#include "doomdef.h"
#include "i_system.h"
#include "w_wad.h"
#include "r_local.h"
#include "v_video.h"
#include "doomstat.h"
#include "st_stuff.h"
#include "g_game.h"
#include "g_level.h"
#include "r_thread.h"
// Defaults to true. DrawerCommandQueue::QueueCommand checks this cvar: while it
// is false, each command is executed immediately on the calling thread instead
// of being queued for the worker threads.
CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
// Free-function entry point that forwards to DrawerCommandQueue::Begin(),
// which flushes pending commands and opens a (nestable) queued-drawing section.
void R_BeginDrawerCommands()
{
	DrawerCommandQueue::Begin();
}
// Free-function entry point that forwards to DrawerCommandQueue::End(),
// which flushes pending commands and closes one level of queued drawing.
void R_EndDrawerCommands()
{
	DrawerCommandQueue::End();
}
/////////////////////////////////////////////////////////////////////////////
// Returns the single process-wide queue. The object is a function-local
// static, so it is constructed on the first call.
DrawerCommandQueue *DrawerCommandQueue::Instance()
{
	static DrawerCommandQueue instance;
	return &instance;
}
// Nothing to do here: every member has an in-class initializer or a default
// constructor. Worker threads are created later by StartThreads().
DrawerCommandQueue::DrawerCommandQueue()
{
}
// Joins all worker threads before the queue's members are destroyed.
DrawerCommandQueue::~DrawerCommandQueue()
{
	StopThreads();
}
// Carves `size` bytes out of the queue's fixed memory pool.
//
// The request is rounded up to a multiple of 16 so that successive allocations
// keep the same 16-byte offset alignment within the pool. The memory is
// reclaimed in bulk when Finish() resets memorypool_pos; there is no per-block
// free.
//
// Returns nullptr when the (rounded) request does not fit in the remaining
// pool space. Callers are expected to flush the queue and retry.
void* DrawerCommandQueue::AllocMemory(size_t size)
{
	auto queue = Instance();
	const size_t pool_size = memorypool_size;

	// Reject impossible requests before rounding. This keeps (size + 15) from
	// wrapping around for huge values, which would otherwise turn an oversized
	// request into a tiny "successful" allocation.
	if (size > pool_size)
		return nullptr;

	// Make sure allocations remain 16-byte aligned
	size = (size + 15) / 16 * 16;

	// memorypool_pos never exceeds pool_size, so the subtraction cannot
	// underflow, and comparing against the remaining space avoids the
	// overflow-prone (pos + size) form.
	if (size > pool_size - queue->memorypool_pos)
		return nullptr;

	void *data = queue->memorypool + queue->memorypool_pos;
	queue->memorypool_pos += size;
	return data;
}
void DrawerCommandQueue::Begin()
{
auto queue = Instance();
queue->Finish();
queue->threaded_render++;
}
void DrawerCommandQueue::End()
{
auto queue = Instance();
queue->Finish();
if (queue->threaded_render > 0)
queue->threaded_render--;
}
void DrawerCommandQueue::WaitForWorkers()
{
Instance()->Finish();
}
// Runs the currently queued batch to completion.
//
// Steps: publish the batch to the worker threads, execute the same batch on
// the calling thread as core 0, wait until every worker has reported that it
// finished, surface any error recorded during the run, then destroy the
// commands and reset the memory pool. Returns immediately when nothing is
// queued.
void DrawerCommandQueue::Finish()
{
auto queue = Instance();
if (queue->commands.empty())
return;
// Give worker threads something to do:
// The batch is moved into active_commands and run_id is bumped while holding
// start_mutex; a changed run_id is what the workers' wait predicate looks for.
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
queue->active_commands.swap(queue->commands);
queue->run_id++;
start_lock.unlock();
// StartThreads() is a no-op when workers already exist.
queue->StartThreads();
queue->start_condition.notify_all();
// Do one thread ourselves:
// The calling thread participates as core 0; the workers are cores 1..N.
DrawerThread thread;
thread.core = 0;
thread.num_cores = (int)(queue->threads.size() + 1);
// Context passed through VectoredTryCatch's void* to both callbacks.
// command_index is kept here (not in a local) so the error callback can tell
// which command was executing.
struct TryCatchData
{
DrawerCommandQueue *queue;
DrawerThread *thread;
size_t command_index;
} data;
data.queue = queue;
data.thread = &thread;
data.command_index = 0;
VectoredTryCatch(&data,
[](void *data)
{
TryCatchData *d = (TryCatchData*)data;
for (int pass = 0; pass < d->queue->num_passes; pass++)
{
// Each pass covers rows_in_pass rows starting at pass * rows_in_pass.
d->thread->pass_start_y = pass * d->queue->rows_in_pass;
d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass;
// The last pass is extended to at least MAXHEIGHT so no trailing rows are left out.
if (pass + 1 == d->queue->num_passes)
d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT);
// Every command in the batch is executed once per pass on this thread.
size_t size = d->queue->active_commands.size();
for (d->command_index = 0; d->command_index < size; d->command_index++)
{
auto &command = d->queue->active_commands[d->command_index];
command->Execute(d->thread);
}
}
},
[](void *data, const char *reason, bool fatal)
{
// Passing worker_thread = true makes ReportDrawerError record the error in
// thread_error instead of raising it here; it is reported after the wait below.
TryCatchData *d = (TryCatchData*)data;
ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal);
});
// Wait for everyone to finish:
// end_lock stays held until this function returns, so the error check and the
// finished_threads reset below happen under end_mutex.
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); });
if (!queue->thread_error.IsEmpty())
{
// Non-fatal errors are printed only the first time this branch is reached;
// thread_error is not cleared here.
static bool first = true;
if (queue->thread_error_fatal)
I_FatalError("%s", queue->thread_error.GetChars());
else if (first)
Printf("%s\n", queue->thread_error.GetChars());
first = false;
}
// Clean up batch:
// Commands were placement-constructed inside memorypool (see QueueCommand), so
// they are destroyed with explicit destructor calls and the pool is reclaimed
// by rewinding memorypool_pos rather than by freeing individual objects.
for (auto &command : queue->active_commands)
command->~DrawerCommand();
queue->active_commands.clear();
queue->memorypool_pos = 0;
queue->finished_threads = 0;
}
// Lazily creates the worker threads. Does nothing when workers already exist.
//
// One worker is created per hardware thread minus one, because the thread
// calling Finish() acts as core 0. Each worker loops forever: wait for a new
// batch (or shutdown), execute the batch, report completion.
void DrawerCommandQueue::StartThreads()
{
if (!threads.empty())
return;
// hardware_concurrency() may report 0 when the value is not computable;
// fall back to assuming 4 threads in that case.
int num_threads = std::thread::hardware_concurrency();
if (num_threads == 0)
num_threads = 4;
// The vector is sized once up front; the lambdas below keep raw pointers to
// its elements, and it is not resized again in this function.
threads.resize(num_threads - 1);
for (int i = 0; i < num_threads - 1; i++)
{
DrawerCommandQueue *queue = this;
DrawerThread *thread = &threads[i];
// Worker i is core i + 1 out of num_threads cores.
thread->core = i + 1;
thread->num_cores = num_threads;
thread->thread = std::thread([=]()
{
// Last batch id this worker has processed; compared against queue->run_id
// to detect that Finish() published a new batch.
int run_id = 0;
while (true)
{
// Wait until we are signalled to run:
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; });
if (queue->shutdown_flag)
break;
run_id = queue->run_id;
start_lock.unlock();
// Do the work:
// Context passed through VectoredTryCatch's void* to both callbacks.
struct TryCatchData
{
DrawerCommandQueue *queue;
DrawerThread *thread;
size_t command_index;
} data;
data.queue = queue;
data.thread = thread;
data.command_index = 0;
VectoredTryCatch(&data,
[](void *data)
{
TryCatchData *d = (TryCatchData*)data;
for (int pass = 0; pass < d->queue->num_passes; pass++)
{
// Same pass layout as the calling thread uses in Finish(): rows_in_pass rows
// per pass, with the last pass extended to at least MAXHEIGHT.
d->thread->pass_start_y = pass * d->queue->rows_in_pass;
d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass;
if (pass + 1 == d->queue->num_passes)
d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT);
size_t size = d->queue->active_commands.size();
for (d->command_index = 0; d->command_index < size; d->command_index++)
{
auto &command = d->queue->active_commands[d->command_index];
command->Execute(d->thread);
}
}
},
[](void *data, const char *reason, bool fatal)
{
// Errors are recorded in the queue (worker_thread = true) and reported by
// Finish() once all threads are done.
TryCatchData *d = (TryCatchData*)data;
ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal);
});
// Notify main thread that we finished:
// The counter is incremented under end_mutex; Finish() waits on end_condition
// until it equals the number of workers.
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
queue->finished_threads++;
end_lock.unlock();
queue->end_condition.notify_all();
}
});
}
}
// Asks every worker to exit, joins them, and leaves the queue ready for
// StartThreads() to create a fresh set of workers later.
void DrawerCommandQueue::StopThreads()
{
	// Raise the shutdown flag under start_mutex so waiting workers observe it
	// through their wait predicate.
	{
		std::unique_lock<std::mutex> guard(start_mutex);
		shutdown_flag = true;
	}
	start_condition.notify_all();

	// The mutex must not be held while joining: workers need it to wake up.
	for (auto &worker : threads)
		worker.thread.join();
	threads.clear();

	// Re-arm the flag so threads started afterwards do not exit immediately.
	std::unique_lock<std::mutex> guard(start_mutex);
	shutdown_flag = false;
}
// Reports a failure raised while executing `command`.
//
// worker_thread == false: the error is reported right away. Fatal errors go
//   through I_FatalError; non-fatal ones are printed, but only on the first
//   call that reaches this path.
// worker_thread == true: the error is only recorded in the queue (under
//   end_mutex) so that Finish() can report it later. An already recorded
//   error is kept unless the new one is fatal and the recorded one is not.
void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal)
{
	if (!worker_thread)
	{
		static bool first = true;
		if (fatal)
			I_FatalError("%s: %s", reason, command->DebugInfo().GetChars());
		else if (first)
			Printf("%s: %s\n", reason, command->DebugInfo().GetChars());
		first = false;
		return;
	}

	DrawerCommandQueue *queue = Instance();
	std::unique_lock<std::mutex> end_lock(queue->end_mutex);

	const bool nothing_recorded = queue->thread_error.IsEmpty();
	const bool upgrades_to_fatal = !queue->thread_error_fatal && fatal;
	if (nothing_recorded || upgrades_to_fatal)
	{
		queue->thread_error = reason + (FString)": " + command->DebugInfo();
		queue->thread_error_fatal = fatal;
	}
}
// Invokes tryBlock(data). This implementation installs no fault handler, so
// catchBlock is accepted for interface compatibility but is never called.
void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal))
{
	(void)catchBlock;
	tryBlock(data);
}

235
src/r_thread.h Normal file
View file

@ -0,0 +1,235 @@
/*
** Renderer multithreading framework
** Copyright (c) 2016 Magnus Norddahl
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any damages
** arising from the use of this software.
**
** Permission is granted to anyone to use this software for any purpose,
** including commercial applications, and to alter it and redistribute it
** freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you must not
** claim that you wrote the original software. If you use this software
** in a product, an acknowledgment in the product documentation would be
** appreciated but is not required.
** 2. Altered source versions must be plainly marked as such, and must not be
** misrepresented as being the original software.
** 3. This notice may not be removed or altered from any source distribution.
**
*/
#pragma once
#include "r_draw.h"
#include <vector>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
// Use multiple threads when drawing
EXTERN_CVAR(Bool, r_multithreaded)
// Redirect drawer commands to worker threads
void R_BeginDrawerCommands();
// Wait until all drawers finished executing
void R_EndDrawerCommands();
// Per-thread state handed to DrawerCommand::Execute.
//
// Rows are shared between threads by interleaving: a row belongs to this
// thread when (row % num_cores == core) and the row lies inside the current
// pass range [pass_start_y, pass_end_y). The helper methods translate a
// drawer's row range into the subset owned by this thread.
class DrawerThread
{
public:
	std::thread thread;

	// Index of this thread among the threads sharing the work
	int core = 0;

	// Total number of threads sharing the work
	int num_cores = 1;

	// Rows processed in the current pass: start inclusive, end exclusive
	int pass_start_y = 0;
	int pass_end_y = MAXHEIGHT;

	// Working buffer used by Rt drawers
	uint8_t dc_temp_buff[MAXHEIGHT * 4];
	uint8_t *dc_temp = nullptr;

	// Working buffer used by Rt drawers, true color edition
	uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
	uint32_t *dc_temp_rgba = nullptr;

	// Working buffer used by the tilted (sloped) span drawer
	const uint8_t *tiltlighting[MAXWIDTH];

	// True when `line` is outside the current pass or owned by another thread
	bool line_skipped_by_thread(int line)
	{
		const bool outside_pass = line < pass_start_y || line >= pass_end_y;
		if (outside_pass)
			return true;
		return line % num_cores != core;
	}

	// Number of lines to step over, starting at first_line, to reach the first
	// line rendered by this thread
	int skipped_by_thread(int first_line)
	{
		// Lines lying before the start of the current pass
		const int lines_before_pass = MAX(pass_start_y - first_line, 0);
		// Distance from there to the next line whose index maps to this core
		const int offset_from_core = (first_line + lines_before_pass - core) % num_cores;
		const int lines_to_own_row = (num_cores - offset_from_core) % num_cores;
		return lines_before_pass + lines_to_own_row;
	}

	// Number of lines, out of `count` lines starting at first_line, that are
	// rendered by this thread
	int count_for_thread(int first_line, int count)
	{
		// Clip the run at the end of the current pass
		const int available = MAX(pass_end_y - first_line, 0);
		count = MIN(count, available);
		// Every num_cores-th line after the first owned one belongs to this thread
		const int owned = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
		return MAX(owned, 0);
	}

	// Destination address of the first line rendered by this thread, given the
	// address of first_line and the distance (in elements of T) between lines
	template<typename T>
	T *dest_for_thread(int first_line, int pitch, T *dest)
	{
		const int lines_to_skip = skipped_by_thread(first_line);
		return dest + lines_to_skip * pitch;
	}

	// Index into the dc_temp buffer of the first line used by this thread
	int temp_line_for_thread(int first_line)
	{
		const int first_owned_line = first_line + skipped_by_thread(first_line);
		return first_owned_line / num_cores;
	}
};
// Base class for a unit of drawing work. Each queued command is executed by
// every thread (see DrawerCommandQueue); the DrawerThread argument tells the
// command which rows the executing thread is responsible for.
class DrawerCommand
{
protected:
	// Row of dc_dest relative to dc_destorg, captured at construction time
	int _dest_y;

	// Clamps a vertical run (dest, dest_y, count) so that it stays inside
	// [0, dc_destheight). In MSVC debug builds an out-of-range run first
	// triggers a debugger break.
	void DetectRangeError(uint32_t *&dest, int &dest_y, int &count)
	{
		const auto dest_height = swrenderer::drawerargs::dc_destheight;

#if defined(_MSC_VER) && defined(_DEBUG)
		if (dest_y < 0 || count < 0 || dest_y + count > dest_height)
			__debugbreak(); // Buffer overrun detected!
#endif

		if (dest_y < 0)
		{
			// Run starts above the buffer: drop the rows above row 0 and
			// restart from the buffer origin.
			count += dest_y;
			dest_y = 0;
			dest = (uint32_t*)swrenderer::drawerargs::dc_destorg;
		}
		else if (dest_y >= dest_height)
		{
			// Run starts below the buffer: nothing to draw.
			dest_y = 0;
			count = 0;
		}

		// Reject nonsensical lengths outright
		if (count < 0 || count > MAXHEIGHT)
			count = 0;

		// Trim runs that extend past the bottom of the buffer
		if (dest_y + count >= dest_height)
			count = dest_height - dest_y;
	}

public:
	// Records the destination row from the drawer arguments that are current
	// when the command is created.
	DrawerCommand()
	{
		const auto dest_offset = swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg;
		_dest_y = static_cast<int>(dest_offset / (swrenderer::drawerargs::dc_pitch));
	}

	virtual ~DrawerCommand() { }

	// Performs this command's share of the work for the given thread
	virtual void Execute(DrawerThread *thread) = 0;

	// Description of the command, used when reporting drawer errors
	virtual FString DebugInfo() = 0;
};
void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal));
// Manages queueing up commands and executing them on worker threads.
//
// The queue is a singleton (see Instance()). Commands are placement-constructed
// inside a fixed-size memory pool and destroyed in bulk by Finish(), which also
// rewinds the pool. All public entry points are static.
class DrawerCommandQueue
{
	enum { memorypool_size = 16 * 1024 * 1024 };

	// Backing storage for queued commands and AllocMemory() requests.
	// AllocMemory() rounds every offset to a multiple of 16, which only yields
	// 16-byte aligned addresses if the pool itself starts on a 16-byte
	// boundary, hence the explicit alignment.
	alignas(16) char memorypool[memorypool_size];
	size_t memorypool_pos = 0;

	// Commands queued since the last Finish()
	std::vector<DrawerCommand *> commands;

	// Worker threads; created lazily by StartThreads()
	std::vector<DrawerThread> threads;

	// start_mutex guards active_commands, shutdown_flag and run_id, and is
	// paired with start_condition to wake the workers.
	std::mutex start_mutex;
	std::condition_variable start_condition;
	std::vector<DrawerCommand *> active_commands;
	bool shutdown_flag = false;
	int run_id = 0;

	// end_mutex guards finished_threads and the recorded thread error, and is
	// paired with end_condition to wake the thread waiting in Finish().
	std::mutex end_mutex;
	std::condition_variable end_condition;
	size_t finished_threads = 0;
	FString thread_error;
	bool thread_error_fatal = false;

	// Nesting depth of Begin()/End(); commands are queued only while > 0
	int threaded_render = 0;

	// Thread state used when a command is executed immediately by QueueCommand
	DrawerThread single_core_thread;

	// Pass layout applied by every thread while executing a batch
	int num_passes = 1;
	int rows_in_pass = MAXHEIGHT;

	void StartThreads();
	void StopThreads();
	void Finish();

	static DrawerCommandQueue *Instance();
	static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal);

	DrawerCommandQueue();
	~DrawerCommandQueue();

public:
	// Allocate memory valid for the duration of a command execution.
	// Returns nullptr when the pool is exhausted.
	static void* AllocMemory(size_t size);

	// Queue command to be executed by drawer worker threads.
	//
	// Outside a Begin()/End() section, or when r_multithreaded is off, the
	// command is constructed on the stack and executed immediately on the
	// calling thread. Otherwise it is constructed in the memory pool and
	// appended to the pending batch. If the pool is full, the pending batch is
	// executed first to free it; a command that still does not fit is dropped.
	template<typename T, typename... Types>
	static void QueueCommand(Types &&... args)
	{
		auto queue = Instance();
		if (queue->threaded_render == 0 || !r_multithreaded)
		{
			T command(std::forward<Types>(args)...);
			VectoredTryCatch(&command,
			[](void *data)
			{
				T *c = (T*)data;
				c->Execute(&Instance()->single_core_thread);
			},
			[](void *data, const char *reason, bool fatal)
			{
				T *c = (T*)data;
				ReportDrawerError(c, false, reason, fatal);
			});
		}
		else
		{
			void *ptr = AllocMemory(sizeof(T));
			if (!ptr) // Out of memory - render what we got
			{
				queue->Finish();
				ptr = AllocMemory(sizeof(T));
				if (!ptr)
					return;
			}
			T *command = new (ptr)T(std::forward<Types>(args)...);
			queue->commands.push_back(command);
		}
	}

	// Redirects all drawing commands to worker threads until End is called
	// Begin/End blocks can be nested.
	static void Begin();

	// End redirection and wait until all worker threads finished executing
	static void End();

	// Waits until all worker threads finished executing
	static void WaitForWorkers();
};

View file

@ -132,6 +132,9 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, ..
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
{
#ifndef NO_SWRENDER
using namespace swrenderer;
using namespace drawerargs;
FTexture::Span unmaskedSpan[2];
const FTexture::Span **spanptr, *spans;
static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH];
@ -1285,6 +1288,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
FDynamicColormap *colormap, int lightlevel, int bottomclip)
{
#ifndef NO_SWRENDER
using namespace swrenderer;
using namespace drawerargs;
// Use an equation similar to player sprites to determine shade
fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT;
float topy, boty, leftx, rightx;
@ -1352,7 +1358,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
// Setup constant texture mapping parameters.
R_SetupSpanBits(tex);
R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap);
R_SetSpanSource(tex->GetPixels());
R_SetSpanSource(tex);
if (ds_xbits != 0)
{
scalex = double(1u << (32 - ds_xbits)) / scalex;

View file

@ -106,20 +106,11 @@ CCMD (bumpgamma)
/* Palette management stuff */
/****************************/
extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal);
int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num)
{
#ifdef X86_ASM
if (CPU.bMMX)
{
int pre = 256 - num - first;
return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre;
}
#endif
const PalEntry *pal = (const PalEntry *)pal_in;
int bestcolor = first;
int bestdist = 257*257+257*257+257*257;
int bestdist = 257 * 257 + 257 * 257 + 257 * 257;
for (int color = first; color < num; color++)
{
@ -384,8 +375,8 @@ void InitPalette ()
R_InitColormaps ();
}
extern "C" void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
extern void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
{
@ -395,6 +386,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
{
memcpy (to, from, count * sizeof(DWORD));
}
return;
}
else if (a == 256)
{
@ -405,6 +397,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
{
to[i] = t;
}
return;
}
#if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__)
else if (CPU.bSSE2)
@ -423,7 +416,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
}
}
#endif
#ifdef X86_ASM
#if defined(_M_IX86) || defined(__i386__)
else if (CPU.bMMX)
{
if (count >= 4)

View file

@ -516,10 +516,6 @@ void V_RefreshViewBorder ();
void V_SetBorderNeedRefresh();
#if defined(X86_ASM) || defined(X64_ASM)
extern "C" void ASM_PatchPitch (void);
#endif
int CheckRatio (int width, int height, int *trueratio=NULL);
static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); }
inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; }

View file

@ -1375,17 +1375,16 @@ void D3DFB::Draw3DPart(bool copy3d)
D3DCOLOR color0, color1;
if (Accel2D)
{
if (realfixedcolormap == NULL)
auto &map = swrenderer::realfixedcolormap;
if (map == NULL)
{
color0 = 0;
color1 = 0xFFFFFFF;
}
else
{
color0 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeStart[0]/2,
realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0);
color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2,
realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1);
color0 = D3DCOLOR_COLORVALUE(map->ColorizeStart[0] / 2, map->ColorizeStart[1] / 2, map->ColorizeStart[2] / 2, 0);
color1 = D3DCOLOR_COLORVALUE(map->ColorizeEnd[0] / 2, map->ColorizeEnd[1] / 2, map->ColorizeEnd[2] / 2, 1);
SetPixelShader(Shaders[SHADER_SpecialColormapPal]);
}
}

View file

@ -227,10 +227,9 @@ void DumpCPUInfo(const CPUInfo *cpu)
}
}
#if 0
// Compiler output for this function is crap compared to the assembly
// version, which is why it isn't used.
void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
#if !defined(__amd64__) && !defined(_M_X64)
void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
{
__m64 blendcolor;
__m64 blendalpha;
@ -272,9 +271,6 @@ void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g
}
#endif
#ifdef X86_ASM
extern "C" void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
#endif
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
{
@ -288,17 +284,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
unaligned = ((size_t)from | (size_t)to) & 0xF;
#ifdef X86_ASM
// For unaligned accesses, the assembly MMX version is slightly faster.
// Note that using unaligned SSE loads and stores is still faster than
// the compiler-generated MMX version.
if (unaligned)
{
DoBlending_MMX(from, to, count, r, g, b, a);
return;
}
#endif
#if defined(__amd64__) || defined(_M_X64)
long long color;
@ -326,7 +311,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
zero = _mm_setzero_si128();
#ifndef X86_ASM
if (unaligned)
{
for (count >>= 2; count > 0; --count)
@ -346,7 +330,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
}
}
else
#endif
{
for (count >>= 2; count > 0; --count)
{