mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-28 23:12:24 +00:00
Merge branch 'master' of https://github.com/rheit/zdoom
# Conflicts: # src/CMakeLists.txt
This commit is contained in:
commit
f547daccc8
38 changed files with 6032 additions and 8114 deletions
|
@ -15,12 +15,6 @@ include( CheckLibraryExists )
|
|||
include( FindPkgConfig )
|
||||
include( FindOpenGL )
|
||||
|
||||
# NO_ASM lets the user build without the hand-written assembly routines.
# The option is declared on every platform; only its default value differs.
if( APPLE )
	# At the moment asm code doesn't work with OS X, so disable by default
	option( NO_ASM "Disable assembly code" ON )
else()
	option( NO_ASM "Disable assembly code" OFF )
endif()
|
||||
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||
option( NO_STRIP "Do not strip Release or MinSizeRel builds" )
|
||||
# At least some versions of Xcode fail if you strip with the linker
|
||||
|
@ -115,7 +109,6 @@ if( WIN32 )
|
|||
)
|
||||
set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc )
|
||||
set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib )
|
||||
set( NASM_NAMES nasmw nasm )
|
||||
|
||||
find_path( D3D_INCLUDE_DIR d3d9.h
|
||||
PATHS ENV DXSDK_DIR
|
||||
|
@ -240,7 +233,6 @@ else()
|
|||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set( NASM_NAMES nasm )
|
||||
|
||||
if( NO_GTK )
|
||||
add_definitions( -DNO_GTK )
|
||||
|
@ -388,105 +380,6 @@ endif()
|
|||
|
||||
find_package( FluidSynth )
|
||||
|
||||
# Search for NASM
|
||||
|
||||
if( NOT NO_ASM )
	# Pick the assembler executable and store it in ASSEMBLER:
	#   - 64-bit Unix: "as"
	#   - any other 64-bit target: yasm
	#   - everything else: the first hit from NASM_NAMES
	# When the required tool is missing, assembly is switched off by setting
	# NO_ASM to ON and ASSEMBLER is left untouched.
	# No assembler version check is performed.
	if( UNIX AND X64 )
		find_program( GAS_PATH as )

		if( NOT GAS_PATH )
			message( STATUS "Could not find as. Disabling assembly code." )
			set( NO_ASM ON )
		else()
			set( ASSEMBLER ${GAS_PATH} )
		endif()
	else()
		# Both lookups always run, regardless of which result is used below.
		find_program( NASM_PATH NAMES ${NASM_NAMES} )
		find_program( YASM_PATH yasm )

		if( X64 AND YASM_PATH )
			set( ASSEMBLER ${YASM_PATH} )
		elseif( X64 )
			message( STATUS "Could not find YASM. Disabling assembly code." )
			set( NO_ASM ON )
		elseif( NASM_PATH )
			set( ASSEMBLER ${NASM_PATH} )
		else()
			message( STATUS "Could not find NASM. Disabling assembly code." )
			set( NO_ASM ON )
		endif()
	endif()
endif()
|
||||
|
||||
if( NOT NO_ASM )
	# Valgrind support is meaningless without assembly code.
	if( VALGRIND )
		add_definitions( -DVALGRIND_AWARE=1 )
		# If you're Valgrinding, you probably want to keep symbols around.
		set( NO_STRIP ON )
	endif()

	# Tell CMake how to assemble our files: object/source extensions and the
	# assembler flags depend on the platform and on 32- vs 64-bit builds.
	if( UNIX )
		set( ASM_OUTPUT_EXTENSION .o )
		if( X64 )
			# 64-bit Unix sources use the .s extension and need no extra flags.
			set( ASM_FLAGS )
			set( ASM_SOURCE_EXTENSION .s )
		else()
			if( APPLE )
				set( ASM_FLAGS -fmacho -DM_TARGET_MACHO )
			else()
				set( ASM_FLAGS -felf -DM_TARGET_LINUX )
			endif()
			# Add the source directory to the assembler's include search path.
			set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ )
			set( ASM_SOURCE_EXTENSION .asm )
		endif()
	else()
		set( ASM_OUTPUT_EXTENSION .obj )
		set( ASM_SOURCE_EXTENSION .asm )
		if( X64 )
			set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
		else()
			set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
		endif()
	endif()

	# 32-bit Windows objects are post-processed by the fixrtext tool.
	if( WIN32 AND NOT X64 )
		set( FIXRTEXT fixrtext )
	else()
		set( FIXRTEXT "" )
	endif()

	message( STATUS "Selected assembler: ${ASSEMBLER}" )

	# ADD_ASM_FILE( <indir> <infile> )
	#
	# Adds a build rule that assembles
	#   ${CMAKE_CURRENT_SOURCE_DIR}/<indir>/<infile>${ASM_SOURCE_EXTENSION}
	# into
	#   ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/<indir>/<infile>${ASM_OUTPUT_EXTENSION}
	# and appends that object file to ASM_SOURCES.
	#
	# This stays a macro on purpose: ASM_SOURCES and the per-file helper
	# variables must be set in the caller's scope.
	macro( ADD_ASM_FILE indir infile )
		set( ASM_INPUT_${infile} "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}" )
		set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" )
		if( WIN32 AND NOT X64 )
			set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" )
		else()
			set( FIXRTEXT_${infile} COMMAND "" )
		endif()
		# The rule depends on the file that is actually assembled. The old
		# rule always named "<infile>.asm", so on 64-bit Unix (where the
		# source is "<infile>.s") edits to the real source were not picked up.
		# NOTE(review): VERBATIM is deliberately not added; the -o"..." form
		# appears to rely on the legacy argument handling - confirm before changing.
		add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
			COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
			COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${ASM_INPUT_${infile}}"
			${FIXRTEXT_${infile}}
			DEPENDS "${ASM_INPUT_${infile}}" ${FIXRTEXT} )
		set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
	endmacro()
endif()
|
||||
|
||||
# Decide on SSE setup
|
||||
|
||||
set( SSE_MATTERS NO )
|
||||
|
@ -801,25 +694,6 @@ if( HAVE_MMX )
|
|||
PROPERTIES COMPILE_FLAGS "-mmmx" )
|
||||
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||
endif( HAVE_MMX )
|
||||
|
||||
# Give ASM_SOURCES a defined (empty) value when nothing has populated it yet.
if( NOT ASM_SOURCES )
	set( ASM_SOURCES "" )
endif()

# Register the assembly units for the active architecture, or define NOASM
# for the compiled sources when assembly is disabled.
if( NOT NO_ASM )
	if( X64 )
		ADD_ASM_FILE( asm_x86_64 tmap3 )
	else()
		foreach( ia32_unit a misc tmap tmap2 tmap3 )
			ADD_ASM_FILE( asm_ia32 ${ia32_unit} )
		endforeach()
	endif()
else()
	add_definitions( -DNOASM )
endif()
|
||||
|
||||
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
||||
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
||||
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
||||
|
@ -927,16 +801,6 @@ set( NOT_COMPILED_SOURCE_FILES
|
|||
scripting/zscript/zcc-parse.lemon
|
||||
zcc-parse.c
|
||||
zcc-parse.h
|
||||
|
||||
# We could have the ASM macro add these files, but it wouldn't add all
|
||||
# platforms.
|
||||
asm_ia32/a.asm
|
||||
asm_ia32/misc.asm
|
||||
asm_ia32/tmap.asm
|
||||
asm_ia32/tmap2.asm
|
||||
asm_ia32/tmap3.asm
|
||||
asm_x86_64/tmap3.asm
|
||||
asm_x86_64/tmap3.s
|
||||
)
|
||||
|
||||
set( FASTMATH_PCH_SOURCES
|
||||
|
@ -944,7 +808,9 @@ set( FASTMATH_PCH_SOURCES
|
|||
r_3dfloors.cpp
|
||||
r_bsp.cpp
|
||||
r_draw.cpp
|
||||
r_drawt.cpp
|
||||
r_draw_pal.cpp
|
||||
r_drawt_pal.cpp
|
||||
r_thread.cpp
|
||||
r_main.cpp
|
||||
r_plane.cpp
|
||||
r_segs.cpp
|
||||
|
@ -1351,7 +1217,6 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE
|
|||
${HEADER_FILES}
|
||||
${NOT_COMPILED_SOURCE_FILES}
|
||||
__autostart.cpp
|
||||
${ASM_SOURCES}
|
||||
${SYSTEM_SOURCES}
|
||||
${X86_SOURCES}
|
||||
${FASTMATH_SOURCES}
|
||||
|
@ -1514,8 +1379,6 @@ install(TARGETS zdoom
|
|||
DESTINATION ${INSTALL_PATH}
|
||||
COMPONENT "Game executable")
|
||||
|
||||
source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+")
|
||||
source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+")
|
||||
source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+")
|
||||
source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+")
|
||||
source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h)
|
||||
|
|
|
@ -1,812 +0,0 @@
|
|||
; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
|
||||
; Ken Silverman's official web site: "http://www.advsys.net/ken"
|
||||
; See the included license file "BUILDLIC.TXT" for license info.
|
||||
; This file has been modified from Ken Silverman's original release
|
||||
|
||||
%include "valgrind.inc"
|
||||
|
||||
SECTION .data
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
%define ylookup _ylookup
|
||||
%define vince _vince
|
||||
%define vplce _vplce
|
||||
%define palookupoffse _palookupoffse
|
||||
%define bufplce _bufplce
|
||||
%define dc_iscale _dc_iscale
|
||||
%define dc_colormap _dc_colormap
|
||||
%define dc_count _dc_count
|
||||
%define dc_dest _dc_dest
|
||||
%define dc_source _dc_source
|
||||
%define dc_texturefrac _dc_texturefrac
|
||||
|
||||
%define setupvlineasm _setupvlineasm
|
||||
%define prevlineasm1 _prevlineasm1
|
||||
%define vlineasm1 _vlineasm1
|
||||
%define vlineasm4 _vlineasm4
|
||||
|
||||
%define setupmvlineasm _setupmvlineasm
|
||||
%define mvlineasm1 _mvlineasm1
|
||||
%define mvlineasm4 _mvlineasm4
|
||||
|
||||
%define R_SetupDrawSlabA _R_SetupDrawSlabA
|
||||
%define R_DrawSlabA _R_DrawSlabA
|
||||
%endif
|
||||
|
||||
EXTERN ylookup ; near
|
||||
|
||||
EXTERN vplce ; near
|
||||
EXTERN vince ; near
|
||||
EXTERN palookupoffse ; near
|
||||
EXTERN bufplce ; near
|
||||
|
||||
EXTERN dc_iscale
|
||||
EXTERN dc_colormap
|
||||
EXTERN dc_count
|
||||
EXTERN dc_dest
|
||||
EXTERN dc_source
|
||||
EXTERN dc_texturefrac
|
||||
|
||||
SECTION .text
|
||||
|
||||
; setvlinebpl_ / setdrawslabbpl
;
; In:  eax = value to patch into the code below.
;      NOTE(review): presumably the screen pitch in bytes per line - confirm
;      against the callers.
;
; Both entry points store eax at offset +2 of a set of labelled instructions
; elsewhere in this file, i.e. over their 32-bit immediate/displacement.
; setvlinebpl_ patches the six fixchain* instructions and then falls through
; into setdrawslabbpl, which patches voxbpl1..voxbpl8 and returns.
;
; Each group of stores is followed by "selfmod first, last+6", which names the
; patched range (selfmod comes from valgrind.inc).
	ALIGN	16
GLOBAL	setvlinebpl_
setvlinebpl_:
	mov	[fixchain1a+2], eax
	mov	[fixchain1b+2], eax
	mov	[fixchain2a+2], eax
	mov	[fixchain1m+2], eax
	mov	[fixchain2ma+2], eax
	mov	[fixchain2mb+2], eax
	selfmod	fixchain1a, fixchain2mb+6

	; No ret here: execution continues into setdrawslabbpl.
setdrawslabbpl:
	mov	dword [voxbpl1+2], eax
	mov	dword [voxbpl2+2], eax
	mov	dword [voxbpl3+2], eax
	mov	dword [voxbpl4+2], eax
	mov	dword [voxbpl5+2], eax
	mov	dword [voxbpl6+2], eax
	mov	dword [voxbpl7+2], eax
	mov	dword [voxbpl8+2], eax
	; The range must end at voxbpl8, the last instruction patched above.
	; The previous "voxpl8" is not a label defined anywhere in this file.
	selfmod	voxbpl1, voxbpl8+6
	ret
|
||||
|
||||
SECTION .data
|
||||
|
||||
lastslabcolormap:
|
||||
dd 4
|
||||
|
||||
SECTION .text
|
||||
|
||||
GLOBAL R_SetupDrawSlabA
|
||||
GLOBAL @R_SetupDrawSlabA@4
|
||||
R_SetupDrawSlabA:
|
||||
mov ecx, [esp+4]
|
||||
@R_SetupDrawSlabA@4:
|
||||
cmp [lastslabcolormap], ecx
|
||||
je .done
|
||||
mov [lastslabcolormap], ecx
|
||||
mov dword [voxpal1+2], ecx
|
||||
mov dword [voxpal2+2], ecx
|
||||
mov dword [voxpal3+2], ecx
|
||||
mov dword [voxpal4+2], ecx
|
||||
mov dword [voxpal5+2], ecx
|
||||
mov dword [voxpal6+2], ecx
|
||||
mov dword [voxpal7+2], ecx
|
||||
mov dword [voxpal8+2], ecx
|
||||
.done ret
|
||||
|
||||
|
||||
; pass it log2(texheight)
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL setupvlineasm
|
||||
setupvlineasm:
|
||||
mov ecx, [esp+4]
|
||||
|
||||
;First 2 lines for VLINEASM1, rest for VLINEASM4
|
||||
mov byte [premach3a+2], cl
|
||||
mov byte [mach3a+2], cl
|
||||
|
||||
mov byte [machvsh1+2], cl ;32-shy
|
||||
mov byte [machvsh3+2], cl ;32-shy
|
||||
mov byte [machvsh5+2], cl ;32-shy
|
||||
mov byte [machvsh6+2], cl ;32-shy
|
||||
mov ch, cl
|
||||
sub ch, 16
|
||||
mov byte [machvsh8+2], ch ;16-shy
|
||||
neg cl
|
||||
mov byte [machvsh7+2], cl ;shy
|
||||
mov byte [machvsh9+2], cl ;shy
|
||||
mov byte [machvsh10+2], cl ;shy
|
||||
mov byte [machvsh11+2], cl ;shy
|
||||
mov byte [machvsh12+2], cl ;shy
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
mov dword [machvsh2+2], eax ;(1<<shy)-1
|
||||
mov dword [machvsh4+2], eax ;(1<<shy)-1
|
||||
selfmod premach3a, machvsh8+6
|
||||
ret
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
SECTION .text align=64
|
||||
%else
|
||||
SECTION .rtext progbits alloc exec write align=64
|
||||
%endif
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_a_start
|
||||
_rtext_a_start:
|
||||
%endif
|
||||
|
||||
;eax = xscale
|
||||
;ebx = palookupoffse
|
||||
;ecx = # pixels to draw-1
|
||||
;edx = texturefrac
|
||||
;esi = texturecolumn
|
||||
;edi = buffer pointer
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL prevlineasm1
|
||||
prevlineasm1:
|
||||
mov ecx, [dc_count]
|
||||
cmp ecx, 1
|
||||
ja vlineasm1
|
||||
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
add eax, edx
|
||||
mov ecx, [dc_source]
|
||||
premach3a: shr edx, 32
|
||||
push ebx
|
||||
push edi
|
||||
mov edi, [dc_colormap]
|
||||
xor ebx, ebx
|
||||
mov bl, byte [ecx+edx]
|
||||
mov ecx, [dc_dest]
|
||||
mov bl, byte [edi+ebx]
|
||||
pop edi
|
||||
mov byte [ecx], bl
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
GLOBAL vlineasm1
|
||||
ALIGN 16
|
||||
vlineasm1:
|
||||
push ebx
|
||||
push edi
|
||||
push esi
|
||||
push ebp
|
||||
mov ecx, [dc_count]
|
||||
mov ebp, [dc_colormap]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
mov esi, [dc_source]
|
||||
fixchain1a: sub edi, 320
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
beginvline:
|
||||
mov ebx, edx
|
||||
mach3a: shr ebx, 32
|
||||
fixchain1b: add edi, 320
|
||||
mov bl, byte [esi+ebx]
|
||||
add edx, eax
|
||||
dec ecx
|
||||
mov bl, byte [ebp+ebx]
|
||||
mov byte [edi], bl
|
||||
jnz short beginvline
|
||||
pop ebp
|
||||
pop esi
|
||||
pop edi
|
||||
pop ebx
|
||||
mov eax, edx
|
||||
ret
|
||||
|
||||
;eax: -------temp1-------
|
||||
;ebx: -------temp2-------
|
||||
;ecx: dat dat dat dat
|
||||
;edx: ylo2 ylo4
|
||||
;esi: yhi1 yhi2
|
||||
;edi: ---videoplc/cnt----
|
||||
;ebp: yhi3 yhi4
|
||||
;esp:
|
||||
ALIGN 16
|
||||
GLOBAL vlineasm4
|
||||
vlineasm4:
|
||||
mov ecx, [dc_count]
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
mov edi, [dc_dest]
|
||||
|
||||
mov eax, dword [ylookup+ecx*4-4]
|
||||
add eax, edi
|
||||
mov dword [machvline4end+2], eax
|
||||
sub edi, eax
|
||||
|
||||
mov eax, dword [bufplce+0]
|
||||
mov ebx, dword [bufplce+4]
|
||||
mov ecx, dword [bufplce+8]
|
||||
mov edx, dword [bufplce+12]
|
||||
mov dword [machvbuf1+2], ecx
|
||||
mov dword [machvbuf2+2], edx
|
||||
mov dword [machvbuf3+2], eax
|
||||
mov dword [machvbuf4+2], ebx
|
||||
|
||||
mov eax, dword [palookupoffse+0]
|
||||
mov ebx, dword [palookupoffse+4]
|
||||
mov ecx, dword [palookupoffse+8]
|
||||
mov edx, dword [palookupoffse+12]
|
||||
mov dword [machvpal1+2], ecx
|
||||
mov dword [machvpal2+2], edx
|
||||
mov dword [machvpal3+2], eax
|
||||
mov dword [machvpal4+2], ebx
|
||||
|
||||
;     ┌───────────────┬───────────────┐
|
||||
;edx: │v3lo           │v1lo           │
|
||||
;     ├───────────────┴───────┬───────┤
|
||||
;esi: │v2hi  v2lo             │   v3hi│
|
||||
;     ├───────────────────────┼───────┤
|
||||
;ebp: │v0hi  v0lo             │   v1hi│
|
||||
;     └───────────────────────┴───────┘
|
||||
|
||||
mov ebp, dword [vince+0]
|
||||
mov ebx, dword [vince+4]
|
||||
mov esi, dword [vince+8]
|
||||
mov eax, dword [vince+12]
|
||||
and esi, 0fffffe00h
|
||||
and ebp, 0fffffe00h
|
||||
machvsh9: rol eax, 88h ;sh
|
||||
machvsh10: rol ebx, 88h ;sh
|
||||
mov edx, eax
|
||||
mov ecx, ebx
|
||||
shr ecx, 16
|
||||
and edx, 0ffff0000h
|
||||
add edx, ecx
|
||||
and eax, 000001ffh
|
||||
and ebx, 000001ffh
|
||||
add esi, eax
|
||||
add ebp, ebx
|
||||
;
|
||||
mov eax, edx
|
||||
and eax, 0ffff0000h
|
||||
mov dword [machvinc1+2], eax
|
||||
mov dword [machvinc2+2], esi
|
||||
mov byte [machvinc3+2], dl
|
||||
mov byte [machvinc4+2], dh
|
||||
mov dword [machvinc5+2], ebp
|
||||
|
||||
mov ebp, dword [vplce+0]
|
||||
mov ebx, dword [vplce+4]
|
||||
mov esi, dword [vplce+8]
|
||||
mov eax, dword [vplce+12]
|
||||
and esi, 0fffffe00h
|
||||
and ebp, 0fffffe00h
|
||||
machvsh11: rol eax, 88h ;sh
|
||||
machvsh12: rol ebx, 88h ;sh
|
||||
mov edx, eax
|
||||
mov ecx, ebx
|
||||
shr ecx, 16
|
||||
and edx, 0ffff0000h
|
||||
add edx, ecx
|
||||
and eax, 000001ffh
|
||||
and ebx, 000001ffh
|
||||
add esi, eax
|
||||
add ebp, ebx
|
||||
|
||||
mov ecx, esi
|
||||
selfmod beginvlineasm4, machvline4end+6
|
||||
jmp short beginvlineasm4
|
||||
ALIGN 16
|
||||
beginvlineasm4:
|
||||
machvsh1: shr ecx, 88h ;32-sh
|
||||
mov ebx, esi
|
||||
machvsh2: and ebx, 00000088h ;(1<<sh)-1
|
||||
machvinc1: add edx, 88880000h
|
||||
machvinc2: adc esi, 88888088h
|
||||
machvbuf1: mov cl, byte [ecx+88888888h]
|
||||
machvbuf2: mov bl, byte [ebx+88888888h]
|
||||
mov eax, ebp
|
||||
machvsh3: shr eax, 88h ;32-sh
|
||||
machvpal1: mov cl, byte [ecx+88888888h]
|
||||
machvpal2: mov ch, byte [ebx+88888888h]
|
||||
mov ebx, ebp
|
||||
shl ecx, 16
|
||||
machvsh4: and ebx, 00000088h ;(1<<sh)-1
|
||||
machvinc3: add dl, 88h
|
||||
machvbuf3: mov al, byte [eax+88888888h]
|
||||
machvinc4: adc dh, 88h
|
||||
machvbuf4: mov bl, byte [ebx+88888888h]
|
||||
machvinc5: adc ebp, 88888088h
|
||||
machvpal3: mov cl, byte [eax+88888888h]
|
||||
machvpal4: mov ch, byte [ebx+88888888h]
|
||||
machvline4end: mov dword [edi+88888888h], ecx
|
||||
fixchain2a: add edi, 88888888h
|
||||
mov ecx, esi
|
||||
jle short beginvlineasm4
|
||||
|
||||
;     ┌───────────────┬───────────────┐
|
||||
;edx: │v3lo           │v1lo           │
|
||||
;     ├───────────────┴───────┬───────┤
|
||||
;esi: │v2hi  v2lo             │   v3hi│
|
||||
;     ├───────────────────────┼───────┤
|
||||
;ebp: │v0hi  v0lo             │   v1hi│
|
||||
;     └───────────────────────┴───────┘
|
||||
|
||||
mov dword [vplce+8], esi
|
||||
mov dword [vplce+0], ebp
|
||||
;vplc2 = (esi<<(32-sh))+(edx>>sh)
|
||||
;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh))
|
||||
machvsh5: shl esi, 88h ;32-sh
|
||||
mov eax, edx
|
||||
machvsh6: shl ebp, 88h ;32-sh
|
||||
and edx, 0000ffffh
|
||||
machvsh7: shr eax, 88h ;sh
|
||||
add esi, eax
|
||||
machvsh8: shl edx, 88h ;16-sh
|
||||
add ebp, edx
|
||||
mov dword [vplce+12], esi
|
||||
mov dword [vplce+4], ebp
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
;*************************************************************************
|
||||
;************************* Masked Vertical Lines *************************
|
||||
;*************************************************************************
|
||||
|
||||
; pass it log2(texheight)
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL setupmvlineasm
|
||||
setupmvlineasm:
|
||||
mov ecx, dword [esp+4]
|
||||
mov byte [maskmach3a+2], cl
|
||||
mov byte [machmv13+2], cl
|
||||
|
||||
mov byte [machmv14+2], cl
|
||||
mov byte [machmv15+2], cl
|
||||
mov byte [machmv16+2], cl
|
||||
selfmod maskmach3a, machmv13+6
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL mvlineasm1 ;Masked vline
|
||||
mvlineasm1:
|
||||
push ebx
|
||||
push edi
|
||||
push esi
|
||||
push ebp
|
||||
mov ecx, [dc_count]
|
||||
mov ebp, [dc_colormap]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
mov esi, [dc_source]
|
||||
beginmvline:
|
||||
mov ebx, edx
|
||||
maskmach3a: shr ebx, 32
|
||||
movzx ebx, byte [esi+ebx]
|
||||
cmp ebx, 0
|
||||
je short skipmask1
|
||||
maskmach3c: mov bl, byte [ebp+ebx]
|
||||
mov [edi], bl
|
||||
skipmask1: add edx, eax
|
||||
fixchain1m: add edi, 320
|
||||
dec ecx
|
||||
jnz short beginmvline
|
||||
|
||||
pop ebp
|
||||
pop esi
|
||||
pop edi
|
||||
pop ebx
|
||||
mov eax, edx
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL mvlineasm4
|
||||
mvlineasm4:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
|
||||
mov ecx,[dc_count]
|
||||
mov edi,[dc_dest]
|
||||
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov [machmv1+3], eax
|
||||
mov [machmv4+3], ebx
|
||||
mov eax, [bufplce+8]
|
||||
mov ebx, [bufplce+12]
|
||||
mov [machmv7+3], eax
|
||||
mov [machmv10+3], ebx
|
||||
|
||||
mov eax, [palookupoffse]
|
||||
mov ebx, [palookupoffse+4]
|
||||
mov [machmv2+2], eax
|
||||
mov [machmv5+2], ebx
|
||||
mov eax, [palookupoffse+8]
|
||||
mov ebx, [palookupoffse+12]
|
||||
mov [machmv8+2], eax
|
||||
mov [machmv11+2], ebx
|
||||
|
||||
mov eax, [vince] ;vince
|
||||
mov ebx, [vince+4]
|
||||
xor bl, bl
|
||||
mov [machmv3+2], eax
|
||||
mov [machmv6+2], ebx
|
||||
mov eax, [vince+8]
|
||||
mov ebx, [vince+12]
|
||||
mov [machmv9+2], eax
|
||||
mov [machmv12+2], ebx
|
||||
|
||||
inc ecx
|
||||
push ecx
|
||||
mov ecx, [vplce+0]
|
||||
mov edx, [vplce+4]
|
||||
mov esi, [vplce+8]
|
||||
mov ebp, [vplce+12]
|
||||
fixchain2ma: sub edi, 320
|
||||
|
||||
selfmod beginmvlineasm4, machmv2+6
|
||||
jmp short beginmvlineasm4
|
||||
ALIGN 16
|
||||
beginmvlineasm4:
|
||||
dec dword [esp]
|
||||
jz near endmvlineasm4
|
||||
|
||||
mov eax, ebp
|
||||
mov ebx, esi
|
||||
machmv16: shr eax, 32
|
||||
machmv12: add ebp, 0x88888888 ;vince[3]
|
||||
machmv15: shr ebx, 32
|
||||
machmv9: add esi, 0x88888888 ;vince[2]
|
||||
machmv10: movzx eax, byte [eax+0x88888888];bufplce[3]
|
||||
machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
cmp ebx, 1
|
||||
adc dl, dl
|
||||
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
|
||||
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
|
||||
|
||||
mov eax, edx
|
||||
machmv6: add edx, 0x88888888 ;vince[1]
|
||||
machmv14: shr eax, 32
|
||||
shl ebx, 16
|
||||
machmv4: movzx eax, byte [eax+0x88888888];bufplce[1]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
|
||||
|
||||
mov eax, ecx
|
||||
machmv3: add ecx, 0x88888888 ;vince[0]
|
||||
machmv13: shr eax, 32
|
||||
machmv1: movzx eax, byte [eax+0x88888888];bufplce[0]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
|
||||
|
||||
xor eax, eax
|
||||
shl dl, 4
|
||||
fixchain2mb: add edi, 320
|
||||
mov al, dl
|
||||
add eax, mvcase15
|
||||
jmp eax ;16 byte cases
|
||||
|
||||
ALIGN 16
|
||||
endmvlineasm4:
|
||||
mov [vplce], ecx
|
||||
mov [vplce+4], edx
|
||||
mov [vplce+8], esi
|
||||
mov [vplce+12], ebp
|
||||
pop ecx
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7
|
||||
ALIGN 16
|
||||
mvcase15: mov [edi], ebx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase14: mov [edi+1], bh
|
||||
shr ebx, 16
|
||||
mov [edi+2], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase13: mov [edi], bl
|
||||
shr ebx, 16
|
||||
mov [edi+2], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase12: shr ebx, 16
|
||||
mov [edi+2], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase11: mov [edi], bx
|
||||
shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase10: mov [edi+1], bh
|
||||
shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase9: mov [edi], bl
|
||||
shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase8: shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase7: mov [edi], bx
|
||||
shr ebx, 16
|
||||
mov [edi+2], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase6: shr ebx, 8
|
||||
mov [edi+1], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase5: mov [edi], bl
|
||||
shr ebx, 16
|
||||
mov [edi+2], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase4: shr ebx, 16
|
||||
mov [edi+2], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase3: mov [edi], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase2: mov [edi+1], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase1: mov [edi], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase0: jmp beginmvlineasm4
|
||||
|
||||
align 16
|
||||
|
||||
|
||||
;*************************************************************************
|
||||
;***************************** Voxel Slabs *******************************
|
||||
;*************************************************************************
|
||||
|
||||
GLOBAL R_DrawSlabA
|
||||
R_DrawSlabA:
|
||||
push ebx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
|
||||
mov eax, [esp+5*4+0]
|
||||
mov ebx, [esp+5*4+4]
|
||||
mov ecx, [esp+5*4+8]
|
||||
mov edx, [esp+5*4+12]
|
||||
mov esi, [esp+5*4+16]
|
||||
mov edi, [esp+5*4+20]
|
||||
|
||||
cmp eax, 2
|
||||
je voxbegdraw2
|
||||
ja voxskip2
|
||||
xor eax, eax
|
||||
voxbegdraw1:
|
||||
mov ebp, ebx
|
||||
shr ebp, 16
|
||||
add ebx, edx
|
||||
dec ecx
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal1: mov al, byte [eax+88888888h]
|
||||
mov byte [edi], al
|
||||
voxbpl1: lea edi, [edi+88888888h]
|
||||
jnz voxbegdraw1
|
||||
jmp voxskipslab5
|
||||
|
||||
voxbegdraw2:
|
||||
mov ebp, ebx
|
||||
shr ebp, 16
|
||||
add ebx, edx
|
||||
xor eax, eax
|
||||
dec ecx
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal2: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
mov word [edi], ax
|
||||
voxbpl2: lea edi, [edi+88888888h]
|
||||
jnz voxbegdraw2
|
||||
jmp voxskipslab5
|
||||
|
||||
voxskip2:
|
||||
cmp eax, 4
|
||||
jne voxskip4
|
||||
xor eax, eax
|
||||
voxbegdraw4:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal3: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
mov dword [edi], eax
|
||||
voxbpl3: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegdraw4
|
||||
jmp voxskipslab5
|
||||
|
||||
voxskip4:
|
||||
add eax, edi
|
||||
|
||||
test edi, 1
|
||||
jz voxskipslab1
|
||||
cmp edi, eax
|
||||
je voxskipslab1
|
||||
|
||||
push eax
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab1:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal4: mov al, byte [eax+88888888h]
|
||||
mov byte [edi], al
|
||||
voxbpl4: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab1
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
pop eax
|
||||
inc edi
|
||||
|
||||
voxskipslab1:
|
||||
push eax
|
||||
test edi, 2
|
||||
jz voxskipslab2
|
||||
dec eax
|
||||
cmp edi, eax
|
||||
jge voxskipslab2
|
||||
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab2:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal5: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
mov word [edi], ax
|
||||
voxbpl5: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab2
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
add edi, 2
|
||||
|
||||
voxskipslab2:
|
||||
mov eax, [esp]
|
||||
|
||||
sub eax, 3
|
||||
cmp edi, eax
|
||||
jge voxskipslab3
|
||||
|
||||
voxprebegslab3:
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab3:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal6: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
mov dword [edi], eax
|
||||
voxbpl6: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab3
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
add edi, 4
|
||||
|
||||
mov eax, [esp]
|
||||
|
||||
sub eax, 3
|
||||
cmp edi, eax
|
||||
jl voxprebegslab3
|
||||
|
||||
voxskipslab3:
|
||||
mov eax, [esp]
|
||||
|
||||
dec eax
|
||||
cmp edi, eax
|
||||
jge voxskipslab4
|
||||
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab4:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal7: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
mov word [edi], ax
|
||||
voxbpl7: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab4
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
add edi, 2
|
||||
|
||||
voxskipslab4:
|
||||
pop eax
|
||||
|
||||
cmp edi, eax
|
||||
je voxskipslab5
|
||||
|
||||
voxbegslab5:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal8: mov al, byte [eax+88888888h]
|
||||
mov byte [edi], al
|
||||
voxbpl8: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab5
|
||||
|
||||
voxskipslab5:
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
align 16
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_a_end
|
||||
_rtext_a_end:
|
||||
%endif
|
|
@ -1,200 +0,0 @@
|
|||
;*
|
||||
;* misc.nas
|
||||
;* Miscellaneous assembly functions
|
||||
;*
|
||||
;*---------------------------------------------------------------------------
|
||||
;* Copyright 1998-2006 Randy Heit
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* 1. Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;* 2. Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in the
|
||||
;* documentation and/or other materials provided with the distribution.
|
||||
;* 3. The name of the author may not be used to endorse or promote products
|
||||
;* derived from this software without specific prior written permission.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;*---------------------------------------------------------------------------
|
||||
;*
|
||||
|
||||
BITS 32
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
|
||||
%define DoBlending_MMX _DoBlending_MMX
|
||||
%define BestColor_MMX _BestColor_MMX
|
||||
|
||||
%endif
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
|
||||
SEGMENT DATA
|
||||
%else
|
||||
SECTION .data
|
||||
%endif
|
||||
|
||||
Blending256:
|
||||
dd 0x01000100,0x00000100
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
|
||||
SEGMENT CODE
|
||||
%else
|
||||
SECTION .text
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------
|
||||
;
|
||||
; DoBlending_MMX
|
||||
;
|
||||
; MMX version of DoBlending
|
||||
;
|
||||
; (DWORD *from, DWORD *to, count, tor, tog, tob, toa)
|
||||
;-----------------------------------------------------------
|
||||
|
||||
GLOBAL DoBlending_MMX
|
||||
|
||||
DoBlending_MMX:
|
||||
pxor mm0,mm0 ; mm0 = 0
|
||||
mov eax,[esp+4*4]
|
||||
shl eax,16
|
||||
mov edx,[esp+4*5]
|
||||
shl edx,8
|
||||
or eax,[esp+4*6]
|
||||
or eax,edx
|
||||
mov ecx,[esp+4*3] ; ecx = count
|
||||
movd mm1,eax ; mm1 = 00000000 00RRGGBB
|
||||
mov eax,[esp+4*7]
|
||||
shl eax,16
|
||||
mov edx,[esp+4*7]
|
||||
shl edx,8
|
||||
or eax,[esp+4*7]
|
||||
or eax,edx
|
||||
mov edx,[esp+4*2] ; edx = dest
|
||||
movd mm6,eax ; mm6 = 00000000 00AAAAAA
|
||||
punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB
|
||||
movq mm7,[Blending256]
|
||||
punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA
|
||||
mov eax,[esp+4*1] ; eax = source
|
||||
pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha)
|
||||
psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha)
|
||||
nop ; Does this actually pair on a Pentium?
|
||||
|
||||
; Do four colors per iteration: Count must be a multiple of four.
|
||||
|
||||
.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
|
||||
add eax,8
|
||||
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
|
||||
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
|
||||
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
|
||||
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
|
||||
add edx,8
|
||||
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
|
||||
sub ecx,2
|
||||
paddusw mm2,mm1
|
||||
psrlw mm2,8
|
||||
paddusw mm3,mm1
|
||||
psrlw mm3,8
|
||||
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
|
||||
movq [edx-8],mm2
|
||||
|
||||
movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
|
||||
add eax,8
|
||||
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
|
||||
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
|
||||
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
|
||||
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
|
||||
add edx,8
|
||||
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
|
||||
sub ecx,2
|
||||
paddusw mm2,mm1
|
||||
psrlw mm2,8
|
||||
paddusw mm3,mm1
|
||||
psrlw mm3,8
|
||||
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
|
||||
movq [edx-8],mm2
|
||||
|
||||
jnz .loop
|
||||
|
||||
emms
|
||||
ret
|
||||
|
||||
;-----------------------------------------------------------
|
||||
;
|
||||
; BestColor_MMX
|
||||
;
|
||||
; Picks the closest matching color from a palette
|
||||
;
|
||||
; Passed FFRRGGBB and palette array in same format
|
||||
; FF is the index of the first palette entry to consider
|
||||
;
|
||||
;-----------------------------------------------------------
|
||||
|
||||
; Two entry points share one body:
;   BestColor_MMX     loads its two arguments from the stack into ecx/edx and
;                     falls through into the second entry point.
;   @BestColor_MMX@8  expects the same values already in ecx/edx (the decorated
;                     name looks like a fastcall export -- TODO confirm).
; In:  ecx = FFRRGGBB (FF = first palette index to test)
;      edx = palette base, read as dwords at [edx+index*4]
; Out: eax = index of the closest entry found while scanning FF..255
; ebx/esi/ebp are saved and restored; ecx, edx and mm0-mm3 are overwritten.
GLOBAL BestColor_MMX
|
||||
GLOBAL @BestColor_MMX@8
|
||||
|
||||
BestColor_MMX:
|
||||
mov ecx,[esp+4] ; first stack argument: the FFRRGGBB value
|
||||
mov edx,[esp+8] ; second stack argument: palette pointer
|
||||
@BestColor_MMX@8:
|
||||
pxor mm0,mm0 ; mm0 = 0, used below to widen bytes to words
|
||||
movd mm1,ecx ; mm1 = color searching for
|
||||
mov eax,257*257+257*257+257*257 ;eax = bestdist (starts above 3*255*255)
|
||||
push ebx
|
||||
punpcklbw mm1,mm0 ; mm1 = 00FF 00RR 00GG 00BB (four words)
|
||||
mov ebx,ecx ; ebx = best color
|
||||
shr ecx,24 ; ecx = count
|
||||
and ebx,0xffffff ; drop the FF byte from the initial "best" value
|
||||
push esi
|
||||
|
||||
push ebp
|
||||
|
||||
|
||||
.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now
|
||||
inc ecx ; ecx now points one past the entry being tested
|
||||
punpcklbw mm2,mm0 ; widen the palette entry to four words
|
||||
movq mm3,mm1
|
||||
psubsw mm3,mm2 ; per-word difference (target - entry)
|
||||
pmullw mm3,mm3 ; mm3 = color distance squared
|
||||
|
||||
|
||||
movd ebp,mm3 ; add the three components
|
||||
psrlq mm3,32 ; into ebp to get the real
|
||||
mov esi,ebp ; (squared) distance
|
||||
shr esi,16 ; esi = squared difference held in word 1
|
||||
and ebp,0xffff ; ebp = squared difference held in word 0
|
||||
add ebp,esi
|
||||
movd esi,mm3 ; esi = words 2 and 3. NOTE(review): word 3 is the squared (FF - entry top byte) difference and is not masked off, so it is added in bits 16-31 -- confirm this is intended
|
||||
add ebp,esi ; sets ZF when the total is exactly zero
|
||||
|
||||
|
||||
jz .perf ; found a perfect match
|
||||
cmp eax,ebp
|
||||
jb .skip ; unsigned: keep the old best when bestdist < this distance (ties take the newer entry)
|
||||
mov eax,ebp ; new best distance
|
||||
lea ebx,[ecx-1] ; ecx was already incremented, so ecx-1 is this entry's index
|
||||
.skip cmp ecx,256
|
||||
jne .loop ; keep scanning through index 255
|
||||
mov eax,ebx ; return the best index recorded
|
||||
pop ebp
|
||||
|
||||
pop esi
|
||||
|
||||
pop ebx
|
||||
|
||||
emms ; leave MMX state before returning
|
||||
ret
|
||||
|
||||
|
||||
.perf lea eax,[ecx-1] ; exact match: return the index just tested
|
||||
pop ebp
|
||||
|
||||
pop esi
|
||||
|
||||
pop ebx
|
||||
|
||||
emms
|
||||
|
||||
ret
|
File diff suppressed because it is too large
Load diff
|
@ -1,643 +0,0 @@
|
|||
;*
|
||||
;* tmap2.nas
|
||||
;* The tilted plane inner loop.
|
||||
;*
|
||||
;*---------------------------------------------------------------------------
|
||||
;* Copyright 1998-2006 Randy Heit
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* 1. Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;* 2. Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in the
|
||||
;* documentation and/or other materials provided with the distribution.
|
||||
;* 3. The name of the author may not be used to endorse or promote products
|
||||
;* derived from this software without specific prior written permission.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;*---------------------------------------------------------------------------
|
||||
;*
|
||||
;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was
|
||||
;* actually slightly slower than the more straight-forward approach
|
||||
;* used here, probably because the trick requires too much setup time.
|
||||
;*
|
||||
|
||||
BITS 32
|
||||
|
||||
%include "valgrind.inc"
|
||||
|
||||
%define SPACEFILLER4 (0x44444444)
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
|
||||
%define plane_sz _plane_sz
|
||||
%define plane_su _plane_su
|
||||
%define plane_sv _plane_sv
|
||||
%define plane_shade _plane_shade
|
||||
%define planelightfloat _planelightfloat
|
||||
%define spanend _spanend
|
||||
%define ylookup _ylookup
|
||||
%define dc_destorg _dc_destorg
|
||||
%define ds_colormap _ds_colormap
|
||||
%define ds_source _ds_source
|
||||
%define centery _centery
|
||||
%define centerx _centerx
|
||||
%define ds_curtiltedsource _ds_curtiltedsource
|
||||
%define pviewx _pviewx
|
||||
%define pviewy _pviewy
|
||||
%define tiltlighting _tiltlighting
|
||||
|
||||
%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM
|
||||
%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM
|
||||
%define R_CalcTiltedLighting _R_CalcTiltedLighting
|
||||
|
||||
%endif
|
||||
|
||||
EXTERN plane_sz
|
||||
EXTERN plane_su
|
||||
EXTERN plane_sv
|
||||
EXTERN planelightfloat
|
||||
EXTERN spanend
|
||||
EXTERN ylookup
|
||||
EXTERN dc_destorg
|
||||
EXTERN ds_colormap
|
||||
EXTERN centery
|
||||
EXTERN centerx
|
||||
EXTERN ds_source
|
||||
EXTERN plane_shade
|
||||
EXTERN pviewx
|
||||
EXTERN pviewy
|
||||
EXTERN tiltlighting
|
||||
EXTERN R_CalcTiltedLighting
|
||||
|
||||
GLOBAL ds_curtiltedsource
|
||||
|
||||
%define sv_i plane_sv
|
||||
%define sv_j plane_sv+4
|
||||
%define sv_k plane_sv+8
|
||||
|
||||
%define su_i plane_su
|
||||
%define su_j plane_su+4
|
||||
%define su_k plane_su+8
|
||||
|
||||
%define sz_i plane_sz
|
||||
%define sz_j plane_sz+4
|
||||
%define sz_k plane_sz+8
|
||||
|
||||
%define SPANBITS 3
|
||||
|
||||
section .bss
|
||||
|
||||
start_u: resq 1
|
||||
start_v: resq 1
|
||||
step_u: resq 1
|
||||
step_v: resq 1
|
||||
|
||||
step_iz: resq 1
|
||||
step_uz: resq 1
|
||||
step_vz: resq 1
|
||||
|
||||
end_z: resd 1
|
||||
|
||||
section .data
|
||||
|
||||
ds_curtiltedsource: dd SPACEFILLER4
|
||||
|
||||
fp_1:
|
||||
spanrecips: dd 0x3f800000 ; 1/1
|
||||
dd 0x3f000000 ; 1/2
|
||||
dd 0x3eaaaaab ; 1/3
|
||||
dd 0x3e800000 ; 1/4
|
||||
dd 0x3e4ccccd ; 1/5
|
||||
dd 0x3e2aaaab ; 1/6
|
||||
dd 0x3e124925 ; 1/7
|
||||
fp_8recip: dd 0x3e000000 ; 1/8
|
||||
dd 0x3de38e39 ; 1/9
|
||||
dd 0x3dcccccd ; 1/10
|
||||
dd 0x3dba2e8c ; 1/11
|
||||
dd 0x3daaaaab ; 1/12
|
||||
dd 0x3d9d89d9 ; 1/13
|
||||
dd 0x3d924925 ; 1/14
|
||||
dd 0x3d888889 ; 1/15
|
||||
|
||||
fp_quickint: dd 0x3f800000 ; 1
|
||||
dd 0x40000000 ; 2
|
||||
dd 0x40400000 ; 3
|
||||
dd 0x40800000 ; 4
|
||||
dd 0x40a00000 ; 5
|
||||
dd 0x40c00000 ; 6
|
||||
dd 0x40e00000 ; 7
|
||||
fp_8: dd 0x41000000 ; 8
|
||||
|
||||
section .text
|
||||
|
||||
; R_SetTiltedSpanSource_ASM / @R_SetTiltedSpanSource_ASM@4
; Stores a new texture source address into the ten "fetchN" instructions of the
; tilted-plane drawer (each is "mov al,[reg+reg+SPACEFILLER4]"; the write at
; fetchN+3 overwrites that 32-bit placeholder) and records the same value in
; ds_curtiltedsource.
; The first entry point loads the value from the stack; the second expects it
; already in ecx (decorated name looks like a fastcall export -- TODO confirm).
GLOBAL R_SetTiltedSpanSource_ASM
|
||||
GLOBAL @R_SetTiltedSpanSource_ASM@4
|
||||
|
||||
R_SetTiltedSpanSource_ASM:
|
||||
mov ecx,[esp+4] ; first stack argument: the source address
|
||||
|
||||
@R_SetTiltedSpanSource_ASM@4:
|
||||
mov [fetch1+3],ecx ; patch the displacement of every fetch instruction
|
||||
mov [fetch2+3],ecx
|
||||
mov [fetch3+3],ecx
|
||||
mov [fetch4+3],ecx
|
||||
mov [fetch5+3],ecx
|
||||
mov [fetch6+3],ecx
|
||||
mov [fetch7+3],ecx
|
||||
mov [fetch8+3],ecx
|
||||
mov [fetch9+3],ecx
|
||||
mov [fetch10+3],ecx
|
||||
mov [ds_curtiltedsource],ecx ; remember which source is currently patched in
|
||||
; selfmod is a macro from valgrind.inc (not visible here); presumably it
; reports the modified code range rtext_start..rtext_end -- TODO confirm.
selfmod rtext_start, rtext_end
|
||||
ret
|
||||
|
||||
; SetTiltedSpanSize
; Re-patches the shift counts and AND masks used by the ten texel-address
; sequences (labels x1-x10, m1-m10, y1-y10) of the tilted-plane drawer.
; In:  cl = bit count used for the xN shifts (presumably "y bits" -- confirm)
;      dl = bit count used for the mN masks ("x bits" in the comment below)
; Patched values (the CPU only uses the low 5 bits of a shift count):
;      xN shift = -cl          i.e. 32 - cl
;      yN shift = -cl - dl     i.e. 32 - cl - dl
;      mN mask  = top dl bits set, or 0 when dl is 0
; ecx is preserved; eax is overwritten.
GLOBAL SetTiltedSpanSize
|
||||
|
||||
SetTiltedSpanSize:
|
||||
push ecx ; keep the caller's ecx; its low byte is re-read from [esp] below
|
||||
mov cl,dl
|
||||
neg cl ; cl = -dl, used as a shift count (32 - dl)
|
||||
mov eax,1
|
||||
shl eax,cl ; eax = 1 << (32 - dl); stays 1 when dl is 0
|
||||
mov cl,[esp] ; reload the original cl from the saved ecx
|
||||
neg cl ; cl = -(original cl)
|
||||
mov [x1+2],cl ; xN+2 is the imm8 of each "shr reg,imm8"
|
||||
mov [x2+2],cl
|
||||
mov [x3+2],cl
|
||||
mov [x4+2],cl
|
||||
mov [x5+2],cl
|
||||
mov [x6+2],cl
|
||||
mov [x7+2],cl
|
||||
mov [x8+2],cl
|
||||
mov [x9+2],cl
|
||||
mov [x10+2],cl
|
||||
|
||||
sub cl,dl ; cl = -(original cl) - dl
|
||||
dec eax ; eax = low (32 - dl) bits set, or 0 when dl is 0
|
||||
mov [y1+2],cl ; yN+2 is the imm8 of each second "shr reg,imm8"
|
||||
mov [y2+2],cl
|
||||
mov [y3+2],cl
|
||||
mov [y4+2],cl
|
||||
mov [y5+2],cl
|
||||
mov [y6+2],cl
|
||||
mov [y7+2],cl
|
||||
mov [y8+2],cl
|
||||
mov [y9+2],cl
|
||||
mov [y10+2],cl
|
||||
cmp eax,0 ; if x bits is 0, mask must be 0 too.
|
||||
jz .notted
|
||||
not eax ; invert: the mask now keeps the top dl bits
|
||||
.notted:
|
||||
pop ecx
|
||||
|
||||
mov [m1+2],eax ; mN+2 is the imm32 of each "and reg,imm32"
|
||||
mov [m2+2],eax
|
||||
mov [m3+2],eax
|
||||
mov [m4+2],eax
|
||||
mov [m5+2],eax
|
||||
mov [m6+2],eax
|
||||
mov [m7+2],eax
|
||||
mov [m8+2],eax
|
||||
mov [m9+2],eax
|
||||
mov [m10+2],eax
|
||||
|
||||
; selfmod is a macro from valgrind.inc (not visible here); presumably it
; reports the modified code range rtext_start..rtext_end -- TODO confirm.
selfmod rtext_start, rtext_end
|
||||
|
||||
ret
|
||||
|
||||
%ifndef M_TARGET_MACHO
|
||||
SECTION .rtext progbits alloc exec write align=64
|
||||
%else
|
||||
SECTION .text align=64
|
||||
GLOBAL _rtext_tmap2_start
|
||||
_rtext_tmap2_start:
|
||||
%endif
|
||||
|
||||
rtext_start:
|
||||
|
||||
GLOBAL R_DrawTiltedPlane_ASM
|
||||
GLOBAL @R_DrawTiltedPlane_ASM@8
|
||||
|
||||
R_DrawTiltedPlane_ASM:
|
||||
mov ecx,[esp+4]
|
||||
mov edx,[esp+8]
|
||||
|
||||
; ecx = y
|
||||
; edx = x
|
||||
|
||||
@R_DrawTiltedPlane_ASM@8:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
|
||||
mov eax,[centery]
|
||||
movzx ebx,word [spanend+ecx*2]
|
||||
sub eax,ecx ; eax = centery-y
|
||||
sub ebx,edx ; ebx = span length - 1
|
||||
mov edi,[ylookup+ecx*4]
|
||||
push eax
|
||||
add edi,[dc_destorg]
|
||||
add edi,edx ; edi = frame buffer pointer
|
||||
sub edx,[centerx] ; edx = x-centerx
|
||||
push edx
|
||||
xor eax,eax
|
||||
|
||||
fild dword [esp+4] ; ymul
|
||||
fild dword [esp] ; xmul | ymul
|
||||
fld dword [sv_j] ; sv.j | xmul | ymul
|
||||
fmul st0,st2 ; sv.j*ymul | xmul | ymul
|
||||
fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul
|
||||
fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul
|
||||
fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul
|
||||
fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
faddp st1,st0 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul
|
||||
faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
|
||||
fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
|
||||
fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul
|
||||
fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul
|
||||
fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z
|
||||
fadd dword [su_k] ; u/z | v/z | 1/z
|
||||
fxch st2 ; 1/z | v/z | u/z
|
||||
fxch st1 ; v/z | 1/z | u/z
|
||||
|
||||
; if lighting is on, fill out the light table
|
||||
mov al,[plane_shade]
|
||||
test al,al
|
||||
jz .litup
|
||||
|
||||
push ebx
|
||||
fild dword [esp] ; width | v/z | 1/z | u/z
|
||||
fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z
|
||||
fadd st0,st2 ; 1/endz | v/z | 1/z | u/z
|
||||
fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z
|
||||
fmul dword [planelightfloat]
|
||||
fxch st1
|
||||
fmul dword [planelightfloat]
|
||||
sub esp,16
|
||||
fstp qword [esp]
|
||||
fstp qword [esp+8]
|
||||
call R_CalcTiltedLighting
|
||||
add esp, 20
|
||||
xor eax, eax
|
||||
|
||||
.litup add esp, 8
|
||||
|
||||
; calculate initial z, u, and v values
|
||||
fld st1 ; 1/z | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | v/z | 1/z | u/z
|
||||
|
||||
fld st3 ; u/z | z | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u | z | v/z | 1/z | u/z
|
||||
fld st2 ; v/z | u | z | v/z | 1/z | u/z
|
||||
fmulp st2,st0 ; u | v | v/z | 1/z | u/z
|
||||
fld st0
|
||||
fistp qword [start_u]
|
||||
fld st1
|
||||
fistp qword [start_v]
|
||||
|
||||
cmp ebx,7 ; Do we have at least 8 pixels to plot?
|
||||
jl near ShortStrip
|
||||
|
||||
; yes, we do, so figure out tex coords at end of this span
|
||||
|
||||
; multiply i values by span length (8)
|
||||
fld dword [su_i] ; su.i
|
||||
fmul dword [fp_8] ; su.i*8
|
||||
fld dword [sv_i] ; sv.i | su.i*8
|
||||
fmul dword [fp_8] ; sv.i*8 | su.i*8
|
||||
fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8
|
||||
fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8
|
||||
fxch st2 ; su.i*8 | sv.i*8 | sz.i*8
|
||||
fstp qword [step_uz] ; sv.i*8 | sz.i*8
|
||||
fstp qword [step_vz] ; sz.i*8
|
||||
fst qword [step_iz] ; sz.i*8
|
||||
|
||||
; find tex coords at start of next span
|
||||
faddp st4
|
||||
fld qword [step_vz]
|
||||
faddp st3
|
||||
fld qword [step_uz]
|
||||
faddp st5
|
||||
|
||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
||||
fst dword [end_z]
|
||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
||||
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
|
||||
|
||||
; now subtract to get stepping values for this span
|
||||
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
|
||||
|
||||
FullSpan:
|
||||
xor eax,eax
|
||||
cmp ebx,15 ; is there another complete span after this one?
|
||||
jl NextIsShort
|
||||
|
||||
; there is a complete span after this one
|
||||
fld qword [step_iz]
|
||||
faddp st4,st0
|
||||
fld qword [step_vz]
|
||||
faddp st3,st0
|
||||
fld qword [step_uz]
|
||||
faddp st5,st0
|
||||
jmp StartDiv
|
||||
|
||||
NextIsShort:
|
||||
cmp ebx,8 ; if next span is no more than 1 pixel, then we already
|
||||
jle DrawFullSpan ; know everything we need to draw it
|
||||
|
||||
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint-8*4+ebx*4]
|
||||
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint-8*4+ebx*4]
|
||||
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint-8*4+ebx*4]
|
||||
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st5,st0 ; u | v | v/z | 1/z | u/z
|
||||
|
||||
StartDiv:
|
||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
||||
|
||||
DrawFullSpan:
|
||||
mov ecx,[start_v]
|
||||
mov edx,[start_u]
|
||||
|
||||
add ecx,[pviewy]
|
||||
add edx,[pviewx]
|
||||
|
||||
mov esi,edx
|
||||
mov ebp,ecx
|
||||
x1 shr ebp,26
|
||||
m1 and esi,0xfc000000
|
||||
y1 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch1 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+0],al
|
||||
|
||||
x2 shr ebp,26
|
||||
m2 and esi,0xfc000000
|
||||
y2 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch2 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-4]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+1],al
|
||||
|
||||
x3 shr ebp,26
|
||||
m3 and esi,0xfc000000
|
||||
y3 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch3 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-8]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+2],al
|
||||
|
||||
x4 shr ebp,26
|
||||
m4 and esi,0xfc000000
|
||||
y4 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch4 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-12]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+3],al
|
||||
|
||||
x5 shr ebp,26
|
||||
m5 and esi,0xfc000000
|
||||
y5 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch5 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-16]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+4],al
|
||||
|
||||
x6 shr ebp,26
|
||||
m6 and esi,0xfc000000
|
||||
y6 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch6 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-20]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+5],al
|
||||
|
||||
x7 shr ebp,26
|
||||
m7 and esi,0xfc000000
|
||||
y7 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch7 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-24]
|
||||
x8 shr ecx,26
|
||||
mov al,[ebp+eax]
|
||||
m8 and edx,0xfc000000
|
||||
mov [edi+6],al
|
||||
|
||||
y8 shr edx,20
|
||||
mov ebp,[tiltlighting+ebx*4-28]
|
||||
fetch8 mov al,[edx+ecx+SPACEFILLER4]
|
||||
mov al,[ebp+eax]
|
||||
mov [edi+7],al
|
||||
add edi,8
|
||||
|
||||
sub ebx,8
|
||||
jl near Done
|
||||
|
||||
fld st1
|
||||
fistp qword [start_u]
|
||||
fld st2
|
||||
fistp qword [start_v]
|
||||
|
||||
cmp ebx,7
|
||||
jl near EndIsShort
|
||||
|
||||
fst dword [end_z]
|
||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
||||
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
|
||||
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
|
||||
jmp FullSpan
|
||||
|
||||
OnlyOnePixelAtEnd:
|
||||
fld st0
|
||||
fistp qword [start_u]
|
||||
fld st1
|
||||
fistp qword [start_v]
|
||||
|
||||
OnlyOnePixel:
|
||||
mov edx,[start_v]
|
||||
mov ecx,[start_u]
|
||||
add edx,[pviewy]
|
||||
add ecx,[pviewx]
|
||||
x9 shr edx,26
|
||||
m9 and ecx,0xfc000000
|
||||
y9 shr ecx,20
|
||||
mov ebp,[tiltlighting]
|
||||
fetch9 mov al,[ecx+edx+SPACEFILLER4]
|
||||
mov al,[ebp+eax]
|
||||
mov [edi],al
|
||||
|
||||
Done:
|
||||
fcompp
|
||||
fcompp
|
||||
fstp st0
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
ShortStrip:
|
||||
cmp ebx,0
|
||||
jle near OnlyOnePixel
|
||||
|
||||
MoreThanOnePixel:
|
||||
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint+ebx*4]
|
||||
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint+ebx*4]
|
||||
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint+ebx*4]
|
||||
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st5,st0 ; u | v | v/z | 1/z | u/z
|
||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
||||
jmp CalcPartialSteps
|
||||
|
||||
EndIsShort:
|
||||
cmp ebx,0
|
||||
je near OnlyOnePixelAtEnd
|
||||
|
||||
CalcPartialSteps:
|
||||
fst dword [end_z]
|
||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z
|
||||
fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z
|
||||
fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z
|
||||
fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z
|
||||
fxch st1 ; v'-v | ustep | v/z | 1/z | u/z
|
||||
fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z
|
||||
fxch st1 ; ustep | vstep | v/z | 1/z | u/z
|
||||
fistp qword [step_u] ; vstep | v/z | 1/z | u/z
|
||||
fistp qword [step_v] ; v/z | 1/z | u/z
|
||||
|
||||
mov ecx,[start_v]
|
||||
mov edx,[start_u]
|
||||
|
||||
add ecx,[pviewy]
|
||||
add edx,[pviewx]
|
||||
|
||||
mov esi,edx
|
||||
mov ebp,ecx
|
||||
endloop:
|
||||
x10 shr ebp,26
|
||||
m10 and esi,0xfc000000
|
||||
|
||||
y10 shr esi,20
|
||||
inc edi
|
||||
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
|
||||
fetch10 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4]
|
||||
|
||||
mov esi,edx
|
||||
dec ebx
|
||||
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
|
||||
mov [edi-1],al
|
||||
jge endloop
|
||||
|
||||
fcompp
|
||||
fstp st0
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
rtext_end:
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_tmap2_end
|
||||
_rtext_tmap2_end:
|
||||
%endif
|
|
@ -1,344 +0,0 @@
|
|||
%include "valgrind.inc"
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
|
||||
SEGMENT DATA
|
||||
%else
|
||||
SECTION .data
|
||||
%endif
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
%define ylookup _ylookup
|
||||
%define vplce _vplce
|
||||
%define vince _vince
|
||||
%define palookupoffse _palookupoffse
|
||||
%define bufplce _bufplce
|
||||
%define dc_iscale _dc_iscale
|
||||
%define dc_colormap _dc_colormap
|
||||
%define dc_count _dc_count
|
||||
%define dc_dest _dc_dest
|
||||
%define dc_source _dc_source
|
||||
%define dc_texturefrac _dc_texturefrac
|
||||
%define dc_pitch _dc_pitch
|
||||
|
||||
%define setupvlinetallasm _setupvlinetallasm
|
||||
%define vlinetallasm4 _vlinetallasm4
|
||||
%define vlinetallasmathlon4 _vlinetallasmathlon4
|
||||
%define vlinetallasm1 _vlinetallasm1
|
||||
%define prevlinetallasm1 _prevlinetallasm1
|
||||
%endif
|
||||
|
||||
EXTERN vplce
|
||||
EXTERN vince
|
||||
EXTERN palookupoffse
|
||||
EXTERN bufplce
|
||||
|
||||
EXTERN ylookup
|
||||
EXTERN dc_iscale
|
||||
EXTERN dc_colormap
|
||||
EXTERN dc_count
|
||||
EXTERN dc_dest
|
||||
EXTERN dc_source
|
||||
EXTERN dc_texturefrac
|
||||
EXTERN dc_pitch
|
||||
|
||||
GLOBAL vlt4pitch
|
||||
GLOBAL vlt1pitch
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
|
||||
SEGMENT CODE
|
||||
%else
|
||||
SECTION .text
|
||||
%endif
|
||||
|
||||
ALIGN 16
|
||||
; setpitch3
; Writes the value passed in eax (the screen pitch, judging by the label names)
; into the 32-bit immediates of the four pitch add/sub instructions:
; vltpitch (vlinetallasm4), vltpitcha (vlinetallasmathlon4) and
; vlt1pitch1/vlt1pitch2 (vlinetallasm1). No registers are changed.
GLOBAL setpitch3
|
||||
setpitch3:
|
||||
mov [vltpitch+2], eax ; immediate of "add edi,imm32"
|
||||
mov [vltpitcha+2],eax ; immediate of "add edi,imm32"
|
||||
mov [vlt1pitch1+2], eax ; immediate of "sub edi,imm32"
|
||||
mov [vlt1pitch2+2], eax ; immediate of "add edi,imm32"
|
||||
; selfmod is a macro from valgrind.inc (not visible here); the range given
; runs from the first to just past the last patched instruction.
selfmod vltpitch, vlt1pitch2+6
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
; setupvlinetallasm
; Takes one stack argument and stores its low byte as the shift count (imm8 at
; label+2) of every "shr reg,imm8" used by the vline routines in this file:
; shifter1-4 (vlinetallasm4), shifter1a-4a (vlinetallasmathlon4),
; preshift (prevlinetallasm1) and shift11/shift12 (vlinetallasm1).
; ecx is overwritten.
GLOBAL setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
mov ecx, [esp+4] ; first stack argument; only cl is used
|
||||
mov [shifter1+2], cl
|
||||
mov [shifter2+2], cl
|
||||
mov [shifter3+2], cl
|
||||
mov [shifter4+2], cl
|
||||
mov [shifter1a+2], cl
|
||||
mov [shifter2a+2], cl
|
||||
mov [shifter3a+2], cl
|
||||
mov [shifter4a+2], cl
|
||||
mov [preshift+2], cl
|
||||
mov [shift11+2], cl
|
||||
mov [shift12+2], cl
|
||||
; selfmod is a macro from valgrind.inc (not visible here); the range given
; spans all of the instructions patched above.
selfmod shifter1, shift12+6
|
||||
ret
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
SECTION .text align=64
|
||||
GLOBAL _rtext_tmap3_start
|
||||
_rtext_tmap3_start:
|
||||
%else
|
||||
SECTION .rtext progbits alloc exec write align=64
|
||||
%endif
|
||||
|
||||
ALIGN 16
|
||||
|
||||
GLOBAL vlinetallasm4
|
||||
vlinetallasm4:
|
||||
push ebx
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov ecx, [bufplce+8]
|
||||
mov edx, [bufplce+12]
|
||||
mov [source1+3], eax
|
||||
mov [source2+3], ebx
|
||||
mov [source3+3], ecx
|
||||
mov [source4+3], edx
|
||||
mov eax, [palookupoffse+0]
|
||||
mov ebx, [palookupoffse+4]
|
||||
mov ecx, [palookupoffse+8]
|
||||
mov edx, [palookupoffse+12]
|
||||
mov [lookup1+2], eax
|
||||
mov [lookup2+2], ebx
|
||||
mov [lookup3+2], ecx
|
||||
mov [lookup4+2], edx
|
||||
mov eax, [vince+0]
|
||||
mov ebx, [vince+4]
|
||||
mov ecx, [vince+8]
|
||||
mov edx, [vince+12]
|
||||
mov [step1+2], eax
|
||||
mov [step2+2], ebx
|
||||
mov [step3+2], ecx
|
||||
mov [step4+1], edx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
mov ecx, [dc_count]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, dword [ylookup+ecx*4-4]
|
||||
add eax, edi
|
||||
sub edi, eax
|
||||
mov [write1+2],eax
|
||||
inc eax
|
||||
mov [write2+2],eax
|
||||
inc eax
|
||||
mov [write3+2],eax
|
||||
inc eax
|
||||
mov [write4+2],eax
|
||||
mov ebx, [vplce]
|
||||
mov ecx, [vplce+4]
|
||||
mov esi, [vplce+8]
|
||||
mov eax, [vplce+12]
|
||||
selfmod loopit, vltpitch
|
||||
jmp loopit
|
||||
|
||||
ALIGN 16
|
||||
loopit:
|
||||
mov edx, ebx
|
||||
shifter1: shr edx, 24
|
||||
source1: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup1: mov dl, [edx+0x88888888]
|
||||
write1: mov [edi+0x88888880], dl
|
||||
step1: add ebx, 0x88888888
|
||||
mov edx, ecx
|
||||
shifter2: shr edx, 24
|
||||
source2: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup2: mov dl, [edx+0x88888888]
|
||||
write2: mov [edi+0x88888881], dl
|
||||
step2: add ecx, 0x88888888
|
||||
mov edx, esi
|
||||
shifter3: shr edx, 24
|
||||
source3: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup3: mov dl, BYTE [edx+0x88888888]
|
||||
write3: mov [edi+0x88888882], dl
|
||||
step3: add esi, 0x88888888
|
||||
mov edx, eax
|
||||
shifter4: shr edx, 24
|
||||
source4: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup4: mov dl, [edx+0x88888888]
|
||||
write4: mov [edi+0x88888883], dl
|
||||
step4: add eax, 0x88888888
|
||||
vltpitch: add edi, 320
|
||||
jle near loopit
|
||||
|
||||
mov [vplce], ebx
|
||||
mov [vplce+4], ecx
|
||||
mov [vplce+8], esi
|
||||
mov [vplce+12], eax
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
|
||||
GLOBAL vlinetallasmathlon4
|
||||
vlinetallasmathlon4:
|
||||
push ebx
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov ecx, [bufplce+8]
|
||||
mov edx, [bufplce+12]
|
||||
mov [source1a+3], eax
|
||||
mov [source2a+3], ebx
|
||||
mov [source3a+3], ecx
|
||||
mov [source4a+3], edx
|
||||
mov eax, [palookupoffse+0]
|
||||
mov ebx, [palookupoffse+4]
|
||||
mov ecx, [palookupoffse+8]
|
||||
mov edx, [palookupoffse+12]
|
||||
mov [lookup1a+2], eax
|
||||
mov [lookup2a+2], ebx
|
||||
mov [lookup3a+2], ecx
|
||||
mov [lookup4a+2], edx
|
||||
mov eax, [vince+0]
|
||||
mov ebx, [vince+4]
|
||||
mov ecx, [vince+8]
|
||||
mov edx, [vince+12]
|
||||
mov [step1a+2], eax
|
||||
mov [step2a+2], ebx
|
||||
mov [step3a+2], ecx
|
||||
mov [step4a+1], edx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
mov ecx, [dc_count]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, dword [ylookup+ecx*4-4]
|
||||
add eax, edi
|
||||
sub edi, eax
|
||||
mov [write1a+2],eax
|
||||
inc eax
|
||||
mov [write2a+2],eax
|
||||
inc eax
|
||||
mov [write3a+2],eax
|
||||
inc eax
|
||||
mov [write4a+2],eax
|
||||
mov ebp, [vplce]
|
||||
mov ecx, [vplce+4]
|
||||
mov esi, [vplce+8]
|
||||
mov eax, [vplce+12]
|
||||
selfmod loopita, vltpitcha
|
||||
jmp loopita
|
||||
|
||||
; Unfortunately, this code has not been carefully analyzed to determine
|
||||
; how well it utilizes the processor's instruction units. Instead, I just
|
||||
; kept rearranging code, seeing what sped it up and what slowed it down
|
||||
; until I arrived at this. This is the fastest version I was able to
|
||||
; manage, but that does not mean it cannot be made faster with careful
|
||||
; instruction shuffling.
|
||||
|
||||
ALIGN 64
|
||||
|
||||
loopita: mov edx, ebp
|
||||
mov ebx, ecx
|
||||
shifter1a: shr edx, 24
|
||||
shifter2a: shr ebx, 24
|
||||
source1a: movzx edx, BYTE [edx+0x88888888]
|
||||
source2a: movzx ebx, BYTE [ebx+0x88888888]
|
||||
step1a: add ebp, 0x88888888
|
||||
step2a: add ecx, 0x88888888
|
||||
lookup1a: mov dl, [edx+0x88888888]
|
||||
lookup2a: mov dh, [ebx+0x88888888]
|
||||
mov ebx, esi
|
||||
write1a: mov [edi+0x88888880], dl
|
||||
write2a: mov [edi+0x88888881], dh
|
||||
shifter3a: shr ebx, 24
|
||||
mov edx, eax
|
||||
source3a: movzx ebx, BYTE [ebx+0x88888888]
|
||||
shifter4a: shr edx, 24
|
||||
step3a: add esi, 0x88888888
|
||||
source4a: movzx edx, BYTE [edx+0x88888888]
|
||||
step4a: add eax, 0x88888888
|
||||
lookup3a: mov bl, [ebx+0x88888888]
|
||||
lookup4a: mov dl, [edx+0x88888888]
|
||||
write3a: mov [edi+0x88888882], bl
|
||||
write4a: mov [edi+0x88888883], dl
|
||||
vltpitcha: add edi, 320
|
||||
jle near loopita
|
||||
|
||||
mov [vplce], ebp
|
||||
mov [vplce+4], ecx
|
||||
mov [vplce+8], esi
|
||||
mov [vplce+12], eax
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
; prevlinetallasm1
; Front end for single-column drawing. When dc_count is above 1 (unsigned) it
; jumps straight to vlinetallasm1; otherwise (count 0 or 1) it draws exactly
; one pixel itself.
; Reads: dc_count, dc_iscale, dc_texturefrac, dc_source, dc_colormap, dc_dest.
; Returns eax = dc_texturefrac + dc_iscale on the one-pixel path.
; ebx and edi are saved and restored; ecx and edx are overwritten.
GLOBAL prevlinetallasm1
|
||||
prevlinetallasm1:
|
||||
mov ecx, [dc_count]
|
||||
cmp ecx, 1
|
||||
ja vlinetallasm1 ; more than one pixel: use the looping routine
|
||||
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
add eax, edx ; eax = frac advanced by one step (return value)
|
||||
mov ecx, [dc_source]
|
||||
preshift: shr edx, 16 ; shift count is patched by setupvlinetallasm
|
||||
push ebx
|
||||
|
||||
push edi
|
||||
|
||||
mov edi, [dc_colormap]
|
||||
movzx ebx, byte [ecx+edx] ; fetch the texel from the source column
|
||||
mov ecx, [dc_dest]
|
||||
mov bl, byte [edi+ebx] ; translate it through the colormap
|
||||
pop edi
|
||||
|
||||
mov byte [ecx], bl ; write the pixel
|
||||
pop ebx
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
; vlinetallasm1
; Draws dc_count pixels of one column: each iteration fetches a texel from
; dc_source at (frac >> shift), maps it through dc_colormap, writes it to the
; destination and steps the destination by the patched pitch.
; Reads: dc_count, dc_texturefrac, dc_dest, dc_source, dc_iscale, dc_colormap.
; Returns eax = frac after the last step.
; ebp/ebx/edi/esi are saved and restored; ecx and edx are overwritten.
; NOTE(review): the loop is "dec ebp / jnz", so a dc_count of 0 would wrap
; around instead of drawing nothing -- callers presumably guarantee count >= 1.
GLOBAL vlinetallasm1
|
||||
vlinetallasm1:
|
||||
push ebp
|
||||
|
||||
push ebx
|
||||
|
||||
push edi
|
||||
|
||||
push esi
|
||||
|
||||
|
||||
mov ebp, [dc_count] ; ebp = pixels left to draw
|
||||
mov ebx, [dc_texturefrac] ; ebx = frac
|
||||
mov edi, [dc_dest]
|
||||
mov ecx, ebx
|
||||
shift11: shr ecx, 16 ; shift count is patched by setupvlinetallasm
|
||||
mov esi, [dc_source]
|
||||
mov edx, [dc_iscale] ; edx = per-pixel frac step
|
||||
vlt1pitch1: sub edi, 0x88888888 ; immediate patched by setpitch3; backs up one row because the loop adds the pitch before writing
|
||||
mov eax, [dc_colormap]
|
||||
|
||||
loop2:
|
||||
movzx ecx, BYTE [esi+ecx] ; texel for the current frac
|
||||
add ebx, edx ; advance frac
|
||||
vlt1pitch2: add edi, 0x88888888 ; immediate patched by setpitch3; move to the next row
|
||||
mov cl,[eax+ecx] ; colormap lookup
|
||||
mov [edi],cl ; write the pixel
|
||||
mov ecx,ebx
|
||||
shift12: shr ecx,16 ; texel index for the next iteration (patched shift count)
|
||||
dec ebp
|
||||
|
||||
jnz loop2
|
||||
|
||||
|
||||
mov eax,ebx ; return the final frac
|
||||
pop esi
|
||||
|
||||
pop edi
|
||||
|
||||
pop ebx
|
||||
|
||||
pop ebp
|
||||
|
||||
ret
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_tmap3_end
|
||||
_rtext_tmap3_end:
|
||||
%endif
|
|
@ -1,150 +0,0 @@
|
|||
%ifnidn __OUTPUT_FORMAT__,win64
|
||||
%error tmap3.asm is for Win64 output. You should use tmap.s for other systems.
|
||||
%endif
|
||||
|
||||
BITS 64
|
||||
DEFAULT REL
|
||||
|
||||
EXTERN vplce
|
||||
EXTERN vince
|
||||
EXTERN palookupoffse
|
||||
EXTERN bufplce
|
||||
|
||||
EXTERN dc_count
|
||||
EXTERN dc_dest
|
||||
EXTERN dc_pitch
|
||||
|
||||
SECTION .text
|
||||
|
||||
; ASM_PatchPitch
; Copies dc_pitch into the 32-bit immediates of the two pitch instructions in
; vlinetallasm4: "pm: imul rcx,imm32" and "vltpitch: add rcx,imm32" (the
; immediate sits at label+3 in both). Takes no arguments; ecx is overwritten.
GLOBAL ASM_PatchPitch
|
||||
ASM_PatchPitch:
|
||||
mov ecx, [dc_pitch]
|
||||
mov [pm+3], ecx ; count-to-byte-offset multiplier
|
||||
mov [vltpitch+3], ecx ; per-row step of the loop
|
||||
ret
|
||||
align 16
|
||||
|
||||
; setupvlinetallasm
; Stores cl as the shift count (imm8 at label+2) of the four "shr reg,imm8"
; instructions shifter1-shifter4 in vlinetallasm4. cl is where the first
; integer argument arrives under the Win64 calling convention. No registers
; are changed.
GLOBAL setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
mov [shifter1+2], cl
|
||||
mov [shifter2+2], cl
|
||||
mov [shifter3+2], cl
|
||||
mov [shifter4+2], cl
|
||||
ret
|
||||
align 16
|
||||
|
||||
; Yasm can't do progbits alloc exec for win64?
|
||||
; Hmm, looks like it's automatic. No worries, then.
|
||||
SECTION .rtext write ;progbits alloc exec
|
||||
|
||||
GLOBAL vlinetallasm4
|
||||
PROC_FRAME vlinetallasm4
|
||||
rex_push_reg rbx
|
||||
push_reg rdi
|
||||
push_reg r15
|
||||
push_reg r14
|
||||
push_reg r13
|
||||
push_reg r12
|
||||
push_reg rbp
|
||||
push_reg rsi
|
||||
alloc_stack 8 ; Stack must be 16-byte aligned
|
||||
END_PROLOGUE
|
||||
; rax = bufplce base address
|
||||
; rbx = scratch for the second column (texel, then its colormapped pixel)
|
||||
; rcx = offset from rdi/count (negative)
|
||||
; edx/rdx = scratch
|
||||
; rdi = bottom of columns to write to
|
||||
; r8d-r11d = column offsets
|
||||
; r12-r15 = palookupoffse[0] - palookupoffse[3]
|
||||
|
||||
mov ecx, [dc_count]
|
||||
mov rdi, [dc_dest]
|
||||
test ecx, ecx
|
||||
jle vltepilog ; count must be positive
|
||||
|
||||
mov rax, [bufplce]
|
||||
mov r8, [bufplce+8]
|
||||
sub r8, rax
|
||||
mov r9, [bufplce+16]
|
||||
sub r9, rax
|
||||
mov r10, [bufplce+24]
|
||||
sub r10, rax
|
||||
mov [source2+4], r8d
|
||||
mov [source3+4], r9d
|
||||
mov [source4+4], r10d
|
||||
|
||||
pm: imul rcx, 320
|
||||
|
||||
mov r12, [palookupoffse]
|
||||
mov r13, [palookupoffse+8]
|
||||
mov r14, [palookupoffse+16]
|
||||
mov r15, [palookupoffse+24]
|
||||
|
||||
mov r8d, [vince]
|
||||
mov r9d, [vince+4]
|
||||
mov r10d, [vince+8]
|
||||
mov r11d, [vince+12]
|
||||
mov [step1+3], r8d
|
||||
mov [step2+3], r9d
|
||||
mov [step3+3], r10d
|
||||
mov [step4+3], r11d
|
||||
|
||||
add rdi, rcx
|
||||
neg rcx
|
||||
|
||||
mov r8d, [vplce]
|
||||
mov r9d, [vplce+4]
|
||||
mov r10d, [vplce+8]
|
||||
mov r11d, [vplce+12]
|
||||
jmp loopit
|
||||
|
||||
ALIGN 16
|
||||
loopit:
|
||||
mov edx, r8d
|
||||
shifter1: shr edx, 24
|
||||
step1: add r8d, 0x88888888
|
||||
movzx edx, BYTE [rax+rdx]
|
||||
mov ebx, r9d
|
||||
mov dl, [r12+rdx]
|
||||
shifter2: shr ebx, 24
|
||||
step2: add r9d, 0x88888888
|
||||
source2: movzx ebx, BYTE [rax+rbx+0x88888888]
|
||||
mov ebp, r10d
|
||||
mov bl, [r13+rbx]
|
||||
shifter3: shr ebp, 24
|
||||
step3: add r10d, 0x88888888
|
||||
source3: movzx ebp, BYTE [rax+rbp+0x88888888]
|
||||
mov esi, r11d
|
||||
mov bpl, BYTE [r14+rbp]
|
||||
shifter4: shr esi, 24
|
||||
step4: add r11d, 0x88888888
|
||||
source4: movzx esi, BYTE [rax+rsi+0x88888888]
|
||||
mov [rdi+rcx], dl
|
||||
mov [rdi+rcx+1], bl
|
||||
mov sil, BYTE [r15+rsi]
|
||||
mov [rdi+rcx+2], bpl
|
||||
mov [rdi+rcx+3], sil
|
||||
|
||||
vltpitch: add rcx, 320
|
||||
jl loopit
|
||||
|
||||
mov [vplce], r8d
|
||||
mov [vplce+4], r9d
|
||||
mov [vplce+8], r10d
|
||||
mov [vplce+12], r11d
|
||||
|
||||
vltepilog:
|
||||
add rsp, 8
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop r12
|
||||
pop r13
|
||||
pop r14
|
||||
pop r15
|
||||
pop rdi
|
||||
pop rbx
|
||||
ret
|
||||
vlinetallasm4_end:
|
||||
ENDPROC_FRAME
|
||||
ALIGN 16
|
||||
|
|
@ -1,141 +0,0 @@
|
|||
#%include "valgrind.inc"
|
||||
|
||||
.section .text
|
||||
|
||||
# ASM_PatchPitch
# Copies the current value of dc_pitch into the instructions labelled
# pm and vltpitch inside vlinetallasm4. Both are assembled with a
# placeholder immediate of 320; byte offset 3 is where their 32-bit
# immediate starts (after the REX prefix, opcode and ModRM bytes).
# Takes no arguments; uses %ecx as a temporary.
.globl ASM_PatchPitch
|
||||
ASM_PatchPitch:
|
||||
movl dc_pitch(%rip), %ecx # ecx = dc_pitch
|
||||
movl %ecx, pm+3(%rip) # patch the multiplier of "pm: imulq $320, %rcx"
|
||||
movl %ecx, vltpitch+3(%rip) # patch the step of "vltpitch: addq $320, %rcx"
|
||||
# selfmod pm, vltpitch+6
|
||||
ret
|
||||
.align 16
|
||||
|
||||
# setupvlinetallasm
# Stores the low byte of %rdi into byte offset 2 of the four shift
# instructions labelled shifter1-shifter4 inside vlinetallasm4. Offset 2
# is the 8-bit shift count of each "shr $24, reg" (opcode, ModRM, imm8).
# NOTE(review): %dil presumably carries the shift count as the first
# integer argument from a C caller - the prototype is not visible here.
.globl setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
movb %dil, shifter1+2(%rip) # shift count for column 1
|
||||
movb %dil, shifter2+2(%rip) # shift count for column 2
|
||||
movb %dil, shifter3+2(%rip) # shift count for column 3
|
||||
movb %dil, shifter4+2(%rip) # shift count for column 4
|
||||
# selfmod shifter1, shifter4+3
|
||||
ret
|
||||
.align 16
|
||||
|
||||
.section .rtext,"awx"
|
||||
|
||||
# vlinetallasm4
# Draws dc_count rows of four horizontally adjacent pixels starting at
# dc_dest. For each of the four columns i and each row:
#   index = vplce[i] >> (patched shift count)
#   texel = bufplce[i][index]
#   pixel = palookupoffse[i][texel]
#   vplce[i] += vince[i]
# The per-call constants (vince[i], bufplce[i] - bufplce[0]) are written
# into the immediates/displacements of the loop instructions before the
# loop runs, so this code must live in a writable, executable section.
# Takes no register arguments; reads dc_count, dc_dest, bufplce,
# palookupoffse, vince and vplce, and writes the advanced vplce back.
.globl vlinetallasm4
|
||||
.type vlinetallasm4,@function
|
||||
vlinetallasm4:
|
||||
.cfi_startproc
|
||||
push %rbx
|
||||
push %rdi
|
||||
push %r15
|
||||
push %r14
|
||||
push %r13
|
||||
push %r12
|
||||
push %rbp
|
||||
push %rsi
|
||||
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
|
||||
# NOTE(review): the eight pushes above carry no CFI annotations; only this
# 8-byte adjustment is described to the unwinder - confirm whether intended.
.cfi_adjust_cfa_offset 8
|
||||
|
||||
# rax = bufplce base address
|
||||
# rbx = scratch for column 2 (rbp and rsi play the same role for columns 3 and 4)
|
||||
# rcx = offset from rdi/count (negative)
|
||||
# edx/rdx = scratch for column 1
|
||||
# rdi = bottom of columns to write to
|
||||
# r8d-r11d = column offsets
|
||||
# r12-r15 = palookupoffse[0] - palookupoffse[3]
|
||||
|
||||
movl dc_count(%rip), %ecx
|
||||
movq dc_dest(%rip), %rdi
|
||||
testl %ecx, %ecx
|
||||
jle vltepilog # count must be positive
|
||||
|
||||
movq bufplce(%rip), %rax # rax = bufplce[0]
|
||||
movq bufplce+8(%rip), %r8
|
||||
subq %rax, %r8 # r8 = bufplce[1] - bufplce[0]
|
||||
movq bufplce+16(%rip), %r9
|
||||
subq %rax, %r9 # r9 = bufplce[2] - bufplce[0]
|
||||
movq bufplce+24(%rip), %r10
|
||||
subq %rax, %r10 # r10 = bufplce[3] - bufplce[0]
|
||||
# Only the low 32 bits of each difference are stored as a displacement.
# NOTE(review): this assumes the four source columns lie within a signed
# 32-bit offset of bufplce[0] - confirm against the callers.
movl %r8d, source2+4(%rip)
|
||||
movl %r9d, source3+4(%rip)
|
||||
movl %r10d, source4+4(%rip)
|
||||
|
||||
pm: imulq $320, %rcx # rcx = count * pitch (320 is replaced by ASM_PatchPitch)
|
||||
|
||||
movq palookupoffse(%rip), %r12
|
||||
movq palookupoffse+8(%rip), %r13
|
||||
movq palookupoffse+16(%rip), %r14
|
||||
movq palookupoffse+24(%rip), %r15
|
||||
|
||||
movl vince(%rip), %r8d # r8d-r11d temporarily hold vince[0..3] ...
|
||||
movl vince+4(%rip), %r9d
|
||||
movl vince+8(%rip), %r10d
|
||||
movl vince+12(%rip), %r11d
|
||||
movl %r8d, step1+3(%rip) # ... to overwrite the 0x44444444 placeholders in step1-step4
|
||||
movl %r9d, step2+3(%rip)
|
||||
movl %r10d, step3+3(%rip)
|
||||
movl %r11d, step4+3(%rip)
|
||||
|
||||
addq %rcx, %rdi # rdi = dc_dest + count * pitch
|
||||
negq %rcx # rcx counts up from -(count * pitch) towards 0
|
||||
|
||||
movl vplce(%rip), %r8d # r8d-r11d = vplce[0..3] for the loop
|
||||
movl vplce+4(%rip), %r9d
|
||||
movl vplce+8(%rip), %r10d
|
||||
movl vplce+12(%rip), %r11d
|
||||
# selfmod loopit, vltepilog
|
||||
jmp loopit
|
||||
|
||||
.align 16
|
||||
loopit:
|
||||
movl %r8d, %edx
|
||||
shifter1: shrl $24, %edx # shift count patched by setupvlinetallasm
|
||||
step1: addl $0x44444444, %r8d # immediate patched with vince[0]
|
||||
movzbl (%rax,%rdx), %edx # column 1 texel from bufplce[0]
|
||||
movl %r9d, %ebx
|
||||
movb (%r12,%rdx), %dl # dl = palookupoffse[0][texel]
|
||||
shifter2: shrl $24, %ebx
|
||||
step2: addl $0x44444444, %r9d # immediate patched with vince[1]
|
||||
source2: movzbl 0x44444444(%rax,%rbx), %ebx # displacement patched with bufplce[1] - bufplce[0]
|
||||
movl %r10d, %ebp
|
||||
movb (%r13,%rbx), %bl # bl = palookupoffse[1][texel]
|
||||
shifter3: shr $24, %ebp
|
||||
step3: addl $0x44444444, %r10d # immediate patched with vince[2]
|
||||
source3: movzbl 0x44444444(%rax,%rbp), %ebp # displacement patched with bufplce[2] - bufplce[0]
|
||||
movl %r11d, %esi
|
||||
movb (%r14,%rbp), %bpl # bpl = palookupoffse[2][texel]
|
||||
shifter4: shr $24, %esi
|
||||
step4: add $0x44444444, %r11d # immediate patched with vince[3]
|
||||
source4: movzbl 0x44444444(%rax,%rsi), %esi # displacement patched with bufplce[3] - bufplce[0]
|
||||
movb %dl, (%rdi,%rcx) # store pixel for column 1
|
||||
movb %bl, 1(%rdi,%rcx) # store pixel for column 2
|
||||
movb (%r15,%rsi), %sil # sil = palookupoffse[3][texel]
|
||||
movb %bpl, 2(%rdi,%rcx) # store pixel for column 3
|
||||
movb %sil, 3(%rdi,%rcx) # store pixel for column 4
|
||||
|
||||
vltpitch: addq $320, %rcx # next row (320 is replaced by ASM_PatchPitch)
|
||||
jl loopit # loop while the offset is still negative
|
||||
|
||||
movl %r8d, vplce(%rip) # write the advanced coordinates back to vplce[0..3]
|
||||
movl %r9d, vplce+4(%rip)
|
||||
movl %r10d, vplce+8(%rip)
|
||||
movl %r11d, vplce+12(%rip)
|
||||
|
||||
vltepilog:
|
||||
addq $8, %rsp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
pop %rsi
|
||||
pop %rbp
|
||||
pop %r12
|
||||
pop %r13
|
||||
pop %r14
|
||||
pop %r15
|
||||
pop %rdi
|
||||
pop %rbx
|
||||
ret
|
||||
.cfi_endproc
|
||||
.align 16
|
||||
|
||||
|
|
@ -1030,7 +1030,7 @@ void D_DoomLoop ()
|
|||
catch (CVMAbortException &error)
|
||||
{
|
||||
error.MaybePrintMessage();
|
||||
Printf("%s", error.stacktrace);
|
||||
Printf("%s", error.stacktrace.GetChars());
|
||||
D_ErrorCleanup();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,57 +48,6 @@
|
|||
class PClassActor;
|
||||
typedef TMap<int, PClassActor *> FClassMap;
|
||||
|
||||
// Since this file is included by everything, it seems an appropriate place
|
||||
// to check the NOASM/USEASM macros.
|
||||
|
||||
// There are three assembly-related macros:
|
||||
//
|
||||
// NOASM - Assembly code is disabled
|
||||
// X86_ASM - Using ia32 assembly code
|
||||
// X64_ASM - Using amd64 assembly code
|
||||
//
|
||||
// Note that these relate only to using the pure assembly code. Inline
|
||||
// assembly may still be used without respect to these macros, as
|
||||
// deemed appropriate.
|
||||
|
||||
#ifndef NOASM
|
||||
// Select the appropriate type of assembly code to use.
|
||||
|
||||
#if defined(_M_IX86) || defined(__i386__)
|
||||
|
||||
#define X86_ASM
|
||||
#ifdef X64_ASM
|
||||
#undef X64_ASM
|
||||
#endif
|
||||
|
||||
#elif defined(_M_X64) || defined(__amd64__)
|
||||
|
||||
#define X64_ASM
|
||||
#ifdef X86_ASM
|
||||
#undef X86_ASM
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define NOASM
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef NOASM
|
||||
// Ensure no assembly macros are defined if NOASM is defined.
|
||||
|
||||
#ifdef X86_ASM
|
||||
#undef X86_ASM
|
||||
#endif
|
||||
|
||||
#ifdef X64_ASM
|
||||
#undef X64_ASM
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define NOVTABLE __declspec(novtable)
|
||||
|
|
|
@ -15,6 +15,11 @@
|
|||
#include "c_cvars.h"
|
||||
#include "r_3dfloors.h"
|
||||
|
||||
CVAR(Int, r_3dfloors, true, 0);
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
// external variables
|
||||
int fake3D;
|
||||
F3DFloor *fakeFloor;
|
||||
|
@ -28,8 +33,6 @@ HeightLevel *height_cur = NULL;
|
|||
int CurrentMirror = 0;
|
||||
int CurrentSkybox = 0;
|
||||
|
||||
CVAR(Int, r_3dfloors, true, 0);
|
||||
|
||||
// private variables
|
||||
int height_max = -1;
|
||||
TArray<HeightStack> toplist;
|
||||
|
@ -160,3 +163,4 @@ void R_3D_LeaveSkybox()
|
|||
CurrentSkybox--;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -3,6 +3,11 @@
|
|||
|
||||
#include "p_3dfloors.h"
|
||||
|
||||
EXTERN_CVAR(Int, r_3dfloors);
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
// special types
|
||||
|
||||
struct HeightLevel
|
||||
|
@ -57,7 +62,6 @@ extern HeightLevel *height_top;
|
|||
extern HeightLevel *height_cur;
|
||||
extern int CurrentMirror;
|
||||
extern int CurrentSkybox;
|
||||
EXTERN_CVAR(Int, r_3dfloors);
|
||||
|
||||
// functions
|
||||
void R_3D_DeleteHeights();
|
||||
|
@ -67,4 +71,6 @@ void R_3D_ResetClip();
|
|||
void R_3D_EnterSkybox();
|
||||
void R_3D_LeaveSkybox();
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -58,6 +58,13 @@
|
|||
#include "po_man.h"
|
||||
#include "r_data/colormaps.h"
|
||||
|
||||
CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs?
|
||||
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor);
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
using namespace drawerargs;
|
||||
|
||||
seg_t* curline;
|
||||
side_t* sidedef;
|
||||
line_t* linedef;
|
||||
|
@ -104,8 +111,6 @@ TArray<PortalDrawseg> WallPortals(1000); // note: this array needs to go away as
|
|||
|
||||
subsector_t *InSubsector;
|
||||
|
||||
CVAR (Bool, r_drawflat, false, 0) // [RH] Don't texture segs?
|
||||
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor);
|
||||
|
||||
|
||||
void R_StoreWallRange (int start, int stop);
|
||||
|
@ -1396,3 +1401,5 @@ void R_RenderBSPNode (void *node)
|
|||
}
|
||||
R_Subsector ((subsector_t *)((BYTE *)node - 1));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -27,6 +27,11 @@
|
|||
#include <stddef.h>
|
||||
#include "r_defs.h"
|
||||
|
||||
EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs?
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
// The 3072 below is just an arbitrary value picked to avoid
|
||||
// drawing lines the player is too close to that would overflow
|
||||
// the texture calculations.
|
||||
|
@ -109,8 +114,6 @@ extern WORD MirrorFlags;
|
|||
|
||||
typedef void (*drawfunc_t) (int start, int stop);
|
||||
|
||||
EXTERN_CVAR (Bool, r_drawflat) // [RH] Don't texture segs?
|
||||
|
||||
// BSP?
|
||||
void R_ClearClipSegs (short left, short right);
|
||||
void R_ClearDrawSegs ();
|
||||
|
@ -119,5 +122,6 @@ void R_RenderBSPNode (void *node);
|
|||
// killough 4/13/98: fake floors/ceilings for deep water / fake ceilings:
|
||||
sector_t *R_FakeFlat(sector_t *, sector_t *, int *, int *, bool);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -59,7 +59,7 @@ enum
|
|||
SIL_BOTH
|
||||
};
|
||||
|
||||
extern size_t MaxDrawSegs;
|
||||
namespace swrenderer { extern size_t MaxDrawSegs; }
|
||||
struct FDisplacement;
|
||||
|
||||
//
|
||||
|
|
4145
src/r_draw.cpp
4145
src/r_draw.cpp
File diff suppressed because it is too large
Load diff
469
src/r_draw.h
469
src/r_draw.h
|
@ -1,287 +1,208 @@
|
|||
// Emacs style mode select -*- C++ -*-
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// $Id:$
|
||||
//
|
||||
// Copyright (C) 1993-1996 by id Software, Inc.
|
||||
//
|
||||
// This source is available for distribution and/or modification
|
||||
// only under the terms of the DOOM Source Code License as
|
||||
// published by id Software. All rights reserved.
|
||||
//
|
||||
// The source is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
|
||||
// for more details.
|
||||
//
|
||||
// DESCRIPTION:
|
||||
// System specific interface stuff.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
|
||||
#ifndef __R_DRAW__
|
||||
#define __R_DRAW__
|
||||
#pragma once
|
||||
|
||||
#include "r_defs.h"
|
||||
|
||||
extern "C" int ylookup[MAXHEIGHT];
|
||||
EXTERN_CVAR(Bool, r_multithreaded);
|
||||
EXTERN_CVAR(Int, r_drawfuzz);
|
||||
EXTERN_CVAR(Bool, r_drawtrans);
|
||||
EXTERN_CVAR(Float, transsouls);
|
||||
EXTERN_CVAR(Int, r_columnmethod);
|
||||
|
||||
extern "C" int dc_pitch; // [RH] Distance between rows
|
||||
|
||||
extern "C" lighttable_t*dc_colormap;
|
||||
extern "C" int dc_x;
|
||||
extern "C" int dc_yl;
|
||||
extern "C" int dc_yh;
|
||||
extern "C" fixed_t dc_iscale;
|
||||
extern double dc_texturemid;
|
||||
extern "C" fixed_t dc_texturefrac;
|
||||
extern "C" int dc_color; // [RH] For flat colors (no texturing)
|
||||
extern "C" DWORD dc_srccolor;
|
||||
extern "C" DWORD *dc_srcblend;
|
||||
extern "C" DWORD *dc_destblend;
|
||||
|
||||
// first pixel in a column
|
||||
extern "C" const BYTE* dc_source;
|
||||
|
||||
extern "C" BYTE *dc_dest, *dc_destorg;
|
||||
extern "C" int dc_count;
|
||||
|
||||
extern "C" DWORD vplce[4];
|
||||
extern "C" DWORD vince[4];
|
||||
extern "C" BYTE* palookupoffse[4];
|
||||
extern "C" const BYTE* bufplce[4];
|
||||
extern "C" const BYTE* bufplce2[4];
|
||||
extern "C" uint32_t bufheight[4];
|
||||
|
||||
// [RH] Temporary buffer for column drawing
|
||||
extern "C" BYTE *dc_temp;
|
||||
extern "C" unsigned int dc_tspans[4][MAXHEIGHT];
|
||||
extern "C" unsigned int *dc_ctspan[4];
|
||||
extern "C" unsigned int horizspans[4];
|
||||
|
||||
|
||||
// [RH] Pointers to the different column and span drawers...
|
||||
|
||||
// The span blitting interface.
|
||||
// Hook in assembler or system specific BLT here.
|
||||
|
||||
extern DWORD (*dovline1) ();
|
||||
extern DWORD (*doprevline1) ();
|
||||
#ifdef X64_ASM
|
||||
#define dovline4 vlinetallasm4
|
||||
extern "C" void vlinetallasm4();
|
||||
#else
|
||||
extern void (*dovline4) ();
|
||||
#endif
|
||||
extern void setupvline (int);
|
||||
|
||||
extern DWORD (*domvline1) ();
|
||||
extern void (*domvline4) ();
|
||||
extern void setupmvline (int);
|
||||
|
||||
extern void setuptmvline (int);
|
||||
|
||||
// The Spectre/Invisibility effect.
|
||||
extern void R_DrawFuzzColumn(void);
|
||||
|
||||
// [RH] Draw shaded column
|
||||
extern void (*R_DrawShadedColumn)(void);
|
||||
|
||||
// Draw with color translation tables, for player sprite rendering,
|
||||
// Green/Red/Blue/Indigo shirts.
|
||||
extern void (*R_DrawTranslatedColumn)(void);
|
||||
|
||||
// Span drawing for rows, floor/ceiling. No Spectre effect needed.
|
||||
extern void (*R_DrawSpan)(void);
|
||||
void R_SetupSpanBits(FTexture *tex);
|
||||
void R_SetSpanColormap(BYTE *colormap);
|
||||
void R_SetSpanSource(const BYTE *pixels);
|
||||
|
||||
// Span drawing for masked textures.
|
||||
extern void (*R_DrawSpanMasked)(void);
|
||||
|
||||
// Span drawing for translucent textures.
|
||||
void R_DrawSpanTranslucent(void);
|
||||
|
||||
// Span drawing for masked, translucent textures.
|
||||
void R_DrawSpanMaskedTranslucent(void);
|
||||
|
||||
// Span drawing for translucent, additive textures.
|
||||
void R_DrawSpanAddClamp(void);
|
||||
|
||||
// Span drawing for masked, translucent, additive textures.
|
||||
void R_DrawSpanMaskedAddClamp(void);
|
||||
|
||||
// [RH] Span blit into an interleaved intermediate buffer
|
||||
extern void (*R_DrawColumnHoriz)(void);
|
||||
|
||||
// [RH] Initialize the above pointers
|
||||
void R_InitColumnDrawers ();
|
||||
|
||||
// [RH] Moves data from the temporary buffer to the screen.
|
||||
|
||||
void rt_copy1col(int hx, int sx, int yl, int yh);
|
||||
void rt_copy4cols(int sx, int yl, int yh);
|
||||
void rt_map4cols(int sx, int yl, int yh);
|
||||
|
||||
extern "C"
|
||||
namespace swrenderer
|
||||
{
|
||||
struct vissprite_t;
|
||||
|
||||
void rt_shaded1col (int hx, int sx, int yl, int yh);
|
||||
void rt_shaded4cols_c (int sx, int yl, int yh);
|
||||
void rt_shaded4cols_asm (int sx, int yl, int yh);
|
||||
extern double dc_texturemid;
|
||||
|
||||
void rt_map1col (int hx, int sx, int yl, int yh);
|
||||
void rt_add1col (int hx, int sx, int yl, int yh);
|
||||
void rt_addclamp1col (int hx, int sx, int yl, int yh);
|
||||
void rt_subclamp1col (int hx, int sx, int yl, int yh);
|
||||
void rt_revsubclamp1col (int hx, int sx, int yl, int yh);
|
||||
namespace drawerargs
|
||||
{
|
||||
extern int dc_pitch;
|
||||
extern lighttable_t *dc_colormap;
|
||||
extern int dc_x;
|
||||
extern int dc_yl;
|
||||
extern int dc_yh;
|
||||
extern fixed_t dc_iscale;
|
||||
extern fixed_t dc_texturefrac;
|
||||
extern uint32_t dc_textureheight;
|
||||
extern int dc_color;
|
||||
extern uint32_t dc_srccolor;
|
||||
extern uint32_t dc_srccolor_bgra;
|
||||
extern uint32_t *dc_srcblend;
|
||||
extern uint32_t *dc_destblend;
|
||||
extern fixed_t dc_srcalpha;
|
||||
extern fixed_t dc_destalpha;
|
||||
extern const uint8_t *dc_source;
|
||||
extern const uint8_t *dc_source2;
|
||||
extern uint32_t dc_texturefracx;
|
||||
extern uint8_t *dc_translation;
|
||||
extern uint8_t *dc_dest;
|
||||
extern uint8_t *dc_destorg;
|
||||
extern int dc_destheight;
|
||||
extern int dc_count;
|
||||
|
||||
void rt_tlate1col (int hx, int sx, int yl, int yh);
|
||||
void rt_tlateadd1col (int hx, int sx, int yl, int yh);
|
||||
void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh);
|
||||
void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh);
|
||||
void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh);
|
||||
extern uint32_t vplce[4];
|
||||
extern uint32_t vince[4];
|
||||
extern uint8_t *palookupoffse[4];
|
||||
extern fixed_t palookuplight[4];
|
||||
extern const uint8_t *bufplce[4];
|
||||
extern const uint8_t *bufplce2[4];
|
||||
extern uint32_t buftexturefracx[4];
|
||||
extern uint32_t bufheight[4];
|
||||
extern int vlinebits;
|
||||
extern int mvlinebits;
|
||||
extern int tmvlinebits;
|
||||
|
||||
void rt_add4cols_c (int sx, int yl, int yh);
|
||||
void rt_addclamp4cols_c (int sx, int yl, int yh);
|
||||
void rt_subclamp4cols (int sx, int yl, int yh);
|
||||
void rt_revsubclamp4cols (int sx, int yl, int yh);
|
||||
extern int ds_y;
|
||||
extern int ds_x1;
|
||||
extern int ds_x2;
|
||||
extern lighttable_t * ds_colormap;
|
||||
extern dsfixed_t ds_light;
|
||||
extern dsfixed_t ds_xfrac;
|
||||
extern dsfixed_t ds_yfrac;
|
||||
extern dsfixed_t ds_xstep;
|
||||
extern dsfixed_t ds_ystep;
|
||||
extern int ds_xbits;
|
||||
extern int ds_ybits;
|
||||
extern fixed_t ds_alpha;
|
||||
extern double ds_lod;
|
||||
extern const uint8_t *ds_source;
|
||||
extern int ds_color;
|
||||
|
||||
void rt_tlate4cols (int sx, int yl, int yh);
|
||||
void rt_tlateadd4cols (int sx, int yl, int yh);
|
||||
void rt_tlateaddclamp4cols (int sx, int yl, int yh);
|
||||
void rt_tlatesubclamp4cols (int sx, int yl, int yh);
|
||||
void rt_tlaterevsubclamp4cols (int sx, int yl, int yh);
|
||||
extern unsigned int dc_tspans[4][MAXHEIGHT];
|
||||
extern unsigned int *dc_ctspan[4];
|
||||
extern unsigned int *horizspan[4];
|
||||
}
|
||||
|
||||
void rt_add4cols_asm (int sx, int yl, int yh);
|
||||
void rt_addclamp4cols_asm (int sx, int yl, int yh);
|
||||
extern int ylookup[MAXHEIGHT];
|
||||
extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/];
|
||||
extern FDynamicColormap ShadeFakeColormap[16];
|
||||
extern uint8_t identitymap[256];
|
||||
extern FDynamicColormap identitycolormap;
|
||||
|
||||
// Spectre/Invisibility.
|
||||
#define FUZZTABLE 50
|
||||
extern int fuzzoffset[FUZZTABLE + 1];
|
||||
extern int fuzzpos;
|
||||
extern int fuzzviewheight;
|
||||
|
||||
void R_InitColumnDrawers();
|
||||
void R_InitShadeMaps();
|
||||
void R_InitFuzzTable(int fuzzoff);
|
||||
|
||||
enum ESPSResult
|
||||
{
|
||||
DontDraw, // not useful to draw this
|
||||
DoDraw0, // draw this as if r_columnmethod is 0
|
||||
DoDraw1, // draw this as if r_columnmethod is 1
|
||||
};
|
||||
|
||||
ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color);
|
||||
ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color);
|
||||
void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade
|
||||
bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)());
|
||||
|
||||
const uint8_t *R_GetColumn(FTexture *tex, int col);
|
||||
void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn);
|
||||
void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn);
|
||||
void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn);
|
||||
|
||||
void rt_initcols(uint8_t *buffer = nullptr);
|
||||
void rt_span_coverage(int x, int start, int stop);
|
||||
void rt_draw4cols(int sx);
|
||||
void rt_flip_posts();
|
||||
void rt_copy1col(int hx, int sx, int yl, int yh);
|
||||
void rt_copy4cols(int sx, int yl, int yh);
|
||||
void rt_shaded1col(int hx, int sx, int yl, int yh);
|
||||
void rt_shaded4cols(int sx, int yl, int yh);
|
||||
void rt_map1col(int hx, int sx, int yl, int yh);
|
||||
void rt_add1col(int hx, int sx, int yl, int yh);
|
||||
void rt_addclamp1col(int hx, int sx, int yl, int yh);
|
||||
void rt_subclamp1col(int hx, int sx, int yl, int yh);
|
||||
void rt_revsubclamp1col(int hx, int sx, int yl, int yh);
|
||||
void rt_tlate1col(int hx, int sx, int yl, int yh);
|
||||
void rt_tlateadd1col(int hx, int sx, int yl, int yh);
|
||||
void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh);
|
||||
void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh);
|
||||
void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh);
|
||||
void rt_map4cols(int sx, int yl, int yh);
|
||||
void rt_add4cols(int sx, int yl, int yh);
|
||||
void rt_addclamp4cols(int sx, int yl, int yh);
|
||||
void rt_subclamp4cols(int sx, int yl, int yh);
|
||||
void rt_revsubclamp4cols(int sx, int yl, int yh);
|
||||
void rt_tlate4cols(int sx, int yl, int yh);
|
||||
void rt_tlateadd4cols(int sx, int yl, int yh);
|
||||
void rt_tlateaddclamp4cols(int sx, int yl, int yh);
|
||||
void rt_tlatesubclamp4cols(int sx, int yl, int yh);
|
||||
void rt_tlaterevsubclamp4cols(int sx, int yl, int yh);
|
||||
void R_DrawColumnHoriz();
|
||||
void R_DrawColumn();
|
||||
void R_DrawFuzzColumn();
|
||||
void R_DrawTranslatedColumn();
|
||||
void R_DrawShadedColumn();
|
||||
void R_FillColumn();
|
||||
void R_FillAddColumn();
|
||||
void R_FillAddClampColumn();
|
||||
void R_FillSubClampColumn();
|
||||
void R_FillRevSubClampColumn();
|
||||
void R_DrawAddColumn();
|
||||
void R_DrawTlatedAddColumn();
|
||||
void R_DrawAddClampColumn();
|
||||
void R_DrawAddClampTranslatedColumn();
|
||||
void R_DrawSubClampColumn();
|
||||
void R_DrawSubClampTranslatedColumn();
|
||||
void R_DrawRevSubClampColumn();
|
||||
void R_DrawRevSubClampTranslatedColumn();
|
||||
void R_DrawSpan();
|
||||
void R_DrawSpanMasked();
|
||||
void R_DrawSpanTranslucent();
|
||||
void R_DrawSpanMaskedTranslucent();
|
||||
void R_DrawSpanAddClamp();
|
||||
void R_DrawSpanMaskedAddClamp();
|
||||
void R_FillSpan();
|
||||
void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy);
|
||||
void R_DrawColoredSpan(int y, int x1, int x2);
|
||||
void R_SetupDrawSlab(uint8_t *colormap);
|
||||
void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p);
|
||||
void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip);
|
||||
uint32_t vlinec1();
|
||||
void vlinec4();
|
||||
uint32_t mvlinec1();
|
||||
void mvlinec4();
|
||||
fixed_t tmvline1_add();
|
||||
void tmvline4_add();
|
||||
fixed_t tmvline1_addclamp();
|
||||
void tmvline4_addclamp();
|
||||
fixed_t tmvline1_subclamp();
|
||||
void tmvline4_subclamp();
|
||||
fixed_t tmvline1_revsubclamp();
|
||||
void tmvline4_revsubclamp();
|
||||
void R_FillColumnHoriz();
|
||||
void R_FillSpan();
|
||||
|
||||
inline uint32_t dovline1() { return vlinec1(); }
|
||||
inline void dovline4() { vlinec4(); }
|
||||
inline uint32_t domvline1() { return mvlinec1(); }
|
||||
inline void domvline4() { mvlinec4(); }
|
||||
|
||||
void setupvline(int fracbits);
|
||||
void setupmvline(int fracbits);
|
||||
void setuptmvline(int fracbits);
|
||||
|
||||
void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
|
||||
void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
|
||||
void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
|
||||
void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
|
||||
|
||||
void R_SetColorMapLight(lighttable_t *base_colormap, float light, int shade);
|
||||
void R_SetDSColorMapLight(lighttable_t *base_colormap, float light, int shade);
|
||||
void R_SetTranslationMap(lighttable_t *translation);
|
||||
|
||||
void R_SetupSpanBits(FTexture *tex);
|
||||
void R_SetSpanColormap(lighttable_t *colormap);
|
||||
void R_SetSpanSource(FTexture *tex);
|
||||
|
||||
void R_MapTiltedPlane(int y, int x1);
|
||||
void R_MapColoredPlane(int y, int x1);
|
||||
void R_DrawParticle(vissprite_t *);
|
||||
}
|
||||
|
||||
|
||||
#ifdef X86_ASM
|
||||
#define rt_shaded4cols rt_shaded4cols_asm
|
||||
#define rt_add4cols rt_add4cols_asm
|
||||
#define rt_addclamp4cols rt_addclamp4cols_asm
|
||||
#else
|
||||
#define rt_shaded4cols rt_shaded4cols_c
|
||||
#define rt_add4cols rt_add4cols_c
|
||||
#define rt_addclamp4cols rt_addclamp4cols_c
|
||||
#endif
|
||||
|
||||
void rt_flip_posts();
|
||||
void rt_draw4cols (int sx);
|
||||
|
||||
// [RH] Preps the temporary horizontal buffer.
|
||||
void rt_initcols (BYTE *buffer=NULL);
|
||||
|
||||
void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip);
|
||||
|
||||
|
||||
#ifdef X86_ASM
|
||||
|
||||
void R_DrawShadedColumnP_C (void);
|
||||
extern "C" void R_DrawSpanP_ASM (void);
|
||||
extern "C" void R_DrawSpanMaskedP_ASM (void);
|
||||
|
||||
void R_DrawColumnHorizP_C(void);
|
||||
|
||||
#else
|
||||
|
||||
void R_DrawShadedColumnP_C (void);
|
||||
void R_DrawSpanP_C (void);
|
||||
void R_DrawSpanMaskedP_C (void);
|
||||
|
||||
#endif
|
||||
|
||||
void R_DrawColumn();
|
||||
void R_DrawColumnHorizP_C(void);
|
||||
void R_DrawTranslatedColumnP_C(void);
|
||||
void R_DrawSpanTranslucent (void);
|
||||
void R_DrawSpanMaskedTranslucent (void);
|
||||
|
||||
void R_DrawTlatedLucentColumnP_C (void);
|
||||
#define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C
|
||||
|
||||
void R_FillColumnP (void);
|
||||
void R_FillColumnHorizP (void);
|
||||
void R_FillSpan (void);
|
||||
|
||||
#ifdef X86_ASM
|
||||
#define R_SetupDrawSlab R_SetupDrawSlabA
|
||||
#define R_DrawSlab R_DrawSlabA
|
||||
#else
|
||||
#define R_SetupDrawSlab R_SetupDrawSlabC
|
||||
#define R_DrawSlab R_DrawSlabC
|
||||
#endif
|
||||
|
||||
extern "C" void R_SetupDrawSlab(const BYTE *colormap);
|
||||
extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p);
|
||||
|
||||
extern "C" int ds_y;
|
||||
extern "C" int ds_x1;
|
||||
extern "C" int ds_x2;
|
||||
|
||||
extern "C" lighttable_t* ds_colormap;
|
||||
|
||||
extern "C" dsfixed_t ds_xfrac;
|
||||
extern "C" dsfixed_t ds_yfrac;
|
||||
extern "C" dsfixed_t ds_xstep;
|
||||
extern "C" dsfixed_t ds_ystep;
|
||||
extern "C" int ds_xbits;
|
||||
extern "C" int ds_ybits;
|
||||
extern "C" fixed_t ds_alpha;
|
||||
|
||||
// start of a 64*64 tile image
|
||||
extern "C" const BYTE* ds_source;
|
||||
|
||||
extern "C" int ds_color; // [RH] For flat color (no texturing)
|
||||
|
||||
extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/];
|
||||
extern FDynamicColormap ShadeFakeColormap[16];
|
||||
extern BYTE identitymap[256];
|
||||
extern BYTE *dc_translation;
|
||||
|
||||
// [RH] Added for multiresolution support
|
||||
void R_InitShadeMaps();
|
||||
void R_InitFuzzTable (int fuzzoff);
|
||||
|
||||
// [RH] Consolidate column drawer selection
|
||||
enum ESPSResult
|
||||
{
|
||||
DontDraw, // not useful to draw this
|
||||
DoDraw0, // draw this as if r_columnmethod is 0
|
||||
DoDraw1, // draw this as if r_columnmethod is 1
|
||||
};
|
||||
ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color);
|
||||
inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color)
|
||||
{
|
||||
return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color);
|
||||
}
|
||||
|
||||
// Call this after finished drawing the current thing, in case its
|
||||
// style was STYLE_Shade
|
||||
void R_FinishSetPatchStyle ();
|
||||
|
||||
// transmaskwallscan calls this to find out what column drawers to use
|
||||
bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)());
|
||||
|
||||
// Retrieve column data for wallscan. Should probably be removed
|
||||
// to just use the texture's GetColumn() method. It just exists
|
||||
// for double-layer skies.
|
||||
const BYTE *R_GetColumn (FTexture *tex, int col);
|
||||
void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
|
||||
|
||||
// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0.
|
||||
void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
|
||||
|
||||
// transmaskwallscan is like maskwallscan, but it can also blend to the background
|
||||
void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn);
|
||||
|
||||
void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
|
||||
void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
|
||||
void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
|
||||
void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
|
||||
|
||||
#endif
|
||||
|
|
2626
src/r_draw_pal.cpp
Normal file
2626
src/r_draw_pal.cpp
Normal file
File diff suppressed because it is too large
Load diff
333
src/r_draw_pal.h
Normal file
333
src/r_draw_pal.h
Normal file
|
@ -0,0 +1,333 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "r_draw.h"
|
||||
#include "v_palette.h"
|
||||
#include "r_thread.h"
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
// Base class for the single-column palette wall drawer commands.
// It only declares the state the derived commands work from; Execute() is
// supplied by each subclass and the constructor is defined outside this header.
// NOTE(review): the members presumably snapshot the swrenderer::drawerargs
// globals of matching names (dc_iscale, dc_texturefrac, dc_colormap, ...) at
// construction time - confirm against the constructor definition.
class PalWall1Command : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalWall1Command();
|
||||
// NOTE(review): the label is "PalWallCommand", identical to PalWall4Command's,
// so the two cannot be told apart by DebugInfo() - confirm this is intended.
FString DebugInfo() override { return "PalWallCommand"; }
|
||||
|
||||
protected:
|
||||
uint32_t _iscale;
|
||||
uint32_t _texturefrac;
|
||||
uint8_t *_colormap;
|
||||
int _count;
|
||||
const uint8_t *_source;
|
||||
uint8_t *_dest;
|
||||
int _vlinebits;
|
||||
int _mvlinebits;
|
||||
int _tmvlinebits;
|
||||
int _pitch;
|
||||
uint32_t *_srcblend;
|
||||
uint32_t *_destblend;
|
||||
};
|
||||
|
||||
// Base class for the four-column palette wall drawer commands.
// Holds one destination pointer plus per-column arrays of four entries;
// Execute() is supplied by each subclass and the constructor is defined
// outside this header.
// NOTE(review): the members presumably snapshot the swrenderer::drawerargs
// globals of matching names (palookupoffse, bufplce, vince, vplce, ...) at
// construction time - confirm against the constructor definition.
class PalWall4Command : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalWall4Command();
|
||||
// NOTE(review): the label is "PalWallCommand", identical to PalWall1Command's,
// so the two cannot be told apart by DebugInfo() - confirm this is intended.
FString DebugInfo() override { return "PalWallCommand"; }
|
||||
|
||||
protected:
|
||||
uint8_t *_dest;
|
||||
int _count;
|
||||
int _pitch;
|
||||
int _vlinebits;
|
||||
int _mvlinebits;
|
||||
int _tmvlinebits;
|
||||
uint8_t *_palookupoffse[4]; // one entry per column
|
||||
const uint8_t *_bufplce[4]; // one entry per column
|
||||
uint32_t _vince[4]; // one entry per column
|
||||
uint32_t _vplce[4]; // one entry per column
|
||||
uint32_t *_srcblend;
|
||||
uint32_t *_destblend;
|
||||
};
|
||||
|
||||
// Concrete wall drawer commands. Each class adds nothing but an Execute()
// override (bodies are defined outside this header). The "1" variants derive
// from PalWall1Command and the "4" variants from PalWall4Command.
class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
|
||||
|
||||
// Base class for the palette sky drawer commands.
// The constructor takes the solid_top and solid_bottom values; its definition,
// and the Execute() bodies of the subclasses, are outside this header.
// NOTE(review): the underscore-prefixed members presumably snapshot the
// swrenderer::drawerargs globals of matching names - confirm against the
// constructor definition.
class PalSkyCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom);
|
||||
FString DebugInfo() override { return "PalSkyCommand"; }
|
||||
|
||||
protected:
|
||||
uint32_t solid_top;
|
||||
uint32_t solid_bottom;
|
||||
|
||||
uint8_t *_dest;
|
||||
int _count;
|
||||
int _pitch;
|
||||
const uint8_t *_bufplce[4];
|
||||
const uint8_t *_bufplce2[4];
|
||||
// NOTE(review): declared int here, while drawerargs::bufheight is declared
// uint32_t in r_draw.h - confirm the signedness difference is intended.
int _bufheight[4];
|
||||
uint32_t _vince[4];
|
||||
uint32_t _vplce[4];
|
||||
};
|
||||
|
||||
// Concrete sky drawer commands. Each inherits PalSkyCommand's constructors via
// the using-declaration and adds only an Execute() override (bodies are
// defined outside this header).
class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
|
||||
|
||||
class PalColumnCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalColumnCommand();
|
||||
FString DebugInfo() override { return "PalColumnCommand"; }
|
||||
|
||||
protected:
|
||||
int _count;
|
||||
uint8_t *_dest;
|
||||
int _pitch;
|
||||
fixed_t _iscale;
|
||||
fixed_t _texturefrac;
|
||||
const uint8_t *_colormap;
|
||||
const uint8_t *_source;
|
||||
const uint8_t *_translation;
|
||||
int _color;
|
||||
uint32_t *_srcblend;
|
||||
uint32_t *_destblend;
|
||||
uint32_t _srccolor;
|
||||
};
|
||||
|
||||
class DrawColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class FillColumnPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class FillColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class FillColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class FillColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class FillColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnTlatedAddPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnShadedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnAddClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnAddClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRevSubClampPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRevSubClampTranslatedPalCommand : public PalColumnCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
|
||||
class DrawFuzzColumnPalCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
DrawFuzzColumnPalCommand();
|
||||
void Execute(DrawerThread *thread) override;
|
||||
FString DebugInfo() override { return "DrawFuzzColumnPalCommand"; }
|
||||
|
||||
private:
|
||||
int _yl;
|
||||
int _yh;
|
||||
int _x;
|
||||
uint8_t *_destorg;
|
||||
int _pitch;
|
||||
int _fuzzpos;
|
||||
int _fuzzviewheight;
|
||||
};
|
||||
|
||||
class PalSpanCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalSpanCommand();
|
||||
FString DebugInfo() override { return "PalSpanCommand"; }
|
||||
|
||||
protected:
|
||||
const uint8_t *_source;
|
||||
const uint8_t *_colormap;
|
||||
dsfixed_t _xfrac;
|
||||
dsfixed_t _yfrac;
|
||||
int _y;
|
||||
int _x1;
|
||||
int _x2;
|
||||
uint8_t *_destorg;
|
||||
dsfixed_t _xstep;
|
||||
dsfixed_t _ystep;
|
||||
int _xbits;
|
||||
int _ybits;
|
||||
uint32_t *_srcblend;
|
||||
uint32_t *_destblend;
|
||||
int _color;
|
||||
};
|
||||
|
||||
class DrawSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawSpanMaskedPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawSpanTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawSpanMaskedTranslucentPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawSpanAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class DrawSpanMaskedAddClampPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
class FillSpanPalCommand : public PalSpanCommand { public: void Execute(DrawerThread *thread) override; };
|
||||
|
||||
class DrawTiltedSpanPalCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
DrawTiltedSpanPalCommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy);
|
||||
void Execute(DrawerThread *thread) override;
|
||||
FString DebugInfo() override { return "DrawTiltedSpanPalCommand"; }
|
||||
|
||||
private:
|
||||
void CalcTiltedLighting(double lval, double lend, int width, DrawerThread *thread);
|
||||
|
||||
int y;
|
||||
int x1;
|
||||
int x2;
|
||||
FVector3 plane_sz;
|
||||
FVector3 plane_su;
|
||||
FVector3 plane_sv;
|
||||
bool plane_shade;
|
||||
int planeshade;
|
||||
float planelightfloat;
|
||||
fixed_t pviewx;
|
||||
fixed_t pviewy;
|
||||
|
||||
const uint8_t *_colormap;
|
||||
uint8_t *_destorg;
|
||||
int _ybits;
|
||||
int _xbits;
|
||||
const uint8_t *_source;
|
||||
uint8_t *basecolormapdata;
|
||||
};
|
||||
|
||||
class DrawColoredSpanPalCommand : public PalSpanCommand
|
||||
{
|
||||
public:
|
||||
DrawColoredSpanPalCommand(int y, int x1, int x2);
|
||||
void Execute(DrawerThread *thread) override;
|
||||
FString DebugInfo() override { return "DrawColoredSpanPalCommand"; }
|
||||
|
||||
private:
|
||||
int y;
|
||||
int x1;
|
||||
int x2;
|
||||
int color;
|
||||
uint8_t *destorg;
|
||||
};
|
||||
|
||||
class DrawSlabPalCommand : public PalSpanCommand
|
||||
{
|
||||
public:
|
||||
DrawSlabPalCommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, const uint8_t *colormap);
|
||||
void Execute(DrawerThread *thread) override;
|
||||
|
||||
private:
|
||||
int _dx;
|
||||
fixed_t _v;
|
||||
int _dy;
|
||||
fixed_t _vi;
|
||||
const uint8_t *_vvptr;
|
||||
uint8_t *_p;
|
||||
const uint8_t *_colormap;
|
||||
int _pitch;
|
||||
int _start_y;
|
||||
};
|
||||
|
||||
class DrawFogBoundaryLinePalCommand : public PalSpanCommand
|
||||
{
|
||||
public:
|
||||
DrawFogBoundaryLinePalCommand(int y, int x1, int x2);
|
||||
void Execute(DrawerThread *thread) override;
|
||||
|
||||
private:
|
||||
int y, x1, x2;
|
||||
const uint8_t *_colormap;
|
||||
uint8_t *_destorg;
|
||||
};
|
||||
|
||||
class RtInitColsPalCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
RtInitColsPalCommand(uint8_t *buff);
|
||||
void Execute(DrawerThread *thread) override;
|
||||
FString DebugInfo() override { return "RtInitColsPalCommand"; }
|
||||
|
||||
private:
|
||||
uint8_t *buff;
|
||||
};
|
||||
|
||||
class PalColumnHorizCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalColumnHorizCommand();
|
||||
|
||||
protected:
|
||||
const uint8_t *_source;
|
||||
fixed_t _iscale;
|
||||
fixed_t _texturefrac;
|
||||
int _count;
|
||||
int _color;
|
||||
int _x;
|
||||
int _yl;
|
||||
};
|
||||
|
||||
class DrawColumnHorizPalCommand : public PalColumnHorizCommand
|
||||
{
|
||||
public:
|
||||
void Execute(DrawerThread *thread) override;
|
||||
FString DebugInfo() override { return "DrawColumnHorizPalCommand"; }
|
||||
};
|
||||
|
||||
class FillColumnHorizPalCommand : public PalColumnHorizCommand
|
||||
{
|
||||
public:
|
||||
void Execute(DrawerThread *thread) override;
|
||||
FString DebugInfo() override { return "FillColumnHorizPalCommand"; }
|
||||
};
|
||||
|
||||
class PalRtCommand : public DrawerCommand
|
||||
{
|
||||
public:
|
||||
PalRtCommand(int hx, int sx, int yl, int yh);
|
||||
FString DebugInfo() override { return "PalRtCommand"; }
|
||||
|
||||
protected:
|
||||
int hx, sx, yl, yh;
|
||||
uint8_t *_destorg;
|
||||
int _pitch;
|
||||
const uint8_t *_colormap;
|
||||
const uint32_t *_srcblend;
|
||||
const uint32_t *_destblend;
|
||||
const uint8_t *_translation;
|
||||
int _color;
|
||||
};
|
||||
|
||||
class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
//class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; };
|
||||
}
|
1118
src/r_drawt.cpp
1118
src/r_drawt.cpp
File diff suppressed because it is too large
Load diff
867
src/r_drawt_pal.cpp
Normal file
867
src/r_drawt_pal.cpp
Normal file
|
@ -0,0 +1,867 @@
|
|||
/*
|
||||
** r_drawt_pal.cpp
|
||||
** Faster column drawers for modern processors
|
||||
**
|
||||
**---------------------------------------------------------------------------
|
||||
** Copyright 1998-2006 Randy Heit
|
||||
** All rights reserved.
|
||||
**
|
||||
** Redistribution and use in source and binary forms, with or without
|
||||
** modification, are permitted provided that the following conditions
|
||||
** are met:
|
||||
**
|
||||
** 1. Redistributions of source code must retain the above copyright
|
||||
** notice, this list of conditions and the following disclaimer.
|
||||
** 2. Redistributions in binary form must reproduce the above copyright
|
||||
** notice, this list of conditions and the following disclaimer in the
|
||||
** documentation and/or other materials provided with the distribution.
|
||||
** 3. The name of the author may not be used to endorse or promote products
|
||||
** derived from this software without specific prior written permission.
|
||||
**
|
||||
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**---------------------------------------------------------------------------
|
||||
**
|
||||
** These functions stretch columns into a temporary buffer and then
|
||||
** map them to the screen. On modern machines, this is faster than drawing
|
||||
** them directly to the screen.
|
||||
**
|
||||
** Will I be able to even understand any of this if I come back to it later?
|
||||
** Let's hope so. :-)
|
||||
*/
|
||||
|
||||
#include "templates.h"
|
||||
#include "doomtype.h"
|
||||
#include "doomdef.h"
|
||||
#include "r_defs.h"
|
||||
#include "r_draw.h"
|
||||
#include "r_main.h"
|
||||
#include "r_things.h"
|
||||
#include "v_video.h"
|
||||
#include "r_draw_pal.h"
|
||||
|
||||
// I should have commented this stuff better.
|
||||
//
|
||||
// dc_temp is the buffer R_DrawColumnHoriz writes into.
|
||||
// dc_tspans points into it.
|
||||
// dc_ctspan points into dc_tspans.
|
||||
// horizspan also points into dc_tspans.
|
||||
|
||||
// dc_ctspan is advanced while drawing into dc_temp.
|
||||
// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen.
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
// Remembers the caller-supplied buffer pointer (which may be null) for
// Execute() to use.
RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff)
{
	this->buff = buff;
}
|
||||
|
||||
// Points the thread's dc_temp at the buffer given to the constructor, or at
// the thread's own dc_temp_buff when that pointer is null.
void RtInitColsPalCommand::Execute(DrawerThread *thread)
{
	if (buff != nullptr)
	{
		thread->dc_temp = buff;
	}
	else
	{
		thread->dc_temp = thread->dc_temp_buff;
	}
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Snapshots the current dc_* drawer arguments into the command, so Execute()
// works from the values that were current at construction time.
PalColumnHorizCommand::PalColumnHorizCommand()
{
	using namespace drawerargs;

	// Where the column goes.
	_x = dc_x;
	_yl = dc_yl;
	_count = dc_count;

	// What gets drawn into it.
	_source = dc_source;
	_texturefrac = dc_texturefrac;
	_iscale = dc_iscale;
	_color = dc_color;
}
|
||||
|
||||
// Samples the source column into the thread's temporary buffer.
//
// One byte is written per processed row, at a stride of 4 bytes, starting at
// slot (_x & 3) of the temp line the thread reports for _yl. The texture
// position starts at _texturefrac, offset by the rows the thread skips, and
// advances by _iscale * num_cores per written row.
void DrawColumnHorizPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_yl, _count);
	if (rows <= 0)
		return;

	const uint8_t *texels = _source;
	uint8_t *out = &thread->dc_temp[(_x & 3) + thread->temp_line_for_thread(_yl) * 4];

	fixed_t step = _iscale;
	fixed_t pos = _texturefrac + step * thread->skipped_by_thread(_yl);
	step *= thread->num_cores;

	// Plain loop; it produces the same sequence of reads and writes as a
	// manually unrolled 1/2/4/8-row version.
	for (; rows > 0; --rows)
	{
		*out = texels[pos >> FRACBITS];
		out += 4;
		pos += step;
	}
}
|
||||
|
||||
// Fills this thread's rows of one temp-buffer column with a single colour.
//
// One byte (the low 8 bits of _color) is written per processed row, at a
// stride of 4 bytes, starting at slot (_x & 3) of the temp line the thread
// reports for _yl.
void FillColumnHorizPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_yl, _count);
	if (rows <= 0)
		return;

	const uint8_t fill = static_cast<uint8_t>(_color);
	uint8_t *out = &thread->dc_temp[(_x & 3) + thread->temp_line_for_thread(_yl) * 4];

	while (rows-- > 0)
	{
		*out = fill;
		out += 4;
	}
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Stores the four arguments and snapshots the dc_* drawer arguments that the
// derived Execute() implementations read.
PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh)
	: hx(hx)
	, sx(sx)
	, yl(yl)
	, yh(yh)
{
	using namespace drawerargs;

	// Destination surface.
	_destorg = dc_destorg;
	_pitch = dc_pitch;

	// Colour lookup and translation tables.
	_colormap = dc_colormap;
	_translation = dc_translation;

	// Blending inputs.
	_srcblend = dc_srcblend;
	_destblend = dc_destblend;
	_color = dc_color;
}
|
||||
|
||||
// Copies one temp-buffer column (slot hx) to the destination, unmodified.
//
// Source bytes are read at a stride of 4; destination bytes are written at a
// stride of _pitch * num_cores, starting at the row offset by the rows this
// thread skips.
void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;

	for (int row = 0; row < rows; ++row)
	{
		*out = *src;
		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Copies four adjacent temp-buffer columns to the destination, unmodified.
//
// Each processed row moves 4 consecutive bytes from the temp buffer to the
// destination row. Destination rows are _pitch * num_cores bytes apart.
//
// The copy is done byte by byte on uint8_t pointers rather than through
// int pointers, for two reasons:
//   * reading and writing uint8_t storage through an int pointer requires
//     4-byte alignment of both addresses and violates the aliasing rules;
//   * expressing the row stride in int units (pitch / sizeof(int)) silently
//     truncates whenever _pitch * num_cores is not a multiple of 4, which
//     would make every row after the first land at the wrong address.
// For aligned buffers and a stride divisible by 4 the bytes written are
// exactly the same as before.
void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch * thread->num_cores;	// in bytes

	for (; rows > 0; --rows)
	{
		out[0] = src[0];
		out[1] = src[1];
		out[2] = src[2];
		out[3] = src[3];
		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Writes one temp-buffer column (slot hx) to the destination, passing every
// byte through _colormap.
//
// Source bytes are read at a stride of 4; destination bytes are written at a
// stride of _pitch * num_cores.
void DrawColumnRt1PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint8_t *shade = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx];
	const int stride = _pitch*thread->num_cores;

	for (int row = 0; row < rows; ++row)
	{
		*out = shade[*src];
		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Writes four adjacent temp-buffer columns to the destination, passing every
// byte through _colormap.
//
// Each processed row consumes 4 consecutive temp-buffer bytes and writes 4
// consecutive destination bytes; destination rows are _pitch * num_cores
// bytes apart.
void DrawColumnRt4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint8_t *shade = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch*thread->num_cores;

	for (; rows > 0; --rows)
	{
		for (int col = 0; col < 4; ++col)
		{
			out[col] = shade[src[col]];
		}
		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Applies _translation in place to one temp-buffer column (slot hx).
//
// Nothing is written to the destination here; only the temp buffer changes.
// One byte is remapped per processed row, at a stride of 4 bytes.
void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *cell = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const uint8_t *remap = _translation;

	// Plain per-row loop; it touches the same bytes in the same order as an
	// 8-rows-at-a-time unrolled version followed by a remainder loop.
	for (; rows > 0; --rows, cell += 4)
	{
		*cell = remap[*cell];
	}
}
|
||||
|
||||
// Applies _translation in place to four adjacent temp-buffer columns.
//
// Nothing is written to the destination here; only the temp buffer changes.
// The rows are contiguous in the temp buffer (4 bytes each), so the whole
// run is remapped as one block of rows * 4 consecutive bytes.
void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread)
{
	const int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *cell = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const uint8_t *remap = _translation;

	for (uint8_t *const end = cell + rows * 4; cell != end; ++cell)
	{
		*cell = remap[*cell];
	}
}
|
||||
|
||||
// Blends one temp-buffer column (slot hx) onto the destination.
//
// Per row: the source byte goes through _colormap and then _srcblend, the
// destination byte goes through _destblend, the two table values are added,
// and the sum is reduced to a palette index via RGB32k.
void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *fgTable = _srcblend;
	const uint32_t *bgTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; rows > 0; --rows)
	{
		// Setting the 0x1f07c1f bits lets "mix & (mix >> 15)" fold the sum
		// into the index used for the RGB32k lookup.
		const uint32_t mix = (fgTable[shade[*src]] + bgTable[*out]) | 0x1f07c1f;
		*out = RGB32k.All[mix & (mix>>15)];

		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Blends four adjacent temp-buffer columns onto the destination.
//
// Per pixel: the source byte goes through _colormap and then _srcblend, the
// destination byte goes through _destblend, the two table values are added,
// and the sum is reduced to a palette index via RGB32k.
void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *fgTable = _srcblend;
	const uint32_t *bgTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; rows > 0; --rows)
	{
		// The four pixels of a row are independent of each other.
		for (int col = 0; col < 4; ++col)
		{
			const uint32_t mix = (fgTable[shade[src[col]]] + bgTable[out[col]]) | 0x1f07c1f;
			out[col] = RGB32k.All[mix & (mix>>15)];
		}

		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Draws one temp-buffer column (slot hx) as a shade of _color.
//
// Per row the _colormap output for the source byte selects two Col2RGB8
// entries: one for _color (offset level << 8 from &Col2RGB8[0][_color]) and
// one for the existing destination pixel (row 64 - level). Their sum is
// reduced to a palette index via RGB32k.
// NOTE(review): the "64 - level" index suggests _colormap yields values in
// 0..64 here - confirm against the caller.
void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint32_t *fgBase = &Col2RGB8[0][_color];
	const uint8_t *shade = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;

	for (; rows > 0; --rows)
	{
		const uint32_t level = shade[*src];
		const uint32_t mix = (Col2RGB8[64-level][*out] + fgBase[level<<8]) | 0x1f07c1f;
		*out = RGB32k.All[mix & (mix>>15)];

		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Draws four adjacent temp-buffer columns as shades of _color.
//
// Per pixel the _colormap output for the source byte selects two Col2RGB8
// entries: one for _color (offset level << 8 from &Col2RGB8[0][_color]) and
// one for the existing destination pixel (row 64 - level). Their sum is
// reduced to a palette index via RGB32k.
// NOTE(review): the "64 - level" index suggests _colormap yields values in
// 0..64 here - confirm against the caller.
void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint32_t *fgBase = &Col2RGB8[0][_color];
	const uint8_t *shade = _colormap;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch * thread->num_cores;

	for (; rows > 0; --rows)
	{
		// The four pixels of a row are independent of each other.
		for (int col = 0; col < 4; ++col)
		{
			uint32_t val = shade[src[col]];
			val = (Col2RGB8[64-val][out[col]] + fgBase[val<<8]) | 0x1f07c1f;
			out[col] = RGB32k.All[val & (val>>15)];
		}

		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Additively blends one temp-buffer column (slot hx) onto the destination,
// clamping instead of wrapping.
//
// Per row the _srcblend value (for the colormapped source byte) and the
// _destblend value (for the destination byte) are added. Bits 10, 20 and 30
// of the sum are then used as per-field overflow markers: each one that is
// set is expanded into the five bits directly below it (x - (x >> 5), e.g.
// 0x400 -> 0x3e0), forcing that field to all ones before the RGB32k lookup.
void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	const uint32_t *fgTable = _srcblend;
	const uint32_t *bgTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; rows > 0; --rows)
	{
		const uint32_t sum = fgTable[shade[*src]] + bgTable[*out];
		const uint32_t carry = sum & 0x40100400;

		uint32_t packed = (sum | 0x01f07c1f) & 0x3fffffff;
		packed |= carry - (carry >> 5);
		*out = RGB32k.All[(packed>>15) & packed];

		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Additively blends four adjacent temp-buffer columns onto the destination,
// clamping instead of wrapping.
//
// Per pixel the _srcblend value (for the colormapped source byte) and the
// _destblend value (for the destination byte) are added. Bits 10, 20 and 30
// of the sum are then used as per-field overflow markers: each one that is
// set is expanded into the five bits directly below it (x - (x >> 5), e.g.
// 0x400 -> 0x3e0), forcing that field to all ones before the RGB32k lookup.
void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(yl, yh - yl + 1);
	if (rows <= 0)
		return;

	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *src = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	const uint32_t *fgTable = _srcblend;
	const uint32_t *bgTable = _destblend;

	for (; rows > 0; --rows)
	{
		// The four pixels of a row are independent of each other.
		for (int col = 0; col < 4; ++col)
		{
			const uint32_t sum = fgTable[shade[src[col]]] + bgTable[out[col]];
			const uint32_t carry = sum & 0x40100400;

			uint32_t packed = (sum | 0x01f07c1f) & 0x3fffffff;
			packed |= carry - (carry >> 5);
			out[col] = RGB32k.All[(packed>>15) & packed];
		}

		src += 4;
		out += stride;
	}
}
|
||||
|
||||
// Subtractive (source minus destination), clamped palette blend of one
// buffered column onto the frame buffer.
//
// Source bytes are read from thread->dc_temp starting at index
// temp_line_for_thread(yl)*4 + hx and advancing four bytes per row; the
// destination starts at ylookup[yl + skipped_by_thread(yl)] + sx + _destorg
// and advances _pitch * thread->num_cores bytes per row.
//
// Per row: guard bits 10, 20 and 30 (mask 0x40100400) are set on the
// _srcblend value of the colormapped source index, then the _destblend value
// of the current destination pixel is subtracted.  Every guard bit still set
// afterwards is expanded into a mask over the five bits below it; fields
// whose guard bit did not survive are cleared to zero (presumably because the
// subtraction underflowed that channel -- confirm against the blend table
// layout).  The result is folded into a 15-bit index into RGB32k.All.
void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread)
{
	const int span = yh - yl + 1;
	int remaining = thread->count_for_thread(yl, span);
	if (remaining <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; remaining > 0; --remaining)
	{
		const uint32_t diff = (srcTable[shade[*in]] | 0x40100400) - dstTable[*out];
		const uint32_t guard = diff & 0x40100400;
		// Keep only fields whose guard bit survived, then fill the gap bits.
		const uint32_t packed = (diff & (guard - (guard >> 5))) | 0x01f07c1f;

		*out = RGB32k.All[packed & (packed >> 15)];

		in += 4;
		out += stride;
	}
}
|
||||
|
||||
// Subtractive (source minus destination), clamped palette blend of four
// adjacent buffered columns onto the frame buffer.
//
// Each row consumes four consecutive bytes of thread->dc_temp (starting at
// index temp_line_for_thread(yl)*4) and updates four consecutive destination
// bytes starting at ylookup[yl + skipped_by_thread(yl)] + sx + _destorg; the
// destination advances _pitch * thread->num_cores bytes per row.
//
// Per pixel: guard bits 10, 20 and 30 (mask 0x40100400) are set on the
// _srcblend value of the colormapped source index, then the _destblend value
// of the current destination pixel is subtracted.  Every guard bit still set
// afterwards is expanded into a mask over the five bits below it; fields
// whose guard bit did not survive are cleared to zero (presumably because the
// subtraction underflowed that channel -- confirm against the blend table
// layout).  The result is folded into a 15-bit index into RGB32k.All.
void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread)
{
	const int span = yh - yl + 1;
	int remaining = thread->count_for_thread(yl, span);
	if (remaining <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; remaining > 0; --remaining)
	{
		// Columns are processed left to right, exactly one pixel each per row.
		for (int col = 0; col < 4; ++col)
		{
			const uint32_t diff = (srcTable[shade[in[col]]] | 0x40100400) - dstTable[out[col]];
			const uint32_t guard = diff & 0x40100400;
			// Keep only fields whose guard bit survived, then fill the gap bits.
			const uint32_t packed = (diff & (guard - (guard >> 5))) | 0x01f07c1f;

			out[col] = RGB32k.All[packed & (packed >> 15)];
		}

		in += 4;
		out += stride;
	}
}
|
||||
|
||||
// Reverse-subtractive (destination minus source), clamped palette blend of
// one buffered column onto the frame buffer.
//
// Source bytes are read from thread->dc_temp starting at index
// temp_line_for_thread(yl)*4 + hx and advancing four bytes per row; the
// destination starts at ylookup[yl + skipped_by_thread(yl)] + sx + _destorg
// and advances _pitch * thread->num_cores bytes per row.
//
// Per row: guard bits 10, 20 and 30 (mask 0x40100400) are set on the
// _destblend value of the current destination pixel, then the _srcblend value
// of the colormapped source index is subtracted.  Every guard bit still set
// afterwards is expanded into a mask over the five bits below it; fields
// whose guard bit did not survive are cleared to zero (presumably because the
// subtraction underflowed that channel -- confirm against the blend table
// layout).  The result is folded into a 15-bit index into RGB32k.All.
void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread)
{
	const int span = yh - yl + 1;
	int remaining = thread->count_for_thread(yl, span);
	if (remaining <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; remaining > 0; --remaining)
	{
		const uint32_t diff = (dstTable[*out] | 0x40100400) - srcTable[shade[*in]];
		const uint32_t guard = diff & 0x40100400;
		// Keep only fields whose guard bit survived, then fill the gap bits.
		const uint32_t packed = (diff & (guard - (guard >> 5))) | 0x01f07c1f;

		*out = RGB32k.All[packed & (packed >> 15)];

		in += 4;
		out += stride;
	}
}
|
||||
|
||||
// Reverse-subtractive (destination minus source), clamped palette blend of
// four adjacent buffered columns onto the frame buffer.
//
// Each row consumes four consecutive bytes of thread->dc_temp (starting at
// index temp_line_for_thread(yl)*4) and updates four consecutive destination
// bytes starting at ylookup[yl + skipped_by_thread(yl)] + sx + _destorg; the
// destination advances _pitch * thread->num_cores bytes per row.
//
// Per pixel: guard bits 10, 20 and 30 (mask 0x40100400) are set on the
// _destblend value of the current destination pixel, then the _srcblend value
// of the colormapped source index is subtracted.  Every guard bit still set
// afterwards is expanded into a mask over the five bits below it; fields
// whose guard bit did not survive are cleared to zero (presumably because the
// subtraction underflowed that channel -- confirm against the blend table
// layout).  The result is folded into a 15-bit index into RGB32k.All.
void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread)
{
	const int span = yh - yl + 1;
	int remaining = thread->count_for_thread(yl, span);
	if (remaining <= 0)
		return;

	const uint32_t *srcTable = _srcblend;
	const uint32_t *dstTable = _destblend;
	uint8_t *out = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg;
	const uint8_t *in = &thread->dc_temp[thread->temp_line_for_thread(yl)*4];
	const int stride = _pitch * thread->num_cores;
	const uint8_t *shade = _colormap;

	for (; remaining > 0; --remaining)
	{
		// Columns are processed left to right, exactly one pixel each per row.
		for (int col = 0; col < 4; ++col)
		{
			const uint32_t diff = (dstTable[out[col]] | 0x40100400) - srcTable[shade[in[col]]];
			const uint32_t guard = diff & 0x40100400;
			// Keep only fields whose guard bit survived, then fill the gap bits.
			const uint32_t packed = (diff & (guard - (guard >> 5))) | 0x01f07c1f;

			out[col] = RGB32k.All[packed & (packed >> 15)];
		}

		in += 4;
		out += stride;
	}
}
|
||||
}
|
|
@ -58,6 +58,38 @@
|
|||
#include "v_font.h"
|
||||
#include "r_data/colormaps.h"
|
||||
#include "p_maputl.h"
|
||||
#include "r_thread.h"
|
||||
|
||||
CVAR (String, r_viewsize, "", CVAR_NOSET)
|
||||
CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE)
|
||||
|
||||
// Integer console variable r_columnmethod, declared with value 1 and the
// flags CVAR_ARCHIVE|CVAR_GLOBALCONFIG; the braces below are its change
// callback.
// Only 0 and 1 are accepted: any other value is replaced by 1, while an
// accepted value sets setsizeneeded so that the change is picked up.
// NOTE(review): the assignment self = 1 presumably re-enters this callback
// and then takes the else branch -- confirm against the CUSTOM_CVAR macro.
CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
|
||||
{
|
||||
if (self != 0 && self != 1)
|
||||
{
|
||||
self = 1;
|
||||
}
|
||||
else
|
||||
{ // Trigger the change
|
||||
setsizeneeded = true;
|
||||
}
|
||||
}
|
||||
|
||||
CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE)
|
||||
CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE)
|
||||
|
||||
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor)
|
||||
|
||||
extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
|
||||
extern cycle_t FrameCycles;
|
||||
|
||||
extern bool r_showviewer;
|
||||
|
||||
cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
using namespace drawerargs;
|
||||
|
||||
// MACROS ------------------------------------------------------------------
|
||||
|
||||
|
@ -86,9 +118,8 @@ static void R_ShutdownRenderer();
|
|||
|
||||
extern short *openings;
|
||||
extern bool r_fakingunderwater;
|
||||
extern "C" int fuzzviewheight;
|
||||
extern int fuzzviewheight;
|
||||
extern subsector_t *InSubsector;
|
||||
extern bool r_showviewer;
|
||||
|
||||
|
||||
// PRIVATE DATA DECLARATIONS -----------------------------------------------
|
||||
|
@ -100,9 +131,6 @@ bool r_dontmaplines;
|
|||
|
||||
// PUBLIC DATA DEFINITIONS -------------------------------------------------
|
||||
|
||||
CVAR (String, r_viewsize, "", CVAR_NOSET)
|
||||
CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE)
|
||||
|
||||
double r_BaseVisibility;
|
||||
double r_WallVisibility;
|
||||
double r_FloorVisibility;
|
||||
|
@ -157,8 +185,6 @@ void (*hcolfunc_post1) (int hx, int sx, int yl, int yh);
|
|||
void (*hcolfunc_post2) (int hx, int sx, int yl, int yh);
|
||||
void (*hcolfunc_post4) (int sx, int yl, int yh);
|
||||
|
||||
cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
|
||||
|
||||
// PRIVATE DATA DEFINITIONS ------------------------------------------------
|
||||
|
||||
static int lastcenteryfrac;
|
||||
|
@ -361,26 +387,6 @@ void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight,
|
|||
R_SetVisibility(R_GetVisibility());
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
//
|
||||
// CVAR r_columnmethod
|
||||
//
|
||||
// Selects which version of the seg renderers to use.
|
||||
//
|
||||
//==========================================================================
|
||||
|
||||
CUSTOM_CVAR (Int, r_columnmethod, 1, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
|
||||
{
|
||||
if (self != 0 && self != 1)
|
||||
{
|
||||
self = 1;
|
||||
}
|
||||
else
|
||||
{ // Trigger the change
|
||||
setsizeneeded = true;
|
||||
}
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
//
|
||||
// R_Init
|
||||
|
@ -455,8 +461,6 @@ void R_CopyStackedViewParameters()
|
|||
//
|
||||
//==========================================================================
|
||||
|
||||
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor)
|
||||
|
||||
void R_SetupColormap(player_t *player)
|
||||
{
|
||||
realfixedcolormap = NULL;
|
||||
|
@ -574,9 +578,6 @@ void R_SetupFreelook()
|
|||
//
|
||||
//==========================================================================
|
||||
|
||||
CVAR(Int, r_portal_recursions, 4, CVAR_ARCHIVE)
|
||||
CVAR(Bool, r_highlight_portals, false, CVAR_ARCHIVE)
|
||||
|
||||
void R_HighlightPortal (PortalDrawseg* pds)
|
||||
{
|
||||
// [ZZ] NO OVERFLOW CHECKS HERE
|
||||
|
@ -811,9 +812,6 @@ void R_SetupBuffer ()
|
|||
{
|
||||
dc_pitch = pitch;
|
||||
R_InitFuzzTable (pitch);
|
||||
#if defined(X86_ASM) || defined(X64_ASM)
|
||||
ASM_PatchPitch ();
|
||||
#endif
|
||||
}
|
||||
dc_destorg = lineptr;
|
||||
for (int i = 0; i < RenderTarget->GetHeight(); i++)
|
||||
|
@ -853,10 +851,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
|
|||
// [RH] Show off segs if r_drawflat is 1
|
||||
if (r_drawflat)
|
||||
{
|
||||
hcolfunc_pre = R_FillColumnHorizP;
|
||||
hcolfunc_pre = R_FillColumnHoriz;
|
||||
hcolfunc_post1 = rt_copy1col;
|
||||
hcolfunc_post4 = rt_copy4cols;
|
||||
colfunc = R_FillColumnP;
|
||||
colfunc = R_FillColumn;
|
||||
spanfunc = R_FillSpan;
|
||||
}
|
||||
else
|
||||
|
@ -950,6 +948,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
|
|||
{
|
||||
const bool savedviewactive = viewactive;
|
||||
|
||||
R_BeginDrawerCommands();
|
||||
|
||||
viewwidth = width;
|
||||
RenderTarget = canvas;
|
||||
bRenderingToCanvas = true;
|
||||
|
@ -961,6 +961,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas,
|
|||
|
||||
R_RenderActorView (actor, dontmaplines);
|
||||
|
||||
R_EndDrawerCommands();
|
||||
|
||||
RenderTarget = screen;
|
||||
bRenderingToCanvas = false;
|
||||
R_ExecuteSetViewSize ();
|
||||
|
@ -991,8 +993,6 @@ void R_MultiresInit ()
|
|||
// Displays statistics about rendering times
|
||||
//
|
||||
//==========================================================================
|
||||
extern cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles;
|
||||
extern cycle_t FrameCycles;
|
||||
|
||||
ADD_STAT (fps)
|
||||
{
|
||||
|
@ -1072,3 +1072,5 @@ CCMD (clearscancycles)
|
|||
bestscancycles = HUGE_VAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
12
src/r_main.h
12
src/r_main.h
|
@ -28,23 +28,26 @@
|
|||
#include "v_palette.h"
|
||||
#include "r_data/colormaps.h"
|
||||
|
||||
extern double ViewCos;
|
||||
extern double ViewSin;
|
||||
extern int viewwindowx;
|
||||
extern int viewwindowy;
|
||||
|
||||
typedef BYTE lighttable_t; // This could be wider for >8 bit display.
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
//
|
||||
// POV related.
|
||||
//
|
||||
extern bool bRenderingToCanvas;
|
||||
extern double ViewCos;
|
||||
extern double ViewSin;
|
||||
extern fixed_t viewingrangerecip;
|
||||
extern double FocalLengthX, FocalLengthY;
|
||||
extern double InvZtoScale;
|
||||
|
||||
extern double WallTMapScale2;
|
||||
|
||||
extern int viewwindowx;
|
||||
extern int viewwindowy;
|
||||
|
||||
extern double CenterX;
|
||||
extern double CenterY;
|
||||
|
@ -142,5 +145,6 @@ extern DAngle stacked_angle;
|
|||
|
||||
extern void R_CopyStackedViewParameters();
|
||||
|
||||
}
|
||||
|
||||
#endif // __R_MAIN_H__
|
||||
|
|
283
src/r_plane.cpp
283
src/r_plane.cpp
|
@ -63,10 +63,14 @@
|
|||
#pragma warning(disable:4244)
|
||||
#endif
|
||||
|
||||
CVAR(Bool, tilt, false, 0);
|
||||
CVAR(Bool, r_skyboxes, true, 0)
|
||||
|
||||
EXTERN_CVAR(Int, r_skymode)
|
||||
|
||||
//EXTERN_CVAR (Int, tx)
|
||||
//EXTERN_CVAR (Int, ty)
|
||||
namespace swrenderer
|
||||
{
|
||||
using namespace drawerargs;
|
||||
|
||||
extern subsector_t *InSubsector;
|
||||
|
||||
|
@ -132,15 +136,12 @@ extern "C" {
|
|||
// spanend holds the end of a plane span in each screen row
|
||||
//
|
||||
short spanend[MAXHEIGHT];
|
||||
BYTE *tiltlighting[MAXWIDTH];
|
||||
|
||||
int planeshade;
|
||||
FVector3 plane_sz, plane_su, plane_sv;
|
||||
float planelightfloat;
|
||||
bool plane_shade;
|
||||
fixed_t pviewx, pviewy;
|
||||
|
||||
void R_DrawTiltedPlane_ASM (int y, int x1);
|
||||
}
|
||||
|
||||
float yslope[MAXHEIGHT];
|
||||
|
@ -148,13 +149,6 @@ static fixed_t xscale, yscale;
|
|||
static double xstepscale, ystepscale;
|
||||
static double basexfrac, baseyfrac;
|
||||
|
||||
#ifdef X86_ASM
|
||||
extern "C" void R_SetSpanSource_ASM (const BYTE *flat);
|
||||
extern "C" void R_SetSpanSize_ASM (int xbits, int ybits);
|
||||
extern "C" void R_SetSpanColormap_ASM (BYTE *colormap);
|
||||
extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat);
|
||||
extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource;
|
||||
#endif
|
||||
void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked);
|
||||
|
||||
//==========================================================================
|
||||
|
@ -249,11 +243,6 @@ void R_MapPlane (int y, int x1)
|
|||
GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT);
|
||||
}
|
||||
|
||||
#ifdef X86_ASM
|
||||
if (ds_colormap != ds_curcolormap)
|
||||
R_SetSpanColormap_ASM (ds_colormap);
|
||||
#endif
|
||||
|
||||
ds_y = y;
|
||||
ds_x1 = x1;
|
||||
ds_x2 = x2;
|
||||
|
@ -261,239 +250,15 @@ void R_MapPlane (int y, int x1)
|
|||
spanfunc ();
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
//
|
||||
// R_CalcTiltedLighting
|
||||
//
|
||||
// Calculates the lighting for one row of a tilted plane. If the definition
|
||||
// of GETPALOOKUP changes, this needs to change, too.
|
||||
//
|
||||
//==========================================================================
|
||||
|
||||
extern "C" {
|
||||
void R_CalcTiltedLighting (double lval, double lend, int width)
|
||||
{
|
||||
double lstep;
|
||||
BYTE *lightfiller;
|
||||
BYTE *basecolormapdata = basecolormap->Maps;
|
||||
int i = 0;
|
||||
|
||||
if (width == 0 || lval == lend)
|
||||
{ // Constant lighting
|
||||
lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT);
|
||||
}
|
||||
else
|
||||
{
|
||||
lstep = (lend - lval) / width;
|
||||
if (lval >= MAXLIGHTVIS)
|
||||
{ // lval starts "too bright".
|
||||
lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT);
|
||||
for (; i <= width && lval >= MAXLIGHTVIS; ++i)
|
||||
{
|
||||
tiltlighting[i] = lightfiller;
|
||||
lval += lstep;
|
||||
}
|
||||
}
|
||||
if (lend >= MAXLIGHTVIS)
|
||||
{ // lend ends "too bright".
|
||||
lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT);
|
||||
for (; width > i && lend >= MAXLIGHTVIS; --width)
|
||||
{
|
||||
tiltlighting[width] = lightfiller;
|
||||
lend -= lstep;
|
||||
}
|
||||
}
|
||||
if (width > 0)
|
||||
{
|
||||
lval = FIXED2DBL(planeshade) - lval;
|
||||
lend = FIXED2DBL(planeshade) - lend;
|
||||
lstep = (lend - lval) / width;
|
||||
if (lstep < 0)
|
||||
{ // Going from dark to light
|
||||
if (lval < 1.)
|
||||
{ // All bright
|
||||
lightfiller = basecolormapdata;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (lval >= NUMCOLORMAPS)
|
||||
{ // Starts beyond the dark end
|
||||
BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT);
|
||||
while (lval >= NUMCOLORMAPS && i <= width)
|
||||
{
|
||||
tiltlighting[i++] = clight;
|
||||
lval += lstep;
|
||||
}
|
||||
if (i > width)
|
||||
return;
|
||||
}
|
||||
while (i <= width && lval >= 0)
|
||||
{
|
||||
tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT);
|
||||
lval += lstep;
|
||||
}
|
||||
lightfiller = basecolormapdata;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // Going from light to dark
|
||||
if (lval >= (NUMCOLORMAPS-1))
|
||||
{ // All dark
|
||||
lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT);
|
||||
}
|
||||
else
|
||||
{
|
||||
while (lval < 0 && i <= width)
|
||||
{
|
||||
tiltlighting[i++] = basecolormapdata;
|
||||
lval += lstep;
|
||||
}
|
||||
if (i > width)
|
||||
return;
|
||||
while (i <= width && lval < (NUMCOLORMAPS-1))
|
||||
{
|
||||
tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT);
|
||||
lval += lstep;
|
||||
}
|
||||
lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (; i <= width; i++)
|
||||
{
|
||||
tiltlighting[i] = lightfiller;
|
||||
}
|
||||
}
|
||||
} // extern "C"
|
||||
|
||||
//==========================================================================
|
||||
//
|
||||
// R_MapTiltedPlane
|
||||
//
|
||||
//==========================================================================
|
||||
|
||||
void R_MapTiltedPlane(int y, int x1)
|
||||
void R_MapTiltedPlane (int y, int x1)
|
||||
{
|
||||
int x2 = spanend[y];
|
||||
int width = x2 - x1;
|
||||
double iz, uz, vz;
|
||||
BYTE *fb;
|
||||
DWORD u, v;
|
||||
int i;
|
||||
|
||||
iz = plane_sz[2] + plane_sz[1] * (centery - y) + plane_sz[0] * (x1 - centerx);
|
||||
|
||||
// Lighting is simple. It's just linear interpolation from start to end
|
||||
if (plane_shade)
|
||||
{
|
||||
uz = (iz + plane_sz[0] * width) * planelightfloat;
|
||||
vz = iz * planelightfloat;
|
||||
R_CalcTiltedLighting(vz, uz, width);
|
||||
}
|
||||
|
||||
uz = plane_su[2] + plane_su[1] * (centery - y) + plane_su[0] * (x1 - centerx);
|
||||
vz = plane_sv[2] + plane_sv[1] * (centery - y) + plane_sv[0] * (x1 - centerx);
|
||||
|
||||
fb = ylookup[y] + x1 + dc_destorg;
|
||||
|
||||
BYTE vshift = 32 - ds_ybits;
|
||||
BYTE ushift = vshift - ds_xbits;
|
||||
int umask = ((1 << ds_xbits) - 1) << ds_ybits;
|
||||
|
||||
#if 0 // The "perfect" reference version of this routine. Pretty slow.
|
||||
// Use it only to see how things are supposed to look.
|
||||
i = 0;
|
||||
do
|
||||
{
|
||||
double z = 1.f/iz;
|
||||
|
||||
u = SQWORD(uz*z) + pviewx;
|
||||
v = SQWORD(vz*z) + pviewy;
|
||||
ds_colormap = tiltlighting[i];
|
||||
fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]];
|
||||
iz += plane_sz[0];
|
||||
uz += plane_su[0];
|
||||
vz += plane_sv[0];
|
||||
} while (--width >= 0);
|
||||
#else
|
||||
//#define SPANSIZE 32
|
||||
//#define INVSPAN 0.03125f
|
||||
//#define SPANSIZE 8
|
||||
//#define INVSPAN 0.125f
|
||||
#define SPANSIZE 16
|
||||
#define INVSPAN 0.0625f
|
||||
|
||||
double startz = 1.f/iz;
|
||||
double startu = uz*startz;
|
||||
double startv = vz*startz;
|
||||
double izstep, uzstep, vzstep;
|
||||
|
||||
izstep = plane_sz[0] * SPANSIZE;
|
||||
uzstep = plane_su[0] * SPANSIZE;
|
||||
vzstep = plane_sv[0] * SPANSIZE;
|
||||
x1 = 0;
|
||||
width++;
|
||||
|
||||
while (width >= SPANSIZE)
|
||||
{
|
||||
iz += izstep;
|
||||
uz += uzstep;
|
||||
vz += vzstep;
|
||||
|
||||
double endz = 1.f/iz;
|
||||
double endu = uz*endz;
|
||||
double endv = vz*endz;
|
||||
DWORD stepu = SQWORD((endu - startu) * INVSPAN);
|
||||
DWORD stepv = SQWORD((endv - startv) * INVSPAN);
|
||||
u = SQWORD(startu) + pviewx;
|
||||
v = SQWORD(startv) + pviewy;
|
||||
|
||||
for (i = SPANSIZE-1; i >= 0; i--)
|
||||
{
|
||||
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]);
|
||||
x1++;
|
||||
u += stepu;
|
||||
v += stepv;
|
||||
}
|
||||
startu = endu;
|
||||
startv = endv;
|
||||
width -= SPANSIZE;
|
||||
}
|
||||
if (width > 0)
|
||||
{
|
||||
if (width == 1)
|
||||
{
|
||||
u = SQWORD(startu);
|
||||
v = SQWORD(startv);
|
||||
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]);
|
||||
}
|
||||
else
|
||||
{
|
||||
double left = width;
|
||||
iz += plane_sz[0] * left;
|
||||
uz += plane_su[0] * left;
|
||||
vz += plane_sv[0] * left;
|
||||
|
||||
double endz = 1.f/iz;
|
||||
double endu = uz*endz;
|
||||
double endv = vz*endz;
|
||||
left = 1.f/left;
|
||||
DWORD stepu = SQWORD((endu - startu) * left);
|
||||
DWORD stepv = SQWORD((endv - startv) * left);
|
||||
u = SQWORD(startu) + pviewx;
|
||||
v = SQWORD(startv) + pviewy;
|
||||
|
||||
for (; width != 0; width--)
|
||||
{
|
||||
fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]);
|
||||
x1++;
|
||||
u += stepu;
|
||||
v += stepv;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy);
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
|
@ -502,9 +267,9 @@ void R_MapTiltedPlane(int y, int x1)
|
|||
//
|
||||
//==========================================================================
|
||||
|
||||
void R_MapColoredPlane (int y, int x1)
|
||||
void R_MapColoredPlane(int y, int x1)
|
||||
{
|
||||
memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1);
|
||||
R_DrawColoredSpan(y, x1, spanend[y]);
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
|
@ -1179,9 +944,6 @@ static void R_DrawSkyStriped (visplane_t *pl)
|
|||
//
|
||||
//==========================================================================
|
||||
|
||||
CVAR (Bool, tilt, false, 0);
|
||||
//CVAR (Int, pa, 0, 0)
|
||||
|
||||
int R_DrawPlanes ()
|
||||
{
|
||||
visplane_t *pl;
|
||||
|
@ -1317,7 +1079,6 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske
|
|||
// 9. Put the camera back where it was to begin with.
|
||||
//
|
||||
//==========================================================================
|
||||
CVAR (Bool, r_skyboxes, true, 0)
|
||||
static int numskyboxes;
|
||||
|
||||
void R_DrawPortals ()
|
||||
|
@ -1665,13 +1426,6 @@ void R_DrawSkyPlane (visplane_t *pl)
|
|||
|
||||
void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked)
|
||||
{
|
||||
#ifdef X86_ASM
|
||||
if (ds_source != ds_cursource)
|
||||
{
|
||||
R_SetSpanSource_ASM (ds_source);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (alpha <= 0)
|
||||
{
|
||||
return;
|
||||
|
@ -1896,14 +1650,6 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
|
|||
else
|
||||
ds_colormap = basecolormap->Maps, plane_shade = true;
|
||||
|
||||
if (!plane_shade)
|
||||
{
|
||||
for (int i = 0; i < viewwidth; ++i)
|
||||
{
|
||||
tiltlighting[i] = ds_colormap;
|
||||
}
|
||||
}
|
||||
|
||||
// Hack in support for 1 x Z and Z x 1 texture sizes
|
||||
if (ds_ybits == 0)
|
||||
{
|
||||
|
@ -1913,13 +1659,8 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t
|
|||
{
|
||||
plane_su[2] = plane_su[1] = plane_su[0] = 0;
|
||||
}
|
||||
#if defined(X86_ASM)
|
||||
if (ds_source != ds_curtiltedsource)
|
||||
R_SetTiltedSpanSource_ASM (ds_source);
|
||||
R_MapVisPlane (pl, R_DrawTiltedPlane_ASM);
|
||||
#else
|
||||
|
||||
R_MapVisPlane (pl, R_MapTiltedPlane);
|
||||
#endif
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
|
@ -2023,3 +1764,5 @@ bool R_PlaneInitData ()
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
|
@ -27,6 +27,9 @@
|
|||
|
||||
class ASkyViewpoint;
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
//
|
||||
// The infamous visplane
|
||||
//
|
||||
|
@ -113,4 +116,6 @@ bool R_PlaneInitData (void);
|
|||
extern visplane_t* floorplane;
|
||||
extern visplane_t* ceilingplane;
|
||||
|
||||
}
|
||||
|
||||
#endif // __R_PLANE_H__
|
||||
|
|
|
@ -57,10 +57,13 @@
|
|||
|
||||
|
||||
CVAR(Bool, r_np2, true, 0)
|
||||
CVAR(Bool, r_fogboundary, true, 0)
|
||||
CVAR(Bool, r_drawmirrors, true, 0)
|
||||
EXTERN_CVAR(Bool, r_fullbrightignoresectorcolor);
|
||||
|
||||
//CVAR (Int, ty, 8, 0)
|
||||
//CVAR (Int, tx, 8, 0)
|
||||
namespace swrenderer
|
||||
{
|
||||
using namespace drawerargs;
|
||||
|
||||
#define HEIGHTBITS 12
|
||||
#define HEIGHTSHIFT (FRACBITS-HEIGHTBITS)
|
||||
|
@ -141,16 +144,6 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t
|
|||
static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat);
|
||||
static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, bool mask);
|
||||
|
||||
//=============================================================================
|
||||
//
|
||||
// CVAR r_fogboundary
|
||||
//
|
||||
// If true, makes fog look more "real" by shading the walls separating two
|
||||
// sectors with different fog.
|
||||
//=============================================================================
|
||||
|
||||
CVAR(Bool, r_fogboundary, true, 0)
|
||||
|
||||
inline bool IsFogBoundary (sector_t *front, sector_t *back)
|
||||
{
|
||||
return r_fogboundary && fixedcolormap == NULL && front->ColorMap->Fade &&
|
||||
|
@ -158,14 +151,6 @@ inline bool IsFogBoundary (sector_t *front, sector_t *back)
|
|||
(front->GetTexture(sector_t::ceiling) != skyflatnum || back->GetTexture(sector_t::ceiling) != skyflatnum);
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//
|
||||
// CVAR r_drawmirrors
|
||||
//
|
||||
// Set to false to disable rendering of mirrors
|
||||
//=============================================================================
|
||||
|
||||
CVAR(Bool, r_drawmirrors, true, 0)
|
||||
|
||||
//
|
||||
// R_RenderMaskedSegRange
|
||||
|
@ -2994,3 +2979,5 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper,
|
|||
done:
|
||||
WallC = savecoord;
|
||||
}
|
||||
|
||||
}
|
|
@ -23,6 +23,9 @@
|
|||
#ifndef __R_SEGS_H__
|
||||
#define __R_SEGS_H__
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
struct drawseg_t;
|
||||
|
||||
void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2);
|
||||
|
@ -70,4 +73,6 @@ extern int CurrentPortalUniq;
|
|||
extern bool CurrentPortalInSkybox;
|
||||
extern TArray<PortalDrawseg> WallPortals;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -80,7 +80,7 @@ extern int numgamesubsectors;
|
|||
extern AActor* camera; // [RH] camera instead of viewplayer
|
||||
extern sector_t* viewsector; // [RH] keep track of sector viewing from
|
||||
|
||||
extern angle_t xtoviewangle[MAXWIDTH+1];
|
||||
namespace swrenderer { extern angle_t xtoviewangle[MAXWIDTH+1]; }
|
||||
extern DAngle FieldOfView;
|
||||
|
||||
int R_FindSkin (const char *name, int pclass); // [RH] Find a skin
|
||||
|
|
|
@ -42,13 +42,20 @@
|
|||
#include "r_3dfloors.h"
|
||||
#include "textures/textures.h"
|
||||
#include "r_data/voxels.h"
|
||||
#include "r_thread.h"
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, float trueratio);
|
||||
void R_SetupColormap(player_t *);
|
||||
void R_SetupFreelook();
|
||||
void R_InitRenderer();
|
||||
|
||||
}
|
||||
|
||||
using namespace swrenderer;
|
||||
|
||||
//==========================================================================
|
||||
//
|
||||
// DCanvas :: Init
|
||||
|
@ -154,9 +161,11 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap<PClassActor*, bool> &act
|
|||
|
||||
// Renders the view for `player`: R_RenderActorView is called on player->mo,
// then FCanvasTextureInfo::UpdateAll() runs.  Both steps are bracketed by
// R_BeginDrawerCommands() / R_EndDrawerCommands(), so the end call comes only
// after the canvas-texture update as well.
void FSoftwareRenderer::RenderView(player_t *player)
|
||||
{
|
||||
R_BeginDrawerCommands();
|
||||
R_RenderActorView (player->mo);
|
||||
// [RH] Let cameras draw onto textures that were visible this frame.
|
||||
FCanvasTextureInfo::UpdateAll ();
|
||||
R_EndDrawerCommands();
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
|
|
|
@ -64,6 +64,21 @@
|
|||
#include "r_data/voxels.h"
|
||||
#include "p_local.h"
|
||||
#include "p_maputl.h"
|
||||
#include "r_thread.h"
|
||||
|
||||
EXTERN_CVAR(Bool, st_scale)
|
||||
EXTERN_CVAR(Bool, r_shadercolormaps)
|
||||
EXTERN_CVAR(Int, r_drawfuzz)
|
||||
EXTERN_CVAR(Bool, r_deathcamera);
|
||||
EXTERN_CVAR(Bool, r_drawplayersprites)
|
||||
EXTERN_CVAR(Bool, r_drawvoxels)
|
||||
|
||||
CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||
//CVAR(Bool, r_splitsprites, true, CVAR_ARCHIVE)
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
using namespace drawerargs;
|
||||
|
||||
// [RH] A c-buffer. Used for keeping track of offscreen voxel spans.
|
||||
|
||||
|
@ -95,12 +110,6 @@ extern float MaskedScaleY;
|
|||
#define BASEXCENTER (160)
|
||||
#define BASEYCENTER (100)
|
||||
|
||||
EXTERN_CVAR (Bool, st_scale)
|
||||
EXTERN_CVAR(Bool, r_shadercolormaps)
|
||||
EXTERN_CVAR(Int, r_drawfuzz)
|
||||
EXTERN_CVAR(Bool, r_deathcamera);
|
||||
CVAR(Bool, r_fullbrightignoresectorcolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||
|
||||
//
|
||||
// Sprite rotation 0 is facing the viewer,
|
||||
// rotation 1 is one angle turn CLOCKWISE around the axis.
|
||||
|
@ -132,9 +141,6 @@ FTexture *WallSpriteTile;
|
|||
short zeroarray[MAXWIDTH];
|
||||
short screenheightarray[MAXWIDTH];
|
||||
|
||||
EXTERN_CVAR (Bool, r_drawplayersprites)
|
||||
EXTERN_CVAR (Bool, r_drawvoxels)
|
||||
|
||||
//
|
||||
// INITIALIZATION FUNCTIONS
|
||||
//
|
||||
|
@ -639,7 +645,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop
|
|||
{
|
||||
return;
|
||||
}
|
||||
if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP)
|
||||
if (colfunc == fuzzcolfunc || colfunc == R_FillColumn)
|
||||
{
|
||||
flags = DVF_OFFSCREEN | DVF_SPANSONLY;
|
||||
}
|
||||
|
@ -1758,8 +1764,6 @@ static int sd_comparex (const void *arg1, const void *arg2)
|
|||
return (*(drawseg_t **)arg2)->x2 - (*(drawseg_t **)arg1)->x2;
|
||||
}
|
||||
|
||||
CVAR (Bool, r_splitsprites, true, CVAR_ARCHIVE)
|
||||
|
||||
// Split up vissprites that intersect drawsegs
|
||||
void R_SplitVisSprites ()
|
||||
{
|
||||
|
@ -2628,7 +2632,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis)
|
|||
}
|
||||
}
|
||||
|
||||
void R_DrawParticle (vissprite_t *vis)
|
||||
void R_DrawParticle_C (vissprite_t *vis)
|
||||
{
|
||||
DWORD *bg2rgb;
|
||||
int spacing;
|
||||
|
@ -2642,6 +2646,8 @@ void R_DrawParticle (vissprite_t *vis)
|
|||
|
||||
R_DrawMaskedSegsBehindParticle (vis);
|
||||
|
||||
DrawerCommandQueue::WaitForWorkers();
|
||||
|
||||
// vis->renderflags holds translucency level (0-255)
|
||||
{
|
||||
fixed_t fglevel, bglevel;
|
||||
|
@ -3237,3 +3243,5 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly)
|
|||
OffscreenBufferWidth = width;
|
||||
OffscreenBufferHeight = height;
|
||||
}
|
||||
|
||||
}
|
|
@ -25,6 +25,12 @@
|
|||
|
||||
#include "r_bsp.h"
|
||||
|
||||
struct particle_t;
|
||||
struct FVoxel;
|
||||
|
||||
namespace swrenderer
|
||||
{
|
||||
|
||||
// A vissprite_t is a thing
|
||||
// that will be drawn during a refresh.
|
||||
// I.e. a sprite object that is partly visible.
|
||||
|
@ -95,9 +101,7 @@ struct vissprite_t
|
|||
vissprite_t() {}
|
||||
};
|
||||
|
||||
struct particle_t;
|
||||
|
||||
void R_DrawParticle (vissprite_t *);
|
||||
void R_DrawParticle_C (vissprite_t *);
|
||||
void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside);
|
||||
|
||||
extern int MaxVisSprites;
|
||||
|
@ -146,5 +150,6 @@ void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle,
|
|||
|
||||
void R_ClipVisSprite (vissprite_t *vis, int xl, int xh);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
297
src/r_thread.cpp
Normal file
297
src/r_thread.cpp
Normal file
|
@ -0,0 +1,297 @@
|
|||
/*
|
||||
** Renderer multithreading framework
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "templates.h"
|
||||
#include "doomdef.h"
|
||||
#include "i_system.h"
|
||||
#include "w_wad.h"
|
||||
#include "r_local.h"
|
||||
#include "v_video.h"
|
||||
#include "doomstat.h"
|
||||
#include "st_stuff.h"
|
||||
#include "g_game.h"
|
||||
#include "g_level.h"
|
||||
#include "r_thread.h"
|
||||
|
||||
CVAR(Bool, r_multithreaded, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||
|
||||
void R_BeginDrawerCommands()
|
||||
{
|
||||
DrawerCommandQueue::Begin();
|
||||
}
|
||||
|
||||
void R_EndDrawerCommands()
|
||||
{
|
||||
DrawerCommandQueue::End();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Returns the single shared queue. It is a function-local static, so it is
// constructed the first time this function is called.
DrawerCommandQueue *DrawerCommandQueue::Instance()
{
	static DrawerCommandQueue singleton;
	return &singleton;
}
|
||||
|
||||
// Nothing to do here: every member is set up by its in-class initializer.
DrawerCommandQueue::DrawerCommandQueue() = default;
|
||||
|
||||
DrawerCommandQueue::~DrawerCommandQueue()
{
	// Signal shutdown and join the worker threads before the mutexes and
	// condition variables they use are destroyed.
	StopThreads();
}
|
||||
|
||||
void* DrawerCommandQueue::AllocMemory(size_t size)
|
||||
{
|
||||
// Make sure allocations remain 16-byte aligned
|
||||
size = (size + 15) / 16 * 16;
|
||||
|
||||
auto queue = Instance();
|
||||
if (queue->memorypool_pos + size > memorypool_size)
|
||||
return nullptr;
|
||||
|
||||
void *data = queue->memorypool + queue->memorypool_pos;
|
||||
queue->memorypool_pos += size;
|
||||
return data;
|
||||
}
|
||||
|
||||
void DrawerCommandQueue::Begin()
|
||||
{
|
||||
auto queue = Instance();
|
||||
queue->Finish();
|
||||
queue->threaded_render++;
|
||||
}
|
||||
|
||||
void DrawerCommandQueue::End()
|
||||
{
|
||||
auto queue = Instance();
|
||||
queue->Finish();
|
||||
if (queue->threaded_render > 0)
|
||||
queue->threaded_render--;
|
||||
}
|
||||
|
||||
void DrawerCommandQueue::WaitForWorkers()
|
||||
{
|
||||
Instance()->Finish();
|
||||
}
|
||||
|
||||
void DrawerCommandQueue::Finish()
|
||||
{
|
||||
auto queue = Instance();
|
||||
if (queue->commands.empty())
|
||||
return;
|
||||
|
||||
// Give worker threads something to do:
|
||||
|
||||
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
|
||||
queue->active_commands.swap(queue->commands);
|
||||
queue->run_id++;
|
||||
start_lock.unlock();
|
||||
|
||||
queue->StartThreads();
|
||||
queue->start_condition.notify_all();
|
||||
|
||||
// Do one thread ourselves:
|
||||
|
||||
DrawerThread thread;
|
||||
thread.core = 0;
|
||||
thread.num_cores = (int)(queue->threads.size() + 1);
|
||||
|
||||
struct TryCatchData
|
||||
{
|
||||
DrawerCommandQueue *queue;
|
||||
DrawerThread *thread;
|
||||
size_t command_index;
|
||||
} data;
|
||||
|
||||
data.queue = queue;
|
||||
data.thread = &thread;
|
||||
data.command_index = 0;
|
||||
VectoredTryCatch(&data,
|
||||
[](void *data)
|
||||
{
|
||||
TryCatchData *d = (TryCatchData*)data;
|
||||
|
||||
for (int pass = 0; pass < d->queue->num_passes; pass++)
|
||||
{
|
||||
d->thread->pass_start_y = pass * d->queue->rows_in_pass;
|
||||
d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass;
|
||||
if (pass + 1 == d->queue->num_passes)
|
||||
d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT);
|
||||
|
||||
size_t size = d->queue->active_commands.size();
|
||||
for (d->command_index = 0; d->command_index < size; d->command_index++)
|
||||
{
|
||||
auto &command = d->queue->active_commands[d->command_index];
|
||||
command->Execute(d->thread);
|
||||
}
|
||||
}
|
||||
},
|
||||
[](void *data, const char *reason, bool fatal)
|
||||
{
|
||||
TryCatchData *d = (TryCatchData*)data;
|
||||
ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal);
|
||||
});
|
||||
|
||||
// Wait for everyone to finish:
|
||||
|
||||
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
|
||||
queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); });
|
||||
|
||||
if (!queue->thread_error.IsEmpty())
|
||||
{
|
||||
static bool first = true;
|
||||
if (queue->thread_error_fatal)
|
||||
I_FatalError("%s", queue->thread_error.GetChars());
|
||||
else if (first)
|
||||
Printf("%s\n", queue->thread_error.GetChars());
|
||||
first = false;
|
||||
}
|
||||
|
||||
// Clean up batch:
|
||||
|
||||
for (auto &command : queue->active_commands)
|
||||
command->~DrawerCommand();
|
||||
queue->active_commands.clear();
|
||||
queue->memorypool_pos = 0;
|
||||
queue->finished_threads = 0;
|
||||
}
|
||||
|
||||
void DrawerCommandQueue::StartThreads()
|
||||
{
|
||||
if (!threads.empty())
|
||||
return;
|
||||
|
||||
int num_threads = std::thread::hardware_concurrency();
|
||||
if (num_threads == 0)
|
||||
num_threads = 4;
|
||||
|
||||
threads.resize(num_threads - 1);
|
||||
|
||||
for (int i = 0; i < num_threads - 1; i++)
|
||||
{
|
||||
DrawerCommandQueue *queue = this;
|
||||
DrawerThread *thread = &threads[i];
|
||||
thread->core = i + 1;
|
||||
thread->num_cores = num_threads;
|
||||
thread->thread = std::thread([=]()
|
||||
{
|
||||
int run_id = 0;
|
||||
while (true)
|
||||
{
|
||||
// Wait until we are signalled to run:
|
||||
std::unique_lock<std::mutex> start_lock(queue->start_mutex);
|
||||
queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; });
|
||||
if (queue->shutdown_flag)
|
||||
break;
|
||||
run_id = queue->run_id;
|
||||
start_lock.unlock();
|
||||
|
||||
// Do the work:
|
||||
|
||||
struct TryCatchData
|
||||
{
|
||||
DrawerCommandQueue *queue;
|
||||
DrawerThread *thread;
|
||||
size_t command_index;
|
||||
} data;
|
||||
|
||||
data.queue = queue;
|
||||
data.thread = thread;
|
||||
data.command_index = 0;
|
||||
VectoredTryCatch(&data,
|
||||
[](void *data)
|
||||
{
|
||||
TryCatchData *d = (TryCatchData*)data;
|
||||
|
||||
for (int pass = 0; pass < d->queue->num_passes; pass++)
|
||||
{
|
||||
d->thread->pass_start_y = pass * d->queue->rows_in_pass;
|
||||
d->thread->pass_end_y = (pass + 1) * d->queue->rows_in_pass;
|
||||
if (pass + 1 == d->queue->num_passes)
|
||||
d->thread->pass_end_y = MAX(d->thread->pass_end_y, MAXHEIGHT);
|
||||
|
||||
size_t size = d->queue->active_commands.size();
|
||||
for (d->command_index = 0; d->command_index < size; d->command_index++)
|
||||
{
|
||||
auto &command = d->queue->active_commands[d->command_index];
|
||||
command->Execute(d->thread);
|
||||
}
|
||||
}
|
||||
},
|
||||
[](void *data, const char *reason, bool fatal)
|
||||
{
|
||||
TryCatchData *d = (TryCatchData*)data;
|
||||
ReportDrawerError(d->queue->active_commands[d->command_index], true, reason, fatal);
|
||||
});
|
||||
|
||||
// Notify main thread that we finished:
|
||||
std::unique_lock<std::mutex> end_lock(queue->end_mutex);
|
||||
queue->finished_threads++;
|
||||
end_lock.unlock();
|
||||
queue->end_condition.notify_all();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void DrawerCommandQueue::StopThreads()
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(start_mutex);
|
||||
shutdown_flag = true;
|
||||
lock.unlock();
|
||||
start_condition.notify_all();
|
||||
for (auto &thread : threads)
|
||||
thread.thread.join();
|
||||
threads.clear();
|
||||
lock.lock();
|
||||
shutdown_flag = false;
|
||||
}
|
||||
|
||||
// Records or reports an error raised while `command` was executing.
//
// worker_thread == true:  the message is stored in thread_error (under
//                         end_mutex) for Finish() to report; an already stored
//                         message is only replaced when the new error is fatal
//                         and the stored one is not.
// worker_thread == false: the error is reported right away - fatal errors go
//                         through I_FatalError, non-fatal ones are printed the
//                         first time only.
void DrawerCommandQueue::ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal)
{
	if (!worker_thread)
	{
		static bool first = true;
		if (fatal)
			I_FatalError("%s: %s", reason, command->DebugInfo().GetChars());
		else if (first)
			Printf("%s: %s\n", reason, command->DebugInfo().GetChars());
		first = false;
		return;
	}

	DrawerCommandQueue *self = Instance();
	std::lock_guard<std::mutex> guard(self->end_mutex);

	const bool nothing_recorded = self->thread_error.IsEmpty();
	if (nothing_recorded || (!self->thread_error_fatal && fatal))
	{
		self->thread_error = reason + (FString)": " + command->DebugInfo();
		self->thread_error_fatal = fatal;
	}
}
|
||||
|
||||
// Calls tryBlock(data).
// This implementation does not intercept anything, so catchBlock is accepted
// but never invoked.
void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal))
{
	(void)catchBlock;
	tryBlock(data);
}
|
235
src/r_thread.h
Normal file
235
src/r_thread.h
Normal file
|
@ -0,0 +1,235 @@
|
|||
/*
|
||||
** Renderer multithreading framework
|
||||
** Copyright (c) 2016 Magnus Norddahl
|
||||
**
|
||||
** This software is provided 'as-is', without any express or implied
|
||||
** warranty. In no event will the authors be held liable for any damages
|
||||
** arising from the use of this software.
|
||||
**
|
||||
** Permission is granted to anyone to use this software for any purpose,
|
||||
** including commercial applications, and to alter it and redistribute it
|
||||
** freely, subject to the following restrictions:
|
||||
**
|
||||
** 1. The origin of this software must not be misrepresented; you must not
|
||||
** claim that you wrote the original software. If you use this software
|
||||
** in a product, an acknowledgment in the product documentation would be
|
||||
** appreciated but is not required.
|
||||
** 2. Altered source versions must be plainly marked as such, and must not be
|
||||
** misrepresented as being the original software.
|
||||
** 3. This notice may not be removed or altered from any source distribution.
|
||||
**
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "r_draw.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
|
||||
// Use multiple threads when drawing
|
||||
EXTERN_CVAR(Bool, r_multithreaded)
|
||||
|
||||
// Redirect drawer commands to worker threads
|
||||
void R_BeginDrawerCommands();
|
||||
|
||||
// Wait until all drawers finished executing
|
||||
void R_EndDrawerCommands();
|
||||
|
||||
// Worker data for each thread executing drawer commands
|
||||
class DrawerThread
|
||||
{
|
||||
public:
|
||||
std::thread thread;
|
||||
|
||||
// Thread line index of this thread
|
||||
int core = 0;
|
||||
|
||||
// Number of active threads
|
||||
int num_cores = 1;
|
||||
|
||||
// Range of rows processed this pass
|
||||
int pass_start_y = 0;
|
||||
int pass_end_y = MAXHEIGHT;
|
||||
|
||||
// Working buffer used by Rt drawers
|
||||
uint8_t dc_temp_buff[MAXHEIGHT * 4];
|
||||
uint8_t *dc_temp = nullptr;
|
||||
|
||||
// Working buffer used by Rt drawers, true color edition
|
||||
uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4];
|
||||
uint32_t *dc_temp_rgba = nullptr;
|
||||
|
||||
// Working buffer used by the tilted (sloped) span drawer
|
||||
const uint8_t *tiltlighting[MAXWIDTH];
|
||||
|
||||
// Checks if a line is rendered by this thread
|
||||
bool line_skipped_by_thread(int line)
|
||||
{
|
||||
return line < pass_start_y || line >= pass_end_y || line % num_cores != core;
|
||||
}
|
||||
|
||||
// The number of lines to skip to reach the first line to be rendered by this thread
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
int pass_skip = MAX(pass_start_y - first_line, 0);
|
||||
int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores;
|
||||
return pass_skip + core_skip;
|
||||
}
|
||||
|
||||
// The number of lines to be rendered by this thread
|
||||
int count_for_thread(int first_line, int count)
|
||||
{
|
||||
int lines_until_pass_end = MAX(pass_end_y - first_line, 0);
|
||||
count = MIN(count, lines_until_pass_end);
|
||||
int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores;
|
||||
return MAX(c, 0);
|
||||
}
|
||||
|
||||
// Calculate the dest address for the first line to be rendered by this thread
|
||||
template<typename T>
|
||||
T *dest_for_thread(int first_line, int pitch, T *dest)
|
||||
{
|
||||
return dest + skipped_by_thread(first_line) * pitch;
|
||||
}
|
||||
|
||||
// The first line in the dc_temp buffer used this thread
|
||||
int temp_line_for_thread(int first_line)
|
||||
{
|
||||
return (first_line + skipped_by_thread(first_line)) / num_cores;
|
||||
}
|
||||
};
|
||||
|
||||
// Task to be executed by each worker thread
|
||||
class DrawerCommand
|
||||
{
|
||||
protected:
|
||||
int _dest_y;
|
||||
|
||||
void DetectRangeError(uint32_t *&dest, int &dest_y, int &count)
|
||||
{
|
||||
#if defined(_MSC_VER) && defined(_DEBUG)
|
||||
if (dest_y < 0 || count < 0 || dest_y + count > swrenderer::drawerargs::dc_destheight)
|
||||
__debugbreak(); // Buffer overrun detected!
|
||||
#endif
|
||||
|
||||
if (dest_y < 0)
|
||||
{
|
||||
count += dest_y;
|
||||
dest_y = 0;
|
||||
dest = (uint32_t*)swrenderer::drawerargs::dc_destorg;
|
||||
}
|
||||
else if (dest_y >= swrenderer::drawerargs::dc_destheight)
|
||||
{
|
||||
dest_y = 0;
|
||||
count = 0;
|
||||
}
|
||||
|
||||
if (count < 0 || count > MAXHEIGHT) count = 0;
|
||||
if (dest_y + count >= swrenderer::drawerargs::dc_destheight)
|
||||
count = swrenderer::drawerargs::dc_destheight - dest_y;
|
||||
}
|
||||
|
||||
public:
|
||||
DrawerCommand()
|
||||
{
|
||||
_dest_y = static_cast<int>((swrenderer::drawerargs::dc_dest - swrenderer::drawerargs::dc_destorg) / (swrenderer::drawerargs::dc_pitch));
|
||||
}
|
||||
|
||||
virtual ~DrawerCommand() { }
|
||||
|
||||
virtual void Execute(DrawerThread *thread) = 0;
|
||||
virtual FString DebugInfo() = 0;
|
||||
};
|
||||
|
||||
void VectoredTryCatch(void *data, void(*tryBlock)(void *data), void(*catchBlock)(void *data, const char *reason, bool fatal));
|
||||
|
||||
// Manages queueing up commands and executing them on worker threads
|
||||
class DrawerCommandQueue
|
||||
{
|
||||
enum { memorypool_size = 16 * 1024 * 1024 };
|
||||
char memorypool[memorypool_size];
|
||||
size_t memorypool_pos = 0;
|
||||
|
||||
std::vector<DrawerCommand *> commands;
|
||||
|
||||
std::vector<DrawerThread> threads;
|
||||
|
||||
std::mutex start_mutex;
|
||||
std::condition_variable start_condition;
|
||||
std::vector<DrawerCommand *> active_commands;
|
||||
bool shutdown_flag = false;
|
||||
int run_id = 0;
|
||||
|
||||
std::mutex end_mutex;
|
||||
std::condition_variable end_condition;
|
||||
size_t finished_threads = 0;
|
||||
FString thread_error;
|
||||
bool thread_error_fatal = false;
|
||||
|
||||
int threaded_render = 0;
|
||||
DrawerThread single_core_thread;
|
||||
int num_passes = 1;
|
||||
int rows_in_pass = MAXHEIGHT;
|
||||
|
||||
void StartThreads();
|
||||
void StopThreads();
|
||||
void Finish();
|
||||
|
||||
static DrawerCommandQueue *Instance();
|
||||
static void ReportDrawerError(DrawerCommand *command, bool worker_thread, const char *reason, bool fatal);
|
||||
|
||||
DrawerCommandQueue();
|
||||
~DrawerCommandQueue();
|
||||
|
||||
public:
|
||||
// Allocate memory valid for the duration of a command execution
|
||||
static void* AllocMemory(size_t size);
|
||||
|
||||
// Queue command to be executed by drawer worker threads
|
||||
template<typename T, typename... Types>
|
||||
static void QueueCommand(Types &&... args)
|
||||
{
|
||||
auto queue = Instance();
|
||||
if (queue->threaded_render == 0 || !r_multithreaded)
|
||||
{
|
||||
T command(std::forward<Types>(args)...);
|
||||
VectoredTryCatch(&command,
|
||||
[](void *data)
|
||||
{
|
||||
T *c = (T*)data;
|
||||
c->Execute(&Instance()->single_core_thread);
|
||||
},
|
||||
[](void *data, const char *reason, bool fatal)
|
||||
{
|
||||
T *c = (T*)data;
|
||||
ReportDrawerError(c, false, reason, fatal);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
void *ptr = AllocMemory(sizeof(T));
|
||||
if (!ptr) // Out of memory - render what we got
|
||||
{
|
||||
queue->Finish();
|
||||
ptr = AllocMemory(sizeof(T));
|
||||
if (!ptr)
|
||||
return;
|
||||
}
|
||||
T *command = new (ptr)T(std::forward<Types>(args)...);
|
||||
queue->commands.push_back(command);
|
||||
}
|
||||
}
|
||||
|
||||
// Redirects all drawing commands to worker threads until End is called
|
||||
// Begin/End blocks can be nested.
|
||||
static void Begin();
|
||||
|
||||
// End redirection and wait until all worker threads finished executing
|
||||
static void End();
|
||||
|
||||
// Waits until all worker threads finished executing
|
||||
static void WaitForWorkers();
|
||||
};
|
|
@ -132,6 +132,9 @@ void DCanvas::DrawTexture (FTexture *img, double x, double y, int tags_first, ..
|
|||
void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
|
||||
{
|
||||
#ifndef NO_SWRENDER
|
||||
using namespace swrenderer;
|
||||
using namespace drawerargs;
|
||||
|
||||
FTexture::Span unmaskedSpan[2];
|
||||
const FTexture::Span **spanptr, *spans;
|
||||
static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH];
|
||||
|
@ -1285,6 +1288,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
|
|||
FDynamicColormap *colormap, int lightlevel, int bottomclip)
|
||||
{
|
||||
#ifndef NO_SWRENDER
|
||||
using namespace swrenderer;
|
||||
using namespace drawerargs;
|
||||
|
||||
// Use an equation similar to player sprites to determine shade
|
||||
fixed_t shade = LIGHT2SHADE(lightlevel) - 12*FRACUNIT;
|
||||
float topy, boty, leftx, rightx;
|
||||
|
@ -1352,7 +1358,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints,
|
|||
// Setup constant texture mapping parameters.
|
||||
R_SetupSpanBits(tex);
|
||||
R_SetSpanColormap(colormap != NULL ? &colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap);
|
||||
R_SetSpanSource(tex->GetPixels());
|
||||
R_SetSpanSource(tex);
|
||||
if (ds_xbits != 0)
|
||||
{
|
||||
scalex = double(1u << (32 - ds_xbits)) / scalex;
|
||||
|
|
|
@ -106,20 +106,11 @@ CCMD (bumpgamma)
|
|||
/* Palette management stuff */
|
||||
/****************************/
|
||||
|
||||
extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal);
|
||||
|
||||
int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num)
|
||||
{
|
||||
#ifdef X86_ASM
|
||||
if (CPU.bMMX)
|
||||
{
|
||||
int pre = 256 - num - first;
|
||||
return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre;
|
||||
}
|
||||
#endif
|
||||
const PalEntry *pal = (const PalEntry *)pal_in;
|
||||
int bestcolor = first;
|
||||
int bestdist = 257*257+257*257+257*257;
|
||||
int bestdist = 257 * 257 + 257 * 257 + 257 * 257;
|
||||
|
||||
for (int color = first; color < num; color++)
|
||||
{
|
||||
|
@ -384,8 +375,8 @@ void InitPalette ()
|
|||
R_InitColormaps ();
|
||||
}
|
||||
|
||||
extern "C" void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
extern void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
|
||||
void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
{
|
||||
|
@ -395,6 +386,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
|||
{
|
||||
memcpy (to, from, count * sizeof(DWORD));
|
||||
}
|
||||
return;
|
||||
}
|
||||
else if (a == 256)
|
||||
{
|
||||
|
@ -405,6 +397,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
|||
{
|
||||
to[i] = t;
|
||||
}
|
||||
return;
|
||||
}
|
||||
#if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__)
|
||||
else if (CPU.bSSE2)
|
||||
|
@ -423,7 +416,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
|||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef X86_ASM
|
||||
#if defined(_M_IX86) || defined(__i386__)
|
||||
else if (CPU.bMMX)
|
||||
{
|
||||
if (count >= 4)
|
||||
|
|
|
@ -516,10 +516,6 @@ void V_RefreshViewBorder ();
|
|||
|
||||
void V_SetBorderNeedRefresh();
|
||||
|
||||
#if defined(X86_ASM) || defined(X64_ASM)
|
||||
extern "C" void ASM_PatchPitch (void);
|
||||
#endif
|
||||
|
||||
int CheckRatio (int width, int height, int *trueratio=NULL);
|
||||
static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); }
|
||||
inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; }
|
||||
|
|
|
@ -1375,17 +1375,16 @@ void D3DFB::Draw3DPart(bool copy3d)
|
|||
D3DCOLOR color0, color1;
|
||||
if (Accel2D)
|
||||
{
|
||||
if (realfixedcolormap == NULL)
|
||||
auto &map = swrenderer::realfixedcolormap;
|
||||
if (map == NULL)
|
||||
{
|
||||
color0 = 0;
|
||||
color1 = 0xFFFFFFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
color0 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeStart[0]/2,
|
||||
realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0);
|
||||
color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2,
|
||||
realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1);
|
||||
color0 = D3DCOLOR_COLORVALUE(map->ColorizeStart[0] / 2, map->ColorizeStart[1] / 2, map->ColorizeStart[2] / 2, 0);
|
||||
color1 = D3DCOLOR_COLORVALUE(map->ColorizeEnd[0] / 2, map->ColorizeEnd[1] / 2, map->ColorizeEnd[2] / 2, 1);
|
||||
SetPixelShader(Shaders[SHADER_SpecialColormapPal]);
|
||||
}
|
||||
}
|
||||
|
|
23
src/x86.cpp
23
src/x86.cpp
|
@ -227,10 +227,9 @@ void DumpCPUInfo(const CPUInfo *cpu)
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Compiler output for this function is crap compared to the assembly
|
||||
// version, which is why it isn't used.
|
||||
void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
#if !defined(__amd64__) && !defined(_M_X64)
|
||||
|
||||
void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
{
|
||||
__m64 blendcolor;
|
||||
__m64 blendalpha;
|
||||
|
@ -272,9 +271,6 @@ void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef X86_ASM
|
||||
extern "C" void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
#endif
|
||||
|
||||
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
{
|
||||
|
@ -288,17 +284,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
|
||||
unaligned = ((size_t)from | (size_t)to) & 0xF;
|
||||
|
||||
#ifdef X86_ASM
|
||||
// For unaligned accesses, the assembly MMX version is slightly faster.
|
||||
// Note that using unaligned SSE loads and stores is still faster than
|
||||
// the compiler-generated MMX version.
|
||||
if (unaligned)
|
||||
{
|
||||
DoBlending_MMX(from, to, count, r, g, b, a);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__amd64__) || defined(_M_X64)
|
||||
long long color;
|
||||
|
||||
|
@ -326,7 +311,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
|
||||
zero = _mm_setzero_si128();
|
||||
|
||||
#ifndef X86_ASM
|
||||
if (unaligned)
|
||||
{
|
||||
for (count >>= 2; count > 0; --count)
|
||||
|
@ -346,7 +330,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (count >>= 2; count > 0; --count)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue