mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-12-01 00:12:27 +00:00
Merge remote-tracking branch 'zdoom/master' into qzdoom
# Conflicts: # src/CMakeLists.txt # src/doomtype.h # src/r_drawt.cpp
This commit is contained in:
commit
9ed02a6aec
17 changed files with 76 additions and 4681 deletions
|
@ -15,12 +15,6 @@ include( CheckLibraryExists )
|
||||||
include( FindPkgConfig )
|
include( FindPkgConfig )
|
||||||
include( FindOpenGL )
|
include( FindOpenGL )
|
||||||
|
|
||||||
if( NOT APPLE )
|
|
||||||
option( NO_ASM "Disable assembly code" OFF )
|
|
||||||
else()
|
|
||||||
# At the moment asm code doesn't work with OS X, so disable by default
|
|
||||||
option( NO_ASM "Disable assembly code" ON )
|
|
||||||
endif()
|
|
||||||
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||||
option( NO_STRIP "Do not strip Release or MinSizeRel builds" )
|
option( NO_STRIP "Do not strip Release or MinSizeRel builds" )
|
||||||
# At least some versions of Xcode fail if you strip with the linker
|
# At least some versions of Xcode fail if you strip with the linker
|
||||||
|
@ -115,7 +109,6 @@ if( WIN32 )
|
||||||
)
|
)
|
||||||
set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc )
|
set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc )
|
||||||
set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib )
|
set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib )
|
||||||
set( NASM_NAMES nasmw nasm )
|
|
||||||
|
|
||||||
find_path( D3D_INCLUDE_DIR d3d9.h
|
find_path( D3D_INCLUDE_DIR d3d9.h
|
||||||
PATHS ENV DXSDK_DIR
|
PATHS ENV DXSDK_DIR
|
||||||
|
@ -240,7 +233,6 @@ else()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
set( NASM_NAMES nasm )
|
|
||||||
|
|
||||||
if( NO_GTK )
|
if( NO_GTK )
|
||||||
add_definitions( -DNO_GTK )
|
add_definitions( -DNO_GTK )
|
||||||
|
@ -388,105 +380,6 @@ endif()
|
||||||
|
|
||||||
find_package( FluidSynth )
|
find_package( FluidSynth )
|
||||||
|
|
||||||
# Search for NASM
|
|
||||||
|
|
||||||
if( NOT NO_ASM )
|
|
||||||
if( UNIX AND X64 )
|
|
||||||
find_program( GAS_PATH as )
|
|
||||||
|
|
||||||
if( GAS_PATH )
|
|
||||||
set( ASSEMBLER ${GAS_PATH} )
|
|
||||||
else()
|
|
||||||
message( STATUS "Could not find as. Disabling assembly code." )
|
|
||||||
set( NO_ASM ON )
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
find_program( NASM_PATH NAMES ${NASM_NAMES} )
|
|
||||||
find_program( YASM_PATH yasm )
|
|
||||||
|
|
||||||
if( X64 )
|
|
||||||
if( YASM_PATH )
|
|
||||||
set( ASSEMBLER ${YASM_PATH} )
|
|
||||||
else()
|
|
||||||
message( STATUS "Could not find YASM. Disabling assembly code." )
|
|
||||||
set( NO_ASM ON )
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
if( NASM_PATH )
|
|
||||||
set( ASSEMBLER ${NASM_PATH} )
|
|
||||||
else()
|
|
||||||
message( STATUS "Could not find NASM. Disabling assembly code." )
|
|
||||||
set( NO_ASM ON )
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# I think the only reason there was a version requirement was because the
|
|
||||||
# executable name for Windows changed from 0.x to 2.0, right? This is
|
|
||||||
# how to do it in case I need to do something similar later.
|
|
||||||
|
|
||||||
# execute_process( COMMAND ${NASM_PATH} -v
|
|
||||||
# OUTPUT_VARIABLE NASM_VER_STRING )
|
|
||||||
# string( REGEX REPLACE ".*version ([0-9]+[.][0-9]+).*" "\\1" NASM_VER "${NASM_VER_STRING}" )
|
|
||||||
# if( NOT NASM_VER LESS 2 )
|
|
||||||
# message( SEND_ERROR "NASM version should be 2 or later. (Installed version is ${NASM_VER}.)" )
|
|
||||||
# endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if( NOT NO_ASM )
|
|
||||||
# Valgrind support is meaningless without assembly code.
|
|
||||||
if( VALGRIND )
|
|
||||||
add_definitions( -DVALGRIND_AWARE=1 )
|
|
||||||
# If you're Valgrinding, you probably want to keep symbols around.
|
|
||||||
set( NO_STRIP ON )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Tell CMake how to assemble our files
|
|
||||||
if( UNIX )
|
|
||||||
set( ASM_OUTPUT_EXTENSION .o )
|
|
||||||
if( X64 )
|
|
||||||
set( ASM_FLAGS )
|
|
||||||
set( ASM_SOURCE_EXTENSION .s )
|
|
||||||
else()
|
|
||||||
if( APPLE )
|
|
||||||
set( ASM_FLAGS -fmacho -DM_TARGET_MACHO )
|
|
||||||
else()
|
|
||||||
set( ASM_FLAGS -felf -DM_TARGET_LINUX )
|
|
||||||
endif()
|
|
||||||
set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ )
|
|
||||||
set( ASM_SOURCE_EXTENSION .asm )
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
set( ASM_OUTPUT_EXTENSION .obj )
|
|
||||||
set( ASM_SOURCE_EXTENSION .asm )
|
|
||||||
if( X64 )
|
|
||||||
set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
|
|
||||||
else()
|
|
||||||
set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
if( WIN32 AND NOT X64 )
|
|
||||||
set( FIXRTEXT fixrtext )
|
|
||||||
else()
|
|
||||||
set( FIXRTEXT "" )
|
|
||||||
endif()
|
|
||||||
message( STATUS "Selected assembler: ${ASSEMBLER}" )
|
|
||||||
MACRO( ADD_ASM_FILE indir infile )
|
|
||||||
set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" )
|
|
||||||
if( WIN32 AND NOT X64 )
|
|
||||||
set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" )
|
|
||||||
else()
|
|
||||||
set( FIXRTEXT_${infile} COMMAND "" )
|
|
||||||
endif()
|
|
||||||
add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
|
|
||||||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
|
|
||||||
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
|
|
||||||
${FIXRTEXT_${infile}}
|
|
||||||
DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} )
|
|
||||||
set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
|
|
||||||
ENDMACRO()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Decide on SSE setup
|
# Decide on SSE setup
|
||||||
|
|
||||||
set( SSE_MATTERS NO )
|
set( SSE_MATTERS NO )
|
||||||
|
@ -797,43 +690,6 @@ else()
|
||||||
set( OTHER_SYSTEM_SOURCES ${PLAT_WIN32_SOURCES} ${PLAT_OSX_SOURCES} ${PLAT_COCOA_SOURCES} )
|
set( OTHER_SYSTEM_SOURCES ${PLAT_WIN32_SOURCES} ${PLAT_OSX_SOURCES} ${PLAT_COCOA_SOURCES} )
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if( HAVE_MMX )
|
|
||||||
add_definitions( -DHAVE_MMX=1 )
|
|
||||||
|
|
||||||
set( SYSTEM_SOURCES ${SYSTEM_SOURCES}
|
|
||||||
gl/hqnx_asm/hq2x_asm.cpp
|
|
||||||
gl/hqnx_asm/hq3x_asm.cpp
|
|
||||||
gl/hqnx_asm/hq4x_asm.cpp
|
|
||||||
gl/hqnx_asm/hqnx_asm_Image.cpp)
|
|
||||||
|
|
||||||
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
|
||||||
set_source_files_properties(
|
|
||||||
gl/hqnx_asm/hq2x_asm.cpp
|
|
||||||
gl/hqnx_asm/hq3x_asm.cpp
|
|
||||||
gl/hqnx_asm/hq4x_asm.cpp
|
|
||||||
gl/textures/gl_hqresize.cpp
|
|
||||||
PROPERTIES COMPILE_FLAGS "-mmmx" )
|
|
||||||
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
|
||||||
endif( HAVE_MMX )
|
|
||||||
|
|
||||||
if( NOT ASM_SOURCES )
|
|
||||||
set( ASM_SOURCES "" )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if( NO_ASM )
|
|
||||||
add_definitions( -DNOASM )
|
|
||||||
else()
|
|
||||||
if( X64 )
|
|
||||||
ADD_ASM_FILE( asm_x86_64 tmap3 )
|
|
||||||
else()
|
|
||||||
ADD_ASM_FILE( asm_ia32 a )
|
|
||||||
ADD_ASM_FILE( asm_ia32 misc )
|
|
||||||
ADD_ASM_FILE( asm_ia32 tmap )
|
|
||||||
ADD_ASM_FILE( asm_ia32 tmap2 )
|
|
||||||
ADD_ASM_FILE( asm_ia32 tmap3 )
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
||||||
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
||||||
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
||||||
|
@ -941,16 +797,6 @@ set( NOT_COMPILED_SOURCE_FILES
|
||||||
scripting/zscript/zcc-parse.lemon
|
scripting/zscript/zcc-parse.lemon
|
||||||
zcc-parse.c
|
zcc-parse.c
|
||||||
zcc-parse.h
|
zcc-parse.h
|
||||||
|
|
||||||
# We could have the ASM macro add these files, but it wouldn't add all
|
|
||||||
# platforms.
|
|
||||||
asm_ia32/a.asm
|
|
||||||
asm_ia32/misc.asm
|
|
||||||
asm_ia32/tmap.asm
|
|
||||||
asm_ia32/tmap2.asm
|
|
||||||
asm_ia32/tmap3.asm
|
|
||||||
asm_x86_64/tmap3.asm
|
|
||||||
asm_x86_64/tmap3.s
|
|
||||||
)
|
)
|
||||||
|
|
||||||
set( FASTMATH_PCH_SOURCES
|
set( FASTMATH_PCH_SOURCES
|
||||||
|
@ -1397,7 +1243,6 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE
|
||||||
${HEADER_FILES}
|
${HEADER_FILES}
|
||||||
${NOT_COMPILED_SOURCE_FILES}
|
${NOT_COMPILED_SOURCE_FILES}
|
||||||
__autostart.cpp
|
__autostart.cpp
|
||||||
${ASM_SOURCES}
|
|
||||||
${SYSTEM_SOURCES}
|
${SYSTEM_SOURCES}
|
||||||
${X86_SOURCES}
|
${X86_SOURCES}
|
||||||
${FASTMATH_SOURCES}
|
${FASTMATH_SOURCES}
|
||||||
|
@ -1568,8 +1413,6 @@ install(TARGETS zdoom
|
||||||
DESTINATION ${INSTALL_PATH}
|
DESTINATION ${INSTALL_PATH}
|
||||||
COMPONENT "Game executable")
|
COMPONENT "Game executable")
|
||||||
|
|
||||||
source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+")
|
|
||||||
source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+")
|
|
||||||
source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+")
|
source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+")
|
||||||
source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+")
|
source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+")
|
||||||
source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h)
|
source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h)
|
||||||
|
|
|
@ -1,812 +0,0 @@
|
||||||
; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
|
|
||||||
; Ken Silverman's official web site: "http://www.advsys.net/ken"
|
|
||||||
; See the included license file "BUILDLIC.TXT" for license info.
|
|
||||||
; This file has been modified from Ken Silverman's original release
|
|
||||||
|
|
||||||
%include "valgrind.inc"
|
|
||||||
|
|
||||||
SECTION .data
|
|
||||||
|
|
||||||
%ifndef M_TARGET_LINUX
|
|
||||||
%define ylookup _ylookup
|
|
||||||
%define vince _vince
|
|
||||||
%define vplce _vplce
|
|
||||||
%define palookupoffse _palookupoffse
|
|
||||||
%define bufplce _bufplce
|
|
||||||
%define dc_iscale _dc_iscale
|
|
||||||
%define dc_colormap _dc_colormap
|
|
||||||
%define dc_count _dc_count
|
|
||||||
%define dc_dest _dc_dest
|
|
||||||
%define dc_source _dc_source
|
|
||||||
%define dc_texturefrac _dc_texturefrac
|
|
||||||
|
|
||||||
%define setupvlineasm _setupvlineasm
|
|
||||||
%define prevlineasm1 _prevlineasm1
|
|
||||||
%define vlineasm1 _vlineasm1
|
|
||||||
%define vlineasm4 _vlineasm4
|
|
||||||
|
|
||||||
%define setupmvlineasm _setupmvlineasm
|
|
||||||
%define mvlineasm1 _mvlineasm1
|
|
||||||
%define mvlineasm4 _mvlineasm4
|
|
||||||
|
|
||||||
%define R_SetupDrawSlabA _R_SetupDrawSlabA
|
|
||||||
%define R_DrawSlabA _R_DrawSlabA
|
|
||||||
%endif
|
|
||||||
|
|
||||||
EXTERN ylookup ; near
|
|
||||||
|
|
||||||
EXTERN vplce ; near
|
|
||||||
EXTERN vince ; near
|
|
||||||
EXTERN palookupoffse ; near
|
|
||||||
EXTERN bufplce ; near
|
|
||||||
|
|
||||||
EXTERN dc_iscale
|
|
||||||
EXTERN dc_colormap
|
|
||||||
EXTERN dc_count
|
|
||||||
EXTERN dc_dest
|
|
||||||
EXTERN dc_source
|
|
||||||
EXTERN dc_texturefrac
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL setvlinebpl_
|
|
||||||
setvlinebpl_:
|
|
||||||
mov [fixchain1a+2], eax
|
|
||||||
mov [fixchain1b+2], eax
|
|
||||||
mov [fixchain2a+2], eax
|
|
||||||
mov [fixchain1m+2], eax
|
|
||||||
mov [fixchain2ma+2], eax
|
|
||||||
mov [fixchain2mb+2], eax
|
|
||||||
selfmod fixchain1a, fixchain2mb+6
|
|
||||||
|
|
||||||
setdrawslabbpl:
|
|
||||||
mov dword [voxbpl1+2], eax
|
|
||||||
mov dword [voxbpl2+2], eax
|
|
||||||
mov dword [voxbpl3+2], eax
|
|
||||||
mov dword [voxbpl4+2], eax
|
|
||||||
mov dword [voxbpl5+2], eax
|
|
||||||
mov dword [voxbpl6+2], eax
|
|
||||||
mov dword [voxbpl7+2], eax
|
|
||||||
mov dword [voxbpl8+2], eax
|
|
||||||
selfmod voxbpl1, voxpl8+6
|
|
||||||
ret
|
|
||||||
|
|
||||||
SECTION .data
|
|
||||||
|
|
||||||
lastslabcolormap:
|
|
||||||
dd 4
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
GLOBAL R_SetupDrawSlabA
|
|
||||||
GLOBAL @R_SetupDrawSlabA@4
|
|
||||||
R_SetupDrawSlabA:
|
|
||||||
mov ecx, [esp+4]
|
|
||||||
@R_SetupDrawSlabA@4:
|
|
||||||
cmp [lastslabcolormap], ecx
|
|
||||||
je .done
|
|
||||||
mov [lastslabcolormap], ecx
|
|
||||||
mov dword [voxpal1+2], ecx
|
|
||||||
mov dword [voxpal2+2], ecx
|
|
||||||
mov dword [voxpal3+2], ecx
|
|
||||||
mov dword [voxpal4+2], ecx
|
|
||||||
mov dword [voxpal5+2], ecx
|
|
||||||
mov dword [voxpal6+2], ecx
|
|
||||||
mov dword [voxpal7+2], ecx
|
|
||||||
mov dword [voxpal8+2], ecx
|
|
||||||
.done ret
|
|
||||||
|
|
||||||
|
|
||||||
; pass it log2(texheight)
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL setupvlineasm
|
|
||||||
setupvlineasm:
|
|
||||||
mov ecx, [esp+4]
|
|
||||||
|
|
||||||
;First 2 lines for VLINEASM1, rest for VLINEASM4
|
|
||||||
mov byte [premach3a+2], cl
|
|
||||||
mov byte [mach3a+2], cl
|
|
||||||
|
|
||||||
mov byte [machvsh1+2], cl ;32-shy
|
|
||||||
mov byte [machvsh3+2], cl ;32-shy
|
|
||||||
mov byte [machvsh5+2], cl ;32-shy
|
|
||||||
mov byte [machvsh6+2], cl ;32-shy
|
|
||||||
mov ch, cl
|
|
||||||
sub ch, 16
|
|
||||||
mov byte [machvsh8+2], ch ;16-shy
|
|
||||||
neg cl
|
|
||||||
mov byte [machvsh7+2], cl ;shy
|
|
||||||
mov byte [machvsh9+2], cl ;shy
|
|
||||||
mov byte [machvsh10+2], cl ;shy
|
|
||||||
mov byte [machvsh11+2], cl ;shy
|
|
||||||
mov byte [machvsh12+2], cl ;shy
|
|
||||||
mov eax, 1
|
|
||||||
shl eax, cl
|
|
||||||
dec eax
|
|
||||||
mov dword [machvsh2+2], eax ;(1<<shy)-1
|
|
||||||
mov dword [machvsh4+2], eax ;(1<<shy)-1
|
|
||||||
selfmod premach3a, machvsh8+6
|
|
||||||
ret
|
|
||||||
|
|
||||||
%ifdef M_TARGET_MACHO
|
|
||||||
SECTION .text align=64
|
|
||||||
%else
|
|
||||||
SECTION .rtext progbits alloc exec write align=64
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%ifdef M_TARGET_MACHO
|
|
||||||
GLOBAL _rtext_a_start
|
|
||||||
_rtext_a_start:
|
|
||||||
%endif
|
|
||||||
|
|
||||||
;eax = xscale
|
|
||||||
;ebx = palookupoffse
|
|
||||||
;ecx = # pixels to draw-1
|
|
||||||
;edx = texturefrac
|
|
||||||
;esi = texturecolumn
|
|
||||||
;edi = buffer pointer
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL prevlineasm1
|
|
||||||
prevlineasm1:
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
cmp ecx, 1
|
|
||||||
ja vlineasm1
|
|
||||||
|
|
||||||
mov eax, [dc_iscale]
|
|
||||||
mov edx, [dc_texturefrac]
|
|
||||||
add eax, edx
|
|
||||||
mov ecx, [dc_source]
|
|
||||||
premach3a: shr edx, 32
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
mov edi, [dc_colormap]
|
|
||||||
xor ebx, ebx
|
|
||||||
mov bl, byte [ecx+edx]
|
|
||||||
mov ecx, [dc_dest]
|
|
||||||
mov bl, byte [edi+ebx]
|
|
||||||
pop edi
|
|
||||||
mov byte [ecx], bl
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
GLOBAL vlineasm1
|
|
||||||
ALIGN 16
|
|
||||||
vlineasm1:
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
push esi
|
|
||||||
push ebp
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
mov ebp, [dc_colormap]
|
|
||||||
mov edi, [dc_dest]
|
|
||||||
mov eax, [dc_iscale]
|
|
||||||
mov edx, [dc_texturefrac]
|
|
||||||
mov esi, [dc_source]
|
|
||||||
fixchain1a: sub edi, 320
|
|
||||||
nop
|
|
||||||
nop
|
|
||||||
nop
|
|
||||||
beginvline:
|
|
||||||
mov ebx, edx
|
|
||||||
mach3a: shr ebx, 32
|
|
||||||
fixchain1b: add edi, 320
|
|
||||||
mov bl, byte [esi+ebx]
|
|
||||||
add edx, eax
|
|
||||||
dec ecx
|
|
||||||
mov bl, byte [ebp+ebx]
|
|
||||||
mov byte [edi], bl
|
|
||||||
jnz short beginvline
|
|
||||||
pop ebp
|
|
||||||
pop esi
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
mov eax, edx
|
|
||||||
ret
|
|
||||||
|
|
||||||
;eax: -------temp1-------
|
|
||||||
;ebx: -------temp2-------
|
|
||||||
;ecx: dat dat dat dat
|
|
||||||
;edx: ylo2 ylo4
|
|
||||||
;esi: yhi1 yhi2
|
|
||||||
;edi: ---videoplc/cnt----
|
|
||||||
;ebp: yhi3 yhi4
|
|
||||||
;esp:
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL vlineasm4
|
|
||||||
vlineasm4:
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
push ebp
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
mov edi, [dc_dest]
|
|
||||||
|
|
||||||
mov eax, dword [ylookup+ecx*4-4]
|
|
||||||
add eax, edi
|
|
||||||
mov dword [machvline4end+2], eax
|
|
||||||
sub edi, eax
|
|
||||||
|
|
||||||
mov eax, dword [bufplce+0]
|
|
||||||
mov ebx, dword [bufplce+4]
|
|
||||||
mov ecx, dword [bufplce+8]
|
|
||||||
mov edx, dword [bufplce+12]
|
|
||||||
mov dword [machvbuf1+2], ecx
|
|
||||||
mov dword [machvbuf2+2], edx
|
|
||||||
mov dword [machvbuf3+2], eax
|
|
||||||
mov dword [machvbuf4+2], ebx
|
|
||||||
|
|
||||||
mov eax, dword [palookupoffse+0]
|
|
||||||
mov ebx, dword [palookupoffse+4]
|
|
||||||
mov ecx, dword [palookupoffse+8]
|
|
||||||
mov edx, dword [palookupoffse+12]
|
|
||||||
mov dword [machvpal1+2], ecx
|
|
||||||
mov dword [machvpal2+2], edx
|
|
||||||
mov dword [machvpal3+2], eax
|
|
||||||
mov dword [machvpal4+2], ebx
|
|
||||||
|
|
||||||
; ÚÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÂÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ¿
|
|
||||||
;edx: ³v3lo ³v1lo ³
|
|
||||||
; ÃÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÂÄÄÄÄÄÄÄ´
|
|
||||||
;esi: ³v2hi v2lo ³ v3hi³
|
|
||||||
; ÃÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÅÄÄÄÄÄÄÄ´
|
|
||||||
;ebp: ³v0hi v0lo ³ v1hi³
|
|
||||||
; ÀÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÙ
|
|
||||||
|
|
||||||
mov ebp, dword [vince+0]
|
|
||||||
mov ebx, dword [vince+4]
|
|
||||||
mov esi, dword [vince+8]
|
|
||||||
mov eax, dword [vince+12]
|
|
||||||
and esi, 0fffffe00h
|
|
||||||
and ebp, 0fffffe00h
|
|
||||||
machvsh9: rol eax, 88h ;sh
|
|
||||||
machvsh10: rol ebx, 88h ;sh
|
|
||||||
mov edx, eax
|
|
||||||
mov ecx, ebx
|
|
||||||
shr ecx, 16
|
|
||||||
and edx, 0ffff0000h
|
|
||||||
add edx, ecx
|
|
||||||
and eax, 000001ffh
|
|
||||||
and ebx, 000001ffh
|
|
||||||
add esi, eax
|
|
||||||
add ebp, ebx
|
|
||||||
;
|
|
||||||
mov eax, edx
|
|
||||||
and eax, 0ffff0000h
|
|
||||||
mov dword [machvinc1+2], eax
|
|
||||||
mov dword [machvinc2+2], esi
|
|
||||||
mov byte [machvinc3+2], dl
|
|
||||||
mov byte [machvinc4+2], dh
|
|
||||||
mov dword [machvinc5+2], ebp
|
|
||||||
|
|
||||||
mov ebp, dword [vplce+0]
|
|
||||||
mov ebx, dword [vplce+4]
|
|
||||||
mov esi, dword [vplce+8]
|
|
||||||
mov eax, dword [vplce+12]
|
|
||||||
and esi, 0fffffe00h
|
|
||||||
and ebp, 0fffffe00h
|
|
||||||
machvsh11: rol eax, 88h ;sh
|
|
||||||
machvsh12: rol ebx, 88h ;sh
|
|
||||||
mov edx, eax
|
|
||||||
mov ecx, ebx
|
|
||||||
shr ecx, 16
|
|
||||||
and edx, 0ffff0000h
|
|
||||||
add edx, ecx
|
|
||||||
and eax, 000001ffh
|
|
||||||
and ebx, 000001ffh
|
|
||||||
add esi, eax
|
|
||||||
add ebp, ebx
|
|
||||||
|
|
||||||
mov ecx, esi
|
|
||||||
selfmod beginvlineasm4, machvline4end+6
|
|
||||||
jmp short beginvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
beginvlineasm4:
|
|
||||||
machvsh1: shr ecx, 88h ;32-sh
|
|
||||||
mov ebx, esi
|
|
||||||
machvsh2: and ebx, 00000088h ;(1<<sh)-1
|
|
||||||
machvinc1: add edx, 88880000h
|
|
||||||
machvinc2: adc esi, 88888088h
|
|
||||||
machvbuf1: mov cl, byte [ecx+88888888h]
|
|
||||||
machvbuf2: mov bl, byte [ebx+88888888h]
|
|
||||||
mov eax, ebp
|
|
||||||
machvsh3: shr eax, 88h ;32-sh
|
|
||||||
machvpal1: mov cl, byte [ecx+88888888h]
|
|
||||||
machvpal2: mov ch, byte [ebx+88888888h]
|
|
||||||
mov ebx, ebp
|
|
||||||
shl ecx, 16
|
|
||||||
machvsh4: and ebx, 00000088h ;(1<<sh)-1
|
|
||||||
machvinc3: add dl, 88h
|
|
||||||
machvbuf3: mov al, byte [eax+88888888h]
|
|
||||||
machvinc4: adc dh, 88h
|
|
||||||
machvbuf4: mov bl, byte [ebx+88888888h]
|
|
||||||
machvinc5: adc ebp, 88888088h
|
|
||||||
machvpal3: mov cl, byte [eax+88888888h]
|
|
||||||
machvpal4: mov ch, byte [ebx+88888888h]
|
|
||||||
machvline4end: mov dword [edi+88888888h], ecx
|
|
||||||
fixchain2a: add edi, 88888888h
|
|
||||||
mov ecx, esi
|
|
||||||
jle short beginvlineasm4
|
|
||||||
|
|
||||||
; ÚÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÂÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄ¿
|
|
||||||
;edx: ³v3lo ³v1lo ³
|
|
||||||
; ÃÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÂÄÄÄÄÄÄÄ´
|
|
||||||
;esi: ³v2hi v2lo ³ v3hi³
|
|
||||||
; ÃÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÅÄÄÄÄÄÄÄ´
|
|
||||||
;ebp: ³v0hi v0lo ³ v1hi³
|
|
||||||
; ÀÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÄÁÄÄÄÄÄÄÄÙ
|
|
||||||
|
|
||||||
mov dword [vplce+8], esi
|
|
||||||
mov dword [vplce+0], ebp
|
|
||||||
;vplc2 = (esi<<(32-sh))+(edx>>sh)
|
|
||||||
;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh))
|
|
||||||
machvsh5: shl esi, 88h ;32-sh
|
|
||||||
mov eax, edx
|
|
||||||
machvsh6: shl ebp, 88h ;32-sh
|
|
||||||
and edx, 0000ffffh
|
|
||||||
machvsh7: shr eax, 88h ;sh
|
|
||||||
add esi, eax
|
|
||||||
machvsh8: shl edx, 88h ;16-sh
|
|
||||||
add ebp, edx
|
|
||||||
mov dword [vplce+12], esi
|
|
||||||
mov dword [vplce+4], ebp
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
;*************************************************************************
|
|
||||||
;************************* Masked Vertical Lines *************************
|
|
||||||
;*************************************************************************
|
|
||||||
|
|
||||||
; pass it log2(texheight)
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL setupmvlineasm
|
|
||||||
setupmvlineasm:
|
|
||||||
mov ecx, dword [esp+4]
|
|
||||||
mov byte [maskmach3a+2], cl
|
|
||||||
mov byte [machmv13+2], cl
|
|
||||||
|
|
||||||
mov byte [machmv14+2], cl
|
|
||||||
mov byte [machmv15+2], cl
|
|
||||||
mov byte [machmv16+2], cl
|
|
||||||
selfmod maskmach3a, machmv13+6
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL mvlineasm1 ;Masked vline
|
|
||||||
mvlineasm1:
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
push esi
|
|
||||||
push ebp
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
mov ebp, [dc_colormap]
|
|
||||||
mov edi, [dc_dest]
|
|
||||||
mov eax, [dc_iscale]
|
|
||||||
mov edx, [dc_texturefrac]
|
|
||||||
mov esi, [dc_source]
|
|
||||||
beginmvline:
|
|
||||||
mov ebx, edx
|
|
||||||
maskmach3a: shr ebx, 32
|
|
||||||
movzx ebx, byte [esi+ebx]
|
|
||||||
cmp ebx, 0
|
|
||||||
je short skipmask1
|
|
||||||
maskmach3c: mov bl, byte [ebp+ebx]
|
|
||||||
mov [edi], bl
|
|
||||||
skipmask1: add edx, eax
|
|
||||||
fixchain1m: add edi, 320
|
|
||||||
dec ecx
|
|
||||||
jnz short beginmvline
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop esi
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
mov eax, edx
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL mvlineasm4
|
|
||||||
mvlineasm4:
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
mov ecx,[dc_count]
|
|
||||||
mov edi,[dc_dest]
|
|
||||||
|
|
||||||
mov eax, [bufplce+0]
|
|
||||||
mov ebx, [bufplce+4]
|
|
||||||
mov [machmv1+3], eax
|
|
||||||
mov [machmv4+3], ebx
|
|
||||||
mov eax, [bufplce+8]
|
|
||||||
mov ebx, [bufplce+12]
|
|
||||||
mov [machmv7+3], eax
|
|
||||||
mov [machmv10+3], ebx
|
|
||||||
|
|
||||||
mov eax, [palookupoffse]
|
|
||||||
mov ebx, [palookupoffse+4]
|
|
||||||
mov [machmv2+2], eax
|
|
||||||
mov [machmv5+2], ebx
|
|
||||||
mov eax, [palookupoffse+8]
|
|
||||||
mov ebx, [palookupoffse+12]
|
|
||||||
mov [machmv8+2], eax
|
|
||||||
mov [machmv11+2], ebx
|
|
||||||
|
|
||||||
mov eax, [vince] ;vince
|
|
||||||
mov ebx, [vince+4]
|
|
||||||
xor bl, bl
|
|
||||||
mov [machmv3+2], eax
|
|
||||||
mov [machmv6+2], ebx
|
|
||||||
mov eax, [vince+8]
|
|
||||||
mov ebx, [vince+12]
|
|
||||||
mov [machmv9+2], eax
|
|
||||||
mov [machmv12+2], ebx
|
|
||||||
|
|
||||||
inc ecx
|
|
||||||
push ecx
|
|
||||||
mov ecx, [vplce+0]
|
|
||||||
mov edx, [vplce+4]
|
|
||||||
mov esi, [vplce+8]
|
|
||||||
mov ebp, [vplce+12]
|
|
||||||
fixchain2ma: sub edi, 320
|
|
||||||
|
|
||||||
selfmod beginmvlineasm4, machmv2+6
|
|
||||||
jmp short beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
beginmvlineasm4:
|
|
||||||
dec dword [esp]
|
|
||||||
jz near endmvlineasm4
|
|
||||||
|
|
||||||
mov eax, ebp
|
|
||||||
mov ebx, esi
|
|
||||||
machmv16: shr eax, 32
|
|
||||||
machmv12: add ebp, 0x88888888 ;vince[3]
|
|
||||||
machmv15: shr ebx, 32
|
|
||||||
machmv9: add esi, 0x88888888 ;vince[2]
|
|
||||||
machmv10: movzx eax, byte [eax+0x88888888];bufplce[3]
|
|
||||||
machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2]
|
|
||||||
cmp eax, 1
|
|
||||||
adc dl, dl
|
|
||||||
cmp ebx, 1
|
|
||||||
adc dl, dl
|
|
||||||
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
|
|
||||||
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
|
|
||||||
|
|
||||||
mov eax, edx
|
|
||||||
machmv6: add edx, 0x88888888 ;vince[1]
|
|
||||||
machmv14: shr eax, 32
|
|
||||||
shl ebx, 16
|
|
||||||
machmv4: movzx eax, byte [eax+0x88888888];bufplce[1]
|
|
||||||
cmp eax, 1
|
|
||||||
adc dl, dl
|
|
||||||
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
|
|
||||||
|
|
||||||
mov eax, ecx
|
|
||||||
machmv3: add ecx, 0x88888888 ;vince[0]
|
|
||||||
machmv13: shr eax, 32
|
|
||||||
machmv1: movzx eax, byte [eax+0x88888888];bufplce[0]
|
|
||||||
cmp eax, 1
|
|
||||||
adc dl, dl
|
|
||||||
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
|
|
||||||
|
|
||||||
xor eax, eax
|
|
||||||
shl dl, 4
|
|
||||||
fixchain2mb: add edi, 320
|
|
||||||
mov al, dl
|
|
||||||
add eax, mvcase15
|
|
||||||
jmp eax ;16 byte cases
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
endmvlineasm4:
|
|
||||||
mov [vplce], ecx
|
|
||||||
mov [vplce+4], edx
|
|
||||||
mov [vplce+8], esi
|
|
||||||
mov [vplce+12], ebp
|
|
||||||
pop ecx
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7
|
|
||||||
ALIGN 16
|
|
||||||
mvcase15: mov [edi], ebx
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase14: mov [edi+1], bh
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+2], bx
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase13: mov [edi], bl
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+2], bx
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase12: shr ebx, 16
|
|
||||||
mov [edi+2], bx
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase11: mov [edi], bx
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+3], bh
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase10: mov [edi+1], bh
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+3], bh
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase9: mov [edi], bl
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+3], bh
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase8: shr ebx, 16
|
|
||||||
mov [edi+3], bh
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase7: mov [edi], bx
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+2], bl
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase6: shr ebx, 8
|
|
||||||
mov [edi+1], bx
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase5: mov [edi], bl
|
|
||||||
shr ebx, 16
|
|
||||||
mov [edi+2], bl
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase4: shr ebx, 16
|
|
||||||
mov [edi+2], bl
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase3: mov [edi], bx
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase2: mov [edi+1], bh
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase1: mov [edi], bl
|
|
||||||
jmp beginmvlineasm4
|
|
||||||
ALIGN 16
|
|
||||||
mvcase0: jmp beginmvlineasm4
|
|
||||||
|
|
||||||
align 16
|
|
||||||
|
|
||||||
|
|
||||||
;*************************************************************************
|
|
||||||
;***************************** Voxel Slabs *******************************
|
|
||||||
;*************************************************************************
|
|
||||||
|
|
||||||
GLOBAL R_DrawSlabA
|
|
||||||
R_DrawSlabA:
|
|
||||||
push ebx
|
|
||||||
push ebp
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
mov eax, [esp+5*4+0]
|
|
||||||
mov ebx, [esp+5*4+4]
|
|
||||||
mov ecx, [esp+5*4+8]
|
|
||||||
mov edx, [esp+5*4+12]
|
|
||||||
mov esi, [esp+5*4+16]
|
|
||||||
mov edi, [esp+5*4+20]
|
|
||||||
|
|
||||||
cmp eax, 2
|
|
||||||
je voxbegdraw2
|
|
||||||
ja voxskip2
|
|
||||||
xor eax, eax
|
|
||||||
voxbegdraw1:
|
|
||||||
mov ebp, ebx
|
|
||||||
shr ebp, 16
|
|
||||||
add ebx, edx
|
|
||||||
dec ecx
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal1: mov al, byte [eax+88888888h]
|
|
||||||
mov byte [edi], al
|
|
||||||
voxbpl1: lea edi, [edi+88888888h]
|
|
||||||
jnz voxbegdraw1
|
|
||||||
jmp voxskipslab5
|
|
||||||
|
|
||||||
voxbegdraw2:
|
|
||||||
mov ebp, ebx
|
|
||||||
shr ebp, 16
|
|
||||||
add ebx, edx
|
|
||||||
xor eax, eax
|
|
||||||
dec ecx
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal2: mov al, byte [eax+88888888h]
|
|
||||||
mov ah, al
|
|
||||||
mov word [edi], ax
|
|
||||||
voxbpl2: lea edi, [edi+88888888h]
|
|
||||||
jnz voxbegdraw2
|
|
||||||
jmp voxskipslab5
|
|
||||||
|
|
||||||
voxskip2:
|
|
||||||
cmp eax, 4
|
|
||||||
jne voxskip4
|
|
||||||
xor eax, eax
|
|
||||||
voxbegdraw4:
|
|
||||||
mov ebp, ebx
|
|
||||||
add ebx, edx
|
|
||||||
shr ebp, 16
|
|
||||||
xor eax, eax
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal3: mov al, byte [eax+88888888h]
|
|
||||||
mov ah, al
|
|
||||||
shl eax, 8
|
|
||||||
mov al, ah
|
|
||||||
shl eax, 8
|
|
||||||
mov al, ah
|
|
||||||
mov dword [edi], eax
|
|
||||||
voxbpl3: add edi, 88888888h
|
|
||||||
dec ecx
|
|
||||||
jnz voxbegdraw4
|
|
||||||
jmp voxskipslab5
|
|
||||||
|
|
||||||
voxskip4:
|
|
||||||
add eax, edi
|
|
||||||
|
|
||||||
test edi, 1
|
|
||||||
jz voxskipslab1
|
|
||||||
cmp edi, eax
|
|
||||||
je voxskipslab1
|
|
||||||
|
|
||||||
push eax
|
|
||||||
push ebx
|
|
||||||
push ecx
|
|
||||||
push edi
|
|
||||||
voxbegslab1:
|
|
||||||
mov ebp, ebx
|
|
||||||
add ebx, edx
|
|
||||||
shr ebp, 16
|
|
||||||
xor eax, eax
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal4: mov al, byte [eax+88888888h]
|
|
||||||
mov byte [edi], al
|
|
||||||
voxbpl4: add edi, 88888888h
|
|
||||||
dec ecx
|
|
||||||
jnz voxbegslab1
|
|
||||||
pop edi
|
|
||||||
pop ecx
|
|
||||||
pop ebx
|
|
||||||
pop eax
|
|
||||||
inc edi
|
|
||||||
|
|
||||||
voxskipslab1:
|
|
||||||
push eax
|
|
||||||
test edi, 2
|
|
||||||
jz voxskipslab2
|
|
||||||
dec eax
|
|
||||||
cmp edi, eax
|
|
||||||
jge voxskipslab2
|
|
||||||
|
|
||||||
push ebx
|
|
||||||
push ecx
|
|
||||||
push edi
|
|
||||||
voxbegslab2:
|
|
||||||
mov ebp, ebx
|
|
||||||
add ebx, edx
|
|
||||||
shr ebp, 16
|
|
||||||
xor eax, eax
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal5: mov al, byte [eax+88888888h]
|
|
||||||
mov ah, al
|
|
||||||
mov word [edi], ax
|
|
||||||
voxbpl5: add edi, 88888888h
|
|
||||||
dec ecx
|
|
||||||
jnz voxbegslab2
|
|
||||||
pop edi
|
|
||||||
pop ecx
|
|
||||||
pop ebx
|
|
||||||
add edi, 2
|
|
||||||
|
|
||||||
voxskipslab2:
|
|
||||||
mov eax, [esp]
|
|
||||||
|
|
||||||
sub eax, 3
|
|
||||||
cmp edi, eax
|
|
||||||
jge voxskipslab3
|
|
||||||
|
|
||||||
voxprebegslab3:
|
|
||||||
push ebx
|
|
||||||
push ecx
|
|
||||||
push edi
|
|
||||||
voxbegslab3:
|
|
||||||
mov ebp, ebx
|
|
||||||
add ebx, edx
|
|
||||||
shr ebp, 16
|
|
||||||
xor eax, eax
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal6: mov al, byte [eax+88888888h]
|
|
||||||
mov ah, al
|
|
||||||
shl eax, 8
|
|
||||||
mov al, ah
|
|
||||||
shl eax, 8
|
|
||||||
mov al, ah
|
|
||||||
mov dword [edi], eax
|
|
||||||
voxbpl6: add edi, 88888888h
|
|
||||||
dec ecx
|
|
||||||
jnz voxbegslab3
|
|
||||||
pop edi
|
|
||||||
pop ecx
|
|
||||||
pop ebx
|
|
||||||
add edi, 4
|
|
||||||
|
|
||||||
mov eax, [esp]
|
|
||||||
|
|
||||||
sub eax, 3
|
|
||||||
cmp edi, eax
|
|
||||||
jl voxprebegslab3
|
|
||||||
|
|
||||||
voxskipslab3:
|
|
||||||
mov eax, [esp]
|
|
||||||
|
|
||||||
dec eax
|
|
||||||
cmp edi, eax
|
|
||||||
jge voxskipslab4
|
|
||||||
|
|
||||||
push ebx
|
|
||||||
push ecx
|
|
||||||
push edi
|
|
||||||
voxbegslab4:
|
|
||||||
mov ebp, ebx
|
|
||||||
add ebx, edx
|
|
||||||
shr ebp, 16
|
|
||||||
xor eax, eax
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal7: mov al, byte [eax+88888888h]
|
|
||||||
mov ah, al
|
|
||||||
mov word [edi], ax
|
|
||||||
voxbpl7: add edi, 88888888h
|
|
||||||
dec ecx
|
|
||||||
jnz voxbegslab4
|
|
||||||
pop edi
|
|
||||||
pop ecx
|
|
||||||
pop ebx
|
|
||||||
add edi, 2
|
|
||||||
|
|
||||||
voxskipslab4:
|
|
||||||
pop eax
|
|
||||||
|
|
||||||
cmp edi, eax
|
|
||||||
je voxskipslab5
|
|
||||||
|
|
||||||
voxbegslab5:
|
|
||||||
mov ebp, ebx
|
|
||||||
add ebx, edx
|
|
||||||
shr ebp, 16
|
|
||||||
xor eax, eax
|
|
||||||
mov al, byte [esi+ebp]
|
|
||||||
voxpal8: mov al, byte [eax+88888888h]
|
|
||||||
mov byte [edi], al
|
|
||||||
voxbpl8: add edi, 88888888h
|
|
||||||
dec ecx
|
|
||||||
jnz voxbegslab5
|
|
||||||
|
|
||||||
voxskipslab5:
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebp
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
align 16
|
|
||||||
|
|
||||||
%ifdef M_TARGET_MACHO
|
|
||||||
GLOBAL _rtext_a_end
|
|
||||||
_rtext_a_end:
|
|
||||||
%endif
|
|
|
@ -1,200 +0,0 @@
|
||||||
;*
|
|
||||||
;* misc.nas
|
|
||||||
;* Miscellaneous assembly functions
|
|
||||||
;*
|
|
||||||
;*---------------------------------------------------------------------------
|
|
||||||
;* Copyright 1998-2006 Randy Heit
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* 1. Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in the
|
|
||||||
;* documentation and/or other materials provided with the distribution.
|
|
||||||
;* 3. The name of the author may not be used to endorse or promote products
|
|
||||||
;* derived from this software without specific prior written permission.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
||||||
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
||||||
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
||||||
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
||||||
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*---------------------------------------------------------------------------
|
|
||||||
;*
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
%ifndef M_TARGET_LINUX
|
|
||||||
|
|
||||||
%define DoBlending_MMX _DoBlending_MMX
|
|
||||||
%define BestColor_MMX _BestColor_MMX
|
|
||||||
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%ifdef M_TARGET_WATCOM
|
|
||||||
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
|
|
||||||
SEGMENT DATA
|
|
||||||
%else
|
|
||||||
SECTION .data
|
|
||||||
%endif
|
|
||||||
|
|
||||||
Blending256:
|
|
||||||
dd 0x01000100,0x00000100
|
|
||||||
|
|
||||||
%ifdef M_TARGET_WATCOM
|
|
||||||
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
|
|
||||||
SEGMENT CODE
|
|
||||||
%else
|
|
||||||
SECTION .text
|
|
||||||
%endif
|
|
||||||
|
|
||||||
;-----------------------------------------------------------
|
|
||||||
;
|
|
||||||
; DoBlending_MMX
|
|
||||||
;
|
|
||||||
; MMX version of DoBlending
|
|
||||||
;
|
|
||||||
; (DWORD *from, DWORD *to, count, tor, tog, tob, toa)
|
|
||||||
;-----------------------------------------------------------
|
|
||||||
|
|
||||||
; DoBlending_MMX: blend a run of 32-bit colors toward one fixed color.
;
; Stack arguments, read below as [esp+4*n]:
;   1 = source pointer, 2 = destination pointer, 3 = count,
;   4, 5, 6 = the three channel values of the blend color,
;   7 = the blend alpha.
; Each 16-bit channel lane is computed as
;   (src * (0x100 - alpha) + color * alpha) >> 8
; using a saturating add, then packed back to bytes with unsigned saturation.
; The fourth lane of both factors is zero, so the top byte of every stored
; color is written as 0.
; NOTE(review): the channel and alpha arguments are combined with plain
; shifts and ORs (no masking), so they are presumably expected to be in
; 0..255 -- confirm against the caller.
GLOBAL DoBlending_MMX
|
|
||||||
|
|
||||||
DoBlending_MMX:
|
|
||||||
pxor mm0,mm0 ; mm0 = 0
|
|
||||||
; Pack arguments 4..6 into eax as 00RRGGBB.
mov eax,[esp+4*4]
|
|
||||||
shl eax,16
|
|
||||||
mov edx,[esp+4*5]
|
|
||||||
shl edx,8
|
|
||||||
or eax,[esp+4*6]
|
|
||||||
or eax,edx
|
|
||||||
mov ecx,[esp+4*3] ; ecx = count
|
|
||||||
movd mm1,eax ; mm1 = 00000000 00RRGGBB
|
|
||||||
; Replicate argument 7 (alpha) into the three low bytes of eax.
mov eax,[esp+4*7]
|
|
||||||
shl eax,16
|
|
||||||
mov edx,[esp+4*7]
|
|
||||||
shl edx,8
|
|
||||||
or eax,[esp+4*7]
|
|
||||||
or eax,edx
|
|
||||||
mov edx,[esp+4*2] ; edx = dest
|
|
||||||
movd mm6,eax ; mm6 = 00000000 00AAAAAA
|
|
||||||
punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB
|
|
||||||
; Blending256 holds 0x0100 in its three low word lanes and 0 in the top lane.
movq mm7,[Blending256]
|
|
||||||
punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA
|
|
||||||
mov eax,[esp+4*1] ; eax = source
|
|
||||||
pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha)
|
|
||||||
psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha)
|
|
||||||
nop ; Does this actually pair on a Pentium?
|
|
||||||
|
|
||||||
; Do four colors per iteration: Count must be a multiple of four.
; There is no check before the first pass and the loop only exits when ecx
; reaches exactly zero, so count must also be nonzero.
|
|
||||||
|
|
||||||
; First half of the iteration: two colors (8 bytes).
.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
|
|
||||||
add eax,8
|
|
||||||
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
|
|
||||||
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
|
|
||||||
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
|
|
||||||
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
|
|
||||||
add edx,8
|
|
||||||
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
|
|
||||||
sub ecx,2
|
|
||||||
paddusw mm2,mm1
|
|
||||||
psrlw mm2,8
|
|
||||||
paddusw mm3,mm1
|
|
||||||
psrlw mm3,8
|
|
||||||
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
|
|
||||||
; edx was already advanced above, hence the -8.
movq [edx-8],mm2
|
|
||||||
|
|
||||||
; Second half of the iteration: the next two colors.
movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
|
|
||||||
add eax,8
|
|
||||||
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
|
|
||||||
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
|
|
||||||
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
|
|
||||||
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
|
|
||||||
add edx,8
|
|
||||||
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
|
|
||||||
sub ecx,2
|
|
||||||
paddusw mm2,mm1
|
|
||||||
psrlw mm2,8
|
|
||||||
paddusw mm3,mm1
|
|
||||||
psrlw mm3,8
|
|
||||||
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
|
|
||||||
movq [edx-8],mm2
|
|
||||||
|
|
||||||
; The zero flag tested here is the one set by the second `sub ecx,2`;
; the MMX and mov instructions in between leave EFLAGS alone.
jnz .loop
|
|
||||||
|
|
||||||
; Leave MMX state before returning to the caller.
emms
|
|
||||||
ret
|
|
||||||
|
|
||||||
;-----------------------------------------------------------
|
|
||||||
;
|
|
||||||
; BestColor_MMX
|
|
||||||
;
|
|
||||||
; Picks the closest matching color from a palette
|
|
||||||
;
|
|
||||||
; Passed FFRRGGBB and palette array in same format
|
|
||||||
; FF is the index of the first palette entry to consider
|
|
||||||
;
|
|
||||||
;-----------------------------------------------------------
|
|
||||||
|
|
||||||
; BestColor_MMX: find the palette entry closest to a given color.
;
; Two entry points share one body:
;   BestColor_MMX     loads its two stack arguments into ecx and edx and
;                     falls through;
;   @BestColor_MMX@8  expects them in ecx and edx already (the name uses the
;                     @name@bytes decoration -- presumably the fastcall
;                     variant; confirm against the C declaration).
; ecx = FFRRGGBB, where FF is the index of the first entry to examine.
; edx = address of the palette (one dword per entry).
; Entries FF..255 are examined. eax returns the index of the entry with the
; smallest summed squared difference, or of the first entry whose sum is 0.
; When two entries tie, the later one is kept.
GLOBAL BestColor_MMX
|
|
||||||
GLOBAL @BestColor_MMX@8
|
|
||||||
|
|
||||||
BestColor_MMX:
|
|
||||||
mov ecx,[esp+4]
|
|
||||||
mov edx,[esp+8]
|
|
||||||
@BestColor_MMX@8:
|
|
||||||
pxor mm0,mm0
|
|
||||||
; ecx still contains the FF byte here, so it ends up in mm1's fourth lane.
movd mm1,ecx ; mm1 = color searching for
|
|
||||||
; Start above any possible sum of three squared byte differences (3*255*255).
mov eax,257*257+257*257+257*257 ;eax = bestdist
|
|
||||||
push ebx
|
|
||||||
punpcklbw mm1,mm0
|
|
||||||
mov ebx,ecx ; ebx = best color
|
|
||||||
shr ecx,24 ; ecx = count
|
|
||||||
; ebx keeps the RRGGBB bits until some entry replaces it with an index.
and ebx,0xffffff
|
|
||||||
push esi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now
|
|
||||||
; ecx now points one past the entry under test (hence the ecx-1 below).
inc ecx
|
|
||||||
punpcklbw mm2,mm0
|
|
||||||
movq mm3,mm1
|
|
||||||
psubsw mm3,mm2
|
|
||||||
pmullw mm3,mm3 ; mm3 = color distance squared
|
|
||||||
|
|
||||||
; ebp = lane0 + lane1 + (low dword of the upper half of mm3).
; NOTE(review): that last dword still carries the fourth lane (squared
; difference of the top bytes) in bits 16-31 and is added unmasked. The sum
; is only the three-channel distance when the entry's top byte equals FF --
; confirm that callers guarantee this.
movd ebp,mm3 ; add the three components
|
|
||||||
psrlq mm3,32 ; into ebp to get the real
|
|
||||||
mov esi,ebp ; (squared) distance
|
|
||||||
shr esi,16
|
|
||||||
and ebp,0xffff
|
|
||||||
add ebp,esi
|
|
||||||
movd esi,mm3
|
|
||||||
add ebp,esi
|
|
||||||
|
|
||||||
; The zero flag comes from the final add above.
jz .perf ; found a perfect match
|
|
||||||
; Skip only when the current best is strictly smaller (unsigned compare).
cmp eax,ebp
|
|
||||||
jb .skip
|
|
||||||
mov eax,ebp
|
|
||||||
lea ebx,[ecx-1]
|
|
||||||
.skip cmp ecx,256
|
|
||||||
jne .loop
|
|
||||||
; Whole range examined: return the best index found.
mov eax,ebx
|
|
||||||
pop ebp
|
|
||||||
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
emms
|
|
||||||
ret
|
|
||||||
|
|
||||||
; Exact match: return its index immediately.
.perf lea eax,[ecx-1]
|
|
||||||
pop ebp
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
emms
|
|
||||||
ret
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,643 +0,0 @@
|
||||||
;*
|
|
||||||
;* tmap2.nas
|
|
||||||
;* The tilted plane inner loop.
|
|
||||||
;*
|
|
||||||
;*---------------------------------------------------------------------------
|
|
||||||
;* Copyright 1998-2006 Randy Heit
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* 1. Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;* 2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in the
|
|
||||||
;* documentation and/or other materials provided with the distribution.
|
|
||||||
;* 3. The name of the author may not be used to endorse or promote products
|
|
||||||
;* derived from this software without specific prior written permission.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
||||||
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
||||||
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
||||||
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
||||||
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
||||||
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
||||||
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
||||||
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
||||||
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*---------------------------------------------------------------------------
|
|
||||||
;*
|
|
||||||
;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was
|
|
||||||
;* actually slightly slower than the more straightforward approach
|
|
||||||
;* used here, probably because the trick requires too much setup time.
|
|
||||||
;*
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
%include "valgrind.inc"
|
|
||||||
|
|
||||||
%define SPACEFILLER4 (0x44444444)
|
|
||||||
|
|
||||||
%ifndef M_TARGET_LINUX
|
|
||||||
|
|
||||||
%define plane_sz _plane_sz
|
|
||||||
%define plane_su _plane_su
|
|
||||||
%define plane_sv _plane_sv
|
|
||||||
%define plane_shade _plane_shade
|
|
||||||
%define planelightfloat _planelightfloat
|
|
||||||
%define spanend _spanend
|
|
||||||
%define ylookup _ylookup
|
|
||||||
%define dc_destorg _dc_destorg
|
|
||||||
%define ds_colormap _ds_colormap
|
|
||||||
%define ds_source _ds_source
|
|
||||||
%define centery _centery
|
|
||||||
%define centerx _centerx
|
|
||||||
%define ds_curtiltedsource _ds_curtiltedsource
|
|
||||||
%define pviewx _pviewx
|
|
||||||
%define pviewy _pviewy
|
|
||||||
%define tiltlighting _tiltlighting
|
|
||||||
|
|
||||||
%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM
|
|
||||||
%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM
|
|
||||||
%define R_CalcTiltedLighting _R_CalcTiltedLighting
|
|
||||||
|
|
||||||
%endif
|
|
||||||
|
|
||||||
EXTERN plane_sz
|
|
||||||
EXTERN plane_su
|
|
||||||
EXTERN plane_sv
|
|
||||||
EXTERN planelightfloat
|
|
||||||
EXTERN spanend
|
|
||||||
EXTERN ylookup
|
|
||||||
EXTERN dc_destorg
|
|
||||||
EXTERN ds_colormap
|
|
||||||
EXTERN centery
|
|
||||||
EXTERN centerx
|
|
||||||
EXTERN ds_source
|
|
||||||
EXTERN plane_shade
|
|
||||||
EXTERN pviewx
|
|
||||||
EXTERN pviewy
|
|
||||||
EXTERN tiltlighting
|
|
||||||
EXTERN R_CalcTiltedLighting
|
|
||||||
|
|
||||||
GLOBAL ds_curtiltedsource
|
|
||||||
|
|
||||||
%define sv_i plane_sv
|
|
||||||
%define sv_j plane_sv+4
|
|
||||||
%define sv_k plane_sv+8
|
|
||||||
|
|
||||||
%define su_i plane_su
|
|
||||||
%define su_j plane_su+4
|
|
||||||
%define su_k plane_su+8
|
|
||||||
|
|
||||||
%define sz_i plane_sz
|
|
||||||
%define sz_j plane_sz+4
|
|
||||||
%define sz_k plane_sz+8
|
|
||||||
|
|
||||||
%define SPANBITS 3
|
|
||||||
|
|
||||||
section .bss
|
|
||||||
|
|
||||||
start_u: resq 1
|
|
||||||
start_v: resq 1
|
|
||||||
step_u: resq 1
|
|
||||||
step_v: resq 1
|
|
||||||
|
|
||||||
step_iz: resq 1
|
|
||||||
step_uz: resq 1
|
|
||||||
step_vz: resq 1
|
|
||||||
|
|
||||||
end_z: resd 1
|
|
||||||
|
|
||||||
section .data
|
|
||||||
|
|
||||||
ds_curtiltedsource: dd SPACEFILLER4
|
|
||||||
|
|
||||||
fp_1:
|
|
||||||
spanrecips: dd 0x3f800000 ; 1/1
|
|
||||||
dd 0x3f000000 ; 1/2
|
|
||||||
dd 0x3eaaaaab ; 1/3
|
|
||||||
dd 0x3e800000 ; 1/4
|
|
||||||
dd 0x3e4ccccd ; 1/5
|
|
||||||
dd 0x3e2aaaab ; 1/6
|
|
||||||
dd 0x3e124925 ; 1/7
|
|
||||||
fp_8recip: dd 0x3e000000 ; 1/8
|
|
||||||
dd 0x3de38e39 ; 1/9
|
|
||||||
dd 0x3dcccccd ; 1/10
|
|
||||||
dd 0x3dba2e8c ; 1/11
|
|
||||||
dd 0x3daaaaab ; 1/12
|
|
||||||
dd 0x3d9d89d9 ; 1/13
|
|
||||||
dd 0x3d924925 ; 1/14
|
|
||||||
dd 0x3d888889 ; 1/15
|
|
||||||
|
|
||||||
fp_quickint: dd 0x3f800000 ; 1
|
|
||||||
dd 0x40000000 ; 2
|
|
||||||
dd 0x40400000 ; 3
|
|
||||||
dd 0x40800000 ; 4
|
|
||||||
dd 0x40a00000 ; 5
|
|
||||||
dd 0x40c00000 ; 6
|
|
||||||
dd 0x40e00000 ; 7
|
|
||||||
fp_8: dd 0x41000000 ; 8
|
|
||||||
|
|
||||||
section .text
|
|
||||||
|
|
||||||
; R_SetTiltedSpanSource_ASM: set the texture base address used by the
; tilted-plane drawer by patching it into the drawer's code.
;
; Two entry points share one body:
;   R_SetTiltedSpanSource_ASM     loads its stack argument into ecx and
;                                 falls through;
;   @R_SetTiltedSpanSource_ASM@4  expects the value in ecx already
;                                 (presumably the fastcall variant -- confirm
;                                 against the C declaration).
; ecx is written at byte offset 3 of each of the ten fetchN instructions
; (the `mov al,[reg+reg+SPACEFILLER4]` lines), presumably replacing the
; SPACEFILLER4 displacement that follows the opcode, ModRM and SIB bytes --
; verify against the assembled encoding. The value is also kept in
; ds_curtiltedsource.
GLOBAL R_SetTiltedSpanSource_ASM
|
|
||||||
GLOBAL @R_SetTiltedSpanSource_ASM@4
|
|
||||||
|
|
||||||
R_SetTiltedSpanSource_ASM:
|
|
||||||
mov ecx,[esp+4]
|
|
||||||
|
|
||||||
@R_SetTiltedSpanSource_ASM@4:
|
|
||||||
mov [fetch1+3],ecx
|
|
||||||
mov [fetch2+3],ecx
|
|
||||||
mov [fetch3+3],ecx
|
|
||||||
mov [fetch4+3],ecx
|
|
||||||
mov [fetch5+3],ecx
|
|
||||||
mov [fetch6+3],ecx
|
|
||||||
mov [fetch7+3],ecx
|
|
||||||
mov [fetch8+3],ecx
|
|
||||||
mov [fetch9+3],ecx
|
|
||||||
mov [fetch10+3],ecx
|
|
||||||
mov [ds_curtiltedsource],ecx
|
|
||||||
; `selfmod` is a macro from valgrind.inc (not visible here); presumably it
; reports the rewritten range rtext_start..rtext_end to Valgrind -- confirm.
selfmod rtext_start, rtext_end
|
|
||||||
ret
|
|
||||||
|
|
||||||
; SetTiltedSpanSize: patch the shift counts and the mask used by the
; tilted-plane texel address computation.
;
; Inputs are taken from the registers cl and dl. ecx is preserved and eax is
; clobbered; nothing is read from the caller's stack.
; NOTE(review): the comment further down calls dl the "x bits"; cl is
; presumably the matching y bit count -- confirm against the caller.
;
; Values written at byte offset 2 of each labelled instruction (presumably
; its immediate operand -- verify against the assembled encoding):
;   x1..x10 (shr)  <- -cl
;   y1..y10 (shr)  <- -cl - dl
;   m1..m10 (and)  <- 0 when (1 << -dl) - 1 is 0, else ~((1 << -dl) - 1)
; 32-bit shifts use only the low five bits of their count, so -cl acts as
; 32-cl. The assembled defaults (26, 20, 0xfc000000) are exactly what
; cl = dl = 6 produces.
GLOBAL SetTiltedSpanSize
|
|
||||||
|
|
||||||
SetTiltedSpanSize:
|
|
||||||
; Save ecx: cl is borrowed as the shift count for building the mask.
push ecx
|
|
||||||
mov cl,dl
|
|
||||||
neg cl
|
|
||||||
mov eax,1
|
|
||||||
shl eax,cl
|
|
||||||
; Reload the caller's original cl from the saved copy.
mov cl,[esp]
|
|
||||||
neg cl
|
|
||||||
mov [x1+2],cl
|
|
||||||
mov [x2+2],cl
|
|
||||||
mov [x3+2],cl
|
|
||||||
mov [x4+2],cl
|
|
||||||
mov [x5+2],cl
|
|
||||||
mov [x6+2],cl
|
|
||||||
mov [x7+2],cl
|
|
||||||
mov [x8+2],cl
|
|
||||||
mov [x9+2],cl
|
|
||||||
mov [x10+2],cl
|
|
||||||
|
|
||||||
; cl becomes -(original cl) - dl for the y shifts; eax becomes (1 << -dl) - 1.
sub cl,dl
|
|
||||||
dec eax
|
|
||||||
mov [y1+2],cl
|
|
||||||
mov [y2+2],cl
|
|
||||||
mov [y3+2],cl
|
|
||||||
mov [y4+2],cl
|
|
||||||
mov [y5+2],cl
|
|
||||||
mov [y6+2],cl
|
|
||||||
mov [y7+2],cl
|
|
||||||
mov [y8+2],cl
|
|
||||||
mov [y9+2],cl
|
|
||||||
mov [y10+2],cl
|
|
||||||
cmp eax,0 ; if x bits is 0, mask must be 0 too.
|
|
||||||
jz .notted
|
|
||||||
not eax
|
|
||||||
.notted:
|
|
||||||
pop ecx
|
|
||||||
|
|
||||||
mov [m1+2],eax
|
|
||||||
mov [m2+2],eax
|
|
||||||
mov [m3+2],eax
|
|
||||||
mov [m4+2],eax
|
|
||||||
mov [m5+2],eax
|
|
||||||
mov [m6+2],eax
|
|
||||||
mov [m7+2],eax
|
|
||||||
mov [m8+2],eax
|
|
||||||
mov [m9+2],eax
|
|
||||||
mov [m10+2],eax
|
|
||||||
|
|
||||||
; `selfmod` is a macro from valgrind.inc (not visible here); presumably it
; reports the rewritten range rtext_start..rtext_end to Valgrind -- confirm.
selfmod rtext_start, rtext_end
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
%ifndef M_TARGET_MACHO
|
|
||||||
SECTION .rtext progbits alloc exec write align=64
|
|
||||||
%else
|
|
||||||
SECTION .text align=64
|
|
||||||
GLOBAL _rtext_tmap2_start
|
|
||||||
_rtext_tmap2_start:
|
|
||||||
%endif
|
|
||||||
|
|
||||||
rtext_start:
|
|
||||||
|
|
||||||
; R_DrawTiltedPlane_ASM: draw one horizontal span of a tilted plane.
;
; Two entry points share one body:
;   R_DrawTiltedPlane_ASM     loads y and x from the stack into ecx and edx
;                             and falls through;
;   @R_DrawTiltedPlane_ASM@8  expects them in ecx and edx already
;                             (presumably the fastcall variant -- confirm
;                             against the C declaration).
; The span covers columns x..spanend[y] of row y and is written to
; dc_destorg + ylookup[y] + x.
;
; Register use once set up:
;   ebx = pixels left to draw, minus one
;   edi = destination pointer
;   eax = byte index for the texel and light-table lookups; only al is ever
;         loaded, so the upper 24 bits must stay zero
; The FPU stack holds v/z | 1/z | u/z throughout, with u and v (and at
; times z) pushed on top. u and v are recomputed with a divide once per
; 8-pixel run and stepped linearly (step_u, step_v) inside each run.
GLOBAL R_DrawTiltedPlane_ASM
|
|
||||||
GLOBAL @R_DrawTiltedPlane_ASM@8
|
|
||||||
|
|
||||||
R_DrawTiltedPlane_ASM:
|
|
||||||
mov ecx,[esp+4]
|
|
||||||
mov edx,[esp+8]
|
|
||||||
|
|
||||||
; ecx = y
|
|
||||||
; edx = x
|
|
||||||
|
|
||||||
@R_DrawTiltedPlane_ASM@8:
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
mov eax,[centery]
|
|
||||||
movzx ebx,word [spanend+ecx*2]
|
|
||||||
sub eax,ecx ; eax = centery-y
|
|
||||||
sub ebx,edx ; ebx = span length - 1
|
|
||||||
mov edi,[ylookup+ecx*4]
|
|
||||||
; The two pushes below put centery-y and x-centerx in memory so that fild
; can load them as integers; they are released again at .litup.
push eax
|
|
||||||
add edi,[dc_destorg]
|
|
||||||
add edi,edx ; edi = frame buffer pointer
|
|
||||||
sub edx,[centerx] ; edx = x-centerx
|
|
||||||
push edx
|
|
||||||
xor eax,eax
|
|
||||||
|
|
||||||
; Evaluate u/z, v/z and 1/z at the first pixel:
;   value = s?.i*xmul + s?.j*ymul + s?.k   for su, sv and sz.
fild dword [esp+4] ; ymul
|
|
||||||
fild dword [esp] ; xmul | ymul
|
|
||||||
fld dword [sv_j] ; sv.j | xmul | ymul
|
|
||||||
fmul st0,st2 ; sv.j*ymul | xmul | ymul
|
|
||||||
fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul
|
|
||||||
fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul
|
|
||||||
fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul
|
|
||||||
fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
|
||||||
fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
|
||||||
fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
|
||||||
fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
|
||||||
fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
|
||||||
fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
|
||||||
fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
|
|
||||||
fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
|
|
||||||
faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
|
|
||||||
faddp st1,st0 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
|
|
||||||
fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul
|
|
||||||
faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
|
|
||||||
fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
|
|
||||||
fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul
|
|
||||||
fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul
|
|
||||||
fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z
|
|
||||||
fadd dword [su_k] ; u/z | v/z | 1/z
|
|
||||||
fxch st2 ; 1/z | v/z | u/z
|
|
||||||
fxch st1 ; v/z | 1/z | u/z
|
|
||||||
|
|
||||||
; if lighting is on, fill out the light table
|
|
||||||
mov al,[plane_shade]
|
|
||||||
test al,al
|
|
||||||
jz .litup
|
|
||||||
|
|
||||||
; Build the stack arguments for R_CalcTiltedLighting: two doubles
; (planelightfloat/endz at [esp], planelightfloat/z at [esp+8]) followed by
; the integer width pushed here.
push ebx
|
|
||||||
fild dword [esp] ; width | v/z | 1/z | u/z
|
|
||||||
fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z
|
|
||||||
fadd st0,st2 ; 1/endz | v/z | 1/z | u/z
|
|
||||||
fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z
|
|
||||||
fmul dword [planelightfloat]
|
|
||||||
fxch st1
|
|
||||||
fmul dword [planelightfloat]
|
|
||||||
sub esp,16
|
|
||||||
fstp qword [esp]
|
|
||||||
fstp qword [esp+8]
|
|
||||||
call R_CalcTiltedLighting
|
|
||||||
; 20 = the 16 bytes of doubles plus the 4-byte width.
add esp, 20
|
|
||||||
; Re-zero eax after the call so its upper bits are clear for the al lookups.
xor eax, eax
|
|
||||||
|
|
||||||
; Release the two integers pushed for the fild loads above.
.litup add esp, 8
|
|
||||||
|
|
||||||
; calculate initial z, u, and v values
|
|
||||||
fld st1 ; 1/z | v/z | 1/z | u/z
|
|
||||||
fdivr dword [fp_1] ; z | v/z | 1/z | u/z
|
|
||||||
|
|
||||||
fld st3 ; u/z | z | v/z | 1/z | u/z
|
|
||||||
fmul st0,st1 ; u | z | v/z | 1/z | u/z
|
|
||||||
fld st2 ; v/z | u | z | v/z | 1/z | u/z
|
|
||||||
fmulp st2,st0 ; u | v | v/z | 1/z | u/z
|
|
||||||
; Store u and v as 64-bit integers; only their low dwords are read back.
fld st0
|
|
||||||
fistp qword [start_u]
|
|
||||||
fld st1
|
|
||||||
fistp qword [start_v]
|
|
||||||
|
|
||||||
cmp ebx,7 ; Do we have at least 8 pixels to plot?
|
|
||||||
jl near ShortStrip
|
|
||||||
|
|
||||||
; yes, we do, so figure out tex coords at end of this span
|
|
||||||
|
|
||||||
; multiply i values by span length (8)
|
|
||||||
fld dword [su_i] ; su.i
|
|
||||||
fmul dword [fp_8] ; su.i*8
|
|
||||||
fld dword [sv_i] ; sv.i | su.i*8
|
|
||||||
fmul dword [fp_8] ; sv.i*8 | su.i*8
|
|
||||||
fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8
|
|
||||||
fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8
|
|
||||||
fxch st2 ; su.i*8 | sv.i*8 | sz.i*8
|
|
||||||
fstp qword [step_uz] ; sv.i*8 | sz.i*8
|
|
||||||
fstp qword [step_vz] ; sz.i*8
|
|
||||||
fst qword [step_iz] ; sz.i*8
|
|
||||||
|
|
||||||
; find tex coords at start of next span
|
|
||||||
; Advance 1/z, v/z and u/z by their per-8-pixel steps.
faddp st4
|
|
||||||
fld qword [step_vz]
|
|
||||||
faddp st3
|
|
||||||
fld qword [step_uz]
|
|
||||||
faddp st5
|
|
||||||
|
|
||||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
|
||||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
|
||||||
fst dword [end_z]
|
|
||||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
|
||||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
|
||||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
|
||||||
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
|
|
||||||
|
|
||||||
; now subtract to get stepping values for this span
|
|
||||||
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
|
|
||||||
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
|
|
||||||
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
|
|
||||||
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
|
|
||||||
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
|
|
||||||
; Execution falls through into FullSpan with five values on the FPU stack.
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
|
|
||||||
|
|
||||||
; FullSpan: top of the 8-pixel run loop.
; On entry the FPU stack is u | v | v/z | 1/z | u/z, where u and v are the
; coordinates at the end of the run about to be drawn. This code moves
; 1/z, v/z and u/z on to the end of the following run when that run is also
; a full 8 pixels (ebx >= 15, with ebx = pixels left minus one).
FullSpan:
|
|
||||||
; Keep eax zero above al for the byte lookups in DrawFullSpan.
xor eax,eax
|
|
||||||
cmp ebx,15 ; is there another complete span after this one?
|
|
||||||
jl NextIsShort
|
|
||||||
|
|
||||||
; there is a complete span after this one
|
|
||||||
fld qword [step_iz]
|
|
||||||
faddp st4,st0
|
|
||||||
fld qword [step_vz]
|
|
||||||
faddp st3,st0
|
|
||||||
fld qword [step_uz]
|
|
||||||
faddp st5,st0
|
|
||||||
jmp StartDiv
|
|
||||||
|
|
||||||
; NextIsShort: the run after the current one has fewer than 8 pixels.
; When ebx <= 8 no further interpolation endpoint is needed, so go straight
; to DrawFullSpan (no z is pushed on that path). Otherwise advance 1/z, v/z
; and u/z by the s?.i gradients scaled by the float fp_quickint[ebx-8];
; table entry k holds the value k+1. Execution then falls through into
; StartDiv.
NextIsShort:
|
|
||||||
cmp ebx,8 ; if next span is no more than 1 pixel, then we already
|
|
||||||
jle DrawFullSpan ; know everything we need to draw it
|
|
||||||
|
|
||||||
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_quickint-8*4+ebx*4]
|
|
||||||
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_quickint-8*4+ebx*4]
|
|
||||||
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_quickint-8*4+ebx*4]
|
|
||||||
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
|
|
||||||
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
|
|
||||||
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
|
|
||||||
faddp st5,st0 ; u | v | v/z | 1/z | u/z
|
|
||||||
|
|
||||||
; StartDiv: compute z = 1 / (1/z) for the end of the following run.
; The divide is issued here, before the eight pixel writes in DrawFullSpan,
; and its result is first read after them -- presumably so that it can
; overlap with the integer work (confirm on the target CPU).
StartDiv:
|
|
||||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
|
||||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
|
||||||
|
|
||||||
; DrawFullSpan: draw eight pixels, unrolled, then set up the next run.
;
; For each pixel:
;   ecx and edx hold the two stepped coordinates: ecx starts at
;   start_v + pviewy, edx at start_u + pviewx. The texel offset is
;   (ecx >> xN) + ((edx & mN) >> yN), added to the patched SPACEFILLER4
;   base address. The texel is then looked up in a per-pixel table taken
;   from tiltlighting[ebx - pixel number] and stored at [edi + pixel number].
; The xN/mN/yN immediates and the fetchN displacement are rewritten at run
; time by SetTiltedSpanSize and R_SetTiltedSpanSource_ASM, so the
; instruction forms on those labelled lines must not change.
; Work for the next pixel is interleaved with the current one's stores.
DrawFullSpan:
|
|
||||||
mov ecx,[start_v]
|
|
||||||
mov edx,[start_u]
|
|
||||||
|
|
||||||
add ecx,[pviewy]
|
|
||||||
add edx,[pviewx]
|
|
||||||
|
|
||||||
mov esi,edx
|
|
||||||
mov ebp,ecx
|
|
||||||
; Pixel 0
x1 shr ebp,26
|
|
||||||
m1 and esi,0xfc000000
|
|
||||||
y1 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch1 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4]
|
|
||||||
mov esi,edx
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
mov [edi+0],al
|
|
||||||
|
|
||||||
; Pixel 1
x2 shr ebp,26
|
|
||||||
m2 and esi,0xfc000000
|
|
||||||
y2 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch2 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4-4]
|
|
||||||
mov esi,edx
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
mov [edi+1],al
|
|
||||||
|
|
||||||
; Pixel 2
x3 shr ebp,26
|
|
||||||
m3 and esi,0xfc000000
|
|
||||||
y3 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch3 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4-8]
|
|
||||||
mov esi,edx
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
mov [edi+2],al
|
|
||||||
|
|
||||||
; Pixel 3
x4 shr ebp,26
|
|
||||||
m4 and esi,0xfc000000
|
|
||||||
y4 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch4 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4-12]
|
|
||||||
mov esi,edx
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
mov [edi+3],al
|
|
||||||
|
|
||||||
; Pixel 4
x5 shr ebp,26
|
|
||||||
m5 and esi,0xfc000000
|
|
||||||
y5 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch5 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4-16]
|
|
||||||
mov esi,edx
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
mov [edi+4],al
|
|
||||||
|
|
||||||
; Pixel 5
x6 shr ebp,26
|
|
||||||
m6 and esi,0xfc000000
|
|
||||||
y6 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch6 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4-20]
|
|
||||||
mov esi,edx
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
mov [edi+5],al
|
|
||||||
|
|
||||||
; Pixel 6. Pixel 7's address is built directly in ecx and edx, because the
; stepped coordinates are not needed again after this run.
x7 shr ebp,26
|
|
||||||
m7 and esi,0xfc000000
|
|
||||||
y7 shr esi,20
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
fetch7 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4-24]
|
|
||||||
x8 shr ecx,26
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
m8 and edx,0xfc000000
|
|
||||||
mov [edi+6],al
|
|
||||||
|
|
||||||
; Pixel 7
y8 shr edx,20
|
|
||||||
mov ebp,[tiltlighting+ebx*4-28]
|
|
||||||
fetch8 mov al,[edx+ecx+SPACEFILLER4]
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov [edi+7],al
|
|
||||||
add edi,8
|
|
||||||
|
|
||||||
; Eight pixels consumed; a negative count means the span is finished.
sub ebx,8
|
|
||||||
jl near Done
|
|
||||||
|
|
||||||
; The end coordinates of this run become the start of the next one.
fld st1
|
|
||||||
fistp qword [start_u]
|
|
||||||
fld st2
|
|
||||||
fistp qword [start_v]
|
|
||||||
|
|
||||||
cmp ebx,7
|
|
||||||
jl near EndIsShort
|
|
||||||
|
|
||||||
; Another full run follows: turn the z from StartDiv into u' and v', then
; derive the per-pixel steps as (end - start) * 1/8.
fst dword [end_z]
|
|
||||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
|
||||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
|
||||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
|
||||||
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
|
|
||||||
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
|
|
||||||
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
|
|
||||||
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
|
|
||||||
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
|
|
||||||
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
|
|
||||||
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
|
|
||||||
jmp FullSpan
|
|
||||||
|
|
||||||
; OnlyOnePixelAtEnd: exactly one pixel is left after a full 8-pixel run
; (reached from EndIsShort when ebx is 0).
; No z was pushed on this path, so the FPU stack is u | v | v/z | 1/z | u/z.
; u and v are stored as the coordinates of the last pixel, then execution
; falls through into OnlyOnePixel.
OnlyOnePixelAtEnd:
|
|
||||||
fld st0
|
|
||||||
fistp qword [start_u]
|
|
||||||
fld st1
|
|
||||||
fistp qword [start_v]
|
|
||||||
|
|
||||||
; OnlyOnePixel: draw a single pixel at start_u/start_v, using the table in
; tiltlighting[0], then fall through into Done.
; The x9/m9/y9 immediates and the fetch9 displacement are patched at run
; time. eax must already be zero above al.
OnlyOnePixel:
|
|
||||||
mov edx,[start_v]
|
|
||||||
mov ecx,[start_u]
|
|
||||||
add edx,[pviewy]
|
|
||||||
add ecx,[pviewx]
|
|
||||||
x9 shr edx,26
|
|
||||||
m9 and ecx,0xfc000000
|
|
||||||
y9 shr ecx,20
|
|
||||||
mov ebp,[tiltlighting]
|
|
||||||
fetch9 mov al,[ecx+edx+SPACEFILLER4]
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov [edi],al
|
|
||||||
|
|
||||||
; Done: common exit for paths that arrive with five values on the FPU stack.
; The two fcompp instructions are used only for their double pop (their
; comparison result is ignored); with the fstp that discards all five
; values. The registers saved on entry are then restored.
Done:
|
|
||||||
fcompp
|
|
||||||
fcompp
|
|
||||||
fstp st0
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
; ShortStrip: the whole span is shorter than 8 pixels (ebx < 7 on entry).
; A span of a single pixel (ebx <= 0) is drawn by OnlyOnePixel; anything
; longer falls through into MoreThanOnePixel.
ShortStrip:
|
|
||||||
cmp ebx,0
|
|
||||||
jle near OnlyOnePixel
|
|
||||||
|
|
||||||
; MoreThanOnePixel: set up the far end of a short span (2..7 pixels).
; 1/z, v/z and u/z are advanced by the s?.i gradients scaled by the float
; fp_quickint[ebx]; table entry k holds the value k+1. z is then computed
; for that end point and left on top of the FPU stack for CalcPartialSteps.
MoreThanOnePixel:
|
|
||||||
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_quickint+ebx*4]
|
|
||||||
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_quickint+ebx*4]
|
|
||||||
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
|
|
||||||
fmul dword [fp_quickint+ebx*4]
|
|
||||||
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
|
|
||||||
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
|
|
||||||
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
|
|
||||||
faddp st5,st0 ; u | v | v/z | 1/z | u/z
|
|
||||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
|
||||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
|
||||||
jmp CalcPartialSteps
|
|
||||||
|
|
||||||
; EndIsShort: fewer than 8 pixels remain after a full run.
; Exactly one remaining pixel (ebx = 0) is handled by OnlyOnePixelAtEnd;
; otherwise execution falls through into CalcPartialSteps.
EndIsShort:
|
|
||||||
cmp ebx,0
|
|
||||||
je near OnlyOnePixelAtEnd
|
|
||||||
|
|
||||||
; CalcPartialSteps: derive the per-pixel steps for a final run of fewer
; than 8 pixels.
; On entry the FPU stack is z | u | v | v/z | 1/z | u/z, with z belonging to
; the far end of the run. The end coordinates u' = z*u/z and v' = z*v/z are
; formed, and each step is (end - start) * spanrecips[ebx]; table entry k
; holds 1/(k+1). Three values are left on the FPU stack. The integer
; coordinates are then loaded and execution falls through into endloop.
CalcPartialSteps:
|
|
||||||
fst dword [end_z]
|
|
||||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
|
||||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
|
||||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z
|
|
||||||
fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z
|
|
||||||
fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z
|
|
||||||
fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; v'-v | ustep | v/z | 1/z | u/z
|
|
||||||
fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z
|
|
||||||
fxch st1 ; ustep | vstep | v/z | 1/z | u/z
|
|
||||||
fistp qword [step_u] ; vstep | v/z | 1/z | u/z
|
|
||||||
fistp qword [step_v] ; v/z | 1/z | u/z
|
|
||||||
|
|
||||||
mov ecx,[start_v]
|
|
||||||
mov edx,[start_u]
|
|
||||||
|
|
||||||
add ecx,[pviewy]
|
|
||||||
add edx,[pviewx]
|
|
||||||
|
|
||||||
mov esi,edx
|
|
||||||
mov ebp,ecx
|
|
||||||
; endloop: draw the final short run one pixel per iteration.
; ecx and edx hold the stepped coordinates, with copies in ebp and esi for
; the address computation. ebx counts down and also selects the per-pixel
; table from tiltlighting. The x10/m10/y10 immediates and the fetch10
; displacement are patched at run time. After the loop, the three values
; still on the FPU stack are discarded and the saved registers restored.
endloop:
|
|
||||||
x10 shr ebp,26
|
|
||||||
m10 and esi,0xfc000000
|
|
||||||
|
|
||||||
y10 shr esi,20
|
|
||||||
; edi is advanced early; the store below compensates with [edi-1].
inc edi
|
|
||||||
|
|
||||||
add ecx,[step_v]
|
|
||||||
add edx,[step_u]
|
|
||||||
|
|
||||||
fetch10 mov al,[ebp+esi+SPACEFILLER4]
|
|
||||||
mov ebp,[tiltlighting+ebx*4]
|
|
||||||
|
|
||||||
mov esi,edx
|
|
||||||
dec ebx
|
|
||||||
|
|
||||||
mov al,[ebp+eax]
|
|
||||||
mov ebp,ecx
|
|
||||||
|
|
||||||
mov [edi-1],al
|
|
||||||
; The flags tested here are still those of `dec ebx`; the mov instructions
; after it do not change EFLAGS.
jge endloop
|
|
||||||
|
|
||||||
; Pop v/z, 1/z and u/z (the fcompp comparison result is ignored).
fcompp
|
|
||||||
fstp st0
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
rtext_end:
|
|
||||||
%ifdef M_TARGET_MACHO
|
|
||||||
GLOBAL _rtext_tmap2_end
|
|
||||||
_rtext_tmap2_end:
|
|
||||||
%endif
|
|
|
@ -1,344 +0,0 @@
|
||||||
%include "valgrind.inc"
|
|
||||||
|
|
||||||
%ifdef M_TARGET_WATCOM
|
|
||||||
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
|
|
||||||
SEGMENT DATA
|
|
||||||
%else
|
|
||||||
SECTION .data
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%ifndef M_TARGET_LINUX
|
|
||||||
%define ylookup _ylookup
|
|
||||||
%define vplce _vplce
|
|
||||||
%define vince _vince
|
|
||||||
%define palookupoffse _palookupoffse
|
|
||||||
%define bufplce _bufplce
|
|
||||||
%define dc_iscale _dc_iscale
|
|
||||||
%define dc_colormap _dc_colormap
|
|
||||||
%define dc_count _dc_count
|
|
||||||
%define dc_dest _dc_dest
|
|
||||||
%define dc_source _dc_source
|
|
||||||
%define dc_texturefrac _dc_texturefrac
|
|
||||||
%define dc_pitch _dc_pitch
|
|
||||||
|
|
||||||
%define setupvlinetallasm _setupvlinetallasm
|
|
||||||
%define vlinetallasm4 _vlinetallasm4
|
|
||||||
%define vlinetallasmathlon4 _vlinetallasmathlon4
|
|
||||||
%define vlinetallasm1 _vlinetallasm1
|
|
||||||
%define prevlinetallasm1 _prevlinetallasm1
|
|
||||||
%endif
|
|
||||||
|
|
||||||
EXTERN vplce
|
|
||||||
EXTERN vince
|
|
||||||
EXTERN palookupoffse
|
|
||||||
EXTERN bufplce
|
|
||||||
|
|
||||||
EXTERN ylookup
|
|
||||||
EXTERN dc_iscale
|
|
||||||
EXTERN dc_colormap
|
|
||||||
EXTERN dc_count
|
|
||||||
EXTERN dc_dest
|
|
||||||
EXTERN dc_source
|
|
||||||
EXTERN dc_texturefrac
|
|
||||||
EXTERN dc_pitch
|
|
||||||
|
|
||||||
GLOBAL vlt4pitch
|
|
||||||
GLOBAL vlt1pitch
|
|
||||||
|
|
||||||
%ifdef M_TARGET_WATCOM
|
|
||||||
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
|
|
||||||
SEGMENT CODE
|
|
||||||
%else
|
|
||||||
SECTION .text
|
|
||||||
%endif
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL setpitch3
|
|
||||||
setpitch3:
|
|
||||||
mov [vltpitch+2], eax
|
|
||||||
mov [vltpitcha+2],eax
|
|
||||||
mov [vlt1pitch1+2], eax
|
|
||||||
mov [vlt1pitch2+2], eax
|
|
||||||
selfmod vltpitch, vlt1pitch2+6
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL setupvlinetallasm
|
|
||||||
setupvlinetallasm:
|
|
||||||
mov ecx, [esp+4]
|
|
||||||
mov [shifter1+2], cl
|
|
||||||
mov [shifter2+2], cl
|
|
||||||
mov [shifter3+2], cl
|
|
||||||
mov [shifter4+2], cl
|
|
||||||
mov [shifter1a+2], cl
|
|
||||||
mov [shifter2a+2], cl
|
|
||||||
mov [shifter3a+2], cl
|
|
||||||
mov [shifter4a+2], cl
|
|
||||||
mov [preshift+2], cl
|
|
||||||
mov [shift11+2], cl
|
|
||||||
mov [shift12+2], cl
|
|
||||||
selfmod shifter1, shift12+6
|
|
||||||
ret
|
|
||||||
|
|
||||||
%ifdef M_TARGET_MACHO
|
|
||||||
SECTION .text align=64
|
|
||||||
GLOBAL _rtext_tmap3_start
|
|
||||||
_rtext_tmap3_start:
|
|
||||||
%else
|
|
||||||
SECTION .rtext progbits alloc exec write align=64
|
|
||||||
%endif
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
|
|
||||||
GLOBAL vlinetallasm4
|
|
||||||
vlinetallasm4:
|
|
||||||
push ebx
|
|
||||||
mov eax, [bufplce+0]
|
|
||||||
mov ebx, [bufplce+4]
|
|
||||||
mov ecx, [bufplce+8]
|
|
||||||
mov edx, [bufplce+12]
|
|
||||||
mov [source1+3], eax
|
|
||||||
mov [source2+3], ebx
|
|
||||||
mov [source3+3], ecx
|
|
||||||
mov [source4+3], edx
|
|
||||||
mov eax, [palookupoffse+0]
|
|
||||||
mov ebx, [palookupoffse+4]
|
|
||||||
mov ecx, [palookupoffse+8]
|
|
||||||
mov edx, [palookupoffse+12]
|
|
||||||
mov [lookup1+2], eax
|
|
||||||
mov [lookup2+2], ebx
|
|
||||||
mov [lookup3+2], ecx
|
|
||||||
mov [lookup4+2], edx
|
|
||||||
mov eax, [vince+0]
|
|
||||||
mov ebx, [vince+4]
|
|
||||||
mov ecx, [vince+8]
|
|
||||||
mov edx, [vince+12]
|
|
||||||
mov [step1+2], eax
|
|
||||||
mov [step2+2], ebx
|
|
||||||
mov [step3+2], ecx
|
|
||||||
mov [step4+1], edx
|
|
||||||
push ebp
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
mov edi, [dc_dest]
|
|
||||||
mov eax, dword [ylookup+ecx*4-4]
|
|
||||||
add eax, edi
|
|
||||||
sub edi, eax
|
|
||||||
mov [write1+2],eax
|
|
||||||
inc eax
|
|
||||||
mov [write2+2],eax
|
|
||||||
inc eax
|
|
||||||
mov [write3+2],eax
|
|
||||||
inc eax
|
|
||||||
mov [write4+2],eax
|
|
||||||
mov ebx, [vplce]
|
|
||||||
mov ecx, [vplce+4]
|
|
||||||
mov esi, [vplce+8]
|
|
||||||
mov eax, [vplce+12]
|
|
||||||
selfmod loopit, vltpitch
|
|
||||||
jmp loopit
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
loopit:
|
|
||||||
mov edx, ebx
|
|
||||||
shifter1: shr edx, 24
|
|
||||||
source1: movzx edx, BYTE [edx+0x88888888]
|
|
||||||
lookup1: mov dl, [edx+0x88888888]
|
|
||||||
write1: mov [edi+0x88888880], dl
|
|
||||||
step1: add ebx, 0x88888888
|
|
||||||
mov edx, ecx
|
|
||||||
shifter2: shr edx, 24
|
|
||||||
source2: movzx edx, BYTE [edx+0x88888888]
|
|
||||||
lookup2: mov dl, [edx+0x88888888]
|
|
||||||
write2: mov [edi+0x88888881], dl
|
|
||||||
step2: add ecx, 0x88888888
|
|
||||||
mov edx, esi
|
|
||||||
shifter3: shr edx, 24
|
|
||||||
source3: movzx edx, BYTE [edx+0x88888888]
|
|
||||||
lookup3: mov dl, BYTE [edx+0x88888888]
|
|
||||||
write3: mov [edi+0x88888882], dl
|
|
||||||
step3: add esi, 0x88888888
|
|
||||||
mov edx, eax
|
|
||||||
shifter4: shr edx, 24
|
|
||||||
source4: movzx edx, BYTE [edx+0x88888888]
|
|
||||||
lookup4: mov dl, [edx+0x88888888]
|
|
||||||
write4: mov [edi+0x88888883], dl
|
|
||||||
step4: add eax, 0x88888888
|
|
||||||
vltpitch: add edi, 320
|
|
||||||
jle near loopit
|
|
||||||
|
|
||||||
mov [vplce], ebx
|
|
||||||
mov [vplce+4], ecx
|
|
||||||
mov [vplce+8], esi
|
|
||||||
mov [vplce+12], eax
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebp
|
|
||||||
pop ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
|
|
||||||
GLOBAL vlinetallasmathlon4
|
|
||||||
vlinetallasmathlon4:
|
|
||||||
push ebx
|
|
||||||
mov eax, [bufplce+0]
|
|
||||||
mov ebx, [bufplce+4]
|
|
||||||
mov ecx, [bufplce+8]
|
|
||||||
mov edx, [bufplce+12]
|
|
||||||
mov [source1a+3], eax
|
|
||||||
mov [source2a+3], ebx
|
|
||||||
mov [source3a+3], ecx
|
|
||||||
mov [source4a+3], edx
|
|
||||||
mov eax, [palookupoffse+0]
|
|
||||||
mov ebx, [palookupoffse+4]
|
|
||||||
mov ecx, [palookupoffse+8]
|
|
||||||
mov edx, [palookupoffse+12]
|
|
||||||
mov [lookup1a+2], eax
|
|
||||||
mov [lookup2a+2], ebx
|
|
||||||
mov [lookup3a+2], ecx
|
|
||||||
mov [lookup4a+2], edx
|
|
||||||
mov eax, [vince+0]
|
|
||||||
mov ebx, [vince+4]
|
|
||||||
mov ecx, [vince+8]
|
|
||||||
mov edx, [vince+12]
|
|
||||||
mov [step1a+2], eax
|
|
||||||
mov [step2a+2], ebx
|
|
||||||
mov [step3a+2], ecx
|
|
||||||
mov [step4a+1], edx
|
|
||||||
push ebp
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
mov edi, [dc_dest]
|
|
||||||
mov eax, dword [ylookup+ecx*4-4]
|
|
||||||
add eax, edi
|
|
||||||
sub edi, eax
|
|
||||||
mov [write1a+2],eax
|
|
||||||
inc eax
|
|
||||||
mov [write2a+2],eax
|
|
||||||
inc eax
|
|
||||||
mov [write3a+2],eax
|
|
||||||
inc eax
|
|
||||||
mov [write4a+2],eax
|
|
||||||
mov ebp, [vplce]
|
|
||||||
mov ecx, [vplce+4]
|
|
||||||
mov esi, [vplce+8]
|
|
||||||
mov eax, [vplce+12]
|
|
||||||
selfmod loopita, vltpitcha
|
|
||||||
jmp loopita
|
|
||||||
|
|
||||||
; Unfortunately, this code has not been carefully analyzed to determine
|
|
||||||
; how well it utilizes the processor's instruction units. Instead, I just
|
|
||||||
; kept rearranging code, seeing what sped it up and what slowed it down
|
|
||||||
; until I arrived at this. This is the fastest version I was able to
|
|
||||||
; manage, but that does not mean it cannot be made faster with careful
|
|
||||||
; instruction shuffling.
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
|
|
||||||
loopita: mov edx, ebp
|
|
||||||
mov ebx, ecx
|
|
||||||
shifter1a: shr edx, 24
|
|
||||||
shifter2a: shr ebx, 24
|
|
||||||
source1a: movzx edx, BYTE [edx+0x88888888]
|
|
||||||
source2a: movzx ebx, BYTE [ebx+0x88888888]
|
|
||||||
step1a: add ebp, 0x88888888
|
|
||||||
step2a: add ecx, 0x88888888
|
|
||||||
lookup1a: mov dl, [edx+0x88888888]
|
|
||||||
lookup2a: mov dh, [ebx+0x88888888]
|
|
||||||
mov ebx, esi
|
|
||||||
write1a: mov [edi+0x88888880], dl
|
|
||||||
write2a: mov [edi+0x88888881], dh
|
|
||||||
shifter3a: shr ebx, 24
|
|
||||||
mov edx, eax
|
|
||||||
source3a: movzx ebx, BYTE [ebx+0x88888888]
|
|
||||||
shifter4a: shr edx, 24
|
|
||||||
step3a: add esi, 0x88888888
|
|
||||||
source4a: movzx edx, BYTE [edx+0x88888888]
|
|
||||||
step4a: add eax, 0x88888888
|
|
||||||
lookup3a: mov bl, [ebx+0x88888888]
|
|
||||||
lookup4a: mov dl, [edx+0x88888888]
|
|
||||||
write3a: mov [edi+0x88888882], bl
|
|
||||||
write4a: mov [edi+0x88888883], dl
|
|
||||||
vltpitcha: add edi, 320
|
|
||||||
jle near loopita
|
|
||||||
|
|
||||||
mov [vplce], ebp
|
|
||||||
mov [vplce+4], ecx
|
|
||||||
mov [vplce+8], esi
|
|
||||||
mov [vplce+12], eax
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebp
|
|
||||||
pop ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL prevlinetallasm1
|
|
||||||
prevlinetallasm1:
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
cmp ecx, 1
|
|
||||||
ja vlinetallasm1
|
|
||||||
|
|
||||||
mov eax, [dc_iscale]
|
|
||||||
mov edx, [dc_texturefrac]
|
|
||||||
add eax, edx
|
|
||||||
mov ecx, [dc_source]
|
|
||||||
preshift: shr edx, 16
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
mov edi, [dc_colormap]
|
|
||||||
movzx ebx, byte [ecx+edx]
|
|
||||||
mov ecx, [dc_dest]
|
|
||||||
mov bl, byte [edi+ebx]
|
|
||||||
pop edi
|
|
||||||
mov byte [ecx], bl
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
GLOBAL vlinetallasm1
|
|
||||||
vlinetallasm1:
|
|
||||||
push ebp
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
push esi
|
|
||||||
|
|
||||||
mov ebp, [dc_count]
|
|
||||||
mov ebx, [dc_texturefrac] ; ebx = frac
|
|
||||||
mov edi, [dc_dest]
|
|
||||||
mov ecx, ebx
|
|
||||||
shift11: shr ecx, 16
|
|
||||||
mov esi, [dc_source]
|
|
||||||
mov edx, [dc_iscale]
|
|
||||||
vlt1pitch1: sub edi, 0x88888888
|
|
||||||
mov eax, [dc_colormap]
|
|
||||||
|
|
||||||
loop2:
|
|
||||||
movzx ecx, BYTE [esi+ecx]
|
|
||||||
add ebx, edx
|
|
||||||
vlt1pitch2: add edi, 0x88888888
|
|
||||||
mov cl,[eax+ecx]
|
|
||||||
mov [edi],cl
|
|
||||||
mov ecx,ebx
|
|
||||||
shift12: shr ecx,16
|
|
||||||
dec ebp
|
|
||||||
jnz loop2
|
|
||||||
|
|
||||||
mov eax,ebx
|
|
||||||
pop esi
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
%ifdef M_TARGET_MACHO
|
|
||||||
GLOBAL _rtext_tmap3_end
|
|
||||||
_rtext_tmap3_end:
|
|
||||||
%endif
|
|
|
@ -1,150 +0,0 @@
|
||||||
%ifnidn __OUTPUT_FORMAT__,win64
|
|
||||||
%error tmap3.asm is for Win64 output. You should use tmap.s for other systems.
|
|
||||||
%endif
|
|
||||||
|
|
||||||
BITS 64
|
|
||||||
DEFAULT REL
|
|
||||||
|
|
||||||
EXTERN vplce
|
|
||||||
EXTERN vince
|
|
||||||
EXTERN palookupoffse
|
|
||||||
EXTERN bufplce
|
|
||||||
|
|
||||||
EXTERN dc_count
|
|
||||||
EXTERN dc_dest
|
|
||||||
EXTERN dc_pitch
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
GLOBAL ASM_PatchPitch
|
|
||||||
ASM_PatchPitch:
|
|
||||||
mov ecx, [dc_pitch]
|
|
||||||
mov [pm+3], ecx
|
|
||||||
mov [vltpitch+3], ecx
|
|
||||||
ret
|
|
||||||
align 16
|
|
||||||
|
|
||||||
GLOBAL setupvlinetallasm
|
|
||||||
setupvlinetallasm:
|
|
||||||
mov [shifter1+2], cl
|
|
||||||
mov [shifter2+2], cl
|
|
||||||
mov [shifter3+2], cl
|
|
||||||
mov [shifter4+2], cl
|
|
||||||
ret
|
|
||||||
align 16
|
|
||||||
|
|
||||||
; Yasm can't do progbits alloc exec for win64?
|
|
||||||
; Hmm, looks like it's automatic. No worries, then.
|
|
||||||
SECTION .rtext write ;progbits alloc exec
|
|
||||||
|
|
||||||
GLOBAL vlinetallasm4
|
|
||||||
PROC_FRAME vlinetallasm4
|
|
||||||
rex_push_reg rbx
|
|
||||||
push_reg rdi
|
|
||||||
push_reg r15
|
|
||||||
push_reg r14
|
|
||||||
push_reg r13
|
|
||||||
push_reg r12
|
|
||||||
push_reg rbp
|
|
||||||
push_reg rsi
|
|
||||||
alloc_stack 8 ; Stack must be 16-byte aligned
|
|
||||||
END_PROLOGUE
|
|
||||||
; rax = bufplce base address
|
|
||||||
; rbx, rbp, rsi = scratch (texel/pixel for columns 2, 3 and 4)
|
|
||||||
; rcx = offset from rdi/count (negative)
|
|
||||||
; edx/rdx = scratch
|
|
||||||
; rdi = bottom of columns to write to
|
|
||||||
; r8d-r11d = column offsets
|
|
||||||
; r12-r15 = palookupoffse[0] - palookupoffse[3]
|
|
||||||
|
|
||||||
mov ecx, [dc_count]
|
|
||||||
mov rdi, [dc_dest]
|
|
||||||
test ecx, ecx
|
|
||||||
jle vltepilog ; count must be positive
|
|
||||||
|
|
||||||
mov rax, [bufplce]
|
|
||||||
mov r8, [bufplce+8]
|
|
||||||
sub r8, rax
|
|
||||||
mov r9, [bufplce+16]
|
|
||||||
sub r9, rax
|
|
||||||
mov r10, [bufplce+24]
|
|
||||||
sub r10, rax
|
|
||||||
mov [source2+4], r8d
|
|
||||||
mov [source3+4], r9d
|
|
||||||
mov [source4+4], r10d
|
|
||||||
|
|
||||||
pm: imul rcx, 320
|
|
||||||
|
|
||||||
mov r12, [palookupoffse]
|
|
||||||
mov r13, [palookupoffse+8]
|
|
||||||
mov r14, [palookupoffse+16]
|
|
||||||
mov r15, [palookupoffse+24]
|
|
||||||
|
|
||||||
mov r8d, [vince]
|
|
||||||
mov r9d, [vince+4]
|
|
||||||
mov r10d, [vince+8]
|
|
||||||
mov r11d, [vince+12]
|
|
||||||
mov [step1+3], r8d
|
|
||||||
mov [step2+3], r9d
|
|
||||||
mov [step3+3], r10d
|
|
||||||
mov [step4+3], r11d
|
|
||||||
|
|
||||||
add rdi, rcx
|
|
||||||
neg rcx
|
|
||||||
|
|
||||||
mov r8d, [vplce]
|
|
||||||
mov r9d, [vplce+4]
|
|
||||||
mov r10d, [vplce+8]
|
|
||||||
mov r11d, [vplce+12]
|
|
||||||
jmp loopit
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
loopit:
|
|
||||||
mov edx, r8d
|
|
||||||
shifter1: shr edx, 24
|
|
||||||
step1: add r8d, 0x88888888
|
|
||||||
movzx edx, BYTE [rax+rdx]
|
|
||||||
mov ebx, r9d
|
|
||||||
mov dl, [r12+rdx]
|
|
||||||
shifter2: shr ebx, 24
|
|
||||||
step2: add r9d, 0x88888888
|
|
||||||
source2: movzx ebx, BYTE [rax+rbx+0x88888888]
|
|
||||||
mov ebp, r10d
|
|
||||||
mov bl, [r13+rbx]
|
|
||||||
shifter3: shr ebp, 24
|
|
||||||
step3: add r10d, 0x88888888
|
|
||||||
source3: movzx ebp, BYTE [rax+rbp+0x88888888]
|
|
||||||
mov esi, r11d
|
|
||||||
mov bpl, BYTE [r14+rbp]
|
|
||||||
shifter4: shr esi, 24
|
|
||||||
step4: add r11d, 0x88888888
|
|
||||||
source4: movzx esi, BYTE [rax+rsi+0x88888888]
|
|
||||||
mov [rdi+rcx], dl
|
|
||||||
mov [rdi+rcx+1], bl
|
|
||||||
mov sil, BYTE [r15+rsi]
|
|
||||||
mov [rdi+rcx+2], bpl
|
|
||||||
mov [rdi+rcx+3], sil
|
|
||||||
|
|
||||||
vltpitch: add rcx, 320
|
|
||||||
jl loopit
|
|
||||||
|
|
||||||
mov [vplce], r8d
|
|
||||||
mov [vplce+4], r9d
|
|
||||||
mov [vplce+8], r10d
|
|
||||||
mov [vplce+12], r11d
|
|
||||||
|
|
||||||
vltepilog:
|
|
||||||
add rsp, 8
|
|
||||||
pop rsi
|
|
||||||
pop rbp
|
|
||||||
pop r12
|
|
||||||
pop r13
|
|
||||||
pop r14
|
|
||||||
pop r15
|
|
||||||
pop rdi
|
|
||||||
pop rbx
|
|
||||||
ret
|
|
||||||
vlinetallasm4_end:
|
|
||||||
ENDPROC_FRAME
|
|
||||||
ALIGN 16
|
|
||||||
|
|
|
@ -1,141 +0,0 @@
|
||||||
#%include "valgrind.inc"
|
|
||||||
|
|
||||||
.section .text
|
|
||||||
|
|
||||||
.globl ASM_PatchPitch
|
|
||||||
ASM_PatchPitch:
|
|
||||||
movl dc_pitch(%rip), %ecx
|
|
||||||
movl %ecx, pm+3(%rip)
|
|
||||||
movl %ecx, vltpitch+3(%rip)
|
|
||||||
# selfmod pm, vltpitch+6
|
|
||||||
ret
|
|
||||||
.align 16
|
|
||||||
|
|
||||||
.globl setupvlinetallasm
|
|
||||||
setupvlinetallasm:
|
|
||||||
movb %dil, shifter1+2(%rip)
|
|
||||||
movb %dil, shifter2+2(%rip)
|
|
||||||
movb %dil, shifter3+2(%rip)
|
|
||||||
movb %dil, shifter4+2(%rip)
|
|
||||||
# selfmod shifter1, shifter4+3
|
|
||||||
ret
|
|
||||||
.align 16
|
|
||||||
|
|
||||||
.section .rtext,"awx"
|
|
||||||
|
|
||||||
.globl vlinetallasm4
|
|
||||||
.type vlinetallasm4,@function
|
|
||||||
vlinetallasm4:
|
|
||||||
.cfi_startproc
|
|
||||||
push %rbx
|
|
||||||
push %rdi
|
|
||||||
push %r15
|
|
||||||
push %r14
|
|
||||||
push %r13
|
|
||||||
push %r12
|
|
||||||
push %rbp
|
|
||||||
push %rsi
|
|
||||||
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
|
|
||||||
.cfi_adjust_cfa_offset 8
|
|
||||||
|
|
||||||
# rax = bufplce base address
|
|
||||||
# rbx, rbp, rsi = scratch (texel/pixel for columns 2, 3 and 4)
|
|
||||||
# rcx = offset from rdi/count (negative)
|
|
||||||
# edx/rdx = scratch
|
|
||||||
# rdi = bottom of columns to write to
|
|
||||||
# r8d-r11d = column offsets
|
|
||||||
# r12-r15 = palookupoffse[0] - palookupoffse[3]
|
|
||||||
|
|
||||||
movl dc_count(%rip), %ecx
|
|
||||||
movq dc_dest(%rip), %rdi
|
|
||||||
testl %ecx, %ecx
|
|
||||||
jle vltepilog # count must be positive
|
|
||||||
|
|
||||||
movq bufplce(%rip), %rax
|
|
||||||
movq bufplce+8(%rip), %r8
|
|
||||||
subq %rax, %r8
|
|
||||||
movq bufplce+16(%rip), %r9
|
|
||||||
subq %rax, %r9
|
|
||||||
movq bufplce+24(%rip), %r10
|
|
||||||
subq %rax, %r10
|
|
||||||
movl %r8d, source2+4(%rip)
|
|
||||||
movl %r9d, source3+4(%rip)
|
|
||||||
movl %r10d, source4+4(%rip)
|
|
||||||
|
|
||||||
pm: imulq $320, %rcx
|
|
||||||
|
|
||||||
movq palookupoffse(%rip), %r12
|
|
||||||
movq palookupoffse+8(%rip), %r13
|
|
||||||
movq palookupoffse+16(%rip), %r14
|
|
||||||
movq palookupoffse+24(%rip), %r15
|
|
||||||
|
|
||||||
movl vince(%rip), %r8d
|
|
||||||
movl vince+4(%rip), %r9d
|
|
||||||
movl vince+8(%rip), %r10d
|
|
||||||
movl vince+12(%rip), %r11d
|
|
||||||
movl %r8d, step1+3(%rip)
|
|
||||||
movl %r9d, step2+3(%rip)
|
|
||||||
movl %r10d, step3+3(%rip)
|
|
||||||
movl %r11d, step4+3(%rip)
|
|
||||||
|
|
||||||
addq %rcx, %rdi
|
|
||||||
negq %rcx
|
|
||||||
|
|
||||||
movl vplce(%rip), %r8d
|
|
||||||
movl vplce+4(%rip), %r9d
|
|
||||||
movl vplce+8(%rip), %r10d
|
|
||||||
movl vplce+12(%rip), %r11d
|
|
||||||
# selfmod loopit, vltepilog
|
|
||||||
jmp loopit
|
|
||||||
|
|
||||||
.align 16
|
|
||||||
loopit:
|
|
||||||
movl %r8d, %edx
|
|
||||||
shifter1: shrl $24, %edx
|
|
||||||
step1: addl $0x44444444, %r8d
|
|
||||||
movzbl (%rax,%rdx), %edx
|
|
||||||
movl %r9d, %ebx
|
|
||||||
movb (%r12,%rdx), %dl
|
|
||||||
shifter2: shrl $24, %ebx
|
|
||||||
step2: addl $0x44444444, %r9d
|
|
||||||
source2: movzbl 0x44444444(%rax,%rbx), %ebx
|
|
||||||
movl %r10d, %ebp
|
|
||||||
movb (%r13,%rbx), %bl
|
|
||||||
shifter3: shr $24, %ebp
|
|
||||||
step3: addl $0x44444444, %r10d
|
|
||||||
source3: movzbl 0x44444444(%rax,%rbp), %ebp
|
|
||||||
movl %r11d, %esi
|
|
||||||
movb (%r14,%rbp), %bpl
|
|
||||||
shifter4: shr $24, %esi
|
|
||||||
step4: add $0x44444444, %r11d
|
|
||||||
source4: movzbl 0x44444444(%rax,%rsi), %esi
|
|
||||||
movb %dl, (%rdi,%rcx)
|
|
||||||
movb %bl, 1(%rdi,%rcx)
|
|
||||||
movb (%r15,%rsi), %sil
|
|
||||||
movb %bpl, 2(%rdi,%rcx)
|
|
||||||
movb %sil, 3(%rdi,%rcx)
|
|
||||||
|
|
||||||
vltpitch: addq $320, %rcx
|
|
||||||
jl loopit
|
|
||||||
|
|
||||||
movl %r8d, vplce(%rip)
|
|
||||||
movl %r9d, vplce+4(%rip)
|
|
||||||
movl %r10d, vplce+8(%rip)
|
|
||||||
movl %r11d, vplce+12(%rip)
|
|
||||||
|
|
||||||
vltepilog:
|
|
||||||
addq $8, %rsp
|
|
||||||
.cfi_adjust_cfa_offset -8
|
|
||||||
pop %rsi
|
|
||||||
pop %rbp
|
|
||||||
pop %r12
|
|
||||||
pop %r13
|
|
||||||
pop %r14
|
|
||||||
pop %r15
|
|
||||||
pop %rdi
|
|
||||||
pop %rbx
|
|
||||||
ret
|
|
||||||
.cfi_endproc
|
|
||||||
.align 16
|
|
||||||
|
|
||||||
|
|
|
@ -1030,7 +1030,7 @@ void D_DoomLoop ()
|
||||||
catch (CVMAbortException &error)
|
catch (CVMAbortException &error)
|
||||||
{
|
{
|
||||||
error.MaybePrintMessage();
|
error.MaybePrintMessage();
|
||||||
Printf("%s", error.stacktrace);
|
Printf("%s", error.stacktrace.GetChars());
|
||||||
D_ErrorCleanup();
|
D_ErrorCleanup();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,62 +48,6 @@
|
||||||
class PClassActor;
|
class PClassActor;
|
||||||
typedef TMap<int, PClassActor *> FClassMap;
|
typedef TMap<int, PClassActor *> FClassMap;
|
||||||
|
|
||||||
// Since this file is included by everything, it seems an appropriate place
|
|
||||||
// to check the NOASM/USEASM macros.
|
|
||||||
|
|
||||||
// There are three assembly-related macros:
|
|
||||||
//
|
|
||||||
// NOASM - Assembly code is disabled
|
|
||||||
// X86_ASM - Using ia32 assembly code
|
|
||||||
// X64_ASM - Using amd64 assembly code
|
|
||||||
//
|
|
||||||
// Note that these relate only to using the pure assembly code. Inline
|
|
||||||
// assembly may still be used without respect to these macros, as
|
|
||||||
// deemed appropriate.
|
|
||||||
|
|
||||||
#ifndef NOASM
|
|
||||||
// Select the appropriate type of assembly code to use.
|
|
||||||
|
|
||||||
#if defined(_M_IX86) || defined(__i386__)
|
|
||||||
|
|
||||||
#define X86_ASM
|
|
||||||
#ifdef X64_ASM
|
|
||||||
#undef X64_ASM
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif defined(_M_X64) || defined(__amd64__)
|
|
||||||
|
|
||||||
#define X64_ASM
|
|
||||||
#ifdef X86_ASM
|
|
||||||
#undef X86_ASM
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define NOASM
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef NOASM
|
|
||||||
// Ensure no assembly macros are defined if NOASM is defined.
|
|
||||||
|
|
||||||
#ifdef X86_ASM
|
|
||||||
#undef X86_ASM
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef X64_ASM
|
|
||||||
#undef X64_ASM
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Only use SSE intrinsics on Intel architecture
|
|
||||||
#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__)
|
|
||||||
#define NO_SSE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#define NOVTABLE __declspec(novtable)
|
#define NOVTABLE __declspec(novtable)
|
||||||
|
|
|
@ -1,3 +1,36 @@
|
||||||
|
/*
|
||||||
|
** r_draw.cpp
|
||||||
|
**
|
||||||
|
**---------------------------------------------------------------------------
|
||||||
|
** Copyright 1998-2016 Randy Heit
|
||||||
|
** Copyright 2016 Magnus Norddahl
|
||||||
|
** All rights reserved.
|
||||||
|
**
|
||||||
|
** Redistribution and use in source and binary forms, with or without
|
||||||
|
** modification, are permitted provided that the following conditions
|
||||||
|
** are met:
|
||||||
|
**
|
||||||
|
** 1. Redistributions of source code must retain the above copyright
|
||||||
|
** notice, this list of conditions and the following disclaimer.
|
||||||
|
** 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
** notice, this list of conditions and the following disclaimer in the
|
||||||
|
** documentation and/or other materials provided with the distribution.
|
||||||
|
** 3. The name of the author may not be used to endorse or promote products
|
||||||
|
** derived from this software without specific prior written permission.
|
||||||
|
**
|
||||||
|
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**---------------------------------------------------------------------------
|
||||||
|
**
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,36 @@
|
||||||
|
/*
|
||||||
|
** r_draw_pal.cpp
|
||||||
|
**
|
||||||
|
**---------------------------------------------------------------------------
|
||||||
|
** Copyright 1998-2016 Randy Heit
|
||||||
|
** Copyright 2016 Magnus Norddahl
|
||||||
|
** All rights reserved.
|
||||||
|
**
|
||||||
|
** Redistribution and use in source and binary forms, with or without
|
||||||
|
** modification, are permitted provided that the following conditions
|
||||||
|
** are met:
|
||||||
|
**
|
||||||
|
** 1. Redistributions of source code must retain the above copyright
|
||||||
|
** notice, this list of conditions and the following disclaimer.
|
||||||
|
** 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
** notice, this list of conditions and the following disclaimer in the
|
||||||
|
** documentation and/or other materials provided with the distribution.
|
||||||
|
** 3. The name of the author may not be used to endorse or promote products
|
||||||
|
** derived from this software without specific prior written permission.
|
||||||
|
**
|
||||||
|
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||||
|
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||||
|
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||||
|
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||||
|
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||||
|
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**---------------------------------------------------------------------------
|
||||||
|
**
|
||||||
|
*/
|
||||||
|
|
||||||
#include "templates.h"
|
#include "templates.h"
|
||||||
#include "doomtype.h"
|
#include "doomtype.h"
|
||||||
|
|
1135
src/r_drawt.cpp
1135
src/r_drawt.cpp
File diff suppressed because it is too large
Load diff
|
@ -825,9 +825,6 @@ void R_SetupBuffer ()
|
||||||
{
|
{
|
||||||
dc_pitch = pitch;
|
dc_pitch = pitch;
|
||||||
R_InitFuzzTable (pitch);
|
R_InitFuzzTable (pitch);
|
||||||
#if defined(X86_ASM) || defined(X64_ASM)
|
|
||||||
ASM_PatchPitch ();
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
dc_destorg = lineptr;
|
dc_destorg = lineptr;
|
||||||
dc_destheight = RenderTarget->GetHeight() - viewwindowy;
|
dc_destheight = RenderTarget->GetHeight() - viewwindowy;
|
||||||
|
|
|
@ -106,20 +106,11 @@ CCMD (bumpgamma)
|
||||||
/* Palette management stuff */
|
/* Palette management stuff */
|
||||||
/****************************/
|
/****************************/
|
||||||
|
|
||||||
extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal);
|
|
||||||
|
|
||||||
int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num)
|
int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num)
|
||||||
{
|
{
|
||||||
#ifdef X86_ASM
|
|
||||||
if (CPU.bMMX)
|
|
||||||
{
|
|
||||||
int pre = 256 - num - first;
|
|
||||||
return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
const PalEntry *pal = (const PalEntry *)pal_in;
|
const PalEntry *pal = (const PalEntry *)pal_in;
|
||||||
int bestcolor = first;
|
int bestcolor = first;
|
||||||
int bestdist = 257*257+257*257+257*257;
|
int bestdist = 257 * 257 + 257 * 257 + 257 * 257;
|
||||||
|
|
||||||
for (int color = first; color < num; color++)
|
for (int color = first; color < num; color++)
|
||||||
{
|
{
|
||||||
|
@ -384,8 +375,8 @@ void InitPalette ()
|
||||||
R_InitColormaps ();
|
R_InitColormaps ();
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||||
extern void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||||
|
|
||||||
void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||||
{
|
{
|
||||||
|
@ -395,6 +386,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
||||||
{
|
{
|
||||||
memcpy (to, from, count * sizeof(DWORD));
|
memcpy (to, from, count * sizeof(DWORD));
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else if (a == 256)
|
else if (a == 256)
|
||||||
{
|
{
|
||||||
|
@ -405,6 +397,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
||||||
{
|
{
|
||||||
to[i] = t;
|
to[i] = t;
|
||||||
}
|
}
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
#if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__)
|
#if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__)
|
||||||
else if (CPU.bSSE2)
|
else if (CPU.bSSE2)
|
||||||
|
@ -423,7 +416,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef X86_ASM
|
#if defined(_M_IX86) || defined(__i386__)
|
||||||
else if (CPU.bMMX)
|
else if (CPU.bMMX)
|
||||||
{
|
{
|
||||||
if (count >= 4)
|
if (count >= 4)
|
||||||
|
|
|
@ -519,10 +519,6 @@ void V_RefreshViewBorder ();
|
||||||
|
|
||||||
void V_SetBorderNeedRefresh();
|
void V_SetBorderNeedRefresh();
|
||||||
|
|
||||||
#if defined(X86_ASM) || defined(X64_ASM)
|
|
||||||
extern "C" void ASM_PatchPitch (void);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int CheckRatio (int width, int height, int *trueratio=NULL);
|
int CheckRatio (int width, int height, int *trueratio=NULL);
|
||||||
static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); }
|
static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); }
|
||||||
inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; }
|
inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; }
|
||||||
|
|
23
src/x86.cpp
23
src/x86.cpp
|
@ -227,10 +227,9 @@ void DumpCPUInfo(const CPUInfo *cpu)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if !defined(__amd64__) && !defined(_M_X64)
|
||||||
// Compiler output for this function is crap compared to the assembly
|
|
||||||
// version, which is why it isn't used.
|
void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||||
void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
|
||||||
{
|
{
|
||||||
__m64 blendcolor;
|
__m64 blendcolor;
|
||||||
__m64 blendalpha;
|
__m64 blendalpha;
|
||||||
|
@ -272,9 +271,6 @@ void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef X86_ASM
|
|
||||||
extern "C" void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||||
{
|
{
|
||||||
|
@ -288,17 +284,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
||||||
|
|
||||||
unaligned = ((size_t)from | (size_t)to) & 0xF;
|
unaligned = ((size_t)from | (size_t)to) & 0xF;
|
||||||
|
|
||||||
#ifdef X86_ASM
|
|
||||||
// For unaligned accesses, the assembly MMX version is slightly faster.
|
|
||||||
// Note that using unaligned SSE loads and stores is still faster than
|
|
||||||
// the compiler-generated MMX version.
|
|
||||||
if (unaligned)
|
|
||||||
{
|
|
||||||
DoBlending_MMX(from, to, count, r, g, b, a);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__amd64__) || defined(_M_X64)
|
#if defined(__amd64__) || defined(_M_X64)
|
||||||
long long color;
|
long long color;
|
||||||
|
|
||||||
|
@ -326,7 +311,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
||||||
|
|
||||||
zero = _mm_setzero_si128();
|
zero = _mm_setzero_si128();
|
||||||
|
|
||||||
#ifndef X86_ASM
|
|
||||||
if (unaligned)
|
if (unaligned)
|
||||||
{
|
{
|
||||||
for (count >>= 2; count > 0; --count)
|
for (count >>= 2; count > 0; --count)
|
||||||
|
@ -346,7 +330,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
for (count >>= 2; count > 0; --count)
|
for (count >>= 2; count > 0; --count)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue