mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-26 05:51:52 +00:00
Merge remote-tracking branch 'zdoom/master' into qzdoom
# Conflicts: # src/CMakeLists.txt # src/doomtype.h # src/r_drawt.cpp
This commit is contained in:
commit
9ed02a6aec
17 changed files with 76 additions and 4681 deletions
|
@ -15,12 +15,6 @@ include( CheckLibraryExists )
|
|||
include( FindPkgConfig )
|
||||
include( FindOpenGL )
|
||||
|
||||
if( NOT APPLE )
|
||||
option( NO_ASM "Disable assembly code" OFF )
|
||||
else()
|
||||
# At the moment asm code doesn't work with OS X, so disable by default
|
||||
option( NO_ASM "Disable assembly code" ON )
|
||||
endif()
|
||||
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||
option( NO_STRIP "Do not strip Release or MinSizeRel builds" )
|
||||
# At least some versions of Xcode fail if you strip with the linker
|
||||
|
@ -115,7 +109,6 @@ if( WIN32 )
|
|||
)
|
||||
set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc )
|
||||
set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib )
|
||||
set( NASM_NAMES nasmw nasm )
|
||||
|
||||
find_path( D3D_INCLUDE_DIR d3d9.h
|
||||
PATHS ENV DXSDK_DIR
|
||||
|
@ -240,7 +233,6 @@ else()
|
|||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set( NASM_NAMES nasm )
|
||||
|
||||
if( NO_GTK )
|
||||
add_definitions( -DNO_GTK )
|
||||
|
@ -388,105 +380,6 @@ endif()
|
|||
|
||||
find_package( FluidSynth )
|
||||
|
||||
# Search for NASM
|
||||
|
||||
if( NOT NO_ASM )
|
||||
if( UNIX AND X64 )
|
||||
find_program( GAS_PATH as )
|
||||
|
||||
if( GAS_PATH )
|
||||
set( ASSEMBLER ${GAS_PATH} )
|
||||
else()
|
||||
message( STATUS "Could not find as. Disabling assembly code." )
|
||||
set( NO_ASM ON )
|
||||
endif()
|
||||
else()
|
||||
find_program( NASM_PATH NAMES ${NASM_NAMES} )
|
||||
find_program( YASM_PATH yasm )
|
||||
|
||||
if( X64 )
|
||||
if( YASM_PATH )
|
||||
set( ASSEMBLER ${YASM_PATH} )
|
||||
else()
|
||||
message( STATUS "Could not find YASM. Disabling assembly code." )
|
||||
set( NO_ASM ON )
|
||||
endif()
|
||||
else()
|
||||
if( NASM_PATH )
|
||||
set( ASSEMBLER ${NASM_PATH} )
|
||||
else()
|
||||
message( STATUS "Could not find NASM. Disabling assembly code." )
|
||||
set( NO_ASM ON )
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# I think the only reason there was a version requirement was because the
|
||||
# executable name for Windows changed from 0.x to 2.0, right? This is
|
||||
# how to do it in case I need to do something similar later.
|
||||
|
||||
# execute_process( COMMAND ${NASM_PATH} -v
|
||||
# OUTPUT_VARIABLE NASM_VER_STRING )
|
||||
# string( REGEX REPLACE ".*version ([0-9]+[.][0-9]+).*" "\\1" NASM_VER "${NASM_VER_STRING}" )
|
||||
# if( NASM_VER VERSION_LESS 2 )
|
||||
# message( SEND_ERROR "NASM version should be 2 or later. (Installed version is ${NASM_VER}.)" )
|
||||
# endif()
|
||||
endif()
|
||||
|
||||
if( NOT NO_ASM )
|
||||
# Valgrind support is meaningless without assembly code.
|
||||
if( VALGRIND )
|
||||
add_definitions( -DVALGRIND_AWARE=1 )
|
||||
# If you're Valgrinding, you probably want to keep symbols around.
|
||||
set( NO_STRIP ON )
|
||||
endif()
|
||||
|
||||
# Tell CMake how to assemble our files
|
||||
if( UNIX )
|
||||
set( ASM_OUTPUT_EXTENSION .o )
|
||||
if( X64 )
|
||||
set( ASM_FLAGS )
|
||||
set( ASM_SOURCE_EXTENSION .s )
|
||||
else()
|
||||
if( APPLE )
|
||||
set( ASM_FLAGS -fmacho -DM_TARGET_MACHO )
|
||||
else()
|
||||
set( ASM_FLAGS -felf -DM_TARGET_LINUX )
|
||||
endif()
|
||||
set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ )
|
||||
set( ASM_SOURCE_EXTENSION .asm )
|
||||
endif()
|
||||
else()
|
||||
set( ASM_OUTPUT_EXTENSION .obj )
|
||||
set( ASM_SOURCE_EXTENSION .asm )
|
||||
if( X64 )
|
||||
set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 )
|
||||
else()
|
||||
set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ )
|
||||
endif()
|
||||
endif()
|
||||
if( WIN32 AND NOT X64 )
|
||||
set( FIXRTEXT fixrtext )
|
||||
else()
|
||||
set( FIXRTEXT "" )
|
||||
endif()
|
||||
message( STATUS "Selected assembler: ${ASSEMBLER}" )
|
||||
# ADD_ASM_FILE( indir infile )
#
# Registers a build rule that assembles
#   ${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}
# into
#   ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}
# and appends the resulting object file to ASM_SOURCES.
#
#   indir  - source subdirectory, relative to the current source directory
#   infile - base name of the assembly file, without extension
#
# Reads ASSEMBLER, ASM_FLAGS, ASM_SOURCE_EXTENSION, ASM_OUTPUT_EXTENSION and
# FIXRTEXT, all of which must be set before the macro is invoked.
#
# This stays a macro (not a function) because it sets ASM_OUTPUT_<infile>,
# FIXRTEXT_<infile> and ASM_SOURCES directly in the caller's scope.
macro( ADD_ASM_FILE indir infile )
	set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" )

	# On 32-bit Windows the freshly assembled object is post-processed by the
	# fixrtext tool; everywhere else the extra step is an empty command.
	if( WIN32 AND NOT X64 )
		set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" )
	else()
		set( FIXRTEXT_${infile} COMMAND "" )
	endif()

	add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
		COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
		COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}"
		${FIXRTEXT_${infile}}
		# Depend on the file that is actually assembled. The extension is not
		# always .asm (it is .s when ASM_SOURCE_EXTENSION is set that way), so a
		# hard-coded ".asm" here would miss edits to the real input.
		DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}" ${FIXRTEXT} )

	list( APPEND ASM_SOURCES "${ASM_OUTPUT_${infile}}" )
endmacro()
|
||||
endif()
|
||||
|
||||
# Decide on SSE setup
|
||||
|
||||
set( SSE_MATTERS NO )
|
||||
|
@ -797,43 +690,6 @@ else()
|
|||
set( OTHER_SYSTEM_SOURCES ${PLAT_WIN32_SOURCES} ${PLAT_OSX_SOURCES} ${PLAT_COCOA_SOURCES} )
|
||||
endif()
|
||||
|
||||
if( HAVE_MMX )
|
||||
add_definitions( -DHAVE_MMX=1 )
|
||||
|
||||
set( SYSTEM_SOURCES ${SYSTEM_SOURCES}
|
||||
gl/hqnx_asm/hq2x_asm.cpp
|
||||
gl/hqnx_asm/hq3x_asm.cpp
|
||||
gl/hqnx_asm/hq4x_asm.cpp
|
||||
gl/hqnx_asm/hqnx_asm_Image.cpp)
|
||||
|
||||
if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||
set_source_files_properties(
|
||||
gl/hqnx_asm/hq2x_asm.cpp
|
||||
gl/hqnx_asm/hq3x_asm.cpp
|
||||
gl/hqnx_asm/hq4x_asm.cpp
|
||||
gl/textures/gl_hqresize.cpp
|
||||
PROPERTIES COMPILE_FLAGS "-mmmx" )
|
||||
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||
endif( HAVE_MMX )
|
||||
|
||||
if( NOT ASM_SOURCES )
|
||||
set( ASM_SOURCES "" )
|
||||
endif()
|
||||
|
||||
if( NO_ASM )
|
||||
add_definitions( -DNOASM )
|
||||
else()
|
||||
if( X64 )
|
||||
ADD_ASM_FILE( asm_x86_64 tmap3 )
|
||||
else()
|
||||
ADD_ASM_FILE( asm_ia32 a )
|
||||
ADD_ASM_FILE( asm_ia32 misc )
|
||||
ADD_ASM_FILE( asm_ia32 tmap )
|
||||
ADD_ASM_FILE( asm_ia32 tmap2 )
|
||||
ADD_ASM_FILE( asm_ia32 tmap3 )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
||||
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
||||
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
||||
|
@ -941,16 +797,6 @@ set( NOT_COMPILED_SOURCE_FILES
|
|||
scripting/zscript/zcc-parse.lemon
|
||||
zcc-parse.c
|
||||
zcc-parse.h
|
||||
|
||||
# We could have the ASM macro add these files, but it wouldn't add all
|
||||
# platforms.
|
||||
asm_ia32/a.asm
|
||||
asm_ia32/misc.asm
|
||||
asm_ia32/tmap.asm
|
||||
asm_ia32/tmap2.asm
|
||||
asm_ia32/tmap3.asm
|
||||
asm_x86_64/tmap3.asm
|
||||
asm_x86_64/tmap3.s
|
||||
)
|
||||
|
||||
set( FASTMATH_PCH_SOURCES
|
||||
|
@ -1397,7 +1243,6 @@ add_executable( zdoom WIN32 MACOSX_BUNDLE
|
|||
${HEADER_FILES}
|
||||
${NOT_COMPILED_SOURCE_FILES}
|
||||
__autostart.cpp
|
||||
${ASM_SOURCES}
|
||||
${SYSTEM_SOURCES}
|
||||
${X86_SOURCES}
|
||||
${FASTMATH_SOURCES}
|
||||
|
@ -1568,8 +1413,6 @@ install(TARGETS zdoom
|
|||
DESTINATION ${INSTALL_PATH}
|
||||
COMPONENT "Game executable")
|
||||
|
||||
source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+")
|
||||
source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+")
|
||||
source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+")
|
||||
source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+")
|
||||
source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h)
|
||||
|
|
|
@ -1,812 +0,0 @@
|
|||
; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman
|
||||
; Ken Silverman's official web site: "http://www.advsys.net/ken"
|
||||
; See the included license file "BUILDLIC.TXT" for license info.
|
||||
; This file has been modified from Ken Silverman's original release
|
||||
|
||||
%include "valgrind.inc"
|
||||
|
||||
SECTION .data
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
%define ylookup _ylookup
|
||||
%define vince _vince
|
||||
%define vplce _vplce
|
||||
%define palookupoffse _palookupoffse
|
||||
%define bufplce _bufplce
|
||||
%define dc_iscale _dc_iscale
|
||||
%define dc_colormap _dc_colormap
|
||||
%define dc_count _dc_count
|
||||
%define dc_dest _dc_dest
|
||||
%define dc_source _dc_source
|
||||
%define dc_texturefrac _dc_texturefrac
|
||||
|
||||
%define setupvlineasm _setupvlineasm
|
||||
%define prevlineasm1 _prevlineasm1
|
||||
%define vlineasm1 _vlineasm1
|
||||
%define vlineasm4 _vlineasm4
|
||||
|
||||
%define setupmvlineasm _setupmvlineasm
|
||||
%define mvlineasm1 _mvlineasm1
|
||||
%define mvlineasm4 _mvlineasm4
|
||||
|
||||
%define R_SetupDrawSlabA _R_SetupDrawSlabA
|
||||
%define R_DrawSlabA _R_DrawSlabA
|
||||
%endif
|
||||
|
||||
EXTERN ylookup ; near
|
||||
|
||||
EXTERN vplce ; near
|
||||
EXTERN vince ; near
|
||||
EXTERN palookupoffse ; near
|
||||
EXTERN bufplce ; near
|
||||
|
||||
EXTERN dc_iscale
|
||||
EXTERN dc_colormap
|
||||
EXTERN dc_count
|
||||
EXTERN dc_dest
|
||||
EXTERN dc_source
|
||||
EXTERN dc_texturefrac
|
||||
|
||||
SECTION .text
|
||||
|
||||
; ---------------------------------------------------------------------------
; setvlinebpl_ / setdrawslabbpl
;
; Self-modifying setup code. Stores eax into the 32-bit operand (at offset +2
; from the label) of each instruction that adjusts edi at the patch sites:
;   fixchain1a, fixchain1b, fixchain2a, fixchain1m, fixchain2ma, fixchain2mb
;   voxbpl1 .. voxbpl8
;
; In:  eax = value to patch in
;            (presumably the destination pitch in bytes - TODO confirm
;            against the caller)
; Out: nothing; eax is not modified by the visible instructions.
;
; After each group of stores the patched range is handed to the selfmod
; macro (from valgrind.inc, not visible in this file).
; ---------------------------------------------------------------------------
ALIGN 16
GLOBAL setvlinebpl_
setvlinebpl_:
	mov	[fixchain1a+2], eax
	mov	[fixchain1b+2], eax
	mov	[fixchain2a+2], eax
	mov	[fixchain1m+2], eax
	mov	[fixchain2ma+2], eax
	mov	[fixchain2mb+2], eax
	selfmod fixchain1a, fixchain2mb+6

	; There is no ret before the next label, so a call to setvlinebpl_
	; continues straight into setdrawslabbpl and patches the voxel slab
	; sites with the same value.
setdrawslabbpl:
	mov	dword [voxbpl1+2], eax
	mov	dword [voxbpl2+2], eax
	mov	dword [voxbpl3+2], eax
	mov	dword [voxbpl4+2], eax
	mov	dword [voxbpl5+2], eax
	mov	dword [voxbpl6+2], eax
	mov	dword [voxbpl7+2], eax
	mov	dword [voxbpl8+2], eax
	; The range must end at the last patched site, voxbpl8. The previous
	; operand "voxpl8" named a label that is not defined anywhere in this
	; file, which would be an undefined-symbol error whenever selfmod
	; expands to real code.
	selfmod voxbpl1, voxbpl8+6
	ret
|
||||
|
||||
SECTION .data
|
||||
|
||||
lastslabcolormap:
|
||||
dd 4
|
||||
|
||||
SECTION .text
|
||||
|
||||
GLOBAL R_SetupDrawSlabA
|
||||
GLOBAL @R_SetupDrawSlabA@4
|
||||
R_SetupDrawSlabA:
|
||||
mov ecx, [esp+4]
|
||||
@R_SetupDrawSlabA@4:
|
||||
cmp [lastslabcolormap], ecx
|
||||
je .done
|
||||
mov [lastslabcolormap], ecx
|
||||
mov dword [voxpal1+2], ecx
|
||||
mov dword [voxpal2+2], ecx
|
||||
mov dword [voxpal3+2], ecx
|
||||
mov dword [voxpal4+2], ecx
|
||||
mov dword [voxpal5+2], ecx
|
||||
mov dword [voxpal6+2], ecx
|
||||
mov dword [voxpal7+2], ecx
|
||||
mov dword [voxpal8+2], ecx
|
||||
.done ret
|
||||
|
||||
|
||||
; pass it log2(texheight)
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL setupvlineasm
|
||||
setupvlineasm:
|
||||
mov ecx, [esp+4]
|
||||
|
||||
;First 2 lines for VLINEASM1, rest for VLINEASM4
|
||||
mov byte [premach3a+2], cl
|
||||
mov byte [mach3a+2], cl
|
||||
|
||||
mov byte [machvsh1+2], cl ;32-shy
|
||||
mov byte [machvsh3+2], cl ;32-shy
|
||||
mov byte [machvsh5+2], cl ;32-shy
|
||||
mov byte [machvsh6+2], cl ;32-shy
|
||||
mov ch, cl
|
||||
sub ch, 16
|
||||
mov byte [machvsh8+2], ch ;16-shy
|
||||
neg cl
|
||||
mov byte [machvsh7+2], cl ;shy
|
||||
mov byte [machvsh9+2], cl ;shy
|
||||
mov byte [machvsh10+2], cl ;shy
|
||||
mov byte [machvsh11+2], cl ;shy
|
||||
mov byte [machvsh12+2], cl ;shy
|
||||
mov eax, 1
|
||||
shl eax, cl
|
||||
dec eax
|
||||
mov dword [machvsh2+2], eax ;(1<<shy)-1
|
||||
mov dword [machvsh4+2], eax ;(1<<shy)-1
|
||||
selfmod premach3a, machvsh8+6
|
||||
ret
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
SECTION .text align=64
|
||||
%else
|
||||
SECTION .rtext progbits alloc exec write align=64
|
||||
%endif
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_a_start
|
||||
_rtext_a_start:
|
||||
%endif
|
||||
|
||||
;eax = xscale
|
||||
;ebx = palookupoffse
|
||||
;ecx = # pixels to draw-1
|
||||
;edx = texturefrac
|
||||
;esi = texturecolumn
|
||||
;edi = buffer pointer
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL prevlineasm1
|
||||
prevlineasm1:
|
||||
mov ecx, [dc_count]
|
||||
cmp ecx, 1
|
||||
ja vlineasm1
|
||||
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
add eax, edx
|
||||
mov ecx, [dc_source]
|
||||
premach3a: shr edx, 32
|
||||
push ebx
|
||||
push edi
|
||||
mov edi, [dc_colormap]
|
||||
xor ebx, ebx
|
||||
mov bl, byte [ecx+edx]
|
||||
mov ecx, [dc_dest]
|
||||
mov bl, byte [edi+ebx]
|
||||
pop edi
|
||||
mov byte [ecx], bl
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
GLOBAL vlineasm1
|
||||
ALIGN 16
|
||||
vlineasm1:
|
||||
push ebx
|
||||
push edi
|
||||
push esi
|
||||
push ebp
|
||||
mov ecx, [dc_count]
|
||||
mov ebp, [dc_colormap]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
mov esi, [dc_source]
|
||||
fixchain1a: sub edi, 320
|
||||
nop
|
||||
nop
|
||||
nop
|
||||
beginvline:
|
||||
mov ebx, edx
|
||||
mach3a: shr ebx, 32
|
||||
fixchain1b: add edi, 320
|
||||
mov bl, byte [esi+ebx]
|
||||
add edx, eax
|
||||
dec ecx
|
||||
mov bl, byte [ebp+ebx]
|
||||
mov byte [edi], bl
|
||||
jnz short beginvline
|
||||
pop ebp
|
||||
pop esi
|
||||
pop edi
|
||||
pop ebx
|
||||
mov eax, edx
|
||||
ret
|
||||
|
||||
;eax: -------temp1-------
|
||||
;ebx: -------temp2-------
|
||||
;ecx: dat dat dat dat
|
||||
;edx: ylo2 ylo4
|
||||
;esi: yhi1 yhi2
|
||||
;edi: ---videoplc/cnt----
|
||||
;ebp: yhi3 yhi4
|
||||
;esp:
|
||||
ALIGN 16
|
||||
GLOBAL vlineasm4
|
||||
vlineasm4:
|
||||
mov ecx, [dc_count]
|
||||
push ebp
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
mov edi, [dc_dest]
|
||||
|
||||
mov eax, dword [ylookup+ecx*4-4]
|
||||
add eax, edi
|
||||
mov dword [machvline4end+2], eax
|
||||
sub edi, eax
|
||||
|
||||
mov eax, dword [bufplce+0]
|
||||
mov ebx, dword [bufplce+4]
|
||||
mov ecx, dword [bufplce+8]
|
||||
mov edx, dword [bufplce+12]
|
||||
mov dword [machvbuf1+2], ecx
|
||||
mov dword [machvbuf2+2], edx
|
||||
mov dword [machvbuf3+2], eax
|
||||
mov dword [machvbuf4+2], ebx
|
||||
|
||||
mov eax, dword [palookupoffse+0]
|
||||
mov ebx, dword [palookupoffse+4]
|
||||
mov ecx, dword [palookupoffse+8]
|
||||
mov edx, dword [palookupoffse+12]
|
||||
mov dword [machvpal1+2], ecx
|
||||
mov dword [machvpal2+2], edx
|
||||
mov dword [machvpal3+2], eax
|
||||
mov dword [machvpal4+2], ebx
|
||||
|
||||
;     ┌───────────────┬───────────────┐
|
||||
;edx: │v3lo           │v1lo           │
|
||||
;     ├───────────────┴───────┬───────┤
|
||||
;esi: │v2hi  v2lo             │   v3hi│
|
||||
;     ├───────────────────────┼───────┤
|
||||
;ebp: │v0hi  v0lo             │   v1hi│
|
||||
;     └───────────────────────┴───────┘
|
||||
|
||||
mov ebp, dword [vince+0]
|
||||
mov ebx, dword [vince+4]
|
||||
mov esi, dword [vince+8]
|
||||
mov eax, dword [vince+12]
|
||||
and esi, 0fffffe00h
|
||||
and ebp, 0fffffe00h
|
||||
machvsh9: rol eax, 88h ;sh
|
||||
machvsh10: rol ebx, 88h ;sh
|
||||
mov edx, eax
|
||||
mov ecx, ebx
|
||||
shr ecx, 16
|
||||
and edx, 0ffff0000h
|
||||
add edx, ecx
|
||||
and eax, 000001ffh
|
||||
and ebx, 000001ffh
|
||||
add esi, eax
|
||||
add ebp, ebx
|
||||
;
|
||||
mov eax, edx
|
||||
and eax, 0ffff0000h
|
||||
mov dword [machvinc1+2], eax
|
||||
mov dword [machvinc2+2], esi
|
||||
mov byte [machvinc3+2], dl
|
||||
mov byte [machvinc4+2], dh
|
||||
mov dword [machvinc5+2], ebp
|
||||
|
||||
mov ebp, dword [vplce+0]
|
||||
mov ebx, dword [vplce+4]
|
||||
mov esi, dword [vplce+8]
|
||||
mov eax, dword [vplce+12]
|
||||
and esi, 0fffffe00h
|
||||
and ebp, 0fffffe00h
|
||||
machvsh11: rol eax, 88h ;sh
|
||||
machvsh12: rol ebx, 88h ;sh
|
||||
mov edx, eax
|
||||
mov ecx, ebx
|
||||
shr ecx, 16
|
||||
and edx, 0ffff0000h
|
||||
add edx, ecx
|
||||
and eax, 000001ffh
|
||||
and ebx, 000001ffh
|
||||
add esi, eax
|
||||
add ebp, ebx
|
||||
|
||||
mov ecx, esi
|
||||
selfmod beginvlineasm4, machvline4end+6
|
||||
jmp short beginvlineasm4
|
||||
ALIGN 16
|
||||
beginvlineasm4:
|
||||
machvsh1: shr ecx, 88h ;32-sh
|
||||
mov ebx, esi
|
||||
machvsh2: and ebx, 00000088h ;(1<<sh)-1
|
||||
machvinc1: add edx, 88880000h
|
||||
machvinc2: adc esi, 88888088h
|
||||
machvbuf1: mov cl, byte [ecx+88888888h]
|
||||
machvbuf2: mov bl, byte [ebx+88888888h]
|
||||
mov eax, ebp
|
||||
machvsh3: shr eax, 88h ;32-sh
|
||||
machvpal1: mov cl, byte [ecx+88888888h]
|
||||
machvpal2: mov ch, byte [ebx+88888888h]
|
||||
mov ebx, ebp
|
||||
shl ecx, 16
|
||||
machvsh4: and ebx, 00000088h ;(1<<sh)-1
|
||||
machvinc3: add dl, 88h
|
||||
machvbuf3: mov al, byte [eax+88888888h]
|
||||
machvinc4: adc dh, 88h
|
||||
machvbuf4: mov bl, byte [ebx+88888888h]
|
||||
machvinc5: adc ebp, 88888088h
|
||||
machvpal3: mov cl, byte [eax+88888888h]
|
||||
machvpal4: mov ch, byte [ebx+88888888h]
|
||||
machvline4end: mov dword [edi+88888888h], ecx
|
||||
fixchain2a: add edi, 88888888h
|
||||
mov ecx, esi
|
||||
jle short beginvlineasm4
|
||||
|
||||
;     ┌───────────────┬───────────────┐
|
||||
;edx: │v3lo           │v1lo           │
|
||||
;     ├───────────────┴───────┬───────┤
|
||||
;esi: │v2hi  v2lo             │   v3hi│
|
||||
;     ├───────────────────────┼───────┤
|
||||
;ebp: │v0hi  v0lo             │   v1hi│
|
||||
;     └───────────────────────┴───────┘
|
||||
|
||||
mov dword [vplce+8], esi
|
||||
mov dword [vplce+0], ebp
|
||||
;vplc2 = (esi<<(32-sh))+(edx>>sh)
|
||||
;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh))
|
||||
machvsh5: shl esi, 88h ;32-sh
|
||||
mov eax, edx
|
||||
machvsh6: shl ebp, 88h ;32-sh
|
||||
and edx, 0000ffffh
|
||||
machvsh7: shr eax, 88h ;sh
|
||||
add esi, eax
|
||||
machvsh8: shl edx, 88h ;16-sh
|
||||
add ebp, edx
|
||||
mov dword [vplce+12], esi
|
||||
mov dword [vplce+4], ebp
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
;*************************************************************************
|
||||
;************************* Masked Vertical Lines *************************
|
||||
;*************************************************************************
|
||||
|
||||
; pass it log2(texheight)
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL setupmvlineasm
|
||||
setupmvlineasm:
|
||||
mov ecx, dword [esp+4]
|
||||
mov byte [maskmach3a+2], cl
|
||||
mov byte [machmv13+2], cl
|
||||
|
||||
mov byte [machmv14+2], cl
|
||||
mov byte [machmv15+2], cl
|
||||
mov byte [machmv16+2], cl
|
||||
selfmod maskmach3a, machmv13+6
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL mvlineasm1 ;Masked vline
|
||||
mvlineasm1:
|
||||
push ebx
|
||||
push edi
|
||||
push esi
|
||||
push ebp
|
||||
mov ecx, [dc_count]
|
||||
mov ebp, [dc_colormap]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
mov esi, [dc_source]
|
||||
beginmvline:
|
||||
mov ebx, edx
|
||||
maskmach3a: shr ebx, 32
|
||||
movzx ebx, byte [esi+ebx]
|
||||
cmp ebx, 0
|
||||
je short skipmask1
|
||||
maskmach3c: mov bl, byte [ebp+ebx]
|
||||
mov [edi], bl
|
||||
skipmask1: add edx, eax
|
||||
fixchain1m: add edi, 320
|
||||
dec ecx
|
||||
jnz short beginmvline
|
||||
|
||||
pop ebp
|
||||
pop esi
|
||||
pop edi
|
||||
pop ebx
|
||||
mov eax, edx
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
GLOBAL mvlineasm4
|
||||
mvlineasm4:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
|
||||
mov ecx,[dc_count]
|
||||
mov edi,[dc_dest]
|
||||
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov [machmv1+3], eax
|
||||
mov [machmv4+3], ebx
|
||||
mov eax, [bufplce+8]
|
||||
mov ebx, [bufplce+12]
|
||||
mov [machmv7+3], eax
|
||||
mov [machmv10+3], ebx
|
||||
|
||||
mov eax, [palookupoffse]
|
||||
mov ebx, [palookupoffse+4]
|
||||
mov [machmv2+2], eax
|
||||
mov [machmv5+2], ebx
|
||||
mov eax, [palookupoffse+8]
|
||||
mov ebx, [palookupoffse+12]
|
||||
mov [machmv8+2], eax
|
||||
mov [machmv11+2], ebx
|
||||
|
||||
mov eax, [vince] ;vince
|
||||
mov ebx, [vince+4]
|
||||
xor bl, bl
|
||||
mov [machmv3+2], eax
|
||||
mov [machmv6+2], ebx
|
||||
mov eax, [vince+8]
|
||||
mov ebx, [vince+12]
|
||||
mov [machmv9+2], eax
|
||||
mov [machmv12+2], ebx
|
||||
|
||||
inc ecx
|
||||
push ecx
|
||||
mov ecx, [vplce+0]
|
||||
mov edx, [vplce+4]
|
||||
mov esi, [vplce+8]
|
||||
mov ebp, [vplce+12]
|
||||
fixchain2ma: sub edi, 320
|
||||
|
||||
selfmod beginmvlineasm4, machmv2+6
|
||||
jmp short beginmvlineasm4
|
||||
ALIGN 16
|
||||
beginmvlineasm4:
|
||||
dec dword [esp]
|
||||
jz near endmvlineasm4
|
||||
|
||||
mov eax, ebp
|
||||
mov ebx, esi
|
||||
machmv16: shr eax, 32
|
||||
machmv12: add ebp, 0x88888888 ;vince[3]
|
||||
machmv15: shr ebx, 32
|
||||
machmv9: add esi, 0x88888888 ;vince[2]
|
||||
machmv10: movzx eax, byte [eax+0x88888888];bufplce[3]
|
||||
machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
cmp ebx, 1
|
||||
adc dl, dl
|
||||
machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2]
|
||||
machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3]
|
||||
|
||||
mov eax, edx
|
||||
machmv6: add edx, 0x88888888 ;vince[1]
|
||||
machmv14: shr eax, 32
|
||||
shl ebx, 16
|
||||
machmv4: movzx eax, byte [eax+0x88888888];bufplce[1]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1]
|
||||
|
||||
mov eax, ecx
|
||||
machmv3: add ecx, 0x88888888 ;vince[0]
|
||||
machmv13: shr eax, 32
|
||||
machmv1: movzx eax, byte [eax+0x88888888];bufplce[0]
|
||||
cmp eax, 1
|
||||
adc dl, dl
|
||||
machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0]
|
||||
|
||||
xor eax, eax
|
||||
shl dl, 4
|
||||
fixchain2mb: add edi, 320
|
||||
mov al, dl
|
||||
add eax, mvcase15
|
||||
jmp eax ;16 byte cases
|
||||
|
||||
ALIGN 16
|
||||
endmvlineasm4:
|
||||
mov [vplce], ecx
|
||||
mov [vplce+4], edx
|
||||
mov [vplce+8], esi
|
||||
mov [vplce+12], ebp
|
||||
pop ecx
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7
|
||||
ALIGN 16
|
||||
mvcase15: mov [edi], ebx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase14: mov [edi+1], bh
|
||||
shr ebx, 16
|
||||
mov [edi+2], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase13: mov [edi], bl
|
||||
shr ebx, 16
|
||||
mov [edi+2], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase12: shr ebx, 16
|
||||
mov [edi+2], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase11: mov [edi], bx
|
||||
shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase10: mov [edi+1], bh
|
||||
shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase9: mov [edi], bl
|
||||
shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase8: shr ebx, 16
|
||||
mov [edi+3], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase7: mov [edi], bx
|
||||
shr ebx, 16
|
||||
mov [edi+2], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase6: shr ebx, 8
|
||||
mov [edi+1], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase5: mov [edi], bl
|
||||
shr ebx, 16
|
||||
mov [edi+2], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase4: shr ebx, 16
|
||||
mov [edi+2], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase3: mov [edi], bx
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase2: mov [edi+1], bh
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase1: mov [edi], bl
|
||||
jmp beginmvlineasm4
|
||||
ALIGN 16
|
||||
mvcase0: jmp beginmvlineasm4
|
||||
|
||||
align 16
|
||||
|
||||
|
||||
;*************************************************************************
|
||||
;***************************** Voxel Slabs *******************************
|
||||
;*************************************************************************
|
||||
|
||||
GLOBAL R_DrawSlabA
|
||||
R_DrawSlabA:
|
||||
push ebx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
|
||||
mov eax, [esp+5*4+0]
|
||||
mov ebx, [esp+5*4+4]
|
||||
mov ecx, [esp+5*4+8]
|
||||
mov edx, [esp+5*4+12]
|
||||
mov esi, [esp+5*4+16]
|
||||
mov edi, [esp+5*4+20]
|
||||
|
||||
cmp eax, 2
|
||||
je voxbegdraw2
|
||||
ja voxskip2
|
||||
xor eax, eax
|
||||
voxbegdraw1:
|
||||
mov ebp, ebx
|
||||
shr ebp, 16
|
||||
add ebx, edx
|
||||
dec ecx
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal1: mov al, byte [eax+88888888h]
|
||||
mov byte [edi], al
|
||||
voxbpl1: lea edi, [edi+88888888h]
|
||||
jnz voxbegdraw1
|
||||
jmp voxskipslab5
|
||||
|
||||
voxbegdraw2:
|
||||
mov ebp, ebx
|
||||
shr ebp, 16
|
||||
add ebx, edx
|
||||
xor eax, eax
|
||||
dec ecx
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal2: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
mov word [edi], ax
|
||||
voxbpl2: lea edi, [edi+88888888h]
|
||||
jnz voxbegdraw2
|
||||
jmp voxskipslab5
|
||||
|
||||
voxskip2:
|
||||
cmp eax, 4
|
||||
jne voxskip4
|
||||
xor eax, eax
|
||||
voxbegdraw4:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal3: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
mov dword [edi], eax
|
||||
voxbpl3: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegdraw4
|
||||
jmp voxskipslab5
|
||||
|
||||
voxskip4:
|
||||
add eax, edi
|
||||
|
||||
test edi, 1
|
||||
jz voxskipslab1
|
||||
cmp edi, eax
|
||||
je voxskipslab1
|
||||
|
||||
push eax
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab1:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal4: mov al, byte [eax+88888888h]
|
||||
mov byte [edi], al
|
||||
voxbpl4: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab1
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
pop eax
|
||||
inc edi
|
||||
|
||||
voxskipslab1:
|
||||
push eax
|
||||
test edi, 2
|
||||
jz voxskipslab2
|
||||
dec eax
|
||||
cmp edi, eax
|
||||
jge voxskipslab2
|
||||
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab2:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal5: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
mov word [edi], ax
|
||||
voxbpl5: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab2
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
add edi, 2
|
||||
|
||||
voxskipslab2:
|
||||
mov eax, [esp]
|
||||
|
||||
sub eax, 3
|
||||
cmp edi, eax
|
||||
jge voxskipslab3
|
||||
|
||||
voxprebegslab3:
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab3:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal6: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
shl eax, 8
|
||||
mov al, ah
|
||||
mov dword [edi], eax
|
||||
voxbpl6: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab3
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
add edi, 4
|
||||
|
||||
mov eax, [esp]
|
||||
|
||||
sub eax, 3
|
||||
cmp edi, eax
|
||||
jl voxprebegslab3
|
||||
|
||||
voxskipslab3:
|
||||
mov eax, [esp]
|
||||
|
||||
dec eax
|
||||
cmp edi, eax
|
||||
jge voxskipslab4
|
||||
|
||||
push ebx
|
||||
push ecx
|
||||
push edi
|
||||
voxbegslab4:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal7: mov al, byte [eax+88888888h]
|
||||
mov ah, al
|
||||
mov word [edi], ax
|
||||
voxbpl7: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab4
|
||||
pop edi
|
||||
pop ecx
|
||||
pop ebx
|
||||
add edi, 2
|
||||
|
||||
voxskipslab4:
|
||||
pop eax
|
||||
|
||||
cmp edi, eax
|
||||
je voxskipslab5
|
||||
|
||||
voxbegslab5:
|
||||
mov ebp, ebx
|
||||
add ebx, edx
|
||||
shr ebp, 16
|
||||
xor eax, eax
|
||||
mov al, byte [esi+ebp]
|
||||
voxpal8: mov al, byte [eax+88888888h]
|
||||
mov byte [edi], al
|
||||
voxbpl8: add edi, 88888888h
|
||||
dec ecx
|
||||
jnz voxbegslab5
|
||||
|
||||
voxskipslab5:
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
align 16
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_a_end
|
||||
_rtext_a_end:
|
||||
%endif
|
|
@ -1,200 +0,0 @@
|
|||
;*
|
||||
;* misc.nas
|
||||
;* Miscellaneous assembly functions
|
||||
;*
|
||||
;*---------------------------------------------------------------------------
|
||||
;* Copyright 1998-2006 Randy Heit
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* 1. Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;* 2. Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in the
|
||||
;* documentation and/or other materials provided with the distribution.
|
||||
;* 3. The name of the author may not be used to endorse or promote products
|
||||
;* derived from this software without specific prior written permission.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;*---------------------------------------------------------------------------
|
||||
;*
|
||||
|
||||
BITS 32
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
|
||||
%define DoBlending_MMX _DoBlending_MMX
|
||||
%define BestColor_MMX _BestColor_MMX
|
||||
|
||||
%endif
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
|
||||
SEGMENT DATA
|
||||
%else
|
||||
SECTION .data
|
||||
%endif
|
||||
|
||||
Blending256:
|
||||
dd 0x01000100,0x00000100
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
|
||||
SEGMENT CODE
|
||||
%else
|
||||
SECTION .text
|
||||
%endif
|
||||
|
||||
;-----------------------------------------------------------
|
||||
;
|
||||
; DoBlending_MMX
|
||||
;
|
||||
; MMX version of DoBlending
|
||||
;
|
||||
; (DWORD *from, DWORD *to, count, tor, tog, tob, toa)
|
||||
;-----------------------------------------------------------
|
||||
|
||||
GLOBAL DoBlending_MMX
|
||||
|
||||
DoBlending_MMX:
|
||||
pxor mm0,mm0 ; mm0 = 0
|
||||
mov eax,[esp+4*4]
|
||||
shl eax,16
|
||||
mov edx,[esp+4*5]
|
||||
shl edx,8
|
||||
or eax,[esp+4*6]
|
||||
or eax,edx
|
||||
mov ecx,[esp+4*3] ; ecx = count
|
||||
movd mm1,eax ; mm1 = 00000000 00RRGGBB
|
||||
mov eax,[esp+4*7]
|
||||
shl eax,16
|
||||
mov edx,[esp+4*7]
|
||||
shl edx,8
|
||||
or eax,[esp+4*7]
|
||||
or eax,edx
|
||||
mov edx,[esp+4*2] ; edx = dest
|
||||
movd mm6,eax ; mm6 = 00000000 00AAAAAA
|
||||
punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB
|
||||
movq mm7,[Blending256]
|
||||
punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA
|
||||
mov eax,[esp+4*1] ; eax = source
|
||||
pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha)
|
||||
psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha)
|
||||
nop ; Does this actually pair on a Pentium?
|
||||
|
||||
; Do four colors per iteration: Count must be a multiple of four.
|
||||
|
||||
.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
|
||||
add eax,8
|
||||
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
|
||||
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
|
||||
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
|
||||
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
|
||||
add edx,8
|
||||
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
|
||||
sub ecx,2
|
||||
paddusw mm2,mm1
|
||||
psrlw mm2,8
|
||||
paddusw mm3,mm1
|
||||
psrlw mm3,8
|
||||
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
|
||||
movq [edx-8],mm2
|
||||
|
||||
movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1
|
||||
add eax,8
|
||||
movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1
|
||||
punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1
|
||||
punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2
|
||||
pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb
|
||||
add edx,8
|
||||
pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb
|
||||
sub ecx,2
|
||||
paddusw mm2,mm1
|
||||
psrlw mm2,8
|
||||
paddusw mm3,mm1
|
||||
psrlw mm3,8
|
||||
packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1
|
||||
movq [edx-8],mm2
|
||||
|
||||
jnz .loop
|
||||
|
||||
emms
|
||||
ret
|
||||
|
||||
;-----------------------------------------------------------
|
||||
;
|
||||
; BestColor_MMX
|
||||
;
|
||||
; Picks the closest matching color from a palette
|
||||
;
|
||||
; Passed FFRRGGBB and palette array in same format
|
||||
; FF is the index of the first palette entry to consider
|
||||
;
|
||||
;-----------------------------------------------------------
|
||||
|
||||
; BestColor_MMX has two entry points sharing one body:
;   BestColor_MMX      - arguments on the stack; loads them into ecx/edx
;                        and falls through.
;   @BestColor_MMX@8   - arguments already in ecx (packed FFRRGGBB value)
;                        and edx (pointer to the palette of dwords).
; Returns in eax the index of the closest palette entry among entries
; FF..255.  If no entry ever beats the initial bestdist, the value returned
; is the low 24 bits of the search color (the initial contents of ebx).
; ebx, esi and ebp are preserved; MMX state is cleared with emms.
GLOBAL BestColor_MMX
|
||||
GLOBAL @BestColor_MMX@8
|
||||
|
||||
BestColor_MMX:
|
||||
mov ecx,[esp+4] ; first stack argument: packed search color
|
||||
mov edx,[esp+8] ; second stack argument: palette pointer
|
||||
@BestColor_MMX@8:
|
||||
pxor mm0,mm0 ; mm0 = 0, used to widen bytes to words
|
||||
movd mm1,ecx ; mm1 = color searching for
|
||||
mov eax,257*257+257*257+257*257 ;eax = bestdist
|
||||
push ebx
|
||||
punpcklbw mm1,mm0 ; widen all four bytes of the search color to words
|
||||
mov ebx,ecx ; ebx = best color
|
||||
shr ecx,24 ; ecx = count (starts at the FF byte and counts up to 256)
|
||||
and ebx,0xffffff
|
||||
push esi
|
||||
push ebp
|
||||
|
||||
.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now
|
||||
inc ecx ; ecx now holds (index of this entry) + 1
|
||||
punpcklbw mm2,mm0
|
||||
movq mm3,mm1
|
||||
psubsw mm3,mm2 ; per-word difference, all four words
|
||||
pmullw mm3,mm3 ; mm3 = color distance squared
|
||||
|
||||
; NOTE(review): after the psrlq below, esi receives words 2 AND 3 of mm3.
; Word 3 is the squared difference of the top bytes (the search color's FF
; byte versus the palette entry's top byte), so it is added into ebp shifted
; left by 16.  The sum is the plain three-component distance only when those
; top bytes are equal - confirm that callers guarantee this.
movd ebp,mm3 ; add the three components
|
||||
psrlq mm3,32 ; into ebp to get the real
|
||||
mov esi,ebp ; (squared) distance
|
||||
shr esi,16
|
||||
and ebp,0xffff
|
||||
add ebp,esi
|
||||
movd esi,mm3
|
||||
add ebp,esi ; sets ZF when the total distance is zero
|
||||
|
||||
jz .perf ; found a perfect match
|
||||
cmp eax,ebp
|
||||
jb .skip ; keep current best only if bestdist < distance (ties go to the later entry)
|
||||
mov eax,ebp
|
||||
lea ebx,[ecx-1] ; ecx was already incremented, so ecx-1 is this entry's index
|
||||
.skip cmp ecx,256
|
||||
jne .loop
|
||||
mov eax,ebx ; return the best index found
|
||||
pop ebp
|
||||
pop esi
|
||||
pop ebx
|
||||
emms
|
||||
ret
|
||||
|
||||
.perf lea eax,[ecx-1] ; exact match: return its index immediately
|
||||
pop ebp
|
||||
pop esi
|
||||
pop ebx
|
||||
emms
|
||||
ret
|
File diff suppressed because it is too large
Load diff
|
@ -1,643 +0,0 @@
|
|||
;*
|
||||
;* tmap2.nas
|
||||
;* The tilted plane inner loop.
|
||||
;*
|
||||
;*---------------------------------------------------------------------------
|
||||
;* Copyright 1998-2006 Randy Heit
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* 1. Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;* 2. Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in the
|
||||
;* documentation and/or other materials provided with the distribution.
|
||||
;* 3. The name of the author may not be used to endorse or promote products
|
||||
;* derived from this software without specific prior written permission.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
;*---------------------------------------------------------------------------
|
||||
;*
|
||||
;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was
|
||||
;* actually slightly slower than the more straight-forward approach
|
||||
;* used here, probably because the trick requires too much setup time.
|
||||
;*
|
||||
|
||||
BITS 32
|
||||
|
||||
%include "valgrind.inc"
|
||||
|
||||
%define SPACEFILLER4 (0x44444444)
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
|
||||
%define plane_sz _plane_sz
|
||||
%define plane_su _plane_su
|
||||
%define plane_sv _plane_sv
|
||||
%define plane_shade _plane_shade
|
||||
%define planelightfloat _planelightfloat
|
||||
%define spanend _spanend
|
||||
%define ylookup _ylookup
|
||||
%define dc_destorg _dc_destorg
|
||||
%define ds_colormap _ds_colormap
|
||||
%define ds_source _ds_source
|
||||
%define centery _centery
|
||||
%define centerx _centerx
|
||||
%define ds_curtiltedsource _ds_curtiltedsource
|
||||
%define pviewx _pviewx
|
||||
%define pviewy _pviewy
|
||||
%define tiltlighting _tiltlighting
|
||||
|
||||
%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM
|
||||
%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM
|
||||
%define R_CalcTiltedLighting _R_CalcTiltedLighting
|
||||
|
||||
%endif
|
||||
|
||||
EXTERN plane_sz
|
||||
EXTERN plane_su
|
||||
EXTERN plane_sv
|
||||
EXTERN planelightfloat
|
||||
EXTERN spanend
|
||||
EXTERN ylookup
|
||||
EXTERN dc_destorg
|
||||
EXTERN ds_colormap
|
||||
EXTERN centery
|
||||
EXTERN centerx
|
||||
EXTERN ds_source
|
||||
EXTERN plane_shade
|
||||
EXTERN pviewx
|
||||
EXTERN pviewy
|
||||
EXTERN tiltlighting
|
||||
EXTERN R_CalcTiltedLighting
|
||||
|
||||
GLOBAL ds_curtiltedsource
|
||||
|
||||
%define sv_i plane_sv
|
||||
%define sv_j plane_sv+4
|
||||
%define sv_k plane_sv+8
|
||||
|
||||
%define su_i plane_su
|
||||
%define su_j plane_su+4
|
||||
%define su_k plane_su+8
|
||||
|
||||
%define sz_i plane_sz
|
||||
%define sz_j plane_sz+4
|
||||
%define sz_k plane_sz+8
|
||||
|
||||
%define SPANBITS 3
|
||||
|
||||
; Scratch storage for the tilted-plane drawer.
; start_u/start_v and step_u/step_v are written with 64-bit `fistp qword`
; but read back as 32-bit values (only the low dword is used).
; step_iz/step_uz/step_vz are written as doubles with `fstp/fst qword`.
; end_z is written as a single-precision float with `fst dword`.
section .bss
|
||||
|
||||
start_u: resq 1
|
||||
start_v: resq 1
|
||||
step_u: resq 1
|
||||
step_v: resq 1
|
||||
|
||||
step_iz: resq 1
|
||||
step_uz: resq 1
|
||||
step_vz: resq 1
|
||||
|
||||
end_z: resd 1
|
||||
|
||||
section .data
|
||||
|
||||
; Holds the texture pointer last stored by R_SetTiltedSpanSource_ASM;
; SPACEFILLER4 is only the initial placeholder value.
ds_curtiltedsource: dd SPACEFILLER4
|
||||
|
||||
; The tables below are single-precision floats written as raw bit patterns.
; fp_1 and spanrecips label the same dword (1.0).  spanrecips is indexed as
; [spanrecips+ebx*4], so entry ebx holds 1/(ebx+1); fp_8recip names the
; 1/8 entry inside it.
fp_1:
|
||||
spanrecips: dd 0x3f800000 ; 1/1
|
||||
dd 0x3f000000 ; 1/2
|
||||
dd 0x3eaaaaab ; 1/3
|
||||
dd 0x3e800000 ; 1/4
|
||||
dd 0x3e4ccccd ; 1/5
|
||||
dd 0x3e2aaaab ; 1/6
|
||||
dd 0x3e124925 ; 1/7
|
||||
fp_8recip: dd 0x3e000000 ; 1/8
|
||||
dd 0x3de38e39 ; 1/9
|
||||
dd 0x3dcccccd ; 1/10
|
||||
dd 0x3dba2e8c ; 1/11
|
||||
dd 0x3daaaaab ; 1/12
|
||||
dd 0x3d9d89d9 ; 1/13
|
||||
dd 0x3d924925 ; 1/14
|
||||
dd 0x3d888889 ; 1/15
|
||||
|
||||
; Small integers as floats: entry i holds (i+1).  fp_8 names the last entry.
fp_quickint: dd 0x3f800000 ; 1
|
||||
dd 0x40000000 ; 2
|
||||
dd 0x40400000 ; 3
|
||||
dd 0x40800000 ; 4
|
||||
dd 0x40a00000 ; 5
|
||||
dd 0x40c00000 ; 6
|
||||
dd 0x40e00000 ; 7
|
||||
fp_8: dd 0x41000000 ; 8
|
||||
|
||||
section .text
|
||||
|
||||
; Patches the texture base address into the ten texel-fetch instructions
; (fetch1..fetch10) of the tilted-plane drawer and records it in
; ds_curtiltedsource.
;   R_SetTiltedSpanSource_ASM     - pointer passed on the stack.
;   @R_SetTiltedSpanSource_ASM@4  - pointer already in ecx.
; Each fetchN is a `mov al,[reg+reg+SPACEFILLER4]`; the +3 skips the opcode,
; ModRM and SIB bytes so the store lands on the 32-bit displacement.
GLOBAL R_SetTiltedSpanSource_ASM
|
||||
GLOBAL @R_SetTiltedSpanSource_ASM@4
|
||||
|
||||
R_SetTiltedSpanSource_ASM:
|
||||
mov ecx,[esp+4] ; stack-argument entry: load the pointer, then fall through
|
||||
|
||||
@R_SetTiltedSpanSource_ASM@4:
|
||||
mov [fetch1+3],ecx
|
||||
mov [fetch2+3],ecx
|
||||
mov [fetch3+3],ecx
|
||||
mov [fetch4+3],ecx
|
||||
mov [fetch5+3],ecx
|
||||
mov [fetch6+3],ecx
|
||||
mov [fetch7+3],ecx
|
||||
mov [fetch8+3],ecx
|
||||
mov [fetch9+3],ecx
|
||||
mov [fetch10+3],ecx
|
||||
mov [ds_curtiltedsource],ecx
|
||||
; selfmod is a macro not defined in this file (presumably from valgrind.inc,
; announcing the modified code range - confirm there).
selfmod rtext_start, rtext_end
|
||||
ret
|
||||
|
||||
; Patches the shift counts and the mask used by the tilted-plane texel
; address computation.  Inputs are taken from cl and dl as found on entry
; (no stack arguments are read); both look like bit counts - presumably
; log2 of the texture dimensions, confirm against the caller.
;   xN (shift applied to the v coordinate)  <- -cl
;   yN (shift applied to the masked u)      <- -cl - dl
;   mN (mask applied to u)                  <- ~((1 << (-dl & 31)) - 1),
;                                              or 0 when that value is 0
; x86 masks 32-bit shift counts to 5 bits, so a count of -n acts as 32-n.
; ecx is preserved; eax is clobbered.
GLOBAL SetTiltedSpanSize
|
||||
|
||||
SetTiltedSpanSize:
|
||||
push ecx ; save ecx; original cl is re-read from [esp] below
|
||||
mov cl,dl
|
||||
neg cl
|
||||
mov eax,1
|
||||
shl eax,cl ; eax = 1 << (-dl & 31)
|
||||
mov cl,[esp] ; cl = caller's original cl
|
||||
neg cl
|
||||
mov [x1+2],cl ; +2 = imm8 shift count of each `shr reg,imm8`
|
||||
mov [x2+2],cl
|
||||
mov [x3+2],cl
|
||||
mov [x4+2],cl
|
||||
mov [x5+2],cl
|
||||
mov [x6+2],cl
|
||||
mov [x7+2],cl
|
||||
mov [x8+2],cl
|
||||
mov [x9+2],cl
|
||||
mov [x10+2],cl
|
||||
|
||||
sub cl,dl ; cl = -cl_in - dl
|
||||
dec eax ; eax = (1 << (-dl & 31)) - 1
|
||||
mov [y1+2],cl
|
||||
mov [y2+2],cl
|
||||
mov [y3+2],cl
|
||||
mov [y4+2],cl
|
||||
mov [y5+2],cl
|
||||
mov [y6+2],cl
|
||||
mov [y7+2],cl
|
||||
mov [y8+2],cl
|
||||
mov [y9+2],cl
|
||||
mov [y10+2],cl
|
||||
cmp eax,0 ; if x bits is 0, mask must be 0 too.
|
||||
jz .notted
|
||||
not eax ; mask keeps only the high bits of u
|
||||
.notted:
|
||||
pop ecx
|
||||
|
||||
mov [m1+2],eax ; +2 = imm32 of each `and reg,imm32`
|
||||
mov [m2+2],eax
|
||||
mov [m3+2],eax
|
||||
mov [m4+2],eax
|
||||
mov [m5+2],eax
|
||||
mov [m6+2],eax
|
||||
mov [m7+2],eax
|
||||
mov [m8+2],eax
|
||||
mov [m9+2],eax
|
||||
mov [m10+2],eax
|
||||
|
||||
selfmod rtext_start, rtext_end
|
||||
|
||||
ret
|
||||
|
||||
%ifndef M_TARGET_MACHO
|
||||
SECTION .rtext progbits alloc exec write align=64
|
||||
%else
|
||||
SECTION .text align=64
|
||||
GLOBAL _rtext_tmap2_start
|
||||
_rtext_tmap2_start:
|
||||
%endif
|
||||
|
||||
rtext_start:
|
||||
|
||||
; Entry and setup for the tilted-plane span drawer.
;   R_DrawTiltedPlane_ASM     - y and x passed on the stack.
;   @R_DrawTiltedPlane_ASM@8  - y in ecx, x in edx.
; Computes the destination pointer (edi) and the span length minus one (ebx),
; evaluates u/z, v/z and 1/z at the first pixel from the plane_su/sv/sz
; vectors, optionally fills the lighting table, stores the starting
; texture coordinates, and then either branches to ShortStrip (fewer than
; 8 pixels) or computes the per-pixel steps for the first 8-pixel span and
; falls through to FullSpan.
; The comments to the right of FPU instructions show the x87 stack, st0 first.
GLOBAL R_DrawTiltedPlane_ASM
|
||||
GLOBAL @R_DrawTiltedPlane_ASM@8
|
||||
|
||||
R_DrawTiltedPlane_ASM:
|
||||
mov ecx,[esp+4]
|
||||
mov edx,[esp+8]
|
||||
|
||||
; ecx = y
|
||||
; edx = x
|
||||
|
||||
@R_DrawTiltedPlane_ASM@8:
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
|
||||
mov eax,[centery]
|
||||
movzx ebx,word [spanend+ecx*2]
|
||||
sub eax,ecx ; eax = centery-y
|
||||
sub ebx,edx ; ebx = span length - 1
|
||||
mov edi,[ylookup+ecx*4]
|
||||
push eax ; [esp+4] after the next push: centery-y, read by fild below
|
||||
add edi,[dc_destorg]
|
||||
add edi,edx ; edi = frame buffer pointer
|
||||
sub edx,[centerx] ; edx = x-centerx
|
||||
push edx ; [esp]: x-centerx, read by fild below
|
||||
xor eax,eax ; eax is used as a zero-extended byte index from here on
|
||||
|
||||
fild dword [esp+4] ; ymul
|
||||
fild dword [esp] ; xmul | ymul
|
||||
fld dword [sv_j] ; sv.j | xmul | ymul
|
||||
fmul st0,st2 ; sv.j*ymul | xmul | ymul
|
||||
fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul
|
||||
fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul
|
||||
fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul
|
||||
fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul
|
||||
fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
faddp st1,st0 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul
|
||||
fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul
|
||||
faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
|
||||
fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul
|
||||
fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul
|
||||
fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul
|
||||
fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z
|
||||
fadd dword [su_k] ; u/z | v/z | 1/z
|
||||
fxch st2 ; 1/z | v/z | u/z
|
||||
fxch st1 ; v/z | 1/z | u/z
|
||||
|
||||
; if lighting is on, fill out the light table
|
||||
mov al,[plane_shade]
|
||||
test al,al
|
||||
jz .litup
|
||||
|
||||
; Arguments built on the stack for R_CalcTiltedLighting, lowest address first:
;   [esp]    double = planelightfloat * (1/z at the end of the span)
;   [esp+8]  double = planelightfloat * (1/z at the start of the span)
;   [esp+16] dword  = ebx (span length - 1)
push ebx
|
||||
fild dword [esp] ; width | v/z | 1/z | u/z
|
||||
fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z
|
||||
fadd st0,st2 ; 1/endz | v/z | 1/z | u/z
|
||||
fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z
|
||||
fmul dword [planelightfloat]
|
||||
fxch st1
|
||||
fmul dword [planelightfloat]
|
||||
sub esp,16
|
||||
fstp qword [esp]
|
||||
fstp qword [esp+8]
|
||||
call R_CalcTiltedLighting
|
||||
add esp, 20 ; drop the two doubles (16 bytes) and the pushed ebx (4 bytes)
|
||||
xor eax, eax ; al held the nonzero plane_shade, so clear eax again
|
||||
|
||||
.litup add esp, 8 ; drop the two integers pushed for fild above
|
||||
|
||||
; calculate initial z, u, and v values
|
||||
fld st1 ; 1/z | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | v/z | 1/z | u/z
|
||||
|
||||
fld st3 ; u/z | z | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u | z | v/z | 1/z | u/z
|
||||
fld st2 ; v/z | u | z | v/z | 1/z | u/z
|
||||
fmulp st2,st0 ; u | v | v/z | 1/z | u/z
|
||||
fld st0
|
||||
fistp qword [start_u] ; store integer u for the first pixel
|
||||
fld st1
|
||||
fistp qword [start_v] ; store integer v for the first pixel
|
||||
|
||||
cmp ebx,7 ; Do we have at least 8 pixels to plot?
|
||||
jl near ShortStrip
|
||||
|
||||
; yes, we do, so figure out tex coords at end of this span
|
||||
|
||||
; multiply i values by span length (8)
|
||||
fld dword [su_i] ; su.i
|
||||
fmul dword [fp_8] ; su.i*8
|
||||
fld dword [sv_i] ; sv.i | su.i*8
|
||||
fmul dword [fp_8] ; sv.i*8 | su.i*8
|
||||
fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8
|
||||
fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8
|
||||
fxch st2 ; su.i*8 | sv.i*8 | sz.i*8
|
||||
fstp qword [step_uz] ; sv.i*8 | sz.i*8
|
||||
fstp qword [step_vz] ; sz.i*8
|
||||
fst qword [step_iz] ; sz.i*8
|
||||
|
||||
; find tex coords at start of next span
|
||||
faddp st4 ; 1/z += sz.i*8
|
||||
fld qword [step_vz]
|
||||
faddp st3 ; v/z += sv.i*8
|
||||
fld qword [step_uz]
|
||||
faddp st5 ; u/z += su.i*8
|
||||
|
||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
||||
fst dword [end_z]
|
||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
||||
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
|
||||
|
||||
; now subtract to get stepping values for this span
|
||||
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
|
||||
|
||||
; Before drawing the current 8-pixel span, advance u/z, v/z and 1/z to the
; end of the FOLLOWING span and start the divide for its z, so the divide
; can proceed while the integer pixel loop in DrawFullSpan runs.
;   ebx >= 15 : the following span is a full 8 pixels; advance by the
;               precomputed step_iz/step_vz/step_uz.
;   ebx <= 8  : nothing further is computed; jump straight to DrawFullSpan
;               (no z is pushed on the FPU stack in this case).
;   otherwise : the following span is partial; advance by (ebx-7) pixels
;               using the fp_quickint table.
FullSpan:
|
||||
xor eax,eax ; keep the upper 24 bits of eax zero for byte-indexed lookups
|
||||
cmp ebx,15 ; is there another complete span after this one?
|
||||
jl NextIsShort
|
||||
|
||||
; there is a complete span after this one
|
||||
fld qword [step_iz]
|
||||
faddp st4,st0
|
||||
fld qword [step_vz]
|
||||
faddp st3,st0
|
||||
fld qword [step_uz]
|
||||
faddp st5,st0
|
||||
jmp StartDiv
|
||||
|
||||
NextIsShort:
|
||||
cmp ebx,8 ; if next span is no more than 1 pixel, then we already
|
||||
jle DrawFullSpan ; know everything we need to draw it
|
||||
|
||||
; fp_quickint-8*4+ebx*4 addresses table entry (ebx-8), whose value is ebx-7.
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint-8*4+ebx*4]
|
||||
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint-8*4+ebx*4]
|
||||
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint-8*4+ebx*4]
|
||||
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st5,st0 ; u | v | v/z | 1/z | u/z
|
||||
|
||||
StartDiv:
|
||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
||||
|
||||
; Draws eight pixels, fully unrolled.  For each pixel:
;   ebp = v >> (patched x shift), esi = (u & patched mask) >> (patched y shift)
;   al  = texel at [ebp+esi+texture base]   (base patched into fetchN)
;   ebp = lighting table pointer from tiltlighting[ebx - k]
;   al  = [ebp+eax]; stored to [edi+k]
; ecx/edx carry v/u and are advanced by step_v/step_u after every pixel.
; The labels x1..x8, m1..m8, y1..y8 and fetch1..fetch8 mark instructions
; whose immediates are rewritten by SetTiltedSpanSize and
; R_SetTiltedSpanSource_ASM; the literal 26 / 0xfc000000 / 20 /
; SPACEFILLER4 values are only the assembled defaults.
; Afterwards edi advances by 8 and ebx drops by 8; the routine then either
; finishes (Done), hands over to EndIsShort, or computes the steps for the
; next full span and loops back to FullSpan.
DrawFullSpan:
|
||||
mov ecx,[start_v]
|
||||
mov edx,[start_u]
|
||||
|
||||
add ecx,[pviewy]
|
||||
add edx,[pviewx]
|
||||
|
||||
mov esi,edx
|
||||
mov ebp,ecx
|
||||
x1 shr ebp,26
|
||||
m1 and esi,0xfc000000
|
||||
y1 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch1 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+0],al
|
||||
|
||||
x2 shr ebp,26
|
||||
m2 and esi,0xfc000000
|
||||
y2 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch2 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-4]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+1],al
|
||||
|
||||
x3 shr ebp,26
|
||||
m3 and esi,0xfc000000
|
||||
y3 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch3 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-8]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+2],al
|
||||
|
||||
x4 shr ebp,26
|
||||
m4 and esi,0xfc000000
|
||||
y4 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch4 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-12]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+3],al
|
||||
|
||||
x5 shr ebp,26
|
||||
m5 and esi,0xfc000000
|
||||
y5 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch5 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-16]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+4],al
|
||||
|
||||
x6 shr ebp,26
|
||||
m6 and esi,0xfc000000
|
||||
y6 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch6 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-20]
|
||||
mov esi,edx
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
mov [edi+5],al
|
||||
|
||||
; Pixels 7 and 8 are interleaved: the last pixel shifts/masks ecx and edx
; in place because u and v are not needed again after it.
x7 shr ebp,26
|
||||
m7 and esi,0xfc000000
|
||||
y7 shr esi,20
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
fetch7 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4-24]
|
||||
x8 shr ecx,26
|
||||
mov al,[ebp+eax]
|
||||
m8 and edx,0xfc000000
|
||||
mov [edi+6],al
|
||||
|
||||
y8 shr edx,20
|
||||
mov ebp,[tiltlighting+ebx*4-28]
|
||||
fetch8 mov al,[edx+ecx+SPACEFILLER4]
|
||||
mov al,[ebp+eax]
|
||||
mov [edi+7],al
|
||||
add edi,8
|
||||
|
||||
sub ebx,8
|
||||
jl near Done ; nothing left to draw
|
||||
|
||||
; When z is on top of the FPU stack, st1/st2 are the u/v at the start of the
; next span.  If DrawFullSpan was entered directly from NextIsShort, no z was
; pushed; in that case ebx is 0 here and OnlyOnePixelAtEnd re-stores
; start_u/start_v from st0/st1.
fld st1
|
||||
fistp qword [start_u]
|
||||
fld st2
|
||||
fistp qword [start_v]
|
||||
|
||||
cmp ebx,7
|
||||
jl near EndIsShort
|
||||
|
||||
fst dword [end_z]
|
||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
||||
fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z
|
||||
fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z
|
||||
fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z
|
||||
fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z
|
||||
fistp qword [step_u] ; u | v | v/z | 1/z | u/z
|
||||
jmp FullSpan
|
||||
|
||||
; Single-pixel tail and common exit.
;   OnlyOnePixelAtEnd - stores st0/st1 (u, v) as the start coordinates, then
;                       falls into OnlyOnePixel.
;   OnlyOnePixel      - draws one pixel from start_u/start_v using the first
;                       tiltlighting entry.
;   Done              - pops the five values left on the FPU stack, restores
;                       the saved registers and returns.
OnlyOnePixelAtEnd:
|
||||
fld st0
|
||||
fistp qword [start_u]
|
||||
fld st1
|
||||
fistp qword [start_v]
|
||||
|
||||
OnlyOnePixel:
|
||||
mov edx,[start_v]
|
||||
mov ecx,[start_u]
|
||||
add edx,[pviewy]
|
||||
add ecx,[pviewx]
|
||||
x9 shr edx,26
|
||||
m9 and ecx,0xfc000000
|
||||
y9 shr ecx,20
|
||||
mov ebp,[tiltlighting]
|
||||
fetch9 mov al,[ecx+edx+SPACEFILLER4]
|
||||
mov al,[ebp+eax]
|
||||
mov [edi],al
|
||||
|
||||
Done:
|
||||
fcompp ; pop two FPU values (the comparison result is unused)
|
||||
fcompp ; pop two more
|
||||
fstp st0 ; pop the fifth
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
; Reached from the entry code when fewer than 8 pixels are to be drawn
; (ebx < 7).  With ebx <= 0 a single pixel is drawn from the already stored
; start_u/start_v.  Otherwise u/z, v/z and 1/z are advanced by (ebx+1)
; pixels (fp_quickint entry ebx holds ebx+1), z is computed for that point,
; and control passes to CalcPartialSteps.
ShortStrip:
|
||||
cmp ebx,0
|
||||
jle near OnlyOnePixel
|
||||
|
||||
MoreThanOnePixel:
|
||||
fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint+ebx*4]
|
||||
fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint+ebx*4]
|
||||
fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z
|
||||
fmul dword [fp_quickint+ebx*4]
|
||||
fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z
|
||||
faddp st5,st0 ; u | v | v/z | 1/z | u/z
|
||||
fld st3 ; 1/z | u | v | v/z | 1/z | u/z
|
||||
fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z
|
||||
jmp CalcPartialSteps
|
||||
|
||||
; Draws the final partial span (2 to 8 pixels, ebx = pixels - 1).
;   EndIsShort       - entered after a full span; ebx == 0 is handled by
;                      OnlyOnePixelAtEnd instead.
;   CalcPartialSteps - expects z on top of the FPU stack; computes the end
;                      coordinates, divides the deltas by (ebx+1) via the
;                      spanrecips table to get integer steps, then runs
;                      endloop.
;   endloop          - one pixel per iteration, counting ebx down to below 0.
; On exit the three remaining FPU values are popped and the saved registers
; restored.
EndIsShort:
|
||||
cmp ebx,0
|
||||
je near OnlyOnePixelAtEnd
|
||||
|
||||
CalcPartialSteps:
|
||||
fst dword [end_z]
|
||||
fld st5 ; u/z | z | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; z | u' | u | v | v/z | 1/z | u/z
|
||||
fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z
|
||||
fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z
|
||||
fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z
|
||||
fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z
|
||||
fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z
|
||||
fxch st1 ; v'-v | ustep | v/z | 1/z | u/z
|
||||
fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z
|
||||
fxch st1 ; ustep | vstep | v/z | 1/z | u/z
|
||||
fistp qword [step_u] ; vstep | v/z | 1/z | u/z
|
||||
fistp qword [step_v] ; v/z | 1/z | u/z
|
||||
|
||||
mov ecx,[start_v]
|
||||
mov edx,[start_u]
|
||||
|
||||
add ecx,[pviewy]
|
||||
add edx,[pviewx]
|
||||
|
||||
mov esi,edx
|
||||
mov ebp,ecx
|
||||
endloop:
|
||||
x10 shr ebp,26
|
||||
m10 and esi,0xfc000000
|
||||
|
||||
y10 shr esi,20
|
||||
inc edi ; advance early; the store below uses [edi-1]
|
||||
|
||||
add ecx,[step_v]
|
||||
add edx,[step_u]
|
||||
|
||||
fetch10 mov al,[ebp+esi+SPACEFILLER4]
|
||||
mov ebp,[tiltlighting+ebx*4]
|
||||
|
||||
mov esi,edx
|
||||
dec ebx ; sets the flags tested by jge below
|
||||
|
||||
mov al,[ebp+eax]
|
||||
mov ebp,ecx
|
||||
|
||||
mov [edi-1],al
|
||||
jge endloop ; mov does not alter flags, so this still tests the dec ebx result
|
||||
|
||||
fcompp ; pop two FPU values (the comparison result is unused)
|
||||
fstp st0 ; pop the third
|
||||
|
||||
pop ebp
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
rtext_end:
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_tmap2_end
|
||||
_rtext_tmap2_end:
|
||||
%endif
|
|
@ -1,344 +0,0 @@
|
|||
%include "valgrind.inc"
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32
|
||||
SEGMENT DATA
|
||||
%else
|
||||
SECTION .data
|
||||
%endif
|
||||
|
||||
%ifndef M_TARGET_LINUX
|
||||
%define ylookup _ylookup
|
||||
%define vplce _vplce
|
||||
%define vince _vince
|
||||
%define palookupoffse _palookupoffse
|
||||
%define bufplce _bufplce
|
||||
%define dc_iscale _dc_iscale
|
||||
%define dc_colormap _dc_colormap
|
||||
%define dc_count _dc_count
|
||||
%define dc_dest _dc_dest
|
||||
%define dc_source _dc_source
|
||||
%define dc_texturefrac _dc_texturefrac
|
||||
%define dc_pitch _dc_pitch
|
||||
|
||||
%define setupvlinetallasm _setupvlinetallasm
|
||||
%define vlinetallasm4 _vlinetallasm4
|
||||
%define vlinetallasmathlon4 _vlinetallasmathlon4
|
||||
%define vlinetallasm1 _vlinetallasm1
|
||||
%define prevlinetallasm1 _prevlinetallasm1
|
||||
%endif
|
||||
|
||||
EXTERN vplce
|
||||
EXTERN vince
|
||||
EXTERN palookupoffse
|
||||
EXTERN bufplce
|
||||
|
||||
EXTERN ylookup
|
||||
EXTERN dc_iscale
|
||||
EXTERN dc_colormap
|
||||
EXTERN dc_count
|
||||
EXTERN dc_dest
|
||||
EXTERN dc_source
|
||||
EXTERN dc_texturefrac
|
||||
EXTERN dc_pitch
|
||||
|
||||
GLOBAL vlt4pitch
|
||||
GLOBAL vlt1pitch
|
||||
|
||||
%ifdef M_TARGET_WATCOM
|
||||
SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32
|
||||
SEGMENT CODE
|
||||
%else
|
||||
SECTION .text
|
||||
%endif
|
||||
|
||||
ALIGN 16
|
||||
; Patches the value found in eax on entry (no stack argument is read) into
; the 32-bit immediates of the four pitch instructions used by the column
; drawers: vltpitch, vltpitcha, vlt1pitch1 and vlt1pitch2.
; The +2 skips the opcode and ModRM bytes of each `add/sub edi,imm32`.
GLOBAL setpitch3
|
||||
setpitch3:
|
||||
mov [vltpitch+2], eax
|
||||
mov [vltpitcha+2],eax
|
||||
mov [vlt1pitch1+2], eax
|
||||
mov [vlt1pitch2+2], eax
|
||||
; selfmod is a macro not defined in this file (presumably from valgrind.inc).
selfmod vltpitch, vlt1pitch2+6
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
; Patches the low byte of the first stack argument into the shift count of
; every `shr reg,imm8` used by the tall-column drawers in this file
; (shifter1-4, shifter1a-4a, preshift, shift11, shift12).
; The +2 skips the opcode and ModRM bytes to reach the imm8.
GLOBAL setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
mov ecx, [esp+4] ; ecx = first stack argument; only cl is used
|
||||
mov [shifter1+2], cl
|
||||
mov [shifter2+2], cl
|
||||
mov [shifter3+2], cl
|
||||
mov [shifter4+2], cl
|
||||
mov [shifter1a+2], cl
|
||||
mov [shifter2a+2], cl
|
||||
mov [shifter3a+2], cl
|
||||
mov [shifter4a+2], cl
|
||||
mov [preshift+2], cl
|
||||
mov [shift11+2], cl
|
||||
mov [shift12+2], cl
|
||||
; selfmod is a macro not defined in this file (presumably from valgrind.inc).
selfmod shifter1, shift12+6
|
||||
ret
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
SECTION .text align=64
|
||||
GLOBAL _rtext_tmap3_start
|
||||
_rtext_tmap3_start:
|
||||
%else
|
||||
SECTION .rtext progbits alloc exec write align=64
|
||||
%endif
|
||||
|
||||
ALIGN 16
|
||||
|
||||
; Draws four adjacent columns at once using self-modifying code.
; Inputs are read from globals: bufplce[0..3] (texture column pointers),
; palookupoffse[0..3] (lookup tables), vince[0..3] (per-row increments),
; vplce[0..3] (current positions), dc_count and dc_dest.
; Before the loop, those values are patched into the displacement/immediate
; fields of the loop instructions (the 0x8888888x values below are only
; placeholders).  Patch offsets: +3 for `movzx r32,byte [r+disp32]`,
; +2 for the two-byte-prefix forms, +1 for step4 because `add eax,imm32`
; has a one-byte opcode.
; Addressing: eax = dc_dest + ylookup[dc_count-1] is patched into the write
; instructions as their base, and edi starts at -ylookup[dc_count-1], so the
; first store hits dc_dest.  edi then climbs by the pitch each row and the
; loop continues while edi <= 0.  (Presumably ylookup[i] == i*pitch -
; confirm; the loop bound depends on it.)
; On exit the advanced positions are written back to vplce[0..3].
GLOBAL vlinetallasm4
|
||||
vlinetallasm4:
|
||||
push ebx
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov ecx, [bufplce+8]
|
||||
mov edx, [bufplce+12]
|
||||
mov [source1+3], eax
|
||||
mov [source2+3], ebx
|
||||
mov [source3+3], ecx
|
||||
mov [source4+3], edx
|
||||
mov eax, [palookupoffse+0]
|
||||
mov ebx, [palookupoffse+4]
|
||||
mov ecx, [palookupoffse+8]
|
||||
mov edx, [palookupoffse+12]
|
||||
mov [lookup1+2], eax
|
||||
mov [lookup2+2], ebx
|
||||
mov [lookup3+2], ecx
|
||||
mov [lookup4+2], edx
|
||||
mov eax, [vince+0]
|
||||
mov ebx, [vince+4]
|
||||
mov ecx, [vince+8]
|
||||
mov edx, [vince+12]
|
||||
mov [step1+2], eax
|
||||
mov [step2+2], ebx
|
||||
mov [step3+2], ecx
|
||||
mov [step4+1], edx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
mov ecx, [dc_count]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, dword [ylookup+ecx*4-4] ; eax = ylookup[dc_count-1]
|
||||
add eax, edi ; eax = dc_dest + ylookup[dc_count-1]
|
||||
sub edi, eax ; edi = -ylookup[dc_count-1]
|
||||
mov [write1+2],eax
|
||||
inc eax
|
||||
mov [write2+2],eax
|
||||
inc eax
|
||||
mov [write3+2],eax
|
||||
inc eax
|
||||
mov [write4+2],eax
|
||||
mov ebx, [vplce] ; ebx/ecx/esi/eax = positions of columns 1-4
|
||||
mov ecx, [vplce+4]
|
||||
mov esi, [vplce+8]
|
||||
mov eax, [vplce+12]
|
||||
selfmod loopit, vltpitch
|
||||
jmp loopit
|
||||
|
||||
ALIGN 16
|
||||
loopit:
|
||||
mov edx, ebx
|
||||
shifter1: shr edx, 24
|
||||
source1: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup1: mov dl, [edx+0x88888888]
|
||||
write1: mov [edi+0x88888880], dl
|
||||
step1: add ebx, 0x88888888
|
||||
mov edx, ecx
|
||||
shifter2: shr edx, 24
|
||||
source2: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup2: mov dl, [edx+0x88888888]
|
||||
write2: mov [edi+0x88888881], dl
|
||||
step2: add ecx, 0x88888888
|
||||
mov edx, esi
|
||||
shifter3: shr edx, 24
|
||||
source3: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup3: mov dl, BYTE [edx+0x88888888]
|
||||
write3: mov [edi+0x88888882], dl
|
||||
step3: add esi, 0x88888888
|
||||
mov edx, eax
|
||||
shifter4: shr edx, 24
|
||||
source4: movzx edx, BYTE [edx+0x88888888]
|
||||
lookup4: mov dl, [edx+0x88888888]
|
||||
write4: mov [edi+0x88888883], dl
|
||||
step4: add eax, 0x88888888
|
||||
vltpitch: add edi, 320
|
||||
jle near loopit ; continue while edi <= 0
|
||||
|
||||
mov [vplce], ebx
|
||||
mov [vplce+4], ecx
|
||||
mov [vplce+8], esi
|
||||
mov [vplce+12], eax
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
|
||||
; Alternative four-column drawer with a differently scheduled inner loop.
; It uses the same globals and the same patching scheme as vlinetallasm4,
; but patches its own set of labels (source1a.., lookup1a.., step1a..,
; write1a.., vltpitcha) and keeps column 1's position in ebp, using ebx as
; a second scratch register.
; Patch offsets: +3 for the movzx sources, +2 for lookups/writes/steps 1-3,
; +1 for step4a because `add eax,imm32` has a one-byte opcode.
; On exit the advanced positions are written back to vplce[0..3].
GLOBAL vlinetallasmathlon4
|
||||
vlinetallasmathlon4:
|
||||
push ebx
|
||||
mov eax, [bufplce+0]
|
||||
mov ebx, [bufplce+4]
|
||||
mov ecx, [bufplce+8]
|
||||
mov edx, [bufplce+12]
|
||||
mov [source1a+3], eax
|
||||
mov [source2a+3], ebx
|
||||
mov [source3a+3], ecx
|
||||
mov [source4a+3], edx
|
||||
mov eax, [palookupoffse+0]
|
||||
mov ebx, [palookupoffse+4]
|
||||
mov ecx, [palookupoffse+8]
|
||||
mov edx, [palookupoffse+12]
|
||||
mov [lookup1a+2], eax
|
||||
mov [lookup2a+2], ebx
|
||||
mov [lookup3a+2], ecx
|
||||
mov [lookup4a+2], edx
|
||||
mov eax, [vince+0]
|
||||
mov ebx, [vince+4]
|
||||
mov ecx, [vince+8]
|
||||
mov edx, [vince+12]
|
||||
mov [step1a+2], eax
|
||||
mov [step2a+2], ebx
|
||||
mov [step3a+2], ecx
|
||||
mov [step4a+1], edx
|
||||
push ebp
|
||||
push esi
|
||||
push edi
|
||||
mov ecx, [dc_count]
|
||||
mov edi, [dc_dest]
|
||||
mov eax, dword [ylookup+ecx*4-4] ; eax = ylookup[dc_count-1]
|
||||
add eax, edi ; eax = dc_dest + ylookup[dc_count-1]
|
||||
sub edi, eax ; edi = -ylookup[dc_count-1]
|
||||
mov [write1a+2],eax
|
||||
inc eax
|
||||
mov [write2a+2],eax
|
||||
inc eax
|
||||
mov [write3a+2],eax
|
||||
inc eax
|
||||
mov [write4a+2],eax
|
||||
mov ebp, [vplce] ; ebp/ecx/esi/eax = positions of columns 1-4
|
||||
mov ecx, [vplce+4]
|
||||
mov esi, [vplce+8]
|
||||
mov eax, [vplce+12]
|
||||
selfmod loopita, vltpitcha
|
||||
jmp loopita
|
||||
|
||||
; Unfortunately, this code has not been carefully analyzed to determine
|
||||
; how well it utilizes the processor's instruction units. Instead, I just
|
||||
; kept rearranging code, seeing what sped it up and what slowed it down
|
||||
; until I arrived at this. This is the fastest version I was able to
|
||||
; manage, but that does not mean it cannot be made faster with careful
|
||||
; instruction shuffling.
|
||||
|
||||
ALIGN 64
|
||||
|
||||
loopita: mov edx, ebp
|
||||
mov ebx, ecx
|
||||
shifter1a: shr edx, 24
|
||||
shifter2a: shr ebx, 24
|
||||
source1a: movzx edx, BYTE [edx+0x88888888]
|
||||
source2a: movzx ebx, BYTE [ebx+0x88888888]
|
||||
step1a: add ebp, 0x88888888
|
||||
step2a: add ecx, 0x88888888
|
||||
lookup1a: mov dl, [edx+0x88888888]
|
||||
lookup2a: mov dh, [ebx+0x88888888]
|
||||
mov ebx, esi
|
||||
write1a: mov [edi+0x88888880], dl
|
||||
write2a: mov [edi+0x88888881], dh
|
||||
shifter3a: shr ebx, 24
|
||||
mov edx, eax
|
||||
source3a: movzx ebx, BYTE [ebx+0x88888888]
|
||||
shifter4a: shr edx, 24
|
||||
step3a: add esi, 0x88888888
|
||||
source4a: movzx edx, BYTE [edx+0x88888888]
|
||||
step4a: add eax, 0x88888888
|
||||
lookup3a: mov bl, [ebx+0x88888888]
|
||||
lookup4a: mov dl, [edx+0x88888888]
|
||||
write3a: mov [edi+0x88888882], bl
|
||||
write4a: mov [edi+0x88888883], dl
|
||||
vltpitcha: add edi, 320
|
||||
jle near loopita ; continue while edi <= 0
|
||||
|
||||
mov [vplce], ebp
|
||||
mov [vplce+4], ecx
|
||||
mov [vplce+8], esi
|
||||
mov [vplce+12], eax
|
||||
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
pop ebx
|
||||
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
; Single-column front end.  If dc_count > 1 (unsigned compare) it jumps to
; vlinetallasm1.  Otherwise it draws exactly one pixel itself - note this
; path is also taken when dc_count is 0.
; One-pixel path: the texel index is dc_texturefrac shifted right by the
; patched `preshift` count; the texel is read from dc_source, mapped through
; dc_colormap and stored at dc_dest.
; Returns eax = dc_texturefrac + dc_iscale.  ebx and edi are preserved.
GLOBAL prevlinetallasm1
|
||||
prevlinetallasm1:
|
||||
mov ecx, [dc_count]
|
||||
cmp ecx, 1
|
||||
ja vlinetallasm1 ; more than one pixel: use the looping version
|
||||
|
||||
mov eax, [dc_iscale]
|
||||
mov edx, [dc_texturefrac]
|
||||
add eax, edx ; eax = return value (frac advanced by one step)
|
||||
mov ecx, [dc_source]
|
||||
preshift: shr edx, 16
|
||||
push ebx
|
||||
push edi
|
||||
mov edi, [dc_colormap]
|
||||
movzx ebx, byte [ecx+edx] ; ebx = texel
|
||||
mov ecx, [dc_dest]
|
||||
mov bl, byte [edi+ebx] ; bl = colormapped texel
|
||||
pop edi
|
||||
mov byte [ecx], bl
|
||||
pop ebx
|
||||
ret
|
||||
|
||||
ALIGN 16
|
||||
; Draws one column of dc_count pixels.
; Register use: ebp = remaining count, ebx = texture position (frac),
; edx = dc_iscale, esi = dc_source, eax = dc_colormap, edi = destination,
; ecx = current texel index / scratch.
; edi is first moved back by one pitch (vlt1pitch1) because the loop adds
; the pitch (vlt1pitch2) BEFORE each store.  Both pitch immediates and the
; shift counts at shift11/shift12 are patched elsewhere; the literal values
; here are placeholders/defaults.
; The loop is do-while shaped: it decrements ebp after drawing, so it
; assumes dc_count >= 1 on entry.
; Returns eax = final texture position.  ebp, ebx, edi, esi are preserved.
GLOBAL vlinetallasm1
|
||||
vlinetallasm1:
|
||||
push ebp
|
||||
push ebx
|
||||
push edi
|
||||
push esi
|
||||
|
||||
mov ebp, [dc_count]
|
||||
mov ebx, [dc_texturefrac] ; ebx = frac
|
||||
mov edi, [dc_dest]
|
||||
mov ecx, ebx
|
||||
shift11: shr ecx, 16
|
||||
mov esi, [dc_source]
|
||||
mov edx, [dc_iscale]
|
||||
vlt1pitch1: sub edi, 0x88888888
|
||||
mov eax, [dc_colormap]
|
||||
|
||||
loop2:
|
||||
movzx ecx, BYTE [esi+ecx] ; ecx = texel
|
||||
add ebx, edx ; advance frac
|
||||
vlt1pitch2: add edi, 0x88888888
|
||||
mov cl,[eax+ecx] ; cl = colormapped texel
|
||||
mov [edi],cl
|
||||
mov ecx,ebx
|
||||
shift12: shr ecx,16 ; ecx = next texel index
|
||||
dec ebp
|
||||
jnz loop2
|
||||
|
||||
mov eax,ebx ; return the final frac
|
||||
pop esi
|
||||
pop edi
|
||||
pop ebx
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
%ifdef M_TARGET_MACHO
|
||||
GLOBAL _rtext_tmap3_end
|
||||
_rtext_tmap3_end:
|
||||
%endif
|
|
@ -1,150 +0,0 @@
|
|||
%ifnidn __OUTPUT_FORMAT__,win64
|
||||
%error tmap3.asm is for Win64 output. You should use tmap.s for other systems.
|
||||
%endif
|
||||
|
||||
BITS 64
|
||||
DEFAULT REL
|
||||
|
||||
EXTERN vplce
|
||||
EXTERN vince
|
||||
EXTERN palookupoffse
|
||||
EXTERN bufplce
|
||||
|
||||
EXTERN dc_count
|
||||
EXTERN dc_dest
|
||||
EXTERN dc_pitch
|
||||
|
||||
SECTION .text
|
||||
|
||||
; Copies the current dc_pitch into the 32-bit immediates of the two pitch
; instructions in vlinetallasm4: `pm: imul rcx,imm32` and
; `vltpitch: add rcx,imm32`.  The +3 skips the REX prefix, opcode and ModRM
; bytes.  Clobbers ecx.
GLOBAL ASM_PatchPitch
|
||||
ASM_PatchPitch:
|
||||
mov ecx, [dc_pitch]
|
||||
mov [pm+3], ecx
|
||||
mov [vltpitch+3], ecx
|
||||
ret
|
||||
align 16
|
||||
|
||||
; Patches cl, as found on entry (the first integer argument register under
; the Win64 convention this file targets), into the shift count of the four
; `shr reg,24` instructions in vlinetallasm4.  The +2 skips the opcode and
; ModRM bytes to reach the imm8.
GLOBAL setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
mov [shifter1+2], cl
|
||||
mov [shifter2+2], cl
|
||||
mov [shifter3+2], cl
|
||||
mov [shifter4+2], cl
|
||||
ret
|
||||
align 16
|
||||
|
||||
; Yasm can't do progbits alloc exec for win64?
|
||||
; Hmm, looks like it's automatic. No worries, then.
|
||||
SECTION .rtext write ;progbits alloc exec
|
||||
|
||||
; Win64 four-column drawer.  Reads dc_count, dc_dest, bufplce[0..3],
; palookupoffse[0..3], vince[0..3] and vplce[0..3]; returns immediately
; (through the shared epilogue) when dc_count <= 0.
; Columns 2-4 are addressed relative to bufplce[0]: the differences
; bufplce[n]-bufplce[0] are patched in as 32-bit displacements
; (source2..source4, +4 skips opcode bytes, ModRM and SIB), so only the low
; 32 bits of each difference are kept - presumably the buffers are always
; within range of one another; confirm.
; vince values are patched into step1..step4 (+3 skips REX, opcode, ModRM).
; rcx = dc_count * pitch; rdi is moved to the end of the column and rcx is
; negated, so [rdi+rcx] starts at dc_dest and the loop runs while rcx < 0.
; On exit the advanced positions are written back to vplce[0..3].
GLOBAL vlinetallasm4
|
||||
PROC_FRAME vlinetallasm4
|
||||
rex_push_reg rbx
|
||||
push_reg rdi
|
||||
push_reg r15
|
||||
push_reg r14
|
||||
push_reg r13
|
||||
push_reg r12
|
||||
push_reg rbp
|
||||
push_reg rsi
|
||||
alloc_stack 8 ; Stack must be 16-byte aligned
|
||||
END_PROLOGUE
|
||||
; rax = bufplce base address
|
||||
; rbx =
|
||||
; rcx = offset from rdi/count (negative)
|
||||
; edx/rdx = scratch
|
||||
; rdi = bottom of columns to write to
|
||||
; r8d-r11d = column offsets
|
||||
; r12-r15 = palookupoffse[0] - palookupoffse[3]
|
||||
|
||||
mov ecx, [dc_count]
|
||||
mov rdi, [dc_dest]
|
||||
test ecx, ecx
|
||||
jle vltepilog ; count must be positive
|
||||
|
||||
mov rax, [bufplce]
|
||||
mov r8, [bufplce+8]
|
||||
sub r8, rax
|
||||
mov r9, [bufplce+16]
|
||||
sub r9, rax
|
||||
mov r10, [bufplce+24]
|
||||
sub r10, rax
|
||||
mov [source2+4], r8d
|
||||
mov [source3+4], r9d
|
||||
mov [source4+4], r10d
|
||||
|
||||
pm: imul rcx, 320
|
||||
|
||||
mov r12, [palookupoffse]
|
||||
mov r13, [palookupoffse+8]
|
||||
mov r14, [palookupoffse+16]
|
||||
mov r15, [palookupoffse+24]
|
||||
|
||||
mov r8d, [vince]
|
||||
mov r9d, [vince+4]
|
||||
mov r10d, [vince+8]
|
||||
mov r11d, [vince+12]
|
||||
mov [step1+3], r8d
|
||||
mov [step2+3], r9d
|
||||
mov [step3+3], r10d
|
||||
mov [step4+3], r11d
|
||||
|
||||
add rdi, rcx ; rdi = one past the last row of the column
|
||||
neg rcx ; rcx = negative offset back to dc_dest
|
||||
|
||||
mov r8d, [vplce]
|
||||
mov r9d, [vplce+4]
|
||||
mov r10d, [vplce+8]
|
||||
mov r11d, [vplce+12]
|
||||
jmp loopit
|
||||
|
||||
ALIGN 16
|
||||
loopit:
|
||||
mov edx, r8d
|
||||
shifter1: shr edx, 24
|
||||
step1: add r8d, 0x88888888
|
||||
movzx edx, BYTE [rax+rdx]
|
||||
mov ebx, r9d
|
||||
mov dl, [r12+rdx]
|
||||
shifter2: shr ebx, 24
|
||||
step2: add r9d, 0x88888888
|
||||
source2: movzx ebx, BYTE [rax+rbx+0x88888888]
|
||||
mov ebp, r10d
|
||||
mov bl, [r13+rbx]
|
||||
shifter3: shr ebp, 24
|
||||
step3: add r10d, 0x88888888
|
||||
source3: movzx ebp, BYTE [rax+rbp+0x88888888]
|
||||
mov esi, r11d
|
||||
mov bpl, BYTE [r14+rbp]
|
||||
shifter4: shr esi, 24
|
||||
step4: add r11d, 0x88888888
|
||||
source4: movzx esi, BYTE [rax+rsi+0x88888888]
|
||||
mov [rdi+rcx], dl
|
||||
mov [rdi+rcx+1], bl
|
||||
mov sil, BYTE [r15+rsi]
|
||||
mov [rdi+rcx+2], bpl
|
||||
mov [rdi+rcx+3], sil
|
||||
|
||||
vltpitch: add rcx, 320
|
||||
jl loopit ; continue while the offset is still negative
|
||||
|
||||
mov [vplce], r8d
|
||||
mov [vplce+4], r9d
|
||||
mov [vplce+8], r10d
|
||||
mov [vplce+12], r11d
|
||||
|
||||
vltepilog:
|
||||
add rsp, 8
|
||||
pop rsi
|
||||
pop rbp
|
||||
pop r12
|
||||
pop r13
|
||||
pop r14
|
||||
pop r15
|
||||
pop rdi
|
||||
pop rbx
|
||||
ret
|
||||
vlinetallasm4_end:
|
||||
ENDPROC_FRAME
|
||||
ALIGN 16
|
||||
|
|
@ -1,141 +0,0 @@
|
|||
#%include "valgrind.inc"
|
||||
|
||||
.section .text
|
||||
|
||||
# ASM_PatchPitch
# Copies the current value of dc_pitch into the 32-bit immediate operands of
# the two instructions in vlinetallasm4 that hard-code the row pitch: the
# imulq at label pm and the addq at label vltpitch (both assembled with a
# placeholder of 320). Takes no arguments and clobbers ecx.
.globl ASM_PatchPitch
|
||||
ASM_PatchPitch:
|
||||
movl dc_pitch(%rip), %ecx
|
||||
# The +3 offset skips the bytes in front of the immediate; presumably these
# are the REX prefix, opcode and ModRM byte -- confirm against the assembled
# encoding before changing either target instruction.
movl %ecx, pm+3(%rip)
|
||||
movl %ecx, vltpitch+3(%rip)
|
||||
# NOTE(review): the next line looks like a disabled self-modifying-code
# notification macro (the valgrind.inc include in this file is commented
# out as well) -- confirm before re-enabling.
# selfmod pm, vltpitch+6
|
||||
ret
|
||||
.align 16
|
||||
|
||||
# setupvlinetallasm
# Writes the low byte of rdi into byte 2 of each of the four shift
# instructions labelled shifter1..shifter4 in vlinetallasm4, replacing the
# placeholder shift count of 24 they were assembled with.
# Presumably dil carries the first integer argument under the System V
# AMD64 calling convention -- confirm against the C prototype.
.globl setupvlinetallasm
|
||||
setupvlinetallasm:
|
||||
# Byte 2 is assumed to be the imm8 shift count of each two-byte-opcode
# shr instruction -- confirm against the assembled encoding.
movb %dil, shifter1+2(%rip)
|
||||
movb %dil, shifter2+2(%rip)
|
||||
movb %dil, shifter3+2(%rip)
|
||||
movb %dil, shifter4+2(%rip)
|
||||
# NOTE(review): looks like a disabled self-modifying-code notification
# macro -- confirm before re-enabling.
# selfmod shifter1, shifter4+3
|
||||
ret
|
||||
.align 16
|
||||
|
||||
.section .rtext,"awx"
|
||||
|
||||
# vlinetallasm4
# Draws dc_count rows of four horizontally adjacent bytes starting at dc_dest.
# For each row and each column i in 0..3 the loop:
#   1. shifts the 32-bit accumulator vplce[i] right by the (patched) shift
#      count to get an index,
#   2. reads a byte at that index from the source pointed to by bufplce[i],
#   3. translates it through the table pointed to by palookupoffse[i],
#   4. stores the result at dest+i and adds vince[i] to the accumulator.
# The destination advances by the (patched) pitch each row. On exit the four
# accumulators are written back to vplce[0..3].
#
# The routine is self-modifying: before entering the loop it rewrites the
# immediates at step1..step4 and the displacements at source2..source4.
# It is assembled into the writable+executable .rtext section declared above.
# The 0x44444444, $320 and $24 operands below are placeholders that are
# overwritten at run time (here, in ASM_PatchPitch and in setupvlinetallasm).
.globl vlinetallasm4
|
||||
.type vlinetallasm4,@function
|
||||
vlinetallasm4:
|
||||
.cfi_startproc
|
||||
push %rbx
|
||||
push %rdi
|
||||
push %r15
|
||||
push %r14
|
||||
push %r13
|
||||
push %r12
|
||||
push %rbp
|
||||
push %rsi
|
||||
subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux?
|
||||
.cfi_adjust_cfa_offset 8
|
||||
|
||||
# rax = bufplce[0] (source pointer of column 1; columns 2-4 are addressed relative to it)
|
||||
# ebx/rbx, ebp/rbp, esi/rsi = scratch for columns 2, 3 and 4
|
||||
# rcx = offset from rdi/count (negative)
|
||||
# edx/rdx = scratch for column 1
|
||||
# rdi = bottom of columns to write to (dc_dest + count * pitch)
|
||||
# r8d-r11d = per-column accumulators loaded from vplce[0..3]
|
||||
# r12-r15 = palookupoffse[0] - palookupoffse[3]
|
||||
|
||||
movl dc_count(%rip), %ecx
|
||||
movq dc_dest(%rip), %rdi
|
||||
testl %ecx, %ecx
|
||||
jle vltepilog # count must be positive
|
||||
|
||||
# Patch the displacements of the source2..source4 loads with the distance
# of bufplce[1..3] from bufplce[0].
# NOTE(review): only the low 32 bits of each 64-bit difference are stored,
# so this assumes the four source pointers lie within a signed 32-bit
# displacement of each other -- confirm with the callers.
movq bufplce(%rip), %rax
|
||||
movq bufplce+8(%rip), %r8
|
||||
subq %rax, %r8
|
||||
movq bufplce+16(%rip), %r9
|
||||
subq %rax, %r9
|
||||
movq bufplce+24(%rip), %r10
|
||||
subq %rax, %r10
|
||||
movl %r8d, source2+4(%rip)
|
||||
movl %r9d, source3+4(%rip)
|
||||
movl %r10d, source4+4(%rip)
|
||||
|
||||
# rcx = count * pitch; the 320 is replaced by ASM_PatchPitch.
pm: imulq $320, %rcx
|
||||
|
||||
movq palookupoffse(%rip), %r12
|
||||
movq palookupoffse+8(%rip), %r13
|
||||
movq palookupoffse+16(%rip), %r14
|
||||
movq palookupoffse+24(%rip), %r15
|
||||
|
||||
# Patch the per-row increments (vince[0..3]) into the add immediates at
# step1..step4; r8d-r11d are only temporaries here and are reloaded below.
movl vince(%rip), %r8d
|
||||
movl vince+4(%rip), %r9d
|
||||
movl vince+8(%rip), %r10d
|
||||
movl vince+12(%rip), %r11d
|
||||
movl %r8d, step1+3(%rip)
|
||||
movl %r9d, step2+3(%rip)
|
||||
movl %r10d, step3+3(%rip)
|
||||
movl %r11d, step4+3(%rip)
|
||||
|
||||
# Point rdi past the last row and make rcx the negative byte offset of the
# first row, so the loop can count rcx up towards zero.
addq %rcx, %rdi
|
||||
negq %rcx
|
||||
|
||||
movl vplce(%rip), %r8d
|
||||
movl vplce+4(%rip), %r9d
|
||||
movl vplce+8(%rip), %r10d
|
||||
movl vplce+12(%rip), %r11d
|
||||
# selfmod loopit, vltepilog
|
||||
jmp loopit
|
||||
|
||||
.align 16
|
||||
# One iteration per row; the work for the four columns is interleaved.
loopit:
|
||||
movl %r8d, %edx
|
||||
shifter1: shrl $24, %edx
|
||||
step1: addl $0x44444444, %r8d
|
||||
movzbl (%rax,%rdx), %edx
|
||||
movl %r9d, %ebx
|
||||
movb (%r12,%rdx), %dl
|
||||
shifter2: shrl $24, %ebx
|
||||
step2: addl $0x44444444, %r9d
|
||||
source2: movzbl 0x44444444(%rax,%rbx), %ebx
|
||||
movl %r10d, %ebp
|
||||
movb (%r13,%rbx), %bl
|
||||
shifter3: shr $24, %ebp
|
||||
step3: addl $0x44444444, %r10d
|
||||
source3: movzbl 0x44444444(%rax,%rbp), %ebp
|
||||
movl %r11d, %esi
|
||||
movb (%r14,%rbp), %bpl
|
||||
shifter4: shr $24, %esi
|
||||
step4: add $0x44444444, %r11d
|
||||
source4: movzbl 0x44444444(%rax,%rsi), %esi
|
||||
movb %dl, (%rdi,%rcx)
|
||||
movb %bl, 1(%rdi,%rcx)
|
||||
movb (%r15,%rsi), %sil
|
||||
movb %bpl, 2(%rdi,%rcx)
|
||||
movb %sil, 3(%rdi,%rcx)
|
||||
|
||||
# Advance to the next row; the 320 is replaced by ASM_PatchPitch. The loop
# continues while the offset is still negative.
vltpitch: addq $320, %rcx
|
||||
jl loopit
|
||||
|
||||
# Write the advanced accumulators back to vplce[0..3].
movl %r8d, vplce(%rip)
|
||||
movl %r9d, vplce+4(%rip)
|
||||
movl %r10d, vplce+8(%rip)
|
||||
movl %r11d, vplce+12(%rip)
|
||||
|
||||
# Restore the registers in the reverse of the order they were pushed.
vltepilog:
|
||||
addq $8, %rsp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
pop %rsi
|
||||
|
||||
pop %rbp
|
||||
pop %r12
|
||||
pop %r13
|
||||
pop %r14
|
||||
pop %r15
|
||||
pop %rdi
|
||||
pop %rbx
|
||||
ret
|
||||
.cfi_endproc
|
||||
.align 16
|
||||
|
||||
|
|
@ -1030,7 +1030,7 @@ void D_DoomLoop ()
|
|||
catch (CVMAbortException &error)
|
||||
{
|
||||
error.MaybePrintMessage();
|
||||
Printf("%s", error.stacktrace);
|
||||
Printf("%s", error.stacktrace.GetChars());
|
||||
D_ErrorCleanup();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,62 +48,6 @@
|
|||
class PClassActor;
|
||||
typedef TMap<int, PClassActor *> FClassMap;
|
||||
|
||||
// Since this file is included by everything, it seems an appropriate place
|
||||
// to check the NOASM/USEASM macros.
|
||||
|
||||
// There are three assembly-related macros:
|
||||
//
|
||||
// NOASM - Assembly code is disabled
|
||||
// X86_ASM - Using ia32 assembly code
|
||||
// X64_ASM - Using amd64 assembly code
|
||||
//
|
||||
// Note that these relate only to using the pure assembly code. Inline
|
||||
// assembly may still be used without respect to these macros, as
|
||||
// deemed appropriate.
|
||||
|
||||
#ifndef NOASM
|
||||
// Select the appropriate type of assembly code to use.
|
||||
|
||||
#if defined(_M_IX86) || defined(__i386__)
|
||||
|
||||
#define X86_ASM
|
||||
#ifdef X64_ASM
|
||||
#undef X64_ASM
|
||||
#endif
|
||||
|
||||
#elif defined(_M_X64) || defined(__amd64__)
|
||||
|
||||
#define X64_ASM
|
||||
#ifdef X86_ASM
|
||||
#undef X86_ASM
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define NOASM
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef NOASM
|
||||
// Ensure no assembly macros are defined if NOASM is defined.
|
||||
|
||||
#ifdef X86_ASM
|
||||
#undef X86_ASM
|
||||
#endif
|
||||
|
||||
#ifdef X64_ASM
|
||||
#undef X64_ASM
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Only use SSE intrinsics on Intel architecture
|
||||
#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__)
|
||||
#define NO_SSE
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define NOVTABLE __declspec(novtable)
|
||||
|
|
|
@ -1,3 +1,36 @@
|
|||
/*
|
||||
** r_draw.cpp
|
||||
**
|
||||
**---------------------------------------------------------------------------
|
||||
** Copyright 1998-2016 Randy Heit
|
||||
** Copyright 2016 Magnus Norddahl
|
||||
** All rights reserved.
|
||||
**
|
||||
** Redistribution and use in source and binary forms, with or without
|
||||
** modification, are permitted provided that the following conditions
|
||||
** are met:
|
||||
**
|
||||
** 1. Redistributions of source code must retain the above copyright
|
||||
** notice, this list of conditions and the following disclaimer.
|
||||
** 2. Redistributions in binary form must reproduce the above copyright
|
||||
** notice, this list of conditions and the following disclaimer in the
|
||||
** documentation and/or other materials provided with the distribution.
|
||||
** 3. The name of the author may not be used to endorse or promote products
|
||||
** derived from this software without specific prior written permission.
|
||||
**
|
||||
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**---------------------------------------------------------------------------
|
||||
**
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
|
|
|
@ -1,3 +1,36 @@
|
|||
/*
|
||||
** r_draw_pal.cpp
|
||||
**
|
||||
**---------------------------------------------------------------------------
|
||||
** Copyright 1998-2016 Randy Heit
|
||||
** Copyright 2016 Magnus Norddahl
|
||||
** All rights reserved.
|
||||
**
|
||||
** Redistribution and use in source and binary forms, with or without
|
||||
** modification, are permitted provided that the following conditions
|
||||
** are met:
|
||||
**
|
||||
** 1. Redistributions of source code must retain the above copyright
|
||||
** notice, this list of conditions and the following disclaimer.
|
||||
** 2. Redistributions in binary form must reproduce the above copyright
|
||||
** notice, this list of conditions and the following disclaimer in the
|
||||
** documentation and/or other materials provided with the distribution.
|
||||
** 3. The name of the author may not be used to endorse or promote products
|
||||
** derived from this software without specific prior written permission.
|
||||
**
|
||||
** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**---------------------------------------------------------------------------
|
||||
**
|
||||
*/
|
||||
|
||||
#include "templates.h"
|
||||
#include "doomtype.h"
|
||||
|
|
1135
src/r_drawt.cpp
1135
src/r_drawt.cpp
File diff suppressed because it is too large
Load diff
|
@ -825,9 +825,6 @@ void R_SetupBuffer ()
|
|||
{
|
||||
dc_pitch = pitch;
|
||||
R_InitFuzzTable (pitch);
|
||||
#if defined(X86_ASM) || defined(X64_ASM)
|
||||
ASM_PatchPitch ();
|
||||
#endif
|
||||
}
|
||||
dc_destorg = lineptr;
|
||||
dc_destheight = RenderTarget->GetHeight() - viewwindowy;
|
||||
|
|
|
@ -106,20 +106,11 @@ CCMD (bumpgamma)
|
|||
/* Palette management stuff */
|
||||
/****************************/
|
||||
|
||||
extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal);
|
||||
|
||||
int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num)
|
||||
{
|
||||
#ifdef X86_ASM
|
||||
if (CPU.bMMX)
|
||||
{
|
||||
int pre = 256 - num - first;
|
||||
return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre;
|
||||
}
|
||||
#endif
|
||||
const PalEntry *pal = (const PalEntry *)pal_in;
|
||||
int bestcolor = first;
|
||||
int bestdist = 257*257+257*257+257*257;
|
||||
int bestdist = 257 * 257 + 257 * 257 + 257 * 257;
|
||||
|
||||
for (int color = first; color < num; color++)
|
||||
{
|
||||
|
@ -384,8 +375,8 @@ void InitPalette ()
|
|||
R_InitColormaps ();
|
||||
}
|
||||
|
||||
extern "C" void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
extern void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
void DoBlending_MMX (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
void DoBlending_SSE2 (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
|
||||
void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
{
|
||||
|
@ -395,6 +386,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
|||
{
|
||||
memcpy (to, from, count * sizeof(DWORD));
|
||||
}
|
||||
return;
|
||||
}
|
||||
else if (a == 256)
|
||||
{
|
||||
|
@ -405,6 +397,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
|||
{
|
||||
to[i] = t;
|
||||
}
|
||||
return;
|
||||
}
|
||||
#if defined(_M_X64) || defined(_M_IX86) || defined(__i386__) || defined(__amd64__)
|
||||
else if (CPU.bSSE2)
|
||||
|
@ -423,7 +416,7 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in
|
|||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef X86_ASM
|
||||
#if defined(_M_IX86) || defined(__i386__)
|
||||
else if (CPU.bMMX)
|
||||
{
|
||||
if (count >= 4)
|
||||
|
|
|
@ -519,10 +519,6 @@ void V_RefreshViewBorder ();
|
|||
|
||||
void V_SetBorderNeedRefresh();
|
||||
|
||||
#if defined(X86_ASM) || defined(X64_ASM)
|
||||
extern "C" void ASM_PatchPitch (void);
|
||||
#endif
|
||||
|
||||
int CheckRatio (int width, int height, int *trueratio=NULL);
|
||||
static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); }
|
||||
inline bool IsRatioWidescreen(int ratio) { return (ratio & 3) != 0; }
|
||||
|
|
23
src/x86.cpp
23
src/x86.cpp
|
@ -227,10 +227,9 @@ void DumpCPUInfo(const CPUInfo *cpu)
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Compiler output for this function is crap compared to the assembly
|
||||
// version, which is why it isn't used.
|
||||
void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
#if !defined(__amd64__) && !defined(_M_X64)
|
||||
|
||||
void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
{
|
||||
__m64 blendcolor;
|
||||
__m64 blendalpha;
|
||||
|
@ -272,9 +271,6 @@ void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef X86_ASM
|
||||
extern "C" void DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
|
||||
#endif
|
||||
|
||||
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a)
|
||||
{
|
||||
|
@ -288,17 +284,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
|
||||
unaligned = ((size_t)from | (size_t)to) & 0xF;
|
||||
|
||||
#ifdef X86_ASM
|
||||
// For unaligned accesses, the assembly MMX version is slightly faster.
|
||||
// Note that using unaligned SSE loads and stores is still faster than
|
||||
// the compiler-generated MMX version.
|
||||
if (unaligned)
|
||||
{
|
||||
DoBlending_MMX(from, to, count, r, g, b, a);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__amd64__) || defined(_M_X64)
|
||||
long long color;
|
||||
|
||||
|
@ -326,7 +311,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
|
||||
zero = _mm_setzero_si128();
|
||||
|
||||
#ifndef X86_ASM
|
||||
if (unaligned)
|
||||
{
|
||||
for (count >>= 2; count > 0; --count)
|
||||
|
@ -346,7 +330,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g
|
|||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (count >>= 2; count > 0; --count)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue