mirror of
https://git.do.srb2.org/KartKrew/Kart-Public.git
synced 2025-03-25 04:11:05 +00:00
Merge branch 'removeasm' into 'master'
Remove ASM code See merge request KartKrew/Kart-Public!348
This commit is contained in:
commit
cc6386cd1e
40 changed files with 27 additions and 5104 deletions
|
@ -36,7 +36,7 @@ jobs:
|
|||
- v1-SRB2-APT
|
||||
- run:
|
||||
name: Install SDK
|
||||
command: apt-get -qq -y --no-install-recommends install git build-essential nasm libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
|
||||
command: apt-get -qq -y --no-install-recommends install git build-essential libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
|
||||
- save_cache:
|
||||
key: v1-SRB2-APT
|
||||
paths:
|
||||
|
|
18
SRB2.cbp
18
SRB2.cbp
|
@ -1996,24 +1996,6 @@ HW3SOUND for 3D hardware sound support
|
|||
<Option compilerVar="CC" />
|
||||
</Unit>
|
||||
<Unit filename="src/v_video.h" />
|
||||
<Unit filename="src/vid_copy.s">
|
||||
<Option compilerVar="CC" />
|
||||
<Option compiler="avrgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="gnu_gcc_compiler_for_mingw32" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="gnu_gcc_compiler_for_mingw64" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="armelfgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="tricoregcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="ppcgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="gcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option target="Debug Native/SDL" />
|
||||
<Option target="Release Native/SDL" />
|
||||
<Option target="Debug Linux/SDL" />
|
||||
<Option target="Release Linux/SDL" />
|
||||
<Option target="Debug Mingw/SDL" />
|
||||
<Option target="Release Mingw/SDL" />
|
||||
<Option target="Debug Mingw/DirectX" />
|
||||
<Option target="Release Mingw/DirectX" />
|
||||
</Unit>
|
||||
<Unit filename="src/w_wad.c">
|
||||
<Option compilerVar="CC" />
|
||||
</Unit>
|
||||
|
|
|
@ -25,9 +25,6 @@
|
|||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(PlatformTarget)'=='x86'">
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>USEASM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
||||
</Link>
|
||||
|
|
52
Srb2.dev
52
Srb2.dev
|
@ -5,7 +5,7 @@ Ver=3
|
|||
IsCpp=0
|
||||
Type=0
|
||||
UnitCount=279
|
||||
Folders=A_Asm,B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
|
||||
Folders=B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
|
||||
CommandLine=
|
||||
CompilerSettings=00000000000100000111e1
|
||||
PchHead=-1
|
||||
|
@ -1473,36 +1473,6 @@ Priority=1000
|
|||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit149]
|
||||
FileName=src\tmap.nas
|
||||
Folder=A_Asm
|
||||
Compile=0
|
||||
CompileCpp=0
|
||||
Link=0
|
||||
Priority=1000
|
||||
OverrideBuildCmd=1
|
||||
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap.nas
|
||||
|
||||
[Unit150]
|
||||
FileName=src\asm_defs.inc
|
||||
Folder=A_Asm
|
||||
Compile=0
|
||||
CompileCpp=0
|
||||
Link=0
|
||||
Priority=1000
|
||||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit151]
|
||||
FileName=src\vid_copy.s
|
||||
Folder=A_Asm
|
||||
Compile=1
|
||||
CompileCpp=0
|
||||
Link=1
|
||||
Priority=1000
|
||||
OverrideBuildCmd=1
|
||||
BuildCmd=$(CC) $(CFLAGS) -x assembler-with-cpp -c src/vid_copy.s -o $@
|
||||
|
||||
[Unit152]
|
||||
FileName=src\y_inter.h
|
||||
Folder=H_Hud
|
||||
|
@ -1543,26 +1513,6 @@ Priority=1000
|
|||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit156]
|
||||
FileName=src\p5prof.h
|
||||
Folder=A_Asm
|
||||
Compile=1
|
||||
CompileCpp=0
|
||||
Link=1
|
||||
Priority=1000
|
||||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit157]
|
||||
FileName=src\tmap_mmx.nas
|
||||
Folder=A_Asm
|
||||
Compile=0
|
||||
CompileCpp=0
|
||||
Link=0
|
||||
Priority=1000
|
||||
OverrideBuildCmd=1
|
||||
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap_mmx.nas
|
||||
|
||||
[Unit159]
|
||||
FileName=src\lzf.h
|
||||
Folder=W_Wad
|
||||
|
|
|
@ -11,8 +11,6 @@ environment:
|
|||
# c:\msys64 x86_64 has gcc 8.2.0, so use c:\mingw-w64 7.3.0 instead
|
||||
MINGW_SDK_64: C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64
|
||||
CFLAGS: -Wall -W -Werror -Wno-error=implicit-fallthrough -Wimplicit-fallthrough=3 -Wno-tautological-compare -Wno-error=suggest-attribute=noreturn
|
||||
NASM_ZIP: nasm-2.12.01
|
||||
NASM_URL: http://www.nasm.us/pub/nasm/releasebuilds/2.12.01/win64/nasm-2.12.01-win64.zip
|
||||
UPX_ZIP: upx391w
|
||||
UPX_URL: http://upx.sourceforge.net/download/upx391w.zip
|
||||
CCACHE_EXE: ccache.exe
|
||||
|
@ -46,7 +44,6 @@ environment:
|
|||
ASSET_CLEAN: 0
|
||||
|
||||
cache:
|
||||
- nasm-2.12.01.zip
|
||||
- upx391w.zip
|
||||
- ccache.exe
|
||||
- C:\Users\appveyor\.ccache
|
||||
|
@ -58,10 +55,6 @@ install:
|
|||
- if [%X86_64%] == [1] ( set "MINGW_SDK=%MINGW_SDK_64%" )
|
||||
- if [%X86_64%] == [1] ( set "CCACHE_CC=%CCACHE_CC_64%" )
|
||||
|
||||
- if not exist "%NASM_ZIP%.zip" appveyor DownloadFile "%NASM_URL%" -FileName "%NASM_ZIP%.zip"
|
||||
- 7z x -y "%NASM_ZIP%.zip" -o%TMP% >null
|
||||
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%NASM_ZIP%" "%MINGW_SDK%\bin" nasm.exe || exit 0
|
||||
|
||||
- if not exist "%UPX_ZIP%.zip" appveyor DownloadFile "%UPX_URL%" -FileName "%UPX_ZIP%.zip"
|
||||
- 7z x -y "%UPX_ZIP%.zip" -o%TMP% >null
|
||||
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%UPX_ZIP%" "%MINGW_SDK%\bin" upx.exe || exit 0
|
||||
|
@ -78,7 +71,6 @@ before_build:
|
|||
- set "Path=%MINGW_SDK%\bin;%Path%"
|
||||
- if [%X86_64%] == [1] ( x86_64-w64-mingw32-gcc --version ) else ( i686-w64-mingw32-gcc --version )
|
||||
- mingw32-make --version
|
||||
- if not [%X86_64%] == [1] ( nasm -v )
|
||||
- if not [%NOUPX%] == [1] ( upx -V )
|
||||
- ccache -V
|
||||
- ccache -s
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Kitware, Inc.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
# support for the yasm assembler
|
||||
|
||||
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS nasm yasm asm)
|
||||
|
||||
if(NOT CMAKE_ASM_YASM_OBJECT_FORMAT)
|
||||
if(WIN32)
|
||||
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT win64)
|
||||
else()
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT win32)
|
||||
endif()
|
||||
elseif(APPLE)
|
||||
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho64)
|
||||
else()
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho)
|
||||
endif()
|
||||
else()
|
||||
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf64)
|
||||
else()
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_ASM_YASM_COMPILE_OBJECT "<CMAKE_ASM_YASM_COMPILER> <FLAGS> -f ${CMAKE_ASM_YASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>")
|
||||
|
||||
# Load the generic ASMInformation file:
|
||||
set(ASM_DIALECT "_YASM")
|
||||
include(CMakeASMInformation)
|
||||
set(ASM_DIALECT)
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Kitware, Inc.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
# Find the nasm assembler. yasm (http://www.tortall.net/projects/yasm/) is nasm compatible
|
||||
|
||||
set(CMAKE_ASM_YASM_COMPILER_LIST nasm yasm)
|
||||
|
||||
if(NOT CMAKE_ASM_YASM_COMPILER)
|
||||
find_program(CMAKE_ASM_YASM_COMPILER yasm
|
||||
"$ENV{ProgramFiles}/YASM")
|
||||
endif()
|
||||
|
||||
# Load the generic DetermineASM compiler file with the DIALECT set properly:
|
||||
set(ASM_DIALECT "_YASM")
|
||||
include(CMakeDetermineASMCompiler)
|
||||
set(ASM_DIALECT)
|
|
@ -1,23 +0,0 @@
|
|||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Kitware, Inc.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
# This file is used by EnableLanguage in cmGlobalGenerator to
|
||||
# determine that the selected ASM_NASM "compiler" works.
|
||||
# For assembler this can only check whether the compiler has been found,
|
||||
# because otherwise there would have to be a separate assembler source file
|
||||
# for each assembler on every architecture.
|
||||
|
||||
set(ASM_DIALECT "_YASM")
|
||||
include(CMakeTestASMCompiler)
|
||||
set(ASM_DIALECT)
|
|
@ -76,7 +76,7 @@ LOCAL_SRC_FILES := am_map.c \
|
|||
android/i_system.c \
|
||||
android/i_video.c
|
||||
|
||||
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOASM -DNOPIX -DUNIXCOMMON -DNOTERMIOS
|
||||
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOPIX -DUNIXCOMMON -DNOTERMIOS
|
||||
|
||||
LOCAL_MODULE := libsrb2
|
||||
|
||||
|
|
|
@ -190,16 +190,6 @@ source_group("Main" FILES ${SRB2_CORE_SOURCES} ${SRB2_CORE_HEADERS})
|
|||
source_group("Renderer" FILES ${SRB2_CORE_RENDER_SOURCES})
|
||||
source_group("Game" FILES ${SRB2_CORE_GAME_SOURCES})
|
||||
|
||||
|
||||
set(SRB2_ASM_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vid_copy.s
|
||||
)
|
||||
|
||||
set(SRB2_NASM_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tmap_mmx.nas
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/tmap.nas
|
||||
)
|
||||
|
||||
if(MSVC)
|
||||
list(APPEND SRB2_NASM_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/tmap_vc.nas)
|
||||
endif()
|
||||
|
@ -231,10 +221,6 @@ set(SRB2_CONFIG_HAVE_CURL ON CACHE BOOL
|
|||
"Enable curl support, used for downloading files via HTTP.")
|
||||
set(SRB2_CONFIG_HWRENDER ON CACHE BOOL
|
||||
"Enable hardware rendering through OpenGL.")
|
||||
set(SRB2_CONFIG_USEASM OFF CACHE BOOL
|
||||
"Enable NASM tmap implementation for software mode speedup.")
|
||||
set(SRB2_CONFIG_YASM OFF CACHE BOOL
|
||||
"Use YASM in place of NASM.")
|
||||
set(SRB2_CONFIG_STATIC_OPENGL OFF CACHE BOOL
|
||||
"Use statically linked OpenGL. NOT RECOMMENDED.")
|
||||
|
||||
|
@ -503,29 +489,6 @@ if(${SRB2_CONFIG_HWRENDER} AND ${SRB2_CONFIG_STATIC_OPENGL})
|
|||
endif()
|
||||
endif()
|
||||
|
||||
if(${SRB2_CONFIG_USEASM})
|
||||
#SRB2_ASM_FLAGS can be used to pass flags to either nasm or yasm.
|
||||
if(${CMAKE_SYSTEM} MATCHES "Linux")
|
||||
set(SRB2_ASM_FLAGS "-DLINUX ${SRB2_ASM_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(${SRB2_CONFIG_YASM})
|
||||
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS} nas)
|
||||
set(CMAKE_ASM_YASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
|
||||
enable_language(ASM_YASM)
|
||||
else()
|
||||
set(CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS} nas)
|
||||
set(CMAKE_ASM_NASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
|
||||
enable_language(ASM_NASM)
|
||||
endif()
|
||||
set(SRB2_USEASM ON)
|
||||
add_definitions(-DUSEASM)
|
||||
set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} -msse3 -mfpmath=sse)
|
||||
else()
|
||||
set(SRB2_USEASM OFF)
|
||||
add_definitions(-DNONX86 -DNORUSEASM)
|
||||
endif()
|
||||
|
||||
# Targets
|
||||
|
||||
# Compatibility flag with later versions of GCC
|
||||
|
|
35
src/Makefile
35
src/Makefile
|
@ -62,7 +62,6 @@
|
|||
# Compile a debug version, add 'DEBUGMODE=1'
|
||||
# Compile with less warnings, add 'RELAXWARNINGS=1'
|
||||
# Generate compiler errors for most compiler warnings, add 'ERRORMODE=1'
|
||||
# Compile without NASM's tmap.nas, add 'NOASM=1'
|
||||
# Compile without 3D hardware support, add 'NOHW=1'
|
||||
# Compile with GDBstubs, add 'RDB=1'
|
||||
# Compile without PNG, add 'NOPNG=1'
|
||||
|
@ -165,7 +164,6 @@ endif
|
|||
ifdef MINGW64
|
||||
MINGW=1
|
||||
NONX86=1
|
||||
NOASM=1
|
||||
# MINGW64 should not necessarily imply X86_64=1, but we make that assumption elsewhere
|
||||
# Once that changes, remove this
|
||||
X86_64=1
|
||||
|
@ -188,7 +186,6 @@ NOPNG=1
|
|||
NOZLIB=1
|
||||
NONET=1
|
||||
NOHW=1
|
||||
NOASM=1
|
||||
NOIPX=1
|
||||
EXENAME?=srb2dummy
|
||||
OBJS=$(OBJDIR)/i_video.o
|
||||
|
@ -197,7 +194,6 @@ endif
|
|||
|
||||
ifdef HAIKU
|
||||
NOIPX=1
|
||||
NOASM=1
|
||||
ifndef NONET
|
||||
LIBS=-lnetwork
|
||||
endif
|
||||
|
@ -267,7 +263,6 @@ NOPNG=1
|
|||
NOZLIB=1
|
||||
NONET=1
|
||||
#NOHW=1
|
||||
NOASM=1
|
||||
NOIPX=1
|
||||
NONX86=1
|
||||
OBJS+=$(OBJDIR)/i_video.o
|
||||
|
@ -295,7 +290,6 @@ endif
|
|||
MSGFMT?=msgfmt
|
||||
|
||||
ifndef ECHO
|
||||
NASM:=@$(NASM)
|
||||
REMOVE:=@$(REMOVE)
|
||||
CC:=@$(CC)
|
||||
CXX:=@$(CXX)
|
||||
|
@ -350,13 +344,6 @@ ifdef X86_64
|
|||
endif
|
||||
endif
|
||||
|
||||
ifndef NOASM
|
||||
ifndef NONX86
|
||||
OBJS+=$(OBJDIR)/tmap.o $(OBJDIR)/tmap_mmx.o
|
||||
OPTS+=-DUSEASM
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef NOPNG
|
||||
OPTS+=-DHAVE_PNG
|
||||
|
||||
|
@ -495,16 +482,6 @@ else
|
|||
endif
|
||||
CFLAGS+=-g $(OPTS) $(ARCHOPTS) $(WINDRESFLAGS)
|
||||
|
||||
ifdef YASM
|
||||
ifdef STABS
|
||||
NASMOPTS?= -g stabs
|
||||
else
|
||||
NASMOPTS?= -g dwarf2
|
||||
endif
|
||||
else
|
||||
NASMOPTS?= -g
|
||||
endif
|
||||
|
||||
ifdef PROFILEMODE
|
||||
# build with profiling information
|
||||
CFLAGS+=-pg
|
||||
|
@ -698,12 +675,6 @@ ifdef CYGWIN32
|
|||
$(REMOVE) $(OBJDIR)/*.res
|
||||
endif
|
||||
|
||||
#make a big srb2.s that is the disasm of the exe (dos only ?)
|
||||
asm:
|
||||
$(CC) $(LDFLAGS) $(OBJS) -o $(OBJDIR)/tmp.exe $(LIBS)
|
||||
$(OBJDUMP) -d $(OBJDIR)/tmp.exe --no-show-raw-insn > srb2.s
|
||||
$(REMOVE) $(OBJDIR)/tmp.exe
|
||||
|
||||
# executable
|
||||
# NOTE: DJGPP's objcopy do not have --add-gnu-debuglink
|
||||
|
||||
|
@ -888,12 +859,6 @@ $(OBJDIR)/%.o: hardware/%.c
|
|||
$(OBJDIR)/%.o: blua/%.c
|
||||
$(CC) $(CFLAGS) $(LUA_CFLAGS) $(WFLAGS) -c $< -o $@
|
||||
|
||||
$(OBJDIR)/%.o: %.nas
|
||||
$(NASM) $(NASMOPTS) -o $@ -f $(NASMFORMAT) $<
|
||||
|
||||
$(OBJDIR)/vid_copy.o: vid_copy.s asm_defs.inc
|
||||
$(CC) $(OPTS) $(ASFLAGS) -x assembler-with-cpp -c $< -o $@
|
||||
|
||||
$(OBJDIR)/%.o: %.s
|
||||
$(CC) $(OPTS) -x assembler-with-cpp -c $< -o $@
|
||||
|
||||
|
|
|
@ -501,12 +501,6 @@ i_main_o=$(OBJDIR)/i_main.o
|
|||
#set OBJDIR and BIN's starting place
|
||||
OBJDIR=../objs
|
||||
BIN=../bin
|
||||
#Nasm ASM and rm
|
||||
ifdef YASM
|
||||
NASM?=yasm
|
||||
else
|
||||
NASM?=nasm
|
||||
endif
|
||||
REMOVE?=rm -f
|
||||
CP?=cp
|
||||
MKDIR?=mkdir -p
|
||||
|
@ -524,7 +518,6 @@ endif
|
|||
#Interface Setup
|
||||
ifdef DJGPPDOS
|
||||
INTERFACE=djgppdos
|
||||
NASMFORMAT=coff
|
||||
OBJDIR:=$(OBJDIR)/djgppdos
|
||||
ifdef WATTCP
|
||||
OBJDIR:=$(OBJDIR)/wattcp
|
||||
|
@ -538,7 +531,6 @@ ifdef DUMMY
|
|||
BIN:=$(BIN)/dummy
|
||||
else
|
||||
ifdef LINUX
|
||||
NASMFORMAT=elf -DLINUX
|
||||
SDL=1
|
||||
ifdef LINUX64
|
||||
OBJDIR:=$(OBJDIR)/Linux64
|
||||
|
@ -550,7 +542,6 @@ endif
|
|||
else
|
||||
ifdef FREEBSD
|
||||
INTERFACE=sdl
|
||||
NASMFORMAT=elf -DLINUX
|
||||
SDL=1
|
||||
|
||||
OBJDIR:=$(OBJDIR)/FreeBSD
|
||||
|
@ -558,7 +549,6 @@ ifdef FREEBSD
|
|||
else
|
||||
ifdef SOLARIS
|
||||
INTERFACE=sdl
|
||||
NASMFORMAT=elf -DLINUX
|
||||
SDL=1
|
||||
|
||||
OBJDIR:=$(OBJDIR)/Solaris
|
||||
|
@ -566,7 +556,6 @@ ifdef SOLARIS
|
|||
else
|
||||
ifdef CYGWIN32
|
||||
INTERFACE=sdl
|
||||
NASMFORMAT=win32
|
||||
SDL=1
|
||||
|
||||
OBJDIR:=$(OBJDIR)/cygwin
|
||||
|
@ -574,7 +563,6 @@ ifdef CYGWIN32
|
|||
else
|
||||
ifdef MINGW64
|
||||
INTERFACE=win32
|
||||
#NASMFORMAT=win64
|
||||
OBJDIR:=$(OBJDIR)/Mingw64
|
||||
BIN:=$(BIN)/Mingw64
|
||||
else
|
||||
|
@ -606,13 +594,11 @@ ifdef PS3N
|
|||
else
|
||||
ifdef MINGW
|
||||
INTERFACE=win32
|
||||
NASMFORMAT=win32
|
||||
OBJDIR:=$(OBJDIR)/Mingw
|
||||
BIN:=$(BIN)/Mingw
|
||||
else
|
||||
ifdef XBOX
|
||||
INTERFACE=sdl12
|
||||
NASMFORMAT=win32
|
||||
PREFIX?=/usr/local/openxdk/bin/i386-pc-xbox
|
||||
SDL=1
|
||||
SDL12=1
|
||||
|
|
|
@ -233,11 +233,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
|
|||
return -1;
|
||||
}
|
||||
|
||||
const CPUInfoFlags *I_CPUInfo(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *I_LocateWad(void)
|
||||
{
|
||||
return "/sdcard/srb2";
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
// SONIC ROBO BLAST 2
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
// Copyright (C) 1999-2018 by Sonic Team Junior.
|
||||
//
|
||||
// This program is free software distributed under the
|
||||
// terms of the GNU General Public License, version 2.
|
||||
// See the 'LICENSE' file for more details.
|
||||
//-----------------------------------------------------------------------------
|
||||
/// \file asm_defs.inc
|
||||
/// \brief must match the C structures
|
||||
|
||||
#ifndef __ASM_DEFS__
|
||||
#define __ASM_DEFS__
|
||||
|
||||
// this makes variables more noticable,
|
||||
// and make the label match with C code
|
||||
|
||||
// Linux, unlike DOS, has no "_" 19990119 by Kin
|
||||
// and nasm needs .data code segs under linux 20010210 by metzgermeister
|
||||
// FIXME: nasm ignores these settings, so I put the macros into the makefile
|
||||
#ifdef __ELF__
|
||||
#define C(label) label
|
||||
#define CODE_SEG .data
|
||||
#else
|
||||
#define C(label) _##label
|
||||
#define CODE_SEG .text
|
||||
#endif
|
||||
|
||||
/* This is a more readable way to access the arguments passed from C code */
|
||||
/* PLEASE NOTE: it is supposed that all arguments passed from C code are */
|
||||
/* 32bit integer (INT32, long, and most *pointers) */
|
||||
#define ARG1 8(%ebp)
|
||||
#define ARG2 12(%ebp)
|
||||
#define ARG3 16(%ebp)
|
||||
#define ARG4 20(%ebp)
|
||||
#define ARG5 24(%ebp)
|
||||
#define ARG6 28(%ebp)
|
||||
#define ARG7 32(%ebp)
|
||||
#define ARG8 36(%ebp)
|
||||
#define ARG9 40(%ebp) //(c)tm ... Allegro by Shawn Hargreaves.
|
||||
|
||||
#endif
|
|
@ -492,7 +492,7 @@ void M_StartupLocale(void);
|
|||
// M_GetText function that just returns the string.
|
||||
#define M_GetText(x) (x)
|
||||
#endif
|
||||
extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
|
||||
void *M_Memcpy(void *dest, const void *src, size_t n);
|
||||
char *va(const char *format, ...) FUNCPRINTF;
|
||||
char *M_GetToken(const char *inputString);
|
||||
char *sizeu1(size_t num);
|
||||
|
|
|
@ -137,11 +137,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
|
|||
return -1;
|
||||
}
|
||||
|
||||
const CPUInfoFlags *I_CPUInfo(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const char *I_LocateWad(void)
|
||||
{
|
||||
return NULL;
|
||||
|
|
|
@ -296,40 +296,6 @@ char *I_GetUserName(void);
|
|||
*/
|
||||
INT32 I_mkdir(const char *dirname, INT32 unixright);
|
||||
|
||||
typedef struct {
|
||||
int FPU : 1; ///< FPU availabile
|
||||
int CPUID : 1; ///< CPUID instruction
|
||||
int RDTSC : 1; ///< RDTSC instruction
|
||||
int MMX : 1; ///< MMX features
|
||||
int MMXExt : 1; ///< MMX Ext. features
|
||||
int CMOV : 1; ///< Pentium Pro's "cmov"
|
||||
int AMD3DNow : 1; ///< 3DNow features
|
||||
int AMD3DNowExt: 1; ///< 3DNow! Ext. features
|
||||
int SSE : 1; ///< SSE features
|
||||
int SSE2 : 1; ///< SSE2 features
|
||||
int SSE3 : 1; ///< SSE3 features
|
||||
int IA64 : 1; ///< Running on IA64
|
||||
int AMD64 : 1; ///< Running on AMD64
|
||||
int AltiVec : 1; ///< AltiVec features
|
||||
int FPPE : 1; ///< floating-point precision error
|
||||
int PFC : 1; ///< TBD?
|
||||
int cmpxchg : 1; ///< ?
|
||||
int cmpxchg16b : 1; ///< ?
|
||||
int cmp8xchg16 : 1; ///< ?
|
||||
int FPE : 1; ///< FPU Emu
|
||||
int DEP : 1; ///< Data excution prevent
|
||||
int PPCMM64 : 1; ///< PowerPC Movemem 64bit ok?
|
||||
int ALPHAbyte : 1; ///< ?
|
||||
int PAE : 1; ///< Physical Address Extension
|
||||
int CPUs : 8;
|
||||
} CPUInfoFlags;
|
||||
|
||||
|
||||
/** \brief Info about CPU
|
||||
\return CPUInfo in bits
|
||||
*/
|
||||
const CPUInfoFlags *I_CPUInfo(void);
|
||||
|
||||
/** \brief Find main WAD
|
||||
\return path to main WAD
|
||||
*/
|
||||
|
|
|
@ -23,49 +23,6 @@
|
|||
#include "m_fixed.h"
|
||||
#include "tables.h" // ANGLETOFINESHIFT
|
||||
|
||||
#ifdef __USE_C_FIXEDMUL__
|
||||
|
||||
/** \brief The FixedMul function
|
||||
|
||||
\param a fixed_t number
|
||||
\param b fixed_t number
|
||||
|
||||
\return a*b>>FRACBITS
|
||||
|
||||
*/
|
||||
fixed_t FixedMul(fixed_t a, fixed_t b)
|
||||
{
|
||||
// Need to cast to unsigned before shifting to avoid undefined behaviour
|
||||
// for negative integers
|
||||
return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
|
||||
}
|
||||
|
||||
#endif //__USE_C_FIXEDMUL__
|
||||
|
||||
#ifdef __USE_C_FIXEDDIV__
|
||||
/** \brief The FixedDiv2 function
|
||||
|
||||
\param a fixed_t number
|
||||
\param b fixed_t number
|
||||
|
||||
\return a/b * FRACUNIT
|
||||
|
||||
*/
|
||||
fixed_t FixedDiv2(fixed_t a, fixed_t b)
|
||||
{
|
||||
INT64 ret;
|
||||
|
||||
if (b == 0)
|
||||
I_Error("FixedDiv: divide by zero");
|
||||
|
||||
ret = (((INT64)a * FRACUNIT)) / b;
|
||||
|
||||
if ((ret > INT32_MAX) || (ret < INT32_MIN))
|
||||
I_Error("FixedDiv: divide by zero");
|
||||
return (fixed_t)ret;
|
||||
}
|
||||
|
||||
#endif // __USE_C_FIXEDDIV__
|
||||
|
||||
fixed_t FixedSqrt(fixed_t x)
|
||||
{
|
||||
|
|
143
src/m_fixed.h
143
src/m_fixed.h
|
@ -46,127 +46,29 @@ typedef INT32 fixed_t;
|
|||
#define FIXED_TO_FLOAT(x) (((float)(x)) / ((float)FRACUNIT))
|
||||
#define FLOAT_TO_FIXED(f) (fixed_t)((f) * ((float)FRACUNIT))
|
||||
|
||||
/** \brief The FixedMul function
|
||||
\param a fixed_t number
|
||||
\param b fixed_t number
|
||||
\return a*b>>FRACBITS
|
||||
*/
|
||||
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedMul(fixed_t a, fixed_t b)
|
||||
{
|
||||
// Need to cast to unsigned before shifting to avoid undefined behaviour
|
||||
// for negative integers
|
||||
return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
|
||||
}
|
||||
|
||||
#if defined (__WATCOMC__) && FRACBITS == 16
|
||||
#pragma aux FixedMul = \
|
||||
"imul ebx", \
|
||||
"shrd eax,edx,16" \
|
||||
parm [eax] [ebx] \
|
||||
value [eax] \
|
||||
modify exact [eax edx]
|
||||
|
||||
#pragma aux FixedDiv2 = \
|
||||
"cdq", \
|
||||
"shld edx,eax,16", \
|
||||
"sal eax,16", \
|
||||
"idiv ebx" \
|
||||
parm [eax] [ebx] \
|
||||
value [eax] \
|
||||
modify exact [eax edx]
|
||||
#elif defined (__GNUC__) && defined (__i386__) && !defined (NOASM)
|
||||
// DJGPP, i386 linux, cygwin or mingw
|
||||
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
|
||||
{
|
||||
fixed_t ret;
|
||||
asm
|
||||
(
|
||||
"imull %2;" // a*b
|
||||
"shrdl %3,%%edx,%0;" // shift logical right FRACBITS bits
|
||||
:"=a" (ret) // eax is always the result and the first operand (%0,%1)
|
||||
:"0" (a), "r" (b) // and %2 is what we use imull on with what in %1
|
||||
, "I" (FRACBITS) // %3 holds FRACBITS (normally 16)
|
||||
:"cc", "%edx" // edx and condition codes clobbered
|
||||
);
|
||||
return ret;
|
||||
}
|
||||
|
||||
FUNCMATH FUNCINLINE static inline fixed_t FixedDiv2(fixed_t a, fixed_t b)
|
||||
{
|
||||
fixed_t ret;
|
||||
asm
|
||||
(
|
||||
"movl %1,%%edx;" // these two instructions allow the next two to pair, on the Pentium processor.
|
||||
"sarl $31,%%edx;" // shift arithmetic right 31 on EDX
|
||||
"shldl %3,%1,%%edx;" // DP shift logical left FRACBITS on EDX
|
||||
"sall %3,%0;" // shift arithmetic left FRACBITS on EAX
|
||||
"idivl %2;" // EDX/b = EAX
|
||||
: "=a" (ret)
|
||||
: "0" (a), "r" (b)
|
||||
, "I" (FRACBITS)
|
||||
: "%edx"
|
||||
);
|
||||
return ret;
|
||||
}
|
||||
#elif defined (__GNUC__) && defined (__arm__) && !defined(__thumb__) && !defined(NOASM) //ARMv4 ASM
|
||||
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // let abuse smull
|
||||
{
|
||||
fixed_t ret;
|
||||
asm
|
||||
(
|
||||
"smull %[lo], r1, %[a], %[b];"
|
||||
"mov %[lo], %[lo], lsr %3;"
|
||||
"orr %[lo], %[lo], r1, lsl %3;"
|
||||
: [lo] "=&r" (ret) // rhi, rlo and rm must be distinct registers
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
, "i" (FRACBITS)
|
||||
: "r1"
|
||||
);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define __USE_C_FIXEDDIV__ // no double or asm div in ARM land
|
||||
#elif defined (__GNUC__) && defined (__ppc__) && !defined(NOASM) && 0 // WII: PPC CPU
|
||||
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
|
||||
{
|
||||
fixed_t ret, hi, lo;
|
||||
asm
|
||||
(
|
||||
"mullw %0, %2, %3;"
|
||||
"mulhw %1, %2, %3"
|
||||
: "=r" (hi), "=r" (lo)
|
||||
: "r" (a), "r" (b)
|
||||
, "I" (FRACBITS)
|
||||
);
|
||||
ret = (INT64)((hi>>FRACBITS)+lo)<<FRACBITS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define __USE_C_FIXEDDIV__// Alam: I am lazy
|
||||
#elif defined (__GNUC__) && defined (__mips__) && !defined(NOASM) && 0 // PSP: MIPS CPU
|
||||
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
|
||||
{
|
||||
fixed_t ret;
|
||||
asm
|
||||
(
|
||||
"mult %3, %4;" // a*b=h<32+l
|
||||
: "=r" (ret), "=l" (a), "=h" (b) //todo: abuse shr opcode
|
||||
: "0" (a), "r" (b)
|
||||
, "I" (FRACBITS)
|
||||
//: "+l", "+h"
|
||||
);
|
||||
ret = (INT64)((a>>FRACBITS)+b)<<FRACBITS;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define __USE_C_FIXEDDIV__ // no 64b asm div in MIPS land
|
||||
#elif defined (__GNUC__) && defined (__sh__) && 0 // DC: SH4 CPU
|
||||
#elif defined (__GNUC__) && defined (__m68k__) && 0 // DEAD: Motorola 6800 CPU
|
||||
#elif defined (_MSC_VER) && defined(USEASM) && FRACBITS == 16
|
||||
// Microsoft Visual C++ (no asm inline)
|
||||
fixed_t __cdecl FixedMul(fixed_t a, fixed_t b);
|
||||
fixed_t __cdecl FixedDiv2(fixed_t a, fixed_t b);
|
||||
#else
|
||||
#define __USE_C_FIXEDMUL__
|
||||
#define __USE_C_FIXEDDIV__
|
||||
#endif
|
||||
|
||||
#ifdef __USE_C_FIXEDMUL__
|
||||
FUNCMATH fixed_t FixedMul(fixed_t a, fixed_t b);
|
||||
#endif
|
||||
|
||||
#ifdef __USE_C_FIXEDDIV__
|
||||
FUNCMATH fixed_t FixedDiv2(fixed_t a, fixed_t b);
|
||||
#endif
|
||||
/** \brief The FixedDiv2 function
|
||||
\param a fixed_t number
|
||||
\param b fixed_t number
|
||||
\return a/b * FRACUNIT
|
||||
*/
|
||||
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedDiv2(fixed_t a, fixed_t b)
|
||||
{
|
||||
// This does not check for division overflow or division by 0!
|
||||
// That is the caller's responsibility.
|
||||
return (fixed_t)(((INT64)a * FRACUNIT) / b);
|
||||
}
|
||||
|
||||
/** \brief The FixedInt function
|
||||
|
||||
|
@ -174,7 +76,6 @@ FUNCMATH fixed_t FixedDiv2(fixed_t a, fixed_t b);
|
|||
|
||||
\return a/FRACUNIT
|
||||
*/
|
||||
|
||||
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedInt(fixed_t a)
|
||||
{
|
||||
return FixedMul(a, 1);
|
||||
|
|
421
src/m_misc.c
421
src/m_misc.c
|
@ -1943,430 +1943,11 @@ char *sizeu5(size_t num)
|
|||
return sizeu5_buf;
|
||||
}
|
||||
|
||||
#if defined (__GNUC__) && defined (__i386__) // from libkwave, under GPL
|
||||
// Alam: note libkwave memcpy code comes from mplayer's libvo/aclib_template.c, r699
|
||||
|
||||
/* for small memory blocks (<256 bytes) this version is faster */
|
||||
#define small_memcpy(dest,src,n)\
|
||||
{\
|
||||
register unsigned long int dummy;\
|
||||
__asm__ __volatile__(\
|
||||
"cld\n\t"\
|
||||
"rep; movsb"\
|
||||
:"=&D"(dest), "=&S"(src), "=&c"(dummy)\
|
||||
:"0" (dest), "1" (src),"2" (n)\
|
||||
: "memory", "cc");\
|
||||
}
|
||||
/* linux kernel __memcpy (from: /include/asm/string.h) */
|
||||
ATTRINLINE static FUNCINLINE void *__memcpy (void *dest, const void * src, size_t n)
|
||||
void *M_Memcpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
int d0, d1, d2;
|
||||
|
||||
if ( n < 4 )
|
||||
{
|
||||
small_memcpy(dest, src, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"rep ; movsl;"
|
||||
"testb $2,%b4;"
|
||||
"je 1f;"
|
||||
"movsw;"
|
||||
"1:\ttestb $1,%b4;"
|
||||
"je 2f;"
|
||||
"movsb;"
|
||||
"2:"
|
||||
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
||||
:"0" (n/4), "q" (n),"1" ((long) dest),"2" ((long) src)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
return dest;
|
||||
}
|
||||
|
||||
#define SSE_MMREG_SIZE 16
|
||||
#define MMX_MMREG_SIZE 8
|
||||
|
||||
#define MMX1_MIN_LEN 0x800 /* 2K blocks */
|
||||
#define MIN_LEN 0x40 /* 64-byte blocks */
|
||||
|
||||
/* SSE note: i tried to move 128 bytes a time instead of 64 but it
|
||||
didn't make any measureable difference. i'm using 64 for the sake of
|
||||
simplicity. [MF] */
|
||||
static /*FUNCTARGET("sse2")*/ void *sse_cpy(void * dest, const void * src, size_t n)
|
||||
{
|
||||
void *retval = dest;
|
||||
size_t i;
|
||||
|
||||
/* PREFETCH has effect even for MOVSB instruction ;) */
|
||||
__asm__ __volatile__ (
|
||||
"prefetchnta (%0);"
|
||||
"prefetchnta 32(%0);"
|
||||
"prefetchnta 64(%0);"
|
||||
"prefetchnta 96(%0);"
|
||||
"prefetchnta 128(%0);"
|
||||
"prefetchnta 160(%0);"
|
||||
"prefetchnta 192(%0);"
|
||||
"prefetchnta 224(%0);"
|
||||
"prefetchnta 256(%0);"
|
||||
"prefetchnta 288(%0);"
|
||||
: : "r" (src) );
|
||||
|
||||
if (n >= MIN_LEN)
|
||||
{
|
||||
register unsigned long int delta;
|
||||
/* Align destination to an MMREG_SIZE boundary */
|
||||
delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1);
|
||||
if (delta)
|
||||
{
|
||||
delta=SSE_MMREG_SIZE-delta;
|
||||
n -= delta;
|
||||
small_memcpy(dest, src, delta);
|
||||
}
|
||||
i = n >> 6; /* n/64 */
|
||||
n&=63;
|
||||
if (((unsigned long)src) & 15)
|
||||
/* if SRC is misaligned */
|
||||
for (; i>0; i--)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"prefetchnta 320(%0);"
|
||||
"prefetchnta 352(%0);"
|
||||
"movups (%0), %%xmm0;"
|
||||
"movups 16(%0), %%xmm1;"
|
||||
"movups 32(%0), %%xmm2;"
|
||||
"movups 48(%0), %%xmm3;"
|
||||
"movntps %%xmm0, (%1);"
|
||||
"movntps %%xmm1, 16(%1);"
|
||||
"movntps %%xmm2, 32(%1);"
|
||||
"movntps %%xmm3, 48(%1);"
|
||||
:: "r" (src), "r" (dest) : "memory");
|
||||
src = (const unsigned char *)src + 64;
|
||||
dest = (unsigned char *)dest + 64;
|
||||
}
|
||||
else
|
||||
/*
|
||||
Only if SRC is aligned on 16-byte boundary.
|
||||
This allows using movaps instead of movups; movaps requires the data
|
||||
to be aligned or a general-protection exception (#GP) is generated.
|
||||
*/
|
||||
for (; i>0; i--)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"prefetchnta 320(%0);"
|
||||
"prefetchnta 352(%0);"
|
||||
"movaps (%0), %%xmm0;"
|
||||
"movaps 16(%0), %%xmm1;"
|
||||
"movaps 32(%0), %%xmm2;"
|
||||
"movaps 48(%0), %%xmm3;"
|
||||
"movntps %%xmm0, (%1);"
|
||||
"movntps %%xmm1, 16(%1);"
|
||||
"movntps %%xmm2, 32(%1);"
|
||||
"movntps %%xmm3, 48(%1);"
|
||||
:: "r" (src), "r" (dest) : "memory");
|
||||
src = ((const unsigned char *)src) + 64;
|
||||
dest = ((unsigned char *)dest) + 64;
|
||||
}
|
||||
/* since movntps is weakly-ordered, a "sfence"
|
||||
* is needed to become ordered again. */
|
||||
__asm__ __volatile__ ("sfence":::"memory");
|
||||
/* allows the FPU to be used again */
|
||||
__asm__ __volatile__ ("emms":::"memory");
|
||||
}
|
||||
/*
|
||||
* Now do the tail of the block
|
||||
*/
|
||||
if (n) __memcpy(dest, src, n);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
void *retval = dest;
|
||||
size_t i;
|
||||
|
||||
/* PREFETCH has effect even for MOVSB instruction ;) */
|
||||
__asm__ __volatile__ (
|
||||
"prefetchnta (%0);"
|
||||
"prefetchnta 32(%0);"
|
||||
"prefetchnta 64(%0);"
|
||||
"prefetchnta 96(%0);"
|
||||
"prefetchnta 128(%0);"
|
||||
"prefetchnta 160(%0);"
|
||||
"prefetchnta 192(%0);"
|
||||
"prefetchnta 224(%0);"
|
||||
"prefetchnta 256(%0);"
|
||||
"prefetchnta 288(%0);"
|
||||
: : "r" (src));
|
||||
|
||||
if (n >= MIN_LEN)
|
||||
{
|
||||
register unsigned long int delta;
|
||||
/* Align destination to an MMREG_SIZE boundary */
|
||||
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
|
||||
if (delta)
|
||||
{
|
||||
delta=MMX_MMREG_SIZE-delta;
|
||||
n -= delta;
|
||||
small_memcpy(dest, src, delta);
|
||||
}
|
||||
i = n >> 6; /* n/64 */
|
||||
n&=63;
|
||||
for (; i>0; i--)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"prefetchnta 320(%0);"
|
||||
"prefetchnta 352(%0);"
|
||||
"movq (%0), %%mm0;"
|
||||
"movq 8(%0), %%mm1;"
|
||||
"movq 16(%0), %%mm2;"
|
||||
"movq 24(%0), %%mm3;"
|
||||
"movq 32(%0), %%mm4;"
|
||||
"movq 40(%0), %%mm5;"
|
||||
"movq 48(%0), %%mm6;"
|
||||
"movq 56(%0), %%mm7;"
|
||||
"movntq %%mm0, (%1);"
|
||||
"movntq %%mm1, 8(%1);"
|
||||
"movntq %%mm2, 16(%1);"
|
||||
"movntq %%mm3, 24(%1);"
|
||||
"movntq %%mm4, 32(%1);"
|
||||
"movntq %%mm5, 40(%1);"
|
||||
"movntq %%mm6, 48(%1);"
|
||||
"movntq %%mm7, 56(%1);"
|
||||
:: "r" (src), "r" (dest) : "memory");
|
||||
src = ((const unsigned char *)src) + 64;
|
||||
dest = ((unsigned char *)dest) + 64;
|
||||
}
|
||||
/* since movntq is weakly-ordered, a "sfence"
|
||||
* is needed to become ordered again. */
|
||||
__asm__ __volatile__ ("sfence":::"memory");
|
||||
__asm__ __volatile__ ("emms":::"memory");
|
||||
}
|
||||
/*
|
||||
* Now do the tail of the block
|
||||
*/
|
||||
if (n) __memcpy(dest, src, n);
|
||||
return retval;
|
||||
}
|
||||
|
||||
static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW
|
||||
{
|
||||
void *retval = dest;
|
||||
size_t i;
|
||||
|
||||
/* PREFETCH has effect even for MOVSB instruction ;) */
|
||||
__asm__ __volatile__ (
|
||||
"prefetch (%0);"
|
||||
"prefetch 32(%0);"
|
||||
"prefetch 64(%0);"
|
||||
"prefetch 96(%0);"
|
||||
"prefetch 128(%0);"
|
||||
"prefetch 160(%0);"
|
||||
"prefetch 192(%0);"
|
||||
"prefetch 224(%0);"
|
||||
"prefetch 256(%0);"
|
||||
"prefetch 288(%0);"
|
||||
: : "r" (src));
|
||||
|
||||
if (n >= MMX1_MIN_LEN)
|
||||
{
|
||||
register unsigned long int delta;
|
||||
/* Align destination to an MMREG_SIZE boundary */
|
||||
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
|
||||
if (delta)
|
||||
{
|
||||
delta=MMX_MMREG_SIZE-delta;
|
||||
n -= delta;
|
||||
small_memcpy(dest, src, delta);
|
||||
}
|
||||
i = n >> 6; /* n/64 */
|
||||
n&=63;
|
||||
for (; i>0; i--)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"prefetch 320(%0);"
|
||||
"prefetch 352(%0);"
|
||||
"movq (%0), %%mm0;"
|
||||
"movq 8(%0), %%mm1;"
|
||||
"movq 16(%0), %%mm2;"
|
||||
"movq 24(%0), %%mm3;"
|
||||
"movq 32(%0), %%mm4;"
|
||||
"movq 40(%0), %%mm5;"
|
||||
"movq 48(%0), %%mm6;"
|
||||
"movq 56(%0), %%mm7;"
|
||||
"movq %%mm0, (%1);"
|
||||
"movq %%mm1, 8(%1);"
|
||||
"movq %%mm2, 16(%1);"
|
||||
"movq %%mm3, 24(%1);"
|
||||
"movq %%mm4, 32(%1);"
|
||||
"movq %%mm5, 40(%1);"
|
||||
"movq %%mm6, 48(%1);"
|
||||
"movq %%mm7, 56(%1);"
|
||||
:: "r" (src), "r" (dest) : "memory");
|
||||
src = ((const unsigned char *)src) + 64;
|
||||
dest = ((unsigned char *)dest) + 64;
|
||||
}
|
||||
__asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms
|
||||
}
|
||||
/*
|
||||
* Now do the tail of the block
|
||||
*/
|
||||
if (n) __memcpy(dest, src, n);
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Alam: why? memcpy may be __cdecl/_System and our code may not be the same type
|
||||
static void *cpu_cpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
if (src == NULL)
|
||||
{
|
||||
CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
|
||||
return dest;
|
||||
}
|
||||
|
||||
if(dest == NULL)
|
||||
{
|
||||
CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
|
||||
return dest;
|
||||
}
|
||||
|
||||
return memcpy(dest, src, n);
|
||||
}
|
||||
|
||||
static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
#if defined (_MSC_VER) && defined (_X86_)
|
||||
_asm
|
||||
{
|
||||
mov ecx, [n]
|
||||
mov esi, [src]
|
||||
mov edi, [dest]
|
||||
shr ecx, 6 // mit mmx: 64bytes per iteration
|
||||
jz lower_64 // if lower than 64 bytes
|
||||
loop_64: // MMX transfers multiples of 64bytes
|
||||
movq mm0, 0[ESI] // read sources
|
||||
movq mm1, 8[ESI]
|
||||
movq mm2, 16[ESI]
|
||||
movq mm3, 24[ESI]
|
||||
movq mm4, 32[ESI]
|
||||
movq mm5, 40[ESI]
|
||||
movq mm6, 48[ESI]
|
||||
movq mm7, 56[ESI]
|
||||
|
||||
movq 0[EDI], mm0 // write destination
|
||||
movq 8[EDI], mm1
|
||||
movq 16[EDI], mm2
|
||||
movq 24[EDI], mm3
|
||||
movq 32[EDI], mm4
|
||||
movq 40[EDI], mm5
|
||||
movq 48[EDI], mm6
|
||||
movq 56[EDI], mm7
|
||||
|
||||
add esi, 64
|
||||
add edi, 64
|
||||
dec ecx
|
||||
jnz loop_64
|
||||
emms // close mmx operation
|
||||
lower_64:// transfer rest of buffer
|
||||
mov ebx,esi
|
||||
sub ebx,src
|
||||
mov ecx,[n]
|
||||
sub ecx,ebx
|
||||
shr ecx, 3 // multiples of 8 bytes
|
||||
jz lower_8
|
||||
loop_8:
|
||||
movq mm0, [esi] // read source
|
||||
movq [edi], mm0 // write destination
|
||||
add esi, 8
|
||||
add edi, 8
|
||||
dec ecx
|
||||
jnz loop_8
|
||||
emms // close mmx operation
|
||||
lower_8:
|
||||
mov ebx,esi
|
||||
sub ebx,src
|
||||
mov ecx,[n]
|
||||
sub ecx,ebx
|
||||
rep movsb
|
||||
mov eax, [dest] // return dest
|
||||
}
|
||||
#elif defined (__GNUC__) && defined (__i386__)
|
||||
void *retval = dest;
|
||||
size_t i;
|
||||
|
||||
if (n >= MMX1_MIN_LEN)
|
||||
{
|
||||
register unsigned long int delta;
|
||||
/* Align destination to an MMREG_SIZE boundary */
|
||||
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
|
||||
if (delta)
|
||||
{
|
||||
delta=MMX_MMREG_SIZE-delta;
|
||||
n -= delta;
|
||||
small_memcpy(dest, src, delta);
|
||||
}
|
||||
i = n >> 6; /* n/64 */
|
||||
n&=63;
|
||||
for (; i>0; i--)
|
||||
{
|
||||
__asm__ __volatile__ (
|
||||
"movq (%0), %%mm0;"
|
||||
"movq 8(%0), %%mm1;"
|
||||
"movq 16(%0), %%mm2;"
|
||||
"movq 24(%0), %%mm3;"
|
||||
"movq 32(%0), %%mm4;"
|
||||
"movq 40(%0), %%mm5;"
|
||||
"movq 48(%0), %%mm6;"
|
||||
"movq 56(%0), %%mm7;"
|
||||
"movq %%mm0, (%1);"
|
||||
"movq %%mm1, 8(%1);"
|
||||
"movq %%mm2, 16(%1);"
|
||||
"movq %%mm3, 24(%1);"
|
||||
"movq %%mm4, 32(%1);"
|
||||
"movq %%mm5, 40(%1);"
|
||||
"movq %%mm6, 48(%1);"
|
||||
"movq %%mm7, 56(%1);"
|
||||
:: "r" (src), "r" (dest) : "memory");
|
||||
src = ((const unsigned char *)src) + 64;
|
||||
dest = ((unsigned char *)dest) + 64;
|
||||
}
|
||||
__asm__ __volatile__ ("emms":::"memory");
|
||||
}
|
||||
/*
|
||||
* Now do the tail of the block
|
||||
*/
|
||||
if (n) __memcpy(dest, src, n);
|
||||
return retval;
|
||||
#else
|
||||
return cpu_cpy(dest, src, n);
|
||||
#endif
|
||||
}
|
||||
|
||||
void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy;
|
||||
|
||||
/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE
|
||||
* Do not use on overlapped memory, use memmove for that
|
||||
*/
|
||||
void M_SetupMemcpy(void)
|
||||
{
|
||||
#if defined (__GNUC__) && defined (__i386__)
|
||||
if (R_SSE2)
|
||||
M_Memcpy = sse_cpy;
|
||||
else if (R_MMXExt)
|
||||
M_Memcpy = mmx2_cpy;
|
||||
else if (R_3DNow)
|
||||
M_Memcpy = mmx1_cpy;
|
||||
else
|
||||
#endif
|
||||
if (R_MMX)
|
||||
M_Memcpy = mmx_cpy;
|
||||
#if 0
|
||||
M_Memcpy = cpu_cpy;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Return the appropriate message for a file error or end of file.
|
||||
*/
|
||||
const char *M_FileError(FILE *fp)
|
||||
|
|
|
@ -98,8 +98,6 @@ TMatrix *RotateZMatrix(angle_t rad);
|
|||
// s1 = s2+s3+s1 (1024 max length)
|
||||
void strcatbf(char *s1, const char *s2, const char *s3);
|
||||
|
||||
void M_SetupMemcpy(void);
|
||||
|
||||
const char *M_FileError(FILE *handle);
|
||||
|
||||
// counting bits, for weapon ammo code, usually
|
||||
|
|
278
src/p5prof.h
278
src/p5prof.h
|
@ -1,278 +0,0 @@
|
|||
/*********************************************************
|
||||
*
|
||||
* File: p5prof.h
|
||||
* By: Kevin Baca
|
||||
*
|
||||
* MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG
|
||||
* (WHICH MEANS WRITE THE LOW DWORD FIRST)
|
||||
*
|
||||
* Now in yer code do:
|
||||
* INT64 count,total;
|
||||
*
|
||||
* ...
|
||||
* RDMSR(0x10,&count); //inner loop count
|
||||
* total += count;
|
||||
* ...
|
||||
*
|
||||
* printf("0x%x %x", (INT32)total, *((INT32 *)&total+1));
|
||||
* // LOW HIGH
|
||||
*
|
||||
*********************************************************/
|
||||
/**\file
|
||||
\brief This file provides macros to profile your code.
|
||||
|
||||
Here's how they work...
|
||||
|
||||
As you may or may not know, the Pentium class of
|
||||
processors provides extremely fine grained profiling
|
||||
capabilities through the use of what are called
|
||||
Machine Specific Registers (MSRs). These registers
|
||||
can provide information about almost any aspect of
|
||||
CPU performance down to a single cycle.
|
||||
|
||||
The MSRs of interest for profiling are specified by
|
||||
indices 0x10, 0x11, 0x12, and 0x13. Here is a brief
|
||||
description of each of these registers:
|
||||
|
||||
MSR 0x10
|
||||
This register is simply a cycle counter.
|
||||
|
||||
MSR 0x11
|
||||
This register controls what type of profiling data
|
||||
will be gathered.
|
||||
|
||||
MSRs 0x12 and 0x13
|
||||
These registers gather the profiling data specified in
|
||||
MSR 0x11.
|
||||
|
||||
Each MSR is 64 bits wide. For the Pentium processor,
|
||||
only the lower 32 bits of MSR 0x11 are valid. Bits 0-15
|
||||
specify what data will be gathered in MSR 0x12. Bits 16-31
|
||||
specify what data will be gathered in MSR 0x13. Both sets
|
||||
of bits have the same format:
|
||||
|
||||
Bits 0-5 specify which hardware event will be tracked.
|
||||
Bit 6, if set, indicates events will be tracked in
|
||||
rings 0-2.
|
||||
Bit 7, if set, indicates events will be tracked in
|
||||
ring 3.
|
||||
Bit 8, if set, indicates cycles should be counted for
|
||||
the specified event. If clear, it indicates the
|
||||
number of events should be counted.
|
||||
|
||||
Two instructions are provided for manipulating the MSRs.
|
||||
RDMSR (Read Machine Specific Register) and WRMSR
|
||||
(Write Machine Specific Register). These opcodes were
|
||||
originally undocumented and therefore most assemblers don't
|
||||
recognize them. Their byte codes are provided in the
|
||||
macros below.
|
||||
|
||||
RDMSR takes the MSR index in ecx and returns the register contents
|
||||
in edx : eax.
|
||||
|
||||
WRMSR takes the MSR index in ecx and the value to write
|
||||
in edx : eax.
|
||||
|
||||
Having only two profiling registers limits profiling capability to
|
||||
gathering only two types of information. The register
|
||||
usage can, however, be combined in interesting ways.
|
||||
For example, you can set one register to gather the
|
||||
number of a specific type of event while the other gathers
|
||||
the number of cycles for the same event. Or you can
|
||||
gather the number of two separate events while using
|
||||
MSR 0x10 to gather the number of cycles.
|
||||
|
||||
The enumerated list provides somewhat readable labels for
|
||||
the types of events that can be tracked.
|
||||
|
||||
For more information, get ahold of appendix H from the
|
||||
Intel Pentium programmer's manual (I don't remember the
|
||||
order number) or go to
|
||||
http://green.kaist.ac.kr/jwhahn/art3.htm.
|
||||
That's an article by Terje Mathisen where I got most of
|
||||
my information.
|
||||
|
||||
You may use this code however you wish. I hope it's
|
||||
useful and I hope I got everything right.
|
||||
|
||||
-Kevin
|
||||
|
||||
kbaca@skygames.com
|
||||
|
||||
*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
#define RDTSC(_dst) \
|
||||
__asm__("
|
||||
.byte 0x0F,0x31
|
||||
movl %%edx,(%%edi)
|
||||
movl %%eax,4(%%edi)"\
|
||||
: : "D" (_dst) : "eax", "edx", "edi")
|
||||
|
||||
// the old code... swapped it
|
||||
// movl %%edx,(%%edi)
|
||||
// movl %%eax,4(%%edi)"
|
||||
#define RDMSR(_msri, _msrd) \
|
||||
__asm__("
|
||||
.byte 0x0F,0x32
|
||||
movl %%eax,(%%edi)
|
||||
movl %%edx,4(%%edi)"\
|
||||
: : "c" (_msri), "D" (_msrd) : "eax", "ecx", "edx", "edi")
|
||||
|
||||
#define WRMSR(_msri, _msrd) \
|
||||
__asm__("
|
||||
xorl %%edx,%%edx
|
||||
.byte 0x0F,0x30"\
|
||||
: : "c" (_msri), "a" (_msrd) : "eax", "ecx", "edx")
|
||||
|
||||
#define RDMSR_0x12_0x13(_msr12, _msr13) \
|
||||
__asm__("
|
||||
movl $0x12,%%ecx
|
||||
.byte 0x0F,0x32
|
||||
movl %%edx,(%%edi)
|
||||
movl %%eax,4(%%edi)
|
||||
movl $0x13,%%ecx
|
||||
.byte 0x0F,0x32
|
||||
movl %%edx,(%%esi)
|
||||
movl %%eax,4(%%esi)"\
|
||||
: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "edi")
|
||||
|
||||
#define ZERO_MSR_0x12_0x13() \
|
||||
__asm__("
|
||||
xorl %%edx,%%edx
|
||||
xorl %%eax,%%eax
|
||||
movl $0x12,%%ecx
|
||||
.byte 0x0F,0x30
|
||||
movl $0x13,%%ecx
|
||||
.byte 0x0F,0x30"\
|
||||
: : : "eax", "ecx", "edx")
|
||||
|
||||
#elif defined (__WATCOMC__)
|
||||
|
||||
extern void RDTSC(UINT32 *dst);
|
||||
#pragma aux RDTSC =\
|
||||
"db 0x0F,0x31"\
|
||||
"mov [edi],edx"\
|
||||
"mov [4+edi],eax"\
|
||||
parm [edi]\
|
||||
modify [eax edx edi];
|
||||
|
||||
extern void RDMSR(UINT32 msri, UINT32 *msrd);
|
||||
#pragma aux RDMSR =\
|
||||
"db 0x0F,0x32"\
|
||||
"mov [edi],edx"\
|
||||
"mov [4+edi],eax"\
|
||||
parm [ecx] [edi]\
|
||||
modify [eax ecx edx edi];
|
||||
|
||||
extern void WRMSR(UINT32 msri, UINT32 msrd);
|
||||
#pragma aux WRMSR =\
|
||||
"xor edx,edx"\
|
||||
"db 0x0F,0x30"\
|
||||
parm [ecx] [eax]\
|
||||
modify [eax ecx edx];
|
||||
|
||||
extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13);
|
||||
#pragma aux RDMSR_0x12_0x13 =\
|
||||
"mov ecx,0x12"\
|
||||
"db 0x0F,0x32"\
|
||||
"mov [edi],edx"\
|
||||
"mov [4+edi],eax"\
|
||||
"mov ecx,0x13"\
|
||||
"db 0x0F,0x32"\
|
||||
"mov [esi],edx"\
|
||||
"mov [4+esi],eax"\
|
||||
parm [edi] [esi]\
|
||||
modify [eax ecx edx edi esi];
|
||||
|
||||
extern void ZERO_MSR_0x12_0x13(void);
|
||||
#pragma aux ZERO_MSR_0x12_0x13 =\
|
||||
"xor edx,edx"\
|
||||
"xor eax,eax"\
|
||||
"mov ecx,0x12"\
|
||||
"db 0x0F,0x30"\
|
||||
"mov ecx,0x13"\
|
||||
"db 0x0F,0x30"\
|
||||
modify [eax ecx edx];
|
||||
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DataRead,
|
||||
DataWrite,
|
||||
DataTLBMiss,
|
||||
DataReadMiss,
|
||||
DataWriteMiss,
|
||||
WriteHitEM,
|
||||
DataCacheLinesWritten,
|
||||
DataCacheSnoops,
|
||||
DataCacheSnoopHit,
|
||||
MemAccessBothPipes,
|
||||
BankConflict,
|
||||
MisalignedDataRef,
|
||||
CodeRead,
|
||||
CodeTLBMiss,
|
||||
CodeCacheMiss,
|
||||
SegRegLoad,
|
||||
RESERVED0,
|
||||
RESERVED1,
|
||||
Branch,
|
||||
BTBHit,
|
||||
TakenBranchOrBTBHit,
|
||||
PipelineFlush,
|
||||
InstructionsExeced,
|
||||
InstructionsExecedVPipe,
|
||||
BusUtilizationClocks,
|
||||
PipelineStalledWriteBackup,
|
||||
PipelineStalledDateMemRead,
|
||||
PipeLineStalledWriteEM,
|
||||
LockedBusCycle,
|
||||
IOReadOrWriteCycle,
|
||||
NonCacheableMemRef,
|
||||
AGI,
|
||||
RESERVED2,
|
||||
RESERVED3,
|
||||
FPOperation,
|
||||
Breakpoint0Match,
|
||||
Breakpoint1Match,
|
||||
Breakpoint2Match,
|
||||
Breakpoint3Match,
|
||||
HWInterrupt,
|
||||
DataReadOrWrite,
|
||||
DataReadOrWriteMiss
|
||||
};
|
||||
|
||||
#define PROF_CYCLES (0x100)
|
||||
#define PROF_EVENTS (0x000)
|
||||
#define RING_012 (0x40)
|
||||
#define RING_3 (0x80)
|
||||
#define RING_0123 (RING_012 | RING_3)
|
||||
|
||||
/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/
|
||||
#define ProfSetProfiles(_msr12, _msr13)\
|
||||
{\
|
||||
UINT32 prof;\
|
||||
\
|
||||
prof = (_msr12) | ((_msr13) << 16);\
|
||||
WRMSR(0x11, prof);\
|
||||
}
|
||||
|
||||
/*void ProfBeginProfiles(void);*/
|
||||
#define ProfBeginProfiles()\
|
||||
ZERO_MSR_0x12_0x13();
|
||||
|
||||
/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/
|
||||
#define ProfGetProfiles(_msr12, _msr13)\
|
||||
RDMSR_0x12_0x13(_msr12, _msr13);
|
||||
|
||||
/*void ProfZeroTimer(void);*/
|
||||
#define ProfZeroTimer()\
|
||||
WRMSR(0x10, 0);
|
||||
|
||||
/*void ProfReadTimer(UINT32 timer[2]);*/
|
||||
#define ProfReadTimer(timer)\
|
||||
RDMSR(0x10, timer);
|
||||
|
||||
/*EOF*/
|
14
src/r_draw.h
14
src/r_draw.h
|
@ -138,20 +138,6 @@ void R_DrawColumn_8(void);
|
|||
void R_DrawShadeColumn_8(void);
|
||||
void R_DrawTranslucentColumn_8(void);
|
||||
|
||||
#ifdef USEASM
|
||||
void ASMCALL R_DrawColumn_8_ASM(void);
|
||||
#define R_DrawWallColumn_8_ASM R_DrawColumn_8_ASM
|
||||
void ASMCALL R_DrawShadeColumn_8_ASM(void);
|
||||
void ASMCALL R_DrawTranslucentColumn_8_ASM(void);
|
||||
void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void);
|
||||
|
||||
void ASMCALL R_DrawColumn_8_MMX(void);
|
||||
#define R_DrawWallColumn_8_MMX R_DrawColumn_8_MMX
|
||||
|
||||
void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void);
|
||||
void ASMCALL R_DrawSpan_8_MMX(void);
|
||||
#endif
|
||||
|
||||
void R_DrawTranslatedColumn_8(void);
|
||||
void R_DrawTranslatedTranslucentColumn_8(void);
|
||||
void R_DrawSpan_8(void);
|
||||
|
|
|
@ -23,11 +23,6 @@ static wallsplat_t wallsplats[MAXLEVELSPLATS]; // WALL splats
|
|||
static INT32 freewallsplat;
|
||||
#endif
|
||||
|
||||
#ifdef USEASM
|
||||
/// \brief for floorsplats \note accessed by asm code
|
||||
struct rastery_s *prastertab;
|
||||
#endif
|
||||
|
||||
#ifdef FLOORSPLATS
|
||||
static floorsplat_t floorsplats[1]; // FLOOR splats
|
||||
static INT32 freefloorsplat;
|
||||
|
@ -339,12 +334,6 @@ void R_AddVisibleFloorSplats(subsector_t *subsec)
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef USEASM
|
||||
// tv1, tv2 = the x/y coordinate that varies across the texture, tc = the x/y coordinate that is constant.
|
||||
void ASMCALL rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2,
|
||||
INT32 tc, INT32 dir);
|
||||
#endif
|
||||
|
||||
// current test with floor tile
|
||||
//#define FLOORSPLATSOLIDCOLOR
|
||||
|
||||
|
|
80
src/screen.c
80
src/screen.c
|
@ -33,10 +33,6 @@
|
|||
// SRB2Kart
|
||||
#include "r_fps.h" // R_GetFramerateCap
|
||||
|
||||
#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200))
|
||||
#define RUSEASM //MSC.NET can't patch itself
|
||||
#endif
|
||||
|
||||
// --------------------------------------------
|
||||
// assembly or c drawer routines for 8bpp/16bpp
|
||||
// --------------------------------------------
|
||||
|
@ -94,16 +90,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
|
|||
// Short and Tall sky drawer, for the current color mode
|
||||
void (*walldrawerfunc)(void);
|
||||
|
||||
boolean R_ASM = true;
|
||||
boolean R_486 = false;
|
||||
boolean R_586 = false;
|
||||
boolean R_MMX = false;
|
||||
boolean R_SSE = false;
|
||||
boolean R_3DNow = false;
|
||||
boolean R_MMXExt = false;
|
||||
boolean R_SSE2 = false;
|
||||
|
||||
|
||||
void SCR_SetMode(void)
|
||||
{
|
||||
if (dedicated)
|
||||
|
@ -132,28 +118,6 @@ void SCR_SetMode(void)
|
|||
walldrawerfunc = R_DrawWallColumn_8;
|
||||
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8;
|
||||
twosmultipatchtransfunc = R_Draw2sMultiPatchTranslucentColumn_8;
|
||||
#ifdef RUSEASM
|
||||
if (R_ASM)
|
||||
{
|
||||
if (R_MMX)
|
||||
{
|
||||
colfunc = basecolfunc = R_DrawColumn_8_MMX;
|
||||
//shadecolfunc = R_DrawShadeColumn_8_ASM;
|
||||
//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
|
||||
walldrawerfunc = R_DrawWallColumn_8_MMX;
|
||||
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_MMX;
|
||||
spanfunc = basespanfunc = R_DrawSpan_8_MMX;
|
||||
}
|
||||
else
|
||||
{
|
||||
colfunc = basecolfunc = R_DrawColumn_8_ASM;
|
||||
//shadecolfunc = R_DrawShadeColumn_8_ASM;
|
||||
//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
|
||||
walldrawerfunc = R_DrawWallColumn_8_ASM;
|
||||
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_ASM;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* else if (vid.bpp > 1)
|
||||
{
|
||||
|
@ -181,50 +145,6 @@ void SCR_SetMode(void)
|
|||
//
|
||||
void SCR_Startup(void)
|
||||
{
|
||||
const CPUInfoFlags *RCpuInfo = I_CPUInfo();
|
||||
if (!M_CheckParm("-NOCPUID") && RCpuInfo)
|
||||
{
|
||||
#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__)
|
||||
R_486 = true;
|
||||
#endif
|
||||
if (RCpuInfo->RDTSC)
|
||||
R_586 = true;
|
||||
if (RCpuInfo->MMX)
|
||||
R_MMX = true;
|
||||
if (RCpuInfo->AMD3DNow)
|
||||
R_3DNow = true;
|
||||
if (RCpuInfo->MMXExt)
|
||||
R_MMXExt = true;
|
||||
if (RCpuInfo->SSE)
|
||||
R_SSE = true;
|
||||
if (RCpuInfo->SSE2)
|
||||
R_SSE2 = true;
|
||||
CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
|
||||
}
|
||||
|
||||
if (M_CheckParm("-noASM"))
|
||||
R_ASM = false;
|
||||
if (M_CheckParm("-486"))
|
||||
R_486 = true;
|
||||
if (M_CheckParm("-586"))
|
||||
R_586 = true;
|
||||
if (M_CheckParm("-MMX"))
|
||||
R_MMX = true;
|
||||
if (M_CheckParm("-3DNow"))
|
||||
R_3DNow = true;
|
||||
if (M_CheckParm("-MMXExt"))
|
||||
R_MMXExt = true;
|
||||
|
||||
if (M_CheckParm("-SSE"))
|
||||
R_SSE = true;
|
||||
if (M_CheckParm("-noSSE"))
|
||||
R_SSE = false;
|
||||
|
||||
if (M_CheckParm("-SSE2"))
|
||||
R_SSE2 = true;
|
||||
|
||||
M_SetupMemcpy();
|
||||
|
||||
if (dedicated)
|
||||
{
|
||||
V_Init();
|
||||
|
|
11
src/screen.h
11
src/screen.h
|
@ -138,17 +138,6 @@ extern void (*transtransfunc)(void);
|
|||
extern void (*twosmultipatchfunc)(void);
|
||||
extern void (*twosmultipatchtransfunc)(void);
|
||||
|
||||
// -----
|
||||
// CPUID
|
||||
// -----
|
||||
extern boolean R_ASM;
|
||||
extern boolean R_486;
|
||||
extern boolean R_586;
|
||||
extern boolean R_MMX;
|
||||
extern boolean R_3DNow;
|
||||
extern boolean R_MMXExt;
|
||||
extern boolean R_SSE2;
|
||||
|
||||
// ----------------
|
||||
// screen variables
|
||||
// ----------------
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
|
||||
NOHW=1
|
||||
NOHS=1
|
||||
NOASM=1
|
||||
|
||||
OPTS+=-DLINUX
|
||||
|
||||
|
|
|
@ -39,7 +39,6 @@ endif
|
|||
#
|
||||
ifdef SOLARIS
|
||||
NOIPX=1
|
||||
NOASM=1
|
||||
OPTS+=-DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP
|
||||
OPTS+=-I/usr/local/include -I/opt/sfw/include
|
||||
LDFLAGS+=-L/opt/sfw/lib
|
||||
|
|
|
@ -37,14 +37,6 @@ else
|
|||
endif
|
||||
endif
|
||||
|
||||
|
||||
#use the x86 asm code
|
||||
ifndef CYGWIN32
|
||||
ifndef NOASM
|
||||
USEASM=1
|
||||
endif
|
||||
endif
|
||||
|
||||
OBJS+=$(OBJDIR)/i_video.o $(OBJDIR)/dosstr.o $(OBJDIR)/endtxt.o $(OBJDIR)/hwsym_sdl.o
|
||||
|
||||
OPTS+=-DDIRECTFULLSCREEN -DHAVE_SDL
|
||||
|
|
|
@ -70,40 +70,6 @@ char logfilename[1024];
|
|||
typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID);
|
||||
#endif
|
||||
|
||||
#if defined (_WIN32)
|
||||
static inline VOID MakeCodeWritable(VOID)
|
||||
{
|
||||
#ifdef USEASM // Disable write-protection of code segment
|
||||
DWORD OldRights;
|
||||
const DWORD NewRights = PAGE_EXECUTE_READWRITE;
|
||||
PBYTE pBaseOfImage = (PBYTE)GetModuleHandle(NULL);
|
||||
PIMAGE_DOS_HEADER dosH =(PIMAGE_DOS_HEADER)pBaseOfImage;
|
||||
PIMAGE_NT_HEADERS ntH = (PIMAGE_NT_HEADERS)(pBaseOfImage + dosH->e_lfanew);
|
||||
PIMAGE_OPTIONAL_HEADER oH = (PIMAGE_OPTIONAL_HEADER)
|
||||
((PBYTE)ntH + sizeof (IMAGE_NT_SIGNATURE) + sizeof (IMAGE_FILE_HEADER));
|
||||
LPVOID pA = pBaseOfImage+oH->BaseOfCode;
|
||||
SIZE_T pS = oH->SizeOfCode;
|
||||
#if 1 // try to find the text section
|
||||
PIMAGE_SECTION_HEADER ntS = IMAGE_FIRST_SECTION (ntH);
|
||||
WORD s;
|
||||
for (s = 0; s < ntH->FileHeader.NumberOfSections; s++)
|
||||
{
|
||||
if (memcmp (ntS[s].Name, ".text\0\0", 8) == 0)
|
||||
{
|
||||
pA = pBaseOfImage+ntS[s].VirtualAddress;
|
||||
pS = ntS[s].Misc.VirtualSize;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!VirtualProtect(pA,pS,NewRights,&OldRights))
|
||||
I_Error("Could not make code writable\n");
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
static void
|
||||
ChDirToExe (void)
|
||||
|
@ -185,7 +151,6 @@ int main(int argc, char **argv)
|
|||
#ifndef __MINGW32__
|
||||
prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo);
|
||||
#endif
|
||||
MakeCodeWritable();
|
||||
#endif
|
||||
|
||||
// startup SRB2
|
||||
|
|
|
@ -3910,69 +3910,6 @@ UINT32 I_GetFreeMem(UINT32 *total)
|
|||
#endif
|
||||
}
|
||||
|
||||
const CPUInfoFlags *I_CPUInfo(void)
|
||||
{
|
||||
#if defined (_WIN32)
|
||||
static CPUInfoFlags WIN_CPUInfo;
|
||||
SYSTEM_INFO SI;
|
||||
p_IsProcessorFeaturePresent pfnCPUID = (p_IsProcessorFeaturePresent)(LPVOID)GetProcAddress(GetModuleHandleA("kernel32.dll"), "IsProcessorFeaturePresent");
|
||||
|
||||
ZeroMemory(&WIN_CPUInfo,sizeof (WIN_CPUInfo));
|
||||
if (pfnCPUID)
|
||||
{
|
||||
WIN_CPUInfo.FPPE = pfnCPUID( 0); //PF_FLOATING_POINT_PRECISION_ERRATA
|
||||
WIN_CPUInfo.FPE = pfnCPUID( 1); //PF_FLOATING_POINT_EMULATED
|
||||
WIN_CPUInfo.cmpxchg = pfnCPUID( 2); //PF_COMPARE_EXCHANGE_DOUBLE
|
||||
WIN_CPUInfo.MMX = pfnCPUID( 3); //PF_MMX_INSTRUCTIONS_AVAILABLE
|
||||
WIN_CPUInfo.PPCMM64 = pfnCPUID( 4); //PF_PPC_MOVEMEM_64BIT_OK
|
||||
WIN_CPUInfo.ALPHAbyte = pfnCPUID( 5); //PF_ALPHA_BYTE_INSTRUCTIONS
|
||||
WIN_CPUInfo.SSE = pfnCPUID( 6); //PF_XMMI_INSTRUCTIONS_AVAILABLE
|
||||
WIN_CPUInfo.AMD3DNow = pfnCPUID( 7); //PF_3DNOW_INSTRUCTIONS_AVAILABLE
|
||||
WIN_CPUInfo.RDTSC = pfnCPUID( 8); //PF_RDTSC_INSTRUCTION_AVAILABLE
|
||||
WIN_CPUInfo.PAE = pfnCPUID( 9); //PF_PAE_ENABLED
|
||||
WIN_CPUInfo.SSE2 = pfnCPUID(10); //PF_XMMI64_INSTRUCTIONS_AVAILABLE
|
||||
//WIN_CPUInfo.blank = pfnCPUID(11); //PF_SSE_DAZ_MODE_AVAILABLE
|
||||
WIN_CPUInfo.DEP = pfnCPUID(12); //PF_NX_ENABLED
|
||||
WIN_CPUInfo.SSE3 = pfnCPUID(13); //PF_SSE3_INSTRUCTIONS_AVAILABLE
|
||||
WIN_CPUInfo.cmpxchg16b = pfnCPUID(14); //PF_COMPARE_EXCHANGE128
|
||||
WIN_CPUInfo.cmp8xchg16 = pfnCPUID(15); //PF_COMPARE64_EXCHANGE128
|
||||
WIN_CPUInfo.PFC = pfnCPUID(16); //PF_CHANNELS_ENABLED
|
||||
}
|
||||
#ifdef HAVE_SDLCPUINFO
|
||||
else
|
||||
{
|
||||
WIN_CPUInfo.RDTSC = SDL_HasRDTSC();
|
||||
WIN_CPUInfo.MMX = SDL_HasMMX();
|
||||
WIN_CPUInfo.AMD3DNow = SDL_Has3DNow();
|
||||
WIN_CPUInfo.SSE = SDL_HasSSE();
|
||||
WIN_CPUInfo.SSE2 = SDL_HasSSE2();
|
||||
WIN_CPUInfo.AltiVec = SDL_HasAltiVec();
|
||||
}
|
||||
WIN_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
|
||||
WIN_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
|
||||
#endif
|
||||
GetSystemInfo(&SI);
|
||||
WIN_CPUInfo.CPUs = SI.dwNumberOfProcessors;
|
||||
WIN_CPUInfo.IA64 = (SI.dwProcessorType == 2200); // PROCESSOR_INTEL_IA64
|
||||
WIN_CPUInfo.AMD64 = (SI.dwProcessorType == 8664); // PROCESSOR_AMD_X8664
|
||||
return &WIN_CPUInfo;
|
||||
#elif defined (HAVE_SDLCPUINFO)
|
||||
static CPUInfoFlags SDL_CPUInfo;
|
||||
memset(&SDL_CPUInfo,0,sizeof (CPUInfoFlags));
|
||||
SDL_CPUInfo.RDTSC = SDL_HasRDTSC();
|
||||
SDL_CPUInfo.MMX = SDL_HasMMX();
|
||||
SDL_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
|
||||
SDL_CPUInfo.AMD3DNow = SDL_Has3DNow();
|
||||
SDL_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
|
||||
SDL_CPUInfo.SSE = SDL_HasSSE();
|
||||
SDL_CPUInfo.SSE2 = SDL_HasSSE2();
|
||||
SDL_CPUInfo.AltiVec = SDL_HasAltiVec();
|
||||
return &SDL_CPUInfo;
|
||||
#else
|
||||
return NULL; /// \todo CPUID asm
|
||||
#endif
|
||||
}
|
||||
|
||||
// note CPUAFFINITY code used to reside here
|
||||
void I_RegisterSysCommands(void) {}
|
||||
#endif
|
||||
|
|
957
src/tmap.nas
957
src/tmap.nas
|
@ -1,957 +0,0 @@
|
|||
;; SONIC ROBO BLAST 2
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
;; Copyright (C) 1999-2018 by Sonic Team Junior.
|
||||
;;
|
||||
;; This program is free software distributed under the
|
||||
;; terms of the GNU General Public License, version 2.
|
||||
;; See the 'LICENSE' file for more details.
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; FILE:
|
||||
;; tmap.nas
|
||||
;; DESCRIPTION:
|
||||
;; Assembler optimised rendering code for software mode.
|
||||
;; Draw wall columns.
|
||||
|
||||
|
||||
[BITS 32]
|
||||
|
||||
%define FRACBITS 16
|
||||
%define TRANSPARENTPIXEL 247
|
||||
|
||||
;; Symbol-declaration helpers.
;;   cextern NAME  - declare NAME as an external symbol
;;   cglobal NAME  - export NAME from this object
;; When LINUX is defined the name is used as written. Otherwise the name is
;; first %define'd to its underscore-prefixed form (NAME -> _NAME), so every
;; later use of NAME in this file assembles as _NAME.
%ifdef LINUX
|
||||
%macro cextern 1
|
||||
[extern %1]
|
||||
%endmacro
|
||||
|
||||
%macro cglobal 1
|
||||
[global %1]
|
||||
%endmacro
|
||||
|
||||
%else
|
||||
;; Non-LINUX builds: alias the name to _name before declaring it.
%macro cextern 1
|
||||
%define %1 _%1
|
||||
[extern %1]
|
||||
%endmacro
|
||||
|
||||
%macro cglobal 1
|
||||
%define %1 _%1
|
||||
[global %1]
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
; The viddef_s structure. We only need the width field.
; Only offsets are described here: 12 leading bytes are skipped, .width is
; the 4-byte field at offset 12, and 44 trailing bytes bring the struc to
; 60 bytes in total.
; NOTE(review): these sizes must match the C-side layout of the 'vid'
; variable - confirm against its declaration.
|
||||
struc viddef_s
|
||||
resb 12
|
||||
.width: resb 4
|
||||
resb 44
|
||||
endstruc
|
||||
|
||||
;; externs
|
||||
;; columns
|
||||
cextern dc_x
|
||||
cextern dc_yl
|
||||
cextern dc_yh
|
||||
cextern ylookup
|
||||
cextern columnofs
|
||||
cextern dc_source
|
||||
cextern dc_texturemid
|
||||
cextern dc_texheight
|
||||
cextern dc_iscale
|
||||
cextern dc_hires
|
||||
cextern centery
|
||||
cextern centeryfrac
|
||||
cextern dc_colormap
|
||||
cextern dc_transmap
|
||||
cextern colormaps
|
||||
cextern vid
|
||||
cextern topleft
|
||||
|
||||
; DELME
|
||||
cextern R_DrawColumn_8
|
||||
|
||||
; polygon edge rasterizer
|
||||
cextern prastertab
|
||||
|
||||
;; File-private scratch variables (none of them is exported with cglobal).
[SECTION .data]
|
||||
|
||||
;;.align 4
|
||||
;; loopcount: not referenced by any routine visible in this file.
loopcount dd 0
|
||||
;; pixelcount: column height saved by the translucent and shade drawers.
pixelcount dd 0
|
||||
;; tystep: fractional texture step spilled by the quad loops, which need ebp
;; as a second destination pointer.
tystep dd 0
|
||||
|
||||
[SECTION .text]
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_DrawColumn : 8bpp column drawer
|
||||
;;
|
||||
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
|
||||
;; Revised by G. Dick July 2010 to support the intervening twelve years'
|
||||
;; worth of changes to the renderer. Since I only vaguely know what I'm
|
||||
;; doing, this is probably rather suboptimal. Help appreciated!
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; fracstep, vid.width in memory
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = heightmask
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = frac
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
;; R_DrawColumn_8_ASM
;; Reads no stack arguments: every input comes from the dc_* globals,
;; ylookup, columnofs, centeryfrac and vid.width.
;; Saves and restores ebp, esi, edi, ebx; eax, ecx, edx and flags are
;; overwritten.
cglobal R_DrawColumn_8_ASM
|
||||
; align 16
|
||||
R_DrawColumn_8_ASM:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov ebp,[dc_yl]
|
||||
mov edi,[ylookup+ebp*4]
|
||||
mov ebx,[dc_x]
|
||||
add edi,[columnofs+ebx*4] ;; edi = dest
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov ecx,[dc_yh]
|
||||
add ecx,1
|
||||
sub ecx,ebp ;; pixel count
|
||||
jle near .done ;; nothing to scale
|
||||
;;
|
||||
;; fracstep = dc_iscale; // But we just use [dc_iscale]
|
||||
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
|
||||
;;
|
||||
mov eax,ebp ;; dc_yl
|
||||
shl eax,FRACBITS
|
||||
sub eax,[centeryfrac]
|
||||
;; One-operand imul leaves the 64-bit product in edx:eax; shrd then takes
;; bits 16..47 of it, which is the FixedMul result.
imul dword [dc_iscale]
|
||||
shrd eax,edx,FRACBITS
|
||||
add eax,[dc_texturemid]
|
||||
mov ebp,eax ;; ebp = frac
|
||||
|
||||
mov ebx,[dc_colormap]
|
||||
|
||||
mov esi,[dc_source]
|
||||
;;
|
||||
;; if (dc_hires) frac = 0;
|
||||
;;
|
||||
test byte [dc_hires],0x01
|
||||
jz .texheightcheck
|
||||
xor ebp,ebp
|
||||
|
||||
;;
|
||||
;; Check for power of two
|
||||
;;
|
||||
.texheightcheck:
|
||||
mov edx,[dc_texheight]
|
||||
sub edx,1 ;; edx = heightmask
|
||||
;; (height-1) & height is zero only when height is a power of two.
test edx,[dc_texheight]
|
||||
jnz .notpowertwo
|
||||
|
||||
;; The loop below writes two pixels per pass. An odd count enters at .odd so
;; that the first pass writes a single pixel.
test ecx,0x01 ;; Test for odd no. pixels
|
||||
jnz .odd
|
||||
|
||||
;;
|
||||
;; Texture height is a power of two, so we get modular arithmetic by
|
||||
;; masking
|
||||
;;
|
||||
.powertwo:
|
||||
mov eax,ebp ;; eax = frac
|
||||
sar eax,FRACBITS ;; Integer part
|
||||
and eax,edx ;; eax &= heightmask
|
||||
movzx eax,byte [esi + eax] ;; eax = texel
|
||||
add ebp,[dc_iscale] ;; frac += fracstep
|
||||
movzx eax,byte [ebx+eax] ;; Map through colormap
|
||||
mov [edi],al ;; Write pixel
|
||||
;; dest += vid.width
|
||||
add edi,[vid + viddef_s.width]
|
||||
|
||||
.odd:
|
||||
mov eax,ebp ;; eax = frac
|
||||
sar eax,FRACBITS ;; Integer part
|
||||
and eax,edx ;; eax &= heightmask
|
||||
movzx eax,byte [esi + eax] ;; eax = texel
|
||||
add ebp,[dc_iscale] ;; frac += fracstep
|
||||
movzx eax,byte [ebx+eax] ;; Map through colormap
|
||||
mov [edi],al ;; Write pixel
|
||||
;; dest += vid.width
|
||||
add edi,[vid + viddef_s.width]
|
||||
|
||||
|
||||
sub ecx,2 ;; count -= 2
|
||||
jg .powertwo
|
||||
|
||||
jmp .done
|
||||
|
||||
;; Non-power-of-two height: from here on edx is no longer a mask but
;; (dc_texheight << FRACBITS), the wrap-around limit for frac.
.notpowertwo:
|
||||
add edx,1
|
||||
shl edx,FRACBITS
|
||||
test ebp,ebp
|
||||
jns .notpowtwoloop
|
||||
|
||||
.makefracpos:
|
||||
add ebp,edx ;; frac is negative; make it positive
|
||||
js .makefracpos
|
||||
|
||||
.notpowtwoloop:
|
||||
cmp ebp,edx ;; Reduce mod height
|
||||
jl .writenonpowtwo
|
||||
sub ebp,edx
|
||||
jmp .notpowtwoloop
|
||||
|
||||
.writenonpowtwo:
|
||||
mov eax,ebp ;; eax = frac
|
||||
sar eax,FRACBITS ;; Integer part.
|
||||
;; Only the low byte of ebx is replaced by the texel, so [ebx] equals
;; colormap[texel] only if the colormap address has a zero low byte.
;; NOTE(review): assumes dc_colormap is 256-byte aligned - confirm.
mov bl,[esi + eax] ;; ebx = colormap + texel
|
||||
add ebp,[dc_iscale] ;; frac += fracstep
|
||||
movzx eax,byte [ebx] ;; Map through colormap
|
||||
mov [edi],al ;; Write pixel
|
||||
;; dest += vid.width
|
||||
add edi,[vid + viddef_s.width]
|
||||
|
||||
sub ecx,1
|
||||
jnz .notpowtwoloop
|
||||
|
||||
;;
|
||||
|
||||
.done:
|
||||
pop ebx ;; restore register variables
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp ;; restore caller's stack frame pointer
|
||||
ret
|
||||
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
|
||||
;; pixels.
|
||||
;;
|
||||
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
|
||||
;; Revised by G. Dick July 2010 to support the intervening twelve years'
|
||||
;; worth of changes to the renderer. Since I only vaguely know what I'm
|
||||
;; doing, this is probably rather suboptimal. Help appreciated!
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; fracstep, vid.width in memory
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = heightmask
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = frac
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
;; R_Draw2sMultiPatchColumn_8_ASM
;; Same structure as R_DrawColumn_8_ASM, except that a texel equal to
;; TRANSPARENTPIXEL is skipped: dest still advances but nothing is written.
;; Reads no stack arguments; inputs are the dc_* globals, ylookup, columnofs,
;; centeryfrac and vid.width. Saves and restores ebp, esi, edi, ebx; eax,
;; ecx, edx and flags are overwritten.
cglobal R_Draw2sMultiPatchColumn_8_ASM
|
||||
; align 16
|
||||
R_Draw2sMultiPatchColumn_8_ASM:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov ebp,[dc_yl]
|
||||
mov edi,[ylookup+ebp*4]
|
||||
mov ebx,[dc_x]
|
||||
add edi,[columnofs+ebx*4] ;; edi = dest
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov ecx,[dc_yh]
|
||||
add ecx,1
|
||||
sub ecx,ebp ;; pixel count
|
||||
jle near .done ;; nothing to scale
|
||||
;;
|
||||
;; fracstep = dc_iscale; // But we just use [dc_iscale]
|
||||
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
|
||||
;;
|
||||
mov eax,ebp ;; dc_yl
|
||||
shl eax,FRACBITS
|
||||
sub eax,[centeryfrac]
|
||||
;; edx:eax = 64-bit product; shrd extracts bits 16..47 (FixedMul result).
imul dword [dc_iscale]
|
||||
shrd eax,edx,FRACBITS
|
||||
add eax,[dc_texturemid]
|
||||
mov ebp,eax ;; ebp = frac
|
||||
|
||||
mov ebx,[dc_colormap]
|
||||
|
||||
mov esi,[dc_source]
|
||||
;;
|
||||
;; if (dc_hires) frac = 0;
|
||||
;;
|
||||
test byte [dc_hires],0x01
|
||||
jz .texheightcheck
|
||||
xor ebp,ebp
|
||||
|
||||
;;
|
||||
;; Check for power of two
|
||||
;;
|
||||
.texheightcheck:
|
||||
mov edx,[dc_texheight]
|
||||
sub edx,1 ;; edx = heightmask
|
||||
;; (height-1) & height is zero only when height is a power of two.
test edx,[dc_texheight]
|
||||
jnz .notpowertwo
|
||||
|
||||
;; Two pixels per loop pass; an odd count enters at .odd so the first pass
;; handles a single pixel.
test ecx,0x01 ;; Test for odd no. pixels
|
||||
jnz .odd
|
||||
|
||||
;;
|
||||
;; Texture height is a power of two, so we get modular arithmetic by
|
||||
;; masking
|
||||
;;
|
||||
.powertwo:
|
||||
mov eax,ebp ;; eax = frac
|
||||
sar eax,FRACBITS ;; Integer part
|
||||
and eax,edx ;; eax &= heightmask
|
||||
movzx eax,byte [esi + eax] ;; eax = texel
|
||||
add ebp,[dc_iscale] ;; frac += fracstep
|
||||
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
|
||||
je .nextpowtwoeven ;; If so, advance.
|
||||
movzx eax,byte [ebx+eax] ;; Map through colormap
|
||||
mov [edi],al ;; Write pixel
|
||||
.nextpowtwoeven:
|
||||
;; dest += vid.width
|
||||
add edi,[vid + viddef_s.width]
|
||||
|
||||
.odd:
|
||||
mov eax,ebp ;; eax = frac
|
||||
sar eax,FRACBITS ;; Integer part
|
||||
and eax,edx ;; eax &= heightmask
|
||||
movzx eax,byte [esi + eax] ;; eax = texel
|
||||
add ebp,[dc_iscale] ;; frac += fracstep
|
||||
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
|
||||
je .nextpowtwoodd ;; If so, advance.
|
||||
movzx eax,byte [ebx+eax] ;; Map through colormap
|
||||
mov [edi],al ;; Write pixel
|
||||
.nextpowtwoodd:
|
||||
;; dest += vid.width
|
||||
add edi,[vid + viddef_s.width]
|
||||
|
||||
|
||||
sub ecx,2 ;; count -= 2
|
||||
jg .powertwo
|
||||
|
||||
jmp .done
|
||||
|
||||
;; Non-power-of-two height: edx becomes (dc_texheight << FRACBITS), the
;; wrap-around limit for frac, instead of a mask.
.notpowertwo:
|
||||
add edx,1
|
||||
shl edx,FRACBITS
|
||||
test ebp,ebp
|
||||
jns .notpowtwoloop
|
||||
|
||||
.makefracpos:
|
||||
add ebp,edx ;; frac is negative; make it positive
|
||||
js .makefracpos
|
||||
|
||||
.notpowtwoloop:
|
||||
cmp ebp,edx ;; Reduce mod height
|
||||
jl .writenonpowtwo
|
||||
sub ebp,edx
|
||||
jmp .notpowtwoloop
|
||||
|
||||
.writenonpowtwo:
|
||||
mov eax,ebp ;; eax = frac
|
||||
sar eax,FRACBITS ;; Integer part.
|
||||
;; Only the low byte of ebx is replaced by the texel, so [ebx] equals
;; colormap[texel] only if the colormap address has a zero low byte.
;; NOTE(review): assumes dc_colormap is 256-byte aligned - confirm.
mov bl,[esi + eax] ;; ebx = colormap + texel
|
||||
add ebp,[dc_iscale] ;; frac += fracstep
|
||||
cmp bl,TRANSPARENTPIXEL ;; Is pixel transparent?
|
||||
je .nextnonpowtwo ;; If so, advance.
|
||||
movzx eax,byte [ebx] ;; Map through colormap
|
||||
mov [edi],al ;; Write pixel
|
||||
.nextnonpowtwo:
|
||||
;; dest += vid.width
|
||||
add edi,[vid + viddef_s.width]
|
||||
|
||||
sub ecx,1
|
||||
jnz .notpowtwoloop
|
||||
|
||||
;;
|
||||
|
||||
.done:
|
||||
pop ebx ;; restore register variables
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp ;; restore caller's stack frame pointer
|
||||
ret
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;; R_DrawTranslucentColumn_8_ASM
|
||||
;;
|
||||
;; Vertical column texture drawer, with transparency. Replaces Doom2's
|
||||
;; 'fuzz' effect, which was not so beautiful.
|
||||
;; Transparency is always impressive in some way, don't know why...
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
;; R_DrawTranslucentColumn_8_ASM
;; Reads no stack arguments; inputs are dc_yl, dc_yh, dc_x, dc_iscale,
;; dc_texturemid, centery, dc_colormap, dc_transmap, dc_source, ylookup and
;; columnofs. Saves and restores ebp, esi, edi, ebx.
;;
;; Per pixel: ah = texel, al = current dest pixel, then dl = [eax] and
;; dl = [edx]. That is a lookup in dc_transmap followed by a lookup in
;; dc_colormap, done by overwriting the low bytes of the table pointers.
;; NOTE(review): this only indexes correctly if dc_transmap has its low 16
;; address bits clear and dc_colormap its low 8 bits clear - confirm.
;;
;; The texture row index is masked with 0x7f, so rows wrap at 128.
;;
;; NOTE(review): the row advances (labels pf..pk, q1..q4) use the placeholder
;; immediates 0x12345678 / 0x23456789. Nothing visible in this file rewrites
;; them and the labels are not exported with cglobal, so as assembled the
;; literal placeholder values are used. Verify whether this routine is still
;; called and, if so, where the immediates are patched.
cglobal R_DrawTranslucentColumn_8_ASM
|
||||
R_DrawTranslucentColumn_8_ASM:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov ebp,[dc_yl]
|
||||
mov ebx,ebp
|
||||
mov edi,[ylookup+ebx*4]
|
||||
mov ebx,[dc_x]
|
||||
add edi,[columnofs+ebx*4] ;; edi = dest
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov eax,[dc_yh]
|
||||
inc eax
|
||||
sub eax,ebp ;; pixel count
|
||||
;; mov leaves the flags untouched, so the jle below still tests the sub.
mov [pixelcount],eax ;; save for final pixel
|
||||
jle near vtdone ;; nothing to scale
|
||||
;;
|
||||
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
|
||||
;;
|
||||
mov ecx,[dc_iscale] ;; fracstep
|
||||
mov eax,[centery]
|
||||
sub eax,ebp
|
||||
imul eax,ecx
|
||||
mov edx,[dc_texturemid]
|
||||
sub edx,eax
|
||||
mov ebx,edx
|
||||
|
||||
shr ebx,16 ;; frac int.
|
||||
and ebx,0x7f
|
||||
shl edx,16 ;; y frac up
|
||||
|
||||
mov ebp,ecx
|
||||
shl ebp,16 ;; fracstep f. up
|
||||
shr ecx,16 ;; fracstep i. ->cl
|
||||
and cl,0x7f
|
||||
;; Merge: ecx keeps the fractional part of frac in its upper 16 bits and
;; the saved cx (integer step in cl) in its lower 16 bits.
push cx
|
||||
mov ecx,edx
|
||||
pop cx
|
||||
mov edx,[dc_colormap]
|
||||
mov esi,[dc_source]
|
||||
;;
|
||||
;; lets rock :) !
|
||||
;;
|
||||
mov eax,[pixelcount]
|
||||
shr eax,0x2
|
||||
test byte [pixelcount],0x3
|
||||
;; Only the low 8 bits of (pixelcount >> 2) fit in ch.
;; NOTE(review): columns of 1024 or more pixels would wrap this counter.
mov ch,al ;; quad count
|
||||
mov eax,[dc_transmap]
|
||||
;; The two movs above do not change flags: this je tests (pixelcount & 3).
je vt4quadloop
|
||||
;;
|
||||
;; do un-even pixel
|
||||
;;
|
||||
test byte [pixelcount],0x1
|
||||
je trf2
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
;; The carry out of the fractional add feeds the integer row through adc.
add ecx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
pf: add edi,0x12345678
|
||||
;;
|
||||
;; do two non-quad-aligned pixels
|
||||
;;
|
||||
trf2: test byte [pixelcount],0x2
|
||||
je trf3
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add ecx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
pg: add edi,0x12345678
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add ecx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
ph: add edi,0x12345678
|
||||
;;
|
||||
;; test if there was at least 4 pixels
|
||||
;;
|
||||
trf3: test ch,0xff ;; test quad count
|
||||
je near vtdone
|
||||
|
||||
;;
|
||||
;; Register roles on entry to the quad loop:
;; ebp : fracstep << 16 (spilled to [tystep], then reused as 2nd dest ptr)
|
||||
;; ecx : upper 16 bits = y frac., ch = quad counter, cl = y step i.
|
||||
;; ebx : y i. lower 7 bits, masked for index
|
||||
;; edx : dc_colormap pointer, dl = scratch / table index
|
||||
;; eax : dc_transmap pointer, ah = texel, al = dest pixel
|
||||
;; esi : source texture column
|
||||
;; edi : dest screen
|
||||
;;
|
||||
vt4quadloop:
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [tystep],ebp
|
||||
pi: add edi,0x12345678
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
pj: sub edi,0x12345678
|
||||
mov ebp,edi
|
||||
pk: sub edi,0x12345678
|
||||
jmp short inloop
|
||||
align 4
|
||||
;; Each pass handles four pixels, alternating stores between edi and ebp.
vtquadloop:
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q1: add ebp,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
inloop:
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q2: add edi,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [ebp+0x0],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q3: add ebp,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q4: add edi,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [ebp],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
dec ch
|
||||
jne vtquadloop
|
||||
vtdone:
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;; R_DrawShadeColumn
|
||||
;;
|
||||
;; for smoke..etc.. test.
|
||||
;;----------------------------------------------------------------------
|
||||
;; R_DrawShadeColumn_8_ASM
;; Reads no stack arguments; inputs are dc_yl, dc_yh, dc_x, dc_iscale,
;; dc_texturemid, centery, colormaps, dc_source, ylookup and columnofs.
;; Saves and restores ebp, esi, edi, ebx.
;;
;; Per pixel: ah = texel, al = current dest pixel, then dl = [eax] with
;; eax based on [colormaps]; the texel picks a 256-byte row and the dest
;; pixel indexes into it.
;; NOTE(review): this only indexes correctly if the colormaps pointer has
;; its low 16 address bits clear - confirm.
;;
;; The texture row index is masked with 0x7f, so rows wrap at 128.
;;
;; NOTE(review): the row advances (labels pl..pq, q5..q8) use the placeholder
;; immediate 0x12345678. Nothing visible in this file rewrites it and the
;; labels are not exported with cglobal, so as assembled the literal value is
;; used. Verify whether this routine is still called and, if so, where the
;; immediates are patched.
cglobal R_DrawShadeColumn_8_ASM
|
||||
R_DrawShadeColumn_8_ASM:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov ebp,[dc_yl]
|
||||
mov ebx,ebp
|
||||
mov edi,[ylookup+ebx*4]
|
||||
mov ebx,[dc_x]
|
||||
add edi,[columnofs+ebx*4] ;; edi = dest
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov eax,[dc_yh]
|
||||
inc eax
|
||||
sub eax,ebp ;; pixel count
|
||||
;; mov leaves the flags untouched, so the jle below still tests the sub.
mov [pixelcount],eax ;; save for final pixel
|
||||
jle near shdone ;; nothing to scale
|
||||
;;
|
||||
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
|
||||
;;
|
||||
mov ecx,[dc_iscale] ;; fracstep
|
||||
mov eax,[centery]
|
||||
sub eax,ebp
|
||||
imul eax,ecx
|
||||
mov edx,[dc_texturemid]
|
||||
sub edx,eax
|
||||
mov ebx,edx
|
||||
shr ebx,16 ;; frac int.
|
||||
and ebx,byte +0x7f
|
||||
shl edx,16 ;; y frac up
|
||||
|
||||
mov ebp,ecx
|
||||
shl ebp,16 ;; fracstep f. up
|
||||
shr ecx,16 ;; fracstep i. ->cl
|
||||
and cl,0x7f
|
||||
|
||||
mov esi,[dc_source]
|
||||
;;
|
||||
;; lets rock :) !
|
||||
;;
|
||||
mov eax,[pixelcount]
|
||||
;; dh temporarily holds the low byte of pixelcount for the remainder tests;
;; the y fraction lives in the upper 16 bits of edx and is not disturbed.
mov dh,al
|
||||
shr eax,2
|
||||
;; Only the low 8 bits of (pixelcount >> 2) fit in ch.
;; NOTE(review): columns of 1024 or more pixels would wrap this counter.
mov ch,al ;; quad count
|
||||
mov eax,[colormaps]
|
||||
test dh,3
|
||||
je sh4quadloop
|
||||
;;
|
||||
;; do un-even pixel
|
||||
;;
|
||||
test dh,0x1
|
||||
je shf2
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
;; The carry out of the fractional add feeds the integer row through adc.
add edx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov [edi],dl
|
||||
pl: add edi,0x12345678
|
||||
;;
|
||||
;; do two non-quad-aligned pixels
|
||||
;;
|
||||
shf2:
|
||||
test dh,0x2
|
||||
je shf3
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add edx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov [edi],dl
|
||||
pm: add edi,0x12345678
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add edx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov [edi],dl
|
||||
pn: add edi,0x12345678
|
||||
;;
|
||||
;; test if there was at least 4 pixels
|
||||
;;
|
||||
shf3:
|
||||
test ch,0xff ;; test quad count
|
||||
je near shdone
|
||||
|
||||
;;
|
||||
;; ebp : ystep frac. upper 24 bits
|
||||
;; edx : y frac. upper 24 bits
|
||||
;; ebx : y i. lower 7 bits, masked for index
|
||||
;; ecx : ch = counter, cl = y step i.
|
||||
;; eax : colormap aligned 256
|
||||
;; esi : source texture column
|
||||
;; edi : dest screen
|
||||
;;
|
||||
;; In the loop below ebp is spilled to [tystep] and reused as a second
;; destination pointer; dh holds the 0x7f row mask and dl is scratch.
sh4quadloop:
|
||||
mov dh,0x7f ;; prep mask
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [tystep],ebp
|
||||
po: add edi,0x12345678
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
pp: sub edi,0x12345678
|
||||
mov ebp,edi
|
||||
pq: sub edi,0x12345678
|
||||
jmp short shinloop
|
||||
|
||||
align 4
|
||||
;; Each pass handles four pixels, alternating stores between edi and ebp.
shquadloop:
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q5: add ebp,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
shinloop:
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q6: add edi,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [ebp],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q7: add ebp,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q8: add edi,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [ebp],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
dec ch
|
||||
jne shquadloop
|
||||
|
||||
shdone:
|
||||
pop ebx ;; restore register variables
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp ;; restore caller's stack frame pointer
|
||||
ret
|
||||
|
||||
|
||||
;; ========================================================================
|
||||
;; Rasterizes the edge segments of a linearly texture-mapped polygon.
|
||||
;; The texture coordinates along the edges are interpolated at the same
|
||||
;; time as the minx/maxx X coordinates of each scanline.
|
||||
;; The 'dir' argument selects which texture edges are interpolated:
|
||||
;; 0: segments on the TOP and BOTTOM texture edges (TY constant)
|
||||
;; 1: segments on the LEFT and RIGHT texture edges (TX constant)
|
||||
;; ========================================================================
|
||||
;;
|
||||
;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
|
||||
;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
|
||||
;;
|
||||
;; For dir = 0, (tv1,tv2) = (tX1,tX2) and tc = tY, since TY is constant.
|
||||
;;
|
||||
;; For dir = 1, (tv1,tv2) = (tY1,tY2) and tc = tX, since TX is constant.
|
||||
;;
|
||||
;;
|
||||
;; Uses: extern struct rastery *prastertab;
|
||||
;;
|
||||
|
||||
;; Byte offsets of the fields of one rastertab entry (one entry per
;; scanline); RASTERY_SIZEOF is the entry stride.
MINX EQU 0
|
||||
MAXX EQU 4
|
||||
TX1 EQU 8
|
||||
TY1 EQU 12
|
||||
TX2 EQU 16
|
||||
TY2 EQU 20
|
||||
RASTERY_SIZEOF EQU 24
|
||||
|
||||
;; rasterize_segment_tex
;; Stack arguments, addressed through ebp after the frame is set up:
;;   [ebp+0x08] x1   [ebp+0x0c] y1   [ebp+0x10] x2   [ebp+0x14] y2
;;   [ebp+0x18] tv1  [ebp+0x1c] tv2  [ebp+0x20] tc   [ebp+0x24] dir
;; Locals: [ebp-4] = DX (x step per line), [ebp-8] = TD (texture step).
;; y1 == y2 returns without writing. y2 < y1 fills minx/tx1/ty1 starting at
;; rastertab[y2]; y2 > y1 fills maxx/tx2/ty2 starting at rastertab[y1].
;; All stored values are shifted left by 16 (the "PRE" in the comments).
;; Saves and restores ebx, esi, edi and es; eax, ecx, edx are overwritten.
cglobal rasterize_segment_tex
|
||||
rasterize_segment_tex:
|
||||
push ebp
|
||||
mov ebp,esp
|
||||
|
||||
sub esp,byte +0x8 ;; allocate the local variables
|
||||
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
;; es is saved here and restored at .L_finished; nothing in between in this
;; routine writes es.
o16 mov ax,es
|
||||
push eax
|
||||
|
||||
;; #define DX [ebp-4]
|
||||
;; #define TD [ebp-8]
|
||||
|
||||
mov eax,[ebp+0xc] ;; y1
|
||||
mov ebx,[ebp+0x14] ;; y2
|
||||
cmp ebx,eax
|
||||
je near .L_finished ;; special case (y1==y2): horizontal segment, exit!
|
||||
|
||||
jg near .L_rasterize_right
|
||||
|
||||
;;rasterize_left: ;; rasterize a LEFT segment of the polygon
|
||||
|
||||
mov ecx,eax
|
||||
sub ecx,ebx
|
||||
inc ecx ;; y1-y2+1
|
||||
|
||||
mov eax,RASTERY_SIZEOF
|
||||
;; One-operand mul writes edx:eax, so edx is overwritten here.
mul ebx ;; * y2
|
||||
mov esi,[prastertab]
|
||||
add esi,eax ;; point into rastertab[y2]
|
||||
|
||||
mov eax,[ebp+0x8] ;; ARG1
|
||||
sub eax,[ebp+0x10] ;; ARG3
|
||||
shl eax,0x10 ;; ((x1-x2)<<PRE) ...
|
||||
cdq
|
||||
idiv ecx ;; dx = ... / (y1-y2+1)
|
||||
mov [ebp-0x4],eax ;; DX
|
||||
|
||||
mov eax,[ebp+0x18] ;; ARG5
|
||||
sub eax,[ebp+0x1c] ;; ARG6
|
||||
shl eax,0x10
|
||||
cdq
|
||||
idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
|
||||
mov [ebp-0x8],eax ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1)
|
||||
|
||||
mov eax,[ebp+0x10] ;; ARG3
|
||||
shl eax,0x10 ;; x = x2<<PRE
|
||||
|
||||
mov ebx,[ebp+0x1c] ;; ARG6
|
||||
shl ebx,0x10 ;; tx = tx2<<PRE d0
|
||||
;; ty = ty2<<PRE d1
|
||||
mov edx,[ebp+0x20] ;; ARG7
|
||||
shl edx,0x10 ;; ty = ty<<PRE d0
|
||||
;; tx = tx<<PRE d1
|
||||
;; ebp is reused as TD inside the loops, so the frame pointer is pushed and
;; the arguments are not addressable until the matching pop.
push ebp
|
||||
mov edi,[ebp-0x4] ;; DX
|
||||
cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ?
|
||||
|
||||
;; mov does not change flags: the je below still tests the cmp on dir.
mov ebp,[ebp-0x8] ;; TD
|
||||
je .L_rleft_h_loop
|
||||
;;
|
||||
;; TY varies, TX is constant
|
||||
;;
|
||||
;; NOTE(review): here ty (ebx) is stored AFTER the step has been added,
;; whereas the h loop stores tx before stepping - confirm the asymmetry is
;; intended.
.L_rleft_v_loop:
|
||||
mov [esi+MINX],eax ;; rastertab[y].minx = x
|
||||
add ebx,ebp
|
||||
mov [esi+TX1],edx ;; .tx1 = tx
|
||||
add eax,edi
|
||||
mov [esi+TY1],ebx ;; .ty1 = ty
|
||||
|
||||
;;addl DX, %eax // x += dx
|
||||
;;addl TD, %ebx // ty += tdy
|
||||
|
||||
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
|
||||
dec ecx
|
||||
jne .L_rleft_v_loop
|
||||
pop ebp
|
||||
jmp .L_finished
|
||||
;;
|
||||
;; TX varies, TY is constant
|
||||
;;
|
||||
.L_rleft_h_loop:
|
||||
mov [esi+MINX],eax ;; rastertab[y].minx = x
|
||||
add eax,edi
|
||||
mov [esi+TX1],ebx ;; .tx1 = tx
|
||||
add ebx,ebp
|
||||
mov [esi+TY1],edx ;; .ty1 = ty
|
||||
|
||||
;;addl DX, %eax // x += dx
|
||||
;;addl TD, %ebx // tx += tdx
|
||||
|
||||
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
|
||||
dec ecx
|
||||
jne .L_rleft_h_loop
|
||||
pop ebp
|
||||
jmp .L_finished
|
||||
;;
|
||||
;; rasterize a RIGHT segment of the polygon
|
||||
;;
|
||||
.L_rasterize_right:
|
||||
mov ecx,ebx
|
||||
sub ecx,eax
|
||||
inc ecx ;; y2-y1+1
|
||||
|
||||
mov ebx,RASTERY_SIZEOF
|
||||
;; eax still holds y1 here; mul writes edx:eax.
mul ebx ;; * y1
|
||||
mov esi,[prastertab]
|
||||
add esi,eax ;; point into rastertab[y1]
|
||||
|
||||
mov eax,[ebp+0x10] ;; ARG3
|
||||
sub eax,[ebp+0x8] ;; ARG1
|
||||
shl eax,0x10 ;; ((x2-x1)<<PRE) ...
|
||||
cdq
|
||||
idiv ecx ;; dx = ... / (y2-y1+1)
|
||||
mov [ebp-0x4],eax ;; DX
|
||||
|
||||
mov eax,[ebp+0x1c] ;; ARG6
|
||||
sub eax,[ebp+0x18] ;; ARG5
|
||||
shl eax,0x10
|
||||
cdq
|
||||
idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
|
||||
mov [ebp-0x8],eax ;; idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1)
|
||||
|
||||
mov eax,[ebp+0x8] ;; ARG1
|
||||
shl eax,0x10 ;; x = x1<<PRE
|
||||
|
||||
mov ebx,[ebp+0x18] ;; ARG5
|
||||
shl ebx,0x10 ;; tx = tx1<<PRE d0
|
||||
;; ty = ty1<<PRE d1
|
||||
mov edx,[ebp+0x20] ;; ARG7
|
||||
shl edx,0x10 ;; ty = ty<<PRE d0
|
||||
;; tx = tx<<PRE d1
|
||||
;; Same frame-pointer reuse as on the left side.
push ebp
|
||||
mov edi,[ebp-0x4] ;; DX
|
||||
|
||||
cmp dword [ebp+0x24], 0 ;; direction ?
|
||||
|
||||
mov ebp,[ebp-0x8] ;; TD
|
||||
je .L_rright_h_loop
|
||||
;;
|
||||
;; TY varies, TX is constant
|
||||
;;
|
||||
;; NOTE(review): as in the left v loop, ty is stored after stepping.
.L_rright_v_loop:
|
||||
|
||||
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
|
||||
add ebx,ebp
|
||||
mov [esi+TX2],edx ;; .tx2 = tx
|
||||
add eax,edi
|
||||
mov [esi+TY2],ebx ;; .ty2 = ty
|
||||
|
||||
;;addl DX, %eax // x += dx
|
||||
;;addl TD, %ebx // ty += tdy
|
||||
|
||||
add esi,RASTERY_SIZEOF
|
||||
dec ecx
|
||||
jne .L_rright_v_loop
|
||||
|
||||
pop ebp
|
||||
|
||||
jmp short .L_finished
|
||||
;;
|
||||
;; TX varies, TY is constant
|
||||
;;
|
||||
.L_rright_h_loop:
|
||||
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
|
||||
add eax,edi
|
||||
mov [esi+TX2],ebx ;; .tx2 = tx
|
||||
add ebx,ebp
|
||||
mov [esi+TY2],edx ;; .ty2 = ty
|
||||
|
||||
;;addl DX, %eax // x += dx
|
||||
;;addl TD, %ebx // tx += tdx
|
||||
|
||||
add esi,RASTERY_SIZEOF
|
||||
dec ecx
|
||||
jne .L_rright_h_loop
|
||||
|
||||
pop ebp
|
||||
|
||||
.L_finished:
|
||||
pop eax
|
||||
o16 mov es,ax
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebx
|
||||
|
||||
mov esp,ebp
|
||||
pop ebp
|
||||
ret
|
1587
src/tmap.s
1587
src/tmap.s
File diff suppressed because it is too large
Load diff
322
src/tmap_asm.s
322
src/tmap_asm.s
|
@ -1,322 +0,0 @@
|
|||
// SONIC ROBO BLAST 2
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
// Copyright (C) 1999-2018 by Sonic Team Junior.
|
||||
//
|
||||
// This program is free software distributed under the
|
||||
// terms of the GNU General Public License, version 2.
|
||||
// See the 'LICENSE' file for more details.
|
||||
//-----------------------------------------------------------------------------
|
||||
/// \file tmap_asm.s
|
||||
/// \brief Assembler 8bpp column drawers (plain x86, Pentium and K6/MMX variants)
|
||||
|
||||
//.comm _dc_colormap,4
|
||||
//.comm _dc_x,4
|
||||
//.comm _dc_yl,4
|
||||
//.comm _dc_yh,4
|
||||
//.comm _dc_iscale,4
|
||||
//.comm _dc_texturemid,4
|
||||
//.comm _dc_source,4
|
||||
//.comm _ylookup,4
|
||||
//.comm _columnofs,4
|
||||
//.comm _loopcount,4
|
||||
//.comm _pixelcount,4
|
||||
// Scratch variables for this file (none is declared .globl here).
.data
|
||||
_pixelcount:
|
||||
.long 0x00000000
|
||||
_loopcount:
|
||||
.long 0x00000000
|
||||
.align 8
|
||||
// _mmxcomm: one dword; _R_DrawColumn_8_K6_MMX stores its stack-alignment
// adjustment here.
_mmxcomm:
|
||||
.long 0x00000000
|
||||
.text
|
||||
|
||||
// _R_DrawColumn8_NOMMX (AT&T syntax: source operand first)
// Reads no stack arguments; inputs are _dc_yl, _dc_yh, _dc_x, _dc_iscale,
// _dc_texturemid, _dc_source, _dc_colormap, _ylookup and _columnofs.
// Saves and restores ebp, esi, edi, ebx.
// Here _ylookup and _columnofs are loaded as pointers and then indexed.
// The texture row is masked with 127, so rows wrap at 128.
// The two 0x12345678 immediates are placeholders: the exported labels
// rdc8nwidth1 / rdc8nwidth2 sit directly after the instructions that carry
// them. NOTE(review): presumably patched with the row stride from outside
// this file - confirm.
.align 4
|
||||
.globl _R_DrawColumn8_NOMMX
|
||||
_R_DrawColumn8_NOMMX:
|
||||
pushl %ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
movl _dc_yl,%edx
|
||||
movl _dc_yh,%eax
|
||||
subl %edx,%eax
|
||||
// ebx = dc_yh - dc_yl + 1 (pixel count)
leal 1(%eax),%ebx
|
||||
testl %ebx,%ebx
|
||||
jle rdc8ndone
|
||||
movl _dc_x,%eax
|
||||
movl _ylookup, %edi
|
||||
movl (%edi,%edx,4),%esi
|
||||
movl _columnofs, %edi
|
||||
// esi = ylookup[dc_yl] + columnofs[dc_x] (dest)
addl (%edi,%eax,4),%esi
|
||||
movl _dc_iscale,%edi
|
||||
movl %edx,%eax
|
||||
imull %edi,%eax
|
||||
movl _dc_texturemid,%ecx
|
||||
// ecx = frac = dc_texturemid + dc_yl * dc_iscale
addl %eax,%ecx
|
||||
|
||||
movl _dc_source,%ebp
|
||||
xorl %edx, %edx
|
||||
// dest is moved back by the placeholder here because the loop adds the
// second placeholder before every store.
subl $0x12345678, %esi
|
||||
.globl rdc8nwidth1
|
||||
rdc8nwidth1:
|
||||
.align 4,0x90
|
||||
rdc8nloop:
|
||||
movl %ecx,%eax
|
||||
shrl $16,%eax
|
||||
addl %edi,%ecx
|
||||
andl $127,%eax
|
||||
addl $0x12345678,%esi
|
||||
.globl rdc8nwidth2
|
||||
rdc8nwidth2:
|
||||
// dl = texel; edx was zeroed above, so edx is the colormap index
movb (%eax,%ebp),%dl
|
||||
movl _dc_colormap,%eax
|
||||
movb (%eax,%edx),%al
|
||||
movb %al,(%esi)
|
||||
decl %ebx
|
||||
jne rdc8nloop
|
||||
rdc8ndone:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebp
|
||||
ret
|
||||
|
||||
//
|
||||
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
|
||||
// By ES 1998/08/01
|
||||
//
|
||||
|
||||
// _R_DrawColumn_8_Pentium (AT&T syntax: source operand first)
// Reads no stack arguments; inputs are _dc_yl, _dc_yh, _dc_x, _dc_iscale,
// _dc_texturemid, _dc_source, _dc_colormap, _ylookup and _columnofs.
// Saves and restores ebp, ebx, esi, edi.
// A single-pixel column is handled inline. Longer columns shift frac and
// fracstep left by 9 so that a right shift by 25 gives a 7-bit row index
// (rows wrap at 128), and the loop processes two pixels per pass.
// Every 0x12345678 immediate is a placeholder, each followed by an exported
// rdc8pwidthN label whose comment names the intended value.
// NOTE(review): presumably patched from outside this file - confirm.
// NOTE(review): "movb (%edx),%al" / "movb (%eax),%dl" overwrite only the low
// byte of the colormap pointer with the texel - assumes _dc_colormap is
// 256-byte aligned; confirm.
.globl _R_DrawColumn_8_Pentium
|
||||
_R_DrawColumn_8_Pentium:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl _dc_yl,%eax // Top pixel
|
||||
movl _dc_yh,%ebx // Bottom pixel
|
||||
movl _ylookup, %edi
|
||||
// ecx = ylookup[dc_yh], the bottom row
movl (%edi,%ebx,4),%ecx
|
||||
subl %eax,%ebx // ebx=number of pixels-1
|
||||
jl rdc8pdone // no pixel to draw, done
|
||||
jnz rdc8pmany
|
||||
movl _dc_x,%edx // Special case: only one pixel
|
||||
movl _columnofs, %edi
|
||||
addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
|
||||
movl _dc_iscale,%esi
|
||||
imull %esi,%eax
|
||||
movl _dc_texturemid,%edi
|
||||
addl %eax,%edi // texture index in edi
|
||||
movl _dc_colormap,%edx
|
||||
shrl $16, %edi
|
||||
movl _dc_source,%ebp
|
||||
andl $127,%edi
|
||||
movb (%edi,%ebp),%dl // read texture pixel
|
||||
movb (%edx),%al // lookup for light
|
||||
movb %al,0(%ecx) // write it
|
||||
jmp rdc8pdone // done!
|
||||
.align 4, 0x90
|
||||
rdc8pmany: // draw >1 pixel
|
||||
movl _dc_x,%edx
|
||||
movl _columnofs, %edi
|
||||
movl (%edi,%edx,4),%edx
|
||||
leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
|
||||
.globl rdc8pwidth5
|
||||
rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
|
||||
movl _dc_iscale,%edx // edx = fracstep
|
||||
imull %edx,%eax
|
||||
shll $9, %edx // fixme: Should get 7.25 fix as input
|
||||
movl _dc_texturemid,%ecx
|
||||
addl %eax,%ecx // ecx = frac
|
||||
movl _dc_colormap,%eax // eax = lighting/special effects LUT
|
||||
shll $9, %ecx
|
||||
movl _dc_source,%esi // esi = source ptr
|
||||
|
||||
imull $0x12345678, %ebx // ebx = negative offset to pixel
|
||||
.globl rdc8pwidth6
|
||||
rdc8pwidth6: // DeadBeef = -SCREENWIDTH
|
||||
|
||||
// Begin the calculation of the two first pixels
|
||||
leal (%ecx, %edx), %ebp
|
||||
shrl $25, %ecx
|
||||
movb (%esi, %ecx), %al
|
||||
leal (%edx, %ebp), %ecx
|
||||
shrl $25, %ebp
|
||||
movb (%eax), %dl
|
||||
|
||||
// The main loop
|
||||
rdc8ploop:
|
||||
movb (%esi,%ebp), %al // load 1
|
||||
leal (%ecx, %edx), %ebp // calc frac 3
|
||||
|
||||
shrl $25, %ecx // shift frac 2
|
||||
movb %dl, 0x12345678(%edi, %ebx)// store 0
|
||||
.globl rdc8pwidth1
|
||||
rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
|
||||
|
||||
movb (%eax), %al // lookup 1
|
||||
|
||||
movb %al, 0x12345678(%edi, %ebx)// store 1
|
||||
.globl rdc8pwidth2
|
||||
rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
|
||||
movb (%esi, %ecx), %al // load 2
|
||||
|
||||
leal (%ebp, %edx), %ecx // calc frac 4
|
||||
|
||||
shrl $25, %ebp // shift frac 3
|
||||
movb (%eax), %dl // lookup 2
|
||||
|
||||
addl $0x12345678, %ebx // counter
|
||||
.globl rdc8pwidth3
|
||||
rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
|
||||
jl rdc8ploop // loop
|
||||
|
||||
// End of loop. Write extra pixel or just exit.
|
||||
// The flags still come from the addl on ebx: a zero result means one more
// pixel is pending.
jnz rdc8pdone
|
||||
movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
|
||||
.globl rdc8pwidth4
|
||||
rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
|
||||
|
||||
rdc8pdone:
|
||||
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
|
||||
//
|
||||
// MMX asm version, optimised for K6
|
||||
// By ES 1998/07/05
|
||||
//
|
||||
|
||||
// R_DrawColumn_8_K6_MMX: draws rows _dc_yl.._dc_yh of screen column _dc_x,
// fetching texels from _dc_source and translating them through _dc_colormap.
// Takes no stack arguments; all inputs are the globals read below.
// Every 0x12345678 immediate is a placeholder. The label placed right after
// each such instruction is annotated with the value expected there (a multiple
// of SCREENWIDTH, or a jump offset), so the routine is presumably patched at
// run time before use -- the patching code is not part of this block.
// Texture coordinates are reduced to 7 bits (andl $127 / shift right by 25).
// The stack adjustment is kept in the global _mmxcomm between entry and exit,
// so a nested or concurrent call would overwrite it.
.globl _R_DrawColumn_8_K6_MMX
|
||||
_R_DrawColumn_8_K6_MMX:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
// Reserve an 8-byte-aligned scratch area of at least 8 bytes at (%esp);
// it is used below to move register pairs into and out of MMX registers.
movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
|
||||
andl $7,%eax
|
||||
addl $8,%eax
|
||||
movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
|
||||
subl %eax,%esp
|
||||
|
||||
movl _dc_yl,%edx // Top pixel
|
||||
movl _dc_yh,%ebx // Bottom pixel
|
||||
movl _ylookup, %edi
|
||||
movl (%edi,%ebx,4),%ecx
|
||||
subl %edx,%ebx // ebx=number of pixels-1
|
||||
// Placeholder target: taken when dc_yh < dc_yl.
jl 0x12345678 // no pixel to draw, done
|
||||
.globl rdc8moffs1
|
||||
rdc8moffs1:
|
||||
jnz rdc8mmany
|
||||
movl _dc_x,%eax // Special case: only one pixel
|
||||
movl _columnofs, %edi
|
||||
addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
|
||||
movl _dc_iscale,%esi
|
||||
imull %esi,%edx
|
||||
movl _dc_texturemid,%edi
|
||||
addl %edx,%edi // texture index in edi
|
||||
movl _dc_colormap,%edx
|
||||
shrl $16, %edi
|
||||
movl _dc_source,%ebp
|
||||
andl $127,%edi
|
||||
// The texel replaces the low byte of the colormap pointer, so the lookup
// below addresses colormap[texel] only if that low byte was zero.
movb (%edi,%ebp),%dl // read texture pixel
|
||||
movb (%edx),%al // lookup for light
|
||||
movb %al,0(%ecx) // write it
|
||||
jmp rdc8mdone // done!
|
||||
.globl rdc8moffs2
|
||||
rdc8moffs2:
|
||||
.align 4, 0x90
|
||||
rdc8mmany: // draw >1 pixel
|
||||
movl _dc_x,%eax
|
||||
movl _columnofs, %edi
|
||||
movl (%edi,%eax,4),%eax
|
||||
leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
|
||||
.globl rdc8mwidth3
|
||||
rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH
|
||||
movl _dc_iscale,%ecx // ecx = fracstep
|
||||
imull %ecx,%edx
|
||||
// Both the step and the position are shifted left by 9 so that a later
// shift right by 25 yields the 7-bit texture row.
shll $9, %ecx // fixme: Should get 7.25 fix as input
|
||||
movl _dc_texturemid,%eax
|
||||
addl %edx,%eax // eax = frac
|
||||
movl _dc_colormap,%edx // edx = lighting/special effects LUT
|
||||
shll $9, %eax
|
||||
// edi = 2 * fracstep: each loop iteration handles two pixels.
leal (%ecx, %ecx), %edi
|
||||
movl _dc_source,%ebp // ebp = source ptr
|
||||
movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
|
||||
|
||||
imull $0x12345678, %ebx // ebx = negative offset to pixel
|
||||
.globl rdc8mwidth5
|
||||
rdc8mwidth5: // DeadBeef = -SCREENWIDTH
|
||||
|
||||
movl %edi, 4(%esp)
|
||||
leal (%eax, %ecx), %edi
|
||||
movq 0(%esp), %mm1 // 2*fracstep in both halves of mm1
|
||||
movl %eax, 0(%esp)
|
||||
shrl $25, %eax
|
||||
movl %edi, 4(%esp)
|
||||
movzbl (%ebp, %eax), %eax
|
||||
movq 0(%esp), %mm0 // frac (low half) and frac+fracstep (high half) in mm0
|
||||
|
||||
paddd %mm1, %mm0
|
||||
shrl $25, %edi
|
||||
movq %mm0, %mm2
|
||||
psrld $25, %mm2 // texture index in mm2
|
||||
paddd %mm1, %mm0
|
||||
movq %mm2, 0(%esp)
|
||||
|
||||
.globl rdc8mloop
|
||||
// Each iteration writes two pixels. On entry eax holds the texel for the
// first of them and edi the texture index for the second; the indices for
// the following pair wait in the scratch area at 0(%esp) and 4(%esp).
rdc8mloop: // The main loop
|
||||
movq %mm0, %mm2 // move 4-5 to temp reg
|
||||
movzbl (%ebp, %edi), %edi // read 1
|
||||
|
||||
psrld $25, %mm2 // shift 4-5
|
||||
movb (%edx,%eax), %cl // lookup 0
|
||||
|
||||
movl 0(%esp), %eax // load 2
|
||||
// The flags set by this addl are the ones tested by the jl/jnz at the loop
// end; none of the moves or MMX instructions in between modify EFLAGS.
addl $0x12345678, %ebx // counter
|
||||
.globl rdc8mwidth2
|
||||
rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH
|
||||
|
||||
movb %cl, (%esi, %ebx) // write 0
|
||||
movb (%edx,%edi), %ch // lookup 1
|
||||
|
||||
movb %ch, 0x12345678(%esi, %ebx) // write 1
|
||||
.globl rdc8mwidth1
|
||||
rdc8mwidth1: // DeadBeef = SCREENWIDTH
|
||||
movl 4(%esp), %edi // load 3
|
||||
|
||||
paddd %mm1, %mm0 // frac 6-7
|
||||
movzbl (%ebp, %eax), %eax // lookup 2
|
||||
|
||||
movq %mm2, 0(%esp) // store texture index 4-5
|
||||
jl rdc8mloop
|
||||
|
||||
// ebx reached exactly zero: one more pixel remains, its texel is in eax.
jnz rdc8mno_odd
|
||||
movb (%edx,%eax), %cl // write the last odd pixel
|
||||
movb %cl, 0x12345678(%esi)
|
||||
.globl rdc8mwidth4
|
||||
rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH
|
||||
rdc8mno_odd:
|
||||
|
||||
.globl rdc8mdone
|
||||
rdc8mdone:
|
||||
// Leave MMX state before returning to code that may use the FPU.
emms
|
||||
|
||||
// Release the aligned scratch area using the size saved at entry.
addl _mmxcomm, %esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
|
||||
// Need some extra space to align run-time
|
||||
.globl R_DrawColumn_8_K6_MMX_end
|
||||
// 31 bytes of nop padding after the routine.
R_DrawColumn_8_K6_MMX_end:
|
||||
nop;nop;nop;nop;nop;nop;nop;nop;
|
||||
nop;nop;nop;nop;nop;nop;nop;nop;
|
||||
nop;nop;nop;nop;nop;nop;nop;nop;
|
||||
nop;nop;nop;nop;nop;nop;nop;
|
674
src/tmap_mmx.nas
674
src/tmap_mmx.nas
|
@ -1,674 +0,0 @@
|
|||
;; SONIC ROBO BLAST 2
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; Copyright (C) 1998-2000 by DOSDOOM.
|
||||
;; Copyright (C) 2010-2018 by Sonic Team Junior.
|
||||
;;
|
||||
;; This program is free software distributed under the
|
||||
;; terms of the GNU General Public License, version 2.
|
||||
;; See the 'LICENSE' file for more details.
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; FILE:
|
||||
;; tmap_mmx.nas
|
||||
;; DESCRIPTION:
|
||||
;; Assembler optimised rendering code for software mode, using SIMD
|
||||
;; instructions.
|
||||
;; Draw wall columns.
|
||||
|
||||
|
||||
[BITS 32]
|
||||
|
||||
%define FRACBITS 16
|
||||
%define TRANSPARENTPIXEL 247
|
||||
|
||||
;; cextern / cglobal: declare a C symbol as imported / exported.
;; With LINUX defined the name is used as written. Otherwise the name is
;; first %define'd to itself with a leading underscore, so every later use
;; of the plain name in this file assembles as the underscore-prefixed symbol.
%ifdef LINUX
|
||||
%macro cextern 1
|
||||
[extern %1]
|
||||
%endmacro
|
||||
|
||||
%macro cglobal 1
|
||||
[global %1]
|
||||
%endmacro
|
||||
|
||||
%else
|
||||
;; Non-LINUX builds: map NAME to _NAME before declaring it.
%macro cextern 1
|
||||
%define %1 _%1
|
||||
[extern %1]
|
||||
%endmacro
|
||||
|
||||
%macro cglobal 1
|
||||
%define %1 _%1
|
||||
[global %1]
|
||||
%endmacro
|
||||
|
||||
%endif
|
||||
|
||||
|
||||
; The viddef_s structure. We only need the width field.
|
||||
;; Layout mirror of the C viddef_s structure: 60 bytes in total, of which
;; only the 4-byte width field at offset 12 is named. The sizes must be
;; kept in step with the C declaration by hand.
struc viddef_s
|
||||
resb 12
|
||||
.width: resb 4
|
||||
resb 44
|
||||
endstruc
|
||||
|
||||
|
||||
;; externs
|
||||
;; columns
|
||||
cextern dc_colormap
|
||||
cextern dc_x
|
||||
cextern dc_yl
|
||||
cextern dc_yh
|
||||
cextern dc_iscale
|
||||
cextern dc_texturemid
|
||||
cextern dc_texheight
|
||||
cextern dc_source
|
||||
cextern dc_hires
|
||||
cextern centery
|
||||
cextern centeryfrac
|
||||
cextern dc_transmap
|
||||
|
||||
cextern R_DrawColumn_8_ASM
|
||||
cextern R_Draw2sMultiPatchColumn_8_ASM
|
||||
|
||||
;; spans
|
||||
cextern nflatshiftup
|
||||
cextern nflatxshift
|
||||
cextern nflatyshift
|
||||
cextern nflatmask
|
||||
cextern ds_xfrac
|
||||
cextern ds_yfrac
|
||||
cextern ds_xstep
|
||||
cextern ds_ystep
|
||||
cextern ds_x1
|
||||
cextern ds_x2
|
||||
cextern ds_y
|
||||
cextern ds_source
|
||||
cextern ds_colormap
|
||||
|
||||
cextern ylookup
|
||||
cextern columnofs
|
||||
cextern vid
|
||||
|
||||
[SECTION .data]
|
||||
|
||||
nflatmask64 dq 0
|
||||
|
||||
|
||||
[SECTION .text]
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_DrawColumn : 8bpp column drawer
|
||||
;;
|
||||
;; MMX column drawer.
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = accumulator
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = heightmask while the odd leading pixel is drawn, then vid.width in the pair loop
|
||||
;; mm0 = accumulator
|
||||
;; mm1 = heightmask, twice
|
||||
;; mm2 = 2 * fracstep, twice
|
||||
;; mm3 = pair of consecutive fracs
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
|
||||
cglobal R_DrawColumn_8_MMX
R_DrawColumn_8_MMX:
        ;; No stack arguments: every input is one of the dc_* globals.
        ;; Save the registers the caller expects to find unchanged.
        push    ebp
        push    esi
        push    edi
        push    ebx

        ;; Texture rows are wrapped with a single AND, which only works
        ;; when dc_texheight is a power of two. Any other height is handed
        ;; to the plain assembler drawer.
        mov     edx, [dc_texheight]
        dec     edx                             ;; edx = heightmask
        test    [dc_texheight], edx
        jnz     near .fallback

        movd    mm1, edx
        punpckldq mm1, mm1                      ;; mm1 = heightmask in both dwords
        mov     ebp, edx                        ;; GPR copy, used for the odd pixel

        ;; edi = ylookup[dc_yl] + columnofs[dc_x]
        mov     eax, [dc_yl]
        mov     ebx, [dc_x]
        mov     edi, [ylookup + eax*4]
        add     edi, [columnofs + ebx*4]

        ;; ecx = dc_yh + 1 - dc_yl; leave when that is not positive
        mov     ecx, [dc_yh]
        add     ecx, 1
        sub     ecx, eax
        jle     near .exit

        mov     esi, [dc_source]
        mov     ebx, [dc_colormap]
        movd    mm2, [dc_iscale]
        punpckldq mm2, mm2                      ;; mm2 = fracstep in both dwords

        ;; eax = dc_texturemid
        ;;       + FixedMul((dc_yl << FRACBITS) - centeryfrac, dc_iscale)
        ;; (eax still holds dc_yl here)
        shl     eax, FRACBITS
        sub     eax, [centeryfrac]
        imul    dword [dc_iscale]               ;; edx:eax = 64-bit product
        shrd    eax, edx, FRACBITS
        add     eax, [dc_texturemid]

        ;; dc_hires set: start from frac = 0 instead
        test    byte [dc_hires], 0x01
        jz      .odd_pixel
        xor     eax, eax

.odd_pixel:
        ;; The main loop draws two rows per pass, so an odd count gets its
        ;; first row drawn here with ordinary integer code.
        test    ecx, 1
        jz      .setup_pairs
        mov     edx, eax
        sar     edx, FRACBITS
        and     edx, ebp                        ;; wrap the row into the texture
        add     eax, [dc_iscale]                ;; frac for the next row
        movzx   edx, byte [esi + edx]           ;; texel
        movzx   edx, byte [ebx + edx]           ;; through the colormap
        mov     [edi], dl

        add     edi, [vid + viddef_s.width]
        dec     ecx
        jz      .exit

.setup_pairs:
        ;; mm3 = frac (low dword) and frac + fracstep (high dword);
        ;; mm2 becomes 2 * fracstep so one paddd advances both.
        movd    mm3, eax
        movq    mm4, mm3
        paddd   mm4, mm2
        punpckldq mm3, mm4
        pslld   mm2, 1

        mov     ebp, [vid + viddef_s.width]     ;; heightmask no longer needed in ebp

        align   16
.draw_pair:
        movq    mm0, mm3
        paddd   mm3, mm2                        ;; step both fracs to the next pair
        psrad   mm0, FRACBITS
        pand    mm0, mm1                        ;; two wrapped texture rows

        movd    eax, mm0                        ;; row for the first pixel
        punpckhdq mm0, mm0
        movd    edx, mm0                        ;; row for the second pixel

        movzx   eax, byte [esi + eax]           ;; texel
        movzx   eax, byte [ebx + eax]           ;; colormap
        mov     [edi], al

        movzx   edx, byte [esi + edx]           ;; texel
        movzx   edx, byte [ebx + edx]           ;; colormap
        mov     [edi + ebp], dl

        lea     edi, [edi + 2*ebp]              ;; down two screen rows
        sub     ecx, 2
        jnz     .draw_pair

.exit:
        ;; MMX registers alias the FPU stack; emms hands it back.
        emms

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        ret

.fallback:
        ;; Non power-of-two texture: let the integer drawer do the work,
        ;; then leave through the common epilogue.
        call    R_DrawColumn_8_ASM
        jmp     .exit
|
||||
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
|
||||
;; pixels.
|
||||
;;
|
||||
;; MMX column drawer.
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = accumulator
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = heightmask while the odd leading pixel is drawn, then vid.width in the pair loop
|
||||
;; mm0 = accumulator
|
||||
;; mm1 = heightmask, twice
|
||||
;; mm2 = 2 * fracstep, twice
|
||||
;; mm3 = pair of consecutive fracs
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
|
||||
cglobal R_Draw2sMultiPatchColumn_8_MMX
R_Draw2sMultiPatchColumn_8_MMX:
        ;; No stack arguments: every input is one of the dc_* globals.
        ;; Texels equal to TRANSPARENTPIXEL are skipped, leaving the
        ;; destination byte untouched.
        push    ebp
        push    esi
        push    edi
        push    ebx

        ;; Texture rows are wrapped with a single AND, which only works
        ;; when dc_texheight is a power of two. Any other height is handed
        ;; to the plain assembler drawer.
        mov     edx, [dc_texheight]
        dec     edx                             ;; edx = heightmask
        test    [dc_texheight], edx
        jnz     near .fallback

        movd    mm1, edx
        punpckldq mm1, mm1                      ;; mm1 = heightmask in both dwords
        mov     ebp, edx                        ;; GPR copy, used for the odd pixel

        ;; edi = ylookup[dc_yl] + columnofs[dc_x]
        mov     eax, [dc_yl]
        mov     ebx, [dc_x]
        mov     edi, [ylookup + eax*4]
        add     edi, [columnofs + ebx*4]

        ;; ecx = dc_yh + 1 - dc_yl; leave when that is not positive
        mov     ecx, [dc_yh]
        add     ecx, 1
        sub     ecx, eax
        jle     near .exit

        mov     esi, [dc_source]
        mov     ebx, [dc_colormap]
        movd    mm2, [dc_iscale]
        punpckldq mm2, mm2                      ;; mm2 = fracstep in both dwords

        ;; eax = dc_texturemid
        ;;       + FixedMul((dc_yl << FRACBITS) - centeryfrac, dc_iscale)
        ;; (eax still holds dc_yl here)
        shl     eax, FRACBITS
        sub     eax, [centeryfrac]
        imul    dword [dc_iscale]               ;; edx:eax = 64-bit product
        shrd    eax, edx, FRACBITS
        add     eax, [dc_texturemid]

        ;; dc_hires set: start from frac = 0 instead
        test    byte [dc_hires], 0x01
        jz      .odd_pixel
        xor     eax, eax

.odd_pixel:
        ;; The main loop handles two rows per pass, so an odd count gets its
        ;; first row handled here with ordinary integer code.
        test    ecx, 1
        jz      .setup_pairs
        mov     edx, eax
        sar     edx, FRACBITS
        and     edx, ebp                        ;; wrap the row into the texture
        add     eax, [dc_iscale]                ;; frac for the next row
        movzx   edx, byte [esi + edx]           ;; texel
        cmp     dl, TRANSPARENTPIXEL
        je      .odd_skipped
        movzx   edx, byte [ebx + edx]           ;; through the colormap
        mov     [edi], dl

.odd_skipped:
        add     edi, [vid + viddef_s.width]
        dec     ecx
        jz      .exit

.setup_pairs:
        ;; mm3 = frac (low dword) and frac + fracstep (high dword);
        ;; mm2 becomes 2 * fracstep so one paddd advances both.
        movd    mm3, eax
        movq    mm4, mm3
        paddd   mm4, mm2
        punpckldq mm3, mm4
        pslld   mm2, 1

        mov     ebp, [vid + viddef_s.width]     ;; heightmask no longer needed in ebp

        align   16
.draw_pair:
        movq    mm0, mm3
        paddd   mm3, mm2                        ;; step both fracs to the next pair
        psrad   mm0, FRACBITS
        pand    mm0, mm1                        ;; two wrapped texture rows

        movd    eax, mm0                        ;; row for the first pixel
        punpckhdq mm0, mm0
        movd    edx, mm0                        ;; row for the second pixel

        movzx   eax, byte [esi + eax]           ;; first texel
        cmp     al, TRANSPARENTPIXEL
        je      .second_texel
        movzx   eax, byte [ebx + eax]           ;; colormap
        mov     [edi], al

.second_texel:
        movzx   edx, byte [esi + edx]           ;; second texel
        cmp     dl, TRANSPARENTPIXEL
        je      .advance
        movzx   edx, byte [ebx + edx]           ;; colormap
        mov     [edi + ebp], dl

.advance:
        lea     edi, [edi + 2*ebp]              ;; down two screen rows
        sub     ecx, 2
        jnz     .draw_pair

.exit:
        ;; MMX registers alias the FPU stack; emms hands it back.
        emms

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        ret

.fallback:
        ;; Non power-of-two texture: let the integer drawer do the work,
        ;; then leave through the common epilogue.
        call    R_Draw2sMultiPatchColumn_8_ASM
        jmp     .exit
|
||||
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_DrawSpan : 8bpp span drawer
|
||||
;;
|
||||
;; MMX span drawer.
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = accumulator
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = four output pixels packed into one dword
|
||||
;; mm0 = accumulator
|
||||
;; mm1 = xposition
|
||||
;; mm2 = yposition
|
||||
;; mm3 = 2 * xstep
|
||||
;; mm4 = 2 * ystep
|
||||
;; mm5 = nflatxshift
|
||||
;; mm6 = nflatyshift
|
||||
;; mm7 = accumulator
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
cglobal R_DrawSpan_8_MMX
R_DrawSpan_8_MMX:
        ;; Draws the horizontal span ds_x1..ds_x2 on row ds_y, sampling
        ;; ds_source and translating through ds_colormap. No stack
        ;; arguments: every input is one of the ds_* / nflat* globals.
        ;;
        ;; Shape: single pixels until edi is dword aligned, then groups of
        ;; four pixels written as one dword, then up to three single pixels.
        ;; A span whose pixel count is zero or negative draws nothing.
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

        ;;
        ;; esi = ds_source
        ;; ebx = ds_colormap
        ;;
        mov     esi, [ds_source]
        mov     ebx, [ds_colormap]

        ;;
        ;; edi = ylookup[ds_y] + columnofs[ds_x1]
        ;;
        mov     eax, [ds_y]
        mov     edi, [ylookup + eax*4]
        mov     edx, [ds_x1]
        add     edi, [columnofs + edx*4]

        ;;
        ;; ecx = ds_x2 - ds_x1 + 1
        ;;
        mov     ecx, [ds_x2]
        sub     ecx, edx
        add     ecx, 1

        ;;
        ;; Shift count applied to all fracs and steps
        ;;
        movd    mm7, [nflatshiftup]

        ;;
        ;; mm3 = xstep in both dwords
        ;;
        movd    mm3, [ds_xstep]
        pslld   mm3, mm7
        punpckldq mm3, mm3

        ;;
        ;; mm4 = ystep in both dwords
        ;;
        movd    mm4, [ds_ystep]
        pslld   mm4, mm7
        punpckldq mm4, mm4

        ;;
        ;; mm1 = pair of consecutive xpositions
        ;;
        movd    mm1, [ds_xfrac]
        pslld   mm1, mm7
        movq    mm6, mm1
        paddd   mm6, mm3
        punpckldq mm1, mm6

        ;;
        ;; mm2 = pair of consecutive ypositions
        ;;
        movd    mm2, [ds_yfrac]
        pslld   mm2, mm7
        movq    mm6, mm2
        paddd   mm6, mm4
        punpckldq mm2, mm6

        ;;
        ;; mm5 = nflatxshift
        ;; mm6 = nflatyshift
        ;;
        movd    mm5, [nflatxshift]
        movd    mm6, [nflatyshift]

        ;;
        ;; Mask is in memory due to lack of registers.
        ;;
        mov     eax, [nflatmask]
        mov     [nflatmask64], eax
        mov     [nflatmask64 + 4], eax

        ;;
        ;; Go until we reach a dword boundary.
        ;;
.unaligned:
        test    edi, 3
        jz      .alignedprep
.stragglers:
        ;; Signed test: a negative count (ds_x2 < ds_x1 - 1) must stop here
        ;; as well, otherwise the loop would run on past the span.
        test    ecx, ecx
        jle     .done

        ;;
        ;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
        ;; Only the low dword of each pair is used; both dwords advance
        ;; by one step so the pair stays consecutive.
        ;;
        movq    mm0, mm1                ;; mm0 = xposition
        movq    mm7, mm2                ;; mm7 = yposition
        paddd   mm1, mm3                ;; xposition += xstep (once!)
        paddd   mm2, mm4                ;; yposition += ystep (once!)
        psrld   mm0, mm5                ;; shift
        psrld   mm7, mm6                ;; shift
        pand    mm7, [nflatmask64]      ;; mask
        por     mm0, mm7                ;; or x and y together

        movd    eax, mm0                ;; eax = index of first pixel
        movzx   eax, byte [esi + eax]   ;; al = source[eax]
        movzx   eax, byte [ebx + eax]   ;; al = colormap[al]

        mov     [edi], al
        add     edi, 1

        sub     ecx, 1
        jmp     .unaligned

.alignedprep:
        ;;
        ;; Each pass below consumes a pair, so the steps are doubled.
        ;; The doubling is correct modulo 2^32, which is all the wrapping
        ;; position arithmetic needs.
        ;;
        pslld   mm3, 1
        pslld   mm4, 1

        ;;
        ;; Generate chunks of four pixels.
        ;;
.alignedloop:

        ;;
        ;; Make sure we have at least four pixels.
        ;;
        cmp     ecx, 4
        jl      .prestragglers

        ;;
        ;; First two pixels.
        ;;
        movq    mm0, mm1                ;; mm0 = xposition
        movq    mm7, mm2                ;; mm7 = yposition
        paddd   mm1, mm3                ;; xposition += 2 * xstep
        paddd   mm2, mm4                ;; yposition += 2 * ystep
        psrld   mm0, mm5                ;; shift
        psrld   mm7, mm6                ;; shift
        pand    mm7, [nflatmask64]      ;; mask
        por     mm0, mm7                ;; or x and y together

        movd    eax, mm0                ;; eax = index of first pixel
        movzx   eax, byte [esi + eax]   ;; al = source[eax]
        movzx   ebp, byte [ebx + eax]   ;; ebp = colormap[al]

        punpckhdq mm0, mm0              ;; both dwords = high dword
        movd    eax, mm0                ;; eax = index of second pixel
        movzx   eax, byte [esi + eax]   ;; al = source[eax]
        movzx   eax, byte [ebx + eax]   ;; al = colormap[al]
        shl     eax, 8                  ;; get pixel in right byte
        or      ebp, eax                ;; put pixel in ebp

        ;;
        ;; Next two pixels.
        ;;
        movq    mm0, mm1                ;; mm0 = xposition
        movq    mm7, mm2                ;; mm7 = yposition
        paddd   mm1, mm3                ;; xposition += 2 * xstep
        paddd   mm2, mm4                ;; yposition += 2 * ystep
        psrld   mm0, mm5                ;; shift
        psrld   mm7, mm6                ;; shift
        pand    mm7, [nflatmask64]      ;; mask
        por     mm0, mm7                ;; or x and y together

        movd    eax, mm0                ;; eax = index of third pixel
        movzx   eax, byte [esi + eax]   ;; al = source[eax]
        movzx   eax, byte [ebx + eax]   ;; al = colormap[al]
        shl     eax, 16                 ;; get pixel in right byte
        or      ebp, eax                ;; put pixel in ebp

        punpckhdq mm0, mm0              ;; both dwords = high dword
        movd    eax, mm0                ;; eax = index of fourth pixel
        movzx   eax, byte [esi + eax]   ;; al = source[eax]
        movzx   eax, byte [ebx + eax]   ;; al = colormap[al]
        shl     eax, 24                 ;; get pixel in right byte
        or      ebp, eax                ;; put pixel in ebp

        ;;
        ;; Write pixels.
        ;;
        mov     [edi], ebp
        add     edi, 4

        sub     ecx, 4
        jmp     .alignedloop

.prestragglers:
        ;;
        ;; Back to one step at a time. The single steps are rebuilt from
        ;; their sources: shifting the doubled value back down cannot
        ;; recover the bit that the doubling pushed out of each dword.
        ;; mm7 is free here; .stragglers overwrites it before reading it.
        ;;
        movd    mm7, [nflatshiftup]
        movd    mm3, [ds_xstep]
        pslld   mm3, mm7
        punpckldq mm3, mm3
        movd    mm4, [ds_ystep]
        pslld   mm4, mm7
        punpckldq mm4, mm4
        jmp     .stragglers

.done:
        ;;
        ;; Clear MMX state, or else FPU operations will go badly awry.
        ;;
        emms

        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        ret
|
|
@ -1,48 +0,0 @@
|
|||
;; SONIC ROBO BLAST 2
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
;; Copyright (C) 1999-2018 by Sonic Team Junior.
|
||||
;;
|
||||
;; This program is free software distributed under the
|
||||
;; terms of the GNU General Public License, version 2.
|
||||
;; See the 'LICENSE' file for more details.
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; FILE:
|
||||
;; tmap_vc.nas
|
||||
;; DESCRIPTION:
|
||||
;; Assembler optimised math code for Visual C++.
|
||||
|
||||
|
||||
[BITS 32]
|
||||
|
||||
%macro cglobal 1
|
||||
%define %1 _%1
|
||||
[global %1]
|
||||
%endmacro
|
||||
|
||||
[SECTION .text write]
|
||||
|
||||
;----------------------------------------------------------------------------
|
||||
;fixed_t FixedMul (fixed_t a, fixed_t b)
|
||||
;----------------------------------------------------------------------------
|
||||
cglobal FixedMul
FixedMul:
        ;; cdecl: [esp+4] = a, [esp+8] = b, result in eax.
        ;; Returns bits 16..47 of the signed 64-bit product a * b.
        mov     eax, [esp+8]            ;; eax = b
        imul    dword [esp+4]           ;; edx:eax = b * a
        shrd    eax, edx, 16            ;; drop the low 16 fraction bits
        ret
|
||||
|
||||
;----------------------------------------------------------------------------
|
||||
;fixed_t FixedDiv2 (fixed_t a, fixed_t b);
|
||||
;----------------------------------------------------------------------------
|
||||
cglobal FixedDiv2
FixedDiv2:
        ;; cdecl: [esp+4] = a, [esp+8] = b, result in eax.
        ;; Returns (a << 16) / b using a 64-bit dividend in edx:eax.
        ;; idiv faults when b is zero or the quotient does not fit in
        ;; 32 bits; nothing here guards against either.
        mov     eax, [esp+4]            ;; eax = a
        cdq                             ;; edx = sign of a
        shld    edx, eax, 16            ;; edx = high half of a << 16
        shl     eax, 16                 ;; eax = low half of a << 16
        idiv    dword [esp+8]           ;; eax = quotient
        ret
|
|
@ -266,12 +266,6 @@ static void CV_Gammaxxx_ONChange(void)
|
|||
#endif
|
||||
|
||||
|
||||
#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM)
|
||||
void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
|
||||
size_t destrowbytes);
|
||||
#define HAVE_VIDCOPY
|
||||
#endif
|
||||
|
||||
static void CV_constextsize_OnChange(void)
|
||||
{
|
||||
con_recalc = true;
|
||||
|
@ -284,9 +278,6 @@ static void CV_constextsize_OnChange(void)
|
|||
void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
|
||||
size_t destrowbytes)
|
||||
{
|
||||
#ifdef HAVE_VIDCOPY
|
||||
VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes);
|
||||
#else
|
||||
if ((srcrowbytes == destrowbytes) && (srcrowbytes == (size_t)width))
|
||||
M_Memcpy(destptr, srcptr, srcrowbytes * height);
|
||||
else
|
||||
|
@ -299,7 +290,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT3
|
|||
srcptr += srcrowbytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0};
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
// SONIC ROBO BLAST 2
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
// Copyright (C) 1999-2018 by Sonic Team Junior.
|
||||
//
|
||||
// This program is free software distributed under the
|
||||
// terms of the GNU General Public License, version 2.
|
||||
// See the 'LICENSE' file for more details.
|
||||
//-----------------------------------------------------------------------------
|
||||
/// \file vid_copy.s
|
||||
/// \brief code for updating the linear frame buffer screen.
|
||||
|
||||
#include "asm_defs.inc" // structures, must match the C structures!
|
||||
|
||||
// DJGPPv2 is as fast as this one, but then someone may compile with a less
|
||||
// good version of DJGPP than mine, so this little asm will do the trick!
|
||||
|
||||
#define srcptr 4+16
|
||||
#define destptr 8+16
|
||||
#define width 12+16
|
||||
#define height 16+16
|
||||
#define srcrowbytes 20+16
|
||||
#define destrowbytes 24+16
|
||||
|
||||
// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth );
|
||||
// width is given as BYTES
|
||||
|
||||
#ifdef __i386__
|
||||
|
||||
// void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr,
//                               INT32 width, INT32 height,
//                               size_t srcrowbytes, size_t destrowbytes);
// cdecl. Copies `height` rows of `width` bytes each; consecutive rows are
// srcrowbytes apart in the source and destrowbytes apart in the destination.
// The argument names below are preprocessor offsets that already include
// the 16 bytes of saved registers.
// A width or height that is zero or negative copies nothing.
// Each row is copied as width/4 dwords followed by the width%4 left-over
// bytes, so widths that are not a multiple of four are copied in full.
.globl C(VID_BlitLinearScreen_ASM)
C(VID_BlitLinearScreen_ASM):
    pushl   %ebp                    // preserve caller's stack frame
    pushl   %edi
    pushl   %esi                    // preserve register variables
    pushl   %ebx

    cld                             // string moves must run upwards
    movl    srcptr(%esp),%esi
    movl    destptr(%esp),%edi
    movl    width(%esp),%ebx        // ebx = bytes per row
    testl   %ebx,%ebx
    jle     LLBlitDone              // nothing to copy per row
    movl    height(%esp),%ebp       // ebp = rows left
    testl   %ebp,%ebp
    jle     LLBlitDone              // no rows: decl/jnz below would wrap around
    movl    srcrowbytes(%esp),%eax
    subl    %ebx,%eax               // eax = source bytes to skip after a row
    movl    destrowbytes(%esp),%edx
    subl    %ebx,%edx               // edx = destination bytes to skip after a row
LLRowLoop:
    movl    %ebx,%ecx
    shrl    $2,%ecx                 // whole dwords in this row
    rep
    movsl
    movl    %ebx,%ecx
    andl    $3,%ecx                 // 0..3 trailing bytes
    rep
    movsb
    addl    %eax,%esi               // esi/edi advanced by exactly ebx above
    addl    %edx,%edi
    decl    %ebp
    jnz     LLRowLoop

LLBlitDone:
    popl    %ebx                    // restore register variables
    popl    %esi
    popl    %edi
    popl    %ebp                    // restore the caller's stack frame

    ret
|
||||
#endif
|
|
@ -20,10 +20,6 @@ else
|
|||
SDL_LDFLAGS?=-L../libs/SDL2/i686-w64-mingw32/lib -L../libs/SDL2_mixer/i686-w64-mingw32/lib -lmingw32 -lSDL2main -lSDL2 -mwindows
|
||||
endif
|
||||
|
||||
ifndef NOASM
|
||||
USEASM=1
|
||||
endif
|
||||
|
||||
ifndef NONET
|
||||
ifndef MINGW64 #miniupnc is broken with MINGW64
|
||||
HAVE_MINIUPNPC=1
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#ifdef _MSC_VER
|
||||
#include <assert.h>
|
||||
#endif
|
||||
#define NOASM
|
||||
#include "../src/tables.h"
|
||||
#define NO_M
|
||||
#include "../src/m_fixed.c"
|
||||
|
|
Loading…
Reference in a new issue