mirror of
https://git.do.srb2.org/STJr/SRB2.git
synced 2025-01-17 23:21:22 +00:00
delete ASM code, remove NASM
This commit is contained in:
parent
5068e0fe5d
commit
7634a96031
34 changed files with 2 additions and 4349 deletions
18
SRB2.cbp
18
SRB2.cbp
|
@ -1992,24 +1992,6 @@ HW3SOUND for 3D hardware sound support
|
|||
<Option compilerVar="CC" />
|
||||
</Unit>
|
||||
<Unit filename="src/v_video.h" />
|
||||
<Unit filename="src/vid_copy.s">
|
||||
<Option compilerVar="CC" />
|
||||
<Option compiler="avrgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="gnu_gcc_compiler_for_mingw32" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="gnu_gcc_compiler_for_mingw64" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="armelfgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="tricoregcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="ppcgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option compiler="gcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
|
||||
<Option target="Debug Native/SDL" />
|
||||
<Option target="Release Native/SDL" />
|
||||
<Option target="Debug Linux/SDL" />
|
||||
<Option target="Release Linux/SDL" />
|
||||
<Option target="Debug Mingw/SDL" />
|
||||
<Option target="Release Mingw/SDL" />
|
||||
<Option target="Debug Mingw/DirectX" />
|
||||
<Option target="Release Mingw/DirectX" />
|
||||
</Unit>
|
||||
<Unit filename="src/w_wad.c">
|
||||
<Option compilerVar="CC" />
|
||||
</Unit>
|
||||
|
|
|
@ -25,9 +25,6 @@
|
|||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(PlatformTarget)'=='x86'">
|
||||
<ClCompile>
|
||||
<PreprocessorDefinitions>USEASM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
|
||||
</Link>
|
||||
|
|
52
Srb2.dev
52
Srb2.dev
|
@ -5,7 +5,7 @@ Ver=3
|
|||
IsCpp=0
|
||||
Type=0
|
||||
UnitCount=279
|
||||
Folders=A_Asm,B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
|
||||
Folders=B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
|
||||
CommandLine=
|
||||
CompilerSettings=00000000000100000111e1
|
||||
PchHead=-1
|
||||
|
@ -1473,36 +1473,6 @@ Priority=1000
|
|||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit149]
|
||||
FileName=src\tmap.nas
|
||||
Folder=A_Asm
|
||||
Compile=0
|
||||
CompileCpp=0
|
||||
Link=0
|
||||
Priority=1000
|
||||
OverrideBuildCmd=1
|
||||
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap.nas
|
||||
|
||||
[Unit150]
|
||||
FileName=src\asm_defs.inc
|
||||
Folder=A_Asm
|
||||
Compile=0
|
||||
CompileCpp=0
|
||||
Link=0
|
||||
Priority=1000
|
||||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit151]
|
||||
FileName=src\vid_copy.s
|
||||
Folder=A_Asm
|
||||
Compile=1
|
||||
CompileCpp=0
|
||||
Link=1
|
||||
Priority=1000
|
||||
OverrideBuildCmd=1
|
||||
BuildCmd=$(CC) $(CFLAGS) -x assembler-with-cpp -c src/vid_copy.s -o $@
|
||||
|
||||
[Unit152]
|
||||
FileName=src\y_inter.h
|
||||
Folder=H_Hud
|
||||
|
@ -1543,26 +1513,6 @@ Priority=1000
|
|||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit156]
|
||||
FileName=src\p5prof.h
|
||||
Folder=A_Asm
|
||||
Compile=1
|
||||
CompileCpp=0
|
||||
Link=1
|
||||
Priority=1000
|
||||
OverrideBuildCmd=0
|
||||
BuildCmd=
|
||||
|
||||
[Unit157]
|
||||
FileName=src\tmap_mmx.nas
|
||||
Folder=A_Asm
|
||||
Compile=0
|
||||
CompileCpp=0
|
||||
Link=0
|
||||
Priority=1000
|
||||
OverrideBuildCmd=1
|
||||
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap_mmx.nas
|
||||
|
||||
[Unit159]
|
||||
FileName=src\lzf.h
|
||||
Folder=W_Wad
|
||||
|
|
|
@ -7,8 +7,6 @@ environment:
|
|||
# c:\mingw-w64 i686 has gcc 6.3.0, so use c:\msys64 7.3.0 instead
|
||||
MINGW_SDK: c:\msys64\mingw32
|
||||
CFLAGS: -Wno-implicit-fallthrough
|
||||
NASM_ZIP: nasm-2.12.01
|
||||
NASM_URL: http://www.nasm.us/pub/nasm/releasebuilds/2.12.01/win64/nasm-2.12.01-win64.zip
|
||||
UPX_ZIP: upx391w
|
||||
UPX_URL: http://upx.sourceforge.net/download/upx391w.zip
|
||||
CCACHE_EXE: ccache.exe
|
||||
|
@ -40,17 +38,12 @@ environment:
|
|||
ASSET_CLEAN: 0
|
||||
|
||||
cache:
|
||||
- nasm-2.12.01.zip
|
||||
- upx391w.zip
|
||||
- ccache.exe
|
||||
- C:\Users\appveyor\.ccache
|
||||
- C:\Users\appveyor\srb2_cache
|
||||
|
||||
install:
|
||||
- if not exist "%NASM_ZIP%.zip" appveyor DownloadFile "%NASM_URL%" -FileName "%NASM_ZIP%.zip"
|
||||
- 7z x -y "%NASM_ZIP%.zip" -o%TMP% >null
|
||||
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%NASM_ZIP%" "%MINGW_SDK%\bin" nasm.exe || exit 0
|
||||
|
||||
- if not exist "%UPX_ZIP%.zip" appveyor DownloadFile "%UPX_URL%" -FileName "%UPX_ZIP%.zip"
|
||||
- 7z x -y "%UPX_ZIP%.zip" -o%TMP% >null
|
||||
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%UPX_ZIP%" "%MINGW_SDK%\bin" upx.exe || exit 0
|
||||
|
@ -65,7 +58,6 @@ configuration:
|
|||
before_build:
|
||||
- set "Path=%MINGW_SDK%\bin;%Path%"
|
||||
- mingw32-make --version
|
||||
- nasm -v
|
||||
- if not [%NOUPX%] == [1] ( upx -V )
|
||||
- ccache -V
|
||||
- ccache -s
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Kitware, Inc.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
# support for the yasm assembler
|
||||
|
||||
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS nasm yasm asm)
|
||||
|
||||
if(NOT CMAKE_ASM_YASM_OBJECT_FORMAT)
|
||||
if(WIN32)
|
||||
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT win64)
|
||||
else()
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT win32)
|
||||
endif()
|
||||
elseif(APPLE)
|
||||
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho64)
|
||||
else()
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho)
|
||||
endif()
|
||||
else()
|
||||
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf64)
|
||||
else()
|
||||
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_ASM_YASM_COMPILE_OBJECT "<CMAKE_ASM_YASM_COMPILER> <FLAGS> -f ${CMAKE_ASM_YASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>")
|
||||
|
||||
# Load the generic ASMInformation file:
|
||||
set(ASM_DIALECT "_YASM")
|
||||
include(CMakeASMInformation)
|
||||
set(ASM_DIALECT)
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Kitware, Inc.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
# Find the nasm assembler. yasm (http://www.tortall.net/projects/yasm/) is nasm compatible
|
||||
|
||||
set(CMAKE_ASM_YASM_COMPILER_LIST nasm yasm)
|
||||
|
||||
if(NOT CMAKE_ASM_YASM_COMPILER)
|
||||
find_program(CMAKE_ASM_YASM_COMPILER yasm
|
||||
"$ENV{ProgramFiles}/YASM")
|
||||
endif()
|
||||
|
||||
# Load the generic DetermineASM compiler file with the DIALECT set properly:
|
||||
set(ASM_DIALECT "_YASM")
|
||||
include(CMakeDetermineASMCompiler)
|
||||
set(ASM_DIALECT)
|
|
@ -1,23 +0,0 @@
|
|||
|
||||
#=============================================================================
|
||||
# Copyright 2010 Kitware, Inc.
|
||||
#
|
||||
# Distributed under the OSI-approved BSD License (the "License");
|
||||
# see accompanying file Copyright.txt for details.
|
||||
#
|
||||
# This software is distributed WITHOUT ANY WARRANTY; without even the
|
||||
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the License for more information.
|
||||
#=============================================================================
|
||||
# (To distribute this file outside of CMake, substitute the full
|
||||
# License text for the above reference.)
|
||||
|
||||
# This file is used by EnableLanguage in cmGlobalGenerator to
|
||||
# determine that the selected ASM_YASM "compiler" works.
|
||||
# For assembler this can only check whether the compiler has been found,
|
||||
# because otherwise there would have to be a separate assembler source file
|
||||
# for each assembler on every architecture.
|
||||
|
||||
set(ASM_DIALECT "_YASM")
|
||||
include(CMakeTestASMCompiler)
|
||||
set(ASM_DIALECT)
|
|
@ -76,7 +76,7 @@ LOCAL_SRC_FILES := am_map.c \
|
|||
android/i_system.c \
|
||||
android/i_video.c
|
||||
|
||||
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOASM -DNOPIX -DUNIXCOMMON -DNOTERMIOS
|
||||
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOPIX -DUNIXCOMMON -DNOTERMIOS
|
||||
|
||||
LOCAL_MODULE := libsrb2
|
||||
|
||||
|
|
|
@ -13,15 +13,7 @@ target_compile_features(SRB2SDL2 PRIVATE c_std_11 cxx_std_17)
|
|||
target_sourcefile(c)
|
||||
target_sources(SRB2SDL2 PRIVATE comptime.c md5.c config.h.in)
|
||||
|
||||
set(SRB2_ASM_SOURCES vid_copy.s)
|
||||
|
||||
set(SRB2_NASM_SOURCES tmap_mmx.nas tmap.nas)
|
||||
|
||||
### Configuration
|
||||
set(SRB2_CONFIG_USEASM OFF CACHE BOOL
|
||||
"Enable NASM tmap implementation for software mode speedup.")
|
||||
set(SRB2_CONFIG_YASM OFF CACHE BOOL
|
||||
"Use YASM in place of NASM.")
|
||||
set(SRB2_CONFIG_DEV_BUILD OFF CACHE BOOL
|
||||
"Compile a development build of SRB2.")
|
||||
|
||||
|
@ -78,33 +70,6 @@ if("${SRB2_CONFIG_HWRENDER}")
|
|||
endif()
|
||||
endif()
|
||||
|
||||
if(${SRB2_CONFIG_USEASM})
|
||||
#SRB2_ASM_FLAGS can be used to pass flags to either nasm or yasm.
|
||||
if("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
|
||||
set(SRB2_ASM_FLAGS "-DLINUX ${SRB2_ASM_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(${SRB2_CONFIG_YASM})
|
||||
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS} nas)
|
||||
set(CMAKE_ASM_YASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
|
||||
enable_language(ASM_YASM)
|
||||
else()
|
||||
set(CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS} nas)
|
||||
set(CMAKE_ASM_NASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
|
||||
enable_language(ASM_NASM)
|
||||
endif()
|
||||
|
||||
set(SRB2_USEASM ON)
|
||||
target_compile_definitions(SRB2SDL2 PRIVATE -DUSEASM)
|
||||
target_compile_options(SRB2SDL2 PRIVATE -msse3 -mfpmath=sse)
|
||||
|
||||
target_sources(SRB2SDL2 PRIVATE ${SRB2_ASM_SOURCES}
|
||||
${SRB2_NASM_SOURCES})
|
||||
else()
|
||||
set(SRB2_USEASM OFF)
|
||||
target_compile_definitions(SRB2SDL2 PRIVATE -DNONX86 -DNORUSEASM)
|
||||
endif()
|
||||
|
||||
# Targets
|
||||
|
||||
# If using CCACHE, then force it.
|
||||
|
|
23
src/Makefile
23
src/Makefile
|
@ -47,8 +47,6 @@
|
|||
# HAVE_MINIUPNPC=1 - Enable automated port forwarding.
|
||||
# Already enabled by default for 32-bit
|
||||
# Windows.
|
||||
# NOASM=1 - Disable hand optimized assembly code for the
|
||||
# Software renderer.
|
||||
# NOPNG=1 - Disable PNG graphics support. (TODO: double
|
||||
# check netplay compatible.)
|
||||
# NOCURL=1 - Disable libcurl--HTTP capability.
|
||||
|
@ -88,7 +86,6 @@
|
|||
# executable.
|
||||
# WINDOWSHELL=1 - Use Windows commands.
|
||||
# PREFIX= - Prefix to many commands, for cross compiling.
|
||||
# YASM=1 - Use Yasm instead of NASM assembler.
|
||||
# STABS=1 - ?
|
||||
# ECHO=1 - Print out each command in the build process.
|
||||
# NOECHOFILENAMES=1 - Don't print out each file that is being
|
||||
|
@ -148,22 +145,6 @@ OBJCOPY:=$(call Prefix,objcopy)
|
|||
OBJDUMP:=$(call Prefix,objdump)
|
||||
WINDRES:=$(call Prefix,windres)
|
||||
|
||||
ifdef YASM
|
||||
NASM?=yasm
|
||||
else
|
||||
NASM?=nasm
|
||||
endif
|
||||
|
||||
ifdef YASM
|
||||
ifdef STABS
|
||||
NASMOPTS?=-g stabs
|
||||
else
|
||||
NASMOPTS?=-g dwarf2
|
||||
endif
|
||||
else
|
||||
NASMOPTS?=-g
|
||||
endif
|
||||
|
||||
GZIP?=gzip
|
||||
GZIP_OPTS?=-9 -f -n
|
||||
ifdef WINDOWSHELL
|
||||
|
@ -187,8 +168,6 @@ makedir:=../make
|
|||
opts:=-DCOMPVERSION -g
|
||||
libs:=
|
||||
|
||||
nasm_format:=
|
||||
|
||||
# This is a list of variables names, of which if defined,
|
||||
# also defines the name as a macro to the compiler.
|
||||
passthru_opts:=
|
||||
|
@ -316,7 +295,6 @@ endif
|
|||
|
||||
LD:=$(CC)
|
||||
cc:=$(cc) $(opts)
|
||||
nasm=$(NASM) $(NASMOPTS) -f $(nasm_format)
|
||||
ifdef UPX
|
||||
upx=$(UPX) $(UPX_OPTS)
|
||||
endif
|
||||
|
@ -393,7 +371,6 @@ $(objdir)/%.$(1) : %.$(2) | $$$$(@D)/
|
|||
endef
|
||||
|
||||
$(eval $(call _recipe,o,c,$(cc) -c -o $$@ $$<))
|
||||
$(eval $(call _recipe,o,nas,$(nasm) -o $$@ $$<))
|
||||
$(eval $(call _recipe,o,s,$(cc) $(asflags) -c -o $$@ $$<))
|
||||
$(eval $(call _recipe,res,rc,$(windres) -i $$< -o $$@))
|
||||
|
||||
|
|
|
@ -18,13 +18,6 @@ opts+=-DHWRENDER
|
|||
sources+=$(call List,hardware/Sourcefile)
|
||||
endif
|
||||
|
||||
ifndef NOASM
|
||||
ifndef NONX86
|
||||
sources+=tmap.nas tmap_mmx.nas
|
||||
opts+=-DUSEASM
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef NOMD5
|
||||
sources+=md5.c
|
||||
endif
|
||||
|
|
|
@ -9,10 +9,6 @@ opts+=-DUNIXCOMMON -DLUA_USE_POSIX
|
|||
# instead of addresses
|
||||
libs+=-lm -rdynamic
|
||||
|
||||
ifndef nasm_format
|
||||
nasm_format:=elf -DLINUX
|
||||
endif
|
||||
|
||||
ifndef NOHW
|
||||
opts+=-I/usr/X11R6/include
|
||||
libs+=-L/usr/X11R6/lib
|
||||
|
@ -35,7 +31,6 @@ endif
|
|||
# FIXME: UNTESTED
|
||||
#ifdef SOLARIS
|
||||
#NOIPX=1
|
||||
#NOASM=1
|
||||
#opts+=-I/usr/local/include -I/opt/sfw/include \
|
||||
# -DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP
|
||||
#libs+=-L/opt/sfw/lib -lsocket -lnsl
|
||||
|
|
|
@ -39,7 +39,6 @@ else ifdef SOLARIS # FIXME: UNTESTED
|
|||
UNIX=1
|
||||
platform=solaris
|
||||
else ifdef CYGWIN32 # FIXME: UNTESTED
|
||||
nasm_format=win32
|
||||
platform=cygwin
|
||||
else ifdef MINGW
|
||||
ifdef MINGW64
|
||||
|
|
|
@ -56,13 +56,6 @@ SDL_LDFLAGS?=$(shell $(SDL_CONFIG) \
|
|||
$(eval $(call Propogate_flags,SDL))
|
||||
endif
|
||||
|
||||
# use the x86 asm code
|
||||
ifndef CYGWIN32
|
||||
ifndef NOASM
|
||||
USEASM=1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef MINGW
|
||||
ifndef NOSDLMAIN
|
||||
SDLMAIN=1
|
||||
|
|
|
@ -17,8 +17,6 @@ sources+=win32/Srb2win.rc
|
|||
opts+=-DSTDC_HEADERS
|
||||
libs+=-ladvapi32 -lkernel32 -lmsvcrt -luser32
|
||||
|
||||
nasm_format:=win32
|
||||
|
||||
SDL?=1
|
||||
|
||||
ifndef NOHW
|
||||
|
|
|
@ -81,7 +81,6 @@ mserv.c
|
|||
http-mserv.c
|
||||
i_tcp.c
|
||||
lzf.c
|
||||
vid_copy.s
|
||||
b_bot.c
|
||||
u_list.c
|
||||
lua_script.c
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
// SONIC ROBO BLAST 2
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
// Copyright (C) 1999-2023 by Sonic Team Junior.
|
||||
//
|
||||
// This program is free software distributed under the
|
||||
// terms of the GNU General Public License, version 2.
|
||||
// See the 'LICENSE' file for more details.
|
||||
//-----------------------------------------------------------------------------
|
||||
/// \file asm_defs.inc
|
||||
/// \brief must match the C structures
|
||||
|
||||
#ifndef __ASM_DEFS__
|
||||
#define __ASM_DEFS__
|
||||
|
||||
// this makes variables more noticeable,
|
||||
// and make the label match with C code
|
||||
|
||||
// Linux, unlike DOS, has no "_" 19990119 by Kin
|
||||
// and nasm needs .data code segs under linux 20010210 by metzgermeister
|
||||
// FIXME: nasm ignores these settings, so I put the macros into the makefile
|
||||
#ifdef __ELF__
|
||||
#define C(label) label
|
||||
#define CODE_SEG .data
|
||||
#else
|
||||
#define C(label) _##label
|
||||
#define CODE_SEG .text
|
||||
#endif
|
||||
|
||||
/* This is a more readable way to access the arguments passed from C code */
|
||||
/* PLEASE NOTE: it is supposed that all arguments passed from C code are */
|
||||
/* 32bit integer (INT32, long, and most *pointers) */
|
||||
#define ARG1 8(%ebp)
|
||||
#define ARG2 12(%ebp)
|
||||
#define ARG3 16(%ebp)
|
||||
#define ARG4 20(%ebp)
|
||||
#define ARG5 24(%ebp)
|
||||
#define ARG6 28(%ebp)
|
||||
#define ARG7 32(%ebp)
|
||||
#define ARG8 36(%ebp)
|
||||
#define ARG9 40(%ebp) //(c)tm ... Allegro by Shawn Hargreaves.
|
||||
|
||||
#endif
|
|
@ -3893,11 +3893,6 @@ static void Command_Version_f(void)
|
|||
else // 16-bit? 128-bit?
|
||||
CONS_Printf("Bits Unknown ");
|
||||
|
||||
// No ASM?
|
||||
#ifdef NOASM
|
||||
CONS_Printf("\x85" "NOASM " "\x80");
|
||||
#endif
|
||||
|
||||
// Debug build
|
||||
#ifdef _DEBUG
|
||||
CONS_Printf("\x85" "DEBUG " "\x80");
|
||||
|
|
278
src/p5prof.h
278
src/p5prof.h
|
@ -1,278 +0,0 @@
|
|||
/*********************************************************
|
||||
*
|
||||
* File: p5prof.h
|
||||
* By: Kevin Baca
|
||||
*
|
||||
* MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG
|
||||
* (WHICH MEANS WRITE THE LOW DWORD FIRST)
|
||||
*
|
||||
* Now in yer code do:
|
||||
* INT64 count,total;
|
||||
*
|
||||
* ...
|
||||
* RDMSR(0x10,&count); //inner loop count
|
||||
* total += count;
|
||||
* ...
|
||||
*
|
||||
* printf("0x%x %x", (INT32)total, *((INT32 *)&total+1));
|
||||
* // HIGH LOW
|
||||
*
|
||||
*********************************************************/
|
||||
/**\file
|
||||
\brief This file provides macros to profile your code.
|
||||
|
||||
Here's how they work...
|
||||
|
||||
As you may or may not know, the Pentium class of
|
||||
processors provides extremely fine grained profiling
|
||||
capabilities through the use of what are called
|
||||
Machine Specific Registers (MSRs). These registers
|
||||
can provide information about almost any aspect of
|
||||
CPU performance down to a single cycle.
|
||||
|
||||
The MSRs of interest for profiling are specified by
|
||||
indices 0x10, 0x11, 0x12, and 0x13. Here is a brief
|
||||
description of each of these registers:
|
||||
|
||||
MSR 0x10
|
||||
This register is simply a cycle counter.
|
||||
|
||||
MSR 0x11
|
||||
This register controls what type of profiling data
|
||||
will be gathered.
|
||||
|
||||
MSRs 0x12 and 0x13
|
||||
These registers gather the profiling data specified in
|
||||
MSR 0x11.
|
||||
|
||||
Each MSR is 64 bits wide. For the Pentium processor,
|
||||
only the lower 32 bits of MSR 0x11 are valid. Bits 0-15
|
||||
specify what data will be gathered in MSR 0x12. Bits 16-31
|
||||
specify what data will be gathered in MSR 0x13. Both sets
|
||||
of bits have the same format:
|
||||
|
||||
Bits 0-5 specify which hardware event will be tracked.
|
||||
Bit 6, if set, indicates events will be tracked in
|
||||
rings 0-2.
|
||||
Bit 7, if set, indicates events will be tracked in
|
||||
ring 3.
|
||||
Bit 8, if set, indicates cycles should be counted for
|
||||
the specified event. If clear, it indicates the
|
||||
number of events should be counted.
|
||||
|
||||
Two instructions are provided for manipulating the MSRs.
|
||||
RDMSR (Read Machine Specific Register) and WRMSR
|
||||
(Write Machine Specific Register). These opcodes were
|
||||
originally undocumented and therefore most assemblers don't
|
||||
recognize them. Their byte codes are provided in the
|
||||
macros below.
|
||||
|
||||
RDMSR takes the MSR index in ecx and returns the profile data
|
||||
in edx : eax.
|
||||
|
||||
WRMSR takes the MSR index in ecx and the profiling criteria
|
||||
in edx : eax.
|
||||
|
||||
Two profiling registers limits profiling capability to
|
||||
gathering only two types of information. The register
|
||||
usage can, however, be combined in interesting ways.
|
||||
For example, you can set one register to gather the
|
||||
number of a specific type of event while the other gathers
|
||||
the number of cycles for the same event. Or you can
|
||||
gather the number of two separate events while using
|
||||
MSR 0x10 to gather the number of cycles.
|
||||
|
||||
The enumerated list provides somewhat readable labels for
|
||||
the types of events that can be tracked.
|
||||
|
||||
For more information, get ahold of appendix H from the
|
||||
Intel Pentium programmer's manual (I don't remember the
|
||||
order number) or go to
|
||||
http://green.kaist.ac.kr/jwhahn/art3.htm.
|
||||
That's an article by Terje Mathisen where I got most of
|
||||
my information.
|
||||
|
||||
You may use this code however you wish. I hope it's
|
||||
useful and I hope I got everything right.
|
||||
|
||||
-Kevin
|
||||
|
||||
kbaca@skygames.com
|
||||
|
||||
*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
|
||||
#define RDTSC(_dst) \
|
||||
__asm__("
|
||||
.byte 0x0F,0x31
|
||||
movl %%edx,(%%edi)
|
||||
movl %%eax,4(%%edi)"\
|
||||
: : "D" (_dst) : "eax", "edx", "edi")
|
||||
|
||||
// the old code... swapped it
|
||||
// movl %%edx,(%%edi)
|
||||
// movl %%eax,4(%%edi)"
|
||||
#define RDMSR(_msri, _msrd) \
|
||||
__asm__("
|
||||
.byte 0x0F,0x32
|
||||
movl %%eax,(%%edi)
|
||||
movl %%edx,4(%%edi)"\
|
||||
: : "c" (_msri), "D" (_msrd) : "eax", "ecx", "edx", "edi")
|
||||
|
||||
#define WRMSR(_msri, _msrd) \
|
||||
__asm__("
|
||||
xorl %%edx,%%edx
|
||||
.byte 0x0F,0x30"\
|
||||
: : "c" (_msri), "a" (_msrd) : "eax", "ecx", "edx")
|
||||
|
||||
#define RDMSR_0x12_0x13(_msr12, _msr13) \
|
||||
__asm__("
|
||||
movl $0x12,%%ecx
|
||||
.byte 0x0F,0x32
|
||||
movl %%edx,(%%edi)
|
||||
movl %%eax,4(%%edi)
|
||||
movl $0x13,%%ecx
|
||||
.byte 0x0F,0x32
|
||||
movl %%edx,(%%esi)
|
||||
movl %%eax,4(%%esi)"\
|
||||
: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "edi")
|
||||
|
||||
#define ZERO_MSR_0x12_0x13() \
|
||||
__asm__("
|
||||
xorl %%edx,%%edx
|
||||
xorl %%eax,%%eax
|
||||
movl $0x12,%%ecx
|
||||
.byte 0x0F,0x30
|
||||
movl $0x13,%%ecx
|
||||
.byte 0x0F,0x30"\
|
||||
: : : "eax", "ecx", "edx")
|
||||
|
||||
#elif defined (__WATCOMC__)
|
||||
|
||||
extern void RDTSC(UINT32 *dst);
|
||||
#pragma aux RDTSC =\
|
||||
"db 0x0F,0x31"\
|
||||
"mov [edi],edx"\
|
||||
"mov [4+edi],eax"\
|
||||
parm [edi]\
|
||||
modify [eax edx edi];
|
||||
|
||||
extern void RDMSR(UINT32 msri, UINT32 *msrd);
|
||||
#pragma aux RDMSR =\
|
||||
"db 0x0F,0x32"\
|
||||
"mov [edi],edx"\
|
||||
"mov [4+edi],eax"\
|
||||
parm [ecx] [edi]\
|
||||
modify [eax ecx edx edi];
|
||||
|
||||
extern void WRMSR(UINT32 msri, UINT32 msrd);
|
||||
#pragma aux WRMSR =\
|
||||
"xor edx,edx"\
|
||||
"db 0x0F,0x30"\
|
||||
parm [ecx] [eax]\
|
||||
modify [eax ecx edx];
|
||||
|
||||
extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13);
|
||||
#pragma aux RDMSR_0x12_0x13 =\
|
||||
"mov ecx,0x12"\
|
||||
"db 0x0F,0x32"\
|
||||
"mov [edi],edx"\
|
||||
"mov [4+edi],eax"\
|
||||
"mov ecx,0x13"\
|
||||
"db 0x0F,0x32"\
|
||||
"mov [esi],edx"\
|
||||
"mov [4+esi],eax"\
|
||||
parm [edi] [esi]\
|
||||
modify [eax ecx edx edi esi];
|
||||
|
||||
extern void ZERO_MSR_0x12_0x13(void);
|
||||
#pragma aux ZERO_MSR_0x12_0x13 =\
|
||||
"xor edx,edx"\
|
||||
"xor eax,eax"\
|
||||
"mov ecx,0x12"\
|
||||
"db 0x0F,0x30"\
|
||||
"mov ecx,0x13"\
|
||||
"db 0x0F,0x30"\
|
||||
modify [eax ecx edx];
|
||||
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DataRead,
|
||||
DataWrite,
|
||||
DataTLBMiss,
|
||||
DataReadMiss,
|
||||
DataWriteMiss,
|
||||
WriteHitEM,
|
||||
DataCacheLinesWritten,
|
||||
DataCacheSnoops,
|
||||
DataCacheSnoopHit,
|
||||
MemAccessBothPipes,
|
||||
BankConflict,
|
||||
MisalignedDataRef,
|
||||
CodeRead,
|
||||
CodeTLBMiss,
|
||||
CodeCacheMiss,
|
||||
SegRegLoad,
|
||||
RESERVED0,
|
||||
RESERVED1,
|
||||
Branch,
|
||||
BTBHit,
|
||||
TakenBranchOrBTBHit,
|
||||
PipelineFlush,
|
||||
InstructionsExeced,
|
||||
InstructionsExecedVPipe,
|
||||
BusUtilizationClocks,
|
||||
PipelineStalledWriteBackup,
|
||||
PipelineStalledDateMemRead,
|
||||
PipeLineStalledWriteEM,
|
||||
LockedBusCycle,
|
||||
IOReadOrWriteCycle,
|
||||
NonCacheableMemRef,
|
||||
AGI,
|
||||
RESERVED2,
|
||||
RESERVED3,
|
||||
FPOperation,
|
||||
Breakpoint0Match,
|
||||
Breakpoint1Match,
|
||||
Breakpoint2Match,
|
||||
Breakpoint3Match,
|
||||
HWInterrupt,
|
||||
DataReadOrWrite,
|
||||
DataReadOrWriteMiss
|
||||
};
|
||||
|
||||
#define PROF_CYCLES (0x100)
|
||||
#define PROF_EVENTS (0x000)
|
||||
#define RING_012 (0x40)
|
||||
#define RING_3 (0x80)
|
||||
#define RING_0123 (RING_012 | RING_3)
|
||||
|
||||
/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/
|
||||
#define ProfSetProfiles(_msr12, _msr13)\
|
||||
{\
|
||||
UINT32 prof;\
|
||||
\
|
||||
prof = (_msr12) | ((_msr13) << 16);\
|
||||
WRMSR(0x11, prof);\
|
||||
}
|
||||
|
||||
/*void ProfBeginProfiles(void);*/
|
||||
#define ProfBeginProfiles()\
|
||||
ZERO_MSR_0x12_0x13();
|
||||
|
||||
/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/
|
||||
#define ProfGetProfiles(_msr12, _msr13)\
|
||||
RDMSR_0x12_0x13(_msr12, _msr13);
|
||||
|
||||
/*void ProfZeroTimer(void);*/
|
||||
#define ProfZeroTimer()\
|
||||
WRMSR(0x10, 0);
|
||||
|
||||
/*void ProfReadTimer(UINT32 timer[2]);*/
|
||||
#define ProfReadTimer(timer)\
|
||||
RDMSR(0x10, timer);
|
||||
|
||||
/*EOF*/
|
|
@ -179,8 +179,6 @@ CV_PossibleValue_t Color_cons_t[MAXSKINCOLORS+1];
|
|||
void R_InitTranslucencyTables(void)
|
||||
{
|
||||
// Load here the transparency lookup tables 'TRANSx0'
|
||||
// NOTE: the TRANSx0 resources MUST BE aligned on 64k for the asm
|
||||
// optimised code (in other words, transtables pointer low word is 0)
|
||||
transtables = Z_MallocAlign(NUMTRANSTABLES*0x10000, PU_STATIC,
|
||||
NULL, 16);
|
||||
|
||||
|
|
12
src/r_draw.h
12
src/r_draw.h
|
@ -225,18 +225,6 @@ void R_DrawTiltedTransSolidColorSpan_8(void);
|
|||
void R_DrawWaterSolidColorSpan_8(void);
|
||||
void R_DrawTiltedWaterSolidColorSpan_8(void);
|
||||
|
||||
#ifdef USEASM
|
||||
void ASMCALL R_DrawColumn_8_ASM(void);
|
||||
void ASMCALL R_DrawShadeColumn_8_ASM(void);
|
||||
void ASMCALL R_DrawTranslucentColumn_8_ASM(void);
|
||||
void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void);
|
||||
|
||||
void ASMCALL R_DrawColumn_8_MMX(void);
|
||||
|
||||
void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void);
|
||||
void ASMCALL R_DrawSpan_8_MMX(void);
|
||||
#endif
|
||||
|
||||
// ------------------
|
||||
// 16bpp DRAWING CODE
|
||||
// ------------------
|
||||
|
|
|
@ -31,20 +31,8 @@ static void prepare_rastertab(void);
|
|||
|
||||
static void R_RasterizeFloorSplat(floorsplat_t *pSplat, vector2_t *verts, vissprite_t *vis);
|
||||
|
||||
#ifdef USEASM
|
||||
void ASMCALL rasterize_segment_tex_asm(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2, INT32 tc, INT32 dir);
|
||||
#endif
|
||||
|
||||
static void rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2, INT32 tc, INT32 dir)
|
||||
{
|
||||
#ifdef USEASM
|
||||
if (R_ASM)
|
||||
{
|
||||
rasterize_segment_tex_asm(x1, y1, x2, y2, tv1, tv2, tc, dir);
|
||||
return;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
fixed_t xs, xe, count;
|
||||
fixed_t dx0, dx1;
|
||||
|
|
27
src/screen.c
27
src/screen.c
|
@ -44,10 +44,6 @@
|
|||
// SRB2Kart
|
||||
#include "r_fps.h" // R_GetFramerateCap
|
||||
|
||||
#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200))
|
||||
#define RUSEASM //MSC.NET can't patch itself
|
||||
#endif
|
||||
|
||||
// --------------------------------------------
|
||||
// assembly or c drawer routines for 8bpp/16bpp
|
||||
// --------------------------------------------
|
||||
|
@ -102,7 +98,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
|
|||
// Short and Tall sky drawer, for the current color mode
|
||||
void (*walldrawerfunc)(void);
|
||||
|
||||
boolean R_ASM = true;
|
||||
boolean R_486 = false;
|
||||
boolean R_586 = false;
|
||||
boolean R_MMX = false;
|
||||
|
@ -169,26 +164,6 @@ void SCR_SetDrawFuncs(void)
|
|||
spanfuncs_npo2[SPANDRAWFUNC_WATER] = R_DrawWaterSpan_NPO2_8;
|
||||
spanfuncs_npo2[SPANDRAWFUNC_TILTEDWATER] = R_DrawTiltedWaterSpan_NPO2_8;
|
||||
|
||||
#ifdef RUSEASM
|
||||
if (R_ASM)
|
||||
{
|
||||
if (R_MMX)
|
||||
{
|
||||
colfuncs[BASEDRAWFUNC] = R_DrawColumn_8_MMX;
|
||||
//colfuncs[COLDRAWFUNC_SHADE] = R_DrawShadeColumn_8_ASM;
|
||||
//colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_8_ASM;
|
||||
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_8_MMX;
|
||||
spanfuncs[BASEDRAWFUNC] = R_DrawSpan_8_MMX;
|
||||
}
|
||||
else
|
||||
{
|
||||
colfuncs[BASEDRAWFUNC] = R_DrawColumn_8_ASM;
|
||||
//colfuncs[COLDRAWFUNC_SHADE] = R_DrawShadeColumn_8_ASM;
|
||||
//colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_8_ASM;
|
||||
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_8_ASM;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* else if (vid.bpp > 1)
|
||||
{
|
||||
|
@ -271,8 +246,6 @@ void SCR_Startup(void)
|
|||
CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
|
||||
}
|
||||
|
||||
if (M_CheckParm("-noASM"))
|
||||
R_ASM = false;
|
||||
if (M_CheckParm("-486"))
|
||||
R_486 = true;
|
||||
if (M_CheckParm("-586"))
|
||||
|
|
|
@ -8,11 +8,6 @@ target_sources(SRB2SDL2 PRIVATE ogl_sdl.c)
|
|||
|
||||
target_sources(SRB2SDL2 PRIVATE i_threads.c)
|
||||
|
||||
if(${SRB2_USEASM})
|
||||
set_source_files_properties(${SRB2_ASM_SOURCES} PROPERTIES LANGUAGE C)
|
||||
set_source_files_properties(${SRB2_ASM_SOURCES} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
|
||||
endif()
|
||||
|
||||
if("${CMAKE_SYSTEM_NAME}" MATCHES Windows)
|
||||
target_sources(SRB2SDL2 PRIVATE
|
||||
../win32/win_dbg.c
|
||||
|
@ -68,18 +63,6 @@ if("${CMAKE_SYSTEM_NAME}" MATCHES Linux)
|
|||
target_link_libraries(SRB2SDL2 PRIVATE m rt)
|
||||
endif()
|
||||
|
||||
if(${SRB2_USEASM})
|
||||
if(${SRB2_CONFIG_YASM})
|
||||
set(ASM_ASSEMBLER_TEMP ${CMAKE_ASM_YASM_COMPILER})
|
||||
set(ASM_ASSEMBLER_OBJFORMAT ${CMAKE_ASM_YASM_OBJECT_FORMAT})
|
||||
set_source_files_properties(${SRB2_NASM_SOURCES} LANGUAGE ASM_YASM)
|
||||
else()
|
||||
set(ASM_ASSEMBLER_TEMP ${CMAKE_ASM_NASM_COMPILER})
|
||||
set(ASM_ASSEMBLER_OBJFORMAT ${CMAKE_ASM_NASM_OBJECT_FORMAT})
|
||||
set_source_files_properties(${SRB2_NASM_SOURCES} LANGUAGE ASM_NASM)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if("${CMAKE_SYSTEM_NAME}" MATCHES Windows)
|
||||
target_link_libraries(SRB2SDL2 PRIVATE
|
||||
ws2_32
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
|
||||
NOHW=1
|
||||
NOHS=1
|
||||
NOASM=1
|
||||
|
||||
OPTS+=-DLINUX
|
||||
|
||||
|
|
|
@ -70,39 +70,6 @@ char logfilename[1024];
|
|||
typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID);
|
||||
#endif
|
||||
|
||||
#if defined (_WIN32)
|
||||
/**
 * Re-protects the running executable's code as PAGE_EXECUTE_READWRITE.
 *
 * Only does anything when USEASM is defined. The range defaults to
 * BaseOfCode/SizeOfCode from the PE optional header and is narrowed to the
 * ".text" section when a section with exactly that name is found.
 * Calls I_Error if VirtualProtect fails.
 */
static inline VOID MakeCodeWritable(VOID)
{
#ifdef USEASM // Disable write-protection of code segment
	PBYTE imageBase = (PBYTE)GetModuleHandle(NULL);
	PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)imageBase;
	PIMAGE_NT_HEADERS ntHeaders = (PIMAGE_NT_HEADERS)(imageBase + dosHeader->e_lfanew);
	// The optional header sits right after the signature and file header.
	PIMAGE_OPTIONAL_HEADER optHeader = &ntHeaders->OptionalHeader;
	PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(ntHeaders);
	LPVOID codeStart = imageBase + optHeader->BaseOfCode;
	SIZE_T codeSize = optHeader->SizeOfCode;
	DWORD previousRights;
	WORD remaining;

	// Prefer the exact bounds of the .text section when it can be found.
	for (remaining = ntHeaders->FileHeader.NumberOfSections; remaining > 0; remaining--, section++)
	{
		if (memcmp(section->Name, ".text\0\0", 8) != 0)
			continue;

		codeStart = imageBase + section->VirtualAddress;
		codeSize = section->Misc.VirtualSize;
		break;
	}

	if (!VirtualProtect(codeStart, codeSize, PAGE_EXECUTE_READWRITE, &previousRights))
		I_Error("Could not make code writable\n");
#endif
}
|
||||
#endif
|
||||
|
||||
#ifdef LOGMESSAGES
|
||||
static void InitLogging(void)
|
||||
{
|
||||
|
@ -243,7 +210,6 @@ int main(int argc, char **argv)
|
|||
#ifndef __MINGW32__
|
||||
prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo);
|
||||
#endif
|
||||
MakeCodeWritable();
|
||||
#endif
|
||||
|
||||
// startup SRB2
|
||||
|
|
957
src/tmap.nas
957
src/tmap.nas
|
@ -1,957 +0,0 @@
|
|||
;; SONIC ROBO BLAST 2
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
;; Copyright (C) 1999-2023 by Sonic Team Junior.
|
||||
;;
|
||||
;; This program is free software distributed under the
|
||||
;; terms of the GNU General Public License, version 2.
|
||||
;; See the 'LICENSE' file for more details.
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; FILE:
|
||||
;; tmap.nas
|
||||
;; DESCRIPTION:
|
||||
;; Assembler optimised rendering code for software mode.
|
||||
;; Draw wall columns.
|
||||
|
||||
|
||||
[BITS 32]
|
||||
|
||||
%define FRACBITS 16
|
||||
%define TRANSPARENTPIXEL 255
|
||||
|
||||
; cextern <name> / cglobal <name>
;   Declare a symbol shared with the C code as extern or global.
;   Unless LINUX is defined, the bare name is first %define'd to its
;   underscore-prefixed form, so later uses of <name> in this file expand
;   to _<name>. With LINUX defined the name is used unchanged.
%ifndef LINUX

%macro cextern 1
    %define %1 _%1
    [extern %1]
%endmacro

%macro cglobal 1
    %define %1 _%1
    [global %1]
%endmacro

%else

%macro cextern 1
    [extern %1]
%endmacro

%macro cglobal 1
    [global %1]
%endmacro

%endif
|
||||
|
||||
|
||||
; Partial layout of viddef_s. Only the offset of the width field is used
; by this file; the bytes around it are reserved as anonymous padding.
struc viddef_s
                resd    3               ; 12 bytes ahead of width
    .width:     resd    1               ; 4-byte width field at offset 12
                resd    11              ; 44 bytes after width
endstruc
|
||||
|
||||
;; externs
|
||||
;; columns
|
||||
cextern dc_x
|
||||
cextern dc_yl
|
||||
cextern dc_yh
|
||||
cextern ylookup
|
||||
cextern columnofs
|
||||
cextern dc_source
|
||||
cextern dc_texturemid
|
||||
cextern dc_texheight
|
||||
cextern dc_iscale
|
||||
cextern dc_hires
|
||||
cextern centery
|
||||
cextern centeryfrac
|
||||
cextern dc_colormap
|
||||
cextern dc_transmap
|
||||
cextern colormaps
|
||||
cextern vid
|
||||
cextern topleft
|
||||
|
||||
; DELME
|
||||
cextern R_DrawColumn_8
|
||||
|
||||
; polygon edge rasterizer
|
||||
cextern prastertab
|
||||
|
||||
;; Scratch variables in writable data used by the column drawers below.
[SECTION .data]
|
||||
|
||||
;;.align 4
|
||||
;; loopcount: not referenced by any routine visible in this file.
loopcount dd 0
|
||||
;; pixelcount: column height (dc_yh - dc_yl + 1) saved by the translucent
;; and shade column drawers.
pixelcount dd 0
|
||||
;; tystep: holds the fractional texture step while ebp is reused as a
;; destination pointer in the unrolled loops.
tystep dd 0
|
||||
|
||||
[SECTION .text]
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_DrawColumn : 8bpp column drawer
|
||||
;;
|
||||
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
|
||||
;; Revised by G. Dick July 2010 to support the intervening twelve years'
|
||||
;; worth of changes to the renderer. Since I only vaguely know what I'm
|
||||
;; doing, this is probably rather suboptimal. Help appreciated!
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; fracstep, vid.width in memory
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = heightmask
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = frac
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
;; void R_DrawColumn_8_ASM(void)
;;   32-bit x86. Takes no stack arguments; all inputs come from the dc_*
;;   globals, ylookup, columnofs, centeryfrac and vid.
;;   Preserves ebp, esi, edi, ebx. Clobbers eax, ecx, edx, flags.
;;
;;   Registers in the drawing loops:
;;     ebx = dc_colormap     ecx = pixels left     edx = height mask / height
;;     esi = dc_source       edi = dest            ebp = frac (16.16)
cglobal R_DrawColumn_8_ASM
;       align   16
R_DrawColumn_8_ASM:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

        ;; dest = ylookup[dc_yl] + columnofs[dc_x];
        mov     ebp,[dc_yl]
        mov     edi,[ylookup+ebp*4]
        mov     ebx,[dc_x]
        add     edi,[columnofs+ebx*4]   ;; edi = dest

        ;; count = dc_yh - dc_yl + 1
        mov     ecx,[dc_yh]
        add     ecx,1
        sub     ecx,ebp                 ;; ecx = pixel count
        jle     near .done              ;; nothing to draw

        ;; frac = dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, dc_iscale)
        mov     eax,ebp                 ;; dc_yl
        shl     eax,FRACBITS
        sub     eax,[centeryfrac]
        imul    dword [dc_iscale]       ;; edx:eax = 64-bit product
        shrd    eax,edx,FRACBITS        ;; keep bits 16..47 of the product
        add     eax,[dc_texturemid]
        mov     ebp,eax                 ;; ebp = frac

        mov     ebx,[dc_colormap]
        mov     esi,[dc_source]

        ;; if (dc_hires) frac = 0;
        test    byte [dc_hires],0x01
        jz      .texheightcheck
        xor     ebp,ebp

        ;; (height-1) & height == 0 exactly when height is a power of two
.texheightcheck:
        mov     edx,[dc_texheight]
        sub     edx,1                   ;; edx = heightmask
        test    edx,[dc_texheight]
        jnz     .notpowertwo

        ;; The loop below draws two pixels per pass; an odd count enters at
        ;; the second half so that the first pass draws only one.
        test    ecx,0x01
        jnz     .odd

        ;; Power-of-two height: wrap the texture row by masking.
.powertwo:
        mov     eax,ebp                 ;; eax = frac
        sar     eax,FRACBITS            ;; integer part
        and     eax,edx                 ;; eax &= heightmask
        movzx   eax,byte [esi + eax]    ;; eax = texel
        add     ebp,[dc_iscale]         ;; frac += fracstep
        movzx   eax,byte [ebx+eax]      ;; map through colormap
        mov     [edi],al                ;; write pixel
        add     edi,[vid + viddef_s.width] ;; dest += vid.width

.odd:
        mov     eax,ebp                 ;; eax = frac
        sar     eax,FRACBITS            ;; integer part
        and     eax,edx                 ;; eax &= heightmask
        movzx   eax,byte [esi + eax]    ;; eax = texel
        add     ebp,[dc_iscale]         ;; frac += fracstep
        movzx   eax,byte [ebx+eax]      ;; map through colormap
        mov     [edi],al                ;; write pixel
        add     edi,[vid + viddef_s.width] ;; dest += vid.width

        sub     ecx,2                   ;; count -= 2
        jg      .powertwo

        jmp     .done

        ;; Any other height: keep frac inside [0, height << FRACBITS).
.notpowertwo:
        add     edx,1
        shl     edx,FRACBITS            ;; edx = height << FRACBITS
        test    ebp,ebp
        jns     .notpowtwoloop

.makefracpos:
        add     ebp,edx                 ;; frac is negative; make it positive
        js      .makefracpos

.notpowtwoloop:
        cmp     ebp,edx                 ;; reduce mod height
        jl      .writenonpowtwo
        sub     ebp,edx
        jmp     .notpowtwoloop

.writenonpowtwo:
        mov     eax,ebp                 ;; eax = frac
        sar     eax,FRACBITS            ;; integer part
        ;; Index the colormap explicitly, as the power-of-two path does.
        ;; Writing the texel into bl and reading [ebx] only works while
        ;; dc_colormap is 256-byte aligned.
        movzx   eax,byte [esi + eax]    ;; eax = texel
        add     ebp,[dc_iscale]         ;; frac += fracstep
        movzx   eax,byte [ebx+eax]      ;; map through colormap
        mov     [edi],al                ;; write pixel
        add     edi,[vid + viddef_s.width] ;; dest += vid.width

        sub     ecx,1
        jnz     .notpowtwoloop

.done:
        pop     ebx                     ;; restore register variables
        pop     edi
        pop     esi
        pop     ebp                     ;; restore caller's stack frame pointer
        ret
|
||||
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
|
||||
;; pixels.
|
||||
;;
|
||||
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
|
||||
;; Revised by G. Dick July 2010 to support the intervening twelve years'
|
||||
;; worth of changes to the renderer. Since I only vaguely know what I'm
|
||||
;; doing, this is probably rather suboptimal. Help appreciated!
|
||||
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; fracstep, vid.width in memory
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = heightmask
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = frac
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
;; void R_Draw2sMultiPatchColumn_8_ASM(void)
;;   32-bit x86. Same inputs and register use as R_DrawColumn_8_ASM, but a
;;   texel equal to TRANSPARENTPIXEL leaves the destination pixel untouched.
;;   Preserves ebp, esi, edi, ebx. Clobbers eax, ecx, edx, flags.
;;
;;   Registers in the drawing loops:
;;     ebx = dc_colormap     ecx = pixels left     edx = height mask / height
;;     esi = dc_source       edi = dest            ebp = frac (16.16)
cglobal R_Draw2sMultiPatchColumn_8_ASM
;       align   16
R_Draw2sMultiPatchColumn_8_ASM:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

        ;; dest = ylookup[dc_yl] + columnofs[dc_x];
        mov     ebp,[dc_yl]
        mov     edi,[ylookup+ebp*4]
        mov     ebx,[dc_x]
        add     edi,[columnofs+ebx*4]   ;; edi = dest

        ;; count = dc_yh - dc_yl + 1
        mov     ecx,[dc_yh]
        add     ecx,1
        sub     ecx,ebp                 ;; ecx = pixel count
        jle     near .done              ;; nothing to draw

        ;; frac = dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, dc_iscale)
        mov     eax,ebp                 ;; dc_yl
        shl     eax,FRACBITS
        sub     eax,[centeryfrac]
        imul    dword [dc_iscale]       ;; edx:eax = 64-bit product
        shrd    eax,edx,FRACBITS        ;; keep bits 16..47 of the product
        add     eax,[dc_texturemid]
        mov     ebp,eax                 ;; ebp = frac

        mov     ebx,[dc_colormap]
        mov     esi,[dc_source]

        ;; if (dc_hires) frac = 0;
        test    byte [dc_hires],0x01
        jz      .texheightcheck
        xor     ebp,ebp

        ;; (height-1) & height == 0 exactly when height is a power of two
.texheightcheck:
        mov     edx,[dc_texheight]
        sub     edx,1                   ;; edx = heightmask
        test    edx,[dc_texheight]
        jnz     .notpowertwo

        ;; Two pixels per pass; an odd count enters at the second half.
        test    ecx,0x01
        jnz     .odd

        ;; Power-of-two height: wrap the texture row by masking.
.powertwo:
        mov     eax,ebp                 ;; eax = frac
        sar     eax,FRACBITS            ;; integer part
        and     eax,edx                 ;; eax &= heightmask
        movzx   eax,byte [esi + eax]    ;; eax = texel
        add     ebp,[dc_iscale]         ;; frac += fracstep
        cmp     al,TRANSPARENTPIXEL     ;; transparent texel?
        je      .nextpowtwoeven         ;; then skip the write
        movzx   eax,byte [ebx+eax]      ;; map through colormap
        mov     [edi],al                ;; write pixel
.nextpowtwoeven:
        add     edi,[vid + viddef_s.width] ;; dest += vid.width

.odd:
        mov     eax,ebp                 ;; eax = frac
        sar     eax,FRACBITS            ;; integer part
        and     eax,edx                 ;; eax &= heightmask
        movzx   eax,byte [esi + eax]    ;; eax = texel
        add     ebp,[dc_iscale]         ;; frac += fracstep
        cmp     al,TRANSPARENTPIXEL     ;; transparent texel?
        je      .nextpowtwoodd          ;; then skip the write
        movzx   eax,byte [ebx+eax]      ;; map through colormap
        mov     [edi],al                ;; write pixel
.nextpowtwoodd:
        add     edi,[vid + viddef_s.width] ;; dest += vid.width

        sub     ecx,2                   ;; count -= 2
        jg      .powertwo

        jmp     .done

        ;; Any other height: keep frac inside [0, height << FRACBITS).
.notpowertwo:
        add     edx,1
        shl     edx,FRACBITS            ;; edx = height << FRACBITS
        test    ebp,ebp
        jns     .notpowtwoloop

.makefracpos:
        add     ebp,edx                 ;; frac is negative; make it positive
        js      .makefracpos

.notpowtwoloop:
        cmp     ebp,edx                 ;; reduce mod height
        jl      .writenonpowtwo
        sub     ebp,edx
        jmp     .notpowtwoloop

.writenonpowtwo:
        mov     eax,ebp                 ;; eax = frac
        sar     eax,FRACBITS            ;; integer part
        ;; Index the colormap explicitly, as the power-of-two path does.
        ;; Writing the texel into bl and reading [ebx] only works while
        ;; dc_colormap is 256-byte aligned.
        movzx   eax,byte [esi + eax]    ;; eax = texel
        add     ebp,[dc_iscale]         ;; frac += fracstep
        cmp     al,TRANSPARENTPIXEL     ;; transparent texel?
        je      .nextnonpowtwo          ;; then skip the write
        movzx   eax,byte [ebx+eax]      ;; map through colormap
        mov     [edi],al                ;; write pixel
.nextnonpowtwo:
        add     edi,[vid + viddef_s.width] ;; dest += vid.width

        sub     ecx,1
        jnz     .notpowtwoloop

.done:
        pop     ebx                     ;; restore register variables
        pop     edi
        pop     esi
        pop     ebp                     ;; restore caller's stack frame pointer
        ret
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;; R_DrawTranslucentColumn_8_ASM
|
||||
;;
|
||||
;; Vertical column texture drawer, with transparency. Replaces Doom2's
|
||||
;; 'fuzz' effect, which was not so beautiful.
|
||||
;; Transparency is always impressive in some way, don't know why...
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
;; R_DrawTranslucentColumn_8_ASM: draws one translucent 8bpp column.
;; Reads dc_yl, dc_yh, dc_x, ylookup, columnofs, dc_iscale, centery,
;; dc_texturemid, dc_colormap, dc_source and dc_transmap; writes the
;; pixelcount and tystep scratch variables. Saves/restores ebp, esi, edi, ebx.
;;
;; Per pixel: ah = texel, al = pixel already on screen, dl = [eax], then
;; dl = [edx], and dl is stored. This addresses the tables by overwriting the
;; low bytes of their base pointers, so it presumably expects dc_transmap to
;; have its low 16 bits clear and dc_colormap its low 8 bits clear -- TODO
;; confirm against the allocators.
;;
;; NOTE(review): every "add/sub reg,0x12345678" and "add reg,0x23456789"
;; immediate looks like a placeholder for a row stride to be patched at run
;; time. No routine in the visible part of this file patches the pf..pk or
;; q1..q4 labels -- confirm where (or whether) that happens.
;; NOTE(review): the texture row is masked with 0x7f and the quad counter
;; lives in the 8-bit ch, so only the low 8 bits of pixelcount/4 are kept.
cglobal R_DrawTranslucentColumn_8_ASM
|
||||
R_DrawTranslucentColumn_8_ASM:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov ebp,[dc_yl]
|
||||
mov ebx,ebp
|
||||
mov edi,[ylookup+ebx*4]
|
||||
mov ebx,[dc_x]
|
||||
add edi,[columnofs+ebx*4] ;; edi = dest
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov eax,[dc_yh]
|
||||
inc eax
|
||||
sub eax,ebp ;; pixel count
|
||||
mov [pixelcount],eax ;; save for final pixel (mov leaves the flags from sub intact)
|
||||
jle near vtdone ;; nothing to scale
|
||||
;;
|
||||
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
|
||||
;;
|
||||
mov ecx,[dc_iscale] ;; fracstep
|
||||
mov eax,[centery]
|
||||
sub eax,ebp
|
||||
imul eax,ecx
|
||||
mov edx,[dc_texturemid]
|
||||
sub edx,eax
|
||||
mov ebx,edx
|
||||
|
||||
shr ebx,16 ;; frac int.
|
||||
and ebx,0x7f
|
||||
shl edx,16 ;; y frac up
|
||||
|
||||
mov ebp,ecx
|
||||
shl ebp,16 ;; fracstep f. up
|
||||
shr ecx,16 ;; fracstep i. ->cl
|
||||
and cl,0x7f
|
||||
;; Merge: ecx = fractional part of frac in the upper 16 bits, cx kept as is.
push cx
|
||||
mov ecx,edx
|
||||
pop cx
|
||||
mov edx,[dc_colormap]
|
||||
mov esi,[dc_source]
|
||||
;;
|
||||
;; lets rock :) !
|
||||
;;
|
||||
mov eax,[pixelcount]
|
||||
shr eax,0x2
|
||||
test byte [pixelcount],0x3
|
||||
mov ch,al ;; quad count
|
||||
mov eax,[dc_transmap]
|
||||
je vt4quadloop ;; uses the flags of the test above; count is a multiple of 4
|
||||
;;
|
||||
;; do un-even pixel
|
||||
;;
|
||||
test byte [pixelcount],0x1
|
||||
je trf2
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add ecx,ebp ;; advance the fraction; ebp's low 16 bits are zero so cx is untouched
|
||||
adc bl,cl ;; integer step plus the carry out of the fraction
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
pf: add edi,0x12345678
|
||||
;;
|
||||
;; do two non-quad-aligned pixels
|
||||
;;
|
||||
trf2: test byte [pixelcount],0x2
|
||||
je trf3
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add ecx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
pg: add edi,0x12345678
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add ecx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
ph: add edi,0x12345678
|
||||
;;
|
||||
;; test if there was at least 4 pixels
|
||||
;;
|
||||
trf3: test ch,0xff ;; test quad count
|
||||
je near vtdone
|
||||
|
||||
;;
|
||||
;; ebp : fractional step << 16 on entry; saved to tystep, then reused as the second dest pointer
|
||||
;; edx : dc_colormap, with dl used as the lookup byte
|
||||
;; ebx : y i. lower 7 bits, masked for index
|
||||
;; ecx : upper 16 bits = y frac, ch = counter, cl = y step i.
|
||||
;; eax : dc_transmap, with ah = texel and al = dest pixel
|
||||
;; esi : source texture column
|
||||
;; edi : dest screen
|
||||
;;
|
||||
vt4quadloop:
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [tystep],ebp
|
||||
pi: add edi,0x12345678
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
pj: sub edi,0x12345678
|
||||
mov ebp,edi
|
||||
pk: sub edi,0x12345678
|
||||
jmp short inloop
|
||||
align 4
|
||||
;; Four pixels per pass, alternating between the edi and ebp pointers.
vtquadloop:
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q1: add ebp,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
inloop:
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q2: add edi,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [ebp+0x0],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q3: add ebp,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
|
||||
add ecx,[tystep]
|
||||
adc bl,cl
|
||||
q4: add edi,0x23456789
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov dl,[edx]
|
||||
mov [ebp],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
dec ch
|
||||
jne vtquadloop
|
||||
vtdone:
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;; R_DrawShadeColumn
|
||||
;;
|
||||
;; for smoke..etc.. test.
|
||||
;;----------------------------------------------------------------------
|
||||
;; R_DrawShadeColumn_8_ASM: draws one "shade" 8bpp column.
;; Reads dc_yl, dc_yh, dc_x, ylookup, columnofs, dc_iscale, centery,
;; dc_texturemid, dc_source and colormaps; writes the pixelcount and tystep
;; scratch variables. Saves/restores ebp, esi, edi, ebx.
;;
;; Per pixel: ah = texel, al = pixel already on screen, dl = [eax] with
;; eax based on [colormaps], and dl is stored. The texel thus selects a
;; 256-byte row and the screen pixel indexes into it; this presumably expects
;; the colormaps pointer to have its low 16 bits clear -- TODO confirm.
;;
;; NOTE(review): every "add/sub reg,0x12345678" immediate looks like a
;; placeholder for a row stride to be patched at run time. No routine in the
;; visible part of this file patches the pl..pq or q5..q8 labels -- confirm.
;; NOTE(review): the texture row is masked with 0x7f and the quad counter
;; lives in the 8-bit ch, so only the low 8 bits of pixelcount/4 are kept.
cglobal R_DrawShadeColumn_8_ASM
|
||||
R_DrawShadeColumn_8_ASM:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov ebp,[dc_yl]
|
||||
mov ebx,ebp
|
||||
mov edi,[ylookup+ebx*4]
|
||||
mov ebx,[dc_x]
|
||||
add edi,[columnofs+ebx*4] ;; edi = dest
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov eax,[dc_yh]
|
||||
inc eax
|
||||
sub eax,ebp ;; pixel count
|
||||
mov [pixelcount],eax ;; save for final pixel (mov leaves the flags from sub intact)
|
||||
jle near shdone ;; nothing to scale
|
||||
;;
|
||||
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
|
||||
;;
|
||||
mov ecx,[dc_iscale] ;; fracstep
|
||||
mov eax,[centery]
|
||||
sub eax,ebp
|
||||
imul eax,ecx
|
||||
mov edx,[dc_texturemid]
|
||||
sub edx,eax
|
||||
mov ebx,edx
|
||||
shr ebx,16 ;; frac int.
|
||||
and ebx,byte +0x7f
|
||||
shl edx,16 ;; y frac up
|
||||
|
||||
mov ebp,ecx
|
||||
shl ebp,16 ;; fracstep f. up
|
||||
shr ecx,16 ;; fracstep i. ->cl
|
||||
and cl,0x7f
|
||||
|
||||
mov esi,[dc_source]
|
||||
;;
|
||||
;; lets rock :) !
|
||||
;;
|
||||
mov eax,[pixelcount]
|
||||
mov dh,al ;; dh = low byte of the pixel count, tested below for the 1/2 leftovers
|
||||
shr eax,2
|
||||
mov ch,al ;; quad count
|
||||
mov eax,[colormaps]
|
||||
test dh,3
|
||||
je sh4quadloop
|
||||
;;
|
||||
;; do un-even pixel
|
||||
;;
|
||||
test dh,0x1
|
||||
je shf2
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add edx,ebp ;; advance the fraction; ebp's low 16 bits are zero so dh/dl are untouched
|
||||
adc bl,cl ;; integer step plus the carry out of the fraction
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov [edi],dl
|
||||
pl: add edi,0x12345678
|
||||
;;
|
||||
;; do two non-quad-aligned pixels
|
||||
;;
|
||||
shf2:
|
||||
test dh,0x2
|
||||
je shf3
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add edx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov [edi],dl
|
||||
pm: add edi,0x12345678
|
||||
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
add edx,ebp
|
||||
adc bl,cl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
and bl,0x7f
|
||||
mov dl,[eax]
|
||||
mov [edi],dl
|
||||
pn: add edi,0x12345678
|
||||
;;
|
||||
;; test if there was at least 4 pixels
|
||||
;;
|
||||
shf3:
|
||||
test ch,0xff ;; test quad count
|
||||
je near shdone
|
||||
|
||||
;;
|
||||
;; ebp : fractional step << 16 on entry; saved to tystep, then reused as the second dest pointer
|
||||
;; edx : upper 16 bits = y frac, dh = 0x7f index mask, dl = lookup byte
|
||||
;; ebx : y i. lower 7 bits, masked for index
|
||||
;; ecx : ch = counter, cl = y step i.
|
||||
;; eax : [colormaps], with ah = texel and al = dest pixel
|
||||
;; esi : source texture column
|
||||
;; edi : dest screen
|
||||
;;
|
||||
sh4quadloop:
|
||||
mov dh,0x7f ;; prep mask
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [tystep],ebp
|
||||
po: add edi,0x12345678
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
pp: sub edi,0x12345678
|
||||
mov ebp,edi
|
||||
pq: sub edi,0x12345678
|
||||
jmp short shinloop
|
||||
|
||||
align 4
|
||||
;; Four pixels per pass, alternating between the edi and ebp pointers.
shquadloop:
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q5: add ebp,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
shinloop:
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q6: add edi,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [ebp],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q7: add ebp,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [edi],dl
|
||||
mov al,[ebp] ;; fetch dest : index into colormap
|
||||
|
||||
add edx,[tystep]
|
||||
adc bl,cl
|
||||
and bl,dh
|
||||
q8: add edi,0x12345678
|
||||
mov dl,[eax]
|
||||
mov ah,[esi+ebx] ;; fetch texel : colormap number
|
||||
mov [ebp],dl
|
||||
mov al,[edi] ;; fetch dest : index into colormap
|
||||
|
||||
dec ch
|
||||
jne shquadloop
|
||||
|
||||
shdone:
|
||||
pop ebx ;; restore register variables
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp ;; restore caller's stack frame pointer
|
||||
ret
|
||||
|
||||
|
||||
;; ========================================================================
|
||||
;; Rasterization of the edge segments of a LINEARLY textured polygon.
|
||||
;; The texture coordinates along the texture edges are interpolated at
|
||||
;; the same time as the X coordinates minx/maxx of each line.
|
||||
;; The 'dir' argument indicates which texture edges are interpolated:
|
||||
;; 0: segments associated with the TOP and BOTTOM edges (constant TY)
|
||||
;; 1: segments associated with the LEFT and RIGHT edges (constant TX)
|
||||
;; ========================================================================
|
||||
;;
|
||||
;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
|
||||
;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
|
||||
;;
|
||||
;; For dir = 0, (tv1,tv2) = (tX1,tX2) and tc = tY, since TY is constant.
|
||||
;;
|
||||
;; For dir = 1, (tv1,tv2) = (tY1,tY2) and tc = tX, since TX is constant.
|
||||
;;
|
||||
;;
|
||||
;; Uses: extern struct rastery *prastertab;
|
||||
;;
|
||||
|
||||
;; Byte offsets of the fields of one rastertab[] entry, and the entry size.
MINX            EQU     0
MAXX            EQU     4
TX1             EQU     8
TY1             EQU     12
TX2             EQU     16
TY2             EQU     20
RASTERY_SIZEOF  EQU     24

;; void rasterize_segment_tex_asm(x1, y1, x2, y2, tv1, tv2, tc, dir)
;;   32-bit x86, arguments on the stack:
;;     [ebp+0x08] x1    [ebp+0x0c] y1    [ebp+0x10] x2    [ebp+0x14] y2
;;     [ebp+0x18] tv1   [ebp+0x1c] tv2   [ebp+0x20] tc    [ebp+0x24] dir
;;   Locals: DX = [ebp-4] (x step per line), TD = [ebp-8] (texture step).
;;
;;   Walks the lines between y1 and y2 in prastertab. A segment with
;;   y1 > y2 fills minx/tx1/ty1 starting at line y2; a segment with
;;   y2 > y1 fills maxx/tx2/ty2 starting at line y1. y1 == y2 does nothing.
;;   All stored values are 16.16 fixed point. With dir == 0 the
;;   interpolated value goes to tx and tc to ty; otherwise the reverse.
;;
;;   Preserves ebx, esi, edi, ebp and es. Clobbers eax, ecx, edx, flags.
cglobal rasterize_segment_tex_asm
rasterize_segment_tex_asm:
        push    ebp
        mov     ebp,esp

        sub     esp,byte +0x8           ;; allocate the local variables

        push    ebx
        push    esi
        push    edi
        o16 mov ax,es
        push    eax

        mov     eax,[ebp+0xc]           ;; y1
        mov     ebx,[ebp+0x14]          ;; y2
        cmp     ebx,eax
        je      near .L_finished        ;; y1 == y2: horizontal segment, exit
        jg      near .L_rasterize_right

        ;;
        ;; y1 > y2: rasterize a LEFT segment of the polygon
        ;;
        mov     ecx,eax
        sub     ecx,ebx
        inc     ecx                     ;; ecx = y1-y2+1

        mov     eax,RASTERY_SIZEOF
        mul     ebx                     ;; * y2
        mov     esi,[prastertab]
        add     esi,eax                 ;; esi -> rastertab[y2]

        mov     eax,[ebp+0x8]           ;; x1
        sub     eax,[ebp+0x10]          ;; - x2
        shl     eax,0x10                ;; (x1-x2) << 16
        cdq
        idiv    ecx                     ;; dx = ... / (y1-y2+1)
        mov     [ebp-0x4],eax           ;; DX

        mov     eax,[ebp+0x18]          ;; tv1
        sub     eax,[ebp+0x1c]          ;; - tv2
        shl     eax,0x10
        cdq
        idiv    ecx                     ;; td = ((tv1-tv2) << 16) / (y1-y2+1)
        mov     [ebp-0x8],eax           ;; TD

        mov     eax,[ebp+0x10]          ;; x2
        shl     eax,0x10                ;; eax = x  = x2 << 16

        mov     ebx,[ebp+0x1c]          ;; tv2
        shl     ebx,0x10                ;; ebx = tv = tv2 << 16 (interpolated)

        mov     edx,[ebp+0x20]          ;; tc
        shl     edx,0x10                ;; edx = tc << 16 (constant)

        push    ebp                     ;; ebp is reused as TD inside the loops
        mov     edi,[ebp-0x4]           ;; edi = DX
        cmp     dword [ebp+0x24],byte +0x0 ;; dir ?
        mov     ebp,[ebp-0x8]           ;; ebp = TD (mov keeps the flags)
        je      .L_rleft_h_loop

        ;;
        ;; TY varies, TX is constant
        ;;
.L_rleft_v_loop:
        mov     [esi+MINX],eax          ;; rastertab[y].minx = x
        add     eax,edi                 ;; x += dx
        mov     [esi+TX1],edx           ;;             .tx1 = tx
        ;; Store before stepping, as the h loop does; stepping first left
        ;; every line's ty1 one step ahead of its minx.
        mov     [esi+TY1],ebx           ;;             .ty1 = ty
        add     ebx,ebp                 ;; ty += tdy

        add     esi,RASTERY_SIZEOF      ;; next raster line in rastertab[]
        dec     ecx
        jne     .L_rleft_v_loop
        pop     ebp
        jmp     .L_finished

        ;;
        ;; TX varies, TY is constant
        ;;
.L_rleft_h_loop:
        mov     [esi+MINX],eax          ;; rastertab[y].minx = x
        add     eax,edi                 ;; x += dx
        mov     [esi+TX1],ebx           ;;             .tx1 = tx
        add     ebx,ebp                 ;; tx += tdx
        mov     [esi+TY1],edx           ;;             .ty1 = ty

        add     esi,RASTERY_SIZEOF      ;; next raster line in rastertab[]
        dec     ecx
        jne     .L_rleft_h_loop
        pop     ebp
        jmp     .L_finished

        ;;
        ;; y2 > y1: rasterize a RIGHT segment of the polygon
        ;;
.L_rasterize_right:
        mov     ecx,ebx
        sub     ecx,eax
        inc     ecx                     ;; ecx = y2-y1+1

        mov     ebx,RASTERY_SIZEOF
        mul     ebx                     ;; * y1
        mov     esi,[prastertab]
        add     esi,eax                 ;; esi -> rastertab[y1]

        mov     eax,[ebp+0x10]          ;; x2
        sub     eax,[ebp+0x8]           ;; - x1
        shl     eax,0x10                ;; (x2-x1) << 16
        cdq
        idiv    ecx                     ;; dx = ... / (y2-y1+1)
        mov     [ebp-0x4],eax           ;; DX

        mov     eax,[ebp+0x1c]          ;; tv2
        sub     eax,[ebp+0x18]          ;; - tv1
        shl     eax,0x10
        cdq
        idiv    ecx                     ;; td = ((tv2-tv1) << 16) / (y2-y1+1)
        mov     [ebp-0x8],eax           ;; TD

        mov     eax,[ebp+0x8]           ;; x1
        shl     eax,0x10                ;; eax = x  = x1 << 16

        mov     ebx,[ebp+0x18]          ;; tv1
        shl     ebx,0x10                ;; ebx = tv = tv1 << 16 (interpolated)

        mov     edx,[ebp+0x20]          ;; tc
        shl     edx,0x10                ;; edx = tc << 16 (constant)

        push    ebp                     ;; ebp is reused as TD inside the loops
        mov     edi,[ebp-0x4]           ;; edi = DX

        cmp     dword [ebp+0x24], 0     ;; dir ?

        mov     ebp,[ebp-0x8]           ;; ebp = TD (mov keeps the flags)
        je      .L_rright_h_loop

        ;;
        ;; TY varies, TX is constant
        ;;
.L_rright_v_loop:
        mov     [esi+MAXX],eax          ;; rastertab[y].maxx = x
        add     eax,edi                 ;; x += dx
        mov     [esi+TX2],edx           ;;             .tx2 = tx
        ;; Store before stepping, as the h loop does.
        mov     [esi+TY2],ebx           ;;             .ty2 = ty
        add     ebx,ebp                 ;; ty += tdy

        add     esi,RASTERY_SIZEOF
        dec     ecx
        jne     .L_rright_v_loop

        pop     ebp

        jmp     short .L_finished

        ;;
        ;; TX varies, TY is constant
        ;;
.L_rright_h_loop:
        mov     [esi+MAXX],eax          ;; rastertab[y].maxx = x
        add     eax,edi                 ;; x += dx
        mov     [esi+TX2],ebx           ;;             .tx2 = tx
        add     ebx,ebp                 ;; tx += tdx
        mov     [esi+TY2],edx           ;;             .ty2 = ty

        add     esi,RASTERY_SIZEOF
        dec     ecx
        jne     .L_rright_h_loop

        pop     ebp

.L_finished:
        pop     eax
        o16 mov es,ax
        pop     edi
        pop     esi
        pop     ebx

        mov     esp,ebp
        pop     ebp
        ret
|
1587
src/tmap.s
1587
src/tmap.s
File diff suppressed because it is too large
Load diff
322
src/tmap_asm.s
322
src/tmap_asm.s
|
@ -1,322 +0,0 @@
|
|||
// SONIC ROBO BLAST 2
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
// Copyright (C) 1999-2023 by Sonic Team Junior.
|
||||
//
|
||||
// This program is free software distributed under the
|
||||
// terms of the GNU General Public License, version 2.
|
||||
// See the 'LICENSE' file for more details.
|
||||
//-----------------------------------------------------------------------------
|
||||
/// \file tmap_asm.s
|
||||
/// \brief x86 assembly (AT&T syntax) 8bpp column drawers
|
||||
|
||||
//.comm _dc_colormap,4
|
||||
//.comm _dc_x,4
|
||||
//.comm _dc_yl,4
|
||||
//.comm _dc_yh,4
|
||||
//.comm _dc_iscale,4
|
||||
//.comm _dc_texturemid,4
|
||||
//.comm _dc_source,4
|
||||
//.comm _ylookup,4
|
||||
//.comm _columnofs,4
|
||||
//.comm _loopcount,4
|
||||
//.comm _pixelcount,4
|
||||
// Scratch variables for the routines in this file.
.data
|
||||
// _pixelcount and _loopcount are not referenced by the routines visible here.
_pixelcount:
|
||||
.long 0x00000000
|
||||
_loopcount:
|
||||
.long 0x00000000
|
||||
.align 8
|
||||
// _mmxcomm: _R_DrawColumn_8_K6_MMX stores its stack-alignment adjustment here.
_mmxcomm:
|
||||
.long 0x00000000
|
||||
.text
|
||||
|
||||
.align 4
|
||||
// _R_DrawColumn8_NOMMX: plain 8bpp column drawer, one pixel per iteration.
// Reads _dc_yl, _dc_yh, _dc_x, _ylookup, _columnofs, _dc_iscale,
// _dc_texturemid, _dc_source and _dc_colormap. Saves/restores ebp, esi,
// edi, ebx.
//
// Loop registers: ebx = pixels left, ecx = frac (16.16), edi = fracstep,
// ebp = source column, esi = dest, edx = texel (upper bytes zeroed).
//
// frac starts at dc_texturemid + dc_yl*dc_iscale and the texture row is
// (frac >> 16) & 127.
//
// NOTE(review): both $0x12345678 immediates look like placeholders for the
// row stride. The exported labels rdc8nwidth1/rdc8nwidth2 sit directly after
// the instructions carrying them, presumably so that external code can patch
// the 4 bytes just before each label -- confirm against the patching code.
.globl _R_DrawColumn8_NOMMX
|
||||
_R_DrawColumn8_NOMMX:
|
||||
pushl %ebp
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
pushl %ebx
|
||||
movl _dc_yl,%edx
|
||||
movl _dc_yh,%eax
|
||||
subl %edx,%eax
|
||||
leal 1(%eax),%ebx // ebx = dc_yh - dc_yl + 1 = pixel count
|
||||
testl %ebx,%ebx
|
||||
jle rdc8ndone // nothing to draw
|
||||
movl _dc_x,%eax
|
||||
movl _ylookup, %edi
|
||||
movl (%edi,%edx,4),%esi
|
||||
movl _columnofs, %edi
|
||||
addl (%edi,%eax,4),%esi // esi = ylookup[dc_yl] + columnofs[dc_x]
|
||||
movl _dc_iscale,%edi // edi = fracstep
|
||||
movl %edx,%eax
|
||||
imull %edi,%eax
|
||||
movl _dc_texturemid,%ecx
|
||||
addl %eax,%ecx // ecx = dc_texturemid + dc_yl*dc_iscale
|
||||
|
||||
movl _dc_source,%ebp
|
||||
xorl %edx, %edx // only dl is loaded in the loop; keep the upper bytes zero
|
||||
subl $0x12345678, %esi // step back once; the loop advances esi before each store
|
||||
.globl rdc8nwidth1
|
||||
rdc8nwidth1:
|
||||
.align 4,0x90
|
||||
rdc8nloop:
|
||||
movl %ecx,%eax
|
||||
shrl $16,%eax
|
||||
addl %edi,%ecx // frac += fracstep
|
||||
andl $127,%eax // texture row, wrapped to 0..127
|
||||
addl $0x12345678,%esi // advance dest by one row
|
||||
.globl rdc8nwidth2
|
||||
rdc8nwidth2:
|
||||
movb (%eax,%ebp),%dl // dl = texel
|
||||
movl _dc_colormap,%eax
|
||||
movb (%eax,%edx),%al // al = colormap[texel]
|
||||
movb %al,(%esi)
|
||||
decl %ebx
|
||||
jne rdc8nloop
|
||||
rdc8ndone:
|
||||
popl %ebx
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebp
|
||||
ret
|
||||
|
||||
//
|
||||
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
|
||||
// By ES 1998/08/01
|
||||
//
|
||||
|
||||
// _R_DrawColumn_8_Pentium: 8bpp column drawer with a two-pixels-per-pass,
// software-pipelined main loop and a separate single-pixel path.
// Reads _dc_yl, _dc_yh, _dc_x, _ylookup, _columnofs, _dc_iscale,
// _dc_texturemid, _dc_colormap and _dc_source. Saves/restores ebp, ebx,
// esi, edi.
//
// In the multi-pixel path frac and fracstep are shifted left by 9 and the
// texture row is taken with "shrl $25", i.e. bits 16..22 of the original
// 16.16 value (a 7-bit row, same as the "& 127" of the single-pixel path).
//
// The colormap lookup replaces only the low byte of the colormap pointer
// ("movb texel,%al" then "movb (%eax),..."), so it presumably expects
// _dc_colormap to be 256-byte aligned -- TODO confirm.
//
// NOTE(review): every 0x12345678 is a placeholder. The existing comments on
// the exported rdc8pwidth1..6 labels give the intended multiple of
// SCREENWIDTH; the labels directly follow the instructions carrying the
// placeholders, presumably so external code can patch them -- confirm.
.globl _R_DrawColumn_8_Pentium
|
||||
_R_DrawColumn_8_Pentium:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
movl _dc_yl,%eax // Top pixel
|
||||
movl _dc_yh,%ebx // Bottom pixel
|
||||
movl _ylookup, %edi
|
||||
movl (%edi,%ebx,4),%ecx // ecx = ylookup[dc_yh] (bottom row)
|
||||
subl %eax,%ebx // ebx=number of pixels-1
|
||||
jl rdc8pdone // no pixel to draw, done
|
||||
jnz rdc8pmany
|
||||
movl _dc_x,%edx // Special case: only one pixel
|
||||
movl _columnofs, %edi
|
||||
addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
|
||||
movl _dc_iscale,%esi
|
||||
imull %esi,%eax
|
||||
movl _dc_texturemid,%edi
|
||||
addl %eax,%edi // texture index in edi
|
||||
movl _dc_colormap,%edx
|
||||
shrl $16, %edi
|
||||
movl _dc_source,%ebp
|
||||
andl $127,%edi
|
||||
movb (%edi,%ebp),%dl // read texture pixel
|
||||
movb (%edx),%al // lookup for light
|
||||
movb %al,0(%ecx) // write it
|
||||
jmp rdc8pdone // done!
|
||||
.align 4, 0x90
|
||||
rdc8pmany: // draw >1 pixel
|
||||
movl _dc_x,%edx
|
||||
movl _columnofs, %edi
|
||||
movl (%edi,%edx,4),%edx
|
||||
leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
|
||||
.globl rdc8pwidth5
|
||||
rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
|
||||
movl _dc_iscale,%edx // edx = fracstep
|
||||
imull %edx,%eax
|
||||
shll $9, %edx // fixme: Should get 7.25 fix as input
|
||||
movl _dc_texturemid,%ecx
|
||||
addl %eax,%ecx // ecx = frac
|
||||
movl _dc_colormap,%eax // eax = lighting/special effects LUT
|
||||
shll $9, %ecx
|
||||
movl _dc_source,%esi // esi = source ptr
|
||||
|
||||
imull $0x12345678, %ebx // ebx = negative offset to pixel
|
||||
.globl rdc8pwidth6
|
||||
rdc8pwidth6: // DeadBeef = -SCREENWIDTH
|
||||
|
||||
// Begin the calculation of the two first pixels
|
||||
leal (%ecx, %edx), %ebp
|
||||
shrl $25, %ecx
|
||||
movb (%esi, %ecx), %al
|
||||
leal (%edx, %ebp), %ecx
|
||||
shrl $25, %ebp
|
||||
movb (%eax), %dl
|
||||
|
||||
// The main loop
|
||||
rdc8ploop:
|
||||
movb (%esi,%ebp), %al // load 1
|
||||
leal (%ecx, %edx), %ebp // calc frac 3
|
||||
|
||||
shrl $25, %ecx // shift frac 2
|
||||
movb %dl, 0x12345678(%edi, %ebx)// store 0
|
||||
.globl rdc8pwidth1
|
||||
rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
|
||||
|
||||
movb (%eax), %al // lookup 1
|
||||
|
||||
movb %al, 0x12345678(%edi, %ebx)// store 1
|
||||
.globl rdc8pwidth2
|
||||
rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
|
||||
movb (%esi, %ecx), %al // load 2
|
||||
|
||||
leal (%ebp, %edx), %ecx // calc frac 4
|
||||
|
||||
shrl $25, %ebp // shift frac 3
|
||||
movb (%eax), %dl // lookup 2
|
||||
|
||||
addl $0x12345678, %ebx // counter
|
||||
.globl rdc8pwidth3
|
||||
rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
|
||||
jl rdc8ploop // loop
|
||||
|
||||
// End of loop. Write extra pixel or just exit.
|
||||
jnz rdc8pdone // still the flags of the addl above: nonzero offset means no pixel is left
|
||||
movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
|
||||
.globl rdc8pwidth4
|
||||
rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
|
||||
|
||||
rdc8pdone:
|
||||
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
|
||||
//
|
||||
// MMX asm version, optimised for K6
|
||||
// By ES 1998/07/05
|
||||
//
// Same job as the other 8bpp column drawers: rows dc_yl..dc_yh of column dc_x,
// texels from dc_source translated through dc_colormap. All inputs are
// globals; ebp/ebx/esi/edi are saved and restored.
//
// An 8-byte scratch slot at (%esp) is used to move values between the integer
// and MMX registers. The amount subtracted from esp to create it is kept in
// the global _mmxcomm and added back at rdc8mdone, so the routine is not
// reentrant.
//
// Every 0x12345678 is a placeholder (immediates and one jump target); the
// label exported right after each such instruction marks its end.
// NOTE(review): presumably C code patches these, and relocates the routine
// (see the padding after R_DrawColumn_8_K6_MMX_end) - confirm.
|
||||
|
||||
.globl _R_DrawColumn_8_K6_MMX
|
||||
_R_DrawColumn_8_K6_MMX:
|
||||
pushl %ebp
|
||||
pushl %ebx
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
|
||||
movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
|
||||
andl $7,%eax
|
||||
addl $8,%eax
|
||||
movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
|
||||
subl %eax,%esp
|
||||
|
||||
movl _dc_yl,%edx // Top pixel
|
||||
movl _dc_yh,%ebx // Bottom pixel
|
||||
movl _ylookup, %edi
|
||||
movl (%edi,%ebx,4),%ecx
|
||||
subl %edx,%ebx // ebx=number of pixels-1
|
||||
jl 0x12345678 // no pixel to draw, done
|
||||
.globl rdc8moffs1
|
||||
rdc8moffs1:
|
||||
jnz rdc8mmany
|
||||
movl _dc_x,%eax // Special case: only one pixel
|
||||
movl _columnofs, %edi
|
||||
addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
|
||||
movl _dc_iscale,%esi
|
||||
imull %esi,%edx
|
||||
movl _dc_texturemid,%edi
|
||||
addl %edx,%edi // texture index in edi
|
||||
movl _dc_colormap,%edx
|
||||
shrl $16, %edi
|
||||
movl _dc_source,%ebp
|
||||
andl $127,%edi
|
||||
movb (%edi,%ebp),%dl // read texture pixel
|
||||
movb (%edx),%al // lookup for light
|
||||
movb %al,0(%ecx) // write it
|
||||
jmp rdc8mdone // done!
|
||||
.globl rdc8moffs2
|
||||
rdc8moffs2:
|
||||
.align 4, 0x90
|
||||
rdc8mmany: // draw >1 pixel
|
||||
movl _dc_x,%eax
|
||||
movl _columnofs, %edi
|
||||
movl (%edi,%eax,4),%eax
|
||||
leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
|
||||
.globl rdc8mwidth3
|
||||
rdc8mwidth3: // placeholder above stands for -2*SCREENWIDTH
|
||||
movl _dc_iscale,%ecx // ecx = fracstep
|
||||
imull %ecx,%edx
|
||||
shll $9, %ecx // fixme: Should get 7.25 fix as input
|
||||
movl _dc_texturemid,%eax
|
||||
addl %edx,%eax // eax = frac
|
||||
movl _dc_colormap,%edx // edx = lighting/special effects LUT
|
||||
shll $9, %eax
|
||||
leal (%ecx, %ecx), %edi
|
||||
movl _dc_source,%ebp // ebp = source ptr
|
||||
movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
|
||||
|
||||
imull $0x12345678, %ebx // ebx = negative offset to pixel
|
||||
.globl rdc8mwidth5
|
||||
rdc8mwidth5: // placeholder above stands for -SCREENWIDTH
|
||||
|
||||
movl %edi, 4(%esp)
|
||||
leal (%eax, %ecx), %edi
|
||||
movq 0(%esp), %mm1 // fracstep:fracstep in mm1
|
||||
movl %eax, 0(%esp)
|
||||
shrl $25, %eax
|
||||
movl %edi, 4(%esp)
|
||||
movzbl (%ebp, %eax), %eax
|
||||
movq 0(%esp), %mm0 // frac:frac in mm0
|
||||
|
||||
paddd %mm1, %mm0
|
||||
shrl $25, %edi
|
||||
movq %mm0, %mm2
|
||||
psrld $25, %mm2 // texture index in mm2
|
||||
paddd %mm1, %mm0
|
||||
movq %mm2, 0(%esp)
|
||||
|
||||
.globl rdc8mloop
|
||||
rdc8mloop: // The main loop
|
||||
movq %mm0, %mm2 // move 4-5 to temp reg
|
||||
movzbl (%ebp, %edi), %edi // read 1
|
||||
|
||||
psrld $25, %mm2 // shift 4-5
|
||||
movb (%edx,%eax), %cl // lookup 0
|
||||
|
||||
movl 0(%esp), %eax // load 2
|
||||
addl $0x12345678, %ebx // counter
|
||||
.globl rdc8mwidth2
|
||||
rdc8mwidth2: // placeholder above stands for 2*SCREENWIDTH
|
||||
|
||||
movb %cl, (%esi, %ebx) // write 0
|
||||
movb (%edx,%edi), %ch // lookup 1
|
||||
|
||||
movb %ch, 0x12345678(%esi, %ebx) // write 1
|
||||
.globl rdc8mwidth1
|
||||
rdc8mwidth1: // placeholder above stands for SCREENWIDTH
|
||||
movl 4(%esp), %edi // load 3
|
||||
|
||||
paddd %mm1, %mm0 // frac 6-7
|
||||
movzbl (%ebp, %eax), %eax // lookup 2
|
||||
|
||||
movq %mm2, 0(%esp) // store texture index 4-5
// The two branches below still test the flags of the "counter" addl: none of
// the mov/movzbl/movq/paddd instructions in between modify EFLAGS.
|
||||
jl rdc8mloop
|
||||
|
||||
jnz rdc8mno_odd
|
||||
movb (%edx,%eax), %cl // write the last odd pixel
|
||||
movb %cl, 0x12345678(%esi)
|
||||
.globl rdc8mwidth4
|
||||
rdc8mwidth4: // placeholder above stands for 2*SCREENWIDTH
|
||||
rdc8mno_odd:
|
||||
|
||||
.globl rdc8mdone
|
||||
rdc8mdone:
|
||||
emms
|
||||
|
||||
addl _mmxcomm, %esp
|
||||
popl %edi
|
||||
popl %esi
|
||||
popl %ebx
|
||||
popl %ebp
|
||||
ret
|
||||
|
||||
// Need some extra space to align run-time
|
||||
.globl R_DrawColumn_8_K6_MMX_end
|
||||
R_DrawColumn_8_K6_MMX_end:
|
||||
nop;nop;nop;nop;nop;nop;nop;nop;
|
||||
nop;nop;nop;nop;nop;nop;nop;nop;
|
||||
nop;nop;nop;nop;nop;nop;nop;nop;
|
||||
nop;nop;nop;nop;nop;nop;nop;
|
674
src/tmap_mmx.nas
674
src/tmap_mmx.nas
|
@ -1,674 +0,0 @@
|
|||
;; SONIC ROBO BLAST 2
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; Copyright (C) 1998-2000 by DOSDOOM.
|
||||
;; Copyright (C) 2010-2023 by Sonic Team Junior.
|
||||
;;
|
||||
;; This program is free software distributed under the
|
||||
;; terms of the GNU General Public License, version 2.
|
||||
;; See the 'LICENSE' file for more details.
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; FILE:
|
||||
;; tmap_mmx.nas
|
||||
;; DESCRIPTION:
|
||||
;; Assembler optimised rendering code for software mode, using SIMD
|
||||
;; instructions.
|
||||
;; Draw wall columns.
|
||||
|
||||
|
||||
[BITS 32]
|
||||
|
||||
%define FRACBITS 16
|
||||
%define TRANSPARENTPIXEL 255
|
||||
|
||||
;; cextern NAME / cglobal NAME
;;
;; Declare a C symbol as [extern] / [global]. Unless LINUX is defined, the
;; bare name is first %define'd to its underscore-prefixed form, so the rest
;; of the file can keep using the unprefixed spelling.

%macro cextern 1
  %ifndef LINUX
    %define %1 _%1
  %endif
[extern %1]
%endmacro

%macro cglobal 1
  %ifndef LINUX
    %define %1 _%1
  %endif
[global %1]
%endmacro
|
||||
|
||||
|
||||
; The viddef_s structure. We only need the width field.
; Layout as declared here: 12 bytes of leading fields, a 4-byte width at
; offset 12, then 44 more bytes (60 bytes total).
; NOTE(review): these sizes are hard-coded and presumably have to mirror the
; C-side video structure behind the `vid` symbol - confirm when that changes.
|
||||
struc viddef_s
|
||||
resb 12
|
||||
.width: resb 4
|
||||
resb 44
|
||||
endstruc
|
||||
|
||||
|
||||
;; externs
|
||||
;; columns
|
||||
cextern dc_colormap
|
||||
cextern dc_x
|
||||
cextern dc_yl
|
||||
cextern dc_yh
|
||||
cextern dc_iscale
|
||||
cextern dc_texturemid
|
||||
cextern dc_texheight
|
||||
cextern dc_source
|
||||
cextern dc_hires
|
||||
cextern centery
|
||||
cextern centeryfrac
|
||||
cextern dc_transmap
|
||||
|
||||
cextern R_DrawColumn_8_ASM
|
||||
cextern R_Draw2sMultiPatchColumn_8_ASM
|
||||
|
||||
;; spans
|
||||
cextern nflatshiftup
|
||||
cextern nflatxshift
|
||||
cextern nflatyshift
|
||||
cextern nflatmask
|
||||
cextern ds_xfrac
|
||||
cextern ds_yfrac
|
||||
cextern ds_xstep
|
||||
cextern ds_ystep
|
||||
cextern ds_x1
|
||||
cextern ds_x2
|
||||
cextern ds_y
|
||||
cextern ds_source
|
||||
cextern ds_colormap
|
||||
|
||||
cextern ylookup
|
||||
cextern columnofs
|
||||
cextern vid
|
||||
|
||||
[SECTION .data]
|
||||
|
||||
nflatmask64 dq 0
|
||||
|
||||
|
||||
[SECTION .text]
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_DrawColumn : 8bpp column drawer
|
||||
;;
|
||||
;; MMX column drawer.
|
||||
;;
;; Takes no stack arguments; every input is a dc_* global (plus ylookup,
;; columnofs, centeryfrac and vid.width). ebp/esi/edi/ebx are preserved.
;; If dc_texheight is not a power of two the work is handed to
;; R_DrawColumn_8_ASM instead.
;;
;; Note on the register table below: ebp holds the heightmask until
;; .pairprepare and vid.width only afterwards; mm2 holds a single
;; fracstep until it is doubled in .pairprepare.
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = accumulator
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = vid.width
|
||||
;; mm0 = accumulator
|
||||
;; mm1 = heightmask, twice
|
||||
;; mm2 = 2 * fracstep, twice
|
||||
;; mm3 = pair of consecutive fracs
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
|
||||
cglobal R_DrawColumn_8_MMX
|
||||
R_DrawColumn_8_MMX:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
|
||||
;;
|
||||
;; Our algorithm requires that the texture height be a power of two.
|
||||
;; If not, fall back to the non-MMX drawer.
|
||||
;;
|
||||
.texheightcheck:
|
||||
mov edx, [dc_texheight]
|
||||
sub edx, 1 ;; edx = heightmask
|
||||
test edx, [dc_texheight]
|
||||
jnz near .usenonMMX
|
||||
|
||||
mov ebp, edx ;; Keep a copy of heightmask in a
|
||||
;; GPR for the time being.
|
||||
|
||||
;;
|
||||
;; Fill mm1 with heightmask
|
||||
;;
|
||||
movd mm1, edx ;; low dword = heightmask
|
||||
punpckldq mm1, mm1 ;; copy low dword to high dword
|
||||
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov eax, [dc_yl]
|
||||
mov edi, [ylookup+eax*4]
|
||||
mov ebx, [dc_x]
|
||||
add edi, [columnofs+ebx*4] ;; edi = dest
|
||||
|
||||
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov ecx, [dc_yh]
|
||||
add ecx, 1
|
||||
sub ecx, eax ;; pixel count
|
||||
jle near .done ;; nothing to scale
|
||||
|
||||
;;
|
||||
;; fracstep = dc_iscale;
|
||||
;;
|
||||
movd mm2, [dc_iscale] ;; fracstep in low dword
|
||||
punpckldq mm2, mm2 ;; copy to high dword
|
||||
|
||||
mov ebx, [dc_colormap]
|
||||
mov esi, [dc_source]
|
||||
|
||||
;;
|
||||
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
|
||||
;;
|
||||
;; eax == dc_yl already
|
||||
shl eax, FRACBITS
|
||||
sub eax, [centeryfrac]
|
||||
imul dword [dc_iscale]
;; the one-operand imul leaves the 64-bit product in edx:eax (edx's old value is gone)
|
||||
shrd eax, edx, FRACBITS
|
||||
add eax, [dc_texturemid]
|
||||
|
||||
;;
|
||||
;; if (dc_hires) frac = 0;
|
||||
;;
|
||||
test byte [dc_hires], 0x01
|
||||
jz .mod2
|
||||
xor eax, eax
|
||||
|
||||
|
||||
;;
|
||||
;; Do mod-2 pixel.
|
||||
;;
;; The pair loop only handles even counts, so an odd count draws one
;; pixel here first.
|
||||
.mod2:
|
||||
test ecx, 1
|
||||
jz .pairprepare
|
||||
mov edx, eax ;; edx = frac
|
||||
add eax, [dc_iscale] ;; eax += fracstep
|
||||
sar edx, FRACBITS
|
||||
and edx, ebp ;; edx &= heightmask
|
||||
movzx edx, byte [esi + edx]
|
||||
movzx edx, byte [ebx + edx]
|
||||
mov [edi], dl
|
||||
|
||||
add edi, [vid + viddef_s.width]
|
||||
sub ecx, 1
|
||||
jz .done
|
||||
|
||||
.pairprepare:
|
||||
;;
|
||||
;; Prepare for the main loop.
|
||||
;;
|
||||
movd mm3, eax ;; Low dword = frac
|
||||
movq mm4, mm3 ;; Copy to intermediate register
|
||||
paddd mm4, mm2 ;; dwords of mm4 += fracstep
|
||||
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
|
||||
pslld mm2, 1 ;; fracstep *= 2
|
||||
|
||||
;;
|
||||
;; ebp = vid.width
|
||||
;;
|
||||
mov ebp, [vid + viddef_s.width]
|
||||
|
||||
align 16
|
||||
.pairloop:
|
||||
movq mm0, mm3 ;; 3B 1u.
|
||||
psrad mm0, FRACBITS ;; 4B 1u.
|
||||
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
|
||||
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
|
||||
|
||||
movd eax, mm0 ;; 3B 1u. Get first frac
|
||||
;; IFETCH boundary
|
||||
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
|
||||
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
|
||||
|
||||
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
|
||||
movd edx, mm0 ;; 3B 1u. Get second frac
|
||||
mov [edi], al ;; 2B 1(2)u. First pixel
|
||||
;; IFETCH boundary
|
||||
|
||||
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
|
||||
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
|
||||
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
|
||||
|
||||
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
|
||||
;; IFETCH boundary
|
||||
sub ecx, 2 ;; 3B 1u. count -= 2
|
||||
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
|
||||
|
||||
|
||||
.done:
|
||||
;;
|
||||
;; Clear MMX state, or else FPU operations will go badly awry.
|
||||
;;
|
||||
emms
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
.usenonMMX:
;; Non-power-of-two height: let the non-MMX drawer do the whole column,
;; then leave through .done so the saved registers are popped.
|
||||
call R_DrawColumn_8_ASM
|
||||
jmp .done
|
||||
|
||||
|
||||
;;----------------------------------------------------------------------
|
||||
;;
|
||||
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
|
||||
;; pixels.
|
||||
;;
|
||||
;; MMX column drawer.
|
||||
;;
;; Takes no stack arguments; every input is a dc_* global (plus ylookup,
;; columnofs, centeryfrac and vid.width). ebp/esi/edi/ebx are preserved.
;; A texel equal to TRANSPARENTPIXEL is skipped: the destination byte is
;; left untouched but dest and frac still advance.
;; If dc_texheight is not a power of two the work is handed to
;; R_Draw2sMultiPatchColumn_8_ASM instead.
;;
;; Note on the register table below: ebp holds the heightmask until
;; .pairprepare and vid.width only afterwards; mm2 holds a single
;; fracstep until it is doubled in .pairprepare.
;;
|
||||
;;----------------------------------------------------------------------
|
||||
;; eax = accumulator
|
||||
;; ebx = colormap
|
||||
;; ecx = count
|
||||
;; edx = accumulator
|
||||
;; esi = source
|
||||
;; edi = dest
|
||||
;; ebp = vid.width
|
||||
;; mm0 = accumulator
|
||||
;; mm1 = heightmask, twice
|
||||
;; mm2 = 2 * fracstep, twice
|
||||
;; mm3 = pair of consecutive fracs
|
||||
;;----------------------------------------------------------------------
|
||||
|
||||
|
||||
cglobal R_Draw2sMultiPatchColumn_8_MMX
|
||||
R_Draw2sMultiPatchColumn_8_MMX:
|
||||
push ebp ;; preserve caller's stack frame pointer
|
||||
push esi ;; preserve register variables
|
||||
push edi
|
||||
push ebx
|
||||
|
||||
;;
|
||||
;; Our algorithm requires that the texture height be a power of two.
|
||||
;; If not, fall back to the non-MMX drawer.
|
||||
;;
|
||||
.texheightcheck:
|
||||
mov edx, [dc_texheight]
|
||||
sub edx, 1 ;; edx = heightmask
|
||||
test edx, [dc_texheight]
|
||||
jnz near .usenonMMX
|
||||
|
||||
mov ebp, edx ;; Keep a copy of heightmask in a
|
||||
;; GPR for the time being.
|
||||
|
||||
;;
|
||||
;; Fill mm1 with heightmask
|
||||
;;
|
||||
movd mm1, edx ;; low dword = heightmask
|
||||
punpckldq mm1, mm1 ;; copy low dword to high dword
|
||||
|
||||
;;
|
||||
;; dest = ylookup[dc_yl] + columnofs[dc_x];
|
||||
;;
|
||||
mov eax, [dc_yl]
|
||||
mov edi, [ylookup+eax*4]
|
||||
mov ebx, [dc_x]
|
||||
add edi, [columnofs+ebx*4] ;; edi = dest
|
||||
|
||||
|
||||
;;
|
||||
;; pixelcount = yh - yl + 1
|
||||
;;
|
||||
mov ecx, [dc_yh]
|
||||
add ecx, 1
|
||||
sub ecx, eax ;; pixel count
|
||||
jle near .done ;; nothing to scale
|
||||
;;
|
||||
;; fracstep = dc_iscale;
|
||||
;;
|
||||
movd mm2, [dc_iscale] ;; fracstep in low dword
|
||||
punpckldq mm2, mm2 ;; copy to high dword
|
||||
|
||||
mov ebx, [dc_colormap]
|
||||
mov esi, [dc_source]
|
||||
|
||||
;;
|
||||
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
|
||||
;;
|
||||
;; eax == dc_yl already
|
||||
shl eax, FRACBITS
|
||||
sub eax, [centeryfrac]
|
||||
imul dword [dc_iscale]
;; the one-operand imul leaves the 64-bit product in edx:eax (edx's old value is gone)
|
||||
shrd eax, edx, FRACBITS
|
||||
add eax, [dc_texturemid]
|
||||
|
||||
;;
|
||||
;; if (dc_hires) frac = 0;
|
||||
;;
|
||||
test byte [dc_hires], 0x01
|
||||
jz .mod2
|
||||
xor eax, eax
|
||||
|
||||
|
||||
;;
|
||||
;; Do mod-2 pixel.
|
||||
;;
;; The pair loop only handles even counts, so an odd count draws one
;; pixel here first.
|
||||
.mod2:
|
||||
test ecx, 1
|
||||
jz .pairprepare
|
||||
mov edx, eax ;; edx = frac
|
||||
add eax, [dc_iscale] ;; eax += fracstep
|
||||
sar edx, FRACBITS
|
||||
and edx, ebp ;; edx &= heightmask
|
||||
movzx edx, byte [esi + edx]
|
||||
cmp dl, TRANSPARENTPIXEL
|
||||
je .nextmod2
|
||||
movzx edx, byte [ebx + edx]
|
||||
mov [edi], dl
|
||||
|
||||
.nextmod2:
|
||||
add edi, [vid + viddef_s.width]
|
||||
sub ecx, 1
|
||||
jz .done
|
||||
|
||||
.pairprepare:
|
||||
;;
|
||||
;; Prepare for the main loop.
|
||||
;;
|
||||
movd mm3, eax ;; Low dword = frac
|
||||
movq mm4, mm3 ;; Copy to intermediate register
|
||||
paddd mm4, mm2 ;; dwords of mm4 += fracstep
|
||||
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
|
||||
pslld mm2, 1 ;; fracstep *= 2
|
||||
|
||||
;;
|
||||
;; ebp = vid.width
|
||||
;;
|
||||
mov ebp, [vid + viddef_s.width]
|
||||
|
||||
align 16
|
||||
.pairloop:
|
||||
movq mm0, mm3 ;; 3B 1u.
|
||||
psrad mm0, FRACBITS ;; 4B 1u.
|
||||
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
|
||||
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
|
||||
|
||||
movd eax, mm0 ;; 3B 1u. Get first frac
|
||||
;; IFETCH boundary
|
||||
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
|
||||
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
|
||||
movd edx, mm0 ;; 3B 1u. Get second frac
|
||||
cmp al, TRANSPARENTPIXEL ;; 2B 1u.
|
||||
je .secondinpair ;; 2B 1u.
|
||||
;; IFETCH boundary
|
||||
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
|
||||
mov [edi], al ;; 2B 1(2)u. First pixel
|
||||
|
||||
.secondinpair:
|
||||
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
|
||||
cmp dl, TRANSPARENTPIXEL ;; 2B 1u.
|
||||
je .nextpair ;; 2B 1u.
|
||||
;; IFETCH boundary
|
||||
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
|
||||
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
|
||||
|
||||
.nextpair:
|
||||
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
|
||||
sub ecx, 2 ;; 3B 1u. count -= 2
|
||||
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
|
||||
|
||||
|
||||
.done:
|
||||
;;
|
||||
;; Clear MMX state, or else FPU operations will go badly awry.
|
||||
;;
|
||||
emms
|
||||
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
pop ebp
|
||||
ret
|
||||
|
||||
.usenonMMX:
;; Non-power-of-two height: let the non-MMX drawer do the whole column,
;; then leave through .done so the saved registers are popped.
|
||||
call R_Draw2sMultiPatchColumn_8_ASM
|
||||
jmp .done
|
||||
|
||||
|
||||
;;----------------------------------------------------------------------
;;
;; R_DrawSpan : 8bpp span drawer
;;
;; MMX span drawer.
;;
;; Takes no stack arguments. Reads ds_source, ds_colormap, ds_y, ds_x1,
;; ds_x2, ds_xfrac/ds_yfrac, ds_xstep/ds_ystep, the nflat* shift/mask
;; globals, ylookup and columnofs. Writes ds_x2 - ds_x1 + 1 bytes at
;; ylookup[ds_y] + columnofs[ds_x1]; a count <= 0 writes nothing.
;; ebp/esi/edi/ebx are preserved. nflatmask64 (static scratch) is
;; overwritten, so the routine is not reentrant.
;;
;; Phases: single pixels until dest is dword aligned, then four pixels
;; per iteration (steps doubled), then single pixels for the remainder.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = packed output pixels (aligned loop)
;; mm0 = accumulator
;; mm1 = pair of consecutive xpositions
;; mm2 = pair of consecutive ypositions
;; mm3 = xstep, twice (2 * xstep inside the aligned loop)
;; mm4 = ystep, twice (2 * ystep inside the aligned loop)
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;; mm7 = accumulator
;;----------------------------------------------------------------------

cglobal R_DrawSpan_8_MMX
R_DrawSpan_8_MMX:
	push		ebp						;; preserve caller's stack frame pointer
	push		esi						;; preserve register variables
	push		edi
	push		ebx

;;
;; esi = ds_source
;; ebx = ds_colormap
;;
	mov			esi, [ds_source]
	mov			ebx, [ds_colormap]

;;
;; edi = ylookup[ds_y] + columnofs[ds_x1]
;;
	mov			eax, [ds_y]
	mov			edi, [ylookup + eax*4]
	mov			edx, [ds_x1]
	add			edi, [columnofs + edx*4]

;;
;; ecx = ds_x2 - ds_x1 + 1
;;
	mov			ecx, [ds_x2]
	sub			ecx, edx
	add			ecx, 1

;;
;; Needed for fracs and steps
;;
	movd		mm7, [nflatshiftup]

;;
;; mm3 = xstep
;;
	movd		mm3, [ds_xstep]
	pslld		mm3, mm7
	punpckldq	mm3, mm3

;;
;; mm4 = ystep
;;
	movd		mm4, [ds_ystep]
	pslld		mm4, mm7
	punpckldq	mm4, mm4

;;
;; mm1 = pair of consecutive xpositions
;;
	movd		mm1, [ds_xfrac]
	pslld		mm1, mm7
	movq		mm6, mm1
	paddd		mm6, mm3
	punpckldq	mm1, mm6

;;
;; mm2 = pair of consecutive ypositions
;;
	movd		mm2, [ds_yfrac]
	pslld		mm2, mm7
	movq		mm6, mm2
	paddd		mm6, mm4
	punpckldq	mm2, mm6

;;
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;;
	movd		mm5, [nflatxshift]
	movd		mm6, [nflatyshift]

;;
;; Mask is in memory due to lack of registers.
;;
	mov			eax, [nflatmask]
	mov			[nflatmask64], eax
	mov			[nflatmask64 + 4], eax

;;
;; Go until we reach a dword boundary.
;;
.unaligned:
	test		edi, 3
	jz			.alignedprep
.stragglers:
	;; Signed test: stop on zero AND on a negative count (ds_x2 < ds_x1 - 1).
	;; An equality-only test would keep writing past the span in that case.
	test		ecx, ecx
	jle			.done

;;
;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
;;
	movq		mm0, mm1				;; mm0 = xposition
	movq		mm7, mm2				;; mm7 = yposition
	paddd		mm1, mm3				;; xposition += xstep (once!)
	paddd		mm2, mm4				;; yposition += ystep (once!)
	psrld		mm0, mm5				;; shift
	psrld		mm7, mm6				;; shift
	pand		mm7, [nflatmask64]		;; mask
	por			mm0, mm7				;; or x and y together

	movd		eax, mm0				;; eax = index of first pixel
	movzx		eax, byte [esi + eax]	;; al = source[eax]
	movzx		eax, byte [ebx + eax]	;; al = colormap[al]

	mov			[edi], al
	add			edi, 1

	sub			ecx, 1
	jmp			.unaligned

.alignedprep:
;;
;; We can double the steps now.
;;
	pslld		mm3, 1
	pslld		mm4, 1

;;
;; Generate chunks of four pixels.
;;
.alignedloop:

;;
;; Make sure we have at least four pixels.
;;
	cmp			ecx, 4
	jl			.prestragglers

;;
;; First two pixels.
;;
	movq		mm0, mm1				;; mm0 = xposition
	movq		mm7, mm2				;; mm7 = yposition
	paddd		mm1, mm3				;; xposition += xstep
	paddd		mm2, mm4				;; yposition += ystep
	psrld		mm0, mm5				;; shift
	psrld		mm7, mm6				;; shift
	pand		mm7, [nflatmask64]		;; mask
	por			mm0, mm7				;; or x and y together

	movd		eax, mm0				;; eax = index of first pixel
	movzx		eax, byte [esi + eax]	;; al = source[eax]
	movzx		ebp, byte [ebx + eax]	;; ebp = colormap[al]

	punpckhdq	mm0, mm0				;; both dwords = high dword
	movd		eax, mm0				;; eax = index of second pixel
	movzx		eax, byte [esi + eax]	;; al = source[eax]
	movzx		eax, byte [ebx + eax]	;; al = colormap[al]
	shl			eax, 8					;; get pixel in right byte
	or			ebp, eax				;; put pixel in ebp

;;
;; Next two pixels.
;;
	movq		mm0, mm1				;; mm0 = xposition
	movq		mm7, mm2				;; mm7 = yposition
	paddd		mm1, mm3				;; xposition += xstep
	paddd		mm2, mm4				;; yposition += ystep
	psrld		mm0, mm5				;; shift
	psrld		mm7, mm6				;; shift
	pand		mm7, [nflatmask64]		;; mask
	por			mm0, mm7				;; or x and y together

	movd		eax, mm0				;; eax = index of third pixel
	movzx		eax, byte [esi + eax]	;; al = source[eax]
	movzx		eax, byte [ebx + eax]	;; al = colormap[al]
	shl			eax, 16					;; get pixel in right byte
	or			ebp, eax				;; put pixel in ebp

	punpckhdq	mm0, mm0				;; both dwords = high dword
	movd		eax, mm0				;; eax = index of fourth pixel
	movzx		eax, byte [esi + eax]	;; al = source[eax]
	movzx		eax, byte [ebx + eax]	;; al = colormap[al]
	shl			eax, 24					;; get pixel in right byte
	or			ebp, eax				;; put pixel in ebp

;;
;; Write pixels.
;;
	mov			[edi], ebp
	add			edi, 4

	sub			ecx, 4
	jmp			.alignedloop

.prestragglers:
;;
;; Back to one step at a time.
;;
;; Rebuild the single steps from their sources instead of shifting the
;; doubled ones back down: the doubling discarded bit 31 of each step,
;; and an arithmetic shift right would refill it with a copy of bit 30,
;; which is wrong whenever those two bits of the step differ.
;; mm7 is free here (it is only a scratch register in the loops).
;;
	movd		mm7, [nflatshiftup]
	movd		mm3, [ds_xstep]
	movd		mm4, [ds_ystep]
	pslld		mm3, mm7
	pslld		mm4, mm7
	punpckldq	mm3, mm3
	punpckldq	mm4, mm4
	jmp			.stragglers

.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
	emms

	pop			ebx
	pop			edi
	pop			esi
	pop			ebp
	ret
|
|
@ -1,48 +0,0 @@
|
|||
;; SONIC ROBO BLAST 2
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
;; Copyright (C) 1999-2023 by Sonic Team Junior.
|
||||
;;
|
||||
;; This program is free software distributed under the
|
||||
;; terms of the GNU General Public License, version 2.
|
||||
;; See the 'LICENSE' file for more details.
|
||||
;;-----------------------------------------------------------------------------
|
||||
;; FILE:
|
||||
;; tmap_vc.nas
|
||||
;; DESCRIPTION:
|
||||
;; Assembler optimised math code for Visual C++.
|
||||
|
||||
|
||||
[BITS 32]
|
||||
|
||||
%macro cglobal 1
|
||||
%define %1 _%1
|
||||
[global %1]
|
||||
%endmacro
|
||||
|
||||
[SECTION .text write]
|
||||
|
||||
;----------------------------------------------------------------------------
; fixed_t FixedMul (fixed_t a, fixed_t b)
;
; In:    [esp+4] = a, [esp+8] = b   (stack arguments, return address at [esp])
; Out:   eax = bits 16..47 of the signed 64-bit product a * b
; Clobb: edx, flags
;----------------------------------------------------------------------------
cglobal FixedMul
;       align   16
FixedMul:
        mov     eax, [esp+8]            ; eax = b
        imul    dword [esp+4]           ; edx:eax = b * a (signed, 64-bit)
        shrd    eax, edx, 16            ; drop the low 16 fraction bits
        ret
|
||||
|
||||
;----------------------------------------------------------------------------
; fixed_t FixedDiv2 (fixed_t a, fixed_t b);
;
; In:    [esp+4] = a, [esp+8] = b   (stack arguments, return address at [esp])
; Out:   eax = ((signed 64-bit) a << 16) / b
; Clobb: edx, flags
; Note:  nothing here checks b or the size of the quotient; idiv raises a
;        divide error when b is 0 or the quotient does not fit in 32 bits.
;----------------------------------------------------------------------------
cglobal FixedDiv2
;       align   16
FixedDiv2:
        mov     edx, [esp+4]            ; edx = a
        mov     eax, edx                ; eax = a
        sar     edx, 31                 ; edx = sign extension of a
        shld    edx, eax, 16            ; edx = high dword of (a << 16)
        shl     eax, 16                 ; eax = low dword of (a << 16)
        idiv    dword [esp+8]           ; eax = edx:eax / b
        ret
|
|
@ -447,12 +447,6 @@ static void CV_palette_OnChange(void)
|
|||
V_SetPalette(0);
|
||||
}
|
||||
|
||||
#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM)
|
||||
void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
|
||||
size_t destrowbytes);
|
||||
#define HAVE_VIDCOPY
|
||||
#endif
|
||||
|
||||
static void CV_constextsize_OnChange(void)
|
||||
{
|
||||
if (!con_refresh)
|
||||
|
@ -466,9 +460,6 @@ static void CV_constextsize_OnChange(void)
|
|||
void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
|
||||
size_t destrowbytes)
|
||||
{
|
||||
#ifdef HAVE_VIDCOPY
|
||||
VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes);
|
||||
#else
|
||||
if (srcrowbytes == destrowbytes)
|
||||
M_Memcpy(destptr, srcptr, srcrowbytes * height);
|
||||
else
|
||||
|
@ -481,7 +472,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT3
|
|||
srcptr += srcrowbytes;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0};
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
// SONIC ROBO BLAST 2
|
||||
//-----------------------------------------------------------------------------
|
||||
// Copyright (C) 1998-2000 by DooM Legacy Team.
|
||||
// Copyright (C) 1999-2023 by Sonic Team Junior.
|
||||
//
|
||||
// This program is free software distributed under the
|
||||
// terms of the GNU General Public License, version 2.
|
||||
// See the 'LICENSE' file for more details.
|
||||
//-----------------------------------------------------------------------------
|
||||
/// \file vid_copy.s
|
||||
/// \brief code for updating the linear frame buffer screen.
|
||||
|
||||
#include "asm_defs.inc" // structures, must match the C structures!
|
||||
|
||||
// DJGPPv2 is as fast as this one, but then someone may compile with a less
|
||||
// good version of DJGPP than mine, so this little asm will do the trick!
|
||||
|
||||
#define srcptr 4+16
|
||||
#define destptr 8+16
|
||||
#define width 12+16
|
||||
#define height 16+16
|
||||
#define srcrowbytes 20+16
|
||||
#define destrowbytes 24+16
|
||||
|
||||
// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth );
|
||||
// width is given as BYTES
|
||||
|
||||
#ifdef __i386__
|
||||
|
||||
// void VID_BlitLinearScreen_ASM(src, dest, width, height, srcrowbytes, destrowbytes)
//
// Copies `height` rows of `width` BYTES each from src to dest, advancing the
// source by srcrowbytes and the destination by destrowbytes per row.
// Arguments are read from the stack through the srcptr/destptr/... offsets
// defined above (they already account for the four registers pushed here).
// ebp/edi/esi/ebx are preserved; eax, ecx, edx and flags are clobbered and
// the direction flag is left cleared.
//
// Each row is copied as width/4 dwords followed by the width%4 leftover
// bytes, so exactly `width` bytes are moved and the per-row skip values
// (rowbytes - width) land on the start of the next row for any width.
// A width or height <= 0 copies nothing.
.globl C(VID_BlitLinearScreen_ASM)
C(VID_BlitLinearScreen_ASM):
    pushl   %ebp                        // preserve caller's stack frame
    pushl   %edi
    pushl   %esi                        // preserve register variables
    pushl   %ebx

    cld
    movl    srcptr(%esp),%esi
    movl    destptr(%esp),%edi
    movl    width(%esp),%ebx            // ebx = bytes per row
    testl   %ebx,%ebx
    jle     LLBlitDone                  // nothing to copy
    movl    height(%esp),%ebp           // ebp = rows remaining
    testl   %ebp,%ebp
    jle     LLBlitDone                  // a zero count would wrap in the decl/jnz loop
    movl    srcrowbytes(%esp),%eax
    subl    %ebx,%eax                   // eax = source bytes to skip after a row
    movl    destrowbytes(%esp),%edx
    subl    %ebx,%edx                   // edx = dest bytes to skip after a row
LLRowLoop:
    movl    %ebx,%ecx
    shrl    $2,%ecx                     // whole dwords in the row
    rep
    movsl
    movl    %ebx,%ecx
    andl    $3,%ecx                     // 0..3 leftover bytes
    rep
    movsb
    addl    %eax,%esi
    addl    %edx,%edi
    decl    %ebp
    jnz     LLRowLoop

LLBlitDone:
    popl    %ebx                        // restore register variables
    popl    %esi
    popl    %edi
    popl    %ebp                        // restore the caller's stack frame

    ret
|
||||
#endif
|
|
@ -22,7 +22,6 @@
|
|||
#ifdef _MSC_VER
|
||||
#include <assert.h>
|
||||
#endif
|
||||
#define NOASM
|
||||
#include "../src/tables.h"
|
||||
#define NO_M
|
||||
#include "../src/m_fixed.c"
|
||||
|
|
Loading…
Reference in a new issue