Merge branch 'removeasm' into 'master'

Remove ASM code

See merge request KartKrew/Kart-Public!348
Alu Folie 2024-03-31 20:40:11 +00:00
commit cc6386cd1e
40 changed files with 27 additions and 5104 deletions
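In short: this merge removes the hand-written assembly from the tree (tmap.nas, asm_defs.inc and p5prof.h are deleted outright, and the references to tmap_mmx.nas, tmap_vc.nas and vid_copy.s go with them), drops the I_CPUInfo CPU detection and the assembly variants of M_Memcpy, FixedMul and FixedDiv2, and strips the matching NASM/YASM plumbing from the CMake, Makefile, CircleCI, AppVeyor, Code::Blocks, Dev-C++ and MSVC project files. The portable C paths that previously sat behind NOASM/NONX86 become the only implementation; for example, the fixed-point multiply is now always the C version kept in the m_fixed.h hunk further down:

	/* Quoted from the m_fixed.h hunk below -- the sole FixedMul path after this change. */
	FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedMul(fixed_t a, fixed_t b)
	{
		// Need to cast to unsigned before shifting to avoid undefined behaviour
		// for negative integers
		return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
	}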

View file

@ -36,7 +36,7 @@ jobs:
- v1-SRB2-APT
- run:
name: Install SDK
command: apt-get -qq -y --no-install-recommends install git build-essential nasm libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
command: apt-get -qq -y --no-install-recommends install git build-essential libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
- save_cache:
key: v1-SRB2-APT
paths:

View file

@ -1996,24 +1996,6 @@ HW3SOUND for 3D hardware sound support
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/v_video.h" />
<Unit filename="src/vid_copy.s">
<Option compilerVar="CC" />
<Option compiler="avrgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gnu_gcc_compiler_for_mingw32" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gnu_gcc_compiler_for_mingw64" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="armelfgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="tricoregcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="ppcgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option target="Debug Native/SDL" />
<Option target="Release Native/SDL" />
<Option target="Debug Linux/SDL" />
<Option target="Release Linux/SDL" />
<Option target="Debug Mingw/SDL" />
<Option target="Release Mingw/SDL" />
<Option target="Debug Mingw/DirectX" />
<Option target="Release Mingw/DirectX" />
</Unit>
<Unit filename="src/w_wad.c">
<Option compilerVar="CC" />
</Unit>

View file

@ -25,9 +25,6 @@
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(PlatformTarget)'=='x86'">
<ClCompile>
<PreprocessorDefinitions>USEASM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
</Link>

View file

@ -5,7 +5,7 @@ Ver=3
IsCpp=0
Type=0
UnitCount=279
Folders=A_Asm,B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
Folders=B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
CommandLine=
CompilerSettings=00000000000100000111e1
PchHead=-1
@ -1473,36 +1473,6 @@ Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit149]
FileName=src\tmap.nas
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=1
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap.nas
[Unit150]
FileName=src\asm_defs.inc
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit151]
FileName=src\vid_copy.s
Folder=A_Asm
Compile=1
CompileCpp=0
Link=1
Priority=1000
OverrideBuildCmd=1
BuildCmd=$(CC) $(CFLAGS) -x assembler-with-cpp -c src/vid_copy.s -o $@
[Unit152]
FileName=src\y_inter.h
Folder=H_Hud
@ -1543,26 +1513,6 @@ Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit156]
FileName=src\p5prof.h
Folder=A_Asm
Compile=1
CompileCpp=0
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit157]
FileName=src\tmap_mmx.nas
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=1
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap_mmx.nas
[Unit159]
FileName=src\lzf.h
Folder=W_Wad

View file

@ -11,8 +11,6 @@ environment:
# c:\msys64 x86_64 has gcc 8.2.0, so use c:\mingw-w64 7.3.0 instead
MINGW_SDK_64: C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64
CFLAGS: -Wall -W -Werror -Wno-error=implicit-fallthrough -Wimplicit-fallthrough=3 -Wno-tautological-compare -Wno-error=suggest-attribute=noreturn
NASM_ZIP: nasm-2.12.01
NASM_URL: http://www.nasm.us/pub/nasm/releasebuilds/2.12.01/win64/nasm-2.12.01-win64.zip
UPX_ZIP: upx391w
UPX_URL: http://upx.sourceforge.net/download/upx391w.zip
CCACHE_EXE: ccache.exe
@ -46,7 +44,6 @@ environment:
ASSET_CLEAN: 0
cache:
- nasm-2.12.01.zip
- upx391w.zip
- ccache.exe
- C:\Users\appveyor\.ccache
@ -58,10 +55,6 @@ install:
- if [%X86_64%] == [1] ( set "MINGW_SDK=%MINGW_SDK_64%" )
- if [%X86_64%] == [1] ( set "CCACHE_CC=%CCACHE_CC_64%" )
- if not exist "%NASM_ZIP%.zip" appveyor DownloadFile "%NASM_URL%" -FileName "%NASM_ZIP%.zip"
- 7z x -y "%NASM_ZIP%.zip" -o%TMP% >null
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%NASM_ZIP%" "%MINGW_SDK%\bin" nasm.exe || exit 0
- if not exist "%UPX_ZIP%.zip" appveyor DownloadFile "%UPX_URL%" -FileName "%UPX_ZIP%.zip"
- 7z x -y "%UPX_ZIP%.zip" -o%TMP% >null
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%UPX_ZIP%" "%MINGW_SDK%\bin" upx.exe || exit 0
@ -78,7 +71,6 @@ before_build:
- set "Path=%MINGW_SDK%\bin;%Path%"
- if [%X86_64%] == [1] ( x86_64-w64-mingw32-gcc --version ) else ( i686-w64-mingw32-gcc --version )
- mingw32-make --version
- if not [%X86_64%] == [1] ( nasm -v )
- if not [%NOUPX%] == [1] ( upx -V )
- ccache -V
- ccache -s

View file

@ -1,46 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# support for the yasm assembler
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS nasm yasm asm)
if(NOT CMAKE_ASM_YASM_OBJECT_FORMAT)
if(WIN32)
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(CMAKE_ASM_YASM_OBJECT_FORMAT win64)
else()
set(CMAKE_ASM_YASM_OBJECT_FORMAT win32)
endif()
elseif(APPLE)
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho64)
else()
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho)
endif()
else()
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf64)
else()
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf)
endif()
endif()
endif()
set(CMAKE_ASM_YASM_COMPILE_OBJECT "<CMAKE_ASM_YASM_COMPILER> <FLAGS> -f ${CMAKE_ASM_YASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>")
# Load the generic ASMInformation file:
set(ASM_DIALECT "_YASM")
include(CMakeASMInformation)
set(ASM_DIALECT)

View file

@ -1,27 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# Find the nasm assembler. yasm (http://www.tortall.net/projects/yasm/) is nasm compatible
set(CMAKE_ASM_YASM_COMPILER_LIST nasm yasm)
if(NOT CMAKE_ASM_YASM_COMPILER)
find_program(CMAKE_ASM_YASM_COMPILER yasm
"$ENV{ProgramFiles}/YASM")
endif()
# Load the generic DetermineASM compiler file with the DIALECT set properly:
set(ASM_DIALECT "_YASM")
include(CMakeDetermineASMCompiler)
set(ASM_DIALECT)

View file

@ -1,23 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# This file is used by EnableLanguage in cmGlobalGenerator to
# determine that the selected ASM_NASM "compiler" works.
# For assembler this can only check whether the compiler has been found,
# because otherwise there would have to be a separate assembler source file
# for each assembler on every architecture.
set(ASM_DIALECT "_YASM")
include(CMakeTestASMCompiler)
set(ASM_DIALECT)

View file

@ -76,7 +76,7 @@ LOCAL_SRC_FILES := am_map.c \
android/i_system.c \
android/i_video.c
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOASM -DNOPIX -DUNIXCOMMON -DNOTERMIOS
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOPIX -DUNIXCOMMON -DNOTERMIOS
LOCAL_MODULE := libsrb2

View file

@ -190,16 +190,6 @@ source_group("Main" FILES ${SRB2_CORE_SOURCES} ${SRB2_CORE_HEADERS})
source_group("Renderer" FILES ${SRB2_CORE_RENDER_SOURCES})
source_group("Game" FILES ${SRB2_CORE_GAME_SOURCES})
set(SRB2_ASM_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/vid_copy.s
)
set(SRB2_NASM_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/tmap_mmx.nas
${CMAKE_CURRENT_SOURCE_DIR}/tmap.nas
)
if(MSVC)
list(APPEND SRB2_NASM_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/tmap_vc.nas)
endif()
@ -231,10 +221,6 @@ set(SRB2_CONFIG_HAVE_CURL ON CACHE BOOL
"Enable curl support, used for downloading files via HTTP.")
set(SRB2_CONFIG_HWRENDER ON CACHE BOOL
"Enable hardware rendering through OpenGL.")
set(SRB2_CONFIG_USEASM OFF CACHE BOOL
"Enable NASM tmap implementation for software mode speedup.")
set(SRB2_CONFIG_YASM OFF CACHE BOOL
"Use YASM in place of NASM.")
set(SRB2_CONFIG_STATIC_OPENGL OFF CACHE BOOL
"Use statically linked OpenGL. NOT RECOMMENDED.")
@ -503,29 +489,6 @@ if(${SRB2_CONFIG_HWRENDER} AND ${SRB2_CONFIG_STATIC_OPENGL})
endif()
endif()
if(${SRB2_CONFIG_USEASM})
#SRB2_ASM_FLAGS can be used to pass flags to either nasm or yasm.
if(${CMAKE_SYSTEM} MATCHES "Linux")
set(SRB2_ASM_FLAGS "-DLINUX ${SRB2_ASM_FLAGS}")
endif()
if(${SRB2_CONFIG_YASM})
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS} nas)
set(CMAKE_ASM_YASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
enable_language(ASM_YASM)
else()
set(CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS} nas)
set(CMAKE_ASM_NASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
enable_language(ASM_NASM)
endif()
set(SRB2_USEASM ON)
add_definitions(-DUSEASM)
set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} -msse3 -mfpmath=sse)
else()
set(SRB2_USEASM OFF)
add_definitions(-DNONX86 -DNORUSEASM)
endif()
# Targets
# Compatibility flag with later versions of GCC

View file

@ -62,7 +62,6 @@
# Compile a debug version, add 'DEBUGMODE=1'
# Compile with less warnings, add 'RELAXWARNINGS=1'
# Generate compiler errors for most compiler warnings, add 'ERRORMODE=1'
# Compile without NASM's tmap.nas, add 'NOASM=1'
# Compile without 3D hardware support, add 'NOHW=1'
# Compile with GDBstubs, add 'RDB=1'
# Compile without PNG, add 'NOPNG=1'
@ -165,7 +164,6 @@ endif
ifdef MINGW64
MINGW=1
NONX86=1
NOASM=1
# MINGW64 should not necessarily imply X86_64=1, but we make that assumption elsewhere
# Once that changes, remove this
X86_64=1
@ -188,7 +186,6 @@ NOPNG=1
NOZLIB=1
NONET=1
NOHW=1
NOASM=1
NOIPX=1
EXENAME?=srb2dummy
OBJS=$(OBJDIR)/i_video.o
@ -197,7 +194,6 @@ endif
ifdef HAIKU
NOIPX=1
NOASM=1
ifndef NONET
LIBS=-lnetwork
endif
@ -267,7 +263,6 @@ NOPNG=1
NOZLIB=1
NONET=1
#NOHW=1
NOASM=1
NOIPX=1
NONX86=1
OBJS+=$(OBJDIR)/i_video.o
@ -295,7 +290,6 @@ endif
MSGFMT?=msgfmt
ifndef ECHO
NASM:=@$(NASM)
REMOVE:=@$(REMOVE)
CC:=@$(CC)
CXX:=@$(CXX)
@ -350,13 +344,6 @@ ifdef X86_64
endif
endif
ifndef NOASM
ifndef NONX86
OBJS+=$(OBJDIR)/tmap.o $(OBJDIR)/tmap_mmx.o
OPTS+=-DUSEASM
endif
endif
ifndef NOPNG
OPTS+=-DHAVE_PNG
@ -495,16 +482,6 @@ else
endif
CFLAGS+=-g $(OPTS) $(ARCHOPTS) $(WINDRESFLAGS)
ifdef YASM
ifdef STABS
NASMOPTS?= -g stabs
else
NASMOPTS?= -g dwarf2
endif
else
NASMOPTS?= -g
endif
ifdef PROFILEMODE
# build with profiling information
CFLAGS+=-pg
@ -698,12 +675,6 @@ ifdef CYGWIN32
$(REMOVE) $(OBJDIR)/*.res
endif
#make a big srb2.s that is the disasm of the exe (dos only ?)
asm:
$(CC) $(LDFLAGS) $(OBJS) -o $(OBJDIR)/tmp.exe $(LIBS)
$(OBJDUMP) -d $(OBJDIR)/tmp.exe --no-show-raw-insn > srb2.s
$(REMOVE) $(OBJDIR)/tmp.exe
# executable
# NOTE: DJGPP's objcopy do not have --add-gnu-debuglink
@ -888,12 +859,6 @@ $(OBJDIR)/%.o: hardware/%.c
$(OBJDIR)/%.o: blua/%.c
$(CC) $(CFLAGS) $(LUA_CFLAGS) $(WFLAGS) -c $< -o $@
$(OBJDIR)/%.o: %.nas
$(NASM) $(NASMOPTS) -o $@ -f $(NASMFORMAT) $<
$(OBJDIR)/vid_copy.o: vid_copy.s asm_defs.inc
$(CC) $(OPTS) $(ASFLAGS) -x assembler-with-cpp -c $< -o $@
$(OBJDIR)/%.o: %.s
$(CC) $(OPTS) -x assembler-with-cpp -c $< -o $@

View file

@ -501,12 +501,6 @@ i_main_o=$(OBJDIR)/i_main.o
#set OBJDIR and BIN's starting place
OBJDIR=../objs
BIN=../bin
#Nasm ASM and rm
ifdef YASM
NASM?=yasm
else
NASM?=nasm
endif
REMOVE?=rm -f
CP?=cp
MKDIR?=mkdir -p
@ -524,7 +518,6 @@ endif
#Interface Setup
ifdef DJGPPDOS
INTERFACE=djgppdos
NASMFORMAT=coff
OBJDIR:=$(OBJDIR)/djgppdos
ifdef WATTCP
OBJDIR:=$(OBJDIR)/wattcp
@ -538,7 +531,6 @@ ifdef DUMMY
BIN:=$(BIN)/dummy
else
ifdef LINUX
NASMFORMAT=elf -DLINUX
SDL=1
ifdef LINUX64
OBJDIR:=$(OBJDIR)/Linux64
@ -550,7 +542,6 @@ endif
else
ifdef FREEBSD
INTERFACE=sdl
NASMFORMAT=elf -DLINUX
SDL=1
OBJDIR:=$(OBJDIR)/FreeBSD
@ -558,7 +549,6 @@ ifdef FREEBSD
else
ifdef SOLARIS
INTERFACE=sdl
NASMFORMAT=elf -DLINUX
SDL=1
OBJDIR:=$(OBJDIR)/Solaris
@ -566,7 +556,6 @@ ifdef SOLARIS
else
ifdef CYGWIN32
INTERFACE=sdl
NASMFORMAT=win32
SDL=1
OBJDIR:=$(OBJDIR)/cygwin
@ -574,7 +563,6 @@ ifdef CYGWIN32
else
ifdef MINGW64
INTERFACE=win32
#NASMFORMAT=win64
OBJDIR:=$(OBJDIR)/Mingw64
BIN:=$(BIN)/Mingw64
else
@ -606,13 +594,11 @@ ifdef PS3N
else
ifdef MINGW
INTERFACE=win32
NASMFORMAT=win32
OBJDIR:=$(OBJDIR)/Mingw
BIN:=$(BIN)/Mingw
else
ifdef XBOX
INTERFACE=sdl12
NASMFORMAT=win32
PREFIX?=/usr/local/openxdk/bin/i386-pc-xbox
SDL=1
SDL12=1

View file

@ -233,11 +233,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
return -1;
}
const CPUInfoFlags *I_CPUInfo(void)
{
return NULL;
}
const char *I_LocateWad(void)
{
return "/sdcard/srb2";

View file

@ -1,43 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file asm_defs.inc
/// \brief must match the C structures
#ifndef __ASM_DEFS__
#define __ASM_DEFS__
// this makes variables more noticable,
// and make the label match with C code
// Linux, unlike DOS, has no "_" 19990119 by Kin
// and nasm needs .data code segs under linux 20010210 by metzgermeister
// FIXME: nasm ignores these settings, so I put the macros into the makefile
#ifdef __ELF__
#define C(label) label
#define CODE_SEG .data
#else
#define C(label) _##label
#define CODE_SEG .text
#endif
/* This is a more readable way to access the arguments passed from C code */
/* PLEASE NOTE: it is supposed that all arguments passed from C code are */
/* 32bit integer (INT32, long, and most *pointers) */
#define ARG1 8(%ebp)
#define ARG2 12(%ebp)
#define ARG3 16(%ebp)
#define ARG4 20(%ebp)
#define ARG5 24(%ebp)
#define ARG6 28(%ebp)
#define ARG7 32(%ebp)
#define ARG8 36(%ebp)
#define ARG9 40(%ebp) //(c)tm ... Allegro by Shawn Hargreaves.
#endif

View file

@ -492,7 +492,7 @@ void M_StartupLocale(void);
// M_GetText function that just returns the string.
#define M_GetText(x) (x)
#endif
extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
void *M_Memcpy(void *dest, const void *src, size_t n);
char *va(const char *format, ...) FUNCPRINTF;
char *M_GetToken(const char *inputString);
char *sizeu1(size_t num);
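The function-pointer indirection removed above means M_Memcpy is now an ordinary function. Its new body is not legible in the unannotated m_misc.c hunk further down, so the following is only a minimal sketch of such a wrapper (an assumption for illustration, not the verbatim replacement from this commit):

	#include <string.h> /* memcpy */

	/* Hypothetical plain-C M_Memcpy: forward straight to the C library.
	   Sketch only; the real body in this commit may also keep debug NULL checks. */
	void *M_Memcpy(void *dest, const void *src, size_t n)
	{
		return memcpy(dest, src, n);
	}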

View file

@ -137,11 +137,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
return -1;
}
const CPUInfoFlags *I_CPUInfo(void)
{
return NULL;
}
const char *I_LocateWad(void)
{
return NULL;

View file

@ -296,40 +296,6 @@ char *I_GetUserName(void);
*/
INT32 I_mkdir(const char *dirname, INT32 unixright);
typedef struct {
int FPU : 1; ///< FPU availabile
int CPUID : 1; ///< CPUID instruction
int RDTSC : 1; ///< RDTSC instruction
int MMX : 1; ///< MMX features
int MMXExt : 1; ///< MMX Ext. features
int CMOV : 1; ///< Pentium Pro's "cmov"
int AMD3DNow : 1; ///< 3DNow features
int AMD3DNowExt: 1; ///< 3DNow! Ext. features
int SSE : 1; ///< SSE features
int SSE2 : 1; ///< SSE2 features
int SSE3 : 1; ///< SSE3 features
int IA64 : 1; ///< Running on IA64
int AMD64 : 1; ///< Running on AMD64
int AltiVec : 1; ///< AltiVec features
int FPPE : 1; ///< floating-point precision error
int PFC : 1; ///< TBD?
int cmpxchg : 1; ///< ?
int cmpxchg16b : 1; ///< ?
int cmp8xchg16 : 1; ///< ?
int FPE : 1; ///< FPU Emu
int DEP : 1; ///< Data excution prevent
int PPCMM64 : 1; ///< PowerPC Movemem 64bit ok?
int ALPHAbyte : 1; ///< ?
int PAE : 1; ///< Physical Address Extension
int CPUs : 8;
} CPUInfoFlags;
/** \brief Info about CPU
\return CPUInfo in bits
*/
const CPUInfoFlags *I_CPUInfo(void);
/** \brief Find main WAD
\return path to main WAD
*/

View file

@ -23,49 +23,6 @@
#include "m_fixed.h"
#include "tables.h" // ANGLETOFINESHIFT
#ifdef __USE_C_FIXEDMUL__
/** \brief The FixedMul function
\param a fixed_t number
\param b fixed_t number
\return a*b>>FRACBITS
*/
fixed_t FixedMul(fixed_t a, fixed_t b)
{
// Need to cast to unsigned before shifting to avoid undefined behaviour
// for negative integers
return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
}
#endif //__USE_C_FIXEDMUL__
#ifdef __USE_C_FIXEDDIV__
/** \brief The FixedDiv2 function
\param a fixed_t number
\param b fixed_t number
\return a/b * FRACUNIT
*/
fixed_t FixedDiv2(fixed_t a, fixed_t b)
{
INT64 ret;
if (b == 0)
I_Error("FixedDiv: divide by zero");
ret = (((INT64)a * FRACUNIT)) / b;
if ((ret > INT32_MAX) || (ret < INT32_MIN))
I_Error("FixedDiv: divide by zero");
return (fixed_t)ret;
}
#endif // __USE_C_FIXEDDIV__
fixed_t FixedSqrt(fixed_t x)
{

View file

@ -46,127 +46,29 @@ typedef INT32 fixed_t;
#define FIXED_TO_FLOAT(x) (((float)(x)) / ((float)FRACUNIT))
#define FLOAT_TO_FIXED(f) (fixed_t)((f) * ((float)FRACUNIT))
/** \brief The FixedMul function
\param a fixed_t number
\param b fixed_t number
\return a*b>>FRACBITS
*/
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedMul(fixed_t a, fixed_t b)
{
// Need to cast to unsigned before shifting to avoid undefined behaviour
// for negative integers
return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
}
#if defined (__WATCOMC__) && FRACBITS == 16
#pragma aux FixedMul = \
"imul ebx", \
"shrd eax,edx,16" \
parm [eax] [ebx] \
value [eax] \
modify exact [eax edx]
#pragma aux FixedDiv2 = \
"cdq", \
"shld edx,eax,16", \
"sal eax,16", \
"idiv ebx" \
parm [eax] [ebx] \
value [eax] \
modify exact [eax edx]
#elif defined (__GNUC__) && defined (__i386__) && !defined (NOASM)
// DJGPP, i386 linux, cygwin or mingw
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
{
fixed_t ret;
asm
(
"imull %2;" // a*b
"shrdl %3,%%edx,%0;" // shift logical right FRACBITS bits
:"=a" (ret) // eax is always the result and the first operand (%0,%1)
:"0" (a), "r" (b) // and %2 is what we use imull on with what in %1
, "I" (FRACBITS) // %3 holds FRACBITS (normally 16)
:"cc", "%edx" // edx and condition codes clobbered
);
return ret;
}
FUNCMATH FUNCINLINE static inline fixed_t FixedDiv2(fixed_t a, fixed_t b)
{
fixed_t ret;
asm
(
"movl %1,%%edx;" // these two instructions allow the next two to pair, on the Pentium processor.
"sarl $31,%%edx;" // shift arithmetic right 31 on EDX
"shldl %3,%1,%%edx;" // DP shift logical left FRACBITS on EDX
"sall %3,%0;" // shift arithmetic left FRACBITS on EAX
"idivl %2;" // EDX/b = EAX
: "=a" (ret)
: "0" (a), "r" (b)
, "I" (FRACBITS)
: "%edx"
);
return ret;
}
#elif defined (__GNUC__) && defined (__arm__) && !defined(__thumb__) && !defined(NOASM) //ARMv4 ASM
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // let abuse smull
{
fixed_t ret;
asm
(
"smull %[lo], r1, %[a], %[b];"
"mov %[lo], %[lo], lsr %3;"
"orr %[lo], %[lo], r1, lsl %3;"
: [lo] "=&r" (ret) // rhi, rlo and rm must be distinct registers
: [a] "r" (a), [b] "r" (b)
, "i" (FRACBITS)
: "r1"
);
return ret;
}
#define __USE_C_FIXEDDIV__ // no double or asm div in ARM land
#elif defined (__GNUC__) && defined (__ppc__) && !defined(NOASM) && 0 // WII: PPC CPU
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
{
fixed_t ret, hi, lo;
asm
(
"mullw %0, %2, %3;"
"mulhw %1, %2, %3"
: "=r" (hi), "=r" (lo)
: "r" (a), "r" (b)
, "I" (FRACBITS)
);
ret = (INT64)((hi>>FRACBITS)+lo)<<FRACBITS;
return ret;
}
#define __USE_C_FIXEDDIV__// Alam: I am lazy
#elif defined (__GNUC__) && defined (__mips__) && !defined(NOASM) && 0 // PSP: MIPS CPU
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
{
fixed_t ret;
asm
(
"mult %3, %4;" // a*b=h<32+l
: "=r" (ret), "=l" (a), "=h" (b) //todo: abuse shr opcode
: "0" (a), "r" (b)
, "I" (FRACBITS)
//: "+l", "+h"
);
ret = (INT64)((a>>FRACBITS)+b)<<FRACBITS;
return ret;
}
#define __USE_C_FIXEDDIV__ // no 64b asm div in MIPS land
#elif defined (__GNUC__) && defined (__sh__) && 0 // DC: SH4 CPU
#elif defined (__GNUC__) && defined (__m68k__) && 0 // DEAD: Motorola 6800 CPU
#elif defined (_MSC_VER) && defined(USEASM) && FRACBITS == 16
// Microsoft Visual C++ (no asm inline)
fixed_t __cdecl FixedMul(fixed_t a, fixed_t b);
fixed_t __cdecl FixedDiv2(fixed_t a, fixed_t b);
#else
#define __USE_C_FIXEDMUL__
#define __USE_C_FIXEDDIV__
#endif
#ifdef __USE_C_FIXEDMUL__
FUNCMATH fixed_t FixedMul(fixed_t a, fixed_t b);
#endif
#ifdef __USE_C_FIXEDDIV__
FUNCMATH fixed_t FixedDiv2(fixed_t a, fixed_t b);
#endif
/** \brief The FixedDiv2 function
\param a fixed_t number
\param b fixed_t number
\return a/b * FRACUNIT
*/
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedDiv2(fixed_t a, fixed_t b)
{
// This does not check for division overflow or division by 0!
// That is the caller's responsibility.
return (fixed_t)(((INT64)a * FRACUNIT) / b);
}
/** \brief The FixedInt function
@ -174,7 +76,6 @@ FUNCMATH fixed_t FixedDiv2(fixed_t a, fixed_t b);
\return a/FRACUNIT
*/
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedInt(fixed_t a)
{
return FixedMul(a, 1);
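As a quick sanity check on the C FixedMul/FixedDiv2 fallbacks above (assuming the usual FRACBITS = 16 and FRACUNIT = 1 << FRACBITS = 65536 from this header):

	fixed_t three_halves = 3 * FRACUNIT / 2;             /* 1.5 -> 98304  */
	fixed_t two          = 2 * FRACUNIT;                 /* 2.0 -> 131072 */
	fixed_t product      = FixedMul(three_halves, two);  /* (98304 * 131072) >> 16 = 196608 = 3.0 */
	fixed_t quotient     = FixedDiv2(product, two);      /* (196608 * 65536) / 131072 = 98304 = 1.5 */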

View file

@ -1943,430 +1943,11 @@ char *sizeu5(size_t num)
return sizeu5_buf;
}
#if defined (__GNUC__) && defined (__i386__) // from libkwave, under GPL
// Alam: note libkwave memcpy code comes from mplayer's libvo/aclib_template.c, r699
/* for small memory blocks (<256 bytes) this version is faster */
#define small_memcpy(dest,src,n)\
{\
register unsigned long int dummy;\
__asm__ __volatile__(\
"cld\n\t"\
"rep; movsb"\
:"=&D"(dest), "=&S"(src), "=&c"(dummy)\
:"0" (dest), "1" (src),"2" (n)\
: "memory", "cc");\
}
/* linux kernel __memcpy (from: /include/asm/string.h) */
ATTRINLINE static FUNCINLINE void *__memcpy (void *dest, const void * src, size_t n)
void *M_Memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
if ( n < 4 )
{
small_memcpy(dest, src, n);
}
else
{
__asm__ __volatile__ (
"rep ; movsl;"
"testb $2,%b4;"
"je 1f;"
"movsw;"
"1:\ttestb $1,%b4;"
"je 2f;"
"movsb;"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) dest),"2" ((long) src)
: "memory");
}
return dest;
}
#define SSE_MMREG_SIZE 16
#define MMX_MMREG_SIZE 8
#define MMX1_MIN_LEN 0x800 /* 2K blocks */
#define MIN_LEN 0x40 /* 64-byte blocks */
/* SSE note: i tried to move 128 bytes a time instead of 64 but it
didn't make any measureable difference. i'm using 64 for the sake of
simplicity. [MF] */
static /*FUNCTARGET("sse2")*/ void *sse_cpy(void * dest, const void * src, size_t n)
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetchnta (%0);"
"prefetchnta 32(%0);"
"prefetchnta 64(%0);"
"prefetchnta 96(%0);"
"prefetchnta 128(%0);"
"prefetchnta 160(%0);"
"prefetchnta 192(%0);"
"prefetchnta 224(%0);"
"prefetchnta 256(%0);"
"prefetchnta 288(%0);"
: : "r" (src) );
if (n >= MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1);
if (delta)
{
delta=SSE_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
if (((unsigned long)src) & 15)
/* if SRC is misaligned */
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movups (%0), %%xmm0;"
"movups 16(%0), %%xmm1;"
"movups 32(%0), %%xmm2;"
"movups 48(%0), %%xmm3;"
"movntps %%xmm0, (%1);"
"movntps %%xmm1, 16(%1);"
"movntps %%xmm2, 32(%1);"
"movntps %%xmm3, 48(%1);"
:: "r" (src), "r" (dest) : "memory");
src = (const unsigned char *)src + 64;
dest = (unsigned char *)dest + 64;
}
else
/*
Only if SRC is aligned on 16-byte boundary.
It allows to use movaps instead of movups, which required data
to be aligned or a general-protection exception (#GP) is generated.
*/
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movaps (%0), %%xmm0;"
"movaps 16(%0), %%xmm1;"
"movaps 32(%0), %%xmm2;"
"movaps 48(%0), %%xmm3;"
"movntps %%xmm0, (%1);"
"movntps %%xmm1, 16(%1);"
"movntps %%xmm2, 32(%1);"
"movntps %%xmm3, 48(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
/* since movntq is weakly-ordered, a "sfence"
* is needed to become ordered again. */
__asm__ __volatile__ ("sfence":::"memory");
/* enables to use FPU */
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n)
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetchnta (%0);"
"prefetchnta 32(%0);"
"prefetchnta 64(%0);"
"prefetchnta 96(%0);"
"prefetchnta 128(%0);"
"prefetchnta 160(%0);"
"prefetchnta 192(%0);"
"prefetchnta 224(%0);"
"prefetchnta 256(%0);"
"prefetchnta 288(%0);"
: : "r" (src));
if (n >= MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movntq %%mm0, (%1);"
"movntq %%mm1, 8(%1);"
"movntq %%mm2, 16(%1);"
"movntq %%mm3, 24(%1);"
"movntq %%mm4, 32(%1);"
"movntq %%mm5, 40(%1);"
"movntq %%mm6, 48(%1);"
"movntq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
/* since movntq is weakly-ordered, a "sfence"
* is needed to become ordered again. */
__asm__ __volatile__ ("sfence":::"memory");
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetch (%0);"
"prefetch 32(%0);"
"prefetch 64(%0);"
"prefetch 96(%0);"
"prefetch 128(%0);"
"prefetch 160(%0);"
"prefetch 192(%0);"
"prefetch 224(%0);"
"prefetch 256(%0);"
"prefetch 288(%0);"
: : "r" (src));
if (n >= MMX1_MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetch 320(%0);"
"prefetch 352(%0);"
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movq %%mm0, (%1);"
"movq %%mm1, 8(%1);"
"movq %%mm2, 16(%1);"
"movq %%mm3, 24(%1);"
"movq %%mm4, 32(%1);"
"movq %%mm5, 40(%1);"
"movq %%mm6, 48(%1);"
"movq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
__asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
#endif
// Alam: why? memcpy may be __cdecl/_System and our code may be not the same type
static void *cpu_cpy(void *dest, const void *src, size_t n)
{
if (src == NULL)
{
CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
return dest;
}
if(dest == NULL)
{
CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
return dest;
}
return memcpy(dest, src, n);
}
static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n)
{
#if defined (_MSC_VER) && defined (_X86_)
_asm
{
mov ecx, [n]
mov esi, [src]
mov edi, [dest]
shr ecx, 6 // mit mmx: 64bytes per iteration
jz lower_64 // if lower than 64 bytes
loop_64: // MMX transfers multiples of 64bytes
movq mm0, 0[ESI] // read sources
movq mm1, 8[ESI]
movq mm2, 16[ESI]
movq mm3, 24[ESI]
movq mm4, 32[ESI]
movq mm5, 40[ESI]
movq mm6, 48[ESI]
movq mm7, 56[ESI]
movq 0[EDI], mm0 // write destination
movq 8[EDI], mm1
movq 16[EDI], mm2
movq 24[EDI], mm3
movq 32[EDI], mm4
movq 40[EDI], mm5
movq 48[EDI], mm6
movq 56[EDI], mm7
add esi, 64
add edi, 64
dec ecx
jnz loop_64
emms // close mmx operation
lower_64:// transfer rest of buffer
mov ebx,esi
sub ebx,src
mov ecx,[n]
sub ecx,ebx
shr ecx, 3 // multiples of 8 bytes
jz lower_8
loop_8:
movq mm0, [esi] // read source
movq [edi], mm0 // write destination
add esi, 8
add edi, 8
dec ecx
jnz loop_8
emms // close mmx operation
lower_8:
mov ebx,esi
sub ebx,src
mov ecx,[n]
sub ecx,ebx
rep movsb
mov eax, [dest] // return dest
}
#elif defined (__GNUC__) && defined (__i386__)
void *retval = dest;
size_t i;
if (n >= MMX1_MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movq %%mm0, (%1);"
"movq %%mm1, 8(%1);"
"movq %%mm2, 16(%1);"
"movq %%mm3, 24(%1);"
"movq %%mm4, 32(%1);"
"movq %%mm5, 40(%1);"
"movq %%mm6, 48(%1);"
"movq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
#else
return cpu_cpy(dest, src, n);
#endif
}
void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy;
/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE
* Do not use on overlapped memory, use memmove for that
*/
void M_SetupMemcpy(void)
{
#if defined (__GNUC__) && defined (__i386__)
if (R_SSE2)
M_Memcpy = sse_cpy;
else if (R_MMXExt)
M_Memcpy = mmx2_cpy;
else if (R_3DNow)
M_Memcpy = mmx1_cpy;
else
#endif
if (R_MMX)
M_Memcpy = mmx_cpy;
#if 0
M_Memcpy = cpu_cpy;
#endif
}
/** Return the appropriate message for a file error or end of file.
*/
const char *M_FileError(FILE *fp)

View file

@ -98,8 +98,6 @@ TMatrix *RotateZMatrix(angle_t rad);
// s1 = s2+s3+s1 (1024 lenghtmax)
void strcatbf(char *s1, const char *s2, const char *s3);
void M_SetupMemcpy(void);
const char *M_FileError(FILE *handle);
// counting bits, for weapon ammo code, usually

View file

@ -1,278 +0,0 @@
/*********************************************************
*
* File: p5prof.h
* By: Kevin Baca
*
* MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG
* (WHICH MEANS WRITE THE LOW DWORD FIRST)
*
* Now in yer code do:
* INT64 count,total;
*
* ...
* RDMSR(0x10,&count); //inner loop count
* total += count;
* ...
*
* printf("0x%x %x", (INT32)total, *((INT32 *)&total+1));
* // HIGH LOW
*
*********************************************************/
/**\file
\brief This file provides macros to profile your code.
Here's how they work...
As you may or may not know, the Pentium class of
processors provides extremely fine grained profiling
capabilities through the use of what are called
Machine Specific Registers (MSRs). These registers
can provide information about almost any aspect of
CPU performance down to a single cycle.
The MSRs of interest for profiling are specified by
indices 0x10, 0x11, 0x12, and 0x13. Here is a brief
description of each of these registers:
MSR 0x10
This register is simple a cycle counter.
MSR 0x11
This register controls what type of profiling data
will be gathered.
MSRs 0x12 and 0x13
These registers gather the profiling data specified in
MSR 0x11.
Each MSR is 64 bits wide. For the Pentium processor,
only the lower 32 bits of MSR 0x11 are valid. Bits 0-15
specify what data will be gathered in MSR 0x12. Bits 16-31
specify what data will be gathered in MSR 0x13. Both sets
of bits have the same format:
Bits 0-5 specify which hardware event will be tracked.
Bit 6, if set, indicates events will be tracked in
rings 0-2.
Bit 7, if set, indicates events will be tracked in
ring 3.
Bit 8, if set, indicates cycles should be counted for
the specified event. If clear, it indicates the
number of events should be counted.
Two instructions are provided for manupulating the MSRs.
RDMSR (Read Machine Specific Register) and WRMSR
(Write Machine Specific Register). These opcodes were
originally undocumented and therefore most assemblers don't
recognize them. Their byte codes are provided in the
macros below.
RDMSR takes the MSR index in ecx and the profiling criteria
in edx : eax.
WRMSR takes the MSR index in ecx and returns the profile data
in edx : eax.
Two profiling registers limits profiling capability to
gathering only two types of information. The register
usage can, however, be combined in interesting ways.
For example, you can set one register to gather the
number of a specific type of event while the other gathers
the number of cycles for the same event. Or you can
gather the number of two separate events while using
MSR 0x10 to gather the number of cycles.
The enumerated list provides somewhat readable labels for
the types of events that can be tracked.
For more information, get ahold of appendix H from the
Intel Pentium programmer's manual (I don't remember the
order number) or go to
http://green.kaist.ac.kr/jwhahn/art3.htm.
That's an article by Terje Mathisen where I got most of
my information.
You may use this code however you wish. I hope it's
useful and I hope I got everything right.
-Kevin
kbaca@skygames.com
*/
#ifdef __GNUC__
#define RDTSC(_dst) \
__asm__("
.byte 0x0F,0x31
movl %%edx,(%%edi)
movl %%eax,4(%%edi)"\
: : "D" (_dst) : "eax", "edx", "edi")
// the old code... swapped it
// movl %%edx,(%%edi)
// movl %%eax,4(%%edi)"
#define RDMSR(_msri, _msrd) \
__asm__("
.byte 0x0F,0x32
movl %%eax,(%%edi)
movl %%edx,4(%%edi)"\
: : "c" (_msri), "D" (_msrd) : "eax", "ecx", "edx", "edi")
#define WRMSR(_msri, _msrd) \
__asm__("
xorl %%edx,%%edx
.byte 0x0F,0x30"\
: : "c" (_msri), "a" (_msrd) : "eax", "ecx", "edx")
#define RDMSR_0x12_0x13(_msr12, _msr13) \
__asm__("
movl $0x12,%%ecx
.byte 0x0F,0x32
movl %%edx,(%%edi)
movl %%eax,4(%%edi)
movl $0x13,%%ecx
.byte 0x0F,0x32
movl %%edx,(%%esi)
movl %%eax,4(%%esi)"\
: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "edi")
#define ZERO_MSR_0x12_0x13() \
__asm__("
xorl %%edx,%%edx
xorl %%eax,%%eax
movl $0x12,%%ecx
.byte 0x0F,0x30
movl $0x13,%%ecx
.byte 0x0F,0x30"\
: : : "eax", "ecx", "edx")
#elif defined (__WATCOMC__)
extern void RDTSC(UINT32 *dst);
#pragma aux RDTSC =\
"db 0x0F,0x31"\
"mov [edi],edx"\
"mov [4+edi],eax"\
parm [edi]\
modify [eax edx edi];
extern void RDMSR(UINT32 msri, UINT32 *msrd);
#pragma aux RDMSR =\
"db 0x0F,0x32"\
"mov [edi],edx"\
"mov [4+edi],eax"\
parm [ecx] [edi]\
modify [eax ecx edx edi];
extern void WRMSR(UINT32 msri, UINT32 msrd);
#pragma aux WRMSR =\
"xor edx,edx"\
"db 0x0F,0x30"\
parm [ecx] [eax]\
modify [eax ecx edx];
extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13);
#pragma aux RDMSR_0x12_0x13 =\
"mov ecx,0x12"\
"db 0x0F,0x32"\
"mov [edi],edx"\
"mov [4+edi],eax"\
"mov ecx,0x13"\
"db 0x0F,0x32"\
"mov [esi],edx"\
"mov [4+esi],eax"\
parm [edi] [esi]\
modify [eax ecx edx edi esi];
extern void ZERO_MSR_0x12_0x13(void);
#pragma aux ZERO_MSR_0x12_0x13 =\
"xor edx,edx"\
"xor eax,eax"\
"mov ecx,0x12"\
"db 0x0F,0x30"\
"mov ecx,0x13"\
"db 0x0F,0x30"\
modify [eax ecx edx];
#endif
typedef enum
{
DataRead,
DataWrite,
DataTLBMiss,
DataReadMiss,
DataWriteMiss,
WriteHitEM,
DataCacheLinesWritten,
DataCacheSnoops,
DataCacheSnoopHit,
MemAccessBothPipes,
BankConflict,
MisalignedDataRef,
CodeRead,
CodeTLBMiss,
CodeCacheMiss,
SegRegLoad,
RESERVED0,
RESERVED1,
Branch,
BTBHit,
TakenBranchOrBTBHit,
PipelineFlush,
InstructionsExeced,
InstructionsExecedVPipe,
BusUtilizationClocks,
PipelineStalledWriteBackup,
PipelineStalledDateMemRead,
PipeLineStalledWriteEM,
LockedBusCycle,
IOReadOrWriteCycle,
NonCacheableMemRef,
AGI,
RESERVED2,
RESERVED3,
FPOperation,
Breakpoint0Match,
Breakpoint1Match,
Breakpoint2Match,
Breakpoint3Match,
HWInterrupt,
DataReadOrWrite,
DataReadOrWriteMiss
};
#define PROF_CYCLES (0x100)
#define PROF_EVENTS (0x000)
#define RING_012 (0x40)
#define RING_3 (0x80)
#define RING_0123 (RING_012 | RING_3)
/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/
#define ProfSetProfiles(_msr12, _msr13)\
{\
UINT32 prof;\
\
prof = (_msr12) | ((_msr13) << 16);\
WRMSR(0x11, prof);\
}
/*void ProfBeginProfiles(void);*/
#define ProfBeginProfiles()\
ZERO_MSR_0x12_0x13();
/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/
#define ProfGetProfiles(_msr12, _msr13)\
RDMSR_0x12_0x13(_msr12, _msr13);
/*void ProfZeroTimer(void);*/
#define ProfZeroTimer()\
WRMSR(0x10, 0);
/*void ProfReadTimer(UINT32 timer[2]);*/
#define ProfReadTimer(timer)\
RDMSR(0x10, timer);
/*EOF*/

View file

@ -138,20 +138,6 @@ void R_DrawColumn_8(void);
void R_DrawShadeColumn_8(void);
void R_DrawTranslucentColumn_8(void);
#ifdef USEASM
void ASMCALL R_DrawColumn_8_ASM(void);
#define R_DrawWallColumn_8_ASM R_DrawColumn_8_ASM
void ASMCALL R_DrawShadeColumn_8_ASM(void);
void ASMCALL R_DrawTranslucentColumn_8_ASM(void);
void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void);
void ASMCALL R_DrawColumn_8_MMX(void);
#define R_DrawWallColumn_8_MMX R_DrawColumn_8_MMX
void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void);
void ASMCALL R_DrawSpan_8_MMX(void);
#endif
void R_DrawTranslatedColumn_8(void);
void R_DrawTranslatedTranslucentColumn_8(void);
void R_DrawSpan_8(void);

View file

@ -23,11 +23,6 @@ static wallsplat_t wallsplats[MAXLEVELSPLATS]; // WALL splats
static INT32 freewallsplat;
#endif
#ifdef USEASM
/// \brief for floorsplats \note accessed by asm code
struct rastery_s *prastertab;
#endif
#ifdef FLOORSPLATS
static floorsplat_t floorsplats[1]; // FLOOR splats
static INT32 freefloorsplat;
@ -339,12 +334,6 @@ void R_AddVisibleFloorSplats(subsector_t *subsec)
}
}
#ifdef USEASM
// tv1, tv2 = x/y qui varie dans la texture, tc = x/y qui est constant.
void ASMCALL rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2,
INT32 tc, INT32 dir);
#endif
// current test with floor tile
//#define FLOORSPLATSOLIDCOLOR

View file

@ -33,10 +33,6 @@
// SRB2Kart
#include "r_fps.h" // R_GetFramerateCap
#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200))
#define RUSEASM //MSC.NET can't patch itself
#endif
// --------------------------------------------
// assembly or c drawer routines for 8bpp/16bpp
// --------------------------------------------
@ -94,16 +90,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
// Short and Tall sky drawer, for the current color mode
void (*walldrawerfunc)(void);
boolean R_ASM = true;
boolean R_486 = false;
boolean R_586 = false;
boolean R_MMX = false;
boolean R_SSE = false;
boolean R_3DNow = false;
boolean R_MMXExt = false;
boolean R_SSE2 = false;
void SCR_SetMode(void)
{
if (dedicated)
@ -132,28 +118,6 @@ void SCR_SetMode(void)
walldrawerfunc = R_DrawWallColumn_8;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8;
twosmultipatchtransfunc = R_Draw2sMultiPatchTranslucentColumn_8;
#ifdef RUSEASM
if (R_ASM)
{
if (R_MMX)
{
colfunc = basecolfunc = R_DrawColumn_8_MMX;
//shadecolfunc = R_DrawShadeColumn_8_ASM;
//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
walldrawerfunc = R_DrawWallColumn_8_MMX;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_MMX;
spanfunc = basespanfunc = R_DrawSpan_8_MMX;
}
else
{
colfunc = basecolfunc = R_DrawColumn_8_ASM;
//shadecolfunc = R_DrawShadeColumn_8_ASM;
//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
walldrawerfunc = R_DrawWallColumn_8_ASM;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_ASM;
}
}
#endif
}
/* else if (vid.bpp > 1)
{
@ -181,50 +145,6 @@ void SCR_SetMode(void)
//
void SCR_Startup(void)
{
const CPUInfoFlags *RCpuInfo = I_CPUInfo();
if (!M_CheckParm("-NOCPUID") && RCpuInfo)
{
#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__)
R_486 = true;
#endif
if (RCpuInfo->RDTSC)
R_586 = true;
if (RCpuInfo->MMX)
R_MMX = true;
if (RCpuInfo->AMD3DNow)
R_3DNow = true;
if (RCpuInfo->MMXExt)
R_MMXExt = true;
if (RCpuInfo->SSE)
R_SSE = true;
if (RCpuInfo->SSE2)
R_SSE2 = true;
CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
}
if (M_CheckParm("-noASM"))
R_ASM = false;
if (M_CheckParm("-486"))
R_486 = true;
if (M_CheckParm("-586"))
R_586 = true;
if (M_CheckParm("-MMX"))
R_MMX = true;
if (M_CheckParm("-3DNow"))
R_3DNow = true;
if (M_CheckParm("-MMXExt"))
R_MMXExt = true;
if (M_CheckParm("-SSE"))
R_SSE = true;
if (M_CheckParm("-noSSE"))
R_SSE = false;
if (M_CheckParm("-SSE2"))
R_SSE2 = true;
M_SetupMemcpy();
if (dedicated)
{
V_Init();

View file

@ -138,17 +138,6 @@ extern void (*transtransfunc)(void);
extern void (*twosmultipatchfunc)(void);
extern void (*twosmultipatchtransfunc)(void);
// -----
// CPUID
// -----
extern boolean R_ASM;
extern boolean R_486;
extern boolean R_586;
extern boolean R_MMX;
extern boolean R_3DNow;
extern boolean R_MMXExt;
extern boolean R_SSE2;
// ----------------
// screen variables
// ----------------

View file

@ -7,7 +7,6 @@
NOHW=1
NOHS=1
NOASM=1
OPTS+=-DLINUX

View file

@ -39,7 +39,6 @@ endif
#
ifdef SOLARIS
NOIPX=1
NOASM=1
OPTS+=-DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP
OPTS+=-I/usr/local/include -I/opt/sfw/include
LDFLAGS+=-L/opt/sfw/lib

View file

@ -37,14 +37,6 @@ else
endif
endif
#use the x86 asm code
ifndef CYGWIN32
ifndef NOASM
USEASM=1
endif
endif
OBJS+=$(OBJDIR)/i_video.o $(OBJDIR)/dosstr.o $(OBJDIR)/endtxt.o $(OBJDIR)/hwsym_sdl.o
OPTS+=-DDIRECTFULLSCREEN -DHAVE_SDL

View file

@ -70,40 +70,6 @@ char logfilename[1024];
typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID);
#endif
#if defined (_WIN32)
static inline VOID MakeCodeWritable(VOID)
{
#ifdef USEASM // Disable write-protection of code segment
DWORD OldRights;
const DWORD NewRights = PAGE_EXECUTE_READWRITE;
PBYTE pBaseOfImage = (PBYTE)GetModuleHandle(NULL);
PIMAGE_DOS_HEADER dosH =(PIMAGE_DOS_HEADER)pBaseOfImage;
PIMAGE_NT_HEADERS ntH = (PIMAGE_NT_HEADERS)(pBaseOfImage + dosH->e_lfanew);
PIMAGE_OPTIONAL_HEADER oH = (PIMAGE_OPTIONAL_HEADER)
((PBYTE)ntH + sizeof (IMAGE_NT_SIGNATURE) + sizeof (IMAGE_FILE_HEADER));
LPVOID pA = pBaseOfImage+oH->BaseOfCode;
SIZE_T pS = oH->SizeOfCode;
#if 1 // try to find the text section
PIMAGE_SECTION_HEADER ntS = IMAGE_FIRST_SECTION (ntH);
WORD s;
for (s = 0; s < ntH->FileHeader.NumberOfSections; s++)
{
if (memcmp (ntS[s].Name, ".text\0\0", 8) == 0)
{
pA = pBaseOfImage+ntS[s].VirtualAddress;
pS = ntS[s].Misc.VirtualSize;
break;
}
}
#endif
if (!VirtualProtect(pA,pS,NewRights,&OldRights))
I_Error("Could not make code writable\n");
#endif
}
#endif
#ifdef _WIN32
static void
ChDirToExe (void)
@ -185,7 +151,6 @@ int main(int argc, char **argv)
#ifndef __MINGW32__
prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo);
#endif
MakeCodeWritable();
#endif
// startup SRB2

View file

@ -3910,69 +3910,6 @@ UINT32 I_GetFreeMem(UINT32 *total)
#endif
}
const CPUInfoFlags *I_CPUInfo(void)
{
#if defined (_WIN32)
static CPUInfoFlags WIN_CPUInfo;
SYSTEM_INFO SI;
p_IsProcessorFeaturePresent pfnCPUID = (p_IsProcessorFeaturePresent)(LPVOID)GetProcAddress(GetModuleHandleA("kernel32.dll"), "IsProcessorFeaturePresent");
ZeroMemory(&WIN_CPUInfo,sizeof (WIN_CPUInfo));
if (pfnCPUID)
{
WIN_CPUInfo.FPPE = pfnCPUID( 0); //PF_FLOATING_POINT_PRECISION_ERRATA
WIN_CPUInfo.FPE = pfnCPUID( 1); //PF_FLOATING_POINT_EMULATED
WIN_CPUInfo.cmpxchg = pfnCPUID( 2); //PF_COMPARE_EXCHANGE_DOUBLE
WIN_CPUInfo.MMX = pfnCPUID( 3); //PF_MMX_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.PPCMM64 = pfnCPUID( 4); //PF_PPC_MOVEMEM_64BIT_OK
WIN_CPUInfo.ALPHAbyte = pfnCPUID( 5); //PF_ALPHA_BYTE_INSTRUCTIONS
WIN_CPUInfo.SSE = pfnCPUID( 6); //PF_XMMI_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.AMD3DNow = pfnCPUID( 7); //PF_3DNOW_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.RDTSC = pfnCPUID( 8); //PF_RDTSC_INSTRUCTION_AVAILABLE
WIN_CPUInfo.PAE = pfnCPUID( 9); //PF_PAE_ENABLED
WIN_CPUInfo.SSE2 = pfnCPUID(10); //PF_XMMI64_INSTRUCTIONS_AVAILABLE
//WIN_CPUInfo.blank = pfnCPUID(11); //PF_SSE_DAZ_MODE_AVAILABLE
WIN_CPUInfo.DEP = pfnCPUID(12); //PF_NX_ENABLED
WIN_CPUInfo.SSE3 = pfnCPUID(13); //PF_SSE3_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.cmpxchg16b = pfnCPUID(14); //PF_COMPARE_EXCHANGE128
WIN_CPUInfo.cmp8xchg16 = pfnCPUID(15); //PF_COMPARE64_EXCHANGE128
WIN_CPUInfo.PFC = pfnCPUID(16); //PF_CHANNELS_ENABLED
}
#ifdef HAVE_SDLCPUINFO
else
{
WIN_CPUInfo.RDTSC = SDL_HasRDTSC();
WIN_CPUInfo.MMX = SDL_HasMMX();
WIN_CPUInfo.AMD3DNow = SDL_Has3DNow();
WIN_CPUInfo.SSE = SDL_HasSSE();
WIN_CPUInfo.SSE2 = SDL_HasSSE2();
WIN_CPUInfo.AltiVec = SDL_HasAltiVec();
}
WIN_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
WIN_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
#endif
GetSystemInfo(&SI);
WIN_CPUInfo.CPUs = SI.dwNumberOfProcessors;
WIN_CPUInfo.IA64 = (SI.dwProcessorType == 2200); // PROCESSOR_INTEL_IA64
WIN_CPUInfo.AMD64 = (SI.dwProcessorType == 8664); // PROCESSOR_AMD_X8664
return &WIN_CPUInfo;
#elif defined (HAVE_SDLCPUINFO)
static CPUInfoFlags SDL_CPUInfo;
memset(&SDL_CPUInfo,0,sizeof (CPUInfoFlags));
SDL_CPUInfo.RDTSC = SDL_HasRDTSC();
SDL_CPUInfo.MMX = SDL_HasMMX();
SDL_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
SDL_CPUInfo.AMD3DNow = SDL_Has3DNow();
SDL_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
SDL_CPUInfo.SSE = SDL_HasSSE();
SDL_CPUInfo.SSE2 = SDL_HasSSE2();
SDL_CPUInfo.AltiVec = SDL_HasAltiVec();
return &SDL_CPUInfo;
#else
return NULL; /// \todo CPUID asm
#endif
}
// note CPUAFFINITY code used to reside here
void I_RegisterSysCommands(void) {}
#endif

View file

@ -1,957 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2018 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode.
;; Draw wall columns.
[BITS 32]
%define FRACBITS 16
%define TRANSPARENTPIXEL 247
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern ylookup
cextern columnofs
cextern dc_source
cextern dc_texturemid
cextern dc_texheight
cextern dc_iscale
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_colormap
cextern dc_transmap
cextern colormaps
cextern vid
cextern topleft
; DELME
cextern R_DrawColumn_8
; polygon edge rasterizer
cextern prastertab
[SECTION .data]
;;.align 4
loopcount dd 0
pixelcount dd 0
tystep dd 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_ASM
; align 16
R_DrawColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov edi,[ylookup+ebp*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx,[dc_yh]
add ecx,1
sub ecx,ebp ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
mov eax,ebp ;; dc_yl
shl eax,FRACBITS
sub eax,[centeryfrac]
imul dword [dc_iscale]
shrd eax,edx,FRACBITS
add eax,[dc_texturemid]
mov ebp,eax ;; ebp = frac
mov ebx,[dc_colormap]
mov esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires],0x01
jz .texheightcheck
xor ebp,ebp
;;
;; Check for power of two
;;
.texheightcheck:
mov edx,[dc_texheight]
sub edx,1 ;; edx = heightmask
test edx,[dc_texheight]
jnz .notpowertwo
test ecx,0x01 ;; Test for odd no. pixels
jnz .odd
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking
;;
.powertwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
.odd:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,2 ;; count -= 2
jg .powertwo
jmp .done
.notpowertwo:
add edx,1
shl edx,FRACBITS
test ebp,ebp
jns .notpowtwoloop
.makefracpos:
add ebp,edx ;; frac is negative; make it positive
js .makefracpos
.notpowtwoloop:
cmp ebp,edx ;; Reduce mod height
jl .writenonpowtwo
sub ebp,edx
jmp .notpowtwoloop
.writenonpowtwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part.
mov bl,[esi + eax] ;; ebx = colormap + texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,1
jnz .notpowtwoloop
;;
.done:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_ASM
; align 16
R_Draw2sMultiPatchColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov edi,[ylookup+ebp*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx,[dc_yh]
add ecx,1
sub ecx,ebp ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
mov eax,ebp ;; dc_yl
shl eax,FRACBITS
sub eax,[centeryfrac]
imul dword [dc_iscale]
shrd eax,edx,FRACBITS
add eax,[dc_texturemid]
mov ebp,eax ;; ebp = frac
mov ebx,[dc_colormap]
mov esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires],0x01
jz .texheightcheck
xor ebp,ebp
;;
;; Check for power of two
;;
.texheightcheck:
mov edx,[dc_texheight]
sub edx,1 ;; edx = heightmask
test edx,[dc_texheight]
jnz .notpowertwo
test ecx,0x01 ;; Test for odd no. pixels
jnz .odd
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking
;;
.powertwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextpowtwoeven ;; If so, advance.
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextpowtwoeven:
;; dest += vid.width
add edi,[vid + viddef_s.width]
.odd:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextpowtwoodd ;; If so, advance.
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextpowtwoodd:
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,2 ;; count -= 2
jg .powertwo
jmp .done
.notpowertwo:
add edx,1
shl edx,FRACBITS
test ebp,ebp
jns .notpowtwoloop
.makefracpos:
add ebp,edx ;; frac is negative; make it positive
js .makefracpos
.notpowtwoloop:
cmp ebp,edx ;; Reduce mod height
jl .writenonpowtwo
sub ebp,edx
jmp .notpowtwoloop
.writenonpowtwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part.
mov bl,[esi + eax] ;; ebx = colormap + texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp bl,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextnonpowtwo ;; If so, advance.
movzx eax,byte [ebx] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextnonpowtwo:
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,1
jnz .notpowtwoloop
;;
.done:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;; R_DrawTranslucentColumnA_8
;;
;; Vertical column texture drawer, with transparency. Replaces Doom2's
;; 'fuzz' effect, which was not so beautiful.
;; Transparency is always impressive in some way, don't know why...
;;----------------------------------------------------------------------
cglobal R_DrawTranslucentColumn_8_ASM
R_DrawTranslucentColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
mov [pixelcount],eax ;; save for final pixel
jle near vtdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
push cx
mov ecx,edx
pop cx
mov edx,[dc_colormap]
mov esi,[dc_source]
;;
;; lets rock :) !
;;
mov eax,[pixelcount]
shr eax,0x2
test byte [pixelcount],0x3
mov ch,al ;; quad count
mov eax,[dc_transmap]
je vt4quadloop
;;
;; do the odd leftover pixel
;;
test byte [pixelcount],0x1
je trf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pf: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
trf2: test byte [pixelcount],0x2
je trf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pg: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
ph: add edi,0x12345678
;;
;; test whether there were at least 4 pixels
;;
trf3: test ch,0xff ;; test quad count
je near vtdone
;;
;; ebp : ystep frac. upper 24 bits
;; edx : y frac. upper 24 bits
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen
;;
vt4quadloop:
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
pi: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pj: sub edi,0x12345678
mov ebp,edi
pk: sub edi,0x12345678
jmp short inloop
align 4
vtquadloop:
add ecx,[tystep]
adc bl,cl
q1: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
inloop:
add ecx,[tystep]
adc bl,cl
q2: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp+0x0],dl
mov al,[edi] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q3: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q4: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne vtquadloop
vtdone:
pop ebx
pop edi
pop esi
pop ebp
ret
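
The translucent drawer above boils down to one table lookup per pixel: the texel selects a 256-entry row of the translucency table, the pixel already on screen selects the column, and the blended index is then run through the light colormap. A minimal C sketch (assumed globals, 128-high texture matching the 0x7f masks above; not the project's actual drawer):

static void DrawTranslucentColumn_sketch(void)
{
    INT32 count = dc_yh - dc_yl + 1;
    if (count <= 0)
        return;

    UINT8 *dest = ylookup[dc_yl] + columnofs[dc_x];
    fixed_t step = dc_iscale;
    fixed_t frac = dc_texturemid - (centery - dc_yl) * step;

    for (; count; count--, dest += vid.width, frac += step)
    {
        UINT8 texel = dc_source[(frac >> FRACBITS) & 0x7f];
        *dest = dc_colormap[dc_transmap[(texel << 8) + *dest]];
    }
}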
;;----------------------------------------------------------------------
;; R_DrawShadeColumn
;;
;; for smoke..etc.. test.
;;----------------------------------------------------------------------
cglobal R_DrawShadeColumn_8_ASM
R_DrawShadeColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
mov [pixelcount],eax ;; save for final pixel
jle near shdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,byte +0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
mov esi,[dc_source]
;;
;; lets rock :) !
;;
mov eax,[pixelcount]
mov dh,al
shr eax,2
mov ch,al ;; quad count
mov eax,[colormaps]
test dh,3
je sh4quadloop
;;
;; do the odd leftover pixel
;;
test dh,0x1
je shf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pl: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
shf2:
test dh,0x2
je shf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pm: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pn: add edi,0x12345678
;;
;; test whether there were at least 4 pixels
;;
shf3:
test ch,0xff ;; test quad count
je near shdone
;;
;; ebp : ystep frac. upper 24 bits
;; edx : y frac. upper 24 bits
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen
;;
sh4quadloop:
mov dh,0x7f ;; prep mask
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
po: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pp: sub edi,0x12345678
mov ebp,edi
pq: sub edi,0x12345678
jmp short shinloop
align 4
shquadloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q5: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
shinloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q6: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q7: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q8: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne shquadloop
shdone:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
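
R_DrawShadeColumn_8_ASM works the other way around: the texel is not drawn at all, it only selects one of the 256-entry rows of the global colormaps table, and the pixel already on screen is remapped (darkened) through that row. A minimal C sketch under the same assumptions as above:

static void DrawShadeColumn_sketch(void)
{
    INT32 count = dc_yh - dc_yl + 1;
    if (count <= 0)
        return;

    UINT8 *dest = ylookup[dc_yl] + columnofs[dc_x];
    fixed_t step = dc_iscale;
    fixed_t frac = dc_texturemid - (centery - dc_yl) * step;

    for (; count; count--, dest += vid.width, frac += step)
    {
        UINT8 texel = dc_source[(frac >> FRACBITS) & 0x7f];
        *dest = colormaps[(texel << 8) + *dest];   /* shade the existing pixel */
    }
}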
;; ========================================================================
;; Rasterization of the edge segments of a LINEARLY textured polygon.
;; The texture coordinates are interpolated along the edges at the same
;; time as the minx/maxx X coordinates for each scanline.
;; The 'dir' argument indicates which texture coordinate is interpolated:
;; 0: segments associated with the TOP and BOTTOM edges (TY is constant)
;; 1: segments associated with the LEFT and RIGHT edges (TX is constant)
;; ========================================================================
;;
;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
;;
;; For dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, since TY is constant.
;;
;; For dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, since TX is constant.
;;
;;
;; Uses: extern struct rastery *_rastertab;
;;
MINX EQU 0
MAXX EQU 4
TX1 EQU 8
TY1 EQU 12
TX2 EQU 16
TY2 EQU 20
RASTERY_SIZEOF EQU 24
cglobal rasterize_segment_tex
rasterize_segment_tex:
push ebp
mov ebp,esp
sub esp,byte +0x8 ;; allocate the local variables
push ebx
push esi
push edi
o16 mov ax,es
push eax
;; #define DX [ebp-4]
;; #define TD [ebp-8]
mov eax,[ebp+0xc] ;; y1
mov ebx,[ebp+0x14] ;; y2
cmp ebx,eax
je near .L_finished ;; special (y1==y2) segment horizontal, exit!
jg near .L_rasterize_right
;;rasterize_left: ;; rasterize a LEFT edge segment of the polygon
mov ecx,eax
sub ecx,ebx
inc ecx ;; y1-y2+1
mov eax,RASTERY_SIZEOF
mul ebx ;; * y2
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y2]
mov eax,[ebp+0x8] ;; ARG1
sub eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; ((x1-x2)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y1-y2+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x18] ;; ARG5
sub eax,[ebp+0x1c] ;; ARG6
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
mov [ebp-0x8],eax ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1)
mov eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; x = x2<<PRE
mov ebx,[ebp+0x1c] ;; ARG6
shl ebx,0x10 ;; tx = tx2<<PRE d0
;; ty = ty2<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rleft_h_loop
;;
;; TY varies, TX is constant
;;
.L_rleft_v_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add ebx,ebp
mov [esi+TX1],edx ;; .tx1 = tx
add eax,edi
mov [esi+TY1],ebx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_v_loop
pop ebp
jmp .L_finished
;;
;; TX varies, TY is constant
;;
.L_rleft_h_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add eax,edi
mov [esi+TX1],ebx ;; .tx1 = tx
add ebx,ebp
mov [esi+TY1],edx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_h_loop
pop ebp
jmp .L_finished
;;
;; rasterize a RIGHT edge segment of the polygon
;;
.L_rasterize_right:
mov ecx,ebx
sub ecx,eax
inc ecx ;; y2-y1+1
mov ebx,RASTERY_SIZEOF
mul ebx ;; * y1
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y1]
mov eax,[ebp+0x10] ;; ARG3
sub eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; ((x2-x1)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y2-y1+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x1c] ;; ARG6
sub eax,[ebp+0x18] ;; ARG5
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
mov [ebp-0x8],eax ;; idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1)
mov eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; x = x1<<PRE
mov ebx,[ebp+0x18] ;; ARG5
shl ebx,0x10 ;; tx = tx1<<PRE d0
;; ty = ty1<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24], 0 ;; direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rright_h_loop
;;
;; TY varies, TX is constant
;;
.L_rright_v_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add ebx,ebp
mov [esi+TX2],edx ;; .tx2 = tx
add eax,edi
mov [esi+TY2],ebx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_v_loop
pop ebp
jmp short .L_finished
;;
;; TX varies, TY is constant
;;
.L_rright_h_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add eax,edi
mov [esi+TX2],ebx ;; .tx2 = tx
add ebx,ebp
mov [esi+TY2],edx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_h_loop
pop ebp
.L_finished:
pop eax
o16 mov es,ax
pop edi
pop esi
pop ebx
mov esp,ebp
pop ebp
ret
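
Reconstructed from the EQU offsets above, a rough C sketch of rasterize_segment_tex (struct and variable names are illustrative only): the segment is walked one scanline at a time, x and one texture coordinate are stepped in 16.16 fixed point while the other stays constant, and the results land in prastertab[y] as either the left (minx/tx1/ty1) or the right (maxx/tx2/ty2) edge, depending on whether the segment runs upwards or downwards.

typedef struct
{
    INT32 minx, maxx;
    INT32 tx1, ty1;
    INT32 tx2, ty2;
} rastery_sketch_t;

extern rastery_sketch_t *prastertab;

static void rasterize_segment_tex_sketch(INT32 x1, INT32 y1, INT32 x2, INT32 y2,
                                         INT32 tv1, INT32 tv2, INT32 tc, INT32 dir)
{
    if (y1 == y2)
        return;                        /* horizontal segment: nothing to do */

    if (y2 < y1)                       /* left edge: walk from y2 up to y1 */
    {
        INT32 count = y1 - y2 + 1;
        INT32 dx = ((x1 - x2) << 16) / count;
        INT32 dt = ((tv1 - tv2) << 16) / count;
        INT32 x = x2 << 16, v = tv2 << 16, c = tc << 16;
        rastery_sketch_t *r = prastertab + y2;
        for (; count; count--, r++, x += dx, v += dt)
        {
            r->minx = x;
            if (dir) { r->tx1 = c; r->ty1 = v; }   /* TY varies, TX constant */
            else     { r->tx1 = v; r->ty1 = c; }   /* TX varies, TY constant */
        }
    }
    else                               /* right edge: walk from y1 up to y2 */
    {
        INT32 count = y2 - y1 + 1;
        INT32 dx = ((x2 - x1) << 16) / count;
        INT32 dt = ((tv2 - tv1) << 16) / count;
        INT32 x = x1 << 16, v = tv1 << 16, c = tc << 16;
        rastery_sketch_t *r = prastertab + y1;
        for (; count; count--, r++, x += dx, v += dt)
        {
            r->maxx = x;
            if (dir) { r->tx2 = c; r->ty2 = v; }
            else     { r->tx2 = v; r->ty2 = c; }
        }
    }
}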

1587
src/tmap.s

File diff suppressed because it is too large

View file

@ -1,322 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap_asm.s
/// \brief ???
//.comm _dc_colormap,4
//.comm _dc_x,4
//.comm _dc_yl,4
//.comm _dc_yh,4
//.comm _dc_iscale,4
//.comm _dc_texturemid,4
//.comm _dc_source,4
//.comm _ylookup,4
//.comm _columnofs,4
//.comm _loopcount,4
//.comm _pixelcount,4
.data
_pixelcount:
.long 0x00000000
_loopcount:
.long 0x00000000
.align 8
_mmxcomm:
.long 0x00000000
.text
.align 4
.globl _R_DrawColumn8_NOMMX
_R_DrawColumn8_NOMMX:
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
movl _dc_yl,%edx
movl _dc_yh,%eax
subl %edx,%eax
leal 1(%eax),%ebx
testl %ebx,%ebx
jle rdc8ndone
movl _dc_x,%eax
movl _ylookup, %edi
movl (%edi,%edx,4),%esi
movl _columnofs, %edi
addl (%edi,%eax,4),%esi
movl _dc_iscale,%edi
movl %edx,%eax
imull %edi,%eax
movl _dc_texturemid,%ecx
addl %eax,%ecx
movl _dc_source,%ebp
xorl %edx, %edx
subl $0x12345678, %esi
.globl rdc8nwidth1
rdc8nwidth1:
.align 4,0x90
rdc8nloop:
movl %ecx,%eax
shrl $16,%eax
addl %edi,%ecx
andl $127,%eax
addl $0x12345678,%esi
.globl rdc8nwidth2
rdc8nwidth2:
movb (%eax,%ebp),%dl
movl _dc_colormap,%eax
movb (%eax,%edx),%al
movb %al,(%esi)
decl %ebx
jne rdc8nloop
rdc8ndone:
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
//
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
// By ES 1998/08/01
//
.globl _R_DrawColumn_8_Pentium
_R_DrawColumn_8_Pentium:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl _dc_yl,%eax // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
movl (%edi,%ebx,4),%ecx
subl %eax,%ebx // ebx=number of pixels-1
jl rdc8pdone // no pixel to draw, done
jnz rdc8pmany
movl _dc_x,%edx // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%eax
movl _dc_texturemid,%edi
addl %eax,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8pdone // done!
.align 4, 0x90
rdc8pmany: // draw >1 pixel
movl _dc_x,%edx
movl _columnofs, %edi
movl (%edi,%edx,4),%edx
leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
.globl rdc8pwidth5
rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%edx // edx = fracstep
imull %edx,%eax
shll $9, %edx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%ecx
addl %eax,%ecx // ecx = frac
movl _dc_colormap,%eax // eax = lighting/special effects LUT
shll $9, %ecx
movl _dc_source,%esi // esi = source ptr
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8pwidth6
rdc8pwidth6: // DeadBeef = -SCREENWIDTH
// Begin the calculation of the two first pixels
leal (%ecx, %edx), %ebp
shrl $25, %ecx
movb (%esi, %ecx), %al
leal (%edx, %ebp), %ecx
shrl $25, %ebp
movb (%eax), %dl
// The main loop
rdc8ploop:
movb (%esi,%ebp), %al // load 1
leal (%ecx, %edx), %ebp // calc frac 3
shrl $25, %ecx // shift frac 2
movb %dl, 0x12345678(%edi, %ebx)// store 0
.globl rdc8pwidth1
rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
movb (%eax), %al // lookup 1
movb %al, 0x12345678(%edi, %ebx)// store 1
.globl rdc8pwidth2
rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
movb (%esi, %ecx), %al // load 2
leal (%ebp, %edx), %ecx // calc frac 4
shrl $25, %ebp // shift frac 3
movb (%eax), %dl // lookup 2
addl $0x12345678, %ebx // counter
.globl rdc8pwidth3
rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
jl rdc8ploop // loop
// End of loop. Write extra pixel or just exit.
jnz rdc8pdone
movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
.globl rdc8pwidth4
rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8pdone:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
//
// MMX asm version, optimised for K6
// By ES 1998/07/05
//
.globl _R_DrawColumn_8_K6_MMX
_R_DrawColumn_8_K6_MMX:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
andl $7,%eax
addl $8,%eax
movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
subl %eax,%esp
movl _dc_yl,%edx // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
movl (%edi,%ebx,4),%ecx
subl %edx,%ebx // ebx=number of pixels-1
jl 0x12345678 // no pixel to draw, done
.globl rdc8moffs1
rdc8moffs1:
jnz rdc8mmany
movl _dc_x,%eax // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%edx
movl _dc_texturemid,%edi
addl %edx,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8mdone // done!
.globl rdc8moffs2
rdc8moffs2:
.align 4, 0x90
rdc8mmany: // draw >1 pixel
movl _dc_x,%eax
movl _columnofs, %edi
movl (%edi,%eax,4),%eax
leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
.globl rdc8mwidth3
rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%ecx // ecx = fracstep
imull %ecx,%edx
shll $9, %ecx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%eax
addl %edx,%eax // eax = frac
movl _dc_colormap,%edx // edx = lighting/special effects LUT
shll $9, %eax
leal (%ecx, %ecx), %edi
movl _dc_source,%ebp // ebp = source ptr
movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8mwidth5
rdc8mwidth5: // DeadBeef = -SCREENWIDTH
movl %edi, 4(%esp)
leal (%eax, %ecx), %edi
movq 0(%esp), %mm1 // fracstep:fracstep in mm1
movl %eax, 0(%esp)
shrl $25, %eax
movl %edi, 4(%esp)
movzbl (%ebp, %eax), %eax
movq 0(%esp), %mm0 // frac:frac in mm0
paddd %mm1, %mm0
shrl $25, %edi
movq %mm0, %mm2
psrld $25, %mm2 // texture index in mm2
paddd %mm1, %mm0
movq %mm2, 0(%esp)
.globl rdc8mloop
rdc8mloop: // The main loop
movq %mm0, %mm2 // move 4-5 to temp reg
movzbl (%ebp, %edi), %edi // read 1
psrld $25, %mm2 // shift 4-5
movb (%edx,%eax), %cl // lookup 0
movl 0(%esp), %eax // load 2
addl $0x12345678, %ebx // counter
.globl rdc8mwidth2
rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH
movb %cl, (%esi, %ebx) // write 0
movb (%edx,%edi), %ch // lookup 1
movb %ch, 0x12345678(%esi, %ebx) // write 1
.globl rdc8mwidth1
rdc8mwidth1: // DeadBeef = SCREENWIDTH
movl 4(%esp), %edi // load 3
paddd %mm1, %mm0 // frac 6-7
movzbl (%ebp, %eax), %eax // lookup 2
movq %mm2, 0(%esp) // store texture index 4-5
jl rdc8mloop
jnz rdc8mno_odd
movb (%edx,%eax), %cl // write the last odd pixel
movb %cl, 0x12345678(%esi)
.globl rdc8mwidth4
rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8mno_odd:
.globl rdc8mdone
rdc8mdone:
emms
addl _mmxcomm, %esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
// Need some extra space to align run-time
.globl R_DrawColumn_8_K6_MMX_end
R_DrawColumn_8_K6_MMX_end:
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;
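
The 0x12345678 immediates in the three drawers above are placeholders: the exported rdc8*width* labels sit right after the instructions that contain them, and the "DeadBeef = n*SCREENWIDTH" comments give the value that is meant to be patched in once the video mode (and therefore the row stride) is known. A purely hypothetical C sketch of that patching, only to illustrate the mechanism; the helper name, the patched label, and the assumption that the code page is writable are all illustrative.

#include <string.h>

extern unsigned char rdc8pwidth1[];    /* label exported from the asm above */

static void PatchRowStride_sketch(int screenwidth)
{
    /* The label marks the end of the instruction whose last four bytes are
       the placeholder immediate, so overwrite the dword just before it. */
    int value = 2 * screenwidth;       /* "DeadBeef = 2*SCREENWIDTH" */
    memcpy((unsigned char *)rdc8pwidth1 - sizeof(value), &value, sizeof(value));
}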

View file

@ -1,674 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DOSDOOM.
;; Copyright (C) 2010-2018 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_mmx.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode, using SIMD
;; instructions.
;; Draw wall columns.
[BITS 32]
%define FRACBITS 16
%define TRANSPARENTPIXEL 247
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_colormap
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern dc_iscale
cextern dc_texturemid
cextern dc_texheight
cextern dc_source
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_transmap
cextern R_DrawColumn_8_ASM
cextern R_Draw2sMultiPatchColumn_8_ASM
;; spans
cextern nflatshiftup
cextern nflatxshift
cextern nflatyshift
cextern nflatmask
cextern ds_xfrac
cextern ds_yfrac
cextern ds_xstep
cextern ds_ystep
cextern ds_x1
cextern ds_x2
cextern ds_y
cextern ds_source
cextern ds_colormap
cextern ylookup
cextern columnofs
cextern vid
[SECTION .data]
nflatmask64 dq 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; MMX column drawer.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = vid.width
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = 2 * fracstep, twice
;; mm3 = pair of consecutive fracs
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_MMX
R_DrawColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
test edx, [dc_texheight]
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
imul dword [dc_iscale]
shrd eax, edx, FRACBITS
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx]
movzx edx, byte [ebx + edx]
mov [edi], dl
add edi, [vid + viddef_s.width]
sub ecx, 1
jz .done
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;;
mov ebp, [vid + viddef_s.width]
align 16
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
mov [edi], al ;; 2B 1(2)u. First pixel
;; IFETCH boundary
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
;; IFETCH boundary
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
.usenonMMX:
call R_DrawColumn_8_ASM
jmp .done
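
Structurally, the MMX drawer above is the familiar column loop with the step doubled so that two pixels are produced per iteration (one leftover pixel is drawn first when the count is odd), which is what the paired dwords in mm3 encode. A minimal C sketch of that shape under the same assumed globals as before; the actual speed-up comes from the MMX registers, which plain C does not capture.

static void DrawColumn_paired_sketch(void)
{
    INT32 count = dc_yh - dc_yl + 1;
    if (count <= 0 || (dc_texheight & (dc_texheight - 1)))
        return;                        /* the MMX path needs a power-of-two height */

    UINT8 *dest = ylookup[dc_yl] + columnofs[dc_x];
    fixed_t heightmask = dc_texheight - 1;
    fixed_t step = dc_iscale;
    fixed_t frac = dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, step);
    if (dc_hires)
        frac = 0;

    if (count & 1)                     /* mod-2 pixel first */
    {
        *dest = dc_colormap[dc_source[(frac >> FRACBITS) & heightmask]];
        dest += vid.width;
        frac += step;
        count--;
    }
    for (; count; count -= 2)          /* two pixels per iteration */
    {
        dest[0]         = dc_colormap[dc_source[(frac >> FRACBITS) & heightmask]];
        dest[vid.width] = dc_colormap[dc_source[((frac + step) >> FRACBITS) & heightmask]];
        dest += 2 * vid.width;
        frac += 2 * step;
    }
}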
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; MMX column drawer.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = vid.width
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = 2 * fracstep, twice
;; mm3 = pair of consecutive fracs
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_MMX
R_Draw2sMultiPatchColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
test edx, [dc_texheight]
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
imul dword [dc_iscale]
shrd eax, edx, FRACBITS
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx]
cmp dl, TRANSPARENTPIXEL
je .nextmod2
movzx edx, byte [ebx + edx]
mov [edi], dl
.nextmod2:
add edi, [vid + viddef_s.width]
sub ecx, 1
jz .done
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;;
mov ebp, [vid + viddef_s.width]
align 16
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
cmp al, TRANSPARENTPIXEL ;; 2B 1u.
je .secondinpair ;; 2B 1u.
;; IFETCH boundary
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
mov [edi], al ;; 2B 1(2)u. First pixel
.secondinpair:
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
cmp dl, TRANSPARENTPIXEL ;; 2B 1u.
je .nextpair ;; 2B 1u.
;; IFETCH boundary
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
.nextpair:
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
.usenonMMX:
call R_Draw2sMultiPatchColumn_8_ASM
jmp .done
;;----------------------------------------------------------------------
;;
;; R_DrawSpan : 8bpp span drawer
;;
;; MMX span drawer.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = two pixels
;; mm0 = accumulator
;; mm1 = xposition
;; mm2 = yposition
;; mm3 = 2 * xstep
;; mm4 = 2 * ystep
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;; mm7 = accumulator
;;----------------------------------------------------------------------
cglobal R_DrawSpan_8_MMX
R_DrawSpan_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; esi = ds_source
;; ebx = ds_colormap
;;
mov esi, [ds_source]
mov ebx, [ds_colormap]
;;
;; edi = ylookup[ds_y] + columnofs[ds_x1]
;;
mov eax, [ds_y]
mov edi, [ylookup + eax*4]
mov edx, [ds_x1]
add edi, [columnofs + edx*4]
;;
;; ecx = ds_x2 - ds_x1 + 1
;;
mov ecx, [ds_x2]
sub ecx, edx
add ecx, 1
;;
;; Needed for fracs and steps
;;
movd mm7, [nflatshiftup]
;;
;; mm3 = xstep
;;
movd mm3, [ds_xstep]
pslld mm3, mm7
punpckldq mm3, mm3
;;
;; mm4 = ystep
;;
movd mm4, [ds_ystep]
pslld mm4, mm7
punpckldq mm4, mm4
;;
;; mm1 = pair of consecutive xpositions
;;
movd mm1, [ds_xfrac]
pslld mm1, mm7
movq mm6, mm1
paddd mm6, mm3
punpckldq mm1, mm6
;;
;; mm2 = pair of consecutive ypositions
;;
movd mm2, [ds_yfrac]
pslld mm2, mm7
movq mm6, mm2
paddd mm6, mm4
punpckldq mm2, mm6
;;
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;;
movd mm5, [nflatxshift]
movd mm6, [nflatyshift]
;;
;; Mask is in memory due to lack of registers.
;;
mov eax, [nflatmask]
mov [nflatmask64], eax
mov [nflatmask64 + 4], eax
;;
;; Go until we reach a dword boundary.
;;
.unaligned:
test edi, 3
jz .alignedprep
.stragglers:
cmp ecx, 0
je .done ;; If ecx == 0, we're finished.
;;
;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep (once!)
paddd mm2, mm4 ;; yposition += ystep (once!)
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of first pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
mov [edi], al
add edi, 1
sub ecx, 1
jmp .unaligned
.alignedprep:
;;
;; We can double the steps now.
;;
pslld mm3, 1
pslld mm4, 1
;;
;; Generate chunks of four pixels.
;;
.alignedloop:
;;
;; Make sure we have at least four pixels.
;;
cmp ecx, 4
jl .prestragglers
;;
;; First two pixels.
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep
paddd mm2, mm4 ;; yposition += ystep
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of first pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx ebp, byte [ebx + eax] ;; ebp = colormap[al]
punpckhdq mm0, mm0 ;; both dwords = high dword
movd eax, mm0 ;; eax = index of second pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 8 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
;;
;; Next two pixels.
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep
paddd mm2, mm4 ;; yposition += ystep
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of third pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 16 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
punpckhdq mm0, mm0 ;; both dwords = high dword
movd eax, mm0 ;; eax = index of second pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 24 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
;;
;; Write pixels.
;;
mov [edi], ebp
add edi, 4
sub ecx, 4
jmp .alignedloop
.prestragglers:
;;
;; Back to one step at a time.
;;
psrad mm3, 1
psrad mm4, 1
jmp .stragglers
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
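
Stripped of the MMX pairing, each span pixel above is one index computation: the x/y fracs (pre-shifted by nflatshiftup) are shifted down into tile coordinates, the y part is masked into the row bits and or-ed with the x part, and the resulting flat byte is run through the colormap. A minimal C sketch of the scalar path (assumed globals; not the project's actual drawer):

static void DrawSpan_sketch(void)
{
    INT32 count = ds_x2 - ds_x1 + 1;
    UINT8 *dest = ylookup[ds_y] + columnofs[ds_x1];

    UINT32 xpos  = (UINT32)ds_xfrac << nflatshiftup;
    UINT32 ypos  = (UINT32)ds_yfrac << nflatshiftup;
    UINT32 xstep = (UINT32)ds_xstep << nflatshiftup;
    UINT32 ystep = (UINT32)ds_ystep << nflatshiftup;

    for (; count > 0; count--)
    {
        UINT32 index = ((ypos >> nflatyshift) & nflatmask) | (xpos >> nflatxshift);
        *dest++ = ds_colormap[ds_source[index]];
        xpos += xstep;
        ypos += ystep;
    }
}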

View file

@ -1,48 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2018 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_vc.nas
;; DESCRIPTION:
;; Assembler optimised math code for Visual C++.
[BITS 32]
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
[SECTION .text write]
;----------------------------------------------------------------------------
;fixed_t FixedMul (fixed_t a, fixed_t b)
;----------------------------------------------------------------------------
cglobal FixedMul
; align 16
FixedMul:
mov eax,[esp+4]
imul dword [esp+8]
shrd eax,edx,16
ret
;----------------------------------------------------------------------------
;fixed_t FixedDiv2 (fixed_t a, fixed_t b);
;----------------------------------------------------------------------------
cglobal FixedDiv2
; align 16
FixedDiv2:
mov eax,[esp+4]
mov edx,eax ;; these two instructions allow the next
sar edx,31 ;; two to pair, on the Pentium processor.
shld edx,eax,16
sal eax,16
idiv dword [esp+8]
ret
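
Both routines above have direct C equivalents once a 64-bit intermediate is available, which is essentially what the portable fallbacks do; FixedDiv2 assumes the quotient fits in 32 bits (overflow handling is the caller's job). A minimal sketch, assuming the fixed_t and FRACBITS definitions from m_fixed.h:

static inline fixed_t FixedMul_sketch(fixed_t a, fixed_t b)
{
    return (fixed_t)(((INT64)a * b) >> FRACBITS);    /* 64-bit product, then >>16 */
}

static inline fixed_t FixedDiv2_sketch(fixed_t a, fixed_t b)
{
    return (fixed_t)(((INT64)a << FRACBITS) / b);    /* 64-bit dividend, then /b  */
}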

View file

@ -266,12 +266,6 @@ static void CV_Gammaxxx_ONChange(void)
#endif
#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM)
void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes);
#define HAVE_VIDCOPY
#endif
static void CV_constextsize_OnChange(void)
{
con_recalc = true;
@ -284,9 +278,6 @@ static void CV_constextsize_OnChange(void)
void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes)
{
#ifdef HAVE_VIDCOPY
VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes);
#else
if ((srcrowbytes == destrowbytes) && (srcrowbytes == (size_t)width))
M_Memcpy(destptr, srcptr, srcrowbytes * height);
else
@ -299,7 +290,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32
srcptr += srcrowbytes;
}
}
#endif
}
static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0};

View file

@ -1,61 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file vid_copy.s
/// \brief code for updating the linear frame buffer screen.
#include "asm_defs.inc" // structures, must match the C structures!
// DJGPPv2 is as fast as this one, but then someone may compile with a less
// good version of DJGPP than mine, so this little asm will do the trick!
#define srcptr 4+16
#define destptr 8+16
#define width 12+16
#define height 16+16
#define srcrowbytes 20+16
#define destrowbytes 24+16
// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth );
// width is given as BYTES
#ifdef __i386__
.globl C(VID_BlitLinearScreen_ASM)
C(VID_BlitLinearScreen_ASM):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
cld
movl srcptr(%esp),%esi
movl destptr(%esp),%edi
movl width(%esp),%ebx
movl srcrowbytes(%esp),%eax
subl %ebx,%eax
movl destrowbytes(%esp),%edx
subl %ebx,%edx
shrl $2,%ebx
movl height(%esp),%ebp
LLRowLoop:
movl %ebx,%ecx
rep/movsl (%esi),(%edi)
addl %eax,%esi
addl %edx,%edi
decl %ebp
jnz LLRowLoop
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
#endif

View file

@ -20,10 +20,6 @@ else
SDL_LDFLAGS?=-L../libs/SDL2/i686-w64-mingw32/lib -L../libs/SDL2_mixer/i686-w64-mingw32/lib -lmingw32 -lSDL2main -lSDL2 -mwindows
endif
ifndef NOASM
USEASM=1
endif
ifndef NONET
ifndef MINGW64 #miniupnc is broken with MINGW64
HAVE_MINIUPNPC=1

View file

@ -22,7 +22,6 @@
#ifdef _MSC_VER
#include <assert.h>
#endif
#define NOASM
#include "../src/tables.h"
#define NO_M
#include "../src/m_fixed.c"