Merge branch 'kill-asm' into 'next'

Kill ASM

Closes #1063

See merge request STJr/SRB2!2074
This commit is contained in:
Sal 2023-07-28 17:03:27 +00:00
commit 205ab612f7
36 changed files with 3 additions and 4351 deletions

View file

@ -50,7 +50,7 @@ jobs:
- v1-SRB2-APT
- run:
name: Install SDK
command: apt-get -o Dir::Cache="/root/.cache/apt" -qq -y --no-install-recommends install git build-essential nasm libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 libopenmpt-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
command: apt-get -o Dir::Cache="/root/.cache/apt" -qq -y --no-install-recommends install git build-essential libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 libopenmpt-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
- run:
name: make md5sum
command: find /root/.cache/apt/archives -type f -print0 | sort -z | xargs -r0 md5sum > /root/.cache/apt_archives.md5

View file

@ -8,7 +8,6 @@
[Sonic Robo Blast 2](https://srb2.org/) is a 3D Sonic the Hedgehog fangame based on a modified version of [Doom Legacy](http://doomlegacy.sourceforge.net/).
## Dependencies
- NASM (x86 builds only)
- SDL2 (Linux/OS X only)
- SDL2-Mixer (Linux/OS X only)
- libupnp (Linux/OS X only)

View file

@ -1992,24 +1992,6 @@ HW3SOUND for 3D hardware sound support
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/v_video.h" />
<Unit filename="src/vid_copy.s">
<Option compilerVar="CC" />
<Option compiler="avrgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gnu_gcc_compiler_for_mingw32" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gnu_gcc_compiler_for_mingw64" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="armelfgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="tricoregcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="ppcgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option target="Debug Native/SDL" />
<Option target="Release Native/SDL" />
<Option target="Debug Linux/SDL" />
<Option target="Release Linux/SDL" />
<Option target="Debug Mingw/SDL" />
<Option target="Release Mingw/SDL" />
<Option target="Debug Mingw/DirectX" />
<Option target="Release Mingw/DirectX" />
</Unit>
<Unit filename="src/w_wad.c">
<Option compilerVar="CC" />
</Unit>

View file

@ -25,9 +25,6 @@
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(PlatformTarget)'=='x86'">
<ClCompile>
<PreprocessorDefinitions>USEASM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
</Link>

View file

@ -5,7 +5,7 @@ Ver=3
IsCpp=0
Type=0
UnitCount=279
Folders=A_Asm,B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
Folders=B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
CommandLine=
CompilerSettings=00000000000100000111e1
PchHead=-1
@ -1473,36 +1473,6 @@ Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit149]
FileName=src\tmap.nas
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=1
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap.nas
[Unit150]
FileName=src\asm_defs.inc
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit151]
FileName=src\vid_copy.s
Folder=A_Asm
Compile=1
CompileCpp=0
Link=1
Priority=1000
OverrideBuildCmd=1
BuildCmd=$(CC) $(CFLAGS) -x assembler-with-cpp -c src/vid_copy.s -o $@
[Unit152]
FileName=src\y_inter.h
Folder=H_Hud
@ -1543,26 +1513,6 @@ Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit156]
FileName=src\p5prof.h
Folder=A_Asm
Compile=1
CompileCpp=0
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit157]
FileName=src\tmap_mmx.nas
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=1
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap_mmx.nas
[Unit159]
FileName=src\lzf.h
Folder=W_Wad

View file

@ -7,8 +7,6 @@ environment:
# c:\mingw-w64 i686 has gcc 6.3.0, so use c:\msys64 7.3.0 instead
MINGW_SDK: c:\msys64\mingw32
CFLAGS: -Wno-implicit-fallthrough
NASM_ZIP: nasm-2.12.01
NASM_URL: http://www.nasm.us/pub/nasm/releasebuilds/2.12.01/win64/nasm-2.12.01-win64.zip
UPX_ZIP: upx391w
UPX_URL: http://upx.sourceforge.net/download/upx391w.zip
CCACHE_EXE: ccache.exe
@ -40,17 +38,12 @@ environment:
ASSET_CLEAN: 0
cache:
- nasm-2.12.01.zip
- upx391w.zip
- ccache.exe
- C:\Users\appveyor\.ccache
- C:\Users\appveyor\srb2_cache
install:
- if not exist "%NASM_ZIP%.zip" appveyor DownloadFile "%NASM_URL%" -FileName "%NASM_ZIP%.zip"
- 7z x -y "%NASM_ZIP%.zip" -o%TMP% >null
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%NASM_ZIP%" "%MINGW_SDK%\bin" nasm.exe || exit 0
- if not exist "%UPX_ZIP%.zip" appveyor DownloadFile "%UPX_URL%" -FileName "%UPX_ZIP%.zip"
- 7z x -y "%UPX_ZIP%.zip" -o%TMP% >null
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%UPX_ZIP%" "%MINGW_SDK%\bin" upx.exe || exit 0
@ -65,7 +58,6 @@ configuration:
before_build:
- set "Path=%MINGW_SDK%\bin;%Path%"
- mingw32-make --version
- nasm -v
- if not [%NOUPX%] == [1] ( upx -V )
- ccache -V
- ccache -s

View file

@ -1,46 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# support for the yasm assembler
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS nasm yasm asm)
# Pick a default object format from the target platform and the C pointer
# width, unless CMAKE_ASM_YASM_OBJECT_FORMAT has already been set.
if(NOT CMAKE_ASM_YASM_OBJECT_FORMAT)
  if(WIN32 AND CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
    set(CMAKE_ASM_YASM_OBJECT_FORMAT win64)
  elseif(WIN32)
    set(CMAKE_ASM_YASM_OBJECT_FORMAT win32)
  elseif(APPLE AND CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
    set(CMAKE_ASM_YASM_OBJECT_FORMAT macho64)
  elseif(APPLE)
    set(CMAKE_ASM_YASM_OBJECT_FORMAT macho)
  elseif(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
    set(CMAKE_ASM_YASM_OBJECT_FORMAT elf64)
  else()
    set(CMAKE_ASM_YASM_OBJECT_FORMAT elf)
  endif()
endif()
set(CMAKE_ASM_YASM_COMPILE_OBJECT "<CMAKE_ASM_YASM_COMPILER> <FLAGS> -f ${CMAKE_ASM_YASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>")
# Load the generic ASMInformation file:
set(ASM_DIALECT "_YASM")
include(CMakeASMInformation)
set(ASM_DIALECT)

View file

@ -1,27 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# Find the yasm assembler (http://www.tortall.net/projects/yasm/), which is
# nasm compatible. The candidate-name list below contains both "nasm" and
# "yasm", but the explicit find_program() call searches for yasm only.
set(CMAKE_ASM_YASM_COMPILER_LIST nasm yasm)
# Keep a compiler that was already chosen; otherwise look for yasm, with
# "$ENV{ProgramFiles}/YASM" as an additional search location.
if(NOT CMAKE_ASM_YASM_COMPILER)
find_program(CMAKE_ASM_YASM_COMPILER yasm
"$ENV{ProgramFiles}/YASM")
endif()
# Load the generic DetermineASM compiler file with the DIALECT set properly.
# ASM_DIALECT is cleared again afterwards so it does not leak to later includes.
set(ASM_DIALECT "_YASM")
include(CMakeDetermineASMCompiler)
set(ASM_DIALECT)

View file

@ -1,23 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# This file is used by EnableLanguage in cmGlobalGenerator to
# determine that the selected ASM_YASM "compiler" works.
# For assembler this can only check whether the compiler has been found,
# because otherwise there would have to be a separate assembler source file
# for each assembler on every architecture.
# ASM_DIALECT selects the "_YASM" variable family for the generic test module
# and is cleared again once that module has been included.
set(ASM_DIALECT "_YASM")
include(CMakeTestASMCompiler)
set(ASM_DIALECT)

View file

@ -76,7 +76,7 @@ LOCAL_SRC_FILES := am_map.c \
android/i_system.c \
android/i_video.c
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOASM -DNOPIX -DUNIXCOMMON -DNOTERMIOS
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOPIX -DUNIXCOMMON -DNOTERMIOS
LOCAL_MODULE := libsrb2

View file

@ -13,15 +13,7 @@ target_compile_features(SRB2SDL2 PRIVATE c_std_11 cxx_std_17)
target_sourcefile(c)
target_sources(SRB2SDL2 PRIVATE comptime.c md5.c config.h.in)
set(SRB2_ASM_SOURCES vid_copy.s)
set(SRB2_NASM_SOURCES tmap_mmx.nas tmap.nas)
### Configuration
set(SRB2_CONFIG_USEASM OFF CACHE BOOL
"Enable NASM tmap implementation for software mode speedup.")
set(SRB2_CONFIG_YASM OFF CACHE BOOL
"Use YASM in place of NASM.")
set(SRB2_CONFIG_DEV_BUILD OFF CACHE BOOL
"Compile a development build of SRB2.")
@ -78,33 +70,6 @@ if("${SRB2_CONFIG_HWRENDER}")
endif()
endif()
# Optional assembly support: when SRB2_CONFIG_USEASM is on, enable the NASM or
# YASM language, define USEASM and add the assembly sources to the target.
# Otherwise define NONX86 and NORUSEASM instead.
if(${SRB2_CONFIG_USEASM})
#SRB2_ASM_FLAGS can be used to pass flags to either nasm or yasm.
# On Linux, -DLINUX is prepended to whatever flags were already given.
if("${CMAKE_SYSTEM_NAME}" MATCHES "Linux")
set(SRB2_ASM_FLAGS "-DLINUX ${SRB2_ASM_FLAGS}")
endif()
# Register the ".nas" extension with the chosen assembler language before
# enabling it; SRB2_CONFIG_YASM picks YASM, anything else picks NASM.
if(${SRB2_CONFIG_YASM})
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS} nas)
set(CMAKE_ASM_YASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
enable_language(ASM_YASM)
else()
set(CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS} nas)
set(CMAKE_ASM_NASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
enable_language(ASM_NASM)
endif()
# SRB2_USEASM records the outcome as a plain ON/OFF variable.
set(SRB2_USEASM ON)
target_compile_definitions(SRB2SDL2 PRIVATE -DUSEASM)
# NOTE(review): these SSE options are applied to the whole target, not only to
# the assembly sources - confirm that is intended.
target_compile_options(SRB2SDL2 PRIVATE -msse3 -mfpmath=sse)
target_sources(SRB2SDL2 PRIVATE ${SRB2_ASM_SOURCES}
${SRB2_NASM_SOURCES})
else()
set(SRB2_USEASM OFF)
target_compile_definitions(SRB2SDL2 PRIVATE -DNONX86 -DNORUSEASM)
endif()
# Targets
# If using CCACHE, then force it.

View file

@ -47,8 +47,6 @@
# HAVE_MINIUPNPC=1 - Enable automated port forwarding.
# Already enabled by default for 32-bit
# Windows.
# NOASM=1 - Disable hand optimized assembly code for the
# Software renderer.
# NOPNG=1 - Disable PNG graphics support. (TODO: double
# check netplay compatible.)
# NOCURL=1 - Disable libcurl--HTTP capability.
@ -88,7 +86,6 @@
# executable.
# WINDOWSHELL=1 - Use Windows commands.
# PREFIX= - Prefix to many commands, for cross compiling.
# YASM=1 - Use Yasm instead of NASM assembler.
# STABS=1 - ?
# ECHO=1 - Print out each command in the build process.
# NOECHOFILENAMES=1 - Don't print out each that is being
@ -148,22 +145,6 @@ OBJCOPY:=$(call Prefix,objcopy)
OBJDUMP:=$(call Prefix,objdump)
WINDRES:=$(call Prefix,windres)
ifdef YASM
NASM?=yasm
else
NASM?=nasm
endif
ifdef YASM
ifdef STABS
NASMOPTS?=-g stabs
else
NASMOPTS?=-g dwarf2
endif
else
NASMOPTS?=-g
endif
GZIP?=gzip
GZIP_OPTS?=-9 -f -n
ifdef WINDOWSHELL
@ -187,8 +168,6 @@ makedir:=../make
opts:=-DCOMPVERSION -g
libs:=
nasm_format:=
# This is a list of variables names, of which if defined,
# also defines the name as a macro to the compiler.
passthru_opts:=
@ -316,7 +295,6 @@ endif
LD:=$(CC)
cc:=$(cc) $(opts)
nasm=$(NASM) $(NASMOPTS) -f $(nasm_format)
ifdef UPX
upx=$(UPX) $(UPX_OPTS)
endif
@ -393,7 +371,6 @@ $(objdir)/%.$(1) : %.$(2) | $$$$(@D)/
endef
$(eval $(call _recipe,o,c,$(cc) -c -o $$@ $$<))
$(eval $(call _recipe,o,nas,$(nasm) -o $$@ $$<))
$(eval $(call _recipe,o,s,$(cc) $(asflags) -c -o $$@ $$<))
$(eval $(call _recipe,res,rc,$(windres) -i $$< -o $$@))

View file

@ -18,13 +18,6 @@ opts+=-DHWRENDER
sources+=$(call List,hardware/Sourcefile)
endif
ifndef NOASM
ifndef NONX86
sources+=tmap.nas tmap_mmx.nas
opts+=-DUSEASM
endif
endif
ifndef NOMD5
sources+=md5.c
endif

View file

@ -9,10 +9,6 @@ opts+=-DUNIXCOMMON -DLUA_USE_POSIX
# instead of addresses
libs+=-lm -rdynamic
ifndef nasm_format
nasm_format:=elf -DLINUX
endif
ifndef NOHW
opts+=-I/usr/X11R6/include
libs+=-L/usr/X11R6/lib
@ -35,7 +31,6 @@ endif
# FIXME: UNTESTED
#ifdef SOLARIS
#NOIPX=1
#NOASM=1
#opts+=-I/usr/local/include -I/opt/sfw/include \
# -DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP
#libs+=-L/opt/sfw/lib -lsocket -lnsl

View file

@ -39,7 +39,6 @@ else ifdef SOLARIS # FIXME: UNTESTED
UNIX=1
platform=solaris
else ifdef CYGWIN32 # FIXME: UNTESTED
nasm_format=win32
platform=cygwin
else ifdef MINGW
ifdef MINGW64

View file

@ -56,13 +56,6 @@ SDL_LDFLAGS?=$(shell $(SDL_CONFIG) \
$(eval $(call Propogate_flags,SDL))
endif
# use the x86 asm code
ifndef CYGWIN32
ifndef NOASM
USEASM=1
endif
endif
ifdef MINGW
ifndef NOSDLMAIN
SDLMAIN=1

View file

@ -17,8 +17,6 @@ sources+=win32/Srb2win.rc
opts+=-DSTDC_HEADERS
libs+=-ladvapi32 -lkernel32 -lmsvcrt -luser32
nasm_format:=win32
SDL?=1
ifndef NOHW

View file

@ -81,7 +81,6 @@ mserv.c
http-mserv.c
i_tcp.c
lzf.c
vid_copy.s
b_bot.c
u_list.c
lua_script.c

View file

@ -1,43 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2023 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file asm_defs.inc
/// \brief must match the C structures
#ifndef __ASM_DEFS__
#define __ASM_DEFS__
// C(label) makes references to C variables more noticeable in the assembly
// source and makes the label match the name the C code uses.
// Linux (ELF), unlike DOS, has no leading "_" on C symbols. 19990119 by Kin
// CODE_SEG: nasm needs the code placed in .data under Linux. 20010210 by metzgermeister
// FIXME: nasm ignores these settings, so I put the macros into the makefile
#ifdef __ELF__
#define C(label) label
#define CODE_SEG .data
#else
#define C(label) _##label
#define CODE_SEG .text
#endif
/* This is a more readable way to access the arguments passed from C code. */
/* PLEASE NOTE: it is assumed that all arguments passed from C code are */
/* 32-bit values (INT32, long, and most pointers), so ARGn is at 8 + 4*(n-1) */
/* bytes from %ebp. Presumably the routine must first set up %ebp as its */
/* frame pointer - confirm against the routines that use these macros. */
#define ARG1 8(%ebp)
#define ARG2 12(%ebp)
#define ARG3 16(%ebp)
#define ARG4 20(%ebp)
#define ARG5 24(%ebp)
#define ARG6 28(%ebp)
#define ARG7 32(%ebp)
#define ARG8 36(%ebp)
#define ARG9 40(%ebp) //(c)tm ... Allegro by Shawn Hargreaves.
#endif

View file

@ -3893,11 +3893,6 @@ static void Command_Version_f(void)
else // 16-bit? 128-bit?
CONS_Printf("Bits Unknown ");
// No ASM?
#ifdef NOASM
CONS_Printf("\x85" "NOASM " "\x80");
#endif
// Debug build
#ifdef _DEBUG
CONS_Printf("\x85" "DEBUG " "\x80");

View file

@ -1,278 +0,0 @@
/*********************************************************
*
* File: p5prof.h
* By: Kevin Baca
*
* MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG
* (WHICH MEANS WRITE THE LOW DWORD FIRST)
*
* Now in yer code do:
* INT64 count,total;
*
* ...
* RDMSR(0x10,&count); //inner loop count
* total += count;
* ...
*
* printf("0x%x %x", (INT32)total, *((INT32 *)&total+1));
* // HIGH LOW
*
*********************************************************/
/**\file
\brief This file provides macros to profile your code.
Here's how they work...
As you may or may not know, the Pentium class of
processors provides extremely fine grained profiling
capabilities through the use of what are called
Machine Specific Registers (MSRs). These registers
can provide information about almost any aspect of
CPU performance down to a single cycle.
The MSRs of interest for profiling are specified by
indices 0x10, 0x11, 0x12, and 0x13. Here is a brief
description of each of these registers:
MSR 0x10
This register is simply a cycle counter.
MSR 0x11
This register controls what type of profiling data
will be gathered.
MSRs 0x12 and 0x13
These registers gather the profiling data specified in
MSR 0x11.
Each MSR is 64 bits wide. For the Pentium processor,
only the lower 32 bits of MSR 0x11 are valid. Bits 0-15
specify what data will be gathered in MSR 0x12. Bits 16-31
specify what data will be gathered in MSR 0x13. Both sets
of bits have the same format:
Bits 0-5 specify which hardware event will be tracked.
Bit 6, if set, indicates events will be tracked in
rings 0-2.
Bit 7, if set, indicates events will be tracked in
ring 3.
Bit 8, if set, indicates cycles should be counted for
the specified event. If clear, it indicates the
number of events should be counted.
Two instructions are provided for manipulating the MSRs.
RDMSR (Read Machine Specific Register) and WRMSR
(Write Machine Specific Register). These opcodes were
originally undocumented and therefore most assemblers don't
recognize them. Their byte codes are provided in the
macros below.
RDMSR takes the MSR index in ecx and returns the profile data
in edx : eax.
WRMSR takes the MSR index in ecx and the profiling criteria
in edx : eax.
Two profiling registers limits profiling capability to
gathering only two types of information. The register
usage can, however, be combined in interesting ways.
For example, you can set one register to gather the
number of a specific type of event while the other gathers
the number of cycles for the same event. Or you can
gather the number of two separate events while using
MSR 0x10 to gather the number of cycles.
The enumerated list provides somewhat readable labels for
the types of events that can be tracked.
For more information, get ahold of appendix H from the
Intel Pentium programmer's manual (I don't remember the
order number) or go to
http://green.kaist.ac.kr/jwhahn/art3.htm.
That's an article by Terje Mathisen where I got most of
my information.
You may use this code however you wish. I hope it's
useful and I hope I got everything right.
-Kevin
kbaca@skygames.com
*/
#ifdef __GNUC__
/* RDTSC(_dst): read the time-stamp counter into the two 32-bit words at _dst.
 *
 * _dst  pointer to at least 8 writable bytes.
 *
 * The instruction is emitted as raw opcode bytes (0x0F,0x31). EDX is stored
 * at offset 0 and EAX at offset 4, exactly as the previous version did.
 * NOTE(review): that is high dword first, whereas RDMSR stores the low dword
 * first - confirm which order callers expect before changing it.
 *
 * The template is built from adjacent string literals, because a string
 * literal may not contain a raw newline. Only registers the code really
 * overwrites are listed as clobbers, since a clobber may not name an input
 * operand's register. "memory" tells the compiler that *_dst is written.
 */
#define RDTSC(_dst) \
	__asm__ __volatile__( \
		".byte 0x0F,0x31\n\t"    /* rdtsc: EDX:EAX = time-stamp counter */ \
		"movl %%edx,(%0)\n\t"    /* _dst[0] = EDX */ \
		"movl %%eax,4(%0)"       /* _dst[1] = EAX */ \
		: : "D" (_dst) : "eax", "edx", "memory")
/* RDMSR(_msri, _msrd): read machine-specific register _msri into _msrd.
 *
 * _msri  MSR index, loaded into ECX.
 * _msrd  pointer to at least 8 writable bytes; receives EAX (low dword) at
 *        offset 0 and EDX (high dword) at offset 4.
 *
 * The instruction is emitted as raw opcode bytes (0x0F,0x32). The template
 * uses adjacent string literals (no raw newlines inside a literal), and the
 * clobber list names only EAX/EDX, because ECX and EDI are input operands and
 * are not modified. "memory" tells the compiler that *_msrd is written.
 */
#define RDMSR(_msri, _msrd) \
	__asm__ __volatile__( \
		".byte 0x0F,0x32\n\t"    /* rdmsr: EDX:EAX = MSR[ECX] */ \
		"movl %%eax,(%1)\n\t"    /* _msrd[0] = low dword */ \
		"movl %%edx,4(%1)"       /* _msrd[1] = high dword */ \
		: : "c" (_msri), "D" (_msrd) : "eax", "edx", "memory")
/* WRMSR(_msri, _msrd): write the 32-bit value _msrd to machine-specific
 * register _msri; the upper 32 bits written are always zero.
 *
 * _msri  MSR index, loaded into ECX.
 * _msrd  low 32 bits of the value, loaded into EAX.
 *
 * The instruction is emitted as raw opcode bytes (0x0F,0x30). EDX is zeroed
 * by the template and is therefore the only clobbered register; EAX and ECX
 * are inputs and must not be repeated in the clobber list.
 */
#define WRMSR(_msri, _msrd) \
	__asm__ __volatile__( \
		"xorl %%edx,%%edx\n\t"   /* high dword = 0 */ \
		".byte 0x0F,0x30"        /* wrmsr: MSR[ECX] = EDX:EAX */ \
		: : "c" (_msri), "a" (_msrd) : "edx")
/* RDMSR_0x12_0x13(_msr12, _msr13): read MSR 0x12 into _msr12 and MSR 0x13
 * into _msr13 in a single asm statement.
 *
 * _msr12, _msr13  pointers to at least 8 writable bytes each. EDX is stored
 *                 at offset 0 and EAX at offset 4, as in the previous version.
 * NOTE(review): that is high dword first, the opposite of RDMSR - confirm
 * which order callers expect before changing it.
 *
 * The template uses adjacent string literals (no raw newlines inside a
 * literal). EAX, ECX and EDX are overwritten and listed as clobbers; EDI and
 * ESI are unmodified inputs and are not. "memory" covers the two stores.
 */
#define RDMSR_0x12_0x13(_msr12, _msr13) \
	__asm__ __volatile__( \
		"movl $0x12,%%ecx\n\t" \
		".byte 0x0F,0x32\n\t"    /* rdmsr: EDX:EAX = MSR[0x12] */ \
		"movl %%edx,(%0)\n\t" \
		"movl %%eax,4(%0)\n\t" \
		"movl $0x13,%%ecx\n\t" \
		".byte 0x0F,0x32\n\t"    /* rdmsr: EDX:EAX = MSR[0x13] */ \
		"movl %%edx,(%1)\n\t" \
		"movl %%eax,4(%1)" \
		: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "memory")
/* ZERO_MSR_0x12_0x13(): write zero to MSR 0x12 and MSR 0x13.
 *
 * Takes no operands; EAX, ECX and EDX are overwritten and listed as clobbers.
 * The template uses adjacent string literals, because a string literal may
 * not contain a raw newline.
 */
#define ZERO_MSR_0x12_0x13() \
	__asm__ __volatile__( \
		"xorl %%edx,%%edx\n\t" \
		"xorl %%eax,%%eax\n\t" \
		"movl $0x12,%%ecx\n\t" \
		".byte 0x0F,0x30\n\t"    /* wrmsr: MSR[0x12] = 0 */ \
		"movl $0x13,%%ecx\n\t" \
		".byte 0x0F,0x30"        /* wrmsr: MSR[0x13] = 0 */ \
		: : : "eax", "ecx", "edx")
#elif defined (__WATCOMC__)
/* Watcom variants: each routine is declared as a function whose body is
 * supplied inline through "#pragma aux". Opcodes are emitted as raw bytes
 * with "db". */

/* Executes opcode 0x0F,0x31 and stores EDX at dst[0] and EAX at dst[1];
 * dst is passed in EDI. */
extern void RDTSC(UINT32 *dst);
#pragma aux RDTSC =\
"db 0x0F,0x31"\
"mov [edi],edx"\
"mov [4+edi],eax"\
parm [edi]\
modify [eax edx edi];
/* Executes opcode 0x0F,0x32 with msri in ECX and stores EDX at msrd[0] and
 * EAX at msrd[1]; msrd is passed in EDI.
 * NOTE(review): the __GNUC__ RDMSR stores EAX first - confirm which order is
 * intended. */
extern void RDMSR(UINT32 msri, UINT32 *msrd);
#pragma aux RDMSR =\
"db 0x0F,0x32"\
"mov [edi],edx"\
"mov [4+edi],eax"\
parm [ecx] [edi]\
modify [eax ecx edx edi];
/* Zeroes EDX, then executes opcode 0x0F,0x30 with msri in ECX and msrd in
 * EAX. */
extern void WRMSR(UINT32 msri, UINT32 msrd);
#pragma aux WRMSR =\
"xor edx,edx"\
"db 0x0F,0x30"\
parm [ecx] [eax]\
modify [eax ecx edx];
/* Reads MSR 0x12 into msr12 (passed in EDI) and MSR 0x13 into msr13 (passed
 * in ESI); for each, EDX goes to element 0 and EAX to element 1. */
extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13);
#pragma aux RDMSR_0x12_0x13 =\
"mov ecx,0x12"\
"db 0x0F,0x32"\
"mov [edi],edx"\
"mov [4+edi],eax"\
"mov ecx,0x13"\
"db 0x0F,0x32"\
"mov [esi],edx"\
"mov [4+esi],eax"\
parm [edi] [esi]\
modify [eax ecx edx edi esi];
/* Zeroes EDX and EAX, then executes opcode 0x0F,0x30 with ECX = 0x12 and
 * again with ECX = 0x13. */
extern void ZERO_MSR_0x12_0x13(void);
#pragma aux ZERO_MSR_0x12_0x13 =\
"xor edx,edx"\
"xor eax,eax"\
"mov ecx,0x12"\
"db 0x0F,0x30"\
"mov ecx,0x13"\
"db 0x0F,0x30"\
modify [eax ecx edx];
#endif
/* Event codes for the event-select field (bits 0-5) of a profile selector
 * passed to ProfSetProfiles(). The enumerators are numbered consecutively
 * from 0, so their position in this list is their value; do not reorder them
 * or remove the RESERVED placeholders.
 *
 * Declared as a plain anonymous enum: the former "typedef enum { ... };"
 * named no type, so the typedef keyword did nothing but provoke a compiler
 * warning. The enumerators themselves are unchanged.
 */
enum
{
	DataRead,                   /* 0x00 */
	DataWrite,
	DataTLBMiss,
	DataReadMiss,
	DataWriteMiss,
	WriteHitEM,
	DataCacheLinesWritten,
	DataCacheSnoops,
	DataCacheSnoopHit,          /* 0x08 */
	MemAccessBothPipes,
	BankConflict,
	MisalignedDataRef,
	CodeRead,
	CodeTLBMiss,
	CodeCacheMiss,
	SegRegLoad,
	RESERVED0,                  /* 0x10 */
	RESERVED1,
	Branch,
	BTBHit,
	TakenBranchOrBTBHit,
	PipelineFlush,
	InstructionsExeced,
	InstructionsExecedVPipe,
	BusUtilizationClocks,       /* 0x18 */
	PipelineStalledWriteBackup,
	PipelineStalledDateMemRead,
	PipeLineStalledWriteEM,
	LockedBusCycle,
	IOReadOrWriteCycle,
	NonCacheableMemRef,
	AGI,
	RESERVED2,                  /* 0x20 */
	RESERVED3,
	FPOperation,
	Breakpoint0Match,
	Breakpoint1Match,
	Breakpoint2Match,
	Breakpoint3Match,
	HWInterrupt,
	DataReadOrWrite,            /* 0x28 */
	DataReadOrWriteMiss
};
/* Flag bits to OR with an event code when building a profile selector. */
/* Bit 8: set = count cycles for the event, clear = count occurrences. */
#define PROF_CYCLES (0x100)
#define PROF_EVENTS (0x000)
/* Bit 6: track events in rings 0-2. Bit 7: track events in ring 3. */
#define RING_012 (0x40)
#define RING_3 (0x80)
#define RING_0123 (RING_012 | RING_3)
/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/
/* Packs the two selectors into one 32-bit value (_msr12 in the low 16 bits,
 * _msr13 shifted into the high 16 bits) and writes it to MSR 0x11.
 * NOTE(review): this expands to a bare { } block, so "if (x)
 * ProfSetProfiles(a, b); else ..." will not compile; the local is named
 * "prof", so an argument expression that mentions "prof" would be shadowed. */
#define ProfSetProfiles(_msr12, _msr13)\
{\
UINT32 prof;\
\
prof = (_msr12) | ((_msr13) << 16);\
WRMSR(0x11, prof);\
}
/*void ProfBeginProfiles(void);*/
/* Zeroes MSR 0x12 and MSR 0x13.
 * NOTE(review): the expansion already ends in ';' (as do the three macros
 * below), so a call written with its own ';' yields an extra empty
 * statement. */
#define ProfBeginProfiles()\
ZERO_MSR_0x12_0x13();
/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/
/* Reads MSR 0x12 into _msr12 and MSR 0x13 into _msr13. */
#define ProfGetProfiles(_msr12, _msr13)\
RDMSR_0x12_0x13(_msr12, _msr13);
/*void ProfZeroTimer(void);*/
/* Writes 0 to MSR 0x10, the cycle counter. */
#define ProfZeroTimer()\
WRMSR(0x10, 0);
/*void ProfReadTimer(UINT32 timer[2]);*/
/* Reads MSR 0x10, the cycle counter, into timer. */
#define ProfReadTimer(timer)\
RDMSR(0x10, timer);
/*EOF*/
/*EOF*/

View file

@ -179,8 +179,6 @@ CV_PossibleValue_t Color_cons_t[MAXSKINCOLORS+1];
void R_InitTranslucencyTables(void)
{
// Load here the transparency lookup tables 'TRANSx0'
// NOTE: the TRANSx0 resources MUST BE aligned on 64k for the asm
// optimised code (in other words, transtables pointer low word is 0)
transtables = Z_MallocAlign(NUMTRANSTABLES*0x10000, PU_STATIC,
NULL, 16);

View file

@ -225,18 +225,6 @@ void R_DrawTiltedTransSolidColorSpan_8(void);
void R_DrawWaterSolidColorSpan_8(void);
void R_DrawTiltedWaterSolidColorSpan_8(void);
#ifdef USEASM
void ASMCALL R_DrawColumn_8_ASM(void);
void ASMCALL R_DrawShadeColumn_8_ASM(void);
void ASMCALL R_DrawTranslucentColumn_8_ASM(void);
void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void);
void ASMCALL R_DrawColumn_8_MMX(void);
void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void);
void ASMCALL R_DrawSpan_8_MMX(void);
#endif
// ------------------
// 16bpp DRAWING CODE
// ------------------

View file

@ -31,20 +31,8 @@ static void prepare_rastertab(void);
static void R_RasterizeFloorSplat(floorsplat_t *pSplat, vector2_t *verts, vissprite_t *vis);
#ifdef USEASM
void ASMCALL rasterize_segment_tex_asm(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2, INT32 tc, INT32 dir);
#endif
static void rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2, INT32 tc, INT32 dir)
{
#ifdef USEASM
if (R_ASM)
{
rasterize_segment_tex_asm(x1, y1, x2, y2, tv1, tv2, tc, dir);
return;
}
else
#endif
{
fixed_t xs, xe, count;
fixed_t dx0, dx1;

View file

@ -44,10 +44,6 @@
// SRB2Kart
#include "r_fps.h" // R_GetFramerateCap
#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200))
#define RUSEASM //MSC.NET can't patch itself
#endif
// --------------------------------------------
// assembly or c drawer routines for 8bpp/16bpp
// --------------------------------------------
@ -102,7 +98,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
// Short and Tall sky drawer, for the current color mode
void (*walldrawerfunc)(void);
boolean R_ASM = true;
boolean R_486 = false;
boolean R_586 = false;
boolean R_MMX = false;
@ -169,26 +164,6 @@ void SCR_SetDrawFuncs(void)
spanfuncs_npo2[SPANDRAWFUNC_WATER] = R_DrawWaterSpan_NPO2_8;
spanfuncs_npo2[SPANDRAWFUNC_TILTEDWATER] = R_DrawTiltedWaterSpan_NPO2_8;
#ifdef RUSEASM
if (R_ASM)
{
if (R_MMX)
{
colfuncs[BASEDRAWFUNC] = R_DrawColumn_8_MMX;
//colfuncs[COLDRAWFUNC_SHADE] = R_DrawShadeColumn_8_ASM;
//colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_8_ASM;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_8_MMX;
spanfuncs[BASEDRAWFUNC] = R_DrawSpan_8_MMX;
}
else
{
colfuncs[BASEDRAWFUNC] = R_DrawColumn_8_ASM;
//colfuncs[COLDRAWFUNC_SHADE] = R_DrawShadeColumn_8_ASM;
//colfuncs[COLDRAWFUNC_FUZZY] = R_DrawTranslucentColumn_8_ASM;
colfuncs[COLDRAWFUNC_TWOSMULTIPATCH] = R_Draw2sMultiPatchColumn_8_ASM;
}
}
#endif
}
/* else if (vid.bpp > 1)
{
@ -271,8 +246,6 @@ void SCR_Startup(void)
CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
}
if (M_CheckParm("-noASM"))
R_ASM = false;
if (M_CheckParm("-486"))
R_486 = true;
if (M_CheckParm("-586"))

View file

@ -8,11 +8,6 @@ target_sources(SRB2SDL2 PRIVATE ogl_sdl.c)
target_sources(SRB2SDL2 PRIVATE i_threads.c)
if(${SRB2_USEASM})
set_source_files_properties(${SRB2_ASM_SOURCES} PROPERTIES LANGUAGE C)
set_source_files_properties(${SRB2_ASM_SOURCES} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
if("${CMAKE_SYSTEM_NAME}" MATCHES Windows)
target_sources(SRB2SDL2 PRIVATE
../win32/win_dbg.c
@ -68,18 +63,6 @@ if("${CMAKE_SYSTEM_NAME}" MATCHES Linux)
target_link_libraries(SRB2SDL2 PRIVATE m rt)
endif()
# Tag the NASM-syntax sources with the language of the selected assembler and
# record that assembler's executable and object format.
# set_source_files_properties() takes its property/value pairs after the
# PROPERTIES keyword; without it LANGUAGE and the language name are not
# applied as a property.
if(${SRB2_USEASM})
	if(${SRB2_CONFIG_YASM})
		set(ASM_ASSEMBLER_TEMP ${CMAKE_ASM_YASM_COMPILER})
		set(ASM_ASSEMBLER_OBJFORMAT ${CMAKE_ASM_YASM_OBJECT_FORMAT})
		set_source_files_properties(${SRB2_NASM_SOURCES} PROPERTIES LANGUAGE ASM_YASM)
	else()
		set(ASM_ASSEMBLER_TEMP ${CMAKE_ASM_NASM_COMPILER})
		set(ASM_ASSEMBLER_OBJFORMAT ${CMAKE_ASM_NASM_OBJECT_FORMAT})
		set_source_files_properties(${SRB2_NASM_SOURCES} PROPERTIES LANGUAGE ASM_NASM)
	endif()
endif()
if("${CMAKE_SYSTEM_NAME}" MATCHES Windows)
target_link_libraries(SRB2SDL2 PRIVATE
ws2_32

View file

@ -7,7 +7,6 @@
NOHW=1
NOHS=1
NOASM=1
OPTS+=-DLINUX

View file

@ -70,39 +70,6 @@ char logfilename[1024];
typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID);
#endif
#if defined (_WIN32)
/**	\brief	Requests read/write/execute access for the executable's code.

	Does nothing unless USEASM is defined. The region defaults to the
	BaseOfCode/SizeOfCode range taken from the PE optional header, and is
	replaced by the address and virtual size of the first section named
	".text" when the section table contains one. Calls I_Error if
	VirtualProtect fails.
*/
static inline VOID MakeCodeWritable(VOID)
{
#ifdef USEASM // Disable write-protection of code segment
	const DWORD wantedAccess = PAGE_EXECUTE_READWRITE;
	DWORD previousAccess;
	PBYTE imageBase = (PBYTE)GetModuleHandle(NULL);
	PIMAGE_DOS_HEADER dosHeader = (PIMAGE_DOS_HEADER)imageBase;
	PIMAGE_NT_HEADERS ntHeaders = (PIMAGE_NT_HEADERS)(imageBase + dosHeader->e_lfanew);
	// The optional header follows the signature and the file header.
	PIMAGE_OPTIONAL_HEADER optHeader = (PIMAGE_OPTIONAL_HEADER)
		((PBYTE)ntHeaders + sizeof (IMAGE_NT_SIGNATURE) + sizeof (IMAGE_FILE_HEADER));
	LPVOID regionStart = imageBase + optHeader->BaseOfCode;
	SIZE_T regionSize = optHeader->SizeOfCode;
#if 1 // try to find the text section
	PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION (ntHeaders);
	WORD remaining = ntHeaders->FileHeader.NumberOfSections;

	while (remaining > 0)
	{
		// Compare all 8 bytes of the name field, including NUL padding.
		if (memcmp (section->Name, ".text\0\0", 8) == 0)
		{
			regionStart = imageBase + section->VirtualAddress;
			regionSize = section->Misc.VirtualSize;
			break;
		}
		section++;
		remaining--;
	}
#endif
	if (VirtualProtect(regionStart, regionSize, wantedAccess, &previousAccess) == 0)
		I_Error("Could not make code writable\n");
#endif
}
#endif
#ifdef LOGMESSAGES
static void InitLogging(void)
{
@ -243,7 +210,6 @@ int main(int argc, char **argv)
#ifndef __MINGW32__
prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo);
#endif
MakeCodeWritable();
#endif
// startup SRB2

View file

@ -1,957 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2023 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode.
;; Draw wall columns.
; NASM (Intel syntax), 32-bit code.
[BITS 32]
; Number of fractional bits in the 16.16 fixed-point values; used below as
; a shift count.
%define FRACBITS 16
; Texel value that R_Draw2sMultiPatchColumn_8_ASM skips instead of drawing.
%define TRANSPARENTPIXEL 255
; cextern / cglobal declare a C-visible symbol. When LINUX is defined the
; name is used as written; otherwise the name is first aliased to its
; underscore-prefixed form so that the rest of the file can keep using the
; plain name.
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
; As laid out here the structure is 60 bytes and .width is at offset 12;
; this has to match the C definition (not visible here).
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern ylookup
cextern columnofs
cextern dc_source
cextern dc_texturemid
cextern dc_texheight
cextern dc_iscale
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_colormap
cextern dc_transmap
cextern colormaps
cextern vid
cextern topleft
; DELME
cextern R_DrawColumn_8
; polygon edge rasterizer
cextern prastertab
[SECTION .data]
;;.align 4
; Scratch variables for the column drawers. pixelcount holds the column
; height and tystep the fractional texture step once ebp is reused as a
; pointer; loopcount is not referenced by any routine in this file.
loopcount dd 0
pixelcount dd 0
tystep dd 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;; Takes no stack arguments: everything is read from dc_x, dc_yl, dc_yh,
;; dc_iscale, dc_texturemid, dc_texheight, dc_hires, dc_colormap,
;; dc_source, centeryfrac, ylookup, columnofs and vid.width.
;; ebp, esi, edi and ebx are saved and restored; eax, ecx, edx and the
;; flags are clobbered.
;;
;; Both the power-of-two and the non-power-of-two path map a texel with
;; an explicit colormap[texel] index, so dc_colormap does not have to be
;; 256-byte aligned.
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask (power-of-two heights), else height << FRACBITS
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_ASM
; align 16
R_DrawColumn_8_ASM:
    push    ebp                         ;; preserve caller's stack frame pointer
    push    esi                         ;; preserve register variables
    push    edi
    push    ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
    mov     ebp,[dc_yl]
    mov     edi,[ylookup+ebp*4]
    mov     ebx,[dc_x]
    add     edi,[columnofs+ebx*4]       ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
    mov     ecx,[dc_yh]
    add     ecx,1
    sub     ecx,ebp                     ;; pixel count
    jle     near .done                  ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
    mov     eax,ebp                     ;; dc_yl
    shl     eax,FRACBITS
    sub     eax,[centeryfrac]
    imul    dword [dc_iscale]           ;; edx:eax = 64-bit product
    shrd    eax,edx,FRACBITS            ;; eax = product >> FRACBITS
    add     eax,[dc_texturemid]
    mov     ebp,eax                     ;; ebp = frac
    mov     ebx,[dc_colormap]
    mov     esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
    test    byte [dc_hires],0x01
    jz      .texheightcheck
    xor     ebp,ebp
;;
;; Check for power of two
;;
.texheightcheck:
    mov     edx,[dc_texheight]
    sub     edx,1                       ;; edx = heightmask
    test    edx,[dc_texheight]          ;; (h-1) & h != 0 => not a power of two
    jnz     .notpowertwo
    test    ecx,0x01                    ;; Test for odd no. pixels
    jnz     .odd                        ;; odd count: enter the pair loop half-way
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking. The loop body draws two pixels per iteration.
;;
.powertwo:
    mov     eax,ebp                     ;; eax = frac
    sar     eax,FRACBITS                ;; Integer part
    and     eax,edx                     ;; eax &= heightmask
    movzx   eax,byte [esi + eax]        ;; eax = texel
    add     ebp,[dc_iscale]             ;; frac += fracstep
    movzx   eax,byte [ebx+eax]          ;; Map through colormap
    mov     [edi],al                    ;; Write pixel
    ;; dest += vid.width
    add     edi,[vid + viddef_s.width]
.odd:
    mov     eax,ebp                     ;; eax = frac
    sar     eax,FRACBITS                ;; Integer part
    and     eax,edx                     ;; eax &= heightmask
    movzx   eax,byte [esi + eax]        ;; eax = texel
    add     ebp,[dc_iscale]             ;; frac += fracstep
    movzx   eax,byte [ebx+eax]          ;; Map through colormap
    mov     [edi],al                    ;; Write pixel
    ;; dest += vid.width
    add     edi,[vid + viddef_s.width]
    sub     ecx,2                       ;; count -= 2
    jg      .powertwo
    jmp     .done
;;
;; Texture height is not a power of two: keep frac inside
;; [0, height << FRACBITS) by repeated add/subtract.
;;
.notpowertwo:
    add     edx,1
    shl     edx,FRACBITS                ;; edx = height << FRACBITS
    test    ebp,ebp
    jns     .notpowtwoloop
.makefracpos:
    add     ebp,edx                     ;; frac is negative; make it positive
    js      .makefracpos
.notpowtwoloop:
    cmp     ebp,edx                     ;; Reduce mod height
    jl      .writenonpowtwo
    sub     ebp,edx
    jmp     .notpowtwoloop
.writenonpowtwo:
    mov     eax,ebp                     ;; eax = frac
    sar     eax,FRACBITS                ;; Integer part.
    movzx   eax,byte [esi + eax]        ;; eax = texel
    add     ebp,[dc_iscale]             ;; frac += fracstep
    ;; Index the colormap instead of patching bl inside the pointer, so
    ;; the lookup is right whatever the low byte of dc_colormap is.
    movzx   eax,byte [ebx+eax]          ;; Map through colormap
    mov     [edi],al                    ;; Write pixel
    ;; dest += vid.width
    add     edi,[vid + viddef_s.width]
    sub     ecx,1
    jnz     .notpowtwoloop
;;
.done:
    pop     ebx                         ;; restore register variables
    pop     edi
    pop     esi
    pop     ebp                         ;; restore caller's stack frame pointer
    ret
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;; Takes no stack arguments: everything is read from dc_x, dc_yl, dc_yh,
;; dc_iscale, dc_texturemid, dc_texheight, dc_hires, dc_colormap,
;; dc_source, centeryfrac, ylookup, columnofs and vid.width.
;; A texel equal to TRANSPARENTPIXEL leaves the destination untouched,
;; but dest and frac still advance.
;; ebp, esi, edi and ebx are saved and restored; eax, ecx, edx and the
;; flags are clobbered.
;;
;; Both the power-of-two and the non-power-of-two path map a texel with
;; an explicit colormap[texel] index, so dc_colormap does not have to be
;; 256-byte aligned.
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask (power-of-two heights), else height << FRACBITS
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_ASM
; align 16
R_Draw2sMultiPatchColumn_8_ASM:
    push    ebp                         ;; preserve caller's stack frame pointer
    push    esi                         ;; preserve register variables
    push    edi
    push    ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
    mov     ebp,[dc_yl]
    mov     edi,[ylookup+ebp*4]
    mov     ebx,[dc_x]
    add     edi,[columnofs+ebx*4]       ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
    mov     ecx,[dc_yh]
    add     ecx,1
    sub     ecx,ebp                     ;; pixel count
    jle     near .done                  ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
    mov     eax,ebp                     ;; dc_yl
    shl     eax,FRACBITS
    sub     eax,[centeryfrac]
    imul    dword [dc_iscale]           ;; edx:eax = 64-bit product
    shrd    eax,edx,FRACBITS            ;; eax = product >> FRACBITS
    add     eax,[dc_texturemid]
    mov     ebp,eax                     ;; ebp = frac
    mov     ebx,[dc_colormap]
    mov     esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
    test    byte [dc_hires],0x01
    jz      .texheightcheck
    xor     ebp,ebp
;;
;; Check for power of two
;;
.texheightcheck:
    mov     edx,[dc_texheight]
    sub     edx,1                       ;; edx = heightmask
    test    edx,[dc_texheight]          ;; (h-1) & h != 0 => not a power of two
    jnz     .notpowertwo
    test    ecx,0x01                    ;; Test for odd no. pixels
    jnz     .odd                        ;; odd count: enter the pair loop half-way
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking. The loop body handles two pixels per iteration.
;;
.powertwo:
    mov     eax,ebp                     ;; eax = frac
    sar     eax,FRACBITS                ;; Integer part
    and     eax,edx                     ;; eax &= heightmask
    movzx   eax,byte [esi + eax]        ;; eax = texel
    add     ebp,[dc_iscale]             ;; frac += fracstep
    cmp     al,TRANSPARENTPIXEL         ;; Is pixel transparent?
    je      .nextpowtwoeven             ;; If so, advance.
    movzx   eax,byte [ebx+eax]          ;; Map through colormap
    mov     [edi],al                    ;; Write pixel
.nextpowtwoeven:
    ;; dest += vid.width
    add     edi,[vid + viddef_s.width]
.odd:
    mov     eax,ebp                     ;; eax = frac
    sar     eax,FRACBITS                ;; Integer part
    and     eax,edx                     ;; eax &= heightmask
    movzx   eax,byte [esi + eax]        ;; eax = texel
    add     ebp,[dc_iscale]             ;; frac += fracstep
    cmp     al,TRANSPARENTPIXEL         ;; Is pixel transparent?
    je      .nextpowtwoodd              ;; If so, advance.
    movzx   eax,byte [ebx+eax]          ;; Map through colormap
    mov     [edi],al                    ;; Write pixel
.nextpowtwoodd:
    ;; dest += vid.width
    add     edi,[vid + viddef_s.width]
    sub     ecx,2                       ;; count -= 2
    jg      .powertwo
    jmp     .done
;;
;; Texture height is not a power of two: keep frac inside
;; [0, height << FRACBITS) by repeated add/subtract.
;;
.notpowertwo:
    add     edx,1
    shl     edx,FRACBITS                ;; edx = height << FRACBITS
    test    ebp,ebp
    jns     .notpowtwoloop
.makefracpos:
    add     ebp,edx                     ;; frac is negative; make it positive
    js      .makefracpos
.notpowtwoloop:
    cmp     ebp,edx                     ;; Reduce mod height
    jl      .writenonpowtwo
    sub     ebp,edx
    jmp     .notpowtwoloop
.writenonpowtwo:
    mov     eax,ebp                     ;; eax = frac
    sar     eax,FRACBITS                ;; Integer part.
    movzx   eax,byte [esi + eax]        ;; eax = texel
    add     ebp,[dc_iscale]             ;; frac += fracstep
    cmp     al,TRANSPARENTPIXEL         ;; Is pixel transparent?
    je      .nextnonpowtwo              ;; If so, advance.
    ;; Index the colormap instead of patching bl inside the pointer, so
    ;; the lookup is right whatever the low byte of dc_colormap is.
    movzx   eax,byte [ebx+eax]          ;; Map through colormap
    mov     [edi],al                    ;; Write pixel
.nextnonpowtwo:
    ;; dest += vid.width
    add     edi,[vid + viddef_s.width]
    sub     ecx,1
    jnz     .notpowtwoloop
;;
.done:
    pop     ebx                         ;; restore register variables
    pop     edi
    pop     esi
    pop     ebp                         ;; restore caller's stack frame pointer
    ret
;;----------------------------------------------------------------------
;; R_DrawTranslucentColumn_8_ASM
;;
;; Vertical column texture drawer, with transparency. Replaces Doom2's
;; 'fuzz' effect, which was not so beautiful.
;; Transparency is always impressive in some way, don't know why...
;;
;; No stack arguments: reads dc_yl, dc_yh, dc_x, dc_iscale,
;; dc_texturemid, centery, dc_colormap, dc_source, dc_transmap, ylookup
;; and columnofs. Each pixel written is
;; colormap[transmap[(texel << 8) | dest]].
;;
;; As written the routine depends on:
;; - the texture row index being masked with 0x7f (rows 0..127 only);
;; - the low 16 bits of dc_transmap and the low 8 bits of dc_colormap
;;   being zero, because ah/al and dl are overwritten to form the table
;;   addresses;
;; - the 0x12345678 / 0x23456789 immediates at labels pf..pk and q1..q4
;;   being placeholders -- presumably overwritten at run time with
;;   screen-pitch values by code outside this file (TODO confirm);
;; - the quad counter living in ch, i.e. being only 8 bits wide.
;;
;; ebp, esi, edi and ebx are saved and restored; eax, ecx, edx and the
;; flags are clobbered; pixelcount and tystep are used as scratch.
;;----------------------------------------------------------------------
cglobal R_DrawTranslucentColumn_8_ASM
R_DrawTranslucentColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
mov [pixelcount],eax ;; save for final pixel
;; mov leaves the flags alone, so jle still tests the result of the sub.
jle near vtdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
;; Merge: upper 16 bits of ecx = fractional part of frac (from edx),
;; low 16 bits = integer part of fracstep (kept across the mov via the stack).
push cx
mov ecx,edx
pop cx
mov edx,[dc_colormap]
mov esi,[dc_source]
;;
;; lets rock :) !
;;
mov eax,[pixelcount]
shr eax,0x2
test byte [pixelcount],0x3
mov ch,al ;; quad count
mov eax,[dc_transmap]
;; Flags still come from the test above: taken when count % 4 == 0.
je vt4quadloop
;;
;; do un-even pixel
;;
test byte [pixelcount],0x1
je trf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp ;; frac fraction += fracstep fraction (sets carry)
adc bl,cl ;; row += fracstep integer + carry
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax] ;; dl = transmap[(texel << 8) | dest]
mov dl,[edx] ;; dl = colormap[dl]
mov [edi],dl
pf: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
trf2: test byte [pixelcount],0x2
je trf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pg: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
ph: add edi,0x12345678
;;
;; test if there was at least 4 pixels
;;
trf3: test ch,0xff ;; test quad count
je near vtdone
;;
;; Register roles on entry to the quad loop:
;; ebp : fracstep fraction in the upper 16 bits; stored to [tystep]
;;       below, after which ebp serves as a second dest pointer
;; ecx : upper 16 bits = fraction of frac, ch = quad counter,
;;       cl = fracstep integer
;; ebx : texture row (bl, masked with 0x7f)
;; eax : dc_transmap with ah = texel and al = dest pixel
;; edx : dc_colormap with dl = intermediate / output pixel
;; esi : source texture column
;; edi : dest screen
;;
vt4quadloop:
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
pi: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pj: sub edi,0x12345678
mov ebp,edi
pk: sub edi,0x12345678
jmp short inloop
align 4
;; Four pixels per iteration. Stores alternate between [edi] and [ebp];
;; each step already fetches the texel and dest pixel for the next one.
vtquadloop:
add ecx,[tystep]
adc bl,cl
q1: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
inloop:
add ecx,[tystep]
adc bl,cl
q2: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp+0x0],dl
mov al,[edi] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q3: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q4: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne vtquadloop
vtdone:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;; R_DrawShadeColumn_8_ASM
;;
;; for smoke..etc.. test.
;;
;; No stack arguments: reads dc_yl, dc_yh, dc_x, dc_iscale,
;; dc_texturemid, centery, dc_source, colormaps, ylookup and columnofs.
;; Each pixel written is the byte at [colormaps | (texel << 8) | dest],
;; i.e. the texel selects a 256-byte table and the existing screen pixel
;; indexes into it.
;;
;; As written the routine depends on:
;; - the texture row index being masked with 0x7f (rows 0..127 only);
;; - the low 16 bits of the pointer stored in colormaps being zero,
;;   because ah and al are overwritten to form the table address;
;; - the 0x12345678 immediates at labels pl..pq and q5..q8 being
;;   placeholders -- presumably overwritten at run time with
;;   screen-pitch values by code outside this file (TODO confirm);
;; - the quad counter living in ch, i.e. being only 8 bits wide.
;;
;; ebp, esi, edi and ebx are saved and restored; eax, ecx, edx and the
;; flags are clobbered; pixelcount and tystep are used as scratch.
;;----------------------------------------------------------------------
cglobal R_DrawShadeColumn_8_ASM
R_DrawShadeColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
mov [pixelcount],eax ;; save for final pixel
;; mov leaves the flags alone, so jle still tests the result of the sub.
jle near shdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,byte +0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
mov esi,[dc_source]
;;
;; lets rock :) !
;;
mov eax,[pixelcount]
mov dh,al ;; dh = low byte of the pixel count, for the remainder tests
shr eax,2
mov ch,al ;; quad count
mov eax,[colormaps]
test dh,3
je sh4quadloop ;; count % 4 == 0: no leading pixels
;;
;; do un-even pixel
;;
test dh,0x1
je shf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp ;; frac fraction += fracstep fraction (sets carry)
adc bl,cl ;; row += fracstep integer + carry
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax] ;; dl = table[texel][dest]
mov [edi],dl
pl: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
shf2:
test dh,0x2
je shf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pm: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pn: add edi,0x12345678
;;
;; test if there was at least 4 pixels
;;
shf3:
test ch,0xff ;; test quad count
je near shdone
;;
;; Register roles on entry to the quad loop:
;; ebp : fracstep fraction in the upper 16 bits; stored to [tystep]
;;       below, after which ebp serves as a second dest pointer
;; edx : upper 16 bits = fraction of frac, dh = 0x7f row mask,
;;       dl = output pixel
;; ebx : texture row (bl, masked with dh)
;; ecx : ch = quad counter, cl = fracstep integer
;; eax : colormaps pointer with ah = texel and al = dest pixel
;; esi : source texture column
;; edi : dest screen
;;
sh4quadloop:
mov dh,0x7f ;; prep mask
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
po: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pp: sub edi,0x12345678
mov ebp,edi
pq: sub edi,0x12345678
jmp short shinloop
align 4
;; Four pixels per iteration. Stores alternate between [edi] and [ebp];
;; each step already fetches the texel and dest pixel for the next one.
shquadloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q5: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
shinloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q6: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q7: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q8: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne shquadloop
shdone:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;; ========================================================================
;; Rasterizes one edge (segment) of a LINEARLY textured polygon.
;; For every scanline covered by the edge it interpolates the texture
;; coordinate along the edge together with the X coordinate, and stores
;; them as that line's minx or maxx entry.
;; The 'dir' argument tells which texture coordinate is interpolated:
;; 0: segment belongs to a TOP or BOTTOM texture edge (constant TY)
;; 1: segment belongs to a LEFT or RIGHT texture edge (constant TX)
;; ========================================================================
;;
;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
;;
;; For dir = 0, (tv1,tv2) = (tX1,tX2) and tc = tY, since TY is constant.
;;
;; For dir = 1, (tv1,tv2) = (tY1,tY2) and tc = tX, since TX is constant.
;;
;; Arguments are read from the stack at [ebp+8] .. [ebp+0x24]. All stored
;; values are shifted left by 16 (16.16 fixed point).
;; A segment with y1 == y2 is ignored. y1 > y2 fills MINX/TX1/TY1 starting
;; at row y2; y2 > y1 fills MAXX/TX2/TY2 starting at row y1.
;;
;; Uses: the table pointed to by prastertab, one RASTERY_SIZEOF-byte
;; entry per scanline (the original note named it
;; "extern struct rastery *_rastertab").
;;
;; Field offsets inside one table entry:
MINX EQU 0
MAXX EQU 4
TX1 EQU 8
TY1 EQU 12
TX2 EQU 16
TY2 EQU 20
RASTERY_SIZEOF EQU 24
cglobal rasterize_segment_tex_asm
rasterize_segment_tex_asm:
push ebp
mov ebp,esp
sub esp,byte +0x8 ;; allocate the local variables
push ebx
push esi
push edi
;; es is saved here and restored at .L_finished, although nothing in this
;; routine writes to it in between.
o16 mov ax,es
push eax
;; #define DX [ebp-4]
;; #define TD [ebp-8]
mov eax,[ebp+0xc] ;; y1
mov ebx,[ebp+0x14] ;; y2
cmp ebx,eax
je near .L_finished ;; special (y1==y2) segment horizontal, exit!
jg near .L_rasterize_right
;; rasterize_left: y1 > y2, rasterize a LEFT edge of the polygon
mov ecx,eax
sub ecx,ebx
inc ecx ;; y1-y2+1
mov eax,RASTERY_SIZEOF
mul ebx ;; * y2
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y2]
mov eax,[ebp+0x8] ;; ARG1
sub eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; ((x1-x2)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y1-y2+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x18] ;; ARG5
sub eax,[ebp+0x1c] ;; ARG6
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
mov [ebp-0x8],eax ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1)
mov eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; x = x2<<PRE
mov ebx,[ebp+0x1c] ;; ARG6
shl ebx,0x10 ;; tx = tx2<<PRE d0
;; ty = ty2<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
;; ebp is reused as the TD step inside the loops, so the frame pointer is
;; parked on the stack. The cmp sets the flags for the je two lines
;; further down; the mov in between does not change them.
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rleft_h_loop
;;
;; TY varies, TX is constant
;;
;; NOTE(review): ebx is advanced by TD before it is stored, so the first
;; row receives ty + tdy; the h loop below stores tx before advancing it.
;; Confirm that this difference is intended.
.L_rleft_v_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add ebx,ebp
mov [esi+TX1],edx ;; .tx1 = tx
add eax,edi
mov [esi+TY1],ebx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_v_loop
pop ebp
jmp .L_finished
;;
;; TX varies, TY is constant
;;
.L_rleft_h_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add eax,edi
mov [esi+TX1],ebx ;; .tx1 = tx
add ebx,ebp
mov [esi+TY1],edx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_h_loop
pop ebp
jmp .L_finished
;;
;; y2 > y1: rasterize a RIGHT edge of the polygon
;;
.L_rasterize_right:
mov ecx,ebx
sub ecx,eax
inc ecx ;; y2-y1+1
mov ebx,RASTERY_SIZEOF
mul ebx ;; * y1
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y1]
mov eax,[ebp+0x10] ;; ARG3
sub eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; ((x2-x1)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y2-y1+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x1c] ;; ARG6
sub eax,[ebp+0x18] ;; ARG5
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
mov [ebp-0x8],eax ;; idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1)
mov eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; x = x1<<PRE
mov ebx,[ebp+0x18] ;; ARG5
shl ebx,0x10 ;; tx = tx1<<PRE d0
;; ty = ty1<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
;; Same register hand-over as on the left side: ebp becomes TD and the
;; flags from the cmp survive the following mov.
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24], 0 ;; direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rright_h_loop
;;
;; TY varies, TX is constant
;;
;; NOTE(review): as in .L_rleft_v_loop, ty is advanced before it is
;; stored, unlike tx in the h loop. Confirm that this is intended.
.L_rright_v_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add ebx,ebp
mov [esi+TX2],edx ;; .tx2 = tx
add eax,edi
mov [esi+TY2],ebx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_v_loop
pop ebp
jmp short .L_finished
;;
;; TX varies, TY is constant
;;
.L_rright_h_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add eax,edi
mov [esi+TX2],ebx ;; .tx2 = tx
add ebx,ebp
mov [esi+TY2],edx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_h_loop
pop ebp
;; Common exit: restore es and the saved registers, drop the locals.
.L_finished:
pop eax
o16 mov es,ax
pop edi
pop esi
pop ebx
mov esp,ebp
pop ebp
ret

1587
src/tmap.s

File diff suppressed because it is too large Load diff

View file

@ -1,322 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2023 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap_asm.s
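/// \brief AT&T-syntax x86 column drawers: a non-MMX loop (R_DrawColumn8_NOMMX), a Pentium-tuned version and a K6/MMX version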
//.comm _dc_colormap,4
//.comm _dc_x,4
//.comm _dc_yl,4
//.comm _dc_yh,4
//.comm _dc_iscale,4
//.comm _dc_texturemid,4
//.comm _dc_source,4
//.comm _ylookup,4
//.comm _columnofs,4
//.comm _loopcount,4
//.comm _pixelcount,4
// Scratch variables. _mmxcomm holds the stack adjustment made by
// _R_DrawColumn_8_K6_MMX so that it can be undone on exit; _pixelcount and
// _loopcount are not referenced by the routines in this file.
.data
_pixelcount:
.long 0x00000000
_loopcount:
.long 0x00000000
.align 8
_mmxcomm:
.long 0x00000000
.text
.align 4
//
// _R_DrawColumn8_NOMMX: plain 8bpp column drawer, one pixel per iteration.
//
// No stack arguments: reads _dc_yl, _dc_yh, _dc_x, _dc_iscale,
// _dc_texturemid, _dc_source, _dc_colormap, _ylookup and _columnofs.
// frac starts at dc_texturemid + dc_yl*dc_iscale and the texture row is
// (frac >> 16) & 127.
// The two $0x12345678 immediates are placeholders: the exported labels
// rdc8nwidth1 / rdc8nwidth2 sit directly after them, presumably so that
// other code can patch in the screen width at run time (TODO confirm).
// NOTE(review): _ylookup and _columnofs are loaded as pointer variables
// here, while tmap.nas indexes ylookup/columnofs directly as arrays --
// confirm which form matches the C declarations.
// ebp, esi, edi and ebx are saved and restored.
//
.globl _R_DrawColumn8_NOMMX
_R_DrawColumn8_NOMMX:
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
movl _dc_yl,%edx
movl _dc_yh,%eax
subl %edx,%eax
leal 1(%eax),%ebx
// ebx = dc_yh - dc_yl + 1 = pixel count
testl %ebx,%ebx
jle rdc8ndone
// esi = ylookup[dc_yl] + columnofs[dc_x] = first destination pixel
movl _dc_x,%eax
movl _ylookup, %edi
movl (%edi,%edx,4),%esi
movl _columnofs, %edi
addl (%edi,%eax,4),%esi
// edi = fracstep, ecx = frac, ebp = source column, edx = 0 (dl takes the texel)
movl _dc_iscale,%edi
movl %edx,%eax
imull %edi,%eax
movl _dc_texturemid,%ecx
addl %eax,%ecx
movl _dc_source,%ebp
xorl %edx, %edx
// Step back by one (patched) stride because the loop advances esi before
// it stores.
subl $0x12345678, %esi
.globl rdc8nwidth1
rdc8nwidth1:
.align 4,0x90
rdc8nloop:
movl %ecx,%eax
shrl $16,%eax
addl %edi,%ecx
andl $127,%eax
addl $0x12345678,%esi
.globl rdc8nwidth2
rdc8nwidth2:
// dl = source[row]; al = colormap[texel]; store it at the destination
movb (%eax,%ebp),%dl
movl _dc_colormap,%eax
movb (%eax,%edx),%al
movb %al,(%esi)
decl %ebx
jne rdc8nloop
rdc8ndone:
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
//
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
// By ES 1998/08/01
//
// No stack arguments: reads _dc_yl, _dc_yh, _dc_x, _dc_iscale,
// _dc_texturemid, _dc_source, _dc_colormap, _ylookup and _columnofs.
// A single-pixel column is handled separately; longer columns run a loop
// that draws two pixels per iteration, with frac converted to 7.25 fixed
// point (shll $9) so that the texture row is frac >> 25, i.e. 0..127.
// The colormap lookup overwrites the low byte of the colormap pointer
// (dl / al), so it relies on the low 8 bits of dc_colormap being zero.
// Every 0x12345678 is a placeholder; the exported rdc8pwidthN label that
// follows it marks the spot, and the comment on that label gives the
// value expected there -- presumably patched in at run time by code
// outside this file (TODO confirm).
// ebp, ebx, esi and edi are saved and restored.
//
.globl _R_DrawColumn_8_Pentium
_R_DrawColumn_8_Pentium:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl _dc_yl,%eax // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
// ecx = ylookup[dc_yh]: the row of the bottom pixel
movl (%edi,%ebx,4),%ecx
subl %eax,%ebx // ebx=number of pixels-1
jl rdc8pdone // no pixel to draw, done
jnz rdc8pmany
movl _dc_x,%edx // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%eax
movl _dc_texturemid,%edi
addl %eax,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8pdone // done!
.align 4, 0x90
rdc8pmany: // draw >1 pixel
movl _dc_x,%edx
movl _columnofs, %edi
movl (%edi,%edx,4),%edx
leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
.globl rdc8pwidth5
rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%edx // edx = fracstep
imull %edx,%eax
shll $9, %edx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%ecx
addl %eax,%ecx // ecx = frac
movl _dc_colormap,%eax // eax = lighting/special effects LUT
shll $9, %ecx
movl _dc_source,%esi // esi = source ptr
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8pwidth6
rdc8pwidth6: // DeadBeef = -SCREENWIDTH
// Begin the calculation of the two first pixels
leal (%ecx, %edx), %ebp
shrl $25, %ecx
movb (%esi, %ecx), %al
leal (%edx, %ebp), %ecx
shrl $25, %ebp
movb (%eax), %dl
// The main loop
// ebx counts up from a negative offset towards zero by the patched step;
// the jl below tests the flags of that addl.
rdc8ploop:
movb (%esi,%ebp), %al // load 1
leal (%ecx, %edx), %ebp // calc frac 3
shrl $25, %ecx // shift frac 2
movb %dl, 0x12345678(%edi, %ebx)// store 0
.globl rdc8pwidth1
rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
movb (%eax), %al // lookup 1
movb %al, 0x12345678(%edi, %ebx)// store 1
.globl rdc8pwidth2
rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
movb (%esi, %ecx), %al // load 2
leal (%ebp, %edx), %ecx // calc frac 4
shrl $25, %ebp // shift frac 3
movb (%eax), %dl // lookup 2
addl $0x12345678, %ebx // counter
.globl rdc8pwidth3
rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
jl rdc8ploop // loop
// End of loop. Write extra pixel or just exit.
jnz rdc8pdone
movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
.globl rdc8pwidth4
rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8pdone:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
//
// MMX asm version, optimised for K6
// By ES 1998/07/05
//
// No stack arguments: reads _dc_yl, _dc_yh, _dc_x, _dc_iscale,
// _dc_texturemid, _dc_source, _dc_colormap, _ylookup and _columnofs.
// Two frac values are kept in mm0 and stepped together with paddd;
// texture rows are frac >> 25 of the 7.25 fixed-point value, i.e. 0..127.
// The stack is lowered by 8 or 12 bytes to get an 8-byte aligned scratch
// slot at (%esp); the amount is remembered in _mmxcomm and added back at
// rdc8mdone.
// Every 0x12345678 (including the target of the first jl) is a
// placeholder marked by the exported label that follows it --
// presumably patched at run time by code outside this file
// (TODO confirm).
// ebp, ebx, esi and edi are saved and restored; emms is executed before
// returning.
//
.globl _R_DrawColumn_8_K6_MMX
_R_DrawColumn_8_K6_MMX:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
andl $7,%eax
addl $8,%eax
movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
subl %eax,%esp
movl _dc_yl,%edx // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
// ecx = ylookup[dc_yh]: the row of the bottom pixel
movl (%edi,%ebx,4),%ecx
subl %edx,%ebx // ebx=number of pixels-1
jl 0x12345678 // no pixel to draw, done
.globl rdc8moffs1
rdc8moffs1:
jnz rdc8mmany
movl _dc_x,%eax // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%edx
movl _dc_texturemid,%edi
addl %edx,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8mdone // done!
.globl rdc8moffs2
rdc8moffs2:
.align 4, 0x90
rdc8mmany: // draw >1 pixel
movl _dc_x,%eax
movl _columnofs, %edi
movl (%edi,%eax,4),%eax
leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
.globl rdc8mwidth3
rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%ecx // ecx = fracstep
imull %ecx,%edx
shll $9, %ecx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%eax
addl %edx,%eax // eax = frac
movl _dc_colormap,%edx // edx = lighting/special effects LUT
shll $9, %eax
leal (%ecx, %ecx), %edi
movl _dc_source,%ebp // ebp = source ptr
movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8mwidth5
rdc8mwidth5: // DeadBeef = -SCREENWIDTH
movl %edi, 4(%esp)
leal (%eax, %ecx), %edi
movq 0(%esp), %mm1 // fracstep:fracstep in mm1
movl %eax, 0(%esp)
shrl $25, %eax
movl %edi, 4(%esp)
movzbl (%ebp, %eax), %eax
movq 0(%esp), %mm0 // frac:frac in mm0
paddd %mm1, %mm0
shrl $25, %edi
movq %mm0, %mm2
psrld $25, %mm2 // texture index in mm2
paddd %mm1, %mm0
movq %mm2, 0(%esp)
.globl rdc8mloop
rdc8mloop: // The main loop
movq %mm0, %mm2 // move 4-5 to temp reg
movzbl (%ebp, %edi), %edi // read 1
psrld $25, %mm2 // shift 4-5
movb (%edx,%eax), %cl // lookup 0
movl 0(%esp), %eax // load 2
addl $0x12345678, %ebx // counter
.globl rdc8mwidth2
rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH
movb %cl, (%esi, %ebx) // write 0
movb (%edx,%edi), %ch // lookup 1
movb %ch, 0x12345678(%esi, %ebx) // write 1
.globl rdc8mwidth1
rdc8mwidth1: // DeadBeef = SCREENWIDTH
movl 4(%esp), %edi // load 3
paddd %mm1, %mm0 // frac 6-7
movzbl (%ebp, %eax), %eax // lookup 2
movq %mm2, 0(%esp) // store texture index 4-5
// The flags tested here are still those of the counter addl above: none
// of the mov / MMX instructions in between modifies them.
jl rdc8mloop
jnz rdc8mno_odd
movb (%edx,%eax), %cl // write the last odd pixel
movb %cl, 0x12345678(%esi)
.globl rdc8mwidth4
rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8mno_odd:
.globl rdc8mdone
rdc8mdone:
emms
addl _mmxcomm, %esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
// Need some extra space to align run-time
// (31 one-byte nops after the exported end marker.)
.globl R_DrawColumn_8_K6_MMX_end
R_DrawColumn_8_K6_MMX_end:
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;

View file

@ -1,674 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DOSDOOM.
;; Copyright (C) 2010-2023 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_mmx.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode, using SIMD
;; instructions.
;; Draw wall columns.
[BITS 32]
%define FRACBITS 16
%define TRANSPARENTPIXEL 255
;; Symbol import/export helpers: "cextern name" declares an external
;; symbol, "cglobal name" exports one.
;; With LINUX defined the name is declared exactly as written.
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
;; Without LINUX each name is first %define'd to its underscore-prefixed
;; form, so later uses of the plain name in this file expand to _name.
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
; Only .width is named: it sits at byte offset 12 and is 4 bytes wide.
; The reserved bytes before and after it are anonymous padding, giving
; the struc a total size of 60 bytes.
; NOTE(review): these offsets presumably mirror the C-side viddef_s
; layout -- confirm against the C declaration.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_colormap
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern dc_iscale
cextern dc_texturemid
cextern dc_texheight
cextern dc_source
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_transmap
cextern R_DrawColumn_8_ASM
cextern R_Draw2sMultiPatchColumn_8_ASM
;; spans
cextern nflatshiftup
cextern nflatxshift
cextern nflatyshift
cextern nflatmask
cextern ds_xfrac
cextern ds_yfrac
cextern ds_xstep
cextern ds_ystep
cextern ds_x1
cextern ds_x2
cextern ds_y
cextern ds_source
cextern ds_colormap
cextern ylookup
cextern columnofs
cextern vid
[SECTION .data]
nflatmask64 dq 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; MMX column drawer.
;;
;; Takes no stack arguments: every input is read from the dc_* globals,
;; centeryfrac, ylookup, columnofs and vid. Pixels are written starting
;; at ylookup[dc_yl] + columnofs[dc_x], one per row, vid.width bytes
;; apart. An odd pixel count is handled by drawing one pixel first; the
;; remainder is drawn two pixels per loop iteration.
;; If dc_texheight is not a power of two the whole job is handed to
;; R_DrawColumn_8_ASM instead.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = heightmask during setup and the odd pixel, vid.width in .pairloop
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = fracstep, twice (doubled just before .pairloop)
;; mm3 = pair of consecutive fracs
;; mm4 = scratch, only used while building mm3
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_MMX
R_DrawColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
test edx, [dc_texheight] ;; height & (height - 1): zero only for a power of two
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
imul dword [dc_iscale] ;; edx:eax = signed 64-bit product
shrd eax, edx, FRACBITS ;; eax = product >> FRACBITS (low 32 bits)
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;; If the count is odd, draw one pixel here so that the pair loop
;; below always sees an even, non-zero count.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx] ;; texel = source[index]
movzx edx, byte [ebx + edx] ;; pixel = colormap[texel]
mov [edi], dl
add edi, [vid + viddef_s.width] ;; advance dest by one row
sub ecx, 1
jz .done ;; that was the only pixel
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;; (the heightmask copy in ebp is no longer needed; mm1 holds it)
;;
mov ebp, [vid + viddef_s.width]
align 16
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
mov [edi], al ;; 2B 1(2)u. First pixel
;; IFETCH boundary
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
;; IFETCH boundary
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
;;
;; Non-power-of-two texture height: let R_DrawColumn_8_ASM draw the
;; column, then leave through the shared epilogue so the four
;; registers pushed on entry are popped again.
;;
.usenonMMX:
call R_DrawColumn_8_ASM
jmp .done
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; MMX column drawer.
;;
;; Takes no stack arguments: every input is read from the dc_* globals,
;; centeryfrac, ylookup, columnofs and vid. A texel equal to
;; TRANSPARENTPIXEL is skipped: nothing is written for it, but the
;; destination pointer still advances to the next row.
;; If dc_texheight is not a power of two the whole job is handed to
;; R_Draw2sMultiPatchColumn_8_ASM instead.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = heightmask during setup and the odd pixel, vid.width in .pairloop
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = fracstep, twice (doubled just before .pairloop)
;; mm3 = pair of consecutive fracs
;; mm4 = scratch, only used while building mm3
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_MMX
R_Draw2sMultiPatchColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
test edx, [dc_texheight] ;; height & (height - 1): zero only for a power of two
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
imul dword [dc_iscale] ;; edx:eax = signed 64-bit product
shrd eax, edx, FRACBITS ;; eax = product >> FRACBITS (low 32 bits)
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;; If the count is odd, handle one pixel here so that the pair loop
;; below always sees an even, non-zero count.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx] ;; texel = source[index]
cmp dl, TRANSPARENTPIXEL
je .nextmod2 ;; transparent: leave the destination untouched
movzx edx, byte [ebx + edx] ;; pixel = colormap[texel]
mov [edi], dl
.nextmod2:
add edi, [vid + viddef_s.width] ;; advance dest by one row
sub ecx, 1
jz .done ;; that was the only pixel
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;; (the heightmask copy in ebp is no longer needed; mm1 holds it)
;;
mov ebp, [vid + viddef_s.width]
align 16
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
cmp al, TRANSPARENTPIXEL ;; 2B 1u.
je .secondinpair ;; 2B 1u. Skip the write for a transparent texel
;; IFETCH boundary
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
mov [edi], al ;; 2B 1(2)u. First pixel
.secondinpair:
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
cmp dl, TRANSPARENTPIXEL ;; 2B 1u.
je .nextpair ;; 2B 1u. Skip the write for a transparent texel
;; IFETCH boundary
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
.nextpair:
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
;;
;; Non-power-of-two texture height: let R_Draw2sMultiPatchColumn_8_ASM
;; draw the column, then leave through the shared epilogue so the four
;; registers pushed on entry are popped again.
;;
.usenonMMX:
call R_Draw2sMultiPatchColumn_8_ASM
jmp .done
;;----------------------------------------------------------------------
;;
;; R_DrawSpan : 8bpp span drawer
;;
;; MMX span drawer.
;;
;; Takes no stack arguments: every input is read from the ds_* and
;; nflat* globals, ylookup and columnofs. Draws ds_x2 - ds_x1 + 1
;; consecutive bytes starting at ylookup[ds_y] + columnofs[ds_x1];
;; a count that is zero or negative draws nothing.
;;
;; Three phases:
;;   1. single pixels until the destination is dword aligned,
;;   2. four pixels per iteration, stored with one dword write,
;;   3. single pixels for the remaining (at most three) pixels.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = four packed output pixels (aligned loop only)
;; mm0 = accumulator
;; mm1 = pair of consecutive xpositions
;; mm2 = pair of consecutive ypositions
;; mm3 = xstep, twice (2 * xstep inside the aligned loop)
;; mm4 = ystep, twice (2 * ystep inside the aligned loop)
;; mm5 = nflatxshift
;; mm6 = nflatyshift (scratch during setup)
;; mm7 = accumulator (nflatshiftup during setup)
;;----------------------------------------------------------------------
cglobal R_DrawSpan_8_MMX
R_DrawSpan_8_MMX:
    push ebp                        ;; preserve caller's stack frame pointer
    push esi                        ;; preserve register variables
    push edi
    push ebx
;;
;; esi = ds_source
;; ebx = ds_colormap
;;
    mov esi, [ds_source]
    mov ebx, [ds_colormap]
;;
;; edi = ylookup[ds_y] + columnofs[ds_x1]
;;
    mov eax, [ds_y]
    mov edi, [ylookup + eax*4]
    mov edx, [ds_x1]
    add edi, [columnofs + edx*4]
;;
;; ecx = ds_x2 - ds_x1 + 1
;;
    mov ecx, [ds_x2]
    sub ecx, edx
    add ecx, 1
;;
;; Needed for fracs and steps
;;
    movd mm7, [nflatshiftup]
;;
;; mm3 = xstep
;;
    movd mm3, [ds_xstep]
    pslld mm3, mm7
    punpckldq mm3, mm3
;;
;; mm4 = ystep
;;
    movd mm4, [ds_ystep]
    pslld mm4, mm7
    punpckldq mm4, mm4
;;
;; mm1 = pair of consecutive xpositions
;;
    movd mm1, [ds_xfrac]
    pslld mm1, mm7
    movq mm6, mm1
    paddd mm6, mm3
    punpckldq mm1, mm6
;;
;; mm2 = pair of consecutive ypositions
;;
    movd mm2, [ds_yfrac]
    pslld mm2, mm7
    movq mm6, mm2
    paddd mm6, mm4
    punpckldq mm2, mm6
;;
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;;
    movd mm5, [nflatxshift]
    movd mm6, [nflatyshift]
;;
;; Mask is in memory due to lack of registers.
;;
    mov eax, [nflatmask]
    mov [nflatmask64], eax
    mov [nflatmask64 + 4], eax
;;
;; Go until we reach a dword boundary.
;;
.unaligned:
    test edi, 3
    jz .alignedprep
.stragglers:
;;
;; Stop once the count is used up. The signed <= test also rejects a
;; negative count (ds_x2 < ds_x1 - 1), which an == 0 test would let
;; run on until the counter wrapped all the way around.
;;
    test ecx, ecx
    jle near .done
;;
;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
;;
    movq mm0, mm1                   ;; mm0 = xposition
    movq mm7, mm2                   ;; mm7 = yposition
    paddd mm1, mm3                  ;; xposition += xstep (once!)
    paddd mm2, mm4                  ;; yposition += ystep (once!)
    psrld mm0, mm5                  ;; shift
    psrld mm7, mm6                  ;; shift
    pand mm7, [nflatmask64]         ;; mask
    por mm0, mm7                    ;; or x and y together
    movd eax, mm0                   ;; eax = index of first pixel
    movzx eax, byte [esi + eax]     ;; al = source[eax]
    movzx eax, byte [ebx + eax]     ;; al = colormap[al]
    mov [edi], al
    add edi, 1
    sub ecx, 1
    jmp .unaligned
.alignedprep:
;;
;; We can double the steps now.
;; mm1/mm2 each hold two consecutive positions, so one doubled step
;; advances both lanes to the next pair.
;;
    pslld mm3, 1
    pslld mm4, 1
;;
;; Generate chunks of four pixels.
;;
.alignedloop:
;;
;; Make sure we have at least four pixels.
;;
    cmp ecx, 4
    jl .prestragglers
;;
;; First two pixels.
;;
    movq mm0, mm1                   ;; mm0 = xposition
    movq mm7, mm2                   ;; mm7 = yposition
    paddd mm1, mm3                  ;; xposition += xstep
    paddd mm2, mm4                  ;; yposition += ystep
    psrld mm0, mm5                  ;; shift
    psrld mm7, mm6                  ;; shift
    pand mm7, [nflatmask64]         ;; mask
    por mm0, mm7                    ;; or x and y together
    movd eax, mm0                   ;; eax = index of first pixel
    movzx eax, byte [esi + eax]     ;; al = source[eax]
    movzx ebp, byte [ebx + eax]     ;; ebp = colormap[al]
    punpckhdq mm0, mm0              ;; both dwords = high dword
    movd eax, mm0                   ;; eax = index of second pixel
    movzx eax, byte [esi + eax]     ;; al = source[eax]
    movzx eax, byte [ebx + eax]     ;; al = colormap[al]
    shl eax, 8                      ;; get pixel in right byte
    or ebp, eax                     ;; put pixel in ebp
;;
;; Next two pixels.
;;
    movq mm0, mm1                   ;; mm0 = xposition
    movq mm7, mm2                   ;; mm7 = yposition
    paddd mm1, mm3                  ;; xposition += xstep
    paddd mm2, mm4                  ;; yposition += ystep
    psrld mm0, mm5                  ;; shift
    psrld mm7, mm6                  ;; shift
    pand mm7, [nflatmask64]         ;; mask
    por mm0, mm7                    ;; or x and y together
    movd eax, mm0                   ;; eax = index of third pixel
    movzx eax, byte [esi + eax]     ;; al = source[eax]
    movzx eax, byte [ebx + eax]     ;; al = colormap[al]
    shl eax, 16                     ;; get pixel in right byte
    or ebp, eax                     ;; put pixel in ebp
    punpckhdq mm0, mm0              ;; both dwords = high dword
    movd eax, mm0                   ;; eax = index of fourth pixel
    movzx eax, byte [esi + eax]     ;; al = source[eax]
    movzx eax, byte [ebx + eax]     ;; al = colormap[al]
    shl eax, 24                     ;; get pixel in right byte
    or ebp, eax                     ;; put pixel in ebp
;;
;; Write pixels.
;;
    mov [edi], ebp
    add edi, 4
    sub ecx, 4
    jmp .alignedloop
.prestragglers:
;;
;; Back to one step at a time.
;; The doubling above shifted bit 31 of each step out of the register,
;; so shifting right again cannot recover it when bits 31 and 30 of the
;; step differ. Rebuild both steps from their source values instead,
;; exactly as during setup. mm7 is free here: it is only a temporary
;; inside the pixel code.
;;
    movd mm7, [nflatshiftup]
    movd mm3, [ds_xstep]
    pslld mm3, mm7
    punpckldq mm3, mm3
    movd mm4, [ds_ystep]
    pslld mm4, mm7
    punpckldq mm4, mm4
    jmp .stragglers
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
    emms
    pop ebx
    pop edi
    pop esi
    pop ebp
    ret

View file

@ -1,48 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2023 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_vc.nas
;; DESCRIPTION:
;; Assembler optimised math code for Visual C++.
[BITS 32]
;; "cglobal name" exports a symbol under its underscore-prefixed form:
;; the name is %define'd to _name first, so the label written later in
;; this file as "name:" is assembled as "_name".
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
[SECTION .text write]
;----------------------------------------------------------------------------
; fixed_t FixedMul (fixed_t a, fixed_t b)
;
; Fixed-point multiply: forms the signed 64-bit product of the two stack
; arguments and returns it shifted right by 16, truncated to 32 bits.
; In:    [esp+4] = a, [esp+8] = b
; Out:   eax = (a * b) >> 16
; Clobb: edx, flags
;----------------------------------------------------------------------------
cglobal FixedMul
FixedMul:
    mov     eax, [esp+8]            ; eax = b
    imul    dword [esp+4]           ; edx:eax = b * a (signed, 64-bit)
    shrd    eax, edx, 16            ; eax = bits 16..47 of the product
    ret
;----------------------------------------------------------------------------
; fixed_t FixedDiv2 (fixed_t a, fixed_t b);
;
; Fixed-point divide: widens a to 64 bits, shifts it left by 16 and
; divides the result by b with a signed divide.
; In:    [esp+4] = a, [esp+8] = b
; Out:   eax = (a << 16) / b
; Clobb: edx, flags
; NOTE(review): idiv faults when b is zero or the quotient does not fit
; in 32 bits; presumably the caller range-checks first -- confirm.
;----------------------------------------------------------------------------
cglobal FixedDiv2
FixedDiv2:
    mov     edx, [esp+4]            ; edx = a
    mov     eax, edx                ; eax = a
    sar     edx, 16                 ; edx = high dword of (sign-extended a) << 16
    shl     eax, 16                 ; eax = low dword of the same value
    idiv    dword [esp+8]           ; eax = edx:eax / b
    ret

View file

@ -447,12 +447,6 @@ static void CV_palette_OnChange(void)
V_SetPalette(0);
}
#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM)
void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes);
#define HAVE_VIDCOPY
#endif
static void CV_constextsize_OnChange(void)
{
if (!con_refresh)
@ -466,9 +460,6 @@ static void CV_constextsize_OnChange(void)
void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes)
{
#ifdef HAVE_VIDCOPY
VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes);
#else
if (srcrowbytes == destrowbytes)
M_Memcpy(destptr, srcptr, srcrowbytes * height);
else
@ -481,7 +472,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT3
srcptr += srcrowbytes;
}
}
#endif
}
static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0};

View file

@ -1,61 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2023 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file vid_copy.s
/// \brief code for updating the linear frame buffer screen.
#include "asm_defs.inc" // structures, must match the C structures!
// DJGPPv2 is as fast as this one, but then someone may compile with a less
// good version of DJGPP than mine, so this little asm will do the trick!
#define srcptr 4+16
#define destptr 8+16
#define width 12+16
#define height 16+16
#define srcrowbytes 20+16
#define destrowbytes 24+16
// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth );
// width is given as BYTES
//
// Copies `height` rows of `width` bytes from src to dest. After each row
// the source pointer has advanced by srcrowbytes and the destination
// pointer by destrowbytes in total.
//
// Arguments are read from the stack; the offsets defined above include
// the 16 bytes taken by the four registers pushed on entry.
// Each row is copied as width/4 dwords followed by the width%4 leftover
// bytes, so a width that is not a multiple of four is copied in full and
// the per-row pointer skips (rowbytes - width) stay exact.
// A height of zero or less copies nothing.
// Clobbers: eax, ecx, edx, flags (direction flag is cleared).

#ifdef __i386__
.globl C(VID_BlitLinearScreen_ASM)
C(VID_BlitLinearScreen_ASM):
    pushl %ebp                      // preserve caller's stack frame
    pushl %edi
    pushl %esi                      // preserve register variables
    pushl %ebx

    cld                             // string copies run upwards

    movl height(%esp),%ebp          // ebp = rows left
    testl %ebp,%ebp
    jle LLBlitDone                  // nothing to copy

    movl srcptr(%esp),%esi
    movl destptr(%esp),%edi
    movl width(%esp),%ebx
    movl srcrowbytes(%esp),%eax
    subl %ebx,%eax                  // eax = source bytes to skip after each row
    movl destrowbytes(%esp),%edx
    subl %ebx,%edx                  // edx = dest bytes to skip after each row
    shrl $2,%ebx                    // ebx = whole dwords per row

LLRowLoop:
    movl %ebx,%ecx
    rep/movsl (%esi),(%edi)         // copy the dword part of the row
    movl width(%esp),%ecx
    andl $3,%ecx                    // ecx = leftover bytes (0..3)
    rep/movsb (%esi),(%edi)         // copy them; does nothing when ecx is 0
    addl %eax,%esi                  // step both pointers to the next row
    addl %edx,%edi
    decl %ebp
    jnz LLRowLoop

LLBlitDone:
    popl %ebx                       // restore register variables
    popl %esi
    popl %edi
    popl %ebp                       // restore the caller's stack frame
    ret
#endif

View file

@ -22,7 +22,6 @@
#ifdef _MSC_VER
#include <assert.h>
#endif
#define NOASM
#include "../src/tables.h"
#define NO_M
#include "../src/m_fixed.c"