Merge branch 'removeasm' into 'master'

Remove ASM code

See merge request KartKrew/Kart-Public!348
Alu Folie 2024-03-31 20:40:11 +00:00
commit cc6386cd1e
40 changed files with 27 additions and 5104 deletions
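In short: this merge removes the hand-written assembly from the tree (tmap.nas, asm_defs.inc and p5prof.h are deleted outright, and the references to tmap_mmx.nas, tmap_vc.nas and vid_copy.s go with them), drops the I_CPUInfo CPU detection and the assembly variants of M_Memcpy, FixedMul and FixedDiv2, and strips the matching NASM/YASM plumbing from the CMake, Makefile, CircleCI, AppVeyor, Code::Blocks, Dev-C++ and MSVC project files. The portable C paths that previously sat behind NOASM/NONX86 become the only implementation; for example, the fixed-point multiply is now always the C version kept in the m_fixed.h hunk further down:

	/* Quoted from the m_fixed.h hunk below -- the sole FixedMul path after this change. */
	FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedMul(fixed_t a, fixed_t b)
	{
		// Need to cast to unsigned before shifting to avoid undefined behaviour
		// for negative integers
		return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
	}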

View file

@ -36,7 +36,7 @@ jobs:
- v1-SRB2-APT
- run:
name: Install SDK
command: apt-get -qq -y --no-install-recommends install git build-essential nasm libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
command: apt-get -qq -y --no-install-recommends install git build-essential libpng-dev:i386 libsdl2-mixer-dev:i386 libgme-dev:i386 libcurl4-openssl-dev:i386 gettext ccache wget gcc-multilib upx openssh-client
- save_cache:
key: v1-SRB2-APT
paths:

View file

@ -1996,24 +1996,6 @@ HW3SOUND for 3D hardware sound support
<Option compilerVar="CC" />
</Unit>
<Unit filename="src/v_video.h" />
<Unit filename="src/vid_copy.s">
<Option compilerVar="CC" />
<Option compiler="avrgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gnu_gcc_compiler_for_mingw32" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gnu_gcc_compiler_for_mingw64" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="armelfgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="tricoregcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="ppcgcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option compiler="gcc" use="1" buildCommand="$compiler $options -x assembler-with-cpp -c $file -o $object" />
<Option target="Debug Native/SDL" />
<Option target="Release Native/SDL" />
<Option target="Debug Linux/SDL" />
<Option target="Release Linux/SDL" />
<Option target="Debug Mingw/SDL" />
<Option target="Release Mingw/SDL" />
<Option target="Debug Mingw/DirectX" />
<Option target="Release Mingw/DirectX" />
</Unit>
<Unit filename="src/w_wad.c">
<Option compilerVar="CC" />
</Unit>

View file

@ -25,9 +25,6 @@
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(PlatformTarget)'=='x86'">
<ClCompile>
<PreprocessorDefinitions>USEASM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
</Link>

View file

@ -5,7 +5,7 @@ Ver=3
IsCpp=0
Type=0
UnitCount=279
Folders=A_Asm,B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
Folders=B_Bot,BLUA,D_Doom,F_Frame,G_Game,H_Hud,Hw_Hardware,Hw_Hardware/r_opengl,I_Interface,I_Interface/Dummy,I_Interface/SDL,I_Interface/Win32,LUA,M_Misc,P_Play,R_Rend,S_Sounds,W_Wad
CommandLine=
CompilerSettings=00000000000100000111e1
PchHead=-1
@ -1473,36 +1473,6 @@ Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit149]
FileName=src\tmap.nas
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=1
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap.nas
[Unit150]
FileName=src\asm_defs.inc
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit151]
FileName=src\vid_copy.s
Folder=A_Asm
Compile=1
CompileCpp=0
Link=1
Priority=1000
OverrideBuildCmd=1
BuildCmd=$(CC) $(CFLAGS) -x assembler-with-cpp -c src/vid_copy.s -o $@
[Unit152]
FileName=src\y_inter.h
Folder=H_Hud
@ -1543,26 +1513,6 @@ Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit156]
FileName=src\p5prof.h
Folder=A_Asm
Compile=1
CompileCpp=0
Link=1
Priority=1000
OverrideBuildCmd=0
BuildCmd=
[Unit157]
FileName=src\tmap_mmx.nas
Folder=A_Asm
Compile=0
CompileCpp=0
Link=0
Priority=1000
OverrideBuildCmd=1
BuildCmd=nasm.exe -g -o $@ -f win32 src/tmap_mmx.nas
[Unit159]
FileName=src\lzf.h
Folder=W_Wad

View file

@ -11,8 +11,6 @@ environment:
# c:\msys64 x86_64 has gcc 8.2.0, so use c:\mingw-w64 7.3.0 instead
MINGW_SDK_64: C:\mingw-w64\x86_64-8.1.0-posix-seh-rt_v6-rev0\mingw64
CFLAGS: -Wall -W -Werror -Wno-error=implicit-fallthrough -Wimplicit-fallthrough=3 -Wno-tautological-compare -Wno-error=suggest-attribute=noreturn
NASM_ZIP: nasm-2.12.01
NASM_URL: http://www.nasm.us/pub/nasm/releasebuilds/2.12.01/win64/nasm-2.12.01-win64.zip
UPX_ZIP: upx391w
UPX_URL: http://upx.sourceforge.net/download/upx391w.zip
CCACHE_EXE: ccache.exe
@ -46,7 +44,6 @@ environment:
ASSET_CLEAN: 0
cache:
- nasm-2.12.01.zip
- upx391w.zip
- ccache.exe
- C:\Users\appveyor\.ccache
@ -58,10 +55,6 @@ install:
- if [%X86_64%] == [1] ( set "MINGW_SDK=%MINGW_SDK_64%" )
- if [%X86_64%] == [1] ( set "CCACHE_CC=%CCACHE_CC_64%" )
- if not exist "%NASM_ZIP%.zip" appveyor DownloadFile "%NASM_URL%" -FileName "%NASM_ZIP%.zip"
- 7z x -y "%NASM_ZIP%.zip" -o%TMP% >null
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%NASM_ZIP%" "%MINGW_SDK%\bin" nasm.exe || exit 0
- if not exist "%UPX_ZIP%.zip" appveyor DownloadFile "%UPX_URL%" -FileName "%UPX_ZIP%.zip"
- 7z x -y "%UPX_ZIP%.zip" -o%TMP% >null
- robocopy /S /xx /ns /nc /nfl /ndl /np /njh /njs "%TMP%\%UPX_ZIP%" "%MINGW_SDK%\bin" upx.exe || exit 0
@ -78,7 +71,6 @@ before_build:
- set "Path=%MINGW_SDK%\bin;%Path%"
- if [%X86_64%] == [1] ( x86_64-w64-mingw32-gcc --version ) else ( i686-w64-mingw32-gcc --version )
- mingw32-make --version
- if not [%X86_64%] == [1] ( nasm -v )
- if not [%NOUPX%] == [1] ( upx -V )
- ccache -V
- ccache -s

View file

@ -1,46 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# support for the yasm assembler
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS nasm yasm asm)
if(NOT CMAKE_ASM_YASM_OBJECT_FORMAT)
if(WIN32)
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(CMAKE_ASM_YASM_OBJECT_FORMAT win64)
else()
set(CMAKE_ASM_YASM_OBJECT_FORMAT win32)
endif()
elseif(APPLE)
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho64)
else()
set(CMAKE_ASM_YASM_OBJECT_FORMAT macho)
endif()
else()
if(CMAKE_C_SIZEOF_DATA_PTR EQUAL 8)
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf64)
else()
set(CMAKE_ASM_YASM_OBJECT_FORMAT elf)
endif()
endif()
endif()
set(CMAKE_ASM_YASM_COMPILE_OBJECT "<CMAKE_ASM_YASM_COMPILER> <FLAGS> -f ${CMAKE_ASM_YASM_OBJECT_FORMAT} -o <OBJECT> <SOURCE>")
# Load the generic ASMInformation file:
set(ASM_DIALECT "_YASM")
include(CMakeASMInformation)
set(ASM_DIALECT)

View file

@ -1,27 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# Find the nasm assembler. yasm (http://www.tortall.net/projects/yasm/) is nasm compatible
set(CMAKE_ASM_YASM_COMPILER_LIST nasm yasm)
if(NOT CMAKE_ASM_YASM_COMPILER)
find_program(CMAKE_ASM_YASM_COMPILER yasm
"$ENV{ProgramFiles}/YASM")
endif()
# Load the generic DetermineASM compiler file with the DIALECT set properly:
set(ASM_DIALECT "_YASM")
include(CMakeDetermineASMCompiler)
set(ASM_DIALECT)

View file

@ -1,23 +0,0 @@
#=============================================================================
# Copyright 2010 Kitware, Inc.
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
# This file is used by EnableLanguage in cmGlobalGenerator to
# determine that the selected ASM_NASM "compiler" works.
# For assembler this can only check whether the compiler has been found,
# because otherwise there would have to be a separate assembler source file
# for each assembler on every architecture.
set(ASM_DIALECT "_YASM")
include(CMakeTestASMCompiler)
set(ASM_DIALECT)

View file

@ -76,7 +76,7 @@ LOCAL_SRC_FILES := am_map.c \
android/i_system.c \
android/i_video.c
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOASM -DNOPIX -DUNIXCOMMON -DNOTERMIOS
LOCAL_CFLAGS += -DPLATFORM_ANDROID -DNONX86 -DLINUX -DDEBUGMODE -DNOPIX -DUNIXCOMMON -DNOTERMIOS
LOCAL_MODULE := libsrb2

View file

@ -190,16 +190,6 @@ source_group("Main" FILES ${SRB2_CORE_SOURCES} ${SRB2_CORE_HEADERS})
source_group("Renderer" FILES ${SRB2_CORE_RENDER_SOURCES})
source_group("Game" FILES ${SRB2_CORE_GAME_SOURCES})
set(SRB2_ASM_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/vid_copy.s
)
set(SRB2_NASM_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/tmap_mmx.nas
${CMAKE_CURRENT_SOURCE_DIR}/tmap.nas
)
if(MSVC)
list(APPEND SRB2_NASM_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/tmap_vc.nas)
endif()
@ -231,10 +221,6 @@ set(SRB2_CONFIG_HAVE_CURL ON CACHE BOOL
"Enable curl support, used for downloading files via HTTP.")
set(SRB2_CONFIG_HWRENDER ON CACHE BOOL
"Enable hardware rendering through OpenGL.")
set(SRB2_CONFIG_USEASM OFF CACHE BOOL
"Enable NASM tmap implementation for software mode speedup.")
set(SRB2_CONFIG_YASM OFF CACHE BOOL
"Use YASM in place of NASM.")
set(SRB2_CONFIG_STATIC_OPENGL OFF CACHE BOOL
"Use statically linked OpenGL. NOT RECOMMENDED.")
@ -503,29 +489,6 @@ if(${SRB2_CONFIG_HWRENDER} AND ${SRB2_CONFIG_STATIC_OPENGL})
endif()
endif()
if(${SRB2_CONFIG_USEASM})
#SRB2_ASM_FLAGS can be used to pass flags to either nasm or yasm.
if(${CMAKE_SYSTEM} MATCHES "Linux")
set(SRB2_ASM_FLAGS "-DLINUX ${SRB2_ASM_FLAGS}")
endif()
if(${SRB2_CONFIG_YASM})
set(CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_YASM_SOURCE_FILE_EXTENSIONS} nas)
set(CMAKE_ASM_YASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
enable_language(ASM_YASM)
else()
set(CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS ${CMAKE_ASM_NASM_SOURCE_FILE_EXTENSIONS} nas)
set(CMAKE_ASM_NASM_FLAGS "${SRB2_ASM_FLAGS}" CACHE STRING "Flags used by the assembler during all build types.")
enable_language(ASM_NASM)
endif()
set(SRB2_USEASM ON)
add_definitions(-DUSEASM)
set(CMAKE_C_FLAGS ${CMAKE_C_FLAGS} -msse3 -mfpmath=sse)
else()
set(SRB2_USEASM OFF)
add_definitions(-DNONX86 -DNORUSEASM)
endif()
# Targets
# Compatibility flag with later versions of GCC

View file

@ -62,7 +62,6 @@
# Compile a debug version, add 'DEBUGMODE=1'
# Compile with less warnings, add 'RELAXWARNINGS=1'
# Generate compiler errors for most compiler warnings, add 'ERRORMODE=1'
# Compile without NASM's tmap.nas, add 'NOASM=1'
# Compile without 3D hardware support, add 'NOHW=1'
# Compile with GDBstubs, add 'RDB=1'
# Compile without PNG, add 'NOPNG=1'
@ -165,7 +164,6 @@ endif
ifdef MINGW64
MINGW=1
NONX86=1
NOASM=1
# MINGW64 should not necessarily imply X86_64=1, but we make that assumption elsewhere
# Once that changes, remove this
X86_64=1
@ -188,7 +186,6 @@ NOPNG=1
NOZLIB=1
NONET=1
NOHW=1
NOASM=1
NOIPX=1
EXENAME?=srb2dummy
OBJS=$(OBJDIR)/i_video.o
@ -197,7 +194,6 @@ endif
ifdef HAIKU
NOIPX=1
NOASM=1
ifndef NONET
LIBS=-lnetwork
endif
@ -267,7 +263,6 @@ NOPNG=1
NOZLIB=1
NONET=1
#NOHW=1
NOASM=1
NOIPX=1
NONX86=1
OBJS+=$(OBJDIR)/i_video.o
@ -295,7 +290,6 @@ endif
MSGFMT?=msgfmt
ifndef ECHO
NASM:=@$(NASM)
REMOVE:=@$(REMOVE)
CC:=@$(CC)
CXX:=@$(CXX)
@ -350,13 +344,6 @@ ifdef X86_64
endif
endif
ifndef NOASM
ifndef NONX86
OBJS+=$(OBJDIR)/tmap.o $(OBJDIR)/tmap_mmx.o
OPTS+=-DUSEASM
endif
endif
ifndef NOPNG
OPTS+=-DHAVE_PNG
@ -495,16 +482,6 @@ else
endif
CFLAGS+=-g $(OPTS) $(ARCHOPTS) $(WINDRESFLAGS)
ifdef YASM
ifdef STABS
NASMOPTS?= -g stabs
else
NASMOPTS?= -g dwarf2
endif
else
NASMOPTS?= -g
endif
ifdef PROFILEMODE
# build with profiling information
CFLAGS+=-pg
@ -698,12 +675,6 @@ ifdef CYGWIN32
$(REMOVE) $(OBJDIR)/*.res
endif
#make a big srb2.s that is the disasm of the exe (dos only ?)
asm:
$(CC) $(LDFLAGS) $(OBJS) -o $(OBJDIR)/tmp.exe $(LIBS)
$(OBJDUMP) -d $(OBJDIR)/tmp.exe --no-show-raw-insn > srb2.s
$(REMOVE) $(OBJDIR)/tmp.exe
# executable
# NOTE: DJGPP's objcopy do not have --add-gnu-debuglink
@ -888,12 +859,6 @@ $(OBJDIR)/%.o: hardware/%.c
$(OBJDIR)/%.o: blua/%.c
$(CC) $(CFLAGS) $(LUA_CFLAGS) $(WFLAGS) -c $< -o $@
$(OBJDIR)/%.o: %.nas
$(NASM) $(NASMOPTS) -o $@ -f $(NASMFORMAT) $<
$(OBJDIR)/vid_copy.o: vid_copy.s asm_defs.inc
$(CC) $(OPTS) $(ASFLAGS) -x assembler-with-cpp -c $< -o $@
$(OBJDIR)/%.o: %.s
$(CC) $(OPTS) -x assembler-with-cpp -c $< -o $@

View file

@ -501,12 +501,6 @@ i_main_o=$(OBJDIR)/i_main.o
#set OBJDIR and BIN's starting place
OBJDIR=../objs
BIN=../bin
#Nasm ASM and rm
ifdef YASM
NASM?=yasm
else
NASM?=nasm
endif
REMOVE?=rm -f
CP?=cp
MKDIR?=mkdir -p
@ -524,7 +518,6 @@ endif
#Interface Setup
ifdef DJGPPDOS
INTERFACE=djgppdos
NASMFORMAT=coff
OBJDIR:=$(OBJDIR)/djgppdos
ifdef WATTCP
OBJDIR:=$(OBJDIR)/wattcp
@ -538,7 +531,6 @@ ifdef DUMMY
BIN:=$(BIN)/dummy
else
ifdef LINUX
NASMFORMAT=elf -DLINUX
SDL=1
ifdef LINUX64
OBJDIR:=$(OBJDIR)/Linux64
@ -550,7 +542,6 @@ endif
else
ifdef FREEBSD
INTERFACE=sdl
NASMFORMAT=elf -DLINUX
SDL=1
OBJDIR:=$(OBJDIR)/FreeBSD
@ -558,7 +549,6 @@ ifdef FREEBSD
else
ifdef SOLARIS
INTERFACE=sdl
NASMFORMAT=elf -DLINUX
SDL=1
OBJDIR:=$(OBJDIR)/Solaris
@ -566,7 +556,6 @@ ifdef SOLARIS
else
ifdef CYGWIN32
INTERFACE=sdl
NASMFORMAT=win32
SDL=1
OBJDIR:=$(OBJDIR)/cygwin
@ -574,7 +563,6 @@ ifdef CYGWIN32
else
ifdef MINGW64
INTERFACE=win32
#NASMFORMAT=win64
OBJDIR:=$(OBJDIR)/Mingw64
BIN:=$(BIN)/Mingw64
else
@ -606,13 +594,11 @@ ifdef PS3N
else
ifdef MINGW
INTERFACE=win32
NASMFORMAT=win32
OBJDIR:=$(OBJDIR)/Mingw
BIN:=$(BIN)/Mingw
else
ifdef XBOX
INTERFACE=sdl12
NASMFORMAT=win32
PREFIX?=/usr/local/openxdk/bin/i386-pc-xbox
SDL=1
SDL12=1

View file

@ -233,11 +233,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
return -1;
}
const CPUInfoFlags *I_CPUInfo(void)
{
return NULL;
}
const char *I_LocateWad(void)
{
return "/sdcard/srb2";

View file

@ -1,43 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file asm_defs.inc
/// \brief must match the C structures
#ifndef __ASM_DEFS__
#define __ASM_DEFS__
// this makes variables more noticable,
// and make the label match with C code
// Linux, unlike DOS, has no "_" 19990119 by Kin
// and nasm needs .data code segs under linux 20010210 by metzgermeister
// FIXME: nasm ignores these settings, so I put the macros into the makefile
#ifdef __ELF__
#define C(label) label
#define CODE_SEG .data
#else
#define C(label) _##label
#define CODE_SEG .text
#endif
/* This is a more readable way to access the arguments passed from C code */
/* PLEASE NOTE: it is supposed that all arguments passed from C code are */
/* 32bit integer (INT32, long, and most *pointers) */
#define ARG1 8(%ebp)
#define ARG2 12(%ebp)
#define ARG3 16(%ebp)
#define ARG4 20(%ebp)
#define ARG5 24(%ebp)
#define ARG6 28(%ebp)
#define ARG7 32(%ebp)
#define ARG8 36(%ebp)
#define ARG9 40(%ebp) //(c)tm ... Allegro by Shawn Hargreaves.
#endif

View file

@ -492,7 +492,7 @@ void M_StartupLocale(void);
// M_GetText function that just returns the string.
#define M_GetText(x) (x)
#endif
extern void *(*M_Memcpy)(void* dest, const void* src, size_t n) FUNCNONNULL;
void *M_Memcpy(void *dest, const void *src, size_t n);
char *va(const char *format, ...) FUNCPRINTF;
char *M_GetToken(const char *inputString);
char *sizeu1(size_t num);
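The function-pointer indirection removed above means M_Memcpy is now an ordinary function. Its new body is not legible in the unannotated m_misc.c hunk further down, so the following is only a minimal sketch of such a wrapper (an assumption for illustration, not the verbatim replacement from this commit):

	#include <string.h> /* memcpy */

	/* Hypothetical plain-C M_Memcpy: forward straight to the C library.
	   Sketch only; the real body in this commit may also keep debug NULL checks. */
	void *M_Memcpy(void *dest, const void *src, size_t n)
	{
		return memcpy(dest, src, n);
	}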

View file

@ -137,11 +137,6 @@ INT32 I_mkdir(const char *dirname, INT32 unixright)
return -1;
}
const CPUInfoFlags *I_CPUInfo(void)
{
return NULL;
}
const char *I_LocateWad(void)
{
return NULL;

View file

@ -296,40 +296,6 @@ char *I_GetUserName(void);
*/
INT32 I_mkdir(const char *dirname, INT32 unixright);
typedef struct {
int FPU : 1; ///< FPU availabile
int CPUID : 1; ///< CPUID instruction
int RDTSC : 1; ///< RDTSC instruction
int MMX : 1; ///< MMX features
int MMXExt : 1; ///< MMX Ext. features
int CMOV : 1; ///< Pentium Pro's "cmov"
int AMD3DNow : 1; ///< 3DNow features
int AMD3DNowExt: 1; ///< 3DNow! Ext. features
int SSE : 1; ///< SSE features
int SSE2 : 1; ///< SSE2 features
int SSE3 : 1; ///< SSE3 features
int IA64 : 1; ///< Running on IA64
int AMD64 : 1; ///< Running on AMD64
int AltiVec : 1; ///< AltiVec features
int FPPE : 1; ///< floating-point precision error
int PFC : 1; ///< TBD?
int cmpxchg : 1; ///< ?
int cmpxchg16b : 1; ///< ?
int cmp8xchg16 : 1; ///< ?
int FPE : 1; ///< FPU Emu
int DEP : 1; ///< Data excution prevent
int PPCMM64 : 1; ///< PowerPC Movemem 64bit ok?
int ALPHAbyte : 1; ///< ?
int PAE : 1; ///< Physical Address Extension
int CPUs : 8;
} CPUInfoFlags;
/** \brief Info about CPU
\return CPUInfo in bits
*/
const CPUInfoFlags *I_CPUInfo(void);
/** \brief Find main WAD
\return path to main WAD
*/

View file

@ -23,49 +23,6 @@
#include "m_fixed.h"
#include "tables.h" // ANGLETOFINESHIFT
#ifdef __USE_C_FIXEDMUL__
/** \brief The FixedMul function
\param a fixed_t number
\param b fixed_t number
\return a*b>>FRACBITS
*/
fixed_t FixedMul(fixed_t a, fixed_t b)
{
// Need to cast to unsigned before shifting to avoid undefined behaviour
// for negative integers
return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
}
#endif //__USE_C_FIXEDMUL__
#ifdef __USE_C_FIXEDDIV__
/** \brief The FixedDiv2 function
\param a fixed_t number
\param b fixed_t number
\return a/b * FRACUNIT
*/
fixed_t FixedDiv2(fixed_t a, fixed_t b)
{
INT64 ret;
if (b == 0)
I_Error("FixedDiv: divide by zero");
ret = (((INT64)a * FRACUNIT)) / b;
if ((ret > INT32_MAX) || (ret < INT32_MIN))
I_Error("FixedDiv: divide by zero");
return (fixed_t)ret;
}
#endif // __USE_C_FIXEDDIV__
fixed_t FixedSqrt(fixed_t x)
{

View file

@ -46,127 +46,29 @@ typedef INT32 fixed_t;
#define FIXED_TO_FLOAT(x) (((float)(x)) / ((float)FRACUNIT))
#define FLOAT_TO_FIXED(f) (fixed_t)((f) * ((float)FRACUNIT))
/** \brief The FixedMul function
\param a fixed_t number
\param b fixed_t number
\return a*b>>FRACBITS
*/
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedMul(fixed_t a, fixed_t b)
{
// Need to cast to unsigned before shifting to avoid undefined behaviour
// for negative integers
return (fixed_t)(((UINT64)((INT64)a * b)) >> FRACBITS);
}
#if defined (__WATCOMC__) && FRACBITS == 16
#pragma aux FixedMul = \
"imul ebx", \
"shrd eax,edx,16" \
parm [eax] [ebx] \
value [eax] \
modify exact [eax edx]
#pragma aux FixedDiv2 = \
"cdq", \
"shld edx,eax,16", \
"sal eax,16", \
"idiv ebx" \
parm [eax] [ebx] \
value [eax] \
modify exact [eax edx]
#elif defined (__GNUC__) && defined (__i386__) && !defined (NOASM)
// DJGPP, i386 linux, cygwin or mingw
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
{
fixed_t ret;
asm
(
"imull %2;" // a*b
"shrdl %3,%%edx,%0;" // shift logical right FRACBITS bits
:"=a" (ret) // eax is always the result and the first operand (%0,%1)
:"0" (a), "r" (b) // and %2 is what we use imull on with what in %1
, "I" (FRACBITS) // %3 holds FRACBITS (normally 16)
:"cc", "%edx" // edx and condition codes clobbered
);
return ret;
}
FUNCMATH FUNCINLINE static inline fixed_t FixedDiv2(fixed_t a, fixed_t b)
{
fixed_t ret;
asm
(
"movl %1,%%edx;" // these two instructions allow the next two to pair, on the Pentium processor.
"sarl $31,%%edx;" // shift arithmetic right 31 on EDX
"shldl %3,%1,%%edx;" // DP shift logical left FRACBITS on EDX
"sall %3,%0;" // shift arithmetic left FRACBITS on EAX
"idivl %2;" // EDX/b = EAX
: "=a" (ret)
: "0" (a), "r" (b)
, "I" (FRACBITS)
: "%edx"
);
return ret;
}
#elif defined (__GNUC__) && defined (__arm__) && !defined(__thumb__) && !defined(NOASM) //ARMv4 ASM
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // let abuse smull
{
fixed_t ret;
asm
(
"smull %[lo], r1, %[a], %[b];"
"mov %[lo], %[lo], lsr %3;"
"orr %[lo], %[lo], r1, lsl %3;"
: [lo] "=&r" (ret) // rhi, rlo and rm must be distinct registers
: [a] "r" (a), [b] "r" (b)
, "i" (FRACBITS)
: "r1"
);
return ret;
}
#define __USE_C_FIXEDDIV__ // no double or asm div in ARM land
#elif defined (__GNUC__) && defined (__ppc__) && !defined(NOASM) && 0 // WII: PPC CPU
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
{
fixed_t ret, hi, lo;
asm
(
"mullw %0, %2, %3;"
"mulhw %1, %2, %3"
: "=r" (hi), "=r" (lo)
: "r" (a), "r" (b)
, "I" (FRACBITS)
);
ret = (INT64)((hi>>FRACBITS)+lo)<<FRACBITS;
return ret;
}
#define __USE_C_FIXEDDIV__// Alam: I am lazy
#elif defined (__GNUC__) && defined (__mips__) && !defined(NOASM) && 0 // PSP: MIPS CPU
FUNCMATH FUNCINLINE static inline fixed_t FixedMul(fixed_t a, fixed_t b) // asm
{
fixed_t ret;
asm
(
"mult %3, %4;" // a*b=h<32+l
: "=r" (ret), "=l" (a), "=h" (b) //todo: abuse shr opcode
: "0" (a), "r" (b)
, "I" (FRACBITS)
//: "+l", "+h"
);
ret = (INT64)((a>>FRACBITS)+b)<<FRACBITS;
return ret;
}
#define __USE_C_FIXEDDIV__ // no 64b asm div in MIPS land
#elif defined (__GNUC__) && defined (__sh__) && 0 // DC: SH4 CPU
#elif defined (__GNUC__) && defined (__m68k__) && 0 // DEAD: Motorola 6800 CPU
#elif defined (_MSC_VER) && defined(USEASM) && FRACBITS == 16
// Microsoft Visual C++ (no asm inline)
fixed_t __cdecl FixedMul(fixed_t a, fixed_t b);
fixed_t __cdecl FixedDiv2(fixed_t a, fixed_t b);
#else
#define __USE_C_FIXEDMUL__
#define __USE_C_FIXEDDIV__
#endif
#ifdef __USE_C_FIXEDMUL__
FUNCMATH fixed_t FixedMul(fixed_t a, fixed_t b);
#endif
#ifdef __USE_C_FIXEDDIV__
FUNCMATH fixed_t FixedDiv2(fixed_t a, fixed_t b);
#endif
/** \brief The FixedDiv2 function
\param a fixed_t number
\param b fixed_t number
\return a/b * FRACUNIT
*/
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedDiv2(fixed_t a, fixed_t b)
{
// This does not check for division overflow or division by 0!
// That is the caller's responsibility.
return (fixed_t)(((INT64)a * FRACUNIT) / b);
}
/** \brief The FixedInt function
@ -174,7 +76,6 @@ FUNCMATH fixed_t FixedDiv2(fixed_t a, fixed_t b);
\return a/FRACUNIT
*/
FUNCMATH FUNCINLINE static ATTRINLINE fixed_t FixedInt(fixed_t a)
{
return FixedMul(a, 1);
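As a quick sanity check on the C FixedMul/FixedDiv2 fallbacks above (assuming the usual FRACBITS = 16 and FRACUNIT = 1 << FRACBITS = 65536 from this header):

	fixed_t three_halves = 3 * FRACUNIT / 2;             /* 1.5 -> 98304  */
	fixed_t two          = 2 * FRACUNIT;                 /* 2.0 -> 131072 */
	fixed_t product      = FixedMul(three_halves, two);  /* (98304 * 131072) >> 16 = 196608 = 3.0 */
	fixed_t quotient     = FixedDiv2(product, two);      /* (196608 * 65536) / 131072 = 98304 = 1.5 */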

View file

@ -1943,430 +1943,11 @@ char *sizeu5(size_t num)
return sizeu5_buf;
}
#if defined (__GNUC__) && defined (__i386__) // from libkwave, under GPL
// Alam: note libkwave memcpy code comes from mplayer's libvo/aclib_template.c, r699
/* for small memory blocks (<256 bytes) this version is faster */
#define small_memcpy(dest,src,n)\
{\
register unsigned long int dummy;\
__asm__ __volatile__(\
"cld\n\t"\
"rep; movsb"\
:"=&D"(dest), "=&S"(src), "=&c"(dummy)\
:"0" (dest), "1" (src),"2" (n)\
: "memory", "cc");\
}
/* linux kernel __memcpy (from: /include/asm/string.h) */
ATTRINLINE static FUNCINLINE void *__memcpy (void *dest, const void * src, size_t n)
void *M_Memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
if ( n < 4 )
{
small_memcpy(dest, src, n);
}
else
{
__asm__ __volatile__ (
"rep ; movsl;"
"testb $2,%b4;"
"je 1f;"
"movsw;"
"1:\ttestb $1,%b4;"
"je 2f;"
"movsb;"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
:"0" (n/4), "q" (n),"1" ((long) dest),"2" ((long) src)
: "memory");
}
return dest;
}
#define SSE_MMREG_SIZE 16
#define MMX_MMREG_SIZE 8
#define MMX1_MIN_LEN 0x800 /* 2K blocks */
#define MIN_LEN 0x40 /* 64-byte blocks */
/* SSE note: i tried to move 128 bytes a time instead of 64 but it
didn't make any measureable difference. i'm using 64 for the sake of
simplicity. [MF] */
static /*FUNCTARGET("sse2")*/ void *sse_cpy(void * dest, const void * src, size_t n)
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetchnta (%0);"
"prefetchnta 32(%0);"
"prefetchnta 64(%0);"
"prefetchnta 96(%0);"
"prefetchnta 128(%0);"
"prefetchnta 160(%0);"
"prefetchnta 192(%0);"
"prefetchnta 224(%0);"
"prefetchnta 256(%0);"
"prefetchnta 288(%0);"
: : "r" (src) );
if (n >= MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1);
if (delta)
{
delta=SSE_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
if (((unsigned long)src) & 15)
/* if SRC is misaligned */
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movups (%0), %%xmm0;"
"movups 16(%0), %%xmm1;"
"movups 32(%0), %%xmm2;"
"movups 48(%0), %%xmm3;"
"movntps %%xmm0, (%1);"
"movntps %%xmm1, 16(%1);"
"movntps %%xmm2, 32(%1);"
"movntps %%xmm3, 48(%1);"
:: "r" (src), "r" (dest) : "memory");
src = (const unsigned char *)src + 64;
dest = (unsigned char *)dest + 64;
}
else
/*
Only if SRC is aligned on 16-byte boundary.
It allows to use movaps instead of movups, which required data
to be aligned or a general-protection exception (#GP) is generated.
*/
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movaps (%0), %%xmm0;"
"movaps 16(%0), %%xmm1;"
"movaps 32(%0), %%xmm2;"
"movaps 48(%0), %%xmm3;"
"movntps %%xmm0, (%1);"
"movntps %%xmm1, 16(%1);"
"movntps %%xmm2, 32(%1);"
"movntps %%xmm3, 48(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
/* since movntq is weakly-ordered, a "sfence"
* is needed to become ordered again. */
__asm__ __volatile__ ("sfence":::"memory");
/* enables to use FPU */
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n)
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetchnta (%0);"
"prefetchnta 32(%0);"
"prefetchnta 64(%0);"
"prefetchnta 96(%0);"
"prefetchnta 128(%0);"
"prefetchnta 160(%0);"
"prefetchnta 192(%0);"
"prefetchnta 224(%0);"
"prefetchnta 256(%0);"
"prefetchnta 288(%0);"
: : "r" (src));
if (n >= MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetchnta 320(%0);"
"prefetchnta 352(%0);"
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movntq %%mm0, (%1);"
"movntq %%mm1, 8(%1);"
"movntq %%mm2, 16(%1);"
"movntq %%mm3, 24(%1);"
"movntq %%mm4, 32(%1);"
"movntq %%mm5, 40(%1);"
"movntq %%mm6, 48(%1);"
"movntq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
/* since movntq is weakly-ordered, a "sfence"
* is needed to become ordered again. */
__asm__ __volatile__ ("sfence":::"memory");
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW
{
void *retval = dest;
size_t i;
/* PREFETCH has effect even for MOVSB instruction ;) */
__asm__ __volatile__ (
"prefetch (%0);"
"prefetch 32(%0);"
"prefetch 64(%0);"
"prefetch 96(%0);"
"prefetch 128(%0);"
"prefetch 160(%0);"
"prefetch 192(%0);"
"prefetch 224(%0);"
"prefetch 256(%0);"
"prefetch 288(%0);"
: : "r" (src));
if (n >= MMX1_MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"prefetch 320(%0);"
"prefetch 352(%0);"
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movq %%mm0, (%1);"
"movq %%mm1, 8(%1);"
"movq %%mm2, 16(%1);"
"movq %%mm3, 24(%1);"
"movq %%mm4, 32(%1);"
"movq %%mm5, 40(%1);"
"movq %%mm6, 48(%1);"
"movq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
__asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
}
#endif
// Alam: why? memcpy may be __cdecl/_System and our code may be not the same type
static void *cpu_cpy(void *dest, const void *src, size_t n)
{
if (src == NULL)
{
CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
return dest;
}
if(dest == NULL)
{
CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
return dest;
}
return memcpy(dest, src, n);
}
static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n)
{
#if defined (_MSC_VER) && defined (_X86_)
_asm
{
mov ecx, [n]
mov esi, [src]
mov edi, [dest]
shr ecx, 6 // mit mmx: 64bytes per iteration
jz lower_64 // if lower than 64 bytes
loop_64: // MMX transfers multiples of 64bytes
movq mm0, 0[ESI] // read sources
movq mm1, 8[ESI]
movq mm2, 16[ESI]
movq mm3, 24[ESI]
movq mm4, 32[ESI]
movq mm5, 40[ESI]
movq mm6, 48[ESI]
movq mm7, 56[ESI]
movq 0[EDI], mm0 // write destination
movq 8[EDI], mm1
movq 16[EDI], mm2
movq 24[EDI], mm3
movq 32[EDI], mm4
movq 40[EDI], mm5
movq 48[EDI], mm6
movq 56[EDI], mm7
add esi, 64
add edi, 64
dec ecx
jnz loop_64
emms // close mmx operation
lower_64:// transfer rest of buffer
mov ebx,esi
sub ebx,src
mov ecx,[n]
sub ecx,ebx
shr ecx, 3 // multiples of 8 bytes
jz lower_8
loop_8:
movq mm0, [esi] // read source
movq [edi], mm0 // write destination
add esi, 8
add edi, 8
dec ecx
jnz loop_8
emms // close mmx operation
lower_8:
mov ebx,esi
sub ebx,src
mov ecx,[n]
sub ecx,ebx
rep movsb
mov eax, [dest] // return dest
}
#elif defined (__GNUC__) && defined (__i386__)
void *retval = dest;
size_t i;
if (n >= MMX1_MIN_LEN)
{
register unsigned long int delta;
/* Align destinition to MMREG_SIZE -boundary */
delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
if (delta)
{
delta=MMX_MMREG_SIZE-delta;
n -= delta;
small_memcpy(dest, src, delta);
}
i = n >> 6; /* n/64 */
n&=63;
for (; i>0; i--)
{
__asm__ __volatile__ (
"movq (%0), %%mm0;"
"movq 8(%0), %%mm1;"
"movq 16(%0), %%mm2;"
"movq 24(%0), %%mm3;"
"movq 32(%0), %%mm4;"
"movq 40(%0), %%mm5;"
"movq 48(%0), %%mm6;"
"movq 56(%0), %%mm7;"
"movq %%mm0, (%1);"
"movq %%mm1, 8(%1);"
"movq %%mm2, 16(%1);"
"movq %%mm3, 24(%1);"
"movq %%mm4, 32(%1);"
"movq %%mm5, 40(%1);"
"movq %%mm6, 48(%1);"
"movq %%mm7, 56(%1);"
:: "r" (src), "r" (dest) : "memory");
src = ((const unsigned char *)src) + 64;
dest = ((unsigned char *)dest) + 64;
}
__asm__ __volatile__ ("emms":::"memory");
}
/*
* Now do the tail of the block
*/
if (n) __memcpy(dest, src, n);
return retval;
#else
return cpu_cpy(dest, src, n);
#endif
}
void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy;
/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE
* Do not use on overlapped memory, use memmove for that
*/
void M_SetupMemcpy(void)
{
#if defined (__GNUC__) && defined (__i386__)
if (R_SSE2)
M_Memcpy = sse_cpy;
else if (R_MMXExt)
M_Memcpy = mmx2_cpy;
else if (R_3DNow)
M_Memcpy = mmx1_cpy;
else
#endif
if (R_MMX)
M_Memcpy = mmx_cpy;
#if 0
M_Memcpy = cpu_cpy;
#endif
}
/** Return the appropriate message for a file error or end of file.
*/
const char *M_FileError(FILE *fp)

View file

@ -98,8 +98,6 @@ TMatrix *RotateZMatrix(angle_t rad);
// s1 = s2+s3+s1 (1024 lenghtmax)
void strcatbf(char *s1, const char *s2, const char *s3);
void M_SetupMemcpy(void);
const char *M_FileError(FILE *handle);
// counting bits, for weapon ammo code, usually

View file

@ -1,278 +0,0 @@
/*********************************************************
*
* File: p5prof.h
* By: Kevin Baca
*
* MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG
* (WHICH MEANS WRITE THE LOW DWORD FIRST)
*
* Now in yer code do:
* INT64 count,total;
*
* ...
* RDMSR(0x10,&count); //inner loop count
* total += count;
* ...
*
* printf("0x%x %x", (INT32)total, *((INT32 *)&total+1));
* // HIGH LOW
*
*********************************************************/
/**\file
\brief This file provides macros to profile your code.
Here's how they work...
As you may or may not know, the Pentium class of
processors provides extremely fine grained profiling
capabilities through the use of what are called
Machine Specific Registers (MSRs). These registers
can provide information about almost any aspect of
CPU performance down to a single cycle.
The MSRs of interest for profiling are specified by
indices 0x10, 0x11, 0x12, and 0x13. Here is a brief
description of each of these registers:
MSR 0x10
This register is simple a cycle counter.
MSR 0x11
This register controls what type of profiling data
will be gathered.
MSRs 0x12 and 0x13
These registers gather the profiling data specified in
MSR 0x11.
Each MSR is 64 bits wide. For the Pentium processor,
only the lower 32 bits of MSR 0x11 are valid. Bits 0-15
specify what data will be gathered in MSR 0x12. Bits 16-31
specify what data will be gathered in MSR 0x13. Both sets
of bits have the same format:
Bits 0-5 specify which hardware event will be tracked.
Bit 6, if set, indicates events will be tracked in
rings 0-2.
Bit 7, if set, indicates events will be tracked in
ring 3.
Bit 8, if set, indicates cycles should be counted for
the specified event. If clear, it indicates the
number of events should be counted.
Two instructions are provided for manupulating the MSRs.
RDMSR (Read Machine Specific Register) and WRMSR
(Write Machine Specific Register). These opcodes were
originally undocumented and therefore most assemblers don't
recognize them. Their byte codes are provided in the
macros below.
RDMSR takes the MSR index in ecx and the profiling criteria
in edx : eax.
WRMSR takes the MSR index in ecx and returns the profile data
in edx : eax.
Two profiling registers limits profiling capability to
gathering only two types of information. The register
usage can, however, be combined in interesting ways.
For example, you can set one register to gather the
number of a specific type of event while the other gathers
the number of cycles for the same event. Or you can
gather the number of two separate events while using
MSR 0x10 to gather the number of cycles.
The enumerated list provides somewhat readable labels for
the types of events that can be tracked.
For more information, get ahold of appendix H from the
Intel Pentium programmer's manual (I don't remember the
order number) or go to
http://green.kaist.ac.kr/jwhahn/art3.htm.
That's an article by Terje Mathisen where I got most of
my information.
You may use this code however you wish. I hope it's
useful and I hope I got everything right.
-Kevin
kbaca@skygames.com
*/
#ifdef __GNUC__
#define RDTSC(_dst) \
__asm__("
.byte 0x0F,0x31
movl %%edx,(%%edi)
movl %%eax,4(%%edi)"\
: : "D" (_dst) : "eax", "edx", "edi")
// the old code... swapped it
// movl %%edx,(%%edi)
// movl %%eax,4(%%edi)"
#define RDMSR(_msri, _msrd) \
__asm__("
.byte 0x0F,0x32
movl %%eax,(%%edi)
movl %%edx,4(%%edi)"\
: : "c" (_msri), "D" (_msrd) : "eax", "ecx", "edx", "edi")
#define WRMSR(_msri, _msrd) \
__asm__("
xorl %%edx,%%edx
.byte 0x0F,0x30"\
: : "c" (_msri), "a" (_msrd) : "eax", "ecx", "edx")
#define RDMSR_0x12_0x13(_msr12, _msr13) \
__asm__("
movl $0x12,%%ecx
.byte 0x0F,0x32
movl %%edx,(%%edi)
movl %%eax,4(%%edi)
movl $0x13,%%ecx
.byte 0x0F,0x32
movl %%edx,(%%esi)
movl %%eax,4(%%esi)"\
: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "edi")
#define ZERO_MSR_0x12_0x13() \
__asm__("
xorl %%edx,%%edx
xorl %%eax,%%eax
movl $0x12,%%ecx
.byte 0x0F,0x30
movl $0x13,%%ecx
.byte 0x0F,0x30"\
: : : "eax", "ecx", "edx")
#elif defined (__WATCOMC__)
extern void RDTSC(UINT32 *dst);
#pragma aux RDTSC =\
"db 0x0F,0x31"\
"mov [edi],edx"\
"mov [4+edi],eax"\
parm [edi]\
modify [eax edx edi];
extern void RDMSR(UINT32 msri, UINT32 *msrd);
#pragma aux RDMSR =\
"db 0x0F,0x32"\
"mov [edi],edx"\
"mov [4+edi],eax"\
parm [ecx] [edi]\
modify [eax ecx edx edi];
extern void WRMSR(UINT32 msri, UINT32 msrd);
#pragma aux WRMSR =\
"xor edx,edx"\
"db 0x0F,0x30"\
parm [ecx] [eax]\
modify [eax ecx edx];
extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13);
#pragma aux RDMSR_0x12_0x13 =\
"mov ecx,0x12"\
"db 0x0F,0x32"\
"mov [edi],edx"\
"mov [4+edi],eax"\
"mov ecx,0x13"\
"db 0x0F,0x32"\
"mov [esi],edx"\
"mov [4+esi],eax"\
parm [edi] [esi]\
modify [eax ecx edx edi esi];
extern void ZERO_MSR_0x12_0x13(void);
#pragma aux ZERO_MSR_0x12_0x13 =\
"xor edx,edx"\
"xor eax,eax"\
"mov ecx,0x12"\
"db 0x0F,0x30"\
"mov ecx,0x13"\
"db 0x0F,0x30"\
modify [eax ecx edx];
#endif
typedef enum
{
DataRead,
DataWrite,
DataTLBMiss,
DataReadMiss,
DataWriteMiss,
WriteHitEM,
DataCacheLinesWritten,
DataCacheSnoops,
DataCacheSnoopHit,
MemAccessBothPipes,
BankConflict,
MisalignedDataRef,
CodeRead,
CodeTLBMiss,
CodeCacheMiss,
SegRegLoad,
RESERVED0,
RESERVED1,
Branch,
BTBHit,
TakenBranchOrBTBHit,
PipelineFlush,
InstructionsExeced,
InstructionsExecedVPipe,
BusUtilizationClocks,
PipelineStalledWriteBackup,
PipelineStalledDateMemRead,
PipeLineStalledWriteEM,
LockedBusCycle,
IOReadOrWriteCycle,
NonCacheableMemRef,
AGI,
RESERVED2,
RESERVED3,
FPOperation,
Breakpoint0Match,
Breakpoint1Match,
Breakpoint2Match,
Breakpoint3Match,
HWInterrupt,
DataReadOrWrite,
DataReadOrWriteMiss
};
#define PROF_CYCLES (0x100)
#define PROF_EVENTS (0x000)
#define RING_012 (0x40)
#define RING_3 (0x80)
#define RING_0123 (RING_012 | RING_3)
/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/
#define ProfSetProfiles(_msr12, _msr13)\
{\
UINT32 prof;\
\
prof = (_msr12) | ((_msr13) << 16);\
WRMSR(0x11, prof);\
}
/*void ProfBeginProfiles(void);*/
#define ProfBeginProfiles()\
ZERO_MSR_0x12_0x13();
/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/
#define ProfGetProfiles(_msr12, _msr13)\
RDMSR_0x12_0x13(_msr12, _msr13);
/*void ProfZeroTimer(void);*/
#define ProfZeroTimer()\
WRMSR(0x10, 0);
/*void ProfReadTimer(UINT32 timer[2]);*/
#define ProfReadTimer(timer)\
RDMSR(0x10, timer);
/*EOF*/

View file

@ -138,20 +138,6 @@ void R_DrawColumn_8(void);
void R_DrawShadeColumn_8(void);
void R_DrawTranslucentColumn_8(void);
#ifdef USEASM
void ASMCALL R_DrawColumn_8_ASM(void);
#define R_DrawWallColumn_8_ASM R_DrawColumn_8_ASM
void ASMCALL R_DrawShadeColumn_8_ASM(void);
void ASMCALL R_DrawTranslucentColumn_8_ASM(void);
void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void);
void ASMCALL R_DrawColumn_8_MMX(void);
#define R_DrawWallColumn_8_MMX R_DrawColumn_8_MMX
void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void);
void ASMCALL R_DrawSpan_8_MMX(void);
#endif
void R_DrawTranslatedColumn_8(void);
void R_DrawTranslatedTranslucentColumn_8(void);
void R_DrawSpan_8(void);

View file

@ -23,11 +23,6 @@ static wallsplat_t wallsplats[MAXLEVELSPLATS]; // WALL splats
static INT32 freewallsplat;
#endif
#ifdef USEASM
/// \brief for floorsplats \note accessed by asm code
struct rastery_s *prastertab;
#endif
#ifdef FLOORSPLATS
static floorsplat_t floorsplats[1]; // FLOOR splats
static INT32 freefloorsplat;
@ -339,12 +334,6 @@ void R_AddVisibleFloorSplats(subsector_t *subsec)
}
}
#ifdef USEASM
// tv1, tv2 = x/y qui varie dans la texture, tc = x/y qui est constant.
void ASMCALL rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2,
INT32 tc, INT32 dir);
#endif
// current test with floor tile
//#define FLOORSPLATSOLIDCOLOR

View file

@ -33,10 +33,6 @@
// SRB2Kart
#include "r_fps.h" // R_GetFramerateCap
#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200))
#define RUSEASM //MSC.NET can't patch itself
#endif
// --------------------------------------------
// assembly or c drawer routines for 8bpp/16bpp
// --------------------------------------------
@ -94,16 +90,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
// Short and Tall sky drawer, for the current color mode
void (*walldrawerfunc)(void);
boolean R_ASM = true;
boolean R_486 = false;
boolean R_586 = false;
boolean R_MMX = false;
boolean R_SSE = false;
boolean R_3DNow = false;
boolean R_MMXExt = false;
boolean R_SSE2 = false;
void SCR_SetMode(void)
{
if (dedicated)
@ -132,28 +118,6 @@ void SCR_SetMode(void)
walldrawerfunc = R_DrawWallColumn_8;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8;
twosmultipatchtransfunc = R_Draw2sMultiPatchTranslucentColumn_8;
#ifdef RUSEASM
if (R_ASM)
{
if (R_MMX)
{
colfunc = basecolfunc = R_DrawColumn_8_MMX;
//shadecolfunc = R_DrawShadeColumn_8_ASM;
//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
walldrawerfunc = R_DrawWallColumn_8_MMX;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_MMX;
spanfunc = basespanfunc = R_DrawSpan_8_MMX;
}
else
{
colfunc = basecolfunc = R_DrawColumn_8_ASM;
//shadecolfunc = R_DrawShadeColumn_8_ASM;
//fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
walldrawerfunc = R_DrawWallColumn_8_ASM;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_ASM;
}
}
#endif
}
/* else if (vid.bpp > 1)
{
@ -181,50 +145,6 @@ void SCR_SetMode(void)
//
void SCR_Startup(void)
{
const CPUInfoFlags *RCpuInfo = I_CPUInfo();
if (!M_CheckParm("-NOCPUID") && RCpuInfo)
{
#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__)
R_486 = true;
#endif
if (RCpuInfo->RDTSC)
R_586 = true;
if (RCpuInfo->MMX)
R_MMX = true;
if (RCpuInfo->AMD3DNow)
R_3DNow = true;
if (RCpuInfo->MMXExt)
R_MMXExt = true;
if (RCpuInfo->SSE)
R_SSE = true;
if (RCpuInfo->SSE2)
R_SSE2 = true;
CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
}
if (M_CheckParm("-noASM"))
R_ASM = false;
if (M_CheckParm("-486"))
R_486 = true;
if (M_CheckParm("-586"))
R_586 = true;
if (M_CheckParm("-MMX"))
R_MMX = true;
if (M_CheckParm("-3DNow"))
R_3DNow = true;
if (M_CheckParm("-MMXExt"))
R_MMXExt = true;
if (M_CheckParm("-SSE"))
R_SSE = true;
if (M_CheckParm("-noSSE"))
R_SSE = false;
if (M_CheckParm("-SSE2"))
R_SSE2 = true;
M_SetupMemcpy();
if (dedicated)
{
V_Init();

View file

@ -138,17 +138,6 @@ extern void (*transtransfunc)(void);
extern void (*twosmultipatchfunc)(void);
extern void (*twosmultipatchtransfunc)(void);
// -----
// CPUID
// -----
extern boolean R_ASM;
extern boolean R_486;
extern boolean R_586;
extern boolean R_MMX;
extern boolean R_3DNow;
extern boolean R_MMXExt;
extern boolean R_SSE2;
// ----------------
// screen variables
// ----------------

View file

@ -7,7 +7,6 @@
NOHW=1
NOHS=1
NOASM=1
OPTS+=-DLINUX

View file

@ -39,7 +39,6 @@ endif
#
ifdef SOLARIS
NOIPX=1
NOASM=1
OPTS+=-DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP
OPTS+=-I/usr/local/include -I/opt/sfw/include
LDFLAGS+=-L/opt/sfw/lib

View file

@ -37,14 +37,6 @@ else
endif
endif
#use the x86 asm code
ifndef CYGWIN32
ifndef NOASM
USEASM=1
endif
endif
OBJS+=$(OBJDIR)/i_video.o $(OBJDIR)/dosstr.o $(OBJDIR)/endtxt.o $(OBJDIR)/hwsym_sdl.o
OPTS+=-DDIRECTFULLSCREEN -DHAVE_SDL

View file

@ -70,40 +70,6 @@ char logfilename[1024];
typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID);
#endif
#if defined (_WIN32)
static inline VOID MakeCodeWritable(VOID)
{
#ifdef USEASM // Disable write-protection of code segment
DWORD OldRights;
const DWORD NewRights = PAGE_EXECUTE_READWRITE;
PBYTE pBaseOfImage = (PBYTE)GetModuleHandle(NULL);
PIMAGE_DOS_HEADER dosH =(PIMAGE_DOS_HEADER)pBaseOfImage;
PIMAGE_NT_HEADERS ntH = (PIMAGE_NT_HEADERS)(pBaseOfImage + dosH->e_lfanew);
PIMAGE_OPTIONAL_HEADER oH = (PIMAGE_OPTIONAL_HEADER)
((PBYTE)ntH + sizeof (IMAGE_NT_SIGNATURE) + sizeof (IMAGE_FILE_HEADER));
LPVOID pA = pBaseOfImage+oH->BaseOfCode;
SIZE_T pS = oH->SizeOfCode;
#if 1 // try to find the text section
PIMAGE_SECTION_HEADER ntS = IMAGE_FIRST_SECTION (ntH);
WORD s;
for (s = 0; s < ntH->FileHeader.NumberOfSections; s++)
{
if (memcmp (ntS[s].Name, ".text\0\0", 8) == 0)
{
pA = pBaseOfImage+ntS[s].VirtualAddress;
pS = ntS[s].Misc.VirtualSize;
break;
}
}
#endif
if (!VirtualProtect(pA,pS,NewRights,&OldRights))
I_Error("Could not make code writable\n");
#endif
}
#endif
#ifdef _WIN32
static void
ChDirToExe (void)
@ -185,7 +151,6 @@ int main(int argc, char **argv)
#ifndef __MINGW32__
prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo);
#endif
MakeCodeWritable();
#endif
// startup SRB2

View file

@ -3910,69 +3910,6 @@ UINT32 I_GetFreeMem(UINT32 *total)
#endif
}
const CPUInfoFlags *I_CPUInfo(void)
{
#if defined (_WIN32)
static CPUInfoFlags WIN_CPUInfo;
SYSTEM_INFO SI;
p_IsProcessorFeaturePresent pfnCPUID = (p_IsProcessorFeaturePresent)(LPVOID)GetProcAddress(GetModuleHandleA("kernel32.dll"), "IsProcessorFeaturePresent");
ZeroMemory(&WIN_CPUInfo,sizeof (WIN_CPUInfo));
if (pfnCPUID)
{
WIN_CPUInfo.FPPE = pfnCPUID( 0); //PF_FLOATING_POINT_PRECISION_ERRATA
WIN_CPUInfo.FPE = pfnCPUID( 1); //PF_FLOATING_POINT_EMULATED
WIN_CPUInfo.cmpxchg = pfnCPUID( 2); //PF_COMPARE_EXCHANGE_DOUBLE
WIN_CPUInfo.MMX = pfnCPUID( 3); //PF_MMX_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.PPCMM64 = pfnCPUID( 4); //PF_PPC_MOVEMEM_64BIT_OK
WIN_CPUInfo.ALPHAbyte = pfnCPUID( 5); //PF_ALPHA_BYTE_INSTRUCTIONS
WIN_CPUInfo.SSE = pfnCPUID( 6); //PF_XMMI_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.AMD3DNow = pfnCPUID( 7); //PF_3DNOW_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.RDTSC = pfnCPUID( 8); //PF_RDTSC_INSTRUCTION_AVAILABLE
WIN_CPUInfo.PAE = pfnCPUID( 9); //PF_PAE_ENABLED
WIN_CPUInfo.SSE2 = pfnCPUID(10); //PF_XMMI64_INSTRUCTIONS_AVAILABLE
//WIN_CPUInfo.blank = pfnCPUID(11); //PF_SSE_DAZ_MODE_AVAILABLE
WIN_CPUInfo.DEP = pfnCPUID(12); //PF_NX_ENABLED
WIN_CPUInfo.SSE3 = pfnCPUID(13); //PF_SSE3_INSTRUCTIONS_AVAILABLE
WIN_CPUInfo.cmpxchg16b = pfnCPUID(14); //PF_COMPARE_EXCHANGE128
WIN_CPUInfo.cmp8xchg16 = pfnCPUID(15); //PF_COMPARE64_EXCHANGE128
WIN_CPUInfo.PFC = pfnCPUID(16); //PF_CHANNELS_ENABLED
}
#ifdef HAVE_SDLCPUINFO
else
{
WIN_CPUInfo.RDTSC = SDL_HasRDTSC();
WIN_CPUInfo.MMX = SDL_HasMMX();
WIN_CPUInfo.AMD3DNow = SDL_Has3DNow();
WIN_CPUInfo.SSE = SDL_HasSSE();
WIN_CPUInfo.SSE2 = SDL_HasSSE2();
WIN_CPUInfo.AltiVec = SDL_HasAltiVec();
}
WIN_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
WIN_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
#endif
GetSystemInfo(&SI);
WIN_CPUInfo.CPUs = SI.dwNumberOfProcessors;
WIN_CPUInfo.IA64 = (SI.dwProcessorType == 2200); // PROCESSOR_INTEL_IA64
WIN_CPUInfo.AMD64 = (SI.dwProcessorType == 8664); // PROCESSOR_AMD_X8664
return &WIN_CPUInfo;
#elif defined (HAVE_SDLCPUINFO)
static CPUInfoFlags SDL_CPUInfo;
memset(&SDL_CPUInfo,0,sizeof (CPUInfoFlags));
SDL_CPUInfo.RDTSC = SDL_HasRDTSC();
SDL_CPUInfo.MMX = SDL_HasMMX();
SDL_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
SDL_CPUInfo.AMD3DNow = SDL_Has3DNow();
SDL_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
SDL_CPUInfo.SSE = SDL_HasSSE();
SDL_CPUInfo.SSE2 = SDL_HasSSE2();
SDL_CPUInfo.AltiVec = SDL_HasAltiVec();
return &SDL_CPUInfo;
#else
return NULL; /// \todo CPUID asm
#endif
}
// note CPUAFFINITY code used to reside here
void I_RegisterSysCommands(void) {}
#endif

View file

@ -1,957 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2018 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode.
;; Draw wall columns.
[BITS 32]
%define FRACBITS 16
%define TRANSPARENTPIXEL 247
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern ylookup
cextern columnofs
cextern dc_source
cextern dc_texturemid
cextern dc_texheight
cextern dc_iscale
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_colormap
cextern dc_transmap
cextern colormaps
cextern vid
cextern topleft
; DELME
cextern R_DrawColumn_8
; polygon edge rasterizer
cextern prastertab
[SECTION .data]
;;.align 4
loopcount dd 0
pixelcount dd 0
tystep dd 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_ASM
; align 16
R_DrawColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov edi,[ylookup+ebp*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx,[dc_yh]
add ecx,1
sub ecx,ebp ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
mov eax,ebp ;; dc_yl
shl eax,FRACBITS
sub eax,[centeryfrac]
imul dword [dc_iscale]
shrd eax,edx,FRACBITS
add eax,[dc_texturemid]
mov ebp,eax ;; ebp = frac
mov ebx,[dc_colormap]
mov esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires],0x01
jz .texheightcheck
xor ebp,ebp
;;
;; Check for power of two
;;
.texheightcheck:
mov edx,[dc_texheight]
sub edx,1 ;; edx = heightmask
test edx,[dc_texheight]
jnz .notpowertwo
test ecx,0x01 ;; Test for odd no. pixels
jnz .odd
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking
;;
.powertwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
.odd:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,2 ;; count -= 2
jg .powertwo
jmp .done
.notpowertwo:
add edx,1
shl edx,FRACBITS
test ebp,ebp
jns .notpowtwoloop
.makefracpos:
add ebp,edx ;; frac is negative; make it positive
js .makefracpos
.notpowtwoloop:
cmp ebp,edx ;; Reduce mod height
jl .writenonpowtwo
sub ebp,edx
jmp .notpowtwoloop
.writenonpowtwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part.
mov bl,[esi + eax] ;; ebx = colormap + texel
add ebp,[dc_iscale] ;; frac += fracstep
movzx eax,byte [ebx] ;; Map through colormap
mov [edi],al ;; Write pixel
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,1
jnz .notpowtwoloop
;;
.done:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
;; Revised by G. Dick July 2010 to support the intervening twelve years'
;; worth of changes to the renderer. Since I only vaguely know what I'm
;; doing, this is probably rather suboptimal. Help appreciated!
;;
;;----------------------------------------------------------------------
;; fracstep, vid.width in memory
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = heightmask
;; esi = source
;; edi = dest
;; ebp = frac
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_ASM
; align 16
R_Draw2sMultiPatchColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov edi,[ylookup+ebp*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx,[dc_yh]
add ecx,1
sub ecx,ebp ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale; // But we just use [dc_iscale]
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
mov eax,ebp ;; dc_yl
shl eax,FRACBITS
sub eax,[centeryfrac]
imul dword [dc_iscale]
shrd eax,edx,FRACBITS
add eax,[dc_texturemid]
mov ebp,eax ;; ebp = frac
mov ebx,[dc_colormap]
mov esi,[dc_source]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires],0x01
jz .texheightcheck
xor ebp,ebp
;;
;; Check for power of two
;;
.texheightcheck:
mov edx,[dc_texheight]
sub edx,1 ;; edx = heightmask
test edx,[dc_texheight]
jnz .notpowertwo
test ecx,0x01 ;; Test for odd no. pixels
jnz .odd
;;
;; Texture height is a power of two, so we get modular arithmetic by
;; masking
;;
.powertwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextpowtwoeven ;; If so, advance.
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextpowtwoeven:
;; dest += vid.width
add edi,[vid + viddef_s.width]
.odd:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part
and eax,edx ;; eax &= heightmask
movzx eax,byte [esi + eax] ;; eax = texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextpowtwoodd ;; If so, advance.
movzx eax,byte [ebx+eax] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextpowtwoodd:
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,2 ;; count -= 2
jg .powertwo
jmp .done
.notpowertwo:
add edx,1
shl edx,FRACBITS
test ebp,ebp
jns .notpowtwoloop
.makefracpos:
add ebp,edx ;; frac is negative; make it positive
js .makefracpos
.notpowtwoloop:
cmp ebp,edx ;; Reduce mod height
jl .writenonpowtwo
sub ebp,edx
jmp .notpowtwoloop
.writenonpowtwo:
mov eax,ebp ;; eax = frac
sar eax,FRACBITS ;; Integer part.
mov bl,[esi + eax] ;; ebx = colormap + texel
add ebp,[dc_iscale] ;; frac += fracstep
cmp bl,TRANSPARENTPIXEL ;; Is pixel transparent?
je .nextnonpowtwo ;; If so, advance.
movzx eax,byte [ebx] ;; Map through colormap
mov [edi],al ;; Write pixel
.nextnonpowtwo:
;; dest += vid.width
add edi,[vid + viddef_s.width]
sub ecx,1
jnz .notpowtwoloop
;;
.done:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
;;----------------------------------------------------------------------
;; R_DrawTranslucentColumnA_8
;;
;; Vertical column texture drawer, with transparency. Replaces Doom2's
;; 'fuzz' effect, which was not so beautiful.
;; Transparency is always impressive in some way, don't know why...
;;----------------------------------------------------------------------
cglobal R_DrawTranslucentColumn_8_ASM
R_DrawTranslucentColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
mov [pixelcount],eax ;; save for final pixel
jle near vtdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
push cx
mov ecx,edx
pop cx
mov edx,[dc_colormap]
mov esi,[dc_source]
;;
;; lets rock :) !
;;
mov eax,[pixelcount]
shr eax,0x2
test byte [pixelcount],0x3
mov ch,al ;; quad count
mov eax,[dc_transmap]
je vt4quadloop
;;
;; do the odd leftover pixel
;;
test byte [pixelcount],0x1
je trf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pf: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
trf2: test byte [pixelcount],0x2
je trf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
pg: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add ecx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov dl,[edx]
mov [edi],dl
ph: add edi,0x12345678
;;
;; test whether there were at least 4 pixels
;;
trf3: test ch,0xff ;; test quad count
je near vtdone
;;
;; ebp : ystep frac. upper 24 bits
;; edx : y frac. upper 24 bits
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen
;;
vt4quadloop:
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
pi: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pj: sub edi,0x12345678
mov ebp,edi
pk: sub edi,0x12345678
jmp short inloop
align 4
vtquadloop:
add ecx,[tystep]
adc bl,cl
q1: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
inloop:
add ecx,[tystep]
adc bl,cl
q2: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp+0x0],dl
mov al,[edi] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q3: add ebp,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add ecx,[tystep]
adc bl,cl
q4: add edi,0x23456789
and bl,0x7f
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov dl,[edx]
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne vtquadloop
vtdone:
pop ebx
pop edi
pop esi
pop ebp
ret
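
The translucent drawer above boils down to one table lookup per pixel: the texel selects a 256-entry row of the translucency table, the pixel already on screen selects the column, and the blended index is then run through the light colormap. A minimal C sketch (assumed globals, 128-high texture matching the 0x7f masks above; not the project's actual drawer):

static void DrawTranslucentColumn_sketch(void)
{
    INT32 count = dc_yh - dc_yl + 1;
    if (count <= 0)
        return;

    UINT8 *dest = ylookup[dc_yl] + columnofs[dc_x];
    fixed_t step = dc_iscale;
    fixed_t frac = dc_texturemid - (centery - dc_yl) * step;

    for (; count; count--, dest += vid.width, frac += step)
    {
        UINT8 texel = dc_source[(frac >> FRACBITS) & 0x7f];
        *dest = dc_colormap[dc_transmap[(texel << 8) + *dest]];
    }
}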
;;----------------------------------------------------------------------
;; R_DrawShadeColumn
;;
;; for smoke..etc.. test.
;;----------------------------------------------------------------------
cglobal R_DrawShadeColumn_8_ASM
R_DrawShadeColumn_8_ASM:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov ebp,[dc_yl]
mov ebx,ebp
mov edi,[ylookup+ebx*4]
mov ebx,[dc_x]
add edi,[columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov eax,[dc_yh]
inc eax
sub eax,ebp ;; pixel count
mov [pixelcount],eax ;; save for final pixel
jle near shdone ;; nothing to scale
;;
;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
;;
mov ecx,[dc_iscale] ;; fracstep
mov eax,[centery]
sub eax,ebp
imul eax,ecx
mov edx,[dc_texturemid]
sub edx,eax
mov ebx,edx
shr ebx,16 ;; frac int.
and ebx,byte +0x7f
shl edx,16 ;; y frac up
mov ebp,ecx
shl ebp,16 ;; fracstep f. up
shr ecx,16 ;; fracstep i. ->cl
and cl,0x7f
mov esi,[dc_source]
;;
;; lets rock :) !
;;
mov eax,[pixelcount]
mov dh,al
shr eax,2
mov ch,al ;; quad count
mov eax,[colormaps]
test dh,3
je sh4quadloop
;;
;; do the odd leftover pixel
;;
test dh,0x1
je shf2
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pl: add edi,0x12345678
;;
;; do two non-quad-aligned pixels
;;
shf2:
test dh,0x2
je shf3
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pm: add edi,0x12345678
mov ah,[esi+ebx] ;; fetch texel : colormap number
add edx,ebp
adc bl,cl
mov al,[edi] ;; fetch dest : index into colormap
and bl,0x7f
mov dl,[eax]
mov [edi],dl
pn: add edi,0x12345678
;;
;; test whether there were at least 4 pixels
;;
shf3:
test ch,0xff ;; test quad count
je near shdone
;;
;; ebp : ystep frac. upper 24 bits
;; edx : y frac. upper 24 bits
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen
;;
sh4quadloop:
mov dh,0x7f ;; prep mask
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [tystep],ebp
po: add edi,0x12345678
mov al,[edi] ;; fetch dest : index into colormap
pp: sub edi,0x12345678
mov ebp,edi
pq: sub edi,0x12345678
jmp short shinloop
align 4
shquadloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q5: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
shinloop:
add edx,[tystep]
adc bl,cl
and bl,dh
q6: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q7: add ebp,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [edi],dl
mov al,[ebp] ;; fetch dest : index into colormap
add edx,[tystep]
adc bl,cl
and bl,dh
q8: add edi,0x12345678
mov dl,[eax]
mov ah,[esi+ebx] ;; fetch texel : colormap number
mov [ebp],dl
mov al,[edi] ;; fetch dest : index into colormap
dec ch
jne shquadloop
shdone:
pop ebx ;; restore register variables
pop edi
pop esi
pop ebp ;; restore caller's stack frame pointer
ret
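
R_DrawShadeColumn_8_ASM works the other way around: the texel is not drawn at all, it only selects one of the 256-entry rows of the global colormaps table, and the pixel already on screen is remapped (darkened) through that row. A minimal C sketch under the same assumptions as above:

static void DrawShadeColumn_sketch(void)
{
    INT32 count = dc_yh - dc_yl + 1;
    if (count <= 0)
        return;

    UINT8 *dest = ylookup[dc_yl] + columnofs[dc_x];
    fixed_t step = dc_iscale;
    fixed_t frac = dc_texturemid - (centery - dc_yl) * step;

    for (; count; count--, dest += vid.width, frac += step)
    {
        UINT8 texel = dc_source[(frac >> FRACBITS) & 0x7f];
        *dest = colormaps[(texel << 8) + *dest];   /* shade the existing pixel */
    }
}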
;; ========================================================================
;; Rasterization of the edge segments of a LINEARLY textured polygon.
;; The texture coordinates are interpolated along the edges at the same
;; time as the minx/maxx X coordinates for each scanline.
;; The 'dir' argument indicates which texture coordinate is interpolated:
;; 0: segments associated with the TOP and BOTTOM edges (TY is constant)
;; 1: segments associated with the LEFT and RIGHT edges (TX is constant)
;; ========================================================================
;;
;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
;;
;; For dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, since TY is constant.
;;
;; For dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, since TX is constant.
;;
;;
;; Uses: extern struct rastery *_rastertab;
;;
MINX EQU 0
MAXX EQU 4
TX1 EQU 8
TY1 EQU 12
TX2 EQU 16
TY2 EQU 20
RASTERY_SIZEOF EQU 24
cglobal rasterize_segment_tex
rasterize_segment_tex:
push ebp
mov ebp,esp
sub esp,byte +0x8 ;; allocate the local variables
push ebx
push esi
push edi
o16 mov ax,es
push eax
;; #define DX [ebp-4]
;; #define TD [ebp-8]
mov eax,[ebp+0xc] ;; y1
mov ebx,[ebp+0x14] ;; y2
cmp ebx,eax
je near .L_finished ;; special (y1==y2) segment horizontal, exit!
jg near .L_rasterize_right
;;rasterize_left: ;; rasterize a LEFT edge segment of the polygon
mov ecx,eax
sub ecx,ebx
inc ecx ;; y1-y2+1
mov eax,RASTERY_SIZEOF
mul ebx ;; * y2
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y2]
mov eax,[ebp+0x8] ;; ARG1
sub eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; ((x1-x2)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y1-y2+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x18] ;; ARG5
sub eax,[ebp+0x1c] ;; ARG6
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
mov [ebp-0x8],eax ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1)
mov eax,[ebp+0x10] ;; ARG3
shl eax,0x10 ;; x = x2<<PRE
mov ebx,[ebp+0x1c] ;; ARG6
shl ebx,0x10 ;; tx = tx2<<PRE d0
;; ty = ty2<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24],byte +0x0 ;; ARG8 direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rleft_h_loop
;;
;; TY varies, TX is constant
;;
.L_rleft_v_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add ebx,ebp
mov [esi+TX1],edx ;; .tx1 = tx
add eax,edi
mov [esi+TY1],ebx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_v_loop
pop ebp
jmp .L_finished
;;
;; TX varies, TY is constant
;;
.L_rleft_h_loop:
mov [esi+MINX],eax ;; rastertab[y].minx = x
add eax,edi
mov [esi+TX1],ebx ;; .tx1 = tx
add ebx,ebp
mov [esi+TY1],edx ;; .ty1 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF ;; next raster line into rastertab[]
dec ecx
jne .L_rleft_h_loop
pop ebp
jmp .L_finished
;;
;; rasterize a RIGHT edge segment of the polygon
;;
.L_rasterize_right:
mov ecx,ebx
sub ecx,eax
inc ecx ;; y2-y1+1
mov ebx,RASTERY_SIZEOF
mul ebx ;; * y1
mov esi,[prastertab]
add esi,eax ;; point into rastertab[y1]
mov eax,[ebp+0x10] ;; ARG3
sub eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; ((x2-x1)<<PRE) ...
cdq
idiv ecx ;; dx = ... / (y2-y1+1)
mov [ebp-0x4],eax ;; DX
mov eax,[ebp+0x1c] ;; ARG6
sub eax,[ebp+0x18] ;; ARG5
shl eax,0x10
cdq
idiv ecx ;; tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
mov [ebp-0x8],eax ;; idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1)
mov eax,[ebp+0x8] ;; ARG1
shl eax,0x10 ;; x = x1<<PRE
mov ebx,[ebp+0x18] ;; ARG5
shl ebx,0x10 ;; tx = tx1<<PRE d0
;; ty = ty1<<PRE d1
mov edx,[ebp+0x20] ;; ARG7
shl edx,0x10 ;; ty = ty<<PRE d0
;; tx = tx<<PRE d1
push ebp
mov edi,[ebp-0x4] ;; DX
cmp dword [ebp+0x24], 0 ;; direction ?
mov ebp,[ebp-0x8] ;; TD
je .L_rright_h_loop
;;
;; TY varies, TX is constant
;;
.L_rright_v_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add ebx,ebp
mov [esi+TX2],edx ;; .tx2 = tx
add eax,edi
mov [esi+TY2],ebx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // ty += tdy
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_v_loop
pop ebp
jmp short .L_finished
;;
;; TX varies, TY is constant
;;
.L_rright_h_loop:
mov [esi+MAXX],eax ;; rastertab[y].maxx = x
add eax,edi
mov [esi+TX2],ebx ;; .tx2 = tx
add ebx,ebp
mov [esi+TY2],edx ;; .ty2 = ty
;;addl DX, %eax // x += dx
;;addl TD, %ebx // tx += tdx
add esi,RASTERY_SIZEOF
dec ecx
jne .L_rright_h_loop
pop ebp
.L_finished:
pop eax
o16 mov es,ax
pop edi
pop esi
pop ebx
mov esp,ebp
pop ebp
ret
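
Reconstructed from the EQU offsets above, a rough C sketch of rasterize_segment_tex (struct and variable names are illustrative only): the segment is walked one scanline at a time, x and one texture coordinate are stepped in 16.16 fixed point while the other stays constant, and the results land in prastertab[y] as either the left (minx/tx1/ty1) or the right (maxx/tx2/ty2) edge, depending on whether the segment runs upwards or downwards.

typedef struct
{
    INT32 minx, maxx;
    INT32 tx1, ty1;
    INT32 tx2, ty2;
} rastery_sketch_t;

extern rastery_sketch_t *prastertab;

static void rasterize_segment_tex_sketch(INT32 x1, INT32 y1, INT32 x2, INT32 y2,
                                         INT32 tv1, INT32 tv2, INT32 tc, INT32 dir)
{
    if (y1 == y2)
        return;                        /* horizontal segment: nothing to do */

    if (y2 < y1)                       /* left edge: walk from y2 up to y1 */
    {
        INT32 count = y1 - y2 + 1;
        INT32 dx = ((x1 - x2) << 16) / count;
        INT32 dt = ((tv1 - tv2) << 16) / count;
        INT32 x = x2 << 16, v = tv2 << 16, c = tc << 16;
        rastery_sketch_t *r = prastertab + y2;
        for (; count; count--, r++, x += dx, v += dt)
        {
            r->minx = x;
            if (dir) { r->tx1 = c; r->ty1 = v; }   /* TY varies, TX constant */
            else     { r->tx1 = v; r->ty1 = c; }   /* TX varies, TY constant */
        }
    }
    else                               /* right edge: walk from y1 up to y2 */
    {
        INT32 count = y2 - y1 + 1;
        INT32 dx = ((x2 - x1) << 16) / count;
        INT32 dt = ((tv2 - tv1) << 16) / count;
        INT32 x = x1 << 16, v = tv1 << 16, c = tc << 16;
        rastery_sketch_t *r = prastertab + y1;
        for (; count; count--, r++, x += dx, v += dt)
        {
            r->maxx = x;
            if (dir) { r->tx2 = c; r->ty2 = v; }
            else     { r->tx2 = v; r->ty2 = c; }
        }
    }
}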

1587
src/tmap.s

File diff suppressed because it is too large

View file

@ -1,322 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file tmap_asm.s
/// \brief ???
//.comm _dc_colormap,4
//.comm _dc_x,4
//.comm _dc_yl,4
//.comm _dc_yh,4
//.comm _dc_iscale,4
//.comm _dc_texturemid,4
//.comm _dc_source,4
//.comm _ylookup,4
//.comm _columnofs,4
//.comm _loopcount,4
//.comm _pixelcount,4
.data
_pixelcount:
.long 0x00000000
_loopcount:
.long 0x00000000
.align 8
_mmxcomm:
.long 0x00000000
.text
.align 4
.globl _R_DrawColumn8_NOMMX
_R_DrawColumn8_NOMMX:
pushl %ebp
pushl %esi
pushl %edi
pushl %ebx
movl _dc_yl,%edx
movl _dc_yh,%eax
subl %edx,%eax
leal 1(%eax),%ebx
testl %ebx,%ebx
jle rdc8ndone
movl _dc_x,%eax
movl _ylookup, %edi
movl (%edi,%edx,4),%esi
movl _columnofs, %edi
addl (%edi,%eax,4),%esi
movl _dc_iscale,%edi
movl %edx,%eax
imull %edi,%eax
movl _dc_texturemid,%ecx
addl %eax,%ecx
movl _dc_source,%ebp
xorl %edx, %edx
subl $0x12345678, %esi
.globl rdc8nwidth1
rdc8nwidth1:
.align 4,0x90
rdc8nloop:
movl %ecx,%eax
shrl $16,%eax
addl %edi,%ecx
andl $127,%eax
addl $0x12345678,%esi
.globl rdc8nwidth2
rdc8nwidth2:
movb (%eax,%ebp),%dl
movl _dc_colormap,%eax
movb (%eax,%edx),%al
movb %al,(%esi)
decl %ebx
jne rdc8nloop
rdc8ndone:
popl %ebx
popl %edi
popl %esi
popl %ebp
ret
//
// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
// By ES 1998/08/01
//
.globl _R_DrawColumn_8_Pentium
_R_DrawColumn_8_Pentium:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl _dc_yl,%eax // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
movl (%edi,%ebx,4),%ecx
subl %eax,%ebx // ebx=number of pixels-1
jl rdc8pdone // no pixel to draw, done
jnz rdc8pmany
movl _dc_x,%edx // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%eax
movl _dc_texturemid,%edi
addl %eax,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8pdone // done!
.align 4, 0x90
rdc8pmany: // draw >1 pixel
movl _dc_x,%edx
movl _columnofs, %edi
movl (%edi,%edx,4),%edx
leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
.globl rdc8pwidth5
rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%edx // edx = fracstep
imull %edx,%eax
shll $9, %edx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%ecx
addl %eax,%ecx // ecx = frac
movl _dc_colormap,%eax // eax = lighting/special effects LUT
shll $9, %ecx
movl _dc_source,%esi // esi = source ptr
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8pwidth6
rdc8pwidth6: // DeadBeef = -SCREENWIDTH
// Begin the calculation of the two first pixels
leal (%ecx, %edx), %ebp
shrl $25, %ecx
movb (%esi, %ecx), %al
leal (%edx, %ebp), %ecx
shrl $25, %ebp
movb (%eax), %dl
// The main loop
rdc8ploop:
movb (%esi,%ebp), %al // load 1
leal (%ecx, %edx), %ebp // calc frac 3
shrl $25, %ecx // shift frac 2
movb %dl, 0x12345678(%edi, %ebx)// store 0
.globl rdc8pwidth1
rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
movb (%eax), %al // lookup 1
movb %al, 0x12345678(%edi, %ebx)// store 1
.globl rdc8pwidth2
rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
movb (%esi, %ecx), %al // load 2
leal (%ebp, %edx), %ecx // calc frac 4
shrl $25, %ebp // shift frac 3
movb (%eax), %dl // lookup 2
addl $0x12345678, %ebx // counter
.globl rdc8pwidth3
rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
jl rdc8ploop // loop
// End of loop. Write extra pixel or just exit.
jnz rdc8pdone
movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
.globl rdc8pwidth4
rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8pdone:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
//
// MMX asm version, optimised for K6
// By ES 1998/07/05
//
.globl _R_DrawColumn_8_K6_MMX
_R_DrawColumn_8_K6_MMX:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
andl $7,%eax
addl $8,%eax
movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
subl %eax,%esp
movl _dc_yl,%edx // Top pixel
movl _dc_yh,%ebx // Bottom pixel
movl _ylookup, %edi
movl (%edi,%ebx,4),%ecx
subl %edx,%ebx // ebx=number of pixels-1
jl 0x12345678 // no pixel to draw, done
.globl rdc8moffs1
rdc8moffs1:
jnz rdc8mmany
movl _dc_x,%eax // Special case: only one pixel
movl _columnofs, %edi
addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
movl _dc_iscale,%esi
imull %esi,%edx
movl _dc_texturemid,%edi
addl %edx,%edi // texture index in edi
movl _dc_colormap,%edx
shrl $16, %edi
movl _dc_source,%ebp
andl $127,%edi
movb (%edi,%ebp),%dl // read texture pixel
movb (%edx),%al // lookup for light
movb %al,0(%ecx) // write it
jmp rdc8mdone // done!
.globl rdc8moffs2
rdc8moffs2:
.align 4, 0x90
rdc8mmany: // draw >1 pixel
movl _dc_x,%eax
movl _columnofs, %edi
movl (%edi,%eax,4),%eax
leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
.globl rdc8mwidth3
rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH
movl _dc_iscale,%ecx // ecx = fracstep
imull %ecx,%edx
shll $9, %ecx // fixme: Should get 7.25 fix as input
movl _dc_texturemid,%eax
addl %edx,%eax // eax = frac
movl _dc_colormap,%edx // edx = lighting/special effects LUT
shll $9, %eax
leal (%ecx, %ecx), %edi
movl _dc_source,%ebp // ebp = source ptr
movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
imull $0x12345678, %ebx // ebx = negative offset to pixel
.globl rdc8mwidth5
rdc8mwidth5: // DeadBeef = -SCREENWIDTH
movl %edi, 4(%esp)
leal (%eax, %ecx), %edi
movq 0(%esp), %mm1 // fracstep:fracstep in mm1
movl %eax, 0(%esp)
shrl $25, %eax
movl %edi, 4(%esp)
movzbl (%ebp, %eax), %eax
movq 0(%esp), %mm0 // frac:frac in mm0
paddd %mm1, %mm0
shrl $25, %edi
movq %mm0, %mm2
psrld $25, %mm2 // texture index in mm2
paddd %mm1, %mm0
movq %mm2, 0(%esp)
.globl rdc8mloop
rdc8mloop: // The main loop
movq %mm0, %mm2 // move 4-5 to temp reg
movzbl (%ebp, %edi), %edi // read 1
psrld $25, %mm2 // shift 4-5
movb (%edx,%eax), %cl // lookup 0
movl 0(%esp), %eax // load 2
addl $0x12345678, %ebx // counter
.globl rdc8mwidth2
rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH
movb %cl, (%esi, %ebx) // write 0
movb (%edx,%edi), %ch // lookup 1
movb %ch, 0x12345678(%esi, %ebx) // write 1
.globl rdc8mwidth1
rdc8mwidth1: // DeadBeef = SCREENWIDTH
movl 4(%esp), %edi // load 3
paddd %mm1, %mm0 // frac 6-7
movzbl (%ebp, %eax), %eax // lookup 2
movq %mm2, 0(%esp) // store texture index 4-5
jl rdc8mloop
jnz rdc8mno_odd
movb (%edx,%eax), %cl // write the last odd pixel
movb %cl, 0x12345678(%esi)
.globl rdc8mwidth4
rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH
rdc8mno_odd:
.globl rdc8mdone
rdc8mdone:
emms
addl _mmxcomm, %esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
// Need some extra space to align run-time
.globl R_DrawColumn_8_K6_MMX_end
R_DrawColumn_8_K6_MMX_end:
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;nop;
nop;nop;nop;nop;nop;nop;nop;
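
The 0x12345678 immediates in the three drawers above are placeholders: the exported rdc8*width* labels sit right after the instructions that contain them, and the "DeadBeef = n*SCREENWIDTH" comments give the value that is meant to be patched in once the video mode (and therefore the row stride) is known. A purely hypothetical C sketch of that patching, only to illustrate the mechanism; the helper name, the patched label, and the assumption that the code page is writable are all illustrative.

#include <string.h>

extern unsigned char rdc8pwidth1[];    /* label exported from the asm above */

static void PatchRowStride_sketch(int screenwidth)
{
    /* The label marks the end of the instruction whose last four bytes are
       the placeholder immediate, so overwrite the dword just before it. */
    int value = 2 * screenwidth;       /* "DeadBeef = 2*SCREENWIDTH" */
    memcpy((unsigned char *)rdc8pwidth1 - sizeof(value), &value, sizeof(value));
}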

View file

@ -1,674 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DOSDOOM.
;; Copyright (C) 2010-2018 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_mmx.nas
;; DESCRIPTION:
;; Assembler optimised rendering code for software mode, using SIMD
;; instructions.
;; Draw wall columns.
[BITS 32]
%define FRACBITS 16
%define TRANSPARENTPIXEL 247
%ifdef LINUX
%macro cextern 1
[extern %1]
%endmacro
%macro cglobal 1
[global %1]
%endmacro
%else
%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
%endif
; The viddef_s structure. We only need the width field.
struc viddef_s
resb 12
.width: resb 4
resb 44
endstruc
;; externs
;; columns
cextern dc_colormap
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern dc_iscale
cextern dc_texturemid
cextern dc_texheight
cextern dc_source
cextern dc_hires
cextern centery
cextern centeryfrac
cextern dc_transmap
cextern R_DrawColumn_8_ASM
cextern R_Draw2sMultiPatchColumn_8_ASM
;; spans
cextern nflatshiftup
cextern nflatxshift
cextern nflatyshift
cextern nflatmask
cextern ds_xfrac
cextern ds_yfrac
cextern ds_xstep
cextern ds_ystep
cextern ds_x1
cextern ds_x2
cextern ds_y
cextern ds_source
cextern ds_colormap
cextern ylookup
cextern columnofs
cextern vid
[SECTION .data]
nflatmask64 dq 0
[SECTION .text]
;;----------------------------------------------------------------------
;;
;; R_DrawColumn : 8bpp column drawer
;;
;; MMX column drawer.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = vid.width
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = 2 * fracstep, twice
;; mm3 = pair of consecutive fracs
;;----------------------------------------------------------------------
cglobal R_DrawColumn_8_MMX
R_DrawColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
test edx, [dc_texheight]
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
imul dword [dc_iscale]
shrd eax, edx, FRACBITS
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx]
movzx edx, byte [ebx + edx]
mov [edi], dl
add edi, [vid + viddef_s.width]
sub ecx, 1
jz .done
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;;
mov ebp, [vid + viddef_s.width]
align 16
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
mov [edi], al ;; 2B 1(2)u. First pixel
;; IFETCH boundary
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
;; IFETCH boundary
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
.usenonMMX:
call R_DrawColumn_8_ASM
jmp .done
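
Structurally, the MMX drawer above is the familiar column loop with the step doubled so that two pixels are produced per iteration (one leftover pixel is drawn first when the count is odd), which is what the paired dwords in mm3 encode. A minimal C sketch of that shape under the same assumed globals as before; the actual speed-up comes from the MMX registers, which plain C does not capture.

static void DrawColumn_paired_sketch(void)
{
    INT32 count = dc_yh - dc_yl + 1;
    if (count <= 0 || (dc_texheight & (dc_texheight - 1)))
        return;                        /* the MMX path needs a power-of-two height */

    UINT8 *dest = ylookup[dc_yl] + columnofs[dc_x];
    fixed_t heightmask = dc_texheight - 1;
    fixed_t step = dc_iscale;
    fixed_t frac = dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, step);
    if (dc_hires)
        frac = 0;

    if (count & 1)                     /* mod-2 pixel first */
    {
        *dest = dc_colormap[dc_source[(frac >> FRACBITS) & heightmask]];
        dest += vid.width;
        frac += step;
        count--;
    }
    for (; count; count -= 2)          /* two pixels per iteration */
    {
        dest[0]         = dc_colormap[dc_source[(frac >> FRACBITS) & heightmask]];
        dest[vid.width] = dc_colormap[dc_source[((frac + step) >> FRACBITS) & heightmask]];
        dest += 2 * vid.width;
        frac += 2 * step;
    }
}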
;;----------------------------------------------------------------------
;;
;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
;; pixels.
;;
;; MMX column drawer.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = vid.width
;; mm0 = accumulator
;; mm1 = heightmask, twice
;; mm2 = 2 * fracstep, twice
;; mm3 = pair of consecutive fracs
;;----------------------------------------------------------------------
cglobal R_Draw2sMultiPatchColumn_8_MMX
R_Draw2sMultiPatchColumn_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; Our algorithm requires that the texture height be a power of two.
;; If not, fall back to the non-MMX drawer.
;;
.texheightcheck:
mov edx, [dc_texheight]
sub edx, 1 ;; edx = heightmask
test edx, [dc_texheight]
jnz near .usenonMMX
mov ebp, edx ;; Keep a copy of heightmask in a
;; GPR for the time being.
;;
;; Fill mm1 with heightmask
;;
movd mm1, edx ;; low dword = heightmask
punpckldq mm1, mm1 ;; copy low dword to high dword
;;
;; dest = ylookup[dc_yl] + columnofs[dc_x];
;;
mov eax, [dc_yl]
mov edi, [ylookup+eax*4]
mov ebx, [dc_x]
add edi, [columnofs+ebx*4] ;; edi = dest
;;
;; pixelcount = yh - yl + 1
;;
mov ecx, [dc_yh]
add ecx, 1
sub ecx, eax ;; pixel count
jle near .done ;; nothing to scale
;;
;; fracstep = dc_iscale;
;;
movd mm2, [dc_iscale] ;; fracstep in low dword
punpckldq mm2, mm2 ;; copy to high dword
mov ebx, [dc_colormap]
mov esi, [dc_source]
;;
;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
;;
;; eax == dc_yl already
shl eax, FRACBITS
sub eax, [centeryfrac]
imul dword [dc_iscale]
shrd eax, edx, FRACBITS
add eax, [dc_texturemid]
;;
;; if (dc_hires) frac = 0;
;;
test byte [dc_hires], 0x01
jz .mod2
xor eax, eax
;;
;; Do mod-2 pixel.
;;
.mod2:
test ecx, 1
jz .pairprepare
mov edx, eax ;; edx = frac
add eax, [dc_iscale] ;; eax += fracstep
sar edx, FRACBITS
and edx, ebp ;; edx &= heightmask
movzx edx, byte [esi + edx]
cmp dl, TRANSPARENTPIXEL
je .nextmod2
movzx edx, byte [ebx + edx]
mov [edi], dl
.nextmod2:
add edi, [vid + viddef_s.width]
sub ecx, 1
jz .done
.pairprepare:
;;
;; Prepare for the main loop.
;;
movd mm3, eax ;; Low dword = frac
movq mm4, mm3 ;; Copy to intermediate register
paddd mm4, mm2 ;; dwords of mm4 += fracstep
punpckldq mm3, mm4 ;; Low dword = first frac, high = second
pslld mm2, 1 ;; fracstep *= 2
;;
;; ebp = vid.width
;;
mov ebp, [vid + viddef_s.width]
align 16
.pairloop:
movq mm0, mm3 ;; 3B 1u.
psrad mm0, FRACBITS ;; 4B 1u.
pand mm0, mm1 ;; 3B 1u. frac &= heightmask
paddd mm3, mm2 ;; 3B 1u. frac += fracstep
movd eax, mm0 ;; 3B 1u. Get first frac
;; IFETCH boundary
movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
movd edx, mm0 ;; 3B 1u. Get second frac
cmp al, TRANSPARENTPIXEL ;; 2B 1u.
je .secondinpair ;; 2B 1u.
;; IFETCH boundary
movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
mov [edi], al ;; 2B 1(2)u. First pixel
.secondinpair:
movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
cmp dl, TRANSPARENTPIXEL ;; 2B 1u.
je .nextpair ;; 2B 1u.
;; IFETCH boundary
movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
.nextpair:
lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
sub ecx, 2 ;; 3B 1u. count -= 2
jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
.usenonMMX:
call R_Draw2sMultiPatchColumn_8_ASM
jmp .done
;;----------------------------------------------------------------------
;;
;; R_DrawSpan : 8bpp span drawer
;;
;; MMX span drawer.
;;
;;----------------------------------------------------------------------
;; eax = accumulator
;; ebx = colormap
;; ecx = count
;; edx = accumulator
;; esi = source
;; edi = dest
;; ebp = two pixels
;; mm0 = accumulator
;; mm1 = xposition
;; mm2 = yposition
;; mm3 = 2 * xstep
;; mm4 = 2 * ystep
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;; mm7 = accumulator
;;----------------------------------------------------------------------
cglobal R_DrawSpan_8_MMX
R_DrawSpan_8_MMX:
push ebp ;; preserve caller's stack frame pointer
push esi ;; preserve register variables
push edi
push ebx
;;
;; esi = ds_source
;; ebx = ds_colormap
;;
mov esi, [ds_source]
mov ebx, [ds_colormap]
;;
;; edi = ylookup[ds_y] + columnofs[ds_x1]
;;
mov eax, [ds_y]
mov edi, [ylookup + eax*4]
mov edx, [ds_x1]
add edi, [columnofs + edx*4]
;;
;; ecx = ds_x2 - ds_x1 + 1
;;
mov ecx, [ds_x2]
sub ecx, edx
add ecx, 1
;;
;; Needed for fracs and steps
;;
movd mm7, [nflatshiftup]
;;
;; mm3 = xstep
;;
movd mm3, [ds_xstep]
pslld mm3, mm7
punpckldq mm3, mm3
;;
;; mm4 = ystep
;;
movd mm4, [ds_ystep]
pslld mm4, mm7
punpckldq mm4, mm4
;;
;; mm1 = pair of consecutive xpositions
;;
movd mm1, [ds_xfrac]
pslld mm1, mm7
movq mm6, mm1
paddd mm6, mm3
punpckldq mm1, mm6
;;
;; mm2 = pair of consecutive ypositions
;;
movd mm2, [ds_yfrac]
pslld mm2, mm7
movq mm6, mm2
paddd mm6, mm4
punpckldq mm2, mm6
;;
;; mm5 = nflatxshift
;; mm6 = nflatyshift
;;
movd mm5, [nflatxshift]
movd mm6, [nflatyshift]
;;
;; Mask is in memory due to lack of registers.
;;
mov eax, [nflatmask]
mov [nflatmask64], eax
mov [nflatmask64 + 4], eax
;;
;; Go until we reach a dword boundary.
;;
.unaligned:
test edi, 3
jz .alignedprep
.stragglers:
cmp ecx, 0
je .done ;; If ecx == 0, we're finished.
;;
;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep (once!)
paddd mm2, mm4 ;; yposition += ystep (once!)
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of first pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
mov [edi], al
add edi, 1
sub ecx, 1
jmp .unaligned
.alignedprep:
;;
;; We can double the steps now.
;;
pslld mm3, 1
pslld mm4, 1
;;
;; Generate chunks of four pixels.
;;
.alignedloop:
;;
;; Make sure we have at least four pixels.
;;
cmp ecx, 4
jl .prestragglers
;;
;; First two pixels.
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep
paddd mm2, mm4 ;; yposition += ystep
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of first pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx ebp, byte [ebx + eax] ;; ebp = colormap[al]
punpckhdq mm0, mm0 ;; both dwords = high dword
movd eax, mm0 ;; eax = index of second pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 8 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
;;
;; Next two pixels.
;;
movq mm0, mm1 ;; mm0 = xposition
movq mm7, mm2 ;; mm7 = yposition
paddd mm1, mm3 ;; xposition += xstep
paddd mm2, mm4 ;; yposition += ystep
psrld mm0, mm5 ;; shift
psrld mm7, mm6 ;; shift
pand mm7, [nflatmask64] ;; mask
por mm0, mm7 ;; or x and y together
movd eax, mm0 ;; eax = index of third pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 16 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
punpckhdq mm0, mm0 ;; both dwords = high dword
movd eax, mm0 ;; eax = index of second pixel
movzx eax, byte [esi + eax] ;; al = source[eax]
movzx eax, byte [ebx + eax] ;; al = colormap[al]
shl eax, 24 ;; get pixel in right byte
or ebp, eax ;; put pixel in ebp
;;
;; Write pixels.
;;
mov [edi], ebp
add edi, 4
sub ecx, 4
jmp .alignedloop
.prestragglers:
;;
;; Back to one step at a time.
;;
psrad mm3, 1
psrad mm4, 1
jmp .stragglers
.done:
;;
;; Clear MMX state, or else FPU operations will go badly awry.
;;
emms
pop ebx
pop edi
pop esi
pop ebp
ret
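
Stripped of the MMX pairing, each span pixel above is one index computation: the x/y fracs (pre-shifted by nflatshiftup) are shifted down into tile coordinates, the y part is masked into the row bits and or-ed with the x part, and the resulting flat byte is run through the colormap. A minimal C sketch of the scalar path (assumed globals; not the project's actual drawer):

static void DrawSpan_sketch(void)
{
    INT32 count = ds_x2 - ds_x1 + 1;
    UINT8 *dest = ylookup[ds_y] + columnofs[ds_x1];

    UINT32 xpos  = (UINT32)ds_xfrac << nflatshiftup;
    UINT32 ypos  = (UINT32)ds_yfrac << nflatshiftup;
    UINT32 xstep = (UINT32)ds_xstep << nflatshiftup;
    UINT32 ystep = (UINT32)ds_ystep << nflatshiftup;

    for (; count > 0; count--)
    {
        UINT32 index = ((ypos >> nflatyshift) & nflatmask) | (xpos >> nflatxshift);
        *dest++ = ds_colormap[ds_source[index]];
        xpos += xstep;
        ypos += ystep;
    }
}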

View file

@ -1,48 +0,0 @@
;; SONIC ROBO BLAST 2
;;-----------------------------------------------------------------------------
;; Copyright (C) 1998-2000 by DooM Legacy Team.
;; Copyright (C) 1999-2018 by Sonic Team Junior.
;;
;; This program is free software distributed under the
;; terms of the GNU General Public License, version 2.
;; See the 'LICENSE' file for more details.
;;-----------------------------------------------------------------------------
;; FILE:
;; tmap_vc.nas
;; DESCRIPTION:
;; Assembler optimised math code for Visual C++.
[BITS 32]
%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro
[SECTION .text write]
;----------------------------------------------------------------------------
;fixed_t FixedMul (fixed_t a, fixed_t b)
;----------------------------------------------------------------------------
cglobal FixedMul
; align 16
FixedMul:
mov eax,[esp+4]
imul dword [esp+8]
shrd eax,edx,16
ret
;----------------------------------------------------------------------------
;fixed_t FixedDiv2 (fixed_t a, fixed_t b);
;----------------------------------------------------------------------------
cglobal FixedDiv2
; align 16
FixedDiv2:
mov eax,[esp+4]
mov edx,eax ;; these two instructions allow the next
sar edx,31 ;; two to pair, on the Pentium processor.
shld edx,eax,16
sal eax,16
idiv dword [esp+8]
ret
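
Both routines above have direct C equivalents once a 64-bit intermediate is available, which is essentially what the portable fallbacks do; FixedDiv2 assumes the quotient fits in 32 bits (overflow handling is the caller's job). A minimal sketch, assuming the fixed_t and FRACBITS definitions from m_fixed.h:

static inline fixed_t FixedMul_sketch(fixed_t a, fixed_t b)
{
    return (fixed_t)(((INT64)a * b) >> FRACBITS);    /* 64-bit product, then >>16 */
}

static inline fixed_t FixedDiv2_sketch(fixed_t a, fixed_t b)
{
    return (fixed_t)(((INT64)a << FRACBITS) / b);    /* 64-bit dividend, then /b  */
}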

View file

@ -266,12 +266,6 @@ static void CV_Gammaxxx_ONChange(void)
#endif
#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM)
void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes);
#define HAVE_VIDCOPY
#endif
static void CV_constextsize_OnChange(void)
{
con_recalc = true;
@ -284,9 +278,6 @@ static void CV_constextsize_OnChange(void)
void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes)
{
#ifdef HAVE_VIDCOPY
VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes);
#else
if ((srcrowbytes == destrowbytes) && (srcrowbytes == (size_t)width))
M_Memcpy(destptr, srcptr, srcrowbytes * height);
else
@ -299,7 +290,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32
srcptr += srcrowbytes;
}
}
#endif
}
static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0};

View file

@ -1,61 +0,0 @@
// SONIC ROBO BLAST 2
//-----------------------------------------------------------------------------
// Copyright (C) 1998-2000 by DooM Legacy Team.
// Copyright (C) 1999-2018 by Sonic Team Junior.
//
// This program is free software distributed under the
// terms of the GNU General Public License, version 2.
// See the 'LICENSE' file for more details.
//-----------------------------------------------------------------------------
/// \file vid_copy.s
/// \brief code for updating the linear frame buffer screen.
#include "asm_defs.inc" // structures, must match the C structures!
// DJGPPv2 is as fast as this one, but then someone may compile with a less
// good version of DJGPP than mine, so this little asm will do the trick!
#define srcptr 4+16
#define destptr 8+16
#define width 12+16
#define height 16+16
#define srcrowbytes 20+16
#define destrowbytes 24+16
// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth );
// width is given as BYTES
#ifdef __i386__
.globl C(VID_BlitLinearScreen_ASM)
C(VID_BlitLinearScreen_ASM):
pushl %ebp // preserve caller's stack frame
pushl %edi
pushl %esi // preserve register variables
pushl %ebx
cld
movl srcptr(%esp),%esi
movl destptr(%esp),%edi
movl width(%esp),%ebx
movl srcrowbytes(%esp),%eax
subl %ebx,%eax
movl destrowbytes(%esp),%edx
subl %ebx,%edx
shrl $2,%ebx
movl height(%esp),%ebp
LLRowLoop:
movl %ebx,%ecx
rep/movsl (%esi),(%edi)
addl %eax,%esi
addl %edx,%edi
decl %ebp
jnz LLRowLoop
popl %ebx // restore register variables
popl %esi
popl %edi
popl %ebp // restore the caller's stack frame
ret
#endif

View file

@ -20,10 +20,6 @@ else
SDL_LDFLAGS?=-L../libs/SDL2/i686-w64-mingw32/lib -L../libs/SDL2_mixer/i686-w64-mingw32/lib -lmingw32 -lSDL2main -lSDL2 -mwindows
endif
ifndef NOASM
USEASM=1
endif
ifndef NONET
ifndef MINGW64 #miniupnc is broken with MINGW64
HAVE_MINIUPNPC=1

View file

@ -22,7 +22,6 @@
#ifdef _MSC_VER
#include <assert.h>
#endif
#define NOASM
#include "../src/tables.h"
#define NO_M
#include "../src/m_fixed.c"