For ZDBSP:

- Fixed the OrgSectorMap generation in FLevel::RemoveExtraSectors().
- Added a version of ClassifyLine compiled with SSE (but not SSE2) optimization
  for people with Pentium 3/Athlon XPs to use.
- Added ClassifyLine backpatching for the GCC Windows build. The first time a
  function calls ClassifyLine, it has to check which version to use and jump
  to the appropriate one. After that, it calls the desired one straight away.

SVN r227 (trunk)
This commit is contained in:
Randy Heit 2006-06-26 20:39:57 +00:00
parent 50b17f8af6
commit c51e49f02f
11 changed files with 243 additions and 74 deletions

View file

@ -1,4 +1,7 @@
CFLAGS = -Wall -fomit-frame-pointer -Izlib -pipe -ffast-math -MMD
CC = gcc
CXX = g++
CFLAGS = -Wall -Izlib -pipe -ffast-math -MMD
# Optimization flags
CFLAGS += -O3 -fomit-frame-pointer -DNDEBUG
@ -37,22 +40,31 @@ ifeq ($(strip),1)
LDFLAGS += -s
endif
# To use SSE2 math for everything, pass sse=1 to make.
ifeq ($(sse),1)
CFLAGS += -msse -msse2 -mfpmath=sse
# To compile without support for backpatching ClassifyLine calls, pass nobackpatch=1 to make.
ifeq ($(nobackpatch),1)
CFLAGS += -DDISABLE_BACKPATCH
endif
CC = gcc
CXX = g++
CXXFLAGS = $(CFLAGS)
# To use SSE2 math for everything, pass sse=1 to make.
ifeq ($(sse),1)
CFLAGS += -msse2 -mfpmath=sse
endif
OBJS = main.o getopt.o getopt1.o blockmapbuilder.o processor.o view.o wad.o \
nodebuild.o nodebuild_events.o nodebuild_extract.o nodebuild_gl.o \
nodebuild_utility.o nodebuild_classify_sse2.o nodebuild_classify_nosse2.o \
nodebuild_utility.o nodebuild_classify_nosse2.o \
zlib/adler32.o zlib/compress.o zlib/crc32.o zlib/deflate.o zlib/trees.o \
zlib/zutil.o
# To compile without any SSE support, pass nosse=1 to make.
ifeq ($(nosse),1)
CFLAGS += -DDISABLE_SSE
else
OBJS += nodebuild_classify_sse1.o nodebuild_classify_sse2.o
endif
CXXFLAGS = $(CFLAGS)
ifeq (Windows_NT,$(OS))
OBJS += resource.o
endif
@ -69,10 +81,13 @@ profile-use:
$(MAKE) all CXXFLAGS="$(CXXFLAGS) -fprofile-use"
$(EXE): $(OBJS)
$(CCDV) $(CXX) -o $(EXE) $(OBJS) $(LDFLAGS)
$(CXX) -o $(EXE) $(OBJS) $(LDFLAGS)
nodebuild_classify_sse2.o: nodebuild_classify_sse2.cpp nodebuild.h
$(CXX) $(CXXFLAGS) -msse2 -mfpmath=sse -c -o $@ $<
$(CXX) $(CXXFLAGS) -msse -msse2 -march=i686 -mfpmath=sse -c -o $@ $<
nodebuild_classify_sse1.o: nodebuild_classify_sse1.cpp nodebuild.h
$(CXX) $(CXXFLAGS) -msse -march=i686 -mfpmath=sse -c -o $@ $<
resource.o: resource.rc
windres -o $@ -i $<

View file

@ -78,7 +78,10 @@ static void ParseArgs (int argc, char **argv);
static void ShowUsage ();
static void ShowVersion ();
static bool CheckInOutNames ();
static void VerifySSE2 ();
#ifndef DISABLE_SSE
static void CheckSSE ();
#endif
// EXTERNAL DATA DECLARATIONS ----------------------------------------------
@ -107,7 +110,7 @@ bool CompressNodes = false;
bool CompressGLNodes = false;
bool GLOnly = false;
bool V5GLNodes = false;
bool HaveSSE2 = true;
bool HaveSSE1, HaveSSE2;
// PRIVATE DATA DEFINITIONS ------------------------------------------------
@ -140,7 +143,7 @@ static option long_opts[] =
{"gl-only", no_argument, 0, 'x'},
{"gl-v5", no_argument, 0, '5'},
{"no-sse", no_argument, 0, 1002},
{"no-sse2", no_argument, 0, 1002},
{"no-sse2", no_argument, 0, 1003},
{0,0,0,0}
};
@ -152,8 +155,15 @@ int main (int argc, char **argv)
{
bool fixSame = false;
#ifdef DISABLE_SSE
HaveSSE1 = HaveSSE2 = false;
#else
HaveSSE1 = HaveSSE2 = true;
#endif
ParseArgs (argc, argv);
VerifySSE2 ();
#ifndef DISABLE_SSE
CheckSSE ();
#endif
if (InName == NULL)
{
@ -218,7 +228,7 @@ int main (int argc, char **argv)
START_COUNTER(t2a, t2b, t2c)
FProcessor builder (inwad, lump);
builder.Write (outwad);
END_COUNTER(t2a, t2b, t2c, " %g seconds.\n")
END_COUNTER(t2a, t2b, t2c, " %.3f seconds.\n")
lump = inwad.LumpAfterMap (lump);
}
@ -256,7 +266,7 @@ int main (int argc, char **argv)
}
}
END_COUNTER(t1a, t1b, t1c, "\nTotal time: %g seconds.\n")
END_COUNTER(t1a, t1b, t1c, "\nTotal time: %.3f seconds.\n")
}
catch (std::runtime_error msg)
{
@ -273,11 +283,13 @@ int main (int argc, char **argv)
printf ("%s\n", msg.what());
return 20;
}
#ifndef _DEBUG
catch (...)
{
printf ("Unhandled exception. ZDBSP cannot continue.\n");
return 20;
}
#endif
return 0;
}
@ -390,7 +402,11 @@ static void ParseArgs (int argc, char **argv)
ShowVersion ();
exit (0);
break;
case 1002: // Disable SSE2 ClassifyLine routine
case 1002: // Disable SSE/SSE2 ClassifyLine routine
HaveSSE1 = false;
HaveSSE2 = false;
break;
case 1003: // Disable only SSE2 ClassifyLine routine
HaveSSE2 = false;
break;
case 1000:
@ -524,33 +540,37 @@ static bool CheckInOutNames ()
//==========================================================================
//
// VerifySSE2
// CheckSSE
//
// Ensure that if HaveSSE2 is set, that we actually do have SSE2.
// Checks if the processor supports SSE or SSE2.
//
//==========================================================================
static void VerifySSE2 ()
#ifndef DISABLE_SSE
static void CheckSSE ()
{
#ifdef __SSE2__
// If we compiled with SSE2 support enabled for everything, then
// obviously it's available, or the program won't get very far.
return;
#endif
#if defined(_MSC_VER) && defined(_M_X64)
#endif
if (!HaveSSE2 && !HaveSSE1)
{
return;
}
bool forcenosse1 = !HaveSSE1;
bool forcenosse2 = !HaveSSE2;
HaveSSE1 = false;
HaveSSE2 = false;
#if defined(_MSC_VER)
#ifdef _M_X64
// Processors implementing AMD64 are required to support SSE2.
return;
#else
if (!HaveSSE2)
{
return;
}
HaveSSE2 = false;
__asm
{
pushfd // save EFLAGS
@ -564,10 +584,12 @@ static void VerifySSE2 ()
cmp eax,edx // see if bit 21 has changed
jz noid // if no change, then no CPUID
// Check the feature flag for SSE2
// Check the feature flag for SSE/SSE2
mov eax,1
cpuid
test edx,(1<<26)
test edx,(1<<25) // Check for SSE
setnz HaveSSE1
test edx,(1<<26) // Check for SSE2
setnz HaveSSE2
noid:
}
@ -576,11 +598,6 @@ noid:
#elif defined(__GNUC__)
// Same as above, but for GCC
if (!HaveSSE2)
{
return;
}
HaveSSE2 = false;
asm volatile
("pushfl\n\t"
"popl %%eax\n\t"
@ -594,16 +611,25 @@ noid:
"jz noid\n\t"
"mov $1,%%eax\n\t"
"cpuid\n\t"
"test $(1<<26),%%edx\n\t"
"test $(1<<25),%%edx\n\t"
"setneb %0\n"
"test $(1<<26),%%edx\n\t"
"setneb %1\n"
"noid:"
:"=m" (HaveSSE2)::"eax","ebx","ecx","edx");
:"=m" (HaveSSE1),"=m" (HaveSSE2)::"eax","ebx","ecx","edx");
#else
// Can't compile a check, so assume neither SSE nor SSE2 is present.
HaveSSE2 = false;
#endif
if (forcenosse1)
{
HaveSSE1 = false;
}
if (forcenosse2)
{
HaveSSE2 = false;
}
}
#endif
//==========================================================================
//

View file

@ -40,12 +40,12 @@
FNodeBuilder::FNodeBuilder (FLevel &level,
TArray<FPolyStart> &polyspots, TArray<FPolyStart> &anchors,
const char *name, bool makeGLnodes, bool enableSSE2)
const char *name, bool makeGLnodes, BYTE sselevel)
: Level(level), SegsStuffed(0), MapName(name)
{
VertexMap = new FVertexMap (*this, Level.MinX, Level.MinY, Level.MaxX, Level.MaxY);
GLNodes = makeGLnodes;
EnableSSE2 = enableSSE2;
SSELevel = sselevel;
FindUsedVertices (Level.Vertices, Level.NumVertices);
MakeSegsFromSides ();
FindPolyContainers (polyspots, anchors);
@ -1019,3 +1019,54 @@ void FNodeBuilder::PrintSet (int l, DWORD set)
}
Printf ("*\n");
}
#if defined(_WIN32) && !defined(__SSE2__) && !defined(DISABLE_SSE) && !defined(DISABLE_BACKPATCH) && !defined(_M_X64) && defined(__GNUC__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
// Backpatching stub that callers reach instead of a specific ClassifyLine
// routine.
//
// Picks ClassifyLine2, ClassifyLineSSE1 or ClassifyLineSSE2 according to
// SSELevel, adjusts the 4 bytes located just before this call's return
// address by the distance from this stub to the chosen routine, and then
// forwards the current call to that routine.
//
// node, seg, sidev1 and sidev2 are passed through untouched; the return
// value is whatever the selected routine returns.
//
// NOTE(review): this assumes the call site is a direct near call whose last
// four bytes are a 32-bit displacement relative to the return address --
// confirm this holds for every caller.
int FNodeBuilder::ClassifyLineBackpatch (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2)
{
// Select the routine based on SSELevel and patch the caller so that
// they call that routine directly next time instead of going through here.
// The int immediately preceding the return address; it is treated below as
// the displacement operand of the caller's call instruction.
int *calleroffset = (int *)__builtin_return_address(0) - 1;
// Filled in by the asm below: address of the chosen routine minus the
// address of this stub.
int diff;
// Filled in by the asm below: address of the chosen routine.
// NOTE(review): it is invoked as an ordinary function with `this` as an
// explicit first argument; presumably that matches how member functions
// receive `this` on this target -- confirm.
int (*func)(FNodeBuilder *, node_t &, const FNodeBuilder::FPrivSeg *, int &, int &);
// Receives the page protection that was in effect before the patch.
DWORD oldprotect;
// printf ("Patching for SSE %d\n", SSELevel);
// I wasn't sure how to calculate the difference between the function addresses with C++
// (or if it's even possible), so here's some asm to do it instead:
if (SSELevel == 2)
{
// SSE2 routine.
__asm (
"movl $__ZN12FNodeBuilder16ClassifyLineSSE2ER6node_tPKNS_8FPrivSegERiS5_,%1\n\t"
"movl $__ZN12FNodeBuilder16ClassifyLineSSE2ER6node_tPKNS_8FPrivSegERiS5_-__ZN12FNodeBuilder21ClassifyLineBackpatchER6node_tPKNS_8FPrivSegERiS5_,%0\n\t"
: "=r" (diff), "=r" (func));
}
else if (SSELevel == 1)
{
// SSE1 routine.
__asm (
"movl $__ZN12FNodeBuilder16ClassifyLineSSE1ER6node_tPKNS_8FPrivSegERiS5_,%1\n\t"
"movl $__ZN12FNodeBuilder16ClassifyLineSSE1ER6node_tPKNS_8FPrivSegERiS5_-__ZN12FNodeBuilder21ClassifyLineBackpatchER6node_tPKNS_8FPrivSegERiS5_,%0\n\t"
: "=r" (diff), "=r" (func));
}
else
{
// Any other SSELevel: the non-SSE routine.
__asm (
"movl $__ZN12FNodeBuilder13ClassifyLine2ER6node_tPKNS_8FPrivSegERiS5_,%1\n\t"
"movl $__ZN12FNodeBuilder13ClassifyLine2ER6node_tPKNS_8FPrivSegERiS5_-__ZN12FNodeBuilder21ClassifyLineBackpatchER6node_tPKNS_8FPrivSegERiS5_,%0\n\t"
: "=r" (diff), "=r" (func));
}
// Patch the caller.
// The 4 bytes are made writable first. If that fails, the call site is left
// unpatched and will simply come through this stub again next time.
if (VirtualProtect (calleroffset, 4, PAGE_EXECUTE_READWRITE, &oldprotect))
{
*calleroffset += diff;
// Put the previous protection back.
VirtualProtect (calleroffset, 4, oldprotect, &oldprotect);
}
// And return by calling the real function.
return func (this, node, seg, sidev1, sidev2);
}
#endif

View file

@ -137,7 +137,7 @@ public:
FNodeBuilder (FLevel &level,
TArray<FPolyStart> &polyspots, TArray<FPolyStart> &anchors,
const char *name, bool makeGLnodes, bool enableSSE2);
const char *name, bool makeGLnodes, BYTE sselevel);
~FNodeBuilder ();
void GetVertices (WideVertex *&verts, int &count);
@ -177,7 +177,7 @@ private:
DWORD HackMate; // Seg to use in front of hack seg
FLevel &Level;
bool GLNodes;
bool EnableSSE2;
int SSELevel;
// Progress meter stuff
int SegsStuffed;
@ -211,8 +211,15 @@ private:
inline int ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
int ClassifyLine2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
#ifndef DISABLE_SSE
int ClassifyLineSSE1 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
int ClassifyLineSSE2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
#if defined(_WIN32) && defined(__GNUC__) && !defined(DISABLE_BACKPATCH)
int ClassifyLineBackpatch (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2) __attribute__((noinline));
#endif
#endif
void FixSplitSharers ();
double AddIntersection (const node_t &node, int vertex);
void AddMinisegs (const node_t &node, DWORD splitseg, DWORD &fset, DWORD &rset);
@ -271,17 +278,27 @@ inline int FNodeBuilder::PointOnSide (int x, int y, int x1, int y1, int dx, int
// Forwards to one of the ClassifyLine* implementations, passing every
// argument through unchanged and returning that implementation's result.
// The implementation is chosen by the preprocessor where the build
// configuration already decides it, and by SSELevel otherwise.
inline int FNodeBuilder::ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2)
{
#ifdef __SSE2__
#ifdef DISABLE_SSE
// Built without the SSE routines: only the plain version exists.
return ClassifyLine2 (node, seg, sidev1, sidev2);
#else
// _M_X64 is the macro MSVC predefines for x64 targets (there is no _M_IX64).
#if defined(__SSE2__) || defined(_M_X64)
// If compiling with SSE2 support everywhere, just use the SSE2 version.
return ClassifyLineSSE2 (node, seg, sidev1, sidev2);
#elif defined(_MSC_VER) && _MSC_VER < 1300
// VC 6 does not support SSE2 optimizations.
// VC 6 does not support SSE optimizations.
return ClassifyLine2 (node, seg, sidev1, sidev2);
#else
// Select the routine based on our flag.
if (EnableSSE2)
#if defined(_WIN32) && defined(__GNUC__) && !defined(DISABLE_BACKPATCH)
// GCC on Windows: go through the backpatching stub, which makes the choice.
return ClassifyLineBackpatch (node, seg, sidev1, sidev2);
#else
if (SSELevel == 2)
return ClassifyLineSSE2 (node, seg, sidev1, sidev2);
else if (SSELevel == 1)
return ClassifyLineSSE1 (node, seg, sidev1, sidev2);
else
return ClassifyLine2 (node, seg, sidev1, sidev2);
#endif
#endif
#endif
}

View file

@ -18,6 +18,8 @@
*/
#ifndef DISABLE_SSE
#include "zdbsp.h"
#include "nodebuild.h"
@ -161,3 +163,5 @@ int FNodeBuilder::ClassifyLineSSE2 (node_t &node, const FPrivSeg *seg, int &side
}
return -1;
}
#endif

View file

@ -382,9 +382,12 @@ void FLevel::RemoveExtraSectors ()
// Make a reverse map for fixing reject lumps
OrgSectorMap = new WORD[newNumSectors];
for (i = 0; i < NumSectors; ++i)
{
if (remap[i] != NO_INDEX)
{
OrgSectorMap[remap[i]] = i;
}
}
NumSectors = newNumSectors;
}
@ -493,7 +496,21 @@ void FProcessor::Write (FWadWriter &out)
try
{
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), BuildGLNodes, HaveSSE2);
int ssetype;
if (HaveSSE2)
{
ssetype = 2;
}
else if (HaveSSE1)
{
ssetype = 1;
}
else
{
ssetype = 0;
}
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), BuildGLNodes, ssetype);
if (builder == NULL)
{
throw std::runtime_error(" Not enough memory to build nodes!");
@ -529,7 +546,7 @@ void FProcessor::Write (FWadWriter &out)
{
// Now repeat the process to obtain regular nodes
delete builder;
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), false, HaveSSE2);
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), false, ssetype);
if (builder == NULL)
{
throw std::runtime_error(" Not enough memory to build regular nodes!");

View file

@ -45,7 +45,7 @@ MTL=midl.exe
# ADD BASE MTL /nologo /tlb".\Release\zdbsp.tlb" /win32
# ADD MTL /nologo /tlb".\Release\zdbsp.tlb" /win32
# ADD BASE CPP /nologo /W3 /GX /Zi /Ot /Og /Oi /Oy /Ob2 /Gy /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /GA /GF /c
# ADD CPP /nologo /MD /W3 /GX /Zi /Ot /Og /Oi /Oy /Ob2 /Gy /I "zlib" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D _MSC_VER=1200 /GA /GF /c
# ADD CPP /nologo /MD /W3 /GX /Zi /Ot /Og /Oi /Oy /Ob2 /Gy /I "zlib" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D _MSC_VER=1200 /D "DISABLE_SSE" /GA /GF /c
# SUBTRACT CPP /YX /Yc /Yu
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
@ -74,7 +74,7 @@ MTL=midl.exe
# ADD BASE MTL /nologo /tlb".\Debug\zdbsp.tlb" /win32
# ADD MTL /nologo /tlb".\Debug\zdbsp.tlb" /win32
# ADD BASE CPP /nologo /W3 /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /GZ /c
# ADD CPP /nologo /W3 /GX /ZI /Od /I "zlib" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D _MSC_VER=1200 /GZ /c
# ADD CPP /nologo /W3 /GX /ZI /Od /I "zlib" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D _MSC_VER=1200 /D "DISABLE_SSE" /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@ -187,7 +187,7 @@ DEP_CPP_NODEBU=\
# End Source File
# Begin Source File
SOURCE=.\nodebuild_classify_sse2.cpp
SOURCE=nodebuild_events.cpp
DEP_CPP_NODEBUI=\
".\doomdata.h"\
".\nodebuild.h"\
@ -198,19 +198,8 @@ DEP_CPP_NODEBUI=\
# End Source File
# Begin Source File
SOURCE=nodebuild_events.cpp
DEP_CPP_NODEBUIL=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
".\workdata.h"\
".\zdbsp.h"\
# End Source File
# Begin Source File
SOURCE=nodebuild_extract.cpp
DEP_CPP_NODEBUILD=\
DEP_CPP_NODEBUIL=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
@ -222,7 +211,7 @@ DEP_CPP_NODEBUILD=\
# Begin Source File
SOURCE=nodebuild_gl.cpp
DEP_CPP_NODEBUILD_=\
DEP_CPP_NODEBUILD=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
@ -233,7 +222,7 @@ DEP_CPP_NODEBUILD_=\
# Begin Source File
SOURCE=nodebuild_utility.cpp
DEP_CPP_NODEBUILD_U=\
DEP_CPP_NODEBUILD_=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\

View file

@ -16,7 +16,7 @@ typedef __int32 int32_t;
#include <stdint.h>
#endif
#define ZDBSP_VERSION "1.8"
#define ZDBSP_VERSION "1.9"
enum EBlockmapMode
{
@ -45,7 +45,7 @@ extern int AAPreference;
extern bool CheckPolyobjs;
extern bool ShowMap;
extern bool CompressNodes, CompressGLNodes, V5GLNodes;
extern bool HaveSSE2;
extern bool HaveSSE1, HaveSSE2;
#define FIXED_MAX INT_MAX

View file

@ -152,9 +152,13 @@
<dd>
If you don't care how long it takes to build nodes, use this option and ZDBSP
won't tell you.</dd>
<dt>--no-sse or --no-sse2</dt>
<dd>Disable SSE2 optimizations in the nodebuilder, which can be useful if you just
want to measure the kind of speed up SSE2 provides.</dd>
<dt>--no-sse</dt>
<dd>Disables all SSE optimizations in the nodebuilder, which can be useful if you just
want to measure how much of a speedup SSE or SSE2 provides.</dd>
<dt>--no-sse2</dt>
<dd>Disables all SSE2 optimizations in the nodebuilder. SSE1 will still be used if
your processor supports it. Unless you want to compare the speed difference
between SSE and SSE2, there is again little reason to use this option.</dd>
<dt>--warn or -w</dt>
<dd>
Displays extra warning messages that ZDBSP might generate while building GL

View file

@ -248,6 +248,27 @@
<File
RelativePath=".\nodebuild_classify_nosse2.cpp">
</File>
<File
RelativePath=".\nodebuild_classify_sse1.cpp">
<FileConfiguration
Name="Release|Win32">
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="1"/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32">
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="1"/>
</FileConfiguration>
<FileConfiguration
Name="Release (SSE2)|Win32">
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="1"/>
</FileConfiguration>
</File>
<File
RelativePath=".\nodebuild_classify_sse2.cpp">
<FileConfiguration

View file

@ -99,7 +99,7 @@
IntermediateDirectory="$(ConfigurationName)_vs2005"
ConfigurationType="1"
CharacterSet="0"
WholeProgramOptimization="1"
WholeProgramOptimization="0"
>
<Tool
Name="VCPreBuildEventTool"
@ -118,8 +118,13 @@
/>
<Tool
Name="VCCLCompilerTool"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="0"
OmitFramePointers="true"
AdditionalIncludeDirectories="zlib"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
StringPooling="true"
RuntimeLibrary="0"
EnableEnhancedInstructionSet="0"
FloatingPointModel="2"
@ -291,6 +296,26 @@
RelativePath=".\nodebuild_classify_nosse2.cpp"
>
</File>
<File
RelativePath=".\nodebuild_classify_sse1.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="1"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="1"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\nodebuild_classify_sse2.cpp"
>