- Added runtime detection of SSE2 so that ZDBSP can select either the regular
  ClassifyLine routine or a version compiled with SSE2 optimizations. This
  pretty much removes any reason to do a separate SSE2 build, since most of
  the time is spent in ClassifyLine, so using SSE2 in just that one function
  helps the most.

SVN r172 (trunk)
This commit is contained in:
Randy Heit 2006-06-06 19:15:39 +00:00
parent 0d37e97f13
commit 6a1a0e007a
9 changed files with 240 additions and 166 deletions

View file

@ -8,7 +8,7 @@ CFLAGS += -O3 -fomit-frame-pointer -DNDEBUG
# Processor features flags
CFLAGS += -mtune=i686
#CFLAGS += -march=k8 -mfpmath=sse
#CFLAGS += -march=k8
LDFLAGS =
RM = rm -f FILE
@ -37,6 +37,11 @@ ifneq ($(strip),)
LDFLAGS += -s
endif
# To use SSE2 math for everything, pass sse=1 to make.
ifneq ($(sse),)
CFLAGS += -msse -msse2 -mfpmath=sse
endif
CC = gcc
CXX = g++
@ -44,7 +49,7 @@ CXXFLAGS = $(CFLAGS)
OBJS = main.o getopt.o getopt1.o blockmapbuilder.o processor.o view.o wad.o \
nodebuild.o nodebuild_events.o nodebuild_extract.o nodebuild_gl.o \
nodebuild_utility.o \
nodebuild_utility.o nodebuild_classify_sse2.o nodebuild_classify_nosse2.o \
zlib/adler32.o zlib/compress.o zlib/crc32.o zlib/deflate.o zlib/trees.o \
zlib/zutil.o
@ -62,6 +67,9 @@ profile-use:
$(EXE): $(OBJS)
$(CCDV) $(CXX) -o $(EXE) $(OBJS) $(LDFLAGS)
nodebuild_classify_sse2.o: nodebuild_classify_sse2.cpp nodebuild.h
$(CXX) $(CXXFLAGS) -msse2 -mfpmath=sse -c -o $@ $<
.PHONY: clean
clean:

100
main.cpp
View file

@ -78,6 +78,7 @@ static void ParseArgs (int argc, char **argv);
static void ShowUsage ();
static void ShowVersion ();
static bool CheckInOutNames ();
static void VerifySSE2 ();
// EXTERNAL DATA DECLARATIONS ----------------------------------------------
@ -106,6 +107,7 @@ bool CompressNodes = false;
bool CompressGLNodes = false;
bool GLOnly = false;
bool V5GLNodes = false;
bool HaveSSE2 = true;
// PRIVATE DATA DEFINITIONS ------------------------------------------------
@ -137,6 +139,8 @@ static option long_opts[] =
{"compress-normal", no_argument, 0, 'Z'},
{"gl-only", no_argument, 0, 'x'},
{"gl-v5", no_argument, 0, '5'},
{"no-sse", no_argument, 0, 1002},
{"no-sse2", no_argument, 0, 1002},
{0,0,0,0}
};
@ -149,6 +153,7 @@ int main (int argc, char **argv)
bool fixSame = false;
ParseArgs (argc, argv);
VerifySSE2 ();
if (InName == NULL)
{
@ -370,6 +375,9 @@ static void ParseArgs (int argc, char **argv)
ShowVersion ();
exit (0);
break;
case 1002: // Disable SSE2 ClassifyLine routine
HaveSSE2 = false;
break;
case 1000:
ShowUsage ();
exit (0);
@ -499,6 +507,89 @@ static bool CheckInOutNames ()
#endif
}
//==========================================================================
//
// VerifySSE2
//
// Makes sure that HaveSSE2 is only left set when the processor really
// supports SSE2. HaveSSE2 starts out true and may already have been
// cleared by the --no-sse/--no-sse2 options; in that case it is left
// alone. Otherwise it is cleared and then set again only if CPUID
// reports the SSE2 feature bit.
//
// Exactly one of the branches below is compiled:
//   * SSE2 enabled for the whole build, or an AMD64 target: nothing to
//     check, HaveSSE2 is left untouched.
//   * 32-bit x86 with MSVC or GCC: probe CPUID with inline assembly.
//   * Anything else: no check can be compiled, so SSE2 is assumed absent.
//
//==========================================================================
static void VerifySSE2 ()
{
#if defined(__SSE2__) || defined(_M_X64) || defined(__x86_64__)
	// If we compiled with SSE2 support enabled for everything, then
	// obviously it's available, or the program won't get very far.
	// Likewise, processors implementing AMD64 are required to support SSE2.
	return;
#elif defined(_MSC_VER) && defined(_M_IX86)
	if (!HaveSSE2)
	{
		// Already disabled (e.g. on the command line); nothing to verify.
		return;
	}
	// Assume the worst; the assembly below only ever sets the flag.
	HaveSSE2 = false;
	__asm
	{
		pushfd				// save EFLAGS
		pop eax				// store EFLAGS in EAX
		mov edx,eax			// save in EDX for later testing
		xor eax,0x00200000	// toggle bit 21
		push eax			// put to stack
		popfd				// save changed EAX to EFLAGS
		pushfd				// push EFLAGS to TOS
		pop eax				// store EFLAGS in EAX
		cmp eax,edx			// see if bit 21 has changed
		jz noid				// if no change, then no CPUID
		// Check the feature flag for SSE2
		mov eax,1
		cpuid
		test edx,(1<<26)
		setnz HaveSSE2
noid:
	}
#elif defined(__GNUC__) && defined(__i386__)
	// Same as above, but for GCC
	if (!HaveSSE2)
	{
		// Already disabled (e.g. on the command line); nothing to verify.
		return;
	}
	// Assume the worst; the assembly below only ever sets the flag.
	HaveSSE2 = false;
	// HaveSSE2 is a read-write ("+m") operand because the early-out path
	// (no CPUID) leaves it unmodified; declaring it write-only would let
	// the compiler discard the store above. The label is a local numeric
	// one so the symbol cannot clash if this code is emitted more than once.
	asm volatile
		("pushfl\n\t"
		 "popl %%eax\n\t"
		 "movl %%eax,%%edx\n\t"
		 "xorl $0x200000,%%eax\n\t"
		 "pushl %%eax\n\t"
		 "popfl\n\t"
		 "pushfl\n\t"
		 "popl %%eax\n\t"
		 "cmp %%edx,%%eax\n\t"
		 "jz 1f\n\t"
		 "mov $1,%%eax\n\t"
		 "cpuid\n\t"
		 "test $(1<<26),%%edx\n\t"
		 "setneb %0\n"
		 "1:"
		 : "+m" (HaveSSE2)
		 :
		 : "eax", "ebx", "ecx", "edx", "cc");
#else
	// Can't compile a check, so assume SSE2 is not present.
	HaveSSE2 = false;
#endif
}
//==========================================================================
//
// PointToAngle
@ -509,13 +600,8 @@ angle_t PointToAngle (fixed_t x, fixed_t y)
{
double ang = atan2 (double(y), double(x));
const double rad2bam = double(1<<30) / M_PI;
#if 0
if (ang < 0.0)
{
ang = 2*M_PI+ang;
}
#endif
return angle_t(ang * rad2bam) << 1;
double dbam = ang * rad2bam;
return angle_t(dbam) << 1;
}
//==========================================================================

View file

@ -40,11 +40,12 @@
FNodeBuilder::FNodeBuilder (FLevel &level,
TArray<FPolyStart> &polyspots, TArray<FPolyStart> &anchors,
const char *name, bool makeGLnodes)
const char *name, bool makeGLnodes, bool enableSSE2)
: Level(level), SegsStuffed(0), MapName(name)
{
VertexMap = new FVertexMap (*this, Level.MinX, Level.MinY, Level.MaxX, Level.MaxY);
GLNodes = makeGLnodes;
EnableSSE2 = enableSSE2;
FindUsedVertices (Level.Vertices, Level.NumVertices);
MakeSegsFromSides ();
FindPolyContainers (polyspots, anchors);
@ -709,150 +710,6 @@ int FNodeBuilder::Heuristic (node_t &node, DWORD set, bool honorNoSplit)
return score;
}
// Classifies a seg against a splitter (node).
//
// node    - the splitter; its x, y, dx and dy fields are read.
// seg     - the seg to classify; its v1 and v2 index into Vertices.
// sidev1  - receives the side of the seg's first vertex:
//           -1 = front, 1 = back, 0 = on the splitter (within SIDE_EPSILON).
// sidev2  - same, for the seg's second vertex.
//
// Returns:
//	0 = seg is in front
//	1 = seg is in back
// -1 = seg cuts the node
int FNodeBuilder::ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2)
{
#define FAR_ENOUGH 17179869184.f		// 4<<32

	const FPrivVert *v1 = &Vertices[seg->v1];
	const FPrivVert *v2 = &Vertices[seg->v2];

	double d_x1 = double(node.x);
	double d_y1 = double(node.y);
	double d_dx = double(node.dx);
	double d_dy = double(node.dy);
	double d_xv1 = double(v1->x);
	double d_xv2 = double(v2->x);
	double d_yv1 = double(v1->y);
	double d_yv2 = double(v2->y);

	// Cross products of the splitter direction with the vector from each
	// vertex to the splitter origin. Their signs give the side of each
	// vertex: positive is front (-1), otherwise back (1).
	double s_num1 = (d_y1 - d_yv1) * d_dx - (d_x1 - d_xv1) * d_dy;
	double s_num2 = (d_y1 - d_yv2) * d_dx - (d_x1 - d_xv2) * d_dy;

	// Bit 0: vertex 1 needs the precise distance test.
	// Bit 1: vertex 2 needs the precise distance test.
	// (Not called "near" because that is a macro in some Windows headers.)
	int nearMask;

	if (s_num1 <= -FAR_ENOUGH)
	{
		if (s_num2 <= -FAR_ENOUGH)
		{ // Both vertices are far behind the splitter.
			sidev1 = sidev2 = 1;
			return 1;
		}
		if (s_num2 >= FAR_ENOUGH)
		{ // Vertices are far away on opposite sides.
			sidev1 = 1;
			sidev2 = -1;
			return -1;
		}
		nearMask = 2;
	}
	else if (s_num1 >= FAR_ENOUGH)
	{
		if (s_num2 >= FAR_ENOUGH)
		{ // Both vertices are far in front of the splitter.
			sidev1 = sidev2 = -1;
			return 0;
		}
		if (s_num2 <= -FAR_ENOUGH)
		{ // Vertices are far away on opposite sides.
			sidev1 = -1;
			sidev2 = 1;
			return -1;
		}
		nearMask = 2;
	}
	else
	{
		nearMask = 1 | ((fabs(s_num2) < FAR_ENOUGH) << 1);
	}

	// Every path that reaches this point has at least one bit set in
	// nearMask, so the reciprocal squared length is always needed.
	double l = 1.0 / (d_dx*d_dx + d_dy*d_dy);

	// Default to the sign test, then snap a vertex onto the splitter if it
	// was flagged as near and its squared distance is below the epsilon.
	sidev1 = s_num1 > 0.0 ? -1 : 1;
	if (nearMask & 1)
	{
		double dist = s_num1 * s_num1 * l;
		if (dist < SIDE_EPSILON*SIDE_EPSILON)
		{
			sidev1 = 0;
		}
	}

	sidev2 = s_num2 > 0.0 ? -1 : 1;
	if (nearMask & 2)
	{
		double dist = s_num2 * s_num2 * l;
		if (dist < SIDE_EPSILON*SIDE_EPSILON)
		{
			sidev2 = 0;
		}
	}

	if ((sidev1 | sidev2) == 0)
	{ // seg is coplanar with the splitter, so use its orientation to determine
	  // which child it ends up in. If it faces the same direction as the splitter,
	  // it goes in front. Otherwise, it goes in back.
	  //
	  // Earlier formulations, kept for reference:
	  //return DMulScale32 (node.dx, v2->x - v1->x, node.dy, v2->y - v1->y) < 0;
	  //return (d_dx * (d_xv2 - d_xv1) + d_dy * (d_yv2 - d_yv1)) < 0.0;
		bool sameDirection;

		if (node.dx != 0)
		{
			sameDirection = (node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x);
		}
		else
		{
			sameDirection = (node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y);
		}
		return sameDirection ? 0 : 1;
	}
	else if (sidev1 <= 0 && sidev2 <= 0)
	{
		return 0;
	}
	else if (sidev1 >= 0 && sidev2 >= 0)
	{
		return 1;
	}
	return -1;
}
void FNodeBuilder::SplitSegs (DWORD set, node_t &node, DWORD splitseg, DWORD &outset0, DWORD &outset1)
{
outset0 = DWORD_MAX;

View file

@ -125,6 +125,8 @@ class FNodeBuilder
}
};
friend class FVertexMap;
public:
struct FPolyStart
@ -135,7 +137,7 @@ public:
FNodeBuilder (FLevel &level,
TArray<FPolyStart> &polyspots, TArray<FPolyStart> &anchors,
const char *name, bool makeGLnodes);
const char *name, bool makeGLnodes, bool enableSSE2);
~FNodeBuilder ();
void GetVertices (WideVertex *&verts, int &count);
@ -175,6 +177,7 @@ private:
DWORD HackMate; // Seg to use in front of hack seg
FLevel &Level;
bool GLNodes;
bool EnableSSE2;
// Progress meter stuff
int SegsStuffed;
@ -199,9 +202,17 @@ private:
void SplitSegs (DWORD set, node_t &node, DWORD splitseg, DWORD &outset0, DWORD &outset1);
DWORD SplitSeg (DWORD segnum, int splitvert, int v1InFront);
int Heuristic (node_t &node, DWORD set, bool honorNoSplit);
int ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
int CountSegs (DWORD set) const;
// Returns:
// 0 = seg is in front
// 1 = seg is in back
// -1 = seg cuts the node
inline int ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
int ClassifyLine2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
int ClassifyLineSSE2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2);
void FixSplitSharers ();
double AddIntersection (const node_t &node, int vertex);
void AddMinisegs (const node_t &node, DWORD splitseg, DWORD &fset, DWORD &rset);
@ -258,3 +269,19 @@ inline int FNodeBuilder::PointOnSide (int x, int y, int x1, int y1, int dx, int
return s_num > 0.0 ? -1 : 1;
}
// Dispatches to one of the two ClassifyLine implementations. The choice is
// made at compile time when only one of them makes sense for the build, and
// at run time (via EnableSSE2) otherwise.
inline int FNodeBuilder::ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2)
{
#if defined(__SSE2__)
	// The whole program is being built with SSE2, so always take the SSE2 path.
	return ClassifyLineSSE2 (node, seg, sidev1, sidev2);
#elif defined(_MSC_VER) && _MSC_VER < 1300
	// VC 6 cannot generate SSE2 code, so only the plain routine is usable.
	return ClassifyLine2 (node, seg, sidev1, sidev2);
#else
	// Both routines are available; let the per-builder flag decide.
	return EnableSSE2
		? ClassifyLineSSE2 (node, seg, sidev1, sidev2)
		: ClassifyLine2 (node, seg, sidev1, sidev2);
#endif
}

View file

@ -484,7 +484,7 @@ void FProcessor::Write (FWadWriter &out)
try
{
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), BuildGLNodes);
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), BuildGLNodes, HaveSSE2);
if (builder == NULL)
{
throw std::runtime_error(" Not enough memory to build nodes!");
@ -520,7 +520,7 @@ void FProcessor::Write (FWadWriter &out)
{
// Now repeat the process to obtain regular nodes
delete builder;
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), false);
builder = new FNodeBuilder (Level, PolyStarts, PolyAnchors, Wad.LumpName (Lump), false, HaveSSE2);
if (builder == NULL)
{
throw std::runtime_error(" Not enough memory to build regular nodes!");

View file

@ -39,12 +39,13 @@ RSC=rc.exe
# PROP Use_Debug_Libraries 0
# PROP Output_Dir ".\Release"
# PROP Intermediate_Dir ".\Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
MTL=midl.exe
# ADD BASE MTL /nologo /tlb".\Release\zdbsp.tlb" /win32
# ADD MTL /nologo /tlb".\Release\zdbsp.tlb" /win32
# ADD BASE CPP /nologo /W3 /GX /Zi /Ot /Og /Oi /Oy /Ob2 /Gy /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /GA /GF /c
# ADD CPP /nologo /MD /W3 /GX /Zi /Ot /Og /Oi /Oy /Ob2 /Gy /I "zlib" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /GA /GF /c
# ADD CPP /nologo /MD /W3 /GX /Zi /Ot /Og /Oi /Oy /Ob2 /Gy /I "zlib" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D _MSC_VER=1200 /GA /GF /c
# SUBTRACT CPP /YX /Yc /Yu
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
@ -73,7 +74,7 @@ MTL=midl.exe
# ADD BASE MTL /nologo /tlb".\Debug\zdbsp.tlb" /win32
# ADD MTL /nologo /tlb".\Debug\zdbsp.tlb" /win32
# ADD BASE CPP /nologo /W3 /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /GZ /c
# ADD CPP /nologo /W3 /GX /ZI /Od /I "zlib" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /GZ /c
# ADD CPP /nologo /W3 /GX /ZI /Od /I "zlib" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D _MSC_VER=1200 /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
@ -157,6 +158,8 @@ DEP_CPP_MAIN_=\
".\wad.h"\
".\workdata.h"\
".\zdbsp.h"\
".\zlib\zconf.h"\
".\zlib\zlib.h"\
# End Source File
# Begin Source File
@ -173,7 +176,7 @@ DEP_CPP_NODEB=\
# End Source File
# Begin Source File
SOURCE=nodebuild_events.cpp
SOURCE=.\nodebuild_classify_nosse2.cpp
DEP_CPP_NODEBU=\
".\doomdata.h"\
".\nodebuild.h"\
@ -184,8 +187,30 @@ DEP_CPP_NODEBU=\
# End Source File
# Begin Source File
SOURCE=nodebuild_extract.cpp
SOURCE=.\nodebuild_classify_sse2.cpp
DEP_CPP_NODEBUI=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
".\workdata.h"\
".\zdbsp.h"\
# End Source File
# Begin Source File
SOURCE=nodebuild_events.cpp
DEP_CPP_NODEBUIL=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
".\workdata.h"\
".\zdbsp.h"\
# End Source File
# Begin Source File
SOURCE=nodebuild_extract.cpp
DEP_CPP_NODEBUILD=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
@ -197,7 +222,7 @@ DEP_CPP_NODEBUI=\
# Begin Source File
SOURCE=nodebuild_gl.cpp
DEP_CPP_NODEBUIL=\
DEP_CPP_NODEBUILD_=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
@ -208,10 +233,11 @@ DEP_CPP_NODEBUIL=\
# Begin Source File
SOURCE=nodebuild_utility.cpp
DEP_CPP_NODEBUILD=\
DEP_CPP_NODEBUILD_U=\
".\doomdata.h"\
".\nodebuild.h"\
".\tarray.h"\
".\templates.h"\
".\workdata.h"\
".\zdbsp.h"\
@ -228,6 +254,8 @@ DEP_CPP_PROCE=\
".\wad.h"\
".\workdata.h"\
".\zdbsp.h"\
".\zlib\zconf.h"\
".\zlib\zlib.h"\
# End Source File
# Begin Source File

View file

@ -45,6 +45,7 @@ extern int AAPreference;
extern bool CheckPolyobjs;
extern bool ShowMap;
extern bool CompressNodes, CompressGLNodes, V5GLNodes;
extern bool HaveSSE2;
#define FIXED_MAX INT_MAX

View file

@ -162,7 +162,7 @@
OptimizeForProcessor="0"
OptimizeForWindowsApplication="TRUE"
AdditionalIncludeDirectories="zlib"
PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE,__SSE2__"
StringPooling="TRUE"
RuntimeLibrary="4"
EnableFunctionLevelLinking="TRUE"
@ -232,6 +232,36 @@
</File>
<File
RelativePath=".\nodebuild.cpp">
<FileConfiguration
Name="Release|Win32">
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"/>
</FileConfiguration>
<FileConfiguration
Name="Release (SSE2)|Win32">
<Tool
Name="VCCLCompilerTool"
AssemblerOutput="4"/>
</FileConfiguration>
</File>
<File
RelativePath=".\nodebuild_classify_nosse2.cpp">
</File>
<File
RelativePath=".\nodebuild_classify_sse2.cpp">
<FileConfiguration
Name="Release|Win32">
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="2"/>
</FileConfiguration>
<FileConfiguration
Name="Debug|Win32">
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="2"/>
</FileConfiguration>
</File>
<File
RelativePath="nodebuild_events.cpp">

View file

@ -198,8 +198,13 @@
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
InlineFunctionExpansion="2"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="1"
OmitFramePointers="true"
AdditionalIncludeDirectories="zlib"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;__SSE2__"
RuntimeLibrary="0"
EnableEnhancedInstructionSet="2"
FloatingPointModel="2"
@ -282,6 +287,38 @@
RelativePath=".\nodebuild.cpp"
>
</File>
<File
RelativePath=".\nodebuild_classify_nosse2.cpp"
>
</File>
<File
RelativePath=".\nodebuild_classify_sse2.cpp"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="2"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="2"
/>
</FileConfiguration>
<FileConfiguration
Name="Release, SSE2|Win32"
>
<Tool
Name="VCCLCompilerTool"
EnableEnhancedInstructionSet="2"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\nodebuild_events.cpp"
>