From 7a601515df65bf586ad02602a87c9712192f4d4f Mon Sep 17 00:00:00 2001 From: Randy Heit Date: Tue, 6 Jun 2006 21:39:08 +0000 Subject: [PATCH] - ClassifyLine now chooses either SSE2 or regular x87 math depending on whether or not SSE2 is available at runtime. Since most of the time is spent in ClassifyLine, using SSE2 in just this one function helps the most. - Nodebuilding is a little faster if we inline PointOnSide. - Changed FEventTree into a regular binary tree, since there just aren't enough nodes inserted into it to make a red-black tree worthwhile. - Added more checks at the start of ClassifyLine so that it has a better chance of avoiding the more complicated checking, and it seems to have paid off with a reasonably modest performance boost. - Added a "vertex map" for ZDBSP's vertex selection. (Think BLOCKMAP for vertices instead of lines.) On large maps, this can result in a very significant speed up. (In one particular map, ZDBSP had previously spent 40% of its time just scanning through all the vertices in the map. Now the time it spends finding vertices is immeasurable.) On small maps, this won't make much of a difference, because the number of vertices to search was so small to begin with. 
SVN r173 (trunk) --- Makefile.linux | 11 ++ Makefile.mgw | 6 +- Makefile.mingw | 6 + default.cbd | 6 + docs/rh-log.txt | 22 ++- src/nodebuild.cpp | 115 ++-------------- src/nodebuild.h | 122 +++++++++++++++-- src/nodebuild_classify_nosse2.cpp | 139 +++++++++++++++++++ src/nodebuild_classify_sse2.cpp | 144 ++++++++++++++++++++ src/nodebuild_events.cpp | 219 ------------------------------ src/nodebuild_utility.cpp | 156 +++++++++++++++++---- src/p_setup.cpp | 3 +- zdoom.vcproj | 24 ++++ 13 files changed, 604 insertions(+), 369 deletions(-) create mode 100644 src/nodebuild_classify_nosse2.cpp create mode 100644 src/nodebuild_classify_sse2.cpp diff --git a/Makefile.linux b/Makefile.linux index 5a47d9836..8e6cd5a70 100644 --- a/Makefile.linux +++ b/Makefile.linux @@ -1,4 +1,10 @@ # created on 4/12/2006 by James Bentler + +# RH says: Might I suggest using -MMD instead of -MM? You can create the +# .o files and the .d files in the same step and avoid rerunning make. +# I'd do it myself, but I don't want to break anything without testing +# it first. + CXX ?= g++ CC ?= gcc NASM ?= nasm @@ -105,6 +111,11 @@ _obj_,$(OBJDIR)/$(patsubst %.c,%.o,$(notdir $$$(src))),$(CBUILD_PATTERN))))) $(OBJDIR)/%.o: $(CXX) -c $(CXXFLAGS) -o $@ -c $< +# Hi, you Linux people. This works with Makefile.mingw, so i assume it works here too. +# This file needs special handling so that it actually gets compiled with SSE2 support. 
+$(OBJDIR)/nodebuild_classify_sse2.o: nodebuild_classify_sse2.cpp + $(CXX) $(CXXFLAGS) -msse2 -mfpmath=sse -c -o $@ $< + # start a new instance of make after dependency files have been made deps: $(DEPS) ifdef RESTART diff --git a/Makefile.mgw b/Makefile.mgw index 5ec4febfd..63c7a4e26 100644 --- a/Makefile.mgw +++ b/Makefile.mgw @@ -19,9 +19,9 @@ $(RELEASETARGET): game $(DEBUGTARGET): debuggame basetools: ccdv.exe - $(MAKE) -C tools/lemon -f Makefile.mgw - $(MAKE) -C tools/re2c -f Makefile.mgw - $(MAKE) -C wadsrc -f Makefile.mgw + $(MAKE) -C tools/lemon -f Makefile + $(MAKE) -C tools/re2c -f Makefile + $(MAKE) -C wadsrc -f Makefile $(MAKE) -C zlib -f Makefile.mgw $(MAKE) -C flac -f Makefile.mgw diff --git a/Makefile.mingw b/Makefile.mingw index 8f6b85072..181f75754 100644 --- a/Makefile.mingw +++ b/Makefile.mingw @@ -124,6 +124,8 @@ OBJECTS += \ $(OBJDIR)/m_random.o \ $(OBJDIR)/mus2midi.o \ $(OBJDIR)/nodebuild.o \ + $(OBJDIR)/nodebuild_classify_nosse2.o \ + $(OBJDIR)/nodebuild_classify_sse2.o \ $(OBJDIR)/nodebuild_events.o \ $(OBJDIR)/nodebuild_extract.o \ $(OBJDIR)/nodebuild_gl.o \ @@ -393,6 +395,10 @@ all: $(TARGET) $(OBJDIR)/%.o : %.cpp $(CCDV) $(CXX) $(CXXFLAGS) -o $@ -c $< +# This file needs special handling so that it actually gets compiled with SSE2 support. +$(OBJDIR)/nodebuild_classify_sse2.o: nodebuild_classify_sse2.cpp + $(CCDV) $(CXX) $(CXXFLAGS) -msse2 -mfpmath=sse -c -o $@ $< + $(OBJDIR)/%.o : %.nas $(CCDV) nasmw -o $@ -f win32 $< diff --git a/default.cbd b/default.cbd index c1b8d18d3..54edb3b90 100644 --- a/default.cbd +++ b/default.cbd @@ -142,6 +142,10 @@ else done +# How do you pass -msse2 -mfpmath=sse with cbuild when compiling nodebuild_classify_sse2.cpp? +# Since I don't know the answer, that means you won't get an SSE2-supporting nodebuilder if +# you build with cbuild. Use make instead. 
+ ${COMPILER} "autostart.cpp \ a.nas \ misc.nas \ @@ -204,6 +208,8 @@ ${COMPILER} "autostart.cpp \ m_random.cpp \ mus2midi.cpp \ nodebuild.cpp \ + nodebuild_classify_nosse2.cpp \ + nodebuild_classify_sse2.cpp \ nodebuild_events.cpp \ nodebuild_extract.cpp \ nodebuild_gl.cpp \ diff --git a/docs/rh-log.txt b/docs/rh-log.txt index 61c1e2e69..0b7d0e267 100644 --- a/docs/rh-log.txt +++ b/docs/rh-log.txt @@ -1,3 +1,22 @@ +June 6, 2006 +- Integrated recent ZDBSP improvements into the internal nodebuilder: + - ClassifyLine now chooses either SSE2 or regular x87 math depending on whether + or not SSE2 is available at runtime. Since most of the time is spent in + ClassifyLine, using SSE2 in just this one function helps the most. + - Nodebuilding is a little faster if we inline PointOnSide. + - Changed FEventTree into a regular binary tree, since there just aren't enough + nodes inserted into it to make a red-black tree worthwhile. + - Added more checks at the start of ClassifyLine so that it has a better chance + of avoiding the more complicated checking, and it seems to have paid off with + a reasonably modest performance boost. + - Added a "vertex map" for ZDBSP's vertex selection. (Think BLOCKMAP for + vertices instead of lines.) On large maps, this can result in a very + significant speed up. (In one particular map, ZDBSP had previously + spent 40% of its time just scanning through all the vertices in the + map. Now the time it spends finding vertices is immeasurable.) On small maps, + this won't make much of a difference, because the number of vertices to search + was so small to begin with. + June 3, 2006 (Changes by Graf Zahl) - Added a StartConversation special that allows automatic activation of Strife dialogs. @@ -22,9 +41,6 @@ May 31, 2006 - Red-Black Tree implementation was broken and colored every node red. - Moved most of the code for outputting degenerate GL subsectors into another function. - - Removed forgotten debugging file dump from WriteSSectors2(). 
- - Enabled reference optimization and COMDAT folding in the linker for a slightly - smaller executable. May 31, 2006 (Changes by Graf Zahl) - Fixed: Ammo items dropped by monsters that have a default amount of 1 didn't diff --git a/src/nodebuild.cpp b/src/nodebuild.cpp index e7e6bc209..e9d76cc9b 100644 --- a/src/nodebuild.cpp +++ b/src/nodebuild.cpp @@ -56,14 +56,6 @@ const int MaxSegs = 64; const int SplitCost = 8; const int AAPreference = 16; -// Points within this distance of a line will be considered on the line. -// Units are in fixed_ts. -const double SIDE_EPSILON = 6.5536; - -// Vertices within this distance of each other vertically and horizontally -// will be considered as the same vertex. -const fixed_t VERTEX_EPSILON = 6; - #if 0 #define D(x) x #else @@ -72,9 +64,10 @@ const fixed_t VERTEX_EPSILON = 6; FNodeBuilder::FNodeBuilder (FLevel &level, TArray &polyspots, TArray &anchors, - bool makeGLNodes) - : Level (level), GLNodes (makeGLNodes), SegsStuffed (0) + bool makeGLNodes, bool enableSSE2) + : Level(level), GLNodes(makeGLNodes), EnableSSE2(enableSSE2), SegsStuffed(0) { + VertexMap = new FVertexMap (*this, Level.MinX, Level.MinY, Level.MaxX, Level.MaxY); FindUsedVertices (Level.Vertices, Level.NumVertices); MakeSegsFromSides (); FindPolyContainers (polyspots, anchors); @@ -82,6 +75,14 @@ FNodeBuilder::FNodeBuilder (FLevel &level, BuildTree (); } +FNodeBuilder::~FNodeBuilder() +{ + if (VertexMap != 0) + { + delete VertexMap; + } +} + void FNodeBuilder::BuildTree () { fixed_t bbox[4]; @@ -194,7 +195,7 @@ void FNodeBuilder::CreateSubsectorsForReal () sub.numlines = (DWORD)(SegList.Size() - sub.firstline); // Sort segs by linedef for special effects - qsort (&SegList[sub.firstline], sub.numlines, sizeof(int), SortSegs); + qsort (&SegList[sub.firstline], sub.numlines, sizeof(USegPtr), SortSegs); // Convert seg pointers into indices for (unsigned int i = sub.firstline; i < SegList.Size(); ++i) @@ -701,52 +702,6 @@ int FNodeBuilder::Heuristic (node_t &node, 
DWORD set, bool honorNoSplit) return score; } -int FNodeBuilder::ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2) -{ - const FPrivVert *v1 = &Vertices[seg->v1]; - const FPrivVert *v2 = &Vertices[seg->v2]; - sidev1 = PointOnSide (v1->x, v1->y, node.x, node.y, node.dx, node.dy); - sidev2 = PointOnSide (v2->x, v2->y, node.x, node.y, node.dx, node.dy); - - if ((sidev1 | sidev2) == 0) - { // seg is coplanar with the splitter, so use its orientation to determine - // which child it ends up in. If it faces the same direction as the splitter, - // it goes in front. Otherwise, it goes in back. - - if (node.dx != 0) - { - if ((node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x)) - { - return 0; - } - else - { - return 1; - } - } - else - { - if ((node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y)) - { - return 0; - } - else - { - return 1; - } - } - } - else if (sidev1 <= 0 && sidev2 <= 0) - { - return 0; - } - else if (sidev1 >= 0 && sidev2 >= 0) - { - return 1; - } - return -1; -} - void FNodeBuilder::SplitSegs (DWORD set, node_t &node, DWORD splitseg, DWORD &outset0, DWORD &outset1) { outset0 = DWORD_MAX; @@ -809,24 +764,7 @@ void FNodeBuilder::SplitSegs (DWORD set, node_t &node, DWORD splitseg, DWORD &ou newvert.y = Vertices[seg->v1].y; newvert.x += fixed_t(frac * double(Vertices[seg->v2].x - newvert.x)); newvert.y += fixed_t(frac * double(Vertices[seg->v2].y - newvert.y)); - for (i = 0; i < Vertices.Size(); ++i) - { - if (abs(Vertices[i].x - newvert.x) < VERTEX_EPSILON && - abs(Vertices[i].y - newvert.y) < VERTEX_EPSILON) - { - break; - } - } - if (i < Vertices.Size()) - { - vertnum = i; - } - else - { - newvert.segs = DWORD_MAX; - newvert.segs2 = DWORD_MAX; - vertnum = Vertices.Push (newvert); - } + vertnum = VertexMap->SelectVertexClose (newvert); seg2 = SplitSeg (set, vertnum, sidev1); @@ -1049,33 +987,6 @@ double FNodeBuilder::InterceptVector (const node_t &splitter, const FPrivSeg &se return frac; } -int 
FNodeBuilder::PointOnSide (int x, int y, int x1, int y1, int dx, int dy) -{ - // For most cases, a simple dot product is enough. - double d_dx = double(dx); - double d_dy = double(dy); - double d_x = double(x); - double d_y = double(y); - double d_x1 = double(x1); - double d_y1 = double(y1); - - double s_num = (d_y1-d_y)*d_dx - (d_x1-d_x)*d_dy; - - if (fabs(s_num) < 17179869184.0) // 4<<32 - { - // Either the point is very near the line, or the segment defining - // the line is very short: Do a more expensive test to determine - // just how far from the line the point is. - double l = sqrt(d_dx*d_dx+d_dy*d_dy); - double dist = fabs(s_num)/l; - if (dist < SIDE_EPSILON) - { - return 0; - } - } - return s_num > 0.0 ? -1 : 1; -} - void FNodeBuilder::PrintSet (int l, DWORD set) { Printf ("set %d:\n", l); diff --git a/src/nodebuild.h b/src/nodebuild.h index 167e26065..0ca9d1cee 100644 --- a/src/nodebuild.h +++ b/src/nodebuild.h @@ -11,7 +11,6 @@ struct FEventInfo struct FEvent { FEvent *Parent, *Left, *Right; - enum { RED, BLACK } Color; double Distance; FEventInfo Info; }; @@ -28,7 +27,6 @@ public: FEvent *GetNewNode (); void Insert (FEvent *event); - void Delete (FEvent *event); FEvent *FindEvent (double distance) const; void DeleteAll (); @@ -37,9 +35,6 @@ private: FEvent *Root; FEvent *Spare; - void LeftRotate (FEvent *event); - void RightRotate (FEvent *event); - void DeleteFixUp (FEvent *event); void DeletionTraverser (FEvent *event); FEvent *Successor (FEvent *event) const; FEvent *Predecessor (FEvent *event) const; @@ -91,12 +86,51 @@ class FNodeBuilder DWORD Seg; bool Forward; }; + + // Like a blockmap, but for vertices instead of lines + class FVertexMap + { + public: + FVertexMap (FNodeBuilder &builder, fixed_t minx, fixed_t miny, fixed_t maxx, fixed_t maxy); + ~FVertexMap (); + + int SelectVertexExact (FPrivVert &vert); + int SelectVertexClose (FPrivVert &vert); + + private: + FNodeBuilder &MyBuilder; + TArray *VertexGrid; + + fixed_t MinX, MinY, MaxX, MaxY; 
+ int BlocksWide, BlocksTall; + + enum { BLOCK_SHIFT = 8 + FRACBITS }; + enum { BLOCK_SIZE = 1 << BLOCK_SHIFT }; + + int InsertVertex (FPrivVert &vert); + inline int GetBlock (fixed_t x, fixed_t y) + { + assert (x >= MinX); + assert (y >= MinY); + assert (x <= MaxX); + assert (y <= MaxY); + return (unsigned(x - MinX) >> BLOCK_SHIFT) + (unsigned(y - MinY) >> BLOCK_SHIFT) * BlocksWide; + } + }; + + friend class FVertexMap; + + public: struct FLevel { vertex_t *Vertices; int NumVertices; side_t *Sides; int NumSides; line_t *Lines; int NumLines; + + fixed_t MinX, MinY, MaxX, MaxY; + + void FindMapBounds (); }; struct FPolyStart @@ -107,7 +141,8 @@ public: FNodeBuilder (FLevel &level, TArray &polyspots, TArray &anchors, - bool makeGLNodes); + bool makeGLNodes, bool enableSSE2); + ~FNodeBuilder (); void Extract (node_t *&nodes, int &nodeCount, seg_t *&segs, int &segCount, @@ -116,7 +151,15 @@ public: static angle_t PointToAngle (fixed_t dx, fixed_t dy); + // < 0 : in front of line + // == 0 : on line + // > 0 : behind line + + static inline int PointOnSide (int x, int y, int x1, int y1, int dx, int dy); + private: + FVertexMap *VertexMap; + TArray Nodes; TArray Subsectors; TArray SubsectorSets; @@ -130,18 +173,18 @@ private: TArray Colinear; // Loops with edges colinear to a splitter FEventTree Events; // Vertices intersected by the current splitter - TArray SplitSharers; // Segs collinear with the current splitter + TArray SplitSharers; // Segs colinear with the current splitter DWORD HackSeg; // Seg to force to back of splitter DWORD HackMate; // Seg to use in front of hack seg FLevel &Level; bool GLNodes; // Add minisegs to make GL nodes? 
+ bool EnableSSE2; // Progress meter stuff int SegsStuffed; void FindUsedVertices (vertex_t *vertices, int max); - int SelectVertexExact (FPrivVert &vertex); void BuildTree (); void MakeSegsFromSides (); int CreateSeg (int linenum, int sidenum); @@ -159,9 +202,17 @@ private: void SplitSegs (DWORD set, node_t &node, DWORD splitseg, DWORD &outset0, DWORD &outset1); DWORD SplitSeg (DWORD segnum, int splitvert, int v1InFront); int Heuristic (node_t &node, DWORD set, bool honorNoSplit); - int ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2); int CountSegs (DWORD set) const; + // Returns: + // 0 = seg is in front + // 1 = seg is in back + // -1 = seg cuts the node + + inline int ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2); + int ClassifyLine2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2); + int ClassifyLineSSE2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2); + void FixSplitSharers (const node_t &node); double AddIntersection (const node_t &node, int vertex); void AddMinisegs (const node_t &node, DWORD splitseg, DWORD &fset, DWORD &rset); @@ -179,12 +230,55 @@ private: static int STACK_ARGS SortSegs (const void *a, const void *b); - // < 0 : in front of line - // == 0 : on line - // > 0 : behind line - - int PointOnSide (int x, int y, int x1, int y1, int dx, int dy); double InterceptVector (const node_t &splitter, const FPrivSeg &seg); void PrintSet (int l, DWORD set); }; + +// Points within this distance of a line will be considered on the line. +// Units are in fixed_ts. +const double SIDE_EPSILON = 6.5536; + +inline int FNodeBuilder::PointOnSide (int x, int y, int x1, int y1, int dx, int dy) +{ + // For most cases, a simple dot product is enough. 
+ double d_dx = double(dx); + double d_dy = double(dy); + double d_x = double(x); + double d_y = double(y); + double d_x1 = double(x1); + double d_y1 = double(y1); + + double s_num = (d_y1-d_y)*d_dx - (d_x1-d_x)*d_dy; + + if (fabs(s_num) < 17179869184.f) // 4<<32 + { + // Either the point is very near the line, or the segment defining + // the line is very short: Do a more expensive test to determine + // just how far from the line the point is. + double l = d_dx*d_dx + d_dy*d_dy; // double l = sqrt(d_dx*d_dx+d_dy*d_dy); + double dist = s_num * s_num / l; // double dist = fabs(s_num)/l; + if (dist < SIDE_EPSILON*SIDE_EPSILON) // if (dist < SIDE_EPSILON) + { + return 0; + } + } + return s_num > 0.0 ? -1 : 1; +} + +inline int FNodeBuilder::ClassifyLine (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2) +{ +#ifdef __SSE2__ + // If compiling with SSE2 support everywhere, just use the SSE2 version. + return ClassifyLineSSE2 (node, seg, sidev1, sidev2); +#elif defined(_MSC_VER) && _MSC_VER < 1300 + // VC 6 does not support SSE2 optimizations. + return ClassifyLine2 (node, seg, sidev1, sidev2); +#else + // Select the routine based on our flag. 
+ if (EnableSSE2) + return ClassifyLineSSE2 (node, seg, sidev1, sidev2); + else + return ClassifyLine2 (node, seg, sidev1, sidev2); +#endif +} diff --git a/src/nodebuild_classify_nosse2.cpp b/src/nodebuild_classify_nosse2.cpp new file mode 100644 index 000000000..2b7f6bec6 --- /dev/null +++ b/src/nodebuild_classify_nosse2.cpp @@ -0,0 +1,139 @@ +#include "doomtype.h" +#include "nodebuild.h" + +#define FAR_ENOUGH 17179869184.f // 4<<32 + +int FNodeBuilder::ClassifyLine2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2) +{ + const FPrivVert *v1 = &Vertices[seg->v1]; + const FPrivVert *v2 = &Vertices[seg->v2]; + + double d_x1 = double(node.x); + double d_y1 = double(node.y); + double d_dx = double(node.dx); + double d_dy = double(node.dy); + double d_xv1 = double(v1->x); + double d_xv2 = double(v2->x); + double d_yv1 = double(v1->y); + double d_yv2 = double(v2->y); + + double s_num1 = (d_y1 - d_yv1) * d_dx - (d_x1 - d_xv1) * d_dy; + double s_num2 = (d_y1 - d_yv2) * d_dx - (d_x1 - d_xv2) * d_dy; + + int nears = 0; + + if (s_num1 <= -FAR_ENOUGH) + { + if (s_num2 <= -FAR_ENOUGH) + { + sidev1 = sidev2 = 1; + return 1; + } + if (s_num2 >= FAR_ENOUGH) + { + sidev1 = 1; + sidev2 = -1; + return -1; + } + nears = 1; + } + else if (s_num1 >= FAR_ENOUGH) + { + if (s_num2 >= FAR_ENOUGH) + { + sidev1 = sidev2 = -1; + return 0; + } + if (s_num2 <= -FAR_ENOUGH) + { + sidev1 = -1; + sidev2 = 1; + return -1; + } + nears = 1; + } + else + { + nears = 2 | int(fabs(s_num2) < FAR_ENOUGH); + } + + if (nears) + { + double l = 1.f / (d_dx*d_dx + d_dy*d_dy); + if (nears & 2) + { + double dist = s_num1 * s_num1 * l; + if (dist < SIDE_EPSILON*SIDE_EPSILON) + { + sidev1 = 0; + } + else + { + sidev1 = s_num1 > 0.0 ? -1 : 1; + } + } + else + { + sidev1 = s_num1 > 0.0 ? -1 : 1; + } + if (nears & 1) + { + double dist = s_num2 * s_num2 * l; + if (dist < SIDE_EPSILON*SIDE_EPSILON) + { + sidev2 = 0; + } + else + { + sidev2 = s_num2 > 0.0 ? -1 : 1; + } + } + else + { + sidev2 = s_num2 > 0.0 ? 
-1 : 1; + } + } + else + { + sidev1 = s_num1 > 0.0 ? -1 : 1; + sidev2 = s_num2 > 0.0 ? -1 : 1; + } + + if ((sidev1 | sidev2) == 0) + { // seg is coplanar with the splitter, so use its orientation to determine + // which child it ends up in. If it faces the same direction as the splitter, + // it goes in front. Otherwise, it goes in back. + + if (node.dx != 0) + { + if ((node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x)) + { + return 0; + } + else + { + return 1; + } + } + else + { + if ((node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y)) + { + return 0; + } + else + { + return 1; + } + } + } + else if (sidev1 <= 0 && sidev2 <= 0) + { + return 0; + } + else if (sidev1 >= 0 && sidev2 >= 0) + { + return 1; + } + return -1; +} diff --git a/src/nodebuild_classify_sse2.cpp b/src/nodebuild_classify_sse2.cpp new file mode 100644 index 000000000..05e4684a8 --- /dev/null +++ b/src/nodebuild_classify_sse2.cpp @@ -0,0 +1,144 @@ +#include "doomtype.h" +#include "nodebuild.h" + +#define FAR_ENOUGH 17179869184.f // 4<<32 + +// This function is identical to the ClassifyLine2 version. So how does it use SSE2? +// Easy! By explicitly enabling SSE2 in the configuration properties for this one +// file, we can build it with SSE2 enabled without forcing SSE2 on the rest of the +// project. 
+ +int FNodeBuilder::ClassifyLineSSE2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2) +{ + const FPrivVert *v1 = &Vertices[seg->v1]; + const FPrivVert *v2 = &Vertices[seg->v2]; + + double d_x1 = double(node.x); + double d_y1 = double(node.y); + double d_dx = double(node.dx); + double d_dy = double(node.dy); + double d_xv1 = double(v1->x); + double d_xv2 = double(v2->x); + double d_yv1 = double(v1->y); + double d_yv2 = double(v2->y); + + double s_num1 = (d_y1 - d_yv1) * d_dx - (d_x1 - d_xv1) * d_dy; + double s_num2 = (d_y1 - d_yv2) * d_dx - (d_x1 - d_xv2) * d_dy; + + int nears = 0; + + if (s_num1 <= -FAR_ENOUGH) + { + if (s_num2 <= -FAR_ENOUGH) + { + sidev1 = sidev2 = 1; + return 1; + } + if (s_num2 >= FAR_ENOUGH) + { + sidev1 = 1; + sidev2 = -1; + return -1; + } + nears = 1; + } + else if (s_num1 >= FAR_ENOUGH) + { + if (s_num2 >= FAR_ENOUGH) + { + sidev1 = sidev2 = -1; + return 0; + } + if (s_num2 <= -FAR_ENOUGH) + { + sidev1 = -1; + sidev2 = 1; + return -1; + } + nears = 1; + } + else + { + nears = 2 | int(fabs(s_num2) < FAR_ENOUGH); + } + + if (nears) + { + double l = 1.f / (d_dx*d_dx + d_dy*d_dy); + if (nears & 2) + { + double dist = s_num1 * s_num1 * l; + if (dist < SIDE_EPSILON*SIDE_EPSILON) + { + sidev1 = 0; + } + else + { + sidev1 = s_num1 > 0.0 ? -1 : 1; + } + } + else + { + sidev1 = s_num1 > 0.0 ? -1 : 1; + } + if (nears & 1) + { + double dist = s_num2 * s_num2 * l; + if (dist < SIDE_EPSILON*SIDE_EPSILON) + { + sidev2 = 0; + } + else + { + sidev2 = s_num2 > 0.0 ? -1 : 1; + } + } + else + { + sidev2 = s_num2 > 0.0 ? -1 : 1; + } + } + else + { + sidev1 = s_num1 > 0.0 ? -1 : 1; + sidev2 = s_num2 > 0.0 ? -1 : 1; + } + + if ((sidev1 | sidev2) == 0) + { // seg is coplanar with the splitter, so use its orientation to determine + // which child it ends up in. If it faces the same direction as the splitter, + // it goes in front. Otherwise, it goes in back. 
+ + if (node.dx != 0) + { + if ((node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x)) + { + return 0; + } + else + { + return 1; + } + } + else + { + if ((node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y)) + { + return 0; + } + else + { + return 1; + } + } + } + else if (sidev1 <= 0 && sidev2 <= 0) + { + return 0; + } + else if (sidev1 >= 0 && sidev2 >= 0) + { + return 1; + } + return -1; +} diff --git a/src/nodebuild_events.cpp b/src/nodebuild_events.cpp index d2831f6e2..550910473 100644 --- a/src/nodebuild_events.cpp +++ b/src/nodebuild_events.cpp @@ -45,7 +45,6 @@ FEventTree::FEventTree () : Root (&Nil), Spare (NULL) { memset (&Nil, 0, sizeof(Nil)); - Nil.Color = FEvent::BLACK; } FEventTree::~FEventTree () @@ -79,56 +78,6 @@ void FEventTree::DeletionTraverser (FEvent *node) } } -void FEventTree::LeftRotate (FEvent *x) -{ - FEvent *y = x->Right; - x->Right = y->Left; - if (y->Left != &Nil) - { - y->Left->Parent = x; - } - y->Parent = x->Parent; - if (x->Parent == &Nil) - { - Root = y; - } - else if (x == x->Parent->Left) - { - x->Parent->Left = y; - } - else - { - x->Parent->Right = y; - } - y->Left = x; - x->Parent = y; -} - -void FEventTree::RightRotate (FEvent *x) -{ - FEvent *y = x->Left; - x->Left = y->Right; - if (y->Right != &Nil) - { - y->Right->Parent = x; - } - y->Parent = x->Parent; - if (x->Parent == &Nil) - { - Root = y; - } - else if (x == x->Parent->Left) - { - x->Parent->Left = y; - } - else - { - x->Parent->Right = y; - } - y->Right = x; - x->Parent = y; -} - FEvent *FEventTree::GetNewNode () { FEvent *node; @@ -177,174 +126,6 @@ void FEventTree::Insert (FEvent *z) } z->Left = &Nil; z->Right = &Nil; - - z->Color = FEvent::RED; - while (z != Root && z->Parent->Color == FEvent::RED) - { - if (z->Parent == z->Parent->Parent->Left) - { - y = z->Parent->Parent->Right; - if (y->Color == FEvent::RED) - { - z->Parent->Color = FEvent::BLACK; - y->Color = FEvent::BLACK; - z->Parent->Parent->Color = FEvent::RED; - z = 
z->Parent->Parent; - } - else - { - if (z == z->Parent->Right) - { - z = z->Parent; - LeftRotate (z); - } - z->Parent->Color = FEvent::BLACK; - z->Parent->Parent->Color = FEvent::RED; - RightRotate (z->Parent->Parent); - } - } - else - { - y = z->Parent->Parent->Left; - if (y->Color == FEvent::RED) - { - z->Parent->Color = FEvent::BLACK; - y->Color = FEvent::BLACK; - z->Parent->Parent->Color = FEvent::RED; - z = z->Parent->Parent; - } - else - { - if (z == z->Parent->Left) - { - z = z->Parent; - RightRotate (z); - } - z->Parent->Color = FEvent::BLACK; - z->Parent->Parent->Color = FEvent::RED; - LeftRotate (z->Parent->Parent); - } - } - } - Root->Color = FEvent::BLACK; -} - -void FEventTree::Delete (FEvent *z) -{ - FEvent *x, *y; - - if (z->Left == &Nil || z->Right == &Nil) - { - y = z; - } - else - { - y = Successor (z); - } - if (y->Left != &Nil) - { - x = y->Left; - } - else - { - x = y->Right; - } - x->Parent = y->Parent; - if (y->Parent == &Nil) - { - Root = x; - } - else if (y == y->Parent->Left) - { - y->Parent->Left = x; - } - else - { - y->Parent->Right = x; - } - if (y != z) - { - z->Distance = y->Distance; - z->Info = y->Info; - } - if (y->Color == FEvent::BLACK) - { - DeleteFixUp (x); - } - - y->Left = Spare; - Spare = y; -} - -void FEventTree::DeleteFixUp (FEvent *x) -{ - FEvent *w; - - while (x != Root && x->Color == FEvent::BLACK) - { - if (x == x->Parent->Left) - { - w = x->Parent->Right; - if (w->Color == FEvent::RED) - { - w->Color = FEvent::BLACK; - x->Parent->Color = FEvent::RED; - LeftRotate (x->Parent); - w = x->Parent->Right; - } - if (w->Left->Color == FEvent::BLACK && w->Right->Color == FEvent::BLACK) - { - w->Color = FEvent::RED; - x = x->Parent; - } - else - { - if (w->Right->Color == FEvent::BLACK) - { - w->Left->Color = FEvent::BLACK; - w->Color = FEvent::RED; - RightRotate (w); - w = x->Parent->Right; - } - w->Color = x->Parent->Color; - x->Parent->Color = FEvent::BLACK; - w->Right->Color = FEvent::BLACK; - LeftRotate (x->Parent); - x = 
Root; - } - } - else - { - w = x->Parent->Left; - if (w->Color == FEvent::RED) - { - w->Color = FEvent::BLACK; - x->Parent->Color = FEvent::RED; - RightRotate (x->Parent); - w = x->Parent->Left; - } - if (w->Right->Color == FEvent::BLACK && w->Left->Color == FEvent::BLACK) - { - w->Color = FEvent::RED; - x = x->Parent; - } - else - { - if (w->Left->Color == FEvent::BLACK) - { - w->Right->Color = FEvent::BLACK; - w->Color = FEvent::RED; - LeftRotate (w); - w = x->Parent->Left; - } - w->Color = x->Parent->Color; - x->Parent->Color = FEvent::BLACK; - w->Left->Color = FEvent::BLACK; - RightRotate (x->Parent); - x = Root; - } - } - } } FEvent *FEventTree::Successor (FEvent *event) const diff --git a/src/nodebuild_utility.cpp b/src/nodebuild_utility.cpp index e1c23d5a6..528f59349 100644 --- a/src/nodebuild_utility.cpp +++ b/src/nodebuild_utility.cpp @@ -45,6 +45,7 @@ #include #include "nodebuild.h" +#include "templates.h" #include "m_bbox.h" #include "r_main.h" #include "i_system.h" @@ -52,6 +53,10 @@ static const int PO_LINE_START = 1; static const int PO_LINE_EXPLICIT = 5; +// Vertices within this distance of each other vertically and horizontally +// will be considered as the same vertex. 
+const fixed_t VERTEX_EPSILON = 6; + #if 0 #define D(x) x #else @@ -68,40 +73,33 @@ angle_t FNodeBuilder::PointToAngle (fixed_t x, fixed_t y) { const double rad2bam = double(1<<30) / M_PI; double ang = atan2 (double(y), double(x)); - if (ang < 0.0) - { - ang = 2*M_PI+ang; - } - return angle_t(ang * rad2bam) << 1; + return angle_t(int(ang * rad2bam)) << 1; /* go through int: converting a negative double straight to unsigned is undefined */ } void FNodeBuilder::FindUsedVertices (vertex_t *oldverts, int max) { - size_t *map = (size_t *)alloca (max*sizeof(size_t)); + int *map = (int *)alloca (max*sizeof(int)); int i; FPrivVert newvert; - memset (&map[0], -1, sizeof(size_t)*max); - - newvert.segs = DWORD_MAX; - newvert.segs2 = DWORD_MAX; + memset (&map[0], -1, sizeof(int)*max); for (i = 0; i < Level.NumLines; ++i) { ptrdiff_t v1 = Level.Lines[i].v1 - oldverts; ptrdiff_t v2 = Level.Lines[i].v2 - oldverts; - if (map[v1] == (size_t)-1) + if (map[v1] == -1) { newvert.x = oldverts[v1].x; newvert.y = oldverts[v1].y; - map[v1] = SelectVertexExact (newvert); + map[v1] = VertexMap->SelectVertexExact (newvert); } - if (map[v2] == (size_t)-1) + if (map[v2] == -1) { newvert.x = oldverts[v2].x; newvert.y = oldverts[v2].y; - map[v2] = SelectVertexExact (newvert); + map[v2] = VertexMap->SelectVertexExact (newvert); } Level.Lines[i].v1 = (vertex_t *)map[v1]; @@ -109,18 +107,6 @@ void FNodeBuilder::FindUsedVertices (vertex_t *oldverts, int max) } } -int FNodeBuilder::SelectVertexExact (FPrivVert &vertex) -{ - for (unsigned int i = 0; i < Vertices.Size(); ++i) - { - if (Vertices[i].x == vertex.x && Vertices[i].y == vertex.y) - { - return (int)i; - } - } - return (int)Vertices.Push (vertex); -} - // For every sidedef in the map, create a corresponding seg. void FNodeBuilder::MakeSegsFromSides () @@ -279,8 +265,7 @@ void FNodeBuilder::GroupSegPlanes () D(Printf ("%d planes from %d segs\n", planenum, Segs.Size())); - planenum = (planenum+7)/8; - PlaneChecked.Reserve (planenum); + PlaneChecked.Reserve ((planenum + 7) / 8); } // Find "loops" of segs surrounding polyobject's origin. 
Note that a polyobject's origin @@ -497,3 +482,120 @@ void FNodeBuilder::AddSegToBBox (fixed_t bbox[4], const FPrivSeg *seg) if (v2->y < bbox[BOXBOTTOM]) bbox[BOXBOTTOM] = v2->y; if (v2->y > bbox[BOXTOP]) bbox[BOXTOP] = v2->y; } + +void FNodeBuilder::FLevel::FindMapBounds () +{ + fixed_t minx, maxx, miny, maxy; + + minx = maxx = Vertices[0].x; + miny = maxy = Vertices[0].y; + + for (int i = 1; i < NumVertices; ++i) + { + if (Vertices[i].x < minx) minx = Vertices[i].x; + else if (Vertices[i].x > maxx) maxx = Vertices[i].x; + if (Vertices[i].y < miny) miny = Vertices[i].y; + else if (Vertices[i].y > maxy) maxy = Vertices[i].y; + } + + MinX = minx; + MinY = miny; + MaxX = maxx; + MaxY = maxy; +} + +FNodeBuilder::FVertexMap::FVertexMap (FNodeBuilder &builder, + fixed_t minx, fixed_t miny, fixed_t maxx, fixed_t maxy) + : MyBuilder(builder) +{ + MinX = minx; + MinY = miny; + BlocksWide = int(((double(maxx) - minx + 1) + (BLOCK_SIZE - 1)) / BLOCK_SIZE); + BlocksTall = int(((double(maxy) - miny + 1) + (BLOCK_SIZE - 1)) / BLOCK_SIZE); + MaxX = MinX + BlocksWide * BLOCK_SIZE - 1; + MaxY = MinY + BlocksTall * BLOCK_SIZE - 1; + VertexGrid = new TArray[BlocksWide * BlocksTall]; +} + +FNodeBuilder::FVertexMap::~FVertexMap () +{ + delete[] VertexGrid; +} + +int FNodeBuilder::FVertexMap::SelectVertexExact (FNodeBuilder::FPrivVert &vert) +{ + TArray &block = VertexGrid[GetBlock (vert.x, vert.y)]; + FPrivVert *vertices = &MyBuilder.Vertices[0]; + unsigned int i; + + for (i = 0; i < block.Size(); ++i) + { + if (vertices[block[i]].x == vert.x && vertices[block[i]].y == vert.y) + { + return block[i]; + } + } + + // Not present: add it! 
+ return InsertVertex (vert); +} + +int FNodeBuilder::FVertexMap::SelectVertexClose (FNodeBuilder::FPrivVert &vert) +{ + TArray &block = VertexGrid[GetBlock (vert.x, vert.y)]; + FPrivVert *vertices = &MyBuilder.Vertices[0]; + unsigned int i; + + for (i = 0; i < block.Size(); ++i) + { + if (abs(vertices[block[i]].x - vert.x) < VERTEX_EPSILON && + abs(vertices[block[i]].y - vert.y) < VERTEX_EPSILON) + { + return block[i]; + } + } + + // Not present: add it! + return InsertVertex (vert); +} + +int FNodeBuilder::FVertexMap::InsertVertex (FNodeBuilder::FPrivVert &vert) +{ + int vertnum; + + vert.segs = DWORD_MAX; + vert.segs2 = DWORD_MAX; + vertnum = (int)MyBuilder.Vertices.Push (vert); + + // If a vertex is near a block boundary, then it will be inserted on + // both sides of the boundary so that SelectVertexClose can find + // it by checking in only one block. + fixed_t minx = MAX (MinX, vert.x - VERTEX_EPSILON); + fixed_t maxx = MIN (MaxX, vert.x + VERTEX_EPSILON); + fixed_t miny = MAX (MinY, vert.y - VERTEX_EPSILON); + fixed_t maxy = MIN (MaxY, vert.y + VERTEX_EPSILON); + + int blk[4] = + { + GetBlock (minx, miny), + GetBlock (maxx, miny), + GetBlock (minx, maxy), + GetBlock (maxx, maxy) + }; + unsigned int blkcount[4] = + { + VertexGrid[blk[0]].Size(), + VertexGrid[blk[1]].Size(), + VertexGrid[blk[2]].Size(), + VertexGrid[blk[3]].Size() + }; + for (int i = 0; i < 4; ++i) + { + if (VertexGrid[blk[i]].Size() == blkcount[i]) + { + VertexGrid[blk[i]].Push (vertnum); + } + } + + return vertnum; +} diff --git a/src/p_setup.cpp b/src/p_setup.cpp index 118dece77..2222e267f 100644 --- a/src/p_setup.cpp +++ b/src/p_setup.cpp @@ -3244,7 +3244,8 @@ void P_SetupLevel (char *lumpname, int position) sides, numsides, lines, numlines }; - FNodeBuilder builder (leveldata, polyspots, anchors, genglnodes); + leveldata.FindMapBounds (); + FNodeBuilder builder (leveldata, polyspots, anchors, genglnodes, CPU.bSSE2); UsingGLNodes = genglnodes; delete[] vertexes; builder.Extract (nodes, 
numnodes, diff --git a/zdoom.vcproj b/zdoom.vcproj index d23a32bc5..27b79d398 100644 --- a/zdoom.vcproj +++ b/zdoom.vcproj @@ -2319,6 +2319,30 @@ RelativePath=".\src\nodebuild.cpp" > + + + + + + + + + +