mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-08 22:11:09 +00:00
7a601515df
or not SSE2 is available at runtime. Since most of the time is spent in ClassifyLine, using SSE2 in just this one function helps the most. - Nodebuilding is a little faster if we inline PointOnSide. - Changed FEventTree into a regular binary tree, since there just aren't enough nodes inserted into it to make a red-black tree worthwhile. - Added more checks at the start of ClassifyLine so that it has a better chance of avoiding the more complicated checking, and it seems to have paid off with a reasonably modest performance boost. - Added a "vertex map" for ZDBSP's vertex selection. (Think BLOCKMAP for vertices instead of lines.) On large maps, this can result in a very significant speed up. (In one particular map, ZDBSP had previously spent 40% of its time just scanning through all the vertices in the map. Now the time it spends finding vertices is immeasurable.) On small maps, this won't make much of a difference, because the number of vertices to search was so small to begin with. SVN r173 (trunk)
144 lines
2.8 KiB
C++
144 lines
2.8 KiB
C++
#include "doomtype.h"
|
|
#include "nodebuild.h"
|
|
|
|
#define FAR_ENOUGH 17179869184.f // 4<<32
|
|
|
|
// This function is identical to the ClassifyLine2 version. So how does it use SSE2?
|
|
// Easy! By explicitly enabling SSE2 in the configuration properties for this one
|
|
// file, we can build it with SSE2 enabled without forcing SSE2 on the rest of the
|
|
// project.
|
|
|
|
int FNodeBuilder::ClassifyLineSSE2 (node_t &node, const FPrivSeg *seg, int &sidev1, int &sidev2)
|
|
{
|
|
const FPrivVert *v1 = &Vertices[seg->v1];
|
|
const FPrivVert *v2 = &Vertices[seg->v2];
|
|
|
|
double d_x1 = double(node.x);
|
|
double d_y1 = double(node.y);
|
|
double d_dx = double(node.dx);
|
|
double d_dy = double(node.dy);
|
|
double d_xv1 = double(v1->x);
|
|
double d_xv2 = double(v2->x);
|
|
double d_yv1 = double(v1->y);
|
|
double d_yv2 = double(v2->y);
|
|
|
|
double s_num1 = (d_y1 - d_yv1) * d_dx - (d_x1 - d_xv1) * d_dy;
|
|
double s_num2 = (d_y1 - d_yv2) * d_dx - (d_x1 - d_xv2) * d_dy;
|
|
|
|
int nears = 0;
|
|
|
|
if (s_num1 <= -FAR_ENOUGH)
|
|
{
|
|
if (s_num2 <= -FAR_ENOUGH)
|
|
{
|
|
sidev1 = sidev2 = 1;
|
|
return 1;
|
|
}
|
|
if (s_num2 >= FAR_ENOUGH)
|
|
{
|
|
sidev1 = 1;
|
|
sidev2 = -1;
|
|
return -1;
|
|
}
|
|
nears = 1;
|
|
}
|
|
else if (s_num1 >= FAR_ENOUGH)
|
|
{
|
|
if (s_num2 >= FAR_ENOUGH)
|
|
{
|
|
sidev1 = sidev2 = -1;
|
|
return 0;
|
|
}
|
|
if (s_num2 <= -FAR_ENOUGH)
|
|
{
|
|
sidev1 = -1;
|
|
sidev2 = 1;
|
|
return -1;
|
|
}
|
|
nears = 1;
|
|
}
|
|
else
|
|
{
|
|
nears = 2 | int(fabs(s_num2) < FAR_ENOUGH);
|
|
}
|
|
|
|
if (nears)
|
|
{
|
|
double l = 1.f / (d_dx*d_dx + d_dy*d_dy);
|
|
if (nears & 2)
|
|
{
|
|
double dist = s_num1 * s_num1 * l;
|
|
if (dist < SIDE_EPSILON*SIDE_EPSILON)
|
|
{
|
|
sidev1 = 0;
|
|
}
|
|
else
|
|
{
|
|
sidev1 = s_num1 > 0.0 ? -1 : 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sidev1 = s_num1 > 0.0 ? -1 : 1;
|
|
}
|
|
if (nears & 1)
|
|
{
|
|
double dist = s_num2 * s_num2 * l;
|
|
if (dist < SIDE_EPSILON*SIDE_EPSILON)
|
|
{
|
|
sidev2 = 0;
|
|
}
|
|
else
|
|
{
|
|
sidev2 = s_num2 > 0.0 ? -1 : 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sidev2 = s_num2 > 0.0 ? -1 : 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sidev1 = s_num1 > 0.0 ? -1 : 1;
|
|
sidev2 = s_num2 > 0.0 ? -1 : 1;
|
|
}
|
|
|
|
if ((sidev1 | sidev2) == 0)
|
|
{ // seg is coplanar with the splitter, so use its orientation to determine
|
|
// which child it ends up in. If it faces the same direction as the splitter,
|
|
// it goes in front. Otherwise, it goes in back.
|
|
|
|
if (node.dx != 0)
|
|
{
|
|
if ((node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x))
|
|
{
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ((node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y))
|
|
{
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
else if (sidev1 <= 0 && sidev2 <= 0)
|
|
{
|
|
return 0;
|
|
}
|
|
else if (sidev1 >= 0 && sidev2 >= 0)
|
|
{
|
|
return 1;
|
|
}
|
|
return -1;
|
|
}
|