mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2025-01-22 14:51:07 +00:00
- removed the separate SSE2 version of the node builder's ClassifyLine function and all code associated with it.
Like everything else related to doing standard math with SSE2 vs. x87, there's nothing to be gained here on anything but first-generation SSE2 systems, which are irrelevant these days. Taking 'thespir2.wad' from https://forum.zdoom.org/viewtopic.php?f=1&t=10655, the SSE2 version is reproducibly ~3% slower than the x87 version on my Core i7, which quite closely mirrors all my previous tests since 2007. Overall this just looks like an optimization not worth doing.
This commit is contained in:
parent
bfa7a2d737
commit
90b8dbb096
5 changed files with 2 additions and 311 deletions
|
@ -397,22 +397,6 @@ if (NOT ZDOOM_USE_SSE2)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if( SSE_MATTERS )
|
|
||||||
if( WIN32 )
|
|
||||||
set( BACKPATCH 1 CACHE BOOL "Enable backpatching." )
|
|
||||||
else()
|
|
||||||
CHECK_FUNCTION_EXISTS(mprotect HAVE_MPROTECT)
|
|
||||||
if( HAVE_MPROTECT )
|
|
||||||
set( BACKPATCH 1 CACHE BOOL "Enable backpatching." )
|
|
||||||
else()
|
|
||||||
set( BACKPATCH 0 )
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
set( SSE 1 CACHE BOOL "Build SSE and SSE2 versions of key code." )
|
|
||||||
else()
|
|
||||||
set( BACKPATCH 0 )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if( X64 )
|
if( X64 )
|
||||||
set( HAVE_MMX 1 )
|
set( HAVE_MMX 1 )
|
||||||
else( X64 )
|
else( X64 )
|
||||||
|
@ -577,10 +561,6 @@ endif()
|
||||||
|
|
||||||
# Flags
|
# Flags
|
||||||
|
|
||||||
if( BACKPATCH )
|
|
||||||
add_definitions( -DBACKPATCH )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Update gitinfo.h
|
# Update gitinfo.h
|
||||||
|
|
||||||
add_custom_target( revision_check ALL
|
add_custom_target( revision_check ALL
|
||||||
|
@ -726,18 +706,6 @@ add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h
|
||||||
|
|
||||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR} )
|
include_directories( ${CMAKE_CURRENT_BINARY_DIR} )
|
||||||
|
|
||||||
if( SSE_MATTERS )
|
|
||||||
if( SSE )
|
|
||||||
set( X86_SOURCES nodebuild_classify_sse2.cpp )
|
|
||||||
set_source_files_properties( nodebuild_classify_sse2.cpp PROPERTIES COMPILE_FLAGS "${SSE2_ENABLE}" )
|
|
||||||
else()
|
|
||||||
add_definitions( -DDISABLE_SSE )
|
|
||||||
endif()
|
|
||||||
else()
|
|
||||||
add_definitions( -DDISABLE_SSE )
|
|
||||||
set( X86_SOURCES )
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if( SNDFILE_FOUND )
|
if( SNDFILE_FOUND )
|
||||||
add_definitions( -DHAVE_SNDFILE )
|
add_definitions( -DHAVE_SNDFILE )
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -1062,95 +1062,3 @@ void FNodeBuilder::PrintSet (int l, DWORD set)
|
||||||
}
|
}
|
||||||
Printf (PRINT_LOG, "*\n");
|
Printf (PRINT_LOG, "*\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef BACKPATCH
|
|
||||||
#ifdef _WIN32
|
|
||||||
extern "C" {
|
|
||||||
__declspec(dllimport) int __stdcall VirtualProtect(void *, unsigned long, unsigned long, unsigned long *);
|
|
||||||
}
|
|
||||||
#define PAGE_EXECUTE_READWRITE 64
|
|
||||||
#else
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __GNUC__
|
|
||||||
extern "C" int ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2])
|
|
||||||
#else
|
|
||||||
static int *CallerOffset;
|
|
||||||
int ClassifyLineBackpatchC (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2])
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
// Select the routine based on SSE2 availability and patch the caller so that
|
|
||||||
// they call that routine directly next time instead of going through here.
|
|
||||||
int *calleroffset;
|
|
||||||
int diff;
|
|
||||||
int (*func)(node_t &, const FSimpleVert *, const FSimpleVert *, int[2]);
|
|
||||||
|
|
||||||
#ifdef __GNUC__
|
|
||||||
calleroffset = (int *)__builtin_return_address(0);
|
|
||||||
#else
|
|
||||||
calleroffset = CallerOffset;
|
|
||||||
#endif
|
|
||||||
// printf ("Patching for SSE %d @ %p %d\n", SSELevel, calleroffset, *calleroffset);
|
|
||||||
|
|
||||||
#ifndef DISABLE_SSE
|
|
||||||
if (CPU.bSSE2)
|
|
||||||
{
|
|
||||||
func = ClassifyLineSSE2;
|
|
||||||
diff = int((char *)ClassifyLineSSE2 - (char *)calleroffset);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
func = ClassifyLine2;
|
|
||||||
diff = int((char *)ClassifyLine2 - (char *)calleroffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
calleroffset--;
|
|
||||||
// Patch the caller.
|
|
||||||
#ifdef _WIN32
|
|
||||||
unsigned long oldprotect;
|
|
||||||
if (VirtualProtect (calleroffset, 4, PAGE_EXECUTE_READWRITE, &oldprotect))
|
|
||||||
#else
|
|
||||||
// must make this page-aligned for mprotect
|
|
||||||
long pagesize = sysconf(_SC_PAGESIZE);
|
|
||||||
char *callerpage = (char *)((intptr_t)calleroffset & ~(pagesize - 1));
|
|
||||||
size_t protectlen = (intptr_t)calleroffset + sizeof(void*) - (intptr_t)callerpage;
|
|
||||||
int ptect;
|
|
||||||
if (!(ptect = mprotect(callerpage, protectlen, PROT_READ|PROT_WRITE|PROT_EXEC)))
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
*calleroffset = diff;
|
|
||||||
#ifdef _WIN32
|
|
||||||
VirtualProtect (calleroffset, sizeof(void*), oldprotect, &oldprotect);
|
|
||||||
#else
|
|
||||||
mprotect(callerpage, protectlen, PROT_READ|PROT_EXEC);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// And return by calling the real function.
|
|
||||||
return func (node, v1, v2, sidev);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef __GNUC__
|
|
||||||
// The ClassifyLineBackpatch() function here is a stub that uses inline assembly and nakedness
|
|
||||||
// to retrieve the return address of the stack before sending control to the real
|
|
||||||
// ClassifyLineBackpatchC() function. Since BACKPATCH shouldn't be defined on 64-bit builds,
|
|
||||||
// we're okay that VC++ can't do inline assembly on that target.
|
|
||||||
|
|
||||||
extern "C" __declspec(noinline) __declspec(naked) int ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2])
|
|
||||||
{
|
|
||||||
// We store the return address in a global, so as not to need to mess with the parameter list.
|
|
||||||
__asm
|
|
||||||
{
|
|
||||||
mov eax, [esp]
|
|
||||||
mov CallerOffset, eax
|
|
||||||
jmp ClassifyLineBackpatchC
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -53,22 +53,6 @@ struct FSimpleVert
|
||||||
fixed_t x, y;
|
fixed_t x, y;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
int ClassifyLine2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]);
|
|
||||||
#ifndef DISABLE_SSE
|
|
||||||
int ClassifyLineSSE1 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]);
|
|
||||||
int ClassifyLineSSE2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]);
|
|
||||||
#ifdef BACKPATCH
|
|
||||||
#ifdef __GNUC__
|
|
||||||
int ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]) __attribute__((noinline));
|
|
||||||
#else
|
|
||||||
int __declspec(noinline) ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]);
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
class FNodeBuilder
|
class FNodeBuilder
|
||||||
{
|
{
|
||||||
struct FPrivSeg
|
struct FPrivSeg
|
||||||
|
@ -282,7 +266,7 @@ private:
|
||||||
// 1 = seg is in back
|
// 1 = seg is in back
|
||||||
// -1 = seg cuts the node
|
// -1 = seg cuts the node
|
||||||
|
|
||||||
inline int ClassifyLine (node_t &node, const FPrivVert *v1, const FPrivVert *v2, int sidev[2]);
|
int ClassifyLine (node_t &node, const FPrivVert *v1, const FPrivVert *v2, int sidev[2]);
|
||||||
|
|
||||||
void FixSplitSharers (const node_t &node);
|
void FixSplitSharers (const node_t &node);
|
||||||
double AddIntersection (const node_t &node, int vertex);
|
double AddIntersection (const node_t &node, int vertex);
|
||||||
|
@ -341,28 +325,3 @@ inline int FNodeBuilder::PointOnSide (int x, int y, int x1, int y1, int dx, int
|
||||||
}
|
}
|
||||||
return s_num > 0.0 ? -1 : 1;
|
return s_num > 0.0 ? -1 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int FNodeBuilder::ClassifyLine (node_t &node, const FPrivVert *v1, const FPrivVert *v2, int sidev[2])
|
|
||||||
{
|
|
||||||
#ifdef DISABLE_SSE
|
|
||||||
return ClassifyLine2 (node, v1, v2, sidev);
|
|
||||||
#else
|
|
||||||
#if defined(__SSE2__) || defined(_M_X64)
|
|
||||||
// If compiling with SSE2 support everywhere, just use the SSE2 version.
|
|
||||||
return ClassifyLineSSE2 (node, v1, v2, sidev);
|
|
||||||
#elif defined(_MSC_VER) && _MSC_VER < 1300
|
|
||||||
// VC 6 does not support SSE optimizations.
|
|
||||||
return ClassifyLine2 (node, v1, v2, sidev);
|
|
||||||
#else
|
|
||||||
// Select the routine based on our flag.
|
|
||||||
#ifdef BACKPATCH
|
|
||||||
return ClassifyLineBackpatch (node, v1, v2, sidev);
|
|
||||||
#else
|
|
||||||
if (CPU.bSSE2)
|
|
||||||
return ClassifyLineSSE2 (node, v1, v2, sidev);
|
|
||||||
else
|
|
||||||
return ClassifyLine2 (node, v1, v2, sidev);
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
#define FAR_ENOUGH 17179869184.f // 4<<32
|
#define FAR_ENOUGH 17179869184.f // 4<<32
|
||||||
|
|
||||||
extern "C" int ClassifyLine2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2])
|
int FNodeBuilder::ClassifyLine(node_t &node, const FPrivVert *v1, const FPrivVert *v2, int sidev[2])
|
||||||
{
|
{
|
||||||
double d_x1 = double(node.x);
|
double d_x1 = double(node.x);
|
||||||
double d_y1 = double(node.y);
|
double d_y1 = double(node.y);
|
||||||
|
|
|
@ -1,144 +0,0 @@
|
||||||
#ifndef DISABLE_SSE
|
|
||||||
|
|
||||||
#include "doomtype.h"
|
|
||||||
#include "nodebuild.h"
|
|
||||||
|
|
||||||
#define FAR_ENOUGH 17179869184.f // 4<<32
|
|
||||||
|
|
||||||
// You may notice that this function is identical to ClassifyLine2.
|
|
||||||
// The reason it is SSE2 is because this file is explicitly compiled
|
|
||||||
// with SSE2 math enabled, but the other files are not.
|
|
||||||
|
|
||||||
extern "C" int ClassifyLineSSE2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2])
|
|
||||||
{
|
|
||||||
double d_x1 = double(node.x);
|
|
||||||
double d_y1 = double(node.y);
|
|
||||||
double d_dx = double(node.dx);
|
|
||||||
double d_dy = double(node.dy);
|
|
||||||
double d_xv1 = double(v1->x);
|
|
||||||
double d_xv2 = double(v2->x);
|
|
||||||
double d_yv1 = double(v1->y);
|
|
||||||
double d_yv2 = double(v2->y);
|
|
||||||
|
|
||||||
double s_num1 = (d_y1 - d_yv1) * d_dx - (d_x1 - d_xv1) * d_dy;
|
|
||||||
double s_num2 = (d_y1 - d_yv2) * d_dx - (d_x1 - d_xv2) * d_dy;
|
|
||||||
|
|
||||||
int nears = 0;
|
|
||||||
|
|
||||||
if (s_num1 <= -FAR_ENOUGH)
|
|
||||||
{
|
|
||||||
if (s_num2 <= -FAR_ENOUGH)
|
|
||||||
{
|
|
||||||
sidev[0] = sidev[1] = 1;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (s_num2 >= FAR_ENOUGH)
|
|
||||||
{
|
|
||||||
sidev[0] = 1;
|
|
||||||
sidev[1] = -1;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
nears = 1;
|
|
||||||
}
|
|
||||||
else if (s_num1 >= FAR_ENOUGH)
|
|
||||||
{
|
|
||||||
if (s_num2 >= FAR_ENOUGH)
|
|
||||||
{
|
|
||||||
sidev[0] = sidev[1] = -1;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (s_num2 <= -FAR_ENOUGH)
|
|
||||||
{
|
|
||||||
sidev[0] = -1;
|
|
||||||
sidev[1] = 1;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
nears = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
nears = 2 | int(fabs(s_num2) < FAR_ENOUGH);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nears)
|
|
||||||
{
|
|
||||||
double l = 1.f / (d_dx*d_dx + d_dy*d_dy);
|
|
||||||
if (nears & 2)
|
|
||||||
{
|
|
||||||
double dist = s_num1 * s_num1 * l;
|
|
||||||
if (dist < SIDE_EPSILON*SIDE_EPSILON)
|
|
||||||
{
|
|
||||||
sidev[0] = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sidev[0] = s_num1 > 0.0 ? -1 : 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sidev[0] = s_num1 > 0.0 ? -1 : 1;
|
|
||||||
}
|
|
||||||
if (nears & 1)
|
|
||||||
{
|
|
||||||
double dist = s_num2 * s_num2 * l;
|
|
||||||
if (dist < SIDE_EPSILON*SIDE_EPSILON)
|
|
||||||
{
|
|
||||||
sidev[1] = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sidev[1] = s_num2 > 0.0 ? -1 : 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sidev[1] = s_num2 > 0.0 ? -1 : 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sidev[0] = s_num1 > 0.0 ? -1 : 1;
|
|
||||||
sidev[1] = s_num2 > 0.0 ? -1 : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((sidev[0] | sidev[1]) == 0)
|
|
||||||
{ // seg is coplanar with the splitter, so use its orientation to determine
|
|
||||||
// which child it ends up in. If it faces the same direction as the splitter,
|
|
||||||
// it goes in front. Otherwise, it goes in back.
|
|
||||||
|
|
||||||
if (node.dx != 0)
|
|
||||||
{
|
|
||||||
if ((node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x))
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if ((node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y))
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (sidev[0] <= 0 && sidev[1] <= 0)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
else if (sidev[0] >= 0 && sidev[1] >= 0)
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
Loading…
Reference in a new issue