mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2025-01-18 14:41:40 +00:00
- Did some profiling of P_InterceptVector. Here are the results I got:
  * The unaltered floating-point version is 10% faster than the 64-bit integer version.
  * Using doubles instead of floats increases performance by another 25%.
  * Another 15% can be gained by manually optimizing the code.
- P_InterceptVector now uses the optimized floating-point version, which is almost twice as fast as the 64-bit integer version.
SVN r2395 (trunk)
This commit is contained in:
parent
9bf543af99
commit
853b8f8963
1 changed files with 22 additions and 17 deletions
|
@ -63,21 +63,20 @@ fixed_t P_AproxDistance (fixed_t dx, fixed_t dy)
|
|||
// P_InterceptVector
|
||||
//
|
||||
// Returns the fractional intercept point along the first divline.
|
||||
// This is only called by the addthings and addlines traversers.
|
||||
//
|
||||
//==========================================================================
|
||||
|
||||
fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1)
|
||||
{
|
||||
#if 1 // [RH] Use 64 bit ints, so long divlines don't overflow
|
||||
#if 0 // [RH] Use 64 bit ints, so long divlines don't overflow
|
||||
|
||||
SQWORD den = ((SQWORD)v1->dy*v2->dx - (SQWORD)v1->dx*v2->dy) >> FRACBITS;
|
||||
SQWORD den = ( ((SQWORD)v1->dy*v2->dx - (SQWORD)v1->dx*v2->dy) >> FRACBITS );
|
||||
if (den == 0)
|
||||
return 0; // parallel
|
||||
SQWORD num = ((SQWORD)(v1->x - v2->x)*v1->dy + (SQWORD)(v2->y - v1->y)*v1->dx);
|
||||
return (fixed_t)(num / den);
|
||||
|
||||
#elif 1 // This is the original Doom version
|
||||
#elif 0 // This is the original Doom version
|
||||
|
||||
fixed_t frac;
|
||||
fixed_t num;
|
||||
|
@ -97,19 +96,24 @@ fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1)
|
|||
|
||||
return frac;
|
||||
|
||||
#else // UNUSED, float debug.
|
||||
#else // optimized version of the float debug version. A lot faster on modern systems.
|
||||
|
||||
float frac;
|
||||
float num;
|
||||
float den;
|
||||
float v1x = (float)v1->x/FRACUNIT;
|
||||
float v1y = (float)v1->y/FRACUNIT;
|
||||
float v1dx = (float)v1->dx/FRACUNIT;
|
||||
float v1dy = (float)v1->dy/FRACUNIT;
|
||||
float v2x = (float)v2->x/FRACUNIT;
|
||||
float v2y = (float)v2->y/FRACUNIT;
|
||||
float v2dx = (float)v2->dx/FRACUNIT;
|
||||
float v2dy = (float)v2->dy/FRACUNIT;
|
||||
|
||||
double frac;
|
||||
double num;
|
||||
double den;
|
||||
|
||||
// There's no need to divide by FRACUNIT here.
|
||||
// At the end both num and den will contain a factor
|
||||
// 1/(FRACUNIT*FRACUNIT) so they'll cancel each other out.
|
||||
double v1x = (double)v1->x;
|
||||
double v1y = (double)v1->y;
|
||||
double v1dx = (double)v1->dx;
|
||||
double v1dy = (double)v1->dy;
|
||||
double v2x = (double)v2->x;
|
||||
double v2y = (double)v2->y;
|
||||
double v2dx = (double)v2->dx;
|
||||
double v2dy = (double)v2->dy;
|
||||
|
||||
den = v1dy*v2dx - v1dx*v2dy;
|
||||
|
||||
|
@ -119,10 +123,11 @@ fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1)
|
|||
num = (v1x - v2x)*v1dy + (v2y - v1y)*v1dx;
|
||||
frac = num / den;
|
||||
|
||||
return frac*FRACUNIT;
|
||||
return FLOAT2FIXED(frac);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
//==========================================================================
|
||||
//
|
||||
// P_LineOpening
|
||||
|
|
Loading…
Reference in a new issue