- Did some profiling of P_InterceptVector. Here are the results I got:

* The unaltered floating-point version is 10% faster than the 64-bit integer version.
  * using doubles instead of floats increases performance by another 25%.
  * another 15% can be gained by manually optimizing the code.
- P_InterceptVector now uses the optimized floating-point version, which is almost twice as fast as the 64-bit integer version.

SVN r2395 (trunk)
This commit is contained in:
Christoph Oelckers 2010-06-30 15:20:18 +00:00
parent 9bf543af99
commit 853b8f8963

View file

@ -63,21 +63,20 @@ fixed_t P_AproxDistance (fixed_t dx, fixed_t dy)
// P_InterceptVector // P_InterceptVector
// //
// Returns the fractional intercept point along the first divline. // Returns the fractional intercept point along the first divline.
// This is only called by the addthings and addlines traversers.
// //
//========================================================================== //==========================================================================
fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1) fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1)
{ {
#if 1 // [RH] Use 64 bit ints, so long divlines don't overflow #if 0 // [RH] Use 64 bit ints, so long divlines don't overflow
SQWORD den = ((SQWORD)v1->dy*v2->dx - (SQWORD)v1->dx*v2->dy) >> FRACBITS; SQWORD den = ( ((SQWORD)v1->dy*v2->dx - (SQWORD)v1->dx*v2->dy) >> FRACBITS );
if (den == 0) if (den == 0)
return 0; // parallel return 0; // parallel
SQWORD num = ((SQWORD)(v1->x - v2->x)*v1->dy + (SQWORD)(v2->y - v1->y)*v1->dx); SQWORD num = ((SQWORD)(v1->x - v2->x)*v1->dy + (SQWORD)(v2->y - v1->y)*v1->dx);
return (fixed_t)(num / den); return (fixed_t)(num / den);
#elif 1 // This is the original Doom version #elif 0 // This is the original Doom version
fixed_t frac; fixed_t frac;
fixed_t num; fixed_t num;
@ -97,19 +96,24 @@ fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1)
return frac; return frac;
#else // UNUSED, float debug. #else // Optimized version of the float debug version. A lot faster on modern systems.
float frac;
float num; double frac;
float den; double num;
float v1x = (float)v1->x/FRACUNIT; double den;
float v1y = (float)v1->y/FRACUNIT;
float v1dx = (float)v1->dx/FRACUNIT; // There's no need to divide by FRACUNIT here.
float v1dy = (float)v1->dy/FRACUNIT; // At the end both num and den will contain a factor
float v2x = (float)v2->x/FRACUNIT; // 1/(FRACUNIT*FRACUNIT) so they'll cancel each other out.
float v2y = (float)v2->y/FRACUNIT; double v1x = (double)v1->x;
float v2dx = (float)v2->dx/FRACUNIT; double v1y = (double)v1->y;
float v2dy = (float)v2->dy/FRACUNIT; double v1dx = (double)v1->dx;
double v1dy = (double)v1->dy;
double v2x = (double)v2->x;
double v2y = (double)v2->y;
double v2dx = (double)v2->dx;
double v2dy = (double)v2->dy;
den = v1dy*v2dx - v1dx*v2dy; den = v1dy*v2dx - v1dx*v2dy;
@ -119,10 +123,11 @@ fixed_t P_InterceptVector (const divline_t *v2, const divline_t *v1)
num = (v1x - v2x)*v1dy + (v2y - v1y)*v1dx; num = (v1x - v2x)*v1dy + (v2y - v1y)*v1dx;
frac = num / den; frac = num / den;
return frac*FRACUNIT; return FLOAT2FIXED(frac);
#endif #endif
} }
//========================================================================== //==========================================================================
// //
// P_LineOpening // P_LineOpening