Fixed critical bug in the generic C++ code of DotProduct_SIMD that caused massive errors in the physics system

This commit is contained in:
Robert Beckebans 2013-06-01 18:29:12 +02:00
parent 2a4970c86c
commit 3b67eabf79

View file

@ -220,31 +220,49 @@ static float DotProduct_SIMD( const float* src0, const float* src1, const int co
#else #else
// RB: the old loop caused completely broken rigid body physics and NaN errors
#if 1
float s0 = 0.0f; float s0 = 0.0f;
float s1 = 0.0f; float s1 = 0.0f;
float s2 = 0.0f; float s2 = 0.0f;
float s3 = 0.0f; float s3 = 0.0f;
int i = 0; int i = 0;
for( ; i < count - 3; i += 4 ) for( ; i + 4 <= count; i += 4 )
{ {
s0 += src0[i + 4] * src1[i + 4]; s0 += src0[i + 0] * src1[i + 0];
s1 += src0[i + 5] * src1[i + 5]; s1 += src0[i + 1] * src1[i + 1];
s2 += src0[i + 6] * src1[i + 6]; s2 += src0[i + 2] * src1[i + 2];
s3 += src0[i + 7] * src1[i + 7]; s3 += src0[i + 3] * src1[i + 3];
} }
switch( count - i ) switch( count - i )
{ {
NODEFAULT; NODEFAULT;
case 4:
s3 += src0[i + 3] * src1[i + 3];
case 3: case 3:
s0 += src0[i + 2] * src1[i + 2]; s2 += src0[i + 2] * src1[i + 2];
case 2: case 2:
s1 += src0[i + 1] * src1[i + 1]; s1 += src0[i + 1] * src1[i + 1];
case 1: case 1:
s2 += src0[i + 0] * src1[i + 0]; s0 += src0[i + 0] * src1[i + 0];
case 0: case 0:
break; break;
} }
return s0 + s1 + s2 + s3; return s0 + s1 + s2 + s3;
#else
float s = 0;
for( int i = 0; i < count; i++ )
{
s += src0[i] * src1[i];
}
return s;
#endif
// RB end
#endif #endif
} }
@ -1519,114 +1537,64 @@ static void GetMaxStep_SIMD( const float* f, const float* a, const float* delta_
_mm_store_ss( & maxStep, vMaxStep ); _mm_store_ss( & maxStep, vMaxStep );
limit = _mm_cvtsi128_si32( vLimit ); limit = _mm_cvtsi128_si32( vLimit );
limitSide = _mm_cvtsi128_si32( vLimitSide ); limitSide = _mm_cvtsi128_si32( vLimitSide );
#else #else
int i;
float s;
// default to a full step for the current variable // default to a full step for the current variable
if( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON )
{ {
maxStep = -a[d] / delta_a[d]; float negAccel = -a[d];
float deltaAccel = delta_a[d];
int m0 = ( fabs( deltaAccel ) > LCP_DELTA_ACCEL_EPSILON );
float step = negAccel / ( m0 ? deltaAccel : 1.0f );
maxStep = m0 ? step : 0.0f;
limit = d;
limitSide = 0;
} }
else
{
maxStep = 0.0f;
}
limit = d;
limitSide = 0;
// test the current variable // test the current variable
if( dir < 0.0f )
{ {
if( lo[d] != -idMath::INFINITY ) float deltaForce = dir;
{ float forceLimit = ( deltaForce < 0.0f ) ? lo[d] : hi[d];
s = ( lo[d] - f[d] ) / dir; float step = ( forceLimit - f[d] ) / deltaForce;
if( s < maxStep ) int setSide = ( deltaForce < 0.0f ) ? -1 : 1;
{ int m0 = ( fabs( deltaForce ) > LCP_DELTA_FORCE_EPSILON );
maxStep = s; int m1 = ( fabs( forceLimit ) != idMath::INFINITY );
limitSide = -1; int m2 = ( step < maxStep );
} int m3 = ( m0 & m1 & m2 );
} maxStep = m3 ? step : maxStep;
} limit = m3 ? d : limit;
else limitSide = m3 ? setSide : limitSide;
{
if( hi[d] != idMath::INFINITY )
{
s = ( hi[d] - f[d] ) / dir;
if( s < maxStep )
{
maxStep = s;
limitSide = 1;
}
}
} }
// test the clamped bounded variables // test the clamped bounded variables
for( i = numUnbounded; i < numClamped; i++ ) for( int i = numUnbounded; i < numClamped; i++ )
{ {
if( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) float deltaForce = delta_f[i];
{ float forceLimit = ( deltaForce < 0.0f ) ? lo[i] : hi[i];
// if there is a low boundary int m0 = ( fabs( deltaForce ) > LCP_DELTA_FORCE_EPSILON );
if( lo[i] != -idMath::INFINITY ) float step = ( forceLimit - f[i] ) / ( m0 ? deltaForce : 1.0f );
{ int setSide = ( deltaForce < 0.0f ) ? -1 : 1;
s = ( lo[i] - f[i] ) / delta_f[i]; int m1 = ( fabs( forceLimit ) != idMath::INFINITY );
if( s < maxStep ) int m2 = ( step < maxStep );
{ int m3 = ( m0 & m1 & m2 );
maxStep = s; maxStep = m3 ? step : maxStep;
limit = i; limit = m3 ? i : limit;
limitSide = -1; limitSide = m3 ? setSide : limitSide;
}
}
}
else if( delta_f[i] > LCP_DELTA_FORCE_EPSILON )
{
// if there is a high boundary
if( hi[i] != idMath::INFINITY )
{
s = ( hi[i] - f[i] ) / delta_f[i];
if( s < maxStep )
{
maxStep = s;
limit = i;
limitSide = 1;
}
}
}
} }
// test the not clamped bounded variables // test the not clamped bounded variables
for( i = numClamped; i < d; i++ ) for( int i = numClamped; i < d; i++ )
{ {
if( side[i] == -1 ) float negAccel = -a[i];
{ float deltaAccel = delta_a[i];
if( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) int m0 = ( side[i] * deltaAccel > LCP_DELTA_ACCEL_EPSILON );
{ float step = negAccel / ( m0 ? deltaAccel : 1.0f );
continue; int m1 = ( lo[i] < -LCP_BOUND_EPSILON || hi[i] > LCP_BOUND_EPSILON );
} int m2 = ( step < maxStep );
} int m3 = ( m0 & m1 & m2 );
else if( side[i] == 1 ) maxStep = m3 ? step : maxStep;
{ limit = m3 ? i : limit;
if( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) limitSide = m3 ? 0 : limitSide;
{
continue;
}
}
else
{
continue;
}
// ignore variables for which the force is not allowed to take any substantial value
if( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON )
{
continue;
}
s = -a[i] / delta_a[i];
if( s < maxStep )
{
maxStep = s;
limit = i;
limitSide = 0;
}
} }
#endif #endif