Fixed critical bug in the generic C++ code of DotProduct_SIMD that caused massive errors in the physics system

2024-12-02 08:51:57 +00:00 · 2013-06-01 18:29:12 +02:00 · 2013-06-01 18:29:12 +02:00 · 3b67eabf79
commit 3b67eabf79
parent 2a4970c86c
1 changed files with 67 additions and 99 deletions
--- a/neo/idlib/math/Lcp.cpp
+++ b/neo/idlib/math/Lcp.cpp
@ -220,31 +220,49 @@ static float DotProduct_SIMD( const float* src0, const float* src1, const int co
 #else
 	// RB: the old loop caused completely broken rigid body physics and NaN errors
 #if 1
 	float s0 = 0.0f;
 	float s1 = 0.0f;
 	float s2 = 0.0f;
 	float s3 = 0.0f;
 	int i = 0;
-	for( ; i < count - 3; i += 4 )
+	for( ; i + 4 <= count; i += 4 )
 	{
-		s0 += src0[i + 4] * src1[i + 4];
+		s0 += src0[i + 0] * src1[i + 0];
-		s1 += src0[i + 5] * src1[i + 5];
+		s1 += src0[i + 1] * src1[i + 1];
-		s2 += src0[i + 6] * src1[i + 6];
+		s2 += src0[i + 2] * src1[i + 2];
-		s3 += src0[i + 7] * src1[i + 7];
+		s3 += src0[i + 3] * src1[i + 3];
 	}
 	switch( count - i )
 	{
 			NODEFAULT;
 		case 4:
 			s3 += src0[i + 3] * src1[i + 3];
 		case 3:
-			s0 += src0[i + 2] * src1[i + 2];
+			s2 += src0[i + 2] * src1[i + 2];
 		case 2:
 			s1 += src0[i + 1] * src1[i + 1];
 		case 1:
-			s2 += src0[i + 0] * src1[i + 0];
+			s0 += src0[i + 0] * src1[i + 0];
 		case 0:
 			break;
 	}
 	return s0 + s1 + s2 + s3;
 #else
 	float s = 0;
 	for( int i = 0; i < count; i++ )
 	{
 		s += src0[i] * src1[i];
 	}
 	return s;
 #endif
 	// RB end
 #endif
 }
@ -1519,114 +1537,64 @@ static void GetMaxStep_SIMD( const float* f, const float* a, const float* delta_
 	_mm_store_ss( & maxStep, vMaxStep );
 	limit = _mm_cvtsi128_si32( vLimit );
 	limitSide = _mm_cvtsi128_si32( vLimitSide );
 #else
 	int i;
 	float s;
 	// default to a full step for the current variable
 	if( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON )
 	{
-		maxStep = -a[d] / delta_a[d];
+		float negAccel = -a[d];
 		float deltaAccel = delta_a[d];
 		int m0 = ( fabs( deltaAccel ) > LCP_DELTA_ACCEL_EPSILON );
 		float step = negAccel / ( m0 ? deltaAccel : 1.0f );
 		maxStep = m0 ? step : 0.0f;
 		limit = d;
 		limitSide = 0;
 	}
 	else
 	{
 		maxStep = 0.0f;
 	}
 	limit = d;
 	limitSide = 0;
 	// test the current variable
 	if( dir < 0.0f )
 	{
-		if( lo[d] != -idMath::INFINITY )
+		float deltaForce = dir;
-		{
+		float forceLimit = ( deltaForce < 0.0f ) ? lo[d] : hi[d];
-			s = ( lo[d] - f[d] ) / dir;
+		float step = ( forceLimit - f[d] ) / deltaForce;
-			if( s < maxStep )
+		int setSide = ( deltaForce < 0.0f ) ? -1 : 1;
-			{
+		int m0 = ( fabs( deltaForce ) > LCP_DELTA_FORCE_EPSILON );
-				maxStep = s;
+		int m1 = ( fabs( forceLimit ) != idMath::INFINITY );
-				limitSide = -1;
+		int m2 = ( step < maxStep );
-			}
+		int m3 = ( m0 & m1 & m2 );
-		}
+		maxStep = m3 ? step : maxStep;
-	}
+		limit = m3 ? d : limit;
-	else
+		limitSide = m3 ? setSide : limitSide;
 	{
 		if( hi[d] != idMath::INFINITY )
 		{
 			s = ( hi[d] - f[d] ) / dir;
 			if( s < maxStep )
 			{
 				maxStep = s;
 				limitSide = 1;
 			}
 		}
 	}
 	// test the clamped bounded variables
-	for( i = numUnbounded; i < numClamped; i++ )
+	for( int i = numUnbounded; i < numClamped; i++ )
 	{
-		if( delta_f[i] < -LCP_DELTA_FORCE_EPSILON )
+		float deltaForce = delta_f[i];
-		{
+		float forceLimit = ( deltaForce < 0.0f ) ? lo[i] : hi[i];
-			// if there is a low boundary
+		int m0 = ( fabs( deltaForce ) > LCP_DELTA_FORCE_EPSILON );
-			if( lo[i] != -idMath::INFINITY )
+		float step = ( forceLimit - f[i] ) / ( m0 ? deltaForce : 1.0f );
-			{
+		int setSide = ( deltaForce < 0.0f ) ? -1 : 1;
-				s = ( lo[i] - f[i] ) / delta_f[i];
+		int m1 = ( fabs( forceLimit ) != idMath::INFINITY );
-				if( s < maxStep )
+		int m2 = ( step < maxStep );
-				{
+		int m3 = ( m0 & m1 & m2 );
-					maxStep = s;
+		maxStep = m3 ? step : maxStep;
-					limit = i;
+		limit = m3 ? i : limit;
-					limitSide = -1;
+		limitSide = m3 ? setSide : limitSide;
 				}
 			}
 		}
 		else if( delta_f[i] > LCP_DELTA_FORCE_EPSILON )
 		{
 			// if there is a high boundary
 			if( hi[i] != idMath::INFINITY )
 			{
 				s = ( hi[i] - f[i] ) / delta_f[i];
 				if( s < maxStep )
 				{
 					maxStep = s;
 					limit = i;
 					limitSide = 1;
 				}
 			}
 		}
 	}
 	// test the not clamped bounded variables
-	for( i = numClamped; i < d; i++ )
+	for( int i = numClamped; i < d; i++ )
 	{
-		if( side[i] == -1 )
+		float negAccel = -a[i];
-		{
+		float deltaAccel = delta_a[i];
-			if( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON )
+		int m0 = ( side[i] * deltaAccel > LCP_DELTA_ACCEL_EPSILON );
-			{
+		float step = negAccel / ( m0 ? deltaAccel : 1.0f );
-				continue;
+		int m1 = ( lo[i] < -LCP_BOUND_EPSILON || hi[i] > LCP_BOUND_EPSILON );
-			}
+		int m2 = ( step < maxStep );
-		}
+		int m3 = ( m0 & m1 & m2 );
-		else if( side[i] == 1 )
+		maxStep = m3 ? step : maxStep;
-		{
+		limit = m3 ? i : limit;
-			if( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON )
+		limitSide = m3 ? 0 : limitSide;
 			{
 				continue;
 			}
 		}
 		else
 		{
 			continue;
 		}
 		// ignore variables for which the force is not allowed to take any substantial value
 		if( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON )
 		{
 			continue;
 		}
 		s = -a[i] / delta_a[i];
 		if( s < maxStep )
 		{
 			maxStep = s;
 			limit = i;
 			limitSide = 0;
 		}
 	}
 #endif