Fixed critical bug in the generic C++ code of DotProduct_SIMD that caused massive errors in the physics system

2024-12-02 08:51:57 +00:00 · 2013-06-01 18:29:12 +02:00 · 2013-06-01 18:29:12 +02:00 · 3b67eabf79
commit 3b67eabf79
parent 2a4970c86c
1 changed files with 67 additions and 99 deletions
--- a/neo/idlib/math/Lcp.cpp
+++ b/neo/idlib/math/Lcp.cpp
@ -220,31 +220,49 @@ static float DotProduct_SIMD( const float* src0, const float* src1, const int co
 	
 #else
 	
+	// RB: the old loop caused completely broken rigid body physics and NaN errors
+#if 1
 	float s0 = 0.0f;
 	float s1 = 0.0f;
 	float s2 = 0.0f;
 	float s3 = 0.0f;
+	
 	int i = 0;
-	for( ; i < count - 3; i += 4 )
+	for( ; i + 4 <= count; i += 4 )
 	{
-		s0 += src0[i + 4] * src1[i + 4];
-		s1 += src0[i + 5] * src1[i + 5];
-		s2 += src0[i + 6] * src1[i + 6];
-		s3 += src0[i + 7] * src1[i + 7];
+		s0 += src0[i + 0] * src1[i + 0];
+		s1 += src0[i + 1] * src1[i + 1];
+		s2 += src0[i + 2] * src1[i + 2];
+		s3 += src0[i + 3] * src1[i + 3];
 	}
+	
 	switch( count - i )
 	{
 			NODEFAULT;
+	
+		case 4:
+			s3 += src0[i + 3] * src1[i + 3];
 		case 3:
-			s0 += src0[i + 2] * src1[i + 2];
+			s2 += src0[i + 2] * src1[i + 2];
 		case 2:
 			s1 += src0[i + 1] * src1[i + 1];
 		case 1:
-			s2 += src0[i + 0] * src1[i + 0];
+			s0 += src0[i + 0] * src1[i + 0];
 		case 0:
 			break;
 	}
 	return s0 + s1 + s2 + s3;
+#else
+	
+	float s = 0;
+	for( int i = 0; i < count; i++ )
+	{
+		s += src0[i] * src1[i];
+	}
+	
+	return s;
+#endif
+	// RB end
 	
 #endif
 }
@ -1519,114 +1537,64 @@ static void GetMaxStep_SIMD( const float* f, const float* a, const float* delta_
 	_mm_store_ss( & maxStep, vMaxStep );
 	limit = _mm_cvtsi128_si32( vLimit );
 	limitSide = _mm_cvtsi128_si32( vLimitSide );
+	
 #else
-	int i;
-	float s;
 	
 	// default to a full step for the current variable
-	if( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON )
 	{
-		maxStep = -a[d] / delta_a[d];
+		float negAccel = -a[d];
+		float deltaAccel = delta_a[d];
+		int m0 = ( fabs( deltaAccel ) > LCP_DELTA_ACCEL_EPSILON );
+		float step = negAccel / ( m0 ? deltaAccel : 1.0f );
+		maxStep = m0 ? step : 0.0f;
+		limit = d;
+		limitSide = 0;
 	}
-	else
-	{
-		maxStep = 0.0f;
-	}
-	limit = d;
-	limitSide = 0;
 	
 	// test the current variable
-	if( dir < 0.0f )
 	{
-		if( lo[d] != -idMath::INFINITY )
-		{
-			s = ( lo[d] - f[d] ) / dir;
-			if( s < maxStep )
-			{
-				maxStep = s;
-				limitSide = -1;
-			}
-		}
-	}
-	else
-	{
-		if( hi[d] != idMath::INFINITY )
-		{
-			s = ( hi[d] - f[d] ) / dir;
-			if( s < maxStep )
-			{
-				maxStep = s;
-				limitSide = 1;
-			}
-		}
+		float deltaForce = dir;
+		float forceLimit = ( deltaForce < 0.0f ) ? lo[d] : hi[d];
+		float step = ( forceLimit - f[d] ) / deltaForce;
+		int setSide = ( deltaForce < 0.0f ) ? -1 : 1;
+		int m0 = ( fabs( deltaForce ) > LCP_DELTA_FORCE_EPSILON );
+		int m1 = ( fabs( forceLimit ) != idMath::INFINITY );
+		int m2 = ( step < maxStep );
+		int m3 = ( m0 & m1 & m2 );
+		maxStep = m3 ? step : maxStep;
+		limit = m3 ? d : limit;
+		limitSide = m3 ? setSide : limitSide;
 	}
 	
 	// test the clamped bounded variables
-	for( i = numUnbounded; i < numClamped; i++ )
+	for( int i = numUnbounded; i < numClamped; i++ )
 	{
-		if( delta_f[i] < -LCP_DELTA_FORCE_EPSILON )
-		{
-			// if there is a low boundary
-			if( lo[i] != -idMath::INFINITY )
-			{
-				s = ( lo[i] - f[i] ) / delta_f[i];
-				if( s < maxStep )
-				{
-					maxStep = s;
-					limit = i;
-					limitSide = -1;
-				}
-			}
-		}
-		else if( delta_f[i] > LCP_DELTA_FORCE_EPSILON )
-		{
-			// if there is a high boundary
-			if( hi[i] != idMath::INFINITY )
-			{
-				s = ( hi[i] - f[i] ) / delta_f[i];
-				if( s < maxStep )
-				{
-					maxStep = s;
-					limit = i;
-					limitSide = 1;
-				}
-			}
-		}
+		float deltaForce = delta_f[i];
+		float forceLimit = ( deltaForce < 0.0f ) ? lo[i] : hi[i];
+		int m0 = ( fabs( deltaForce ) > LCP_DELTA_FORCE_EPSILON );
+		float step = ( forceLimit - f[i] ) / ( m0 ? deltaForce : 1.0f );
+		int setSide = ( deltaForce < 0.0f ) ? -1 : 1;
+		int m1 = ( fabs( forceLimit ) != idMath::INFINITY );
+		int m2 = ( step < maxStep );
+		int m3 = ( m0 & m1 & m2 );
+		maxStep = m3 ? step : maxStep;
+		limit = m3 ? i : limit;
+		limitSide = m3 ? setSide : limitSide;
 	}
 	
 	// test the not clamped bounded variables
-	for( i = numClamped; i < d; i++ )
+	for( int i = numClamped; i < d; i++ )
 	{
-		if( side[i] == -1 )
-		{
-			if( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON )
-			{
-				continue;
-			}
-		}
-		else if( side[i] == 1 )
-		{
-			if( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON )
-			{
-				continue;
-			}
-		}
-		else
-		{
-			continue;
-		}
-		// ignore variables for which the force is not allowed to take any substantial value
-		if( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON )
-		{
-			continue;
-		}
-		s = -a[i] / delta_a[i];
-		if( s < maxStep )
-		{
-			maxStep = s;
-			limit = i;
-			limitSide = 0;
-		}
+		float negAccel = -a[i];
+		float deltaAccel = delta_a[i];
+		int m0 = ( side[i] * deltaAccel > LCP_DELTA_ACCEL_EPSILON );
+		float step = negAccel / ( m0 ? deltaAccel : 1.0f );
+		int m1 = ( lo[i] < -LCP_BOUND_EPSILON || hi[i] > LCP_BOUND_EPSILON );
+		int m2 = ( step < maxStep );
+		int m3 = ( m0 & m1 & m2 );
+		maxStep = m3 ? step : maxStep;
+		limit = m3 ? i : limit;
+		limitSide = m3 ? 0 : limitSide;
 	}
 	
 #endif