/*
===========================================================================

Doom 3 GPL Source Code
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.

This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").

Doom 3 Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Doom 3 Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Doom 3 Source Code.  If not, see <http://www.gnu.org/licenses/>.

In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code.  If not, please request a copy in writing from id Software at the address below.

If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.

===========================================================================
*/

#include "../precompiled.h"
#pragma hdrstop

static idCVar lcp_showFailures( "lcp_showFailures", "0", CVAR_SYSTEM | CVAR_BOOL, "show LCP solver failures" );

const float LCP_BOUND_EPSILON			= 1e-5f;
const float LCP_ACCEL_EPSILON			= 1e-5f;
const float LCP_DELTA_ACCEL_EPSILON		= 1e-9f;
const float LCP_DELTA_FORCE_EPSILON		= 1e-9f;

#define IGNORE_UNSATISFIABLE_VARIABLES

//===============================================================
//                                                        M
//  idLCP_Square                                         MrE
//                                                        E
//===============================================================

class idLCP_Square : public idLCP {
public:
	virtual bool	Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );

private:
	idMatX			m;					// original matrix
	idVecX			b;					// right hand side
	idVecX			lo, hi;				// low and high bounds
	idVecX			f, a;				// force and acceleration
	idVecX			delta_f, delta_a;	// delta force and delta acceleration
	idMatX			clamped;			// LU factored sub matrix for clamped variables
	idVecX			diagonal;			// reciprocal of diagonal of U of the LU factored sub matrix for clamped variables
	int				numUnbounded;		// number of unbounded variables
	int				numClamped;			// number of clamped variables
	float **		rowPtrs;			// pointers to the rows of m
	int *			boxIndex;			// box index
	int *			side;				// tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
	int *			permuted;			// index to keep track of the permutation
	bool			padded;				// set to true if the rows of the initial matrix are 16 byte padded

private:
	bool			FactorClamped( void );
	void			SolveClamped( idVecX &x, const float *b );
	void			Swap( int i, int j );
	void			AddClamped( int r );
	void			RemoveClamped( int r );
	void			CalcForceDelta( int d, float dir );
	void			CalcAccelDelta( int d );
	void			ChangeForce( int d, float step );
	void			ChangeAccel( int d, float step );
	void			GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
};

/*
============
idLCP_Square::FactorClamped
============
*/
bool idLCP_Square::FactorClamped( void ) {
	int i, j, k;
	float s, d;

	for ( i = 0; i < numClamped; i++ ) {
		memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
	}

	for ( i = 0; i < numClamped; i++ ) {

		s = idMath::Fabs( clamped[i][i] );

		if ( s == 0.0f ) {
			return false;
		}

		diagonal[i] = d = 1.0f / clamped[i][i];
		for ( j = i + 1; j < numClamped; j++ ) {
			clamped[j][i] *= d;
		}

		for ( j = i + 1; j < numClamped; j++ ) {
			d = clamped[j][i];
			for ( k = i + 1; k < numClamped; k++ ) {
				clamped[j][k] -= d * clamped[i][k];
			}
		}
	}

	return true;
}

/*
============
idLCP_Square::SolveClamped
============
*/
void idLCP_Square::SolveClamped( idVecX &x, const float *b ) {
	int i, j;
	float sum;

	// solve L
	for ( i = 0; i < numClamped; i++ ) {
		sum = b[i];
		for ( j = 0; j < i; j++ ) {
			sum -= clamped[i][j] * x[j];
		}
		x[i] = sum;
	}

	// solve U
	for ( i = numClamped - 1; i >= 0; i-- ) {
		sum = x[i];
		for ( j = i + 1; j < numClamped; j++ ) {
			sum -= clamped[i][j] * x[j];
		}
		x[i] = sum * diagonal[i];
	}
}

/*
============
idLCP_Square::Swap
============
*/
void idLCP_Square::Swap( int i, int j ) {

	if ( i == j ) {
		return;
	}

	idSwap( rowPtrs[i], rowPtrs[j] );
	m.SwapColumns( i, j );
	b.SwapElements( i, j );
	lo.SwapElements( i, j );
	hi.SwapElements( i, j );
	a.SwapElements( i, j );
	f.SwapElements( i, j );
	if ( boxIndex ) {
		idSwap( boxIndex[i], boxIndex[j] );
	}
	idSwap( side[i], side[j] );
	idSwap( permuted[i], permuted[j] );
}

/*
============
idLCP_Square::AddClamped
============
*/
void idLCP_Square::AddClamped( int r ) {
	int i, j;
	float sum;

	assert( r >= numClamped );

	// add a row at the bottom and a column at the right of the factored
	// matrix for the clamped variables

	Swap( numClamped, r );

	// add row to L
	for ( i = 0; i < numClamped; i++ ) {
		sum = rowPtrs[numClamped][i];
		for ( j = 0; j < i; j++ ) {
			sum -= clamped[numClamped][j] * clamped[j][i];
		}
		clamped[numClamped][i] = sum * diagonal[i];
	}

	// add column to U
	for ( i = 0; i <= numClamped; i++ ) {
		sum = rowPtrs[i][numClamped];
		for ( j = 0; j < i; j++ ) {
			sum -= clamped[i][j] * clamped[j][numClamped];
		}
		clamped[i][numClamped] = sum;
	}

	diagonal[numClamped] = 1.0f / clamped[numClamped][numClamped];

	numClamped++;
}

/*
============
idLCP_Square::RemoveClamped
============
*/
void idLCP_Square::RemoveClamped( int r ) {
	int i, j;
	float *y0, *y1, *z0, *z1;
	double diag, beta0, beta1, p0, p1, q0, q1, d;

	assert( r < numClamped );

	numClamped--;

	// no need to swap and update the factored matrix when the last row and column are removed
	if ( r == numClamped ) {
		return;
	}

	y0 = (float *) _alloca16( numClamped * sizeof( float ) );
	z0 = (float *) _alloca16( numClamped * sizeof( float ) );
	y1 = (float *) _alloca16( numClamped * sizeof( float ) );
	z1 = (float *) _alloca16( numClamped * sizeof( float ) );

	// the row/column need to be subtracted from the factorization
	for ( i = 0; i < numClamped; i++ ) {
		y0[i] = -rowPtrs[i][r];
	}

	memset( y1, 0, numClamped * sizeof( float ) );
	y1[r] = 1.0f;

	memset( z0, 0, numClamped * sizeof( float ) );
	z0[r] = 1.0f;

	for ( i = 0; i < numClamped; i++ ) {
		z1[i] = -rowPtrs[r][i];
	}

	// swap the to be removed row/column with the last row/column
	Swap( r, numClamped );

	// the swapped last row/column need to be added to the factorization
	for ( i = 0; i < numClamped; i++ ) {
		y0[i] += rowPtrs[i][r];
	}

	for ( i = 0; i < numClamped; i++ ) {
		z1[i] += rowPtrs[r][i];
	}
	z1[r] = 0.0f;

	// update the beginning of the to be updated row and column
	for ( i = 0; i < r; i++ ) {
		p0 = y0[i];
		beta1 = z1[i] * diagonal[i];

		clamped[i][r] += p0;
		for ( j = i+1; j < numClamped; j++ ) {
			z1[j] -= beta1 * clamped[i][j];
		}
		for ( j = i+1; j < numClamped; j++ ) {
			y0[j] -= p0 * clamped[j][i];
		}
		clamped[r][i] += beta1;
	}

	// update the lower right corner starting at r,r
	for ( i = r; i < numClamped; i++ ) {
		diag = clamped[i][i];

		p0 = y0[i];
		p1 = z0[i];
		diag += p0 * p1;

		if ( diag == 0.0f ) {
			idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
			return;
		}

		beta0 = p1 / diag;

		q0 = y1[i];
		q1 = z1[i];
		diag += q0 * q1;

		if ( diag == 0.0f ) {
			idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
			return;
		}

		d = 1.0f / diag;
		beta1 = q1 * d;

		clamped[i][i] = diag;
		diagonal[i] = d;

		for ( j = i+1; j < numClamped; j++ ) {

			d = clamped[i][j];

			d += p0 * z0[j];
			z0[j] -= beta0 * d;

			d += q0 * z1[j];
			z1[j] -= beta1 * d;

			clamped[i][j] = d;
		}

		for ( j = i+1; j < numClamped; j++ ) {

			d = clamped[j][i];

			y0[j] -= p0 * d;
			d += beta0 * y0[j];

			y1[j] -= q0 * d;
			d += beta1 * y1[j];

			clamped[j][i] = d;
		}
	}
	return;
}

/*
============
idLCP_Square::CalcForceDelta

  modifies this->delta_f
============
*/
ID_INLINE void idLCP_Square::CalcForceDelta( int d, float dir ) {
	int i;
	float *ptr;

	delta_f[d] = dir;

	if ( numClamped == 0 ) {
		return;
	}

	// get column d of matrix
	ptr = (float *) _alloca16( numClamped * sizeof( float ) );
	for ( i = 0; i < numClamped; i++ ) {
		ptr[i] = rowPtrs[i][d];
	}

	// solve force delta
	SolveClamped( delta_f, ptr );

	// flip force delta based on direction
	if ( dir > 0.0f ) {
		ptr = delta_f.ToFloatPtr();
		for ( i = 0; i < numClamped; i++ ) {
			ptr[i] = - ptr[i];
		}
	}
}

/*
============
idLCP_Square::CalcAccelDelta

  modifies this->delta_a and uses this->delta_f
============
*/
ID_INLINE void idLCP_Square::CalcAccelDelta( int d ) {
	int j;
	float dot;

	// only the not clamped variables, including the current variable, can have a change in acceleration
	for ( j = numClamped; j <= d; j++ ) {
		// only the clamped variables and the current variable have a force delta unequal zero
		SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
		delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
	}
}

/*
============
idLCP_Square::ChangeForce

  modifies this->f and uses this->delta_f
============
*/
ID_INLINE void idLCP_Square::ChangeForce( int d, float step ) {
	// only the clamped variables and current variable have a force delta unequal zero
	SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
	f[d] += step * delta_f[d];
}

/*
============
idLCP_Square::ChangeAccel

  modifies this->a and uses this->delta_a
============
*/
ID_INLINE void idLCP_Square::ChangeAccel( int d, float step ) {
	// only the not clamped variables, including the current variable, can have an acceleration unequal zero
	SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
}

/*
============
idLCP_Square::GetMaxStep
============
*/
void idLCP_Square::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
	int i;
	float s;

	// default to a full step for the current variable
	if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
		maxStep = -a[d] / delta_a[d];
	} else {
		maxStep = 0.0f;
	}
	limit = d;
	limitSide = 0;

	// test the current variable
	if ( dir < 0.0f ) {
		if ( lo[d] != -idMath::INFINITY ) {
			s = ( lo[d] - f[d] ) / dir;
			if ( s < maxStep ) {
				maxStep = s;
				limitSide = -1;
			}
		}
	} else {
		if ( hi[d] != idMath::INFINITY ) {
			s = ( hi[d] - f[d] ) / dir;
			if ( s < maxStep ) {
				maxStep = s;
				limitSide = 1;
			}
		}
	}

	// test the clamped bounded variables
	for ( i = numUnbounded; i < numClamped; i++ ) {
		if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
			// if there is a low boundary
			if ( lo[i] != -idMath::INFINITY ) {
				s = ( lo[i] - f[i] ) / delta_f[i];
				if ( s < maxStep ) {
					maxStep = s;
					limit = i;
					limitSide = -1;
				}
			}
		} else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
			// if there is a high boundary
			if ( hi[i] != idMath::INFINITY ) {
				s = ( hi[i] - f[i] ) / delta_f[i];
				if ( s < maxStep ) {
					maxStep = s;
					limit = i;
					limitSide = 1;
				}
			}
		}
	}

	// test the not clamped bounded variables
	for ( i = numClamped; i < d; i++ ) {
		if ( side[i] == -1 ) {
			if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
				continue;
			}
		} else if ( side[i] == 1 ) {
			if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
				continue;
			}
		} else {
			continue;
		}
		// ignore variables for which the force is not allowed to take any substantial value
		if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
			continue;
		}
		s = -a[i] / delta_a[i];
		if ( s < maxStep ) {
			maxStep = s;
			limit = i;
			limitSide = 0;
		}
	}
}

/*
============
idLCP_Square::Solve
============
*/
bool idLCP_Square::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
	int i, j, n, limit, limitSide, boxStartIndex;
	float dir, maxStep, dot, s;
	char *failed;

	// true when the matrix rows are 16 byte padded
	padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();

	assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
	assert( o_x.GetSize() == o_m.GetNumRows() );
	assert( o_b.GetSize() == o_m.GetNumRows() );
	assert( o_lo.GetSize() == o_m.GetNumRows() );
	assert( o_hi.GetSize() == o_m.GetNumRows() );

	// allocate memory for permuted input
	f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
	a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
	b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
	lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
	hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
	if ( o_boxIndex ) {
		boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
		memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
	} else {
		boxIndex = NULL;
	}

	// we override the const on o_m here but on exit the matrix is unchanged
	m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
	f.Zero();
	a.Zero();
	b = o_b;
	lo = o_lo;
	hi = o_hi;

	// pointers to the rows of m
	rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		rowPtrs[i] = m[i];
	}

	// tells if a variable is at the low boundary, high boundary or inbetween
	side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );

	// index to keep track of the permutation
	permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		permuted[i] = i;
	}

	// permute input so all unbounded variables come first
	numUnbounded = 0;
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
			if ( numUnbounded != i ) {
				Swap( numUnbounded, i );
			}
			numUnbounded++;
		}
	}

	// permute input so all variables using the boxIndex come last
	boxStartIndex = m.GetNumRows();
	if ( boxIndex ) {
		for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
			if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
				boxStartIndex--;
				if ( boxStartIndex != i ) {
					Swap( boxStartIndex, i );
				}
			}
		}
	}

	// sub matrix for factorization
	clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
	diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );

	// all unbounded variables are clamped
	numClamped = numUnbounded;

	// if there are unbounded variables
	if ( numUnbounded ) {

		// factor and solve for unbounded variables
		if ( !FactorClamped() ) {
			idLib::common->Printf( "idLCP_Square::Solve: unbounded factorization failed\n" );
			return false;
		}
		SolveClamped( f, b.ToFloatPtr() );

		// if there are no bounded variables we are done
		if ( numUnbounded == m.GetNumRows() ) {
			o_x = f;	// the vector is not permuted
			return true;
		}
	}

#ifdef IGNORE_UNSATISFIABLE_VARIABLES
	int numIgnored = 0;
#endif

	// allocate for delta force and delta acceleration
	delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
	delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );

	// solve for bounded variables
	failed = NULL;
	for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {

		// once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
		if ( i == boxStartIndex ) {
			for ( j = 0; j < boxStartIndex; j++ ) {
				o_x[permuted[j]] = f[j];
			}
			for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
				s = o_x[boxIndex[j]];
				if ( lo[j] != -idMath::INFINITY ) {
					lo[j] = - idMath::Fabs( lo[j] * s );
				}
				if ( hi[j] != idMath::INFINITY ) {
					hi[j] = idMath::Fabs( hi[j] * s );
				}
			}
		}

		// calculate acceleration for current variable
		SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
		a[i] = dot - b[i];

		// if already at the low boundary
		if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
			side[i] = -1;
			continue;
		}

		// if already at the high boundary
		if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
			side[i] = 1;
			continue;
		}

		// if inside the clamped region
		if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
			side[i] = 0;
			AddClamped( i );
			continue;
		}

		// drive the current variable into a valid region
		for ( n = 0; n < maxIterations; n++ ) {

			// direction to move
			if ( a[i] <= 0.0f ) {
				dir = 1.0f;
			} else {
				dir = -1.0f;
			}

			// calculate force delta
			CalcForceDelta( i, dir );

			// calculate acceleration delta: delta_a = m * delta_f;
			CalcAccelDelta( i );

			// maximum step we can take
			GetMaxStep( i, dir, maxStep, limit, limitSide );

			if ( maxStep <= 0.0f ) {
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
				// ignore the current variable completely
				lo[i] = hi[i] = 0.0f;
				f[i] = 0.0f;
				side[i] = -1;
				numIgnored++;
#else
				failed = va( "invalid step size %.4f", maxStep );
#endif
				break;
			}

			// change force
			ChangeForce( i, maxStep );

			// change acceleration
			ChangeAccel( i, maxStep );

			// clamp/unclamp the variable that limited this step
			side[limit] = limitSide;
			switch( limitSide ) {
				case 0: {
					a[limit] = 0.0f;
					AddClamped( limit );
					break;
				}
				case -1: {
					f[limit] = lo[limit];
					if ( limit != i ) {
						RemoveClamped( limit );
					}
					break;
				}
				case 1: {
					f[limit] = hi[limit];
					if ( limit != i ) {
						RemoveClamped( limit );
					}
					break;
				}
			}

			// if the current variable limited the step we can continue with the next variable
			if ( limit == i ) {
				break;
			}
		}

		if ( n >= maxIterations ) {
			failed = va( "max iterations %d", maxIterations );
			break;
		}

		if ( failed ) {
			break;
		}
	}

#ifdef IGNORE_UNSATISFIABLE_VARIABLES
	if ( numIgnored ) {
		if ( lcp_showFailures.GetBool() ) {
			idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
		}
	}
#endif

	// if failed clear remaining forces
	if ( failed ) {
		if ( lcp_showFailures.GetBool() ) {
			idLib::common->Printf( "idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
		}
		for ( j = i; j < m.GetNumRows(); j++ ) {
			f[j] = 0.0f;
		}
	}

#if defined(_DEBUG) && 0
	if ( !failed ) {
		// test whether or not the solution satisfies the complementarity conditions
		for ( i = 0; i < m.GetNumRows(); i++ ) {
			a[i] = -b[i];
			for ( j = 0; j < m.GetNumRows(); j++ ) {
				a[i] += rowPtrs[i][j] * f[j];
			}

			if ( f[i] == lo[i] ) {
				if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
					int bah1 = 1;
				}
			} else if ( f[i] == hi[i] ) {
				if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
					int bah2 = 1;
				}
			} else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
				int bah3 = 1;
			}
		}
	}
#endif

	// unpermute result
	for ( i = 0; i < f.GetSize(); i++ ) {
		o_x[permuted[i]] = f[i];
	}

	// unpermute original matrix
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		for ( j = 0; j < m.GetNumRows(); j++ ) {
			if ( permuted[j] == i ) {
				break;
			}
		}
		if ( i != j ) {
			m.SwapColumns( i, j );
			idSwap( permuted[i], permuted[j] );
		}
	}

	return true;
}


//===============================================================
//                                                        M
//  idLCP_Symmetric                                      MrE
//                                                        E
//===============================================================

class idLCP_Symmetric : public idLCP {
public:
	virtual bool	Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );

private:
	idMatX			m;					// original matrix
	idVecX			b;					// right hand side
	idVecX			lo, hi;				// low and high bounds
	idVecX			f, a;				// force and acceleration
	idVecX			delta_f, delta_a;	// delta force and delta acceleration
	idMatX			clamped;			// LDLt factored sub matrix for clamped variables
	idVecX			diagonal;			// reciprocal of diagonal of LDLt factored sub matrix for clamped variables
	idVecX			solveCache1;		// intermediate result cached in SolveClamped
	idVecX			solveCache2;		// "
	int				numUnbounded;		// number of unbounded variables
	int				numClamped;			// number of clamped variables
	int				clampedChangeStart;	// lowest row/column changed in the clamped matrix during an iteration
	float **		rowPtrs;			// pointers to the rows of m
	int *			boxIndex;			// box index
	int *			side;				// tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
	int *			permuted;			// index to keep track of the permutation
	bool			padded;				// set to true if the rows of the initial matrix are 16 byte padded

private:
	bool			FactorClamped( void );
	void			SolveClamped( idVecX &x, const float *b );
	void			Swap( int i, int j );
	void			AddClamped( int r, bool useSolveCache );
	void			RemoveClamped( int r );
	void			CalcForceDelta( int d, float dir );
	void			CalcAccelDelta( int d );
	void			ChangeForce( int d, float step );
	void			ChangeAccel( int d, float step );
	void			GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
};

/*
============
idLCP_Symmetric::FactorClamped
============
*/
bool idLCP_Symmetric::FactorClamped( void ) {

	clampedChangeStart = 0;

	for ( int i = 0; i < numClamped; i++ ) {
		memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
	}
	return SIMDProcessor->MatX_LDLTFactor( clamped, diagonal, numClamped );
}

/*
============
idLCP_Symmetric::SolveClamped
============
*/
void idLCP_Symmetric::SolveClamped( idVecX &x, const float *b ) {

	// solve L
	SIMDProcessor->MatX_LowerTriangularSolve( clamped, solveCache1.ToFloatPtr(), b, numClamped, clampedChangeStart );

	// solve D
	SIMDProcessor->Mul( solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), diagonal.ToFloatPtr(), numClamped );

	// solve Lt
	SIMDProcessor->MatX_LowerTriangularSolveTranspose( clamped, x.ToFloatPtr(), solveCache2.ToFloatPtr(), numClamped );

	clampedChangeStart = numClamped;
}

/*
============
idLCP_Symmetric::Swap
============
*/
void idLCP_Symmetric::Swap( int i, int j ) {

	if ( i == j ) {
		return;
	}

	idSwap( rowPtrs[i], rowPtrs[j] );
	m.SwapColumns( i, j );
	b.SwapElements( i, j );
	lo.SwapElements( i, j );
	hi.SwapElements( i, j );
	a.SwapElements( i, j );
	f.SwapElements( i, j );
	if ( boxIndex ) {
		idSwap( boxIndex[i], boxIndex[j] );
	}
	idSwap( side[i], side[j] );
	idSwap( permuted[i], permuted[j] );
}

/*
============
idLCP_Symmetric::AddClamped
============
*/
void idLCP_Symmetric::AddClamped( int r, bool useSolveCache ) {
	float d, dot;

	assert( r >= numClamped );

	if ( numClamped < clampedChangeStart ) {
		clampedChangeStart = numClamped;
	}

	// add a row at the bottom and a column at the right of the factored
	// matrix for the clamped variables

	Swap( numClamped, r );

	// solve for v in L * v = rowPtr[numClamped]
	if ( useSolveCache ) {

		// the lower triangular solve was cached in SolveClamped called by CalcForceDelta
		memcpy( clamped[numClamped], solveCache2.ToFloatPtr(), numClamped * sizeof( float ) );
		// calculate row dot product
		SIMDProcessor->Dot( dot, solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), numClamped );

	} else {

		float *v = (float *) _alloca16( numClamped * sizeof( float ) );

		SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[numClamped], numClamped );
		// add bottom row to L
		SIMDProcessor->Mul( clamped[numClamped], v, diagonal.ToFloatPtr(), numClamped );
		// calculate row dot product
		SIMDProcessor->Dot( dot, clamped[numClamped], v, numClamped );
	}

	// update diagonal[numClamped]
	d = rowPtrs[numClamped][numClamped] - dot;

	if ( d == 0.0f ) {
		idLib::common->Printf( "idLCP_Symmetric::AddClamped: updating factorization failed\n" );
		numClamped++;
		return;
	}

	clamped[numClamped][numClamped] = d;
	diagonal[numClamped] = 1.0f / d;

	numClamped++;
}

/*
============
idLCP_Symmetric::RemoveClamped
============
*/
void idLCP_Symmetric::RemoveClamped( int r ) {
	int i, j, n;
	float *addSub, *original, *v, *ptr, *v1, *v2, dot;
	double sum, diag, newDiag, invNewDiag, p1, p2, alpha1, alpha2, beta1, beta2;

	assert( r < numClamped );

	if ( r < clampedChangeStart ) {
		clampedChangeStart = r;
	}

	numClamped--;

	// no need to swap and update the factored matrix when the last row and column are removed
	if ( r == numClamped ) {
		return;
	}

	// swap the to be removed row/column with the last row/column
	Swap( r, numClamped );

	// update the factored matrix
	addSub = (float *) _alloca16( numClamped * sizeof( float ) );

	if ( r == 0 ) {

		if ( numClamped == 1 ) {
			diag = rowPtrs[0][0];
			if ( diag == 0.0f ) {
				idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
				return;
			}
			clamped[0][0] = diag;
			diagonal[0] = 1.0f / diag;
			return;
		}

		// calculate the row/column to be added to the lower right sub matrix starting at (r, r)
		original = rowPtrs[numClamped];
		ptr = rowPtrs[r];
		addSub[0] = ptr[0] - original[numClamped];
		for ( i = 1; i < numClamped; i++ ) {
			addSub[i] = ptr[i] - original[i];
		}

	} else {

		v = (float *) _alloca16( numClamped * sizeof( float ) );

		// solve for v in L * v = rowPtr[r]
		SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[r], r );

		// update removed row
		SIMDProcessor->Mul( clamped[r], v, diagonal.ToFloatPtr(), r );

		// if the last row/column of the matrix is updated
		if ( r == numClamped - 1 ) {
			// only calculate new diagonal
			SIMDProcessor->Dot( dot, clamped[r], v, r );
			diag = rowPtrs[r][r] - dot;
			if ( diag == 0.0f ) {
				idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
				return;
			}
			clamped[r][r] = diag;
			diagonal[r] = 1.0f / diag;
			return;
		}

		// calculate the row/column to be added to the lower right sub matrix starting at (r, r)
		for ( i = 0; i < r; i++ ) {
			v[i] = clamped[r][i] * clamped[i][i];
		}
		for ( i = r; i < numClamped; i++ ) {
			if ( i == r ) {
				sum = clamped[r][r];
			} else {
				sum = clamped[r][r] * clamped[i][r];
			}
			ptr = clamped[i];
			for ( j = 0; j < r; j++ ) {
				sum += ptr[j] * v[j];
			}
			addSub[i] = rowPtrs[r][i] - sum;
		}
	}

	// add row/column to the lower right sub matrix starting at (r, r)

	v1 = (float *) _alloca16( numClamped * sizeof( float ) );
	v2 = (float *) _alloca16( numClamped * sizeof( float ) );

	diag = idMath::SQRT_1OVER2;
	v1[r] = ( 0.5f * addSub[r] + 1.0f ) * diag;
	v2[r] = ( 0.5f * addSub[r] - 1.0f ) * diag;
	for ( i = r+1; i < numClamped; i++ ) {
		v1[i] = v2[i] = addSub[i] * diag;
	}

	alpha1 = 1.0f;
	alpha2 = -1.0f;

	// simultaneous update/downdate of the sub matrix starting at (r, r)
	n = clamped.GetNumColumns();
	for ( i = r; i < numClamped; i++ ) {

		diag = clamped[i][i];
		p1 = v1[i];
		newDiag = diag + alpha1 * p1 * p1;

		if ( newDiag == 0.0f ) {
			idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
			return;
		}

		alpha1 /= newDiag;
		beta1 = p1 * alpha1;
		alpha1 *= diag;

		diag = newDiag;
		p2 = v2[i];
		newDiag = diag + alpha2 * p2 * p2;

		if ( newDiag == 0.0f ) {
			idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
			return;
		}

		clamped[i][i] = newDiag;
		diagonal[i] = invNewDiag = 1.0f / newDiag;

		alpha2 *= invNewDiag;
		beta2 = p2 * alpha2;
		alpha2 *= diag;

		// update column below diagonal (i,i)
		ptr = clamped.ToFloatPtr() + i;

		for ( j = i+1; j < numClamped - 1; j += 2 ) {

			float sum0 = ptr[(j+0)*n];
			float sum1 = ptr[(j+1)*n];

			v1[j+0] -= p1 * sum0;
			v1[j+1] -= p1 * sum1;

			sum0 += beta1 * v1[j+0];
			sum1 += beta1 * v1[j+1];

			v2[j+0] -= p2 * sum0;
			v2[j+1] -= p2 * sum1;

			sum0 += beta2 * v2[j+0];
			sum1 += beta2 * v2[j+1];

			ptr[(j+0)*n] = sum0;
			ptr[(j+1)*n] = sum1;
		}

		for ( ; j < numClamped; j++ ) {

			sum = ptr[j*n];

			v1[j] -= p1 * sum;
			sum += beta1 * v1[j];

			v2[j] -= p2 * sum;
			sum += beta2 * v2[j];

			ptr[j*n] = sum;
		}
	}
}

/*
============
idLCP_Symmetric::CalcForceDelta

  modifies this->delta_f
============
*/
ID_INLINE void idLCP_Symmetric::CalcForceDelta( int d, float dir ) {
	int i;
	float *ptr;

	delta_f[d] = dir;

	if ( numClamped == 0 ) {
		return;
	}

	// solve force delta
	SolveClamped( delta_f, rowPtrs[d] );

	// flip force delta based on direction
	if ( dir > 0.0f ) {
		ptr = delta_f.ToFloatPtr();
		for ( i = 0; i < numClamped; i++ ) {
			ptr[i] = - ptr[i];
		}
	}
}

/*
============
idLCP_Symmetric::CalcAccelDelta

  modifies this->delta_a and uses this->delta_f
============
*/
ID_INLINE void idLCP_Symmetric::CalcAccelDelta( int d ) {
	int j;
	float dot;

	// only the not clamped variables, including the current variable, can have a change in acceleration
	for ( j = numClamped; j <= d; j++ ) {
		// only the clamped variables and the current variable have a force delta unequal zero
		SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
		delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
	}
}

/*
============
idLCP_Symmetric::ChangeForce

  modifies this->f and uses this->delta_f
============
*/
ID_INLINE void idLCP_Symmetric::ChangeForce( int d, float step ) {
	// only the clamped variables and current variable have a force delta unequal zero
	SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
	f[d] += step * delta_f[d];
}

/*
============
idLCP_Symmetric::ChangeAccel

  modifies this->a and uses this->delta_a
============
*/
ID_INLINE void idLCP_Symmetric::ChangeAccel( int d, float step ) {
	// only the not clamped variables, including the current variable, can have an acceleration unequal zero
	SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
}

/*
============
idLCP_Symmetric::GetMaxStep
============
*/
void idLCP_Symmetric::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
	int i;
	float s;

	// default to a full step for the current variable
	if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
		maxStep = -a[d] / delta_a[d];
	} else {
		maxStep = 0.0f;
	}
	limit = d;
	limitSide = 0;

	// test the current variable
	if ( dir < 0.0f ) {
		if ( lo[d] != -idMath::INFINITY ) {
			s = ( lo[d] - f[d] ) / dir;
			if ( s < maxStep ) {
				maxStep = s;
				limitSide = -1;
			}
		}
	} else {
		if ( hi[d] != idMath::INFINITY ) {
			s = ( hi[d] - f[d] ) / dir;
			if ( s < maxStep ) {
				maxStep = s;
				limitSide = 1;
			}
		}
	}

	// test the clamped bounded variables
	for ( i = numUnbounded; i < numClamped; i++ ) {
		if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
			// if there is a low boundary
			if ( lo[i] != -idMath::INFINITY ) {
				s = ( lo[i] - f[i] ) / delta_f[i];
				if ( s < maxStep ) {
					maxStep = s;
					limit = i;
					limitSide = -1;
				}
			}
		} else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
			// if there is a high boundary
			if ( hi[i] != idMath::INFINITY ) {
				s = ( hi[i] - f[i] ) / delta_f[i];
				if ( s < maxStep ) {
					maxStep = s;
					limit = i;
					limitSide = 1;
				}
			}
		}
	}

	// test the not clamped bounded variables
	for ( i = numClamped; i < d; i++ ) {
		if ( side[i] == -1 ) {
			if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
				continue;
			}
		} else if ( side[i] == 1 ) {
			if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
				continue;
			}
		} else {
			continue;
		}
		// ignore variables for which the force is not allowed to take any substantial value
		if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
			continue;
		}
		s = -a[i] / delta_a[i];
		if ( s < maxStep ) {
			maxStep = s;
			limit = i;
			limitSide = 0;
		}
	}
}

/*
============
idLCP_Symmetric::Solve
============
*/
bool idLCP_Symmetric::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
	int i, j, n, limit, limitSide, boxStartIndex;
	float dir, maxStep, dot, s;
	char *failed;

	// true when the matrix rows are 16 byte padded
	padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();

	assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
	assert( o_x.GetSize() == o_m.GetNumRows() );
	assert( o_b.GetSize() == o_m.GetNumRows() );
	assert( o_lo.GetSize() == o_m.GetNumRows() );
	assert( o_hi.GetSize() == o_m.GetNumRows() );

	// allocate memory for permuted input
	f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
	a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
	b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
	lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
	hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
	if ( o_boxIndex ) {
		boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
		memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
	} else {
		boxIndex = NULL;
	}

	// we override the const on o_m here but on exit the matrix is unchanged
	m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
	f.Zero();
	a.Zero();
	b = o_b;
	lo = o_lo;
	hi = o_hi;

	// pointers to the rows of m
	rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		rowPtrs[i] = m[i];
	}

	// tells if a variable is at the low boundary, high boundary or inbetween
	side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );

	// index to keep track of the permutation
	permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		permuted[i] = i;
	}

	// permute input so all unbounded variables come first
	numUnbounded = 0;
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
			if ( numUnbounded != i ) {
				Swap( numUnbounded, i );
			}
			numUnbounded++;
		}
	}

	// permute input so all variables using the boxIndex come last
	boxStartIndex = m.GetNumRows();
	if ( boxIndex ) {
		for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
			if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
				boxStartIndex--;
				if ( boxStartIndex != i ) {
					Swap( boxStartIndex, i );
				}
			}
		}
	}

	// sub matrix for factorization
	clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
	diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
	solveCache1.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
	solveCache2.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );

	// all unbounded variables are clamped
	numClamped = numUnbounded;

	// if there are unbounded variables
	if ( numUnbounded ) {

		// factor and solve for unbounded variables
		if ( !FactorClamped() ) {
			idLib::common->Printf( "idLCP_Symmetric::Solve: unbounded factorization failed\n" );
			return false;
		}
		SolveClamped( f, b.ToFloatPtr() );

		// if there are no bounded variables we are done
		if ( numUnbounded == m.GetNumRows() ) {
			o_x = f;	// the vector is not permuted
			return true;
		}
	}

#ifdef IGNORE_UNSATISFIABLE_VARIABLES
	int numIgnored = 0;
#endif

	// allocate for delta force and delta acceleration
	delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
	delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );

	// solve for bounded variables
	failed = NULL;
	for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {

		clampedChangeStart = 0;

		// once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
		if ( i == boxStartIndex ) {
			for ( j = 0; j < boxStartIndex; j++ ) {
				o_x[permuted[j]] = f[j];
			}
			for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
				s = o_x[boxIndex[j]];
				if ( lo[j] != -idMath::INFINITY ) {
					lo[j] = - idMath::Fabs( lo[j] * s );
				}
				if ( hi[j] != idMath::INFINITY ) {
					hi[j] = idMath::Fabs( hi[j] * s );
				}
			}
		}

		// calculate acceleration for current variable
		SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
		a[i] = dot - b[i];

		// if already at the low boundary
		if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
			side[i] = -1;
			continue;
		}

		// if already at the high boundary
		if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
			side[i] = 1;
			continue;
		}

		// if inside the clamped region
		if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
			side[i] = 0;
			AddClamped( i, false );
			continue;
		}

		// drive the current variable into a valid region
		for ( n = 0; n < maxIterations; n++ ) {

			// direction to move
			if ( a[i] <= 0.0f ) {
				dir = 1.0f;
			} else {
				dir = -1.0f;
			}

			// calculate force delta
			CalcForceDelta( i, dir );

			// calculate acceleration delta: delta_a = m * delta_f;
			CalcAccelDelta( i );

			// maximum step we can take
			GetMaxStep( i, dir, maxStep, limit, limitSide );

			if ( maxStep <= 0.0f ) {
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
				// ignore the current variable completely
				lo[i] = hi[i] = 0.0f;
				f[i] = 0.0f;
				side[i] = -1;
				numIgnored++;
#else
				failed = va( "invalid step size %.4f", maxStep );
#endif
				break;
			}

			// change force
			ChangeForce( i, maxStep );

			// change acceleration
			ChangeAccel( i, maxStep );

			// clamp/unclamp the variable that limited this step
			side[limit] = limitSide;
			switch( limitSide ) {
				case 0: {
					a[limit] = 0.0f;
					AddClamped( limit, ( limit == i ) );
					break;
				}
				case -1: {
					f[limit] = lo[limit];
					if ( limit != i ) {
						RemoveClamped( limit );
					}
					break;
				}
				case 1: {
					f[limit] = hi[limit];
					if ( limit != i ) {
						RemoveClamped( limit );
					}
					break;
				}
			}

			// if the current variable limited the step we can continue with the next variable
			if ( limit == i ) {
				break;
			}
		}

		if ( n >= maxIterations ) {
			failed = va( "max iterations %d", maxIterations );
			break;
		}

		if ( failed ) {
			break;
		}
	}

#ifdef IGNORE_UNSATISFIABLE_VARIABLES
	if ( numIgnored ) {
		if ( lcp_showFailures.GetBool() ) {
			idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
		}
	}
#endif

	// if failed clear remaining forces
	if ( failed ) {
		if ( lcp_showFailures.GetBool() ) {
			idLib::common->Printf( "idLCP_Symmetric::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
		}
		for ( j = i; j < m.GetNumRows(); j++ ) {
			f[j] = 0.0f;
		}
	}

#if defined(_DEBUG) && 0
	if ( !failed ) {
		// test whether or not the solution satisfies the complementarity conditions
		for ( i = 0; i < m.GetNumRows(); i++ ) {
			a[i] = -b[i];
			for ( j = 0; j < m.GetNumRows(); j++ ) {
				a[i] += rowPtrs[i][j] * f[j];
			}

			if ( f[i] == lo[i] ) {
				if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
					int bah1 = 1;
				}
			} else if ( f[i] == hi[i] ) {
				if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
					int bah2 = 1;
				}
			} else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
				int bah3 = 1;
			}
		}
	}
#endif

	// unpermute result
	for ( i = 0; i < f.GetSize(); i++ ) {
		o_x[permuted[i]] = f[i];
	}

	// unpermute original matrix
	for ( i = 0; i < m.GetNumRows(); i++ ) {
		for ( j = 0; j < m.GetNumRows(); j++ ) {
			if ( permuted[j] == i ) {
				break;
			}
		}
		if ( i != j ) {
			m.SwapColumns( i, j );
			idSwap( permuted[i], permuted[j] );
		}
	}

	return true;
}


//===============================================================
//
//	idLCP
//
//===============================================================

/*
============
idLCP::AllocSquare
============
*/
idLCP *idLCP::AllocSquare( void ) {
	idLCP *lcp = new idLCP_Square;
	lcp->SetMaxIterations( 32 );
	return lcp;
}

/*
============
idLCP::AllocSymmetric
============
*/
idLCP *idLCP::AllocSymmetric( void ) {
	idLCP *lcp = new idLCP_Symmetric;
	lcp->SetMaxIterations( 32 );
	return lcp;
}

/*
============
idLCP::~idLCP
============
*/
idLCP::~idLCP( void ) {
}

/*
============
idLCP::SetMaxIterations
============
*/
void idLCP::SetMaxIterations( int max ) {
	maxIterations = max;
}

/*
============
idLCP::GetMaxIterations
============
*/
int idLCP::GetMaxIterations( void ) {
	return maxIterations;
}