mirror of
https://github.com/dhewm/dhewm3-sdk.git
synced 2024-11-25 14:01:10 +00:00
912f2d6dd7
On Windows, ID_INLINE does __forceinline, which is bad if the function calls alloca() and is called in a loop.. So use just __inline there so the compiler can choose not to inline (if called in a loop). This didn't cause actual stack overflows as far as I know, but it could (and MSVC warns about it). (This includes "Fix ID_MAYBE_INLINE on non-Windows platforms")
1645 lines
40 KiB
C++
1645 lines
40 KiB
C++
/*
|
|
===========================================================================
|
|
|
|
Doom 3 GPL Source Code
|
|
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
|
|
|
|
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
|
|
|
|
Doom 3 Source Code is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Doom 3 Source Code is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
|
|
|
|
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
|
|
|
|
===========================================================================
|
|
*/
|
|
|
|
#include "sys/platform.h"
|
|
|
|
#include "idlib/math/Lcp.h"
|
|
|
|
static idCVar lcp_showFailures( "lcp_showFailures", "0", CVAR_SYSTEM | CVAR_BOOL, "show LCP solver failures" );
|
|
|
|
const float LCP_BOUND_EPSILON = 1e-5f;
|
|
const float LCP_ACCEL_EPSILON = 1e-5f;
|
|
const float LCP_DELTA_ACCEL_EPSILON = 1e-9f;
|
|
const float LCP_DELTA_FORCE_EPSILON = 1e-9f;
|
|
|
|
#define IGNORE_UNSATISFIABLE_VARIABLES
|
|
|
|
//===============================================================
|
|
// M
|
|
// idLCP_Square MrE
|
|
// E
|
|
//===============================================================
|
|
|
|
class idLCP_Square : public idLCP {
|
|
public:
|
|
virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
|
|
|
|
private:
|
|
idMatX m; // original matrix
|
|
idVecX b; // right hand side
|
|
idVecX lo, hi; // low and high bounds
|
|
idVecX f, a; // force and acceleration
|
|
idVecX delta_f, delta_a; // delta force and delta acceleration
|
|
idMatX clamped; // LU factored sub matrix for clamped variables
|
|
idVecX diagonal; // reciprocal of diagonal of U of the LU factored sub matrix for clamped variables
|
|
int numUnbounded; // number of unbounded variables
|
|
int numClamped; // number of clamped variables
|
|
float ** rowPtrs; // pointers to the rows of m
|
|
int * boxIndex; // box index
|
|
int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
|
|
int * permuted; // index to keep track of the permutation
|
|
bool padded; // set to true if the rows of the initial matrix are 16 byte padded
|
|
|
|
private:
|
|
bool FactorClamped( void );
|
|
void SolveClamped( idVecX &x, const float *b );
|
|
void Swap( int i, int j );
|
|
void AddClamped( int r );
|
|
void RemoveClamped( int r );
|
|
void CalcForceDelta( int d, float dir );
|
|
void CalcAccelDelta( int d );
|
|
void ChangeForce( int d, float step );
|
|
void ChangeAccel( int d, float step );
|
|
void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
|
|
};
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::FactorClamped
|
|
============
|
|
*/
|
|
bool idLCP_Square::FactorClamped( void ) {
|
|
int i, j, k;
|
|
float s, d;
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
|
|
}
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
|
|
s = idMath::Fabs( clamped[i][i] );
|
|
|
|
if ( s == 0.0f ) {
|
|
return false;
|
|
}
|
|
|
|
diagonal[i] = d = 1.0f / clamped[i][i];
|
|
for ( j = i + 1; j < numClamped; j++ ) {
|
|
clamped[j][i] *= d;
|
|
}
|
|
|
|
for ( j = i + 1; j < numClamped; j++ ) {
|
|
d = clamped[j][i];
|
|
for ( k = i + 1; k < numClamped; k++ ) {
|
|
clamped[j][k] -= d * clamped[i][k];
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::SolveClamped
|
|
============
|
|
*/
|
|
void idLCP_Square::SolveClamped( idVecX &x, const float *b ) {
|
|
int i, j;
|
|
float sum;
|
|
|
|
// solve L
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
sum = b[i];
|
|
for ( j = 0; j < i; j++ ) {
|
|
sum -= clamped[i][j] * x[j];
|
|
}
|
|
x[i] = sum;
|
|
}
|
|
|
|
// solve U
|
|
for ( i = numClamped - 1; i >= 0; i-- ) {
|
|
sum = x[i];
|
|
for ( j = i + 1; j < numClamped; j++ ) {
|
|
sum -= clamped[i][j] * x[j];
|
|
}
|
|
x[i] = sum * diagonal[i];
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::Swap
|
|
============
|
|
*/
|
|
void idLCP_Square::Swap( int i, int j ) {
|
|
|
|
if ( i == j ) {
|
|
return;
|
|
}
|
|
|
|
idSwap( rowPtrs[i], rowPtrs[j] );
|
|
m.SwapColumns( i, j );
|
|
b.SwapElements( i, j );
|
|
lo.SwapElements( i, j );
|
|
hi.SwapElements( i, j );
|
|
a.SwapElements( i, j );
|
|
f.SwapElements( i, j );
|
|
if ( boxIndex ) {
|
|
idSwap( boxIndex[i], boxIndex[j] );
|
|
}
|
|
idSwap( side[i], side[j] );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::AddClamped
|
|
============
|
|
*/
|
|
void idLCP_Square::AddClamped( int r ) {
|
|
int i, j;
|
|
float sum;
|
|
|
|
assert( r >= numClamped );
|
|
|
|
// add a row at the bottom and a column at the right of the factored
|
|
// matrix for the clamped variables
|
|
|
|
Swap( numClamped, r );
|
|
|
|
// add row to L
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
sum = rowPtrs[numClamped][i];
|
|
for ( j = 0; j < i; j++ ) {
|
|
sum -= clamped[numClamped][j] * clamped[j][i];
|
|
}
|
|
clamped[numClamped][i] = sum * diagonal[i];
|
|
}
|
|
|
|
// add column to U
|
|
for ( i = 0; i <= numClamped; i++ ) {
|
|
sum = rowPtrs[i][numClamped];
|
|
for ( j = 0; j < i; j++ ) {
|
|
sum -= clamped[i][j] * clamped[j][numClamped];
|
|
}
|
|
clamped[i][numClamped] = sum;
|
|
}
|
|
|
|
diagonal[numClamped] = 1.0f / clamped[numClamped][numClamped];
|
|
|
|
numClamped++;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::RemoveClamped
|
|
============
|
|
*/
|
|
void idLCP_Square::RemoveClamped( int r ) {
|
|
int i, j;
|
|
float *y0, *y1, *z0, *z1;
|
|
double diag, beta0, beta1, p0, p1, q0, q1, d;
|
|
|
|
assert( r < numClamped );
|
|
|
|
numClamped--;
|
|
|
|
// no need to swap and update the factored matrix when the last row and column are removed
|
|
if ( r == numClamped ) {
|
|
return;
|
|
}
|
|
|
|
y0 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
z0 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
y1 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
z1 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
// the row/column need to be subtracted from the factorization
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
y0[i] = -rowPtrs[i][r];
|
|
}
|
|
|
|
memset( y1, 0, numClamped * sizeof( float ) );
|
|
y1[r] = 1.0f;
|
|
|
|
memset( z0, 0, numClamped * sizeof( float ) );
|
|
z0[r] = 1.0f;
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
z1[i] = -rowPtrs[r][i];
|
|
}
|
|
|
|
// swap the to be removed row/column with the last row/column
|
|
Swap( r, numClamped );
|
|
|
|
// the swapped last row/column need to be added to the factorization
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
y0[i] += rowPtrs[i][r];
|
|
}
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
z1[i] += rowPtrs[r][i];
|
|
}
|
|
z1[r] = 0.0f;
|
|
|
|
// update the beginning of the to be updated row and column
|
|
for ( i = 0; i < r; i++ ) {
|
|
p0 = y0[i];
|
|
beta1 = z1[i] * diagonal[i];
|
|
|
|
clamped[i][r] += p0;
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
z1[j] -= beta1 * clamped[i][j];
|
|
}
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
y0[j] -= p0 * clamped[j][i];
|
|
}
|
|
clamped[r][i] += beta1;
|
|
}
|
|
|
|
// update the lower right corner starting at r,r
|
|
for ( i = r; i < numClamped; i++ ) {
|
|
diag = clamped[i][i];
|
|
|
|
p0 = y0[i];
|
|
p1 = z0[i];
|
|
diag += p0 * p1;
|
|
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
beta0 = p1 / diag;
|
|
|
|
q0 = y1[i];
|
|
q1 = z1[i];
|
|
diag += q0 * q1;
|
|
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
d = 1.0f / diag;
|
|
beta1 = q1 * d;
|
|
|
|
clamped[i][i] = diag;
|
|
diagonal[i] = d;
|
|
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
|
|
d = clamped[i][j];
|
|
|
|
d += p0 * z0[j];
|
|
z0[j] -= beta0 * d;
|
|
|
|
d += q0 * z1[j];
|
|
z1[j] -= beta1 * d;
|
|
|
|
clamped[i][j] = d;
|
|
}
|
|
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
|
|
d = clamped[j][i];
|
|
|
|
y0[j] -= p0 * d;
|
|
d += beta0 * y0[j];
|
|
|
|
y1[j] -= q0 * d;
|
|
d += beta1 * y1[j];
|
|
|
|
clamped[j][i] = d;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::CalcForceDelta
|
|
|
|
modifies this->delta_f
|
|
============
|
|
*/
|
|
ID_MAYBE_INLINE void idLCP_Square::CalcForceDelta(int d, float dir) {
|
|
int i;
|
|
float *ptr;
|
|
|
|
delta_f[d] = dir;
|
|
|
|
if ( numClamped == 0 ) {
|
|
return;
|
|
}
|
|
|
|
// get column d of matrix
|
|
ptr = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
ptr[i] = rowPtrs[i][d];
|
|
}
|
|
|
|
// solve force delta
|
|
SolveClamped( delta_f, ptr );
|
|
|
|
// flip force delta based on direction
|
|
if ( dir > 0.0f ) {
|
|
ptr = delta_f.ToFloatPtr();
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
ptr[i] = - ptr[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::CalcAccelDelta
|
|
|
|
modifies this->delta_a and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::CalcAccelDelta( int d ) {
|
|
int j;
|
|
float dot;
|
|
|
|
// only the not clamped variables, including the current variable, can have a change in acceleration
|
|
for ( j = numClamped; j <= d; j++ ) {
|
|
// only the clamped variables and the current variable have a force delta unequal zero
|
|
SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
|
|
delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::ChangeForce
|
|
|
|
modifies this->f and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::ChangeForce( int d, float step ) {
|
|
// only the clamped variables and current variable have a force delta unequal zero
|
|
SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
|
|
f[d] += step * delta_f[d];
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::ChangeAccel
|
|
|
|
modifies this->a and uses this->delta_a
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::ChangeAccel( int d, float step ) {
|
|
// only the not clamped variables, including the current variable, can have an acceleration unequal zero
|
|
SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::GetMaxStep
|
|
============
|
|
*/
|
|
void idLCP_Square::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
|
|
int i;
|
|
float s;
|
|
|
|
// default to a full step for the current variable
|
|
if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
|
|
maxStep = -a[d] / delta_a[d];
|
|
} else {
|
|
maxStep = 0.0f;
|
|
}
|
|
limit = d;
|
|
limitSide = 0;
|
|
|
|
// test the current variable
|
|
if ( dir < 0.0f ) {
|
|
if ( lo[d] != -idMath::INFINITY ) {
|
|
s = ( lo[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else {
|
|
if ( hi[d] != idMath::INFINITY ) {
|
|
s = ( hi[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the clamped bounded variables
|
|
for ( i = numUnbounded; i < numClamped; i++ ) {
|
|
if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a low boundary
|
|
if ( lo[i] != -idMath::INFINITY ) {
|
|
s = ( lo[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a high boundary
|
|
if ( hi[i] != idMath::INFINITY ) {
|
|
s = ( hi[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the not clamped bounded variables
|
|
for ( i = numClamped; i < d; i++ ) {
|
|
if ( side[i] == -1 ) {
|
|
if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else if ( side[i] == 1 ) {
|
|
if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else {
|
|
continue;
|
|
}
|
|
// ignore variables for which the force is not allowed to take any substantial value
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
|
|
continue;
|
|
}
|
|
s = -a[i] / delta_a[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::Solve
|
|
============
|
|
*/
|
|
bool idLCP_Square::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
|
|
int i, j, n, limit, limitSide, boxStartIndex;
|
|
float dir, maxStep, dot, s;
|
|
char *failed;
|
|
|
|
// true when the matrix rows are 16 byte padded
|
|
padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
|
|
|
|
assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
|
|
assert( o_x.GetSize() == o_m.GetNumRows() );
|
|
assert( o_b.GetSize() == o_m.GetNumRows() );
|
|
assert( o_lo.GetSize() == o_m.GetNumRows() );
|
|
assert( o_hi.GetSize() == o_m.GetNumRows() );
|
|
|
|
// allocate memory for permuted input
|
|
f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
|
|
a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
|
|
hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
|
|
if ( o_boxIndex ) {
|
|
boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
|
|
memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
|
|
} else {
|
|
boxIndex = NULL;
|
|
}
|
|
|
|
// we override the const on o_m here but on exit the matrix is unchanged
|
|
m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
|
|
f.Zero();
|
|
a.Zero();
|
|
b = o_b;
|
|
lo = o_lo;
|
|
hi = o_hi;
|
|
|
|
// pointers to the rows of m
|
|
rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
rowPtrs[i] = m[i];
|
|
}
|
|
|
|
// tells if a variable is at the low boundary, high boundary or inbetween
|
|
side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
|
|
// index to keep track of the permutation
|
|
permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
permuted[i] = i;
|
|
}
|
|
|
|
// permute input so all unbounded variables come first
|
|
numUnbounded = 0;
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
|
|
if ( numUnbounded != i ) {
|
|
Swap( numUnbounded, i );
|
|
}
|
|
numUnbounded++;
|
|
}
|
|
}
|
|
|
|
// permute input so all variables using the boxIndex come last
|
|
boxStartIndex = m.GetNumRows();
|
|
if ( boxIndex ) {
|
|
for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
|
|
if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
|
|
boxStartIndex--;
|
|
if ( boxStartIndex != i ) {
|
|
Swap( boxStartIndex, i );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sub matrix for factorization
|
|
clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
|
|
diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// all unbounded variables are clamped
|
|
numClamped = numUnbounded;
|
|
|
|
// if there are unbounded variables
|
|
if ( numUnbounded ) {
|
|
|
|
// factor and solve for unbounded variables
|
|
if ( !FactorClamped() ) {
|
|
idLib::common->Printf( "idLCP_Square::Solve: unbounded factorization failed\n" );
|
|
return false;
|
|
}
|
|
SolveClamped( f, b.ToFloatPtr() );
|
|
|
|
// if there are no bounded variables we are done
|
|
if ( numUnbounded == m.GetNumRows() ) {
|
|
o_x = f; // the vector is not permuted
|
|
return true;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
int numIgnored = 0;
|
|
#endif
|
|
|
|
// allocate for delta force and delta acceleration
|
|
delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// solve for bounded variables
|
|
failed = NULL;
|
|
for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
|
|
|
|
// once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
|
|
if ( i == boxStartIndex ) {
|
|
for ( j = 0; j < boxStartIndex; j++ ) {
|
|
o_x[permuted[j]] = f[j];
|
|
}
|
|
for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
|
|
s = o_x[boxIndex[j]];
|
|
if ( lo[j] != -idMath::INFINITY ) {
|
|
lo[j] = - idMath::Fabs( lo[j] * s );
|
|
}
|
|
if ( hi[j] != idMath::INFINITY ) {
|
|
hi[j] = idMath::Fabs( hi[j] * s );
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculate acceleration for current variable
|
|
SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
|
|
a[i] = dot - b[i];
|
|
|
|
// if already at the low boundary
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
|
|
side[i] = -1;
|
|
continue;
|
|
}
|
|
|
|
// if already at the high boundary
|
|
if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 1;
|
|
continue;
|
|
}
|
|
|
|
// if inside the clamped region
|
|
if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 0;
|
|
AddClamped( i );
|
|
continue;
|
|
}
|
|
|
|
// drive the current variable into a valid region
|
|
for ( n = 0; n < maxIterations; n++ ) {
|
|
|
|
// direction to move
|
|
if ( a[i] <= 0.0f ) {
|
|
dir = 1.0f;
|
|
} else {
|
|
dir = -1.0f;
|
|
}
|
|
|
|
// calculate force delta
|
|
CalcForceDelta( i, dir );
|
|
|
|
// calculate acceleration delta: delta_a = m * delta_f;
|
|
CalcAccelDelta( i );
|
|
|
|
// maximum step we can take
|
|
GetMaxStep( i, dir, maxStep, limit, limitSide );
|
|
|
|
if ( maxStep <= 0.0f ) {
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
// ignore the current variable completely
|
|
lo[i] = hi[i] = 0.0f;
|
|
f[i] = 0.0f;
|
|
side[i] = -1;
|
|
numIgnored++;
|
|
#else
|
|
failed = va( "invalid step size %.4f", maxStep );
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
// change force
|
|
ChangeForce( i, maxStep );
|
|
|
|
// change acceleration
|
|
ChangeAccel( i, maxStep );
|
|
|
|
// clamp/unclamp the variable that limited this step
|
|
side[limit] = limitSide;
|
|
switch( limitSide ) {
|
|
case 0: {
|
|
a[limit] = 0.0f;
|
|
AddClamped( limit );
|
|
break;
|
|
}
|
|
case -1: {
|
|
f[limit] = lo[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
case 1: {
|
|
f[limit] = hi[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// if the current variable limited the step we can continue with the next variable
|
|
if ( limit == i ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n >= maxIterations ) {
|
|
failed = va( "max iterations %d", maxIterations );
|
|
break;
|
|
}
|
|
|
|
if ( failed ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
if ( numIgnored ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// if failed clear remaining forces
|
|
if ( failed ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
|
|
}
|
|
for ( j = i; j < m.GetNumRows(); j++ ) {
|
|
f[j] = 0.0f;
|
|
}
|
|
}
|
|
|
|
#if defined(_DEBUG) && 0
|
|
if ( !failed ) {
|
|
// test whether or not the solution satisfies the complementarity conditions
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
a[i] = -b[i];
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
a[i] += rowPtrs[i][j] * f[j];
|
|
}
|
|
|
|
if ( f[i] == lo[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
|
|
int bah1 = 1;
|
|
}
|
|
} else if ( f[i] == hi[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
|
|
int bah2 = 1;
|
|
}
|
|
} else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
|
|
int bah3 = 1;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// unpermute result
|
|
for ( i = 0; i < f.GetSize(); i++ ) {
|
|
o_x[permuted[i]] = f[i];
|
|
}
|
|
|
|
// unpermute original matrix
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
if ( permuted[j] == i ) {
|
|
break;
|
|
}
|
|
}
|
|
if ( i != j ) {
|
|
m.SwapColumns( i, j );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
//===============================================================
|
|
// M
|
|
// idLCP_Symmetric MrE
|
|
// E
|
|
//===============================================================
|
|
|
|
class idLCP_Symmetric : public idLCP {
|
|
public:
|
|
virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
|
|
|
|
private:
|
|
idMatX m; // original matrix
|
|
idVecX b; // right hand side
|
|
idVecX lo, hi; // low and high bounds
|
|
idVecX f, a; // force and acceleration
|
|
idVecX delta_f, delta_a; // delta force and delta acceleration
|
|
idMatX clamped; // LDLt factored sub matrix for clamped variables
|
|
idVecX diagonal; // reciprocal of diagonal of LDLt factored sub matrix for clamped variables
|
|
idVecX solveCache1; // intermediate result cached in SolveClamped
|
|
idVecX solveCache2; // "
|
|
int numUnbounded; // number of unbounded variables
|
|
int numClamped; // number of clamped variables
|
|
int clampedChangeStart; // lowest row/column changed in the clamped matrix during an iteration
|
|
float ** rowPtrs; // pointers to the rows of m
|
|
int * boxIndex; // box index
|
|
int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
|
|
int * permuted; // index to keep track of the permutation
|
|
bool padded; // set to true if the rows of the initial matrix are 16 byte padded
|
|
|
|
private:
|
|
bool FactorClamped( void );
|
|
void SolveClamped( idVecX &x, const float *b );
|
|
void Swap( int i, int j );
|
|
void AddClamped( int r, bool useSolveCache );
|
|
void RemoveClamped( int r );
|
|
void CalcForceDelta( int d, float dir );
|
|
void CalcAccelDelta( int d );
|
|
void ChangeForce( int d, float step );
|
|
void ChangeAccel( int d, float step );
|
|
void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
|
|
};
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::FactorClamped
|
|
============
|
|
*/
|
|
bool idLCP_Symmetric::FactorClamped( void ) {
|
|
|
|
clampedChangeStart = 0;
|
|
|
|
for ( int i = 0; i < numClamped; i++ ) {
|
|
memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
|
|
}
|
|
return SIMDProcessor->MatX_LDLTFactor( clamped, diagonal, numClamped );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::SolveClamped
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::SolveClamped( idVecX &x, const float *b ) {
|
|
|
|
// solve L
|
|
SIMDProcessor->MatX_LowerTriangularSolve( clamped, solveCache1.ToFloatPtr(), b, numClamped, clampedChangeStart );
|
|
|
|
// solve D
|
|
SIMDProcessor->Mul( solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), diagonal.ToFloatPtr(), numClamped );
|
|
|
|
// solve Lt
|
|
SIMDProcessor->MatX_LowerTriangularSolveTranspose( clamped, x.ToFloatPtr(), solveCache2.ToFloatPtr(), numClamped );
|
|
|
|
clampedChangeStart = numClamped;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::Swap
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::Swap( int i, int j ) {
|
|
|
|
if ( i == j ) {
|
|
return;
|
|
}
|
|
|
|
idSwap( rowPtrs[i], rowPtrs[j] );
|
|
m.SwapColumns( i, j );
|
|
b.SwapElements( i, j );
|
|
lo.SwapElements( i, j );
|
|
hi.SwapElements( i, j );
|
|
a.SwapElements( i, j );
|
|
f.SwapElements( i, j );
|
|
if ( boxIndex ) {
|
|
idSwap( boxIndex[i], boxIndex[j] );
|
|
}
|
|
idSwap( side[i], side[j] );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::AddClamped
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::AddClamped( int r, bool useSolveCache ) {
|
|
float d, dot;
|
|
|
|
assert( r >= numClamped );
|
|
|
|
if ( numClamped < clampedChangeStart ) {
|
|
clampedChangeStart = numClamped;
|
|
}
|
|
|
|
// add a row at the bottom and a column at the right of the factored
|
|
// matrix for the clamped variables
|
|
|
|
Swap( numClamped, r );
|
|
|
|
// solve for v in L * v = rowPtr[numClamped]
|
|
if ( useSolveCache ) {
|
|
|
|
// the lower triangular solve was cached in SolveClamped called by CalcForceDelta
|
|
memcpy( clamped[numClamped], solveCache2.ToFloatPtr(), numClamped * sizeof( float ) );
|
|
// calculate row dot product
|
|
SIMDProcessor->Dot( dot, solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), numClamped );
|
|
|
|
} else {
|
|
|
|
float *v = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[numClamped], numClamped );
|
|
// add bottom row to L
|
|
SIMDProcessor->Mul( clamped[numClamped], v, diagonal.ToFloatPtr(), numClamped );
|
|
// calculate row dot product
|
|
SIMDProcessor->Dot( dot, clamped[numClamped], v, numClamped );
|
|
}
|
|
|
|
// update diagonal[numClamped]
|
|
d = rowPtrs[numClamped][numClamped] - dot;
|
|
|
|
if ( d == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::AddClamped: updating factorization failed\n" );
|
|
numClamped++;
|
|
return;
|
|
}
|
|
|
|
clamped[numClamped][numClamped] = d;
|
|
diagonal[numClamped] = 1.0f / d;
|
|
|
|
numClamped++;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::RemoveClamped
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::RemoveClamped( int r ) {
|
|
int i, j, n;
|
|
float *addSub, *original, *v, *ptr, *v1, *v2, dot;
|
|
double sum, diag, newDiag, invNewDiag, p1, p2, alpha1, alpha2, beta1, beta2;
|
|
|
|
assert( r < numClamped );
|
|
|
|
if ( r < clampedChangeStart ) {
|
|
clampedChangeStart = r;
|
|
}
|
|
|
|
numClamped--;
|
|
|
|
// no need to swap and update the factored matrix when the last row and column are removed
|
|
if ( r == numClamped ) {
|
|
return;
|
|
}
|
|
|
|
// swap the to be removed row/column with the last row/column
|
|
Swap( r, numClamped );
|
|
|
|
// update the factored matrix
|
|
addSub = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
if ( r == 0 ) {
|
|
|
|
if ( numClamped == 1 ) {
|
|
diag = rowPtrs[0][0];
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
clamped[0][0] = diag;
|
|
diagonal[0] = 1.0f / diag;
|
|
return;
|
|
}
|
|
|
|
// calculate the row/column to be added to the lower right sub matrix starting at (r, r)
|
|
original = rowPtrs[numClamped];
|
|
ptr = rowPtrs[r];
|
|
addSub[0] = ptr[0] - original[numClamped];
|
|
for ( i = 1; i < numClamped; i++ ) {
|
|
addSub[i] = ptr[i] - original[i];
|
|
}
|
|
|
|
} else {
|
|
|
|
v = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
// solve for v in L * v = rowPtr[r]
|
|
SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[r], r );
|
|
|
|
// update removed row
|
|
SIMDProcessor->Mul( clamped[r], v, diagonal.ToFloatPtr(), r );
|
|
|
|
// if the last row/column of the matrix is updated
|
|
if ( r == numClamped - 1 ) {
|
|
// only calculate new diagonal
|
|
SIMDProcessor->Dot( dot, clamped[r], v, r );
|
|
diag = rowPtrs[r][r] - dot;
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
clamped[r][r] = diag;
|
|
diagonal[r] = 1.0f / diag;
|
|
return;
|
|
}
|
|
|
|
// calculate the row/column to be added to the lower right sub matrix starting at (r, r)
|
|
for ( i = 0; i < r; i++ ) {
|
|
v[i] = clamped[r][i] * clamped[i][i];
|
|
}
|
|
for ( i = r; i < numClamped; i++ ) {
|
|
if ( i == r ) {
|
|
sum = clamped[r][r];
|
|
} else {
|
|
sum = clamped[r][r] * clamped[i][r];
|
|
}
|
|
ptr = clamped[i];
|
|
for ( j = 0; j < r; j++ ) {
|
|
sum += ptr[j] * v[j];
|
|
}
|
|
addSub[i] = rowPtrs[r][i] - sum;
|
|
}
|
|
}
|
|
|
|
// add row/column to the lower right sub matrix starting at (r, r)
|
|
|
|
v1 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
v2 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
diag = idMath::SQRT_1OVER2;
|
|
v1[r] = ( 0.5f * addSub[r] + 1.0f ) * diag;
|
|
v2[r] = ( 0.5f * addSub[r] - 1.0f ) * diag;
|
|
for ( i = r+1; i < numClamped; i++ ) {
|
|
v1[i] = v2[i] = addSub[i] * diag;
|
|
}
|
|
|
|
alpha1 = 1.0f;
|
|
alpha2 = -1.0f;
|
|
|
|
// simultaneous update/downdate of the sub matrix starting at (r, r)
|
|
n = clamped.GetNumColumns();
|
|
for ( i = r; i < numClamped; i++ ) {
|
|
|
|
diag = clamped[i][i];
|
|
p1 = v1[i];
|
|
newDiag = diag + alpha1 * p1 * p1;
|
|
|
|
if ( newDiag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
alpha1 /= newDiag;
|
|
beta1 = p1 * alpha1;
|
|
alpha1 *= diag;
|
|
|
|
diag = newDiag;
|
|
p2 = v2[i];
|
|
newDiag = diag + alpha2 * p2 * p2;
|
|
|
|
if ( newDiag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
clamped[i][i] = newDiag;
|
|
diagonal[i] = invNewDiag = 1.0f / newDiag;
|
|
|
|
alpha2 *= invNewDiag;
|
|
beta2 = p2 * alpha2;
|
|
alpha2 *= diag;
|
|
|
|
// update column below diagonal (i,i)
|
|
ptr = clamped.ToFloatPtr() + i;
|
|
|
|
for ( j = i+1; j < numClamped - 1; j += 2 ) {
|
|
|
|
float sum0 = ptr[(j+0)*n];
|
|
float sum1 = ptr[(j+1)*n];
|
|
|
|
v1[j+0] -= p1 * sum0;
|
|
v1[j+1] -= p1 * sum1;
|
|
|
|
sum0 += beta1 * v1[j+0];
|
|
sum1 += beta1 * v1[j+1];
|
|
|
|
v2[j+0] -= p2 * sum0;
|
|
v2[j+1] -= p2 * sum1;
|
|
|
|
sum0 += beta2 * v2[j+0];
|
|
sum1 += beta2 * v2[j+1];
|
|
|
|
ptr[(j+0)*n] = sum0;
|
|
ptr[(j+1)*n] = sum1;
|
|
}
|
|
|
|
for ( ; j < numClamped; j++ ) {
|
|
|
|
sum = ptr[j*n];
|
|
|
|
v1[j] -= p1 * sum;
|
|
sum += beta1 * v1[j];
|
|
|
|
v2[j] -= p2 * sum;
|
|
sum += beta2 * v2[j];
|
|
|
|
ptr[j*n] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::CalcForceDelta
|
|
|
|
modifies this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::CalcForceDelta( int d, float dir ) {
|
|
int i;
|
|
float *ptr;
|
|
|
|
delta_f[d] = dir;
|
|
|
|
if ( numClamped == 0 ) {
|
|
return;
|
|
}
|
|
|
|
// solve force delta
|
|
SolveClamped( delta_f, rowPtrs[d] );
|
|
|
|
// flip force delta based on direction
|
|
if ( dir > 0.0f ) {
|
|
ptr = delta_f.ToFloatPtr();
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
ptr[i] = - ptr[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::CalcAccelDelta
|
|
|
|
modifies this->delta_a and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::CalcAccelDelta( int d ) {
|
|
int j;
|
|
float dot;
|
|
|
|
// only the not clamped variables, including the current variable, can have a change in acceleration
|
|
for ( j = numClamped; j <= d; j++ ) {
|
|
// only the clamped variables and the current variable have a force delta unequal zero
|
|
SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
|
|
delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::ChangeForce
|
|
|
|
modifies this->f and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::ChangeForce( int d, float step ) {
|
|
// only the clamped variables and current variable have a force delta unequal zero
|
|
SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
|
|
f[d] += step * delta_f[d];
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::ChangeAccel
|
|
|
|
modifies this->a and uses this->delta_a
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::ChangeAccel( int d, float step ) {
|
|
// only the not clamped variables, including the current variable, can have an acceleration unequal zero
|
|
SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::GetMaxStep
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
|
|
int i;
|
|
float s;
|
|
|
|
// default to a full step for the current variable
|
|
if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
|
|
maxStep = -a[d] / delta_a[d];
|
|
} else {
|
|
maxStep = 0.0f;
|
|
}
|
|
limit = d;
|
|
limitSide = 0;
|
|
|
|
// test the current variable
|
|
if ( dir < 0.0f ) {
|
|
if ( lo[d] != -idMath::INFINITY ) {
|
|
s = ( lo[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else {
|
|
if ( hi[d] != idMath::INFINITY ) {
|
|
s = ( hi[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the clamped bounded variables
|
|
for ( i = numUnbounded; i < numClamped; i++ ) {
|
|
if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a low boundary
|
|
if ( lo[i] != -idMath::INFINITY ) {
|
|
s = ( lo[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a high boundary
|
|
if ( hi[i] != idMath::INFINITY ) {
|
|
s = ( hi[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the not clamped bounded variables
|
|
for ( i = numClamped; i < d; i++ ) {
|
|
if ( side[i] == -1 ) {
|
|
if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else if ( side[i] == 1 ) {
|
|
if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else {
|
|
continue;
|
|
}
|
|
// ignore variables for which the force is not allowed to take any substantial value
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
|
|
continue;
|
|
}
|
|
s = -a[i] / delta_a[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::Solve
|
|
============
|
|
*/
|
|
bool idLCP_Symmetric::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
|
|
int i, j, n, limit, limitSide, boxStartIndex;
|
|
float dir, maxStep, dot, s;
|
|
char *failed;
|
|
|
|
// true when the matrix rows are 16 byte padded
|
|
padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
|
|
|
|
assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
|
|
assert( o_x.GetSize() == o_m.GetNumRows() );
|
|
assert( o_b.GetSize() == o_m.GetNumRows() );
|
|
assert( o_lo.GetSize() == o_m.GetNumRows() );
|
|
assert( o_hi.GetSize() == o_m.GetNumRows() );
|
|
|
|
// allocate memory for permuted input
|
|
f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
|
|
a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
|
|
hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
|
|
if ( o_boxIndex ) {
|
|
boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
|
|
memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
|
|
} else {
|
|
boxIndex = NULL;
|
|
}
|
|
|
|
// we override the const on o_m here but on exit the matrix is unchanged
|
|
m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
|
|
f.Zero();
|
|
a.Zero();
|
|
b = o_b;
|
|
lo = o_lo;
|
|
hi = o_hi;
|
|
|
|
// pointers to the rows of m
|
|
rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
rowPtrs[i] = m[i];
|
|
}
|
|
|
|
// tells if a variable is at the low boundary, high boundary or inbetween
|
|
side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
|
|
// index to keep track of the permutation
|
|
permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
permuted[i] = i;
|
|
}
|
|
|
|
// permute input so all unbounded variables come first
|
|
numUnbounded = 0;
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
|
|
if ( numUnbounded != i ) {
|
|
Swap( numUnbounded, i );
|
|
}
|
|
numUnbounded++;
|
|
}
|
|
}
|
|
|
|
// permute input so all variables using the boxIndex come last
|
|
boxStartIndex = m.GetNumRows();
|
|
if ( boxIndex ) {
|
|
for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
|
|
if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
|
|
boxStartIndex--;
|
|
if ( boxStartIndex != i ) {
|
|
Swap( boxStartIndex, i );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sub matrix for factorization
|
|
clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
|
|
diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
solveCache1.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
solveCache2.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// all unbounded variables are clamped
|
|
numClamped = numUnbounded;
|
|
|
|
// if there are unbounded variables
|
|
if ( numUnbounded ) {
|
|
|
|
// factor and solve for unbounded variables
|
|
if ( !FactorClamped() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: unbounded factorization failed\n" );
|
|
return false;
|
|
}
|
|
SolveClamped( f, b.ToFloatPtr() );
|
|
|
|
// if there are no bounded variables we are done
|
|
if ( numUnbounded == m.GetNumRows() ) {
|
|
o_x = f; // the vector is not permuted
|
|
return true;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
int numIgnored = 0;
|
|
#endif
|
|
|
|
// allocate for delta force and delta acceleration
|
|
delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// solve for bounded variables
|
|
failed = NULL;
|
|
for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
|
|
|
|
clampedChangeStart = 0;
|
|
|
|
// once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
|
|
if ( i == boxStartIndex ) {
|
|
for ( j = 0; j < boxStartIndex; j++ ) {
|
|
o_x[permuted[j]] = f[j];
|
|
}
|
|
for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
|
|
s = o_x[boxIndex[j]];
|
|
if ( lo[j] != -idMath::INFINITY ) {
|
|
lo[j] = - idMath::Fabs( lo[j] * s );
|
|
}
|
|
if ( hi[j] != idMath::INFINITY ) {
|
|
hi[j] = idMath::Fabs( hi[j] * s );
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculate acceleration for current variable
|
|
SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
|
|
a[i] = dot - b[i];
|
|
|
|
// if already at the low boundary
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
|
|
side[i] = -1;
|
|
continue;
|
|
}
|
|
|
|
// if already at the high boundary
|
|
if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 1;
|
|
continue;
|
|
}
|
|
|
|
// if inside the clamped region
|
|
if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 0;
|
|
AddClamped( i, false );
|
|
continue;
|
|
}
|
|
|
|
// drive the current variable into a valid region
|
|
for ( n = 0; n < maxIterations; n++ ) {
|
|
|
|
// direction to move
|
|
if ( a[i] <= 0.0f ) {
|
|
dir = 1.0f;
|
|
} else {
|
|
dir = -1.0f;
|
|
}
|
|
|
|
// calculate force delta
|
|
CalcForceDelta( i, dir );
|
|
|
|
// calculate acceleration delta: delta_a = m * delta_f;
|
|
CalcAccelDelta( i );
|
|
|
|
// maximum step we can take
|
|
GetMaxStep( i, dir, maxStep, limit, limitSide );
|
|
|
|
if ( maxStep <= 0.0f ) {
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
// ignore the current variable completely
|
|
lo[i] = hi[i] = 0.0f;
|
|
f[i] = 0.0f;
|
|
side[i] = -1;
|
|
numIgnored++;
|
|
#else
|
|
failed = va( "invalid step size %.4f", maxStep );
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
// change force
|
|
ChangeForce( i, maxStep );
|
|
|
|
// change acceleration
|
|
ChangeAccel( i, maxStep );
|
|
|
|
// clamp/unclamp the variable that limited this step
|
|
side[limit] = limitSide;
|
|
switch( limitSide ) {
|
|
case 0: {
|
|
a[limit] = 0.0f;
|
|
AddClamped( limit, ( limit == i ) );
|
|
break;
|
|
}
|
|
case -1: {
|
|
f[limit] = lo[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
case 1: {
|
|
f[limit] = hi[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// if the current variable limited the step we can continue with the next variable
|
|
if ( limit == i ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n >= maxIterations ) {
|
|
failed = va( "max iterations %d", maxIterations );
|
|
break;
|
|
}
|
|
|
|
if ( failed ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
if ( numIgnored ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// if failed clear remaining forces
|
|
if ( failed ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
|
|
}
|
|
for ( j = i; j < m.GetNumRows(); j++ ) {
|
|
f[j] = 0.0f;
|
|
}
|
|
}
|
|
|
|
#if defined(_DEBUG) && 0
|
|
if ( !failed ) {
|
|
// test whether or not the solution satisfies the complementarity conditions
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
a[i] = -b[i];
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
a[i] += rowPtrs[i][j] * f[j];
|
|
}
|
|
|
|
if ( f[i] == lo[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
|
|
int bah1 = 1;
|
|
}
|
|
} else if ( f[i] == hi[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
|
|
int bah2 = 1;
|
|
}
|
|
} else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
|
|
int bah3 = 1;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// unpermute result
|
|
for ( i = 0; i < f.GetSize(); i++ ) {
|
|
o_x[permuted[i]] = f[i];
|
|
}
|
|
|
|
// unpermute original matrix
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
if ( permuted[j] == i ) {
|
|
break;
|
|
}
|
|
}
|
|
if ( i != j ) {
|
|
m.SwapColumns( i, j );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
//===============================================================
|
|
//
|
|
// idLCP
|
|
//
|
|
//===============================================================
|
|
|
|
/*
|
|
============
|
|
idLCP::AllocSquare
|
|
============
|
|
*/
|
|
idLCP *idLCP::AllocSquare( void ) {
|
|
idLCP *lcp = new idLCP_Square;
|
|
lcp->SetMaxIterations( 32 );
|
|
return lcp;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::AllocSymmetric
|
|
============
|
|
*/
|
|
idLCP *idLCP::AllocSymmetric( void ) {
|
|
idLCP *lcp = new idLCP_Symmetric;
|
|
lcp->SetMaxIterations( 32 );
|
|
return lcp;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::~idLCP
|
|
============
|
|
*/
|
|
idLCP::~idLCP( void ) {
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::SetMaxIterations
|
|
============
|
|
*/
|
|
void idLCP::SetMaxIterations( int max ) {
|
|
maxIterations = max;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::GetMaxIterations
|
|
============
|
|
*/
|
|
int idLCP::GetMaxIterations( void ) {
|
|
return maxIterations;
|
|
}
|