mirror of
https://github.com/dhewm/dhewm3.git
synced 2024-11-30 16:11:11 +00:00
736ec20d4d
Don't include the lazy precompiled.h everywhere, only what's required for the compilation unit. platform.h needs to be included instead to provide all essential defines and types. All includes use the relative path to the neo or the game specific root. Move all idlib related includes from idlib/Lib.h to precompiled.h. precompiled.h still exists for the MFC stuff in tools/. Add some missing header guards.
1645 lines
40 KiB
C++
1645 lines
40 KiB
C++
/*
|
|
===========================================================================
|
|
|
|
Doom 3 GPL Source Code
|
|
Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
|
|
|
|
This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
|
|
|
|
Doom 3 Source Code is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Doom 3 Source Code is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
|
|
|
|
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
|
|
|
|
===========================================================================
|
|
*/
|
|
|
|
#include "sys/platform.h"
|
|
|
|
#include "idlib/math/Lcp.h"
|
|
|
|
static idCVar lcp_showFailures( "lcp_showFailures", "0", CVAR_SYSTEM | CVAR_BOOL, "show LCP solver failures" );
|
|
|
|
const float LCP_BOUND_EPSILON = 1e-5f;
|
|
const float LCP_ACCEL_EPSILON = 1e-5f;
|
|
const float LCP_DELTA_ACCEL_EPSILON = 1e-9f;
|
|
const float LCP_DELTA_FORCE_EPSILON = 1e-9f;
|
|
|
|
#define IGNORE_UNSATISFIABLE_VARIABLES
|
|
|
|
//===============================================================
|
|
// M
|
|
// idLCP_Square MrE
|
|
// E
|
|
//===============================================================
|
|
|
|
class idLCP_Square : public idLCP {
|
|
public:
|
|
virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
|
|
|
|
private:
|
|
idMatX m; // original matrix
|
|
idVecX b; // right hand side
|
|
idVecX lo, hi; // low and high bounds
|
|
idVecX f, a; // force and acceleration
|
|
idVecX delta_f, delta_a; // delta force and delta acceleration
|
|
idMatX clamped; // LU factored sub matrix for clamped variables
|
|
idVecX diagonal; // reciprocal of diagonal of U of the LU factored sub matrix for clamped variables
|
|
int numUnbounded; // number of unbounded variables
|
|
int numClamped; // number of clamped variables
|
|
float ** rowPtrs; // pointers to the rows of m
|
|
int * boxIndex; // box index
|
|
int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
|
|
int * permuted; // index to keep track of the permutation
|
|
bool padded; // set to true if the rows of the initial matrix are 16 byte padded
|
|
|
|
private:
|
|
bool FactorClamped( void );
|
|
void SolveClamped( idVecX &x, const float *b );
|
|
void Swap( int i, int j );
|
|
void AddClamped( int r );
|
|
void RemoveClamped( int r );
|
|
void CalcForceDelta( int d, float dir );
|
|
void CalcAccelDelta( int d );
|
|
void ChangeForce( int d, float step );
|
|
void ChangeAccel( int d, float step );
|
|
void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
|
|
};
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::FactorClamped
|
|
============
|
|
*/
|
|
bool idLCP_Square::FactorClamped( void ) {
|
|
int i, j, k;
|
|
float s, d;
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
|
|
}
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
|
|
s = idMath::Fabs( clamped[i][i] );
|
|
|
|
if ( s == 0.0f ) {
|
|
return false;
|
|
}
|
|
|
|
diagonal[i] = d = 1.0f / clamped[i][i];
|
|
for ( j = i + 1; j < numClamped; j++ ) {
|
|
clamped[j][i] *= d;
|
|
}
|
|
|
|
for ( j = i + 1; j < numClamped; j++ ) {
|
|
d = clamped[j][i];
|
|
for ( k = i + 1; k < numClamped; k++ ) {
|
|
clamped[j][k] -= d * clamped[i][k];
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::SolveClamped
|
|
============
|
|
*/
|
|
void idLCP_Square::SolveClamped( idVecX &x, const float *b ) {
|
|
int i, j;
|
|
float sum;
|
|
|
|
// solve L
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
sum = b[i];
|
|
for ( j = 0; j < i; j++ ) {
|
|
sum -= clamped[i][j] * x[j];
|
|
}
|
|
x[i] = sum;
|
|
}
|
|
|
|
// solve U
|
|
for ( i = numClamped - 1; i >= 0; i-- ) {
|
|
sum = x[i];
|
|
for ( j = i + 1; j < numClamped; j++ ) {
|
|
sum -= clamped[i][j] * x[j];
|
|
}
|
|
x[i] = sum * diagonal[i];
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::Swap
|
|
============
|
|
*/
|
|
void idLCP_Square::Swap( int i, int j ) {
|
|
|
|
if ( i == j ) {
|
|
return;
|
|
}
|
|
|
|
idSwap( rowPtrs[i], rowPtrs[j] );
|
|
m.SwapColumns( i, j );
|
|
b.SwapElements( i, j );
|
|
lo.SwapElements( i, j );
|
|
hi.SwapElements( i, j );
|
|
a.SwapElements( i, j );
|
|
f.SwapElements( i, j );
|
|
if ( boxIndex ) {
|
|
idSwap( boxIndex[i], boxIndex[j] );
|
|
}
|
|
idSwap( side[i], side[j] );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::AddClamped
|
|
============
|
|
*/
|
|
void idLCP_Square::AddClamped( int r ) {
|
|
int i, j;
|
|
float sum;
|
|
|
|
assert( r >= numClamped );
|
|
|
|
// add a row at the bottom and a column at the right of the factored
|
|
// matrix for the clamped variables
|
|
|
|
Swap( numClamped, r );
|
|
|
|
// add row to L
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
sum = rowPtrs[numClamped][i];
|
|
for ( j = 0; j < i; j++ ) {
|
|
sum -= clamped[numClamped][j] * clamped[j][i];
|
|
}
|
|
clamped[numClamped][i] = sum * diagonal[i];
|
|
}
|
|
|
|
// add column to U
|
|
for ( i = 0; i <= numClamped; i++ ) {
|
|
sum = rowPtrs[i][numClamped];
|
|
for ( j = 0; j < i; j++ ) {
|
|
sum -= clamped[i][j] * clamped[j][numClamped];
|
|
}
|
|
clamped[i][numClamped] = sum;
|
|
}
|
|
|
|
diagonal[numClamped] = 1.0f / clamped[numClamped][numClamped];
|
|
|
|
numClamped++;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::RemoveClamped
|
|
============
|
|
*/
|
|
void idLCP_Square::RemoveClamped( int r ) {
|
|
int i, j;
|
|
float *y0, *y1, *z0, *z1;
|
|
double diag, beta0, beta1, p0, p1, q0, q1, d;
|
|
|
|
assert( r < numClamped );
|
|
|
|
numClamped--;
|
|
|
|
// no need to swap and update the factored matrix when the last row and column are removed
|
|
if ( r == numClamped ) {
|
|
return;
|
|
}
|
|
|
|
y0 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
z0 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
y1 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
z1 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
// the row/column need to be subtracted from the factorization
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
y0[i] = -rowPtrs[i][r];
|
|
}
|
|
|
|
memset( y1, 0, numClamped * sizeof( float ) );
|
|
y1[r] = 1.0f;
|
|
|
|
memset( z0, 0, numClamped * sizeof( float ) );
|
|
z0[r] = 1.0f;
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
z1[i] = -rowPtrs[r][i];
|
|
}
|
|
|
|
// swap the to be removed row/column with the last row/column
|
|
Swap( r, numClamped );
|
|
|
|
// the swapped last row/column need to be added to the factorization
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
y0[i] += rowPtrs[i][r];
|
|
}
|
|
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
z1[i] += rowPtrs[r][i];
|
|
}
|
|
z1[r] = 0.0f;
|
|
|
|
// update the beginning of the to be updated row and column
|
|
for ( i = 0; i < r; i++ ) {
|
|
p0 = y0[i];
|
|
beta1 = z1[i] * diagonal[i];
|
|
|
|
clamped[i][r] += p0;
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
z1[j] -= beta1 * clamped[i][j];
|
|
}
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
y0[j] -= p0 * clamped[j][i];
|
|
}
|
|
clamped[r][i] += beta1;
|
|
}
|
|
|
|
// update the lower right corner starting at r,r
|
|
for ( i = r; i < numClamped; i++ ) {
|
|
diag = clamped[i][i];
|
|
|
|
p0 = y0[i];
|
|
p1 = z0[i];
|
|
diag += p0 * p1;
|
|
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
beta0 = p1 / diag;
|
|
|
|
q0 = y1[i];
|
|
q1 = z1[i];
|
|
diag += q0 * q1;
|
|
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Square::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
d = 1.0f / diag;
|
|
beta1 = q1 * d;
|
|
|
|
clamped[i][i] = diag;
|
|
diagonal[i] = d;
|
|
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
|
|
d = clamped[i][j];
|
|
|
|
d += p0 * z0[j];
|
|
z0[j] -= beta0 * d;
|
|
|
|
d += q0 * z1[j];
|
|
z1[j] -= beta1 * d;
|
|
|
|
clamped[i][j] = d;
|
|
}
|
|
|
|
for ( j = i+1; j < numClamped; j++ ) {
|
|
|
|
d = clamped[j][i];
|
|
|
|
y0[j] -= p0 * d;
|
|
d += beta0 * y0[j];
|
|
|
|
y1[j] -= q0 * d;
|
|
d += beta1 * y1[j];
|
|
|
|
clamped[j][i] = d;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::CalcForceDelta
|
|
|
|
modifies this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::CalcForceDelta( int d, float dir ) {
|
|
int i;
|
|
float *ptr;
|
|
|
|
delta_f[d] = dir;
|
|
|
|
if ( numClamped == 0 ) {
|
|
return;
|
|
}
|
|
|
|
// get column d of matrix
|
|
ptr = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
ptr[i] = rowPtrs[i][d];
|
|
}
|
|
|
|
// solve force delta
|
|
SolveClamped( delta_f, ptr );
|
|
|
|
// flip force delta based on direction
|
|
if ( dir > 0.0f ) {
|
|
ptr = delta_f.ToFloatPtr();
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
ptr[i] = - ptr[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::CalcAccelDelta
|
|
|
|
modifies this->delta_a and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::CalcAccelDelta( int d ) {
|
|
int j;
|
|
float dot;
|
|
|
|
// only the not clamped variables, including the current variable, can have a change in acceleration
|
|
for ( j = numClamped; j <= d; j++ ) {
|
|
// only the clamped variables and the current variable have a force delta unequal zero
|
|
SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
|
|
delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::ChangeForce
|
|
|
|
modifies this->f and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::ChangeForce( int d, float step ) {
|
|
// only the clamped variables and current variable have a force delta unequal zero
|
|
SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
|
|
f[d] += step * delta_f[d];
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::ChangeAccel
|
|
|
|
modifies this->a and uses this->delta_a
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Square::ChangeAccel( int d, float step ) {
|
|
// only the not clamped variables, including the current variable, can have an acceleration unequal zero
|
|
SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::GetMaxStep
|
|
============
|
|
*/
|
|
void idLCP_Square::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
|
|
int i;
|
|
float s;
|
|
|
|
// default to a full step for the current variable
|
|
if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
|
|
maxStep = -a[d] / delta_a[d];
|
|
} else {
|
|
maxStep = 0.0f;
|
|
}
|
|
limit = d;
|
|
limitSide = 0;
|
|
|
|
// test the current variable
|
|
if ( dir < 0.0f ) {
|
|
if ( lo[d] != -idMath::INFINITY ) {
|
|
s = ( lo[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else {
|
|
if ( hi[d] != idMath::INFINITY ) {
|
|
s = ( hi[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the clamped bounded variables
|
|
for ( i = numUnbounded; i < numClamped; i++ ) {
|
|
if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a low boundary
|
|
if ( lo[i] != -idMath::INFINITY ) {
|
|
s = ( lo[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a high boundary
|
|
if ( hi[i] != idMath::INFINITY ) {
|
|
s = ( hi[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the not clamped bounded variables
|
|
for ( i = numClamped; i < d; i++ ) {
|
|
if ( side[i] == -1 ) {
|
|
if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else if ( side[i] == 1 ) {
|
|
if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else {
|
|
continue;
|
|
}
|
|
// ignore variables for which the force is not allowed to take any substantial value
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
|
|
continue;
|
|
}
|
|
s = -a[i] / delta_a[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Square::Solve
|
|
============
|
|
*/
|
|
bool idLCP_Square::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
|
|
int i, j, n, limit, limitSide, boxStartIndex;
|
|
float dir, maxStep, dot, s;
|
|
char *failed;
|
|
|
|
// true when the matrix rows are 16 byte padded
|
|
padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
|
|
|
|
assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
|
|
assert( o_x.GetSize() == o_m.GetNumRows() );
|
|
assert( o_b.GetSize() == o_m.GetNumRows() );
|
|
assert( o_lo.GetSize() == o_m.GetNumRows() );
|
|
assert( o_hi.GetSize() == o_m.GetNumRows() );
|
|
|
|
// allocate memory for permuted input
|
|
f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
|
|
a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
|
|
hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
|
|
if ( o_boxIndex ) {
|
|
boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
|
|
memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
|
|
} else {
|
|
boxIndex = NULL;
|
|
}
|
|
|
|
// we override the const on o_m here but on exit the matrix is unchanged
|
|
m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
|
|
f.Zero();
|
|
a.Zero();
|
|
b = o_b;
|
|
lo = o_lo;
|
|
hi = o_hi;
|
|
|
|
// pointers to the rows of m
|
|
rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
rowPtrs[i] = m[i];
|
|
}
|
|
|
|
// tells if a variable is at the low boundary, high boundary or inbetween
|
|
side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
|
|
// index to keep track of the permutation
|
|
permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
permuted[i] = i;
|
|
}
|
|
|
|
// permute input so all unbounded variables come first
|
|
numUnbounded = 0;
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
|
|
if ( numUnbounded != i ) {
|
|
Swap( numUnbounded, i );
|
|
}
|
|
numUnbounded++;
|
|
}
|
|
}
|
|
|
|
// permute input so all variables using the boxIndex come last
|
|
boxStartIndex = m.GetNumRows();
|
|
if ( boxIndex ) {
|
|
for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
|
|
if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
|
|
boxStartIndex--;
|
|
if ( boxStartIndex != i ) {
|
|
Swap( boxStartIndex, i );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sub matrix for factorization
|
|
clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
|
|
diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// all unbounded variables are clamped
|
|
numClamped = numUnbounded;
|
|
|
|
// if there are unbounded variables
|
|
if ( numUnbounded ) {
|
|
|
|
// factor and solve for unbounded variables
|
|
if ( !FactorClamped() ) {
|
|
idLib::common->Printf( "idLCP_Square::Solve: unbounded factorization failed\n" );
|
|
return false;
|
|
}
|
|
SolveClamped( f, b.ToFloatPtr() );
|
|
|
|
// if there are no bounded variables we are done
|
|
if ( numUnbounded == m.GetNumRows() ) {
|
|
o_x = f; // the vector is not permuted
|
|
return true;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
int numIgnored = 0;
|
|
#endif
|
|
|
|
// allocate for delta force and delta acceleration
|
|
delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// solve for bounded variables
|
|
failed = NULL;
|
|
for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
|
|
|
|
// once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
|
|
if ( i == boxStartIndex ) {
|
|
for ( j = 0; j < boxStartIndex; j++ ) {
|
|
o_x[permuted[j]] = f[j];
|
|
}
|
|
for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
|
|
s = o_x[boxIndex[j]];
|
|
if ( lo[j] != -idMath::INFINITY ) {
|
|
lo[j] = - idMath::Fabs( lo[j] * s );
|
|
}
|
|
if ( hi[j] != idMath::INFINITY ) {
|
|
hi[j] = idMath::Fabs( hi[j] * s );
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculate acceleration for current variable
|
|
SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
|
|
a[i] = dot - b[i];
|
|
|
|
// if already at the low boundary
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
|
|
side[i] = -1;
|
|
continue;
|
|
}
|
|
|
|
// if already at the high boundary
|
|
if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 1;
|
|
continue;
|
|
}
|
|
|
|
// if inside the clamped region
|
|
if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 0;
|
|
AddClamped( i );
|
|
continue;
|
|
}
|
|
|
|
// drive the current variable into a valid region
|
|
for ( n = 0; n < maxIterations; n++ ) {
|
|
|
|
// direction to move
|
|
if ( a[i] <= 0.0f ) {
|
|
dir = 1.0f;
|
|
} else {
|
|
dir = -1.0f;
|
|
}
|
|
|
|
// calculate force delta
|
|
CalcForceDelta( i, dir );
|
|
|
|
// calculate acceleration delta: delta_a = m * delta_f;
|
|
CalcAccelDelta( i );
|
|
|
|
// maximum step we can take
|
|
GetMaxStep( i, dir, maxStep, limit, limitSide );
|
|
|
|
if ( maxStep <= 0.0f ) {
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
// ignore the current variable completely
|
|
lo[i] = hi[i] = 0.0f;
|
|
f[i] = 0.0f;
|
|
side[i] = -1;
|
|
numIgnored++;
|
|
#else
|
|
failed = va( "invalid step size %.4f", maxStep );
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
// change force
|
|
ChangeForce( i, maxStep );
|
|
|
|
// change acceleration
|
|
ChangeAccel( i, maxStep );
|
|
|
|
// clamp/unclamp the variable that limited this step
|
|
side[limit] = limitSide;
|
|
switch( limitSide ) {
|
|
case 0: {
|
|
a[limit] = 0.0f;
|
|
AddClamped( limit );
|
|
break;
|
|
}
|
|
case -1: {
|
|
f[limit] = lo[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
case 1: {
|
|
f[limit] = hi[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// if the current variable limited the step we can continue with the next variable
|
|
if ( limit == i ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n >= maxIterations ) {
|
|
failed = va( "max iterations %d", maxIterations );
|
|
break;
|
|
}
|
|
|
|
if ( failed ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
if ( numIgnored ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// if failed clear remaining forces
|
|
if ( failed ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
|
|
}
|
|
for ( j = i; j < m.GetNumRows(); j++ ) {
|
|
f[j] = 0.0f;
|
|
}
|
|
}
|
|
|
|
#if defined(_DEBUG) && 0
|
|
if ( !failed ) {
|
|
// test whether or not the solution satisfies the complementarity conditions
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
a[i] = -b[i];
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
a[i] += rowPtrs[i][j] * f[j];
|
|
}
|
|
|
|
if ( f[i] == lo[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
|
|
int bah1 = 1;
|
|
}
|
|
} else if ( f[i] == hi[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
|
|
int bah2 = 1;
|
|
}
|
|
} else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
|
|
int bah3 = 1;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// unpermute result
|
|
for ( i = 0; i < f.GetSize(); i++ ) {
|
|
o_x[permuted[i]] = f[i];
|
|
}
|
|
|
|
// unpermute original matrix
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
if ( permuted[j] == i ) {
|
|
break;
|
|
}
|
|
}
|
|
if ( i != j ) {
|
|
m.SwapColumns( i, j );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
//===============================================================
|
|
// M
|
|
// idLCP_Symmetric MrE
|
|
// E
|
|
//===============================================================
|
|
|
|
class idLCP_Symmetric : public idLCP {
|
|
public:
|
|
virtual bool Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex );
|
|
|
|
private:
|
|
idMatX m; // original matrix
|
|
idVecX b; // right hand side
|
|
idVecX lo, hi; // low and high bounds
|
|
idVecX f, a; // force and acceleration
|
|
idVecX delta_f, delta_a; // delta force and delta acceleration
|
|
idMatX clamped; // LDLt factored sub matrix for clamped variables
|
|
idVecX diagonal; // reciprocal of diagonal of LDLt factored sub matrix for clamped variables
|
|
idVecX solveCache1; // intermediate result cached in SolveClamped
|
|
idVecX solveCache2; // "
|
|
int numUnbounded; // number of unbounded variables
|
|
int numClamped; // number of clamped variables
|
|
int clampedChangeStart; // lowest row/column changed in the clamped matrix during an iteration
|
|
float ** rowPtrs; // pointers to the rows of m
|
|
int * boxIndex; // box index
|
|
int * side; // tells if a variable is at the low boundary = -1, high boundary = 1 or inbetween = 0
|
|
int * permuted; // index to keep track of the permutation
|
|
bool padded; // set to true if the rows of the initial matrix are 16 byte padded
|
|
|
|
private:
|
|
bool FactorClamped( void );
|
|
void SolveClamped( idVecX &x, const float *b );
|
|
void Swap( int i, int j );
|
|
void AddClamped( int r, bool useSolveCache );
|
|
void RemoveClamped( int r );
|
|
void CalcForceDelta( int d, float dir );
|
|
void CalcAccelDelta( int d );
|
|
void ChangeForce( int d, float step );
|
|
void ChangeAccel( int d, float step );
|
|
void GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const;
|
|
};
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::FactorClamped
|
|
============
|
|
*/
|
|
bool idLCP_Symmetric::FactorClamped( void ) {
|
|
|
|
clampedChangeStart = 0;
|
|
|
|
for ( int i = 0; i < numClamped; i++ ) {
|
|
memcpy( clamped[i], rowPtrs[i], numClamped * sizeof( float ) );
|
|
}
|
|
return SIMDProcessor->MatX_LDLTFactor( clamped, diagonal, numClamped );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::SolveClamped
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::SolveClamped( idVecX &x, const float *b ) {
|
|
|
|
// solve L
|
|
SIMDProcessor->MatX_LowerTriangularSolve( clamped, solveCache1.ToFloatPtr(), b, numClamped, clampedChangeStart );
|
|
|
|
// solve D
|
|
SIMDProcessor->Mul( solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), diagonal.ToFloatPtr(), numClamped );
|
|
|
|
// solve Lt
|
|
SIMDProcessor->MatX_LowerTriangularSolveTranspose( clamped, x.ToFloatPtr(), solveCache2.ToFloatPtr(), numClamped );
|
|
|
|
clampedChangeStart = numClamped;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::Swap
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::Swap( int i, int j ) {
|
|
|
|
if ( i == j ) {
|
|
return;
|
|
}
|
|
|
|
idSwap( rowPtrs[i], rowPtrs[j] );
|
|
m.SwapColumns( i, j );
|
|
b.SwapElements( i, j );
|
|
lo.SwapElements( i, j );
|
|
hi.SwapElements( i, j );
|
|
a.SwapElements( i, j );
|
|
f.SwapElements( i, j );
|
|
if ( boxIndex ) {
|
|
idSwap( boxIndex[i], boxIndex[j] );
|
|
}
|
|
idSwap( side[i], side[j] );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::AddClamped
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::AddClamped( int r, bool useSolveCache ) {
|
|
float d, dot;
|
|
|
|
assert( r >= numClamped );
|
|
|
|
if ( numClamped < clampedChangeStart ) {
|
|
clampedChangeStart = numClamped;
|
|
}
|
|
|
|
// add a row at the bottom and a column at the right of the factored
|
|
// matrix for the clamped variables
|
|
|
|
Swap( numClamped, r );
|
|
|
|
// solve for v in L * v = rowPtr[numClamped]
|
|
if ( useSolveCache ) {
|
|
|
|
// the lower triangular solve was cached in SolveClamped called by CalcForceDelta
|
|
memcpy( clamped[numClamped], solveCache2.ToFloatPtr(), numClamped * sizeof( float ) );
|
|
// calculate row dot product
|
|
SIMDProcessor->Dot( dot, solveCache2.ToFloatPtr(), solveCache1.ToFloatPtr(), numClamped );
|
|
|
|
} else {
|
|
|
|
float *v = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[numClamped], numClamped );
|
|
// add bottom row to L
|
|
SIMDProcessor->Mul( clamped[numClamped], v, diagonal.ToFloatPtr(), numClamped );
|
|
// calculate row dot product
|
|
SIMDProcessor->Dot( dot, clamped[numClamped], v, numClamped );
|
|
}
|
|
|
|
// update diagonal[numClamped]
|
|
d = rowPtrs[numClamped][numClamped] - dot;
|
|
|
|
if ( d == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::AddClamped: updating factorization failed\n" );
|
|
numClamped++;
|
|
return;
|
|
}
|
|
|
|
clamped[numClamped][numClamped] = d;
|
|
diagonal[numClamped] = 1.0f / d;
|
|
|
|
numClamped++;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::RemoveClamped
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::RemoveClamped( int r ) {
|
|
int i, j, n;
|
|
float *addSub, *original, *v, *ptr, *v1, *v2, dot;
|
|
double sum, diag, newDiag, invNewDiag, p1, p2, alpha1, alpha2, beta1, beta2;
|
|
|
|
assert( r < numClamped );
|
|
|
|
if ( r < clampedChangeStart ) {
|
|
clampedChangeStart = r;
|
|
}
|
|
|
|
numClamped--;
|
|
|
|
// no need to swap and update the factored matrix when the last row and column are removed
|
|
if ( r == numClamped ) {
|
|
return;
|
|
}
|
|
|
|
// swap the to be removed row/column with the last row/column
|
|
Swap( r, numClamped );
|
|
|
|
// update the factored matrix
|
|
addSub = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
if ( r == 0 ) {
|
|
|
|
if ( numClamped == 1 ) {
|
|
diag = rowPtrs[0][0];
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
clamped[0][0] = diag;
|
|
diagonal[0] = 1.0f / diag;
|
|
return;
|
|
}
|
|
|
|
// calculate the row/column to be added to the lower right sub matrix starting at (r, r)
|
|
original = rowPtrs[numClamped];
|
|
ptr = rowPtrs[r];
|
|
addSub[0] = ptr[0] - original[numClamped];
|
|
for ( i = 1; i < numClamped; i++ ) {
|
|
addSub[i] = ptr[i] - original[i];
|
|
}
|
|
|
|
} else {
|
|
|
|
v = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
// solve for v in L * v = rowPtr[r]
|
|
SIMDProcessor->MatX_LowerTriangularSolve( clamped, v, rowPtrs[r], r );
|
|
|
|
// update removed row
|
|
SIMDProcessor->Mul( clamped[r], v, diagonal.ToFloatPtr(), r );
|
|
|
|
// if the last row/column of the matrix is updated
|
|
if ( r == numClamped - 1 ) {
|
|
// only calculate new diagonal
|
|
SIMDProcessor->Dot( dot, clamped[r], v, r );
|
|
diag = rowPtrs[r][r] - dot;
|
|
if ( diag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
clamped[r][r] = diag;
|
|
diagonal[r] = 1.0f / diag;
|
|
return;
|
|
}
|
|
|
|
// calculate the row/column to be added to the lower right sub matrix starting at (r, r)
|
|
for ( i = 0; i < r; i++ ) {
|
|
v[i] = clamped[r][i] * clamped[i][i];
|
|
}
|
|
for ( i = r; i < numClamped; i++ ) {
|
|
if ( i == r ) {
|
|
sum = clamped[r][r];
|
|
} else {
|
|
sum = clamped[r][r] * clamped[i][r];
|
|
}
|
|
ptr = clamped[i];
|
|
for ( j = 0; j < r; j++ ) {
|
|
sum += ptr[j] * v[j];
|
|
}
|
|
addSub[i] = rowPtrs[r][i] - sum;
|
|
}
|
|
}
|
|
|
|
// add row/column to the lower right sub matrix starting at (r, r)
|
|
|
|
v1 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
v2 = (float *) _alloca16( numClamped * sizeof( float ) );
|
|
|
|
diag = idMath::SQRT_1OVER2;
|
|
v1[r] = ( 0.5f * addSub[r] + 1.0f ) * diag;
|
|
v2[r] = ( 0.5f * addSub[r] - 1.0f ) * diag;
|
|
for ( i = r+1; i < numClamped; i++ ) {
|
|
v1[i] = v2[i] = addSub[i] * diag;
|
|
}
|
|
|
|
alpha1 = 1.0f;
|
|
alpha2 = -1.0f;
|
|
|
|
// simultaneous update/downdate of the sub matrix starting at (r, r)
|
|
n = clamped.GetNumColumns();
|
|
for ( i = r; i < numClamped; i++ ) {
|
|
|
|
diag = clamped[i][i];
|
|
p1 = v1[i];
|
|
newDiag = diag + alpha1 * p1 * p1;
|
|
|
|
if ( newDiag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
alpha1 /= newDiag;
|
|
beta1 = p1 * alpha1;
|
|
alpha1 *= diag;
|
|
|
|
diag = newDiag;
|
|
p2 = v2[i];
|
|
newDiag = diag + alpha2 * p2 * p2;
|
|
|
|
if ( newDiag == 0.0f ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::RemoveClamped: updating factorization failed\n" );
|
|
return;
|
|
}
|
|
|
|
clamped[i][i] = newDiag;
|
|
diagonal[i] = invNewDiag = 1.0f / newDiag;
|
|
|
|
alpha2 *= invNewDiag;
|
|
beta2 = p2 * alpha2;
|
|
alpha2 *= diag;
|
|
|
|
// update column below diagonal (i,i)
|
|
ptr = clamped.ToFloatPtr() + i;
|
|
|
|
for ( j = i+1; j < numClamped - 1; j += 2 ) {
|
|
|
|
float sum0 = ptr[(j+0)*n];
|
|
float sum1 = ptr[(j+1)*n];
|
|
|
|
v1[j+0] -= p1 * sum0;
|
|
v1[j+1] -= p1 * sum1;
|
|
|
|
sum0 += beta1 * v1[j+0];
|
|
sum1 += beta1 * v1[j+1];
|
|
|
|
v2[j+0] -= p2 * sum0;
|
|
v2[j+1] -= p2 * sum1;
|
|
|
|
sum0 += beta2 * v2[j+0];
|
|
sum1 += beta2 * v2[j+1];
|
|
|
|
ptr[(j+0)*n] = sum0;
|
|
ptr[(j+1)*n] = sum1;
|
|
}
|
|
|
|
for ( ; j < numClamped; j++ ) {
|
|
|
|
sum = ptr[j*n];
|
|
|
|
v1[j] -= p1 * sum;
|
|
sum += beta1 * v1[j];
|
|
|
|
v2[j] -= p2 * sum;
|
|
sum += beta2 * v2[j];
|
|
|
|
ptr[j*n] = sum;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::CalcForceDelta
|
|
|
|
modifies this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::CalcForceDelta( int d, float dir ) {
|
|
int i;
|
|
float *ptr;
|
|
|
|
delta_f[d] = dir;
|
|
|
|
if ( numClamped == 0 ) {
|
|
return;
|
|
}
|
|
|
|
// solve force delta
|
|
SolveClamped( delta_f, rowPtrs[d] );
|
|
|
|
// flip force delta based on direction
|
|
if ( dir > 0.0f ) {
|
|
ptr = delta_f.ToFloatPtr();
|
|
for ( i = 0; i < numClamped; i++ ) {
|
|
ptr[i] = - ptr[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::CalcAccelDelta
|
|
|
|
modifies this->delta_a and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::CalcAccelDelta( int d ) {
|
|
int j;
|
|
float dot;
|
|
|
|
// only the not clamped variables, including the current variable, can have a change in acceleration
|
|
for ( j = numClamped; j <= d; j++ ) {
|
|
// only the clamped variables and the current variable have a force delta unequal zero
|
|
SIMDProcessor->Dot( dot, rowPtrs[j], delta_f.ToFloatPtr(), numClamped );
|
|
delta_a[j] = dot + rowPtrs[j][d] * delta_f[d];
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::ChangeForce
|
|
|
|
modifies this->f and uses this->delta_f
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::ChangeForce( int d, float step ) {
|
|
// only the clamped variables and current variable have a force delta unequal zero
|
|
SIMDProcessor->MulAdd( f.ToFloatPtr(), step, delta_f.ToFloatPtr(), numClamped );
|
|
f[d] += step * delta_f[d];
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::ChangeAccel
|
|
|
|
modifies this->a and uses this->delta_a
|
|
============
|
|
*/
|
|
ID_INLINE void idLCP_Symmetric::ChangeAccel( int d, float step ) {
|
|
// only the not clamped variables, including the current variable, can have an acceleration unequal zero
|
|
SIMDProcessor->MulAdd( a.ToFloatPtr() + numClamped, step, delta_a.ToFloatPtr() + numClamped, d - numClamped + 1 );
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::GetMaxStep
|
|
============
|
|
*/
|
|
void idLCP_Symmetric::GetMaxStep( int d, float dir, float &maxStep, int &limit, int &limitSide ) const {
|
|
int i;
|
|
float s;
|
|
|
|
// default to a full step for the current variable
|
|
if ( idMath::Fabs( delta_a[d] ) > LCP_DELTA_ACCEL_EPSILON ) {
|
|
maxStep = -a[d] / delta_a[d];
|
|
} else {
|
|
maxStep = 0.0f;
|
|
}
|
|
limit = d;
|
|
limitSide = 0;
|
|
|
|
// test the current variable
|
|
if ( dir < 0.0f ) {
|
|
if ( lo[d] != -idMath::INFINITY ) {
|
|
s = ( lo[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else {
|
|
if ( hi[d] != idMath::INFINITY ) {
|
|
s = ( hi[d] - f[d] ) / dir;
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the clamped bounded variables
|
|
for ( i = numUnbounded; i < numClamped; i++ ) {
|
|
if ( delta_f[i] < -LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a low boundary
|
|
if ( lo[i] != -idMath::INFINITY ) {
|
|
s = ( lo[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = -1;
|
|
}
|
|
}
|
|
} else if ( delta_f[i] > LCP_DELTA_FORCE_EPSILON ) {
|
|
// if there is a high boundary
|
|
if ( hi[i] != idMath::INFINITY ) {
|
|
s = ( hi[i] - f[i] ) / delta_f[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// test the not clamped bounded variables
|
|
for ( i = numClamped; i < d; i++ ) {
|
|
if ( side[i] == -1 ) {
|
|
if ( delta_a[i] >= -LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else if ( side[i] == 1 ) {
|
|
if ( delta_a[i] <= LCP_DELTA_ACCEL_EPSILON ) {
|
|
continue;
|
|
}
|
|
} else {
|
|
continue;
|
|
}
|
|
// ignore variables for which the force is not allowed to take any substantial value
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && hi[i] <= LCP_BOUND_EPSILON ) {
|
|
continue;
|
|
}
|
|
s = -a[i] / delta_a[i];
|
|
if ( s < maxStep ) {
|
|
maxStep = s;
|
|
limit = i;
|
|
limitSide = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP_Symmetric::Solve
|
|
============
|
|
*/
|
|
bool idLCP_Symmetric::Solve( const idMatX &o_m, idVecX &o_x, const idVecX &o_b, const idVecX &o_lo, const idVecX &o_hi, const int *o_boxIndex ) {
|
|
int i, j, n, limit, limitSide, boxStartIndex;
|
|
float dir, maxStep, dot, s;
|
|
char *failed;
|
|
|
|
// true when the matrix rows are 16 byte padded
|
|
padded = ((o_m.GetNumRows()+3)&~3) == o_m.GetNumColumns();
|
|
|
|
assert( padded || o_m.GetNumRows() == o_m.GetNumColumns() );
|
|
assert( o_x.GetSize() == o_m.GetNumRows() );
|
|
assert( o_b.GetSize() == o_m.GetNumRows() );
|
|
assert( o_lo.GetSize() == o_m.GetNumRows() );
|
|
assert( o_hi.GetSize() == o_m.GetNumRows() );
|
|
|
|
// allocate memory for permuted input
|
|
f.SetData( o_m.GetNumRows(), VECX_ALLOCA( o_m.GetNumRows() ) );
|
|
a.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
b.SetData( o_b.GetSize(), VECX_ALLOCA( o_b.GetSize() ) );
|
|
lo.SetData( o_lo.GetSize(), VECX_ALLOCA( o_lo.GetSize() ) );
|
|
hi.SetData( o_hi.GetSize(), VECX_ALLOCA( o_hi.GetSize() ) );
|
|
if ( o_boxIndex ) {
|
|
boxIndex = (int *)_alloca16( o_x.GetSize() * sizeof( int ) );
|
|
memcpy( boxIndex, o_boxIndex, o_x.GetSize() * sizeof( int ) );
|
|
} else {
|
|
boxIndex = NULL;
|
|
}
|
|
|
|
// we override the const on o_m here but on exit the matrix is unchanged
|
|
m.SetData( o_m.GetNumRows(), o_m.GetNumColumns(), const_cast<float *>(o_m[0]) );
|
|
f.Zero();
|
|
a.Zero();
|
|
b = o_b;
|
|
lo = o_lo;
|
|
hi = o_hi;
|
|
|
|
// pointers to the rows of m
|
|
rowPtrs = (float **) _alloca16( m.GetNumRows() * sizeof( float * ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
rowPtrs[i] = m[i];
|
|
}
|
|
|
|
// tells if a variable is at the low boundary, high boundary or inbetween
|
|
side = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
|
|
// index to keep track of the permutation
|
|
permuted = (int *) _alloca16( m.GetNumRows() * sizeof( int ) );
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
permuted[i] = i;
|
|
}
|
|
|
|
// permute input so all unbounded variables come first
|
|
numUnbounded = 0;
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
if ( lo[i] == -idMath::INFINITY && hi[i] == idMath::INFINITY ) {
|
|
if ( numUnbounded != i ) {
|
|
Swap( numUnbounded, i );
|
|
}
|
|
numUnbounded++;
|
|
}
|
|
}
|
|
|
|
// permute input so all variables using the boxIndex come last
|
|
boxStartIndex = m.GetNumRows();
|
|
if ( boxIndex ) {
|
|
for ( i = m.GetNumRows() - 1; i >= numUnbounded; i-- ) {
|
|
if ( boxIndex[i] >= 0 && ( lo[i] != -idMath::INFINITY || hi[i] != idMath::INFINITY ) ) {
|
|
boxStartIndex--;
|
|
if ( boxStartIndex != i ) {
|
|
Swap( boxStartIndex, i );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// sub matrix for factorization
|
|
clamped.SetData( m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA( m.GetNumRows() * m.GetNumColumns() ) );
|
|
diagonal.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
solveCache1.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
solveCache2.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// all unbounded variables are clamped
|
|
numClamped = numUnbounded;
|
|
|
|
// if there are unbounded variables
|
|
if ( numUnbounded ) {
|
|
|
|
// factor and solve for unbounded variables
|
|
if ( !FactorClamped() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: unbounded factorization failed\n" );
|
|
return false;
|
|
}
|
|
SolveClamped( f, b.ToFloatPtr() );
|
|
|
|
// if there are no bounded variables we are done
|
|
if ( numUnbounded == m.GetNumRows() ) {
|
|
o_x = f; // the vector is not permuted
|
|
return true;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
int numIgnored = 0;
|
|
#endif
|
|
|
|
// allocate for delta force and delta acceleration
|
|
delta_f.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
delta_a.SetData( m.GetNumRows(), VECX_ALLOCA( m.GetNumRows() ) );
|
|
|
|
// solve for bounded variables
|
|
failed = NULL;
|
|
for ( i = numUnbounded; i < m.GetNumRows(); i++ ) {
|
|
|
|
clampedChangeStart = 0;
|
|
|
|
// once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
|
|
if ( i == boxStartIndex ) {
|
|
for ( j = 0; j < boxStartIndex; j++ ) {
|
|
o_x[permuted[j]] = f[j];
|
|
}
|
|
for ( j = boxStartIndex; j < m.GetNumRows(); j++ ) {
|
|
s = o_x[boxIndex[j]];
|
|
if ( lo[j] != -idMath::INFINITY ) {
|
|
lo[j] = - idMath::Fabs( lo[j] * s );
|
|
}
|
|
if ( hi[j] != idMath::INFINITY ) {
|
|
hi[j] = idMath::Fabs( hi[j] * s );
|
|
}
|
|
}
|
|
}
|
|
|
|
// calculate acceleration for current variable
|
|
SIMDProcessor->Dot( dot, rowPtrs[i], f.ToFloatPtr(), i );
|
|
a[i] = dot - b[i];
|
|
|
|
// if already at the low boundary
|
|
if ( lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON ) {
|
|
side[i] = -1;
|
|
continue;
|
|
}
|
|
|
|
// if already at the high boundary
|
|
if ( hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 1;
|
|
continue;
|
|
}
|
|
|
|
// if inside the clamped region
|
|
if ( idMath::Fabs( a[i] ) <= LCP_ACCEL_EPSILON ) {
|
|
side[i] = 0;
|
|
AddClamped( i, false );
|
|
continue;
|
|
}
|
|
|
|
// drive the current variable into a valid region
|
|
for ( n = 0; n < maxIterations; n++ ) {
|
|
|
|
// direction to move
|
|
if ( a[i] <= 0.0f ) {
|
|
dir = 1.0f;
|
|
} else {
|
|
dir = -1.0f;
|
|
}
|
|
|
|
// calculate force delta
|
|
CalcForceDelta( i, dir );
|
|
|
|
// calculate acceleration delta: delta_a = m * delta_f;
|
|
CalcAccelDelta( i );
|
|
|
|
// maximum step we can take
|
|
GetMaxStep( i, dir, maxStep, limit, limitSide );
|
|
|
|
if ( maxStep <= 0.0f ) {
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
// ignore the current variable completely
|
|
lo[i] = hi[i] = 0.0f;
|
|
f[i] = 0.0f;
|
|
side[i] = -1;
|
|
numIgnored++;
|
|
#else
|
|
failed = va( "invalid step size %.4f", maxStep );
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
// change force
|
|
ChangeForce( i, maxStep );
|
|
|
|
// change acceleration
|
|
ChangeAccel( i, maxStep );
|
|
|
|
// clamp/unclamp the variable that limited this step
|
|
side[limit] = limitSide;
|
|
switch( limitSide ) {
|
|
case 0: {
|
|
a[limit] = 0.0f;
|
|
AddClamped( limit, ( limit == i ) );
|
|
break;
|
|
}
|
|
case -1: {
|
|
f[limit] = lo[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
case 1: {
|
|
f[limit] = hi[limit];
|
|
if ( limit != i ) {
|
|
RemoveClamped( limit );
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
// if the current variable limited the step we can continue with the next variable
|
|
if ( limit == i ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ( n >= maxIterations ) {
|
|
failed = va( "max iterations %d", maxIterations );
|
|
break;
|
|
}
|
|
|
|
if ( failed ) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef IGNORE_UNSATISFIABLE_VARIABLES
|
|
if ( numIgnored ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded );
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// if failed clear remaining forces
|
|
if ( failed ) {
|
|
if ( lcp_showFailures.GetBool() ) {
|
|
idLib::common->Printf( "idLCP_Symmetric::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded );
|
|
}
|
|
for ( j = i; j < m.GetNumRows(); j++ ) {
|
|
f[j] = 0.0f;
|
|
}
|
|
}
|
|
|
|
#if defined(_DEBUG) && 0
|
|
if ( !failed ) {
|
|
// test whether or not the solution satisfies the complementarity conditions
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
a[i] = -b[i];
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
a[i] += rowPtrs[i][j] * f[j];
|
|
}
|
|
|
|
if ( f[i] == lo[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON ) {
|
|
int bah1 = 1;
|
|
}
|
|
} else if ( f[i] == hi[i] ) {
|
|
if ( lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON ) {
|
|
int bah2 = 1;
|
|
}
|
|
} else if ( f[i] < lo[i] || f[i] > hi[i] || idMath::Fabs( a[i] ) > 1.0f ) {
|
|
int bah3 = 1;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// unpermute result
|
|
for ( i = 0; i < f.GetSize(); i++ ) {
|
|
o_x[permuted[i]] = f[i];
|
|
}
|
|
|
|
// unpermute original matrix
|
|
for ( i = 0; i < m.GetNumRows(); i++ ) {
|
|
for ( j = 0; j < m.GetNumRows(); j++ ) {
|
|
if ( permuted[j] == i ) {
|
|
break;
|
|
}
|
|
}
|
|
if ( i != j ) {
|
|
m.SwapColumns( i, j );
|
|
idSwap( permuted[i], permuted[j] );
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
//===============================================================
|
|
//
|
|
// idLCP
|
|
//
|
|
//===============================================================
|
|
|
|
/*
|
|
============
|
|
idLCP::AllocSquare
|
|
============
|
|
*/
|
|
idLCP *idLCP::AllocSquare( void ) {
|
|
idLCP *lcp = new idLCP_Square;
|
|
lcp->SetMaxIterations( 32 );
|
|
return lcp;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::AllocSymmetric
|
|
============
|
|
*/
|
|
idLCP *idLCP::AllocSymmetric( void ) {
|
|
idLCP *lcp = new idLCP_Symmetric;
|
|
lcp->SetMaxIterations( 32 );
|
|
return lcp;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::~idLCP
|
|
============
|
|
*/
|
|
idLCP::~idLCP( void ) {
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::SetMaxIterations
|
|
============
|
|
*/
|
|
void idLCP::SetMaxIterations( int max ) {
|
|
maxIterations = max;
|
|
}
|
|
|
|
/*
|
|
============
|
|
idLCP::GetMaxIterations
|
|
============
|
|
*/
|
|
int idLCP::GetMaxIterations( void ) {
|
|
return maxIterations;
|
|
}
|