/*
Copyright (C) 2010 Matthew Baranowski, Sander van Rossen & Raven software.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include "matrix4.h"
extern const float Pi=3.1415926535f;
extern const float Half_Pi=Pi/2.0;
#define MAX_MATRIX (12)
#define MATRIX_DEBUG 0
void BackSub(float a[][MAX_MATRIX],int *indx,float *b,int sz)
{
int i,j,ii=-1,ip;
float sum;
for (i=0;i=0)
{
for (j=ii;j=0;i--)
{
sum=b[i];
for (j=i+1;jbig)
big=temp;
if (big=big)
{
big=dum;
imax=i;
}
}
if (j!=imax)
{
for (k=0;k=0)
{
for (j=ii;j=0;i--)
{
sum=b[i];
for (j=i+1;jbig)
big=temp;
if (big=big)
{
big=dum;
imax=i;
}
}
if (j!=imax)
{
for (k=0;kMATRIX_TOLERANCE)
return 0;
}
else
{
if (fabs(m[i][j])>MATRIX_TOLERANCE)
return 0;
}
}
}
flags=MATFLAG_IDENTITY;
return flags;
}
bool Matrix4::IsRotationIdentity(void) const
{
int i,j;
for (i=0;i<3;i++)
{
for (j=0;j<3;j++)
{
if (i==j)
{
#ifdef _DEBUG
float f = fabs(1.0f-m[i][j]);
if (f>MATRIX_TOLERANCE)
return false;
#else
if (fabs(1.0-m[i][j])>MATRIX_TOLERANCE)
return false;
#endif
}
else
{
#ifdef _DEBUG
float f = fabs(m[i][j]);
if (f>MATRIX_TOLERANCE)
return false;
#else
if (fabs(m[i][j])>MATRIX_TOLERANCE)
return false;
#endif
}
}
}
return true;
}
bool Matrix4::IsRotationIdentityMoreTolerance(void) const
{
int i,j;
for (i=0;i<3;i++)
{
for (j=0;j<3;j++)
{
if (i==j)
{
#ifdef _DEBUG
float f = fabs(1.0f-m[i][j]);
if (f>MATRIX_TOLERANCE_MORE)
return false;
#else
if (fabs(1.0-m[i][j])>MATRIX_TOLERANCE_MORE)
return false;
#endif
}
else
{
#ifdef _DEBUG
float f = fabs(m[i][j]);
if (f>MATRIX_TOLERANCE_MORE)
return false;
#else
if (fabs(m[i][j])>MATRIX_TOLERANCE_MORE)
return false;
#endif
}
}
}
return true;
}
void Matrix4::Identity()
{
int i;
float *f;
for (f=m[0],i=0;i<16;i++,f++)
*f=0;
for (i=0;i<4;i++)
m[i][i]=1.0;;
flags=MATFLAG_IDENTITY;
}
float Matrix4::MaxAbsElement()
{
if (flags&MATFLAG_IDENTITY)
{
assert(IntegrityCheck());
return 1.0f;
}
int i;
float best=-1,*f;
for (f=m[0],i=0;i<16;i++,f++)
{
if (fabs(*f)>best)
best=fabs(*f);
}
return best;
}
void Matrix4::Zero()
{
int i;
float *f;
for (f=m[0],i=0;i<16;i++,f++)
*f=0;
flags=0;
}
float Matrix4::Det()
{
if (flags&MATFLAG_IDENTITY)
{
assert(IntegrityCheck());
return 1.0f;
}
float d;
int i,indx[4];
Matrix4 lu=*this;
if (!LUDecomposition4(lu.m,indx,&d))
assert(0);
for (i=0;i<4;i++)
d*=lu.m[i][i];
return d;
}
void Matrix4::Translate(const float tx,const float ty,const float tz)
{
int i;
float *f;
for (f=m[0],i=0;i<12;i++,f++)
*f=0;
for (i=0;i<4;i++)
m[i][i]=1.0;;
m[3][0]=tx;
m[3][1]=ty;
m[3][2]=tz;
CalcFlags();
}
void Matrix4::Translate(const Vect3 &t)
{
int i;
float *f;
for (f=m[0],i=0;i<12;i++,f++)
*f=0;
for (i=0;i<4;i++)
m[i][i]=1.0;;
m[3][0]=t[0];
m[3][1]=t[1];
m[3][2]=t[2];
CalcFlags();
}
void Matrix4::Rotate(int axis,const float theta)
{
int i;
float *f,s,c;
for (f=m[0],i=0;i<16;i++,f++)
*f=0;
for (i=0;i<4;i++)
m[i][i]=1.0;
s=sin(theta);
c=cos(theta);
switch(axis)
{
case 0:
m[1][1]=c;
m[1][2]=-s;
m[2][1]=s;
m[2][2]=c;
break;
case 1:
m[0][0]=c;
m[0][2]=s;
m[2][0]=-s;
m[2][2]=c;
break;
case 2:
m[0][0]=c;
m[0][1]=-s;
m[1][0]=s;
m[1][1]=c;
break;
}
CalcFlags();
}
void Matrix4::Rotate(const float rx,const float ry,const float rz)
{
Matrix4 x,y,z,t;
x.Rotate(0,rx);
y.Rotate(1,ry);
z.Rotate(2,rz);
t.Concat(y,z);
Concat(x,t);
}
void Matrix4::Rotate(const Vect3 v)
{
Matrix4 x,y,z,t;
x.Rotate(0, v[0]);
y.Rotate(1, v[1]);
z.Rotate(2, v[2]);
t.Concat(y, z);
Concat(x, t);
}
void Matrix4::Scale(const float sx,const float sy,const float sz)
{
int i;
float *f;
for (f=m[0],i=0;i<16;i++,f++)
*f=0;
assert(fabs(sx)>1E-10);
assert(fabs(sy)>1E-10);
assert(fabs(sz)>1E-10);
m[0][0]=sx;
m[1][1]=sy;
m[2][2]=sz;
m[3][3]=1.0;
CalcFlags();
}
void Matrix4::Scale(const float sx)
{
int i;
float *f;
for (f=m[0],i=0;i<16;i++,f++)
*f=0;
assert(fabs(sx)>1E-10);
m[0][0]=sx;
m[1][1]=sx;
m[2][2]=sx;
m[3][3]=1.0;
CalcFlags();
}
void Matrix4::Interp(const Matrix4 &m1,float s1,const Matrix4 &m2,float s2)
{
if ((m1.flags&MATFLAG_IDENTITY)&&(m2.flags&MATFLAG_IDENTITY))
{
assert(m1.IntegrityCheck());
assert(m2.IntegrityCheck());
Identity();
return;
}
int i;
const float *f1;
const float *f2;
float *fd;
for (f1=m1.m[0],f2=m2.m[0],fd=m[0],i=0;i<16;i++,f1++,f2++,fd++)
*fd=s1*(*f1)+s2*(*f2);
flags=0;
}
void Matrix4::Interp(const Matrix4 &m1,const Matrix4 &m2,float s1)
{
if ((m1.flags&MATFLAG_IDENTITY)&&(m2.flags&MATFLAG_IDENTITY))
{
assert(m1.IntegrityCheck());
assert(m2.IntegrityCheck());
Identity();
return;
}
int i;
const float *f1;
const float *f2;
float *fd;
for (f1=m1.m[0],f2=m2.m[0],fd=m[0],i=0;i<16;i++,f1++,f2++,fd++)
*fd=s1*((*f1)-(*f2))+(*f2);
flags=0;
}
bool Matrix4::operator== (const Matrix4& t) const
{
if ((flags&MATFLAG_IDENTITY)&&(t.flags&MATFLAG_IDENTITY))
{
assert(IntegrityCheck());
assert(t.IntegrityCheck());
return true;
}
int i,j;
for (i=0;i<4;i++)
{
for (j=0;j<3;j++)
{
if (fabs(m[i][j]-t.m[i][j])>MATRIX_TOLERANCE)
return false;
}
}
return true;
}
void Matrix4::Concat(const Matrix4 &m1,const Matrix4 &m2)
{
if (m1.flags&MATFLAG_IDENTITY)
{
assert(m1.IntegrityCheck());
*this=m2;
}
else if (m2.flags&MATFLAG_IDENTITY)
{
assert(m2.IntegrityCheck());
*this=m1;
}
else
{
assert(this!=&m1&&this!=&m2);
/*
int i,j,k;
float acc=0;
for (i=0;i<4;i++)
{
for (j=0;j<4;j++)
{
for(k=0,acc=0.;k<4;k++)
acc+=m1.m[i][k]*m2.m[k][j];
m[i][j]=acc;
}
}
*/
m2.HXFormPointND(m[0],m1.m[0]);
m2.HXFormPointND(m[1],m1.m[1]);
m2.HXFormPointND(m[2],m1.m[2]);
m2.HXFormPointND(m[3],m1.m[3]);
flags=0;
}
}
void Matrix4::XFormPoint(Vect3 &dest,const Vect3 &src) const
{
if (flags&MATFLAG_IDENTITY)
{
assert(IntegrityCheck());
dest=src;
return;
}
/*
dest[0]=m[0][0]*src[0]+m[1][0]*src[1]+m[2][0]*src[2]+m[3][0];
dest[1]=m[0][0]*src[0]+m[1][0]*src[1]+m[2][0]*src[2]+m[3][1];
dest[2]=m[3][2];
*/
/*
int i,j;
dest[0]=m[3][0];
dest[1]=m[3][1];
dest[2]=m[3][2];
for (i=0;i<3;i++)
for (j=0;j<3;j++)
dest[i]+=m[j][i]*src[j];
*/
const float *s=&src.x();
float *d=&dest.x();
const float *mat=&m[0][0];
__asm
{
mov ebx,[s]
mov eax,[mat]
mov ecx,[d]
fld [DWORD PTR ebx+8] // z
fld [DWORD PTR ebx+4] // y z
fld [DWORD PTR ebx] // x y z
fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03
fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+4] // m01 m00x x y z 1/m33+m23z+m13y+xm03
fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+8] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03
fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03
fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+16] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+24] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+32] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+36] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
fadd [DWORD PTR eax+48] // m00x+m10y+m20z+m30 m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
fxch st(2) // m21z+m01x+m11y m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fadd [DWORD PTR eax+52] // m21z+m01x+m11y+m31 m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fxch st(1) // m22z+m12y+m02x m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fadd [DWORD PTR eax+56] // m22z+m12y+m02x+m32 m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fxch st(2) // m00x+m10y+m20z+m30 m21z+m01x+m11y+m31 m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03
fstp [DWORD PTR ecx]
fstp [DWORD PTR ecx+4]
fstp [DWORD PTR ecx+8]
}
}
void Matrix4::XFormVect(Vect3 &dest,const Vect3 &src) const
{
if (flags&MATFLAG_IDENTITY)
{
dest=src;
return;
}
int i,j;
dest=0.;
for (i=0;i<3;i++)
for (j=0;j<3;j++)
dest[i]+=m[j][i]*src[j];
}
void Matrix4::XFormVectTranspose(Vect3 &dest,const Vect3 &src) const
{
if (flags&MATFLAG_IDENTITY)
{
assert(IntegrityCheck());
dest=src;
return;
}
/*
int i,j;
dest=0.;
for (i=0;i<3;i++)
for (j=0;j<3;j++)
dest[i]+=m[i][j]*src[j];
*/
const float *s=&src.x();
float *d=&dest.x();
const float *mat=&m[0][0];
__asm
{
mov ebx,[s]
mov eax,[mat]
mov ecx,[d]
fld [DWORD PTR ebx+8] // z
fld [DWORD PTR ebx+4] // y z
fld [DWORD PTR ebx] // x y z
fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03
fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+16] // m01 m00x x y z 1/m33+m23z+m13y+xm03
fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+32] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03
fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03
fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+4] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+36] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+8] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+24] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
fstp [DWORD PTR ecx]
fstp [DWORD PTR ecx+8]
fstp [DWORD PTR ecx+4]
}
}
#pragma warning( disable : 4725) //instruction may be inaccurate on some Pentiums
float Matrix4::HXFormPoint(Vect3 &dest,const Vect3 &src) const
{
/*
int i,j;
float h;
h=m[3][3];
h+=m[0][3]*src[0];
h+=m[1][3]*src[1];
h+=m[2][3]*src[2];
h=1.0f/h;
for (i=0;i<3;i++)
{
dest[i]=m[3][i];
for (j=0;j<3;j++)
dest[i]+=m[j][i]*src[j];
dest[i]*=h;
}
// return h;
*/
static const float one=1.0f;
const float *s=&src.x();
float *d=&dest.x();
const float *mat=&m[0][0];
float tret;
__asm
{
mov ebx,[s]
mov eax,[mat]
mov ecx,[d]
fld [DWORD PTR ebx] // x
fld [DWORD PTR eax+12] // m03 x
fmul st,st(1) // xm03 x
fld [DWORD PTR ebx+4] // y xm03 x
fld [DWORD PTR eax+28] // m13 y xm03 x
fmul st,st(1) // m13y y xm03 x
fld [DWORD PTR ebx+8] // z m13y y xm03 x
fld [DWORD PTR eax+44] // m23 z m13y y xm03 x
fmul st,st(1) // m23z z m13y y xm03 x
fxch st(2) // m13y z m23z y xm03 x
faddp st(4),st // z m23z y m13y+xm03 x
fxch st(1) // m23z z y m13y+xm03 x
faddp st(3),st // z y m23z+m13y+xm03 x
fxch st(2) // m23z+m13y+xm03 y z x
fadd [DWORD PTR eax+60] // m33+m23z+m13y+xm03 y z x
fdivr [one] // 1/m33+m23z+m13y+xm03 y z x
fxch st(3) // x y z 1/m33+m23z+m13y+xm03
mov edx,[DWORD PTR eax]
mov edx,[DWORD PTR ecx]
mov edx,[DWORD PTR ecx+8]
fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03
fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+4] // m01 m00x x y z 1/m33+m23z+m13y+xm03
fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+8] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03
fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03
fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+16] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+24] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+32] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+36] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
fadd [DWORD PTR eax+48] // m00x+m10y+m20z+m30 m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
fxch st(2) // m21z+m01x+m11y m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fadd [DWORD PTR eax+52] // m21z+m01x+m11y+m31 m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fxch st(1) // m22z+m12y+m02x m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fadd [DWORD PTR eax+56] // m22z+m12y+m02x+m32 m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fxch st(2) // m00x+m10y+m20z+m30 m21z+m01x+m11y+m31 m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03
fmul st,st(3) // sx m21z+m01x+m11y+m31 m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03
fxch st(1) // m21z+m01x+m11y+m31 sx m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03
fmul st,st(3) // sy sx m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03
fxch st(2) // m22z+m12y+m02x+m32 sx sy 1/m33+m23z+m13y+xm03
fmul st,st(3) // sz sx sy rhw
fstp [DWORD PTR ecx+8]
fstp [DWORD PTR ecx]
fstp [DWORD PTR ecx+4]
fstp [tret]
}
return tret;
}
#pragma warning( default: 4725) //instruction may be inaccurate on some Pentiums
void Matrix4::HXFormPointND(float *d,const float *s) const
{
/*
int i,j;
float h;
h=m[3][3];
h+=m[0][3]*src[0];
h+=m[1][3]*src[1];
h+=m[2][3]*src[2];
h=1.0f/h;
for (i=0;i<3;i++)
{
dest[i]=m[3][i];
for (j=0;j<3;j++)
dest[i]+=m[j][i]*src[j];
dest[i]*=h;
}
// return h;
*/
const float *mat=&m[0][0];
__asm
{
mov ebx,[s]
mov eax,[mat]
mov ecx,[d]
fld [DWORD PTR ebx] // x
fld [DWORD PTR eax+12] // m03 x
fmul st,st(1) // xm03 x
fld [DWORD PTR ebx+4] // y xm03 x
fld [DWORD PTR eax+28] // m13 y xm03 x
fmul st,st(1) // m13y y xm03 x
fld [DWORD PTR ebx+8] // z m13y y xm03 x
fld [DWORD PTR eax+44] // m23 z m13y y xm03 x
fmul st,st(1) // m23z z m13y y xm03 x
fxch st(2) // m13y z m23z y xm03 x
faddp st(4),st // z m23z y m13y+xm03 x
fxch st(1) // m23z z y m13y+xm03 x
faddp st(3),st // z y m23z+m13y+xm03 x
fxch st(2) // m23z+m13y+xm03 y z x
fld [DWORD PTR ebx+12] // m33+m23z+m13y+xm03 y z x
fmul [DWORD PTR eax+60] // m33+m23z+m13y+xm03 y z x
faddp st(1),st
fxch st(3) // x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03
fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+4] // m01 m00x x y z 1/m33+m23z+m13y+xm03
fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+8] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03
fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03
fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+16] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03
faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+24] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03
faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+32] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+36] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03
faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03
fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03
fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
// fadd [DWORD PTR eax+48] // m00x+m10y+m20z+m30 m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+48]
fmul [DWORD PTR ebx+12]
faddp st(1),st
fxch st(2) // m21z+m01x+m11y m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+52]
fmul [DWORD PTR ebx+12]
faddp st(1),st
fxch st(1) // m22z+m12y+m02x m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03
fld [DWORD PTR eax+56]
fmul [DWORD PTR ebx+12]
faddp st(1),st
fstp [DWORD PTR ecx+8]
fstp [DWORD PTR ecx+4]
fstp [DWORD PTR ecx]
fstp [DWORD PTR ecx+12]
}
}
void Matrix4::From3x4(const float mat[3][4])
{
for (int i=0; i<4; i++)
{
for (int j=0; j<3; j++)
{
m[i][j]=mat[j][i];
}
}
m[0][3] = 0.0f;
m[1][3] = 0.0f;
m[2][3] = 0.0f;
m[3][3] = 1.0f;
CalcFlags();
}
void Matrix4::To3x4(float mat[3][4]) const
{
for (int i=0; i<4; i++)
{
for (int j=0; j<3; j++)
{
mat[j][i] = m[i][j];
}
}
}
void Matrix4::SetRow(int i,const Vect3 &t)
{
m[i][0]=t[0];
m[i][1]=t[1];
m[i][2]=t[2];
flags=0;
}
void Matrix4::SetColumn(int i,const Vect3 &t)
{
m[0][i]=t[0];
m[1][i]=t[1];
m[2][i]=t[2];
flags=0;
}
void Matrix4::SetRow(int i)
{
m[i][0]=0;
m[i][1]=0;
m[i][2]=0;
flags=0;
}
void Matrix4::SetColumn(int i)
{
m[0][i]=0;
m[1][i]=0;
m[2][i]=0;
flags=0;
}
void Matrix4::SetFromDouble(double *d)
{
int i;
float *f;
for (f=m[0],i=0;i<16;i++,f++,d++)
*f=(float)*d;
flags=0;
}
void Matrix4::MultiplyColumn(int i,float f)
{
m[0][i]*=f;
m[1][i]*=f;
m[2][i]*=f;
flags=0;
}
float Matrix4::GetColumnLen(int i)
{
float d;
d=m[0][i]*m[0][i]+m[1][i]*m[1][i]+m[2][i]*m[2][i];
assert(d>EPS_MATRIX);
return sqrt(d);
}
void Matrix4::Transpose()
{
int i,j;
float swap;
for (i=0;i<4;i++)
{
for (j=i+1;j<4;j++)
{
swap=m[i][j];
m[i][j]=m[j][i];
m[j][i]=swap;
}
}
}
// limit element precision (makes for tighter bone pools when compressing),
// eg use 3 for precision to 0.001...
//
void Matrix4::ClampPrecision(float fNumDecimals)
{
float fMultiplier = pow(10,fNumDecimals);
for (int i=0; i<4; i++)
{
for (int j=0; j<4; j++)
{
float f = m[i][j];
f *= fMultiplier;
f = floor(f);
f /= fMultiplier;
m[i][j] = f;
}
}
CalcFlags();
}