/* Copyright (C) 2010 Matthew Baranowski, Sander van Rossen & Raven software. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "matrix4.h" extern const float Pi=3.1415926535f; extern const float Half_Pi=Pi/2.0; #define MAX_MATRIX (12) #define MATRIX_DEBUG 0 void BackSub(float a[][MAX_MATRIX],int *indx,float *b,int sz) { int i,j,ii=-1,ip; float sum; for (i=0;i=0) { for (j=ii;j=0;i--) { sum=b[i]; for (j=i+1;jbig) big=temp; if (big=big) { big=dum; imax=i; } } if (j!=imax) { for (k=0;k=0) { for (j=ii;j=0;i--) { sum=b[i]; for (j=i+1;jbig) big=temp; if (big=big) { big=dum; imax=i; } } if (j!=imax) { for (k=0;kMATRIX_TOLERANCE) return 0; } else { if (fabs(m[i][j])>MATRIX_TOLERANCE) return 0; } } } flags=MATFLAG_IDENTITY; return flags; } bool Matrix4::IsRotationIdentity(void) const { int i,j; for (i=0;i<3;i++) { for (j=0;j<3;j++) { if (i==j) { #ifdef _DEBUG float f = fabs(1.0f-m[i][j]); if (f>MATRIX_TOLERANCE) return false; #else if (fabs(1.0-m[i][j])>MATRIX_TOLERANCE) return false; #endif } else { #ifdef _DEBUG float f = fabs(m[i][j]); if (f>MATRIX_TOLERANCE) return false; #else if (fabs(m[i][j])>MATRIX_TOLERANCE) return false; #endif } } } return true; } bool Matrix4::IsRotationIdentityMoreTolerance(void) const { int i,j; for (i=0;i<3;i++) { for (j=0;j<3;j++) { if (i==j) { #ifdef _DEBUG float f = fabs(1.0f-m[i][j]); if (f>MATRIX_TOLERANCE_MORE) return false; #else if (fabs(1.0-m[i][j])>MATRIX_TOLERANCE_MORE) return false; #endif } else { #ifdef _DEBUG float f = fabs(m[i][j]); if (f>MATRIX_TOLERANCE_MORE) return false; #else if (fabs(m[i][j])>MATRIX_TOLERANCE_MORE) return false; #endif } } } return true; } void Matrix4::Identity() { int i; float *f; for (f=m[0],i=0;i<16;i++,f++) *f=0; for (i=0;i<4;i++) m[i][i]=1.0;; flags=MATFLAG_IDENTITY; } float Matrix4::MaxAbsElement() { if (flags&MATFLAG_IDENTITY) { assert(IntegrityCheck()); return 1.0f; } int i; float best=-1,*f; for (f=m[0],i=0;i<16;i++,f++) { if (fabs(*f)>best) best=fabs(*f); } return best; } void Matrix4::Zero() { int i; float *f; for (f=m[0],i=0;i<16;i++,f++) *f=0; flags=0; } float Matrix4::Det() { if (flags&MATFLAG_IDENTITY) { assert(IntegrityCheck()); return 1.0f; } float d; int i,indx[4]; Matrix4 lu=*this; if (!LUDecomposition4(lu.m,indx,&d)) assert(0); for (i=0;i<4;i++) d*=lu.m[i][i]; return d; } void Matrix4::Translate(const float tx,const float ty,const float tz) { int i; float *f; for (f=m[0],i=0;i<12;i++,f++) *f=0; for (i=0;i<4;i++) m[i][i]=1.0;; m[3][0]=tx; m[3][1]=ty; m[3][2]=tz; CalcFlags(); } void Matrix4::Translate(const Vect3 &t) { int i; float *f; for (f=m[0],i=0;i<12;i++,f++) *f=0; for (i=0;i<4;i++) m[i][i]=1.0;; m[3][0]=t[0]; m[3][1]=t[1]; m[3][2]=t[2]; CalcFlags(); } void Matrix4::Rotate(int axis,const float theta) { int i; float *f,s,c; for (f=m[0],i=0;i<16;i++,f++) *f=0; for (i=0;i<4;i++) m[i][i]=1.0; s=sin(theta); c=cos(theta); switch(axis) { case 0: m[1][1]=c; m[1][2]=-s; m[2][1]=s; m[2][2]=c; break; case 1: m[0][0]=c; m[0][2]=s; m[2][0]=-s; m[2][2]=c; break; case 2: m[0][0]=c; m[0][1]=-s; m[1][0]=s; m[1][1]=c; break; } CalcFlags(); } void Matrix4::Rotate(const float rx,const float ry,const float rz) { Matrix4 x,y,z,t; x.Rotate(0,rx); y.Rotate(1,ry); z.Rotate(2,rz); t.Concat(y,z); Concat(x,t); } void Matrix4::Rotate(const Vect3 v) { Matrix4 x,y,z,t; x.Rotate(0, v[0]); y.Rotate(1, v[1]); z.Rotate(2, v[2]); t.Concat(y, z); Concat(x, t); } void Matrix4::Scale(const float sx,const float sy,const float sz) { int i; float *f; for (f=m[0],i=0;i<16;i++,f++) *f=0; assert(fabs(sx)>1E-10); assert(fabs(sy)>1E-10); assert(fabs(sz)>1E-10); m[0][0]=sx; m[1][1]=sy; m[2][2]=sz; m[3][3]=1.0; CalcFlags(); } void Matrix4::Scale(const float sx) { int i; float *f; for (f=m[0],i=0;i<16;i++,f++) *f=0; assert(fabs(sx)>1E-10); m[0][0]=sx; m[1][1]=sx; m[2][2]=sx; m[3][3]=1.0; CalcFlags(); } void Matrix4::Interp(const Matrix4 &m1,float s1,const Matrix4 &m2,float s2) { if ((m1.flags&MATFLAG_IDENTITY)&&(m2.flags&MATFLAG_IDENTITY)) { assert(m1.IntegrityCheck()); assert(m2.IntegrityCheck()); Identity(); return; } int i; const float *f1; const float *f2; float *fd; for (f1=m1.m[0],f2=m2.m[0],fd=m[0],i=0;i<16;i++,f1++,f2++,fd++) *fd=s1*(*f1)+s2*(*f2); flags=0; } void Matrix4::Interp(const Matrix4 &m1,const Matrix4 &m2,float s1) { if ((m1.flags&MATFLAG_IDENTITY)&&(m2.flags&MATFLAG_IDENTITY)) { assert(m1.IntegrityCheck()); assert(m2.IntegrityCheck()); Identity(); return; } int i; const float *f1; const float *f2; float *fd; for (f1=m1.m[0],f2=m2.m[0],fd=m[0],i=0;i<16;i++,f1++,f2++,fd++) *fd=s1*((*f1)-(*f2))+(*f2); flags=0; } bool Matrix4::operator== (const Matrix4& t) const { if ((flags&MATFLAG_IDENTITY)&&(t.flags&MATFLAG_IDENTITY)) { assert(IntegrityCheck()); assert(t.IntegrityCheck()); return true; } int i,j; for (i=0;i<4;i++) { for (j=0;j<3;j++) { if (fabs(m[i][j]-t.m[i][j])>MATRIX_TOLERANCE) return false; } } return true; } void Matrix4::Concat(const Matrix4 &m1,const Matrix4 &m2) { if (m1.flags&MATFLAG_IDENTITY) { assert(m1.IntegrityCheck()); *this=m2; } else if (m2.flags&MATFLAG_IDENTITY) { assert(m2.IntegrityCheck()); *this=m1; } else { assert(this!=&m1&&this!=&m2); /* int i,j,k; float acc=0; for (i=0;i<4;i++) { for (j=0;j<4;j++) { for(k=0,acc=0.;k<4;k++) acc+=m1.m[i][k]*m2.m[k][j]; m[i][j]=acc; } } */ m2.HXFormPointND(m[0],m1.m[0]); m2.HXFormPointND(m[1],m1.m[1]); m2.HXFormPointND(m[2],m1.m[2]); m2.HXFormPointND(m[3],m1.m[3]); flags=0; } } void Matrix4::XFormPoint(Vect3 &dest,const Vect3 &src) const { if (flags&MATFLAG_IDENTITY) { assert(IntegrityCheck()); dest=src; return; } /* dest[0]=m[0][0]*src[0]+m[1][0]*src[1]+m[2][0]*src[2]+m[3][0]; dest[1]=m[0][0]*src[0]+m[1][0]*src[1]+m[2][0]*src[2]+m[3][1]; dest[2]=m[3][2]; */ /* int i,j; dest[0]=m[3][0]; dest[1]=m[3][1]; dest[2]=m[3][2]; for (i=0;i<3;i++) for (j=0;j<3;j++) dest[i]+=m[j][i]*src[j]; */ const float *s=&src.x(); float *d=&dest.x(); const float *mat=&m[0][0]; __asm { mov ebx,[s] mov eax,[mat] mov ecx,[d] fld [DWORD PTR ebx+8] // z fld [DWORD PTR ebx+4] // y z fld [DWORD PTR ebx] // x y z fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03 fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+4] // m01 m00x x y z 1/m33+m23z+m13y+xm03 fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+8] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03 fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03 fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+16] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+24] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+32] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+36] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 fadd [DWORD PTR eax+48] // m00x+m10y+m20z+m30 m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 fxch st(2) // m21z+m01x+m11y m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fadd [DWORD PTR eax+52] // m21z+m01x+m11y+m31 m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fxch st(1) // m22z+m12y+m02x m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fadd [DWORD PTR eax+56] // m22z+m12y+m02x+m32 m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fxch st(2) // m00x+m10y+m20z+m30 m21z+m01x+m11y+m31 m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03 fstp [DWORD PTR ecx] fstp [DWORD PTR ecx+4] fstp [DWORD PTR ecx+8] } } void Matrix4::XFormVect(Vect3 &dest,const Vect3 &src) const { if (flags&MATFLAG_IDENTITY) { dest=src; return; } int i,j; dest=0.; for (i=0;i<3;i++) for (j=0;j<3;j++) dest[i]+=m[j][i]*src[j]; } void Matrix4::XFormVectTranspose(Vect3 &dest,const Vect3 &src) const { if (flags&MATFLAG_IDENTITY) { assert(IntegrityCheck()); dest=src; return; } /* int i,j; dest=0.; for (i=0;i<3;i++) for (j=0;j<3;j++) dest[i]+=m[i][j]*src[j]; */ const float *s=&src.x(); float *d=&dest.x(); const float *mat=&m[0][0]; __asm { mov ebx,[s] mov eax,[mat] mov ecx,[d] fld [DWORD PTR ebx+8] // z fld [DWORD PTR ebx+4] // y z fld [DWORD PTR ebx] // x y z fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03 fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+16] // m01 m00x x y z 1/m33+m23z+m13y+xm03 fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+32] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03 fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03 fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+4] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+36] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+8] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+24] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 fstp [DWORD PTR ecx] fstp [DWORD PTR ecx+8] fstp [DWORD PTR ecx+4] } } #pragma warning( disable : 4725) //instruction may be inaccurate on some Pentiums float Matrix4::HXFormPoint(Vect3 &dest,const Vect3 &src) const { /* int i,j; float h; h=m[3][3]; h+=m[0][3]*src[0]; h+=m[1][3]*src[1]; h+=m[2][3]*src[2]; h=1.0f/h; for (i=0;i<3;i++) { dest[i]=m[3][i]; for (j=0;j<3;j++) dest[i]+=m[j][i]*src[j]; dest[i]*=h; } // return h; */ static const float one=1.0f; const float *s=&src.x(); float *d=&dest.x(); const float *mat=&m[0][0]; float tret; __asm { mov ebx,[s] mov eax,[mat] mov ecx,[d] fld [DWORD PTR ebx] // x fld [DWORD PTR eax+12] // m03 x fmul st,st(1) // xm03 x fld [DWORD PTR ebx+4] // y xm03 x fld [DWORD PTR eax+28] // m13 y xm03 x fmul st,st(1) // m13y y xm03 x fld [DWORD PTR ebx+8] // z m13y y xm03 x fld [DWORD PTR eax+44] // m23 z m13y y xm03 x fmul st,st(1) // m23z z m13y y xm03 x fxch st(2) // m13y z m23z y xm03 x faddp st(4),st // z m23z y m13y+xm03 x fxch st(1) // m23z z y m13y+xm03 x faddp st(3),st // z y m23z+m13y+xm03 x fxch st(2) // m23z+m13y+xm03 y z x fadd [DWORD PTR eax+60] // m33+m23z+m13y+xm03 y z x fdivr [one] // 1/m33+m23z+m13y+xm03 y z x fxch st(3) // x y z 1/m33+m23z+m13y+xm03 mov edx,[DWORD PTR eax] mov edx,[DWORD PTR ecx] mov edx,[DWORD PTR ecx+8] fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03 fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+4] // m01 m00x x y z 1/m33+m23z+m13y+xm03 fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+8] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03 fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03 fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+16] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+24] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+32] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+36] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 fadd [DWORD PTR eax+48] // m00x+m10y+m20z+m30 m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 fxch st(2) // m21z+m01x+m11y m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fadd [DWORD PTR eax+52] // m21z+m01x+m11y+m31 m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fxch st(1) // m22z+m12y+m02x m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fadd [DWORD PTR eax+56] // m22z+m12y+m02x+m32 m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fxch st(2) // m00x+m10y+m20z+m30 m21z+m01x+m11y+m31 m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03 fmul st,st(3) // sx m21z+m01x+m11y+m31 m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03 fxch st(1) // m21z+m01x+m11y+m31 sx m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03 fmul st,st(3) // sy sx m22z+m12y+m02x+m32 1/m33+m23z+m13y+xm03 fxch st(2) // m22z+m12y+m02x+m32 sx sy 1/m33+m23z+m13y+xm03 fmul st,st(3) // sz sx sy rhw fstp [DWORD PTR ecx+8] fstp [DWORD PTR ecx] fstp [DWORD PTR ecx+4] fstp [tret] } return tret; } #pragma warning( default: 4725) //instruction may be inaccurate on some Pentiums void Matrix4::HXFormPointND(float *d,const float *s) const { /* int i,j; float h; h=m[3][3]; h+=m[0][3]*src[0]; h+=m[1][3]*src[1]; h+=m[2][3]*src[2]; h=1.0f/h; for (i=0;i<3;i++) { dest[i]=m[3][i]; for (j=0;j<3;j++) dest[i]+=m[j][i]*src[j]; dest[i]*=h; } // return h; */ const float *mat=&m[0][0]; __asm { mov ebx,[s] mov eax,[mat] mov ecx,[d] fld [DWORD PTR ebx] // x fld [DWORD PTR eax+12] // m03 x fmul st,st(1) // xm03 x fld [DWORD PTR ebx+4] // y xm03 x fld [DWORD PTR eax+28] // m13 y xm03 x fmul st,st(1) // m13y y xm03 x fld [DWORD PTR ebx+8] // z m13y y xm03 x fld [DWORD PTR eax+44] // m23 z m13y y xm03 x fmul st,st(1) // m23z z m13y y xm03 x fxch st(2) // m13y z m23z y xm03 x faddp st(4),st // z m23z y m13y+xm03 x fxch st(1) // m23z z y m13y+xm03 x faddp st(3),st // z y m23z+m13y+xm03 x fxch st(2) // m23z+m13y+xm03 y z x fld [DWORD PTR ebx+12] // m33+m23z+m13y+xm03 y z x fmul [DWORD PTR eax+60] // m33+m23z+m13y+xm03 y z x faddp st(1),st fxch st(3) // x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax] // m00 x y z 1/m33+m23z+m13y+xm03 fmul st,st(1) // m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+4] // m01 m00x x y z 1/m33+m23z+m13y+xm03 fmul st,st(2) // m01x m00x x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+8] // m02 m01x m00x x y z 1/m33+m23z+m13y+xm03 fmulp st(3),st // m01x m00x m02x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m00x m01x m02x y z 1/m33+m23z+m13y+xm03 fxch st(2) // m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+16] // m10 m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+20] // m11 m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m11y m10y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 fxch st(1) // m10y m11y m02x m01x m00x y z 1/m33+m23z+m13y+xm03 faddp st(4),st // m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+24] // m12 m11y m02x m01x m00x+m10y y z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m11y m02x m01x m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(2),st // m02x m01x+m11y m00x+m10y m12y z 1/m33+m23z+m13y+xm03 faddp st(3),st // m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+32] // m20 m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(4) // m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+36] // m21 m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fmul st,st(5) // m21z m20z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 fxch st(1) // m20z m21z m01x+m11y m00x+m10y m12y+m02x z 1/m33+m23z+m13y+xm03 faddp st(3),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+40] // m22 m21z m01x+m11y m00x+m10y+m20z m12y+m02x z 1/m33+m23z+m13y+xm03 fmulp st(5),st // m21z m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 faddp st(1),st // m21z+m01x+m11y m00x+m10y+m20z m12y+m02x m22z 1/m33+m23z+m13y+xm03 fxch st(3) // m22z m00x+m10y+m20z m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 faddp st(2),st // m00x+m10y+m20z m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 // fadd [DWORD PTR eax+48] // m00x+m10y+m20z+m30 m22z+m12y+m02x m21z+m01x+m11y 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+48] fmul [DWORD PTR ebx+12] faddp st(1),st fxch st(2) // m21z+m01x+m11y m22z+m12y+m02x m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+52] fmul [DWORD PTR ebx+12] faddp st(1),st fxch st(1) // m22z+m12y+m02x m21z+m01x+m11y+m31 m00x+m10y+m20z+m30 1/m33+m23z+m13y+xm03 fld [DWORD PTR eax+56] fmul [DWORD PTR ebx+12] faddp st(1),st fstp [DWORD PTR ecx+8] fstp [DWORD PTR ecx+4] fstp [DWORD PTR ecx] fstp [DWORD PTR ecx+12] } } void Matrix4::From3x4(const float mat[3][4]) { for (int i=0; i<4; i++) { for (int j=0; j<3; j++) { m[i][j]=mat[j][i]; } } m[0][3] = 0.0f; m[1][3] = 0.0f; m[2][3] = 0.0f; m[3][3] = 1.0f; CalcFlags(); } void Matrix4::To3x4(float mat[3][4]) const { for (int i=0; i<4; i++) { for (int j=0; j<3; j++) { mat[j][i] = m[i][j]; } } } void Matrix4::SetRow(int i,const Vect3 &t) { m[i][0]=t[0]; m[i][1]=t[1]; m[i][2]=t[2]; flags=0; } void Matrix4::SetColumn(int i,const Vect3 &t) { m[0][i]=t[0]; m[1][i]=t[1]; m[2][i]=t[2]; flags=0; } void Matrix4::SetRow(int i) { m[i][0]=0; m[i][1]=0; m[i][2]=0; flags=0; } void Matrix4::SetColumn(int i) { m[0][i]=0; m[1][i]=0; m[2][i]=0; flags=0; } void Matrix4::SetFromDouble(double *d) { int i; float *f; for (f=m[0],i=0;i<16;i++,f++,d++) *f=(float)*d; flags=0; } void Matrix4::MultiplyColumn(int i,float f) { m[0][i]*=f; m[1][i]*=f; m[2][i]*=f; flags=0; } float Matrix4::GetColumnLen(int i) { float d; d=m[0][i]*m[0][i]+m[1][i]*m[1][i]+m[2][i]*m[2][i]; assert(d>EPS_MATRIX); return sqrt(d); } void Matrix4::Transpose() { int i,j; float swap; for (i=0;i<4;i++) { for (j=i+1;j<4;j++) { swap=m[i][j]; m[i][j]=m[j][i]; m[j][i]=swap; } } } // limit element precision (makes for tighter bone pools when compressing), // eg use 3 for precision to 0.001... // void Matrix4::ClampPrecision(float fNumDecimals) { float fMultiplier = pow(10,fNumDecimals); for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float f = m[i][j]; f *= fMultiplier; f = floor(f); f /= fMultiplier; m[i][j] = f; } } CalcFlags(); }