mirror of
https://github.com/ioquake/ioq3.git
synced 2025-05-30 16:41:31 +00:00
567 lines
16 KiB
C
567 lines
16 KiB
C
/*
|
|
* =========================================================================
|
|
* Filename: matrix_multiplication.c
|
|
* Description: 4*4 matrix
|
|
* =========================================================================
|
|
*/
|
|
|
|
#if defined __x86_64__
|
|
#include <xmmintrin.h>
|
|
#endif
|
|
|
|
#include <string.h>
|
|
#include <math.h>
|
|
|
|
#include "ref_import.h"
|
|
#include "matrix_multiplication.h"
|
|
|
|
#define DEG2RAD( a ) ( ( (a) * M_PI ) / 180.0F )
|
|
|
|
static const float s_Identity3x3[3][3] = {
|
|
{ 1.0f, 0.0f, 0.0f },
|
|
{ 0.0f, 1.0f, 0.0f },
|
|
{ 0.0f, 0.0f, 1.0f }
|
|
};
|
|
|
|
static const float s_Identity4x4[16] = {
|
|
1.0f, 0.0f, 0.0f, 0.0f,
|
|
0.0f, 1.0f, 0.0f, 0.0f,
|
|
0.0f, 0.0f, 1.0f, 0.0f,
|
|
0.0f, 0.0f, 0.0f, 1.0f
|
|
};
|
|
|
|
|
|
void Mat4Copy( const float in[64], float out[16] )
|
|
{
|
|
memcpy(out, in, 64);
|
|
}
|
|
|
|
void Mat3x3Copy( float dst[3][3], const float src[3][3] )
|
|
{
|
|
memcpy(dst, src, 36);
|
|
}
|
|
|
|
void VectorLerp( float a[3], float b[3], float lerp, float out[3])
|
|
{
|
|
out[0] = a[0] + (b[0] - a[0]) * lerp;
|
|
out[1] = a[1] + (b[1] - a[1]) * lerp;
|
|
out[2] = a[2] + (b[2] - a[2]) * lerp;
|
|
}
|
|
|
|
void VectorNorm( float v[3] )
|
|
{
|
|
float length = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
|
|
|
|
if(length != 0)
|
|
{
|
|
/* writing it this way allows gcc to recognize that rsqrt can be used */
|
|
length = 1.0f / sqrtf (length);
|
|
v[0] *= length;
|
|
v[1] *= length;
|
|
v[2] *= length;
|
|
}
|
|
}
|
|
|
|
float VectorLen( const float v[3] )
|
|
{
|
|
return sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
|
|
}
|
|
|
|
|
|
void Mat4Identity( float out[16] )
|
|
{
|
|
memcpy(out, s_Identity4x4, 64);
|
|
}
|
|
|
|
void Mat4Translation( float vec[3], float out[16] )
|
|
{
|
|
memcpy(out, s_Identity4x4, 64);
|
|
|
|
out[12] = vec[0];
|
|
out[13] = vec[1];
|
|
out[14] = vec[2];
|
|
}
|
|
//
|
|
// NOTE; out = b * a ???
|
|
// a, b and c are specified in column-major order
|
|
//
|
|
void myGlMultMatrix(const float A[16], const float B[16], float out[16])
|
|
{
|
|
int i, j;
|
|
for ( i = 0 ; i < 4 ; i++ )
|
|
{
|
|
for ( j = 0 ; j < 4 ; j++ )
|
|
{
|
|
out[ i * 4 + j ] =
|
|
A [ i * 4 + 0 ] * B [ 0 * 4 + j ]
|
|
+ A [ i * 4 + 1 ] * B [ 1 * 4 + j ]
|
|
+ A [ i * 4 + 2 ] * B [ 2 * 4 + j ]
|
|
+ A [ i * 4 + 3 ] * B [ 3 * 4 + j ];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void MatrixMultiply4x4(const float A[16], const float B[16], float out[16])
|
|
{
|
|
out[0] = A[0]*B[0] + A[1]*B[4] + A[2]*B[8] + A[3]*B[12];
|
|
out[1] = A[0]*B[1] + A[1]*B[5] + A[2]*B[9] + A[3]*B[13];
|
|
out[2] = A[0]*B[2] + A[1]*B[6] + A[2]*B[10] + A[3]*B[14];
|
|
out[3] = A[0]*B[3] + A[1]*B[7] + A[2]*B[11] + A[3]*B[15];
|
|
|
|
out[4] = A[4]*B[0] + A[5]*B[4] + A[6]*B[8] + A[7]*B[12];
|
|
out[5] = A[4]*B[1] + A[5]*B[5] + A[6]*B[9] + A[7]*B[13];
|
|
out[6] = A[4]*B[2] + A[5]*B[6] + A[6]*B[10] + A[7]*B[14];
|
|
out[7] = A[4]*B[3] + A[5]*B[7] + A[6]*B[11] + A[7]*B[15];
|
|
|
|
out[8] = A[8]*B[0] + A[9]*B[4] + A[10]*B[8] + A[11]*B[12];
|
|
out[9] = A[8]*B[1] + A[9]*B[5] + A[10]*B[9] + A[11]*B[13];
|
|
out[10] = A[8]*B[2] + A[9]*B[6] + A[10]*B[10] + A[11]*B[14];
|
|
out[11] = A[8]*B[3] + A[9]*B[7] + A[10]*B[11] + A[11]*B[15];
|
|
|
|
out[12] = A[12]*B[0] + A[13]*B[4] + A[14]*B[8] + A[15]*B[12];
|
|
out[13] = A[12]*B[1] + A[13]*B[5] + A[14]*B[9] + A[15]*B[13];
|
|
out[14] = A[12]*B[2] + A[13]*B[6] + A[14]*B[10] + A[15]*B[14];
|
|
out[15] = A[12]*B[3] + A[13]*B[7] + A[14]*B[11] + A[15]*B[15];
|
|
}
|
|
|
|
/*
|
|
* NOTE; out = B * A in math
|
|
* a, b and c are specified in column-major order
|
|
* out must be 16 byte aliagned
|
|
*/
|
|
|
|
void MatrixMultiply4x4_SSE(const float A[16], const float B[16], float out[16])
|
|
{
|
|
#if defined __x86_64__
|
|
__m128 row1 = _mm_load_ps(&B[0]);
|
|
__m128 row2 = _mm_load_ps(&B[4]);
|
|
__m128 row3 = _mm_load_ps(&B[8]);
|
|
__m128 row4 = _mm_load_ps(&B[12]);
|
|
|
|
int i;
|
|
for(i=0; i<4; i++)
|
|
{
|
|
__m128 brod1 = _mm_set1_ps(A[4*i ]);
|
|
__m128 brod2 = _mm_set1_ps(A[4*i + 1]);
|
|
__m128 brod3 = _mm_set1_ps(A[4*i + 2]);
|
|
__m128 brod4 = _mm_set1_ps(A[4*i + 3]);
|
|
|
|
__m128 row = _mm_add_ps(
|
|
_mm_add_ps( _mm_mul_ps(brod1, row1), _mm_mul_ps(brod2, row2) ),
|
|
_mm_add_ps( _mm_mul_ps(brod3, row3), _mm_mul_ps(brod4, row4) )
|
|
);
|
|
|
|
_mm_store_ps(&out[4*i], row);
|
|
}
|
|
#else
|
|
MatrixMultiply4x4( A, B, out);
|
|
#endif
|
|
}
|
|
|
|
|
|
void Mat4Transform( const float in1[16], const float in2[4], float out[4] )
|
|
{
|
|
// 16 mult, 12 plus
|
|
|
|
float a = in2[0];
|
|
float b = in2[1];
|
|
float c = in2[2];
|
|
float d = in2[3];
|
|
|
|
out[0] = in1[0] * a + in1[4] * b + in1[ 8] * c + in1[12] * d;
|
|
out[1] = in1[1] * a + in1[5] * b + in1[ 9] * c + in1[13] * d;
|
|
out[2] = in1[2] * a + in1[6] * b + in1[10] * c + in1[14] * d;
|
|
out[3] = in1[3] * a + in1[7] * b + in1[11] * c + in1[15] * d;
|
|
}
|
|
|
|
|
|
void Vec3Transform(const float Mat[16], const float v[3], float out[3])
|
|
{
|
|
float x = v[0];
|
|
float y = v[1];
|
|
float z = v[2];
|
|
|
|
out[0] = Mat[0] * x + Mat[4] * y + Mat[ 8] * z + Mat[12];
|
|
out[1] = Mat[1] * x + Mat[5] * y + Mat[ 9] * z + Mat[13];
|
|
out[2] = Mat[2] * x + Mat[6] * y + Mat[10] * z + Mat[14];
|
|
}
|
|
|
|
|
|
// unfortunately, this fun seems not faseter than Mat4Transform
|
|
// vector1x4 * mat4x4
|
|
void Mat4x1Transform_SSE( const float A[16], const float x[4], float out[4] )
|
|
{
|
|
// 16 mult, 12 plus
|
|
//out[0] = A[0] * x[0] + A[4] * x[1] + A[ 8] * x[2] + A[12] * x[3];
|
|
//out[1] = A[1] * x[0] + A[5] * x[1] + A[ 9] * x[2] + A[13] * x[3];
|
|
//out[2] = A[2] * x[0] + A[6] * x[1] + A[10] * x[2] + A[14] * x[3];
|
|
//out[3] = A[3] * x[0] + A[7] * x[1] + A[11] * x[2] + A[15] * x[3];
|
|
|
|
// 4 mult + 3 plus + 4 broadcast + 8 load (4 _mm_set1_ps + 4 _mm_set1_ps)
|
|
// + 1 store
|
|
#if defined __x86_64__
|
|
__m128 r1 = _mm_mul_ps( _mm_set1_ps(x[0]), _mm_load_ps(A ) );
|
|
__m128 r2 = _mm_mul_ps( _mm_set1_ps(x[1]), _mm_load_ps(A+4 ) );
|
|
__m128 r3 = _mm_mul_ps( _mm_set1_ps(x[2]), _mm_load_ps(A+8 ) );
|
|
__m128 r4 = _mm_mul_ps( _mm_set1_ps(x[3]), _mm_load_ps(A+12) );
|
|
|
|
_mm_store_ps(out, _mm_add_ps( _mm_add_ps(r1, r2), _mm_add_ps(r3, r4) ) );
|
|
#else
|
|
Mat4Transform(A, x, out);
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
#define SHUFFLE_PARAM(x, y, z, w) ( x | y<<2 | z<<4 | w<<6 )
|
|
#define _mm_replicate_x_ps(v) _mm_shuffle_ps(v, v, SHUFFLE_PARAM(0, 0, 0, 0))
|
|
#define _mm_replicate_y_ps(v) _mm_shuffle_ps(v, v, SHUFFLE_PARAM(1, 1, 1, 1))
|
|
#define _mm_replicate_z_ps(v) _mm_shuffle_ps(v, v, SHUFFLE_PARAM(2, 2, 2, 2))
|
|
#define _mm_replicate_w_ps(v) _mm_shuffle_ps(v, v, SHUFFLE_PARAM(3, 3, 3, 3))
|
|
|
|
|
|
void Vec4Transform_SSE( const float A[16], float v[4], float out[4] )
|
|
{
|
|
#if defined __x86_64__
|
|
__m128 x = _mm_load_ps(v);
|
|
// 16 mult, 12 plus
|
|
//out[0] = A[0] * x[0] + A[4] * x[1] + A[ 8] * x[2] + A[12] * x[3];
|
|
//out[1] = A[1] * x[0] + A[5] * x[1] + A[ 9] * x[2] + A[13] * x[3];
|
|
//out[2] = A[2] * x[0] + A[6] * x[1] + A[10] * x[2] + A[14] * x[3];
|
|
//out[3] = A[3] * x[0] + A[7] * x[1] + A[11] * x[2] + A[15] * x[3];
|
|
|
|
// 4 mult + 3 plus + 4 broadcast + 8 load (4 _mm_set1_ps + 4 _mm_set1_ps)
|
|
// + 1 store
|
|
__m128 r1 = _mm_mul_ps( _mm_replicate_x_ps( x ), _mm_load_ps(A) );
|
|
__m128 r2 = _mm_mul_ps( _mm_replicate_y_ps( x ), _mm_load_ps(A+4) );
|
|
__m128 r3 = _mm_mul_ps( _mm_replicate_z_ps( x ), _mm_load_ps(A+8) );
|
|
__m128 r4 = _mm_mul_ps( _mm_replicate_w_ps( x ), _mm_load_ps(A+12) );
|
|
|
|
_mm_store_ps(out, _mm_add_ps( _mm_add_ps( r1, r2 ), _mm_add_ps( r3, r4 ) ));
|
|
#else
|
|
Mat4Transform(A, v, out);
|
|
#endif
|
|
}
|
|
*/
|
|
|
|
void Mat3x3Identity( float pMat[3][3] )
|
|
{
|
|
memcpy(pMat, s_Identity3x3, 36);
|
|
}
|
|
|
|
|
|
void TransformModelToClip( const float src[3], const float* pMatModel, const float* pMatProj, float eye[4], float dst[4])
|
|
{
|
|
int i;
|
|
|
|
for ( i = 0 ; i < 4 ; i++ )
|
|
{
|
|
eye[i] =
|
|
src[0] * pMatModel[ i + 0 * 4 ] +
|
|
src[1] * pMatModel[ i + 1 * 4 ] +
|
|
src[2] * pMatModel[ i + 2 * 4 ] +
|
|
1 * pMatModel[ i + 3 * 4 ];
|
|
}
|
|
|
|
for ( i = 0 ; i < 4 ; i++ )
|
|
{
|
|
dst[i] =
|
|
eye[0] * pMatProj[ i + 0 * 4 ] +
|
|
eye[1] * pMatProj[ i + 1 * 4 ] +
|
|
eye[2] * pMatProj[ i + 2 * 4 ] +
|
|
eye[3] * pMatProj[ i + 3 * 4 ];
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void VectorCross( const float v1[3], const float v2[3], float cross[3] )
|
|
{
|
|
cross[0] = v1[1]*v2[2] - v1[2]*v2[1];
|
|
cross[1] = v1[2]*v2[0] - v1[0]*v2[2];
|
|
cross[2] = v1[0]*v2[1] - v1[1]*v2[0];
|
|
}
|
|
|
|
|
|
// fast vector normalize routine that does not check to make sure
|
|
// that length != 0, nor does it return length, uses rsqrt approximation
|
|
void FastNormalize1f(float v[3])
|
|
{
|
|
// writing it this way allows gcc to recognize that rsqrt can be used
|
|
float invLen = 1.0f / sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
|
|
|
|
v[0] = v[0] * invLen;
|
|
v[1] = v[1] * invLen;
|
|
v[2] = v[2] * invLen;
|
|
}
|
|
|
|
void FastNormalize2f( const float* v, float* out)
|
|
{
|
|
// writing it this way allows gcc to recognize that rsqrt can be used
|
|
float invLen = 1.0f / sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
|
|
|
|
out[0] = v[0] * invLen;
|
|
out[1] = v[1] * invLen;
|
|
out[2] = v[2] * invLen;
|
|
}
|
|
|
|
// use Rodrigue's rotation formula
|
|
// dir are not assumed to be unit vector
|
|
void PointRotateAroundVector(float* res, const float* vec, const float* p, const float degrees)
|
|
{
|
|
float rad = DEG2RAD( degrees );
|
|
float cos_th = cos( rad );
|
|
float sin_th = sin( rad );
|
|
float k[3];
|
|
|
|
// writing it this way allows gcc to recognize that rsqrt can be used
|
|
float invLen = 1.0f / sqrtf(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2]);
|
|
k[0] = vec[0] * invLen;
|
|
k[1] = vec[1] * invLen;
|
|
k[2] = vec[2] * invLen;
|
|
|
|
float d = (1 - cos_th) * (p[0] * k[0] + p[1] * k[1] + p[2] * k[2]);
|
|
|
|
res[0] = sin_th * (k[1]*p[2] - k[2]*p[1]);
|
|
res[1] = sin_th * (k[2]*p[0] - k[0]*p[2]);
|
|
res[2] = sin_th * (k[0]*p[1] - k[1]*p[0]);
|
|
|
|
res[0] += cos_th * p[0] + d * k[0];
|
|
res[1] += cos_th * p[1] + d * k[1];
|
|
res[2] += cos_th * p[2] + d * k[2];
|
|
}
|
|
|
|
// vector k are assumed to be unit
|
|
void RotateAroundUnitVector(float* res, const float* k, const float* p, const float degrees)
|
|
{
|
|
float rad = DEG2RAD( degrees );
|
|
float cos_th = cos( rad );
|
|
float sin_th = sin( rad );
|
|
|
|
float d = (1 - cos_th) * (p[0] * k[0] + p[1] * k[1] + p[2] * k[2]);
|
|
|
|
res[0] = sin_th * (k[1]*p[2] - k[2]*p[1]);
|
|
res[1] = sin_th * (k[2]*p[0] - k[0]*p[2]);
|
|
res[2] = sin_th * (k[0]*p[1] - k[1]*p[0]);
|
|
|
|
res[0] += cos_th * p[0] + d * k[0];
|
|
res[1] += cos_th * p[1] + d * k[1];
|
|
res[2] += cos_th * p[2] + d * k[2];
|
|
}
|
|
|
|
|
|
// note: vector forward are NOT assumed to be nornalized,
|
|
// unit: nornalized of forward,
|
|
// dst: unit vector which perpendicular of forward(src)
|
|
void VectorPerp( const float src[3], float dst[3] )
|
|
{
|
|
float unit[3];
|
|
|
|
float sqlen = src[0]*src[0] + src[1]*src[1] + src[2]*src[2];
|
|
if(0 == sqlen)
|
|
{
|
|
ri.Printf( PRINT_WARNING, "MakePerpVectors: zero vertor input!\n");
|
|
return;
|
|
}
|
|
|
|
dst[1] = -src[0];
|
|
dst[2] = src[1];
|
|
dst[0] = src[2];
|
|
// this rotate and negate try to make a vector not colinear with the original
|
|
// actually can not guarantee, for example
|
|
// forward = (1/sqrt(3), 1/sqrt(3), -1/sqrt(3)),
|
|
// then right = (-1/sqrt(3), -1/sqrt(3), 1/sqrt(3))
|
|
|
|
|
|
float invLen = 1.0f / sqrtf(sqlen);
|
|
unit[0] = src[0] * invLen;
|
|
unit[1] = src[1] * invLen;
|
|
unit[2] = src[2] * invLen;
|
|
|
|
|
|
float d = DotProduct(unit, dst);
|
|
dst[0] -= d*unit[0];
|
|
dst[1] -= d*unit[1];
|
|
dst[2] -= d*unit[2];
|
|
|
|
// normalize the result
|
|
invLen = 1.0f / sqrtf(dst[0]*dst[0] + dst[1]*dst[1] + dst[2]*dst[2]);
|
|
|
|
dst[0] *= invLen;
|
|
dst[1] *= invLen;
|
|
dst[2] *= invLen;
|
|
}
|
|
|
|
// Given a normalized forward vector, create two other perpendicular vectors
|
|
// note: vector forward are NOT assumed to be nornalized,
|
|
// after this funtion is called , forward are nornalized.
|
|
// right, up: perpendicular of forward
|
|
float MakeTwoPerpVectors(const float forward[3], float right[3], float up[3])
|
|
{
|
|
|
|
float sqLen = forward[0]*forward[0]+forward[1]*forward[1]+forward[2]*forward[2];
|
|
if(sqLen)
|
|
{
|
|
float nf[3] = {0, 0, 1};
|
|
float invLen = 1.0f / sqrtf(sqLen);
|
|
nf[0] = forward[0] * invLen;
|
|
nf[1] = forward[1] * invLen;
|
|
nf[2] = forward[2] * invLen;
|
|
|
|
float adjlen = DotProduct(nf, right);
|
|
|
|
// this rotate and negate guarantees a vector
|
|
// not colinear with the original
|
|
right[0] = forward[2] - adjlen * nf[0];
|
|
right[1] = -forward[0] - adjlen * nf[1];
|
|
right[2] = forward[1] - adjlen * nf[2];
|
|
|
|
|
|
invLen = 1.0f/sqrtf(right[0]*right[0]+right[1]*right[1]+right[2]*right[2]);
|
|
right[0] *= invLen;
|
|
right[1] *= invLen;
|
|
right[2] *= invLen;
|
|
|
|
// get the up vector with the right hand rules
|
|
VectorCross(right, nf, up);
|
|
|
|
return (sqLen * invLen);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
void TransformModelToClip_SSE( const float src[3], const float pMatModel[16], const float pMatProj[16], float dst[4] )
|
|
{
|
|
#if defined __x86_64__
|
|
|
|
float AugSrc[4] = {src[0], src[1], src[2], 1.0f};
|
|
|
|
|
|
__m128 row1 = _mm_load_ps(&pMatProj[0]);
|
|
__m128 row2 = _mm_load_ps(&pMatProj[4]);
|
|
__m128 row3 = _mm_load_ps(&pMatProj[8]);
|
|
__m128 row4 = _mm_load_ps(&pMatProj[12]);
|
|
|
|
__m128 res[4];
|
|
int i;
|
|
for(i=0; i<4; i++)
|
|
{
|
|
__m128 brod1 = _mm_set1_ps(pMatModel[4*i ]);
|
|
__m128 brod2 = _mm_set1_ps(pMatModel[4*i + 1]);
|
|
__m128 brod3 = _mm_set1_ps(pMatModel[4*i + 2]);
|
|
__m128 brod4 = _mm_set1_ps(pMatModel[4*i + 3]);
|
|
|
|
__m128 scol = _mm_set1_ps(AugSrc[i]);
|
|
|
|
res[i] =_mm_mul_ps( _mm_add_ps(
|
|
_mm_add_ps( _mm_mul_ps(brod1, row1), _mm_mul_ps(brod2, row2) ),
|
|
_mm_add_ps( _mm_mul_ps(brod3, row3), _mm_mul_ps(brod4, row4) )
|
|
), scol);
|
|
}
|
|
|
|
|
|
_mm_store_ps(dst, _mm_add_ps( _mm_add_ps(res[0], res[1]), _mm_add_ps(res[2], res[3]) ) );
|
|
#else
|
|
float eye[4];
|
|
TransformModelToClip(src, pMatModel, pMatProj, eye, dst );
|
|
#endif
|
|
|
|
|
|
// print4f("AugSrc", AugSrc);
|
|
// printMat4x4f("MatModel", pMatModel);
|
|
// printMat4x4f("MatProj", pMatProj);
|
|
// MatrixMultiply4x4_SSE(pMatModel, pMatProj, mvp);
|
|
// Mat4x1Transform_SSE(mvp, AugSrc, dst);
|
|
// print4f("dst", dst);
|
|
}
|
|
|
|
|
|
/*
|
|
|
|
void TransformModelToClip_SSE2( const float x[3], const float pMatModel[16], const float pMatProj[16], float dst[4] )
|
|
{
|
|
|
|
// 7/8 broadcaster, 8 load, 7/8 mult, 6 add
|
|
|
|
__m128 row = _mm_add_ps(
|
|
_mm_add_ps( _mm_mul_ps( _mm_set1_ps(x[0]), _mm_load_ps(pMatModel ) ) ,
|
|
_mm_mul_ps( _mm_set1_ps(x[1]), _mm_load_ps(pMatModel+4 ) ) )
|
|
,
|
|
_mm_add_ps( _mm_mul_ps( _mm_set1_ps(x[2]), _mm_load_ps(pMatModel+8 ) ) ,
|
|
_mm_load_ps(pMatModel+12) ) );
|
|
_mm_store_ps(dst, _mm_add_ps(
|
|
_mm_add_ps( _mm_mul_ps( _mm_set1_ps(row[0]), _mm_load_ps(pMatProj ) ) ,
|
|
_mm_mul_ps( _mm_set1_ps(row[1]), _mm_load_ps(pMatProj+4 ) ) )
|
|
,
|
|
_mm_add_ps( _mm_mul_ps( _mm_set1_ps(row[2]), _mm_load_ps(pMatProj+8 ) ) ,
|
|
_mm_mul_ps( _mm_set1_ps(row[3]), _mm_load_ps(pMatProj+12) ) ) )
|
|
);
|
|
|
|
|
|
// _mm_store_ps(dst, row);
|
|
|
|
// Mat4x1Transform_SSE(pMatModel, AugSrc, eye);
|
|
// Mat4x1Transform_SSE(pMatProj, eye, dst);
|
|
}
|
|
|
|
*/
|
|
|
|
// ===============================================
|
|
// not used now
|
|
// ===============================================
|
|
|
|
/*
|
|
void Mat4SimpleInverse( const float in[16], float out[16])
|
|
{
|
|
float v[3];
|
|
float invSqrLen;
|
|
|
|
VectorCopy(in + 0, v);
|
|
invSqrLen = 1.0f / DotProduct(v, v); VectorScale(v, invSqrLen, v);
|
|
out[ 0] = v[0]; out[ 4] = v[1]; out[ 8] = v[2]; out[12] = -DotProduct(v, &in[12]);
|
|
|
|
VectorCopy(in + 4, v);
|
|
invSqrLen = 1.0f / DotProduct(v, v); VectorScale(v, invSqrLen, v);
|
|
out[ 1] = v[0]; out[ 5] = v[1]; out[ 9] = v[2]; out[13] = -DotProduct(v, &in[12]);
|
|
|
|
VectorCopy(in + 8, v);
|
|
invSqrLen = 1.0f / DotProduct(v, v); VectorScale(v, invSqrLen, v);
|
|
out[ 2] = v[0]; out[ 6] = v[1]; out[10] = v[2]; out[14] = -DotProduct(v, &in[12]);
|
|
|
|
out[ 3] = 0.0f; out[ 7] = 0.0f; out[11] = 0.0f; out[15] = 1.0f;
|
|
}
|
|
|
|
|
|
void Mat4Dump( const float in[16] )
|
|
{
|
|
printf( "%3.5f %3.5f %3.5f %3.5f\n", in[ 0], in[ 4], in[ 8], in[12]);
|
|
printf( "%3.5f %3.5f %3.5f %3.5f\n", in[ 1], in[ 5], in[ 9], in[13]);
|
|
printf( "%3.5f %3.5f %3.5f %3.5f\n", in[ 2], in[ 6], in[10], in[14]);
|
|
printf( "%3.5f %3.5f %3.5f %3.5f\n", in[ 3], in[ 7], in[11], in[15]);
|
|
}
|
|
|
|
void Mat4View(vec3_t axes[3], vec3_t origin, mat4_t out)
|
|
{
|
|
out[0] = axes[0][0];
|
|
out[1] = axes[1][0];
|
|
out[2] = axes[2][0];
|
|
out[3] = 0;
|
|
|
|
out[4] = axes[0][1];
|
|
out[5] = axes[1][1];
|
|
out[6] = axes[2][1];
|
|
out[7] = 0;
|
|
|
|
out[8] = axes[0][2];
|
|
out[9] = axes[1][2];
|
|
out[10] = axes[2][2];
|
|
out[11] = 0;
|
|
|
|
out[12] = -DotProduct(origin, axes[0]);
|
|
out[13] = -DotProduct(origin, axes[1]);
|
|
out[14] = -DotProduct(origin, axes[2]);
|
|
out[15] = 1;
|
|
}
|
|
*/
|