mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-03-22 02:11:19 +00:00
[util] Get vectors working for non-SSE archs
GCC does a fairly nice job of producing code for vector types when the hardware doesn't support SIMD, but it seems to break certain math optimization rules due to excess precision (?). Still, it works well enough for the core engine, but may not be well suited to the tools. However, so far, only qfvis uses vector types (and it's not tested yet), and tools should probably be used on suitable machines anyway (not forced, of course).
This commit is contained in:
parent
a461c09586
commit
778c07e91f
7 changed files with 383 additions and 224 deletions
|
@ -81,7 +81,10 @@ AC_ARG_ENABLE(optimize,
|
|||
optimize=yes
|
||||
)
|
||||
|
||||
QF_CC_OPTION(-mavx2)
|
||||
QF_CC_OPTION(-Wno-psabi)
|
||||
dnl QF_CC_OPTION(-msse2)
|
||||
dnl QF_CC_OPTION(-Wno-psabi)
|
||||
dnl QF_CC_OPTION(-mavx2)
|
||||
dnl fma is not used as it is the equivalent of turning on
|
||||
dnl -funsafe-math-optimizations
|
||||
dnl QF_CC_OPTION(-mfma)
|
||||
|
|
|
@ -79,7 +79,11 @@ VEC_TYPE (float, vec4f_t);
|
|||
VEC_TYPE (int, vec4i_t);
|
||||
|
||||
#define VEC4D_FMT "[%.17g, %.17g, %.17g, %.17g]"
|
||||
#if __WORDSIZE == 64
|
||||
#define VEC4L_FMT "[%ld, %ld, %ld, %ld]"
|
||||
#else
|
||||
#define VEC4L_FMT "[%lld, %lld, %lld, %lld]"
|
||||
#endif
|
||||
#define VEC4F_FMT "[%.9g, %.9g, %.9g, %.9g]"
|
||||
#define VEC4I_FMT "[%d, %d, %d, %d]"
|
||||
#define VEC4_EXP(v) (v)[0], (v)[1], (v)[2], (v)[3]
|
||||
|
@ -94,4 +98,30 @@ typedef struct vspheref_s {
|
|||
float radius;
|
||||
} vspheref_t;
|
||||
|
||||
#include <immintrin.h>
|
||||
#ifndef __SSE__
|
||||
#define _mm_xor_ps __qf_mm_xor_ps
|
||||
#define _mm_and_ps __qf_mm_and_ps
|
||||
GNU89INLINE inline __m128 _mm_xor_ps (__m128 a, __m128 b);
|
||||
GNU89INLINE inline __m128 _mm_and_ps (__m128 a, __m128 b);
|
||||
// Fallback for _mm_xor_ps, compiled only when __SSE__ is not defined (the
// name is remapped to __qf_mm_xor_ps by the #define earlier in this section).
// Reinterprets a and b as vec4i_t, XORs them bitwise, and casts the result
// back to __m128. Linkage is selected by IMPLEMENT_MAT4F_Funcs.
#ifndef IMPLEMENT_MAT4F_Funcs
|
||||
GNU89INLINE inline
|
||||
#else
|
||||
VISIBLE
|
||||
#endif
|
||||
__m128 _mm_xor_ps (__m128 a, __m128 b)
|
||||
{
|
||||
return (__m128) ((vec4i_t) a ^ (vec4i_t) b);
|
||||
}
|
||||
// Fallback for _mm_and_ps, compiled only when __SSE__ is not defined (the
// name is remapped to __qf_mm_and_ps by the #define earlier in this section).
// Reinterprets a and b as vec4i_t, ANDs them bitwise, and casts the result
// back to __m128. Linkage is selected by IMPLEMENT_MAT4F_Funcs.
#ifndef IMPLEMENT_MAT4F_Funcs
|
||||
GNU89INLINE inline
|
||||
#else
|
||||
VISIBLE
|
||||
#endif
|
||||
__m128 _mm_and_ps (__m128 a, __m128 b)
|
||||
{
|
||||
return (__m128) ((vec4i_t) a & (vec4i_t) b);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif//__QF_simd_types_h
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#ifndef __QF_simd_vec4d_h
|
||||
#define __QF_simd_vec4d_h
|
||||
|
||||
#ifdef __AVX__
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "QF/simd/types.h"
|
||||
|
@ -292,4 +293,6 @@ storevec3d (double v3[3], vec4d_t v4)
|
|||
v3[2] = v4[2];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif//__QF_simd_vec4d_h
|
||||
|
|
|
@ -110,7 +110,11 @@ vabsf (vec4f_t v)
|
|||
{
|
||||
const uint32_t nan = ~0u >> 1;
|
||||
const vec4i_t abs = { nan, nan, nan, nan };
|
||||
#ifndef __SSE__
|
||||
return (vec4f_t) ((vec4i_t) v & abs);
|
||||
#else
|
||||
return _mm_and_ps (v, (__m128) abs);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC4F_Funcs
|
||||
|
@ -121,7 +125,12 @@ VISIBLE
|
|||
// Component-wise square root of v.
// Without __SSE__ each of the four components goes through sqrtf
// individually; otherwise a single _mm_sqrt_ps call is used.
vec4f_t
|
||||
vsqrtf (vec4f_t v)
|
||||
{
|
||||
#ifndef __SSE__
|
||||
vec4f_t r = { sqrtf (v[0]), sqrtf (v[1]), sqrtf (v[2]), sqrtf (v[3]) };
|
||||
return r;
|
||||
#else
|
||||
return _mm_sqrt_ps (v);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC4F_Funcs
|
||||
|
@ -132,7 +141,16 @@ VISIBLE
|
|||
// Component-wise ceiling of v.
// _mm_ceil_ps is only used when __SSE4_1__ is defined; otherwise each
// component is passed through ceilf individually.
vec4f_t
|
||||
vceilf (vec4f_t v)
|
||||
{
|
||||
#ifndef __SSE4_1__
|
||||
return (vec4f_t) {
|
||||
ceilf (v[0]),
|
||||
ceilf (v[1]),
|
||||
ceilf (v[2]),
|
||||
ceilf (v[3])
|
||||
};
|
||||
#else
|
||||
return _mm_ceil_ps (v);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC4F_Funcs
|
||||
|
@ -143,7 +161,16 @@ VISIBLE
|
|||
// Component-wise floor of v.
// _mm_floor_ps is only used when __SSE4_1__ is defined; otherwise each
// component is passed through floorf individually.
vec4f_t
|
||||
vfloorf (vec4f_t v)
|
||||
{
|
||||
#ifndef __SSE4_1__
|
||||
return (vec4f_t) {
|
||||
floorf (v[0]),
|
||||
floorf (v[1]),
|
||||
floorf (v[2]),
|
||||
floorf (v[3])
|
||||
};
|
||||
#else
|
||||
return _mm_floor_ps (v);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC4F_Funcs
|
||||
|
@ -154,7 +181,16 @@ VISIBLE
|
|||
// Component-wise truncation of v.
// _mm_round_ps with _MM_FROUND_TRUNC is only used when __SSE4_1__ is
// defined; otherwise each component is passed through truncf individually.
vec4f_t
|
||||
vtruncf (vec4f_t v)
|
||||
{
|
||||
#ifndef __SSE4_1__
|
||||
return (vec4f_t) {
|
||||
truncf (v[0]),
|
||||
truncf (v[1]),
|
||||
truncf (v[2]),
|
||||
truncf (v[3])
|
||||
};
|
||||
#else
|
||||
return _mm_round_ps (v, _MM_FROUND_TRUNC);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC4F_Funcs
|
||||
|
@ -179,8 +215,13 @@ vec4f_t
|
|||
dotf (vec4f_t a, vec4f_t b)
|
||||
{
|
||||
vec4f_t c = a * b;
|
||||
#ifndef __SSE3__
|
||||
float x = c[0] + c[1] + c[2] + c[3];
|
||||
c = (vec4f_t) { x, x, x, x };
|
||||
#else
|
||||
c = _mm_hadd_ps (c, c);
|
||||
c = _mm_hadd_ps (c, c);
|
||||
#endif
|
||||
return c;
|
||||
}
|
||||
|
||||
|
@ -197,7 +238,11 @@ qmulf (vec4f_t a, vec4f_t b)
|
|||
vec4f_t c = crossf (a, b) + a * b[3] + a[3] * b;
|
||||
vec4f_t d = dotf (a, b);
|
||||
// zero out the vector component of dot product so only the scalar remains
|
||||
#ifndef __SSE4_1__
|
||||
d = (vec4f_t) { 0, 0, 0, d[3] };
|
||||
#else
|
||||
d = _mm_insert_ps (d, d, 0xf7);
|
||||
#endif
|
||||
return c - d;
|
||||
}
|
||||
|
||||
|
@ -212,7 +257,11 @@ qvmulf (vec4f_t q, vec4f_t v)
|
|||
float s = q[3];
|
||||
// zero the scalar of the quaternion. Results in an extra operation, but
|
||||
// avoids adding precision issues.
|
||||
#ifndef __SSE4_1__
|
||||
q[3] = 0;
|
||||
#else
|
||||
q = _mm_insert_ps (q, q, 0xf8);
|
||||
#endif
|
||||
vec4f_t c = crossf (q, v);
|
||||
vec4f_t qv = dotf (q, v); // q.w is 0 so v.w is irrelevant
|
||||
vec4f_t qq = dotf (q, q);
|
||||
|
@ -231,7 +280,11 @@ vqmulf (vec4f_t v, vec4f_t q)
|
|||
float s = q[3];
|
||||
// zero the scalar of the quaternion. Results in an extra operation, but
|
||||
// avoids adding precision issues.
|
||||
#ifndef __SSE4_1__
|
||||
q[3] = 0;
|
||||
#else
|
||||
q = _mm_insert_ps (q, q, 0xf8);
|
||||
#endif
|
||||
vec4f_t c = crossf (q, v);
|
||||
vec4f_t qv = dotf (q, v); // q.w is 0 so v.w is irrelevant
|
||||
vec4f_t qq = dotf (q, q);
|
||||
|
@ -266,7 +319,11 @@ vec4f_t
|
|||
qconjf (vec4f_t q)
|
||||
{
|
||||
const vec4i_t neg = { 1u << 31, 1u << 31, 1u << 31, 0 };
|
||||
#ifndef __SSE__
|
||||
return (vec4f_t) ((vec4i_t) q ^ neg);
|
||||
#else
|
||||
return _mm_xor_ps (q, (__m128) neg);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifndef IMPLEMENT_VEC4F_Funcs
|
||||
|
@ -299,6 +356,9 @@ loadvec3f (const float v3[3])
|
|||
{
|
||||
vec4f_t v4;
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
v4 = (vec4f_t) { v3[0], v3[1], v3[2], 0 };
|
||||
#else
|
||||
// this had to be in asm otherwise gcc thinks v4 is only partially
|
||||
// initialized, and gcc 10 does not use the zero flags when generating
|
||||
// the code, resulting in a memory access to load a 0 into v4[3]
|
||||
|
@ -311,6 +371,7 @@ loadvec3f (const float v3[3])
|
|||
"
|
||||
: "=v"(v4)
|
||||
: "m"(v3[0]), "m"(v3[1]), "m"(v3[2]));
|
||||
#endif
|
||||
return v4;
|
||||
}
|
||||
|
||||
|
|
|
@ -48,15 +48,19 @@
|
|||
|
||||
#define s05 0.70710678118654757
|
||||
|
||||
#ifdef __AVX__
|
||||
// One entry of the vec4d_tests table (only built when __AVX__ is defined).
// Field order matters: the T() macro initializes entries positionally with
// __LINE__ first.
typedef struct {
|
||||
// source line of the table entry; printed when the test fails
int line;
|
||||
// operation under test
vec4d_t (*op) (vec4d_t a, vec4d_t b);
|
||||
// first operand
vec4d_t a;
|
||||
// second operand
vec4d_t b;
|
||||
// expected result
vec4d_t expect;
|
||||
// NOTE(review): presumably added to expect before comparison, as the
// vec4f runner does -- confirm against run_vec4d_tests
vec4d_t ulp_errors;
|
||||
} vec4d_test_t;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int line;
|
||||
vec4f_t (*op) (vec4f_t a, vec4f_t b);
|
||||
vec4f_t a;
|
||||
vec4f_t b;
|
||||
|
@ -65,6 +69,7 @@ typedef struct {
|
|||
} vec4f_test_t;
|
||||
|
||||
typedef struct {
|
||||
int line;
|
||||
void (*op) (mat4f_t c, const mat4f_t a, const mat4f_t b);
|
||||
mat4f_t a;
|
||||
mat4f_t b;
|
||||
|
@ -73,6 +78,7 @@ typedef struct {
|
|||
} mat4f_test_t;
|
||||
|
||||
typedef struct {
|
||||
int line;
|
||||
vec4f_t (*op) (const mat4f_t a, vec4f_t b);
|
||||
mat4f_t a;
|
||||
vec4f_t b;
|
||||
|
@ -81,12 +87,14 @@ typedef struct {
|
|||
} mv4f_test_t;
|
||||
|
||||
// One entry of the mq4f_tests table. Field order matters: the T() macro
// initializes entries positionally with __LINE__ first.
typedef struct {
|
||||
// source line of the table entry
int line;
|
||||
// operation under test; writes its result into m
void (*op) (mat4f_t m, vec4f_t q);
|
||||
// input quaternion
vec4f_t q;
|
||||
// expected matrix; the table entries leave this unset (see the comment
// on mq4f_tests about it being filled in using QuatToMatrix)
mat4f_t expect;
|
||||
// NOTE(review): presumably a per-element tolerance adjustment applied to
// expect -- confirm against the mq4f test runner
mat4f_t ulp_errors;
|
||||
} mq4f_test_t;
|
||||
|
||||
#ifdef __AVX__
|
||||
static vec4d_t tvtruncd (vec4d_t v, vec4d_t ignore)
|
||||
{
|
||||
return vtruncd (v);
|
||||
|
@ -106,6 +114,7 @@ static vec4d_t tqconjd (vec4d_t v, vec4d_t ignore)
|
|||
{
|
||||
return qconjd (v);
|
||||
}
|
||||
#endif
|
||||
|
||||
static vec4f_t tvtruncf (vec4f_t v, vec4f_t ignore)
|
||||
{
|
||||
|
@ -147,290 +156,320 @@ static vec4f_t tmagnitude3f (vec4f_t v, vec4f_t ignore)
|
|||
return magnitude3f (v);
|
||||
}
|
||||
|
||||
#define T(t...) { __LINE__, t }
|
||||
|
||||
#ifdef __AVX__
|
||||
static vec4d_test_t vec4d_tests[] = {
|
||||
// 3D dot products
|
||||
{ dotd, right, right, one },
|
||||
{ dotd, right, forward, zero },
|
||||
{ dotd, right, up, zero },
|
||||
{ dotd, forward, right, zero },
|
||||
{ dotd, forward, forward, one },
|
||||
{ dotd, forward, up, zero },
|
||||
{ dotd, up, right, zero },
|
||||
{ dotd, up, forward, zero },
|
||||
{ dotd, up, up, one },
|
||||
T(dotd, right, right, one ),
|
||||
T(dotd, right, forward, zero ),
|
||||
T(dotd, right, up, zero ),
|
||||
T(dotd, forward, right, zero ),
|
||||
T(dotd, forward, forward, one ),
|
||||
T(dotd, forward, up, zero ),
|
||||
T(dotd, up, right, zero ),
|
||||
T(dotd, up, forward, zero ),
|
||||
T(dotd, up, up, one ),
|
||||
|
||||
// one is 4D, so its self dot product is 4
|
||||
{ dotd, one, one, { 4, 4, 4, 4} },
|
||||
{ dotd, one, none, {-4, -4, -4, -4} },
|
||||
T(dotd, one, one, { 4, 4, 4, 4} ),
|
||||
T(dotd, one, none, {-4, -4, -4, -4} ),
|
||||
|
||||
// 3D cross products
|
||||
{ crossd, right, right, zero },
|
||||
{ crossd, right, forward, up },
|
||||
{ crossd, right, up, nforward },
|
||||
{ crossd, forward, right, nup },
|
||||
{ crossd, forward, forward, zero },
|
||||
{ crossd, forward, up, right },
|
||||
{ crossd, up, right, forward },
|
||||
{ crossd, up, forward, nright },
|
||||
{ crossd, up, up, zero },
|
||||
T(crossd, right, right, zero ),
|
||||
T(crossd, right, forward, up ),
|
||||
T(crossd, right, up, nforward ),
|
||||
T(crossd, forward, right, nup ),
|
||||
T(crossd, forward, forward, zero ),
|
||||
T(crossd, forward, up, right ),
|
||||
T(crossd, up, right, forward ),
|
||||
T(crossd, up, forward, nright ),
|
||||
T(crossd, up, up, zero ),
|
||||
// double whammy tests: cross product with an angled vector and
|
||||
// ensuring that a 4d vector (non-zero w component) does not affect
|
||||
// the result, including the result's w component remaining zero.
|
||||
{ crossd, right, one, { 0, -1, 1} },
|
||||
{ crossd, forward, one, { 1, 0, -1} },
|
||||
{ crossd, up, one, {-1, 1, 0} },
|
||||
{ crossd, one, right, { 0, 1, -1} },
|
||||
{ crossd, one, forward, {-1, 0, 1} },
|
||||
{ crossd, one, up, { 1, -1, 0} },
|
||||
T(crossd, right, one, { 0, -1, 1} ),
|
||||
T(crossd, forward, one, { 1, 0, -1} ),
|
||||
T(crossd, up, one, {-1, 1, 0} ),
|
||||
T(crossd, one, right, { 0, 1, -1} ),
|
||||
T(crossd, one, forward, {-1, 0, 1} ),
|
||||
T(crossd, one, up, { 1, -1, 0} ),
|
||||
// This one fails when optimizing with -mfma (which is why fma is not
|
||||
// used): ulp errors in z and w
|
||||
{ crossd, qtest, qtest, {0, 0, 0, 0} },
|
||||
T(crossd, qtest, qtest, {0, 0, 0, 0} ),
|
||||
|
||||
{ qmuld, qident, qident, qident },
|
||||
{ qmuld, qident, right, right },
|
||||
{ qmuld, qident, forward, forward },
|
||||
{ qmuld, qident, up, up },
|
||||
{ qmuld, right, qident, right },
|
||||
{ qmuld, forward, qident, forward },
|
||||
{ qmuld, up, qident, up },
|
||||
{ qmuld, right, right, nqident },
|
||||
{ qmuld, right, forward, up },
|
||||
{ qmuld, right, up, nforward },
|
||||
{ qmuld, forward, right, nup },
|
||||
{ qmuld, forward, forward, nqident },
|
||||
{ qmuld, forward, up, right },
|
||||
{ qmuld, up, right, forward },
|
||||
{ qmuld, up, forward, nright },
|
||||
{ qmuld, up, up, nqident },
|
||||
{ qmuld, one, one, { 2, 2, 2, -2 } },
|
||||
{ qmuld, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } },
|
||||
T(qmuld, qident, qident, qident ),
|
||||
T(qmuld, qident, right, right ),
|
||||
T(qmuld, qident, forward, forward ),
|
||||
T(qmuld, qident, up, up ),
|
||||
T(qmuld, right, qident, right ),
|
||||
T(qmuld, forward, qident, forward ),
|
||||
T(qmuld, up, qident, up ),
|
||||
T(qmuld, right, right, nqident ),
|
||||
T(qmuld, right, forward, up ),
|
||||
T(qmuld, right, up, nforward ),
|
||||
T(qmuld, forward, right, nup ),
|
||||
T(qmuld, forward, forward, nqident ),
|
||||
T(qmuld, forward, up, right ),
|
||||
T(qmuld, up, right, forward ),
|
||||
T(qmuld, up, forward, nright ),
|
||||
T(qmuld, up, up, nqident ),
|
||||
T(qmuld, one, one, { 2, 2, 2, -2 } ),
|
||||
T(qmuld, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } ),
|
||||
// This one fails when optimizing with -mfma (which is why fma is not
|
||||
// used): ulp error in z
|
||||
{ qmuld, qtest, qtest, {0.768, 0.576, 0, -0.28} },
|
||||
T(qmuld, qtest, qtest, {0.768, 0.576, 0, -0.28} ),
|
||||
|
||||
// The one vector is not unit (magnitude 2), so using it as a rotation
|
||||
// quaternion results in scaling by 4. However, it still has the effect
|
||||
// of rotating 120 degrees around the axis equidistant from the three
|
||||
// orthogonal axes such that x->y->z->x
|
||||
{ qvmuld, one, right, { 0, 4, 0, 0 } },
|
||||
{ qvmuld, one, forward, { 0, 0, 4, 0 } },
|
||||
{ qvmuld, one, up, { 4, 0, 0, 0 } },
|
||||
{ qvmuld, one, {1,1,1,0}, { 4, 4, 4, 0 } },
|
||||
{ qvmuld, one, one, { 4, 4, 4, -2 } },
|
||||
T(qvmuld, one, right, { 0, 4, 0, 0 } ),
|
||||
T(qvmuld, one, forward, { 0, 0, 4, 0 } ),
|
||||
T(qvmuld, one, up, { 4, 0, 0, 0 } ),
|
||||
T(qvmuld, one, {1,1,1,0}, { 4, 4, 4, 0 } ),
|
||||
T(qvmuld, one, one, { 4, 4, 4, -2 } ),
|
||||
// inverse rotation, so x->z->y->x
|
||||
{ vqmuld, right, one, { 0, 0, 4, 0 } },
|
||||
{ vqmuld, forward, one, { 4, 0, 0, 0 } },
|
||||
{ vqmuld, up, one, { 0, 4, 0, 0 } },
|
||||
{ vqmuld, {1,1,1,0}, one, { 4, 4, 4, 0 } },
|
||||
{ vqmuld, one, one, { 4, 4, 4, -2 } },
|
||||
T(vqmuld, right, one, { 0, 0, 4, 0 } ),
|
||||
T(vqmuld, forward, one, { 4, 0, 0, 0 } ),
|
||||
T(vqmuld, up, one, { 0, 4, 0, 0 } ),
|
||||
T(vqmuld, {1,1,1,0}, one, { 4, 4, 4, 0 } ),
|
||||
T(vqmuld, one, one, { 4, 4, 4, -2 } ),
|
||||
// The half vector is unit.
|
||||
{ qvmuld, half, right, forward },
|
||||
{ qvmuld, half, forward, up },
|
||||
{ qvmuld, half, up, right },
|
||||
{ qvmuld, half, {1,1,1,0}, { 1, 1, 1, 0 } },
|
||||
T(qvmuld, half, right, forward ),
|
||||
T(qvmuld, half, forward, up ),
|
||||
T(qvmuld, half, up, right ),
|
||||
T(qvmuld, half, {1,1,1,0}, { 1, 1, 1, 0 } ),
|
||||
// inverse
|
||||
{ vqmuld, right, half, up },
|
||||
{ vqmuld, forward, half, right },
|
||||
{ vqmuld, up, half, forward },
|
||||
{ vqmuld, {1,1,1,0}, half, { 1, 1, 1, 0 } },
|
||||
T(vqmuld, right, half, up ),
|
||||
T(vqmuld, forward, half, right ),
|
||||
T(vqmuld, up, half, forward ),
|
||||
T(vqmuld, {1,1,1,0}, half, { 1, 1, 1, 0 } ),
|
||||
// one is a 4D vector and qvmuld is meant for 3D vectors. However, it
|
||||
// seems that the vector's w has no effect on the 3d portion of the
|
||||
// result, but the result's w is cosine of the full rotation angle
|
||||
// scaled by quaternion magnitude and vector w
|
||||
{ qvmuld, half, one, { 1, 1, 1, -0.5 } },
|
||||
{ qvmuld, half, {2,2,2,2}, { 2, 2, 2, -1 } },
|
||||
{ qvmuld, qtest, right, {0.5392, 0.6144, -0.576, 0} },
|
||||
{ qvmuld, qtest, forward, {0.6144, 0.1808, 0.768, 0},
|
||||
{0, -2.7e-17, 0, 0} },
|
||||
{ qvmuld, qtest, up, {0.576, -0.768, -0.28, 0} },
|
||||
T(qvmuld, half, one, { 1, 1, 1, -0.5 } ),
|
||||
T(qvmuld, half, {2,2,2,2}, { 2, 2, 2, -1 } ),
|
||||
T(qvmuld, qtest, right, {0.5392, 0.6144, -0.576, 0} ),
|
||||
T(qvmuld, qtest, forward, {0.6144, 0.1808, 0.768, 0},
|
||||
{0, -2.7e-17, 0, 0} ),
|
||||
T(qvmuld, qtest, up, {0.576, -0.768, -0.28, 0} ),
|
||||
// inverse
|
||||
{ vqmuld, one, half, { 1, 1, 1, -0.5 } },
|
||||
{ vqmuld, {2,2,2,2}, half, { 2, 2, 2, -1 } },
|
||||
{ vqmuld, right, qtest, {0.5392, 0.6144, 0.576, 0} },
|
||||
{ vqmuld, forward, qtest, {0.6144, 0.1808, -0.768, 0},
|
||||
{0, -2.7e-17, 0, 0} },
|
||||
{ vqmuld, up, qtest, {-0.576, 0.768, -0.28, 0} },
|
||||
T(vqmuld, one, half, { 1, 1, 1, -0.5 } ),
|
||||
T(vqmuld, {2,2,2,2}, half, { 2, 2, 2, -1 } ),
|
||||
T(vqmuld, right, qtest, {0.5392, 0.6144, 0.576, 0} ),
|
||||
T(vqmuld, forward, qtest, {0.6144, 0.1808, -0.768, 0},
|
||||
{0, -2.7e-17, 0, 0} ),
|
||||
T(vqmuld, up, qtest, {-0.576, 0.768, -0.28, 0} ),
|
||||
|
||||
{ qrotd, right, right, qident },
|
||||
{ qrotd, right, forward, { 0, 0, s05, s05 },
|
||||
{0, 0, -1.1e-16, 0} },
|
||||
{ qrotd, right, up, { 0, -s05, 0, s05 },
|
||||
{0, 1.1e-16, 0, 0} },
|
||||
{ qrotd, forward, right, { 0, 0, -s05, s05 },
|
||||
{0, 0, 1.1e-16, 0} },
|
||||
{ qrotd, forward, forward, qident },
|
||||
{ qrotd, forward, up, { s05, 0, 0, s05 },
|
||||
{-1.1e-16, 0, 0, 0} },
|
||||
{ qrotd, up, right, { 0, s05, 0, s05 },
|
||||
{0, -1.1e-16, 0, 0} },
|
||||
{ qrotd, up, forward, { -s05, 0, 0, s05 },
|
||||
{ 1.1e-16, 0, 0, 0} },
|
||||
{ qrotd, up, up, qident },
|
||||
T(qrotd, right, right, qident ),
|
||||
T(qrotd, right, forward, { 0, 0, s05, s05 },
|
||||
{0, 0, -1.1e-16, 0} ),
|
||||
T(qrotd, right, up, { 0, -s05, 0, s05 },
|
||||
{0, 1.1e-16, 0, 0} ),
|
||||
T(qrotd, forward, right, { 0, 0, -s05, s05 },
|
||||
{0, 0, 1.1e-16, 0} ),
|
||||
T(qrotd, forward, forward, qident ),
|
||||
T(qrotd, forward, up, { s05, 0, 0, s05 },
|
||||
{-1.1e-16, 0, 0, 0} ),
|
||||
T(qrotd, up, right, { 0, s05, 0, s05 },
|
||||
{0, -1.1e-16, 0, 0} ),
|
||||
T(qrotd, up, forward, { -s05, 0, 0, s05 },
|
||||
{ 1.1e-16, 0, 0, 0} ),
|
||||
T(qrotd, up, up, qident ),
|
||||
|
||||
{ tvtruncd, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -1, -2 } },
|
||||
{ tvceild, { 1.1, 2.9, -1.1, -2.9 }, {}, { 2, 3, -1, -2 } },
|
||||
{ tvfloord, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -2, -3 } },
|
||||
{ tqconjd, one, {}, { -1, -1, -1, 1 } },
|
||||
T(tvtruncd, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -1, -2 } ),
|
||||
T(tvceild, { 1.1, 2.9, -1.1, -2.9 }, {}, { 2, 3, -1, -2 } ),
|
||||
T(tvfloord, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -2, -3 } ),
|
||||
T(tqconjd, one, {}, { -1, -1, -1, 1 } ),
|
||||
};
|
||||
#define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0])))
|
||||
#endif
|
||||
|
||||
static vec4f_test_t vec4f_tests[] = {
|
||||
// 3D dot products
|
||||
{ dotf, right, right, one },
|
||||
{ dotf, right, forward, zero },
|
||||
{ dotf, right, up, zero },
|
||||
{ dotf, forward, right, zero },
|
||||
{ dotf, forward, forward, one },
|
||||
{ dotf, forward, up, zero },
|
||||
{ dotf, up, right, zero },
|
||||
{ dotf, up, forward, zero },
|
||||
{ dotf, up, up, one },
|
||||
T(dotf, right, right, one ),
|
||||
T(dotf, right, forward, zero ),
|
||||
T(dotf, right, up, zero ),
|
||||
T(dotf, forward, right, zero ),
|
||||
T(dotf, forward, forward, one ),
|
||||
T(dotf, forward, up, zero ),
|
||||
T(dotf, up, right, zero ),
|
||||
T(dotf, up, forward, zero ),
|
||||
T(dotf, up, up, one ),
|
||||
|
||||
// one is 4D, so its self dot product is 4
|
||||
{ dotf, one, one, { 4, 4, 4, 4} },
|
||||
{ dotf, one, none, {-4, -4, -4, -4} },
|
||||
T(dotf, one, one, { 4, 4, 4, 4} ),
|
||||
T(dotf, one, none, {-4, -4, -4, -4} ),
|
||||
|
||||
// 3D cross products
|
||||
{ crossf, right, right, zero },
|
||||
{ crossf, right, forward, up },
|
||||
{ crossf, right, up, nforward },
|
||||
{ crossf, forward, right, nup },
|
||||
{ crossf, forward, forward, zero },
|
||||
{ crossf, forward, up, right },
|
||||
{ crossf, up, right, forward },
|
||||
{ crossf, up, forward, nright },
|
||||
{ crossf, up, up, zero },
|
||||
T(crossf, right, right, zero ),
|
||||
T(crossf, right, forward, up ),
|
||||
T(crossf, right, up, nforward ),
|
||||
T(crossf, forward, right, nup ),
|
||||
T(crossf, forward, forward, zero ),
|
||||
T(crossf, forward, up, right ),
|
||||
T(crossf, up, right, forward ),
|
||||
T(crossf, up, forward, nright ),
|
||||
T(crossf, up, up, zero ),
|
||||
// double whammy tests: cross product with an angled vector and
|
||||
// ensuring that a 4d vector (non-zero w component) does not affect
|
||||
// the result, including the result's w component remaining zero.
|
||||
{ crossf, right, one, { 0, -1, 1} },
|
||||
{ crossf, forward, one, { 1, 0, -1} },
|
||||
{ crossf, up, one, {-1, 1, 0} },
|
||||
{ crossf, one, right, { 0, 1, -1} },
|
||||
{ crossf, one, forward, {-1, 0, 1} },
|
||||
{ crossf, one, up, { 1, -1, 0} },
|
||||
{ crossf, qtest, qtest, {0, 0, 0, 0} },
|
||||
T(crossf, right, one, { 0, -1, 1} ),
|
||||
T(crossf, forward, one, { 1, 0, -1} ),
|
||||
T(crossf, up, one, {-1, 1, 0} ),
|
||||
T(crossf, one, right, { 0, 1, -1} ),
|
||||
T(crossf, one, forward, {-1, 0, 1} ),
|
||||
T(crossf, one, up, { 1, -1, 0} ),
|
||||
T(crossf, qtest, qtest, {0, 0, 0, 0} ),
|
||||
|
||||
{ qmulf, qident, qident, qident },
|
||||
{ qmulf, qident, right, right },
|
||||
{ qmulf, qident, forward, forward },
|
||||
{ qmulf, qident, up, up },
|
||||
{ qmulf, right, qident, right },
|
||||
{ qmulf, forward, qident, forward },
|
||||
{ qmulf, up, qident, up },
|
||||
{ qmulf, right, right, nqident },
|
||||
{ qmulf, right, forward, up },
|
||||
{ qmulf, right, up, nforward },
|
||||
{ qmulf, forward, right, nup },
|
||||
{ qmulf, forward, forward, nqident },
|
||||
{ qmulf, forward, up, right },
|
||||
{ qmulf, up, right, forward },
|
||||
{ qmulf, up, forward, nright },
|
||||
{ qmulf, up, up, nqident },
|
||||
{ qmulf, one, one, { 2, 2, 2, -2 } },
|
||||
{ qmulf, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } },
|
||||
{ qmulf, qtest, qtest, {0.768, 0.576, 0, -0.28},
|
||||
{0, 6e-8, 0, 3e-8} },
|
||||
T(qmulf, qident, qident, qident ),
|
||||
T(qmulf, qident, right, right ),
|
||||
T(qmulf, qident, forward, forward ),
|
||||
T(qmulf, qident, up, up ),
|
||||
T(qmulf, right, qident, right ),
|
||||
T(qmulf, forward, qident, forward ),
|
||||
T(qmulf, up, qident, up ),
|
||||
T(qmulf, right, right, nqident ),
|
||||
T(qmulf, right, forward, up ),
|
||||
T(qmulf, right, up, nforward ),
|
||||
T(qmulf, forward, right, nup ),
|
||||
T(qmulf, forward, forward, nqident ),
|
||||
T(qmulf, forward, up, right ),
|
||||
T(qmulf, up, right, forward ),
|
||||
T(qmulf, up, forward, nright ),
|
||||
T(qmulf, up, up, nqident ),
|
||||
T(qmulf, one, one, { 2, 2, 2, -2 } ),
|
||||
T(qmulf, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } ),
|
||||
T(qmulf, qtest, qtest, {0.768, 0.576, 0, -0.28},
|
||||
#ifndef __SSE__
|
||||
{0, 6e-8, 0, 6e-8}
|
||||
#else
|
||||
{0, 6e-8, 0, 3e-8}
|
||||
#endif
|
||||
),
|
||||
|
||||
// The one vector is not unit (magnitude 2), so using it as a rotation
|
||||
// quaternion results in scaling by 4. However, it still has the effect
|
||||
// of rotating 120 degrees around the axis equidistant from the three
|
||||
// orthogonal axes such that x->y->z->x
|
||||
{ qvmulf, one, right, { 0, 4, 0, 0 } },
|
||||
{ qvmulf, one, forward, { 0, 0, 4, 0 } },
|
||||
{ qvmulf, one, up, { 4, 0, 0, 0 } },
|
||||
{ qvmulf, one, {1,1,1,0}, { 4, 4, 4, 0 } },
|
||||
{ qvmulf, one, one, { 4, 4, 4, -2 } },
|
||||
T(qvmulf, one, right, { 0, 4, 0, 0 } ),
|
||||
T(qvmulf, one, forward, { 0, 0, 4, 0 } ),
|
||||
T(qvmulf, one, up, { 4, 0, 0, 0 } ),
|
||||
T(qvmulf, one, {1,1,1,0}, { 4, 4, 4, 0 } ),
|
||||
T(qvmulf, one, one, { 4, 4, 4, -2 } ),
|
||||
// inverse rotation, so x->z->y->x
|
||||
{ vqmulf, right, one, { 0, 0, 4, 0 } },
|
||||
{ vqmulf, forward, one, { 4, 0, 0, 0 } },
|
||||
{ vqmulf, up, one, { 0, 4, 0, 0 } },
|
||||
{ vqmulf, {1,1,1,0}, one, { 4, 4, 4, 0 } },
|
||||
{ vqmulf, one, one, { 4, 4, 4, -2 } },
|
||||
T(vqmulf, right, one, { 0, 0, 4, 0 } ),
|
||||
T(vqmulf, forward, one, { 4, 0, 0, 0 } ),
|
||||
T(vqmulf, up, one, { 0, 4, 0, 0 } ),
|
||||
T(vqmulf, {1,1,1,0}, one, { 4, 4, 4, 0 } ),
|
||||
T(vqmulf, one, one, { 4, 4, 4, -2 } ),
|
||||
//
|
||||
{ qvmulf, qtest, right, {0.5392, 0.6144, -0.576, 0},
|
||||
{0, -5.9e-8, -6e-8, 0} },
|
||||
{ qvmulf, qtest, forward, {0.6144, 0.1808, 0.768, 0},
|
||||
{-5.9e-8, 1.5e-8, 0, 0} },
|
||||
{ qvmulf, qtest, up, {0.576, -0.768, -0.28, 0},
|
||||
{6e-8, 0, 3e-8, 0} },
|
||||
{ vqmulf, right, qtest, {0.5392, 0.6144, 0.576, 0},
|
||||
{0, -5.9e-8, 5.9e-8, 0} },
|
||||
{ vqmulf, forward, qtest, {0.6144, 0.1808, -0.768, 0},
|
||||
{-5.9e-8, 1.5e-8, 0, 0} },
|
||||
{ vqmulf, up, qtest, {-0.576, 0.768, -0.28, 0},
|
||||
{-5.9e-8, 0, 3e-8, 0} },
|
||||
T(qvmulf, qtest, right, {0.5392, 0.6144, -0.576, 0},
|
||||
{0, -5.9e-8, -6e-8, 0} ),
|
||||
T(qvmulf, qtest, forward, {0.6144, 0.1808, 0.768, 0},
|
||||
#ifndef __SSE__
|
||||
{-5.9e-8, 3e-8, 0, 0}
|
||||
#else
|
||||
{-5.9e-8, 1.5e-8, 0, 0}
|
||||
#endif
|
||||
),
|
||||
T(qvmulf, qtest, up, {0.576, -0.768, -0.28, 0},
|
||||
#ifndef __SSE__
|
||||
{6e-8, 0, 6e-8, 0}
|
||||
#else
|
||||
{6e-8, 0, 3e-8, 0}
|
||||
#endif
|
||||
),
|
||||
T(vqmulf, right, qtest, {0.5392, 0.6144, 0.576, 0},
|
||||
{0, -5.9e-8, 5.9e-8, 0} ),
|
||||
T(vqmulf, forward, qtest, {0.6144, 0.1808, -0.768, 0},
|
||||
#ifndef __SSE__
|
||||
{-5.9e-8, 3e-8, 0, 0}
|
||||
#else
|
||||
{-5.9e-8, 1.5e-8, 0, 0}
|
||||
#endif
|
||||
),
|
||||
T(vqmulf, up, qtest, {-0.576, 0.768, -0.28, 0},
|
||||
#ifndef __SSE__
|
||||
{-5.9e-8, 0, 6e-8, 0}
|
||||
#else
|
||||
{-5.9e-8, 0, 3e-8, 0}
|
||||
#endif
|
||||
),
|
||||
|
||||
{ qrotf, right, right, qident },
|
||||
{ qrotf, right, forward, { 0, 0, s05, s05 } },
|
||||
{ qrotf, right, up, { 0, -s05, 0, s05 } },
|
||||
{ qrotf, forward, right, { 0, 0, -s05, s05 } },
|
||||
{ qrotf, forward, forward, qident },
|
||||
{ qrotf, forward, up, { s05, 0, 0, s05 } },
|
||||
{ qrotf, up, right, { 0, s05, 0, s05 } },
|
||||
{ qrotf, up, forward, { -s05, 0, 0, s05 } },
|
||||
{ qrotf, up, up, qident },
|
||||
T(qrotf, right, right, qident ),
|
||||
T(qrotf, right, forward, { 0, 0, s05, s05 } ),
|
||||
T(qrotf, right, up, { 0, -s05, 0, s05 } ),
|
||||
T(qrotf, forward, right, { 0, 0, -s05, s05 } ),
|
||||
T(qrotf, forward, forward, qident ),
|
||||
T(qrotf, forward, up, { s05, 0, 0, s05 } ),
|
||||
T(qrotf, up, right, { 0, s05, 0, s05 } ),
|
||||
T(qrotf, up, forward, { -s05, 0, 0, s05 } ),
|
||||
T(qrotf, up, up, qident ),
|
||||
|
||||
{ tvabsf, pmpi, {}, pi },
|
||||
{ tvsqrtf, { 1, 4, 9, 16}, {}, {1, 2, 3, 4} },
|
||||
{ tvtruncf, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -1, -2 } },
|
||||
{ tvceilf, { 1.1, 2.9, -1.1, -2.9 }, {}, { 2, 3, -1, -2 } },
|
||||
{ tvfloorf, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -2, -3 } },
|
||||
{ tqconjf, one, {}, { -1, -1, -1, 1 } },
|
||||
{ tmagnitudef, { 3, 4, 12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, 4, 12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, 4, -12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, 4, -12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, -4, 12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, -4, 12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, -4, -12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { 3, -4, -12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, 4, 12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, 4, 12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, 4, -12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, 4, -12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, -4, 12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, -4, 12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, -4, -12, 84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitudef, { -3, -4, -12, -84}, {}, {85, 85, 85, 85} },
|
||||
{ tmagnitude3f, { -3, -4, -12, -84}, {}, {13, 13, 13, 13} },
|
||||
T(tvabsf, pmpi, {}, pi ),
|
||||
T(tvsqrtf, { 1, 4, 9, 16}, {}, {1, 2, 3, 4} ),
|
||||
T(tvtruncf, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -1, -2 } ),
|
||||
T(tvceilf, { 1.1, 2.9, -1.1, -2.9 }, {}, { 2, 3, -1, -2 } ),
|
||||
T(tvfloorf, { 1.1, 2.9, -1.1, -2.9 }, {}, { 1, 2, -2, -3 } ),
|
||||
T(tqconjf, one, {}, { -1, -1, -1, 1 } ),
|
||||
T(tmagnitudef, { 3, 4, 12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, 4, 12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, 4, -12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, 4, -12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, -4, 12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, -4, 12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, -4, -12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { 3, -4, -12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, 4, 12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, 4, 12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, 4, -12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, 4, -12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, -4, 12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, -4, 12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, -4, -12, 84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitudef, { -3, -4, -12, -84}, {}, {85, 85, 85, 85} ),
|
||||
T(tmagnitude3f, { -3, -4, -12, -84}, {}, {13, 13, 13, 13} ),
|
||||
};
|
||||
#define num_vec4f_tests (sizeof (vec4f_tests) / (sizeof (vec4f_tests[0])))
|
||||
|
||||
static mat4f_test_t mat4f_tests[] = {
|
||||
{ mmulf, identity, identity, identity },
|
||||
{ mmulf, rotate120, identity, rotate120 },
|
||||
{ mmulf, identity, rotate120, rotate120 },
|
||||
{ mmulf, rotate120, rotate120, rotate240 },
|
||||
{ mmulf, rotate120, rotate240, identity },
|
||||
{ mmulf, rotate240, rotate120, identity },
|
||||
T(mmulf, identity, identity, identity ),
|
||||
T(mmulf, rotate120, identity, rotate120 ),
|
||||
T(mmulf, identity, rotate120, rotate120 ),
|
||||
T(mmulf, rotate120, rotate120, rotate240 ),
|
||||
T(mmulf, rotate120, rotate240, identity ),
|
||||
T(mmulf, rotate240, rotate120, identity ),
|
||||
};
|
||||
#define num_mat4f_tests (sizeof (mat4f_tests) / (sizeof (mat4f_tests[0])))
|
||||
|
||||
static mv4f_test_t mv4f_tests[] = {
|
||||
{ mvmulf, identity, { 1, 0, 0, 0 }, { 1, 0, 0, 0 } },
|
||||
{ mvmulf, identity, { 0, 1, 0, 0 }, { 0, 1, 0, 0 } },
|
||||
{ mvmulf, identity, { 0, 0, 1, 0 }, { 0, 0, 1, 0 } },
|
||||
{ mvmulf, identity, { 0, 0, 0, 1 }, { 0, 0, 0, 1 } },
|
||||
{ mvmulf, rotate120, { 1, 2, 3, 4 }, { 3, 1, 2, 4 } },
|
||||
{ mvmulf, rotate240, { 1, 2, 3, 4 }, { 2, 3, 1, 4 } },
|
||||
T(mvmulf, identity, { 1, 0, 0, 0 }, { 1, 0, 0, 0 } ),
|
||||
T(mvmulf, identity, { 0, 1, 0, 0 }, { 0, 1, 0, 0 } ),
|
||||
T(mvmulf, identity, { 0, 0, 1, 0 }, { 0, 0, 1, 0 } ),
|
||||
T(mvmulf, identity, { 0, 0, 0, 1 }, { 0, 0, 0, 1 } ),
|
||||
T(mvmulf, rotate120, { 1, 2, 3, 4 }, { 3, 1, 2, 4 } ),
|
||||
T(mvmulf, rotate240, { 1, 2, 3, 4 }, { 2, 3, 1, 4 } ),
|
||||
};
|
||||
#define num_mv4f_tests (sizeof (mv4f_tests) / (sizeof (mv4f_tests[0])))
|
||||
|
||||
// expect filled in using non-simd QuatToMatrix (has its own tests)
|
||||
static mq4f_test_t mq4f_tests[] = {
|
||||
{ mat4fquat, { 0, 0, 0, 1 } },
|
||||
{ mat4fquat, { 0.5, 0.5, 0.5, 0.5 } },
|
||||
{ mat4fquat, { 0.5, 0.5, -0.5, 0.5 } },
|
||||
{ mat4fquat, { 0.5, -0.5, 0.5, 0.5 } },
|
||||
{ mat4fquat, { 0.5, -0.5, -0.5, 0.5 } },
|
||||
{ mat4fquat, { -0.5, 0.5, 0.5, 0.5 } },
|
||||
{ mat4fquat, { -0.5, 0.5, -0.5, 0.5 } },
|
||||
{ mat4fquat, { -0.5, -0.5, 0.5, 0.5 } },
|
||||
{ mat4fquat, { -0.5, -0.5, -0.5, 0.5 } },
|
||||
T(mat4fquat, { 0, 0, 0, 1 } ),
|
||||
T(mat4fquat, { 0.5, 0.5, 0.5, 0.5 } ),
|
||||
T(mat4fquat, { 0.5, 0.5, -0.5, 0.5 } ),
|
||||
T(mat4fquat, { 0.5, -0.5, 0.5, 0.5 } ),
|
||||
T(mat4fquat, { 0.5, -0.5, -0.5, 0.5 } ),
|
||||
T(mat4fquat, { -0.5, 0.5, 0.5, 0.5 } ),
|
||||
T(mat4fquat, { -0.5, 0.5, -0.5, 0.5 } ),
|
||||
T(mat4fquat, { -0.5, -0.5, 0.5, 0.5 } ),
|
||||
T(mat4fquat, { -0.5, -0.5, -0.5, 0.5 } ),
|
||||
};
|
||||
#define num_mq4f_tests (sizeof (mq4f_tests) / (sizeof (mq4f_tests[0])))
|
||||
|
||||
#ifdef __AVX__
|
||||
static int
|
||||
run_vec4d_tests (void)
|
||||
{
|
||||
|
@ -443,7 +482,7 @@ run_vec4d_tests (void)
|
|||
vec4l_t res = result != expect;
|
||||
if (res[0] || res[1] || res[2] || res[3]) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_vec4d_tests %zd\n", i);
|
||||
printf ("\nrun_vec4d_tests %zd, line %d\n", i, test->line);
|
||||
printf ("a: " VEC4D_FMT "\n", VEC4_EXP(test->a));
|
||||
printf ("b: " VEC4D_FMT "\n", VEC4_EXP(test->b));
|
||||
printf ("r: " VEC4D_FMT "\n", VEC4_EXP(result));
|
||||
|
@ -455,6 +494,7 @@ run_vec4d_tests (void)
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
run_vec4f_tests (void)
|
||||
|
@ -465,10 +505,10 @@ run_vec4f_tests (void)
|
|||
__auto_type test = &vec4f_tests[i];
|
||||
vec4f_t result = test->op (test->a, test->b);
|
||||
vec4f_t expect = test->expect + test->ulp_errors;
|
||||
vec4i_t res = result != expect;
|
||||
vec4i_t res = (vec4i_t) result != (vec4i_t) expect;
|
||||
if (res[0] || res[1] || res[2] || res[3]) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_vec4f_tests %zd\n", i);
|
||||
printf ("\nrun_vec4f_tests %zd, line %d\n", i, test->line);
|
||||
printf ("a: " VEC4F_FMT "\n", VEC4_EXP(test->a));
|
||||
printf ("b: " VEC4F_FMT "\n", VEC4_EXP(test->b));
|
||||
printf ("r: " VEC4F_FMT "\n", VEC4_EXP(result));
|
||||
|
@ -502,7 +542,7 @@ run_mat4f_tests (void)
|
|||
}
|
||||
if (fail) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_mat4f_tests %zd\n", i);
|
||||
printf ("\nrun_mat4f_tests %zd, line %d\n", i, test->line);
|
||||
printf ("a: " VEC4F_FMT "\n", MAT4_ROW(test->a, 0));
|
||||
printf (" " VEC4F_FMT "\n", MAT4_ROW(test->a, 1));
|
||||
printf (" " VEC4F_FMT "\n", MAT4_ROW(test->a, 2));
|
||||
|
@ -549,7 +589,7 @@ run_mv4f_tests (void)
|
|||
|
||||
if (res[0] || res[1] || res[2] || res[3]) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_mv4f_tests %zd\n", i);
|
||||
printf ("\nrun_mv4f_tests %zd, line %d\n", i, test->line);
|
||||
printf ("a: " VEC4F_FMT "\n", MAT4_ROW(test->a, 0));
|
||||
printf (" " VEC4F_FMT "\n", MAT4_ROW(test->a, 1));
|
||||
printf (" " VEC4F_FMT "\n", MAT4_ROW(test->a, 2));
|
||||
|
@ -595,7 +635,7 @@ run_mq4f_tests (void)
|
|||
}
|
||||
if (fail) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_mq4f_tests %zd\n", i);
|
||||
printf ("\nrun_mq4f_tests %zd, line %d\n", i, test->line);
|
||||
printf ("q: " VEC4F_FMT "\n", VEC4_EXP(test->q));
|
||||
printf ("r: " VEC4F_FMT "\n", MAT4_ROW(result, 0));
|
||||
printf (" " VEC4F_FMT "\n", MAT4_ROW(result, 1));
|
||||
|
@ -626,7 +666,9 @@ int
|
|||
main (void)
|
||||
{
|
||||
int ret = 0;
|
||||
#ifdef __AVX__
|
||||
ret |= run_vec4d_tests ();
|
||||
#endif
|
||||
ret |= run_vec4f_tests ();
|
||||
ret |= run_mat4f_tests ();
|
||||
ret |= run_mv4f_tests ();
|
||||
|
|
|
@ -86,6 +86,7 @@ SimpleFlood (basethread_t *thread, portal_t *srcportal, int clusternum)
|
|||
static inline int
|
||||
test_sphere (const vspheref_t *sphere, vec4f_t plane)
|
||||
{
|
||||
#ifdef __SSE3__
|
||||
const vec4f_t zero = {};
|
||||
float r = sphere->radius;
|
||||
vec4f_t eps = { r, r, r, r };
|
||||
|
@ -94,6 +95,12 @@ test_sphere (const vspheref_t *sphere, vec4f_t plane)
|
|||
|
||||
c = (vec4i_t) _mm_hsub_epi32 ((__m128i) c, (__m128i) c);
|
||||
return c[0];
|
||||
#else
|
||||
float d = DotProduct (sphere->center, plane) + plane[3];
|
||||
int front = (d >= sphere->radius);
|
||||
int back = (d <= -sphere->radius);
|
||||
return front - back;
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -214,12 +214,20 @@ NewFlippedWinding (threaddata_t *thread, const winding_t *w)
|
|||
static vec4i_t
|
||||
signeps (vec4f_t dist)
|
||||
{
|
||||
#ifdef __SSE3__
|
||||
const vec4f_t zero = {};
|
||||
const vec4f_t eps = { ON_EPSILON, ON_EPSILON, ON_EPSILON, ON_EPSILON };
|
||||
vec4f_t d = _mm_addsub_ps (zero, dist);
|
||||
vec4i_t c = (d - eps) > 0;
|
||||
c = (vec4i_t) _mm_hsub_epi32 ((__m128i) c, (__m128i) c);
|
||||
return c;
|
||||
#else
|
||||
float d = dist[0];
|
||||
int front = (d >= ON_EPSILON);
|
||||
int back = (d <= -ON_EPSILON);
|
||||
int i = front - back;
|
||||
return (vec4i_t) { i, i, i, i };
|
||||
#endif
|
||||
}
|
||||
|
||||
static vec4f_t
|
||||
|
@ -246,7 +254,12 @@ split_edge (const vec4f_t *points, const vec4f_t *dists,
|
|||
vec4i_t x = _mm_and_ps (split, (__m128) nan) == onenan;
|
||||
// plane vector has -dist in w
|
||||
vec4f_t y = _mm_and_ps (split, (__m128) x) * -split[3];
|
||||
#ifdef __SSE3__
|
||||
mid = _mm_blendv_ps (mid, y, (__m128) x);
|
||||
#else
|
||||
mid = (vec4f_t) ((vec4i_t) _mm_and_ps (y, (__m128) x) |
|
||||
(vec4i_t) _mm_and_ps (mid, (__m128) ~x));
|
||||
#endif
|
||||
if (isnan (mid[0])) *(int *) 0 = 0;
|
||||
return mid;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue