[util] Add basic SIMD implemented vector functions

They take advantage of gcc's vector_size attribute and so only cross,
dot, qmul, qvmul and qrot (create rotation quaternion from two vectors)
are needed at this stage as basic (per-component) math is supported
natively by gcc.

The provided functions work on horizontal (array-of-structs) data, ie a
vec4d_t or vec4f_t represents a single vector, or traditional vector
layout. Vertical layout (struct-of-arrays) does not need any special
functions as the regular math can be used to operate on four vectors at
a time.

Functions are provided for loading a vec4 from a vec3 (4th element set
to 0) and storing a vec4 into a vec3 (discarding the 4th element).

With this, QF will require AVX2 support (needed for vec4d_t). Without
support for doubles, SSE is possible, but may not be worthwhile for
horizontal data.

Fused-multiply-add is NOT used because it alters the results between
unoptimized and optimized code, resulting in -mfma really meaning
-mfast-math-anyway. I really do not want to have to debug issues that
occur only in optimized code.
This commit is contained in:
Bill Currie 2020-12-28 12:29:04 +09:00
parent d824db68a5
commit 09a10f80e1
6 changed files with 699 additions and 0 deletions

View file

@ -82,6 +82,11 @@ AC_ARG_ENABLE(optimize,
optimize=yes
)
QF_CC_OPTION(-mavx2)
dnl fma is not used as it is the equivalent of turning on
dnl -funsafe-math-optimizations
dnl QF_CC_OPTION(-mfma)
AC_MSG_CHECKING(for optimization)
if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then
AC_MSG_RESULT(yes)

85
include/QF/simd/types.h Normal file
View file

@ -0,0 +1,85 @@
/*
QF/simd/types.h
Type definitions for QF SIMD values
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifndef __QF_simd_types_h
#define __QF_simd_types_h
#define VEC_TYPE(t,n) typedef t n __attribute__ ((vector_size (4*sizeof (t))))
/** Three element vector type for interfacing with compact data.
*
* This cannot be used directly by SIMD code and must be loaded and stored
* using load_vec3d() and store_vec3d() respectively.
*/
typedef double vec3d_t[3];
#ifdef __AVX__
/** Four element vector type for horizontal (AOS) vector data.
*
* This is used for both vectors (3D and 4D) and quaternions. 3D vectors
* are simply vec4d_t values with 0 in the 4th element.
*
* Also usable with vertical (SOA) code, in which case each vec4d_t represents
* a single component from four vectors, or a single row/column (depending on
* context) of an Nx4 or 4xN matrix.
*/
VEC_TYPE (double, vec4d_t);
/** Used mostly for __builtin_shuffle.
*/
VEC_TYPE (long, vec4l_t);
#endif
/** Three element vector type for interfacing with compact data.
*
* This cannot be used directly by SIMD code and must be loaded and stored
* using load_vec3f() and store_vec3f() respectively.
*/
typedef float vec3f_t[3];
/** Four element vector type for horizontal (AOS) vector data.
*
* This is used for both vectors (3D and 4D) and quaternions. 3D vectors
* are simply vec4f_t values with 0 in the 4th element.
*
* Also usable with vertical (SOA) code, in which case each vec4f_t represents
* a single component from four vectors, or a single row/column (depending on
* context) of an Nx4 or 4xN matrix.
*/
VEC_TYPE (float, vec4f_t);
/** Used mostly for __builtin_shuffle.
*/
VEC_TYPE (int, vec4i_t);
#define VEC4D_FMT "[%.17g, %.17g, %.17g, %.17g]"
#define VEC4L_FMT "[%ld, %ld, %ld, %ld]"
#define VEC4F_FMT "[%.9g, %.9g, %.9g, %.9g]"
#define VEC4I_FMT "[%d, %d, %d, %d]"
#define VEC4_EXP(v) (v)[0], (v)[1], (v)[2], (v)[3]
#endif//__QF_simd_types_h

156
include/QF/simd/vec4d.h Normal file
View file

@ -0,0 +1,156 @@
/*
QF/simd/vec4d.h
Vector functions for vec4d_t (ie, double precision)
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifndef __QF_simd_vec4d_h
#define __QF_simd_vec4d_h
#include <immintrin.h>
#include "QF/simd/types.h"
/** 3D vector cross product.
*
* The w (4th) component can be any value on input, and is guaranteed to be 0
* in the result. The result is not affected in any way by either vector's w
* componemnt
*/
vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const));
vec4d_t
crossd (vec4d_t a, vec4d_t b)
{
static const vec4l_t A = {1, 2, 0, 3};
vec4d_t c = a * __builtin_shuffle (b, A);
vec4d_t d = __builtin_shuffle (a, A) * b;
c = c - d;
return __builtin_shuffle(c, A);
}
/** 4D vector dot product.
*
* The w component *IS* significant, but if it is 0 in either vector, then
* the result will be as for a 3D dot product.
*
* Note that the dot product is in all 4 of the return value's elements. This
* helps optimize vector math as the scalar is already pre-spread. If just the
* scalar is required, use result[0].
*/
vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const));
vec4d_t
dotd (vec4d_t a, vec4d_t b)
{
vec4d_t c = a * b;
c = _mm256_hadd_pd (c, c);
static const vec4l_t A = {2, 3, 0, 1};
c += __builtin_shuffle(c, A);
return c;
}
/** Quaternion product.
*
* The vector is interpreted as a quaternion instead of a regular 4D vector.
* The quaternion may be of any magnitude, so this is more generally useful.
* than if the quaternion was required to be unit length.
*/
vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const));
vec4d_t
qmuld (vec4d_t a, vec4d_t b)
{
// results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation)
// doesn't seem to adversly affect precision
vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b;
vec4d_t d = dotd (a, b);
// zero out the vector component of dot product so only the scalar remains
d = _mm256_permute2f128_pd (d, d, 0x18);
d = _mm256_permute4x64_pd (d, 0xc0);
return c - d;
}
/** Optimized quaterion-vector multiplication for vector rotation.
*
* If the vector's w component is not zero, then the result's w component
* is the cosine of the full rotation angle scaled by the vector's w component.
* The quaternion is assumed to be unit. If the quaternion is not unit, the
* vector will be scaled by the square of the quaternion's magnitude.
*/
vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const));
vec4d_t
qvmuld (vec4d_t q, vec4d_t v)
{
double s = q[3];
// zero the scalar of the quaternion. Results in an extra operation, but
// avoids adding precision issues.
vec4d_t z = {};
q = _mm256_blend_pd (q, z, 0x08);
vec4d_t c = crossd (q, v);
vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant
vec4d_t qq = dotd (q, q);
return (s * s - qq) * v + 2 * (qv * q + s * c);
}
/** Create the quaternion representing the shortest rotation from a to b.
*
* Both a and b are assumed to be 3D vectors (w components 0), but a resonable
* (but incorrect) result will still be produced if either a or b is a 4D
* vector. The rotation axis will be the same as if both vectors were 3D, but
* the magnitude of the rotation will be different.
*/
vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const));
vec4d_t
qrotd (vec4d_t a, vec4d_t b)
{
vec4d_t ma = _mm256_sqrt_pd (dotd (a, a));
vec4d_t mb = _mm256_sqrt_pd (dotd (b, b));
vec4d_t den = 2 * ma * mb;
vec4d_t t = mb * a + ma * b;
vec4d_t mba_mab = _mm256_sqrt_pd (dotd (t, t));
vec4d_t q = crossd (a, b) / mba_mab;
q[3] = (mba_mab / den)[0];
return q;
}
vec4d_t loadvec3d (const double v3[]) __attribute__((pure, access(read_only, 1)));
vec4d_t
loadvec3d (const double v3[])
{
vec4d_t v4 = {};
v4[0] = v3[0];
v4[1] = v3[1];
v4[2] = v3[2];
return v4;
}
void storevec3d (double v3[3], vec4d_t v4) __attribute__((access (write_only, 1)));
void storevec3d (double v3[3], vec4d_t v4)
{
v3[0] = v4[0];
v3[1] = v4[1];
v3[2] = v4[2];
}
#endif//__QF_simd_vec4d_h

159
include/QF/simd/vec4f.h Normal file
View file

@ -0,0 +1,159 @@
/*
QF/simd/vec4f.h
Vector functions for vec4f_t (ie, float precision)
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifndef __QF_simd_vec4f_h
#define __QF_simd_vec4f_h
#include <immintrin.h>
#include "QF/simd/types.h"
/** 3D vector cross product.
*
* The w (4th) component can be any value on input, and is guaranteed to be 0
* in the result. The result is not affected in any way by either vector's w
* componemnt
*/
vec4f_t crossf (vec4f_t a, vec4f_t b) __attribute__((const));
vec4f_t
crossf (vec4f_t a, vec4f_t b)
{
static const vec4i_t A = {1, 2, 0, 3};
vec4f_t c = a * __builtin_shuffle (b, A) - __builtin_shuffle (a, A) * b;
return __builtin_shuffle(c, A);
}
/** 4D vector dot product.
*
* The w component *IS* significant, but if it is 0 in either vector, then
* the result will be as for a 3D dot product.
*
* Note that the dot product is in all 4 of the return value's elements. This
* helps optimize vector math as the scalar is already pre-spread. If just the
* scalar is required, use result[0].
*/
vec4f_t dotf (vec4f_t a, vec4f_t b) __attribute__((const));
vec4f_t
dotf (vec4f_t a, vec4f_t b)
{
vec4f_t c = a * b;
c = _mm_hadd_ps (c, c);
c = _mm_hadd_ps (c, c);
return c;
}
/** Quaternion product.
*
* The vector is interpreted as a quaternion instead of a regular 4D vector.
* The quaternion may be of any magnitude, so this is more generally useful.
* than if the quaternion was required to be unit length.
*/
vec4f_t qmulf (vec4f_t a, vec4f_t b) __attribute__((const));
vec4f_t
qmulf (vec4f_t a, vec4f_t b)
{
// results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation)
// doesn't seem to adversly affect precision
vec4f_t c = crossf (a, b) + a * b[3] + a[3] * b;
vec4f_t d = dotf (a, b);
// zero out the vector component of dot product so only the scalar remains
d = _mm_insert_ps (d, d, 0xf7);
return c - d;
}
/** Optimized quaterion-vector multiplication for vector rotation.
*
* If the vector's w component is not zero, then the result's w component
* is the cosine of the full rotation angle scaled by the vector's w component.
* The quaternion is assumed to be unit.
*/
vec4f_t qvmulf (vec4f_t q, vec4f_t v) __attribute__((const));
vec4f_t
qvmulf (vec4f_t q, vec4f_t v)
{
float s = q[3];
// zero the scalar of the quaternion. Results in an extra operation, but
// avoids adding precision issues.
q = _mm_insert_ps (q, q, 0xf8);
vec4f_t c = crossf (q, v); // q.w is 0 so v.w is irrelevant
vec4f_t qv = dotf (q, v);
vec4f_t qq = dotf (q, q);
return (s * s - qq) * v + 2 * (qv * q + s * c);
}
/** Create the quaternion representing the shortest rotation from a to b.
*
* Both a and b are assumed to be 3D vectors (w components 0), but a resonable
* (but incorrect) result will still be produced if either a or b is a 4D
* vector. The rotation axis will be the same as if both vectors were 3D, but
* the magnitude of the rotation will be different.
*/
vec4f_t qrotf (vec4f_t a, vec4f_t b) __attribute__((const));
vec4f_t
qrotf (vec4f_t a, vec4f_t b)
{
vec4f_t ma = _mm_sqrt_ps (dotf (a, a));
vec4f_t mb = _mm_sqrt_ps (dotf (b, b));
vec4f_t den = 2 * ma * mb;
vec4f_t t = mb * a + ma * b;
vec4f_t mba_mab = _mm_sqrt_ps (dotf (t, t));
vec4f_t q = crossf (a, b) / mba_mab;
q[3] = (mba_mab / den)[0];
return q;
}
vec4f_t loadvec3f (const float v3[3]) __attribute__((pure, access(read_only, 1)));
vec4f_t
loadvec3f (const float v3[3])
{
vec4f_t v4;
// this had to be in asm otherwise gcc thinks v4 is only partially
// initialized, and gcc 10 does not use the zero flags when generating
// the code, resulting in a memory access to load a 0 into v4[3]
//
// The first instruction zeros v4[3] while loading v4[0]
asm ("\n\
vinsertps $0x08, %1, %0, %0 \n\
vinsertps $0x10, %2, %0, %0 \n\
vinsertps $0x20, %3, %0, %0 \n\
"
: "=v"(v4)
: "m"(v3[0]), "m"(v3[1]), "m"(v3[2]));
return v4;
}
void storevec3f (float v3[3], vec4f_t v4) __attribute__((access (write_only, 1)));
void storevec3f (float v3[3], vec4f_t v4)
{
v3[0] = v4[0];
v3[1] = v4[1];
v3[2] = v4[2];
}
#endif//__QF_simd_vec4f_h

View file

@ -14,6 +14,7 @@ libs_util_tests = \
libs/util/test/test-seb \
libs/util/test/test-seg \
libs/util/test/test-set \
libs/util/test/test-simd \
libs/util/test/test-txtbuffer \
libs/util/test/test-vrect
@ -81,6 +82,10 @@ libs_util_test_test_set_SOURCES=libs/util/test/test-set.c
libs_util_test_test_set_LDADD=libs/util/libQFutil.la
libs_util_test_test_set_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_simd_SOURCES=libs/util/test/test-simd.c
libs_util_test_test_simd_LDADD=libs/util/libQFutil.la
libs_util_test_test_simd_DEPENDENCIES=libs/util/libQFutil.la
libs_util_test_test_txtbuffer_SOURCES=libs/util/test/test-txtbuffer.c
libs_util_test_test_txtbuffer_LDADD=libs/util/libQFutil.la
libs_util_test_test_txtbuffer_DEPENDENCIES=libs/util/libQFutil.la

289
libs/util/test/test-simd.c Normal file
View file

@ -0,0 +1,289 @@
#include <stdio.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "QF/simd/vec4d.h"
#include "QF/simd/vec4f.h"
#define right { 1, 0, 0 }
#define forward { 0, 1, 0 }
#define up { 0, 0, 1 }
#define one { 1, 1, 1, 1 }
#define half { 0.5, 0.5, 0.5, 0.5 }
#define zero { 0, 0, 0, 0 }
#define qident { 0, 0, 0, 1 }
#define qtest { 0.64, 0.48, 0, 0.6 }
#define nright { -1, 0, 0 }
#define nforward { 0, -1, 0 }
#define nup { 0, 0, -1 }
#define none { -1, -1, -1, -1 }
#define nqident { 0, 0, 0, -1 }
#define s05 0.70710678118654757
typedef struct {
vec4d_t (*op) (vec4d_t a, vec4d_t b);
vec4d_t a;
vec4d_t b;
vec4d_t expect;
vec4d_t ulp_errors;
} vec4d_test_t;
typedef struct {
vec4f_t (*op) (vec4f_t a, vec4f_t b);
vec4f_t a;
vec4f_t b;
vec4f_t expect;
vec4f_t ulp_errors;
} vec4f_test_t;
static vec4d_test_t vec4d_tests[] = {
// 3D dot products
{ dotd, right, right, one },
{ dotd, right, forward, zero },
{ dotd, right, up, zero },
{ dotd, forward, right, zero },
{ dotd, forward, forward, one },
{ dotd, forward, up, zero },
{ dotd, up, right, zero },
{ dotd, up, forward, zero },
{ dotd, up, up, one },
// one is 4D, so its self dot product is 4
{ dotd, one, one, { 4, 4, 4, 4} },
{ dotd, one, none, {-4, -4, -4, -4} },
// 3D cross products
{ crossd, right, right, zero },
{ crossd, right, forward, up },
{ crossd, right, up, nforward },
{ crossd, forward, right, nup },
{ crossd, forward, forward, zero },
{ crossd, forward, up, right },
{ crossd, up, right, forward },
{ crossd, up, forward, nright },
{ crossd, up, up, zero },
// double whammy tests: cross product with an angled vector and
// ensuring that a 4d vector (non-zero w component) does not affect
// the result, including the result's w component remaining zero.
{ crossd, right, one, { 0, -1, 1} },
{ crossd, forward, one, { 1, 0, -1} },
{ crossd, up, one, {-1, 1, 0} },
{ crossd, one, right, { 0, 1, -1} },
{ crossd, one, forward, {-1, 0, 1} },
{ crossd, one, up, { 1, -1, 0} },
// This one fails when optimizing with -mfma (which is why fma is not
// used): ulp errors in z and w
{ crossd, qtest, qtest, {0, 0, 0, 0} },
{ qmuld, qident, qident, qident },
{ qmuld, qident, right, right },
{ qmuld, qident, forward, forward },
{ qmuld, qident, up, up },
{ qmuld, right, qident, right },
{ qmuld, forward, qident, forward },
{ qmuld, up, qident, up },
{ qmuld, right, right, nqident },
{ qmuld, right, forward, up },
{ qmuld, right, up, nforward },
{ qmuld, forward, right, nup },
{ qmuld, forward, forward, nqident },
{ qmuld, forward, up, right },
{ qmuld, up, right, forward },
{ qmuld, up, forward, nright },
{ qmuld, up, up, nqident },
{ qmuld, one, one, { 2, 2, 2, -2 } },
{ qmuld, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } },
// This one fails when optimizing with -mfma (which is why fma is not
// used): ulp error in z
{ qmuld, qtest, qtest, {0.768, 0.576, 0, -0.28} },
// The one vector is not unit (magnitude 2), so using it as a rotation
// quaternion results in scaling by 4. However, it still has the effect
// of rotating 120 degrees around the axis equidistant from the three
// orthogonal axes such that x->y->z->x
{ qvmuld, one, right, { 0, 4, 0, 0 } },
{ qvmuld, one, forward, { 0, 0, 4, 0 } },
{ qvmuld, one, up, { 4, 0, 0, 0 } },
{ qvmuld, one, {1,1,1,0}, { 4, 4, 4, 0 } },
{ qvmuld, one, one, { 4, 4, 4, -2 } },
// The half vector is unit.
{ qvmuld, half, right, forward },
{ qvmuld, half, forward, up },
{ qvmuld, half, up, right },
{ qvmuld, half, {1,1,1,0}, { 1, 1, 1, 0 } },
// one is a 4D vector and qvmuld is meant for 3D vectors. However, it
// seems that the vector's w has no effect on the 3d portion of the
// result, but the result's w is cosine of the full rotation angle
// scaled by quaternion magnitude and vector w
{ qvmuld, half, one, { 1, 1, 1, -0.5 } },
{ qvmuld, half, {2,2,2,2}, { 2, 2, 2, -1 } },
{ qvmuld, qtest, right, {0.5392, 0.6144, -0.576, 0} },
{ qvmuld, qtest, forward, {0.6144, 0.1808, 0.768, 0},
{0, -2.7e-17, 0, 0} },
{ qvmuld, qtest, up, {0.576, -0.768, -0.28, 0} },
{ qrotd, right, right, qident },
{ qrotd, right, forward, { 0, 0, s05, s05 },
{0, 0, -1.1e-16, 0} },
{ qrotd, right, up, { 0, -s05, 0, s05 },
{0, 1.1e-16, 0, 0} },
{ qrotd, forward, right, { 0, 0, -s05, s05 },
{0, 0, 1.1e-16, 0} },
{ qrotd, forward, forward, qident },
{ qrotd, forward, up, { s05, 0, 0, s05 },
{-1.1e-16, 0, 0, 0} },
{ qrotd, up, right, { 0, s05, 0, s05 },
{0, -1.1e-16, 0, 0} },
{ qrotd, up, forward, { -s05, 0, 0, s05 },
{ 1.1e-16, 0, 0, 0} },
{ qrotd, up, up, qident },
};
#define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0])))
static vec4f_test_t vec4f_tests[] = {
// 3D dot products
{ dotf, right, right, one },
{ dotf, right, forward, zero },
{ dotf, right, up, zero },
{ dotf, forward, right, zero },
{ dotf, forward, forward, one },
{ dotf, forward, up, zero },
{ dotf, up, right, zero },
{ dotf, up, forward, zero },
{ dotf, up, up, one },
// one is 4D, so its self dot product is 4
{ dotf, one, one, { 4, 4, 4, 4} },
{ dotf, one, none, {-4, -4, -4, -4} },
// 3D cross products
{ crossf, right, right, zero },
{ crossf, right, forward, up },
{ crossf, right, up, nforward },
{ crossf, forward, right, nup },
{ crossf, forward, forward, zero },
{ crossf, forward, up, right },
{ crossf, up, right, forward },
{ crossf, up, forward, nright },
{ crossf, up, up, zero },
// double whammy tests: cross product with an angled vector and
// ensuring that a 4d vector (non-zero w component) does not affect
// the result, including the result's w component remaining zero.
{ crossf, right, one, { 0, -1, 1} },
{ crossf, forward, one, { 1, 0, -1} },
{ crossf, up, one, {-1, 1, 0} },
{ crossf, one, right, { 0, 1, -1} },
{ crossf, one, forward, {-1, 0, 1} },
{ crossf, one, up, { 1, -1, 0} },
{ crossf, qtest, qtest, {0, 0, 0, 0} },
{ qmulf, qident, qident, qident },
{ qmulf, qident, right, right },
{ qmulf, qident, forward, forward },
{ qmulf, qident, up, up },
{ qmulf, right, qident, right },
{ qmulf, forward, qident, forward },
{ qmulf, up, qident, up },
{ qmulf, right, right, nqident },
{ qmulf, right, forward, up },
{ qmulf, right, up, nforward },
{ qmulf, forward, right, nup },
{ qmulf, forward, forward, nqident },
{ qmulf, forward, up, right },
{ qmulf, up, right, forward },
{ qmulf, up, forward, nright },
{ qmulf, up, up, nqident },
{ qmulf, one, one, { 2, 2, 2, -2 } },
{ qmulf, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } },
{ qmulf, qtest, qtest, {0.768, 0.576, 0, -0.28},
{0, 6e-8, 0, 3e-8} },
// The one vector is not unit (magnitude 2), so using it as a rotation
// quaternion results in scaling by 4. However, it still has the effect
// of rotating 120 degrees around the axis equidistant from the three
// orthogonal axes such that x->y->z->x
{ qvmulf, one, right, { 0, 4, 0, 0 } },
{ qvmulf, one, forward, { 0, 0, 4, 0 } },
{ qvmulf, one, up, { 4, 0, 0, 0 } },
{ qvmulf, one, {1,1,1,0}, { 4, 4, 4, 0 } },
{ qvmulf, one, one, { 4, 4, 4, -2 } },
{ qvmulf, qtest, right, {0.5392, 0.6144, -0.576, 0},
{0, -5.9e-08, -6e-8, 0} },
{ qvmulf, qtest, forward, {0.6144, 0.1808, 0.768, 0},
{-5.9e-08, 1.5e-08, 0, 0} },
{ qvmulf, qtest, up, {0.576, -0.768, -0.28, 0},
{6e-8, 0, 3e-8, 0} },
{ qrotf, right, right, qident },
{ qrotf, right, forward, { 0, 0, s05, s05 } },
{ qrotf, right, up, { 0, -s05, 0, s05 } },
{ qrotf, forward, right, { 0, 0, -s05, s05 } },
{ qrotf, forward, forward, qident },
{ qrotf, forward, up, { s05, 0, 0, s05 } },
{ qrotf, up, right, { 0, s05, 0, s05 } },
{ qrotf, up, forward, { -s05, 0, 0, s05 } },
{ qrotf, up, up, qident },
};
#define num_vec4f_tests (sizeof (vec4f_tests) / (sizeof (vec4f_tests[0])))
static int
run_vec4d_tests (void)
{
int ret = 0;
for (size_t i = 0; i < num_vec4d_tests; i++) {
__auto_type test = &vec4d_tests[i];
vec4d_t result = test->op (test->a, test->b);
vec4d_t expect = test->expect + test->ulp_errors;
vec4l_t res = result != expect;
if (res[0] || res[1] || res[2] || res[3]) {
ret |= 1;
printf ("\nrun_vec4d_tests\n");
printf ("a: " VEC4D_FMT "\n", VEC4_EXP(test->a));
printf ("b: " VEC4D_FMT "\n", VEC4_EXP(test->b));
printf ("r: " VEC4D_FMT "\n", VEC4_EXP(result));
printf ("t: " VEC4L_FMT "\n", VEC4_EXP(res));
printf ("E: " VEC4D_FMT "\n", VEC4_EXP(expect));
printf ("e: " VEC4D_FMT "\n", VEC4_EXP(test->expect));
printf ("u: " VEC4D_FMT "\n", VEC4_EXP(test->ulp_errors));
}
}
return ret;
}
static int
run_vec4f_tests (void)
{
int ret = 0;
for (size_t i = 0; i < num_vec4f_tests; i++) {
__auto_type test = &vec4f_tests[i];
vec4f_t result = test->op (test->a, test->b);
vec4f_t expect = test->expect + test->ulp_errors;
vec4i_t res = result != expect;
if (res[0] || res[1] || res[2] || res[3]) {
ret |= 1;
printf ("\nrun_vec4f_tests\n");
printf ("a: " VEC4F_FMT "\n", VEC4_EXP(test->a));
printf ("b: " VEC4F_FMT "\n", VEC4_EXP(test->b));
printf ("r: " VEC4F_FMT "\n", VEC4_EXP(result));
printf ("t: " VEC4I_FMT "\n", VEC4_EXP(res));
printf ("E: " VEC4F_FMT "\n", VEC4_EXP(expect));
printf ("e: " VEC4F_FMT "\n", VEC4_EXP(test->expect));
printf ("u: " VEC4F_FMT "\n", VEC4_EXP(test->ulp_errors));
}
}
return ret;
}
int
main (void)
{
int ret = 0;
ret |= run_vec4d_tests ();
ret |= run_vec4f_tests ();
return ret;
}