diff --git a/config.d/compiling.m4 b/config.d/compiling.m4 index 343218d35..d9ba92887 100644 --- a/config.d/compiling.m4 +++ b/config.d/compiling.m4 @@ -82,6 +82,11 @@ AC_ARG_ENABLE(optimize, optimize=yes ) +QF_CC_OPTION(-mavx2) +dnl fma is not used as it is the equivalent of turning on +dnl -funsafe-math-optimizations +dnl QF_CC_OPTION(-mfma) + AC_MSG_CHECKING(for optimization) if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then AC_MSG_RESULT(yes) diff --git a/include/QF/simd/types.h b/include/QF/simd/types.h new file mode 100644 index 000000000..cd8b0da9d --- /dev/null +++ b/include/QF/simd/types.h @@ -0,0 +1,85 @@ +/* + QF/simd/types.h + + Type definitions for QF SIMD values + + Copyright (C) 2020 Bill Currie + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + + Free Software Foundation, Inc. + 59 Temple Place - Suite 330 + Boston, MA 02111-1307, USA + +*/ + +#ifndef __QF_simd_types_h +#define __QF_simd_types_h + +#define VEC_TYPE(t,n) typedef t n __attribute__ ((vector_size (4*sizeof (t)))) + +/** Three element vector type for interfacing with compact data. + * + * This cannot be used directly by SIMD code and must be loaded and stored + * using load_vec3d() and store_vec3d() respectively. + */ +typedef double vec3d_t[3]; + +#ifdef __AVX__ +/** Four element vector type for horizontal (AOS) vector data. + * + * This is used for both vectors (3D and 4D) and quaternions. 3D vectors + * are simply vec4d_t values with 0 in the 4th element. + * + * Also usable with vertical (SOA) code, in which case each vec4d_t represents + * a single component from four vectors, or a single row/column (depending on + * context) of an Nx4 or 4xN matrix. + */ +VEC_TYPE (double, vec4d_t); + +/** Used mostly for __builtin_shuffle. + */ +VEC_TYPE (long, vec4l_t); +#endif + +/** Three element vector type for interfacing with compact data. + * + * This cannot be used directly by SIMD code and must be loaded and stored + * using load_vec3f() and store_vec3f() respectively. + */ +typedef float vec3f_t[3]; + +/** Four element vector type for horizontal (AOS) vector data. + * + * This is used for both vectors (3D and 4D) and quaternions. 3D vectors + * are simply vec4f_t values with 0 in the 4th element. + * + * Also usable with vertical (SOA) code, in which case each vec4f_t represents + * a single component from four vectors, or a single row/column (depending on + * context) of an Nx4 or 4xN matrix. + */ +VEC_TYPE (float, vec4f_t); + +/** Used mostly for __builtin_shuffle. + */ +VEC_TYPE (int, vec4i_t); + +#define VEC4D_FMT "[%.17g, %.17g, %.17g, %.17g]" +#define VEC4L_FMT "[%ld, %ld, %ld, %ld]" +#define VEC4F_FMT "[%.9g, %.9g, %.9g, %.9g]" +#define VEC4I_FMT "[%d, %d, %d, %d]" +#define VEC4_EXP(v) (v)[0], (v)[1], (v)[2], (v)[3] + +#endif//__QF_simd_types_h diff --git a/include/QF/simd/vec4d.h b/include/QF/simd/vec4d.h new file mode 100644 index 000000000..d608933f5 --- /dev/null +++ b/include/QF/simd/vec4d.h @@ -0,0 +1,156 @@ +/* + QF/simd/vec4d.h + + Vector functions for vec4d_t (ie, double precision) + + Copyright (C) 2020 Bill Currie + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + + Free Software Foundation, Inc. + 59 Temple Place - Suite 330 + Boston, MA 02111-1307, USA + +*/ + +#ifndef __QF_simd_vec4d_h +#define __QF_simd_vec4d_h + +#include + +#include "QF/simd/types.h" + +/** 3D vector cross product. + * + * The w (4th) component can be any value on input, and is guaranteed to be 0 + * in the result. The result is not affected in any way by either vector's w + * componemnt + */ +vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const)); +vec4d_t +crossd (vec4d_t a, vec4d_t b) +{ + static const vec4l_t A = {1, 2, 0, 3}; + vec4d_t c = a * __builtin_shuffle (b, A); + vec4d_t d = __builtin_shuffle (a, A) * b; + c = c - d; + return __builtin_shuffle(c, A); +} + +/** 4D vector dot product. + * + * The w component *IS* significant, but if it is 0 in either vector, then + * the result will be as for a 3D dot product. + * + * Note that the dot product is in all 4 of the return value's elements. This + * helps optimize vector math as the scalar is already pre-spread. If just the + * scalar is required, use result[0]. + */ +vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const)); +vec4d_t +dotd (vec4d_t a, vec4d_t b) +{ + vec4d_t c = a * b; + c = _mm256_hadd_pd (c, c); + static const vec4l_t A = {2, 3, 0, 1}; + c += __builtin_shuffle(c, A); + return c; +} + +/** Quaternion product. + * + * The vector is interpreted as a quaternion instead of a regular 4D vector. + * The quaternion may be of any magnitude, so this is more generally useful. + * than if the quaternion was required to be unit length. + */ +vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const)); +vec4d_t +qmuld (vec4d_t a, vec4d_t b) +{ + // results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation) + // doesn't seem to adversly affect precision + vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b; + vec4d_t d = dotd (a, b); + // zero out the vector component of dot product so only the scalar remains + d = _mm256_permute2f128_pd (d, d, 0x18); + d = _mm256_permute4x64_pd (d, 0xc0); + return c - d; +} + +/** Optimized quaterion-vector multiplication for vector rotation. + * + * If the vector's w component is not zero, then the result's w component + * is the cosine of the full rotation angle scaled by the vector's w component. + * The quaternion is assumed to be unit. If the quaternion is not unit, the + * vector will be scaled by the square of the quaternion's magnitude. + */ +vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const)); +vec4d_t +qvmuld (vec4d_t q, vec4d_t v) +{ + double s = q[3]; + // zero the scalar of the quaternion. Results in an extra operation, but + // avoids adding precision issues. + vec4d_t z = {}; + q = _mm256_blend_pd (q, z, 0x08); + vec4d_t c = crossd (q, v); + vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant + vec4d_t qq = dotd (q, q); + + return (s * s - qq) * v + 2 * (qv * q + s * c); +} + +/** Create the quaternion representing the shortest rotation from a to b. + * + * Both a and b are assumed to be 3D vectors (w components 0), but a resonable + * (but incorrect) result will still be produced if either a or b is a 4D + * vector. The rotation axis will be the same as if both vectors were 3D, but + * the magnitude of the rotation will be different. + */ +vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const)); +vec4d_t +qrotd (vec4d_t a, vec4d_t b) +{ + vec4d_t ma = _mm256_sqrt_pd (dotd (a, a)); + vec4d_t mb = _mm256_sqrt_pd (dotd (b, b)); + vec4d_t den = 2 * ma * mb; + vec4d_t t = mb * a + ma * b; + vec4d_t mba_mab = _mm256_sqrt_pd (dotd (t, t)); + vec4d_t q = crossd (a, b) / mba_mab; + q[3] = (mba_mab / den)[0]; + return q; +} + +vec4d_t loadvec3d (const double v3[]) __attribute__((pure, access(read_only, 1))); +vec4d_t +loadvec3d (const double v3[]) +{ + vec4d_t v4 = {}; + + v4[0] = v3[0]; + v4[1] = v3[1]; + v4[2] = v3[2]; + return v4; +} + +void storevec3d (double v3[3], vec4d_t v4) __attribute__((access (write_only, 1))); +void storevec3d (double v3[3], vec4d_t v4) +{ + v3[0] = v4[0]; + v3[1] = v4[1]; + v3[2] = v4[2]; +} + +#endif//__QF_simd_vec4d_h diff --git a/include/QF/simd/vec4f.h b/include/QF/simd/vec4f.h new file mode 100644 index 000000000..ea92b81d9 --- /dev/null +++ b/include/QF/simd/vec4f.h @@ -0,0 +1,159 @@ +/* + QF/simd/vec4f.h + + Vector functions for vec4f_t (ie, float precision) + + Copyright (C) 2020 Bill Currie + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + + Free Software Foundation, Inc. + 59 Temple Place - Suite 330 + Boston, MA 02111-1307, USA + +*/ + +#ifndef __QF_simd_vec4f_h +#define __QF_simd_vec4f_h + +#include + +#include "QF/simd/types.h" + +/** 3D vector cross product. + * + * The w (4th) component can be any value on input, and is guaranteed to be 0 + * in the result. The result is not affected in any way by either vector's w + * componemnt + */ +vec4f_t crossf (vec4f_t a, vec4f_t b) __attribute__((const)); +vec4f_t +crossf (vec4f_t a, vec4f_t b) +{ + static const vec4i_t A = {1, 2, 0, 3}; + vec4f_t c = a * __builtin_shuffle (b, A) - __builtin_shuffle (a, A) * b; + return __builtin_shuffle(c, A); +} + +/** 4D vector dot product. + * + * The w component *IS* significant, but if it is 0 in either vector, then + * the result will be as for a 3D dot product. + * + * Note that the dot product is in all 4 of the return value's elements. This + * helps optimize vector math as the scalar is already pre-spread. If just the + * scalar is required, use result[0]. + */ +vec4f_t dotf (vec4f_t a, vec4f_t b) __attribute__((const)); +vec4f_t +dotf (vec4f_t a, vec4f_t b) +{ + vec4f_t c = a * b; + c = _mm_hadd_ps (c, c); + c = _mm_hadd_ps (c, c); + return c; +} + +/** Quaternion product. + * + * The vector is interpreted as a quaternion instead of a regular 4D vector. + * The quaternion may be of any magnitude, so this is more generally useful. + * than if the quaternion was required to be unit length. + */ +vec4f_t qmulf (vec4f_t a, vec4f_t b) __attribute__((const)); +vec4f_t +qmulf (vec4f_t a, vec4f_t b) +{ + // results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation) + // doesn't seem to adversly affect precision + vec4f_t c = crossf (a, b) + a * b[3] + a[3] * b; + vec4f_t d = dotf (a, b); + // zero out the vector component of dot product so only the scalar remains + d = _mm_insert_ps (d, d, 0xf7); + return c - d; +} + +/** Optimized quaterion-vector multiplication for vector rotation. + * + * If the vector's w component is not zero, then the result's w component + * is the cosine of the full rotation angle scaled by the vector's w component. + * The quaternion is assumed to be unit. + */ +vec4f_t qvmulf (vec4f_t q, vec4f_t v) __attribute__((const)); +vec4f_t +qvmulf (vec4f_t q, vec4f_t v) +{ + float s = q[3]; + // zero the scalar of the quaternion. Results in an extra operation, but + // avoids adding precision issues. + q = _mm_insert_ps (q, q, 0xf8); + vec4f_t c = crossf (q, v); // q.w is 0 so v.w is irrelevant + vec4f_t qv = dotf (q, v); + vec4f_t qq = dotf (q, q); + + return (s * s - qq) * v + 2 * (qv * q + s * c); +} + +/** Create the quaternion representing the shortest rotation from a to b. + * + * Both a and b are assumed to be 3D vectors (w components 0), but a resonable + * (but incorrect) result will still be produced if either a or b is a 4D + * vector. The rotation axis will be the same as if both vectors were 3D, but + * the magnitude of the rotation will be different. + */ +vec4f_t qrotf (vec4f_t a, vec4f_t b) __attribute__((const)); +vec4f_t +qrotf (vec4f_t a, vec4f_t b) +{ + vec4f_t ma = _mm_sqrt_ps (dotf (a, a)); + vec4f_t mb = _mm_sqrt_ps (dotf (b, b)); + vec4f_t den = 2 * ma * mb; + vec4f_t t = mb * a + ma * b; + vec4f_t mba_mab = _mm_sqrt_ps (dotf (t, t)); + vec4f_t q = crossf (a, b) / mba_mab; + q[3] = (mba_mab / den)[0]; + return q; +} + +vec4f_t loadvec3f (const float v3[3]) __attribute__((pure, access(read_only, 1))); +vec4f_t +loadvec3f (const float v3[3]) +{ + vec4f_t v4; + + // this had to be in asm otherwise gcc thinks v4 is only partially + // initialized, and gcc 10 does not use the zero flags when generating + // the code, resulting in a memory access to load a 0 into v4[3] + // + // The first instruction zeros v4[3] while loading v4[0] + asm ("\n\ + vinsertps $0x08, %1, %0, %0 \n\ + vinsertps $0x10, %2, %0, %0 \n\ + vinsertps $0x20, %3, %0, %0 \n\ + " + : "=v"(v4) + : "m"(v3[0]), "m"(v3[1]), "m"(v3[2])); + return v4; +} + +void storevec3f (float v3[3], vec4f_t v4) __attribute__((access (write_only, 1))); +void storevec3f (float v3[3], vec4f_t v4) +{ + v3[0] = v4[0]; + v3[1] = v4[1]; + v3[2] = v4[2]; +} + +#endif//__QF_simd_vec4f_h diff --git a/libs/util/test/Makemodule.am b/libs/util/test/Makemodule.am index 9447a7807..ca777f864 100644 --- a/libs/util/test/Makemodule.am +++ b/libs/util/test/Makemodule.am @@ -14,6 +14,7 @@ libs_util_tests = \ libs/util/test/test-seb \ libs/util/test/test-seg \ libs/util/test/test-set \ + libs/util/test/test-simd \ libs/util/test/test-txtbuffer \ libs/util/test/test-vrect @@ -81,6 +82,10 @@ libs_util_test_test_set_SOURCES=libs/util/test/test-set.c libs_util_test_test_set_LDADD=libs/util/libQFutil.la libs_util_test_test_set_DEPENDENCIES=libs/util/libQFutil.la +libs_util_test_test_simd_SOURCES=libs/util/test/test-simd.c +libs_util_test_test_simd_LDADD=libs/util/libQFutil.la +libs_util_test_test_simd_DEPENDENCIES=libs/util/libQFutil.la + libs_util_test_test_txtbuffer_SOURCES=libs/util/test/test-txtbuffer.c libs_util_test_test_txtbuffer_LDADD=libs/util/libQFutil.la libs_util_test_test_txtbuffer_DEPENDENCIES=libs/util/libQFutil.la diff --git a/libs/util/test/test-simd.c b/libs/util/test/test-simd.c new file mode 100644 index 000000000..ec1b145ac --- /dev/null +++ b/libs/util/test/test-simd.c @@ -0,0 +1,289 @@ +#include +#include +#include +#include + +#include "QF/simd/vec4d.h" +#include "QF/simd/vec4f.h" + +#define right { 1, 0, 0 } +#define forward { 0, 1, 0 } +#define up { 0, 0, 1 } +#define one { 1, 1, 1, 1 } +#define half { 0.5, 0.5, 0.5, 0.5 } +#define zero { 0, 0, 0, 0 } +#define qident { 0, 0, 0, 1 } +#define qtest { 0.64, 0.48, 0, 0.6 } + +#define nright { -1, 0, 0 } +#define nforward { 0, -1, 0 } +#define nup { 0, 0, -1 } +#define none { -1, -1, -1, -1 } +#define nqident { 0, 0, 0, -1 } + +#define s05 0.70710678118654757 + +typedef struct { + vec4d_t (*op) (vec4d_t a, vec4d_t b); + vec4d_t a; + vec4d_t b; + vec4d_t expect; + vec4d_t ulp_errors; +} vec4d_test_t; + +typedef struct { + vec4f_t (*op) (vec4f_t a, vec4f_t b); + vec4f_t a; + vec4f_t b; + vec4f_t expect; + vec4f_t ulp_errors; +} vec4f_test_t; + +static vec4d_test_t vec4d_tests[] = { + // 3D dot products + { dotd, right, right, one }, + { dotd, right, forward, zero }, + { dotd, right, up, zero }, + { dotd, forward, right, zero }, + { dotd, forward, forward, one }, + { dotd, forward, up, zero }, + { dotd, up, right, zero }, + { dotd, up, forward, zero }, + { dotd, up, up, one }, + + // one is 4D, so its self dot product is 4 + { dotd, one, one, { 4, 4, 4, 4} }, + { dotd, one, none, {-4, -4, -4, -4} }, + + // 3D cross products + { crossd, right, right, zero }, + { crossd, right, forward, up }, + { crossd, right, up, nforward }, + { crossd, forward, right, nup }, + { crossd, forward, forward, zero }, + { crossd, forward, up, right }, + { crossd, up, right, forward }, + { crossd, up, forward, nright }, + { crossd, up, up, zero }, + // double whammy tests: cross product with an angled vector and + // ensuring that a 4d vector (non-zero w component) does not affect + // the result, including the result's w component remaining zero. + { crossd, right, one, { 0, -1, 1} }, + { crossd, forward, one, { 1, 0, -1} }, + { crossd, up, one, {-1, 1, 0} }, + { crossd, one, right, { 0, 1, -1} }, + { crossd, one, forward, {-1, 0, 1} }, + { crossd, one, up, { 1, -1, 0} }, + // This one fails when optimizing with -mfma (which is why fma is not + // used): ulp errors in z and w + { crossd, qtest, qtest, {0, 0, 0, 0} }, + + { qmuld, qident, qident, qident }, + { qmuld, qident, right, right }, + { qmuld, qident, forward, forward }, + { qmuld, qident, up, up }, + { qmuld, right, qident, right }, + { qmuld, forward, qident, forward }, + { qmuld, up, qident, up }, + { qmuld, right, right, nqident }, + { qmuld, right, forward, up }, + { qmuld, right, up, nforward }, + { qmuld, forward, right, nup }, + { qmuld, forward, forward, nqident }, + { qmuld, forward, up, right }, + { qmuld, up, right, forward }, + { qmuld, up, forward, nright }, + { qmuld, up, up, nqident }, + { qmuld, one, one, { 2, 2, 2, -2 } }, + { qmuld, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } }, + // This one fails when optimizing with -mfma (which is why fma is not + // used): ulp error in z + { qmuld, qtest, qtest, {0.768, 0.576, 0, -0.28} }, + + // The one vector is not unit (magnitude 2), so using it as a rotation + // quaternion results in scaling by 4. However, it still has the effect + // of rotating 120 degrees around the axis equidistant from the three + // orthogonal axes such that x->y->z->x + { qvmuld, one, right, { 0, 4, 0, 0 } }, + { qvmuld, one, forward, { 0, 0, 4, 0 } }, + { qvmuld, one, up, { 4, 0, 0, 0 } }, + { qvmuld, one, {1,1,1,0}, { 4, 4, 4, 0 } }, + { qvmuld, one, one, { 4, 4, 4, -2 } }, + // The half vector is unit. + { qvmuld, half, right, forward }, + { qvmuld, half, forward, up }, + { qvmuld, half, up, right }, + { qvmuld, half, {1,1,1,0}, { 1, 1, 1, 0 } }, + // one is a 4D vector and qvmuld is meant for 3D vectors. However, it + // seems that the vector's w has no effect on the 3d portion of the + // result, but the result's w is cosine of the full rotation angle + // scaled by quaternion magnitude and vector w + { qvmuld, half, one, { 1, 1, 1, -0.5 } }, + { qvmuld, half, {2,2,2,2}, { 2, 2, 2, -1 } }, + { qvmuld, qtest, right, {0.5392, 0.6144, -0.576, 0} }, + { qvmuld, qtest, forward, {0.6144, 0.1808, 0.768, 0}, + {0, -2.7e-17, 0, 0} }, + { qvmuld, qtest, up, {0.576, -0.768, -0.28, 0} }, + + { qrotd, right, right, qident }, + { qrotd, right, forward, { 0, 0, s05, s05 }, + {0, 0, -1.1e-16, 0} }, + { qrotd, right, up, { 0, -s05, 0, s05 }, + {0, 1.1e-16, 0, 0} }, + { qrotd, forward, right, { 0, 0, -s05, s05 }, + {0, 0, 1.1e-16, 0} }, + { qrotd, forward, forward, qident }, + { qrotd, forward, up, { s05, 0, 0, s05 }, + {-1.1e-16, 0, 0, 0} }, + { qrotd, up, right, { 0, s05, 0, s05 }, + {0, -1.1e-16, 0, 0} }, + { qrotd, up, forward, { -s05, 0, 0, s05 }, + { 1.1e-16, 0, 0, 0} }, + { qrotd, up, up, qident }, +}; +#define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0]))) + +static vec4f_test_t vec4f_tests[] = { + // 3D dot products + { dotf, right, right, one }, + { dotf, right, forward, zero }, + { dotf, right, up, zero }, + { dotf, forward, right, zero }, + { dotf, forward, forward, one }, + { dotf, forward, up, zero }, + { dotf, up, right, zero }, + { dotf, up, forward, zero }, + { dotf, up, up, one }, + + // one is 4D, so its self dot product is 4 + { dotf, one, one, { 4, 4, 4, 4} }, + { dotf, one, none, {-4, -4, -4, -4} }, + + // 3D cross products + { crossf, right, right, zero }, + { crossf, right, forward, up }, + { crossf, right, up, nforward }, + { crossf, forward, right, nup }, + { crossf, forward, forward, zero }, + { crossf, forward, up, right }, + { crossf, up, right, forward }, + { crossf, up, forward, nright }, + { crossf, up, up, zero }, + // double whammy tests: cross product with an angled vector and + // ensuring that a 4d vector (non-zero w component) does not affect + // the result, including the result's w component remaining zero. + { crossf, right, one, { 0, -1, 1} }, + { crossf, forward, one, { 1, 0, -1} }, + { crossf, up, one, {-1, 1, 0} }, + { crossf, one, right, { 0, 1, -1} }, + { crossf, one, forward, {-1, 0, 1} }, + { crossf, one, up, { 1, -1, 0} }, + { crossf, qtest, qtest, {0, 0, 0, 0} }, + + { qmulf, qident, qident, qident }, + { qmulf, qident, right, right }, + { qmulf, qident, forward, forward }, + { qmulf, qident, up, up }, + { qmulf, right, qident, right }, + { qmulf, forward, qident, forward }, + { qmulf, up, qident, up }, + { qmulf, right, right, nqident }, + { qmulf, right, forward, up }, + { qmulf, right, up, nforward }, + { qmulf, forward, right, nup }, + { qmulf, forward, forward, nqident }, + { qmulf, forward, up, right }, + { qmulf, up, right, forward }, + { qmulf, up, forward, nright }, + { qmulf, up, up, nqident }, + { qmulf, one, one, { 2, 2, 2, -2 } }, + { qmulf, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } }, + { qmulf, qtest, qtest, {0.768, 0.576, 0, -0.28}, + {0, 6e-8, 0, 3e-8} }, + + // The one vector is not unit (magnitude 2), so using it as a rotation + // quaternion results in scaling by 4. However, it still has the effect + // of rotating 120 degrees around the axis equidistant from the three + // orthogonal axes such that x->y->z->x + { qvmulf, one, right, { 0, 4, 0, 0 } }, + { qvmulf, one, forward, { 0, 0, 4, 0 } }, + { qvmulf, one, up, { 4, 0, 0, 0 } }, + { qvmulf, one, {1,1,1,0}, { 4, 4, 4, 0 } }, + { qvmulf, one, one, { 4, 4, 4, -2 } }, + { qvmulf, qtest, right, {0.5392, 0.6144, -0.576, 0}, + {0, -5.9e-08, -6e-8, 0} }, + { qvmulf, qtest, forward, {0.6144, 0.1808, 0.768, 0}, + {-5.9e-08, 1.5e-08, 0, 0} }, + { qvmulf, qtest, up, {0.576, -0.768, -0.28, 0}, + {6e-8, 0, 3e-8, 0} }, + + { qrotf, right, right, qident }, + { qrotf, right, forward, { 0, 0, s05, s05 } }, + { qrotf, right, up, { 0, -s05, 0, s05 } }, + { qrotf, forward, right, { 0, 0, -s05, s05 } }, + { qrotf, forward, forward, qident }, + { qrotf, forward, up, { s05, 0, 0, s05 } }, + { qrotf, up, right, { 0, s05, 0, s05 } }, + { qrotf, up, forward, { -s05, 0, 0, s05 } }, + { qrotf, up, up, qident }, +}; +#define num_vec4f_tests (sizeof (vec4f_tests) / (sizeof (vec4f_tests[0]))) + +static int +run_vec4d_tests (void) +{ + int ret = 0; + + for (size_t i = 0; i < num_vec4d_tests; i++) { + __auto_type test = &vec4d_tests[i]; + vec4d_t result = test->op (test->a, test->b); + vec4d_t expect = test->expect + test->ulp_errors; + vec4l_t res = result != expect; + if (res[0] || res[1] || res[2] || res[3]) { + ret |= 1; + printf ("\nrun_vec4d_tests\n"); + printf ("a: " VEC4D_FMT "\n", VEC4_EXP(test->a)); + printf ("b: " VEC4D_FMT "\n", VEC4_EXP(test->b)); + printf ("r: " VEC4D_FMT "\n", VEC4_EXP(result)); + printf ("t: " VEC4L_FMT "\n", VEC4_EXP(res)); + printf ("E: " VEC4D_FMT "\n", VEC4_EXP(expect)); + printf ("e: " VEC4D_FMT "\n", VEC4_EXP(test->expect)); + printf ("u: " VEC4D_FMT "\n", VEC4_EXP(test->ulp_errors)); + } + } + return ret; +} + +static int +run_vec4f_tests (void) +{ + int ret = 0; + + for (size_t i = 0; i < num_vec4f_tests; i++) { + __auto_type test = &vec4f_tests[i]; + vec4f_t result = test->op (test->a, test->b); + vec4f_t expect = test->expect + test->ulp_errors; + vec4i_t res = result != expect; + if (res[0] || res[1] || res[2] || res[3]) { + ret |= 1; + printf ("\nrun_vec4f_tests\n"); + printf ("a: " VEC4F_FMT "\n", VEC4_EXP(test->a)); + printf ("b: " VEC4F_FMT "\n", VEC4_EXP(test->b)); + printf ("r: " VEC4F_FMT "\n", VEC4_EXP(result)); + printf ("t: " VEC4I_FMT "\n", VEC4_EXP(res)); + printf ("E: " VEC4F_FMT "\n", VEC4_EXP(expect)); + printf ("e: " VEC4F_FMT "\n", VEC4_EXP(test->expect)); + printf ("u: " VEC4F_FMT "\n", VEC4_EXP(test->ulp_errors)); + } + } + return ret; +} + +int +main (void) +{ + int ret = 0; + ret |= run_vec4d_tests (); + ret |= run_vec4f_tests (); + return ret; +}