mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2024-11-23 04:42:32 +00:00
[util] Add basic SIMD implemented vector functions
They take advantage of gcc's vector_size attribute and so only cross, dot, qmul, qvmul and qrot (create rotation quaternion from two vectors) are needed at this stage as basic (per-component) math is supported natively by gcc. The provided functions work on horizontal (array-of-structs) data, ie a vec4d_t or vec4f_t represents a single vector, or traditional vector layout. Vertical layout (struct-of-arrays) does not need any special functions as the regular math can be used to operate on four vectors at a time. Functions are provided for loading a vec4 from a vec3 (4th element set to 0) and storing a vec4 into a vec3 (discarding the 4th element). With this, QF will require AVX2 support (needed for vec4d_t). Without support for doubles, SSE is possible, but may not be worthwhile for horizontal data. Fused-multiply-add is NOT used because it alters the results between unoptimized and optimized code, resulting in -mfma really meaning -mfast-math-anyway. I really do not want to have to debug issues that occur only in optimized code.
This commit is contained in:
parent
d824db68a5
commit
09a10f80e1
6 changed files with 699 additions and 0 deletions
|
@ -82,6 +82,11 @@ AC_ARG_ENABLE(optimize,
|
|||
optimize=yes
|
||||
)
|
||||
|
||||
QF_CC_OPTION(-mavx2)
|
||||
dnl fma is not used as it is the equivalent of turning on
|
||||
dnl -funsafe-math-optimizations
|
||||
dnl QF_CC_OPTION(-mfma)
|
||||
|
||||
AC_MSG_CHECKING(for optimization)
|
||||
if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then
|
||||
AC_MSG_RESULT(yes)
|
||||
|
|
85
include/QF/simd/types.h
Normal file
85
include/QF/simd/types.h
Normal file
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
QF/simd/types.h
|
||||
|
||||
Type definitions for QF SIMD values
|
||||
|
||||
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to:
|
||||
|
||||
Free Software Foundation, Inc.
|
||||
59 Temple Place - Suite 330
|
||||
Boston, MA 02111-1307, USA
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __QF_simd_types_h
|
||||
#define __QF_simd_types_h
|
||||
|
||||
#define VEC_TYPE(t,n) typedef t n __attribute__ ((vector_size (4*sizeof (t))))
|
||||
|
||||
/** Three element vector type for interfacing with compact data.
|
||||
*
|
||||
* This cannot be used directly by SIMD code and must be loaded and stored
|
||||
* using load_vec3d() and store_vec3d() respectively.
|
||||
*/
|
||||
typedef double vec3d_t[3];
|
||||
|
||||
#ifdef __AVX__
|
||||
/** Four element vector type for horizontal (AOS) vector data.
|
||||
*
|
||||
* This is used for both vectors (3D and 4D) and quaternions. 3D vectors
|
||||
* are simply vec4d_t values with 0 in the 4th element.
|
||||
*
|
||||
* Also usable with vertical (SOA) code, in which case each vec4d_t represents
|
||||
* a single component from four vectors, or a single row/column (depending on
|
||||
* context) of an Nx4 or 4xN matrix.
|
||||
*/
|
||||
VEC_TYPE (double, vec4d_t);
|
||||
|
||||
/** Used mostly for __builtin_shuffle.
|
||||
*/
|
||||
VEC_TYPE (long, vec4l_t);
|
||||
#endif
|
||||
|
||||
/** Three element vector type for interfacing with compact data.
|
||||
*
|
||||
* This cannot be used directly by SIMD code and must be loaded and stored
|
||||
* using load_vec3f() and store_vec3f() respectively.
|
||||
*/
|
||||
typedef float vec3f_t[3];
|
||||
|
||||
/** Four element vector type for horizontal (AOS) vector data.
|
||||
*
|
||||
* This is used for both vectors (3D and 4D) and quaternions. 3D vectors
|
||||
* are simply vec4f_t values with 0 in the 4th element.
|
||||
*
|
||||
* Also usable with vertical (SOA) code, in which case each vec4f_t represents
|
||||
* a single component from four vectors, or a single row/column (depending on
|
||||
* context) of an Nx4 or 4xN matrix.
|
||||
*/
|
||||
VEC_TYPE (float, vec4f_t);
|
||||
|
||||
/** Used mostly for __builtin_shuffle.
|
||||
*/
|
||||
VEC_TYPE (int, vec4i_t);
|
||||
|
||||
#define VEC4D_FMT "[%.17g, %.17g, %.17g, %.17g]"
|
||||
#define VEC4L_FMT "[%ld, %ld, %ld, %ld]"
|
||||
#define VEC4F_FMT "[%.9g, %.9g, %.9g, %.9g]"
|
||||
#define VEC4I_FMT "[%d, %d, %d, %d]"
|
||||
#define VEC4_EXP(v) (v)[0], (v)[1], (v)[2], (v)[3]
|
||||
|
||||
#endif//__QF_simd_types_h
|
156
include/QF/simd/vec4d.h
Normal file
156
include/QF/simd/vec4d.h
Normal file
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
QF/simd/vec4d.h
|
||||
|
||||
Vector functions for vec4d_t (ie, double precision)
|
||||
|
||||
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to:
|
||||
|
||||
Free Software Foundation, Inc.
|
||||
59 Temple Place - Suite 330
|
||||
Boston, MA 02111-1307, USA
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __QF_simd_vec4d_h
|
||||
#define __QF_simd_vec4d_h
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "QF/simd/types.h"
|
||||
|
||||
/** 3D vector cross product.
|
||||
*
|
||||
* The w (4th) component can be any value on input, and is guaranteed to be 0
|
||||
* in the result. The result is not affected in any way by either vector's w
|
||||
* componemnt
|
||||
*/
|
||||
vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const));
|
||||
vec4d_t
|
||||
crossd (vec4d_t a, vec4d_t b)
|
||||
{
|
||||
static const vec4l_t A = {1, 2, 0, 3};
|
||||
vec4d_t c = a * __builtin_shuffle (b, A);
|
||||
vec4d_t d = __builtin_shuffle (a, A) * b;
|
||||
c = c - d;
|
||||
return __builtin_shuffle(c, A);
|
||||
}
|
||||
|
||||
/** 4D vector dot product.
|
||||
*
|
||||
* The w component *IS* significant, but if it is 0 in either vector, then
|
||||
* the result will be as for a 3D dot product.
|
||||
*
|
||||
* Note that the dot product is in all 4 of the return value's elements. This
|
||||
* helps optimize vector math as the scalar is already pre-spread. If just the
|
||||
* scalar is required, use result[0].
|
||||
*/
|
||||
vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const));
|
||||
vec4d_t
|
||||
dotd (vec4d_t a, vec4d_t b)
|
||||
{
|
||||
vec4d_t c = a * b;
|
||||
c = _mm256_hadd_pd (c, c);
|
||||
static const vec4l_t A = {2, 3, 0, 1};
|
||||
c += __builtin_shuffle(c, A);
|
||||
return c;
|
||||
}
|
||||
|
||||
/** Quaternion product.
|
||||
*
|
||||
* The vector is interpreted as a quaternion instead of a regular 4D vector.
|
||||
* The quaternion may be of any magnitude, so this is more generally useful.
|
||||
* than if the quaternion was required to be unit length.
|
||||
*/
|
||||
vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const));
|
||||
vec4d_t
|
||||
qmuld (vec4d_t a, vec4d_t b)
|
||||
{
|
||||
// results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation)
|
||||
// doesn't seem to adversly affect precision
|
||||
vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b;
|
||||
vec4d_t d = dotd (a, b);
|
||||
// zero out the vector component of dot product so only the scalar remains
|
||||
d = _mm256_permute2f128_pd (d, d, 0x18);
|
||||
d = _mm256_permute4x64_pd (d, 0xc0);
|
||||
return c - d;
|
||||
}
|
||||
|
||||
/** Optimized quaterion-vector multiplication for vector rotation.
|
||||
*
|
||||
* If the vector's w component is not zero, then the result's w component
|
||||
* is the cosine of the full rotation angle scaled by the vector's w component.
|
||||
* The quaternion is assumed to be unit. If the quaternion is not unit, the
|
||||
* vector will be scaled by the square of the quaternion's magnitude.
|
||||
*/
|
||||
vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const));
|
||||
vec4d_t
|
||||
qvmuld (vec4d_t q, vec4d_t v)
|
||||
{
|
||||
double s = q[3];
|
||||
// zero the scalar of the quaternion. Results in an extra operation, but
|
||||
// avoids adding precision issues.
|
||||
vec4d_t z = {};
|
||||
q = _mm256_blend_pd (q, z, 0x08);
|
||||
vec4d_t c = crossd (q, v);
|
||||
vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant
|
||||
vec4d_t qq = dotd (q, q);
|
||||
|
||||
return (s * s - qq) * v + 2 * (qv * q + s * c);
|
||||
}
|
||||
|
||||
/** Create the quaternion representing the shortest rotation from a to b.
|
||||
*
|
||||
* Both a and b are assumed to be 3D vectors (w components 0), but a resonable
|
||||
* (but incorrect) result will still be produced if either a or b is a 4D
|
||||
* vector. The rotation axis will be the same as if both vectors were 3D, but
|
||||
* the magnitude of the rotation will be different.
|
||||
*/
|
||||
vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const));
|
||||
vec4d_t
|
||||
qrotd (vec4d_t a, vec4d_t b)
|
||||
{
|
||||
vec4d_t ma = _mm256_sqrt_pd (dotd (a, a));
|
||||
vec4d_t mb = _mm256_sqrt_pd (dotd (b, b));
|
||||
vec4d_t den = 2 * ma * mb;
|
||||
vec4d_t t = mb * a + ma * b;
|
||||
vec4d_t mba_mab = _mm256_sqrt_pd (dotd (t, t));
|
||||
vec4d_t q = crossd (a, b) / mba_mab;
|
||||
q[3] = (mba_mab / den)[0];
|
||||
return q;
|
||||
}
|
||||
|
||||
vec4d_t loadvec3d (const double v3[]) __attribute__((pure, access(read_only, 1)));
|
||||
vec4d_t
|
||||
loadvec3d (const double v3[])
|
||||
{
|
||||
vec4d_t v4 = {};
|
||||
|
||||
v4[0] = v3[0];
|
||||
v4[1] = v3[1];
|
||||
v4[2] = v3[2];
|
||||
return v4;
|
||||
}
|
||||
|
||||
void storevec3d (double v3[3], vec4d_t v4) __attribute__((access (write_only, 1)));
|
||||
void storevec3d (double v3[3], vec4d_t v4)
|
||||
{
|
||||
v3[0] = v4[0];
|
||||
v3[1] = v4[1];
|
||||
v3[2] = v4[2];
|
||||
}
|
||||
|
||||
#endif//__QF_simd_vec4d_h
|
159
include/QF/simd/vec4f.h
Normal file
159
include/QF/simd/vec4f.h
Normal file
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
QF/simd/vec4f.h
|
||||
|
||||
Vector functions for vec4f_t (ie, float precision)
|
||||
|
||||
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to:
|
||||
|
||||
Free Software Foundation, Inc.
|
||||
59 Temple Place - Suite 330
|
||||
Boston, MA 02111-1307, USA
|
||||
|
||||
*/
|
||||
|
||||
#ifndef __QF_simd_vec4f_h
|
||||
#define __QF_simd_vec4f_h
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "QF/simd/types.h"
|
||||
|
||||
/** 3D vector cross product.
|
||||
*
|
||||
* The w (4th) component can be any value on input, and is guaranteed to be 0
|
||||
* in the result. The result is not affected in any way by either vector's w
|
||||
* componemnt
|
||||
*/
|
||||
vec4f_t crossf (vec4f_t a, vec4f_t b) __attribute__((const));
|
||||
vec4f_t
|
||||
crossf (vec4f_t a, vec4f_t b)
|
||||
{
|
||||
static const vec4i_t A = {1, 2, 0, 3};
|
||||
vec4f_t c = a * __builtin_shuffle (b, A) - __builtin_shuffle (a, A) * b;
|
||||
return __builtin_shuffle(c, A);
|
||||
}
|
||||
|
||||
/** 4D vector dot product.
|
||||
*
|
||||
* The w component *IS* significant, but if it is 0 in either vector, then
|
||||
* the result will be as for a 3D dot product.
|
||||
*
|
||||
* Note that the dot product is in all 4 of the return value's elements. This
|
||||
* helps optimize vector math as the scalar is already pre-spread. If just the
|
||||
* scalar is required, use result[0].
|
||||
*/
|
||||
vec4f_t dotf (vec4f_t a, vec4f_t b) __attribute__((const));
|
||||
vec4f_t
|
||||
dotf (vec4f_t a, vec4f_t b)
|
||||
{
|
||||
vec4f_t c = a * b;
|
||||
c = _mm_hadd_ps (c, c);
|
||||
c = _mm_hadd_ps (c, c);
|
||||
return c;
|
||||
}
|
||||
|
||||
/** Quaternion product.
|
||||
*
|
||||
* The vector is interpreted as a quaternion instead of a regular 4D vector.
|
||||
* The quaternion may be of any magnitude, so this is more generally useful.
|
||||
* than if the quaternion was required to be unit length.
|
||||
*/
|
||||
vec4f_t qmulf (vec4f_t a, vec4f_t b) __attribute__((const));
|
||||
vec4f_t
|
||||
qmulf (vec4f_t a, vec4f_t b)
|
||||
{
|
||||
// results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation)
|
||||
// doesn't seem to adversly affect precision
|
||||
vec4f_t c = crossf (a, b) + a * b[3] + a[3] * b;
|
||||
vec4f_t d = dotf (a, b);
|
||||
// zero out the vector component of dot product so only the scalar remains
|
||||
d = _mm_insert_ps (d, d, 0xf7);
|
||||
return c - d;
|
||||
}
|
||||
|
||||
/** Optimized quaterion-vector multiplication for vector rotation.
|
||||
*
|
||||
* If the vector's w component is not zero, then the result's w component
|
||||
* is the cosine of the full rotation angle scaled by the vector's w component.
|
||||
* The quaternion is assumed to be unit.
|
||||
*/
|
||||
vec4f_t qvmulf (vec4f_t q, vec4f_t v) __attribute__((const));
|
||||
vec4f_t
|
||||
qvmulf (vec4f_t q, vec4f_t v)
|
||||
{
|
||||
float s = q[3];
|
||||
// zero the scalar of the quaternion. Results in an extra operation, but
|
||||
// avoids adding precision issues.
|
||||
q = _mm_insert_ps (q, q, 0xf8);
|
||||
vec4f_t c = crossf (q, v); // q.w is 0 so v.w is irrelevant
|
||||
vec4f_t qv = dotf (q, v);
|
||||
vec4f_t qq = dotf (q, q);
|
||||
|
||||
return (s * s - qq) * v + 2 * (qv * q + s * c);
|
||||
}
|
||||
|
||||
/** Create the quaternion representing the shortest rotation from a to b.
|
||||
*
|
||||
* Both a and b are assumed to be 3D vectors (w components 0), but a resonable
|
||||
* (but incorrect) result will still be produced if either a or b is a 4D
|
||||
* vector. The rotation axis will be the same as if both vectors were 3D, but
|
||||
* the magnitude of the rotation will be different.
|
||||
*/
|
||||
vec4f_t qrotf (vec4f_t a, vec4f_t b) __attribute__((const));
|
||||
vec4f_t
|
||||
qrotf (vec4f_t a, vec4f_t b)
|
||||
{
|
||||
vec4f_t ma = _mm_sqrt_ps (dotf (a, a));
|
||||
vec4f_t mb = _mm_sqrt_ps (dotf (b, b));
|
||||
vec4f_t den = 2 * ma * mb;
|
||||
vec4f_t t = mb * a + ma * b;
|
||||
vec4f_t mba_mab = _mm_sqrt_ps (dotf (t, t));
|
||||
vec4f_t q = crossf (a, b) / mba_mab;
|
||||
q[3] = (mba_mab / den)[0];
|
||||
return q;
|
||||
}
|
||||
|
||||
vec4f_t loadvec3f (const float v3[3]) __attribute__((pure, access(read_only, 1)));
|
||||
vec4f_t
|
||||
loadvec3f (const float v3[3])
|
||||
{
|
||||
vec4f_t v4;
|
||||
|
||||
// this had to be in asm otherwise gcc thinks v4 is only partially
|
||||
// initialized, and gcc 10 does not use the zero flags when generating
|
||||
// the code, resulting in a memory access to load a 0 into v4[3]
|
||||
//
|
||||
// The first instruction zeros v4[3] while loading v4[0]
|
||||
asm ("\n\
|
||||
vinsertps $0x08, %1, %0, %0 \n\
|
||||
vinsertps $0x10, %2, %0, %0 \n\
|
||||
vinsertps $0x20, %3, %0, %0 \n\
|
||||
"
|
||||
: "=v"(v4)
|
||||
: "m"(v3[0]), "m"(v3[1]), "m"(v3[2]));
|
||||
return v4;
|
||||
}
|
||||
|
||||
void storevec3f (float v3[3], vec4f_t v4) __attribute__((access (write_only, 1)));
|
||||
void storevec3f (float v3[3], vec4f_t v4)
|
||||
{
|
||||
v3[0] = v4[0];
|
||||
v3[1] = v4[1];
|
||||
v3[2] = v4[2];
|
||||
}
|
||||
|
||||
#endif//__QF_simd_vec4f_h
|
|
@ -14,6 +14,7 @@ libs_util_tests = \
|
|||
libs/util/test/test-seb \
|
||||
libs/util/test/test-seg \
|
||||
libs/util/test/test-set \
|
||||
libs/util/test/test-simd \
|
||||
libs/util/test/test-txtbuffer \
|
||||
libs/util/test/test-vrect
|
||||
|
||||
|
@ -81,6 +82,10 @@ libs_util_test_test_set_SOURCES=libs/util/test/test-set.c
|
|||
libs_util_test_test_set_LDADD=libs/util/libQFutil.la
|
||||
libs_util_test_test_set_DEPENDENCIES=libs/util/libQFutil.la
|
||||
|
||||
libs_util_test_test_simd_SOURCES=libs/util/test/test-simd.c
|
||||
libs_util_test_test_simd_LDADD=libs/util/libQFutil.la
|
||||
libs_util_test_test_simd_DEPENDENCIES=libs/util/libQFutil.la
|
||||
|
||||
libs_util_test_test_txtbuffer_SOURCES=libs/util/test/test-txtbuffer.c
|
||||
libs_util_test_test_txtbuffer_LDADD=libs/util/libQFutil.la
|
||||
libs_util_test_test_txtbuffer_DEPENDENCIES=libs/util/libQFutil.la
|
||||
|
|
289
libs/util/test/test-simd.c
Normal file
289
libs/util/test/test-simd.c
Normal file
|
@ -0,0 +1,289 @@
|
|||
#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "QF/simd/vec4d.h"
|
||||
#include "QF/simd/vec4f.h"
|
||||
|
||||
#define right { 1, 0, 0 }
|
||||
#define forward { 0, 1, 0 }
|
||||
#define up { 0, 0, 1 }
|
||||
#define one { 1, 1, 1, 1 }
|
||||
#define half { 0.5, 0.5, 0.5, 0.5 }
|
||||
#define zero { 0, 0, 0, 0 }
|
||||
#define qident { 0, 0, 0, 1 }
|
||||
#define qtest { 0.64, 0.48, 0, 0.6 }
|
||||
|
||||
#define nright { -1, 0, 0 }
|
||||
#define nforward { 0, -1, 0 }
|
||||
#define nup { 0, 0, -1 }
|
||||
#define none { -1, -1, -1, -1 }
|
||||
#define nqident { 0, 0, 0, -1 }
|
||||
|
||||
#define s05 0.70710678118654757
|
||||
|
||||
typedef struct {
|
||||
vec4d_t (*op) (vec4d_t a, vec4d_t b);
|
||||
vec4d_t a;
|
||||
vec4d_t b;
|
||||
vec4d_t expect;
|
||||
vec4d_t ulp_errors;
|
||||
} vec4d_test_t;
|
||||
|
||||
typedef struct {
|
||||
vec4f_t (*op) (vec4f_t a, vec4f_t b);
|
||||
vec4f_t a;
|
||||
vec4f_t b;
|
||||
vec4f_t expect;
|
||||
vec4f_t ulp_errors;
|
||||
} vec4f_test_t;
|
||||
|
||||
static vec4d_test_t vec4d_tests[] = {
|
||||
// 3D dot products
|
||||
{ dotd, right, right, one },
|
||||
{ dotd, right, forward, zero },
|
||||
{ dotd, right, up, zero },
|
||||
{ dotd, forward, right, zero },
|
||||
{ dotd, forward, forward, one },
|
||||
{ dotd, forward, up, zero },
|
||||
{ dotd, up, right, zero },
|
||||
{ dotd, up, forward, zero },
|
||||
{ dotd, up, up, one },
|
||||
|
||||
// one is 4D, so its self dot product is 4
|
||||
{ dotd, one, one, { 4, 4, 4, 4} },
|
||||
{ dotd, one, none, {-4, -4, -4, -4} },
|
||||
|
||||
// 3D cross products
|
||||
{ crossd, right, right, zero },
|
||||
{ crossd, right, forward, up },
|
||||
{ crossd, right, up, nforward },
|
||||
{ crossd, forward, right, nup },
|
||||
{ crossd, forward, forward, zero },
|
||||
{ crossd, forward, up, right },
|
||||
{ crossd, up, right, forward },
|
||||
{ crossd, up, forward, nright },
|
||||
{ crossd, up, up, zero },
|
||||
// double whammy tests: cross product with an angled vector and
|
||||
// ensuring that a 4d vector (non-zero w component) does not affect
|
||||
// the result, including the result's w component remaining zero.
|
||||
{ crossd, right, one, { 0, -1, 1} },
|
||||
{ crossd, forward, one, { 1, 0, -1} },
|
||||
{ crossd, up, one, {-1, 1, 0} },
|
||||
{ crossd, one, right, { 0, 1, -1} },
|
||||
{ crossd, one, forward, {-1, 0, 1} },
|
||||
{ crossd, one, up, { 1, -1, 0} },
|
||||
// This one fails when optimizing with -mfma (which is why fma is not
|
||||
// used): ulp errors in z and w
|
||||
{ crossd, qtest, qtest, {0, 0, 0, 0} },
|
||||
|
||||
{ qmuld, qident, qident, qident },
|
||||
{ qmuld, qident, right, right },
|
||||
{ qmuld, qident, forward, forward },
|
||||
{ qmuld, qident, up, up },
|
||||
{ qmuld, right, qident, right },
|
||||
{ qmuld, forward, qident, forward },
|
||||
{ qmuld, up, qident, up },
|
||||
{ qmuld, right, right, nqident },
|
||||
{ qmuld, right, forward, up },
|
||||
{ qmuld, right, up, nforward },
|
||||
{ qmuld, forward, right, nup },
|
||||
{ qmuld, forward, forward, nqident },
|
||||
{ qmuld, forward, up, right },
|
||||
{ qmuld, up, right, forward },
|
||||
{ qmuld, up, forward, nright },
|
||||
{ qmuld, up, up, nqident },
|
||||
{ qmuld, one, one, { 2, 2, 2, -2 } },
|
||||
{ qmuld, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } },
|
||||
// This one fails when optimizing with -mfma (which is why fma is not
|
||||
// used): ulp error in z
|
||||
{ qmuld, qtest, qtest, {0.768, 0.576, 0, -0.28} },
|
||||
|
||||
// The one vector is not unit (magnitude 2), so using it as a rotation
|
||||
// quaternion results in scaling by 4. However, it still has the effect
|
||||
// of rotating 120 degrees around the axis equidistant from the three
|
||||
// orthogonal axes such that x->y->z->x
|
||||
{ qvmuld, one, right, { 0, 4, 0, 0 } },
|
||||
{ qvmuld, one, forward, { 0, 0, 4, 0 } },
|
||||
{ qvmuld, one, up, { 4, 0, 0, 0 } },
|
||||
{ qvmuld, one, {1,1,1,0}, { 4, 4, 4, 0 } },
|
||||
{ qvmuld, one, one, { 4, 4, 4, -2 } },
|
||||
// The half vector is unit.
|
||||
{ qvmuld, half, right, forward },
|
||||
{ qvmuld, half, forward, up },
|
||||
{ qvmuld, half, up, right },
|
||||
{ qvmuld, half, {1,1,1,0}, { 1, 1, 1, 0 } },
|
||||
// one is a 4D vector and qvmuld is meant for 3D vectors. However, it
|
||||
// seems that the vector's w has no effect on the 3d portion of the
|
||||
// result, but the result's w is cosine of the full rotation angle
|
||||
// scaled by quaternion magnitude and vector w
|
||||
{ qvmuld, half, one, { 1, 1, 1, -0.5 } },
|
||||
{ qvmuld, half, {2,2,2,2}, { 2, 2, 2, -1 } },
|
||||
{ qvmuld, qtest, right, {0.5392, 0.6144, -0.576, 0} },
|
||||
{ qvmuld, qtest, forward, {0.6144, 0.1808, 0.768, 0},
|
||||
{0, -2.7e-17, 0, 0} },
|
||||
{ qvmuld, qtest, up, {0.576, -0.768, -0.28, 0} },
|
||||
|
||||
{ qrotd, right, right, qident },
|
||||
{ qrotd, right, forward, { 0, 0, s05, s05 },
|
||||
{0, 0, -1.1e-16, 0} },
|
||||
{ qrotd, right, up, { 0, -s05, 0, s05 },
|
||||
{0, 1.1e-16, 0, 0} },
|
||||
{ qrotd, forward, right, { 0, 0, -s05, s05 },
|
||||
{0, 0, 1.1e-16, 0} },
|
||||
{ qrotd, forward, forward, qident },
|
||||
{ qrotd, forward, up, { s05, 0, 0, s05 },
|
||||
{-1.1e-16, 0, 0, 0} },
|
||||
{ qrotd, up, right, { 0, s05, 0, s05 },
|
||||
{0, -1.1e-16, 0, 0} },
|
||||
{ qrotd, up, forward, { -s05, 0, 0, s05 },
|
||||
{ 1.1e-16, 0, 0, 0} },
|
||||
{ qrotd, up, up, qident },
|
||||
};
|
||||
#define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0])))
|
||||
|
||||
static vec4f_test_t vec4f_tests[] = {
|
||||
// 3D dot products
|
||||
{ dotf, right, right, one },
|
||||
{ dotf, right, forward, zero },
|
||||
{ dotf, right, up, zero },
|
||||
{ dotf, forward, right, zero },
|
||||
{ dotf, forward, forward, one },
|
||||
{ dotf, forward, up, zero },
|
||||
{ dotf, up, right, zero },
|
||||
{ dotf, up, forward, zero },
|
||||
{ dotf, up, up, one },
|
||||
|
||||
// one is 4D, so its self dot product is 4
|
||||
{ dotf, one, one, { 4, 4, 4, 4} },
|
||||
{ dotf, one, none, {-4, -4, -4, -4} },
|
||||
|
||||
// 3D cross products
|
||||
{ crossf, right, right, zero },
|
||||
{ crossf, right, forward, up },
|
||||
{ crossf, right, up, nforward },
|
||||
{ crossf, forward, right, nup },
|
||||
{ crossf, forward, forward, zero },
|
||||
{ crossf, forward, up, right },
|
||||
{ crossf, up, right, forward },
|
||||
{ crossf, up, forward, nright },
|
||||
{ crossf, up, up, zero },
|
||||
// double whammy tests: cross product with an angled vector and
|
||||
// ensuring that a 4d vector (non-zero w component) does not affect
|
||||
// the result, including the result's w component remaining zero.
|
||||
{ crossf, right, one, { 0, -1, 1} },
|
||||
{ crossf, forward, one, { 1, 0, -1} },
|
||||
{ crossf, up, one, {-1, 1, 0} },
|
||||
{ crossf, one, right, { 0, 1, -1} },
|
||||
{ crossf, one, forward, {-1, 0, 1} },
|
||||
{ crossf, one, up, { 1, -1, 0} },
|
||||
{ crossf, qtest, qtest, {0, 0, 0, 0} },
|
||||
|
||||
{ qmulf, qident, qident, qident },
|
||||
{ qmulf, qident, right, right },
|
||||
{ qmulf, qident, forward, forward },
|
||||
{ qmulf, qident, up, up },
|
||||
{ qmulf, right, qident, right },
|
||||
{ qmulf, forward, qident, forward },
|
||||
{ qmulf, up, qident, up },
|
||||
{ qmulf, right, right, nqident },
|
||||
{ qmulf, right, forward, up },
|
||||
{ qmulf, right, up, nforward },
|
||||
{ qmulf, forward, right, nup },
|
||||
{ qmulf, forward, forward, nqident },
|
||||
{ qmulf, forward, up, right },
|
||||
{ qmulf, up, right, forward },
|
||||
{ qmulf, up, forward, nright },
|
||||
{ qmulf, up, up, nqident },
|
||||
{ qmulf, one, one, { 2, 2, 2, -2 } },
|
||||
{ qmulf, one, { 2, 2, 2, -2 }, { 0, 0, 0, -8 } },
|
||||
{ qmulf, qtest, qtest, {0.768, 0.576, 0, -0.28},
|
||||
{0, 6e-8, 0, 3e-8} },
|
||||
|
||||
// The one vector is not unit (magnitude 2), so using it as a rotation
|
||||
// quaternion results in scaling by 4. However, it still has the effect
|
||||
// of rotating 120 degrees around the axis equidistant from the three
|
||||
// orthogonal axes such that x->y->z->x
|
||||
{ qvmulf, one, right, { 0, 4, 0, 0 } },
|
||||
{ qvmulf, one, forward, { 0, 0, 4, 0 } },
|
||||
{ qvmulf, one, up, { 4, 0, 0, 0 } },
|
||||
{ qvmulf, one, {1,1,1,0}, { 4, 4, 4, 0 } },
|
||||
{ qvmulf, one, one, { 4, 4, 4, -2 } },
|
||||
{ qvmulf, qtest, right, {0.5392, 0.6144, -0.576, 0},
|
||||
{0, -5.9e-08, -6e-8, 0} },
|
||||
{ qvmulf, qtest, forward, {0.6144, 0.1808, 0.768, 0},
|
||||
{-5.9e-08, 1.5e-08, 0, 0} },
|
||||
{ qvmulf, qtest, up, {0.576, -0.768, -0.28, 0},
|
||||
{6e-8, 0, 3e-8, 0} },
|
||||
|
||||
{ qrotf, right, right, qident },
|
||||
{ qrotf, right, forward, { 0, 0, s05, s05 } },
|
||||
{ qrotf, right, up, { 0, -s05, 0, s05 } },
|
||||
{ qrotf, forward, right, { 0, 0, -s05, s05 } },
|
||||
{ qrotf, forward, forward, qident },
|
||||
{ qrotf, forward, up, { s05, 0, 0, s05 } },
|
||||
{ qrotf, up, right, { 0, s05, 0, s05 } },
|
||||
{ qrotf, up, forward, { -s05, 0, 0, s05 } },
|
||||
{ qrotf, up, up, qident },
|
||||
};
|
||||
#define num_vec4f_tests (sizeof (vec4f_tests) / (sizeof (vec4f_tests[0])))
|
||||
|
||||
static int
|
||||
run_vec4d_tests (void)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
for (size_t i = 0; i < num_vec4d_tests; i++) {
|
||||
__auto_type test = &vec4d_tests[i];
|
||||
vec4d_t result = test->op (test->a, test->b);
|
||||
vec4d_t expect = test->expect + test->ulp_errors;
|
||||
vec4l_t res = result != expect;
|
||||
if (res[0] || res[1] || res[2] || res[3]) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_vec4d_tests\n");
|
||||
printf ("a: " VEC4D_FMT "\n", VEC4_EXP(test->a));
|
||||
printf ("b: " VEC4D_FMT "\n", VEC4_EXP(test->b));
|
||||
printf ("r: " VEC4D_FMT "\n", VEC4_EXP(result));
|
||||
printf ("t: " VEC4L_FMT "\n", VEC4_EXP(res));
|
||||
printf ("E: " VEC4D_FMT "\n", VEC4_EXP(expect));
|
||||
printf ("e: " VEC4D_FMT "\n", VEC4_EXP(test->expect));
|
||||
printf ("u: " VEC4D_FMT "\n", VEC4_EXP(test->ulp_errors));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
run_vec4f_tests (void)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
for (size_t i = 0; i < num_vec4f_tests; i++) {
|
||||
__auto_type test = &vec4f_tests[i];
|
||||
vec4f_t result = test->op (test->a, test->b);
|
||||
vec4f_t expect = test->expect + test->ulp_errors;
|
||||
vec4i_t res = result != expect;
|
||||
if (res[0] || res[1] || res[2] || res[3]) {
|
||||
ret |= 1;
|
||||
printf ("\nrun_vec4f_tests\n");
|
||||
printf ("a: " VEC4F_FMT "\n", VEC4_EXP(test->a));
|
||||
printf ("b: " VEC4F_FMT "\n", VEC4_EXP(test->b));
|
||||
printf ("r: " VEC4F_FMT "\n", VEC4_EXP(result));
|
||||
printf ("t: " VEC4I_FMT "\n", VEC4_EXP(res));
|
||||
printf ("E: " VEC4F_FMT "\n", VEC4_EXP(expect));
|
||||
printf ("e: " VEC4F_FMT "\n", VEC4_EXP(test->expect));
|
||||
printf ("u: " VEC4F_FMT "\n", VEC4_EXP(test->ulp_errors));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int ret = 0;
|
||||
ret |= run_vec4d_tests ();
|
||||
ret |= run_vec4f_tests ();
|
||||
return ret;
|
||||
}
|
Loading…
Reference in a new issue