From 7bf90e5f4a8659da87af15c50b2580988ad36405 Mon Sep 17 00:00:00 2001 From: Bill Currie Date: Fri, 1 Jan 2021 19:49:20 +0900 Subject: [PATCH] [util] Sort out implementation issues for simd --- include/QF/simd/vec4d.h | 192 ++++++++++++++++++++++++++----------- include/QF/simd/vec4f.h | 192 +++++++++++++++++++++++++------------ libs/util/Makemodule.am | 1 + libs/util/simd.c | 37 +++++++ libs/util/test/test-simd.c | 4 + 5 files changed, 308 insertions(+), 118 deletions(-) create mode 100644 libs/util/simd.c diff --git a/include/QF/simd/vec4d.h b/include/QF/simd/vec4d.h index 11e173eff..6bd984357 100644 --- a/include/QF/simd/vec4d.h +++ b/include/QF/simd/vec4d.h @@ -32,13 +32,107 @@ #include "QF/simd/types.h" +GNU89INLINE inline vec4d_t vsqrtd (vec4d_t v) __attribute__((const)); +GNU89INLINE inline vec4d_t vceild (vec4d_t v) __attribute__((const)); +GNU89INLINE inline vec4d_t vfloord (vec4d_t v) __attribute__((const)); +GNU89INLINE inline vec4d_t vtruncd (vec4d_t v) __attribute__((const)); /** 3D vector cross product. * * The w (4th) component can be any value on input, and is guaranteed to be 0 * in the result. The result is not affected in any way by either vector's w * componemnt */ -vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const)); +GNU89INLINE inline vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const)); +/** 4D vector dot product. + * + * The w component *IS* significant, but if it is 0 in either vector, then + * the result will be as for a 3D dot product. + * + * Note that the dot product is in all 4 of the return value's elements. This + * helps optimize vector math as the scalar is already pre-spread. If just the + * scalar is required, use result[0]. + */ +GNU89INLINE inline vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const)); +/** Quaternion product. + * + * The vector is interpreted as a quaternion instead of a regular 4D vector. + * The quaternion may be of any magnitude, so this is more generally useful. + * than if the quaternion was required to be unit length. + */ +GNU89INLINE inline vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const)); +/** Optimized quaterion-vector multiplication for vector rotation. + * + * If the vector's w component is not zero, then the result's w component + * is the cosine of the full rotation angle scaled by the vector's w component. + * The quaternion is assumed to be unit. If the quaternion is not unit, the + * vector will be scaled by the square of the quaternion's magnitude. + */ +GNU89INLINE inline vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const)); +/** Create the quaternion representing the shortest rotation from a to b. + * + * Both a and b are assumed to be 3D vectors (w components 0), but a resonable + * (but incorrect) result will still be produced if either a or b is a 4D + * vector. The rotation axis will be the same as if both vectors were 3D, but + * the magnitude of the rotation will be different. + */ +GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const)); +/** Return the conjugate of the quaternion. + * + * That is, [-x, -y, -z, w]. + */ +GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const)); +GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure, access(read_only, 1))); +GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4) __attribute__((access (write_only, 1))); + +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4d_t +vsqrtd (vec4d_t v) +{ + return _mm256_sqrt_pd (v); +} + +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4d_t +vceild (vec4d_t v) +{ + return _mm256_ceil_pd (v); +} + +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4d_t +vfloord (vec4d_t v) +{ + return _mm256_floor_pd (v); +} + +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4d_t +vtruncd (vec4d_t v) +{ + return _mm256_round_pd (v, _MM_FROUND_TRUNC); +} + +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t crossd (vec4d_t a, vec4d_t b) { @@ -49,16 +143,11 @@ crossd (vec4d_t a, vec4d_t b) return __builtin_shuffle(c, A); } -/** 4D vector dot product. - * - * The w component *IS* significant, but if it is 0 in either vector, then - * the result will be as for a 3D dot product. - * - * Note that the dot product is in all 4 of the return value's elements. This - * helps optimize vector math as the scalar is already pre-spread. If just the - * scalar is required, use result[0]. - */ -vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const)); +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t dotd (vec4d_t a, vec4d_t b) { @@ -69,13 +158,11 @@ dotd (vec4d_t a, vec4d_t b) return c; } -/** Quaternion product. - * - * The vector is interpreted as a quaternion instead of a regular 4D vector. - * The quaternion may be of any magnitude, so this is more generally useful. - * than if the quaternion was required to be unit length. - */ -vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const)); +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t qmuld (vec4d_t a, vec4d_t b) { @@ -89,14 +176,11 @@ qmuld (vec4d_t a, vec4d_t b) return c - d; } -/** Optimized quaterion-vector multiplication for vector rotation. - * - * If the vector's w component is not zero, then the result's w component - * is the cosine of the full rotation angle scaled by the vector's w component. - * The quaternion is assumed to be unit. If the quaternion is not unit, the - * vector will be scaled by the square of the quaternion's magnitude. - */ -vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const)); +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t qvmuld (vec4d_t q, vec4d_t v) { @@ -112,19 +196,16 @@ qvmuld (vec4d_t q, vec4d_t v) return (s * s - qq) * v + 2 * (qv * q + s * c); } -/** Create the quaternion representing the shortest rotation from a to b. - * - * Both a and b are assumed to be 3D vectors (w components 0), but a resonable - * (but incorrect) result will still be produced if either a or b is a 4D - * vector. The rotation axis will be the same as if both vectors were 3D, but - * the magnitude of the rotation will be different. - */ -vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const)); +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t qrotd (vec4d_t a, vec4d_t b) { - vec4d_t ma = _mm256_sqrt_pd (dotd (a, a)); - vec4d_t mb = _mm256_sqrt_pd (dotd (b, b)); + vec4d_t ma = vsqrtd (dotd (a, a)); + vec4d_t mb = vsqrtd (dotd (b, b)); vec4d_t den = 2 * ma * mb; vec4d_t t = mb * a + ma * b; vec4d_t mba_mab = _mm256_sqrt_pd (dotd (t, t)); @@ -133,7 +214,11 @@ qrotd (vec4d_t a, vec4d_t b) return q; } -vec4d_t qconjd (vec4d_t q) __attribute__((const)); +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t qconjd (vec4d_t q) { @@ -141,7 +226,11 @@ qconjd (vec4d_t q) return _mm256_xor_pd (q, (__m256d) neg); } -vec4d_t loadvec3d (const double v3[]) __attribute__((pure, access(read_only, 1))); +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4d_t loadvec3d (const double v3[]) { @@ -153,30 +242,17 @@ loadvec3d (const double v3[]) return v4; } -void storevec3d (double v3[3], vec4d_t v4) __attribute__((access (write_only, 1))); -void storevec3d (double v3[3], vec4d_t v4) +#ifndef IMPLEMENT_VEC4D_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +void +storevec3d (double v3[3], vec4d_t v4) { v3[0] = v4[0]; v3[1] = v4[1]; v3[2] = v4[2]; } -vec4d_t vceild (vec4d_t v) __attribute__((const)); -vec4d_t vceild (vec4d_t v) -{ - return _mm256_ceil_pd (v); -} - -vec4d_t vfloord (vec4d_t v) __attribute__((const)); -vec4d_t vfloord (vec4d_t v) -{ - return _mm256_floor_pd (v); -} - -vec4d_t vtruncd (vec4d_t v) __attribute__((const)); -vec4d_t vtruncd (vec4d_t v) -{ - return _mm256_round_pd (v, _MM_FROUND_TRUNC); -} - #endif//__QF_simd_vec4d_h diff --git a/include/QF/simd/vec4f.h b/include/QF/simd/vec4f.h index 997127a94..5c0b4cc3d 100644 --- a/include/QF/simd/vec4f.h +++ b/include/QF/simd/vec4f.h @@ -32,21 +32,17 @@ #include "QF/simd/types.h" +GNU89INLINE inline vec4f_t vsqrtf (vec4f_t v) __attribute__((const)); +GNU89INLINE inline vec4f_t vceilf (vec4f_t v) __attribute__((const)); +GNU89INLINE inline vec4f_t vfloorf (vec4f_t v) __attribute__((const)); +GNU89INLINE inline vec4f_t vtruncf (vec4f_t v) __attribute__((const)); /** 3D vector cross product. * * The w (4th) component can be any value on input, and is guaranteed to be 0 * in the result. The result is not affected in any way by either vector's w * componemnt */ -vec4f_t crossf (vec4f_t a, vec4f_t b) __attribute__((const)); -vec4f_t -crossf (vec4f_t a, vec4f_t b) -{ - static const vec4i_t A = {1, 2, 0, 3}; - vec4f_t c = a * __builtin_shuffle (b, A) - __builtin_shuffle (a, A) * b; - return __builtin_shuffle(c, A); -} - +GNU89INLINE inline vec4f_t crossf (vec4f_t a, vec4f_t b) __attribute__((const)); /** 4D vector dot product. * * The w component *IS* significant, but if it is 0 in either vector, then @@ -56,7 +52,99 @@ crossf (vec4f_t a, vec4f_t b) * helps optimize vector math as the scalar is already pre-spread. If just the * scalar is required, use result[0]. */ -vec4f_t dotf (vec4f_t a, vec4f_t b) __attribute__((const)); +GNU89INLINE inline vec4f_t dotf (vec4f_t a, vec4f_t b) __attribute__((const)); +/** Quaternion product. + * + * The vector is interpreted as a quaternion instead of a regular 4D vector. + * The quaternion may be of any magnitude, so this is more generally useful. + * than if the quaternion was required to be unit length. + */ +GNU89INLINE inline vec4f_t qmulf (vec4f_t a, vec4f_t b) __attribute__((const)); +/** Optimized quaterion-vector multiplication for vector rotation. + * + * If the vector's w component is not zero, then the result's w component + * is the cosine of the full rotation angle scaled by the vector's w component. + * The quaternion is assumed to be unit. + */ +GNU89INLINE inline vec4f_t qvmulf (vec4f_t q, vec4f_t v) __attribute__((const)); +/** Create the quaternion representing the shortest rotation from a to b. + * + * Both a and b are assumed to be 3D vectors (w components 0), but a resonable + * (but incorrect) result will still be produced if either a or b is a 4D + * vector. The rotation axis will be the same as if both vectors were 3D, but + * the magnitude of the rotation will be different. + */ +GNU89INLINE inline vec4f_t qrotf (vec4f_t a, vec4f_t b) __attribute__((const)); +/** Return the conjugate of the quaternion. + * + * That is, [-x, -y, -z, w]. + */ +GNU89INLINE inline vec4f_t qconjf (vec4f_t q) __attribute__((const)); +GNU89INLINE inline vec4f_t loadvec3f (const float v3[3]) __attribute__((pure, access(read_only, 1))); +GNU89INLINE inline void storevec3f (float v3[3], vec4f_t v4) __attribute__((access (write_only, 1))); + +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4f_t +vsqrtf (vec4f_t v) +{ + return _mm_sqrt_ps (v); +} + +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4f_t +vceilf (vec4f_t v) +{ + return _mm_ceil_ps (v); +} + +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4f_t +vfloorf (vec4f_t v) +{ + return _mm_floor_ps (v); +} + +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4f_t +vtruncf (vec4f_t v) +{ + return _mm_round_ps (v, _MM_FROUND_TRUNC); +} + +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +vec4f_t +crossf (vec4f_t a, vec4f_t b) +{ + static const vec4i_t A = {1, 2, 0, 3}; + vec4f_t c = a * __builtin_shuffle (b, A) - __builtin_shuffle (a, A) * b; + return __builtin_shuffle(c, A); +} + +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4f_t dotf (vec4f_t a, vec4f_t b) { @@ -66,13 +154,11 @@ dotf (vec4f_t a, vec4f_t b) return c; } -/** Quaternion product. - * - * The vector is interpreted as a quaternion instead of a regular 4D vector. - * The quaternion may be of any magnitude, so this is more generally useful. - * than if the quaternion was required to be unit length. - */ -vec4f_t qmulf (vec4f_t a, vec4f_t b) __attribute__((const)); +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4f_t qmulf (vec4f_t a, vec4f_t b) { @@ -85,13 +171,11 @@ qmulf (vec4f_t a, vec4f_t b) return c - d; } -/** Optimized quaterion-vector multiplication for vector rotation. - * - * If the vector's w component is not zero, then the result's w component - * is the cosine of the full rotation angle scaled by the vector's w component. - * The quaternion is assumed to be unit. - */ -vec4f_t qvmulf (vec4f_t q, vec4f_t v) __attribute__((const)); +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4f_t qvmulf (vec4f_t q, vec4f_t v) { @@ -106,19 +190,16 @@ qvmulf (vec4f_t q, vec4f_t v) return (s * s - qq) * v + 2 * (qv * q + s * c); } -/** Create the quaternion representing the shortest rotation from a to b. - * - * Both a and b are assumed to be 3D vectors (w components 0), but a resonable - * (but incorrect) result will still be produced if either a or b is a 4D - * vector. The rotation axis will be the same as if both vectors were 3D, but - * the magnitude of the rotation will be different. - */ -vec4f_t qrotf (vec4f_t a, vec4f_t b) __attribute__((const)); +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4f_t qrotf (vec4f_t a, vec4f_t b) { - vec4f_t ma = _mm_sqrt_ps (dotf (a, a)); - vec4f_t mb = _mm_sqrt_ps (dotf (b, b)); + vec4f_t ma = vsqrtf (dotf (a, a)); + vec4f_t mb = vsqrtf (dotf (b, b)); vec4f_t den = 2 * ma * mb; vec4f_t t = mb * a + ma * b; vec4f_t mba_mab = _mm_sqrt_ps (dotf (t, t)); @@ -127,11 +208,11 @@ qrotf (vec4f_t a, vec4f_t b) return q; } -/** Return the conjugate of the quaternion. - * - * That is, [-x, -y, -z, w]. - */ -vec4f_t qconjf (vec4f_t q) __attribute__((const)); +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4f_t qconjf (vec4f_t q) { @@ -139,7 +220,11 @@ qconjf (vec4f_t q) return _mm_xor_ps (q, (__m128) neg); } -vec4f_t loadvec3f (const float v3[3]) __attribute__((pure, access(read_only, 1))); +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif vec4f_t loadvec3f (const float v3[3]) { @@ -160,30 +245,17 @@ loadvec3f (const float v3[3]) return v4; } -void storevec3f (float v3[3], vec4f_t v4) __attribute__((access (write_only, 1))); -void storevec3f (float v3[3], vec4f_t v4) +#ifndef IMPLEMENT_VEC4F_Funcs +GNU89INLINE inline +#else +VISIBLE +#endif +void +storevec3f (float v3[3], vec4f_t v4) { v3[0] = v4[0]; v3[1] = v4[1]; v3[2] = v4[2]; } -vec4f_t vceilf (vec4f_t v) __attribute__((const)); -vec4f_t vceilf (vec4f_t v) -{ - return _mm_ceil_ps (v); -} - -vec4f_t vfloorf (vec4f_t v) __attribute__((const)); -vec4f_t vfloorf (vec4f_t v) -{ - return _mm_floor_ps (v); -} - -vec4f_t vtruncf (vec4f_t v) __attribute__((const)); -vec4f_t vtruncf (vec4f_t v) -{ - return _mm_round_ps (v, _MM_FROUND_TRUNC); -} - #endif//__QF_simd_vec4f_h diff --git a/libs/util/Makemodule.am b/libs/util/Makemodule.am index f554289be..d2ad47b0c 100644 --- a/libs/util/Makemodule.am +++ b/libs/util/Makemodule.am @@ -74,6 +74,7 @@ libs_util_libQFutil_la_SOURCES= \ libs/util/script.c \ libs/util/segtext.c \ libs/util/set.c \ + libs/util/simd.c \ libs/util/sizebuf.c \ libs/util/string.c \ libs/util/sys.c \ diff --git a/libs/util/simd.c b/libs/util/simd.c new file mode 100644 index 000000000..f5a0fcd35 --- /dev/null +++ b/libs/util/simd.c @@ -0,0 +1,37 @@ +/* + simd.c + + SIMD math support + + Copyright (C) 2020 Bill Currie + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + + Free Software Foundation, Inc. + 59 Temple Place - Suite 330 + Boston, MA 02111-1307, USA + +*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include + +#define IMPLEMENT_VEC4F_Funcs +#define IMPLEMENT_VEC4D_Funcs + +#include "QF/simd/vec4d.h" +#include "QF/simd/vec4f.h" diff --git a/libs/util/test/test-simd.c b/libs/util/test/test-simd.c index 8fcc6935c..006e57526 100644 --- a/libs/util/test/test-simd.c +++ b/libs/util/test/test-simd.c @@ -1,3 +1,7 @@ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + #include #include #include