/* QF/simd/vec4d.h Vector functions for vec4d_t (ie, double precision) Copyright (C) 2020 Bill Currie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to: Free Software Foundation, Inc. 59 Temple Place - Suite 330 Boston, MA 02111-1307, USA */ #ifndef __QF_simd_vec4d_h #define __QF_simd_vec4d_h #include #include "QF/simd/types.h" #include "QF/simd/vec2d.h" GNU89INLINE inline vec4d_t vsqrt4d (vec4d_t v) __attribute__((const)); GNU89INLINE inline vec4d_t vceil4d (vec4d_t v) __attribute__((const)); GNU89INLINE inline vec4d_t vfloor4d (vec4d_t v) __attribute__((const)); GNU89INLINE inline vec4d_t vtrunc4d (vec4d_t v) __attribute__((const)); /** 3D vector cross product. * * The w (4th) component can be any value on input, and is guaranteed to be 0 * in the result. The result is not affected in any way by either vector's w * componemnt */ GNU89INLINE inline vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const)); /** 4D vector dot product. * * The w component *IS* significant, but if it is 0 in either vector, then * the result will be as for a 3D dot product. * * Note that the dot product is in all 4 of the return value's elements. This * helps optimize vector math as the scalar is already pre-spread. If just the * scalar is required, use result[0]. */ GNU89INLINE inline vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const)); /** Quaternion product. * * The vector is interpreted as a quaternion instead of a regular 4D vector. * The quaternion may be of any magnitude, so this is more generally useful. * than if the quaternion was required to be unit length. */ GNU89INLINE inline vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const)); /** Optimized quaterion-vector multiplication for vector rotation. * * \note This is the inverse of vqmulf * * If the vector's w component is not zero, then the result's w component * is the cosine of the full rotation angle scaled by the vector's w component. * The quaternion is assumed to be unit. If the quaternion is not unit, the * vector will be scaled by the square of the quaternion's magnitude. */ GNU89INLINE inline vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const)); /** Optimized vector-quaterion multiplication for vector rotation. * * \note This is the inverse of qvmulf * * If the vector's w component is not zero, then the result's w component * is the cosine of the full rotation angle scaled by the vector's w component. * The quaternion is assumed to be unit. If the quaternion is not unit, the * vector will be scaled by the square of the quaternion's magnitude. */ GNU89INLINE inline vec4d_t vqmuld (vec4d_t v, vec4d_t q) __attribute__((const)); /** Create the quaternion representing the shortest rotation from a to b. * * Both a and b are assumed to be 3D vectors (w components 0), but a resonable * (but incorrect) result will still be produced if either a or b is a 4D * vector. The rotation axis will be the same as if both vectors were 3D, but * the magnitude of the rotation will be different. */ GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const)); /** Return the conjugate of the quaternion. * * That is, [-x, -y, -z, w]. */ GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const)); GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure)); GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4); GNU89INLINE inline vec4l_t loadvec3l (const int64_t *v3) __attribute__((pure)); GNU89INLINE inline vec4l_t loadvec3l1 (const int64_t *v3) __attribute__((pure)); GNU89INLINE inline void storevec3l (int64_t *v3, vec4l_t v4); #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t vsqrt4d (vec4d_t v) { #ifndef __AVX__ vec2d_t xy = { v[0], v[1] }; vec2d_t zw = { v[2], v[3] }; xy = vsqrt2d (xy); zw = vsqrt2d (zw); return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] }; #else return _mm256_sqrt_pd (v); #endif } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t vceil4d (vec4d_t v) { #ifndef __AVX__ vec2d_t xy = { v[0], v[1] }; vec2d_t zw = { v[2], v[3] }; xy = vceil2d (xy); zw = vceil2d (zw); return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] }; #else return _mm256_ceil_pd (v); #endif } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t vfloor4d (vec4d_t v) { #ifndef __AVX__ vec2d_t xy = { v[0], v[1] }; vec2d_t zw = { v[2], v[3] }; xy = vfloor2d (xy); zw = vfloor2d (zw); return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] }; #else return _mm256_floor_pd (v); #endif } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t vtrunc4d (vec4d_t v) { #ifndef __AVX__ vec2d_t xy = { v[0], v[1] }; vec2d_t zw = { v[2], v[3] }; xy = vtrunc2d (xy); zw = vtrunc2d (zw); return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] }; #else return _mm256_round_pd (v, _MM_FROUND_TRUNC); #endif } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t crossd (vec4d_t a, vec4d_t b) { vec4d_t c = a * (vec4d_t) {b[1], b[2], b[0], b[3]} - b * (vec4d_t) {a[1], a[2], a[0], a[3]}; return (vec4d_t) {c[1], c[2], c[0], c[3]}; } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t dotd (vec4d_t a, vec4d_t b) { vec4d_t c = a * b; #ifndef __AVX__ c = (vec4d_t) { c[0] + c[1], c[0] + c[1], c[2] + c[3], c[2] + c[3] }; #else c = _mm256_hadd_pd (c, c); #endif c += (vec4d_t) {c[2], c[3], c[0], c[1]}; return c; } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t qmuld (vec4d_t a, vec4d_t b) { // results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation) // doesn't seem to adversly affect precision vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b; vec4d_t d = dotd (a, b); // zero out the vector component of dot product so only the scalar remains d = (vec4d_t) { 0, 0, 0, d[3] }; return c - d; } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t qvmuld (vec4d_t q, vec4d_t v) // ^^^ ^^^ { double s = q[3]; // zero the scalar of the quaternion. Results in an extra operation, but // avoids adding precision issues. #ifndef __AVX__ q = (vec4d_t) { q[0], q[1], q[2], 0 }; #else vec4d_t z = {}; q = _mm256_blend_pd (q, z, 0x08); #endif vec4d_t c = crossd (q, v); vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant vec4d_t qq = dotd (q, q); // vvv return (s * s - qq) * v + 2 * (qv * q + s * c); } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t vqmuld (vec4d_t v, vec4d_t q) // ^^^ ^^^ { double s = q[3]; // zero the scalar of the quaternion. Results in an extra operation, but // avoids adding precision issues. #ifndef __AVX__ q = (vec4d_t) { q[0], q[1], q[2], 0 }; #else vec4d_t z = {}; q = _mm256_blend_pd (q, z, 0x08); #endif vec4d_t c = crossd (q, v); vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant vec4d_t qq = dotd (q, q); // vvv return (s * s - qq) * v + 2 * (qv * q - s * c); } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t qrotd (vec4d_t a, vec4d_t b) { vec4d_t ma = vsqrt4d (dotd (a, a)); vec4d_t mb = vsqrt4d (dotd (b, b)); vec4d_t den = 2 * ma * mb; vec4d_t t = mb * a + ma * b; vec4d_t mba_mab = vsqrt4d (dotd (t, t)); vec4d_t q = crossd (a, b) / mba_mab; q[3] = (mba_mab / den)[0]; return q; } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t qconjd (vec4d_t q) { const uint64_t sign = UINT64_C(1) << 63; const vec4l_t neg = { sign, sign, sign, 0 }; return (vec4d_t) ((vec4l_t) q ^ neg); } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif vec4d_t loadvec3d (const double v3[]) { vec4d_t v4 = {}; v4[0] = v3[0]; v4[1] = v3[1]; v4[2] = v3[2]; return v4; } #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline #else VISIBLE #endif void storevec3d (double v3[3], vec4d_t v4) { v3[0] = v4[0]; v3[1] = v4[1]; v3[2] = v4[2]; } #ifndef IMPLEMENT_VEC4F_Funcs GNU89INLINE inline #else VISIBLE #endif vec4l_t loadvec3l (const int64_t *v3) { vec4l_t v4 = { v3[0], v3[1], v3[2], 0 }; return v4; } #ifndef IMPLEMENT_VEC4F_Funcs GNU89INLINE inline #else VISIBLE #endif vec4l_t loadvec3l1 (const int64_t *v3) { vec4l_t v4 = { v3[0], v3[1], v3[2], 1 }; return v4; } #ifndef IMPLEMENT_VEC4F_Funcs GNU89INLINE inline #else VISIBLE #endif void storevec3l (int64_t *v3, vec4l_t v4) { v3[0] = v4[0]; v3[1] = v4[1]; v3[2] = v4[2]; } #endif//__QF_simd_vec4d_h