quakeforge/include/QF/simd/vec4d.h
Bill Currie 87f3c206f1 [simd] Remove some intrinsics uses
GCC does a nice enough job compiling the more readable form (though
admittedly, hadd is possibly more readable than what's there for
dot[fd], hadd is supposedly slower than the shuffles and adds, and qfvis
seems to support that).
2022-05-20 11:09:15 +09:00

370 lines
8.8 KiB
C

/*
QF/simd/vec4d.h
Vector functions for vec4d_t (ie, double precision)
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
*/
#ifndef __QF_simd_vec4d_h
#define __QF_simd_vec4d_h
#include <immintrin.h>
#include "QF/simd/types.h"
#include "QF/simd/vec2d.h"
GNU89INLINE inline vec4d_t vsqrt4d (vec4d_t v) __attribute__((const));
GNU89INLINE inline vec4d_t vceil4d (vec4d_t v) __attribute__((const));
GNU89INLINE inline vec4d_t vfloor4d (vec4d_t v) __attribute__((const));
GNU89INLINE inline vec4d_t vtrunc4d (vec4d_t v) __attribute__((const));
/** 3D vector cross product.
*
* The w (4th) component can be any value on input, and is guaranteed to be 0
* in the result. The result is not affected in any way by either vector's w
* componemnt
*/
GNU89INLINE inline vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const));
/** 4D vector dot product.
*
* The w component *IS* significant, but if it is 0 in either vector, then
* the result will be as for a 3D dot product.
*
* Note that the dot product is in all 4 of the return value's elements. This
* helps optimize vector math as the scalar is already pre-spread. If just the
* scalar is required, use result[0].
*/
GNU89INLINE inline vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const));
/** Quaternion product.
*
* The vector is interpreted as a quaternion instead of a regular 4D vector.
* The quaternion may be of any magnitude, so this is more generally useful.
* than if the quaternion was required to be unit length.
*/
GNU89INLINE inline vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const));
/** Optimized quaterion-vector multiplication for vector rotation.
*
* \note This is the inverse of vqmulf
*
* If the vector's w component is not zero, then the result's w component
* is the cosine of the full rotation angle scaled by the vector's w component.
* The quaternion is assumed to be unit. If the quaternion is not unit, the
* vector will be scaled by the square of the quaternion's magnitude.
*/
GNU89INLINE inline vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const));
/** Optimized vector-quaterion multiplication for vector rotation.
*
* \note This is the inverse of qvmulf
*
* If the vector's w component is not zero, then the result's w component
* is the cosine of the full rotation angle scaled by the vector's w component.
* The quaternion is assumed to be unit. If the quaternion is not unit, the
* vector will be scaled by the square of the quaternion's magnitude.
*/
GNU89INLINE inline vec4d_t vqmuld (vec4d_t v, vec4d_t q) __attribute__((const));
/** Create the quaternion representing the shortest rotation from a to b.
*
* Both a and b are assumed to be 3D vectors (w components 0), but a resonable
* (but incorrect) result will still be produced if either a or b is a 4D
* vector. The rotation axis will be the same as if both vectors were 3D, but
* the magnitude of the rotation will be different.
*/
GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const));
/** Return the conjugate of the quaternion.
*
* That is, [-x, -y, -z, w].
*/
GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const));
GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure));
GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4);
GNU89INLINE inline vec4l_t loadvec3l (const int64_t *v3) __attribute__((pure));
GNU89INLINE inline vec4l_t loadvec3l1 (const int64_t *v3) __attribute__((pure));
GNU89INLINE inline void storevec3l (int64_t *v3, vec4l_t v4);
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
vsqrt4d (vec4d_t v)
{
#ifndef __AVX__
vec2d_t xy = { v[0], v[1] };
vec2d_t zw = { v[2], v[3] };
xy = vsqrt2d (xy);
zw = vsqrt2d (zw);
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
#else
return _mm256_sqrt_pd (v);
#endif
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
vceil4d (vec4d_t v)
{
#ifndef __AVX__
vec2d_t xy = { v[0], v[1] };
vec2d_t zw = { v[2], v[3] };
xy = vceil2d (xy);
zw = vceil2d (zw);
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
#else
return _mm256_ceil_pd (v);
#endif
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
vfloor4d (vec4d_t v)
{
#ifndef __AVX__
vec2d_t xy = { v[0], v[1] };
vec2d_t zw = { v[2], v[3] };
xy = vfloor2d (xy);
zw = vfloor2d (zw);
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
#else
return _mm256_floor_pd (v);
#endif
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
vtrunc4d (vec4d_t v)
{
#ifndef __AVX__
vec2d_t xy = { v[0], v[1] };
vec2d_t zw = { v[2], v[3] };
xy = vtrunc2d (xy);
zw = vtrunc2d (zw);
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
#else
return _mm256_round_pd (v, _MM_FROUND_TRUNC);
#endif
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
crossd (vec4d_t a, vec4d_t b)
{
vec4d_t c = a * (vec4d_t) {b[1], b[2], b[0], b[3]}
- b * (vec4d_t) {a[1], a[2], a[0], a[3]};
return (vec4d_t) {c[1], c[2], c[0], c[3]};
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
dotd (vec4d_t a, vec4d_t b)
{
vec4d_t c = a * b;
c += (vec4d_t) { c[3], c[0], c[1], c[2] };
c += (vec4d_t) { c[2], c[3], c[0], c[1] };
return c;
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
qmuld (vec4d_t a, vec4d_t b)
{
// results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation)
// doesn't seem to adversly affect precision
vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b;
vec4d_t d = dotd (a, b);
// zero out the vector component of dot product so only the scalar remains
d = (vec4d_t) { 0, 0, 0, d[3] };
return c - d;
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
qvmuld (vec4d_t q, vec4d_t v)
// ^^^ ^^^
{
double s = q[3];
// zero the scalar of the quaternion. Results in an extra operation, but
// avoids adding precision issues.
#ifndef __AVX__
q = (vec4d_t) { q[0], q[1], q[2], 0 };
#else
vec4d_t z = {};
q = _mm256_blend_pd (q, z, 0x08);
#endif
vec4d_t c = crossd (q, v);
vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant
vec4d_t qq = dotd (q, q);
// vvv
return (s * s - qq) * v + 2 * (qv * q + s * c);
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
vqmuld (vec4d_t v, vec4d_t q)
// ^^^ ^^^
{
double s = q[3];
// zero the scalar of the quaternion. Results in an extra operation, but
// avoids adding precision issues.
#ifndef __AVX__
q = (vec4d_t) { q[0], q[1], q[2], 0 };
#else
vec4d_t z = {};
q = _mm256_blend_pd (q, z, 0x08);
#endif
vec4d_t c = crossd (q, v);
vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant
vec4d_t qq = dotd (q, q);
// vvv
return (s * s - qq) * v + 2 * (qv * q - s * c);
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
qrotd (vec4d_t a, vec4d_t b)
{
vec4d_t ma = vsqrt4d (dotd (a, a));
vec4d_t mb = vsqrt4d (dotd (b, b));
vec4d_t den = 2 * ma * mb;
vec4d_t t = mb * a + ma * b;
vec4d_t mba_mab = vsqrt4d (dotd (t, t));
vec4d_t q = crossd (a, b) / mba_mab;
q[3] = (mba_mab / den)[0];
return q;
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
qconjd (vec4d_t q)
{
return (vec4d_t) { -q[0], -q[1], -q[2], q[3] };
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4d_t
loadvec3d (const double v3[])
{
vec4d_t v4 = {};
v4[0] = v3[0];
v4[1] = v3[1];
v4[2] = v3[2];
return v4;
}
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
storevec3d (double v3[3], vec4d_t v4)
{
v3[0] = v4[0];
v3[1] = v4[1];
v3[2] = v4[2];
}
#ifndef IMPLEMENT_VEC4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4l_t
loadvec3l (const int64_t *v3)
{
vec4l_t v4 = { v3[0], v3[1], v3[2], 0 };
return v4;
}
#ifndef IMPLEMENT_VEC4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4l_t
loadvec3l1 (const int64_t *v3)
{
vec4l_t v4 = { v3[0], v3[1], v3[2], 1 };
return v4;
}
#ifndef IMPLEMENT_VEC4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
storevec3l (int64_t *v3, vec4l_t v4)
{
v3[0] = v4[0];
v3[1] = v4[1];
v3[2] = v4[2];
}
#endif//__QF_simd_vec4d_h