mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2025-01-21 00:11:02 +00:00
23613ca985
In some cases, gcc-11 does a good enough job translating normal looking C expressions so use just those, but other times need to dig around for an appropriate intrinsic. Also, now need to disable psapi warnings when compiling for anything less than avx.
379 lines
9 KiB
C
379 lines
9 KiB
C
/*
|
|
QF/simd/vec4d.h
|
|
|
|
Vector functions for vec4d_t (ie, double precision)
|
|
|
|
Copyright (C) 2020 Bill Currie <bill@taniwha.org>
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to:
|
|
|
|
Free Software Foundation, Inc.
|
|
59 Temple Place - Suite 330
|
|
Boston, MA 02111-1307, USA
|
|
|
|
*/
|
|
|
|
#ifndef __QF_simd_vec4d_h
|
|
#define __QF_simd_vec4d_h
|
|
|
|
#include <immintrin.h>
|
|
|
|
#include "QF/simd/types.h"
|
|
#include "QF/simd/vec2d.h"
|
|
|
|
GNU89INLINE inline vec4d_t vsqrt4d (vec4d_t v) __attribute__((const));
|
|
GNU89INLINE inline vec4d_t vceil4d (vec4d_t v) __attribute__((const));
|
|
GNU89INLINE inline vec4d_t vfloor4d (vec4d_t v) __attribute__((const));
|
|
GNU89INLINE inline vec4d_t vtrunc4d (vec4d_t v) __attribute__((const));
|
|
/** 3D vector cross product.
|
|
*
|
|
* The w (4th) component can be any value on input, and is guaranteed to be 0
|
|
* in the result. The result is not affected in any way by either vector's w
|
|
* componemnt
|
|
*/
|
|
GNU89INLINE inline vec4d_t crossd (vec4d_t a, vec4d_t b) __attribute__((const));
|
|
/** 4D vector dot product.
|
|
*
|
|
* The w component *IS* significant, but if it is 0 in either vector, then
|
|
* the result will be as for a 3D dot product.
|
|
*
|
|
* Note that the dot product is in all 4 of the return value's elements. This
|
|
* helps optimize vector math as the scalar is already pre-spread. If just the
|
|
* scalar is required, use result[0].
|
|
*/
|
|
GNU89INLINE inline vec4d_t dotd (vec4d_t a, vec4d_t b) __attribute__((const));
|
|
/** Quaternion product.
|
|
*
|
|
* The vector is interpreted as a quaternion instead of a regular 4D vector.
|
|
* The quaternion may be of any magnitude, so this is more generally useful.
|
|
* than if the quaternion was required to be unit length.
|
|
*/
|
|
GNU89INLINE inline vec4d_t qmuld (vec4d_t a, vec4d_t b) __attribute__((const));
|
|
/** Optimized quaterion-vector multiplication for vector rotation.
|
|
*
|
|
* \note This is the inverse of vqmulf
|
|
*
|
|
* If the vector's w component is not zero, then the result's w component
|
|
* is the cosine of the full rotation angle scaled by the vector's w component.
|
|
* The quaternion is assumed to be unit. If the quaternion is not unit, the
|
|
* vector will be scaled by the square of the quaternion's magnitude.
|
|
*/
|
|
GNU89INLINE inline vec4d_t qvmuld (vec4d_t q, vec4d_t v) __attribute__((const));
|
|
/** Optimized vector-quaterion multiplication for vector rotation.
|
|
*
|
|
* \note This is the inverse of qvmulf
|
|
*
|
|
* If the vector's w component is not zero, then the result's w component
|
|
* is the cosine of the full rotation angle scaled by the vector's w component.
|
|
* The quaternion is assumed to be unit. If the quaternion is not unit, the
|
|
* vector will be scaled by the square of the quaternion's magnitude.
|
|
*/
|
|
GNU89INLINE inline vec4d_t vqmuld (vec4d_t v, vec4d_t q) __attribute__((const));
|
|
/** Create the quaternion representing the shortest rotation from a to b.
|
|
*
|
|
* Both a and b are assumed to be 3D vectors (w components 0), but a resonable
|
|
* (but incorrect) result will still be produced if either a or b is a 4D
|
|
* vector. The rotation axis will be the same as if both vectors were 3D, but
|
|
* the magnitude of the rotation will be different.
|
|
*/
|
|
GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const));
|
|
/** Return the conjugate of the quaternion.
|
|
*
|
|
* That is, [-x, -y, -z, w].
|
|
*/
|
|
GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const));
|
|
GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure));
|
|
GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4);
|
|
GNU89INLINE inline vec4l_t loadvec3l (const int64_t *v3) __attribute__((pure));
|
|
GNU89INLINE inline vec4l_t loadvec3l1 (const int64_t *v3) __attribute__((pure));
|
|
GNU89INLINE inline void storevec3l (int64_t *v3, vec4l_t v4);
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
vsqrt4d (vec4d_t v)
|
|
{
|
|
#ifndef __AVX__
|
|
vec2d_t xy = { v[0], v[1] };
|
|
vec2d_t zw = { v[2], v[3] };
|
|
xy = vsqrt2d (xy);
|
|
zw = vsqrt2d (zw);
|
|
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
|
|
#else
|
|
return _mm256_sqrt_pd (v);
|
|
#endif
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
vceil4d (vec4d_t v)
|
|
{
|
|
#ifndef __AVX__
|
|
vec2d_t xy = { v[0], v[1] };
|
|
vec2d_t zw = { v[2], v[3] };
|
|
xy = vceil2d (xy);
|
|
zw = vceil2d (zw);
|
|
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
|
|
#else
|
|
return _mm256_ceil_pd (v);
|
|
#endif
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
vfloor4d (vec4d_t v)
|
|
{
|
|
#ifndef __AVX__
|
|
vec2d_t xy = { v[0], v[1] };
|
|
vec2d_t zw = { v[2], v[3] };
|
|
xy = vfloor2d (xy);
|
|
zw = vfloor2d (zw);
|
|
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
|
|
#else
|
|
return _mm256_floor_pd (v);
|
|
#endif
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
vtrunc4d (vec4d_t v)
|
|
{
|
|
#ifndef __AVX__
|
|
vec2d_t xy = { v[0], v[1] };
|
|
vec2d_t zw = { v[2], v[3] };
|
|
xy = vtrunc2d (xy);
|
|
zw = vtrunc2d (zw);
|
|
return (vec4d_t) { xy[0], xy[1], zw[0], zw[1] };
|
|
#else
|
|
return _mm256_round_pd (v, _MM_FROUND_TRUNC);
|
|
#endif
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
crossd (vec4d_t a, vec4d_t b)
|
|
{
|
|
static const vec4l_t A = {1, 2, 0, 3};
|
|
vec4d_t c = a * __builtin_shuffle (b, A);
|
|
vec4d_t d = __builtin_shuffle (a, A) * b;
|
|
c = c - d;
|
|
return __builtin_shuffle(c, A);
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
dotd (vec4d_t a, vec4d_t b)
|
|
{
|
|
vec4d_t c = a * b;
|
|
#ifndef __AVX__
|
|
c = (vec4d_t) { c[0] + c[1], c[0] + c[1], c[2] + c[3], c[2] + c[3] };
|
|
#else
|
|
c = _mm256_hadd_pd (c, c);
|
|
#endif
|
|
static const vec4l_t A = {2, 3, 0, 1};
|
|
c += __builtin_shuffle(c, A);
|
|
return c;
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
qmuld (vec4d_t a, vec4d_t b)
|
|
{
|
|
// results in [2*as*bs, as*b + bs*a + a x b] ([scalar, vector] notation)
|
|
// doesn't seem to adversly affect precision
|
|
vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b;
|
|
vec4d_t d = dotd (a, b);
|
|
// zero out the vector component of dot product so only the scalar remains
|
|
d = (vec4d_t) { 0, 0, 0, d[3] };
|
|
return c - d;
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
qvmuld (vec4d_t q, vec4d_t v)
|
|
// ^^^ ^^^
|
|
{
|
|
double s = q[3];
|
|
// zero the scalar of the quaternion. Results in an extra operation, but
|
|
// avoids adding precision issues.
|
|
#ifndef __AVX__
|
|
q = (vec4d_t) { q[0], q[1], q[2], 0 };
|
|
#else
|
|
vec4d_t z = {};
|
|
q = _mm256_blend_pd (q, z, 0x08);
|
|
#endif
|
|
vec4d_t c = crossd (q, v);
|
|
vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant
|
|
vec4d_t qq = dotd (q, q);
|
|
|
|
// vvv
|
|
return (s * s - qq) * v + 2 * (qv * q + s * c);
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
vqmuld (vec4d_t v, vec4d_t q)
|
|
// ^^^ ^^^
|
|
{
|
|
double s = q[3];
|
|
// zero the scalar of the quaternion. Results in an extra operation, but
|
|
// avoids adding precision issues.
|
|
#ifndef __AVX__
|
|
q = (vec4d_t) { q[0], q[1], q[2], 0 };
|
|
#else
|
|
vec4d_t z = {};
|
|
q = _mm256_blend_pd (q, z, 0x08);
|
|
#endif
|
|
vec4d_t c = crossd (q, v);
|
|
vec4d_t qv = dotd (q, v); // q.w is 0 so v.w is irrelevant
|
|
vec4d_t qq = dotd (q, q);
|
|
|
|
// vvv
|
|
return (s * s - qq) * v + 2 * (qv * q - s * c);
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
qrotd (vec4d_t a, vec4d_t b)
|
|
{
|
|
vec4d_t ma = vsqrt4d (dotd (a, a));
|
|
vec4d_t mb = vsqrt4d (dotd (b, b));
|
|
vec4d_t den = 2 * ma * mb;
|
|
vec4d_t t = mb * a + ma * b;
|
|
vec4d_t mba_mab = vsqrt4d (dotd (t, t));
|
|
vec4d_t q = crossd (a, b) / mba_mab;
|
|
q[3] = (mba_mab / den)[0];
|
|
return q;
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
qconjd (vec4d_t q)
|
|
{
|
|
const uint64_t sign = UINT64_C(1) << 63;
|
|
const vec4l_t neg = { sign, sign, sign, 0 };
|
|
return (vec4d_t) ((vec4l_t) q ^ neg);
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4d_t
|
|
loadvec3d (const double v3[])
|
|
{
|
|
vec4d_t v4 = {};
|
|
|
|
v4[0] = v3[0];
|
|
v4[1] = v3[1];
|
|
v4[2] = v3[2];
|
|
return v4;
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4D_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
void
|
|
storevec3d (double v3[3], vec4d_t v4)
|
|
{
|
|
v3[0] = v4[0];
|
|
v3[1] = v4[1];
|
|
v3[2] = v4[2];
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4F_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4l_t
|
|
loadvec3l (const int64_t *v3)
|
|
{
|
|
vec4l_t v4 = { v3[0], v3[1], v3[2], 0 };
|
|
return v4;
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4F_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
vec4l_t
|
|
loadvec3l1 (const int64_t *v3)
|
|
{
|
|
vec4l_t v4 = { v3[0], v3[1], v3[2], 1 };
|
|
return v4;
|
|
}
|
|
|
|
#ifndef IMPLEMENT_VEC4F_Funcs
|
|
GNU89INLINE inline
|
|
#else
|
|
VISIBLE
|
|
#endif
|
|
void
|
|
storevec3l (int64_t *v3, vec4l_t v4)
|
|
{
|
|
v3[0] = v4[0];
|
|
v3[1] = v4[1];
|
|
v3[2] = v4[2];
|
|
}
|
|
|
|
#endif//__QF_simd_vec4d_h
|