[simd] Remove requirements for AVX2 for vec4d

It seems gcc-11 does a pretty good job of emulating the AVX2 instructions
(gcc no longer requires AVX2 for 256-bit wide vectors).
This commit is contained in:
Bill Currie 2022-01-06 18:06:56 +09:00
parent c0277c0b03
commit 80c5e2c3f6
3 changed files with 7 additions and 23 deletions

View file

@ -43,7 +43,6 @@ typedef double vec3d_t[3];
VEC_TYPE (double, vec2d_t, 2);
VEC_TYPE (int64_t, vec2l_t, 2);
#ifdef __AVX2__
/** Four element vector type for horizontal (AOS) vector data.
*
* This is used for both vectors (3D and 4D) and quaternions. 3D vectors
@ -58,7 +57,6 @@ VEC_TYPE (double, vec4d_t, 4);
/** Used mostly for __builtin_shuffle.
*/
VEC_TYPE (int64_t, vec4l_t, 4);
#endif
/** Three element vector type for interfacing with compact data.
*

View file

@ -28,7 +28,6 @@
#ifndef __QF_simd_vec4d_h
#define __QF_simd_vec4d_h
#ifdef __AVX2__
#include <immintrin.h>
#include "QF/simd/types.h"
@ -96,9 +95,9 @@ GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const));
GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const));
GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure));
GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4);
GNU89INLINE inline vec4l_t loadvec3l (const long *v3) __attribute__((pure));
GNU89INLINE inline vec4l_t loadvec3l1 (const long *v3) __attribute__((pure));
GNU89INLINE inline void storevec3l (long *v3, vec4l_t v4);
GNU89INLINE inline vec4l_t loadvec3l (const int64_t *v3) __attribute__((pure));
GNU89INLINE inline vec4l_t loadvec3l1 (const int64_t *v3) __attribute__((pure));
GNU89INLINE inline void storevec3l (int64_t *v3, vec4l_t v4);
#ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline
@ -187,8 +186,7 @@ qmuld (vec4d_t a, vec4d_t b)
vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b;
vec4d_t d = dotd (a, b);
// zero out the vector component of dot product so only the scalar remains
d = _mm256_permute2f128_pd (d, d, 0x18);
d = _mm256_permute4x64_pd (d, 0xc0);
d = (vec4d_t) { 0, 0, 0, d[3] };
return c - d;
}
@ -302,7 +300,7 @@ GNU89INLINE inline
VISIBLE
#endif
vec4l_t
loadvec3l (const long *v3)
loadvec3l (const int64_t *v3)
{
vec4l_t v4 = { v3[0], v3[1], v3[2], 0 };
return v4;
@ -314,7 +312,7 @@ GNU89INLINE inline
VISIBLE
#endif
vec4l_t
loadvec3l1 (const long *v3)
loadvec3l1 (const int64_t *v3)
{
vec4l_t v4 = { v3[0], v3[1], v3[2], 1 };
return v4;
@ -326,13 +324,11 @@ GNU89INLINE inline
VISIBLE
#endif
/* Store the first three elements of v4 into v3[0], v3[1] and v3[2].
 * The fourth element, v4[3], is not written anywhere.
 */
void
storevec3l (long *v3, vec4l_t v4)
storevec3l (int64_t *v3, vec4l_t v4)
{
v3[0] = v4[0];
v3[1] = v4[1];
v3[2] = v4[2];
}
#endif
#endif//__QF_simd_vec4d_h

View file

@ -48,7 +48,6 @@
#define s05 0.70710678118654757
#ifdef __AVX2__
typedef struct {
int line;
vec4d_t (*op) (vec4d_t a, vec4d_t b);
@ -57,7 +56,6 @@ typedef struct {
vec4d_t expect;
vec4d_t ulp_errors;
} vec4d_test_t;
#endif
typedef struct {
int line;
@ -94,7 +92,6 @@ typedef struct {
mat4f_t ulp_errors;
} mq4f_test_t;
#ifdef __AVX2__
static vec4d_t tvtruncd (vec4d_t v, vec4d_t ignore)
{
return vtrunc4d (v);
@ -114,7 +111,6 @@ static vec4d_t tqconjd (vec4d_t v, vec4d_t ignore)
{
return qconjd (v);
}
#endif
static vec4f_t tvtruncf (vec4f_t v, vec4f_t ignore)
{
@ -158,7 +154,6 @@ static vec4f_t tmagnitude3f (vec4f_t v, vec4f_t ignore)
#define T(t...) { __LINE__, t }
#ifdef __AVX2__
static vec4d_test_t vec4d_tests[] = {
// 3D dot products
T(dotd, right, right, one ),
@ -285,7 +280,6 @@ static vec4d_test_t vec4d_tests[] = {
T(tqconjd, one, {}, { -1, -1, -1, 1 } ),
};
#define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0])))
#endif
static vec4f_test_t vec4f_tests[] = {
// 3D dot products
@ -487,7 +481,6 @@ static mq4f_test_t mq4f_tests[] = {
};
#define num_mq4f_tests (sizeof (mq4f_tests) / (sizeof (mq4f_tests[0])))
#ifdef __AVX2__
static int
run_vec4d_tests (void)
{
@ -512,7 +505,6 @@ run_vec4d_tests (void)
}
return ret;
}
#endif
static int
run_vec4f_tests (void)
@ -684,9 +676,7 @@ int
main (void)
{
int ret = 0;
#ifdef __AVX2__
ret |= run_vec4d_tests ();
#endif
ret |= run_vec4f_tests ();
ret |= run_mat4f_tests ();
ret |= run_mv4f_tests ();