[simd] Remove requirements for AVX2 for vec4d

It seems gcc-11 does a pretty good job of emulating the instructions (it
no longer requires AVX2 for 256-bit wide vectors).
This commit is contained in:
Bill Currie 2022-01-06 18:06:56 +09:00
parent c0277c0b03
commit 80c5e2c3f6
3 changed files with 7 additions and 23 deletions

View file

@ -43,7 +43,6 @@ typedef double vec3d_t[3];
VEC_TYPE (double, vec2d_t, 2); VEC_TYPE (double, vec2d_t, 2);
VEC_TYPE (int64_t, vec2l_t, 2); VEC_TYPE (int64_t, vec2l_t, 2);
#ifdef __AVX2__
/** Four element vector type for horizontal (AOS) vector data. /** Four element vector type for horizontal (AOS) vector data.
* *
* This is used for both vectors (3D and 4D) and quaternions. 3D vectors * This is used for both vectors (3D and 4D) and quaternions. 3D vectors
@ -58,7 +57,6 @@ VEC_TYPE (double, vec4d_t, 4);
/** Used mostly for __builtin_shuffle. /** Used mostly for __builtin_shuffle.
*/ */
VEC_TYPE (int64_t, vec4l_t, 4); VEC_TYPE (int64_t, vec4l_t, 4);
#endif
/** Three element vector type for interfacing with compact data. /** Three element vector type for interfacing with compact data.
* *

View file

@ -28,7 +28,6 @@
#ifndef __QF_simd_vec4d_h #ifndef __QF_simd_vec4d_h
#define __QF_simd_vec4d_h #define __QF_simd_vec4d_h
#ifdef __AVX2__
#include <immintrin.h> #include <immintrin.h>
#include "QF/simd/types.h" #include "QF/simd/types.h"
@ -96,9 +95,9 @@ GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const));
GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const)); GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const));
GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure)); GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure));
GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4); GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4);
GNU89INLINE inline vec4l_t loadvec3l (const long *v3) __attribute__((pure)); GNU89INLINE inline vec4l_t loadvec3l (const int64_t *v3) __attribute__((pure));
GNU89INLINE inline vec4l_t loadvec3l1 (const long *v3) __attribute__((pure)); GNU89INLINE inline vec4l_t loadvec3l1 (const int64_t *v3) __attribute__((pure));
GNU89INLINE inline void storevec3l (long *v3, vec4l_t v4); GNU89INLINE inline void storevec3l (int64_t *v3, vec4l_t v4);
#ifndef IMPLEMENT_VEC4D_Funcs #ifndef IMPLEMENT_VEC4D_Funcs
GNU89INLINE inline GNU89INLINE inline
@ -187,8 +186,7 @@ qmuld (vec4d_t a, vec4d_t b)
vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b; vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b;
vec4d_t d = dotd (a, b); vec4d_t d = dotd (a, b);
// zero out the vector component of dot product so only the scalar remains // zero out the vector component of dot product so only the scalar remains
d = _mm256_permute2f128_pd (d, d, 0x18); d = (vec4d_t) { 0, 0, 0, d[3] };
d = _mm256_permute4x64_pd (d, 0xc0);
return c - d; return c - d;
} }
@ -302,7 +300,7 @@ GNU89INLINE inline
VISIBLE VISIBLE
#endif #endif
vec4l_t vec4l_t
loadvec3l (const long *v3) loadvec3l (const int64_t *v3)
{ {
vec4l_t v4 = { v3[0], v3[1], v3[2], 0 }; vec4l_t v4 = { v3[0], v3[1], v3[2], 0 };
return v4; return v4;
@ -314,7 +312,7 @@ GNU89INLINE inline
VISIBLE VISIBLE
#endif #endif
vec4l_t vec4l_t
loadvec3l1 (const long *v3) loadvec3l1 (const int64_t *v3)
{ {
vec4l_t v4 = { v3[0], v3[1], v3[2], 1 }; vec4l_t v4 = { v3[0], v3[1], v3[2], 1 };
return v4; return v4;
@ -326,13 +324,11 @@ GNU89INLINE inline
VISIBLE VISIBLE
#endif #endif
void void
storevec3l (long *v3, vec4l_t v4) storevec3l (int64_t *v3, vec4l_t v4)
{ {
v3[0] = v4[0]; v3[0] = v4[0];
v3[1] = v4[1]; v3[1] = v4[1];
v3[2] = v4[2]; v3[2] = v4[2];
} }
#endif
#endif//__QF_simd_vec4d_h #endif//__QF_simd_vec4d_h

View file

@ -48,7 +48,6 @@
#define s05 0.70710678118654757 #define s05 0.70710678118654757
#ifdef __AVX2__
typedef struct { typedef struct {
int line; int line;
vec4d_t (*op) (vec4d_t a, vec4d_t b); vec4d_t (*op) (vec4d_t a, vec4d_t b);
@ -57,7 +56,6 @@ typedef struct {
vec4d_t expect; vec4d_t expect;
vec4d_t ulp_errors; vec4d_t ulp_errors;
} vec4d_test_t; } vec4d_test_t;
#endif
typedef struct { typedef struct {
int line; int line;
@ -94,7 +92,6 @@ typedef struct {
mat4f_t ulp_errors; mat4f_t ulp_errors;
} mq4f_test_t; } mq4f_test_t;
#ifdef __AVX2__
static vec4d_t tvtruncd (vec4d_t v, vec4d_t ignore) static vec4d_t tvtruncd (vec4d_t v, vec4d_t ignore)
{ {
return vtrunc4d (v); return vtrunc4d (v);
@ -114,7 +111,6 @@ static vec4d_t tqconjd (vec4d_t v, vec4d_t ignore)
{ {
return qconjd (v); return qconjd (v);
} }
#endif
static vec4f_t tvtruncf (vec4f_t v, vec4f_t ignore) static vec4f_t tvtruncf (vec4f_t v, vec4f_t ignore)
{ {
@ -158,7 +154,6 @@ static vec4f_t tmagnitude3f (vec4f_t v, vec4f_t ignore)
#define T(t...) { __LINE__, t } #define T(t...) { __LINE__, t }
#ifdef __AVX2__
static vec4d_test_t vec4d_tests[] = { static vec4d_test_t vec4d_tests[] = {
// 3D dot products // 3D dot products
T(dotd, right, right, one ), T(dotd, right, right, one ),
@ -285,7 +280,6 @@ static vec4d_test_t vec4d_tests[] = {
T(tqconjd, one, {}, { -1, -1, -1, 1 } ), T(tqconjd, one, {}, { -1, -1, -1, 1 } ),
}; };
#define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0]))) #define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0])))
#endif
static vec4f_test_t vec4f_tests[] = { static vec4f_test_t vec4f_tests[] = {
// 3D dot products // 3D dot products
@ -487,7 +481,6 @@ static mq4f_test_t mq4f_tests[] = {
}; };
#define num_mq4f_tests (sizeof (mq4f_tests) / (sizeof (mq4f_tests[0]))) #define num_mq4f_tests (sizeof (mq4f_tests) / (sizeof (mq4f_tests[0])))
#ifdef __AVX2__
static int static int
run_vec4d_tests (void) run_vec4d_tests (void)
{ {
@ -512,7 +505,6 @@ run_vec4d_tests (void)
} }
return ret; return ret;
} }
#endif
static int static int
run_vec4f_tests (void) run_vec4f_tests (void)
@ -684,9 +676,7 @@ int
main (void) main (void)
{ {
int ret = 0; int ret = 0;
#ifdef __AVX2__
ret |= run_vec4d_tests (); ret |= run_vec4d_tests ();
#endif
ret |= run_vec4f_tests (); ret |= run_vec4f_tests ();
ret |= run_mat4f_tests (); ret |= run_mat4f_tests ();
ret |= run_mv4f_tests (); ret |= run_mv4f_tests ();