From 80c5e2c3f63cd0872c299cb2153e0531898bee3a Mon Sep 17 00:00:00 2001 From: Bill Currie Date: Thu, 6 Jan 2022 18:06:56 +0900 Subject: [PATCH] [simd] Remove requirements for AVX2 for vec4d It seems gcc-11 does a pretty good job of emulating the instructions (it no longer requires avx2 for 256-bit wide vectors). --- include/QF/simd/types.h | 2 -- include/QF/simd/vec4d.h | 18 +++++++----------- libs/util/test/test-simd.c | 10 ---------- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/include/QF/simd/types.h b/include/QF/simd/types.h index ad8345f44..1f2a511c1 100644 --- a/include/QF/simd/types.h +++ b/include/QF/simd/types.h @@ -43,7 +43,6 @@ typedef double vec3d_t[3]; VEC_TYPE (double, vec2d_t, 2); VEC_TYPE (int64_t, vec2l_t, 2); -#ifdef __AVX2__ /** Four element vector type for horizontal (AOS) vector data. * * This is used for both vectors (3D and 4D) and quaternions. 3D vectors @@ -58,7 +57,6 @@ VEC_TYPE (double, vec4d_t, 4); /** Used mostly for __builtin_shuffle. */ VEC_TYPE (int64_t, vec4l_t, 4); -#endif /** Three element vector type for interfacing with compact data. 
* diff --git a/include/QF/simd/vec4d.h b/include/QF/simd/vec4d.h index d0e3f19bc..1e1d7bb89 100644 --- a/include/QF/simd/vec4d.h +++ b/include/QF/simd/vec4d.h @@ -28,7 +28,6 @@ #ifndef __QF_simd_vec4d_h #define __QF_simd_vec4d_h -#ifdef __AVX2__ #include <immintrin.h> #include "QF/simd/types.h" @@ -96,9 +95,9 @@ GNU89INLINE inline vec4d_t qrotd (vec4d_t a, vec4d_t b) __attribute__((const)); GNU89INLINE inline vec4d_t qconjd (vec4d_t q) __attribute__((const)); GNU89INLINE inline vec4d_t loadvec3d (const double v3[]) __attribute__((pure)); GNU89INLINE inline void storevec3d (double v3[3], vec4d_t v4); -GNU89INLINE inline vec4l_t loadvec3l (const long *v3) __attribute__((pure)); -GNU89INLINE inline vec4l_t loadvec3l1 (const long *v3) __attribute__((pure)); -GNU89INLINE inline void storevec3l (long *v3, vec4l_t v4); +GNU89INLINE inline vec4l_t loadvec3l (const int64_t *v3) __attribute__((pure)); +GNU89INLINE inline vec4l_t loadvec3l1 (const int64_t *v3) __attribute__((pure)); +GNU89INLINE inline void storevec3l (int64_t *v3, vec4l_t v4); #ifndef IMPLEMENT_VEC4D_Funcs GNU89INLINE inline @@ -187,8 +186,7 @@ qmuld (vec4d_t a, vec4d_t b) vec4d_t c = crossd (a, b) + a * b[3] + a[3] * b; vec4d_t d = dotd (a, b); // zero out the vector component of dot product so only the scalar remains - d = _mm256_permute2f128_pd (d, d, 0x18); - d = _mm256_permute4x64_pd (d, 0xc0); + d = (vec4d_t) { 0, 0, 0, d[3] }; return c - d; } @@ -302,7 +300,7 @@ GNU89INLINE inline VISIBLE #endif vec4l_t -loadvec3l (const long *v3) +loadvec3l (const int64_t *v3) { vec4l_t v4 = { v3[0], v3[1], v3[2], 0 }; return v4; @@ -314,7 +312,7 @@ GNU89INLINE inline VISIBLE #endif vec4l_t -loadvec3l1 (const long *v3) +loadvec3l1 (const int64_t *v3) { vec4l_t v4 = { v3[0], v3[1], v3[2], 1 }; return v4; @@ -326,13 +324,11 @@ GNU89INLINE inline VISIBLE #endif void -storevec3l (long *v3, vec4l_t v4) +storevec3l (int64_t *v3, vec4l_t v4) { v3[0] = v4[0]; v3[1] = v4[1]; v3[2] = v4[2]; } -#endif - #endif//__QF_simd_vec4d_h diff --git 
a/libs/util/test/test-simd.c b/libs/util/test/test-simd.c index d1b4bf286..7cba48f3b 100644 --- a/libs/util/test/test-simd.c +++ b/libs/util/test/test-simd.c @@ -48,7 +48,6 @@ #define s05 0.70710678118654757 -#ifdef __AVX2__ typedef struct { int line; vec4d_t (*op) (vec4d_t a, vec4d_t b); @@ -57,7 +56,6 @@ typedef struct { vec4d_t expect; vec4d_t ulp_errors; } vec4d_test_t; -#endif typedef struct { int line; @@ -94,7 +92,6 @@ typedef struct { mat4f_t ulp_errors; } mq4f_test_t; -#ifdef __AVX2__ static vec4d_t tvtruncd (vec4d_t v, vec4d_t ignore) { return vtrunc4d (v); @@ -114,7 +111,6 @@ static vec4d_t tqconjd (vec4d_t v, vec4d_t ignore) { return qconjd (v); } -#endif static vec4f_t tvtruncf (vec4f_t v, vec4f_t ignore) { @@ -158,7 +154,6 @@ static vec4f_t tmagnitude3f (vec4f_t v, vec4f_t ignore) #define T(t...) { __LINE__, t } -#ifdef __AVX2__ static vec4d_test_t vec4d_tests[] = { // 3D dot products T(dotd, right, right, one ), @@ -285,7 +280,6 @@ static vec4d_test_t vec4d_tests[] = { T(tqconjd, one, {}, { -1, -1, -1, 1 } ), }; #define num_vec4d_tests (sizeof (vec4d_tests) / (sizeof (vec4d_tests[0]))) -#endif static vec4f_test_t vec4f_tests[] = { // 3D dot products @@ -487,7 +481,6 @@ static mq4f_test_t mq4f_tests[] = { }; #define num_mq4f_tests (sizeof (mq4f_tests) / (sizeof (mq4f_tests[0]))) -#ifdef __AVX2__ static int run_vec4d_tests (void) { @@ -512,7 +505,6 @@ run_vec4d_tests (void) } return ret; } -#endif static int run_vec4f_tests (void) @@ -684,9 +676,7 @@ int main (void) { int ret = 0; -#ifdef __AVX2__ ret |= run_vec4d_tests (); -#endif ret |= run_vec4f_tests (); ret |= run_mat4f_tests (); ret |= run_mv4f_tests ();