diff --git a/config.d/compiling.m4 b/config.d/compiling.m4 index 463dd8795..81cca85bf 100644 --- a/config.d/compiling.m4 +++ b/config.d/compiling.m4 @@ -98,37 +98,41 @@ AC_ARG_ENABLE(optimize, optimize=yes ) -AC_ARG_ENABLE(simd, - AS_HELP_STRING([--enable-simd@<:@=arg@:.@], - [enable SIMD support (default auto)]), - [], - [enable_simd=yes] -) +if test "x$host_cpu" = xaarch64; then + simd=neon +else + AC_ARG_ENABLE(simd, + AS_HELP_STRING([--enable-simd@<:@=arg@:.@], + [enable SIMD support (default auto)]), + [], + [enable_simd=yes] + ) -case "$enable_simd" in - no) - simd=no - ;; - sse|sse2|avx|avx2) - QF_CC_OPTION(-m$enable_simd) - simd=$enable_simd - ;; - yes) - for simd in avx2 avx sse2 sse; do - if lscpu | grep -q -w $simd; then - QF_CC_OPTION(-m$simd) - break - fi - done - ;; -esac -case "$simd" in - avx*) - ;; - *) - QF_CC_OPTION(-Wno-psabi) - ;; -esac + case "$enable_simd" in + no) + simd=no + ;; + sse|sse2|avx|avx2) + QF_CC_OPTION(-m$enable_simd) + simd=$enable_simd + ;; + yes) + for simd in avx2 avx sse2 sse; do + if lscpu | grep -q -w $simd; then + QF_CC_OPTION(-m$simd) + break + fi + done + ;; + esac + case "$simd" in + avx*) + ;; + *) + QF_CC_OPTION(-Wno-psabi) + ;; + esac +fi AC_MSG_CHECKING(for optimization) if test "x$optimize" = xyes -a "x$leave_cflags_alone" != "xyes"; then diff --git a/include/QF/simd/mat4f.h b/include/QF/simd/mat4f.h index 981d71d22..ce061edd0 100644 --- a/include/QF/simd/mat4f.h +++ b/include/QF/simd/mat4f.h @@ -28,8 +28,6 @@ #ifndef __QF_simd_mat4f_h #define __QF_simd_mat4f_h -#include - #include "QF/simd/types.h" GNU89INLINE inline void maddf (mat4f_t c, const mat4f_t a, const mat4f_t b); @@ -172,27 +170,27 @@ mat4fquat (mat4f_t m, vec4f_t q) #undef m { vec4f_t a = xq; - vec4f_t b = _mm_xor_ps (shuff103 (yq), (__m128) mpm); - vec4f_t c = _mm_xor_ps (shuff230 (zq), (__m128) pmm); - vec4f_t d = _mm_xor_ps (shuff321 (wq), (__m128) mmp); + vec4f_t b = (vec4f_t) ((vec4i_t) shuff103 (yq) ^ mpm); + vec4f_t c = (vec4f_t) ((vec4i_t) shuff230 (zq) ^ pmm); + vec4f_t d = (vec4f_t) ((vec4i_t) shuff321 (wq) ^ mmp); // column: ww + xx - yy - zz // 2xy + 2wz // 2zx - 2wy // 0 - m[0] = _mm_and_ps (a + b - c - d, (__m128) mask); + m[0] = (vec4f_t) ((vec4i_t) (a + b - c - d) & mask); } { - vec4f_t a = _mm_xor_ps (shuff103 (xq), (__m128) mpm); + vec4f_t a = (vec4f_t) ((vec4i_t) shuff103 (xq) ^ mpm); vec4f_t b = yq; - vec4f_t c = _mm_xor_ps (shuff321 (zq), (__m128) mmp); - vec4f_t d = _mm_xor_ps (shuff230 (wq), (__m128) pmm); + vec4f_t c = (vec4f_t) ((vec4i_t) shuff321 (zq) ^ mmp); + vec4f_t d = (vec4f_t) ((vec4i_t) shuff230 (wq) ^ pmm); // column: 2xy - 2wz // ww - xx + yy - zz // 2yz + 2wx // 0 - m[1] = _mm_and_ps (b + c - a - d, (__m128) mask); + m[1] = (vec4f_t) ((vec4i_t) (b + c - a - d) & mask); } { - vec4f_t a = _mm_xor_ps (shuff230 (xq), (__m128) pmm); - vec4f_t b = _mm_xor_ps (shuff321 (yq), (__m128) mmp); + vec4f_t a = (vec4f_t) ((vec4i_t) shuff230 (xq) ^ pmm); + vec4f_t b = (vec4f_t) ((vec4i_t) shuff321 (yq) ^ mmp); vec4f_t c = zq; - vec4f_t d = _mm_xor_ps (shuff103 (wq), (__m128) mpm); + vec4f_t d = (vec4f_t) ((vec4i_t) shuff103 (wq) ^ mpm); // column: 2xz + 2wy // 2yz - 2wx // ww - xx - yy + zz // 0 - m[2] = _mm_and_ps (a - b + c - d, (__m128) mask); + m[2] = (vec4f_t) ((vec4i_t) (a - b + c - d) & mask); } m[3] = (vec4f_t) { 0, 0, 0, 1 }; } diff --git a/include/QF/simd/types.h b/include/QF/simd/types.h index 26aceccd0..c7c950fd0 100644 --- a/include/QF/simd/types.h +++ b/include/QF/simd/types.h @@ -105,30 +105,4 @@ typedef struct vspheref_s { float radius; } vspheref_t; -#include -#ifndef __SSE__ -#define _mm_xor_ps __qf_mm_xor_ps -#define _mm_and_ps __qf_mm_and_ps -GNU89INLINE inline __m128 _mm_xor_ps (__m128 a, __m128 b); -GNU89INLINE inline __m128 _mm_and_ps (__m128 a, __m128 b); -#ifndef IMPLEMENT_MAT4F_Funcs -GNU89INLINE inline -#else -VISIBLE -#endif -__m128 _mm_xor_ps (__m128 a, __m128 b) -{ - return (__m128) ((vec4i_t) a ^ (vec4i_t) b); -} -#ifndef IMPLEMENT_MAT4F_Funcs -GNU89INLINE inline -#else -VISIBLE -#endif -__m128 _mm_and_ps (__m128 a, __m128 b) -{ - return (__m128) ((vec4i_t) a & (vec4i_t) b); -} -#endif - #endif//__QF_simd_types_h diff --git a/include/QF/simd/vec2d.h b/include/QF/simd/vec2d.h index 86be9302b..25eb263bf 100644 --- a/include/QF/simd/vec2d.h +++ b/include/QF/simd/vec2d.h @@ -29,7 +29,11 @@ #ifndef __QF_simd_vec2d_h #define __QF_simd_vec2d_h +#ifdef __aarch64__ +#include +#else #include +#endif #include "QF/simd/types.h" @@ -50,7 +54,11 @@ VISIBLE vec2d_t vsqrt2d (vec2d_t v) { +#ifdef __aarch64__ + return vsqrtq_f64 (v); +#else return _mm_sqrt_pd (v); +#endif } #ifndef IMPLEMENT_VEC2D_Funcs diff --git a/include/QF/simd/vec2f.h b/include/QF/simd/vec2f.h index 1459dd37b..3dcc97ecc 100644 --- a/include/QF/simd/vec2f.h +++ b/include/QF/simd/vec2f.h @@ -29,7 +29,11 @@ #ifndef __QF_simd_vec2f_h #define __QF_simd_vec2f_h +#ifdef __aarch64__ +#include +#else #include +#endif #include #include "QF/simd/types.h" @@ -67,9 +71,13 @@ VISIBLE vec2f_t vsqrt2f (vec2f_t v) { +#ifdef __aarch64__ + return vsqrt_f32 (v); +#else vec4f_t t = { v[0], v[1], 0, 0 }; t = _mm_sqrt_ps (t); return (vec2f_t) { t[0], t[1] }; +#endif } #ifndef IMPLEMENT_VEC2F_Funcs diff --git a/include/QF/simd/vec2i.h b/include/QF/simd/vec2i.h index 29f2cc57c..0032e3e33 100644 --- a/include/QF/simd/vec2i.h +++ b/include/QF/simd/vec2i.h @@ -28,8 +28,6 @@ #ifndef __QF_simd_vec2i_h #define __QF_simd_vec2i_h -#include -#include #include "QF/simd/types.h" @@ -59,7 +57,7 @@ VISIBLE int any2i (vec2i_t v) { - vec2i_t t = _m_pcmpeqd (v, (vec2i_t) {0, 0}); + vec2i_t t = v == (vec2i_t) {0, 0}; #ifndef __SSSE3__ return (t[0] + t[1]) > -2; #else @@ -75,7 +73,7 @@ VISIBLE int all2i (vec2i_t v) { - vec2i_t t = _m_pcmpeqd (v, (vec2i_t) {0, 0}); + vec2i_t t = v == (vec2i_t) {0, 0}; #ifndef __SSSE3__ return (t[0] + t[1]) == 0; #else @@ -91,7 +89,7 @@ VISIBLE int none2i (vec2i_t v) { - vec2i_t t = _m_pcmpeqd (v, (vec2i_t) {0, 0}); + vec2i_t t = v == (vec2i_t) {0, 0}; #ifndef __SSSE3__ return (t[0] + t[1]) == -2; #else diff --git a/include/QF/simd/vec4d.h b/include/QF/simd/vec4d.h index 04e4b81a2..a56c70ab7 100644 --- a/include/QF/simd/vec4d.h +++ b/include/QF/simd/vec4d.h @@ -28,7 +28,11 @@ #ifndef __QF_simd_vec4d_h #define __QF_simd_vec4d_h +#ifdef __aarch64__ +#include +#else #include +#endif #include "QF/simd/types.h" #include "QF/simd/vec2d.h" diff --git a/include/QF/simd/vec4f.h b/include/QF/simd/vec4f.h index ee92d928e..effb32864 100644 --- a/include/QF/simd/vec4f.h +++ b/include/QF/simd/vec4f.h @@ -28,7 +28,11 @@ #ifndef __QF_simd_vec4f_h #define __QF_simd_vec4f_h +#ifdef __aarch64__ +#include +#else #include +#endif #include #include "QF/simd/types.h" @@ -121,12 +125,16 @@ VISIBLE vec4f_t vsqrt4f (vec4f_t v) { +#ifdef __aarch64__ + return vsqrtq_f32 (v); +#else #ifndef __SSE__ vec4f_t r = { sqrtf (v[0]), sqrtf (v[1]), sqrtf (v[2]), sqrtf (v[3]) }; return r; #else return _mm_sqrt_ps (v); #endif +#endif } #ifndef IMPLEMENT_VEC4F_Funcs diff --git a/include/QF/simd/vec4i.h b/include/QF/simd/vec4i.h index d3e4ac5dc..0e947a06a 100644 --- a/include/QF/simd/vec4i.h +++ b/include/QF/simd/vec4i.h @@ -28,7 +28,6 @@ #ifndef __QF_simd_vec4i_h #define __QF_simd_vec4i_h -#include #include #include "QF/simd/types.h" diff --git a/tools/qfvis/source/qfvis.c b/tools/qfvis/source/qfvis.c index 2e64e9a0d..7630de93b 100644 --- a/tools/qfvis/source/qfvis.c +++ b/tools/qfvis/source/qfvis.c @@ -242,14 +242,13 @@ split_edge (const vec4f_t *points, const vec4f_t *dists, // "nan" because 0x7fffffff is nan when viewed as a float static const vec4i_t onenan = {0x3f800000,0x3f800000,0x3f800000,~0u >> 1}; static const vec4i_t nan = { ~0u >> 1, ~0u >> 1, ~0u >> 1, ~0u >> 1}; - vec4i_t x = _mm_and_ps (split, (__m128) nan) == (__m128) onenan; + vec4i_t x = ((vec4i_t) split & nan) == onenan; // plane vector has -dist in w - vec4f_t y = _mm_and_ps (split, (__m128) x) * -split[3]; + vec4f_t y = (vec4f_t) ((vec4i_t) split & x) * -split[3]; #ifdef __SSE3__ mid = _mm_blendv_ps (mid, y, (__m128) x); #else - mid = (vec4f_t) ((vec4i_t) _mm_and_ps (y, (__m128) x) | - (vec4i_t) _mm_and_ps (mid, (__m128) ~x)); + mid = (vec4f_t) (((vec4i_t) y & x) | ((vec4i_t) mid & ~x)); #endif // if (isnan (mid[0])) *(int *) 0 = 0; return mid;