/* QF/simd/mat4f.h Matrix functions for mat4f_t (ie, float precision) Copyright (C) 2021 Bill Currie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to: Free Software Foundation, Inc. 59 Temple Place - Suite 330 Boston, MA 02111-1307, USA */ #ifndef __QF_simd_mat4f_h #define __QF_simd_mat4f_h #include #include "QF/simd/types.h" GNU89INLINE inline void maddf (mat4f_t c, const mat4f_t a, const mat4f_t b); GNU89INLINE inline void msubf (mat4f_t c, const mat4f_t a, const mat4f_t b); GNU89INLINE inline void mmulf (mat4f_t c, const mat4f_t a, const mat4f_t b); GNU89INLINE inline vec4f_t mvmulf (const mat4f_t m, vec4f_t v) __attribute__((const)); GNU89INLINE inline vec4f_t m3vmulf (const mat4f_t m, vec4f_t v) __attribute__((const)); GNU89INLINE inline void mat4fidentity (mat4f_t m); GNU89INLINE inline void mat4ftranspose (mat4f_t t, const mat4f_t m); GNU89INLINE inline void mat4fquat (mat4f_t m, vec4f_t q); #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif void maddf (mat4f_t c, const mat4f_t a, const mat4f_t b) { c[0] = a[0] + b[0]; c[1] = a[1] + b[1]; c[2] = a[2] + b[2]; c[3] = a[3] + b[3]; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif void msubf (mat4f_t c, const mat4f_t a, const mat4f_t b) { c[0] = a[0] - b[0]; c[1] = a[1] - b[1]; c[2] = a[2] - b[2]; c[3] = a[3] - b[3]; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif void mmulf (mat4f_t c, const mat4f_t a, const mat4f_t b) { c[0] = a[0] * b[0][0] + a[1] * b[0][1] + a[2] * b[0][2] + a[3] * b[0][3]; c[1] = a[0] * b[1][0] + a[1] * b[1][1] + a[2] * b[1][2] + a[3] * b[1][3]; c[2] = a[0] * b[2][0] + a[1] * b[2][1] + a[2] * b[2][2] + a[3] * b[2][3]; c[3] = a[0] * b[3][0] + a[1] * b[3][1] + a[2] * b[3][2] + a[3] * b[3][3]; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif vec4f_t mvmulf (const mat4f_t m, vec4f_t v) { return m[0] * v[0] + m[1] * v[1] + m[2] * v[2] + m[3] * v[3]; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif vec4f_t m3vmulf (const mat4f_t m, vec4f_t v) { vec4f_t w; w = m[0] * v[0] + m[1] * v[1] + m[2] * v[2]; w[3] = 1; return w; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif void mat4fidentity (mat4f_t m) { m[0] = (vec4f_t) { 1, 0, 0, 0 }; m[1] = (vec4f_t) { 0, 1, 0, 0 }; m[2] = (vec4f_t) { 0, 0, 1, 0 }; m[3] = (vec4f_t) { 0, 0, 0, 1 }; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif void mat4ftranspose (mat4f_t t, const mat4f_t m) { vec4f_t a = m[0]; vec4f_t b = m[1]; vec4f_t c = m[2]; vec4f_t d = m[3]; t[0] = (vec4f_t) { a[0], b[0], c[0], d[0] }; t[1] = (vec4f_t) { a[1], b[1], c[1], d[1] }; t[2] = (vec4f_t) { a[2], b[2], c[2], d[2] }; t[3] = (vec4f_t) { a[3], b[3], c[3], d[3] }; } #ifndef IMPLEMENT_MAT4F_Funcs GNU89INLINE inline #else VISIBLE #endif void mat4fquat (mat4f_t m, vec4f_t q) { vec4f_t xq = q[0] * q; vec4f_t yq = q[1] * q; vec4f_t zq = q[2] * q; vec4f_t wq = q[3] * q; static const vec4i_t shuff103 = { 1, 0, 3, 2 }; static const vec4i_t shuff230 = { 2, 3, 0, 1 }; static const vec4i_t shuff321 = { 3, 2, 1, 0 }; #define p (0) #define m (1u << 31) static const vec4i_t mpm = { m, p, m, 0 }; static const vec4i_t pmm = { p, m, m, 0 }; static const vec4i_t mmp = { m, m, p, 0 }; static const vec4i_t mask = { ~0u, ~0u, ~0u, 0 }; #undef p #undef m { vec4f_t a = xq; vec4f_t b = _mm_xor_ps (__builtin_shuffle (yq, shuff103), (__m128) mpm); vec4f_t c = _mm_xor_ps (__builtin_shuffle (zq, shuff230), (__m128) pmm); vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff321), (__m128) mmp); m[0] = _mm_and_ps (a + b - c - d, (__m128) mask); } { vec4f_t a = _mm_xor_ps (__builtin_shuffle (xq, shuff103), (__m128) mpm); vec4f_t b = yq; vec4f_t c = _mm_xor_ps (__builtin_shuffle (zq, shuff321), (__m128) mmp); vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff230), (__m128) pmm); m[1] = _mm_and_ps (b + c - a - d, (__m128) mask); } { vec4f_t a = _mm_xor_ps (__builtin_shuffle (xq, shuff230), (__m128) pmm); vec4f_t b = _mm_xor_ps (__builtin_shuffle (yq, shuff321), (__m128) mmp); vec4f_t c = zq; vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff103), (__m128) mpm); m[2] = _mm_and_ps (a - b + c - d, (__m128) mask); } m[3] = (vec4f_t) { 0, 0, 0, 1 }; } #endif//__QF_simd_mat4f_h