quakeforge/include/QF/simd/mat4f.h

/*
	QF/simd/mat4f.h

	Matrix functions for mat4f_t (ie, float precision)

	Copyright (C) 2021  Bill Currie <bill@taniwha.org>

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

	See the GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to:

		Free Software Foundation, Inc.
		59 Temple Place - Suite 330
		Boston, MA  02111-1307, USA

*/

#ifndef __QF_simd_mat4f_h
#define __QF_simd_mat4f_h

#include <immintrin.h>

#include "QF/simd/types.h"

GNU89INLINE inline void maddf (mat4f_t c, const mat4f_t a, const mat4f_t b);
GNU89INLINE inline void msubf (mat4f_t c, const mat4f_t a, const mat4f_t b);
GNU89INLINE inline void mmulf (mat4f_t c, const mat4f_t a, const mat4f_t b);
GNU89INLINE inline vec4f_t mvmulf (const mat4f_t m, vec4f_t v) __attribute__((const));
GNU89INLINE inline vec4f_t m3vmulf (const mat4f_t m, vec4f_t v) __attribute__((const));
GNU89INLINE inline void mat4fidentity (mat4f_t m);
GNU89INLINE inline void mat4ftranspose (mat4f_t t, const mat4f_t m);
GNU89INLINE inline void mat4fquat (mat4f_t m, vec4f_t q);

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
maddf (mat4f_t c, const mat4f_t a, const mat4f_t b)
{
	c[0] = a[0] + b[0];
	c[1] = a[1] + b[1];
	c[2] = a[2] + b[2];
	c[3] = a[3] + b[3];
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
msubf (mat4f_t c, const mat4f_t a, const mat4f_t b)
{
	c[0] = a[0] - b[0];
	c[1] = a[1] - b[1];
	c[2] = a[2] - b[2];
	c[3] = a[3] - b[3];
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
mmulf (mat4f_t c, const mat4f_t a, const mat4f_t b)
{
	c[0] = a[0] * b[0][0] + a[1] * b[0][1] + a[2] * b[0][2] + a[3] * b[0][3];
	c[1] = a[0] * b[1][0] + a[1] * b[1][1] + a[2] * b[1][2] + a[3] * b[1][3];
	c[2] = a[0] * b[2][0] + a[1] * b[2][1] + a[2] * b[2][2] + a[3] * b[2][3];
	c[3] = a[0] * b[3][0] + a[1] * b[3][1] + a[2] * b[3][2] + a[3] * b[3][3];
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4f_t
mvmulf (const mat4f_t m, vec4f_t v)
{
	return m[0] * v[0] + m[1] * v[1] + m[2] * v[2] + m[3] * v[3];
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
vec4f_t
m3vmulf (const mat4f_t m, vec4f_t v)
{
	vec4f_t     w;
	w = m[0] * v[0] + m[1] * v[1] + m[2] * v[2];
	w[3] = v[3];
	return w;
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
mat4fidentity (mat4f_t m)
{
	m[0] = (vec4f_t) { 1, 0, 0, 0 };
	m[1] = (vec4f_t) { 0, 1, 0, 0 };
	m[2] = (vec4f_t) { 0, 0, 1, 0 };
	m[3] = (vec4f_t) { 0, 0, 0, 1 };
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
mat4ftranspose (mat4f_t t, const mat4f_t m)
{
	vec4f_t     a = m[0];
	vec4f_t     b = m[1];
	vec4f_t     c = m[2];
	vec4f_t     d = m[3];
	t[0] = (vec4f_t) { a[0], b[0], c[0], d[0] };
	t[1] = (vec4f_t) { a[1], b[1], c[1], d[1] };
	t[2] = (vec4f_t) { a[2], b[2], c[2], d[2] };
	t[3] = (vec4f_t) { a[3], b[3], c[3], d[3] };
}

#ifndef IMPLEMENT_MAT4F_Funcs
GNU89INLINE inline
#else
VISIBLE
#endif
void
mat4fquat (mat4f_t m, vec4f_t q)
{
	vec4f_t xq = q[0] * q;
	vec4f_t yq = q[1] * q;
	vec4f_t zq = q[2] * q;
	vec4f_t wq = q[3] * q;

	static const vec4i_t shuff103 = { 1, 0, 3, 2 };
	static const vec4i_t shuff230 = { 2, 3, 0, 1 };
	static const vec4i_t shuff321 = { 3, 2, 1, 0 };
#define p (0)
#define m (1u << 31)
	static const vec4i_t mpm = { m, p, m, 0 };
	static const vec4i_t pmm = { p, m, m, 0 };
	static const vec4i_t mmp = { m, m, p, 0 };
	static const vec4i_t mask = { ~0u, ~0u, ~0u, 0 };
#undef p
#undef m
	{
		vec4f_t a = xq;
		vec4f_t b = _mm_xor_ps (__builtin_shuffle (yq, shuff103), (__m128) mpm);
		vec4f_t c = _mm_xor_ps (__builtin_shuffle (zq, shuff230), (__m128) pmm);
		vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff321), (__m128) mmp);

		m[0] = _mm_and_ps (a + b - c - d, (__m128) mask);
	}
	{
		vec4f_t a = _mm_xor_ps (__builtin_shuffle (xq, shuff103), (__m128) mpm);
		vec4f_t b = yq;
		vec4f_t c = _mm_xor_ps (__builtin_shuffle (zq, shuff321), (__m128) mmp);
		vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff230), (__m128) pmm);
		m[1] = _mm_and_ps (b + c - a - d, (__m128) mask);
	}
	{
		vec4f_t a = _mm_xor_ps (__builtin_shuffle (xq, shuff230), (__m128) pmm);
		vec4f_t b = _mm_xor_ps (__builtin_shuffle (yq, shuff321), (__m128) mmp);
		vec4f_t c = zq;
		vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff103), (__m128) mpm);
		m[2] = _mm_and_ps (a - b + c - d, (__m128) mask);
	}
	m[3] = (vec4f_t) { 0, 0, 0, 1 };
}

#endif//__QF_simd_mat4f_h
[util] Add simd 4x4 matrix functions Currently just add, subtract, multiply (m m and m v). 2021-03-03 07:17:15 +00:00			`/*`
			`QF/simd/mat4f.h`

			`Matrix functions for mat4f_t (ie, float precision)`

			`Copyright (C) 2021 Bill Currie <bill@taniwha.org>`

			`This program is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU General Public License`
			`as published by the Free Software Foundation; either version 2`
			`of the License, or (at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.`

			`See the GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with this program; if not, write to:`

			`Free Software Foundation, Inc.`
			`59 Temple Place - Suite 330`
			`Boston, MA 02111-1307, USA`

			`*/`

			`#ifndef __QF_simd_mat4f_h`
			`#define __QF_simd_mat4f_h`

			`#include <immintrin.h>`

			`#include "QF/simd/types.h"`

			`GNU89INLINE inline void maddf (mat4f_t c, const mat4f_t a, const mat4f_t b);`
			`GNU89INLINE inline void msubf (mat4f_t c, const mat4f_t a, const mat4f_t b);`
			`GNU89INLINE inline void mmulf (mat4f_t c, const mat4f_t a, const mat4f_t b);`
			`GNU89INLINE inline vec4f_t mvmulf (const mat4f_t m, vec4f_t v) __attribute__((const));`
			`GNU89INLINE inline vec4f_t m3vmulf (const mat4f_t m, vec4f_t v) __attribute__((const));`
			`GNU89INLINE inline void mat4fidentity (mat4f_t m);`
[util] Add a simd mat4 transpose function 2021-03-09 14:50:32 +00:00			`GNU89INLINE inline void mat4ftranspose (mat4f_t t, const mat4f_t m);`
[util] Create simd quaternion to matrix function This seems to be pretty close to as fast as it gets (might be able to do better with some shuffles of the negation constants instead of loading separate constants). 2021-03-04 08:39:05 +00:00			`GNU89INLINE inline void mat4fquat (mat4f_t m, vec4f_t q);`
[util] Add simd 4x4 matrix functions Currently just add, subtract, multiply (m m and m v). 2021-03-03 07:17:15 +00:00
			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`void`
			`maddf (mat4f_t c, const mat4f_t a, const mat4f_t b)`
			`{`
			`c[0] = a[0] + b[0];`
			`c[1] = a[1] + b[1];`
			`c[2] = a[2] + b[2];`
			`c[3] = a[3] + b[3];`
			`}`

			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`void`
			`msubf (mat4f_t c, const mat4f_t a, const mat4f_t b)`
			`{`
			`c[0] = a[0] - b[0];`
			`c[1] = a[1] - b[1];`
			`c[2] = a[2] - b[2];`
			`c[3] = a[3] - b[3];`
			`}`

			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`void`
			`mmulf (mat4f_t c, const mat4f_t a, const mat4f_t b)`
			`{`
			`c[0] = a[0] * b[0][0] + a[1] * b[0][1] + a[2] * b[0][2] + a[3] * b[0][3];`
			`c[1] = a[0] * b[1][0] + a[1] * b[1][1] + a[2] * b[1][2] + a[3] * b[1][3];`
			`c[2] = a[0] * b[2][0] + a[1] * b[2][1] + a[2] * b[2][2] + a[3] * b[2][3];`
			`c[3] = a[0] * b[3][0] + a[1] * b[3][1] + a[2] * b[3][2] + a[3] * b[3][3];`
			`}`

			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`vec4f_t`
			`mvmulf (const mat4f_t m, vec4f_t v)`
			`{`
			`return m[0] * v[0] + m[1] * v[1] + m[2] * v[2] + m[3] * v[3];`
			`}`

			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`vec4f_t`
			`m3vmulf (const mat4f_t m, vec4f_t v)`
			`{`
			`vec4f_t w;`
			`w = m[0] * v[0] + m[1] * v[1] + m[2] * v[2];`
Make m3vmulf return v[3] unchanged 2021-03-10 10:40:19 +00:00			`w[3] = v[3];`
[util] Add simd 4x4 matrix functions Currently just add, subtract, multiply (m m and m v). 2021-03-03 07:17:15 +00:00			`return w;`
			`}`

			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`void`
			`mat4fidentity (mat4f_t m)`
			`{`
			`m[0] = (vec4f_t) { 1, 0, 0, 0 };`
			`m[1] = (vec4f_t) { 0, 1, 0, 0 };`
			`m[2] = (vec4f_t) { 0, 0, 1, 0 };`
			`m[3] = (vec4f_t) { 0, 0, 0, 1 };`
			`}`

[util] Add a simd mat4 transpose function 2021-03-09 14:50:32 +00:00			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`void`
			`mat4ftranspose (mat4f_t t, const mat4f_t m)`
			`{`
			`vec4f_t a = m[0];`
			`vec4f_t b = m[1];`
			`vec4f_t c = m[2];`
			`vec4f_t d = m[3];`
			`t[0] = (vec4f_t) { a[0], b[0], c[0], d[0] };`
			`t[1] = (vec4f_t) { a[1], b[1], c[1], d[1] };`
			`t[2] = (vec4f_t) { a[2], b[2], c[2], d[2] };`
			`t[3] = (vec4f_t) { a[3], b[3], c[3], d[3] };`
			`}`

[util] Create simd quaternion to matrix function This seems to be pretty close to as fast as it gets (might be able to do better with some shuffles of the negation constants instead of loading separate constants). 2021-03-04 08:39:05 +00:00			`#ifndef IMPLEMENT_MAT4F_Funcs`
			`GNU89INLINE inline`
			`#else`
			`VISIBLE`
			`#endif`
			`void`
			`mat4fquat (mat4f_t m, vec4f_t q)`
			`{`
			`vec4f_t xq = q[0] * q;`
			`vec4f_t yq = q[1] * q;`
			`vec4f_t zq = q[2] * q;`
			`vec4f_t wq = q[3] * q;`

			`static const vec4i_t shuff103 = { 1, 0, 3, 2 };`
			`static const vec4i_t shuff230 = { 2, 3, 0, 1 };`
			`static const vec4i_t shuff321 = { 3, 2, 1, 0 };`
			`#define p (0)`
			`#define m (1u << 31)`
			`static const vec4i_t mpm = { m, p, m, 0 };`
			`static const vec4i_t pmm = { p, m, m, 0 };`
			`static const vec4i_t mmp = { m, m, p, 0 };`
			`static const vec4i_t mask = { ~0u, ~0u, ~0u, 0 };`
			`#undef p`
			`#undef m`
			`{`
			`vec4f_t a = xq;`
			`vec4f_t b = _mm_xor_ps (__builtin_shuffle (yq, shuff103), (__m128) mpm);`
			`vec4f_t c = _mm_xor_ps (__builtin_shuffle (zq, shuff230), (__m128) pmm);`
			`vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff321), (__m128) mmp);`

			`m[0] = _mm_and_ps (a + b - c - d, (__m128) mask);`
			`}`
			`{`
			`vec4f_t a = _mm_xor_ps (__builtin_shuffle (xq, shuff103), (__m128) mpm);`
			`vec4f_t b = yq;`
			`vec4f_t c = _mm_xor_ps (__builtin_shuffle (zq, shuff321), (__m128) mmp);`
			`vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff230), (__m128) pmm);`
			`m[1] = _mm_and_ps (b + c - a - d, (__m128) mask);`
			`}`
			`{`
			`vec4f_t a = _mm_xor_ps (__builtin_shuffle (xq, shuff230), (__m128) pmm);`
			`vec4f_t b = _mm_xor_ps (__builtin_shuffle (yq, shuff321), (__m128) mmp);`
			`vec4f_t c = zq;`
			`vec4f_t d = _mm_xor_ps (__builtin_shuffle (wq, shuff103), (__m128) mpm);`
			`m[2] = _mm_and_ps (a - b + c - d, (__m128) mask);`
			`}`
			`m[3] = (vec4f_t) { 0, 0, 0, 1 };`
			`}`

[util] Add simd 4x4 matrix functions Currently just add, subtract, multiply (m m and m v). 2021-03-03 07:17:15 +00:00			`#endif//__QF_simd_mat4f_h`