/* ** BuilderNative Renderer ** Copyright (c) 2019 Magnus Norddahl ** ** This software is provided 'as-is', without any express or implied ** warranty. In no event will the authors be held liable for any damages ** arising from the use of this software. ** ** Permission is granted to anyone to use this software for any purpose, ** including commercial applications, and to alter it and redistribute it ** freely, subject to the following restrictions: ** ** 1. The origin of this software must not be misrepresented; you must not ** claim that you wrote the original software. If you use this software ** in a product, an acknowledgment in the product documentation would be ** appreciated but is not required. ** 2. Altered source versions must be plainly marked as such, and must not be ** misrepresented as being the original software. ** 3. This notice may not be removed or altered from any source distribution. */ #include "Precomp.h" #include #include #include #include #include "fasttrig.h" #if defined (__GNUC__) && !defined (__i386__) && !defined (__x86_64__) #define NO_SSE #endif #if defined (_MSC_VER) && defined (_M_IX86_FP) && (_M_IX86_FP <= 1) #define NO_SSE #endif #if defined (_MSC_VER) && defined (_M_ARM) #define NO_SSE #endif #if defined (_MSC_VER) && defined (_M_ARM64) #define NO_SSE #endif #ifndef NO_SSE #include #endif extern "C" { #ifdef NO_SSE void Matrix_Null(float result[4][4]) { memset(result, 0, sizeof(float) * 16); } #else void Matrix_Null(float result[4][4]) { __m128 zero = _mm_setzero_ps(); _mm_storeu_ps(result[0], zero); _mm_storeu_ps(result[1], zero); _mm_storeu_ps(result[2], zero); _mm_storeu_ps(result[3], zero); } #endif void Matrix_Identity(float result[4][4]) { Matrix_Null(result); result[0][0] = 1.0f; result[1][1] = 1.0f; result[2][2] = 1.0f; result[3][3] = 1.0f; } void Matrix_Translation(float x, float y, float z, float result[4][4]) { Matrix_Null(result); result[0][0] = 1.0f; result[1][1] = 1.0f; result[2][2] = 1.0f; result[3][0] = x; result[3][1] = y; result[3][2] = z; result[3][3] = 1.0f; } void Matrix_RotationX(float angle, float result[4][4]) { float cos = fastcos(angle); float sin = fastsin(angle); Matrix_Null(result); result[0][0] = 1.0f; result[1][1] = cos; result[1][2] = sin; result[2][1] = -sin; result[2][2] = cos; result[3][3] = 1.0f; } void Matrix_RotationY(float angle, float result[4][4]) { float cos = fastcos(angle); float sin = fastsin(angle); Matrix_Null(result); result[0][0] = cos; result[0][2] = -sin; result[1][1] = 1.0f; result[2][0] = sin; result[2][2] = cos; result[3][3] = 1.0f; } void Matrix_RotationZ(float angle, float result[4][4]) { float cos = fastcos(angle); float sin = fastsin(angle); Matrix_Null(result); result[0][0] = cos; result[0][1] = sin; result[1][0] = -sin; result[1][1] = cos; result[2][2] = 1.0f; result[3][3] = 1.0f; } void Matrix_Scaling(float x, float y, float z, float result[4][4]) { Matrix_Null(result); result[0][0] = x; result[1][1] = y; result[2][2] = z; result[3][3] = 1.0f; } #ifdef NO_SSE void Matrix_Multiply(const float* left, const float* right, float* result) { result[0 * 4 + 0] = left[0 * 4 + 0] * right[0 * 4 + 0] + left[0 * 4 + 1] * right[1 * 4 + 0] + left[0 * 4 + 2] * right[2 * 4 + 0] + left[0 * 4 + 3] * right[3 * 4 + 0]; result[0 * 4 + 1] = left[0 * 4 + 0] * right[0 * 4 + 1] + left[0 * 4 + 1] * right[1 * 4 + 1] + left[0 * 4 + 2] * right[2 * 4 + 1] + left[0 * 4 + 3] * right[3 * 4 + 1]; result[0 * 4 + 2] = left[0 * 4 + 0] * right[0 * 4 + 2] + left[0 * 4 + 1] * right[1 * 4 + 2] + left[0 * 4 + 2] * right[2 * 4 + 2] + left[0 * 4 + 3] * right[3 * 4 + 2]; result[0 * 4 + 3] = left[0 * 4 + 0] * right[0 * 4 + 3] + left[0 * 4 + 1] * right[1 * 4 + 3] + left[0 * 4 + 2] * right[2 * 4 + 3] + left[0 * 4 + 3] * right[3 * 4 + 3]; result[1 * 4 + 0] = left[1 * 4 + 0] * right[0 * 4 + 0] + left[1 * 4 + 1] * right[1 * 4 + 0] + left[1 * 4 + 2] * right[2 * 4 + 0] + left[1 * 4 + 3] * right[3 * 4 + 0]; result[1 * 4 + 1] = left[1 * 4 + 0] * right[0 * 4 + 1] + left[1 * 4 + 1] * right[1 * 4 + 1] + left[1 * 4 + 2] * right[2 * 4 + 1] + left[1 * 4 + 3] * right[3 * 4 + 1]; result[1 * 4 + 2] = left[1 * 4 + 0] * right[0 * 4 + 2] + left[1 * 4 + 1] * right[1 * 4 + 2] + left[1 * 4 + 2] * right[2 * 4 + 2] + left[1 * 4 + 3] * right[3 * 4 + 2]; result[1 * 4 + 3] = left[1 * 4 + 0] * right[0 * 4 + 3] + left[1 * 4 + 1] * right[1 * 4 + 3] + left[1 * 4 + 2] * right[2 * 4 + 3] + left[1 * 4 + 3] * right[3 * 4 + 3]; result[2 * 4 + 0] = left[2 * 4 + 0] * right[0 * 4 + 0] + left[2 * 4 + 1] * right[1 * 4 + 0] + left[2 * 4 + 2] * right[2 * 4 + 0] + left[2 * 4 + 3] * right[3 * 4 + 0]; result[2 * 4 + 1] = left[2 * 4 + 0] * right[0 * 4 + 1] + left[2 * 4 + 1] * right[1 * 4 + 1] + left[2 * 4 + 2] * right[2 * 4 + 1] + left[2 * 4 + 3] * right[3 * 4 + 1]; result[2 * 4 + 2] = left[2 * 4 + 0] * right[0 * 4 + 2] + left[2 * 4 + 1] * right[1 * 4 + 2] + left[2 * 4 + 2] * right[2 * 4 + 2] + left[2 * 4 + 3] * right[3 * 4 + 2]; result[2 * 4 + 3] = left[2 * 4 + 0] * right[0 * 4 + 3] + left[2 * 4 + 1] * right[1 * 4 + 3] + left[2 * 4 + 2] * right[2 * 4 + 3] + left[2 * 4 + 3] * right[3 * 4 + 3]; result[3 * 4 + 0] = left[3 * 4 + 0] * right[0 * 4 + 0] + left[3 * 4 + 1] * right[1 * 4 + 0] + left[3 * 4 + 2] * right[2 * 4 + 0] + left[3 * 4 + 3] * right[3 * 4 + 0]; result[3 * 4 + 1] = left[3 * 4 + 0] * right[0 * 4 + 1] + left[3 * 4 + 1] * right[1 * 4 + 1] + left[3 * 4 + 2] * right[2 * 4 + 1] + left[3 * 4 + 3] * right[3 * 4 + 1]; result[3 * 4 + 2] = left[3 * 4 + 0] * right[0 * 4 + 2] + left[3 * 4 + 1] * right[1 * 4 + 2] + left[3 * 4 + 2] * right[2 * 4 + 2] + left[3 * 4 + 3] * right[3 * 4 + 2]; result[3 * 4 + 3] = left[3 * 4 + 0] * right[0 * 4 + 3] + left[3 * 4 + 1] * right[1 * 4 + 3] + left[3 * 4 + 2] * right[2 * 4 + 3] + left[3 * 4 + 3] * right[3 * 4 + 3]; } #else void Matrix_Multiply(const float a[4][4], const float b[4][4], float result[4][4]) { __m128 otherRow0 = _mm_loadu_ps(b[0]); __m128 otherRow1 = _mm_loadu_ps(b[1]); __m128 otherRow2 = _mm_loadu_ps(b[2]); __m128 otherRow3 = _mm_loadu_ps(b[3]); __m128 newRow0 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[0][0])); newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow1, _mm_set1_ps(a[0][1]))); newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow2, _mm_set1_ps(a[0][2]))); newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow3, _mm_set1_ps(a[0][3]))); __m128 newRow1 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[1][0])); newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow1, _mm_set1_ps(a[1][1]))); newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow2, _mm_set1_ps(a[1][2]))); newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow3, _mm_set1_ps(a[1][3]))); __m128 newRow2 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[2][0])); newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow1, _mm_set1_ps(a[2][1]))); newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow2, _mm_set1_ps(a[2][2]))); newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow3, _mm_set1_ps(a[2][3]))); __m128 newRow3 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[3][0])); newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow1, _mm_set1_ps(a[3][1]))); newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow2, _mm_set1_ps(a[3][2]))); newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow3, _mm_set1_ps(a[3][3]))); _mm_storeu_ps(result[0], newRow0); _mm_storeu_ps(result[1], newRow1); _mm_storeu_ps(result[2], newRow2); _mm_storeu_ps(result[3], newRow3); } #endif }