From b0538ac3c85b951e8fcf05c90a49f4921bb6828c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 21 Dec 2019 03:31:44 +0100 Subject: [PATCH] Move matrix functions to its own file --- Source/Native/BuilderNative.vcxproj | 1 + Source/Native/BuilderNative.vcxproj.filters | 1 + Source/Native/Matrix.cpp | 166 ++++++++++++++++++++ Source/Native/RenderDevice.cpp | 155 ------------------ 4 files changed, 168 insertions(+), 155 deletions(-) create mode 100644 Source/Native/Matrix.cpp diff --git a/Source/Native/BuilderNative.vcxproj b/Source/Native/BuilderNative.vcxproj index 3849251f..3202b4be 100644 --- a/Source/Native/BuilderNative.vcxproj +++ b/Source/Native/BuilderNative.vcxproj @@ -199,6 +199,7 @@ NotUsing + Create diff --git a/Source/Native/BuilderNative.vcxproj.filters b/Source/Native/BuilderNative.vcxproj.filters index f747f87c..715cf0e9 100644 --- a/Source/Native/BuilderNative.vcxproj.filters +++ b/Source/Native/BuilderNative.vcxproj.filters @@ -2,6 +2,7 @@ + diff --git a/Source/Native/Matrix.cpp b/Source/Native/Matrix.cpp new file mode 100644 index 00000000..b3df73bf --- /dev/null +++ b/Source/Native/Matrix.cpp @@ -0,0 +1,166 @@ + +#include "Precomp.h" +#include +#include +#include +#include +#include "fasttrig.h" + +#ifndef NO_SSE +#include +#endif + +extern "C" +{ + +#ifdef NO_SSE + +void Matrix_Null(float result[4][4]) +{ + memset(result, 0, sizeof(float) * 16); +} + +#else + +void Matrix_Null(float result[4][4]) +{ + __m128 zero = _mm_setzero_ps(); + _mm_storeu_ps(result[0], zero); + _mm_storeu_ps(result[1], zero); + _mm_storeu_ps(result[2], zero); + _mm_storeu_ps(result[3], zero); +} + +#endif + +void Matrix_Identity(float result[4][4]) +{ + Matrix_Null(result); + result[0][0] = 1.0f; + result[1][1] = 1.0f; + result[2][2] = 1.0f; + result[3][3] = 1.0f; +} + +void Matrix_Translation(float x, float y, float z, float result[4][4]) +{ + Matrix_Null(result); + result[0][0] = 1.0f; + result[1][1] = 1.0f; + result[2][2] = 1.0f; + result[3][0] = x; + result[3][1] = y; + result[3][2] = z; + result[3][3] = 1.0f; +} + +void Matrix_RotationX(float angle, float result[4][4]) +{ + float cos = fastcos(angle); + float sin = fastsin(angle); + + Matrix_Null(result); + result[0][0] = 1.0f; + result[1][1] = cos; + result[1][2] = sin; + result[2][1] = -sin; + result[2][2] = cos; + result[3][3] = 1.0f; +} + +void Matrix_RotationY(float angle, float result[4][4]) +{ + float cos = fastcos(angle); + float sin = fastsin(angle); + + Matrix_Null(result); + result[0][0] = cos; + result[0][2] = -sin; + result[1][1] = 1.0f; + result[2][0] = sin; + result[2][2] = cos; + result[3][3] = 1.0f; +} + +void Matrix_RotationZ(float angle, float result[4][4]) +{ + float cos = fastcos(angle); + float sin = fastsin(angle); + + Matrix_Null(result); + result[0][0] = cos; + result[0][1] = sin; + result[1][0] = -sin; + result[1][1] = cos; + result[2][2] = 1.0f; + result[3][3] = 1.0f; +} + +void Matrix_Scaling(float x, float y, float z, float result[4][4]) +{ + Matrix_Null(result); + result[0][0] = x; + result[1][1] = y; + result[2][2] = z; + result[3][3] = 1.0f; +} + +#ifdef NO_SSE + +void Matrix_Multiply(const float* left, const float* right, float* result) +{ + result[0 * 4 + 0] = left[0 * 4 + 0] * right[0 * 4 + 0] + left[0 * 4 + 1] * right[1 * 4 + 0] + left[0 * 4 + 2] * right[2 * 4 + 0] + left[0 * 4 + 3] * right[3 * 4 + 0]; + result[0 * 4 + 1] = left[0 * 4 + 0] * right[0 * 4 + 1] + left[0 * 4 + 1] * right[1 * 4 + 1] + left[0 * 4 + 2] * right[2 * 4 + 1] + left[0 * 4 + 3] * right[3 * 4 + 1]; + result[0 * 4 + 2] = left[0 * 4 + 0] * right[0 * 4 + 2] + left[0 * 4 + 1] * right[1 * 4 + 2] + left[0 * 4 + 2] * right[2 * 4 + 2] + left[0 * 4 + 3] * right[3 * 4 + 2]; + result[0 * 4 + 3] = left[0 * 4 + 0] * right[0 * 4 + 3] + left[0 * 4 + 1] * right[1 * 4 + 3] + left[0 * 4 + 2] * right[2 * 4 + 3] + left[0 * 4 + 3] * right[3 * 4 + 3]; + result[1 * 4 + 0] = left[1 * 4 + 0] * right[0 * 4 + 0] + left[1 * 4 + 1] * right[1 * 4 + 0] + left[1 * 4 + 2] * right[2 * 4 + 0] + left[1 * 4 + 3] * right[3 * 4 + 0]; + result[1 * 4 + 1] = left[1 * 4 + 0] * right[0 * 4 + 1] + left[1 * 4 + 1] * right[1 * 4 + 1] + left[1 * 4 + 2] * right[2 * 4 + 1] + left[1 * 4 + 3] * right[3 * 4 + 1]; + result[1 * 4 + 2] = left[1 * 4 + 0] * right[0 * 4 + 2] + left[1 * 4 + 1] * right[1 * 4 + 2] + left[1 * 4 + 2] * right[2 * 4 + 2] + left[1 * 4 + 3] * right[3 * 4 + 2]; + result[1 * 4 + 3] = left[1 * 4 + 0] * right[0 * 4 + 3] + left[1 * 4 + 1] * right[1 * 4 + 3] + left[1 * 4 + 2] * right[2 * 4 + 3] + left[1 * 4 + 3] * right[3 * 4 + 3]; + result[2 * 4 + 0] = left[2 * 4 + 0] * right[0 * 4 + 0] + left[2 * 4 + 1] * right[1 * 4 + 0] + left[2 * 4 + 2] * right[2 * 4 + 0] + left[2 * 4 + 3] * right[3 * 4 + 0]; + result[2 * 4 + 1] = left[2 * 4 + 0] * right[0 * 4 + 1] + left[2 * 4 + 1] * right[1 * 4 + 1] + left[2 * 4 + 2] * right[2 * 4 + 1] + left[2 * 4 + 3] * right[3 * 4 + 1]; + result[2 * 4 + 2] = left[2 * 4 + 0] * right[0 * 4 + 2] + left[2 * 4 + 1] * right[1 * 4 + 2] + left[2 * 4 + 2] * right[2 * 4 + 2] + left[2 * 4 + 3] * right[3 * 4 + 2]; + result[2 * 4 + 3] = left[2 * 4 + 0] * right[0 * 4 + 3] + left[2 * 4 + 1] * right[1 * 4 + 3] + left[2 * 4 + 2] * right[2 * 4 + 3] + left[2 * 4 + 3] * right[3 * 4 + 3]; + result[3 * 4 + 0] = left[3 * 4 + 0] * right[0 * 4 + 0] + left[3 * 4 + 1] * right[1 * 4 + 0] + left[3 * 4 + 2] * right[2 * 4 + 0] + left[3 * 4 + 3] * right[3 * 4 + 0]; + result[3 * 4 + 1] = left[3 * 4 + 0] * right[0 * 4 + 1] + left[3 * 4 + 1] * right[1 * 4 + 1] + left[3 * 4 + 2] * right[2 * 4 + 1] + left[3 * 4 + 3] * right[3 * 4 + 1]; + result[3 * 4 + 2] = left[3 * 4 + 0] * right[0 * 4 + 2] + left[3 * 4 + 1] * right[1 * 4 + 2] + left[3 * 4 + 2] * right[2 * 4 + 2] + left[3 * 4 + 3] * right[3 * 4 + 2]; + result[3 * 4 + 3] = left[3 * 4 + 0] * right[0 * 4 + 3] + left[3 * 4 + 1] * right[1 * 4 + 3] + left[3 * 4 + 2] * right[2 * 4 + 3] + left[3 * 4 + 3] * right[3 * 4 + 3]; +} + +#else + +void Matrix_Multiply(const float a[4][4], const float b[4][4], float result[4][4]) +{ + __m128 otherRow0 = _mm_loadu_ps(b[0]); + __m128 otherRow1 = _mm_loadu_ps(b[1]); + __m128 otherRow2 = _mm_loadu_ps(b[2]); + __m128 otherRow3 = _mm_loadu_ps(b[3]); + + __m128 newRow0 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[0][0])); + newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow1, _mm_set1_ps(a[0][1]))); + newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow2, _mm_set1_ps(a[0][2]))); + newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow3, _mm_set1_ps(a[0][3]))); + + __m128 newRow1 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[1][0])); + newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow1, _mm_set1_ps(a[1][1]))); + newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow2, _mm_set1_ps(a[1][2]))); + newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow3, _mm_set1_ps(a[1][3]))); + + __m128 newRow2 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[2][0])); + newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow1, _mm_set1_ps(a[2][1]))); + newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow2, _mm_set1_ps(a[2][2]))); + newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow3, _mm_set1_ps(a[2][3]))); + + __m128 newRow3 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[3][0])); + newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow1, _mm_set1_ps(a[3][1]))); + newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow2, _mm_set1_ps(a[3][2]))); + newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow3, _mm_set1_ps(a[3][3]))); + + _mm_storeu_ps(result[0], newRow0); + _mm_storeu_ps(result[1], newRow1); + _mm_storeu_ps(result[2], newRow2); + _mm_storeu_ps(result[3], newRow3); +} +#endif + +} diff --git a/Source/Native/RenderDevice.cpp b/Source/Native/RenderDevice.cpp index 370aaf1b..910a31af 100644 --- a/Source/Native/RenderDevice.cpp +++ b/Source/Native/RenderDevice.cpp @@ -9,11 +9,6 @@ #include #include #include -#include "fasttrig.h" - -#ifndef NO_SSE -#include -#endif static bool GLLogStarted = false; static void APIENTRY GLLogCallback(GLenum source, GLenum type, GLuint id, @@ -1019,154 +1014,4 @@ bool RenderDevice_UnmapPBO(RenderDevice* device, Texture* texture) return device->UnmapPBO(texture); } -#ifdef NO_SSE - -void Matrix_Null(float result[4][4]) -{ - memset(result, 0, sizeof(float) * 16); -} - -#else - -void Matrix_Null(float result[4][4]) -{ - __m128 zero = _mm_setzero_ps(); - _mm_storeu_ps(result[0], zero); - _mm_storeu_ps(result[1], zero); - _mm_storeu_ps(result[2], zero); - _mm_storeu_ps(result[3], zero); -} - -#endif - -void Matrix_Identity(float result[4][4]) -{ - Matrix_Null(result); - result[0][0] = 1.0f; - result[1][1] = 1.0f; - result[2][2] = 1.0f; - result[3][3] = 1.0f; -} - -void Matrix_Translation(float x, float y, float z, float result[4][4]) -{ - Matrix_Null(result); - result[0][0] = 1.0f; - result[1][1] = 1.0f; - result[2][2] = 1.0f; - result[3][0] = x; - result[3][1] = y; - result[3][2] = z; - result[3][3] = 1.0f; -} - -void Matrix_RotationX(float angle, float result[4][4]) -{ - float cos = fastcos(angle); - float sin = fastsin(angle); - - Matrix_Null(result); - result[0][0] = 1.0f; - result[1][1] = cos; - result[1][2] = sin; - result[2][1] = -sin; - result[2][2] = cos; - result[3][3] = 1.0f; -} - -void Matrix_RotationY(float angle, float result[4][4]) -{ - float cos = fastcos(angle); - float sin = fastsin(angle); - - Matrix_Null(result); - result[0][0] = cos; - result[0][2] = -sin; - result[1][1] = 1.0f; - result[2][0] = sin; - result[2][2] = cos; - result[3][3] = 1.0f; -} - -void Matrix_RotationZ(float angle, float result[4][4]) -{ - float cos = fastcos(angle); - float sin = fastsin(angle); - - Matrix_Null(result); - result[0][0] = cos; - result[0][1] = sin; - result[1][0] = -sin; - result[1][1] = cos; - result[2][2] = 1.0f; - result[3][3] = 1.0f; -} - -void Matrix_Scaling(float x, float y, float z, float result[4][4]) -{ - Matrix_Null(result); - result[0][0] = x; - result[1][1] = y; - result[2][2] = z; - result[3][3] = 1.0f; -} - -#ifdef NO_SSE - -void Matrix_Multiply(const float* left, const float* right, float* result) -{ - result[0 * 4 + 0] = left[0 * 4 + 0] * right[0 * 4 + 0] + left[0 * 4 + 1] * right[1 * 4 + 0] + left[0 * 4 + 2] * right[2 * 4 + 0] + left[0 * 4 + 3] * right[3 * 4 + 0]; - result[0 * 4 + 1] = left[0 * 4 + 0] * right[0 * 4 + 1] + left[0 * 4 + 1] * right[1 * 4 + 1] + left[0 * 4 + 2] * right[2 * 4 + 1] + left[0 * 4 + 3] * right[3 * 4 + 1]; - result[0 * 4 + 2] = left[0 * 4 + 0] * right[0 * 4 + 2] + left[0 * 4 + 1] * right[1 * 4 + 2] + left[0 * 4 + 2] * right[2 * 4 + 2] + left[0 * 4 + 3] * right[3 * 4 + 2]; - result[0 * 4 + 3] = left[0 * 4 + 0] * right[0 * 4 + 3] + left[0 * 4 + 1] * right[1 * 4 + 3] + left[0 * 4 + 2] * right[2 * 4 + 3] + left[0 * 4 + 3] * right[3 * 4 + 3]; - result[1 * 4 + 0] = left[1 * 4 + 0] * right[0 * 4 + 0] + left[1 * 4 + 1] * right[1 * 4 + 0] + left[1 * 4 + 2] * right[2 * 4 + 0] + left[1 * 4 + 3] * right[3 * 4 + 0]; - result[1 * 4 + 1] = left[1 * 4 + 0] * right[0 * 4 + 1] + left[1 * 4 + 1] * right[1 * 4 + 1] + left[1 * 4 + 2] * right[2 * 4 + 1] + left[1 * 4 + 3] * right[3 * 4 + 1]; - result[1 * 4 + 2] = left[1 * 4 + 0] * right[0 * 4 + 2] + left[1 * 4 + 1] * right[1 * 4 + 2] + left[1 * 4 + 2] * right[2 * 4 + 2] + left[1 * 4 + 3] * right[3 * 4 + 2]; - result[1 * 4 + 3] = left[1 * 4 + 0] * right[0 * 4 + 3] + left[1 * 4 + 1] * right[1 * 4 + 3] + left[1 * 4 + 2] * right[2 * 4 + 3] + left[1 * 4 + 3] * right[3 * 4 + 3]; - result[2 * 4 + 0] = left[2 * 4 + 0] * right[0 * 4 + 0] + left[2 * 4 + 1] * right[1 * 4 + 0] + left[2 * 4 + 2] * right[2 * 4 + 0] + left[2 * 4 + 3] * right[3 * 4 + 0]; - result[2 * 4 + 1] = left[2 * 4 + 0] * right[0 * 4 + 1] + left[2 * 4 + 1] * right[1 * 4 + 1] + left[2 * 4 + 2] * right[2 * 4 + 1] + left[2 * 4 + 3] * right[3 * 4 + 1]; - result[2 * 4 + 2] = left[2 * 4 + 0] * right[0 * 4 + 2] + left[2 * 4 + 1] * right[1 * 4 + 2] + left[2 * 4 + 2] * right[2 * 4 + 2] + left[2 * 4 + 3] * right[3 * 4 + 2]; - result[2 * 4 + 3] = left[2 * 4 + 0] * right[0 * 4 + 3] + left[2 * 4 + 1] * right[1 * 4 + 3] + left[2 * 4 + 2] * right[2 * 4 + 3] + left[2 * 4 + 3] * right[3 * 4 + 3]; - result[3 * 4 + 0] = left[3 * 4 + 0] * right[0 * 4 + 0] + left[3 * 4 + 1] * right[1 * 4 + 0] + left[3 * 4 + 2] * right[2 * 4 + 0] + left[3 * 4 + 3] * right[3 * 4 + 0]; - result[3 * 4 + 1] = left[3 * 4 + 0] * right[0 * 4 + 1] + left[3 * 4 + 1] * right[1 * 4 + 1] + left[3 * 4 + 2] * right[2 * 4 + 1] + left[3 * 4 + 3] * right[3 * 4 + 1]; - result[3 * 4 + 2] = left[3 * 4 + 0] * right[0 * 4 + 2] + left[3 * 4 + 1] * right[1 * 4 + 2] + left[3 * 4 + 2] * right[2 * 4 + 2] + left[3 * 4 + 3] * right[3 * 4 + 2]; - result[3 * 4 + 3] = left[3 * 4 + 0] * right[0 * 4 + 3] + left[3 * 4 + 1] * right[1 * 4 + 3] + left[3 * 4 + 2] * right[2 * 4 + 3] + left[3 * 4 + 3] * right[3 * 4 + 3]; -} - -#else - -void Matrix_Multiply(const float a[4][4], const float b[4][4], float result[4][4]) -{ - __m128 otherRow0 = _mm_loadu_ps(b[0]); - __m128 otherRow1 = _mm_loadu_ps(b[1]); - __m128 otherRow2 = _mm_loadu_ps(b[2]); - __m128 otherRow3 = _mm_loadu_ps(b[3]); - - __m128 newRow0 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[0][0])); - newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow1, _mm_set1_ps(a[0][1]))); - newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow2, _mm_set1_ps(a[0][2]))); - newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow3, _mm_set1_ps(a[0][3]))); - - __m128 newRow1 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[1][0])); - newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow1, _mm_set1_ps(a[1][1]))); - newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow2, _mm_set1_ps(a[1][2]))); - newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow3, _mm_set1_ps(a[1][3]))); - - __m128 newRow2 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[2][0])); - newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow1, _mm_set1_ps(a[2][1]))); - newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow2, _mm_set1_ps(a[2][2]))); - newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow3, _mm_set1_ps(a[2][3]))); - - __m128 newRow3 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[3][0])); - newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow1, _mm_set1_ps(a[3][1]))); - newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow2, _mm_set1_ps(a[3][2]))); - newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow3, _mm_set1_ps(a[3][3]))); - - _mm_storeu_ps(result[0], newRow0); - _mm_storeu_ps(result[1], newRow1); - _mm_storeu_ps(result[2], newRow2); - _mm_storeu_ps(result[3], newRow3); -} -#endif - }