Move matrix functions to its own file

This commit is contained in:
Magnus Norddahl 2019-12-21 03:31:44 +01:00
parent 28406cf1a7
commit b0538ac3c8
4 changed files with 168 additions and 155 deletions

View file

@ -199,6 +199,7 @@
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader> <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
</ClCompile> </ClCompile>
<ClCompile Include="IndexBuffer.cpp" /> <ClCompile Include="IndexBuffer.cpp" />
<ClCompile Include="Matrix.cpp.cpp" />
<ClCompile Include="OpenGLContext.cpp" /> <ClCompile Include="OpenGLContext.cpp" />
<ClCompile Include="Precomp.cpp"> <ClCompile Include="Precomp.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader> <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>

View file

@ -2,6 +2,7 @@
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup> <ItemGroup>
<ClCompile Include="IndexBuffer.cpp" /> <ClCompile Include="IndexBuffer.cpp" />
<ClCompile Include="Matrix.cpp" />
<ClCompile Include="RenderDevice.cpp" /> <ClCompile Include="RenderDevice.cpp" />
<ClCompile Include="Texture.cpp" /> <ClCompile Include="Texture.cpp" />
<ClCompile Include="VertexBuffer.cpp" /> <ClCompile Include="VertexBuffer.cpp" />

166
Source/Native/Matrix.cpp Normal file
View file

@ -0,0 +1,166 @@

#include "Precomp.h"
#include <stdexcept>
#include <cstdarg>
#include <algorithm>
#include <cmath>
#include "fasttrig.h"
#ifndef NO_SSE
#include <xmmintrin.h>
#endif
extern "C"
{
#ifdef NO_SSE
void Matrix_Null(float result[4][4])
{
memset(result, 0, sizeof(float) * 16);
}
#else
void Matrix_Null(float result[4][4])
{
__m128 zero = _mm_setzero_ps();
_mm_storeu_ps(result[0], zero);
_mm_storeu_ps(result[1], zero);
_mm_storeu_ps(result[2], zero);
_mm_storeu_ps(result[3], zero);
}
#endif
void Matrix_Identity(float result[4][4])
{
Matrix_Null(result);
result[0][0] = 1.0f;
result[1][1] = 1.0f;
result[2][2] = 1.0f;
result[3][3] = 1.0f;
}
void Matrix_Translation(float x, float y, float z, float result[4][4])
{
Matrix_Null(result);
result[0][0] = 1.0f;
result[1][1] = 1.0f;
result[2][2] = 1.0f;
result[3][0] = x;
result[3][1] = y;
result[3][2] = z;
result[3][3] = 1.0f;
}
void Matrix_RotationX(float angle, float result[4][4])
{
float cos = fastcos(angle);
float sin = fastsin(angle);
Matrix_Null(result);
result[0][0] = 1.0f;
result[1][1] = cos;
result[1][2] = sin;
result[2][1] = -sin;
result[2][2] = cos;
result[3][3] = 1.0f;
}
void Matrix_RotationY(float angle, float result[4][4])
{
float cos = fastcos(angle);
float sin = fastsin(angle);
Matrix_Null(result);
result[0][0] = cos;
result[0][2] = -sin;
result[1][1] = 1.0f;
result[2][0] = sin;
result[2][2] = cos;
result[3][3] = 1.0f;
}
void Matrix_RotationZ(float angle, float result[4][4])
{
float cos = fastcos(angle);
float sin = fastsin(angle);
Matrix_Null(result);
result[0][0] = cos;
result[0][1] = sin;
result[1][0] = -sin;
result[1][1] = cos;
result[2][2] = 1.0f;
result[3][3] = 1.0f;
}
void Matrix_Scaling(float x, float y, float z, float result[4][4])
{
Matrix_Null(result);
result[0][0] = x;
result[1][1] = y;
result[2][2] = z;
result[3][3] = 1.0f;
}
#ifdef NO_SSE
void Matrix_Multiply(const float* left, const float* right, float* result)
{
result[0 * 4 + 0] = left[0 * 4 + 0] * right[0 * 4 + 0] + left[0 * 4 + 1] * right[1 * 4 + 0] + left[0 * 4 + 2] * right[2 * 4 + 0] + left[0 * 4 + 3] * right[3 * 4 + 0];
result[0 * 4 + 1] = left[0 * 4 + 0] * right[0 * 4 + 1] + left[0 * 4 + 1] * right[1 * 4 + 1] + left[0 * 4 + 2] * right[2 * 4 + 1] + left[0 * 4 + 3] * right[3 * 4 + 1];
result[0 * 4 + 2] = left[0 * 4 + 0] * right[0 * 4 + 2] + left[0 * 4 + 1] * right[1 * 4 + 2] + left[0 * 4 + 2] * right[2 * 4 + 2] + left[0 * 4 + 3] * right[3 * 4 + 2];
result[0 * 4 + 3] = left[0 * 4 + 0] * right[0 * 4 + 3] + left[0 * 4 + 1] * right[1 * 4 + 3] + left[0 * 4 + 2] * right[2 * 4 + 3] + left[0 * 4 + 3] * right[3 * 4 + 3];
result[1 * 4 + 0] = left[1 * 4 + 0] * right[0 * 4 + 0] + left[1 * 4 + 1] * right[1 * 4 + 0] + left[1 * 4 + 2] * right[2 * 4 + 0] + left[1 * 4 + 3] * right[3 * 4 + 0];
result[1 * 4 + 1] = left[1 * 4 + 0] * right[0 * 4 + 1] + left[1 * 4 + 1] * right[1 * 4 + 1] + left[1 * 4 + 2] * right[2 * 4 + 1] + left[1 * 4 + 3] * right[3 * 4 + 1];
result[1 * 4 + 2] = left[1 * 4 + 0] * right[0 * 4 + 2] + left[1 * 4 + 1] * right[1 * 4 + 2] + left[1 * 4 + 2] * right[2 * 4 + 2] + left[1 * 4 + 3] * right[3 * 4 + 2];
result[1 * 4 + 3] = left[1 * 4 + 0] * right[0 * 4 + 3] + left[1 * 4 + 1] * right[1 * 4 + 3] + left[1 * 4 + 2] * right[2 * 4 + 3] + left[1 * 4 + 3] * right[3 * 4 + 3];
result[2 * 4 + 0] = left[2 * 4 + 0] * right[0 * 4 + 0] + left[2 * 4 + 1] * right[1 * 4 + 0] + left[2 * 4 + 2] * right[2 * 4 + 0] + left[2 * 4 + 3] * right[3 * 4 + 0];
result[2 * 4 + 1] = left[2 * 4 + 0] * right[0 * 4 + 1] + left[2 * 4 + 1] * right[1 * 4 + 1] + left[2 * 4 + 2] * right[2 * 4 + 1] + left[2 * 4 + 3] * right[3 * 4 + 1];
result[2 * 4 + 2] = left[2 * 4 + 0] * right[0 * 4 + 2] + left[2 * 4 + 1] * right[1 * 4 + 2] + left[2 * 4 + 2] * right[2 * 4 + 2] + left[2 * 4 + 3] * right[3 * 4 + 2];
result[2 * 4 + 3] = left[2 * 4 + 0] * right[0 * 4 + 3] + left[2 * 4 + 1] * right[1 * 4 + 3] + left[2 * 4 + 2] * right[2 * 4 + 3] + left[2 * 4 + 3] * right[3 * 4 + 3];
result[3 * 4 + 0] = left[3 * 4 + 0] * right[0 * 4 + 0] + left[3 * 4 + 1] * right[1 * 4 + 0] + left[3 * 4 + 2] * right[2 * 4 + 0] + left[3 * 4 + 3] * right[3 * 4 + 0];
result[3 * 4 + 1] = left[3 * 4 + 0] * right[0 * 4 + 1] + left[3 * 4 + 1] * right[1 * 4 + 1] + left[3 * 4 + 2] * right[2 * 4 + 1] + left[3 * 4 + 3] * right[3 * 4 + 1];
result[3 * 4 + 2] = left[3 * 4 + 0] * right[0 * 4 + 2] + left[3 * 4 + 1] * right[1 * 4 + 2] + left[3 * 4 + 2] * right[2 * 4 + 2] + left[3 * 4 + 3] * right[3 * 4 + 2];
result[3 * 4 + 3] = left[3 * 4 + 0] * right[0 * 4 + 3] + left[3 * 4 + 1] * right[1 * 4 + 3] + left[3 * 4 + 2] * right[2 * 4 + 3] + left[3 * 4 + 3] * right[3 * 4 + 3];
}
#else
void Matrix_Multiply(const float a[4][4], const float b[4][4], float result[4][4])
{
__m128 otherRow0 = _mm_loadu_ps(b[0]);
__m128 otherRow1 = _mm_loadu_ps(b[1]);
__m128 otherRow2 = _mm_loadu_ps(b[2]);
__m128 otherRow3 = _mm_loadu_ps(b[3]);
__m128 newRow0 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[0][0]));
newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow1, _mm_set1_ps(a[0][1])));
newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow2, _mm_set1_ps(a[0][2])));
newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow3, _mm_set1_ps(a[0][3])));
__m128 newRow1 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[1][0]));
newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow1, _mm_set1_ps(a[1][1])));
newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow2, _mm_set1_ps(a[1][2])));
newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow3, _mm_set1_ps(a[1][3])));
__m128 newRow2 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[2][0]));
newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow1, _mm_set1_ps(a[2][1])));
newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow2, _mm_set1_ps(a[2][2])));
newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow3, _mm_set1_ps(a[2][3])));
__m128 newRow3 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[3][0]));
newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow1, _mm_set1_ps(a[3][1])));
newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow2, _mm_set1_ps(a[3][2])));
newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow3, _mm_set1_ps(a[3][3])));
_mm_storeu_ps(result[0], newRow0);
_mm_storeu_ps(result[1], newRow1);
_mm_storeu_ps(result[2], newRow2);
_mm_storeu_ps(result[3], newRow3);
}
#endif
}

View file

@ -9,11 +9,6 @@
#include <cstdarg> #include <cstdarg>
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include "fasttrig.h"
#ifndef NO_SSE
#include <xmmintrin.h>
#endif
static bool GLLogStarted = false; static bool GLLogStarted = false;
static void APIENTRY GLLogCallback(GLenum source, GLenum type, GLuint id, static void APIENTRY GLLogCallback(GLenum source, GLenum type, GLuint id,
@ -1019,154 +1014,4 @@ bool RenderDevice_UnmapPBO(RenderDevice* device, Texture* texture)
return device->UnmapPBO(texture); return device->UnmapPBO(texture);
} }
#ifdef NO_SSE
void Matrix_Null(float result[4][4])
{
memset(result, 0, sizeof(float) * 16);
}
#else
void Matrix_Null(float result[4][4])
{
__m128 zero = _mm_setzero_ps();
_mm_storeu_ps(result[0], zero);
_mm_storeu_ps(result[1], zero);
_mm_storeu_ps(result[2], zero);
_mm_storeu_ps(result[3], zero);
}
#endif
void Matrix_Identity(float result[4][4])
{
Matrix_Null(result);
result[0][0] = 1.0f;
result[1][1] = 1.0f;
result[2][2] = 1.0f;
result[3][3] = 1.0f;
}
void Matrix_Translation(float x, float y, float z, float result[4][4])
{
Matrix_Null(result);
result[0][0] = 1.0f;
result[1][1] = 1.0f;
result[2][2] = 1.0f;
result[3][0] = x;
result[3][1] = y;
result[3][2] = z;
result[3][3] = 1.0f;
}
void Matrix_RotationX(float angle, float result[4][4])
{
float cos = fastcos(angle);
float sin = fastsin(angle);
Matrix_Null(result);
result[0][0] = 1.0f;
result[1][1] = cos;
result[1][2] = sin;
result[2][1] = -sin;
result[2][2] = cos;
result[3][3] = 1.0f;
}
void Matrix_RotationY(float angle, float result[4][4])
{
float cos = fastcos(angle);
float sin = fastsin(angle);
Matrix_Null(result);
result[0][0] = cos;
result[0][2] = -sin;
result[1][1] = 1.0f;
result[2][0] = sin;
result[2][2] = cos;
result[3][3] = 1.0f;
}
void Matrix_RotationZ(float angle, float result[4][4])
{
float cos = fastcos(angle);
float sin = fastsin(angle);
Matrix_Null(result);
result[0][0] = cos;
result[0][1] = sin;
result[1][0] = -sin;
result[1][1] = cos;
result[2][2] = 1.0f;
result[3][3] = 1.0f;
}
void Matrix_Scaling(float x, float y, float z, float result[4][4])
{
Matrix_Null(result);
result[0][0] = x;
result[1][1] = y;
result[2][2] = z;
result[3][3] = 1.0f;
}
#ifdef NO_SSE
void Matrix_Multiply(const float* left, const float* right, float* result)
{
result[0 * 4 + 0] = left[0 * 4 + 0] * right[0 * 4 + 0] + left[0 * 4 + 1] * right[1 * 4 + 0] + left[0 * 4 + 2] * right[2 * 4 + 0] + left[0 * 4 + 3] * right[3 * 4 + 0];
result[0 * 4 + 1] = left[0 * 4 + 0] * right[0 * 4 + 1] + left[0 * 4 + 1] * right[1 * 4 + 1] + left[0 * 4 + 2] * right[2 * 4 + 1] + left[0 * 4 + 3] * right[3 * 4 + 1];
result[0 * 4 + 2] = left[0 * 4 + 0] * right[0 * 4 + 2] + left[0 * 4 + 1] * right[1 * 4 + 2] + left[0 * 4 + 2] * right[2 * 4 + 2] + left[0 * 4 + 3] * right[3 * 4 + 2];
result[0 * 4 + 3] = left[0 * 4 + 0] * right[0 * 4 + 3] + left[0 * 4 + 1] * right[1 * 4 + 3] + left[0 * 4 + 2] * right[2 * 4 + 3] + left[0 * 4 + 3] * right[3 * 4 + 3];
result[1 * 4 + 0] = left[1 * 4 + 0] * right[0 * 4 + 0] + left[1 * 4 + 1] * right[1 * 4 + 0] + left[1 * 4 + 2] * right[2 * 4 + 0] + left[1 * 4 + 3] * right[3 * 4 + 0];
result[1 * 4 + 1] = left[1 * 4 + 0] * right[0 * 4 + 1] + left[1 * 4 + 1] * right[1 * 4 + 1] + left[1 * 4 + 2] * right[2 * 4 + 1] + left[1 * 4 + 3] * right[3 * 4 + 1];
result[1 * 4 + 2] = left[1 * 4 + 0] * right[0 * 4 + 2] + left[1 * 4 + 1] * right[1 * 4 + 2] + left[1 * 4 + 2] * right[2 * 4 + 2] + left[1 * 4 + 3] * right[3 * 4 + 2];
result[1 * 4 + 3] = left[1 * 4 + 0] * right[0 * 4 + 3] + left[1 * 4 + 1] * right[1 * 4 + 3] + left[1 * 4 + 2] * right[2 * 4 + 3] + left[1 * 4 + 3] * right[3 * 4 + 3];
result[2 * 4 + 0] = left[2 * 4 + 0] * right[0 * 4 + 0] + left[2 * 4 + 1] * right[1 * 4 + 0] + left[2 * 4 + 2] * right[2 * 4 + 0] + left[2 * 4 + 3] * right[3 * 4 + 0];
result[2 * 4 + 1] = left[2 * 4 + 0] * right[0 * 4 + 1] + left[2 * 4 + 1] * right[1 * 4 + 1] + left[2 * 4 + 2] * right[2 * 4 + 1] + left[2 * 4 + 3] * right[3 * 4 + 1];
result[2 * 4 + 2] = left[2 * 4 + 0] * right[0 * 4 + 2] + left[2 * 4 + 1] * right[1 * 4 + 2] + left[2 * 4 + 2] * right[2 * 4 + 2] + left[2 * 4 + 3] * right[3 * 4 + 2];
result[2 * 4 + 3] = left[2 * 4 + 0] * right[0 * 4 + 3] + left[2 * 4 + 1] * right[1 * 4 + 3] + left[2 * 4 + 2] * right[2 * 4 + 3] + left[2 * 4 + 3] * right[3 * 4 + 3];
result[3 * 4 + 0] = left[3 * 4 + 0] * right[0 * 4 + 0] + left[3 * 4 + 1] * right[1 * 4 + 0] + left[3 * 4 + 2] * right[2 * 4 + 0] + left[3 * 4 + 3] * right[3 * 4 + 0];
result[3 * 4 + 1] = left[3 * 4 + 0] * right[0 * 4 + 1] + left[3 * 4 + 1] * right[1 * 4 + 1] + left[3 * 4 + 2] * right[2 * 4 + 1] + left[3 * 4 + 3] * right[3 * 4 + 1];
result[3 * 4 + 2] = left[3 * 4 + 0] * right[0 * 4 + 2] + left[3 * 4 + 1] * right[1 * 4 + 2] + left[3 * 4 + 2] * right[2 * 4 + 2] + left[3 * 4 + 3] * right[3 * 4 + 2];
result[3 * 4 + 3] = left[3 * 4 + 0] * right[0 * 4 + 3] + left[3 * 4 + 1] * right[1 * 4 + 3] + left[3 * 4 + 2] * right[2 * 4 + 3] + left[3 * 4 + 3] * right[3 * 4 + 3];
}
#else
void Matrix_Multiply(const float a[4][4], const float b[4][4], float result[4][4])
{
__m128 otherRow0 = _mm_loadu_ps(b[0]);
__m128 otherRow1 = _mm_loadu_ps(b[1]);
__m128 otherRow2 = _mm_loadu_ps(b[2]);
__m128 otherRow3 = _mm_loadu_ps(b[3]);
__m128 newRow0 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[0][0]));
newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow1, _mm_set1_ps(a[0][1])));
newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow2, _mm_set1_ps(a[0][2])));
newRow0 = _mm_add_ps(newRow0, _mm_mul_ps(otherRow3, _mm_set1_ps(a[0][3])));
__m128 newRow1 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[1][0]));
newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow1, _mm_set1_ps(a[1][1])));
newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow2, _mm_set1_ps(a[1][2])));
newRow1 = _mm_add_ps(newRow1, _mm_mul_ps(otherRow3, _mm_set1_ps(a[1][3])));
__m128 newRow2 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[2][0]));
newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow1, _mm_set1_ps(a[2][1])));
newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow2, _mm_set1_ps(a[2][2])));
newRow2 = _mm_add_ps(newRow2, _mm_mul_ps(otherRow3, _mm_set1_ps(a[2][3])));
__m128 newRow3 = _mm_mul_ps(otherRow0, _mm_set1_ps(a[3][0]));
newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow1, _mm_set1_ps(a[3][1])));
newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow2, _mm_set1_ps(a[3][2])));
newRow3 = _mm_add_ps(newRow3, _mm_mul_ps(otherRow3, _mm_set1_ps(a[3][3])));
_mm_storeu_ps(result[0], newRow0);
_mm_storeu_ps(result[1], newRow1);
_mm_storeu_ps(result[2], newRow2);
_mm_storeu_ps(result[3], newRow3);
}
#endif
} }