mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-12-12 05:31:58 +00:00
This commit is contained in:
commit
f5e1502bd7
6 changed files with 120 additions and 34 deletions
|
@ -478,7 +478,12 @@ if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||||
|
|
||||||
# ARM processors (Raspberry Pi, et al) - enable ARM NEON support.
|
# ARM processors (Raspberry Pi, et al) - enable ARM NEON support.
|
||||||
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
||||||
set( CMAKE_CXX_FLAGS "-mfpu=neon ${CMAKE_CXX_FLAGS}" )
|
set (USE_ARMV8 0 CACHE BOOL "Use ARMv8 instructions - Raspberry Pi 3")
|
||||||
|
if (USE_ARMV8)
|
||||||
|
set( CMAKE_CXX_FLAGS "-mcpu=cortex-a53 -mfpu=neon-fp-armv8 -mtune=cortex-a53 -funsafe-math-optimizations -mhard-float -DNO_SSE ${CMAKE_CXX_FLAGS}" )
|
||||||
|
else ()
|
||||||
|
set( CMAKE_CXX_FLAGS "-mfpu=neon -DNO_SSE ${CMAKE_CXX_FLAGS}" )
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# Use the highest C++ standard available since VS2015 compiles with C++14
|
# Use the highest C++ standard available since VS2015 compiles with C++14
|
||||||
|
|
|
@ -127,7 +127,8 @@ public:
|
||||||
int lightshade = lightpos >> 8;
|
int lightshade = lightpos >> 8;
|
||||||
uint8_t bgcolor = dest[x * 8 + ix];
|
uint8_t bgcolor = dest[x * 8 + ix];
|
||||||
uint8_t fgcolor = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, color, translation);
|
uint8_t fgcolor = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, color, translation);
|
||||||
dest[x * 8 + ix] = ShadeAndBlend(fgcolor, bgcolor, lightshade, colormaps, srcalpha, destalpha);
|
uint32_t fgshade = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight);
|
||||||
|
dest[x * 8 + ix] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
|
||||||
for (int j = 0; j < TriVertex::NumVarying; j++)
|
for (int j = 0; j < TriVertex::NumVarying; j++)
|
||||||
varyingPos[j] += varyingStep[j];
|
varyingPos[j] += varyingStep[j];
|
||||||
lightpos += lightstep;
|
lightpos += lightstep;
|
||||||
|
@ -191,7 +192,8 @@ public:
|
||||||
int lightshade = lightpos >> 8;
|
int lightshade = lightpos >> 8;
|
||||||
uint8_t bgcolor = dest[x];
|
uint8_t bgcolor = dest[x];
|
||||||
uint8_t fgcolor = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, color, translation);
|
uint8_t fgcolor = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, color, translation);
|
||||||
dest[x] = ShadeAndBlend(fgcolor, bgcolor, lightshade, colormaps, srcalpha, destalpha);
|
uint32_t fgshade = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight);
|
||||||
|
dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < TriVertex::NumVarying; j++)
|
for (int j = 0; j < TriVertex::NumVarying; j++)
|
||||||
|
@ -244,7 +246,8 @@ public:
|
||||||
int lightshade = lightpos >> 8;
|
int lightshade = lightpos >> 8;
|
||||||
uint8_t bgcolor = dest[x];
|
uint8_t bgcolor = dest[x];
|
||||||
uint8_t fgcolor = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, color, translation);
|
uint8_t fgcolor = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, color, translation);
|
||||||
dest[x] = ShadeAndBlend(fgcolor, bgcolor, lightshade, colormaps, srcalpha, destalpha);
|
uint32_t fgshade = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight);
|
||||||
|
dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < TriVertex::NumVarying; j++)
|
for (int j = 0; j < TriVertex::NumVarying; j++)
|
||||||
|
@ -288,7 +291,6 @@ private:
|
||||||
|
|
||||||
if (SamplerT::Mode == (int)Samplers::Skycap)
|
if (SamplerT::Mode == (int)Samplers::Skycap)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
int start_fade = 2; // How fast it should fade out
|
int start_fade = 2; // How fast it should fade out
|
||||||
|
|
||||||
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
|
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
|
||||||
|
@ -296,19 +298,20 @@ private:
|
||||||
int a = MIN(alpha_top, alpha_bottom);
|
int a = MIN(alpha_top, alpha_bottom);
|
||||||
int inv_a = 256 - a;
|
int inv_a = 256 - a;
|
||||||
|
|
||||||
uint32_t r = RPART(texel);
|
if (a == 256)
|
||||||
uint32_t g = GPART(texel);
|
return texel;
|
||||||
uint32_t b = BPART(texel);
|
|
||||||
uint32_t fg_a = APART(texel);
|
uint32_t texelrgb = GPalette.BaseColors[texel].d;
|
||||||
uint32_t bg_red = RPART(color);
|
uint32_t r = RPART(texelrgb);
|
||||||
uint32_t bg_green = GPART(color);
|
uint32_t g = GPART(texelrgb);
|
||||||
uint32_t bg_blue = BPART(color);
|
uint32_t b = BPART(texelrgb);
|
||||||
r = (r * a + bg_red * inv_a + 127) >> 8;
|
uint32_t capcolor_red = RPART(color);
|
||||||
g = (g * a + bg_green * inv_a + 127) >> 8;
|
uint32_t capcolor_green = GPART(color);
|
||||||
b = (b * a + bg_blue * inv_a + 127) >> 8;
|
uint32_t capcolor_blue = BPART(color);
|
||||||
return MAKEARGB(fg_a, r, g, b);
|
r = (r * a + capcolor_red * inv_a + 127) >> 8;
|
||||||
*/
|
g = (g * a + capcolor_green * inv_a + 127) >> 8;
|
||||||
return texel;
|
b = (b * a + capcolor_blue * inv_a + 127) >> 8;
|
||||||
|
return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -316,12 +319,30 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCEINLINE static uint8_t ShadeAndBlend(uint8_t fgcolor, uint8_t bgcolor, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha)
|
FORCEINLINE static unsigned int SampleShade(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight)
|
||||||
{
|
{
|
||||||
using namespace TriScreenDrawerModes;
|
using namespace TriScreenDrawerModes;
|
||||||
|
|
||||||
lightshade = ((256 - lightshade) * (NUMCOLORMAPS - 1) + (NUMCOLORMAPS - 1) / 2) / 256;
|
if (SamplerT::Mode == (int)Samplers::Shaded)
|
||||||
uint8_t shadedfg = colormaps[lightshade * 256 + fgcolor];
|
{
|
||||||
|
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
|
||||||
|
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
|
||||||
|
unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY];
|
||||||
|
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
|
||||||
|
return sampleshadeout;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FORCEINLINE static uint8_t ShadeAndBlend(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha)
|
||||||
|
{
|
||||||
|
using namespace TriScreenDrawerModes;
|
||||||
|
|
||||||
|
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00;
|
||||||
|
uint8_t shadedfg = colormaps[lightshade + fgcolor];
|
||||||
|
|
||||||
if (BlendT::Mode == (int)BlendModes::Opaque)
|
if (BlendT::Mode == (int)BlendModes::Opaque)
|
||||||
{
|
{
|
||||||
|
@ -333,19 +354,87 @@ private:
|
||||||
}
|
}
|
||||||
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
|
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
|
||||||
{
|
{
|
||||||
|
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
|
||||||
|
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
|
||||||
|
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
|
||||||
|
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
|
||||||
|
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
|
||||||
|
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
|
||||||
|
int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7));
|
||||||
|
int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7));
|
||||||
|
int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7));
|
||||||
|
fg_r = MIN<int32_t>(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255);
|
||||||
|
fg_g = MIN<int32_t>(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255);
|
||||||
|
fg_b = MIN<int32_t>(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255);
|
||||||
|
|
||||||
|
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
|
||||||
return (fgcolor != 0) ? shadedfg : bgcolor;
|
return (fgcolor != 0) ? shadedfg : bgcolor;
|
||||||
}
|
}
|
||||||
|
else if (BlendT::Mode == (int)BlendModes::Shaded)
|
||||||
|
{
|
||||||
|
uint32_t alpha = fgshade;
|
||||||
|
uint32_t inv_alpha = 256 - fgshade;
|
||||||
|
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
|
||||||
|
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
|
||||||
|
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
|
||||||
|
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
|
||||||
|
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
|
||||||
|
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
|
||||||
|
|
||||||
|
fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8;
|
||||||
|
fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8;
|
||||||
|
fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8;
|
||||||
|
|
||||||
|
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
|
||||||
|
return (alpha != 0) ? shadedfg : bgcolor;
|
||||||
|
}
|
||||||
|
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
|
||||||
|
{
|
||||||
|
uint32_t alpha = fgshade;
|
||||||
|
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
|
||||||
|
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
|
||||||
|
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
|
||||||
|
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
|
||||||
|
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
|
||||||
|
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
|
||||||
|
|
||||||
|
fg_r = MIN<int32_t>(bg_r + ((fg_r * alpha + 127) >> 8), 255);
|
||||||
|
fg_g = MIN<int32_t>(bg_g + ((fg_g * alpha + 127) >> 8), 255);
|
||||||
|
fg_b = MIN<int32_t>(bg_b + ((fg_b * alpha + 127) >> 8), 255);
|
||||||
|
|
||||||
|
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
|
||||||
|
|
||||||
|
return (alpha != 0) ? shadedfg : bgcolor;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
|
||||||
|
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
|
||||||
|
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
|
||||||
|
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
|
||||||
|
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
|
||||||
|
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
|
||||||
|
|
||||||
if (BlendT::Mode == (int)BlendModes::AddClamp)
|
if (BlendT::Mode == (int)BlendModes::AddClamp)
|
||||||
{
|
{
|
||||||
|
fg_r = MIN<int32_t>((fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255);
|
||||||
|
fg_g = MIN<int32_t>((fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255);
|
||||||
|
fg_b = MIN<int32_t>((fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255);
|
||||||
}
|
}
|
||||||
else if (BlendT::Mode == (int)BlendModes::SubClamp)
|
else if (BlendT::Mode == (int)BlendModes::SubClamp)
|
||||||
{
|
{
|
||||||
|
fg_r = MAX<int32_t>((fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0);
|
||||||
|
fg_g = MAX<int32_t>((fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0);
|
||||||
|
fg_b = MAX<int32_t>((fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0);
|
||||||
}
|
}
|
||||||
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
|
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
|
||||||
{
|
{
|
||||||
|
fg_r = MAX<int32_t>((bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0);
|
||||||
|
fg_g = MAX<int32_t>((bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0);
|
||||||
|
fg_b = MAX<int32_t>((bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
|
||||||
return (fgcolor != 0) ? shadedfg : bgcolor;
|
return (fgcolor != 0) ? shadedfg : bgcolor;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -304,7 +304,7 @@ int PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippe
|
||||||
clipd[5] = v.w - v.z;
|
clipd[5] = v.w - v.z;
|
||||||
clipd[6] = v.clipDistance0;
|
clipd[6] = v.clipDistance0;
|
||||||
needsclipping = needsclipping || clipd[0] < 0.0f || clipd[1] < 0.0f || clipd[2] < 0.0f || clipd[3] < 0.0f || clipd[4] < 0.0f || clipd[5] < 0.0f || clipd[6] < 0.0f;
|
needsclipping = needsclipping || clipd[0] < 0.0f || clipd[1] < 0.0f || clipd[2] < 0.0f || clipd[3] < 0.0f || clipd[4] < 0.0f || clipd[5] < 0.0f || clipd[6] < 0.0f;
|
||||||
clipd += numclipdistancespitch;
|
clipd += numclipdistances;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If all halfspace clip distances are positive then the entire triangle is visible. Skip the expensive clipping step.
|
// If all halfspace clip distances are positive then the entire triangle is visible. Skip the expensive clipping step.
|
||||||
|
|
|
@ -58,7 +58,7 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa
|
||||||
TriPartialBlock * RESTRICT partial = thread->PartialBlocks;
|
TriPartialBlock * RESTRICT partial = thread->PartialBlocks;
|
||||||
|
|
||||||
// 28.4 fixed-point coordinates
|
// 28.4 fixed-point coordinates
|
||||||
#if NO_SSE
|
#ifdef NO_SSE
|
||||||
const int Y1 = (int)round(16.0f * v1.y);
|
const int Y1 = (int)round(16.0f * v1.y);
|
||||||
const int Y2 = (int)round(16.0f * v2.y);
|
const int Y2 = (int)round(16.0f * v2.y);
|
||||||
const int Y3 = (int)round(16.0f * v3.y);
|
const int Y3 = (int)round(16.0f * v3.y);
|
||||||
|
@ -227,7 +227,7 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa
|
||||||
uint32_t mask0 = 0;
|
uint32_t mask0 = 0;
|
||||||
uint32_t mask1 = 0;
|
uint32_t mask1 = 0;
|
||||||
|
|
||||||
#if NO_SSE
|
#ifdef NO_SSE
|
||||||
for (int iy = 0; iy < 4; iy++)
|
for (int iy = 0; iy < 4; iy++)
|
||||||
{
|
{
|
||||||
int CX1 = CY1;
|
int CX1 = CY1;
|
||||||
|
@ -399,7 +399,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea
|
||||||
TriPartialBlock * RESTRICT partial = thread->PartialBlocks;
|
TriPartialBlock * RESTRICT partial = thread->PartialBlocks;
|
||||||
|
|
||||||
// 28.4 fixed-point coordinates
|
// 28.4 fixed-point coordinates
|
||||||
#if NO_SSE
|
#ifdef NO_SSE
|
||||||
const int Y1 = (int)round(16.0f * v1.y);
|
const int Y1 = (int)round(16.0f * v1.y);
|
||||||
const int Y2 = (int)round(16.0f * v2.y);
|
const int Y2 = (int)round(16.0f * v2.y);
|
||||||
const int Y3 = (int)round(16.0f * v3.y);
|
const int Y3 = (int)round(16.0f * v3.y);
|
||||||
|
|
|
@ -33,10 +33,6 @@
|
||||||
**
|
**
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef __arm__
|
|
||||||
#define NO_SSE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef NO_SSE
|
#ifndef NO_SSE
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -30,10 +30,6 @@
|
||||||
#include "swrenderer/viewport/r_walldrawer.h"
|
#include "swrenderer/viewport/r_walldrawer.h"
|
||||||
#include "swrenderer/viewport/r_spritedrawer.h"
|
#include "swrenderer/viewport/r_spritedrawer.h"
|
||||||
|
|
||||||
#ifdef __arm__
|
|
||||||
#define NO_SSE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef NO_SSE
|
#ifndef NO_SSE
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue