mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-14 00:20:51 +00:00
- add CPU.bSSE2 branching support to softpoly
This commit is contained in:
parent
3608299e61
commit
545241aa06
5 changed files with 372 additions and 210 deletions
|
@ -27,7 +27,7 @@
|
||||||
namespace TriScreenDrawerModes
|
namespace TriScreenDrawerModes
|
||||||
{
|
{
|
||||||
template<typename SamplerT, typename FilterModeT>
|
template<typename SamplerT, typename FilterModeT>
|
||||||
FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation)
|
FORCEINLINE unsigned int VECTORCALL Sample32_SSE2(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation)
|
||||||
{
|
{
|
||||||
uint32_t texel;
|
uint32_t texel;
|
||||||
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz)
|
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill || SamplerT::Mode == (int)Samplers::Fuzz)
|
||||||
|
@ -107,7 +107,7 @@ namespace TriScreenDrawerModes
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename SamplerT>
|
template<typename SamplerT>
|
||||||
FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int &fuzzpos)
|
FORCEINLINE unsigned int VECTORCALL SampleShade32_SSE2(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, int &fuzzpos)
|
||||||
{
|
{
|
||||||
if (SamplerT::Mode == (int)Samplers::Shaded)
|
if (SamplerT::Mode == (int)Samplers::Shaded)
|
||||||
{
|
{
|
||||||
|
@ -143,7 +143,7 @@ namespace TriScreenDrawerModes
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename ShadeModeT>
|
template<typename ShadeModeT>
|
||||||
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light)
|
FORCEINLINE __m128i VECTORCALL Shade32_SSE2(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light)
|
||||||
{
|
{
|
||||||
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
|
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
|
||||||
{
|
{
|
||||||
|
@ -172,7 +172,7 @@ namespace TriScreenDrawerModes
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename BlendT>
|
template<typename BlendT>
|
||||||
FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
|
FORCEINLINE __m128i VECTORCALL Blend32_SSE2(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
|
||||||
{
|
{
|
||||||
if (BlendT::Mode == (int)BlendModes::Opaque)
|
if (BlendT::Mode == (int)BlendModes::Opaque)
|
||||||
{
|
{
|
||||||
|
@ -275,7 +275,7 @@ namespace TriScreenDrawerModes
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename BlendT, typename SamplerT>
|
template<typename BlendT, typename SamplerT>
|
||||||
class TriScreenDrawer32
|
class TriScreenDrawer32_SSE2
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
|
static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
|
||||||
|
@ -430,13 +430,13 @@ private:
|
||||||
|
|
||||||
// Sample fgcolor
|
// Sample fgcolor
|
||||||
unsigned int ifgcolor[2], ifgshade[2];
|
unsigned int ifgcolor[2], ifgshade[2];
|
||||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[0] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[0] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
posV += stepV;
|
posV += stepV;
|
||||||
|
|
||||||
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[1] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[1] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
posV += stepV;
|
posV += stepV;
|
||||||
|
|
||||||
|
@ -460,8 +460,8 @@ private:
|
||||||
|
|
||||||
// Shade and blend
|
// Shade and blend
|
||||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
fgcolor = Shade32_SSE2<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
__m128i outcolor = Blend32_SSE2<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||||
|
|
||||||
// Store result
|
// Store result
|
||||||
_mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);
|
_mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);
|
||||||
|
@ -517,13 +517,13 @@ private:
|
||||||
|
|
||||||
// Sample fgcolor
|
// Sample fgcolor
|
||||||
unsigned int ifgcolor[2], ifgshade[2];
|
unsigned int ifgcolor[2], ifgshade[2];
|
||||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[0] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[0] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
posV += stepV;
|
posV += stepV;
|
||||||
|
|
||||||
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[1] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[1] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
posV += stepV;
|
posV += stepV;
|
||||||
|
|
||||||
|
@ -547,8 +547,8 @@ private:
|
||||||
|
|
||||||
// Shade and blend
|
// Shade and blend
|
||||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
fgcolor = Shade32_SSE2<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
__m128i outcolor = Blend32_SSE2<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||||
|
|
||||||
// Store result
|
// Store result
|
||||||
_mm_storel_epi64((__m128i*)desttmp, outcolor);
|
_mm_storel_epi64((__m128i*)desttmp, outcolor);
|
||||||
|
@ -606,13 +606,13 @@ private:
|
||||||
|
|
||||||
// Sample fgcolor
|
// Sample fgcolor
|
||||||
unsigned int ifgcolor[2], ifgshade[2];
|
unsigned int ifgcolor[2], ifgshade[2];
|
||||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[0] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[0] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
posV += stepV;
|
posV += stepV;
|
||||||
|
|
||||||
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[1] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[1] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
posV += stepV;
|
posV += stepV;
|
||||||
|
|
||||||
|
@ -636,8 +636,8 @@ private:
|
||||||
|
|
||||||
// Shade and blend
|
// Shade and blend
|
||||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
fgcolor = Shade32_SSE2<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
__m128i outcolor = Blend32_SSE2<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||||
|
|
||||||
// Store result
|
// Store result
|
||||||
_mm_storel_epi64((__m128i*)desttmp, outcolor);
|
_mm_storel_epi64((__m128i*)desttmp, outcolor);
|
||||||
|
@ -658,7 +658,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename BlendT, typename SamplerT>
|
template<typename BlendT, typename SamplerT>
|
||||||
class RectScreenDrawer32
|
class RectScreenDrawer32_SSE2
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
|
static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
|
||||||
|
@ -780,18 +780,18 @@ private:
|
||||||
|
|
||||||
// Sample fgcolor
|
// Sample fgcolor
|
||||||
unsigned int ifgcolor[2], ifgshade[2];
|
unsigned int ifgcolor[2], ifgshade[2];
|
||||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[0] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[0] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
|
|
||||||
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[1] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[1] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
|
|
||||||
// Shade and blend
|
// Shade and blend
|
||||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
fgcolor = Shade32_SSE2<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
__m128i outcolor = Blend32_SSE2<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||||
|
|
||||||
// Store result
|
// Store result
|
||||||
_mm_storel_epi64((__m128i*)dest, outcolor);
|
_mm_storel_epi64((__m128i*)dest, outcolor);
|
||||||
|
@ -809,16 +809,16 @@ private:
|
||||||
|
|
||||||
// Sample fgcolor
|
// Sample fgcolor
|
||||||
unsigned int ifgcolor[2], ifgshade[2];
|
unsigned int ifgcolor[2], ifgshade[2];
|
||||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
ifgcolor[0] = Sample32_SSE2<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
ifgshade[0] = SampleShade32_SSE2<SamplerT>(posU, posV, texPixels, texWidth, texHeight, fuzzpos);
|
||||||
ifgcolor[1] = 0;
|
ifgcolor[1] = 0;
|
||||||
ifgshade[1] = 0;
|
ifgshade[1] = 0;
|
||||||
posU += stepU;
|
posU += stepU;
|
||||||
|
|
||||||
// Shade and blend
|
// Shade and blend
|
||||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
fgcolor = Shade32_SSE2<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
__m128i outcolor = Blend32_SSE2<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||||
|
|
||||||
// Store result
|
// Store result
|
||||||
*dest = _mm_cvtsi128_si32(outcolor);
|
*dest = _mm_cvtsi128_si32(outcolor);
|
||||||
|
|
|
@ -151,14 +151,8 @@ ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip,
|
||||||
return sv;
|
return sv;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
void PolyTriangleDrawer::clip_to_viewport(TriVertex *clippedvert, int numclipvert)
|
||||||
{
|
{
|
||||||
// Cull, clip and generate additional vertices as needed
|
|
||||||
TriVertex clippedvert[max_additional_vertices];
|
|
||||||
int numclipvert = clipedge(vert, clippedvert);
|
|
||||||
|
|
||||||
#ifdef NO_SSE
|
|
||||||
// Map to 2D viewport:
|
|
||||||
for (int j = 0; j < numclipvert; j++)
|
for (int j = 0; j < numclipvert; j++)
|
||||||
{
|
{
|
||||||
auto &v = clippedvert[j];
|
auto &v = clippedvert[j];
|
||||||
|
@ -173,8 +167,11 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool
|
||||||
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
|
v.x = viewport_x + viewport_width * (1.0f + v.x) * 0.5f;
|
||||||
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
|
v.y = viewport_y + viewport_height * (1.0f - v.y) * 0.5f;
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
// Map to 2D viewport:
|
|
||||||
|
#ifndef NO_SSE
|
||||||
|
void PolyTriangleDrawer::clip_to_viewport_sse2(TriVertex *clippedvert, int numclipvert)
|
||||||
|
{
|
||||||
__m128 mviewport_x = _mm_set1_ps((float)viewport_x);
|
__m128 mviewport_x = _mm_set1_ps((float)viewport_x);
|
||||||
__m128 mviewport_y = _mm_set1_ps((float)viewport_y);
|
__m128 mviewport_y = _mm_set1_ps((float)viewport_y);
|
||||||
__m128 mviewport_halfwidth = _mm_set1_ps(viewport_width * 0.5f);
|
__m128 mviewport_halfwidth = _mm_set1_ps(viewport_width * 0.5f);
|
||||||
|
@ -205,8 +202,21 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool
|
||||||
_mm_storeu_ps(&clippedvert[j + 2].x, vz);
|
_mm_storeu_ps(&clippedvert[j + 2].x, vz);
|
||||||
_mm_storeu_ps(&clippedvert[j + 3].x, vw);
|
_mm_storeu_ps(&clippedvert[j + 3].x, vw);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||||
|
{
|
||||||
|
// Cull, clip and generate additional vertices as needed
|
||||||
|
TriVertex clippedvert[max_additional_vertices];
|
||||||
|
int numclipvert = CPU.bSSE2 ? clipedge_sse2(vert, clippedvert) : clipedge(vert, clippedvert);
|
||||||
|
|
||||||
|
// Map to 2D viewport:
|
||||||
|
if (CPU.bSSE2)
|
||||||
|
clip_to_viewport_sse2(clippedvert, numclipvert);
|
||||||
|
else
|
||||||
|
clip_to_viewport(clippedvert, numclipvert);
|
||||||
|
|
||||||
// Keep varyings in -128 to 128 range if possible
|
// Keep varyings in -128 to 128 range if possible
|
||||||
if (numclipvert > 0)
|
if (numclipvert > 0)
|
||||||
{
|
{
|
||||||
|
@ -255,7 +265,6 @@ int PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippe
|
||||||
|
|
||||||
// halfspace clip distances
|
// halfspace clip distances
|
||||||
static const int numclipdistances = 7;
|
static const int numclipdistances = 7;
|
||||||
#ifdef NO_SSE
|
|
||||||
float clipdistance[numclipdistances * 3];
|
float clipdistance[numclipdistances * 3];
|
||||||
bool needsclipping = false;
|
bool needsclipping = false;
|
||||||
float *clipd = clipdistance;
|
float *clipd = clipdistance;
|
||||||
|
@ -282,43 +291,6 @@ int PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippe
|
||||||
}
|
}
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
__m128 mx = _mm_loadu_ps(&verts[0].x);
|
|
||||||
__m128 my = _mm_loadu_ps(&verts[1].x);
|
|
||||||
__m128 mz = _mm_loadu_ps(&verts[2].x);
|
|
||||||
__m128 mw = _mm_setzero_ps();
|
|
||||||
_MM_TRANSPOSE4_PS(mx, my, mz, mw);
|
|
||||||
__m128 clipd0 = _mm_add_ps(mx, mw);
|
|
||||||
__m128 clipd1 = _mm_sub_ps(mw, mx);
|
|
||||||
__m128 clipd2 = _mm_add_ps(my, mw);
|
|
||||||
__m128 clipd3 = _mm_sub_ps(mw, my);
|
|
||||||
__m128 clipd4 = _mm_add_ps(mz, mw);
|
|
||||||
__m128 clipd5 = _mm_sub_ps(mw, mz);
|
|
||||||
__m128 clipd6 = _mm_setr_ps(verts[0].clipDistance0, verts[1].clipDistance0, verts[2].clipDistance0, 0.0f);
|
|
||||||
__m128 mneedsclipping = _mm_cmplt_ps(clipd0, _mm_setzero_ps());
|
|
||||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd1, _mm_setzero_ps()));
|
|
||||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd2, _mm_setzero_ps()));
|
|
||||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd3, _mm_setzero_ps()));
|
|
||||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd4, _mm_setzero_ps()));
|
|
||||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd5, _mm_setzero_ps()));
|
|
||||||
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd6, _mm_setzero_ps()));
|
|
||||||
if (_mm_movemask_ps(mneedsclipping) == 0)
|
|
||||||
{
|
|
||||||
for (int i = 0; i < 3; i++)
|
|
||||||
{
|
|
||||||
memcpy(clippedvert + i, verts + i, sizeof(TriVertex));
|
|
||||||
}
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
float clipdistance[numclipdistances * 4];
|
|
||||||
_mm_storeu_ps(clipdistance, clipd0);
|
|
||||||
_mm_storeu_ps(clipdistance + 4, clipd1);
|
|
||||||
_mm_storeu_ps(clipdistance + 8, clipd2);
|
|
||||||
_mm_storeu_ps(clipdistance + 12, clipd3);
|
|
||||||
_mm_storeu_ps(clipdistance + 16, clipd4);
|
|
||||||
_mm_storeu_ps(clipdistance + 20, clipd5);
|
|
||||||
_mm_storeu_ps(clipdistance + 24, clipd6);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// use barycentric weights while clipping vertices
|
// use barycentric weights while clipping vertices
|
||||||
float weights[max_additional_vertices * 3 * 2];
|
float weights[max_additional_vertices * 3 * 2];
|
||||||
|
@ -341,7 +313,6 @@ int PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippe
|
||||||
for (int i = 0; i < inputverts; i++)
|
for (int i = 0; i < inputverts; i++)
|
||||||
{
|
{
|
||||||
int j = (i + 1) % inputverts;
|
int j = (i + 1) % inputverts;
|
||||||
#ifdef NO_SSE
|
|
||||||
float clipdistance1 =
|
float clipdistance1 =
|
||||||
clipdistance[0 * numclipdistances + p] * input[i * 3 + 0] +
|
clipdistance[0 * numclipdistances + p] * input[i * 3 + 0] +
|
||||||
clipdistance[1 * numclipdistances + p] * input[i * 3 + 1] +
|
clipdistance[1 * numclipdistances + p] * input[i * 3 + 1] +
|
||||||
|
@ -351,17 +322,6 @@ int PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippe
|
||||||
clipdistance[0 * numclipdistances + p] * input[j * 3 + 0] +
|
clipdistance[0 * numclipdistances + p] * input[j * 3 + 0] +
|
||||||
clipdistance[1 * numclipdistances + p] * input[j * 3 + 1] +
|
clipdistance[1 * numclipdistances + p] * input[j * 3 + 1] +
|
||||||
clipdistance[2 * numclipdistances + p] * input[j * 3 + 2];
|
clipdistance[2 * numclipdistances + p] * input[j * 3 + 2];
|
||||||
#else
|
|
||||||
float clipdistance1 =
|
|
||||||
clipdistance[0 + p * 4] * input[i * 3 + 0] +
|
|
||||||
clipdistance[1 + p * 4] * input[i * 3 + 1] +
|
|
||||||
clipdistance[2 + p * 4] * input[i * 3 + 2];
|
|
||||||
|
|
||||||
float clipdistance2 =
|
|
||||||
clipdistance[0 + p * 4] * input[j * 3 + 0] +
|
|
||||||
clipdistance[1 + p * 4] * input[j * 3 + 1] +
|
|
||||||
clipdistance[2 + p * 4] * input[j * 3 + 2];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Clip halfspace
|
// Clip halfspace
|
||||||
if ((clipdistance1 >= 0.0f || clipdistance2 >= 0.0f) && outputverts + 1 < max_additional_vertices)
|
if ((clipdistance1 >= 0.0f || clipdistance2 >= 0.0f) && outputverts + 1 < max_additional_vertices)
|
||||||
|
@ -408,6 +368,129 @@ int PolyTriangleDrawer::clipedge(const ShadedTriVertex *verts, TriVertex *clippe
|
||||||
return inputverts;
|
return inputverts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
|
int PolyTriangleDrawer::clipedge_sse2(const ShadedTriVertex *verts, TriVertex *clippedvert)
|
||||||
|
{
|
||||||
|
// Clip and cull so that the following is true for all vertices:
|
||||||
|
// -v.w <= v.x <= v.w
|
||||||
|
// -v.w <= v.y <= v.w
|
||||||
|
// -v.w <= v.z <= v.w
|
||||||
|
|
||||||
|
// halfspace clip distances
|
||||||
|
static const int numclipdistances = 7;
|
||||||
|
__m128 mx = _mm_loadu_ps(&verts[0].x);
|
||||||
|
__m128 my = _mm_loadu_ps(&verts[1].x);
|
||||||
|
__m128 mz = _mm_loadu_ps(&verts[2].x);
|
||||||
|
__m128 mw = _mm_setzero_ps();
|
||||||
|
_MM_TRANSPOSE4_PS(mx, my, mz, mw);
|
||||||
|
__m128 clipd0 = _mm_add_ps(mx, mw);
|
||||||
|
__m128 clipd1 = _mm_sub_ps(mw, mx);
|
||||||
|
__m128 clipd2 = _mm_add_ps(my, mw);
|
||||||
|
__m128 clipd3 = _mm_sub_ps(mw, my);
|
||||||
|
__m128 clipd4 = _mm_add_ps(mz, mw);
|
||||||
|
__m128 clipd5 = _mm_sub_ps(mw, mz);
|
||||||
|
__m128 clipd6 = _mm_setr_ps(verts[0].clipDistance0, verts[1].clipDistance0, verts[2].clipDistance0, 0.0f);
|
||||||
|
__m128 mneedsclipping = _mm_cmplt_ps(clipd0, _mm_setzero_ps());
|
||||||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd1, _mm_setzero_ps()));
|
||||||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd2, _mm_setzero_ps()));
|
||||||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd3, _mm_setzero_ps()));
|
||||||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd4, _mm_setzero_ps()));
|
||||||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd5, _mm_setzero_ps()));
|
||||||
|
mneedsclipping = _mm_or_ps(mneedsclipping, _mm_cmplt_ps(clipd6, _mm_setzero_ps()));
|
||||||
|
if (_mm_movemask_ps(mneedsclipping) == 0)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
memcpy(clippedvert + i, verts + i, sizeof(TriVertex));
|
||||||
|
}
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
float clipdistance[numclipdistances * 4];
|
||||||
|
_mm_storeu_ps(clipdistance, clipd0);
|
||||||
|
_mm_storeu_ps(clipdistance + 4, clipd1);
|
||||||
|
_mm_storeu_ps(clipdistance + 8, clipd2);
|
||||||
|
_mm_storeu_ps(clipdistance + 12, clipd3);
|
||||||
|
_mm_storeu_ps(clipdistance + 16, clipd4);
|
||||||
|
_mm_storeu_ps(clipdistance + 20, clipd5);
|
||||||
|
_mm_storeu_ps(clipdistance + 24, clipd6);
|
||||||
|
|
||||||
|
// use barycentric weights while clipping vertices
|
||||||
|
float weights[max_additional_vertices * 3 * 2];
|
||||||
|
for (int i = 0; i < 3; i++)
|
||||||
|
{
|
||||||
|
weights[i * 3 + 0] = 0.0f;
|
||||||
|
weights[i * 3 + 1] = 0.0f;
|
||||||
|
weights[i * 3 + 2] = 0.0f;
|
||||||
|
weights[i * 3 + i] = 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clip against each halfspace
|
||||||
|
float *input = weights;
|
||||||
|
float *output = weights + max_additional_vertices * 3;
|
||||||
|
int inputverts = 3;
|
||||||
|
for (int p = 0; p < numclipdistances; p++)
|
||||||
|
{
|
||||||
|
// Clip each edge
|
||||||
|
int outputverts = 0;
|
||||||
|
for (int i = 0; i < inputverts; i++)
|
||||||
|
{
|
||||||
|
int j = (i + 1) % inputverts;
|
||||||
|
float clipdistance1 =
|
||||||
|
clipdistance[0 + p * 4] * input[i * 3 + 0] +
|
||||||
|
clipdistance[1 + p * 4] * input[i * 3 + 1] +
|
||||||
|
clipdistance[2 + p * 4] * input[i * 3 + 2];
|
||||||
|
|
||||||
|
float clipdistance2 =
|
||||||
|
clipdistance[0 + p * 4] * input[j * 3 + 0] +
|
||||||
|
clipdistance[1 + p * 4] * input[j * 3 + 1] +
|
||||||
|
clipdistance[2 + p * 4] * input[j * 3 + 2];
|
||||||
|
|
||||||
|
// Clip halfspace
|
||||||
|
if ((clipdistance1 >= 0.0f || clipdistance2 >= 0.0f) && outputverts + 1 < max_additional_vertices)
|
||||||
|
{
|
||||||
|
float t1 = (clipdistance1 < 0.0f) ? MAX(-clipdistance1 / (clipdistance2 - clipdistance1), 0.0f) : 0.0f;
|
||||||
|
float t2 = (clipdistance2 < 0.0f) ? MIN(1.0f + clipdistance2 / (clipdistance1 - clipdistance2), 1.0f) : 1.0f;
|
||||||
|
|
||||||
|
// add t1 vertex
|
||||||
|
for (int k = 0; k < 3; k++)
|
||||||
|
output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t1) + input[j * 3 + k] * t1;
|
||||||
|
outputverts++;
|
||||||
|
|
||||||
|
if (t2 != 1.0f && t2 > t1)
|
||||||
|
{
|
||||||
|
// add t2 vertex
|
||||||
|
for (int k = 0; k < 3; k++)
|
||||||
|
output[outputverts * 3 + k] = input[i * 3 + k] * (1.0f - t2) + input[j * 3 + k] * t2;
|
||||||
|
outputverts++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::swap(input, output);
|
||||||
|
inputverts = outputverts;
|
||||||
|
if (inputverts == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert barycentric weights to actual vertices
|
||||||
|
for (int i = 0; i < inputverts; i++)
|
||||||
|
{
|
||||||
|
auto &v = clippedvert[i];
|
||||||
|
memset(&v, 0, sizeof(TriVertex));
|
||||||
|
for (int w = 0; w < 3; w++)
|
||||||
|
{
|
||||||
|
float weight = input[i * 3 + w];
|
||||||
|
v.x += verts[w].x * weight;
|
||||||
|
v.y += verts[w].y * weight;
|
||||||
|
v.z += verts[w].z * weight;
|
||||||
|
v.w += verts[w].w * weight;
|
||||||
|
v.u += verts[w].u * weight;
|
||||||
|
v.v += verts[w].v * weight;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return inputverts;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, bool mirror)
|
DrawPolyTrianglesCommand::DrawPolyTrianglesCommand(const PolyDrawArgs &args, bool mirror)
|
||||||
|
|
|
@ -47,8 +47,12 @@ private:
|
||||||
static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v);
|
static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v);
|
||||||
static void draw_arrays(const PolyDrawArgs &args, WorkerThreadData *thread);
|
static void draw_arrays(const PolyDrawArgs &args, WorkerThreadData *thread);
|
||||||
static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||||
|
static void clip_to_viewport(TriVertex *clippedvert, int numclipvert);
|
||||||
static int clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert);
|
static int clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert);
|
||||||
|
#ifndef NO_SSE
|
||||||
|
static void clip_to_viewport_sse2(TriVertex *clippedvert, int numclipvert);
|
||||||
|
static int clipedge_sse2(const ShadedTriVertex *verts, TriVertex *clippedvert);
|
||||||
|
#endif
|
||||||
|
|
||||||
static int viewport_x, viewport_y, viewport_width, viewport_height, dest_pitch, dest_width, dest_height;
|
static int viewport_x, viewport_y, viewport_width, viewport_height, dest_pitch, dest_width, dest_height;
|
||||||
static bool dest_bgra;
|
static bool dest_bgra;
|
||||||
|
|
|
@ -41,6 +41,13 @@
|
||||||
#endif
|
#endif
|
||||||
#include "poly_drawer8.h"
|
#include "poly_drawer8.h"
|
||||||
|
|
||||||
|
namespace
{
	// Compile-time CPU tags passed as the CPUType template argument of
	// TriangleBlock. HasSSE2 is 1 for the SSE2 tag and 0 for the generic one.
	struct SSE2CPU
	{
		static const int HasSSE2 = 1;
	};

	struct GenericCPU
	{
		static const int HasSSE2 = 0;
	};
}
|
||||||
|
|
||||||
|
template<typename CPUType>
|
||||||
class TriangleBlock
|
class TriangleBlock
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -114,9 +121,17 @@ private:
|
||||||
void ClipTest();
|
void ClipTest();
|
||||||
void StencilWrite();
|
void StencilWrite();
|
||||||
void SubsectorWrite();
|
void SubsectorWrite();
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
|
void CoverageTestSSE2();
|
||||||
|
void StencilEqualTestSSE2();
|
||||||
|
void SubsectorTestSSE2();
|
||||||
|
void SubsectorWriteSSE2();
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
template<typename CPUType>
|
||||||
|
TriangleBlock<CPUType>::TriangleBlock(const TriDrawTriangleArgs *args)
|
||||||
{
|
{
|
||||||
const TriVertex &v1 = *args->v1;
|
const TriVertex &v1 = *args->v1;
|
||||||
const TriVertex &v2 = *args->v2;
|
const TriVertex &v2 = *args->v2;
|
||||||
|
@ -145,6 +160,9 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
||||||
const int X2 = (int)round(16.0f * v2.x);
|
const int X2 = (int)round(16.0f * v2.x);
|
||||||
const int X3 = (int)round(16.0f * v3.x);
|
const int X3 = (int)round(16.0f * v3.x);
|
||||||
#else
|
#else
|
||||||
|
int Y1, Y2, Y3, X1, X2, X3;
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
{
|
||||||
int tempround[4 * 3];
|
int tempround[4 * 3];
|
||||||
__m128 m16 = _mm_set1_ps(16.0f);
|
__m128 m16 = _mm_set1_ps(16.0f);
|
||||||
__m128 mhalf = _mm_set1_ps(65536.5f);
|
__m128 mhalf = _mm_set1_ps(65536.5f);
|
||||||
|
@ -152,12 +170,22 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
||||||
_mm_storeu_si128((__m128i*)tempround, _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v1), m16), mhalf)), m65536));
|
_mm_storeu_si128((__m128i*)tempround, _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v1), m16), mhalf)), m65536));
|
||||||
_mm_storeu_si128((__m128i*)(tempround + 4), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v2), m16), mhalf)), m65536));
|
_mm_storeu_si128((__m128i*)(tempround + 4), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v2), m16), mhalf)), m65536));
|
||||||
_mm_storeu_si128((__m128i*)(tempround + 8), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v3), m16), mhalf)), m65536));
|
_mm_storeu_si128((__m128i*)(tempround + 8), _mm_sub_epi32(_mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(_mm_loadu_ps((const float*)&v3), m16), mhalf)), m65536));
|
||||||
const int X1 = tempround[0];
|
X1 = tempround[0];
|
||||||
const int X2 = tempround[4];
|
X2 = tempround[4];
|
||||||
const int X3 = tempround[8];
|
X3 = tempround[8];
|
||||||
const int Y1 = tempround[1];
|
Y1 = tempround[1];
|
||||||
const int Y2 = tempround[5];
|
Y2 = tempround[5];
|
||||||
const int Y3 = tempround[9];
|
Y3 = tempround[9];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Y1 = (int)round(16.0f * v1.y);
|
||||||
|
Y2 = (int)round(16.0f * v2.y);
|
||||||
|
Y3 = (int)round(16.0f * v3.y);
|
||||||
|
X1 = (int)round(16.0f * v1.x);
|
||||||
|
X2 = (int)round(16.0f * v2.x);
|
||||||
|
X3 = (int)round(16.0f * v3.x);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Deltas
|
// Deltas
|
||||||
|
@ -203,6 +231,8 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
||||||
if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
|
if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
|
||||||
|
|
||||||
#ifndef NO_SSE
|
#ifndef NO_SSE
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
{
|
||||||
mFDY12Offset = _mm_setr_epi32(0, FDY12, FDY12 * 2, FDY12 * 3);
|
mFDY12Offset = _mm_setr_epi32(0, FDY12, FDY12 * 2, FDY12 * 3);
|
||||||
mFDY23Offset = _mm_setr_epi32(0, FDY23, FDY23 * 2, FDY23 * 3);
|
mFDY23Offset = _mm_setr_epi32(0, FDY23, FDY23 * 2, FDY23 * 3);
|
||||||
mFDY31Offset = _mm_setr_epi32(0, FDY31, FDY31 * 2, FDY31 * 3);
|
mFDY31Offset = _mm_setr_epi32(0, FDY31, FDY31 * 2, FDY31 * 3);
|
||||||
|
@ -221,10 +251,12 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
||||||
mDY23 = _mm_set1_epi32(DY23);
|
mDY23 = _mm_set1_epi32(DY23);
|
||||||
mDX31 = _mm_set1_epi32(DX31);
|
mDX31 = _mm_set1_epi32(DX31);
|
||||||
mDY31 = _mm_set1_epi32(DY31);
|
mDY31 = _mm_set1_epi32(DY31);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||||
{
|
{
|
||||||
// First block line for this thread
|
// First block line for this thread
|
||||||
int core = thread->core;
|
int core = thread->core;
|
||||||
|
@ -236,9 +268,18 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
|
||||||
bool writeColor = args->uniforms->WriteColor();
|
bool writeColor = args->uniforms->WriteColor();
|
||||||
bool writeStencil = args->uniforms->WriteStencil();
|
bool writeStencil = args->uniforms->WriteStencil();
|
||||||
bool writeSubsector = args->uniforms->WriteSubsector();
|
bool writeSubsector = args->uniforms->WriteSubsector();
|
||||||
|
|
||||||
int bmode = (int)args->uniforms->BlendMode();
|
int bmode = (int)args->uniforms->BlendMode();
|
||||||
|
|
||||||
|
// Find the drawer function for the given blend mode
|
||||||
|
#ifndef NO_SSE
|
||||||
|
void(*drawFunc)(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *);
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32_SSE2[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
||||||
|
else
|
||||||
|
drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
||||||
|
#else
|
||||||
auto drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
auto drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
||||||
|
#endif
|
||||||
|
|
||||||
// Loop through blocks
|
// Loop through blocks
|
||||||
for (int y = start_miny; y < maxy; y += q * num_cores)
|
for (int y = start_miny; y < maxy; y += q * num_cores)
|
||||||
|
@ -248,7 +289,11 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
|
||||||
X = x;
|
X = x;
|
||||||
Y = y;
|
Y = y;
|
||||||
|
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
CoverageTestSSE2();
|
||||||
|
else
|
||||||
CoverageTest();
|
CoverageTest();
|
||||||
|
|
||||||
if (Mask0 == 0 && Mask1 == 0)
|
if (Mask0 == 0 && Mask1 == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -259,7 +304,11 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
|
||||||
// To do: make the stencil test use its own flag for comparison mode instead of abusing the subsector test..
|
// To do: make the stencil test use its own flag for comparison mode instead of abusing the subsector test..
|
||||||
if (!subsectorTest)
|
if (!subsectorTest)
|
||||||
{
|
{
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
StencilEqualTestSSE2();
|
||||||
|
else
|
||||||
StencilEqualTest();
|
StencilEqualTest();
|
||||||
|
|
||||||
if (Mask0 == 0 && Mask1 == 0)
|
if (Mask0 == 0 && Mask1 == 0)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -269,7 +318,11 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
|
||||||
if (Mask0 == 0 && Mask1 == 0)
|
if (Mask0 == 0 && Mask1 == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
SubsectorTestSSE2();
|
||||||
|
else
|
||||||
SubsectorTest();
|
SubsectorTest();
|
||||||
|
|
||||||
if (Mask0 == 0 && Mask1 == 0)
|
if (Mask0 == 0 && Mask1 == 0)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -279,14 +332,18 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
|
||||||
if (writeStencil)
|
if (writeStencil)
|
||||||
StencilWrite();
|
StencilWrite();
|
||||||
if (writeSubsector)
|
if (writeSubsector)
|
||||||
|
{
|
||||||
|
if (CPUType::HasSSE2 == 1)
|
||||||
|
SubsectorWriteSSE2();
|
||||||
|
else
|
||||||
SubsectorWrite();
|
SubsectorWrite();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NO_SSE
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::SubsectorTest()
|
||||||
void TriangleBlock::SubsectorTest()
|
|
||||||
{
|
{
|
||||||
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
||||||
uint32_t *subsector = subsectorGBuffer + block * 64;
|
uint32_t *subsector = subsectorGBuffer + block * 64;
|
||||||
|
@ -312,9 +369,10 @@ void TriangleBlock::SubsectorTest()
|
||||||
Mask1 = Mask1 & mask1;
|
Mask1 = Mask1 & mask1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#ifndef NO_SSE
|
||||||
|
|
||||||
void TriangleBlock::SubsectorTest()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::SubsectorTestSSE2()
|
||||||
{
|
{
|
||||||
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
||||||
uint32_t *subsector = subsectorGBuffer + block * 64;
|
uint32_t *subsector = subsectorGBuffer + block * 64;
|
||||||
|
@ -342,7 +400,8 @@ void TriangleBlock::SubsectorTest()
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void TriangleBlock::ClipTest()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::ClipTest()
|
||||||
{
|
{
|
||||||
static const uint32_t clipxmask[8] =
|
static const uint32_t clipxmask[8] =
|
||||||
{
|
{
|
||||||
|
@ -376,9 +435,8 @@ void TriangleBlock::ClipTest()
|
||||||
Mask1 = Mask1 & xmask & ymask1;
|
Mask1 = Mask1 & xmask & ymask1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NO_SSE
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::StencilEqualTest()
|
||||||
void TriangleBlock::StencilEqualTest()
|
|
||||||
{
|
{
|
||||||
// Stencil test the whole block, if possible
|
// Stencil test the whole block, if possible
|
||||||
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
||||||
|
@ -421,9 +479,10 @@ void TriangleBlock::StencilEqualTest()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#ifndef NO_SSE
|
||||||
|
|
||||||
void TriangleBlock::StencilEqualTest()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::StencilEqualTestSSE2()
|
||||||
{
|
{
|
||||||
// Stencil test the whole block, if possible
|
// Stencil test the whole block, if possible
|
||||||
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
||||||
|
@ -489,7 +548,8 @@ void TriangleBlock::StencilEqualTest()
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void TriangleBlock::StencilGreaterEqualTest()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::StencilGreaterEqualTest()
|
||||||
{
|
{
|
||||||
// Stencil test the whole block, if possible
|
// Stencil test the whole block, if possible
|
||||||
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
||||||
|
@ -532,9 +592,8 @@ void TriangleBlock::StencilGreaterEqualTest()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NO_SSE
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::CoverageTest()
|
||||||
void TriangleBlock::CoverageTest()
|
|
||||||
{
|
{
|
||||||
// Corners of block
|
// Corners of block
|
||||||
int x0 = X << 4;
|
int x0 = X << 4;
|
||||||
|
@ -631,9 +690,10 @@ void TriangleBlock::CoverageTest()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#ifndef NO_SSE
|
||||||
|
|
||||||
void TriangleBlock::CoverageTest()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::CoverageTestSSE2()
|
||||||
{
|
{
|
||||||
// Corners of block
|
// Corners of block
|
||||||
int x0 = X << 4;
|
int x0 = X << 4;
|
||||||
|
@ -743,7 +803,8 @@ void TriangleBlock::CoverageTest()
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void TriangleBlock::StencilWrite()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::StencilWrite()
|
||||||
{
|
{
|
||||||
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
int block = (X >> 3) + (Y >> 3) * stencilPitch;
|
||||||
uint8_t *stencilBlock = &stencilValues[block * 64];
|
uint8_t *stencilBlock = &stencilValues[block * 64];
|
||||||
|
@ -793,9 +854,8 @@ void TriangleBlock::StencilWrite()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NO_SSE
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::SubsectorWrite()
|
||||||
void TriangleBlock::SubsectorWrite()
|
|
||||||
{
|
{
|
||||||
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
||||||
uint32_t *subsector = subsectorGBuffer + block * 64;
|
uint32_t *subsector = subsectorGBuffer + block * 64;
|
||||||
|
@ -828,9 +888,10 @@ void TriangleBlock::SubsectorWrite()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#ifndef NO_SSE
|
||||||
|
|
||||||
void TriangleBlock::SubsectorWrite()
|
template<typename CPUType>
|
||||||
|
void TriangleBlock<CPUType>::SubsectorWriteSSE2()
|
||||||
{
|
{
|
||||||
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
int block = (X >> 3) + (Y >> 3) * subsectorPitch;
|
||||||
uint32_t *subsector = subsectorGBuffer + block * 64;
|
uint32_t *subsector = subsectorGBuffer + block * 64;
|
||||||
|
@ -887,8 +948,21 @@ void TriangleBlock::SubsectorWrite()
|
||||||
|
|
||||||
// Draws one triangle: constructs a TriangleBlock from `args` and runs its
// Loop() for the given worker thread.
//
// Old side of the diff: a single non-template TriangleBlock.
// New side of the diff:
//   - NO_SSE defined: always TriangleBlock<GenericCPU>.
//   - otherwise: the runtime flag CPU.bSSE2 picks TriangleBlock<SSE2CPU>,
//     falling back to TriangleBlock<GenericCPU> when it is not set.
void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||||
{
|
{
|
||||||
TriangleBlock block(args);
|
#ifdef NO_SSE
|
||||||
|
TriangleBlock<GenericCPU> block(args);
|
||||||
block.Loop(args, thread);
|
block.Loop(args, thread);
|
||||||
|
#else
|
||||||
|
// Runtime dispatch on the detected CPU feature flag.
if (CPU.bSSE2)
|
||||||
|
{
|
||||||
|
TriangleBlock<SSE2CPU> block(args);
|
||||||
|
block.Loop(args, thread);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
TriangleBlock<GenericCPU> block(args);
|
||||||
|
block.Loop(args, thread);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void(*ScreenTriangle::TriDrawers8[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
void(*ScreenTriangle::TriDrawers8[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
||||||
|
@ -918,40 +992,38 @@ void(*ScreenTriangle::TriDrawers8[])(int, int, uint32_t, uint32_t, const TriDraw
|
||||||
&TriScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
&TriScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef NO_SSE
|
|
||||||
|
|
||||||
// Placeholder 32-bit triangle drawer table holding a single nullptr entry.
// On the old side of this diff the table sits under `#ifdef NO_SSE`; on the
// new side that guard line is blank, so the placeholder appears to be
// compiled unconditionally.
// NOTE(review): the non-SSE2 branch of TriangleBlock<CPUType>::Loop selects
// ScreenTriangle::TriDrawers32[bmode] when args->destBgra is set. With only
// one nullptr entry here that lookup cannot yield a usable drawer - confirm a
// generic 32-bit table is supplied elsewhere or that this path is unreachable.
void(*ScreenTriangle::TriDrawers32[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
void(*ScreenTriangle::TriDrawers32[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
||||||
{
|
{
|
||||||
nullptr
|
nullptr
|
||||||
};
|
};
|
||||||
|
|
||||||
#else
|
#ifndef NO_SSE
|
||||||
|
|
||||||
// Table of 32-bit triangle drawer entry points, one per blend mode.
// TriangleBlock<CPUType>::Loop indexes it with (int)BlendMode() when
// args->destBgra is set and CPUType::HasSSE2 == 1.
// This diff renames the table from TriDrawers32 (old rows) to
// TriDrawers32_SSE2 (new rows) and switches every entry from
// TriScreenDrawer32<...> to TriScreenDrawer32_SSE2<...>; the blend/sampler
// pairing of each slot is unchanged. The trailing comment on each row names
// the mode that slot serves.
void(*ScreenTriangle::TriDrawers32[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
void(*ScreenTriangle::TriDrawers32_SSE2[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
||||||
{
|
{
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute, // Skycap
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute, // Skycap
|
||||||
&TriScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
&TriScreenDrawer32_SSE2<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -983,40 +1055,38 @@ void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDra
|
||||||
&RectScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
&RectScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef NO_SSE
|
|
||||||
|
|
||||||
// Placeholder 32-bit rect drawer table holding a single nullptr entry.
// On the old side of this diff the table sits under `#ifdef NO_SSE`; on the
// new side that guard line is blank, so the placeholder appears to be
// compiled unconditionally.
// NOTE(review): the code that indexes RectDrawers32 is not visible in this
// chunk - confirm no caller dereferences an entry of this table on builds or
// CPUs that do not take the RectDrawers32_SSE2 path.
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
|
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
|
||||||
{
|
{
|
||||||
nullptr
|
nullptr
|
||||||
};
|
};
|
||||||
|
|
||||||
#else
|
#ifndef NO_SSE
|
||||||
|
|
||||||
// Table of 32-bit rect drawer entry points.
// This diff renames the table from RectDrawers32 (old rows) to
// RectDrawers32_SSE2 (new rows) and switches every entry from
// RectScreenDrawer32<...> to RectScreenDrawer32_SSE2<...>; the blend/sampler
// pairing of each slot is unchanged. The trailing comment on each row names
// the mode that slot serves.
// NOTE(review): presumably indexed by blend mode like the triangle tables;
// the indexing code is not visible in this chunk - verify against the caller.
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
|
void(*ScreenTriangle::RectDrawers32_SSE2[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
|
||||||
{
|
{
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute, // Skycap
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute, // Skycap
|
||||||
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
&RectScreenDrawer32_SSE2<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::FuzzSampler>::Execute // Fuzz
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -131,6 +131,11 @@ public:
|
||||||
static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
||||||
static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
|
static void(*TriDrawers32_SSE2[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *);
|
||||||
|
static void(*RectDrawers32_SSE2[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
||||||
|
#endif
|
||||||
|
|
||||||
static int FuzzStart;
|
static int FuzzStart;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue