mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-13 07:57:51 +00:00
- step with SSE
This commit is contained in:
parent
2db433e68f
commit
dbb7df998d
1 changed file with 75 additions and 0 deletions
|
@ -488,6 +488,80 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
|
__m128 mposW, mposU, mposV, mstepW, mstepU, mstepV;
|
||||||
|
__m128 mposWorldX, mposWorldY, mposWorldZ, mstepWorldX, mstepWorldY, mstepWorldZ;
|
||||||
|
__m128i mtexMul1, mtexMul2;
|
||||||
|
|
||||||
|
// SETUP_STEP_SSE(mpos, mstep, pos, step)
//
// Prepares a pair of __m128 values for stepping an interpolant four
// elements at a time:
//
//   mpos  = { pos, pos + step, pos + step + step, pos + step + step + step }
//           (lane 0 first; each lane is built by repeated scalar addition,
//           not by multiplying step, so the rounding matches an
//           accumulate-by-step sequence)
//   mstep = { 4 * step, 4 * step, 4 * step, 4 * step }
//
// How it works: the scalar is loaded into lane 0, then three rounds of
// "shuffle lanes up by one, add step to lane 0" leave the lanes in the
// order { pos+3s, pos+2s, pos+s, pos }. The final _MM_SHUFFLE(0, 1, 2, 3)
// reverses the lane order so lane 0 holds the unstepped value.
//
// Requirements visible from the expansion:
//   - pos and step must be float lvalues (their addresses are passed to
//     _mm_load_ss).
//   - mpos and mstep must be assignable __m128 variables; both are
//     overwritten.
//   - The macro expands to several statements and is not wrapped in
//     do { } while (0), so it must only be used inside a braced block,
//     never as the unbraced body of an if/else/for.
#define SETUP_STEP_SSE(mpos,mstep,pos,step) \
|
||||||
|
mstep = _mm_load_ss(&step); \
|
||||||
|
mpos = _mm_load_ss(&pos); \
|
||||||
|
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
|
||||||
|
mpos = _mm_add_ss(mpos, mstep); \
|
||||||
|
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
|
||||||
|
mpos = _mm_add_ss(mpos, mstep); \
|
||||||
|
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
|
||||||
|
mpos = _mm_add_ss(mpos, mstep); \
|
||||||
|
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(0, 1, 2, 3)); \
|
||||||
|
mstep = _mm_mul_ss(mstep, _mm_set1_ps(4.0f)); \
|
||||||
|
mstep = _mm_shuffle_ps(mstep, mstep, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
|
|
||||||
|
SETUP_STEP_SSE(mposW, mstepW, posW, stepW);
|
||||||
|
|
||||||
|
if (OptT::Flags & SWOPT_DynLights)
|
||||||
|
{
|
||||||
|
SETUP_STEP_SSE(mposWorldX, mstepWorldX, posWorldX, stepWorldX);
|
||||||
|
SETUP_STEP_SSE(mposWorldY, mstepWorldY, posWorldY, stepWorldY);
|
||||||
|
SETUP_STEP_SSE(mposWorldZ, mstepWorldZ, posWorldZ, stepWorldZ);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||||
|
{
|
||||||
|
SETUP_STEP_SSE(mposU, mstepU, posU, stepU);
|
||||||
|
SETUP_STEP_SSE(mposV, mstepV, posV, stepV);
|
||||||
|
|
||||||
|
mtexMul1 = _mm_setr_epi16(texWidth, texWidth, texWidth, texWidth, texHeight, texHeight, texHeight, texHeight);
|
||||||
|
mtexMul2 = _mm_setr_epi16(texHeight, texHeight, texHeight, texHeight, 1, 1, 1, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef SETUP_STEP_SSE
|
||||||
|
|
||||||
|
for (int x = x0; x < x1; x += 4)
|
||||||
|
{
|
||||||
|
__m128 rcp_posW = _mm_rcp_ps(mposW);
|
||||||
|
|
||||||
|
if (OptT::Flags & SWOPT_DynLights)
|
||||||
|
{
|
||||||
|
_mm_storeu_ps(&worldposX[x], _mm_mul_ps(mposWorldX, rcp_posW));
|
||||||
|
_mm_storeu_ps(&worldposY[x], _mm_mul_ps(mposWorldY, rcp_posW));
|
||||||
|
_mm_storeu_ps(&worldposZ[x], _mm_mul_ps(mposWorldZ, rcp_posW));
|
||||||
|
mposWorldX = _mm_add_ps(mposWorldX, mstepWorldX);
|
||||||
|
mposWorldY = _mm_add_ps(mposWorldY, mstepWorldY);
|
||||||
|
mposWorldZ = _mm_add_ps(mposWorldZ, mstepWorldZ);
|
||||||
|
}
|
||||||
|
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||||
|
{
|
||||||
|
__m128 rcpW = _mm_mul_ps(_mm_set1_ps(0x01000000), rcp_posW);
|
||||||
|
__m128i u = _mm_cvtps_epi32(_mm_mul_ps(mposU, rcpW));
|
||||||
|
__m128i v = _mm_cvtps_epi32(_mm_mul_ps(mposV, rcpW));
|
||||||
|
_mm_storeu_si128((__m128i*)&texelV[x], v);
|
||||||
|
|
||||||
|
__m128i texelX = _mm_srli_epi32(_mm_slli_epi32(u, 8), 17);
|
||||||
|
__m128i texelY = _mm_srli_epi32(_mm_slli_epi32(v, 8), 17);
|
||||||
|
__m128i texelXY = _mm_mulhi_epu16(_mm_slli_epi16(_mm_packs_epi32(texelX, texelY), 1), mtexMul1);
|
||||||
|
__m128i texlo = _mm_mullo_epi16(texelXY, mtexMul2);
|
||||||
|
__m128i texhi = _mm_mulhi_epi16(texelXY, mtexMul2);
|
||||||
|
texelX = _mm_unpacklo_epi16(texlo, texhi);
|
||||||
|
texelY = _mm_unpackhi_epi16(texlo, texhi);
|
||||||
|
_mm_storeu_si128((__m128i*)&texel[x], _mm_add_epi32(texelX, texelY));
|
||||||
|
|
||||||
|
mposU = _mm_add_ps(mposU, mstepU);
|
||||||
|
mposV = _mm_add_ps(mposV, mstepV);
|
||||||
|
}
|
||||||
|
|
||||||
|
mposW = _mm_add_ps(mposW, mstepW);
|
||||||
|
}
|
||||||
|
#else
|
||||||
for (int x = x0; x < x1; x++)
|
for (int x = x0; x < x1; x++)
|
||||||
{
|
{
|
||||||
if (OptT::Flags & SWOPT_DynLights)
|
if (OptT::Flags & SWOPT_DynLights)
|
||||||
|
@ -515,6 +589,7 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
||||||
|
|
||||||
posW += stepW;
|
posW += stepW;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (OptT::Flags & SWOPT_DynLights)
|
if (OptT::Flags & SWOPT_DynLights)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue