mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-13 07:57:51 +00:00
- Add SSE code to OpenGLSWFrameBuffer::OpenGLPal::Update to work around a broken auto-vectorizer in GCC
This commit is contained in:
parent
7f7be9e393
commit
f083109b51
1 changed file with 36 additions and 0 deletions
|
@ -2383,6 +2383,41 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update()
|
||||||
// See explanation in UploadPalette() for skipat rationale.
|
// See explanation in UploadPalette() for skipat rationale.
|
||||||
skipat = MIN(numEntries, DoColorSkip ? 256 - 8 : 256);
|
skipat = MIN(numEntries, DoColorSkip ? 256 - 8 : 256);
|
||||||
|
|
||||||
|
#ifndef NO_SSE
|
||||||
|
// Manual SSE vectorized version here to workaround a bug in GCC's auto-vectorizer
|
||||||
|
|
||||||
|
int sse_count = skipat / 4 * 4;
|
||||||
|
for (i = 0; i < sse_count; i += 4)
|
||||||
|
{
|
||||||
|
_mm_storeu_si128((__m128i*)(&buff[i]), _mm_loadu_si128((__m128i*)(&pal[i])));
|
||||||
|
}
|
||||||
|
switch (skipat - i)
|
||||||
|
{
|
||||||
|
// fall through is intentional
|
||||||
|
case 3: buff[i] = pal[i].d; i++;
|
||||||
|
case 2: buff[i] = pal[i].d; i++;
|
||||||
|
case 1: buff[i] = pal[i].d; i++;
|
||||||
|
default: i++;
|
||||||
|
}
|
||||||
|
sse_count = numEntries / 4 * 4;
|
||||||
|
__m128i alphamask = _mm_set1_epi32(0xff000000);
|
||||||
|
while (i < sse_count)
|
||||||
|
{
|
||||||
|
__m128i lastcolor = _mm_loadu_si128((__m128i*)(&pal[i - 1]));
|
||||||
|
__m128i color = _mm_loadu_si128((__m128i*)(&pal[i]));
|
||||||
|
_mm_storeu_si128((__m128i*)(&buff[i]), _mm_or_si128(_mm_and_si128(alphamask, color), _mm_andnot_si128(alphamask, lastcolor)));
|
||||||
|
i += 4;
|
||||||
|
}
|
||||||
|
switch (numEntries - i)
|
||||||
|
{
|
||||||
|
// fall through is intentional
|
||||||
|
case 3: buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); i++;
|
||||||
|
case 2: buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); i++;
|
||||||
|
case 1: buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); i++;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
for (i = 0; i < skipat; ++i)
|
for (i = 0; i < skipat; ++i)
|
||||||
{
|
{
|
||||||
buff[i] = ColorARGB(pal[i].a, pal[i].r, pal[i].g, pal[i].b);
|
buff[i] = ColorARGB(pal[i].a, pal[i].r, pal[i].g, pal[i].b);
|
||||||
|
@ -2391,6 +2426,7 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update()
|
||||||
{
|
{
|
||||||
buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b);
|
buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (numEntries > 1)
|
if (numEntries > 1)
|
||||||
{
|
{
|
||||||
i = numEntries - 1;
|
i = numEntries - 1;
|
||||||
|
|
Loading…
Reference in a new issue