mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-10 23:01:59 +00:00
- Add SSE code to OpenGLSWFrameBuffer::OpenGLPal::Update to work around a broken auto-vectorizer in GCC
This commit is contained in:
parent
7f7be9e393
commit
f083109b51
1 changed files with 36 additions and 0 deletions
|
@ -2383,6 +2383,41 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update()
|
|||
// See explanation in UploadPalette() for skipat rationale.
|
||||
skipat = MIN(numEntries, DoColorSkip ? 256 - 8 : 256);
|
||||
|
||||
#ifndef NO_SSE
|
||||
// Manually vectorized SSE version, used to work around a bug in GCC's auto-vectorizer
|
||||
|
||||
int sse_count = skipat / 4 * 4;
|
||||
for (i = 0; i < sse_count; i += 4)
|
||||
{
|
||||
_mm_storeu_si128((__m128i*)(&buff[i]), _mm_loadu_si128((__m128i*)(&pal[i])));
|
||||
}
|
||||
switch (skipat - i)
|
||||
{
|
||||
// fall through is intentional
|
||||
case 3: buff[i] = pal[i].d; i++;
|
||||
case 2: buff[i] = pal[i].d; i++;
|
||||
case 1: buff[i] = pal[i].d; i++;
|
||||
default: i++;
|
||||
}
|
||||
sse_count = numEntries / 4 * 4;
|
||||
__m128i alphamask = _mm_set1_epi32(0xff000000);
|
||||
while (i < sse_count)
|
||||
{
|
||||
__m128i lastcolor = _mm_loadu_si128((__m128i*)(&pal[i - 1]));
|
||||
__m128i color = _mm_loadu_si128((__m128i*)(&pal[i]));
|
||||
_mm_storeu_si128((__m128i*)(&buff[i]), _mm_or_si128(_mm_and_si128(alphamask, color), _mm_andnot_si128(alphamask, lastcolor)));
|
||||
i += 4;
|
||||
}
|
||||
switch (numEntries - i)
|
||||
{
|
||||
// fall through is intentional
|
||||
case 3: buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); i++;
|
||||
case 2: buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); i++;
|
||||
case 1: buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b); i++;
|
||||
default: break;
|
||||
}
|
||||
|
||||
#else
|
||||
for (i = 0; i < skipat; ++i)
|
||||
{
|
||||
buff[i] = ColorARGB(pal[i].a, pal[i].r, pal[i].g, pal[i].b);
|
||||
|
@ -2391,6 +2426,7 @@ bool OpenGLSWFrameBuffer::OpenGLPal::Update()
|
|||
{
|
||||
buff[i] = ColorARGB(pal[i].a, pal[i - 1].r, pal[i - 1].g, pal[i - 1].b);
|
||||
}
|
||||
#endif
|
||||
if (numEntries > 1)
|
||||
{
|
||||
i = numEntries - 1;
|
||||
|
|
Loading…
Reference in a new issue