From fd4fc450407421abeaf6e454f41a8f3bdc9ff8bc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 6 Dec 2019 20:35:19 +0100 Subject: [PATCH] Move blend operator and alpha test flag out of the inner blend loop --- .../polyrenderer/drawers/screen_triangle.cpp | 146 ++++++++++++------ .../polyrenderer/drawers/screen_triangle.h | 14 ++ 2 files changed, 112 insertions(+), 48 deletions(-) diff --git a/src/rendering/polyrenderer/drawers/screen_triangle.cpp b/src/rendering/polyrenderer/drawers/screen_triangle.cpp index caf1b43d8..0b15014c7 100644 --- a/src/rendering/polyrenderer/drawers/screen_triangle.cpp +++ b/src/rendering/polyrenderer/drawers/screen_triangle.cpp @@ -244,74 +244,119 @@ static void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args WriteVarying(args->v1->worldZ * args->v1->w + args->gradientX.WorldZ * startX + args->gradientY.WorldZ * startY, args->gradientX.WorldZ, x0, x1, thread->scanline.W, thread->scanline.WorldZ); } -static uint32_t BlendColor(FRenderStyle style, uint32_t fg, uint32_t bg) +static const int shiftTable[] = { + 0, 0, 0, 0, // STYLEALPHA_Zero + 0, 0, 0, 0, // STYLEALPHA_One + 24, 24, 24, 24, // STYLEALPHA_Src + 24, 24, 24, 24, // STYLEALPHA_InvSrc + 24, 16, 8, 0, // STYLEALPHA_SrcCol + 24, 16, 8, 0, // STYLEALPHA_InvSrcCol + 24, 16, 8, 0, // STYLEALPHA_DstCol + 24, 16, 8, 0 // STYLEALPHA_InvDstCol +}; + +template +static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread) { - static const int shiftTable[] = { - 0, 0, 0, 0, // STYLEALPHA_Zero - 0, 0, 0, 0, // STYLEALPHA_One - 24, 24, 24, 24, // STYLEALPHA_Src - 24, 24, 24, 24, // STYLEALPHA_InvSrc - 24, 16, 8, 0, // STYLEALPHA_SrcCol - 24, 16, 8, 0, // STYLEALPHA_InvSrcCol - 24, 16, 8, 0, // STYLEALPHA_DstCol - 24, 16, 8, 0 // STYLEALPHA_InvDstCol - }; + using namespace TriScreenDrawerModes; + + FRenderStyle style = thread->RenderStyle; bool invsrc = style.SrcAlpha & 1; bool invdst = style.DestAlpha & 1; - int srcinput = style.SrcAlpha <= STYLEALPHA_One ? 0 : (style.SrcAlpha >= STYLEALPHA_DstCol ? bg : fg); - int dstinput = style.DestAlpha <= STYLEALPHA_One ? 0 : (style.DestAlpha >= STYLEALPHA_DstCol ? bg : fg); - const int* shiftsrc = shiftTable + (style.SrcAlpha << 2); const int* shiftdst = shiftTable + (style.DestAlpha << 2); - int32_t src[4], dst[4]; - for (int i = 0; i < 4; i++) + uint32_t* dest = (uint32_t*)thread->dest; + uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch; + uint32_t* fragcolor = thread->scanline.FragColor; + + for (int x = x0; x < x1; x++) { - // Grab component for scale factors - src[i] = (srcinput >> shiftsrc[i]) & 0xff; - dst[i] = (dstinput >> shiftdst[i]) & 0xff; + uint32_t fg = fragcolor[x]; - // Inverse if needed - src[i] = invsrc ? 0xff - src[i] : src[i]; - dst[i] = invdst ? 0xff - dst[i] : dst[i]; + if (OptT::Flags & SWBLEND_AlphaTest) + { + if (fragcolor[x] <= 0x7f000000) + continue; + } - // Rescale 0-255 to 0-256 - src[i] = src[i] + (src[i] >> 7); - dst[i] = dst[i] + (dst[i] >> 7); + uint32_t bg = line[x]; - // Multiply with input - src[i] = src[i] * ((fg >> (24 - (i << 3))) & 0xff); - dst[i] = dst[i] * ((bg >> (24 - (i << 3))) & 0xff); + int srcinput = style.SrcAlpha <= STYLEALPHA_One ? 0 : (style.SrcAlpha >= STYLEALPHA_DstCol ? bg : fg); + int dstinput = style.DestAlpha <= STYLEALPHA_One ? 0 : (style.DestAlpha >= STYLEALPHA_DstCol ? bg : fg); + + uint32_t out[4]; + for (int i = 0; i < 4; i++) + { + // Grab component for scale factors + int32_t src = (srcinput >> shiftsrc[i]) & 0xff; + int32_t dst = (dstinput >> shiftdst[i]) & 0xff; + + // Inverse if needed + src = invsrc ? 0xff - src : src; + dst = invdst ? 0xff - dst : dst; + + // Rescale 0-255 to 0-256 + src = src + (src >> 7); + dst = dst + (dst >> 7); + + // Multiply with input + src = src * ((fg >> (24 - (i << 3))) & 0xff); + dst = dst * ((bg >> (24 - (i << 3))) & 0xff); + + // Apply blend operator + int32_t val; + if (OptT::Flags & SWBLEND_Sub) + { + val = src - dst; + } + else if (OptT::Flags & SWBLEND_RevSub) + { + val = dst - src; + } + else + { + val = src + dst; + } + out[i] = clamp((val + 127) >> 8, 0, 255); + } + + line[x] = MAKEARGB(out[0], out[1], out[2], out[3]); } - - uint32_t out[4]; - switch (style.BlendOp) - { - default: - case STYLEOP_Add: for (int i = 0; i < 4; i++) out[i] = clamp((src[i] + dst[i] + 127) >> 8, 0, 255); break; - case STYLEOP_Sub: for (int i = 0; i < 4; i++) out[i] = clamp((src[i] - dst[i] + 127) >> 8, 0, 255); break; - case STYLEOP_RevSub: for (int i = 0; i < 4; i++) out[i] = clamp((dst[i] - src[i] + 127) >> 8, 0, 255); break; - } - return MAKEARGB(out[0], out[1], out[2], out[3]); } static void WriteColor(int y, int x0, int x1, PolyTriangleThreadData* thread) { - uint32_t* dest = (uint32_t*)thread->dest; - uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch; - FRenderStyle style = thread->RenderStyle; - uint32_t* fragcolor = thread->scanline.FragColor; + using namespace TriScreenDrawerModes; + FRenderStyle style = thread->RenderStyle; if (style.BlendOp == STYLEOP_Add && style.SrcAlpha == STYLEALPHA_One && style.DestAlpha == STYLEALPHA_Zero) { + uint32_t* dest = (uint32_t*)thread->dest; + uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch; + uint32_t* fragcolor = thread->scanline.FragColor; + if (!thread->AlphaTest) { - for (int x = x0; x < x1; x++) +#if !defined(NO_SSE) + int ssecount = ((x1 - x0) & ~3); + int sseend = x0 + ssecount; + + for (int x = x0; x < sseend; x += 4) + { + __m128i v = _mm_loadu_si128((__m128i*)&fragcolor[x]); + _mm_storeu_si128((__m128i*)&line[x], v); + } + + for (int x = sseend; x < x1; x++) { line[x] = fragcolor[x]; } +#else + memcpy(line + x0, fragcolor + x0, (x1 - x0) * sizeof(uint32_t)); +#endif } else { @@ -326,17 +371,22 @@ static void WriteColor(int y, int x0, int x1, PolyTriangleThreadData* thread) { if (!thread->AlphaTest) { - for (int x = x0; x < x1; x++) + switch (style.BlendOp) { - line[x] = BlendColor(style, fragcolor[x], line[x]); + default: + case STYLEOP_Add: BlendColor(y, x0, x1, thread); break; + case STYLEOP_Sub: BlendColor(y, x0, x1, thread); break; + case STYLEOP_RevSub: BlendColor(y, x0, x1, thread); break; } } else { - for (int x = x0; x < x1; x++) + switch (style.BlendOp) { - if (fragcolor[x] > 0x7f000000) - line[x] = BlendColor(style, fragcolor[x], line[x]); + default: + case STYLEOP_Add: BlendColor(y, x0, x1, thread); break; + case STYLEOP_Sub: BlendColor(y, x0, x1, thread); break; + case STYLEOP_RevSub: BlendColor(y, x0, x1, thread); break; } } } diff --git a/src/rendering/polyrenderer/drawers/screen_triangle.h b/src/rendering/polyrenderer/drawers/screen_triangle.h index d26aadd07..ccf4818c1 100644 --- a/src/rendering/polyrenderer/drawers/screen_triangle.h +++ b/src/rendering/polyrenderer/drawers/screen_triangle.h @@ -114,4 +114,18 @@ namespace TriScreenDrawerModes struct TestSpanOpt1 { static const int Flags = 1; }; struct TestSpanOpt2 { static const int Flags = 2; }; struct TestSpanOpt3 { static const int Flags = 3; }; + + enum SWBlendColor + { + SWBLEND_Sub = 1, + SWBLEND_RevSub = 2, + SWBLEND_AlphaTest = 4 + }; + + struct BlendColorOpt_Add { static const int Flags = 0; }; + struct BlendColorOpt_Sub { static const int Flags = 1; }; + struct BlendColorOpt_RevSub { static const int Flags = 2; }; + struct BlendColorOpt_AlphaTest_Add { static const int Flags = 4; }; + struct BlendColorOpt_AlphaTest_Sub { static const int Flags = 5; }; + struct BlendColorOpt_AlphaTest_RevSub { static const int Flags = 6; }; }