diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 46b82f517e..e06cf2b7fa 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -37,6 +37,8 @@ #include "r_poly_triangle.h" #include "r_draw_rgba.h" +CVAR(Bool, r_debug_trisetup, false, 0); + int PolyTriangleDrawer::viewport_x; int PolyTriangleDrawer::viewport_y; int PolyTriangleDrawer::viewport_width; @@ -93,19 +95,22 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD int num_drawfuncs = 0; drawfuncs[num_drawfuncs++] = drawargs.subsectorTest ? &ScreenTriangle::SetupSubsector : &ScreenTriangle::SetupNormal; - - int bmode = (int)drawargs.blendmode; - if (drawargs.writeColor && drawargs.texturePixels) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; - else if (drawargs.writeColor) - drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; - + + if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawal + { + int bmode = (int)drawargs.blendmode; + if (drawargs.writeColor && drawargs.texturePixels) + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriDraw32[bmode] : llvm->TriDraw8[bmode]; + else if (drawargs.writeColor) + drawfuncs[num_drawfuncs++] = dest_bgra ? llvm->TriFill32[bmode] : llvm->TriFill8[bmode]; + } + if (drawargs.writeStencil) drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite; - + if (drawargs.writeSubsector) drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite; - + TriDrawTriangleArgs args; args.dest = dest; args.pitch = dest_pitch; @@ -793,6 +798,9 @@ void ScreenTriangle::SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadDa span->Length = 0; } + if (mask0 == 0 && mask1 == 0) + continue; + partial->X = x; partial->Y = y; partial->Mask0 = mask0; @@ -997,6 +1005,9 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea span->Length = 0; } + if (mask0 == 0 && mask1 == 0) + continue; + partial->X = x; partial->Y = y; partial->Mask0 = mask0; @@ -1083,6 +1094,9 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea span->Length = 0; } + if (mask0 == 0 && mask1 == 0) + continue; + partial->X = x; partial->Y = y; partial->Mask0 = mask0; diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 38e9922c0a..8e4ed803f7 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -111,16 +111,38 @@ void DrawTriangleCodegen::DrawFullSpans() SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); SSAInt lightstep = (lightnext - lightpos) / 8; - for (int ix = 0; ix < 8; ix++) + if (truecolor) { - if (truecolor) + for (int ix = 0; ix < 8; ix += 4) { - currentlight = is_fixed_light.select(light, lightpos >> 8); - SSAUBytePtr destptr = dest[(x * 8 + ix) * 4]; - destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + SSAVec16ub pixels16 = destptr.load_unaligned_vec16ub(false); + SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16); + SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16); + SSAVec4i pixels[4] = + { + SSAVec4i::extendlo(pixels8lo), + SSAVec4i::extendhi(pixels8lo), + SSAVec4i::extendlo(pixels8hi), + SSAVec4i::extendhi(pixels8hi) + }; + + for (int sse = 0; sse < 4; sse++) + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + pixels[sse] = ProcessPixel32(pixels[sse], varyingPos); + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } + + destptr.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3]))); } - else + } + else + { + for (int ix = 0; ix < 8; ix++) { currentlight = is_fixed_light.select(light, lightpos >> 8); SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); @@ -128,11 +150,11 @@ void DrawTriangleCodegen::DrawFullSpans() SSAUBytePtr destptr = dest[(x * 8 + ix)]; destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); - } - for (int j = 0; j < TriVertex::NumVarying; j++) - varyingPos[j] = varyingPos[j] + varyingStep[j]; - lightpos = lightpos + lightstep; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } } for (int j = 0; j < TriVertex::NumVarying; j++)