diff --git a/src/r_poly_sky.cpp b/src/r_poly_sky.cpp index e4e0a0144..823a510f2 100644 --- a/src/r_poly_sky.cpp +++ b/src/r_poly_sky.cpp @@ -58,7 +58,7 @@ void PolySkyDome::Render(const TriMatrix &worldToClip) args.uniforms.subsectorDepth = RenderPolyScene::SkySubsectorDepth; args.objectToClip = &objectToClip; args.stenciltestvalue = 255; - args.stencilwritevalue = 255; + args.stencilwritevalue = 1; args.SetTexture(frontskytex); args.SetColormap(&NormalLight); args.SetClipPlane(0.0f, 0.0f, 0.0f, 0.0f); @@ -84,6 +84,7 @@ void PolySkyDome::RenderRow(PolyDrawArgs &args, int row, uint32_t capcolor) args.ccw = false; args.uniforms.color = capcolor; PolyTriangleDrawer::draw(args, TriDrawVariant::DrawNormal, TriBlendMode::Skycap); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Skycap); } void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int row, bool bottomCap) @@ -98,6 +99,7 @@ void PolySkyDome::RenderCapColorRow(PolyDrawArgs &args, FTexture *skytex, int ro args.ccw = bottomCap; args.uniforms.color = solid; PolyTriangleDrawer::draw(args, TriDrawVariant::FillNormal, TriBlendMode::Copy); + PolyTriangleDrawer::draw(args, TriDrawVariant::Stencil, TriBlendMode::Copy); } void PolySkyDome::CreateDome() diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index a97afaead..164fc98e2 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -88,22 +88,19 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian return; auto llvm = Drawers::Instance(); - void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); + PolyDrawFuncPtr setupfunc = nullptr; + PolyDrawFuncPtr drawfunc = nullptr; int bmode = (int)blendmode; switch (variant) { default: - //case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? &ScreenTriangle::DrawFunc : llvm->TriDrawNormal8[bmode]; break; - //case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? 
&ScreenTriangle::DrawSubsectorFunc : llvm->TriDrawSubsector8[bmode]; break; - //case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; - //case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; - case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; - case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; - case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; + case TriDrawVariant::DrawNormal: setupfunc = &ScreenTriangle::SetupNormal; drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; + case TriDrawVariant::FillNormal: setupfunc = &ScreenTriangle::SetupNormal; drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; + case TriDrawVariant::DrawSubsector: setupfunc = &ScreenTriangle::SetupSubsector; drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; case TriDrawVariant::FuzzSubsector: - case TriDrawVariant::FillSubsector: drawfunc = dest_bgra ? llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; - case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; - case TriDrawVariant::StencilClose: drawfunc = llvm->TriStencilClose; break; + case TriDrawVariant::FillSubsector: setupfunc = &ScreenTriangle::SetupSubsector; drawfunc = dest_bgra ? 
llvm->TriFillSubsector32[bmode] : llvm->TriFillSubsector8[bmode]; break; + case TriDrawVariant::Stencil: drawfunc = &ScreenTriangle::StencilFunc; break; + case TriDrawVariant::StencilClose: drawfunc = &ScreenTriangle::StencilCloseFunc; break; } TriDrawTriangleArgs args; @@ -139,7 +136,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian { for (int j = 0; j < 3; j++) vert[j] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); } } else if (drawargs.mode == TriangleDrawMode::Fan) @@ -149,7 +146,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); vert[1] = vert[2]; } } @@ -160,7 +157,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian for (int i = 2; i < vcount; i++) { vert[2] = shade_vertex(*drawargs.objectToClip, drawargs.clipPlane, *(vinput++)); - draw_shaded_triangle(vert, ccw, &args, thread, drawfunc); + draw_shaded_triangle(vert, ccw, &args, thread, setupfunc, drawfunc); vert[0] = vert[1]; vert[1] = vert[2]; ccw = !ccw; @@ -179,7 +176,7 @@ ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip, return sv; } -void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)) +void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr setupfunc, PolyDrawFuncPtr drawfunc) { // Cull, clip and generate additional vertices as 
needed TriVertex clippedvert[max_additional_vertices]; @@ -223,6 +220,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool args->v1 = &clippedvert[numclipvert - 1]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i - 2]; + if (setupfunc) setupfunc(args, thread); drawfunc(args, thread); } } @@ -233,6 +231,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool args->v1 = &clippedvert[0]; args->v2 = &clippedvert[i - 1]; args->v3 = &clippedvert[i]; + if (setupfunc) setupfunc(args, thread); drawfunc(args, thread); } } @@ -952,7 +951,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea uint8_t *stencilBlock = &stencilValues[block * 64]; uint32_t *stencilBlockMask = &stencilMasks[block]; bool blockIsSingleStencil = ((*stencilBlockMask) & 0xffffff00) == 0xffffff00; - bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) != stencilTestValue; + bool skipBlock = blockIsSingleStencil && ((*stencilBlockMask) & 0xff) < stencilTestValue; // Skip block when outside an edge if (a == 0 || b == 0 || c == 0 || skipBlock) @@ -1043,7 +1042,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea for (int ix = 0; ix < q; ix++) { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); mask0 <<= 1; mask0 |= (uint32_t)covered; @@ -1067,7 +1066,7 @@ void ScreenTriangle::SetupSubsector(const TriDrawTriangleArgs *args, WorkerThrea for (int ix = 0; ix < q; ix++) { - bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] == stencilTestValue; + bool passStencilTest = blockIsSingleStencil || stencilBlock[ix + iy * q] >= stencilTestValue; bool covered = 
(CX1 > 0 && CX2 > 0 && CX3 > 0 && (x + ix) < clipright && (y + iy) < clipbottom && passStencilTest && subsector[ix] >= subsectorDepth); mask1 <<= 1; mask1 |= (uint32_t)covered; @@ -1247,6 +1246,7 @@ void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThrea } } +#if 0 float ScreenTriangle::FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -1323,11 +1323,10 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thr int32_t varyingPos[TriVertex::NumVarying]; for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] = (int32_t)(blockPosX.Varying[j] * rcpW); + int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); for (int x = 0; x < width; x++) { - int lightpos = 256 - (int)(clamp(shade - MIN(24.0f, globVis * blockPosX.W) / 32.0f, 0.0f, 31.0f / 32.0f) * 256.0f); - blockPosX.W += gradientX.W * 8; for (int j = 0; j < TriVertex::NumVarying; j++) blockPosX.Varying[j] += gradientX.Varying[j] * 8; @@ -1508,18 +1507,7 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thr } } } - -void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - SetupNormal(args, thread); - Draw(args, thread); -} - -void ScreenTriangle::DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) -{ - SetupSubsector(args, thread); - Draw(args, thread); -} +#endif void ScreenTriangle::StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index 14cfb1335..59e52ef66 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -153,6 +153,8 @@ struct TriMatrix float matrix[16]; }; +typedef void(*PolyDrawFuncPtr)(const TriDrawTriangleArgs *, WorkerThreadData *); + class PolyTriangleDrawer { public: @@ -163,7 +165,7 @@ 
public: private: static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); static void draw_arrays(const PolyDrawArgs &args, TriDrawVariant variant, TriBlendMode blendmode, WorkerThreadData *thread); - static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *)); + static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr setupfunc, PolyDrawFuncPtr drawfunc); static bool cullhalfspace(float clipdistance1, float clipdistance2, float &t1, float &t2); static void clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert, int &numclipvert); @@ -274,18 +276,11 @@ struct ScreenTriangleStepVariables class ScreenTriangle { public: - static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void DrawSubsectorFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilCloseFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread); -private: static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static void Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - - static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); - static float FindGradientY(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2); }; diff --git 
a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp index 3806d5253..f38b42350 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.cpp @@ -32,6 +32,636 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" +void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) +{ + this->variant = variant; + this->blendmode = blendmode; + this->truecolor = truecolor; + pixelsize = truecolor ? 4 : 1; + + LoadArgs(args, thread_data); + CalculateGradients(); + DrawFullSpans(); + DrawPartialBlocks(); +} + +void DrawTriangleCodegen::DrawFullSpans() +{ + stack_i.store(SSAInt(0)); + SSAForBlock loop; + SSAInt i = stack_i.load(); + loop.loop_block(i < numSpans, 0); + { + SSAInt spanX = SSAShort(fullSpans[i][0].load(true).v).zext_int(); + SSAInt spanY = SSAShort(fullSpans[i][1].load(true).v).zext_int(); + SSAInt spanLength = fullSpans[i][2].load(true); + + SSAInt width = spanLength; + SSAInt height = SSAInt(8); + + stack_dest.store(destOrg[(spanX + spanY * pitch) * pixelsize]); + stack_subsector.store(subsectorGBuffer[spanX + spanY * pitch]); + stack_posYW.store(start.W + gradientX.W * (spanX - startX) + gradientY.W * (spanY - startY)); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posYVarying[j].store(start.Varying[j] + gradientX.Varying[j] * (spanX - startX) + gradientY.Varying[j] * (spanY - startY)); + stack_y.store(SSAInt(0)); + + SSAForBlock loop_y; + SSAInt y = stack_y.load(); + SSAUBytePtr dest = stack_dest.load(); + SSAIntPtr subsector = stack_subsector.load(); + SSAStepVariables blockPosY; + blockPosY.W = stack_posYW.load(); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = stack_posYVarying[j].load(); + loop_y.loop_block(y < height, 0); + { + stack_posXW.store(blockPosY.W); + for (int j = 0; j < TriVertex::NumVarying; j++) + 
stack_posXVarying[j].store(blockPosY.Varying[j]); + + SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosY.W; + stack_lightpos.store(FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosY.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true)); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_varyingPos[j].store(SSAInt(blockPosY.Varying[j] * rcpW, false)); + stack_x.store(SSAInt(0)); + + SSAForBlock loop_x; + SSAInt x = stack_x.load(); + SSAStepVariables blockPosX; + blockPosX.W = stack_posXW.load(); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] = stack_posXVarying[j].load(); + SSAInt lightpos = stack_lightpos.load(); + SSAInt varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = stack_varyingPos[j].load(); + loop_x.loop_block(x < width, 0); + { + blockPosX.W = blockPosX.W + gradientX.W * 8.0f; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f; + + rcpW = SSAFloat((float)0x01000000) / blockPosX.W; + SSAInt varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightstep = (lightnext - lightpos) / 8; + + for (int ix = 0; ix < 8; ix++) + { + if (truecolor) + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + + SSAUBytePtr destptr = dest[(x * 8 + ix) * 4]; + destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x 
* 8 + ix].store(subsectorDepth); + } + else + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); + currentcolormap = Colormaps[colormapindex << 8]; + + SSAUBytePtr destptr = dest[(x * 8 + ix)]; + destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x * 8 + ix].store(subsectorDepth); + } + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } + + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_varyingPos[j].store(varyingPos[j]); + stack_lightpos.store(lightpos); + stack_posXW.store(blockPosX.W); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posXVarying[j].store(blockPosX.Varying[j]); + stack_x.store(x + 1); + } + loop_x.end_block(); + + stack_posYW.store(blockPosY.W + gradientY.W); + for (int j = 0; j < TriVertex::NumVarying; j++) + stack_posYVarying[j].store(blockPosY.Varying[j] + gradientY.Varying[j]); + stack_dest.store(dest[pitch * pixelsize]); + stack_subsector.store(subsector[pitch]); + stack_y.store(y + 1); + } + loop_y.end_block(); + + stack_i.store(i + 1); + } + loop.end_block(); +} + +void DrawTriangleCodegen::DrawPartialBlocks() +{ + stack_i.store(SSAInt(0)); + SSAForBlock loop; + SSAInt i = stack_i.load(); + loop.loop_block(i < numBlocks, 0); + { + SSAInt blockX = SSAShort(partialBlocks[i][0].load(true).v).zext_int(); + SSAInt blockY = SSAShort(partialBlocks[i][1].load(true).v).zext_int(); + SSAInt mask0 = partialBlocks[i][2].load(true); + SSAInt mask1 = partialBlocks[i][3].load(true); + + SSAUBytePtr dest = destOrg[(blockX + blockY * pitch) * pixelsize]; + SSAIntPtr subsector = subsectorGBuffer[blockX + blockY * pitch]; + + SSAStepVariables blockPosY; + blockPosY.W = start.W 
+ gradientX.W * (blockX - startX) + gradientY.W * (blockY - startY); + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = start.Varying[j] + gradientX.Varying[j] * (blockX - startX) + gradientY.Varying[j] * (blockY - startY); + + for (int maskNum = 0; maskNum < 2; maskNum++) + { + SSAInt mask = (maskNum == 0) ? mask0 : mask1; + + for (int y = 0; y < 4; y++) + { + SSAStepVariables blockPosX = blockPosY; + + SSAFloat rcpW = SSAFloat((float)0x01000000) / blockPosX.W; + SSAInt varyingPos[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = SSAInt(blockPosX.Varying[j] * rcpW, false); + + SSAInt lightpos = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + + blockPosX.W = blockPosX.W + gradientX.W * 8.0f; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosX.Varying[j] = blockPosX.Varying[j] + gradientX.Varying[j] * 8.0f; + + rcpW = SSAFloat((float)0x01000000) / blockPosX.W; + SSAInt varyingStep[TriVertex::NumVarying]; + for (int j = 0; j < TriVertex::NumVarying; j++) + { + SSAInt nextPos = SSAInt(blockPosX.Varying[j] * rcpW, false); + varyingStep[j] = (nextPos - varyingPos[j]) / 8; + } + + SSAInt lightnext = FRACUNIT - SSAInt(SSAFloat::clamp(shade - SSAFloat::MIN(SSAFloat(24.0f), globVis * blockPosX.W) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f)) * (float)FRACUNIT, true); + SSAInt lightstep = (lightnext - lightpos) / 8; + + for (int x = 0; x < 8; x++) + { + SSABool covered = !((mask & (1 << (31 - y * 8 - x))) == SSAInt(0)); + SSAIfBlock branch; + branch.if_block(covered); + { + if (truecolor) + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + + SSAUBytePtr destptr = dest[x * 4]; + destptr.store_vec4ub(ProcessPixel32(destptr.load_vec4ub(false), varyingPos)); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant 
!= TriDrawVariant::FuzzSubsector) + subsector[x].store(subsectorDepth); + } + else + { + currentlight = is_fixed_light.select(light, lightpos >> 8); + SSAInt colormapindex = SSAInt::MIN((256 - currentlight) * 32 / 256, SSAInt(31)); + currentcolormap = Colormaps[colormapindex << 8]; + + SSAUBytePtr destptr = dest[x]; + destptr.store(ProcessPixel8(destptr.load(false).zext_int(), varyingPos).trunc_ubyte()); + + if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector) + subsector[x].store(subsectorDepth); + } + } + branch.end_block(); + + for (int j = 0; j < TriVertex::NumVarying; j++) + varyingPos[j] = varyingPos[j] + varyingStep[j]; + lightpos = lightpos + lightstep; + } + + blockPosY.W = blockPosY.W + gradientY.W; + for (int j = 0; j < TriVertex::NumVarying; j++) + blockPosY.Varying[j] = blockPosY.Varying[j] + gradientY.Varying[j]; + + dest = dest[pitch * pixelsize]; + subsector = subsector[pitch]; + } + } + + stack_i.store(i + 1); + } + loop.end_block(); +} + +SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) +{ + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return translation[color * 4].load_vec4ub(true); + else + return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); +} + +SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) +{ + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == 
TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return translation[color].load(true).zext_int(); + else + return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); +} + +SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) +{ + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return SSAVec4i::unpack(color); + + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAVec4i nearest; + SSAVec4i linear; + + { + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + nearest = texturePixels[uvoffset * 4].load_vec4ub(true); + } + + return nearest; + + /* + { + SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8; + SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8; + + ufrac = ufrac - (uone >> 1); + vfrac = vfrac - (vone >> 1); + + SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth; + SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth; + SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight; + SSAInt frac_y1 = ((vfrac + vone) >> FRACBITS) * textureHeight; + + SSAInt x0 = frac_x0 >> FRACBITS; + SSAInt x1 = frac_x1 >> FRACBITS; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true); + SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 4].load_vec4ub(true); + SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true); + SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true); + + SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; + } + + return 
AffineLinear.select(linear, nearest); + */ +} + +SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying) +{ + SSAInt ufrac = varying[0] << 8; + SSAInt vfrac = varying[1] << 8; + + SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; + SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; + SSAInt uvoffset = upos * textureHeight + vpos; + + if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) + return color; + else + return texturePixels[uvoffset].load(true).zext_int(); +} + +SSAInt DrawTriangleCodegen::Shade8(SSAInt c) +{ + return currentcolormap[c].load(true).zext_int(); +} + +SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) +{ + SSAVec4i fg; + SSAVec4i output; + + switch (blendmode) + { + default: + case TriBlendMode::Copy: + fg = Sample32(varying); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; + case TriBlendMode::AlphaBlend: + fg = Sample32(varying); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + break; + case TriBlendMode::AddSolid: + fg = Sample32(varying); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); + break; + case TriBlendMode::Add: + fg = Sample32(varying); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::Sub: + fg = Sample32(varying); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::RevSub: + fg = Sample32(varying); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::Stencil: + fg = Sample32(varying); + output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); + break; + case TriBlendMode::Shaded: + output = 
blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(varying), bg, srcalpha, destalpha); + break; + case TriBlendMode::TranslateCopy: + fg = TranslateSample32(varying); + output = blend_copy(shade_bgra_simple(fg, currentlight)); + break; + case TriBlendMode::TranslateAlphaBlend: + fg = TranslateSample32(varying); + output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); + break; + case TriBlendMode::TranslateAdd: + fg = TranslateSample32(varying); + output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateSub: + fg = TranslateSample32(varying); + output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::TranslateRevSub: + fg = TranslateSample32(varying); + output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: + fg = Sample32(varying); + output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); + break; + case TriBlendMode::Skycap: + fg = Sample32(varying); + output = FadeOut(varying[1], fg); + break; + } + + return output; +} + +SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) +{ + SSAVec4i c = BaseColors[index * 4].load_vec4ub(true); + c = c.insert(3, 255); + return c; +} + +SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) +{ + return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 3)].load(true).zext_int(); +} + +SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) +{ + SSAVec4i fg; + SSAInt alpha, inv_alpha; + SSAInt output; + SSAInt palindex; + + switch (blendmode) + { + default: + case TriBlendMode::Copy: + output = Shade8(Sample8(varying)); + break; + case TriBlendMode::AlphaBlend: + palindex = Sample8(varying); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, 
output); + break; + case TriBlendMode::AddSolid: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Add: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Sub: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::RevSub: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Stencil: + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); + break; + case TriBlendMode::Shaded: + palindex = Sample8(varying); + output = ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); + break; + case TriBlendMode::TranslateCopy: + palindex = TranslateSample8(varying); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateAlphaBlend: + palindex = TranslateSample8(varying); + output = Shade8(palindex); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateAdd: + palindex = TranslateSample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateSub: + palindex 
= TranslateSample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::TranslateRevSub: + palindex = TranslateSample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::AddSrcColorOneMinusSrcColor: + palindex = Sample8(varying); + fg = ToBgra(Shade8(palindex)); + output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); + output = (palindex == SSAInt(0)).select(bg, output); + break; + case TriBlendMode::Skycap: + fg = ToBgra(Sample8(varying)); + output = ToPal8(FadeOut(varying[1], fg)); + break; + } + + return output; +} + +SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg) +{ + int start_fade = 2; // How fast it should fade out + + SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); + SSAInt alpha = SSAInt::MIN(alpha_top, alpha_bottom); + SSAInt inv_alpha = 256 - alpha; + + fg = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256; + return fg.insert(3, 255); +} + +void DrawTriangleCodegen::CalculateGradients() +{ + gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); + start.W = v1.w + gradientX.W * (SSAFloat(startX) - v1.x) + gradientY.W * (SSAFloat(startY) - v1.y); + for (int i = 0; i < TriVertex::NumVarying; i++) + { + gradientX.Varying[i] = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + gradientY.Varying[i] = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, 
v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w); + start.Varying[i] = v1.varying[i] * v1.w + gradientX.Varying[i] * (SSAFloat(startX) - v1.x) + gradientY.Varying[i] * (SSAFloat(startY) - v1.y); + } + + shade = (64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f) / 32.0f; + globVis = SSAFloat(1706.0f); +} + +void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) +{ + destOrg = args[0][0].load(true); + pitch = args[0][1].load(true); + v1 = LoadTriVertex(args[0][2].load(true)); + v2 = LoadTriVertex(args[0][3].load(true)); + v3 = LoadTriVertex(args[0][4].load(true)); + texturePixels = args[0][9].load(true); + textureWidth = args[0][10].load(true); + textureHeight = args[0][11].load(true); + translation = args[0][12].load(true); + LoadUniforms(args[0][13].load(true)); + subsectorGBuffer = args[0][19].load(true); + if (!truecolor) + { + Colormaps = args[0][20].load(true); + RGB32k = args[0][21].load(true); + BaseColors = args[0][22].load(true); + } + + fullSpans = thread_data[0][5].load(true); + partialBlocks = thread_data[0][6].load(true); + numSpans = thread_data[0][7].load(true); + numBlocks = thread_data[0][8].load(true); + startX = thread_data[0][9].load(true); + startY = thread_data[0][10].load(true); +} + +SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) +{ + SSATriVertex v; + v.x = ptr[0][0].load(true); + v.y = ptr[0][1].load(true); + v.z = ptr[0][2].load(true); + v.w = ptr[0][3].load(true); + for (int i = 0; i < TriVertex::NumVarying; i++) + v.varying[i] = ptr[0][4 + i].load(true); + return v; +} + +void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) +{ + light = uniforms[0][0].load(true); + subsectorDepth = uniforms[0][1].load(true); + color = uniforms[0][2].load(true); + srcalpha = uniforms[0][3].load(true); + destalpha = uniforms[0][4].load(true); + + SSAShort light_alpha = uniforms[0][5].load(true); + SSAShort light_red = uniforms[0][6].load(true); + SSAShort light_green = 
uniforms[0][7].load(true); + SSAShort light_blue = uniforms[0][8].load(true); + SSAShort fade_alpha = uniforms[0][9].load(true); + SSAShort fade_red = uniforms[0][10].load(true); + SSAShort fade_green = uniforms[0][11].load(true); + SSAShort fade_blue = uniforms[0][12].load(true); + SSAShort desaturate = uniforms[0][13].load(true); + SSAInt flags = uniforms[0][14].load(true); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); + is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); + is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light); +} + +SSAFloat DrawTriangleCodegen::FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) +{ + SSAFloat top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); + SSAFloat bottom = (x1 - x2) * (y0 - y2) - (x0 - x2) * (y1 - y2); + return top / bottom; +} + +SSAFloat DrawTriangleCodegen::FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2) +{ + SSAFloat top = (c1 - c2) * (x0 - x2) - (c0 - c2) * (x1 - x2); + SSAFloat bottom = (x0 - x2) * (y1 - y2) - (x1 - x2) * (y0 - y2); + return top / bottom; +} + + +#if 0 + void DrawTriangleCodegen::Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data) { this->variant = variant; @@ -742,297 +1372,6 @@ void DrawTriangleCodegen::LoopMaskedStoreBlock() } #endif -SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac 
= varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return translation[color * 4].load_vec4ub(true); - else - return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true); -} - -SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return translation[color].load(true).zext_int(); - else - return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int(); -} - -SSAVec4i DrawTriangleCodegen::Sample32(SSAInt *varying) -{ - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return SSAVec4i::unpack(color); - - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAVec4i nearest; - SSAVec4i linear; - - { - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - nearest = texturePixels[uvoffset * 4].load_vec4ub(true); - } - - { - SSAInt uone = (SSAInt(0x01000000) / textureWidth) << 8; - SSAInt vone = (SSAInt(0x01000000) / textureHeight) << 8; - - ufrac = ufrac - (uone >> 1); - vfrac = vfrac - (vone >> 1); - - SSAInt frac_x0 = (ufrac >> FRACBITS) * textureWidth; - SSAInt frac_x1 = ((ufrac + uone) >> FRACBITS) * textureWidth; - SSAInt frac_y0 = (vfrac >> FRACBITS) * textureHeight; - SSAInt frac_y1 
= ((vfrac + vone) >> FRACBITS) * textureHeight; - - SSAInt x0 = frac_x0 >> FRACBITS; - SSAInt x1 = frac_x1 >> FRACBITS; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = texturePixels[(x0 * textureHeight + y0) * 4].load_vec4ub(true); - SSAVec4i p01 = texturePixels[(x0 * textureHeight + y1) * 4].load_vec4ub(true); - SSAVec4i p10 = texturePixels[(x1 * textureHeight + y0) * 4].load_vec4ub(true); - SSAVec4i p11 = texturePixels[(x1 * textureHeight + y1) * 4].load_vec4ub(true); - - SSAInt inv_b = (frac_x1 >> (FRACBITS - 4)) & 15; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - linear = (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; - } - - return AffineLinear.select(linear, nearest); -} - -SSAInt DrawTriangleCodegen::Sample8(SSAInt *varying) -{ - SSAInt ufrac = varying[0] << 8; - SSAInt vfrac = varying[1] << 8; - - SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16; - SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16; - SSAInt uvoffset = upos * textureHeight + vpos; - - if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) - return color; - else - return texturePixels[uvoffset].load(true).zext_int(); -} - -SSAInt DrawTriangleCodegen::Shade8(SSAInt c) -{ - return currentcolormap[c].load(true).zext_int(); -} - -SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying) -{ - SSAVec4i fg; - SSAVec4i output; - - switch (blendmode) - { - default: - case TriBlendMode::Copy: - fg = Sample32(varying); - output = blend_copy(shade_bgra_simple(fg, currentlight)); - break; - case TriBlendMode::AlphaBlend: - fg = Sample32(varying); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); - break; - case TriBlendMode::AddSolid: - fg = Sample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); - 
break; - case TriBlendMode::Add: - fg = Sample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::Sub: - fg = Sample32(varying); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::RevSub: - fg = Sample32(varying); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::Stencil: - fg = Sample32(varying); - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), fg[3], bg, srcalpha, destalpha); - break; - case TriBlendMode::Shaded: - output = blend_stencil(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), Sample8(varying), bg, srcalpha, destalpha); - break; - case TriBlendMode::TranslateCopy: - fg = TranslateSample32(varying); - output = blend_copy(shade_bgra_simple(fg, currentlight)); - break; - case TriBlendMode::TranslateAlphaBlend: - fg = TranslateSample32(varying); - output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); - break; - case TriBlendMode::TranslateAdd: - fg = TranslateSample32(varying); - output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateSub: - fg = TranslateSample32(varying); - output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::TranslateRevSub: - fg = TranslateSample32(varying); - output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); - break; - case TriBlendMode::AddSrcColorOneMinusSrcColor: - fg = Sample32(varying); - output = blend_add_srccolor_oneminussrccolor(shade_bgra_simple(fg, currentlight), bg); - break; - case TriBlendMode::Skycap: - fg = Sample32(varying); - output = FadeOut(varying[1], fg); - 
break; - } - - return output; -} - -SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index) -{ - SSAVec4i c = BaseColors[index * 4].load_vec4ub(true); - c = c.insert(3, 255); - return c; -} - -SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c) -{ - return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 3)].load(true).zext_int(); -} - -SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying) -{ - SSAVec4i fg; - SSAInt alpha, inv_alpha; - SSAInt output; - SSAInt palindex; - - switch (blendmode) - { - default: - case TriBlendMode::Copy: - output = Shade8(Sample8(varying)); - break; - case TriBlendMode::AlphaBlend: - palindex = Sample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::AddSolid: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha)); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Add: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Sub: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::RevSub: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Stencil: - output = ToPal8(blend_stencil(ToBgra(Shade8(color)), (Sample8(varying) == SSAInt(0)).select(SSAInt(0), SSAInt(256)), ToBgra(bg), srcalpha, destalpha)); - break; - case TriBlendMode::Shaded: - palindex = Sample8(varying); - output = 
ToPal8(blend_stencil(ToBgra(Shade8(color)), palindex, ToBgra(bg), srcalpha, destalpha)); - break; - case TriBlendMode::TranslateCopy: - palindex = TranslateSample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateAlphaBlend: - palindex = TranslateSample8(varying); - output = Shade8(palindex); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateAdd: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateSub: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::TranslateRevSub: - palindex = TranslateSample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::AddSrcColorOneMinusSrcColor: - palindex = Sample8(varying); - fg = ToBgra(Shade8(palindex)); - output = ToPal8(blend_add_srccolor_oneminussrccolor(fg, ToBgra(bg))); - output = (palindex == SSAInt(0)).select(bg, output); - break; - case TriBlendMode::Skycap: - fg = ToBgra(Sample8(varying)); - output = ToPal8(FadeOut(varying[1], fg)); - break; - } - - return output; -} - -SSAVec4i DrawTriangleCodegen::FadeOut(SSAInt frac, SSAVec4i fg) -{ - int start_fade = 2; // How fast it should fade out - - SSAInt alpha_top = SSAInt::MAX(SSAInt::MIN(frac.ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha_bottom = SSAInt::MAX(SSAInt::MIN(((2 << 24) - frac).ashr(16 - start_fade), SSAInt(256)), SSAInt(0)); - SSAInt alpha = 
SSAInt::MIN(alpha_top, alpha_bottom); - SSAInt inv_alpha = 256 - alpha; - - fg = (fg * alpha + SSAVec4i::unpack(color) * inv_alpha) / 256; - return fg.insert(3, 255); -} - void DrawTriangleCodegen::SetStencilBlock(SSAInt block) { StencilBlock = stencilValues[block * 64]; @@ -1087,41 +1426,4 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data) thread.pass_end_y = SSAInt(32000); } -SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr) -{ - SSATriVertex v; - v.x = ptr[0][0].load(true); - v.y = ptr[0][1].load(true); - v.z = ptr[0][2].load(true); - v.w = ptr[0][3].load(true); - for (int i = 0; i < TriVertex::NumVarying; i++) - v.varying[i] = ptr[0][4 + i].load(true); - return v; -} - -void DrawTriangleCodegen::LoadUniforms(SSAValue uniforms) -{ - light = uniforms[0][0].load(true); - subsectorDepth = uniforms[0][1].load(true); - color = uniforms[0][2].load(true); - srcalpha = uniforms[0][3].load(true); - destalpha = uniforms[0][4].load(true); - - SSAShort light_alpha = uniforms[0][5].load(true); - SSAShort light_red = uniforms[0][6].load(true); - SSAShort light_green = uniforms[0][7].load(true); - SSAShort light_blue = uniforms[0][8].load(true); - SSAShort fade_alpha = uniforms[0][9].load(true); - SSAShort fade_red = uniforms[0][10].load(true); - SSAShort fade_green = uniforms[0][11].load(true); - SSAShort fade_blue = uniforms[0][12].load(true); - SSAShort desaturate = uniforms[0][13].load(true); - SSAInt flags = uniforms[0][14].load(true); - shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); - shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); - shade_constants.desaturate = desaturate.zext_int(); - - is_simple_shade = (flags & TriUniforms::simple_shade) == SSAInt(TriUniforms::simple_shade); - is_nearest_filter = (flags & TriUniforms::nearest_filter) == SSAInt(TriUniforms::nearest_filter); - 
is_fixed_light = (flags & TriUniforms::fixed_light) == SSAInt(TriUniforms::fixed_light); -} +#endif diff --git a/tools/drawergen/fixedfunction/drawtrianglecodegen.h b/tools/drawergen/fixedfunction/drawtrianglecodegen.h index 81a5e57d2..02db1451c 100644 --- a/tools/drawergen/fixedfunction/drawtrianglecodegen.h +++ b/tools/drawergen/fixedfunction/drawtrianglecodegen.h @@ -30,6 +30,91 @@ struct SSATriVertex SSAFloat varying[TriVertex::NumVarying]; }; +struct SSAStepVariables +{ + SSAFloat W; + SSAFloat Varying[TriVertex::NumVarying]; +}; + +class DrawTriangleCodegen : public DrawerCodegen +{ +public: + void Generate(TriDrawVariant variant, TriBlendMode blendmode, bool truecolor, SSAValue args, SSAValue thread_data); + +private: + void LoadArgs(SSAValue args, SSAValue thread_data); + SSATriVertex LoadTriVertex(SSAValue v); + void LoadUniforms(SSAValue uniforms); + void CalculateGradients(); + SSAFloat FindGradientX(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + SSAFloat FindGradientY(SSAFloat x0, SSAFloat y0, SSAFloat x1, SSAFloat y1, SSAFloat x2, SSAFloat y2, SSAFloat c0, SSAFloat c1, SSAFloat c2); + void DrawFullSpans(); + void DrawPartialBlocks(); + + SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); + SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying); + SSAVec4i TranslateSample32(SSAInt *varying); + SSAInt TranslateSample8(SSAInt *varying); + SSAVec4i Sample32(SSAInt *varying); + SSAInt Sample8(SSAInt *varying); + SSAInt Shade8(SSAInt c); + SSAVec4i ToBgra(SSAInt index); + SSAInt ToPal8(SSAVec4i c); + SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); + + SSAStack stack_i, stack_y, stack_x; + SSAStack stack_posYW, stack_posXW; + SSAStack stack_posYVarying[TriVertex::NumVarying]; + SSAStack stack_posXVarying[TriVertex::NumVarying]; + SSAStack stack_varyingPos[TriVertex::NumVarying]; + SSAStack stack_lightpos; + SSAStack stack_dest; + SSAStack stack_subsector; + + SSAStepVariables gradientX, 
gradientY, start; + SSAFloat shade, globVis; + + SSAInt currentlight; + SSAUBytePtr currentcolormap; + + SSAUBytePtr destOrg; + SSAIntPtr subsectorGBuffer; + SSAInt pitch; + SSATriVertex v1; + SSATriVertex v2; + SSATriVertex v3; + SSAUBytePtr texturePixels; + SSAInt textureWidth; + SSAInt textureHeight; + SSAUBytePtr translation; + SSAInt color, srcalpha, destalpha; + + SSAInt light; + SSAInt subsectorDepth; + SSAShadeConstants shade_constants; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSABool is_fixed_light; + + SSAUBytePtr Colormaps; + SSAUBytePtr RGB32k; + SSAUBytePtr BaseColors; + + SSAInt numSpans; + SSAInt numBlocks; + SSAInt startX; + SSAInt startY; + SSAValue fullSpans; // TriFullSpan[] + SSAValue partialBlocks; // TriPartialBlock[] + + TriDrawVariant variant; + TriBlendMode blendmode; + bool truecolor; + int pixelsize; +}; + +#if 0 + class DrawTriangleCodegen : public DrawerCodegen { public: @@ -154,3 +239,5 @@ private: SSAUBytePtr StencilBlock; SSAIntPtr StencilBlockMask; }; + +#endif diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 49eeb5a46..83f1b1416 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -204,6 +204,9 @@ void LLVMDrawers::CodegenDrawTriangle(const std::string &name, TriDrawVariant va llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) { + if (DrawColumnArgsStruct) + return DrawColumnArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; @@ -233,11 +236,15 @@ llvm::Type *LLVMDrawers::GetDrawColumnArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return 
llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); + DrawColumnArgsStruct = llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo(); + return DrawColumnArgsStruct; } llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { + if (DrawSpanArgsStruct) + return DrawSpanArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; @@ -264,11 +271,15 @@ llvm::Type *LLVMDrawers::GetDrawSpanArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); + DrawSpanArgsStruct = llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo(); + return DrawSpanArgsStruct; } llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) { + if (DrawWallArgsStruct) + return DrawWallArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 8; i++) @@ -285,47 +296,71 @@ llvm::Type *LLVMDrawers::GetDrawWallArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); + DrawWallArgsStruct = llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo(); + return DrawWallArgsStruct; } llvm::Type *LLVMDrawers::GetDrawSkyArgsStruct(llvm::LLVMContext &context) { + if (DrawSkyArgsStruct) + return 
DrawSkyArgsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 8; i++) elements.push_back(llvm::Type::getInt8PtrTy(context)); for (int i = 0; i < 15; i++) elements.push_back(llvm::Type::getInt32Ty(context)); - return llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); + DrawSkyArgsStruct = llvm::StructType::create(context, elements, "DrawSkyArgs", false)->getPointerTo(); + return DrawSkyArgsStruct; } llvm::Type *LLVMDrawers::GetWorkerThreadDataStruct(llvm::LLVMContext &context) { + if (WorkerThreadDataStruct) + return WorkerThreadDataStruct; + std::vector elements; for (int i = 0; i < 4; i++) elements.push_back(llvm::Type::getInt32Ty(context)); elements.push_back(llvm::Type::getInt8PtrTy(context)); - return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); + elements.push_back(GetTriFullSpanStruct(context)); + elements.push_back(GetTriPartialBlockStruct(context)); + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + WorkerThreadDataStruct = llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo(); + return WorkerThreadDataStruct; } llvm::Type *LLVMDrawers::GetTriVertexStruct(llvm::LLVMContext &context) { + if (TriVertexStruct) + return TriVertexStruct; + std::vector elements; for (int i = 0; i < 4 + TriVertex::NumVarying; i++) elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); + TriVertexStruct = llvm::StructType::create(context, elements, "TriVertex", false)->getPointerTo(); + return TriVertexStruct; } llvm::Type *LLVMDrawers::GetTriMatrixStruct(llvm::LLVMContext &context) { + if (TriMatrixStruct) + return TriMatrixStruct; + std::vector elements; for (int i = 0; i < 4 * 4; i++) elements.push_back(llvm::Type::getFloatTy(context)); - return llvm::StructType::create(context, elements, "TriMatrix", 
false)->getPointerTo(); + TriMatrixStruct = llvm::StructType::create(context, elements, "TriMatrix", false)->getPointerTo(); + return TriMatrixStruct; } llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) { + if (TriUniformsStruct) + return TriUniformsStruct; + std::vector elements; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t subsectorDepth; @@ -343,11 +378,42 @@ llvm::Type *LLVMDrawers::GetTriUniformsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; elements.push_back(GetTriMatrixStruct(context)); // TriMatrix objectToClip - return llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); + TriUniformsStruct = llvm::StructType::create(context, elements, "TriUniforms", false)->getPointerTo(); + return TriUniformsStruct; +} + +llvm::Type *LLVMDrawers::GetTriFullSpanStruct(llvm::LLVMContext &context) +{ + if (TriFullSpanStruct) + return TriFullSpanStruct; + + std::vector elements; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t X; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t Y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Length; + TriFullSpanStruct = llvm::StructType::create(context, elements, "TriFullSpan", false)->getPointerTo(); + return TriFullSpanStruct; +} + +llvm::Type *LLVMDrawers::GetTriPartialBlockStruct(llvm::LLVMContext &context) +{ + if (TriPartialBlockStruct) + return TriPartialBlockStruct; + + std::vector elements; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t X; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t Y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Mask0; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t Mask1; + TriPartialBlockStruct
= llvm::StructType::create(context, elements, "TriPartialBlock", false)->getPointerTo(); + return TriPartialBlockStruct; } llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) { + if (TriDrawTriangleArgs) + return TriDrawTriangleArgs; + std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *dest; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; @@ -372,5 +438,6 @@ llvm::Type *LLVMDrawers::GetTriDrawTriangleArgs(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors; - return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); + TriDrawTriangleArgs = llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo(); + return TriDrawTriangleArgs; } diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index 3eef605df..df6078f4d 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -53,15 +53,29 @@ private: void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); void CodegenDrawTriangle(const std::string &name, TriDrawVariant variant, TriBlendMode blendmode, bool truecolor); - static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); - static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); - static llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); - static 
llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); + llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetDrawSkyArgsStruct(llvm::LLVMContext &context); + llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + llvm::Type *GetTriVertexStruct(llvm::LLVMContext &context); + llvm::Type *GetTriMatrixStruct(llvm::LLVMContext &context); + llvm::Type *GetTriUniformsStruct(llvm::LLVMContext &context); + llvm::Type *GetTriFullSpanStruct(llvm::LLVMContext &context); + llvm::Type *GetTriPartialBlockStruct(llvm::LLVMContext &context); + llvm::Type *GetTriDrawTriangleArgs(llvm::LLVMContext &context); + + llvm::Type *DrawColumnArgsStruct = nullptr; + llvm::Type *DrawSpanArgsStruct = nullptr; + llvm::Type *DrawWallArgsStruct = nullptr; + llvm::Type *DrawSkyArgsStruct = nullptr; + llvm::Type *WorkerThreadDataStruct = nullptr; + llvm::Type *TriVertexStruct = nullptr; + llvm::Type *TriMatrixStruct = nullptr; + llvm::Type *TriUniformsStruct = nullptr; + llvm::Type *TriFullSpanStruct = nullptr; + llvm::Type *TriPartialBlockStruct = nullptr; + llvm::Type *TriDrawTriangleArgs = nullptr; LLVMProgram mProgram; std::string mNamePostfix;