From 4b18530047085cf480b873198dda3010037c4854 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 5 Nov 2016 11:29:50 +0100 Subject: [PATCH] Add linear filtering to column drawers and fix offsetting bug with wall/span linear filters --- .../fixedfunction/drawcolumncodegen.cpp | 142 +++++++++++++----- .../fixedfunction/drawcolumncodegen.h | 13 +- .../fixedfunction/drawercodegen.cpp | 42 ------ src/r_compiler/fixedfunction/drawercodegen.h | 4 - .../fixedfunction/drawspancodegen.cpp | 28 +++- .../fixedfunction/drawspancodegen.h | 1 + .../fixedfunction/drawwallcodegen.cpp | 25 ++- .../fixedfunction/drawwallcodegen.h | 1 + src/r_compiler/llvmdrawers.cpp | 3 + src/r_compiler/llvmdrawers.h | 6 +- src/r_draw.cpp | 6 +- src/r_draw_rgba.cpp | 5 + src/r_drawt_rgba.cpp | 3 + src/r_segs.cpp | 1 + src/r_things.cpp | 125 +++++++++++++++ 15 files changed, 310 insertions(+), 95 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 0ed58b943..45a75cdcb 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -36,30 +36,35 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met { dest = args[0][0].load(true); source = args[0][1].load(true); - colormap = args[0][2].load(true); - translation = args[0][3].load(true); - basecolors = args[0][4].load(true); - pitch = args[0][5].load(true); - count = args[0][6].load(true); - dest_y = args[0][7].load(true); + source2 = args[0][2].load(true); + colormap = args[0][3].load(true); + translation = args[0][4].load(true); + basecolors = args[0][5].load(true); + pitch = args[0][6].load(true); + count = args[0][7].load(true); + dest_y = args[0][8].load(true); if (method == DrawColumnMethod::Normal) - iscale = args[0][8].load(true); - texturefrac = args[0][9].load(true); - light = args[0][10].load(true); - color = SSAVec4i::unpack(args[0][11].load(true)); - srccolor = 
SSAVec4i::unpack(args[0][12].load(true)); - srcalpha = args[0][13].load(true); - destalpha = args[0][14].load(true); - SSAShort light_alpha = args[0][15].load(true); - SSAShort light_red = args[0][16].load(true); - SSAShort light_green = args[0][17].load(true); - SSAShort light_blue = args[0][18].load(true); - SSAShort fade_alpha = args[0][19].load(true); - SSAShort fade_red = args[0][20].load(true); - SSAShort fade_green = args[0][21].load(true); - SSAShort fade_blue = args[0][22].load(true); - SSAShort desaturate = args[0][23].load(true); - SSAInt flags = args[0][24].load(true); + { + iscale = args[0][9].load(true); + texturefracx = args[0][10].load(true); + textureheight = args[0][11].load(true); + } + texturefrac = args[0][12].load(true); + light = args[0][13].load(true); + color = SSAVec4i::unpack(args[0][14].load(true)); + srccolor = SSAVec4i::unpack(args[0][15].load(true)); + srcalpha = args[0][16].load(true); + destalpha = args[0][17].load(true); + SSAShort light_alpha = args[0][18].load(true); + SSAShort light_red = args[0][19].load(true); + SSAShort light_green = args[0][20].load(true); + SSAShort light_blue = args[0][21].load(true); + SSAShort fade_alpha = args[0][22].load(true); + SSAShort fade_red = args[0][23].load(true); + SSAShort fade_green = args[0][24].load(true); + SSAShort fade_blue = args[0][25].load(true); + SSAShort desaturate = args[0][26].load(true); + SSAInt flags = args[0][27].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); @@ -71,6 +76,7 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met thread.temp = thread_data[0][4].load(true); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); + 
is_nearest_filter = (flags & DrawColumnArgs::nearest_filter) == SSAInt(DrawColumnArgs::nearest_filter); count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); @@ -79,21 +85,40 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met { stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); iscale = iscale * thread.num_cores; + one = (1 << 30) / textureheight; + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + LoopShade(variant, method, true); + branch.else_block(); + LoopShade(variant, method, false); + branch.end_block(); } else { source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; - } + SSAIfBlock branch; + branch.if_block(is_simple_shade); + Loop(variant, method, true, true); + branch.else_block(); + Loop(variant, method, false, true); + branch.end_block(); + } +} + +void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) +{ SSAIfBlock branch; - branch.if_block(is_simple_shade); - Loop(variant, method, true); + branch.if_block(is_nearest_filter); + Loop(variant, method, isSimpleShade, true); branch.else_block(); - Loop(variant, method, false); + stack_frac.store(stack_frac.load() - (one >> 1)); + Loop(variant, method, isSimpleShade, false); branch.end_block(); } -void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) +void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) { SSAInt sincr; if (method != DrawColumnMethod::Normal) @@ -109,9 +134,10 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, if (method == DrawColumnMethod::Normal) { frac = stack_frac.load(); - sample_index = frac >> FRACBITS; - if (!IsPaletteInput(variant)) - sample_index = sample_index * 4; + if (IsPaletteInput(variant)) + sample_index = frac >> FRACBITS; + 
else + sample_index = frac; } else { @@ -140,7 +166,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, SSAVec4i outcolor[4]; for (int i = 0; i < numColumns; i++) - outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade); + outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, method, isSimpleShade, isNearestFilter); if (numColumns == 4) { @@ -186,7 +212,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) } } -SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) +SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) { SSAInt alpha, inv_alpha; SSAVec4i fg; @@ -194,22 +220,22 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, { default: case DrawColumnVariant::DrawCopy: - return blend_copy(Sample(sample_index)); + return blend_copy(Sample(sample_index, method, isNearestFilter)); case DrawColumnVariant::Draw: - return blend_copy(Shade(Sample(sample_index), isSimpleShade)); + return blend_copy(Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - fg = Shade(Sample(sample_index), isSimpleShade); + fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - fg = Shade(Sample(sample_index), isSimpleShade); + fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); return blend_sub(fg, bgcolor, srcalpha, 
calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawRevSubClamp: - fg = Shade(Sample(sample_index), isSimpleShade); + fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawTranslated: return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); @@ -285,9 +311,45 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo } } -SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) +SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter) { - return source[sample_index].load_vec4ub(true); + if (method == DrawColumnMethod::Normal) + { + if (isNearestFilter) + { + SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index * 4].load_vec4ub(false); + } + else + { + return SampleLinear(source, source2, texturefracx, frac, one, textureheight); + } + } + else + { + return source[frac].load_vec4ub(true); + } +} + +SSAVec4i DrawColumnCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + // Clamp to edge + SSAInt frac_y0 = (SSAInt::MAX(SSAInt::MIN(texturefracy, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height; + SSAInt frac_y1 = (SSAInt::MAX(SSAInt::MIN(texturefracy + one, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 
8; } SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index ba453a076..2c44edc5c 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -58,10 +58,12 @@ public: void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); private: - void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); - SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); + void LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); + void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); + SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); - SSAVec4i Sample(SSAInt frac); + SSAVec4i Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter); + SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAInt ColormapSample(SSAInt frac); SSAVec4i TranslateSample(SSAInt frac); SSAInt TranslateSamplePal(SSAInt frac); @@ -73,6 +75,7 @@ private: SSAUBytePtr dest; SSAUBytePtr source; + SSAUBytePtr source2; SSAUBytePtr colormap; SSAUBytePtr translation; SSAUBytePtr basecolors; @@ -80,6 +83,9 @@ private: SSAInt count; SSAInt dest_y; SSAInt iscale; + SSAInt texturefracx; + SSAInt textureheight; + SSAInt one; SSAInt texturefrac; SSAInt light; SSAVec4i color; @@ -87,6 +93,7 @@ private: SSAInt srcalpha; SSAInt destalpha; SSABool is_simple_shade; + SSABool is_nearest_filter; SSAShadeConstants shade_constants; SSAWorkerThread thread; 
}; diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 812ccafd7..761023cef 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -139,45 +139,3 @@ SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) SSAInt inv_alpha = 256 - alpha; return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; } - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); - - SSAInt inv_b = texturefracx; - SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt inv_a = 16 - a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> 
(ybits - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 4dda370fe..5de52dca1 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -88,8 +88,4 @@ public: // Calculates the final alpha values to be used when combined with the source texture alpha channel SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); - - // SampleBgra - SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); - SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); }; diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index fac8411cc..0c6bd683f 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -89,6 +89,8 @@ void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade) branch.if_block(is_nearest_filter); LoopFilter(variant, isSimpleShade, true); branch.else_block(); + stack_xfrac.store(stack_xfrac.load() - (SSAInt(1) << (31 - xbits))); + stack_yfrac.store(stack_yfrac.load() - (SSAInt(1) << (31 - ybits))); LoopFilter(variant, isSimpleShade, false); branch.end_block(); } @@ -187,15 +189,37 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte { if (is64x64) { - return sample_linear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); + return SampleLinear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); } else { - return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); + return SampleLinear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); } } } +SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt 
xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) { if (isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawspancodegen.h b/src/r_compiler/fixedfunction/drawspancodegen.h index 9ef3edd38..48c86040b 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.h +++ b/src/r_compiler/fixedfunction/drawspancodegen.h @@ -45,6 +45,7 @@ private: SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 7c042f9be..ec30a8298 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ 
b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -117,6 +117,9 @@ void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool branch.if_block(is_nearest_filter); Loop(variant, fourColumns, isSimpleShade, true); branch.else_block(); + int numColumns = fourColumns ? 4 : 1; + for (int i = 0; i < numColumns; i++) + stack_frac[i].store(stack_frac[i].load() - (one[i] / 2)); Loop(variant, fourColumns, isSimpleShade, false); branch.end_block(); } @@ -180,10 +183,30 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) } else { - return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); } } +SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; /* 4-bit vertical fraction: weight of the y1 samples (p01, p11) */ + SSAInt a = 16 - inv_a; /* remaining weight goes to the y0 samples (p00, p10) */ + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) { if (isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index 6591fb9b6..4a1599063 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -43,6 +43,7 @@ private: void LoopShade(DrawWallVariant variant, 
bool fourColumns, bool isSimpleShade); void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); + SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index ce5024bba..c39019799 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -360,6 +360,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; @@ -367,6 +368,8 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; diff --git a/src/r_compiler/llvmdrawers.h 
b/src/r_compiler/llvmdrawers.h index 5cbd05e4a..b2978cabf 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -117,6 +117,7 @@ struct DrawColumnArgs { uint32_t *dest; const uint8_t *source; + const uint8_t *source2; uint8_t *colormap; uint8_t *translation; const uint32_t *basecolors; @@ -124,6 +125,8 @@ struct DrawColumnArgs int32_t count; int32_t dest_y; uint32_t iscale; + uint32_t texturefracx; + uint32_t textureheight; uint32_t texturefrac; uint32_t light; uint32_t color; @@ -143,7 +146,8 @@ struct DrawColumnArgs uint32_t flags; enum Flags { - simple_shade = 1 + simple_shade = 1, + nearest_filter = 2 }; FString ToString() diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 0a2c8553e..0ff047238 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2973,7 +2973,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { R_SetColorMapLight(basecolormap, 0, 0); } - return r_columnmethod ? DoDraw1 : DoDraw0; + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; } fglevel = GetAlpha(style.SrcAlpha, alpha); @@ -3006,7 +3007,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { return DontDraw; } - return r_columnmethod ? DoDraw1 : DoDraw0; + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? 
DoDraw1 : DoDraw0; } void R_FinishSetPatchStyle () diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index c65718de2..2f1f6d17a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -348,6 +348,7 @@ public: { args.dest = (uint32_t*)dc_dest; args.source = dc_source; + args.source2 = dc_source2; args.colormap = dc_colormap; args.translation = dc_translation; args.basecolors = (const uint32_t *)GPalette.BaseColors; @@ -355,6 +356,8 @@ public: args.count = dc_count; args.dest_y = _dest_y; args.iscale = dc_iscale; + args.texturefracx = dc_texturefracx; + args.textureheight = dc_textureheight; args.texturefrac = dc_texturefrac; args.light = LightBgra::calc_light_multiplier(dc_light); args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); @@ -373,6 +376,8 @@ public: args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; + if (args.source2 == nullptr) + args.flags |= DrawColumnArgs::nearest_filter; /* no second column supplied: request nearest sampling (flag from DrawColumnArgs, the type of args) */ DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 2963d2fc1..04f216c0c 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -72,6 +72,7 @@ public: { args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; args.source = nullptr; + args.source2 = nullptr; args.colormap = dc_colormap; args.translation = dc_translation; args.basecolors = (const uint32_t *)GPalette.BaseColors; @@ -97,6 +98,8 @@ public: args.flags = 0; if (dc_shade_constants.simple_shade) args.flags |= DrawColumnArgs::simple_shade; + if (args.source2 == nullptr) + args.flags |= DrawColumnArgs::nearest_filter; /* source2 is always nullptr here, so this path always requests nearest sampling */ DetectRangeError(args.dest, args.dest_y, args.count); } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index e1925f66a..ea3debb18 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1183,6 +1183,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { + xoffset -= FRACUNIT / 2; int tx0 = (xoffset >> FRACBITS) % mip_width; if (tx0 < 0) 
tx0 += mip_width; diff --git a/src/r_things.cpp b/src/r_things.cpp index a04676492..ef5208848 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -251,8 +251,133 @@ double sprtopscreen; bool sprflipvert; +void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmasked) +{ + fixed_t saved_iscale = dc_iscale; // Save this because we need to modify it for mipmaps + + // Normalize to 0-1 range: + double uv_stepd = FIXED2DBL(dc_iscale); + double v_step = uv_stepd / tex->GetHeight(); + + // Convert to uint32: + dc_iscale = (uint32_t)(v_step * (1 << 30)); + + // Texture mipmap and filter selection: + fixed_t xoffset = col; + double magnitude = fabs(uv_stepd * 2); + bool magnifying = magnitude < 1.0f; + + int mipmap_offset = 0; + int mip_width = tex->GetWidth(); + int mip_height = tex->GetHeight(); + if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1) + { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); + double texture_bias = 1.7f; + double level = MAX(magnitude - 3.0, 0.0); + while (level > texture_bias && mip_width > 1 && mip_height > 1) + { + mipmap_offset += mip_width * mip_height; + level *= 0.5f; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + xoffset = (xpos >> FRACBITS) * mip_width; + } + + const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0); + + int tx = xoffset >> FRACBITS; + dc_source = (BYTE*)(pixels + tx * mip_height); + dc_source2 = nullptr; + dc_textureheight = mip_height; + dc_texturefracx = 0; + } + else + { + xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0); + + int tx0 = xoffset >> FRACBITS; + int tx1 = MIN(tx0 + 1, mip_width - 1); + dc_source = (BYTE*)(pixels + tx0 * mip_height); + dc_source2 = (BYTE*)(pixels + tx1 * mip_height); + 
dc_textureheight = mip_height; + dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15; + } + + // Grab the posts we need to draw + const FTexture::Span *span; + tex->GetColumnBgra(col >> FRACBITS, &span); + FTexture::Span unmaskedSpan[2]; + if (unmasked) + { + span = unmaskedSpan; + unmaskedSpan[0].TopOffset = 0; + unmaskedSpan[0].Length = tex->GetHeight(); + unmaskedSpan[1].TopOffset = 0; + unmaskedSpan[1].Length = 0; + } + + // Draw each span post + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + // calculate unclipped screen coordinates for post + dc_yl = (int)(sprtopscreen + spryscale * top + 0.5); + dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1; + + if (sprflipvert) + { + swapvalues(dc_yl, dc_yh); + } + + if (dc_yh >= mfloorclip[dc_x]) + { + dc_yh = mfloorclip[dc_x] - 1; + } + if (dc_yl < mceilingclip[dc_x]) + { + dc_yl = mceilingclip[dc_x]; + } + + if (dc_yl <= dc_yh) + { + dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg; + dc_count = dc_yh - dc_yl + 1; + + double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); + dc_texturefrac = (uint32_t)(v * (1 << 30)); + + if (useRt) + hcolfunc_pre(); + else + colfunc(); + } + span++; + } + + dc_iscale = saved_iscale; + + if (sprflipvert && useRt) + rt_flip_posts(); +} + void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) { + // Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom. + if (r_swtruecolor && !drawer_needs_pal_input && !useRt) // To do: add support to R_DrawColumnHoriz_rgba + { + R_DrawMaskedColumnBgra(tex, col, useRt, unmasked); + return; + } + const FTexture::Span *span; const BYTE *column; if (r_swtruecolor && !drawer_needs_pal_input)