Add linear filtering to column drawers and fix offsetting bug with wall/span linear filters

This commit is contained in:
Magnus Norddahl 2016-11-05 11:29:50 +01:00
parent 2fe5cb4f77
commit 4b18530047
15 changed files with 310 additions and 95 deletions

View file

@ -36,30 +36,35 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met
{ {
dest = args[0][0].load(true); dest = args[0][0].load(true);
source = args[0][1].load(true); source = args[0][1].load(true);
colormap = args[0][2].load(true); source2 = args[0][2].load(true);
translation = args[0][3].load(true); colormap = args[0][3].load(true);
basecolors = args[0][4].load(true); translation = args[0][4].load(true);
pitch = args[0][5].load(true); basecolors = args[0][5].load(true);
count = args[0][6].load(true); pitch = args[0][6].load(true);
dest_y = args[0][7].load(true); count = args[0][7].load(true);
dest_y = args[0][8].load(true);
if (method == DrawColumnMethod::Normal) if (method == DrawColumnMethod::Normal)
iscale = args[0][8].load(true); {
texturefrac = args[0][9].load(true); iscale = args[0][9].load(true);
light = args[0][10].load(true); texturefracx = args[0][10].load(true);
color = SSAVec4i::unpack(args[0][11].load(true)); textureheight = args[0][11].load(true);
srccolor = SSAVec4i::unpack(args[0][12].load(true)); }
srcalpha = args[0][13].load(true); texturefrac = args[0][12].load(true);
destalpha = args[0][14].load(true); light = args[0][13].load(true);
SSAShort light_alpha = args[0][15].load(true); color = SSAVec4i::unpack(args[0][14].load(true));
SSAShort light_red = args[0][16].load(true); srccolor = SSAVec4i::unpack(args[0][15].load(true));
SSAShort light_green = args[0][17].load(true); srcalpha = args[0][16].load(true);
SSAShort light_blue = args[0][18].load(true); destalpha = args[0][17].load(true);
SSAShort fade_alpha = args[0][19].load(true); SSAShort light_alpha = args[0][18].load(true);
SSAShort fade_red = args[0][20].load(true); SSAShort light_red = args[0][19].load(true);
SSAShort fade_green = args[0][21].load(true); SSAShort light_green = args[0][20].load(true);
SSAShort fade_blue = args[0][22].load(true); SSAShort light_blue = args[0][21].load(true);
SSAShort desaturate = args[0][23].load(true); SSAShort fade_alpha = args[0][22].load(true);
SSAInt flags = args[0][24].load(true); SSAShort fade_red = args[0][23].load(true);
SSAShort fade_green = args[0][24].load(true);
SSAShort fade_blue = args[0][25].load(true);
SSAShort desaturate = args[0][26].load(true);
SSAInt flags = args[0][27].load(true);
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int(); shade_constants.desaturate = desaturate.zext_int();
@ -71,6 +76,7 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met
thread.temp = thread_data[0][4].load(true); thread.temp = thread_data[0][4].load(true);
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
is_nearest_filter = (flags & DrawColumnArgs::nearest_filter) == SSAInt(DrawColumnArgs::nearest_filter);
count = count_for_thread(dest_y, count, thread); count = count_for_thread(dest_y, count, thread);
dest = dest_for_thread(dest_y, pitch, dest, thread); dest = dest_for_thread(dest_y, pitch, dest, thread);
@ -79,21 +85,40 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met
{ {
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
iscale = iscale * thread.num_cores; iscale = iscale * thread.num_cores;
one = (1 << 30) / textureheight;
SSAIfBlock branch;
branch.if_block(is_simple_shade);
LoopShade(variant, method, true);
branch.else_block();
LoopShade(variant, method, false);
branch.end_block();
} }
else else
{ {
source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4];
}
SSAIfBlock branch; SSAIfBlock branch;
branch.if_block(is_simple_shade); branch.if_block(is_simple_shade);
Loop(variant, method, true); Loop(variant, method, true, true);
branch.else_block(); branch.else_block();
Loop(variant, method, false); Loop(variant, method, false, true);
branch.end_block();
}
}
void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade)
{
SSAIfBlock branch;
branch.if_block(is_nearest_filter);
Loop(variant, method, isSimpleShade, true);
branch.else_block();
stack_frac.store(stack_frac.load() - (one >> 1));
Loop(variant, method, isSimpleShade, false);
branch.end_block(); branch.end_block();
} }
void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter)
{ {
SSAInt sincr; SSAInt sincr;
if (method != DrawColumnMethod::Normal) if (method != DrawColumnMethod::Normal)
@ -109,9 +134,10 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
if (method == DrawColumnMethod::Normal) if (method == DrawColumnMethod::Normal)
{ {
frac = stack_frac.load(); frac = stack_frac.load();
if (IsPaletteInput(variant))
sample_index = frac >> FRACBITS; sample_index = frac >> FRACBITS;
if (!IsPaletteInput(variant)) else
sample_index = sample_index * 4; sample_index = frac;
} }
else else
{ {
@ -140,7 +166,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
SSAVec4i outcolor[4]; SSAVec4i outcolor[4];
for (int i = 0; i < numColumns; i++) for (int i = 0; i < numColumns; i++)
outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade); outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, method, isSimpleShade, isNearestFilter);
if (numColumns == 4) if (numColumns == 4)
{ {
@ -186,7 +212,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant)
} }
} }
SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter)
{ {
SSAInt alpha, inv_alpha; SSAInt alpha, inv_alpha;
SSAVec4i fg; SSAVec4i fg;
@ -194,22 +220,22 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor,
{ {
default: default:
case DrawColumnVariant::DrawCopy: case DrawColumnVariant::DrawCopy:
return blend_copy(Sample(sample_index)); return blend_copy(Sample(sample_index, method, isNearestFilter));
case DrawColumnVariant::Draw: case DrawColumnVariant::Draw:
return blend_copy(Shade(Sample(sample_index), isSimpleShade)); return blend_copy(Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade));
case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAdd:
case DrawColumnVariant::DrawAddClamp: case DrawColumnVariant::DrawAddClamp:
fg = Shade(Sample(sample_index), isSimpleShade); fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade);
return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawShaded: case DrawColumnVariant::DrawShaded:
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha; inv_alpha = 256 - alpha;
return blend_add(color, bgcolor, alpha, inv_alpha); return blend_add(color, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::DrawSubClamp: case DrawColumnVariant::DrawSubClamp:
fg = Shade(Sample(sample_index), isSimpleShade); fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade);
return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawRevSubClamp: case DrawColumnVariant::DrawRevSubClamp:
fg = Shade(Sample(sample_index), isSimpleShade); fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade);
return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
case DrawColumnVariant::DrawTranslated: case DrawColumnVariant::DrawTranslated:
return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade));
@ -285,9 +311,45 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo
} }
} }
SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter)
{ {
return source[sample_index].load_vec4ub(true); if (method == DrawColumnMethod::Normal)
{
if (isNearestFilter)
{
SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS;
return source[sample_index * 4].load_vec4ub(false);
}
else
{
return SampleLinear(source, source2, texturefracx, frac, one, textureheight);
}
}
else
{
return source[frac].load_vec4ub(true);
}
}
SSAVec4i DrawColumnCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
// Clamp to edge
SSAInt frac_y0 = (SSAInt::MAX(SSAInt::MIN(texturefracy, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height;
SSAInt frac_y1 = (SSAInt::MAX(SSAInt::MIN(texturefracy + one, SSAInt((1 << 30) - 1)), SSAInt(0)) >> (FRACBITS - 2)) * height;
SSAInt y0 = frac_y0 >> FRACBITS;
SSAInt y1 = frac_y1 >> FRACBITS;
SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);
SSAInt inv_b = texturefracx;
SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
SSAInt a = 16 - inv_a;
SSAInt b = 16 - inv_b;
return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
} }
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)

View file

@ -58,10 +58,12 @@ public:
void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data);
private: private:
void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); void LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade);
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter);
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter);
SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
SSAVec4i Sample(SSAInt frac); SSAVec4i Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter);
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAInt ColormapSample(SSAInt frac); SSAInt ColormapSample(SSAInt frac);
SSAVec4i TranslateSample(SSAInt frac); SSAVec4i TranslateSample(SSAInt frac);
SSAInt TranslateSamplePal(SSAInt frac); SSAInt TranslateSamplePal(SSAInt frac);
@ -73,6 +75,7 @@ private:
SSAUBytePtr dest; SSAUBytePtr dest;
SSAUBytePtr source; SSAUBytePtr source;
SSAUBytePtr source2;
SSAUBytePtr colormap; SSAUBytePtr colormap;
SSAUBytePtr translation; SSAUBytePtr translation;
SSAUBytePtr basecolors; SSAUBytePtr basecolors;
@ -80,6 +83,9 @@ private:
SSAInt count; SSAInt count;
SSAInt dest_y; SSAInt dest_y;
SSAInt iscale; SSAInt iscale;
SSAInt texturefracx;
SSAInt textureheight;
SSAInt one;
SSAInt texturefrac; SSAInt texturefrac;
SSAInt light; SSAInt light;
SSAVec4i color; SSAVec4i color;
@ -87,6 +93,7 @@ private:
SSAInt srcalpha; SSAInt srcalpha;
SSAInt destalpha; SSAInt destalpha;
SSABool is_simple_shade; SSABool is_simple_shade;
SSABool is_nearest_filter;
SSAShadeConstants shade_constants; SSAShadeConstants shade_constants;
SSAWorkerThread thread; SSAWorkerThread thread;
}; };

View file

@ -139,45 +139,3 @@ SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha)
SSAInt inv_alpha = 256 - alpha; SSAInt inv_alpha = 256 - alpha;
return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; return (destalpha * alpha + 256 * inv_alpha + 128) >> 8;
} }
// Emits code for a bilinear sample taken from two texture columns.
//
// col0 / col1   - the two columns to blend between horizontally.
// texturefracx  - horizontal blend weight towards col1; used as a 1/16th
//                 weight (presumably 0..15 - confirm against the caller).
// texturefracy  - fixed-point vertical texture coordinate.
// one           - amount added to texturefracy to reach the next texel row.
// height        - column height in texels.
//
// Returns the weighted sum of the four texels, normalized with a >> 8.
SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
    // Scale the coordinate, and the coordinate one step further down, by the column height.
    SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;
    SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
    SSAInt y0 = frac_y0 >> FRACBITS;
    SSAInt y1 = frac_y1 >> FRACBITS;

    // Texels are read as 4 unsigned bytes each, hence the * 4 byte offset.
    SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
    SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
    SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
    SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);

    // The top four fractional bits are the distance travelled from row y0
    // towards row y1, so they must weight the y1 texels (p01 / p11).
    // Previously they were assigned to `a`, which weights the y0 texels and
    // therefore inverted the vertical interpolation.
    SSAInt inv_b = texturefracx;
    SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
    SSAInt a = 16 - inv_a;
    SSAInt b = 16 - inv_b;

    // (a + inv_a) * (b + inv_b) == 256, so >> 8 brings the sum back to texel range.
    return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
// Emits code for a bilinear sample from a single texture.
//
// xbits / ybits are the right-shift amounts that turn xfrac / yfrac into
// integer texel coordinates; (32 - xbits) and (32 - ybits) give the number of
// coordinate bits that are kept. Texels are addressed as y + (x << (32 - ybits)),
// and both coordinates wrap around through a bit mask.
SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits)
{
    // Integer texel coordinates.
    SSAInt texel_x = xfrac >> xbits;
    SSAInt texel_y = yfrac >> ybits;

    // Wrap masks, plus the shift that moves x above the y bits in the index.
    SSAInt column_shift = (32 - ybits);
    SSAInt wrap_x = (SSAInt(1) << (32 - xbits)) - 1;
    SSAInt wrap_y = (SSAInt(1) << column_shift) - 1;

    // The two neighbouring columns and rows, each wrapped independently.
    SSAInt column0 = (texel_x & wrap_x) << column_shift;
    SSAInt column1 = ((texel_x + 1) & wrap_x) << column_shift;
    SSAInt row0 = texel_y & wrap_y;
    SSAInt row1 = (texel_y + 1) & wrap_y;

    // Texels are read as 4 unsigned bytes each.
    SSAVec4i x0y0 = texture[(row0 + column0) * 4].load_vec4ub(true);
    SSAVec4i x0y1 = texture[(row1 + column0) * 4].load_vec4ub(true);
    SSAVec4i x1y0 = texture[(row0 + column1) * 4].load_vec4ub(true);
    SSAVec4i x1y1 = texture[(row1 + column1) * 4].load_vec4ub(true);

    // 4-bit blend weights taken from the top fractional bits of each coordinate.
    SSAInt weight_x1 = (xfrac >> (xbits - 4)) & 15;
    SSAInt weight_y1 = (yfrac >> (ybits - 4)) & 15;
    SSAInt weight_y0 = 16 - weight_y1;
    SSAInt weight_x0 = 16 - weight_x1;

    // The four weight products add up to 256, so >> 8 normalizes the sum.
    SSAVec4i blended = x0y0 * (weight_y0 * weight_x0) + x0y1 * (weight_y1 * weight_x0) + x1y0 * (weight_y0 * weight_x1) + x1y1 * (weight_y1 * weight_x1) + 127;
    return blended >> 8;
}

View file

@ -88,8 +88,4 @@ public:
// Calculates the final alpha values to be used when combined with the source texture alpha channel // Calculates the final alpha values to be used when combined with the source texture alpha channel
SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha);
// SampleBgra
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
}; };

View file

@ -89,6 +89,8 @@ void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade)
branch.if_block(is_nearest_filter); branch.if_block(is_nearest_filter);
LoopFilter(variant, isSimpleShade, true); LoopFilter(variant, isSimpleShade, true);
branch.else_block(); branch.else_block();
stack_xfrac.store(stack_xfrac.load() - (SSAInt(1) << (31 - xbits)));
stack_yfrac.store(stack_yfrac.load() - (SSAInt(1) << (31 - ybits)));
LoopFilter(variant, isSimpleShade, false); LoopFilter(variant, isSimpleShade, false);
branch.end_block(); branch.end_block();
} }
@ -187,15 +189,37 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte
{ {
if (is64x64) if (is64x64)
{ {
return sample_linear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); return SampleLinear(source, xfrac, yfrac, SSAInt(26), SSAInt(26));
} }
else else
{ {
return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); return SampleLinear(source, xfrac, yfrac, 32 - xbits, 32 - ybits);
} }
} }
} }
// Emits code for a bilinear sample of the span texture.
//
// xbits / ybits are the right-shift amounts that turn xfrac / yfrac into
// integer texel coordinates; (32 - xbits) and (32 - ybits) give the number of
// coordinate bits that are kept. Texels are addressed as y + (x << (32 - ybits)),
// and both coordinates wrap around through a bit mask.
SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits)
{
    // Integer texel coordinates.
    SSAInt texel_x = xfrac >> xbits;
    SSAInt texel_y = yfrac >> ybits;

    // Wrap masks, plus the shift that moves x above the y bits in the index.
    SSAInt column_shift = (32 - ybits);
    SSAInt wrap_x = (SSAInt(1) << (32 - xbits)) - 1;
    SSAInt wrap_y = (SSAInt(1) << column_shift) - 1;

    // The two neighbouring columns and rows, each wrapped independently.
    SSAInt column0 = (texel_x & wrap_x) << column_shift;
    SSAInt column1 = ((texel_x + 1) & wrap_x) << column_shift;
    SSAInt row0 = texel_y & wrap_y;
    SSAInt row1 = (texel_y + 1) & wrap_y;

    // Texels are read as 4 unsigned bytes each.
    SSAVec4i x0y0 = texture[(row0 + column0) * 4].load_vec4ub(true);
    SSAVec4i x0y1 = texture[(row1 + column0) * 4].load_vec4ub(true);
    SSAVec4i x1y0 = texture[(row0 + column1) * 4].load_vec4ub(true);
    SSAVec4i x1y1 = texture[(row1 + column1) * 4].load_vec4ub(true);

    // 4-bit blend weights taken from the top fractional bits of each coordinate.
    SSAInt weight_x1 = (xfrac >> (xbits - 4)) & 15;
    SSAInt weight_y1 = (yfrac >> (ybits - 4)) & 15;
    SSAInt weight_y0 = 16 - weight_y1;
    SSAInt weight_x0 = 16 - weight_x1;

    // The four weight products add up to 256, so >> 8 normalizes the sum.
    SSAVec4i blended = x0y0 * (weight_y0 * weight_x0) + x0y1 * (weight_y1 * weight_x0) + x1y0 * (weight_y0 * weight_x1) + x1y1 * (weight_y1 * weight_x1) + 127;
    return blended >> 8;
}
SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{ {
if (isSimpleShade) if (isSimpleShade)

View file

@ -45,6 +45,7 @@ private:
SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
SSAVec4i SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant);

View file

@ -117,6 +117,9 @@ void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool
branch.if_block(is_nearest_filter); branch.if_block(is_nearest_filter);
Loop(variant, fourColumns, isSimpleShade, true); Loop(variant, fourColumns, isSimpleShade, true);
branch.else_block(); branch.else_block();
int numColumns = fourColumns ? 4 : 1;
for (int i = 0; i < numColumns; i++)
stack_frac[i].store(stack_frac[i].load() - (one[i] / 2));
Loop(variant, fourColumns, isSimpleShade, false); Loop(variant, fourColumns, isSimpleShade, false);
branch.end_block(); branch.end_block();
} }
@ -180,10 +183,30 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter)
} }
else else
{ {
return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]);
} }
} }
// Emits code for a bilinear sample taken from two wall texture columns.
//
// col0 / col1   - the two columns to blend between horizontally.
// texturefracx  - horizontal blend weight towards col1; used as a 1/16th
//                 weight (presumably 0..15 - confirm against the caller).
// texturefracy  - fixed-point vertical texture coordinate.
// one           - amount added to texturefracy to reach the next texel row.
// height        - column height in texels.
//
// Returns the weighted sum of the four texels, normalized with a >> 8.
SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
    // Scale the coordinate, and the coordinate one step further down, by the column height.
    SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;
    SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
    SSAInt y0 = frac_y0 >> FRACBITS;
    SSAInt y1 = frac_y1 >> FRACBITS;

    // Texels are read as 4 unsigned bytes each, hence the * 4 byte offset.
    SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
    SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
    SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
    SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);

    // The top four fractional bits are the distance travelled from row y0
    // towards row y1, so they must weight the y1 texels (p01 / p11).
    // Previously they were assigned to `a`, which weights the y0 texels and
    // therefore inverted the vertical interpolation.
    SSAInt inv_b = texturefracx;
    SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
    SSAInt a = 16 - inv_a;
    SSAInt b = 16 - inv_b;

    // (a + inv_a) * (b + inv_b) == 256, so >> 8 brings the sum back to texel range.
    return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
{ {
if (isSimpleShade) if (isSimpleShade)

View file

@ -43,6 +43,7 @@ private:
void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade);
void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter);
SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter);
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade);
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant);

View file

@ -360,6 +360,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
std::vector<llvm::Type *> elements; std::vector<llvm::Type *> elements;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors;
@ -367,6 +368,8 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count;
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color;

View file

@ -117,6 +117,7 @@ struct DrawColumnArgs
{ {
uint32_t *dest; uint32_t *dest;
const uint8_t *source; const uint8_t *source;
const uint8_t *source2;
uint8_t *colormap; uint8_t *colormap;
uint8_t *translation; uint8_t *translation;
const uint32_t *basecolors; const uint32_t *basecolors;
@ -124,6 +125,8 @@ struct DrawColumnArgs
int32_t count; int32_t count;
int32_t dest_y; int32_t dest_y;
uint32_t iscale; uint32_t iscale;
uint32_t texturefracx;
uint32_t textureheight;
uint32_t texturefrac; uint32_t texturefrac;
uint32_t light; uint32_t light;
uint32_t color; uint32_t color;
@ -143,7 +146,8 @@ struct DrawColumnArgs
uint32_t flags; uint32_t flags;
enum Flags enum Flags
{ {
simple_shade = 1 simple_shade = 1,
nearest_filter = 2
}; };
FString ToString() FString ToString()

View file

@ -2973,7 +2973,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
{ {
R_SetColorMapLight(basecolormap, 0, 0); R_SetColorMapLight(basecolormap, 0, 0);
} }
return r_columnmethod ? DoDraw1 : DoDraw0; bool active_columnmethod = r_columnmethod && !r_swtruecolor;
return active_columnmethod ? DoDraw1 : DoDraw0;
} }
fglevel = GetAlpha(style.SrcAlpha, alpha); fglevel = GetAlpha(style.SrcAlpha, alpha);
@ -3006,7 +3007,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
{ {
return DontDraw; return DontDraw;
} }
return r_columnmethod ? DoDraw1 : DoDraw0; bool active_columnmethod = r_columnmethod && !r_swtruecolor;
return active_columnmethod ? DoDraw1 : DoDraw0;
} }
void R_FinishSetPatchStyle () void R_FinishSetPatchStyle ()

View file

@ -348,6 +348,7 @@ public:
{ {
args.dest = (uint32_t*)dc_dest; args.dest = (uint32_t*)dc_dest;
args.source = dc_source; args.source = dc_source;
args.source2 = dc_source2;
args.colormap = dc_colormap; args.colormap = dc_colormap;
args.translation = dc_translation; args.translation = dc_translation;
args.basecolors = (const uint32_t *)GPalette.BaseColors; args.basecolors = (const uint32_t *)GPalette.BaseColors;
@ -355,6 +356,8 @@ public:
args.count = dc_count; args.count = dc_count;
args.dest_y = _dest_y; args.dest_y = _dest_y;
args.iscale = dc_iscale; args.iscale = dc_iscale;
args.texturefracx = dc_texturefracx;
args.textureheight = dc_textureheight;
args.texturefrac = dc_texturefrac; args.texturefrac = dc_texturefrac;
args.light = LightBgra::calc_light_multiplier(dc_light); args.light = LightBgra::calc_light_multiplier(dc_light);
args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); args.color = LightBgra::shade_pal_index_simple(dc_color, args.light);
@ -373,6 +376,8 @@ public:
args.flags = 0; args.flags = 0;
if (dc_shade_constants.simple_shade) if (dc_shade_constants.simple_shade)
args.flags |= DrawColumnArgs::simple_shade; args.flags |= DrawColumnArgs::simple_shade;
if (args.source2 == nullptr)
args.flags |= DrawWallArgs::nearest_filter;
DetectRangeError(args.dest, args.dest_y, args.count); DetectRangeError(args.dest, args.dest_y, args.count);
} }

View file

@ -72,6 +72,7 @@ public:
{ {
args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx;
args.source = nullptr; args.source = nullptr;
args.source2 = nullptr;
args.colormap = dc_colormap; args.colormap = dc_colormap;
args.translation = dc_translation; args.translation = dc_translation;
args.basecolors = (const uint32_t *)GPalette.BaseColors; args.basecolors = (const uint32_t *)GPalette.BaseColors;
@ -97,6 +98,8 @@ public:
args.flags = 0; args.flags = 0;
if (dc_shade_constants.simple_shade) if (dc_shade_constants.simple_shade)
args.flags |= DrawColumnArgs::simple_shade; args.flags |= DrawColumnArgs::simple_shade;
if (args.source2 == nullptr)
args.flags |= DrawWallArgs::nearest_filter;
DetectRangeError(args.dest, args.dest_y, args.count); DetectRangeError(args.dest, args.dest_y, args.count);
} }

View file

@ -1183,6 +1183,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof
} }
else else
{ {
xoffset -= FRACUNIT / 2;
int tx0 = (xoffset >> FRACBITS) % mip_width; int tx0 = (xoffset >> FRACBITS) % mip_width;
if (tx0 < 0) if (tx0 < 0)
tx0 += mip_width; tx0 += mip_width;

View file

@ -251,8 +251,133 @@ double sprtopscreen;
bool sprflipvert; bool sprflipvert;
// Draws one masked (or, when `unmasked` is set, full-height) texture column
// through the column drawer function pointers, selecting a mipmap level and
// nearest/linear filtering first.
//
// tex      - texture to draw from.
// col      - fixed-point column position within the texture.
// useRt    - when true each post goes through hcolfunc_pre() instead of
//            colfunc(), and rt_flip_posts() runs afterwards if sprflipvert is set.
// unmasked - when true the texture's own posts are ignored and a single post
//            covering tex->GetHeight() rows is drawn.
//
// Reads and writes the dc_* drawer globals. dc_iscale is temporarily replaced
// and restored before returning; dc_source, dc_source2, dc_textureheight and
// dc_texturefracx are left at the values chosen here.
void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmasked)
{
fixed_t saved_iscale = dc_iscale; // Save this because we need to modify it for mipmaps
// Normalize to 0-1 range:
double uv_stepd = FIXED2DBL(dc_iscale);
double v_step = uv_stepd / tex->GetHeight();
// Convert to uint32:
// (1 << 30 stands for 1.0, the same scale later applied to dc_texturefrac)
dc_iscale = (uint32_t)(v_step * (1 << 30));
// Texture mipmap and filter selection:
fixed_t xoffset = col;
double magnitude = fabs(uv_stepd * 2);
bool magnifying = magnitude < 1.0f;
int mipmap_offset = 0;
int mip_width = tex->GetWidth();
int mip_height = tex->GetHeight();
if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1)
{
// Column position as a fraction of the full-size width, so it can be
// rescaled to whichever mip level gets picked below.
// NOTE(review): if fixed_t is signed and col can be negative, the uint64_t
// cast would produce a huge value here - confirm callers never pass that.
uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width);
double texture_bias = 1.7f;
double level = MAX(magnitude - 3.0, 0.0);
// Step down one mip level per iteration: skip past the current level's
// pixels and halve both dimensions (never going below 1).
while (level > texture_bias && mip_width > 1 && mip_height > 1)
{
mipmap_offset += mip_width * mip_height;
level *= 0.5f;
mip_width = MAX(mip_width >> 1, 1);
mip_height = MAX(mip_height >> 1, 1);
}
xoffset = (xpos >> FRACBITS) * mip_width;
}
const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset;
// Nearest filtering is used whenever the filter cvar matching the current
// direction (magnify / minify) is off.
bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter);
if (filter_nearest)
{
// Clamp to the valid column range and point dc_source at that column
// (columns are mip_height pixels apart).
xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0);
int tx = xoffset >> FRACBITS;
dc_source = (BYTE*)(pixels + tx * mip_height);
dc_source2 = nullptr;
dc_textureheight = mip_height;
dc_texturefracx = 0;
}
else
{
// Shift by half a texel, clamp, then select the two neighbouring columns;
// the second column is clamped to the last one rather than wrapped.
xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0);
int tx0 = xoffset >> FRACBITS;
int tx1 = MIN(tx0 + 1, mip_width - 1);
dc_source = (BYTE*)(pixels + tx0 * mip_height);
dc_source2 = (BYTE*)(pixels + tx1 * mip_height);
dc_textureheight = mip_height;
// Top four fractional bits of the column position
dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15;
}
// Grab the posts we need to draw
const FTexture::Span *span;
// Only the span list is used here; the returned column pointer is discarded.
tex->GetColumnBgra(col >> FRACBITS, &span);
FTexture::Span unmaskedSpan[2];
if (unmasked)
{
// Replace the post list with one full-height post followed by a
// zero-length terminator.
span = unmaskedSpan;
unmaskedSpan[0].TopOffset = 0;
unmaskedSpan[0].Length = tex->GetHeight();
unmaskedSpan[1].TopOffset = 0;
unmaskedSpan[1].Length = 0;
}
// Draw each span post
// (the list ends at the first span whose Length is 0)
while (span->Length != 0)
{
const int length = span->Length;
const int top = span->TopOffset;
// calculate unclipped screen coordinates for post
dc_yl = (int)(sprtopscreen + spryscale * top + 0.5);
dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1;
if (sprflipvert)
{
swapvalues(dc_yl, dc_yh);
}
// Clip the post against the floor/ceiling clip arrays for this screen column.
if (dc_yh >= mfloorclip[dc_x])
{
dc_yh = mfloorclip[dc_x] - 1;
}
if (dc_yl < mceilingclip[dc_x])
{
dc_yl = mceilingclip[dc_x];
}
if (dc_yl <= dc_yh)
{
dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
// Texture coordinate at the centre of the first drawn pixel, divided by
// the full texture height and scaled so that 1 << 30 stands for 1.0.
double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight();
dc_texturefrac = (uint32_t)(v * (1 << 30));
if (useRt)
hcolfunc_pre();
else
colfunc();
}
span++;
}
// Restore the caller's dc_iscale.
dc_iscale = saved_iscale;
if (sprflipvert && useRt)
rt_flip_posts();
}
void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked)
{ {
// Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom.
if (r_swtruecolor && !drawer_needs_pal_input && !useRt) // To do: add support to R_DrawColumnHoriz_rgba
{
R_DrawMaskedColumnBgra(tex, col, useRt, unmasked);
return;
}
const FTexture::Span *span; const FTexture::Span *span;
const BYTE *column; const BYTE *column;
if (r_swtruecolor && !drawer_needs_pal_input) if (r_swtruecolor && !drawer_needs_pal_input)