mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-26 06:01:14 +00:00
Add linear filtering to column drawers and fix offsetting bug with wall/span linear filters
This commit is contained in:
parent
2fe5cb4f77
commit
4b18530047
15 changed files with 310 additions and 95 deletions
|
@ -36,30 +36,35 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met
|
|||
{
|
||||
dest = args[0][0].load(true);
|
||||
source = args[0][1].load(true);
|
||||
colormap = args[0][2].load(true);
|
||||
translation = args[0][3].load(true);
|
||||
basecolors = args[0][4].load(true);
|
||||
pitch = args[0][5].load(true);
|
||||
count = args[0][6].load(true);
|
||||
dest_y = args[0][7].load(true);
|
||||
source2 = args[0][2].load(true);
|
||||
colormap = args[0][3].load(true);
|
||||
translation = args[0][4].load(true);
|
||||
basecolors = args[0][5].load(true);
|
||||
pitch = args[0][6].load(true);
|
||||
count = args[0][7].load(true);
|
||||
dest_y = args[0][8].load(true);
|
||||
if (method == DrawColumnMethod::Normal)
|
||||
iscale = args[0][8].load(true);
|
||||
texturefrac = args[0][9].load(true);
|
||||
light = args[0][10].load(true);
|
||||
color = SSAVec4i::unpack(args[0][11].load(true));
|
||||
srccolor = SSAVec4i::unpack(args[0][12].load(true));
|
||||
srcalpha = args[0][13].load(true);
|
||||
destalpha = args[0][14].load(true);
|
||||
SSAShort light_alpha = args[0][15].load(true);
|
||||
SSAShort light_red = args[0][16].load(true);
|
||||
SSAShort light_green = args[0][17].load(true);
|
||||
SSAShort light_blue = args[0][18].load(true);
|
||||
SSAShort fade_alpha = args[0][19].load(true);
|
||||
SSAShort fade_red = args[0][20].load(true);
|
||||
SSAShort fade_green = args[0][21].load(true);
|
||||
SSAShort fade_blue = args[0][22].load(true);
|
||||
SSAShort desaturate = args[0][23].load(true);
|
||||
SSAInt flags = args[0][24].load(true);
|
||||
{
|
||||
iscale = args[0][9].load(true);
|
||||
texturefracx = args[0][10].load(true);
|
||||
textureheight = args[0][11].load(true);
|
||||
}
|
||||
texturefrac = args[0][12].load(true);
|
||||
light = args[0][13].load(true);
|
||||
color = SSAVec4i::unpack(args[0][14].load(true));
|
||||
srccolor = SSAVec4i::unpack(args[0][15].load(true));
|
||||
srcalpha = args[0][16].load(true);
|
||||
destalpha = args[0][17].load(true);
|
||||
SSAShort light_alpha = args[0][18].load(true);
|
||||
SSAShort light_red = args[0][19].load(true);
|
||||
SSAShort light_green = args[0][20].load(true);
|
||||
SSAShort light_blue = args[0][21].load(true);
|
||||
SSAShort fade_alpha = args[0][22].load(true);
|
||||
SSAShort fade_red = args[0][23].load(true);
|
||||
SSAShort fade_green = args[0][24].load(true);
|
||||
SSAShort fade_blue = args[0][25].load(true);
|
||||
SSAShort desaturate = args[0][26].load(true);
|
||||
SSAInt flags = args[0][27].load(true);
|
||||
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
|
||||
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
|
||||
shade_constants.desaturate = desaturate.zext_int();
|
||||
|
@ -71,6 +76,7 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met
|
|||
thread.temp = thread_data[0][4].load(true);
|
||||
|
||||
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
|
||||
is_nearest_filter = (flags & DrawColumnArgs::nearest_filter) == SSAInt(DrawColumnArgs::nearest_filter);
|
||||
|
||||
count = count_for_thread(dest_y, count, thread);
|
||||
dest = dest_for_thread(dest_y, pitch, dest, thread);
|
||||
|
@ -79,21 +85,40 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met
|
|||
{
|
||||
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
|
||||
iscale = iscale * thread.num_cores;
|
||||
one = (1 << 30) / textureheight;
|
||||
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(is_simple_shade);
|
||||
LoopShade(variant, method, true);
|
||||
branch.else_block();
|
||||
LoopShade(variant, method, false);
|
||||
branch.end_block();
|
||||
}
|
||||
else
|
||||
{
|
||||
source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4];
|
||||
}
|
||||
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(is_simple_shade);
|
||||
Loop(variant, method, true, true);
|
||||
branch.else_block();
|
||||
Loop(variant, method, false, true);
|
||||
branch.end_block();
|
||||
}
|
||||
}
|
||||
|
||||
void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade)
|
||||
{
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(is_simple_shade);
|
||||
Loop(variant, method, true);
|
||||
branch.if_block(is_nearest_filter);
|
||||
Loop(variant, method, isSimpleShade, true);
|
||||
branch.else_block();
|
||||
Loop(variant, method, false);
|
||||
stack_frac.store(stack_frac.load() - (one >> 1));
|
||||
Loop(variant, method, isSimpleShade, false);
|
||||
branch.end_block();
|
||||
}
|
||||
|
||||
void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade)
|
||||
void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter)
|
||||
{
|
||||
SSAInt sincr;
|
||||
if (method != DrawColumnMethod::Normal)
|
||||
|
@ -109,9 +134,10 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
|
|||
if (method == DrawColumnMethod::Normal)
|
||||
{
|
||||
frac = stack_frac.load();
|
||||
sample_index = frac >> FRACBITS;
|
||||
if (!IsPaletteInput(variant))
|
||||
sample_index = sample_index * 4;
|
||||
if (IsPaletteInput(variant))
|
||||
sample_index = frac >> FRACBITS;
|
||||
else
|
||||
sample_index = frac;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -140,7 +166,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
|
|||
|
||||
SSAVec4i outcolor[4];
|
||||
for (int i = 0; i < numColumns; i++)
|
||||
outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade);
|
||||
outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, method, isSimpleShade, isNearestFilter);
|
||||
|
||||
if (numColumns == 4)
|
||||
{
|
||||
|
@ -186,7 +212,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant)
|
|||
}
|
||||
}
|
||||
|
||||
SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade)
|
||||
SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter)
|
||||
{
|
||||
SSAInt alpha, inv_alpha;
|
||||
SSAVec4i fg;
|
||||
|
@ -194,22 +220,22 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor,
|
|||
{
|
||||
default:
|
||||
case DrawColumnVariant::DrawCopy:
|
||||
return blend_copy(Sample(sample_index));
|
||||
return blend_copy(Sample(sample_index, method, isNearestFilter));
|
||||
case DrawColumnVariant::Draw:
|
||||
return blend_copy(Shade(Sample(sample_index), isSimpleShade));
|
||||
return blend_copy(Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade));
|
||||
case DrawColumnVariant::DrawAdd:
|
||||
case DrawColumnVariant::DrawAddClamp:
|
||||
fg = Shade(Sample(sample_index), isSimpleShade);
|
||||
fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade);
|
||||
return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
||||
case DrawColumnVariant::DrawShaded:
|
||||
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
|
||||
inv_alpha = 256 - alpha;
|
||||
return blend_add(color, bgcolor, alpha, inv_alpha);
|
||||
case DrawColumnVariant::DrawSubClamp:
|
||||
fg = Shade(Sample(sample_index), isSimpleShade);
|
||||
fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade);
|
||||
return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
||||
case DrawColumnVariant::DrawRevSubClamp:
|
||||
fg = Shade(Sample(sample_index), isSimpleShade);
|
||||
fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade);
|
||||
return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
||||
case DrawColumnVariant::DrawTranslated:
|
||||
return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade));
|
||||
|
@ -285,9 +311,45 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo
|
|||
}
|
||||
}
|
||||
|
||||
SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index)
|
||||
SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter)
|
||||
{
|
||||
return source[sample_index].load_vec4ub(true);
|
||||
if (method == DrawColumnMethod::Normal)
|
||||
{
|
||||
if (isNearestFilter)
|
||||
{
|
||||
SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS;
|
||||
return source[sample_index * 4].load_vec4ub(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
return SampleLinear(source, source2, texturefracx, frac, one, textureheight);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return source[frac].load_vec4ub(true);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code for a bilinear sample taken from two texture columns.
//
// col0, col1    - the two columns to blend; pixels are read as 4 bytes each.
// texturefracx  - horizontal blend weight towards col1, used on a 0..16 scale.
// texturefracy  - vertical texture position; clamped to [0, (1 << 30) - 1].
// one           - vertical distance added to texturefracy to reach the second row.
// height        - number of pixels in each column.
//
// Returns the weighted sum of the four fetched pixels, scaled back down by 256.
SSAVec4i DrawColumnCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
	// Clamp to edge
	SSAInt clamped_v0 = SSAInt::MAX(SSAInt::MIN(texturefracy, SSAInt((1 << 30) - 1)), SSAInt(0));
	SSAInt clamped_v1 = SSAInt::MAX(SSAInt::MIN(texturefracy + one, SSAInt((1 << 30) - 1)), SSAInt(0));

	// Scale the clamped positions by the column height, then split off the row index.
	SSAInt scaled_row0 = (clamped_v0 >> (FRACBITS - 2)) * height;
	SSAInt scaled_row1 = (clamped_v1 >> (FRACBITS - 2)) * height;
	SSAInt row0 = scaled_row0 >> FRACBITS;
	SSAInt row1 = scaled_row1 >> FRACBITS;

	// Fetch the 2x2 neighbourhood (first index = column, second index = row).
	SSAVec4i texel00 = col0[row0 * 4].load_vec4ub(true);
	SSAVec4i texel01 = col0[row1 * 4].load_vec4ub(true);
	SSAVec4i texel10 = col1[row0 * 4].load_vec4ub(true);
	SSAVec4i texel11 = col1[row1 * 4].load_vec4ub(true);

	// Weights on a 0..16 scale: the "far" weights pull towards col1 / row1.
	SSAInt weight_x_far = texturefracx;
	SSAInt weight_y_far = (scaled_row1 >> (FRACBITS - 4)) & 15;
	SSAInt weight_y_near = 16 - weight_y_far;
	SSAInt weight_x_near = 16 - weight_x_far;

	SSAVec4i weighted_sum =
		texel00 * (weight_y_near * weight_x_near) +
		texel01 * (weight_y_far * weight_x_near) +
		texel10 * (weight_y_near * weight_x_far) +
		texel11 * (weight_y_far * weight_x_far);

	return (weighted_sum + 127) >> 8;
}
|
||||
|
||||
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)
|
||||
|
|
|
@ -58,10 +58,12 @@ public:
|
|||
void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data);
|
||||
|
||||
private:
|
||||
void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade);
|
||||
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
|
||||
void LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade);
|
||||
void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter);
|
||||
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter);
|
||||
SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
|
||||
SSAVec4i Sample(SSAInt frac);
|
||||
SSAVec4i Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter);
|
||||
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
|
||||
SSAInt ColormapSample(SSAInt frac);
|
||||
SSAVec4i TranslateSample(SSAInt frac);
|
||||
SSAInt TranslateSamplePal(SSAInt frac);
|
||||
|
@ -73,6 +75,7 @@ private:
|
|||
|
||||
SSAUBytePtr dest;
|
||||
SSAUBytePtr source;
|
||||
SSAUBytePtr source2;
|
||||
SSAUBytePtr colormap;
|
||||
SSAUBytePtr translation;
|
||||
SSAUBytePtr basecolors;
|
||||
|
@ -80,6 +83,9 @@ private:
|
|||
SSAInt count;
|
||||
SSAInt dest_y;
|
||||
SSAInt iscale;
|
||||
SSAInt texturefracx;
|
||||
SSAInt textureheight;
|
||||
SSAInt one;
|
||||
SSAInt texturefrac;
|
||||
SSAInt light;
|
||||
SSAVec4i color;
|
||||
|
@ -87,6 +93,7 @@ private:
|
|||
SSAInt srcalpha;
|
||||
SSAInt destalpha;
|
||||
SSABool is_simple_shade;
|
||||
SSABool is_nearest_filter;
|
||||
SSAShadeConstants shade_constants;
|
||||
SSAWorkerThread thread;
|
||||
};
|
||||
|
|
|
@ -139,45 +139,3 @@ SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha)
|
|||
SSAInt inv_alpha = 256 - alpha;
|
||||
return (destalpha * alpha + 256 * inv_alpha + 128) >> 8;
|
||||
}
|
||||
|
||||
// Emits code for a bilinear sample taken from two texture columns.
//
// col0, col1    - the two columns to blend; pixels are read as 4 bytes each.
// texturefracx  - horizontal blend weight towards col1, used on a 0..16 scale.
// texturefracy  - vertical texture position of the first row.
// one           - vertical distance added to texturefracy to reach the second row.
// height        - number of pixels in each column.
//
// Returns the weighted sum of the four fetched pixels, scaled back down by 256.
SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
	SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;
	SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
	SSAInt y0 = frac_y0 >> FRACBITS;
	SSAInt y1 = frac_y1 >> FRACBITS;

	SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
	SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
	SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
	SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);

	// The fractional part of the row position is how far the sample lies from
	// row y0 towards row y1, so it must weight the y1 texels (p01/p11) and its
	// complement the y0 texels (p00/p10). The previous code had these two
	// weights swapped, which inverted the vertical blend.
	SSAInt inv_b = texturefracx;
	SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
	SSAInt a = 16 - inv_a;
	SSAInt b = 16 - inv_b;

	return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
|
||||
|
||||
// Emits code for a bilinear sample from a texture addressed by wrapping
// x/y coordinates.
//
// texture      - pixel data, read as 4 bytes per pixel.
// xfrac, yfrac - texture coordinates; shifted right by xbits/ybits to obtain
//                the integer texel position.
// xbits, ybits - shift amounts; (32 - bits) is the number of coordinate bits
//                kept by the wrap mask on each axis.
//
// Returns the weighted sum of the four fetched pixels, scaled back down by 256.
SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits)
{
	SSAInt keep_x = (32 - xbits);
	SSAInt keep_y = (32 - ybits);
	SSAInt wrap_x = (SSAInt(1) << keep_x) - 1;
	SSAInt wrap_y = (SSAInt(1) << keep_y) - 1;
	SSAInt texel_x = xfrac >> xbits;
	SSAInt texel_y = yfrac >> ybits;

	// Wrapped row indices and column base offsets for the 2x2 neighbourhood.
	SSAInt row_near = texel_y & wrap_y;
	SSAInt row_far = (texel_y + 1) & wrap_y;
	SSAInt column_near = (texel_x & wrap_x) << keep_y;
	SSAInt column_far = ((texel_x + 1) & wrap_x) << keep_y;

	SSAVec4i texel00 = texture[(row_near + column_near) * 4].load_vec4ub(true);
	SSAVec4i texel01 = texture[(row_far + column_near) * 4].load_vec4ub(true);
	SSAVec4i texel10 = texture[(row_near + column_far) * 4].load_vec4ub(true);
	SSAVec4i texel11 = texture[(row_far + column_far) * 4].load_vec4ub(true);

	// Top four fractional bits of each coordinate give the weights on a 0..16 scale.
	SSAInt weight_x_far = (xfrac >> (xbits - 4)) & 15;
	SSAInt weight_y_far = (yfrac >> (ybits - 4)) & 15;
	SSAInt weight_y_near = 16 - weight_y_far;
	SSAInt weight_x_near = 16 - weight_x_far;

	SSAVec4i weighted_sum =
		texel00 * (weight_y_near * weight_x_near) +
		texel01 * (weight_y_far * weight_x_near) +
		texel10 * (weight_y_near * weight_x_far) +
		texel11 * (weight_y_far * weight_x_far);

	return (weighted_sum + 127) >> 8;
}
|
||||
|
|
|
@ -88,8 +88,4 @@ public:
|
|||
|
||||
// Calculates the final alpha values to be used when combined with the source texture alpha channel
|
||||
SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha);
|
||||
|
||||
// SampleBgra
|
||||
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
|
||||
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
|
||||
};
|
||||
|
|
|
@ -89,6 +89,8 @@ void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade)
|
|||
branch.if_block(is_nearest_filter);
|
||||
LoopFilter(variant, isSimpleShade, true);
|
||||
branch.else_block();
|
||||
stack_xfrac.store(stack_xfrac.load() - (SSAInt(1) << (31 - xbits)));
|
||||
stack_yfrac.store(stack_yfrac.load() - (SSAInt(1) << (31 - ybits)));
|
||||
LoopFilter(variant, isSimpleShade, false);
|
||||
branch.end_block();
|
||||
}
|
||||
|
@ -187,15 +189,37 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte
|
|||
{
|
||||
if (is64x64)
|
||||
{
|
||||
return sample_linear(source, xfrac, yfrac, SSAInt(26), SSAInt(26));
|
||||
return SampleLinear(source, xfrac, yfrac, SSAInt(26), SSAInt(26));
|
||||
}
|
||||
else
|
||||
{
|
||||
return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits);
|
||||
return SampleLinear(source, xfrac, yfrac, 32 - xbits, 32 - ybits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code for a bilinear sample from a span texture addressed by wrapping
// x/y coordinates.
//
// texture      - pixel data, read as 4 bytes per pixel.
// xfrac, yfrac - texture coordinates; shifted right by xbits/ybits to obtain
//                the integer texel position.
// xbits, ybits - shift amounts; (32 - bits) is the number of coordinate bits
//                kept by the wrap mask on each axis.
//
// Returns the weighted sum of the four fetched pixels, scaled back down by 256.
SSAVec4i DrawSpanCodegen::SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits)
{
	SSAInt keep_x = (32 - xbits);
	SSAInt keep_y = (32 - ybits);
	SSAInt wrap_x = (SSAInt(1) << keep_x) - 1;
	SSAInt wrap_y = (SSAInt(1) << keep_y) - 1;
	SSAInt texel_x = xfrac >> xbits;
	SSAInt texel_y = yfrac >> ybits;

	// Wrapped row indices and column base offsets for the 2x2 neighbourhood.
	SSAInt row_near = texel_y & wrap_y;
	SSAInt row_far = (texel_y + 1) & wrap_y;
	SSAInt column_near = (texel_x & wrap_x) << keep_y;
	SSAInt column_far = ((texel_x + 1) & wrap_x) << keep_y;

	SSAVec4i texel00 = texture[(row_near + column_near) * 4].load_vec4ub(true);
	SSAVec4i texel01 = texture[(row_far + column_near) * 4].load_vec4ub(true);
	SSAVec4i texel10 = texture[(row_near + column_far) * 4].load_vec4ub(true);
	SSAVec4i texel11 = texture[(row_far + column_far) * 4].load_vec4ub(true);

	// Top four fractional bits of each coordinate give the weights on a 0..16 scale.
	SSAInt weight_x_far = (xfrac >> (xbits - 4)) & 15;
	SSAInt weight_y_far = (yfrac >> (ybits - 4)) & 15;
	SSAInt weight_y_near = 16 - weight_y_far;
	SSAInt weight_x_near = 16 - weight_x_far;

	SSAVec4i weighted_sum =
		texel00 * (weight_y_near * weight_x_near) +
		texel01 * (weight_y_far * weight_x_near) +
		texel10 * (weight_y_near * weight_x_far) +
		texel11 * (weight_y_far * weight_x_far);

	return (weighted_sum + 127) >> 8;
}
|
||||
|
||||
SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
|
||||
{
|
||||
if (isSimpleShade)
|
||||
|
|
|
@ -45,6 +45,7 @@ private:
|
|||
SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
|
||||
void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
|
||||
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
|
||||
SSAVec4i SampleLinear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
|
||||
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
|
||||
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant);
|
||||
|
||||
|
|
|
@ -117,6 +117,9 @@ void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool
|
|||
branch.if_block(is_nearest_filter);
|
||||
Loop(variant, fourColumns, isSimpleShade, true);
|
||||
branch.else_block();
|
||||
int numColumns = fourColumns ? 4 : 1;
|
||||
for (int i = 0; i < numColumns; i++)
|
||||
stack_frac[i].store(stack_frac[i].load() - (one[i] / 2));
|
||||
Loop(variant, fourColumns, isSimpleShade, false);
|
||||
branch.end_block();
|
||||
}
|
||||
|
@ -180,10 +183,30 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter)
|
|||
}
|
||||
else
|
||||
{
|
||||
return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]);
|
||||
return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code for a bilinear sample taken from two wall texture columns.
//
// col0, col1    - the two columns to blend; pixels are read as 4 bytes each.
// texturefracx  - horizontal blend weight towards col1, used on a 0..16 scale.
// texturefracy  - vertical texture position of the first row.
// one           - vertical distance added to texturefracy to reach the second row.
// height        - number of pixels in each column.
//
// Returns the weighted sum of the four fetched pixels, scaled back down by 256.
SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
	SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;
	SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
	SSAInt y0 = frac_y0 >> FRACBITS;
	SSAInt y1 = frac_y1 >> FRACBITS;

	SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
	SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
	SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
	SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);

	// The fractional part of the row position is how far the sample lies from
	// row y0 towards row y1, so it must weight the y1 texels (p01/p11) and its
	// complement the y0 texels (p00/p10). The previous code had these two
	// weights swapped, which inverted the vertical blend.
	SSAInt inv_b = texturefracx;
	SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
	SSAInt a = 16 - inv_a;
	SSAInt b = 16 - inv_b;

	return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
|
||||
|
||||
SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
|
||||
{
|
||||
if (isSimpleShade)
|
||||
|
|
|
@ -43,6 +43,7 @@ private:
|
|||
void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade);
|
||||
void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter);
|
||||
SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter);
|
||||
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
|
||||
SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade);
|
||||
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant);
|
||||
|
||||
|
|
|
@ -360,6 +360,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
|
|||
std::vector<llvm::Type *> elements;
|
||||
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest;
|
||||
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source;
|
||||
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source2;
|
||||
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap;
|
||||
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation;
|
||||
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors;
|
||||
|
@ -367,6 +368,8 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
|
|||
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefracx;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t textureheight;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light;
|
||||
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color;
|
||||
|
|
|
@ -117,6 +117,7 @@ struct DrawColumnArgs
|
|||
{
|
||||
uint32_t *dest;
|
||||
const uint8_t *source;
|
||||
const uint8_t *source2;
|
||||
uint8_t *colormap;
|
||||
uint8_t *translation;
|
||||
const uint32_t *basecolors;
|
||||
|
@ -124,6 +125,8 @@ struct DrawColumnArgs
|
|||
int32_t count;
|
||||
int32_t dest_y;
|
||||
uint32_t iscale;
|
||||
uint32_t texturefracx;
|
||||
uint32_t textureheight;
|
||||
uint32_t texturefrac;
|
||||
uint32_t light;
|
||||
uint32_t color;
|
||||
|
@ -143,7 +146,8 @@ struct DrawColumnArgs
|
|||
uint32_t flags;
|
||||
enum Flags
|
||||
{
|
||||
simple_shade = 1
|
||||
simple_shade = 1,
|
||||
nearest_filter = 2
|
||||
};
|
||||
|
||||
FString ToString()
|
||||
|
|
|
@ -2973,7 +2973,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
|
|||
{
|
||||
R_SetColorMapLight(basecolormap, 0, 0);
|
||||
}
|
||||
return r_columnmethod ? DoDraw1 : DoDraw0;
|
||||
bool active_columnmethod = r_columnmethod && !r_swtruecolor;
|
||||
return active_columnmethod ? DoDraw1 : DoDraw0;
|
||||
}
|
||||
|
||||
fglevel = GetAlpha(style.SrcAlpha, alpha);
|
||||
|
@ -3006,7 +3007,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
|
|||
{
|
||||
return DontDraw;
|
||||
}
|
||||
return r_columnmethod ? DoDraw1 : DoDraw0;
|
||||
bool active_columnmethod = r_columnmethod && !r_swtruecolor;
|
||||
return active_columnmethod ? DoDraw1 : DoDraw0;
|
||||
}
|
||||
|
||||
void R_FinishSetPatchStyle ()
|
||||
|
|
|
@ -348,6 +348,7 @@ public:
|
|||
{
|
||||
args.dest = (uint32_t*)dc_dest;
|
||||
args.source = dc_source;
|
||||
args.source2 = dc_source2;
|
||||
args.colormap = dc_colormap;
|
||||
args.translation = dc_translation;
|
||||
args.basecolors = (const uint32_t *)GPalette.BaseColors;
|
||||
|
@ -355,6 +356,8 @@ public:
|
|||
args.count = dc_count;
|
||||
args.dest_y = _dest_y;
|
||||
args.iscale = dc_iscale;
|
||||
args.texturefracx = dc_texturefracx;
|
||||
args.textureheight = dc_textureheight;
|
||||
args.texturefrac = dc_texturefrac;
|
||||
args.light = LightBgra::calc_light_multiplier(dc_light);
|
||||
args.color = LightBgra::shade_pal_index_simple(dc_color, args.light);
|
||||
|
@ -373,6 +376,8 @@ public:
|
|||
args.flags = 0;
|
||||
if (dc_shade_constants.simple_shade)
|
||||
args.flags |= DrawColumnArgs::simple_shade;
|
||||
if (args.source2 == nullptr)
|
||||
args.flags |= DrawWallArgs::nearest_filter;
|
||||
|
||||
DetectRangeError(args.dest, args.dest_y, args.count);
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@ public:
|
|||
{
|
||||
args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx;
|
||||
args.source = nullptr;
|
||||
args.source2 = nullptr;
|
||||
args.colormap = dc_colormap;
|
||||
args.translation = dc_translation;
|
||||
args.basecolors = (const uint32_t *)GPalette.BaseColors;
|
||||
|
@ -97,6 +98,8 @@ public:
|
|||
args.flags = 0;
|
||||
if (dc_shade_constants.simple_shade)
|
||||
args.flags |= DrawColumnArgs::simple_shade;
|
||||
if (args.source2 == nullptr)
|
||||
args.flags |= DrawWallArgs::nearest_filter;
|
||||
|
||||
DetectRangeError(args.dest, args.dest_y, args.count);
|
||||
}
|
||||
|
|
|
@ -1183,6 +1183,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof
|
|||
}
|
||||
else
|
||||
{
|
||||
xoffset -= FRACUNIT / 2;
|
||||
int tx0 = (xoffset >> FRACBITS) % mip_width;
|
||||
if (tx0 < 0)
|
||||
tx0 += mip_width;
|
||||
|
|
125
src/r_things.cpp
125
src/r_things.cpp
|
@ -251,8 +251,133 @@ double sprtopscreen;
|
|||
|
||||
bool sprflipvert;
|
||||
|
||||
// Draws one masked texture column through the true-color column drawers,
// selecting a mipmap level and nearest/linear filtering first.
//
// tex      - texture to draw from.
// col      - fixed-point horizontal texture position of the column.
// useRt    - when true each post is drawn with hcolfunc_pre() (and the posts are
//            flipped afterwards if sprflipvert is set); otherwise colfunc() is used.
// unmasked - when true the texture's own posts are ignored and a single post
//            covering the full texture height is drawn instead.
//
// Reads the dc_*/spr* drawer globals and the mfloorclip/mceilingclip arrays.
// dc_iscale is temporarily replaced by a step normalized to the texture height
// and is restored before returning.
void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmasked)
{
	fixed_t saved_iscale = dc_iscale; // Save this because we need to modify it for mipmaps

	// Normalize to 0-1 range:
	double uv_stepd = FIXED2DBL(dc_iscale);
	double v_step = uv_stepd / tex->GetHeight();

	// Convert to uint32. Converting a negative or out-of-range double straight
	// to an unsigned type is undefined behaviour, so go through int64_t first to
	// get a well-defined wrap-around.
	// NOTE(review): dc_iscale is presumably negative for vertically flipped sprites - confirm against callers.
	dc_iscale = (uint32_t)(int64_t)(v_step * (1 << 30));

	// Texture mipmap and filter selection:
	fixed_t xoffset = col;
	double magnitude = fabs(uv_stepd * 2);
	bool magnifying = magnitude < 1.0f;

	int mipmap_offset = 0;
	int mip_width = tex->GetWidth();
	int mip_height = tex->GetHeight();
	if (r_mipmap && tex->Mipmapped() && mip_width > 1 && mip_height > 1)
	{
		// Horizontal position relative to the full-size texture width, so it can
		// be rescaled to the width of whichever mip level gets picked below.
		uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width);

		double texture_bias = 1.7f;
		double level = MAX(magnitude - 3.0, 0.0);
		while (level > texture_bias && mip_width > 1 && mip_height > 1)
		{
			// Skip past this level's pixels and halve the dimensions for the next one.
			mipmap_offset += mip_width * mip_height;
			level *= 0.5f;
			mip_width = MAX(mip_width >> 1, 1);
			mip_height = MAX(mip_height >> 1, 1);
		}
		xoffset = (xpos >> FRACBITS) * mip_width;
	}

	const uint32_t *pixels = tex->GetPixelsBgra() + mipmap_offset;

	bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter);
	if (filter_nearest)
	{
		// Clamp to the valid column range of the selected mip level.
		xoffset = MAX(MIN(xoffset, (mip_width << FRACBITS) - 1), 0);

		int tx = xoffset >> FRACBITS;
		dc_source = (BYTE*)(pixels + tx * mip_height);
		dc_source2 = nullptr; // No second column: nearest filtering
		dc_textureheight = mip_height;
		dc_texturefracx = 0;
	}
	else
	{
		// Shift by half a texel before clamping so the two columns straddle the sample position.
		xoffset = MAX(MIN(xoffset - (FRACUNIT / 2), (mip_width << FRACBITS) - 1), 0);

		int tx0 = xoffset >> FRACBITS;
		int tx1 = MIN(tx0 + 1, mip_width - 1);
		dc_source = (BYTE*)(pixels + tx0 * mip_height);
		dc_source2 = (BYTE*)(pixels + tx1 * mip_height);
		dc_textureheight = mip_height;
		dc_texturefracx = (xoffset >> (FRACBITS - 4)) & 15; // Top 4 fractional bits: blend weight towards tx1
	}

	// Grab the posts we need to draw
	const FTexture::Span *span;
	tex->GetColumnBgra(col >> FRACBITS, &span);
	FTexture::Span unmaskedSpan[2];
	if (unmasked)
	{
		// One post covering the whole column, followed by a zero-length terminator.
		span = unmaskedSpan;
		unmaskedSpan[0].TopOffset = 0;
		unmaskedSpan[0].Length = tex->GetHeight();
		unmaskedSpan[1].TopOffset = 0;
		unmaskedSpan[1].Length = 0;
	}

	// Draw each span post
	while (span->Length != 0)
	{
		const int length = span->Length;
		const int top = span->TopOffset;

		// calculate unclipped screen coordinates for post
		dc_yl = (int)(sprtopscreen + spryscale * top + 0.5);
		dc_yh = (int)(sprtopscreen + spryscale * (top + length) + 0.5) - 1;

		if (sprflipvert)
		{
			swapvalues(dc_yl, dc_yh);
		}

		// Clip against the floor and ceiling clip arrays for this screen column.
		if (dc_yh >= mfloorclip[dc_x])
		{
			dc_yh = mfloorclip[dc_x] - 1;
		}
		if (dc_yl < mceilingclip[dc_x])
		{
			dc_yl = mceilingclip[dc_x];
		}

		if (dc_yl <= dc_yh)
		{
			dc_dest = (ylookup[dc_yl] + dc_x) * 4 + dc_destorg;
			dc_count = dc_yh - dc_yl + 1;

			// Texture position of the first pixel's center, normalized to the texture height.
			// Same int64_t detour as for dc_iscale to avoid an undefined double-to-unsigned
			// conversion should v come out slightly negative or out of range.
			double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight();
			dc_texturefrac = (uint32_t)(int64_t)(v * (1 << 30));

			if (useRt)
				hcolfunc_pre();
			else
				colfunc();
		}
		span++;
	}

	dc_iscale = saved_iscale;

	if (sprflipvert && useRt)
		rt_flip_posts();
}
|
||||
|
||||
void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked)
|
||||
{
|
||||
// Handle the linear filtered version in a different function to reduce chances of merge conflicts from zdoom.
|
||||
if (r_swtruecolor && !drawer_needs_pal_input && !useRt) // To do: add support to R_DrawColumnHoriz_rgba
|
||||
{
|
||||
R_DrawMaskedColumnBgra(tex, col, useRt, unmasked);
|
||||
return;
|
||||
}
|
||||
|
||||
const FTexture::Span *span;
|
||||
const BYTE *column;
|
||||
if (r_swtruecolor && !drawer_needs_pal_input)
|
||||
|
|
Loading…
Reference in a new issue