mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-01-18 15:11:46 +00:00
Add aliasing meta data to loads and stores
This commit is contained in:
parent
0b0ee2e3c2
commit
fc07a25306
22 changed files with 260 additions and 189 deletions
|
@ -13,41 +13,41 @@
|
|||
|
||||
void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data)
|
||||
{
|
||||
dest = args[0][0].load();
|
||||
source = args[0][1].load();
|
||||
colormap = args[0][2].load();
|
||||
translation = args[0][3].load();
|
||||
basecolors = args[0][4].load();
|
||||
pitch = args[0][5].load();
|
||||
count = args[0][6].load();
|
||||
dest_y = args[0][7].load();
|
||||
dest = args[0][0].load(true);
|
||||
source = args[0][1].load(true);
|
||||
colormap = args[0][2].load(true);
|
||||
translation = args[0][3].load(true);
|
||||
basecolors = args[0][4].load(true);
|
||||
pitch = args[0][5].load(true);
|
||||
count = args[0][6].load(true);
|
||||
dest_y = args[0][7].load(true);
|
||||
if (method == DrawColumnMethod::Normal)
|
||||
iscale = args[0][8].load();
|
||||
texturefrac = args[0][9].load();
|
||||
light = args[0][10].load();
|
||||
color = SSAVec4i::unpack(args[0][11].load());
|
||||
srccolor = SSAVec4i::unpack(args[0][12].load());
|
||||
srcalpha = args[0][13].load();
|
||||
destalpha = args[0][14].load();
|
||||
SSAShort light_alpha = args[0][15].load();
|
||||
SSAShort light_red = args[0][16].load();
|
||||
SSAShort light_green = args[0][17].load();
|
||||
SSAShort light_blue = args[0][18].load();
|
||||
SSAShort fade_alpha = args[0][19].load();
|
||||
SSAShort fade_red = args[0][20].load();
|
||||
SSAShort fade_green = args[0][21].load();
|
||||
SSAShort fade_blue = args[0][22].load();
|
||||
SSAShort desaturate = args[0][23].load();
|
||||
SSAInt flags = args[0][24].load();
|
||||
iscale = args[0][8].load(true);
|
||||
texturefrac = args[0][9].load(true);
|
||||
light = args[0][10].load(true);
|
||||
color = SSAVec4i::unpack(args[0][11].load(true));
|
||||
srccolor = SSAVec4i::unpack(args[0][12].load(true));
|
||||
srcalpha = args[0][13].load(true);
|
||||
destalpha = args[0][14].load(true);
|
||||
SSAShort light_alpha = args[0][15].load(true);
|
||||
SSAShort light_red = args[0][16].load(true);
|
||||
SSAShort light_green = args[0][17].load(true);
|
||||
SSAShort light_blue = args[0][18].load(true);
|
||||
SSAShort fade_alpha = args[0][19].load(true);
|
||||
SSAShort fade_red = args[0][20].load(true);
|
||||
SSAShort fade_green = args[0][21].load(true);
|
||||
SSAShort fade_blue = args[0][22].load(true);
|
||||
SSAShort desaturate = args[0][23].load(true);
|
||||
SSAInt flags = args[0][24].load(true);
|
||||
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
|
||||
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
|
||||
shade_constants.desaturate = desaturate.zext_int();
|
||||
|
||||
thread.core = thread_data[0][0].load();
|
||||
thread.num_cores = thread_data[0][1].load();
|
||||
thread.pass_start_y = thread_data[0][2].load();
|
||||
thread.pass_end_y = thread_data[0][3].load();
|
||||
thread.temp = thread_data[0][4].load();
|
||||
thread.core = thread_data[0][0].load(true);
|
||||
thread.num_cores = thread_data[0][1].load(true);
|
||||
thread.pass_start_y = thread_data[0][2].load(true);
|
||||
thread.pass_end_y = thread_data[0][3].load(true);
|
||||
thread.temp = thread_data[0][4].load(true);
|
||||
|
||||
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
|
||||
|
||||
|
@ -104,7 +104,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
|
|||
|
||||
if (numColumns == 4)
|
||||
{
|
||||
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub();
|
||||
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false);
|
||||
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
|
||||
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
|
||||
bgcolor[0] = SSAVec4i::extendlo(bg0);
|
||||
|
@ -114,7 +114,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
|
|||
}
|
||||
else
|
||||
{
|
||||
bgcolor[0] = dest[offset].load_vec4ub();
|
||||
bgcolor[0] = dest[offset].load_vec4ub(false);
|
||||
}
|
||||
|
||||
SSAVec4i outcolor[4];
|
||||
|
@ -131,7 +131,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
|
|||
dest[offset].store_vec4ub(outcolor[0]);
|
||||
}
|
||||
|
||||
stack_index.store(index + 1);
|
||||
stack_index.store(index.add(SSAInt(1), true, true));
|
||||
if (method == DrawColumnMethod::Normal)
|
||||
stack_frac.store(frac + iscale);
|
||||
loop.end_block();
|
||||
|
@ -218,7 +218,7 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo
|
|||
{
|
||||
default:
|
||||
case DrawColumnVariant::DrawCopy:
|
||||
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub());
|
||||
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub(true));
|
||||
case DrawColumnVariant::Draw:
|
||||
return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade));
|
||||
case DrawColumnVariant::DrawAdd:
|
||||
|
@ -259,17 +259,17 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo
|
|||
|
||||
SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index)
|
||||
{
|
||||
return source[sample_index].load_vec4ub();
|
||||
return source[sample_index].load_vec4ub(true);
|
||||
}
|
||||
|
||||
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)
|
||||
{
|
||||
return colormap[source[sample_index].load().zext_int()].load().zext_int();
|
||||
return colormap[source[sample_index].load(true).zext_int()].load(true).zext_int();
|
||||
}
|
||||
|
||||
SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index)
|
||||
{
|
||||
return translation[source[sample_index].load().zext_int()].load().zext_int();
|
||||
return translation[source[sample_index].load(true).zext_int()].load(true).zext_int();
|
||||
}
|
||||
|
||||
SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
|
||||
|
|
|
@ -43,13 +43,13 @@ SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light)
|
|||
|
||||
SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors)
|
||||
{
|
||||
SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index];
|
||||
SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index];
|
||||
return shade_bgra_simple(color, light);
|
||||
}
|
||||
|
||||
SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors)
|
||||
{
|
||||
SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index];
|
||||
SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index];
|
||||
return shade_bgra_advanced(color, light, constants);
|
||||
}
|
||||
|
||||
|
@ -125,10 +125,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt
|
|||
SSAInt y0 = frac_y0 >> FRACBITS;
|
||||
SSAInt y1 = frac_y1 >> FRACBITS;
|
||||
|
||||
SSAVec4i p00 = col0[y0 * 4].load_vec4ub();
|
||||
SSAVec4i p01 = col0[y1 * 4].load_vec4ub();
|
||||
SSAVec4i p10 = col1[y0 * 4].load_vec4ub();
|
||||
SSAVec4i p11 = col1[y1 * 4].load_vec4ub();
|
||||
SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
|
||||
SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
|
||||
SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
|
||||
SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);
|
||||
|
||||
SSAInt inv_b = texturefracx;
|
||||
SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15;
|
||||
|
@ -147,10 +147,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt
|
|||
SSAInt x = xfrac >> xbits;
|
||||
SSAInt y = yfrac >> ybits;
|
||||
|
||||
SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub();
|
||||
SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub();
|
||||
SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub();
|
||||
SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub();
|
||||
SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true);
|
||||
SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true);
|
||||
SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true);
|
||||
SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true);
|
||||
|
||||
SSAInt inv_b = (xfrac >> (xbits - 4)) & 15;
|
||||
SSAInt inv_a = (yfrac >> (ybits - 4)) & 15;
|
||||
|
|
|
@ -13,31 +13,31 @@
|
|||
|
||||
void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args)
|
||||
{
|
||||
destorg = args[0][0].load();
|
||||
source = args[0][1].load();
|
||||
destpitch = args[0][2].load();
|
||||
stack_xfrac.store(args[0][3].load());
|
||||
stack_yfrac.store(args[0][4].load());
|
||||
xstep = args[0][5].load();
|
||||
ystep = args[0][6].load();
|
||||
x1 = args[0][7].load();
|
||||
x2 = args[0][8].load();
|
||||
y = args[0][9].load();
|
||||
xbits = args[0][10].load();
|
||||
ybits = args[0][11].load();
|
||||
light = args[0][12].load();
|
||||
srcalpha = args[0][13].load();
|
||||
destalpha = args[0][14].load();
|
||||
SSAShort light_alpha = args[0][15].load();
|
||||
SSAShort light_red = args[0][16].load();
|
||||
SSAShort light_green = args[0][17].load();
|
||||
SSAShort light_blue = args[0][18].load();
|
||||
SSAShort fade_alpha = args[0][19].load();
|
||||
SSAShort fade_red = args[0][20].load();
|
||||
SSAShort fade_green = args[0][21].load();
|
||||
SSAShort fade_blue = args[0][22].load();
|
||||
SSAShort desaturate = args[0][23].load();
|
||||
SSAInt flags = args[0][24].load();
|
||||
destorg = args[0][0].load(true);
|
||||
source = args[0][1].load(true);
|
||||
destpitch = args[0][2].load(true);
|
||||
stack_xfrac.store(args[0][3].load(true));
|
||||
stack_yfrac.store(args[0][4].load(true));
|
||||
xstep = args[0][5].load(true);
|
||||
ystep = args[0][6].load(true);
|
||||
x1 = args[0][7].load(true);
|
||||
x2 = args[0][8].load(true);
|
||||
y = args[0][9].load(true);
|
||||
xbits = args[0][10].load(true);
|
||||
ybits = args[0][11].load(true);
|
||||
light = args[0][12].load(true);
|
||||
srcalpha = args[0][13].load(true);
|
||||
destalpha = args[0][14].load(true);
|
||||
SSAShort light_alpha = args[0][15].load(true);
|
||||
SSAShort light_red = args[0][16].load(true);
|
||||
SSAShort light_green = args[0][17].load(true);
|
||||
SSAShort light_blue = args[0][18].load(true);
|
||||
SSAShort fade_alpha = args[0][19].load(true);
|
||||
SSAShort fade_red = args[0][20].load(true);
|
||||
SSAShort fade_green = args[0][21].load(true);
|
||||
SSAShort fade_blue = args[0][22].load(true);
|
||||
SSAShort desaturate = args[0][23].load(true);
|
||||
SSAInt flags = args[0][24].load(true);
|
||||
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
|
||||
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
|
||||
shade_constants.desaturate = desaturate.zext_int();
|
||||
|
@ -97,7 +97,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool
|
|||
SSAInt index = stack_index.load();
|
||||
loop.loop_block(index < sseLength);
|
||||
|
||||
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub();
|
||||
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(false);
|
||||
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
|
||||
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
|
||||
SSAVec4i bgcolors[4] =
|
||||
|
@ -123,7 +123,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool
|
|||
SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
|
||||
data[index * 16].store_unaligned_vec16ub(color);
|
||||
|
||||
stack_index.store(index + 1);
|
||||
stack_index.store(index.add(SSAInt(1), true, true));
|
||||
loop.end_block();
|
||||
}
|
||||
return sseLength;
|
||||
|
@ -140,11 +140,11 @@ void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleS
|
|||
SSAInt xfrac = stack_xfrac.load();
|
||||
SSAInt yfrac = stack_yfrac.load();
|
||||
|
||||
SSAVec4i bgcolor = data[index * 4].load_vec4ub();
|
||||
SSAVec4i bgcolor = data[index * 4].load_vec4ub(false);
|
||||
SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant);
|
||||
data[index * 4].store_vec4ub(color);
|
||||
|
||||
stack_index.store(index + 1);
|
||||
stack_index.store(index.add(SSAInt(1), true, true));
|
||||
stack_xfrac.store(xfrac + xstep);
|
||||
stack_yfrac.store(yfrac + ystep);
|
||||
loop.end_block();
|
||||
|
@ -160,7 +160,7 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte
|
|||
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
|
||||
else
|
||||
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
|
||||
return source[spot * 4].load_vec4ub();
|
||||
return source[spot * 4].load_vec4ub(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -13,58 +13,58 @@
|
|||
|
||||
void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data)
|
||||
{
|
||||
dest = args[0][0].load();
|
||||
source[0] = args[0][1].load();
|
||||
source[1] = args[0][2].load();
|
||||
source[2] = args[0][3].load();
|
||||
source[3] = args[0][4].load();
|
||||
source2[0] = args[0][5].load();
|
||||
source2[1] = args[0][6].load();
|
||||
source2[2] = args[0][7].load();
|
||||
source2[3] = args[0][8].load();
|
||||
pitch = args[0][9].load();
|
||||
count = args[0][10].load();
|
||||
dest_y = args[0][11].load();
|
||||
texturefrac[0] = args[0][12].load();
|
||||
texturefrac[1] = args[0][13].load();
|
||||
texturefrac[2] = args[0][14].load();
|
||||
texturefrac[3] = args[0][15].load();
|
||||
texturefracx[0] = args[0][16].load();
|
||||
texturefracx[1] = args[0][17].load();
|
||||
texturefracx[2] = args[0][18].load();
|
||||
texturefracx[3] = args[0][19].load();
|
||||
iscale[0] = args[0][20].load();
|
||||
iscale[1] = args[0][21].load();
|
||||
iscale[2] = args[0][22].load();
|
||||
iscale[3] = args[0][23].load();
|
||||
textureheight[0] = args[0][24].load();
|
||||
textureheight[1] = args[0][25].load();
|
||||
textureheight[2] = args[0][26].load();
|
||||
textureheight[3] = args[0][27].load();
|
||||
light[0] = args[0][28].load();
|
||||
light[1] = args[0][29].load();
|
||||
light[2] = args[0][30].load();
|
||||
light[3] = args[0][31].load();
|
||||
srcalpha = args[0][32].load();
|
||||
destalpha = args[0][33].load();
|
||||
SSAShort light_alpha = args[0][34].load();
|
||||
SSAShort light_red = args[0][35].load();
|
||||
SSAShort light_green = args[0][36].load();
|
||||
SSAShort light_blue = args[0][37].load();
|
||||
SSAShort fade_alpha = args[0][38].load();
|
||||
SSAShort fade_red = args[0][39].load();
|
||||
SSAShort fade_green = args[0][40].load();
|
||||
SSAShort fade_blue = args[0][41].load();
|
||||
SSAShort desaturate = args[0][42].load();
|
||||
SSAInt flags = args[0][43].load();
|
||||
dest = args[0][0].load(true);
|
||||
source[0] = args[0][1].load(true);
|
||||
source[1] = args[0][2].load(true);
|
||||
source[2] = args[0][3].load(true);
|
||||
source[3] = args[0][4].load(true);
|
||||
source2[0] = args[0][5].load(true);
|
||||
source2[1] = args[0][6].load(true);
|
||||
source2[2] = args[0][7].load(true);
|
||||
source2[3] = args[0][8].load(true);
|
||||
pitch = args[0][9].load(true);
|
||||
count = args[0][10].load(true);
|
||||
dest_y = args[0][11].load(true);
|
||||
texturefrac[0] = args[0][12].load(true);
|
||||
texturefrac[1] = args[0][13].load(true);
|
||||
texturefrac[2] = args[0][14].load(true);
|
||||
texturefrac[3] = args[0][15].load(true);
|
||||
texturefracx[0] = args[0][16].load(true);
|
||||
texturefracx[1] = args[0][17].load(true);
|
||||
texturefracx[2] = args[0][18].load(true);
|
||||
texturefracx[3] = args[0][19].load(true);
|
||||
iscale[0] = args[0][20].load(true);
|
||||
iscale[1] = args[0][21].load(true);
|
||||
iscale[2] = args[0][22].load(true);
|
||||
iscale[3] = args[0][23].load(true);
|
||||
textureheight[0] = args[0][24].load(true);
|
||||
textureheight[1] = args[0][25].load(true);
|
||||
textureheight[2] = args[0][26].load(true);
|
||||
textureheight[3] = args[0][27].load(true);
|
||||
light[0] = args[0][28].load(true);
|
||||
light[1] = args[0][29].load(true);
|
||||
light[2] = args[0][30].load(true);
|
||||
light[3] = args[0][31].load(true);
|
||||
srcalpha = args[0][32].load(true);
|
||||
destalpha = args[0][33].load(true);
|
||||
SSAShort light_alpha = args[0][34].load(true);
|
||||
SSAShort light_red = args[0][35].load(true);
|
||||
SSAShort light_green = args[0][36].load(true);
|
||||
SSAShort light_blue = args[0][37].load(true);
|
||||
SSAShort fade_alpha = args[0][38].load(true);
|
||||
SSAShort fade_red = args[0][39].load(true);
|
||||
SSAShort fade_green = args[0][40].load(true);
|
||||
SSAShort fade_blue = args[0][41].load(true);
|
||||
SSAShort desaturate = args[0][42].load(true);
|
||||
SSAInt flags = args[0][43].load(true);
|
||||
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
|
||||
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
|
||||
shade_constants.desaturate = desaturate.zext_int();
|
||||
|
||||
thread.core = thread_data[0][0].load();
|
||||
thread.num_cores = thread_data[0][1].load();
|
||||
thread.pass_start_y = thread_data[0][2].load();
|
||||
thread.pass_end_y = thread_data[0][3].load();
|
||||
thread.core = thread_data[0][0].load(true);
|
||||
thread.num_cores = thread_data[0][1].load(true);
|
||||
thread.pass_start_y = thread_data[0][2].load(true);
|
||||
thread.pass_end_y = thread_data[0][3].load(true);
|
||||
|
||||
is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade);
|
||||
is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter);
|
||||
|
@ -118,7 +118,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim
|
|||
|
||||
if (fourColumns)
|
||||
{
|
||||
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub();
|
||||
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false);
|
||||
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
|
||||
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
|
||||
SSAVec4i bgcolors[4] =
|
||||
|
@ -138,12 +138,12 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim
|
|||
}
|
||||
else
|
||||
{
|
||||
SSAVec4i bgcolor = dest[offset].load_vec4ub();
|
||||
SSAVec4i bgcolor = dest[offset].load_vec4ub(false);
|
||||
SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant);
|
||||
dest[offset].store_vec4ub(color);
|
||||
}
|
||||
|
||||
stack_index.store(index + 1);
|
||||
stack_index.store(index.add(SSAInt(1), true, true));
|
||||
for (int i = 0; i < numColumns; i++)
|
||||
stack_frac[i].store(frac[i] + fracstep[i]);
|
||||
loop.end_block();
|
||||
|
@ -155,7 +155,7 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter)
|
|||
if (isNearestFilter)
|
||||
{
|
||||
SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS;
|
||||
return source[index][sample_index * 4].load_vec4ub();
|
||||
return source[index][sample_index * 4].load_vec4ub(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -23,36 +23,48 @@ SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const
|
|||
return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
|
||||
}
|
||||
|
||||
SSAFloat SSAFloatPtr::load() const
|
||||
SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const
|
||||
{
|
||||
return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAFloat::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec4f SSAFloatPtr::load_vec4f() const
|
||||
SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const
|
||||
{
|
||||
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
|
||||
return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec4f::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const
|
||||
SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const
|
||||
{
|
||||
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
|
||||
return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec4f::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
void SSAFloatPtr::store(const SSAFloat &new_value)
|
||||
{
|
||||
SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value)
|
||||
{
|
||||
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
|
||||
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()));
|
||||
auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()));
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value)
|
||||
{
|
||||
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
|
||||
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4);
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -17,9 +17,9 @@ public:
|
|||
static llvm::Type *llvm_type();
|
||||
SSAFloatPtr operator[](SSAInt index) const;
|
||||
SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
|
||||
SSAFloat load() const;
|
||||
SSAVec4f load_vec4f() const;
|
||||
SSAVec4f load_unaligned_vec4f() const;
|
||||
SSAFloat load(bool constantScopeDomain) const;
|
||||
SSAVec4f load_vec4f(bool constantScopeDomain) const;
|
||||
SSAVec4f load_unaligned_vec4f(bool constantScopeDomain) const;
|
||||
void store(const SSAFloat &new_value);
|
||||
void store_vec4f(const SSAVec4f &new_value);
|
||||
void store_unaligned_vec4f(const SSAVec4f &new_value);
|
||||
|
|
|
@ -42,6 +42,11 @@ SSAInt SSAInt::MAX(SSAInt a, SSAInt b)
|
|||
return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint()));
|
||||
}
|
||||
|
||||
SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap)
|
||||
{
|
||||
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap));
|
||||
}
|
||||
|
||||
SSAInt operator+(const SSAInt &a, const SSAInt &b)
|
||||
{
|
||||
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
|
||||
|
|
|
@ -19,6 +19,8 @@ public:
|
|||
static SSAInt MIN(SSAInt a, SSAInt b);
|
||||
static SSAInt MAX(SSAInt a, SSAInt b);
|
||||
|
||||
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
|
||||
|
||||
llvm::Value *v;
|
||||
};
|
||||
|
||||
|
|
|
@ -23,36 +23,48 @@ SSAIntPtr SSAIntPtr::operator[](SSAInt index) const
|
|||
return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
|
||||
}
|
||||
|
||||
SSAInt SSAIntPtr::load() const
|
||||
SSAInt SSAIntPtr::load(bool constantScopeDomain) const
|
||||
{
|
||||
return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAInt::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec4i SSAIntPtr::load_vec4i() const
|
||||
SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const
|
||||
{
|
||||
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec4i::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec4i SSAIntPtr::load_unaligned_vec4i() const
|
||||
SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const
|
||||
{
|
||||
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec4i::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
void SSAIntPtr::store(const SSAInt &new_value)
|
||||
{
|
||||
SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAIntPtr::store_vec4i(const SSAVec4i &new_value)
|
||||
{
|
||||
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()));
|
||||
auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()));
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value)
|
||||
{
|
||||
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4);
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -17,9 +17,9 @@ public:
|
|||
static llvm::Type *llvm_type();
|
||||
SSAIntPtr operator[](SSAInt index) const;
|
||||
SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
|
||||
SSAInt load() const;
|
||||
SSAVec4i load_vec4i() const;
|
||||
SSAVec4i load_unaligned_vec4i() const;
|
||||
SSAInt load(bool constantScopeDomain) const;
|
||||
SSAVec4i load_vec4i(bool constantScopeDomain) const;
|
||||
SSAVec4i load_unaligned_vec4i(bool constantScopeDomain) const;
|
||||
void store(const SSAInt &new_value);
|
||||
void store_vec4i(const SSAVec4i &new_value);
|
||||
void store_unaligned_vec4i(const SSAVec4i &new_value);
|
||||
|
|
|
@ -13,9 +13,9 @@ public:
|
|||
SSAFloatPtr pixels() { return _pixels; }
|
||||
SSAFloatPtr pixels() const { return _pixels; }
|
||||
|
||||
SSAVec4f get4f(SSAInt index) const
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return _pixels[index * 4].load_vec4f();
|
||||
return _pixels[index * 4].load_vec4f(constantScopeDomain);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
|
|
|
@ -13,9 +13,9 @@ public:
|
|||
SSAUBytePtr pixels() { return _pixels; }
|
||||
SSAUBytePtr pixels() const { return _pixels; }
|
||||
|
||||
SSAVec4f get4f(SSAInt index) const
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f);
|
||||
return SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
|
|
|
@ -20,9 +20,9 @@ public:
|
|||
out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
||||
}
|
||||
*/
|
||||
SSAVec4f get4f(SSAInt index) const
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3);
|
||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
|
|
|
@ -13,9 +13,9 @@ public:
|
|||
SSAUBytePtr pixels() { return _pixels; }
|
||||
SSAUBytePtr pixels() const { return _pixels; }
|
||||
|
||||
SSAVec4f get4f(SSAInt index) const
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0);
|
||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
|
|
|
@ -7,6 +7,10 @@ SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBui
|
|||
: _context(context), _module(module), _builder(builder)
|
||||
{
|
||||
instance = this;
|
||||
|
||||
_constant_scope_domain = llvm::MDNode::get(SSAScope::context(), { llvm::MDString::get(SSAScope::context(), "ConstantScopeDomain") });
|
||||
_constant_scope = llvm::MDNode::getDistinct(SSAScope::context(), { _constant_scope_domain });
|
||||
_constant_scope_list = llvm::MDNode::get(SSAScope::context(), { _constant_scope });
|
||||
}
|
||||
|
||||
SSAScope::~SSAScope()
|
||||
|
@ -50,6 +54,11 @@ llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
|
|||
return alloca_builder.CreateAlloca(type, size.v, hint());
|
||||
}
|
||||
|
||||
llvm::MDNode *SSAScope::constant_scope_list()
|
||||
{
|
||||
return instance->_constant_scope_list;
|
||||
}
|
||||
|
||||
const std::string &SSAScope::hint()
|
||||
{
|
||||
return instance->_hint;
|
||||
|
|
|
@ -14,6 +14,7 @@ public:
|
|||
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
|
||||
static llvm::Value *alloca(llvm::Type *type);
|
||||
static llvm::Value *alloca(llvm::Type *type, SSAInt size);
|
||||
static llvm::MDNode *constant_scope_list();
|
||||
static const std::string &hint();
|
||||
static void set_hint(const std::string &hint);
|
||||
|
||||
|
@ -22,6 +23,9 @@ private:
|
|||
llvm::LLVMContext *_context;
|
||||
llvm::Module *_module;
|
||||
llvm::IRBuilder<> *_builder;
|
||||
llvm::MDNode *_constant_scope_domain;
|
||||
llvm::MDNode *_constant_scope;
|
||||
llvm::MDNode *_constant_scope_list;
|
||||
std::string _hint;
|
||||
};
|
||||
|
||||
|
|
|
@ -23,32 +23,45 @@ SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const
|
|||
return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
|
||||
}
|
||||
|
||||
SSAUByte SSAUBytePtr::load() const
|
||||
SSAUByte SSAUBytePtr::load(bool constantScopeDomain) const
|
||||
{
|
||||
return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAUByte::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec4i SSAUBytePtr::load_vec4ub() const
|
||||
SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const
|
||||
{
|
||||
SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
SSAInt i32 = SSAInt::from_llvm(loadInst);
|
||||
return SSAVec4i::unpack(i32);
|
||||
}
|
||||
|
||||
SSAVec16ub SSAUBytePtr::load_vec16ub() const
|
||||
SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const
|
||||
{
|
||||
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec16ub::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const
|
||||
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const
|
||||
{
|
||||
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec16ub::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
void SSAUBytePtr::store(const SSAUByte &new_value)
|
||||
{
|
||||
SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
|
||||
|
@ -66,13 +79,15 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
|
|||
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3)));
|
||||
llvm::Value *mask = llvm::ConstantVector::get(constants);
|
||||
llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint());
|
||||
SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false);
|
||||
llvm::StoreInst *inst = SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
|
||||
{
|
||||
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()));
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
|
||||
// The following generates _mm_stream_si128, maybe!
|
||||
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1));
|
||||
|
@ -83,4 +98,5 @@ void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
|
|||
{
|
||||
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -19,11 +19,10 @@ public:
|
|||
static llvm::Type *llvm_type();
|
||||
SSAUBytePtr operator[](SSAInt index) const;
|
||||
SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; }
|
||||
SSAUByte load() const;
|
||||
SSAVec4i load_vec4ub() const;
|
||||
SSAVec8s load_vec8s() const;
|
||||
SSAVec16ub load_vec16ub() const;
|
||||
SSAVec16ub load_unaligned_vec16ub() const;
|
||||
SSAUByte load(bool constantScopeDomain) const;
|
||||
SSAVec4i load_vec4ub(bool constantScopeDomain) const;
|
||||
SSAVec16ub load_vec16ub(bool constantScopeDomain) const;
|
||||
SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const;
|
||||
void store(const SSAUByte &new_value);
|
||||
void store_vec4ub(const SSAVec4i &new_value);
|
||||
void store_vec16ub(const SSAVec16ub &new_value);
|
||||
|
|
|
@ -4,14 +4,18 @@
|
|||
#include "ssa_int.h"
|
||||
#include "ssa_scope.h"
|
||||
|
||||
SSAValue SSAValue::load()
|
||||
SSAValue SSAValue::load(bool constantScopeDomain)
|
||||
{
|
||||
return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(v, false);
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAValue::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
void SSAValue::store(llvm::Value *value)
|
||||
{
|
||||
SSAScope::builder().CreateStore(value, v, false);
|
||||
auto inst = SSAScope::builder().CreateStore(value, v, false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
SSAIndexLookup SSAValue::operator[](int index)
|
||||
|
|
|
@ -15,7 +15,7 @@ public:
|
|||
|
||||
static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; }
|
||||
|
||||
SSAValue load();
|
||||
SSAValue load(bool constantScopeDomain);
|
||||
void store(llvm::Value *v);
|
||||
|
||||
template<typename Type>
|
||||
|
@ -38,7 +38,7 @@ public:
|
|||
llvm::Value *v;
|
||||
std::vector<llvm::Value *> indexes;
|
||||
|
||||
SSAValue load() { SSAValue value = *this; return value.load(); }
|
||||
SSAValue load(bool constantScopeDomain) { SSAValue value = *this; return value.load(constantScopeDomain); }
|
||||
void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); }
|
||||
|
||||
template<typename Type>
|
||||
|
|
|
@ -23,22 +23,30 @@ SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const
|
|||
return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
|
||||
}
|
||||
|
||||
SSAVec4f SSAVec4fPtr::load() const
|
||||
SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec4f::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
SSAVec4f SSAVec4fPtr::load_unaligned() const
|
||||
SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint()));
|
||||
auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 4, false, SSAScope::hint());
|
||||
if (constantScopeDomain)
|
||||
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
|
||||
return SSAVec4f::from_llvm(loadInst);
|
||||
}
|
||||
|
||||
void SSAVec4fPtr::store(const SSAVec4f &new_value)
|
||||
{
|
||||
SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value)
|
||||
{
|
||||
SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -15,8 +15,8 @@ public:
|
|||
static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); }
|
||||
static llvm::Type *llvm_type();
|
||||
SSAVec4fPtr operator[](SSAInt index) const;
|
||||
SSAVec4f load() const;
|
||||
SSAVec4f load_unaligned() const;
|
||||
SSAVec4f load(bool constantScopeDomain) const;
|
||||
SSAVec4f load_unaligned(bool constantScopeDomain) const;
|
||||
void store(const SSAVec4f &new_value);
|
||||
void store_unaligned(const SSAVec4f &new_value);
|
||||
|
||||
|
|
Loading…
Reference in a new issue