Add aliasing metadata to loads and stores

This commit is contained in:
Magnus Norddahl 2016-10-08 09:29:26 +02:00
parent 0b0ee2e3c2
commit fc07a25306
22 changed files with 260 additions and 189 deletions

View file

@ -13,41 +13,41 @@
void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data)
{
dest = args[0][0].load();
source = args[0][1].load();
colormap = args[0][2].load();
translation = args[0][3].load();
basecolors = args[0][4].load();
pitch = args[0][5].load();
count = args[0][6].load();
dest_y = args[0][7].load();
dest = args[0][0].load(true);
source = args[0][1].load(true);
colormap = args[0][2].load(true);
translation = args[0][3].load(true);
basecolors = args[0][4].load(true);
pitch = args[0][5].load(true);
count = args[0][6].load(true);
dest_y = args[0][7].load(true);
if (method == DrawColumnMethod::Normal)
iscale = args[0][8].load();
texturefrac = args[0][9].load();
light = args[0][10].load();
color = SSAVec4i::unpack(args[0][11].load());
srccolor = SSAVec4i::unpack(args[0][12].load());
srcalpha = args[0][13].load();
destalpha = args[0][14].load();
SSAShort light_alpha = args[0][15].load();
SSAShort light_red = args[0][16].load();
SSAShort light_green = args[0][17].load();
SSAShort light_blue = args[0][18].load();
SSAShort fade_alpha = args[0][19].load();
SSAShort fade_red = args[0][20].load();
SSAShort fade_green = args[0][21].load();
SSAShort fade_blue = args[0][22].load();
SSAShort desaturate = args[0][23].load();
SSAInt flags = args[0][24].load();
iscale = args[0][8].load(true);
texturefrac = args[0][9].load(true);
light = args[0][10].load(true);
color = SSAVec4i::unpack(args[0][11].load(true));
srccolor = SSAVec4i::unpack(args[0][12].load(true));
srcalpha = args[0][13].load(true);
destalpha = args[0][14].load(true);
SSAShort light_alpha = args[0][15].load(true);
SSAShort light_red = args[0][16].load(true);
SSAShort light_green = args[0][17].load(true);
SSAShort light_blue = args[0][18].load(true);
SSAShort fade_alpha = args[0][19].load(true);
SSAShort fade_red = args[0][20].load(true);
SSAShort fade_green = args[0][21].load(true);
SSAShort fade_blue = args[0][22].load(true);
SSAShort desaturate = args[0][23].load(true);
SSAInt flags = args[0][24].load(true);
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
thread.core = thread_data[0][0].load();
thread.num_cores = thread_data[0][1].load();
thread.pass_start_y = thread_data[0][2].load();
thread.pass_end_y = thread_data[0][3].load();
thread.temp = thread_data[0][4].load();
thread.core = thread_data[0][0].load(true);
thread.num_cores = thread_data[0][1].load(true);
thread.pass_start_y = thread_data[0][2].load(true);
thread.pass_end_y = thread_data[0][3].load(true);
thread.temp = thread_data[0][4].load(true);
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
@ -104,7 +104,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
if (numColumns == 4)
{
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub();
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false);
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
bgcolor[0] = SSAVec4i::extendlo(bg0);
@ -114,7 +114,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
}
else
{
bgcolor[0] = dest[offset].load_vec4ub();
bgcolor[0] = dest[offset].load_vec4ub(false);
}
SSAVec4i outcolor[4];
@ -131,7 +131,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
dest[offset].store_vec4ub(outcolor[0]);
}
stack_index.store(index + 1);
stack_index.store(index.add(SSAInt(1), true, true));
if (method == DrawColumnMethod::Normal)
stack_frac.store(frac + iscale);
loop.end_block();
@ -218,7 +218,7 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo
{
default:
case DrawColumnVariant::DrawCopy:
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub());
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub(true));
case DrawColumnVariant::Draw:
return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade));
case DrawColumnVariant::DrawAdd:
@ -259,17 +259,17 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo
// Returns the four-byte texel read from source[sample_index] via load_vec4ub.
// NOTE(review): this listing is diff output, so both the parameterless call and
// the call passing true appear; presumably the latter is the post-commit form.
SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index)
{
return source[sample_index].load_vec4ub();
return source[sample_index].load_vec4ub(true);
}
// Loads source[sample_index], zero-extends it to an int, uses that as an index
// into colormap, and returns the zero-extended colormap entry.
// NOTE(review): this listing is diff output, so the load() and load(true) forms
// both appear; presumably the load(true) line is the post-commit form.
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)
{
return colormap[source[sample_index].load().zext_int()].load().zext_int();
return colormap[source[sample_index].load(true).zext_int()].load(true).zext_int();
}
// Loads source[sample_index], zero-extends it to an int, uses that as an index
// into translation, and returns the zero-extended translation entry.
// NOTE(review): this listing is diff output, so the load() and load(true) forms
// both appear; presumably the load(true) line is the post-commit form.
SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index)
{
return translation[source[sample_index].load().zext_int()].load().zext_int();
return translation[source[sample_index].load(true).zext_int()].load(true).zext_int();
}
SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade)

View file

@ -43,13 +43,13 @@ SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light)
// Fetches the four-byte color at basecolors[index * 4] and passes it, together
// with light, to shade_bgra_simple, returning that result.
// NOTE(review): this listing is diff output, so the color load appears twice
// (without and with the true argument); presumably the second is the post-commit form.
SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors)
{
SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index];
SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index];
return shade_bgra_simple(color, light);
}
// Fetches the four-byte color at basecolors[index * 4] and passes it, together
// with light and the shade constants, to shade_bgra_advanced, returning that result.
// NOTE(review): this listing is diff output, so the color load appears twice
// (without and with the true argument); presumably the second is the post-commit form.
SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors)
{
SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index];
SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index];
return shade_bgra_advanced(color, light, constants);
}
@ -125,10 +125,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt
SSAInt y0 = frac_y0 >> FRACBITS;
SSAInt y1 = frac_y1 >> FRACBITS;
SSAVec4i p00 = col0[y0 * 4].load_vec4ub();
SSAVec4i p01 = col0[y1 * 4].load_vec4ub();
SSAVec4i p10 = col1[y0 * 4].load_vec4ub();
SSAVec4i p11 = col1[y1 * 4].load_vec4ub();
SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true);
SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true);
SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true);
SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true);
SSAInt inv_b = texturefracx;
SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15;
@ -147,10 +147,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt
SSAInt x = xfrac >> xbits;
SSAInt y = yfrac >> ybits;
SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub();
SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub();
SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub();
SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub();
SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true);
SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true);
SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true);
SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true);
SSAInt inv_b = (xfrac >> (xbits - 4)) & 15;
SSAInt inv_a = (yfrac >> (ybits - 4)) & 15;

View file

@ -13,31 +13,31 @@
void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args)
{
destorg = args[0][0].load();
source = args[0][1].load();
destpitch = args[0][2].load();
stack_xfrac.store(args[0][3].load());
stack_yfrac.store(args[0][4].load());
xstep = args[0][5].load();
ystep = args[0][6].load();
x1 = args[0][7].load();
x2 = args[0][8].load();
y = args[0][9].load();
xbits = args[0][10].load();
ybits = args[0][11].load();
light = args[0][12].load();
srcalpha = args[0][13].load();
destalpha = args[0][14].load();
SSAShort light_alpha = args[0][15].load();
SSAShort light_red = args[0][16].load();
SSAShort light_green = args[0][17].load();
SSAShort light_blue = args[0][18].load();
SSAShort fade_alpha = args[0][19].load();
SSAShort fade_red = args[0][20].load();
SSAShort fade_green = args[0][21].load();
SSAShort fade_blue = args[0][22].load();
SSAShort desaturate = args[0][23].load();
SSAInt flags = args[0][24].load();
destorg = args[0][0].load(true);
source = args[0][1].load(true);
destpitch = args[0][2].load(true);
stack_xfrac.store(args[0][3].load(true));
stack_yfrac.store(args[0][4].load(true));
xstep = args[0][5].load(true);
ystep = args[0][6].load(true);
x1 = args[0][7].load(true);
x2 = args[0][8].load(true);
y = args[0][9].load(true);
xbits = args[0][10].load(true);
ybits = args[0][11].load(true);
light = args[0][12].load(true);
srcalpha = args[0][13].load(true);
destalpha = args[0][14].load(true);
SSAShort light_alpha = args[0][15].load(true);
SSAShort light_red = args[0][16].load(true);
SSAShort light_green = args[0][17].load(true);
SSAShort light_blue = args[0][18].load(true);
SSAShort fade_alpha = args[0][19].load(true);
SSAShort fade_red = args[0][20].load(true);
SSAShort fade_green = args[0][21].load(true);
SSAShort fade_blue = args[0][22].load(true);
SSAShort desaturate = args[0][23].load(true);
SSAInt flags = args[0][24].load(true);
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
@ -97,7 +97,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool
SSAInt index = stack_index.load();
loop.loop_block(index < sseLength);
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub();
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(false);
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
SSAVec4i bgcolors[4] =
@ -123,7 +123,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool
SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
data[index * 16].store_unaligned_vec16ub(color);
stack_index.store(index + 1);
stack_index.store(index.add(SSAInt(1), true, true));
loop.end_block();
}
return sseLength;
@ -140,11 +140,11 @@ void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleS
SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load();
SSAVec4i bgcolor = data[index * 4].load_vec4ub();
SSAVec4i bgcolor = data[index * 4].load_vec4ub(false);
SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant);
data[index * 4].store_vec4ub(color);
stack_index.store(index + 1);
stack_index.store(index.add(SSAInt(1), true, true));
stack_xfrac.store(xfrac + xstep);
stack_yfrac.store(yfrac + ystep);
loop.end_block();
@ -160,7 +160,7 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte
spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
else
spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
return source[spot * 4].load_vec4ub();
return source[spot * 4].load_vec4ub(true);
}
else
{

View file

@ -13,58 +13,58 @@
void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data)
{
dest = args[0][0].load();
source[0] = args[0][1].load();
source[1] = args[0][2].load();
source[2] = args[0][3].load();
source[3] = args[0][4].load();
source2[0] = args[0][5].load();
source2[1] = args[0][6].load();
source2[2] = args[0][7].load();
source2[3] = args[0][8].load();
pitch = args[0][9].load();
count = args[0][10].load();
dest_y = args[0][11].load();
texturefrac[0] = args[0][12].load();
texturefrac[1] = args[0][13].load();
texturefrac[2] = args[0][14].load();
texturefrac[3] = args[0][15].load();
texturefracx[0] = args[0][16].load();
texturefracx[1] = args[0][17].load();
texturefracx[2] = args[0][18].load();
texturefracx[3] = args[0][19].load();
iscale[0] = args[0][20].load();
iscale[1] = args[0][21].load();
iscale[2] = args[0][22].load();
iscale[3] = args[0][23].load();
textureheight[0] = args[0][24].load();
textureheight[1] = args[0][25].load();
textureheight[2] = args[0][26].load();
textureheight[3] = args[0][27].load();
light[0] = args[0][28].load();
light[1] = args[0][29].load();
light[2] = args[0][30].load();
light[3] = args[0][31].load();
srcalpha = args[0][32].load();
destalpha = args[0][33].load();
SSAShort light_alpha = args[0][34].load();
SSAShort light_red = args[0][35].load();
SSAShort light_green = args[0][36].load();
SSAShort light_blue = args[0][37].load();
SSAShort fade_alpha = args[0][38].load();
SSAShort fade_red = args[0][39].load();
SSAShort fade_green = args[0][40].load();
SSAShort fade_blue = args[0][41].load();
SSAShort desaturate = args[0][42].load();
SSAInt flags = args[0][43].load();
dest = args[0][0].load(true);
source[0] = args[0][1].load(true);
source[1] = args[0][2].load(true);
source[2] = args[0][3].load(true);
source[3] = args[0][4].load(true);
source2[0] = args[0][5].load(true);
source2[1] = args[0][6].load(true);
source2[2] = args[0][7].load(true);
source2[3] = args[0][8].load(true);
pitch = args[0][9].load(true);
count = args[0][10].load(true);
dest_y = args[0][11].load(true);
texturefrac[0] = args[0][12].load(true);
texturefrac[1] = args[0][13].load(true);
texturefrac[2] = args[0][14].load(true);
texturefrac[3] = args[0][15].load(true);
texturefracx[0] = args[0][16].load(true);
texturefracx[1] = args[0][17].load(true);
texturefracx[2] = args[0][18].load(true);
texturefracx[3] = args[0][19].load(true);
iscale[0] = args[0][20].load(true);
iscale[1] = args[0][21].load(true);
iscale[2] = args[0][22].load(true);
iscale[3] = args[0][23].load(true);
textureheight[0] = args[0][24].load(true);
textureheight[1] = args[0][25].load(true);
textureheight[2] = args[0][26].load(true);
textureheight[3] = args[0][27].load(true);
light[0] = args[0][28].load(true);
light[1] = args[0][29].load(true);
light[2] = args[0][30].load(true);
light[3] = args[0][31].load(true);
srcalpha = args[0][32].load(true);
destalpha = args[0][33].load(true);
SSAShort light_alpha = args[0][34].load(true);
SSAShort light_red = args[0][35].load(true);
SSAShort light_green = args[0][36].load(true);
SSAShort light_blue = args[0][37].load(true);
SSAShort fade_alpha = args[0][38].load(true);
SSAShort fade_red = args[0][39].load(true);
SSAShort fade_green = args[0][40].load(true);
SSAShort fade_blue = args[0][41].load(true);
SSAShort desaturate = args[0][42].load(true);
SSAInt flags = args[0][43].load(true);
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
thread.core = thread_data[0][0].load();
thread.num_cores = thread_data[0][1].load();
thread.pass_start_y = thread_data[0][2].load();
thread.pass_end_y = thread_data[0][3].load();
thread.core = thread_data[0][0].load(true);
thread.num_cores = thread_data[0][1].load(true);
thread.pass_start_y = thread_data[0][2].load(true);
thread.pass_end_y = thread_data[0][3].load(true);
is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade);
is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter);
@ -118,7 +118,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim
if (fourColumns)
{
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub();
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false);
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
SSAVec4i bgcolors[4] =
@ -138,12 +138,12 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim
}
else
{
SSAVec4i bgcolor = dest[offset].load_vec4ub();
SSAVec4i bgcolor = dest[offset].load_vec4ub(false);
SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant);
dest[offset].store_vec4ub(color);
}
stack_index.store(index + 1);
stack_index.store(index.add(SSAInt(1), true, true));
for (int i = 0; i < numColumns; i++)
stack_frac[i].store(frac[i] + fracstep[i]);
loop.end_block();
@ -155,7 +155,7 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter)
if (isNearestFilter)
{
SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS;
return source[index][sample_index * 4].load_vec4ub();
return source[index][sample_index * 4].load_vec4ub(false);
}
else
{

View file

@ -23,36 +23,48 @@ SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const
return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
}
// Emits a load through this pointer and wraps the result as an SSAFloat.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the metadata-tagging lines are the post-commit form.
SSAFloat SSAFloatPtr::load() const
SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const
{
return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAFloat::from_llvm(loadInst);
}
// Bit-casts this pointer to a pointer to a 4 x float vector, emits a load through
// it, and wraps the result as an SSAVec4f.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the metadata-tagging lines are the post-commit form.
SSAVec4f SSAFloatPtr::load_vec4f() const
SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst);
}
// Bit-casts this pointer to a pointer to a 4 x float vector and emits a load
// through it with 4 passed as the alignment, wrapping the result as an SSAVec4f.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the CreateAlignedLoad lines are the post-commit form.
SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const
SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4, false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst);
}
// Emits a store of new_value through this pointer. The form that keeps the
// returned instruction tags it with MD_noalias metadata set to
// SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAFloatPtr::store(const SSAFloat &new_value)
{
SSAScope::builder().CreateStore(new_value.v, v, false);
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Bit-casts this pointer to a pointer to a 4 x float vector and emits a store of
// new_value through it. The form that keeps the returned instruction tags it with
// MD_noalias metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value)
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()));
auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()));
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Bit-casts this pointer to a pointer to a 4 x float vector and emits a store of
// new_value through it with 4 passed as the alignment. The form that keeps the
// returned instruction tags it with MD_noalias metadata set to
// SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value)
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4);
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -17,9 +17,9 @@ public:
static llvm::Type *llvm_type();
SSAFloatPtr operator[](SSAInt index) const;
SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
SSAFloat load() const;
SSAVec4f load_vec4f() const;
SSAVec4f load_unaligned_vec4f() const;
SSAFloat load(bool constantScopeDomain) const;
SSAVec4f load_vec4f(bool constantScopeDomain) const;
SSAVec4f load_unaligned_vec4f(bool constantScopeDomain) const;
void store(const SSAFloat &new_value);
void store_vec4f(const SSAVec4f &new_value);
void store_unaligned_vec4f(const SSAVec4f &new_value);

View file

@ -42,6 +42,11 @@ SSAInt SSAInt::MAX(SSAInt a, SSAInt b)
return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint()));
}
// Emits an integer add of this value and b through the current IR builder and
// returns the result. The two flags are forwarded unchanged to CreateAdd as its
// no-unsigned-wrap / no-signed-wrap arguments.
SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap);
	return SSAInt::from_llvm(sum);
}
SSAInt operator+(const SSAInt &a, const SSAInt &b)
{
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));

View file

@ -19,6 +19,8 @@ public:
static SSAInt MIN(SSAInt a, SSAInt b);
static SSAInt MAX(SSAInt a, SSAInt b);
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
llvm::Value *v;
};

View file

@ -23,36 +23,48 @@ SSAIntPtr SSAIntPtr::operator[](SSAInt index) const
return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
}
// Emits a load through this pointer and wraps the result as an SSAInt.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the metadata-tagging lines are the post-commit form.
SSAInt SSAIntPtr::load() const
SSAInt SSAIntPtr::load(bool constantScopeDomain) const
{
return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAInt::from_llvm(loadInst);
}
// Bit-casts this pointer to a pointer to a 4 x i32 vector, emits a load through
// it, and wraps the result as an SSAVec4i.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the metadata-tagging lines are the post-commit form.
SSAVec4i SSAIntPtr::load_vec4i() const
SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4i::from_llvm(loadInst);
}
// Bit-casts this pointer to a pointer to a 4 x i32 vector and emits a load
// through it with 4 passed as the alignment, wrapping the result as an SSAVec4i.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the CreateAlignedLoad lines are the post-commit form.
SSAVec4i SSAIntPtr::load_unaligned_vec4i() const
SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4, false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4i::from_llvm(loadInst);
}
// Emits a store of new_value through this pointer. The form that keeps the
// returned instruction tags it with MD_noalias metadata set to
// SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAIntPtr::store(const SSAInt &new_value)
{
SSAScope::builder().CreateStore(new_value.v, v, false);
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Bit-casts this pointer to a pointer to a 4 x i32 vector and emits a store of
// new_value through it. The form that keeps the returned instruction tags it with
// MD_noalias metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAIntPtr::store_vec4i(const SSAVec4i &new_value)
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()));
auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()));
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Bit-casts this pointer to a pointer to a 4 x i32 vector and emits a store of
// new_value through it with 4 passed as the alignment. The form that keeps the
// returned instruction tags it with MD_noalias metadata set to
// SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value)
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4);
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -17,9 +17,9 @@ public:
static llvm::Type *llvm_type();
SSAIntPtr operator[](SSAInt index) const;
SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; }
SSAInt load() const;
SSAVec4i load_vec4i() const;
SSAVec4i load_unaligned_vec4i() const;
SSAInt load(bool constantScopeDomain) const;
SSAVec4i load_vec4i(bool constantScopeDomain) const;
SSAVec4i load_unaligned_vec4i(bool constantScopeDomain) const;
void store(const SSAInt &new_value);
void store_vec4i(const SSAVec4i &new_value);
void store_unaligned_vec4i(const SSAVec4i &new_value);

View file

@ -13,9 +13,9 @@ public:
SSAFloatPtr pixels() { return _pixels; }
SSAFloatPtr pixels() const { return _pixels; }
// Returns the four-float pixel loaded from _pixels[index * 4] via load_vec4f.
// In the two-argument form, constantScopeDomain is forwarded to load_vec4f.
// NOTE(review): this listing is diff output, so two signatures and two return
// statements appear; presumably the two-argument form is the post-commit one.
SSAVec4f get4f(SSAInt index) const
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
{
return _pixels[index * 4].load_vec4f();
return _pixels[index * 4].load_vec4f(constantScopeDomain);
}
void set4f(SSAInt index, const SSAVec4f &pixel)

View file

@ -13,9 +13,9 @@ public:
SSAUBytePtr pixels() { return _pixels; }
SSAUBytePtr pixels() const { return _pixels; }
// Loads the four bytes at _pixels[index * 4] via load_vec4ub, converts them to an
// SSAVec4f and scales the result by 1/255.
// In the two-argument form, constantScopeDomain is forwarded to load_vec4ub.
// NOTE(review): this listing is diff output, so two signatures and two return
// statements appear; presumably the two-argument form is the post-commit one.
SSAVec4f get4f(SSAInt index) const
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
{
return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f);
return SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f);
}
void set4f(SSAInt index, const SSAVec4f &pixel)

View file

@ -20,9 +20,9 @@ public:
out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
}
*/
// Loads the four bytes at _pixels[index * 4] via load_vec4ub, converts them to an
// SSAVec4f, scales by 1/255, and shuffles the lanes with indices 2, 1, 0, 3.
// In the two-argument form, constantScopeDomain is forwarded to load_vec4ub.
// NOTE(review): this listing is diff output, so two signatures and two return
// statements appear; presumably the two-argument form is the post-commit one.
SSAVec4f get4f(SSAInt index) const
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
{
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3);
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3);
}
void set4f(SSAInt index, const SSAVec4f &pixel)

View file

@ -13,9 +13,9 @@ public:
SSAUBytePtr pixels() { return _pixels; }
SSAUBytePtr pixels() const { return _pixels; }
// Loads the four bytes at _pixels[index * 4] via load_vec4ub, converts them to an
// SSAVec4f, scales by 1/255, and shuffles the lanes with indices 3, 2, 1, 0.
// In the two-argument form, constantScopeDomain is forwarded to load_vec4ub.
// NOTE(review): this listing is diff output, so two signatures and two return
// statements appear; presumably the two-argument form is the post-commit one.
SSAVec4f get4f(SSAInt index) const
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
{
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0);
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0);
}
void set4f(SSAInt index, const SSAVec4f &pixel)

View file

@ -7,6 +7,10 @@ SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBui
: _context(context), _module(module), _builder(builder)
{
instance = this;
_constant_scope_domain = llvm::MDNode::get(SSAScope::context(), { llvm::MDString::get(SSAScope::context(), "ConstantScopeDomain") });
_constant_scope = llvm::MDNode::getDistinct(SSAScope::context(), { _constant_scope_domain });
_constant_scope_list = llvm::MDNode::get(SSAScope::context(), { _constant_scope });
}
SSAScope::~SSAScope()
@ -50,6 +54,11 @@ llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
return alloca_builder.CreateAlloca(type, size.v, hint());
}
// Accessor for the metadata node held in the current SSAScope instance's
// _constant_scope_list member.
llvm::MDNode *SSAScope::constant_scope_list()
{
	llvm::MDNode *scope_list = instance->_constant_scope_list;
	return scope_list;
}
const std::string &SSAScope::hint()
{
return instance->_hint;

View file

@ -14,6 +14,7 @@ public:
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
static llvm::Value *alloca(llvm::Type *type);
static llvm::Value *alloca(llvm::Type *type, SSAInt size);
static llvm::MDNode *constant_scope_list();
static const std::string &hint();
static void set_hint(const std::string &hint);
@ -22,6 +23,9 @@ private:
llvm::LLVMContext *_context;
llvm::Module *_module;
llvm::IRBuilder<> *_builder;
llvm::MDNode *_constant_scope_domain;
llvm::MDNode *_constant_scope;
llvm::MDNode *_constant_scope_list;
std::string _hint;
};

View file

@ -23,32 +23,45 @@ SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const
return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
}
// Emits a load through this pointer and wraps the result as an SSAUByte.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the metadata-tagging lines are the post-commit form.
SSAUByte SSAUBytePtr::load() const
SSAUByte SSAUBytePtr::load(bool constantScopeDomain) const
{
return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAUByte::from_llvm(loadInst);
}
// Bit-casts this pointer to an i32 pointer, emits a single 32-bit load through
// it, and returns SSAVec4i::unpack of the loaded value.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two versions
// of the i32 load appear; presumably the metadata-tagging lines are the
// post-commit form.
SSAVec4i SSAUBytePtr::load_vec4ub() const
SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const
{
SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
SSAInt i32 = SSAInt::from_llvm(loadInst);
return SSAVec4i::unpack(i32);
}
// Bit-casts this pointer to a pointer to a 16 x i8 vector, emits a load through
// it, and wraps the result as an SSAVec16ub.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the metadata-tagging lines are the post-commit form.
SSAVec16ub SSAUBytePtr::load_vec16ub() const
SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec16ub::from_llvm(loadInst);
}
// Bit-casts this pointer to a pointer to a 16 x i8 vector and emits a load
// through it with 4 passed as the alignment, wrapping the result as an SSAVec16ub.
// In the form taking constantScopeDomain, a true value tags the load instruction
// with MD_alias_scope metadata set to SSAScope::constant_scope_list().
// NOTE(review): an alignment of 4 is stated for a load through a byte pointer;
// confirm every caller passes an address that is at least 4-byte aligned.
// NOTE(review): this listing is diff output, so two signatures and two bodies
// appear; presumably the CreateAlignedLoad lines are the post-commit form.
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4, false, SSAScope::hint());
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec16ub::from_llvm(loadInst);
}
// Emits a store of new_value through this pointer. The form that keeps the
// returned instruction tags it with MD_noalias metadata set to
// SSAScope::constant_scope_list().
// NOTE(review): this listing is diff output, so the store appears twice;
// presumably the metadata-tagging lines are the post-commit form.
void SSAUBytePtr::store(const SSAUByte &new_value)
{
SSAScope::builder().CreateStore(new_value.v, v, false);
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
@ -66,13 +79,15 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3)));
llvm::Value *mask = llvm::ConstantVector::get(constants);
llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint());
SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false);
llvm::StoreInst *inst = SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()));
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
// The following generates _mm_stream_si128, maybe!
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1));
@ -83,4 +98,5 @@ void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -19,11 +19,10 @@ public:
static llvm::Type *llvm_type();
SSAUBytePtr operator[](SSAInt index) const;
SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; }
// Load/store entry points of the byte pointer wrapper.
// The `constantScopeDomain` overloads take a flag that, in the definitions, controls whether
// the emitted load is tagged with MD_alias_scope (SSAScope::constant_scope_list()).
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The parameterless
// load declarations look like the removed lines, and load_vec8s has no flag-taking counterpart
// in this span - confirm against the repository.
SSAUByte load() const;
SSAVec4i load_vec4ub() const;
SSAVec8s load_vec8s() const;
SSAVec16ub load_vec16ub() const;
SSAVec16ub load_unaligned_vec16ub() const;
SSAUByte load(bool constantScopeDomain) const;
SSAVec4i load_vec4ub(bool constantScopeDomain) const;
SSAVec16ub load_vec16ub(bool constantScopeDomain) const;
SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const;
// Stores take no scope flag.
void store(const SSAUByte &new_value);
void store_vec4ub(const SSAVec4i &new_value);
void store_vec16ub(const SSAVec16ub &new_value);

View file

@ -4,14 +4,18 @@
#include "ssa_int.h"
#include "ssa_scope.h"
// Emits a load through the wrapped llvm value `v` and returns the result as an SSAValue.
// constantScopeDomain: when true, the load instruction is tagged with MD_alias_scope metadata
// taken from SSAScope::constant_scope_list().
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The `load()`
// signature and the one-line return look like the removed lines - confirm against the
// repository.
SSAValue SSAValue::load()
SSAValue SSAValue::load(bool constantScopeDomain)
{
return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false));
auto loadInst = SSAScope::builder().CreateLoad(v, false);
// Only loads the caller declares as belonging to the constant scope get the alias-scope tag.
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAValue::from_llvm(loadInst);
}
// Emits a store of `value` through the wrapped llvm value `v`.
// The store instruction is unconditionally tagged with MD_noalias metadata using
// SSAScope::constant_scope_list(); presumably this lets LLVM assume the store does not alias
// loads tagged with that alias scope - see the LLVM LangRef on scoped noalias metadata.
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The first bare
// CreateStore line looks like the removed version - confirm against the repository.
void SSAValue::store(llvm::Value *value)
{
SSAScope::builder().CreateStore(value, v, false);
auto inst = SSAScope::builder().CreateStore(value, v, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
SSAIndexLookup SSAValue::operator[](int index)

View file

@ -15,7 +15,7 @@ public:
static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; }
// Load through the wrapped value; the flag-taking form lets the caller request MD_alias_scope
// tagging of the emitted load (see the definition).
// NOTE(review): rendered diff with +/- markers stripped - `load()` looks like the removed
// declaration; confirm against the repository.
SSAValue load();
SSAValue load(bool constantScopeDomain);
// Store `v` through the wrapped value.
void store(llvm::Value *v);
template<typename Type>
@ -38,7 +38,7 @@ public:
llvm::Value *v;
std::vector<llvm::Value *> indexes;
// Convenience forwarders: copy-initialise an SSAValue from *this, then forward the call to it.
// NOTE(review): rendered diff with +/- markers stripped - the parameterless `load()` looks
// like the removed line; confirm against the repository.
SSAValue load() { SSAValue value = *this; return value.load(); }
SSAValue load(bool constantScopeDomain) { SSAValue value = *this; return value.load(constantScopeDomain); }
void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); }
template<typename Type>

View file

@ -23,22 +23,30 @@ SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const
return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
}
// Loads an SSAVec4f through this pointer with CreateLoad; no explicit alignment is passed here.
// constantScopeDomain: when true, the load instruction is tagged with MD_alias_scope metadata
// taken from SSAScope::constant_scope_list().
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The parameterless
// signature and the one-line return look like the removed lines - confirm against the
// repository.
SSAVec4f SSAVec4fPtr::load() const
SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const
{
return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
// Only loads the caller declares as belonging to the constant scope get the alias-scope tag.
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst);
}
// Loads an SSAVec4f through this pointer using an explicit alignment of 4.
// constantScopeDomain: when true, the load instruction is tagged with MD_alias_scope metadata
// taken from SSAScope::constant_scope_list().
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The parameterless
// signature and the `new llvm::LoadInst(...)` return look like the removed lines - confirm
// against the repository.
SSAVec4f SSAVec4fPtr::load_unaligned() const
SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const
{
return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint()));
auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 4, false, SSAScope::hint());
// Only loads the caller declares as belonging to the constant scope get the alias-scope tag.
if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst);
}
// Stores an SSAVec4f through this pointer with CreateStore (no explicit alignment passed).
// The store instruction is unconditionally tagged with MD_noalias metadata using
// SSAScope::constant_scope_list(); presumably this lets LLVM assume the store does not alias
// loads tagged with that alias scope - see the LLVM LangRef on scoped noalias metadata.
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The first bare
// CreateStore line looks like the removed version - confirm against the repository.
void SSAVec4fPtr::store(const SSAVec4f &new_value)
{
SSAScope::builder().CreateStore(new_value.v, v, false);
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}
// Stores an SSAVec4f through this pointer using an explicit alignment of 4.
// The store instruction is unconditionally tagged with MD_noalias metadata using
// SSAScope::constant_scope_list(); presumably this lets LLVM assume the store does not alias
// loads tagged with that alias scope - see the LLVM LangRef on scoped noalias metadata.
// NOTE(review): this text is a rendered diff with the +/- markers stripped. The first bare
// CreateAlignedStore line looks like the removed version - confirm against the repository.
void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value)
{
SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
}

View file

@ -15,8 +15,8 @@ public:
static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); }
static llvm::Type *llvm_type();
SSAVec4fPtr operator[](SSAInt index) const;
// Load entry points; the `constantScopeDomain` flag controls, in the definitions, whether the
// emitted load is tagged with MD_alias_scope (SSAScope::constant_scope_list()).
// NOTE(review): rendered diff with +/- markers stripped - the parameterless declarations look
// like the removed lines; confirm against the repository.
SSAVec4f load() const;
SSAVec4f load_unaligned() const;
SSAVec4f load(bool constantScopeDomain) const;
SSAVec4f load_unaligned(bool constantScopeDomain) const;
// Stores take no scope flag.
void store(const SSAVec4f &new_value);
void store_unaligned(const SSAVec4f &new_value);