diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index e920fb73a9..721c85dd85 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -13,41 +13,41 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) { - dest = args[0][0].load(); - source = args[0][1].load(); - colormap = args[0][2].load(); - translation = args[0][3].load(); - basecolors = args[0][4].load(); - pitch = args[0][5].load(); - count = args[0][6].load(); - dest_y = args[0][7].load(); + dest = args[0][0].load(true); + source = args[0][1].load(true); + colormap = args[0][2].load(true); + translation = args[0][3].load(true); + basecolors = args[0][4].load(true); + pitch = args[0][5].load(true); + count = args[0][6].load(true); + dest_y = args[0][7].load(true); if (method == DrawColumnMethod::Normal) - iscale = args[0][8].load(); - texturefrac = args[0][9].load(); - light = args[0][10].load(); - color = SSAVec4i::unpack(args[0][11].load()); - srccolor = SSAVec4i::unpack(args[0][12].load()); - srcalpha = args[0][13].load(); - destalpha = args[0][14].load(); - SSAShort light_alpha = args[0][15].load(); - SSAShort light_red = args[0][16].load(); - SSAShort light_green = args[0][17].load(); - SSAShort light_blue = args[0][18].load(); - SSAShort fade_alpha = args[0][19].load(); - SSAShort fade_red = args[0][20].load(); - SSAShort fade_green = args[0][21].load(); - SSAShort fade_blue = args[0][22].load(); - SSAShort desaturate = args[0][23].load(); - SSAInt flags = args[0][24].load(); + iscale = args[0][8].load(true); + texturefrac = args[0][9].load(true); + light = args[0][10].load(true); + color = SSAVec4i::unpack(args[0][11].load(true)); + srccolor = SSAVec4i::unpack(args[0][12].load(true)); + srcalpha = args[0][13].load(true); + destalpha = args[0][14].load(true); + SSAShort light_alpha = args[0][15].load(true); + SSAShort light_red = args[0][16].load(true); + SSAShort light_green = args[0][17].load(true); + SSAShort light_blue = args[0][18].load(true); + SSAShort fade_alpha = args[0][19].load(true); + SSAShort fade_red = args[0][20].load(true); + SSAShort fade_green = args[0][21].load(true); + SSAShort fade_blue = args[0][22].load(true); + SSAShort desaturate = args[0][23].load(true); + SSAInt flags = args[0][24].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); - thread.core = thread_data[0][0].load(); - thread.num_cores = thread_data[0][1].load(); - thread.pass_start_y = thread_data[0][2].load(); - thread.pass_end_y = thread_data[0][3].load(); - thread.temp = thread_data[0][4].load(); + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = thread_data[0][2].load(true); + thread.pass_end_y = thread_data[0][3].load(true); + thread.temp = thread_data[0][4].load(true); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); @@ -104,7 +104,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, if (numColumns == 4) { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); SSAVec8s bg0 = SSAVec8s::extendlo(bg); SSAVec8s bg1 = SSAVec8s::extendhi(bg); bgcolor[0] = SSAVec4i::extendlo(bg0); @@ -114,7 +114,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, } else { - bgcolor[0] = dest[offset].load_vec4ub(); + bgcolor[0] = dest[offset].load_vec4ub(false); } SSAVec4i outcolor[4]; @@ -131,7 +131,7 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, dest[offset].store_vec4ub(outcolor[0]); } - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); if (method == DrawColumnMethod::Normal) stack_frac.store(frac + iscale); loop.end_block(); @@ -218,7 +218,7 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo { default: case DrawColumnVariant::DrawCopy: - return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub(true)); case DrawColumnVariant::Draw: return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawAdd: @@ -259,17 +259,17 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) { - return source[sample_index].load_vec4ub(); + return source[sample_index].load_vec4ub(true); } SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) { - return colormap[source[sample_index].load().zext_int()].load().zext_int(); + return colormap[source[sample_index].load(true).zext_int()].load(true).zext_int(); } SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index) { - return translation[source[sample_index].load().zext_int()].load().zext_int(); + return translation[source[sample_index].load(true).zext_int()].load(true).zext_int(); } SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 822a811411..3916c29ab0 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -43,13 +43,13 @@ SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) { - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index]; return shade_bgra_simple(color, light); } SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) { - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + SSAVec4i color = basecolors[index * 4].load_vec4ub(true); // = GPalette.BaseColors[index]; return shade_bgra_advanced(color, light, constants); } @@ -125,10 +125,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAInt y0 = frac_y0 >> FRACBITS; SSAInt y1 = frac_y1 >> FRACBITS; - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(true); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(true); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(true); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(true); SSAInt inv_b = texturefracx; SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; @@ -147,10 +147,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt SSAInt x = xfrac >> xbits; SSAInt y = yfrac >> ybits; - SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(true); SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index 70ecb0abd0..4404456ab8 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -13,31 +13,31 @@ void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) { - destorg = args[0][0].load(); - source = args[0][1].load(); - destpitch = args[0][2].load(); - stack_xfrac.store(args[0][3].load()); - stack_yfrac.store(args[0][4].load()); - xstep = args[0][5].load(); - ystep = args[0][6].load(); - x1 = args[0][7].load(); - x2 = args[0][8].load(); - y = args[0][9].load(); - xbits = args[0][10].load(); - ybits = args[0][11].load(); - light = args[0][12].load(); - srcalpha = args[0][13].load(); - destalpha = args[0][14].load(); - SSAShort light_alpha = args[0][15].load(); - SSAShort light_red = args[0][16].load(); - SSAShort light_green = args[0][17].load(); - SSAShort light_blue = args[0][18].load(); - SSAShort fade_alpha = args[0][19].load(); - SSAShort fade_red = args[0][20].load(); - SSAShort fade_green = args[0][21].load(); - SSAShort fade_blue = args[0][22].load(); - SSAShort desaturate = args[0][23].load(); - SSAInt flags = args[0][24].load(); + destorg = args[0][0].load(true); + source = args[0][1].load(true); + destpitch = args[0][2].load(true); + stack_xfrac.store(args[0][3].load(true)); + stack_yfrac.store(args[0][4].load(true)); + xstep = args[0][5].load(true); + ystep = args[0][6].load(true); + x1 = args[0][7].load(true); + x2 = args[0][8].load(true); + y = args[0][9].load(true); + xbits = args[0][10].load(true); + ybits = args[0][11].load(true); + light = args[0][12].load(true); + srcalpha = args[0][13].load(true); + destalpha = args[0][14].load(true); + SSAShort light_alpha = args[0][15].load(true); + SSAShort light_red = args[0][16].load(true); + SSAShort light_green = args[0][17].load(true); + SSAShort light_blue = args[0][18].load(true); + SSAShort fade_alpha = args[0][19].load(true); + SSAShort fade_red = args[0][20].load(true); + SSAShort fade_green = args[0][21].load(true); + SSAShort fade_blue = args[0][22].load(true); + SSAShort desaturate = args[0][23].load(true); + SSAInt flags = args[0][24].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); @@ -97,7 +97,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool SSAInt index = stack_index.load(); loop.loop_block(index < sseLength); - SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(); + SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(false); SSAVec8s bg0 = SSAVec8s::extendlo(bg); SSAVec8s bg1 = SSAVec8s::extendhi(bg); SSAVec4i bgcolors[4] = @@ -123,7 +123,7 @@ SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); data[index * 16].store_unaligned_vec16ub(color); - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); loop.end_block(); } return sseLength; @@ -140,11 +140,11 @@ void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleS SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); - SSAVec4i bgcolor = data[index * 4].load_vec4ub(); + SSAVec4i bgcolor = data[index * 4].load_vec4ub(false); SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); data[index * 4].store_vec4ub(color); - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); loop.end_block(); @@ -160,7 +160,7 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); else spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - return source[spot * 4].load_vec4ub(); + return source[spot * 4].load_vec4ub(true); } else { diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 55b17dafee..56d99e78ee 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -13,58 +13,58 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) { - dest = args[0][0].load(); - source[0] = args[0][1].load(); - source[1] = args[0][2].load(); - source[2] = args[0][3].load(); - source[3] = args[0][4].load(); - source2[0] = args[0][5].load(); - source2[1] = args[0][6].load(); - source2[2] = args[0][7].load(); - source2[3] = args[0][8].load(); - pitch = args[0][9].load(); - count = args[0][10].load(); - dest_y = args[0][11].load(); - texturefrac[0] = args[0][12].load(); - texturefrac[1] = args[0][13].load(); - texturefrac[2] = args[0][14].load(); - texturefrac[3] = args[0][15].load(); - texturefracx[0] = args[0][16].load(); - texturefracx[1] = args[0][17].load(); - texturefracx[2] = args[0][18].load(); - texturefracx[3] = args[0][19].load(); - iscale[0] = args[0][20].load(); - iscale[1] = args[0][21].load(); - iscale[2] = args[0][22].load(); - iscale[3] = args[0][23].load(); - textureheight[0] = args[0][24].load(); - textureheight[1] = args[0][25].load(); - textureheight[2] = args[0][26].load(); - textureheight[3] = args[0][27].load(); - light[0] = args[0][28].load(); - light[1] = args[0][29].load(); - light[2] = args[0][30].load(); - light[3] = args[0][31].load(); - srcalpha = args[0][32].load(); - destalpha = args[0][33].load(); - SSAShort light_alpha = args[0][34].load(); - SSAShort light_red = args[0][35].load(); - SSAShort light_green = args[0][36].load(); - SSAShort light_blue = args[0][37].load(); - SSAShort fade_alpha = args[0][38].load(); - SSAShort fade_red = args[0][39].load(); - SSAShort fade_green = args[0][40].load(); - SSAShort fade_blue = args[0][41].load(); - SSAShort desaturate = args[0][42].load(); - SSAInt flags = args[0][43].load(); + dest = args[0][0].load(true); + source[0] = args[0][1].load(true); + source[1] = args[0][2].load(true); + source[2] = args[0][3].load(true); + source[3] = args[0][4].load(true); + source2[0] = args[0][5].load(true); + source2[1] = args[0][6].load(true); + source2[2] = args[0][7].load(true); + source2[3] = args[0][8].load(true); + pitch = args[0][9].load(true); + count = args[0][10].load(true); + dest_y = args[0][11].load(true); + texturefrac[0] = args[0][12].load(true); + texturefrac[1] = args[0][13].load(true); + texturefrac[2] = args[0][14].load(true); + texturefrac[3] = args[0][15].load(true); + texturefracx[0] = args[0][16].load(true); + texturefracx[1] = args[0][17].load(true); + texturefracx[2] = args[0][18].load(true); + texturefracx[3] = args[0][19].load(true); + iscale[0] = args[0][20].load(true); + iscale[1] = args[0][21].load(true); + iscale[2] = args[0][22].load(true); + iscale[3] = args[0][23].load(true); + textureheight[0] = args[0][24].load(true); + textureheight[1] = args[0][25].load(true); + textureheight[2] = args[0][26].load(true); + textureheight[3] = args[0][27].load(true); + light[0] = args[0][28].load(true); + light[1] = args[0][29].load(true); + light[2] = args[0][30].load(true); + light[3] = args[0][31].load(true); + srcalpha = args[0][32].load(true); + destalpha = args[0][33].load(true); + SSAShort light_alpha = args[0][34].load(true); + SSAShort light_red = args[0][35].load(true); + SSAShort light_green = args[0][36].load(true); + SSAShort light_blue = args[0][37].load(true); + SSAShort fade_alpha = args[0][38].load(true); + SSAShort fade_red = args[0][39].load(true); + SSAShort fade_green = args[0][40].load(true); + SSAShort fade_blue = args[0][41].load(true); + SSAShort desaturate = args[0][42].load(true); + SSAInt flags = args[0][43].load(true); shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); - thread.core = thread_data[0][0].load(); - thread.num_cores = thread_data[0][1].load(); - thread.pass_start_y = thread_data[0][2].load(); - thread.pass_end_y = thread_data[0][3].load(); + thread.core = thread_data[0][0].load(true); + thread.num_cores = thread_data[0][1].load(true); + thread.pass_start_y = thread_data[0][2].load(true); + thread.pass_end_y = thread_data[0][3].load(true); is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade); is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter); @@ -118,7 +118,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim if (fourColumns) { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); SSAVec8s bg0 = SSAVec8s::extendlo(bg); SSAVec8s bg1 = SSAVec8s::extendhi(bg); SSAVec4i bgcolors[4] = @@ -138,12 +138,12 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim } else { - SSAVec4i bgcolor = dest[offset].load_vec4ub(); + SSAVec4i bgcolor = dest[offset].load_vec4ub(false); SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); dest[offset].store_vec4ub(color); } - stack_index.store(index + 1); + stack_index.store(index.add(SSAInt(1), true, true)); for (int i = 0; i < numColumns; i++) stack_frac[i].store(frac[i] + fracstep[i]); loop.end_block(); @@ -155,7 +155,7 @@ SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) if (isNearestFilter) { SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; - return source[index][sample_index * 4].load_vec4ub(); + return source[index][sample_index * 4].load_vec4ub(false); } else { diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 582821ca03..f694be15d1 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -23,36 +23,48 @@ SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAFloat SSAFloatPtr::load() const +SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const { - return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAFloat::from_llvm(loadInst); } -SSAVec4f SSAFloatPtr::load_vec4f() const +SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } -SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const +SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } void SSAFloatPtr::store(const SSAFloat &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h index f29b2de3f7..a9953eb813 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.h +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -17,9 +17,9 @@ public: static llvm::Type *llvm_type(); SSAFloatPtr operator[](SSAInt index) const; SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAFloat load() const; - SSAVec4f load_vec4f() const; - SSAVec4f load_unaligned_vec4f() const; + SSAFloat load(bool constantScopeDomain) const; + SSAVec4f load_vec4f(bool constantScopeDomain) const; + SSAVec4f load_unaligned_vec4f(bool constantScopeDomain) const; void store(const SSAFloat &new_value); void store_vec4f(const SSAVec4f &new_value); void store_unaligned_vec4f(const SSAVec4f &new_value); diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 3d9cb22bdf..1815985c5f 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -42,6 +42,11 @@ SSAInt SSAInt::MAX(SSAInt a, SSAInt b) return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); } +SSAInt SSAInt::add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAdd(v, b.v, SSAScope::hint(), no_unsigned_wrap, no_signed_wrap)); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index c0f46e4b67..e9ce978c47 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -19,6 +19,8 @@ public: static SSAInt MIN(SSAInt a, SSAInt b); static SSAInt MAX(SSAInt a, SSAInt b); + SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index 974645d08c..d9441088e6 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -23,36 +23,48 @@ SSAIntPtr SSAIntPtr::operator[](SSAInt index) const return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAInt SSAIntPtr::load() const +SSAInt SSAIntPtr::load(bool constantScopeDomain) const { - return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAInt::from_llvm(loadInst); } -SSAVec4i SSAIntPtr::load_vec4i() const +SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4i::from_llvm(loadInst); } -SSAVec4i SSAIntPtr::load_unaligned_vec4i() const +SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4i::from_llvm(loadInst); } void SSAIntPtr::store(const SSAInt &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h index c75ed6a8d5..9685283651 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.h +++ b/src/r_compiler/ssa/ssa_int_ptr.h @@ -17,9 +17,9 @@ public: static llvm::Type *llvm_type(); SSAIntPtr operator[](SSAInt index) const; SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAInt load() const; - SSAVec4i load_vec4i() const; - SSAVec4i load_unaligned_vec4i() const; + SSAInt load(bool constantScopeDomain) const; + SSAVec4i load_vec4i(bool constantScopeDomain) const; + SSAVec4i load_unaligned_vec4i(bool constantScopeDomain) const; void store(const SSAInt &new_value); void store_vec4i(const SSAVec4i &new_value); void store_unaligned_vec4i(const SSAVec4i &new_value); diff --git a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h index 507e95b5d1..9cefb517b4 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4f.h +++ b/src/r_compiler/ssa/ssa_pixelformat4f.h @@ -13,9 +13,9 @@ public: SSAFloatPtr pixels() { return _pixels; } SSAFloatPtr pixels() const { return _pixels; } - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return _pixels[index * 4].load_vec4f(); + return _pixels[index * 4].load_vec4f(constantScopeDomain); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h index fdf98c4aa6..91b04557c4 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4ub.h +++ b/src/r_compiler/ssa/ssa_pixelformat4ub.h @@ -13,9 +13,9 @@ public: SSAUBytePtr pixels() { return _pixels; } SSAUBytePtr pixels() const { return _pixels; } - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f); + return SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h index 4601eeb3c1..1f7e4eb0f4 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h @@ -20,9 +20,9 @@ public: out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); } */ - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3); + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h index 402480c49b..9b50ec00fe 100644 --- a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h @@ -13,9 +13,9 @@ public: SSAUBytePtr pixels() { return _pixels; } SSAUBytePtr pixels() const { return _pixels; } - SSAVec4f get4f(SSAInt index) const + SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const { - return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0); + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0); } void set4f(SSAInt index, const SSAVec4f &pixel) diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index e5d34a2033..520f301a4a 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -7,6 +7,10 @@ SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBui : _context(context), _module(module), _builder(builder) { instance = this; + + _constant_scope_domain = llvm::MDNode::get(SSAScope::context(), { llvm::MDString::get(SSAScope::context(), "ConstantScopeDomain") }); + _constant_scope = llvm::MDNode::getDistinct(SSAScope::context(), { _constant_scope_domain }); + _constant_scope_list = llvm::MDNode::get(SSAScope::context(), { _constant_scope }); } SSAScope::~SSAScope() @@ -50,6 +54,11 @@ llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size) return alloca_builder.CreateAlloca(type, size.v, hint()); } +llvm::MDNode *SSAScope::constant_scope_list() +{ + return instance->_constant_scope_list; +} + const std::string &SSAScope::hint() { return instance->_hint; diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h index ad080fde6c..c942a7c377 100644 --- a/src/r_compiler/ssa/ssa_scope.h +++ b/src/r_compiler/ssa/ssa_scope.h @@ -14,6 +14,7 @@ public: static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); static llvm::Value *alloca(llvm::Type *type); static llvm::Value *alloca(llvm::Type *type, SSAInt size); + static llvm::MDNode *constant_scope_list(); static const std::string &hint(); static void set_hint(const std::string &hint); @@ -22,6 +23,9 @@ private: llvm::LLVMContext *_context; llvm::Module *_module; llvm::IRBuilder<> *_builder; + llvm::MDNode *_constant_scope_domain; + llvm::MDNode *_constant_scope; + llvm::MDNode *_constant_scope_list; std::string _hint; }; diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 34de0ab889..1ce4a6ae28 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -23,32 +23,45 @@ SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAUByte SSAUBytePtr::load() const +SSAUByte SSAUBytePtr::load(bool constantScopeDomain) const { - return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAUByte::from_llvm(loadInst); } -SSAVec4i SSAUBytePtr::load_vec4ub() const +SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const { - SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + SSAInt i32 = SSAInt::from_llvm(loadInst); return SSAVec4i::unpack(i32); } -SSAVec16ub SSAUBytePtr::load_vec16ub() const +SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec16ub::from_llvm(loadInst); } -SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const +SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec16ub::from_llvm(loadInst); } void SSAUBytePtr::store(const SSAUByte &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) @@ -66,13 +79,15 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); llvm::Value *mask = llvm::ConstantVector::get(constants); llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); - SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); + llvm::StoreInst *inst = SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); // The following generates _mm_stream_si128, maybe! // llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); @@ -83,4 +98,5 @@ void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h index c084068bc7..167a5877d6 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.h +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -19,11 +19,10 @@ public: static llvm::Type *llvm_type(); SSAUBytePtr operator[](SSAInt index) const; SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; } - SSAUByte load() const; - SSAVec4i load_vec4ub() const; - SSAVec8s load_vec8s() const; - SSAVec16ub load_vec16ub() const; - SSAVec16ub load_unaligned_vec16ub() const; + SSAUByte load(bool constantScopeDomain) const; + SSAVec4i load_vec4ub(bool constantScopeDomain) const; + SSAVec16ub load_vec16ub(bool constantScopeDomain) const; + SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const; void store(const SSAUByte &new_value); void store_vec4ub(const SSAVec4i &new_value); void store_vec16ub(const SSAVec16ub &new_value); diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp index c37b7f4c1d..65f9da15d6 100644 --- a/src/r_compiler/ssa/ssa_value.cpp +++ b/src/r_compiler/ssa/ssa_value.cpp @@ -4,14 +4,18 @@ #include "ssa_int.h" #include "ssa_scope.h" -SSAValue SSAValue::load() +SSAValue SSAValue::load(bool constantScopeDomain) { - return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false)); + auto loadInst = SSAScope::builder().CreateLoad(v, false); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAValue::from_llvm(loadInst); } void SSAValue::store(llvm::Value *value) { - SSAScope::builder().CreateStore(value, v, false); + auto inst = SSAScope::builder().CreateStore(value, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } SSAIndexLookup SSAValue::operator[](int index) diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h index ec156a4529..d0d73043c9 100644 --- a/src/r_compiler/ssa/ssa_value.h +++ b/src/r_compiler/ssa/ssa_value.h @@ -15,7 +15,7 @@ public: static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } - SSAValue load(); + SSAValue load(bool constantScopeDomain); void store(llvm::Value *v); template @@ -38,7 +38,7 @@ public: llvm::Value *v; std::vector indexes; - SSAValue load() { SSAValue value = *this; return value.load(); } + SSAValue load(bool constantScopeDomain) { SSAValue value = *this; return value.load(constantScopeDomain); } void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); } template diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index e0ed8bc868..e8bac71f17 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -23,22 +23,30 @@ SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); } -SSAVec4f SSAVec4fPtr::load() const +SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const { - return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } -SSAVec4f SSAVec4fPtr::load_unaligned() const +SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const { - return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint())); + auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 4, false, SSAScope::hint()); + if (constantScopeDomain) + loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); + return SSAVec4f::from_llvm(loadInst); } void SSAVec4fPtr::store(const SSAVec4f &new_value) { - SSAScope::builder().CreateStore(new_value.v, v, false); + auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) { - SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); + auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); + inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); } diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h index ab4e841900..15192352a9 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.h +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h @@ -15,8 +15,8 @@ public: static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); } static llvm::Type *llvm_type(); SSAVec4fPtr operator[](SSAInt index) const; - SSAVec4f load() const; - SSAVec4f load_unaligned() const; + SSAVec4f load(bool constantScopeDomain) const; + SSAVec4f load_unaligned(bool constantScopeDomain) const; void store(const SSAVec4f &new_value); void store_unaligned(const SSAVec4f &new_value);