Fix weird access violation in some of the drawers

This commit is contained in:
Magnus Norddahl 2016-10-13 11:16:51 +02:00
parent b5b96ee222
commit d1d443497f
6 changed files with 104 additions and 58 deletions

View file

@ -37,6 +37,7 @@
#include <llvm/ExecutionEngine/ExecutionEngine.h> #include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h> #include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/Analysis/Passes.h> #include <llvm/Analysis/Passes.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/Transforms/Scalar.h> #include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/IPO.h> #include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h> #include <llvm/Transforms/IPO/PassManagerBuilder.h>

View file

@ -18,27 +18,25 @@ class LLVMProgram
{ {
public: public:
LLVMProgram(); LLVMProgram();
~LLVMProgram();
void CreateEE();
std::string DumpModule();
void StopLogFatalErrors(); void StopLogFatalErrors();
template<typename Func> template<typename Func>
Func *GetProcAddress(const char *name) { return reinterpret_cast<Func*>(PointerToFunction(name)); } Func *GetProcAddress(const char *name) { return reinterpret_cast<Func*>(PointerToFunction(name)); }
llvm::LLVMContext &context() { return *mContext; } llvm::LLVMContext &context() { return *mContext; }
llvm::Module *module() { return mModule; } llvm::Module *module() { return mModule.get(); }
llvm::ExecutionEngine *engine() { return mEngine.get(); } llvm::ExecutionEngine *engine() { return mEngine.get(); }
llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); }
llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); }
private: private:
void *PointerToFunction(const char *name); void *PointerToFunction(const char *name);
llvm::TargetMachine *machine = nullptr;
std::unique_ptr<llvm::LLVMContext> mContext; std::unique_ptr<llvm::LLVMContext> mContext;
llvm::Module *mModule; std::unique_ptr<llvm::Module> mModule;
std::unique_ptr<llvm::ExecutionEngine> mEngine; std::unique_ptr<llvm::ExecutionEngine> mEngine;
std::unique_ptr<llvm::legacy::PassManager> mModulePassManager;
std::unique_ptr<llvm::legacy::FunctionPassManager> mFunctionPassManager;
}; };
class LLVMDrawersImpl : public LLVMDrawers class LLVMDrawersImpl : public LLVMDrawers
@ -143,8 +141,7 @@ LLVMDrawersImpl::LLVMDrawersImpl()
CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1);
CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4);
mProgram.modulePassManager()->run(*mProgram.module()); mProgram.CreateEE();
mProgram.engine()->finalizeObject();
FillColumn = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumn"); FillColumn = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumn");
FillColumnAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnAdd"); FillColumnAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnAdd");
@ -205,6 +202,37 @@ LLVMDrawersImpl::LLVMDrawersImpl()
tmvline1_revsubclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline1_revsubclamp"); tmvline1_revsubclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline1_revsubclamp");
tmvline4_revsubclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline4_revsubclamp"); tmvline4_revsubclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline4_revsubclamp");
#if 0
std::vector<uint32_t> foo(1024 * 4);
std::vector<uint32_t> boo(256 * 256 * 4);
DrawColumnArgs args = { 0 };
WorkerThreadData thread = { 0 };
thread.core = 0;
thread.num_cores = 1;
thread.pass_start_y = 0;
thread.pass_end_y = 3600;
thread.temp = foo.data();
foo[125 * 4] = 1234;
foo[126 * 4] = 1234;
for (int i = 0; i < 16; i++)
boo[i] = i;
args.dest = boo.data() + 4;
args.dest_y = 125;
args.pitch = 256;
args.count = 1;
args.texturefrac = 0;
args.flags = 0;
args.iscale = 252769;
args.light = 256;
args.color = 4279179008;
args.srcalpha = 12;
args.destalpha = 256;
args.light_red = 192;
args.light_green = 256;
args.light_blue = 128;
DrawColumnRt4AddClamp(&args, &thread);
#endif
mProgram.StopLogFatalErrors(); mProgram.StopLogFatalErrors();
} }
@ -225,8 +253,6 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari
if (llvm::verifyFunction(*function.func)) if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__); I_FatalError("verifyFunction failed for " __FUNCTION__);
mProgram.functionPassManager()->run(*function.func);
} }
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
@ -245,8 +271,6 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
if (llvm::verifyFunction(*function.func)) if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__); I_FatalError("verifyFunction failed for " __FUNCTION__);
mProgram.functionPassManager()->run(*function.func);
} }
void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
@ -266,8 +290,6 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant,
if (llvm::verifyFunction(*function.func)) if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__); I_FatalError("verifyFunction failed for " __FUNCTION__);
mProgram.functionPassManager()->run(*function.func);
} }
llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
@ -298,7 +320,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags;
return llvm::StructType::get(context, elements, false)->getPointerTo(); return llvm::StructType::create(context, elements, "DrawColumnArgs", false)->getPointerTo();
} }
llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
@ -329,7 +351,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags;
return llvm::StructType::get(context, elements, false)->getPointerTo(); return llvm::StructType::create(context, elements, "DrawSpanArgs", false)->getPointerTo();
} }
llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context)
@ -350,7 +372,7 @@ llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context)
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags;
return llvm::StructType::get(context, elements, false)->getPointerTo(); return llvm::StructType::create(context, elements, "DrawWallArgs", false)->getPointerTo();
} }
llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context)
@ -359,7 +381,7 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
elements.push_back(llvm::Type::getInt32Ty(context)); elements.push_back(llvm::Type::getInt32Ty(context));
elements.push_back(llvm::Type::getInt8PtrTy(context)); elements.push_back(llvm::Type::getInt8PtrTy(context));
return llvm::StructType::get(context, elements, false)->getPointerTo(); return llvm::StructType::create(context, elements, "ThreadData", false)->getPointerTo();
} }
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
@ -406,29 +428,28 @@ LLVMProgram::LLVMProgram()
I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); I_FatalError("Could not find LLVM target: %s", errorstring.c_str());
TargetOptions opt; TargetOptions opt;
auto relocModel = Optional<Reloc::Model>(Reloc::Static); auto relocModel = Optional<Reloc::Model>();
TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive);
if (!machine) if (!machine)
I_FatalError("Could not create LLVM target machine"); I_FatalError("Could not create LLVM target machine");
mContext = std::make_unique<LLVMContext>(); mContext = std::make_unique<LLVMContext>();
auto moduleOwner = std::make_unique<Module>("render", context()); mModule = std::make_unique<Module>("render", context());
mModule = moduleOwner.get();
mModule->setTargetTriple(targetTriple); mModule->setTargetTriple(targetTriple);
mModule->setDataLayout(machine->createDataLayout()); mModule->setDataLayout(machine->createDataLayout());
EngineBuilder engineBuilder(std::move(moduleOwner)); }
engineBuilder.setErrorStr(&errorstring);
engineBuilder.setOptLevel(CodeGenOpt::Aggressive);
engineBuilder.setRelocationModel(Reloc::Static);
engineBuilder.setEngineKind(EngineKind::JIT);
mEngine.reset(engineBuilder.create(machine));
if (!mEngine)
I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str());
mModulePassManager = std::make_unique<legacy::PassManager>(); void LLVMProgram::CreateEE()
mFunctionPassManager = std::make_unique<legacy::FunctionPassManager>(mModule); {
using namespace llvm;
legacy::FunctionPassManager PerFunctionPasses(mModule.get());
legacy::PassManager PerModulePasses;
PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
PassManagerBuilder passManagerBuilder; PassManagerBuilder passManagerBuilder;
passManagerBuilder.OptLevel = 3; passManagerBuilder.OptLevel = 3;
@ -437,22 +458,46 @@ LLVMProgram::LLVMProgram()
passManagerBuilder.SLPVectorize = true; passManagerBuilder.SLPVectorize = true;
passManagerBuilder.LoopVectorize = true; passManagerBuilder.LoopVectorize = true;
passManagerBuilder.LoadCombine = true; passManagerBuilder.LoadCombine = true;
passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); passManagerBuilder.populateModulePassManager(PerModulePasses);
passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); passManagerBuilder.populateFunctionPassManager(PerFunctionPasses);
// Run function passes:
PerFunctionPasses.doInitialization();
for (llvm::Function &func : *mModule.get())
{
if (!func.isDeclaration())
PerFunctionPasses.run(func);
}
PerFunctionPasses.doFinalization();
// Run module passes:
PerModulePasses.run(*mModule.get());
std::string errorstring;
EngineBuilder engineBuilder(std::move(mModule));
engineBuilder.setErrorStr(&errorstring);
engineBuilder.setOptLevel(CodeGenOpt::Aggressive);
engineBuilder.setRelocationModel(Reloc::Static);
engineBuilder.setEngineKind(EngineKind::JIT);
mEngine.reset(engineBuilder.create(machine));
if (!mEngine)
I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str());
mEngine->finalizeObject();
} }
LLVMProgram::~LLVMProgram() std::string LLVMProgram::DumpModule()
{ {
mEngine.reset(); std::string str;
mContext.reset(); llvm::raw_string_ostream stream(str);
mModule->print(stream, nullptr, false, true);
return stream.str();
} }
void *LLVMProgram::PointerToFunction(const char *name) void *LLVMProgram::PointerToFunction(const char *name)
{ {
llvm::Function *function = mModule->getFunction(name); return reinterpret_cast<void(*)()>(mEngine->getFunctionAddress(name));
if (!function)
return nullptr;
return mEngine->getPointerToFunction(function);
} }
void LLVMProgram::StopLogFatalErrors() void LLVMProgram::StopLogFatalErrors()

View file

@ -34,7 +34,7 @@ SSAFloat SSAFloatPtr::load(bool constantScopeDomain) const
SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const
{ {
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst); return SSAVec4f::from_llvm(loadInst);
@ -43,7 +43,7 @@ SSAVec4f SSAFloatPtr::load_vec4f(bool constantScopeDomain) const
SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const SSAVec4f SSAFloatPtr::load_unaligned_vec4f(bool constantScopeDomain) const
{ {
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4, false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst); return SSAVec4f::from_llvm(loadInst);
@ -58,13 +58,13 @@ void SSAFloatPtr::store(const SSAFloat &new_value)
void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value)
{ {
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }
void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value)
{ {
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }

View file

@ -34,7 +34,7 @@ SSAInt SSAIntPtr::load(bool constantScopeDomain) const
SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const
{ {
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4i::from_llvm(loadInst); return SSAVec4i::from_llvm(loadInst);
@ -43,7 +43,7 @@ SSAVec4i SSAIntPtr::load_vec4i(bool constantScopeDomain) const
SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const SSAVec4i SSAIntPtr::load_unaligned_vec4i(bool constantScopeDomain) const
{ {
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4i::from_llvm(loadInst); return SSAVec4i::from_llvm(loadInst);
@ -58,13 +58,13 @@ void SSAIntPtr::store(const SSAInt &new_value)
void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) void SSAIntPtr::store_vec4i(const SSAVec4i &new_value)
{ {
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }
void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value)
{ {
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }

View file

@ -43,7 +43,7 @@ SSAVec4i SSAUBytePtr::load_vec4ub(bool constantScopeDomain) const
SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const
{ {
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
auto loadInst = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec16ub::from_llvm(loadInst); return SSAVec16ub::from_llvm(loadInst);
@ -52,7 +52,7 @@ SSAVec16ub SSAUBytePtr::load_vec16ub(bool constantScopeDomain) const
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub(bool constantScopeDomain) const
{ {
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4, false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec16ub::from_llvm(loadInst); return SSAVec16ub::from_llvm(loadInst);
@ -86,7 +86,7 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
{ {
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
// The following generates _mm_stream_si128, maybe! // The following generates _mm_stream_si128, maybe!
@ -97,6 +97,6 @@ void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
{ {
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }

View file

@ -25,7 +25,7 @@ SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const
SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const
{ {
auto loadInst = SSAScope::builder().CreateLoad(v, false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 16, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst); return SSAVec4f::from_llvm(loadInst);
@ -33,7 +33,7 @@ SSAVec4f SSAVec4fPtr::load(bool constantScopeDomain) const
SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const
{ {
auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 4, false, SSAScope::hint()); auto loadInst = SSAScope::builder().CreateAlignedLoad(v, 1, false, SSAScope::hint());
if (constantScopeDomain) if (constantScopeDomain)
loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list()); loadInst->setMetadata(llvm::LLVMContext::MD_alias_scope, SSAScope::constant_scope_list());
return SSAVec4f::from_llvm(loadInst); return SSAVec4f::from_llvm(loadInst);
@ -41,12 +41,12 @@ SSAVec4f SSAVec4fPtr::load_unaligned(bool constantScopeDomain) const
void SSAVec4fPtr::store(const SSAVec4f &new_value) void SSAVec4fPtr::store(const SSAVec4f &new_value)
{ {
auto inst = SSAScope::builder().CreateStore(new_value.v, v, false); auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }
void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value)
{ {
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 1, false);
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list()); inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
} }