mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-24 04:51:41 +00:00
macOS compile fixes and support for slightly older versions of LLVM (to allow Mac users to brew install llvm)
This commit is contained in:
parent
dfed525e18
commit
d654301bc2
14 changed files with 55 additions and 669 deletions
|
@ -13,8 +13,8 @@
|
|||
#include "r_compiler/ssa/ssa_ubyte_ptr.h"
|
||||
#include "r_compiler/ssa/ssa_vec4f_ptr.h"
|
||||
#include "r_compiler/ssa/ssa_vec4i_ptr.h"
|
||||
#include "r_compiler/ssa/ssa_pixels.h"
|
||||
#include "r_compiler/ssa/ssa_stack.h"
|
||||
#include "r_compiler/ssa/ssa_bool.h"
|
||||
#include "r_compiler/ssa/ssa_barycentric_weight.h"
|
||||
#include "r_compiler/llvm_include.h"
|
||||
|
||||
|
|
|
@ -25,12 +25,19 @@
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
|
||||
#define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h"
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wredundant-move"
|
||||
#endif
|
||||
|
||||
#include <llvm/IR/DerivedTypes.h>
|
||||
#include <llvm/IR/LLVMContext.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/IR/Attributes.h>
|
||||
#include <llvm/IR/Verifier.h>
|
||||
#include <llvm/IR/PassManager.h>
|
||||
//#include <llvm/IR/PassManager.h>
|
||||
#include <llvm/IR/LegacyPassManager.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/Intrinsics.h>
|
||||
|
@ -43,8 +50,14 @@
|
|||
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/Host.h>
|
||||
#include <llvm/CodeGen/AsmPrinter.h>
|
||||
#include <llvm/MC/MCAsmInfo.h>
|
||||
#include <llvm/Target/TargetSubtargetInfo.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
|
|
|
@ -263,7 +263,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari
|
|||
builder.CreateRetVoid();
|
||||
|
||||
if (llvm::verifyFunction(*function.func))
|
||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
||||
I_FatalError("verifyFunction failed for CodegenDrawColumn()");
|
||||
}
|
||||
|
||||
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
|
||||
|
@ -281,7 +281,7 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
|
|||
builder.CreateRetVoid();
|
||||
|
||||
if (llvm::verifyFunction(*function.func))
|
||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
||||
I_FatalError("verifyFunction failed for CodegenDrawSpan()");
|
||||
}
|
||||
|
||||
void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
|
||||
|
@ -300,7 +300,7 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant,
|
|||
builder.CreateRetVoid();
|
||||
|
||||
if (llvm::verifyFunction(*function.func))
|
||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
||||
I_FatalError("verifyFunction failed for CodegenDrawWall()");
|
||||
}
|
||||
|
||||
void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns)
|
||||
|
@ -319,7 +319,7 @@ void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, i
|
|||
builder.CreateRetVoid();
|
||||
|
||||
if (llvm::verifyFunction(*function.func))
|
||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
||||
I_FatalError("verifyFunction failed for CodegenDrawSky()");
|
||||
}
|
||||
|
||||
llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
|
||||
|
@ -469,7 +469,11 @@ LLVMProgram::LLVMProgram()
|
|||
I_FatalError("Could not find LLVM target: %s", errorstring.c_str());
|
||||
|
||||
TargetOptions opt;
|
||||
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
Reloc::Model relocModel = Reloc::Default;
|
||||
#else
|
||||
auto relocModel = Optional<Reloc::Model>();
|
||||
#endif
|
||||
machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive);
|
||||
if (!machine)
|
||||
I_FatalError("Could not create LLVM target machine");
|
||||
|
@ -478,7 +482,11 @@ LLVMProgram::LLVMProgram()
|
|||
|
||||
mModule = std::make_unique<Module>("render", context());
|
||||
mModule->setTargetTriple(targetTriple);
|
||||
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
mModule->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout()));
|
||||
#else
|
||||
mModule->setDataLayout(machine->createDataLayout());
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
@ -489,8 +497,10 @@ void LLVMProgram::CreateEE()
|
|||
legacy::FunctionPassManager PerFunctionPasses(mModule.get());
|
||||
legacy::PassManager PerModulePasses;
|
||||
|
||||
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
|
||||
PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
|
||||
PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
|
||||
#endif
|
||||
|
||||
PassManagerBuilder passManagerBuilder;
|
||||
passManagerBuilder.OptLevel = 3;
|
||||
|
@ -532,13 +542,17 @@ std::string LLVMProgram::DumpModule()
|
|||
{
|
||||
std::string str;
|
||||
llvm::raw_string_ostream stream(str);
|
||||
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
mModule->print(stream, nullptr);
|
||||
#else
|
||||
mModule->print(stream, nullptr, false, true);
|
||||
#endif
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
void *LLVMProgram::PointerToFunction(const char *name)
|
||||
{
|
||||
return reinterpret_cast<void(*)()>(mEngine->getFunctionAddress(name));
|
||||
return reinterpret_cast<void*>(mEngine->getFunctionAddress(name));
|
||||
}
|
||||
|
||||
void LLVMProgram::StopLogFatalErrors()
|
||||
|
|
|
@ -16,6 +16,7 @@ SSAForBlock::SSAForBlock()
|
|||
void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
|
||||
{
|
||||
auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block);
|
||||
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
|
||||
if (unroll_count > 0)
|
||||
{
|
||||
using namespace llvm;
|
||||
|
@ -29,6 +30,7 @@ void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
|
|||
auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count });
|
||||
branch->setMetadata(LLVMContext::MD_loop, md_loop);
|
||||
}
|
||||
#endif
|
||||
SSAScope::builder().SetInsertPoint(loop_basic_block);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_int.h"
|
||||
#include "ssa_float_ptr.h"
|
||||
|
||||
class SSAPixelFormat4f
|
||||
{
|
||||
public:
|
||||
SSAPixelFormat4f() { }
|
||||
SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
||||
|
||||
SSAFloatPtr pixels() { return _pixels; }
|
||||
SSAFloatPtr pixels() const { return _pixels; }
|
||||
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return _pixels[index * 4].load_vec4f(constantScopeDomain);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
{
|
||||
_pixels[index * 4].store_vec4f(pixel);
|
||||
}
|
||||
|
||||
protected:
|
||||
SSAFloatPtr _pixels;
|
||||
};
|
|
@ -1,28 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_int.h"
|
||||
#include "ssa_ubyte_ptr.h"
|
||||
|
||||
class SSAPixelFormat4ub
|
||||
{
|
||||
public:
|
||||
SSAPixelFormat4ub() { }
|
||||
SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
||||
|
||||
SSAUBytePtr pixels() { return _pixels; }
|
||||
SSAUBytePtr pixels() const { return _pixels; }
|
||||
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
{
|
||||
_pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f));
|
||||
}
|
||||
|
||||
private:
|
||||
SSAUBytePtr _pixels;
|
||||
};
|
|
@ -1,35 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_int.h"
|
||||
#include "ssa_ubyte_ptr.h"
|
||||
|
||||
class SSAPixelFormat4ub_argb_rev
|
||||
{
|
||||
public:
|
||||
SSAPixelFormat4ub_argb_rev() { }
|
||||
SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
||||
|
||||
SSAUBytePtr pixels() { return _pixels; }
|
||||
SSAUBytePtr pixels() const { return _pixels; }
|
||||
/*
|
||||
void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const
|
||||
{
|
||||
SSAVec8s p = _pixels[index * 4].load_vec8s();
|
||||
out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
||||
out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
||||
}
|
||||
*/
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
{
|
||||
_pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3)));
|
||||
}
|
||||
|
||||
public:
|
||||
SSAUBytePtr _pixels;
|
||||
};
|
|
@ -1,28 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_int.h"
|
||||
#include "ssa_ubyte_ptr.h"
|
||||
|
||||
class SSAPixelFormat4ub_rev
|
||||
{
|
||||
public:
|
||||
SSAPixelFormat4ub_rev() { }
|
||||
SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
||||
|
||||
SSAUBytePtr pixels() { return _pixels; }
|
||||
SSAUBytePtr pixels() const { return _pixels; }
|
||||
|
||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
||||
{
|
||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0);
|
||||
}
|
||||
|
||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
||||
{
|
||||
_pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0)));
|
||||
}
|
||||
|
||||
public:
|
||||
SSAUBytePtr _pixels;
|
||||
};
|
|
@ -1,39 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_ubyte.h"
|
||||
#include "ssa_ubyte_ptr.h"
|
||||
#include "ssa_float.h"
|
||||
#include "ssa_float_ptr.h"
|
||||
#include "ssa_int.h"
|
||||
#include "ssa_pixeltype.h"
|
||||
//#include "ssa_pixelformat1f.h"
|
||||
//#include "ssa_pixelformat2f.h"
|
||||
//#include "ssa_pixelformat3f.h"
|
||||
#include "ssa_pixelformat4f.h"
|
||||
//#include "ssa_pixelformat1ub.h"
|
||||
//#include "ssa_pixelformat2ub.h"
|
||||
//#include "ssa_pixelformat3ub.h"
|
||||
//#include "ssa_pixelformat3ub_rev.h"
|
||||
#include "ssa_pixelformat4ub.h"
|
||||
//#include "ssa_pixelformat4ub_argb.h"
|
||||
#include "ssa_pixelformat4ub_rev.h"
|
||||
#include "ssa_pixelformat4ub_argb_rev.h"
|
||||
//#include "ssa_pixelformat4ub_channel.h"
|
||||
|
||||
//typedef SSAPixelType<SSAPixelFormat1f, SSAFloatPtr> SSAPixels1f;
|
||||
//typedef SSAPixelType<SSAPixelFormat2f, SSAFloatPtr> SSAPixels2f;
|
||||
//typedef SSAPixelType<SSAPixelFormat3f, SSAFloatPtr> SSAPixels3f;
|
||||
typedef SSAPixelType<SSAPixelFormat4f, SSAFloatPtr> SSAPixels4f;
|
||||
|
||||
//typedef SSAPixelType<SSAPixelFormat1ub, SSAUBytePtr> SSAPixels1ub;
|
||||
//typedef SSAPixelType<SSAPixelFormat2ub, SSAUBytePtr> SSAPixels2ub;
|
||||
//typedef SSAPixelType<SSAPixelFormat3ub, SSAUBytePtr> SSAPixels3ub;
|
||||
typedef SSAPixelType<SSAPixelFormat4ub, SSAUBytePtr> SSAPixels4ub;
|
||||
//typedef SSAPixelType<SSAPixelFormat4ub_argb, SSAUBytePtr> SSAPixels4ub_argb;
|
||||
|
||||
//typedef SSAPixelType<SSAPixelFormat3ub_rev, SSAUBytePtr> SSAPixels3ub_rev;
|
||||
typedef SSAPixelType<SSAPixelFormat4ub_rev, SSAUBytePtr> SSAPixels4ub_rev;
|
||||
typedef SSAPixelType<SSAPixelFormat4ub_argb_rev, SSAUBytePtr> SSAPixels4ub_argb_rev;
|
||||
|
||||
//typedef SSAPixelType<SSAPixelFormat4ub_channel, SSAUBytePtr> SSAPixels4ub_channel;
|
|
@ -1,498 +0,0 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_int.h"
|
||||
#include "ssa_float.h"
|
||||
#include "ssa_vec4f.h"
|
||||
#include "ssa_bool.h"
|
||||
#include "ssa_if_block.h"
|
||||
#include "ssa_phi.h"
|
||||
|
||||
template<typename PixelFormat, typename PixelType>
|
||||
class SSAPixelType : public PixelFormat
|
||||
{
|
||||
public:
|
||||
SSAPixelType()
|
||||
{
|
||||
}
|
||||
|
||||
SSAPixelType(SSAInt width, SSAInt height, PixelType pixels)
|
||||
: PixelFormat(pixels, width, height), _width(width), _height(height)
|
||||
{
|
||||
_width32 = SSAVec4i(_width);
|
||||
SSAVec4i height32(_height);
|
||||
_widthps = SSAVec4f(_width32);
|
||||
_heightps = SSAVec4f(height32);
|
||||
_width16 = SSAVec8s(_width32, _width32);
|
||||
|
||||
_widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4);
|
||||
_widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4);
|
||||
}
|
||||
|
||||
SSAInt width() const { return _width; }
|
||||
SSAInt height() const { return _height; }
|
||||
SSAInt size() const { return _width * _height; }
|
||||
|
||||
SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; }
|
||||
SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; }
|
||||
//void throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); }
|
||||
//void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); }
|
||||
|
||||
SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); }
|
||||
SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); }
|
||||
SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); }
|
||||
SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); }
|
||||
SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); }
|
||||
SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); }
|
||||
SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); }
|
||||
SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); }
|
||||
|
||||
static SSAInt int_min(SSAInt a, SSAInt b)
|
||||
{
|
||||
SSAPhi<SSAInt> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(a <= b);
|
||||
phi.add_incoming(a);
|
||||
branch.else_block();
|
||||
phi.add_incoming(b);
|
||||
branch.end_block();
|
||||
return phi.create();
|
||||
}
|
||||
|
||||
static SSAInt int_max(SSAInt a, SSAInt b)
|
||||
{
|
||||
SSAPhi<SSAInt> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(a >= b);
|
||||
phi.add_incoming(a);
|
||||
branch.else_block();
|
||||
phi.add_incoming(b);
|
||||
branch.end_block();
|
||||
return phi.create();
|
||||
}
|
||||
|
||||
static SSAInt clamp(SSAInt v, SSAInt size)
|
||||
{
|
||||
return int_max(int_min(v, size - 1), 0);
|
||||
}
|
||||
|
||||
static SSAInt repeat(SSAInt v, SSAInt size)
|
||||
{
|
||||
SSAPhi<SSAInt> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(v >= 0);
|
||||
phi.add_incoming(v % size);
|
||||
branch.else_block();
|
||||
phi.add_incoming(size - 1 + v % size);
|
||||
branch.end_block();
|
||||
return phi.create();
|
||||
}
|
||||
|
||||
static SSAInt mirror(SSAInt v, SSAInt size)
|
||||
{
|
||||
SSAInt size2 = size * 2;
|
||||
v = repeat(v, size2);
|
||||
|
||||
SSAPhi<SSAInt> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(v < size);
|
||||
phi.add_incoming(v);
|
||||
branch.else_block();
|
||||
phi.add_incoming(size2 - v - 1);
|
||||
branch.end_block();
|
||||
return phi.create();
|
||||
}
|
||||
|
||||
static SSAInt round(SSAFloat v)
|
||||
{
|
||||
SSAPhi<SSAFloat> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(v >= 0.0f);
|
||||
phi.add_incoming(v + 0.5f);
|
||||
branch.else_block();
|
||||
phi.add_incoming(v - 0.5f);
|
||||
branch.end_block();
|
||||
return SSAInt(phi.create());
|
||||
}
|
||||
|
||||
// To do: fix this:
|
||||
static SSAInt int_floor(SSAFloat v)
|
||||
{
|
||||
return SSAInt(v);
|
||||
}
|
||||
static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); }
|
||||
|
||||
SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); }
|
||||
void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); }
|
||||
|
||||
SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); }
|
||||
SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); }
|
||||
SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); }
|
||||
|
||||
SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const
|
||||
{
|
||||
SSAFloat a = fract(s * SSAFloat(_width) - 0.5f);
|
||||
SSAFloat b = fract(t * SSAFloat(_height) - 0.5f);
|
||||
SSAFloat inv_a = 1.0f - a;
|
||||
SSAFloat inv_b = 1.0f - b;
|
||||
return
|
||||
samples[0] * (inv_a * inv_b) +
|
||||
samples[1] * (a * inv_b) +
|
||||
samples[2] * (inv_a * b) +
|
||||
samples[3] * (a * b);
|
||||
}
|
||||
|
||||
void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
|
||||
{
|
||||
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
|
||||
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
|
||||
out_pixels[0] = get_clamp4f(x, y);
|
||||
out_pixels[1] = get_clamp4f(x + 1, y);
|
||||
out_pixels[2] = get_clamp4f(x, y + 1);
|
||||
out_pixels[3] = get_clamp4f(x + 1, y + 1);
|
||||
/*
|
||||
SSAInt x0 = clamp_x(x);
|
||||
SSAInt x1 = clamp_x(x + 1);
|
||||
SSAInt y0 = clamp_y(y);
|
||||
SSAInt y1 = clamp_y(y + 1);
|
||||
SSAInt offset0 = y0 * _width;
|
||||
SSAInt offset1 = y1 * _width;
|
||||
SSAPhi<SSAVec4f> phi0;
|
||||
SSAPhi<SSAVec4f> phi1;
|
||||
SSAPhi<SSAVec4f> phi2;
|
||||
SSAPhi<SSAVec4f> phi3;
|
||||
SSAIfBlock if0;
|
||||
if0.if_block(x0 + 1 == x1);
|
||||
phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
|
||||
phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
|
||||
phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
|
||||
phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
|
||||
if0.else_block();
|
||||
phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
|
||||
phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
|
||||
phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
|
||||
phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
|
||||
if0.end_block();
|
||||
out_pixels[0] = phi0.create();
|
||||
out_pixels[1] = phi1.create();
|
||||
out_pixels[2] = phi2.create();
|
||||
out_pixels[3] = phi3.create();
|
||||
*/
|
||||
}
|
||||
|
||||
void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
|
||||
{
|
||||
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
|
||||
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
|
||||
out_pixels[0] = get_repeat4f(x, y);
|
||||
out_pixels[1] = get_repeat4f(x + 1, y);
|
||||
out_pixels[2] = get_repeat4f(x, y + 1);
|
||||
out_pixels[3] = get_repeat4f(x + 1, y + 1);
|
||||
}
|
||||
|
||||
void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
|
||||
{
|
||||
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
|
||||
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
|
||||
out_pixels[0] = get_mirror4f(x, y);
|
||||
out_pixels[1] = get_mirror4f(x + 1, y);
|
||||
out_pixels[2] = get_mirror4f(x, y + 1);
|
||||
out_pixels[3] = get_mirror4f(x + 1, y + 1);
|
||||
}
|
||||
|
||||
SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); }
|
||||
SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); }
|
||||
SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); }
|
||||
|
||||
SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const
|
||||
{
|
||||
SSAVec4f samples[4];
|
||||
gather_clamp4f(s, t, samples);
|
||||
return linear_interpolate4f(s, t, samples);
|
||||
}
|
||||
|
||||
SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const
|
||||
{
|
||||
SSAVec4f samples[4];
|
||||
gather_repeat4f(s, t, samples);
|
||||
return linear_interpolate4f(s, t, samples);
|
||||
}
|
||||
|
||||
SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const
|
||||
{
|
||||
SSAVec4f samples[4];
|
||||
gather_mirror4f(s, t, samples);
|
||||
return linear_interpolate4f(s, t, samples);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Packed versions:
|
||||
|
||||
SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); }
|
||||
SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); }
|
||||
SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); }
|
||||
SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); }
|
||||
SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); }
|
||||
SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); }
|
||||
SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); }
|
||||
SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); }
|
||||
|
||||
static SSAVec4i clamp(SSAVec4i v, SSAInt size)
|
||||
{
|
||||
return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0);
|
||||
}
|
||||
|
||||
static SSAVec4i repeat(SSAVec4i v, SSAInt size)
|
||||
{
|
||||
return clamp(v, size);
|
||||
/*SSAPhi<SSAInt> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(v >= 0);
|
||||
phi.add_incoming(v % size);
|
||||
branch.else_block();
|
||||
phi.add_incoming(size - 1 + v % size);
|
||||
branch.end_block();
|
||||
return phi.create();*/
|
||||
}
|
||||
|
||||
static SSAVec4i mirror(SSAVec4i v, SSAInt size)
|
||||
{
|
||||
return clamp(v, size);
|
||||
/*SSAInt size2 = size * 2;
|
||||
v = repeat(v, size2);
|
||||
|
||||
SSAPhi<SSAInt> phi;
|
||||
SSAIfBlock branch;
|
||||
branch.if_block(v < size);
|
||||
phi.add_incoming(v);
|
||||
branch.else_block();
|
||||
phi.add_incoming(size2 - v - 1);
|
||||
branch.end_block();
|
||||
return phi.create();*/
|
||||
}
|
||||
|
||||
static SSAVec4i round(SSAVec4f v)
|
||||
{
|
||||
// Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs)
|
||||
SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000);
|
||||
SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f)));
|
||||
return v + signed_half;
|
||||
}
|
||||
|
||||
static SSAVec4i int_floor(SSAVec4f v)
|
||||
{
|
||||
return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31);
|
||||
}
|
||||
|
||||
static SSAVec4f fract(SSAVec4f v)
|
||||
{
|
||||
// return v - SSAVec4f::floor_sse4(v);
|
||||
return v - SSAVec4f(int_floor(v));
|
||||
}
|
||||
|
||||
template<typename WrapXFunctor, typename WrapYFunctor>
|
||||
SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
|
||||
{
|
||||
SSAVec4i x = int_floor(s * _widthps - 0.5f);
|
||||
SSAVec4i y = int_floor(t * _heightps - 0.5f);
|
||||
SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y));
|
||||
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
|
||||
SSAVec8s offsetlo = y16 * _width16;
|
||||
SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + x;
|
||||
return PixelFormat::get4f(offset[index]);
|
||||
}
|
||||
|
||||
SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
|
||||
{
|
||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
|
||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
|
||||
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
|
||||
/*
|
||||
return nearest_helper4f(
|
||||
s, t, index,
|
||||
[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
|
||||
[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
|
||||
*/
|
||||
}
|
||||
|
||||
SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
|
||||
{
|
||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; };
|
||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; };
|
||||
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
|
||||
/*
|
||||
return nearest_helper4f(
|
||||
s, t, index,
|
||||
[this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); },
|
||||
[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); });
|
||||
*/
|
||||
}
|
||||
|
||||
SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
|
||||
{
|
||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; };
|
||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; };
|
||||
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
|
||||
/*
|
||||
return nearest_helper4f(
|
||||
s, t, index,
|
||||
[this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); },
|
||||
[this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); });
|
||||
*/
|
||||
}
|
||||
|
||||
template<typename WrapXFunctor, typename WrapYFunctor>
|
||||
void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
|
||||
{
|
||||
SSAVec4i x = int_floor(s * _widthps - 0.5f);
|
||||
SSAVec4i y = int_floor(t * _heightps - 0.5f);
|
||||
SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y));
|
||||
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
|
||||
SSAVec8s offsetlo = y16 * _width16;
|
||||
SSAVec4i x0 = wrap_x(x);
|
||||
SSAVec4i x1 = wrap_x(x + 1);
|
||||
SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi);
|
||||
SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi);
|
||||
SSAVec4i offset0 = x0 + line0;
|
||||
SSAVec4i offset1 = x1 + line0;
|
||||
SSAVec4i offset2 = x0 + line1;
|
||||
SSAVec4i offset3 = x1 + line1;
|
||||
out_pixels[0] = PixelFormat::get4f(offset0[index]);
|
||||
out_pixels[1] = PixelFormat::get4f(offset1[index]);
|
||||
out_pixels[2] = PixelFormat::get4f(offset2[index]);
|
||||
out_pixels[3] = PixelFormat::get4f(offset3[index]);
|
||||
}
|
||||
|
||||
void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
|
||||
{
|
||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
|
||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
|
||||
return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
|
||||
/*
|
||||
gather_helper4f(
|
||||
s, t, index, out_pixels,
|
||||
[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
|
||||
[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
|
||||
*/
|
||||
}
|
||||
|
||||
void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
|
||||
{
|
||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; };
|
||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; };
|
||||
return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
|
||||
/*
|
||||
gather_helper4f(
|
||||
s, t, index, out_pixels,
|
||||
[this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); },
|
||||
[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); });
|
||||
*/
|
||||
}
|
||||
|
||||
void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
|
||||
{
|
||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; };
|
||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; };
|
||||
return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
|
||||
/*
|
||||
gather_helper4f(
|
||||
s, t, index, out_pixels,
|
||||
[this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); },
|
||||
[this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); });
|
||||
*/
|
||||
}
|
||||
|
||||
SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
|
||||
{
|
||||
SSAScopeHint hint("linearclamp");
|
||||
SSAVec4f samples[4];
|
||||
gather_clamp4f(s, t, index, samples);
|
||||
return linear_interpolate4f(s, t, index, samples);
|
||||
}
|
||||
|
||||
SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
|
||||
{
|
||||
SSAVec4f samples[4];
|
||||
gather_repeat4f(s, t, index, samples);
|
||||
return linear_interpolate4f(s, t, index, samples);
|
||||
}
|
||||
|
||||
SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
|
||||
{
|
||||
SSAVec4f samples[4];
|
||||
gather_mirror4f(s, t, index, samples);
|
||||
return linear_interpolate4f(s, t, index, samples);
|
||||
}
|
||||
|
||||
SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const
|
||||
{
|
||||
SSAVec4f a = fract(s * _widthps - 0.5f);
|
||||
SSAVec4f b = fract(t * _heightps - 0.5f);
|
||||
SSAVec4f inv_a = 1.0f - a;
|
||||
SSAVec4f inv_b = 1.0f - b;
|
||||
return
|
||||
samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) +
|
||||
samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) +
|
||||
samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) +
|
||||
samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SSAVec4i clamp(SSAVec4i sstt) const
|
||||
{
|
||||
return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0);
|
||||
}
|
||||
|
||||
template<typename WrapFunctor>
|
||||
void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const
|
||||
{
|
||||
SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
|
||||
SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1));
|
||||
SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7);
|
||||
SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3);
|
||||
out_pixels[0] = PixelFormat::get4f(offsets[0]);
|
||||
out_pixels[1] = PixelFormat::get4f(offsets[1]);
|
||||
out_pixels[2] = PixelFormat::get4f(offsets[2]);
|
||||
out_pixels[3] = PixelFormat::get4f(offsets[3]);
|
||||
}
|
||||
|
||||
// Gathers the four neighbouring texels for st, running the integer texel
// coordinates through clamp() before they are turned into offsets.
void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const
{
	// Coordinate wrapper handed to gather_helper4f; forwards to this->clamp().
	auto clamp_coords = [this](SSAVec4i sstt) { return clamp(sstt); };
	gather_helper4f(st, out_pixels, clamp_coords);
}
|
||||
|
||||
// Bilinear lookup with clamped coordinates: gathers four texels via
// gather_clamp4f and blends them with linear_interpolate4f. An SSAScopeHint
// named "linearclamp" is kept alive for the whole body.
SSAVec4f linear_clamp4f(SSAVec4f st) const
{
	SSAScopeHint scope_hint("linearclamp");

	SSAVec4f texels[4];
	gather_clamp4f(st, texels);

	return linear_interpolate4f(st, texels);
}
|
||||
|
||||
// Blends four gathered samples with bilinear weights computed from st
// (s in lane 0, t in lane 1).
//
// With a and b the fract() of the scaled, half-texel-shifted s and t,
// samples[0..3] are scaled by (1-a)(1-b), a(1-b), (1-a)b and ab respectively.
// All four weights are produced with a single vector multiply and then
// spread across the lanes one at a time.
SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const
{
	SSAVec4f coords = SSAVec4f::shuffle(st, 0, 0, 1, 1);
	SSAVec4f frac = fract(coords * _widthheightps - 0.5f);   // (a, a, b, b)
	SSAVec4f rem = 1.0f - frac;                               // (1-a, 1-a, 1-b, 1-b)

	// factors = (a, b, 1-a, 1-b), assuming shuffle indices 4..7 select from the second operand
	SSAVec4f factors = SSAVec4f::shuffle(frac, rem, 0, 2, 4, 6);
	// weights = (a*b, (1-a)*b, (1-a)*(1-b), a*(1-b))
	SSAVec4f weights = factors * SSAVec4f::shuffle(factors, 1, 2, 3, 0);

	SSAVec4f weight0 = SSAVec4f::shuffle(weights, 2, 2, 2, 2);
	SSAVec4f weight1 = SSAVec4f::shuffle(weights, 3, 3, 3, 3);
	SSAVec4f weight2 = SSAVec4f::shuffle(weights, 1, 1, 1, 1);
	SSAVec4f weight3 = SSAVec4f::shuffle(weights, 0, 0, 0, 0);

	return samples[0] * weight0 + samples[1] * weight1 + samples[2] * weight2 + samples[3] * weight3;
}
|
||||
|
||||
public:
|
||||
SSAInt _width;
|
||||
SSAInt _height;
|
||||
SSAVec4i _width32;
|
||||
SSAVec8s _width16;
|
||||
SSAVec4f _widthps;
|
||||
SSAVec4f _heightps;
|
||||
|
||||
SSAVec4i _widthheight;
|
||||
SSAVec4f _widthheightps;
|
||||
};
|
|
@ -41,12 +41,12 @@ llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm:
|
|||
return func;
|
||||
}
|
||||
|
||||
// Convenience overload: requests stack storage for `type` by forwarding to the
// two-argument overload with a size of SSAInt(1).
// NOTE(review): this span comes from a diff view and lists both sides of the
// alloca -> alloc_stack rename (old line first, new line second). Presumably
// the rename avoids a clash with the platform's alloca -- confirm.
llvm::Value *SSAScope::alloca(llvm::Type *type)
|
||||
llvm::Value *SSAScope::alloc_stack(llvm::Type *type)
|
||||
{
|
||||
return alloca(type, SSAInt(1));
|
||||
return alloc_stack(type, SSAInt(1));
|
||||
}
|
||||
|
||||
llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
|
||||
llvm::Value *SSAScope::alloc_stack(llvm::Type *type, SSAInt size)
|
||||
{
|
||||
// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
|
||||
llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock();
|
||||
|
|
|
@ -12,8 +12,8 @@ public:
|
|||
static llvm::Module *module();
|
||||
static llvm::IRBuilder<> &builder();
|
||||
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
|
||||
// Stack allocation helpers: one overload takes only the element type, the
// other also takes an SSAInt size.
// NOTE(review): diff view -- the two `alloca` declarations are the pre-rename
// lines and the two `alloc_stack` declarations are their replacements.
static llvm::Value *alloca(llvm::Type *type);
|
||||
static llvm::Value *alloca(llvm::Type *type, SSAInt size);
|
||||
static llvm::Value *alloc_stack(llvm::Type *type);
|
||||
static llvm::Value *alloc_stack(llvm::Type *type, SSAInt size);
|
||||
static llvm::MDNode *constant_scope_list();
|
||||
static const std::string &hint();
|
||||
static void set_hint(const std::string &hint);
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "ssa_scope.h"
|
||||
|
||||
template<typename SSAVariable>
|
||||
class SSAStack
|
||||
{
|
||||
|
@ -8,7 +10,7 @@ public:
|
|||
// Default constructor: initialises v to 0, then replaces it with storage
// obtained from SSAScope for one value of SSAVariable::llvm_type().
// NOTE(review): diff view -- the `alloca` call is the pre-rename line and the
// `alloc_stack` call is its replacement.
SSAStack()
|
||||
: v(0)
|
||||
{
|
||||
v = SSAScope::alloca(SSAVariable::llvm_type());
|
||||
v = SSAScope::alloc_stack(SSAVariable::llvm_type());
|
||||
}
|
||||
|
||||
SSAVariable load() const
|
||||
|
|
|
@ -55,10 +55,17 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
|
|||
std::vector<llvm::Constant*> constants;
|
||||
constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
|
||||
v = llvm::ConstantVector::get(constants);
|
||||
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
v = SSAScope::builder().CreateInsertElement(v, i0.v, SSAInt(0).v, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i1.v, SSAInt(1).v, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i2.v, SSAInt(2).v, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i3.v, SSAInt(3).v, SSAScope::hint());
|
||||
#else
|
||||
v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
|
||||
#endif
|
||||
}
|
||||
|
||||
SSAVec4i::SSAVec4i(SSAVec4f f32)
|
||||
|
@ -84,7 +91,11 @@ SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value)
|
|||
|
||||
// Returns a vector built from this one with lane `index` replaced by `value`.
// The insertelement instruction is emitted through the current SSAScope
// builder and named with the active scope hint.
SSAVec4i SSAVec4i::insert(int index, SSAInt value)
{
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
	// This version range is handed the lane index as an llvm::Value.
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, SSAInt(index).v, SSAScope::hint());
#else
	// Other versions are handed the lane index as a plain integer.
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, static_cast<uint64_t>(index), SSAScope::hint());
#endif
	return SSAVec4i::from_llvm(updated);
}
|
||||
|
||||
SSAVec4i SSAVec4i::insert(int index, int value)
|
||||
|
|
Loading…
Reference in a new issue