macOS compile fixes and support for slightly older versions of LLVM (to allow Mac users to brew install llvm)

This commit is contained in:
Magnus Norddahl 2016-10-17 00:19:07 +02:00
parent dfed525e18
commit d654301bc2
14 changed files with 55 additions and 669 deletions

View file

@ -13,8 +13,8 @@
#include "r_compiler/ssa/ssa_ubyte_ptr.h"
#include "r_compiler/ssa/ssa_vec4f_ptr.h"
#include "r_compiler/ssa/ssa_vec4i_ptr.h"
#include "r_compiler/ssa/ssa_pixels.h"
#include "r_compiler/ssa/ssa_stack.h"
#include "r_compiler/ssa/ssa_bool.h"
#include "r_compiler/ssa/ssa_barycentric_weight.h"
#include "r_compiler/llvm_include.h"

View file

@ -25,12 +25,19 @@
#endif
#ifdef __APPLE__
#define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
#define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wredundant-move"
#endif
#include <llvm/IR/DerivedTypes.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/Verifier.h>
#include <llvm/IR/PassManager.h>
//#include <llvm/IR/PassManager.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Intrinsics.h>
@ -43,8 +50,14 @@
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/Host.h>
#include <llvm/CodeGen/AsmPrinter.h>
#include <llvm/MC/MCAsmInfo.h>
#include <llvm/Target/TargetSubtargetInfo.h>
#ifdef __APPLE__
#pragma clang diagnostic pop
#endif
#ifdef _MSC_VER

View file

@ -263,7 +263,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__);
I_FatalError("verifyFunction failed for CodegenDrawColumn()");
}
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
@ -281,7 +281,7 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__);
I_FatalError("verifyFunction failed for CodegenDrawSpan()");
}
void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
@ -300,7 +300,7 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant,
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__);
I_FatalError("verifyFunction failed for CodegenDrawWall()");
}
void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns)
@ -319,7 +319,7 @@ void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, i
builder.CreateRetVoid();
if (llvm::verifyFunction(*function.func))
I_FatalError("verifyFunction failed for " __FUNCTION__);
I_FatalError("verifyFunction failed for CodegenDrawSky()");
}
llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
@ -469,7 +469,11 @@ LLVMProgram::LLVMProgram()
I_FatalError("Could not find LLVM target: %s", errorstring.c_str());
TargetOptions opt;
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
Reloc::Model relocModel = Reloc::Default;
#else
auto relocModel = Optional<Reloc::Model>();
#endif
machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive);
if (!machine)
I_FatalError("Could not create LLVM target machine");
@ -478,7 +482,11 @@ LLVMProgram::LLVMProgram()
mModule = std::make_unique<Module>("render", context());
mModule->setTargetTriple(targetTriple);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
mModule->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout()));
#else
mModule->setDataLayout(machine->createDataLayout());
#endif
}
@ -489,8 +497,10 @@ void LLVMProgram::CreateEE()
legacy::FunctionPassManager PerFunctionPasses(mModule.get());
legacy::PassManager PerModulePasses;
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
#endif
PassManagerBuilder passManagerBuilder;
passManagerBuilder.OptLevel = 3;
@ -532,13 +542,17 @@ std::string LLVMProgram::DumpModule()
{
std::string str;
llvm::raw_string_ostream stream(str);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
mModule->print(stream, nullptr);
#else
mModule->print(stream, nullptr, false, true);
#endif
return stream.str();
}
void *LLVMProgram::PointerToFunction(const char *name)
{
return reinterpret_cast<void(*)()>(mEngine->getFunctionAddress(name));
return reinterpret_cast<void*>(mEngine->getFunctionAddress(name));
}
void LLVMProgram::StopLogFatalErrors()

View file

@ -16,6 +16,7 @@ SSAForBlock::SSAForBlock()
void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
{
auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block);
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
if (unroll_count > 0)
{
using namespace llvm;
@ -29,6 +30,7 @@ void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count });
branch->setMetadata(LLVMContext::MD_loop, md_loop);
}
#endif
SSAScope::builder().SetInsertPoint(loop_basic_block);
}

View file

@ -1,28 +0,0 @@
#pragma once
#include "ssa_int.h"
#include "ssa_float_ptr.h"
// Pixel accessor for a buffer in which every pixel occupies four consecutive floats.
class SSAPixelFormat4f
{
public:
	// Leaves the wrapped pointer default-constructed.
	SSAPixelFormat4f()
	{
	}

	// Wraps an existing float buffer. width and height are accepted but not stored by this class.
	SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt width, SSAInt height)
		: _pixels(pixels)
	{
	}

	// Returns the wrapped buffer pointer by value.
	SSAFloatPtr pixels() { return _pixels; }
	SSAFloatPtr pixels() const { return _pixels; }

	// Loads pixel `index` (element offset index * 4); constantScopeDomain is forwarded to load_vec4f unchanged.
	SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
	{
		auto element = _pixels[index * 4];
		return element.load_vec4f(constantScopeDomain);
	}

	// Stores `pixel` at pixel `index` (element offset index * 4).
	void set4f(SSAInt index, const SSAVec4f &pixel)
	{
		auto element = _pixels[index * 4];
		element.store_vec4f(pixel);
	}

protected:
	SSAFloatPtr _pixels;
};

View file

@ -1,28 +0,0 @@
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte_ptr.h"
// Pixel accessor for a buffer in which every pixel occupies four consecutive unsigned bytes.
// Pixels are exposed as float vectors scaled by 1/255 on read and by 255 on write.
class SSAPixelFormat4ub
{
public:
	// Leaves the wrapped pointer default-constructed.
	SSAPixelFormat4ub()
	{
	}

	// Wraps an existing byte buffer. width and height are accepted but not stored by this class.
	SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height)
		: _pixels(pixels)
	{
	}

	// Returns the wrapped buffer pointer by value.
	SSAUBytePtr pixels() { return _pixels; }
	SSAUBytePtr pixels() const { return _pixels; }

	// Loads the four bytes of pixel `index` and scales them by 1/255.
	// constantScopeDomain is forwarded to load_vec4ub unchanged.
	SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
	{
		auto bytes = _pixels[index * 4].load_vec4ub(constantScopeDomain);
		return SSAVec4f(bytes) * (1.0f / 255.0f);
	}

	// Scales `pixel` by 255, converts it to an integer vector and stores it at pixel `index`.
	void set4f(SSAInt index, const SSAVec4f &pixel)
	{
		auto element = _pixels[index * 4];
		element.store_vec4ub(SSAVec4i(pixel * 255.0f));
	}

private:
	SSAUBytePtr _pixels;
};

View file

@ -1,35 +0,0 @@
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte_ptr.h"
// Pixel accessor for four-byte pixels whose lanes 0 and 2 are swapped relative to the
// float vector order used by callers (shuffle pattern 2, 1, 0, 3 on both read and write).
class SSAPixelFormat4ub_argb_rev
{
public:
	// Leaves the wrapped pointer default-constructed.
	SSAPixelFormat4ub_argb_rev()
	{
	}

	// Wraps an existing byte buffer. width and height are accepted but not stored by this class.
	SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height)
		: _pixels(pixels)
	{
	}

	// Returns the wrapped buffer pointer by value.
	SSAUBytePtr pixels() { return _pixels; }
	SSAUBytePtr pixels() const { return _pixels; }

	// Disabled variant that loaded two pixels at once through an 8 x 16-bit vector:
	/*
	void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const
	{
		SSAVec8s p = _pixels[index * 4].load_vec8s();
		out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
		out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
	}
	*/

	// Loads the four bytes of pixel `index`, scales them by 1/255 and reorders the lanes as 2, 1, 0, 3.
	// constantScopeDomain is forwarded to load_vec4ub unchanged.
	SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
	{
		auto bytes = _pixels[index * 4].load_vec4ub(constantScopeDomain);
		auto normalized = SSAVec4f(bytes) * (1.0f / 255.0f);
		return SSAVec4f::shuffle(normalized, 2, 1, 0, 3);
	}

	// Scales `pixel` by 255, reorders the lanes as 2, 1, 0, 3, converts to integers and stores at pixel `index`.
	void set4f(SSAInt index, const SSAVec4f &pixel)
	{
		auto element = _pixels[index * 4];
		auto swizzled = SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3);
		element.store_vec4ub(SSAVec4i(swizzled));
	}

public:
	SSAUBytePtr _pixels;
};

View file

@ -1,28 +0,0 @@
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte_ptr.h"
// Pixel accessor for four-byte pixels stored in fully reversed lane order
// (shuffle pattern 3, 2, 1, 0 on both read and write).
class SSAPixelFormat4ub_rev
{
public:
	// Leaves the wrapped pointer default-constructed.
	SSAPixelFormat4ub_rev()
	{
	}

	// Wraps an existing byte buffer. width and height are accepted but not stored by this class.
	SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height)
		: _pixels(pixels)
	{
	}

	// Returns the wrapped buffer pointer by value.
	SSAUBytePtr pixels() { return _pixels; }
	SSAUBytePtr pixels() const { return _pixels; }

	// Loads the four bytes of pixel `index`, scales them by 1/255 and reverses the lane order.
	// constantScopeDomain is forwarded to load_vec4ub unchanged.
	SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
	{
		auto bytes = _pixels[index * 4].load_vec4ub(constantScopeDomain);
		auto normalized = SSAVec4f(bytes) * (1.0f / 255.0f);
		return SSAVec4f::shuffle(normalized, 3, 2, 1, 0);
	}

	// Scales `pixel` by 255, reverses the lane order, converts to integers and stores at pixel `index`.
	void set4f(SSAInt index, const SSAVec4f &pixel)
	{
		auto element = _pixels[index * 4];
		auto reversed = SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0);
		element.store_vec4ub(SSAVec4i(reversed));
	}

public:
	SSAUBytePtr _pixels;
};

View file

@ -1,39 +0,0 @@
#pragma once
#include "ssa_ubyte.h"
#include "ssa_ubyte_ptr.h"
#include "ssa_float.h"
#include "ssa_float_ptr.h"
#include "ssa_int.h"
#include "ssa_pixeltype.h"
//#include "ssa_pixelformat1f.h"
//#include "ssa_pixelformat2f.h"
//#include "ssa_pixelformat3f.h"
#include "ssa_pixelformat4f.h"
//#include "ssa_pixelformat1ub.h"
//#include "ssa_pixelformat2ub.h"
//#include "ssa_pixelformat3ub.h"
//#include "ssa_pixelformat3ub_rev.h"
#include "ssa_pixelformat4ub.h"
//#include "ssa_pixelformat4ub_argb.h"
#include "ssa_pixelformat4ub_rev.h"
#include "ssa_pixelformat4ub_argb_rev.h"
//#include "ssa_pixelformat4ub_channel.h"
//typedef SSAPixelType<SSAPixelFormat1f, SSAFloatPtr> SSAPixels1f;
//typedef SSAPixelType<SSAPixelFormat2f, SSAFloatPtr> SSAPixels2f;
//typedef SSAPixelType<SSAPixelFormat3f, SSAFloatPtr> SSAPixels3f;
typedef SSAPixelType<SSAPixelFormat4f, SSAFloatPtr> SSAPixels4f;
//typedef SSAPixelType<SSAPixelFormat1ub, SSAUBytePtr> SSAPixels1ub;
//typedef SSAPixelType<SSAPixelFormat2ub, SSAUBytePtr> SSAPixels2ub;
//typedef SSAPixelType<SSAPixelFormat3ub, SSAUBytePtr> SSAPixels3ub;
typedef SSAPixelType<SSAPixelFormat4ub, SSAUBytePtr> SSAPixels4ub;
//typedef SSAPixelType<SSAPixelFormat4ub_argb, SSAUBytePtr> SSAPixels4ub_argb;
//typedef SSAPixelType<SSAPixelFormat3ub_rev, SSAUBytePtr> SSAPixels3ub_rev;
typedef SSAPixelType<SSAPixelFormat4ub_rev, SSAUBytePtr> SSAPixels4ub_rev;
typedef SSAPixelType<SSAPixelFormat4ub_argb_rev, SSAUBytePtr> SSAPixels4ub_argb_rev;
//typedef SSAPixelType<SSAPixelFormat4ub_channel, SSAUBytePtr> SSAPixels4ub_channel;

View file

@ -1,498 +0,0 @@
#pragma once
#include "ssa_int.h"
#include "ssa_float.h"
#include "ssa_vec4f.h"
#include "ssa_bool.h"
#include "ssa_if_block.h"
#include "ssa_phi.h"
// Layers 2D addressing on top of a PixelFormat base class that provides linear-index
// get4f/set4f. Adds coordinate wrapping (clamp / repeat / mirror) plus nearest and
// bilinear sampling helpers, in scalar form and in packed (4-wide vector) form.
// PixelType is the pointer type handed to the PixelFormat constructor.
template<typename PixelFormat, typename PixelType>
class SSAPixelType : public PixelFormat
{
public:
// Default construction leaves all members default-constructed.
SSAPixelType()
{
}
// Stores width/height and precomputes the vector forms of them that the packed samplers use.
SSAPixelType(SSAInt width, SSAInt height, PixelType pixels)
: PixelFormat(pixels, width, height), _width(width), _height(height)
{
_width32 = SSAVec4i(_width);
SSAVec4i height32(_height);
_widthps = SSAVec4f(_width32);
_heightps = SSAVec4f(height32);
_width16 = SSAVec8s(_width32, _width32);
// Indices 0,0,4,4 presumably select (width, width, height, height) - confirm shuffle's index convention.
_widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4);
// NOTE(review): both operands are SSAVec4f but the shuffle is invoked through SSAVec4i - confirm this is the intended overload.
_widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4);
}
SSAInt width() const { return _width; }
SSAInt height() const { return _height; }
SSAInt size() const { return _width * _height; }
// Bounds checks for a linear index and for an (x, y) coordinate pair.
SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; }
SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; }
//void throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); }
//void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); }
// Scale a coordinate by width/height and round it to an integer pixel coordinate.
SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); }
SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); }
// Per-axis wrappers around the static clamp/repeat/mirror helpers below.
SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); }
SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); }
SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); }
SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); }
SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); }
SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); }
// Selects a when a <= b, otherwise b, using an if/else block merged through a phi.
static SSAInt int_min(SSAInt a, SSAInt b)
{
SSAPhi<SSAInt> phi;
SSAIfBlock branch;
branch.if_block(a <= b);
phi.add_incoming(a);
branch.else_block();
phi.add_incoming(b);
branch.end_block();
return phi.create();
}
// Selects a when a >= b, otherwise b, using an if/else block merged through a phi.
static SSAInt int_max(SSAInt a, SSAInt b)
{
SSAPhi<SSAInt> phi;
SSAIfBlock branch;
branch.if_block(a >= b);
phi.add_incoming(a);
branch.else_block();
phi.add_incoming(b);
branch.end_block();
return phi.create();
}
// Limits v to the range [0, size - 1].
static SSAInt clamp(SSAInt v, SSAInt size)
{
return int_max(int_min(v, size - 1), 0);
}
// Wraps v into the range of size using the remainder operator.
// NOTE(review): the negative branch computes size - 1 + v % size; if % has C-style remainder
// semantics, v == -1 yields size - 2 rather than size - 1 - verify this is intended.
static SSAInt repeat(SSAInt v, SSAInt size)
{
SSAPhi<SSAInt> phi;
SSAIfBlock branch;
branch.if_block(v >= 0);
phi.add_incoming(v % size);
branch.else_block();
phi.add_incoming(size - 1 + v % size);
branch.end_block();
return phi.create();
}
// Wraps v with a period of 2 * size, then reflects the upper half back into [0, size).
static SSAInt mirror(SSAInt v, SSAInt size)
{
SSAInt size2 = size * 2;
v = repeat(v, size2);
SSAPhi<SSAInt> phi;
SSAIfBlock branch;
branch.if_block(v < size);
phi.add_incoming(v);
branch.else_block();
phi.add_incoming(size2 - v - 1);
branch.end_block();
return phi.create();
}
// Adds 0.5 to non-negative values and subtracts 0.5 from negative ones, then converts to SSAInt.
static SSAInt round(SSAFloat v)
{
SSAPhi<SSAFloat> phi;
SSAIfBlock branch;
branch.if_block(v >= 0.0f);
phi.add_incoming(v + 0.5f);
branch.else_block();
phi.add_incoming(v - 0.5f);
branch.end_block();
return SSAInt(phi.create());
}
// To do: fix this:
// (Currently a plain SSAInt conversion; presumably this truncates rather than floors for negative input - confirm.)
static SSAInt int_floor(SSAFloat v)
{
return SSAInt(v);
}
// Fractional part, computed as v minus int_floor(v).
static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); }
// 2D accessors: the linear index is x + y * width.
// NOTE(review): get4f here (and in the helpers further down) calls PixelFormat::get4f with a
// single argument - confirm the PixelFormat in use provides such an overload.
SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); }
void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); }
SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); }
SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); }
SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); }
// Bilinear blend of four samples ordered (x, y), (x + 1, y), (x, y + 1), (x + 1, y + 1),
// which is the order the gather_* functions below write them in.
SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const
{
SSAFloat a = fract(s * SSAFloat(_width) - 0.5f);
SSAFloat b = fract(t * SSAFloat(_height) - 0.5f);
SSAFloat inv_a = 1.0f - a;
SSAFloat inv_b = 1.0f - b;
return
samples[0] * (inv_a * inv_b) +
samples[1] * (a * inv_b) +
samples[2] * (inv_a * b) +
samples[3] * (a * b);
}
// Fetches the 2x2 neighbourhood around (s, t) with clamped coordinates into out_pixels[0..3].
void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
{
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
out_pixels[0] = get_clamp4f(x, y);
out_pixels[1] = get_clamp4f(x + 1, y);
out_pixels[2] = get_clamp4f(x, y + 1);
out_pixels[3] = get_clamp4f(x + 1, y + 1);
// Disabled alternative that clamps each axis once and merges the loads through phi nodes:
/*
SSAInt x0 = clamp_x(x);
SSAInt x1 = clamp_x(x + 1);
SSAInt y0 = clamp_y(y);
SSAInt y1 = clamp_y(y + 1);
SSAInt offset0 = y0 * _width;
SSAInt offset1 = y1 * _width;
SSAPhi<SSAVec4f> phi0;
SSAPhi<SSAVec4f> phi1;
SSAPhi<SSAVec4f> phi2;
SSAPhi<SSAVec4f> phi3;
SSAIfBlock if0;
if0.if_block(x0 + 1 == x1);
phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
if0.else_block();
phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
if0.end_block();
out_pixels[0] = phi0.create();
out_pixels[1] = phi1.create();
out_pixels[2] = phi2.create();
out_pixels[3] = phi3.create();
*/
}
// Same as gather_clamp4f but with repeat wrapping.
void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
{
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
out_pixels[0] = get_repeat4f(x, y);
out_pixels[1] = get_repeat4f(x + 1, y);
out_pixels[2] = get_repeat4f(x, y + 1);
out_pixels[3] = get_repeat4f(x + 1, y + 1);
}
// Same as gather_clamp4f but with mirror wrapping.
void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
{
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
out_pixels[0] = get_mirror4f(x, y);
out_pixels[1] = get_mirror4f(x + 1, y);
out_pixels[2] = get_mirror4f(x, y + 1);
out_pixels[3] = get_mirror4f(x + 1, y + 1);
}
// Nearest-neighbour sampling: round (s, t) to a pixel coordinate, wrap it, fetch one pixel.
SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); }
SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); }
SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); }
// Bilinear sampling: gather the 2x2 neighbourhood with the chosen wrap mode, then blend.
SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const
{
SSAVec4f samples[4];
gather_clamp4f(s, t, samples);
return linear_interpolate4f(s, t, samples);
}
SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const
{
SSAVec4f samples[4];
gather_repeat4f(s, t, samples);
return linear_interpolate4f(s, t, samples);
}
SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const
{
SSAVec4f samples[4];
gather_mirror4f(s, t, samples);
return linear_interpolate4f(s, t, samples);
}
/////////////////////////////////////////////////////////////////////////
// Packed versions:
SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); }
SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); }
SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); }
SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); }
SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); }
SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); }
SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); }
SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); }
// Packed clamp built from SSAVec4i::min_sse41 / max_sse41.
static SSAVec4i clamp(SSAVec4i v, SSAInt size)
{
return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0);
}
// Packed repeat is not implemented: it currently returns clamp(v, size).
// The scalar algorithm is kept below as a disabled reference.
static SSAVec4i repeat(SSAVec4i v, SSAInt size)
{
return clamp(v, size);
/*SSAPhi<SSAInt> phi;
SSAIfBlock branch;
branch.if_block(v >= 0);
phi.add_incoming(v % size);
branch.else_block();
phi.add_incoming(size - 1 + v % size);
branch.end_block();
return phi.create();*/
}
// Packed mirror is not implemented: it currently returns clamp(v, size).
// The scalar algorithm is kept below as a disabled reference.
static SSAVec4i mirror(SSAVec4i v, SSAInt size)
{
return clamp(v, size);
/*SSAInt size2 = size * 2;
v = repeat(v, size2);
SSAPhi<SSAInt> phi;
SSAIfBlock branch;
branch.if_block(v < size);
phi.add_incoming(v);
branch.else_block();
phi.add_incoming(size2 - v - 1);
branch.end_block();
return phi.create();*/
}
// Packed round: ORs each lane's sign bit into the bit pattern of 0.5f and adds the result to v.
// The float sum is returned as SSAVec4i, so it relies on a conversion from SSAVec4f.
static SSAVec4i round(SSAVec4f v)
{
// Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs)
SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000);
SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f)));
return v + signed_half;
}
// Packed floor: converts to integers, then subtracts the float bit pattern shifted right by 31.
// NOTE(review): whether this adjusts negative lanes in the right direction depends on whether
// SSAVec4i's operator>> is a logical or an arithmetic shift - confirm.
static SSAVec4i int_floor(SSAVec4f v)
{
return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31);
}
static SSAVec4f fract(SSAVec4f v)
{
// return v - SSAVec4f::floor_sse4(v);
return v - SSAVec4f(int_floor(v));
}
// Shared implementation of the packed nearest_* samplers: computes an offset per lane and
// fetches the pixel for lane `index` (a compile-time int).
// NOTE(review): wrap_x is never invoked, so x is added to the row offset unwrapped - confirm.
template<typename WrapXFunctor, typename WrapYFunctor>
SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
{
SSAVec4i x = int_floor(s * _widthps - 0.5f);
SSAVec4i y = int_floor(t * _heightps - 0.5f);
SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y));
// y * width is formed from 16-bit products; presumably mulhi and operator* give the high and
// low halves that combinelo merges back into 32-bit lanes - confirm.
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
SSAVec8s offsetlo = y16 * _width16;
SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + x;
return PixelFormat::get4f(offset[index]);
}
// The three packed nearest samplers differ only in the wrap functors passed to the helper.
// Local functor structs are used; the equivalent lambda form is kept as a disabled reference.
SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
{
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
/*
return nearest_helper4f(
s, t, index,
[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
*/
}
SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
{
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; };
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; };
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
/*
return nearest_helper4f(
s, t, index,
[this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); },
[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); });
*/
}
SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
{
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; };
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; };
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
/*
return nearest_helper4f(
s, t, index,
[this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); },
[this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); });
*/
}
// Shared implementation of the packed gather_* functions: computes the four neighbour offsets
// per lane and fetches the 2x2 neighbourhood for lane `index` into out_pixels[0..3].
template<typename WrapXFunctor, typename WrapYFunctor>
void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
{
SSAVec4i x = int_floor(s * _widthps - 0.5f);
SSAVec4i y = int_floor(t * _heightps - 0.5f);
// Both rows (y + 1 and y) are packed into one 8 x 16-bit vector so a single multiply covers them.
SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y));
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
SSAVec8s offsetlo = y16 * _width16;
SSAVec4i x0 = wrap_x(x);
SSAVec4i x1 = wrap_x(x + 1);
// Presumably line0 is the row offset for y and line1 the one for y + 1; this depends on the
// lane order of the SSAVec8s constructor and combinehi/combinelo - confirm.
SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi);
SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi);
SSAVec4i offset0 = x0 + line0;
SSAVec4i offset1 = x1 + line0;
SSAVec4i offset2 = x0 + line1;
SSAVec4i offset3 = x1 + line1;
out_pixels[0] = PixelFormat::get4f(offset0[index]);
out_pixels[1] = PixelFormat::get4f(offset1[index]);
out_pixels[2] = PixelFormat::get4f(offset2[index]);
out_pixels[3] = PixelFormat::get4f(offset3[index]);
}
// The three packed gather functions differ only in the wrap functors passed to the helper.
void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
/*
gather_helper4f(
s, t, index, out_pixels,
[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
*/
}
void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; };
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; };
return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
/*
gather_helper4f(
s, t, index, out_pixels,
[this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); },
[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); });
*/
}
void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; };
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; };
return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
/*
gather_helper4f(
s, t, index, out_pixels,
[this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); },
[this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); });
*/
}
// Packed bilinear samplers: gather the neighbourhood for lane `index`, then blend.
// Only the clamp variant installs an SSAScopeHint.
SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
{
SSAScopeHint hint("linearclamp");
SSAVec4f samples[4];
gather_clamp4f(s, t, index, samples);
return linear_interpolate4f(s, t, index, samples);
}
SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
{
SSAVec4f samples[4];
gather_repeat4f(s, t, index, samples);
return linear_interpolate4f(s, t, index, samples);
}
SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
{
SSAVec4f samples[4];
gather_mirror4f(s, t, index, samples);
return linear_interpolate4f(s, t, index, samples);
}
// Packed bilinear blend: each weight product is computed for all lanes, then lane `index`
// is broadcast to all four lanes before scaling the corresponding sample.
SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const
{
SSAVec4f a = fract(s * _widthps - 0.5f);
SSAVec4f b = fract(t * _heightps - 0.5f);
SSAVec4f inv_a = 1.0f - a;
SSAVec4f inv_b = 1.0f - b;
return
samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) +
samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) +
samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) +
samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index);
}
/////////////////////////////////////////////////////////////////////////
// Single-coordinate versions: one (s, t) pair is processed with x and y sharing one vector.
// Clamps a vector of coordinates against _widthheight - 1 on the upper side and 0 on the lower side.
SSAVec4i clamp(SSAVec4i sstt) const
{
return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0);
}
// Gathers the 2x2 neighbourhood around `st` into out_pixels[0..3], applying `wrap` to the
// combined coordinate vector.
template<typename WrapFunctor>
void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const
{
SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
// Presumably yields (x, x + 1, y, y + 1) after wrapping - depends on shuffle's lane convention, confirm.
SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1));
// NOTE(review): both operands are SSAVec4i but the shuffle is invoked through SSAVec4f - confirm this is the intended overload.
SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7);
SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3);
out_pixels[0] = PixelFormat::get4f(offsets[0]);
out_pixels[1] = PixelFormat::get4f(offsets[1]);
out_pixels[2] = PixelFormat::get4f(offsets[2]);
out_pixels[3] = PixelFormat::get4f(offsets[3]);
}
void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const
{
struct Wrap { Wrap(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i sstt) { return self->clamp(sstt); } const SSAPixelType *self; };
return gather_helper4f(st, out_pixels, Wrap(this));
}
SSAVec4f linear_clamp4f(SSAVec4f st) const
{
SSAScopeHint hint("linearclamp");
SSAVec4f samples[4];
gather_clamp4f(st, samples);
return linear_interpolate4f(st, samples);
}
// Bilinear blend for a single `st` coordinate; all four weights are produced in one vector.
SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const
{
SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f);
SSAVec4f inv_aabb = 1.0f - aabb;
// Presumably (a, b, inv_a, inv_b), assuming indices 4 and up select from the second operand - confirm.
SSAVec4f ab_inv_ab = SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6);
// Multiplying by the vector rotated one lane gives the four weights named in the variable:
// (a * b, inv_a * b, inv_a * inv_b, a * inv_b).
SSAVec4f ab__inv_a_b__inv_a_inv_b__a_invb = ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0);
return
samples[0] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 2, 2, 2, 2) +
samples[1] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 3, 3, 3, 3) +
samples[2] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 1, 1, 1, 1) +
samples[3] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 0, 0, 0, 0);
}
public:
// Dimensions as given to the constructor, followed by the cached vector forms built from them there.
SSAInt _width;
SSAInt _height;
SSAVec4i _width32;
SSAVec8s _width16;
SSAVec4f _widthps;
SSAVec4f _heightps;
SSAVec4i _widthheight;
SSAVec4f _widthheightps;
};

View file

@ -41,12 +41,12 @@ llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm:
return func;
}
llvm::Value *SSAScope::alloca(llvm::Type *type)
llvm::Value *SSAScope::alloc_stack(llvm::Type *type)
{
return alloca(type, SSAInt(1));
return alloc_stack(type, SSAInt(1));
}
llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
llvm::Value *SSAScope::alloc_stack(llvm::Type *type, SSAInt size)
{
// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock();

View file

@ -12,8 +12,8 @@ public:
static llvm::Module *module();
static llvm::IRBuilder<> &builder();
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
static llvm::Value *alloca(llvm::Type *type);
static llvm::Value *alloca(llvm::Type *type, SSAInt size);
static llvm::Value *alloc_stack(llvm::Type *type);
static llvm::Value *alloc_stack(llvm::Type *type, SSAInt size);
static llvm::MDNode *constant_scope_list();
static const std::string &hint();
static void set_hint(const std::string &hint);

View file

@ -1,6 +1,8 @@
#pragma once
#include "ssa_scope.h"
template<typename SSAVariable>
class SSAStack
{
@ -8,7 +10,7 @@ public:
SSAStack()
: v(0)
{
v = SSAScope::alloca(SSAVariable::llvm_type());
v = SSAScope::alloc_stack(SSAVariable::llvm_type());
}
SSAVariable load() const

View file

@ -55,10 +55,17 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
std::vector<llvm::Constant*> constants;
constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
v = llvm::ConstantVector::get(constants);
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
v = SSAScope::builder().CreateInsertElement(v, i0.v, SSAInt(0).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, i1.v, SSAInt(1).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, i2.v, SSAInt(2).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, i3.v, SSAInt(3).v, SSAScope::hint());
#else
v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
#endif
}
SSAVec4i::SSAVec4i(SSAVec4f f32)
@ -84,7 +91,11 @@ SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value)
SSAVec4i SSAVec4i::insert(int index, SSAInt value)
{
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, SSAInt(index).v, SSAScope::hint()));
#else
return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint()));
#endif
}
SSAVec4i SSAVec4i::insert(int index, int value)