mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-11 23:32:02 +00:00
macOS compile fixes and support for slightly older versions of LLVM (to allow Mac users to brew install llvm)
This commit is contained in:
parent
dfed525e18
commit
d654301bc2
14 changed files with 55 additions and 669 deletions
|
@ -13,8 +13,8 @@
|
||||||
#include "r_compiler/ssa/ssa_ubyte_ptr.h"
|
#include "r_compiler/ssa/ssa_ubyte_ptr.h"
|
||||||
#include "r_compiler/ssa/ssa_vec4f_ptr.h"
|
#include "r_compiler/ssa/ssa_vec4f_ptr.h"
|
||||||
#include "r_compiler/ssa/ssa_vec4i_ptr.h"
|
#include "r_compiler/ssa/ssa_vec4i_ptr.h"
|
||||||
#include "r_compiler/ssa/ssa_pixels.h"
|
|
||||||
#include "r_compiler/ssa/ssa_stack.h"
|
#include "r_compiler/ssa/ssa_stack.h"
|
||||||
|
#include "r_compiler/ssa/ssa_bool.h"
|
||||||
#include "r_compiler/ssa/ssa_barycentric_weight.h"
|
#include "r_compiler/ssa/ssa_barycentric_weight.h"
|
||||||
#include "r_compiler/llvm_include.h"
|
#include "r_compiler/llvm_include.h"
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,19 @@
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#define __STDC_LIMIT_MACROS // DataTypes.h:57:3: error: "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
|
||||||
|
#define __STDC_CONSTANT_MACROS // DataTypes.h:61:3: error: "Must #define __STDC_CONSTANT_MACROS before " "#including Support/DataTypes.h"
|
||||||
|
#pragma clang diagnostic push
|
||||||
|
#pragma clang diagnostic ignored "-Wredundant-move"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <llvm/IR/DerivedTypes.h>
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
#include <llvm/IR/LLVMContext.h>
|
#include <llvm/IR/LLVMContext.h>
|
||||||
#include <llvm/IR/Module.h>
|
#include <llvm/IR/Module.h>
|
||||||
#include <llvm/IR/Attributes.h>
|
#include <llvm/IR/Attributes.h>
|
||||||
#include <llvm/IR/Verifier.h>
|
#include <llvm/IR/Verifier.h>
|
||||||
#include <llvm/IR/PassManager.h>
|
//#include <llvm/IR/PassManager.h>
|
||||||
#include <llvm/IR/LegacyPassManager.h>
|
#include <llvm/IR/LegacyPassManager.h>
|
||||||
#include <llvm/IR/IRBuilder.h>
|
#include <llvm/IR/IRBuilder.h>
|
||||||
#include <llvm/IR/Intrinsics.h>
|
#include <llvm/IR/Intrinsics.h>
|
||||||
|
@ -43,8 +50,14 @@
|
||||||
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
|
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
|
||||||
#include <llvm/Support/TargetSelect.h>
|
#include <llvm/Support/TargetSelect.h>
|
||||||
#include <llvm/Support/TargetRegistry.h>
|
#include <llvm/Support/TargetRegistry.h>
|
||||||
|
#include <llvm/Support/Host.h>
|
||||||
#include <llvm/CodeGen/AsmPrinter.h>
|
#include <llvm/CodeGen/AsmPrinter.h>
|
||||||
#include <llvm/MC/MCAsmInfo.h>
|
#include <llvm/MC/MCAsmInfo.h>
|
||||||
|
#include <llvm/Target/TargetSubtargetInfo.h>
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#pragma clang diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
|
||||||
|
|
|
@ -263,7 +263,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari
|
||||||
builder.CreateRetVoid();
|
builder.CreateRetVoid();
|
||||||
|
|
||||||
if (llvm::verifyFunction(*function.func))
|
if (llvm::verifyFunction(*function.func))
|
||||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
I_FatalError("verifyFunction failed for CodegenDrawColumn()");
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
|
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
|
||||||
|
@ -281,7 +281,7 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
|
||||||
builder.CreateRetVoid();
|
builder.CreateRetVoid();
|
||||||
|
|
||||||
if (llvm::verifyFunction(*function.func))
|
if (llvm::verifyFunction(*function.func))
|
||||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
I_FatalError("verifyFunction failed for CodegenDrawSpan()");
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
|
void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
|
||||||
|
@ -300,7 +300,7 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant,
|
||||||
builder.CreateRetVoid();
|
builder.CreateRetVoid();
|
||||||
|
|
||||||
if (llvm::verifyFunction(*function.func))
|
if (llvm::verifyFunction(*function.func))
|
||||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
I_FatalError("verifyFunction failed for CodegenDrawWall()");
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns)
|
void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns)
|
||||||
|
@ -319,7 +319,7 @@ void LLVMDrawersImpl::CodegenDrawSky(const char *name, DrawSkyVariant variant, i
|
||||||
builder.CreateRetVoid();
|
builder.CreateRetVoid();
|
||||||
|
|
||||||
if (llvm::verifyFunction(*function.func))
|
if (llvm::verifyFunction(*function.func))
|
||||||
I_FatalError("verifyFunction failed for " __FUNCTION__);
|
I_FatalError("verifyFunction failed for CodegenDrawSky()");
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
|
llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
|
||||||
|
@ -469,7 +469,11 @@ LLVMProgram::LLVMProgram()
|
||||||
I_FatalError("Could not find LLVM target: %s", errorstring.c_str());
|
I_FatalError("Could not find LLVM target: %s", errorstring.c_str());
|
||||||
|
|
||||||
TargetOptions opt;
|
TargetOptions opt;
|
||||||
|
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||||
|
Reloc::Model relocModel = Reloc::Default;
|
||||||
|
#else
|
||||||
auto relocModel = Optional<Reloc::Model>();
|
auto relocModel = Optional<Reloc::Model>();
|
||||||
|
#endif
|
||||||
machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive);
|
machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::JITDefault, CodeGenOpt::Aggressive);
|
||||||
if (!machine)
|
if (!machine)
|
||||||
I_FatalError("Could not create LLVM target machine");
|
I_FatalError("Could not create LLVM target machine");
|
||||||
|
@ -478,7 +482,11 @@ LLVMProgram::LLVMProgram()
|
||||||
|
|
||||||
mModule = std::make_unique<Module>("render", context());
|
mModule = std::make_unique<Module>("render", context());
|
||||||
mModule->setTargetTriple(targetTriple);
|
mModule->setTargetTriple(targetTriple);
|
||||||
|
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||||
|
mModule->setDataLayout(new DataLayout(*machine->getSubtargetImpl()->getDataLayout()));
|
||||||
|
#else
|
||||||
mModule->setDataLayout(machine->createDataLayout());
|
mModule->setDataLayout(machine->createDataLayout());
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -489,8 +497,10 @@ void LLVMProgram::CreateEE()
|
||||||
legacy::FunctionPassManager PerFunctionPasses(mModule.get());
|
legacy::FunctionPassManager PerFunctionPasses(mModule.get());
|
||||||
legacy::PassManager PerModulePasses;
|
legacy::PassManager PerModulePasses;
|
||||||
|
|
||||||
|
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
|
||||||
PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
|
PerFunctionPasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
|
||||||
PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
|
PerModulePasses.add(createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
|
||||||
|
#endif
|
||||||
|
|
||||||
PassManagerBuilder passManagerBuilder;
|
PassManagerBuilder passManagerBuilder;
|
||||||
passManagerBuilder.OptLevel = 3;
|
passManagerBuilder.OptLevel = 3;
|
||||||
|
@ -532,13 +542,17 @@ std::string LLVMProgram::DumpModule()
|
||||||
{
|
{
|
||||||
std::string str;
|
std::string str;
|
||||||
llvm::raw_string_ostream stream(str);
|
llvm::raw_string_ostream stream(str);
|
||||||
|
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||||
|
mModule->print(stream, nullptr);
|
||||||
|
#else
|
||||||
mModule->print(stream, nullptr, false, true);
|
mModule->print(stream, nullptr, false, true);
|
||||||
|
#endif
|
||||||
return stream.str();
|
return stream.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
void *LLVMProgram::PointerToFunction(const char *name)
|
void *LLVMProgram::PointerToFunction(const char *name)
|
||||||
{
|
{
|
||||||
return reinterpret_cast<void(*)()>(mEngine->getFunctionAddress(name));
|
return reinterpret_cast<void*>(mEngine->getFunctionAddress(name));
|
||||||
}
|
}
|
||||||
|
|
||||||
void LLVMProgram::StopLogFatalErrors()
|
void LLVMProgram::StopLogFatalErrors()
|
||||||
|
|
|
@ -16,6 +16,7 @@ SSAForBlock::SSAForBlock()
|
||||||
void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
|
void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
|
||||||
{
|
{
|
||||||
auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block);
|
auto branch = SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block);
|
||||||
|
#if LLVM_VERSION_MAJOR > 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
|
||||||
if (unroll_count > 0)
|
if (unroll_count > 0)
|
||||||
{
|
{
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
@ -29,6 +30,7 @@ void SSAForBlock::loop_block(SSABool true_condition, int unroll_count)
|
||||||
auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count });
|
auto md_loop = MDNode::getDistinct(SSAScope::context(), { md_unroll_enable, md_unroll_count });
|
||||||
branch->setMetadata(LLVMContext::MD_loop, md_loop);
|
branch->setMetadata(LLVMContext::MD_loop, md_loop);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
SSAScope::builder().SetInsertPoint(loop_basic_block);
|
SSAScope::builder().SetInsertPoint(loop_basic_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,28 +0,0 @@
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ssa_int.h"
|
|
||||||
#include "ssa_float_ptr.h"
|
|
||||||
|
|
||||||
class SSAPixelFormat4f
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SSAPixelFormat4f() { }
|
|
||||||
SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
|
||||||
|
|
||||||
SSAFloatPtr pixels() { return _pixels; }
|
|
||||||
SSAFloatPtr pixels() const { return _pixels; }
|
|
||||||
|
|
||||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
|
||||||
{
|
|
||||||
return _pixels[index * 4].load_vec4f(constantScopeDomain);
|
|
||||||
}
|
|
||||||
|
|
||||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
|
||||||
{
|
|
||||||
_pixels[index * 4].store_vec4f(pixel);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
|
||||||
SSAFloatPtr _pixels;
|
|
||||||
};
|
|
|
@ -1,28 +0,0 @@
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ssa_int.h"
|
|
||||||
#include "ssa_ubyte_ptr.h"
|
|
||||||
|
|
||||||
class SSAPixelFormat4ub
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SSAPixelFormat4ub() { }
|
|
||||||
SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
|
||||||
|
|
||||||
SSAUBytePtr pixels() { return _pixels; }
|
|
||||||
SSAUBytePtr pixels() const { return _pixels; }
|
|
||||||
|
|
||||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
|
||||||
{
|
|
||||||
return SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f);
|
|
||||||
}
|
|
||||||
|
|
||||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
|
||||||
{
|
|
||||||
_pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f));
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
SSAUBytePtr _pixels;
|
|
||||||
};
|
|
|
@ -1,35 +0,0 @@
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ssa_int.h"
|
|
||||||
#include "ssa_ubyte_ptr.h"
|
|
||||||
|
|
||||||
class SSAPixelFormat4ub_argb_rev
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SSAPixelFormat4ub_argb_rev() { }
|
|
||||||
SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
|
||||||
|
|
||||||
SSAUBytePtr pixels() { return _pixels; }
|
|
||||||
SSAUBytePtr pixels() const { return _pixels; }
|
|
||||||
/*
|
|
||||||
void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const
|
|
||||||
{
|
|
||||||
SSAVec8s p = _pixels[index * 4].load_vec8s();
|
|
||||||
out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
|
||||||
out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
|
||||||
{
|
|
||||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 2, 1, 0, 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
|
||||||
{
|
|
||||||
_pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
SSAUBytePtr _pixels;
|
|
||||||
};
|
|
|
@ -1,28 +0,0 @@
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ssa_int.h"
|
|
||||||
#include "ssa_ubyte_ptr.h"
|
|
||||||
|
|
||||||
class SSAPixelFormat4ub_rev
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SSAPixelFormat4ub_rev() { }
|
|
||||||
SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
|
|
||||||
|
|
||||||
SSAUBytePtr pixels() { return _pixels; }
|
|
||||||
SSAUBytePtr pixels() const { return _pixels; }
|
|
||||||
|
|
||||||
SSAVec4f get4f(SSAInt index, bool constantScopeDomain) const
|
|
||||||
{
|
|
||||||
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub(constantScopeDomain)) * (1.0f / 255.0f), 3, 2, 1, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void set4f(SSAInt index, const SSAVec4f &pixel)
|
|
||||||
{
|
|
||||||
_pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
SSAUBytePtr _pixels;
|
|
||||||
};
|
|
|
@ -1,39 +0,0 @@
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ssa_ubyte.h"
|
|
||||||
#include "ssa_ubyte_ptr.h"
|
|
||||||
#include "ssa_float.h"
|
|
||||||
#include "ssa_float_ptr.h"
|
|
||||||
#include "ssa_int.h"
|
|
||||||
#include "ssa_pixeltype.h"
|
|
||||||
//#include "ssa_pixelformat1f.h"
|
|
||||||
//#include "ssa_pixelformat2f.h"
|
|
||||||
//#include "ssa_pixelformat3f.h"
|
|
||||||
#include "ssa_pixelformat4f.h"
|
|
||||||
//#include "ssa_pixelformat1ub.h"
|
|
||||||
//#include "ssa_pixelformat2ub.h"
|
|
||||||
//#include "ssa_pixelformat3ub.h"
|
|
||||||
//#include "ssa_pixelformat3ub_rev.h"
|
|
||||||
#include "ssa_pixelformat4ub.h"
|
|
||||||
//#include "ssa_pixelformat4ub_argb.h"
|
|
||||||
#include "ssa_pixelformat4ub_rev.h"
|
|
||||||
#include "ssa_pixelformat4ub_argb_rev.h"
|
|
||||||
//#include "ssa_pixelformat4ub_channel.h"
|
|
||||||
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat1f, SSAFloatPtr> SSAPixels1f;
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat2f, SSAFloatPtr> SSAPixels2f;
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat3f, SSAFloatPtr> SSAPixels3f;
|
|
||||||
typedef SSAPixelType<SSAPixelFormat4f, SSAFloatPtr> SSAPixels4f;
|
|
||||||
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat1ub, SSAUBytePtr> SSAPixels1ub;
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat2ub, SSAUBytePtr> SSAPixels2ub;
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat3ub, SSAUBytePtr> SSAPixels3ub;
|
|
||||||
typedef SSAPixelType<SSAPixelFormat4ub, SSAUBytePtr> SSAPixels4ub;
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat4ub_argb, SSAUBytePtr> SSAPixels4ub_argb;
|
|
||||||
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat3ub_rev, SSAUBytePtr> SSAPixels3ub_rev;
|
|
||||||
typedef SSAPixelType<SSAPixelFormat4ub_rev, SSAUBytePtr> SSAPixels4ub_rev;
|
|
||||||
typedef SSAPixelType<SSAPixelFormat4ub_argb_rev, SSAUBytePtr> SSAPixels4ub_argb_rev;
|
|
||||||
|
|
||||||
//typedef SSAPixelType<SSAPixelFormat4ub_channel, SSAUBytePtr> SSAPixels4ub_channel;
|
|
|
@ -1,498 +0,0 @@
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "ssa_int.h"
|
|
||||||
#include "ssa_float.h"
|
|
||||||
#include "ssa_vec4f.h"
|
|
||||||
#include "ssa_bool.h"
|
|
||||||
#include "ssa_if_block.h"
|
|
||||||
#include "ssa_phi.h"
|
|
||||||
|
|
||||||
template<typename PixelFormat, typename PixelType>
|
|
||||||
class SSAPixelType : public PixelFormat
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
SSAPixelType()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAPixelType(SSAInt width, SSAInt height, PixelType pixels)
|
|
||||||
: PixelFormat(pixels, width, height), _width(width), _height(height)
|
|
||||||
{
|
|
||||||
_width32 = SSAVec4i(_width);
|
|
||||||
SSAVec4i height32(_height);
|
|
||||||
_widthps = SSAVec4f(_width32);
|
|
||||||
_heightps = SSAVec4f(height32);
|
|
||||||
_width16 = SSAVec8s(_width32, _width32);
|
|
||||||
|
|
||||||
_widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4);
|
|
||||||
_widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAInt width() const { return _width; }
|
|
||||||
SSAInt height() const { return _height; }
|
|
||||||
SSAInt size() const { return _width * _height; }
|
|
||||||
|
|
||||||
SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; }
|
|
||||||
SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; }
|
|
||||||
//void throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); }
|
|
||||||
//void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); }
|
|
||||||
|
|
||||||
SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); }
|
|
||||||
SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); }
|
|
||||||
SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); }
|
|
||||||
SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); }
|
|
||||||
SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); }
|
|
||||||
SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); }
|
|
||||||
SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); }
|
|
||||||
SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); }
|
|
||||||
|
|
||||||
static SSAInt int_min(SSAInt a, SSAInt b)
|
|
||||||
{
|
|
||||||
SSAPhi<SSAInt> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(a <= b);
|
|
||||||
phi.add_incoming(a);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(b);
|
|
||||||
branch.end_block();
|
|
||||||
return phi.create();
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAInt int_max(SSAInt a, SSAInt b)
|
|
||||||
{
|
|
||||||
SSAPhi<SSAInt> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(a >= b);
|
|
||||||
phi.add_incoming(a);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(b);
|
|
||||||
branch.end_block();
|
|
||||||
return phi.create();
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAInt clamp(SSAInt v, SSAInt size)
|
|
||||||
{
|
|
||||||
return int_max(int_min(v, size - 1), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAInt repeat(SSAInt v, SSAInt size)
|
|
||||||
{
|
|
||||||
SSAPhi<SSAInt> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(v >= 0);
|
|
||||||
phi.add_incoming(v % size);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(size - 1 + v % size);
|
|
||||||
branch.end_block();
|
|
||||||
return phi.create();
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAInt mirror(SSAInt v, SSAInt size)
|
|
||||||
{
|
|
||||||
SSAInt size2 = size * 2;
|
|
||||||
v = repeat(v, size2);
|
|
||||||
|
|
||||||
SSAPhi<SSAInt> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(v < size);
|
|
||||||
phi.add_incoming(v);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(size2 - v - 1);
|
|
||||||
branch.end_block();
|
|
||||||
return phi.create();
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAInt round(SSAFloat v)
|
|
||||||
{
|
|
||||||
SSAPhi<SSAFloat> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(v >= 0.0f);
|
|
||||||
phi.add_incoming(v + 0.5f);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(v - 0.5f);
|
|
||||||
branch.end_block();
|
|
||||||
return SSAInt(phi.create());
|
|
||||||
}
|
|
||||||
|
|
||||||
// To do: fix this:
|
|
||||||
static SSAInt int_floor(SSAFloat v)
|
|
||||||
{
|
|
||||||
return SSAInt(v);
|
|
||||||
}
|
|
||||||
static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); }
|
|
||||||
|
|
||||||
SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); }
|
|
||||||
void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); }
|
|
||||||
|
|
||||||
SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); }
|
|
||||||
SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); }
|
|
||||||
SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); }
|
|
||||||
|
|
||||||
SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const
|
|
||||||
{
|
|
||||||
SSAFloat a = fract(s * SSAFloat(_width) - 0.5f);
|
|
||||||
SSAFloat b = fract(t * SSAFloat(_height) - 0.5f);
|
|
||||||
SSAFloat inv_a = 1.0f - a;
|
|
||||||
SSAFloat inv_b = 1.0f - b;
|
|
||||||
return
|
|
||||||
samples[0] * (inv_a * inv_b) +
|
|
||||||
samples[1] * (a * inv_b) +
|
|
||||||
samples[2] * (inv_a * b) +
|
|
||||||
samples[3] * (a * b);
|
|
||||||
}
|
|
||||||
|
|
||||||
void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
|
|
||||||
{
|
|
||||||
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
|
|
||||||
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
|
|
||||||
out_pixels[0] = get_clamp4f(x, y);
|
|
||||||
out_pixels[1] = get_clamp4f(x + 1, y);
|
|
||||||
out_pixels[2] = get_clamp4f(x, y + 1);
|
|
||||||
out_pixels[3] = get_clamp4f(x + 1, y + 1);
|
|
||||||
/*
|
|
||||||
SSAInt x0 = clamp_x(x);
|
|
||||||
SSAInt x1 = clamp_x(x + 1);
|
|
||||||
SSAInt y0 = clamp_y(y);
|
|
||||||
SSAInt y1 = clamp_y(y + 1);
|
|
||||||
SSAInt offset0 = y0 * _width;
|
|
||||||
SSAInt offset1 = y1 * _width;
|
|
||||||
SSAPhi<SSAVec4f> phi0;
|
|
||||||
SSAPhi<SSAVec4f> phi1;
|
|
||||||
SSAPhi<SSAVec4f> phi2;
|
|
||||||
SSAPhi<SSAVec4f> phi3;
|
|
||||||
SSAIfBlock if0;
|
|
||||||
if0.if_block(x0 + 1 == x1);
|
|
||||||
phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
|
|
||||||
phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
|
|
||||||
phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
|
|
||||||
phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
|
|
||||||
if0.else_block();
|
|
||||||
phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
|
|
||||||
phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
|
|
||||||
phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
|
|
||||||
phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
|
|
||||||
if0.end_block();
|
|
||||||
out_pixels[0] = phi0.create();
|
|
||||||
out_pixels[1] = phi1.create();
|
|
||||||
out_pixels[2] = phi2.create();
|
|
||||||
out_pixels[3] = phi3.create();
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
|
|
||||||
{
|
|
||||||
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
|
|
||||||
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
|
|
||||||
out_pixels[0] = get_repeat4f(x, y);
|
|
||||||
out_pixels[1] = get_repeat4f(x + 1, y);
|
|
||||||
out_pixels[2] = get_repeat4f(x, y + 1);
|
|
||||||
out_pixels[3] = get_repeat4f(x + 1, y + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
|
|
||||||
{
|
|
||||||
SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f);
|
|
||||||
SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f);
|
|
||||||
out_pixels[0] = get_mirror4f(x, y);
|
|
||||||
out_pixels[1] = get_mirror4f(x + 1, y);
|
|
||||||
out_pixels[2] = get_mirror4f(x, y + 1);
|
|
||||||
out_pixels[3] = get_mirror4f(x + 1, y + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); }
|
|
||||||
SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); }
|
|
||||||
SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); }
|
|
||||||
|
|
||||||
SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const
|
|
||||||
{
|
|
||||||
SSAVec4f samples[4];
|
|
||||||
gather_clamp4f(s, t, samples);
|
|
||||||
return linear_interpolate4f(s, t, samples);
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const
|
|
||||||
{
|
|
||||||
SSAVec4f samples[4];
|
|
||||||
gather_repeat4f(s, t, samples);
|
|
||||||
return linear_interpolate4f(s, t, samples);
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const
|
|
||||||
{
|
|
||||||
SSAVec4f samples[4];
|
|
||||||
gather_mirror4f(s, t, samples);
|
|
||||||
return linear_interpolate4f(s, t, samples);
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
|
||||||
// Packed versions:
|
|
||||||
|
|
||||||
SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); }
|
|
||||||
SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); }
|
|
||||||
SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); }
|
|
||||||
SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); }
|
|
||||||
SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); }
|
|
||||||
SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); }
|
|
||||||
SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); }
|
|
||||||
SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); }
|
|
||||||
|
|
||||||
static SSAVec4i clamp(SSAVec4i v, SSAInt size)
|
|
||||||
{
|
|
||||||
return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAVec4i repeat(SSAVec4i v, SSAInt size)
|
|
||||||
{
|
|
||||||
return clamp(v, size);
|
|
||||||
/*SSAPhi<SSAInt> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(v >= 0);
|
|
||||||
phi.add_incoming(v % size);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(size - 1 + v % size);
|
|
||||||
branch.end_block();
|
|
||||||
return phi.create();*/
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAVec4i mirror(SSAVec4i v, SSAInt size)
|
|
||||||
{
|
|
||||||
return clamp(v, size);
|
|
||||||
/*SSAInt size2 = size * 2;
|
|
||||||
v = repeat(v, size2);
|
|
||||||
|
|
||||||
SSAPhi<SSAInt> phi;
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block(v < size);
|
|
||||||
phi.add_incoming(v);
|
|
||||||
branch.else_block();
|
|
||||||
phi.add_incoming(size2 - v - 1);
|
|
||||||
branch.end_block();
|
|
||||||
return phi.create();*/
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAVec4i round(SSAVec4f v)
|
|
||||||
{
|
|
||||||
// Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs)
|
|
||||||
SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000);
|
|
||||||
SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f)));
|
|
||||||
return v + signed_half;
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAVec4i int_floor(SSAVec4f v)
|
|
||||||
{
|
|
||||||
return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31);
|
|
||||||
}
|
|
||||||
|
|
||||||
static SSAVec4f fract(SSAVec4f v)
|
|
||||||
{
|
|
||||||
// return v - SSAVec4f::floor_sse4(v);
|
|
||||||
return v - SSAVec4f(int_floor(v));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename WrapXFunctor, typename WrapYFunctor>
|
|
||||||
SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
|
|
||||||
{
|
|
||||||
SSAVec4i x = int_floor(s * _widthps - 0.5f);
|
|
||||||
SSAVec4i y = int_floor(t * _heightps - 0.5f);
|
|
||||||
SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y));
|
|
||||||
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
|
|
||||||
SSAVec8s offsetlo = y16 * _width16;
|
|
||||||
SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + x;
|
|
||||||
return PixelFormat::get4f(offset[index]);
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
|
|
||||||
{
|
|
||||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
|
|
||||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
|
|
||||||
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
|
|
||||||
/*
|
|
||||||
return nearest_helper4f(
|
|
||||||
s, t, index,
|
|
||||||
[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
|
|
||||||
[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
|
|
||||||
{
|
|
||||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; };
|
|
||||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; };
|
|
||||||
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
|
|
||||||
/*
|
|
||||||
return nearest_helper4f(
|
|
||||||
s, t, index,
|
|
||||||
[this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); },
|
|
||||||
[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); });
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
|
|
||||||
{
|
|
||||||
struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; };
|
|
||||||
struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; };
|
|
||||||
return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
|
|
||||||
/*
|
|
||||||
return nearest_helper4f(
|
|
||||||
s, t, index,
|
|
||||||
[this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); },
|
|
||||||
[this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); });
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename WrapXFunctor, typename WrapYFunctor>
|
|
||||||
void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
|
|
||||||
{
|
|
||||||
SSAVec4i x = int_floor(s * _widthps - 0.5f);
|
|
||||||
SSAVec4i y = int_floor(t * _heightps - 0.5f);
|
|
||||||
SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y));
|
|
||||||
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
|
|
||||||
SSAVec8s offsetlo = y16 * _width16;
|
|
||||||
SSAVec4i x0 = wrap_x(x);
|
|
||||||
SSAVec4i x1 = wrap_x(x + 1);
|
|
||||||
SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi);
|
|
||||||
SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi);
|
|
||||||
SSAVec4i offset0 = x0 + line0;
|
|
||||||
SSAVec4i offset1 = x1 + line0;
|
|
||||||
SSAVec4i offset2 = x0 + line1;
|
|
||||||
SSAVec4i offset3 = x1 + line1;
|
|
||||||
out_pixels[0] = PixelFormat::get4f(offset0[index]);
|
|
||||||
out_pixels[1] = PixelFormat::get4f(offset1[index]);
|
|
||||||
out_pixels[2] = PixelFormat::get4f(offset2[index]);
|
|
||||||
out_pixels[3] = PixelFormat::get4f(offset3[index]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Gathers four samples for lane `index` into out_pixels, using clamp_x() and
// clamp_y() of this object as the coordinate wrap functions.
void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
	// Local functors stand in for lambdas and forward to the member functions.
	struct ClampX
	{
		explicit ClampX(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i coord) { return owner->clamp_x(coord); }
		const SSAPixelType *owner;
	};

	struct ClampY
	{
		explicit ClampY(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i coord) { return owner->clamp_y(coord); }
		const SSAPixelType *owner;
	};

	gather_helper4f(s, t, index, out_pixels, ClampX(this), ClampY(this));
}
|
|
||||||
|
|
||||||
// Gathers four samples for lane `index` into out_pixels, using repeat_x() and
// repeat_y() of this object as the coordinate wrap functions.
void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
	// Local functors stand in for lambdas and forward to the member functions.
	struct RepeatX
	{
		explicit RepeatX(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i coord) { return owner->repeat_x(coord); }
		const SSAPixelType *owner;
	};

	struct RepeatY
	{
		explicit RepeatY(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i coord) { return owner->repeat_y(coord); }
		const SSAPixelType *owner;
	};

	gather_helper4f(s, t, index, out_pixels, RepeatX(this), RepeatY(this));
}
|
|
||||||
|
|
||||||
// Gathers four samples for lane `index` into out_pixels, using mirror_x() and
// mirror_y() of this object as the coordinate wrap functions.
void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
	// Local functors stand in for lambdas and forward to the member functions.
	struct MirrorX
	{
		explicit MirrorX(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i coord) { return owner->mirror_x(coord); }
		const SSAPixelType *owner;
	};

	struct MirrorY
	{
		explicit MirrorY(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i coord) { return owner->mirror_y(coord); }
		const SSAPixelType *owner;
	};

	gather_helper4f(s, t, index, out_pixels, MirrorX(this), MirrorY(this));
}
|
|
||||||
|
|
||||||
// Bilinear sample with clamped coordinates for lane `index`: gathers the four
// samples with gather_clamp4f() and blends them with linear_interpolate4f().
SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
{
	// Scope hint stays active while the values below are created.
	SSAScopeHint hint("linearclamp");

	SSAVec4f texels[4];
	gather_clamp4f(s, t, index, texels);

	SSAVec4f blended = linear_interpolate4f(s, t, index, texels);
	return blended;
}
|
|
||||||
|
|
||||||
// Bilinear sample with repeating coordinates for lane `index`: gathers the four
// samples with gather_repeat4f() and blends them with linear_interpolate4f().
SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
{
	SSAVec4f texels[4];
	gather_repeat4f(s, t, index, texels);

	SSAVec4f blended = linear_interpolate4f(s, t, index, texels);
	return blended;
}
|
|
||||||
|
|
||||||
// Bilinear sample with mirrored coordinates for lane `index`: gathers the four
// samples with gather_mirror4f() and blends them with linear_interpolate4f().
SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
{
	SSAVec4f texels[4];
	gather_mirror4f(s, t, index, texels);

	SSAVec4f blended = linear_interpolate4f(s, t, index, texels);
	return blended;
}
|
|
||||||
|
|
||||||
// Blends four gathered samples using the fractional position of (s, t).
//
// a and b are the fractional parts of the scaled, half-texel-shifted
// coordinates. Lane `index` of each weight product is broadcast to all four
// lanes before it multiplies the corresponding sample:
//   samples[0] * (1-a)(1-b) + samples[1] * a(1-b) + samples[2] * (1-a)b + samples[3] * ab
SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const
{
	SSAVec4f frac_s = fract(s * _widthps - 0.5f);
	SSAVec4f frac_t = fract(t * _heightps - 0.5f);
	SSAVec4f rest_s = 1.0f - frac_s;
	SSAVec4f rest_t = 1.0f - frac_t;

	// Accumulate term by term, keeping the original left-to-right summation.
	SSAVec4f blended = samples[0] * SSAVec4f::shuffle(rest_s * rest_t, index, index, index, index);
	blended = blended + samples[1] * SSAVec4f::shuffle(frac_s * rest_t, index, index, index, index);
	blended = blended + samples[2] * SSAVec4f::shuffle(rest_s * frac_t, index, index, index, index);
	blended = blended + samples[3] * SSAVec4f::shuffle(frac_s * frac_t, index, index, index, index);
	return blended;
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
// Limits every lane of sstt to the range [0, _widthheight - 1].
SSAVec4i clamp(SSAVec4i sstt) const
{
	// Apply the upper bound first, then the lower bound of zero.
	SSAVec4i capped = SSAVec4i::min_sse41(sstt, _widthheight - 1);
	return SSAVec4i::max_sse41(capped, 0);
}
|
|
||||||
|
|
||||||
template<typename WrapFunctor>
|
|
||||||
void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const
|
|
||||||
{
|
|
||||||
SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
|
|
||||||
SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1));
|
|
||||||
SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7);
|
|
||||||
SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3);
|
|
||||||
out_pixels[0] = PixelFormat::get4f(offsets[0]);
|
|
||||||
out_pixels[1] = PixelFormat::get4f(offsets[1]);
|
|
||||||
out_pixels[2] = PixelFormat::get4f(offsets[2]);
|
|
||||||
out_pixels[3] = PixelFormat::get4f(offsets[3]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Single-coordinate variant: gathers four samples for st into out_pixels,
// using clamp() of this object to keep the texel coordinates in range.
void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const
{
	// Local functor stands in for a lambda and forwards to clamp().
	struct ClampBoth
	{
		explicit ClampBoth(const SSAPixelType *owner) : owner(owner) { }
		SSAVec4i operator()(SSAVec4i sstt) { return owner->clamp(sstt); }
		const SSAPixelType *owner;
	};

	gather_helper4f(st, out_pixels, ClampBoth(this));
}
|
|
||||||
|
|
||||||
// Single-coordinate bilinear sample with clamped coordinates: gathers the four
// samples with gather_clamp4f() and blends them with linear_interpolate4f().
SSAVec4f linear_clamp4f(SSAVec4f st) const
{
	// Scope hint stays active while the values below are created.
	SSAScopeHint hint("linearclamp");

	SSAVec4f texels[4];
	gather_clamp4f(st, texels);

	SSAVec4f blended = linear_interpolate4f(st, texels);
	return blended;
}
|
|
||||||
|
|
||||||
// Single-coordinate variant: blends four gathered samples using the fractional
// position of the coordinate held in lanes 0 and 1 of st.
SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const
{
	// (s, s, t, t) -> fractional parts (a, a, b, b) and their complements.
	SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
	SSAVec4f frac = fract(sstt * _widthheightps - 0.5f);
	SSAVec4f rest = 1.0f - frac;

	// Pack one copy of each factor, then multiply by a rotated copy of itself.
	// As the original variable name spelled out, the resulting lanes are
	// (a*b, (1-a)*b, (1-a)*(1-b), a*(1-b)).
	SSAVec4f factors = SSAVec4f::shuffle(frac, rest, 0, 2, 4, 6);
	SSAVec4f weights = factors * SSAVec4f::shuffle(factors, 1, 2, 3, 0);

	// Accumulate term by term, keeping the original left-to-right summation.
	SSAVec4f blended = samples[0] * SSAVec4f::shuffle(weights, 2, 2, 2, 2);
	blended = blended + samples[1] * SSAVec4f::shuffle(weights, 3, 3, 3, 3);
	blended = blended + samples[2] * SSAVec4f::shuffle(weights, 1, 1, 1, 1);
	blended = blended + samples[3] * SSAVec4f::shuffle(weights, 0, 0, 0, 0);
	return blended;
}
|
|
||||||
|
|
||||||
// Data members read by the sampling helpers above.
public:
|
|
||||||
// Not referenced by the methods visible in this part of the file;
// presumably the texture width - TODO confirm.
SSAInt _width;
|
|
||||||
// Not referenced by the methods visible in this part of the file;
// presumably the texture height - TODO confirm.
SSAInt _height;
|
|
||||||
// Multiplied with the wrapped integer coordinates in the single-coordinate
// gather_helper4f() to form line offsets.
SSAVec4i _width32;
|
|
||||||
// Multiplied (mulhi and plain product) with the packed 16-bit row values in
// the (s, t, index) gather_helper4f().
SSAVec8s _width16;
|
|
||||||
// Scale applied to s before taking floor/fract.
SSAVec4f _widthps;
|
|
||||||
// Scale applied to t before taking floor/fract.
SSAVec4f _heightps;
|
|
||||||
|
|
||||||
// clamp() limits coordinates to [0, _widthheight - 1].
SSAVec4i _widthheight;
|
|
||||||
// Scale applied to the (s, s, t, t) vector in the single-coordinate helpers.
SSAVec4f _widthheightps;
|
|
||||||
};
|
|
|
@ -41,12 +41,12 @@ llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm:
|
||||||
return func;
|
return func;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Diff rendering: each pair shows the old line (alloca) followed by the new
// line (alloc_stack). Convenience overload that forwards to the
// (type, size) overload with a size of SSAInt(1).
// NOTE(review): presumably renamed because `alloca` clashes with a platform
// macro on macOS - confirm against the commit description.
llvm::Value *SSAScope::alloca(llvm::Type *type)
|
llvm::Value *SSAScope::alloc_stack(llvm::Type *type)
|
||||||
{
|
{
|
||||||
return alloca(type, SSAInt(1));
|
return alloc_stack(type, SSAInt(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
|
llvm::Value *SSAScope::alloc_stack(llvm::Type *type, SSAInt size)
|
||||||
{
|
{
|
||||||
// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
|
// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
|
||||||
llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock();
|
llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock();
|
||||||
|
|
|
@ -12,8 +12,8 @@ public:
|
||||||
static llvm::Module *module();
|
static llvm::Module *module();
|
||||||
static llvm::IRBuilder<> &builder();
|
static llvm::IRBuilder<> &builder();
|
||||||
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
|
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
|
||||||
static llvm::Value *alloca(llvm::Type *type);
|
static llvm::Value *alloc_stack(llvm::Type *type);
|
||||||
static llvm::Value *alloca(llvm::Type *type, SSAInt size);
|
static llvm::Value *alloc_stack(llvm::Type *type, SSAInt size);
|
||||||
static llvm::MDNode *constant_scope_list();
|
static llvm::MDNode *constant_scope_list();
|
||||||
static const std::string &hint();
|
static const std::string &hint();
|
||||||
static void set_hint(const std::string &hint);
|
static void set_hint(const std::string &hint);
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "ssa_scope.h"
|
||||||
|
|
||||||
template<typename SSAVariable>
|
template<typename SSAVariable>
|
||||||
class SSAStack
|
class SSAStack
|
||||||
{
|
{
|
||||||
|
@ -8,7 +10,7 @@ public:
|
||||||
// Diff rendering: each pair shows the old line followed by the new line.
// The constructor initialises v to 0 and then assigns it the value returned
// by SSAScope::alloca() (old) / SSAScope::alloc_stack() (new) for
// SSAVariable::llvm_type().
SSAStack()
|
SSAStack()
|
||||||
: v(0)
|
: v(0)
|
||||||
{
|
{
|
||||||
v = SSAScope::alloca(SSAVariable::llvm_type());
|
v = SSAScope::alloc_stack(SSAVariable::llvm_type());
|
||||||
}
|
}
|
||||||
|
|
||||||
SSAVariable load() const
|
SSAVariable load() const
|
||||||
|
|
|
@ -55,10 +55,17 @@ SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
|
||||||
std::vector<llvm::Constant*> constants;
|
std::vector<llvm::Constant*> constants;
|
||||||
constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
|
constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
|
||||||
v = llvm::ConstantVector::get(constants);
|
v = llvm::ConstantVector::get(constants);
|
||||||
|
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i0.v, SSAInt(0).v, SSAScope::hint());
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i1.v, SSAInt(1).v, SSAScope::hint());
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i2.v, SSAInt(2).v, SSAScope::hint());
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i3.v, SSAInt(3).v, SSAScope::hint());
|
||||||
|
#else
|
||||||
v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
|
v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
|
||||||
v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
|
v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
|
||||||
v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
|
v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
|
||||||
v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
|
v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
SSAVec4i::SSAVec4i(SSAVec4f f32)
|
SSAVec4i::SSAVec4i(SSAVec4f f32)
|
||||||
|
@ -84,7 +91,11 @@ SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value)
|
||||||
|
|
||||||
// Diff rendering: unchanged lines appear twice (old, then new); the
// preprocessor lines and the SSAInt(index) call exist only in the new version.
// Returns a new SSAVec4i built from CreateInsertElement(v, value.v, index).
// For LLVM versions before 3.9 the index is passed as an llvm value
// (SSAInt(index).v); otherwise the plain integer index is passed directly.
SSAVec4i SSAVec4i::insert(int index, SSAInt value)
|
SSAVec4i SSAVec4i::insert(int index, SSAInt value)
|
||||||
{
|
{
|
||||||
|
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||||
|
return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, SSAInt(index).v, SSAScope::hint()));
|
||||||
|
#else
|
||||||
return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint()));
|
return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint()));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
SSAVec4i SSAVec4i::insert(int index, int value)
|
SSAVec4i SSAVec4i::insert(int index, int value)
|
||||||
|
|
Loading…
Reference in a new issue