Use LLVM to JIT the code for one of the drawer functions

This commit is contained in:
Magnus Norddahl 2016-09-26 09:00:19 +02:00
parent 5ef46d1730
commit 3dd8b593b6
52 changed files with 4705 additions and 0 deletions

View File

@ -104,6 +104,15 @@ if( WIN32 )
endif()
add_definitions( -D_WIN32 )
set( FMOD_SEARCH_PATHS
"C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api"
"C:/Program Files (x86)/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api"
# This next one is for Randy.
"E:/Software/Dev/FMOD/${WIN_TYPE}/api"
)
set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc )
set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib )
set( FMOD_SEARCH_PATHS
"C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api"
@ -255,6 +264,57 @@ if( NOT NO_OPENAL )
endif()
endif()
# Locate LLVM through its CMake package configuration (LLVMConfig.cmake). REQUIRED makes
# configuration stop if no LLVM package is found.
# Example location of LLVMConfig.cmake (presumably the original author's LLVM 3.9.0 build tree;
# point LLVM_DIR at your own copy):
# C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm
find_package(LLVM REQUIRED CONFIG)
# Report which LLVM version and which LLVMConfig.cmake directory were picked up.
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
# Map the LLVM component names below to library names and store them in llvm_libs.
# Both the x86 and the aarch64 backend components are requested unconditionally.
llvm_map_components_to_libnames(llvm_libs
analysis
asmparser
asmprinter
bitreader
bitwriter
codegen
core
executionengine
globalisel
instcombine
ipo
irreader
linker
lto
mc
mcdisassembler
mcjit
mcparser
mirparser
object
objectyaml
orcjit
passes
scalaropts
selectiondag
support
symbolize
tablegen
target
transformutils
vectorize
x86asmparser
x86asmprinter
x86codegen
x86desc
x86info
x86utils
aarch64asmparser
aarch64asmprinter
aarch64codegen
aarch64desc
aarch64info
aarch64utils)
# Make the LLVM headers visible to the sources in this directory.
include_directories(${LLVM_INCLUDE_DIRS})
# Append the mapped LLVM libraries to the ZDOOM_LIBS list.
set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} )
if( NOT NO_FMOD )
# Search for FMOD include files
if( NOT WIN32 )
@ -843,6 +903,9 @@ file( GLOB HEADER_FILES
posix/*.h
posix/cocoa/*.h
posix/sdl/*.h
r_compiler/*.h
r_compiler/ssa/*.h
r_compiler/fixedfunction/*.h
r_data/*.h
resourcefiles/*.h
sfmt/*.h
@ -1372,6 +1435,26 @@ set (PCH_SOURCES
fragglescript/t_spec.cpp
fragglescript/t_variable.cpp
fragglescript/t_cmd.cpp
r_compiler/ssa/ssa_bool.cpp
r_compiler/ssa/ssa_float.cpp
r_compiler/ssa/ssa_float_ptr.cpp
r_compiler/ssa/ssa_for_block.cpp
r_compiler/ssa/ssa_function.cpp
r_compiler/ssa/ssa_if_block.cpp
r_compiler/ssa/ssa_int.cpp
r_compiler/ssa/ssa_int_ptr.cpp
r_compiler/ssa/ssa_scope.cpp
r_compiler/ssa/ssa_struct_type.cpp
r_compiler/ssa/ssa_ubyte.cpp
r_compiler/ssa/ssa_ubyte_ptr.cpp
r_compiler/ssa/ssa_value.cpp
r_compiler/ssa/ssa_vec4f.cpp
r_compiler/ssa/ssa_vec4f_ptr.cpp
r_compiler/ssa/ssa_vec4i.cpp
r_compiler/ssa/ssa_vec4i_ptr.cpp
r_compiler/ssa/ssa_vec8s.cpp
r_compiler/ssa/ssa_vec16ub.cpp
r_compiler/fixedfunction/fixedfunction.cpp
r_data/sprites.cpp
r_data/voxels.cpp
r_data/renderstyle.cpp
@ -1587,6 +1670,9 @@ source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURREN
source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$")
source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+")
source_group("Render Interface" FILES r_defs.h r_renderer.h r_sky.cpp r_sky.h r_state.h r_utility.cpp r_utility.h)
source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+")
source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+")
source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+")
source_group("Resource Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/resourcefiles/.+")
source_group("POSIX Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/.+")
source_group("Cocoa Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/cocoa/.+")

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,130 @@
#pragma once
#include "r_compiler/ssa/ssa_vec4f.h"
#include "r_compiler/ssa/ssa_vec4i.h"
#include "r_compiler/ssa/ssa_vec8s.h"
#include "r_compiler/ssa/ssa_vec16ub.h"
#include "r_compiler/ssa/ssa_int.h"
#include "r_compiler/ssa/ssa_ubyte_ptr.h"
#include "r_compiler/ssa/ssa_vec4f_ptr.h"
#include "r_compiler/ssa/ssa_vec4i_ptr.h"
#include "r_compiler/ssa/ssa_pixels.h"
#include "r_compiler/ssa/ssa_stack.h"
#include "r_compiler/ssa/ssa_barycentric_weight.h"
#include "r_compiler/llvm_include.h"
// Bundles the LLVM context, module and execution engine used to generate code and to look up
// the resulting functions by name.
class RenderProgram
{
public:
    RenderProgram();
    ~RenderProgram();

    // Resolves `name` through PointerToFunction() and reinterprets the address as Func*.
    // Nothing here checks that Func matches the real signature of the generated function;
    // the caller is responsible for supplying the right type.
    template<typename Func>
    Func *GetProcAddress(const char *name) { return reinterpret_cast<Func*>(PointerToFunction(name)); }

    // Accessors for the underlying LLVM objects. context() dereferences mContext, so it must
    // only be called once the context has been created.
    llvm::LLVMContext &context() { return *mContext; }
    llvm::Module *module() { return mModule; }
    llvm::ExecutionEngine *engine() { return mEngine.get(); }

private:
    // Returns the address of the named function (defined in the .cpp file).
    void *PointerToFunction(const char *name);

    // Keep this declaration order: members are destroyed in reverse order, so the engine is
    // torn down before the context.
    std::unique_ptr<llvm::LLVMContext> mContext;

    // Raw pointer, not freed by this class's members.
    // NOTE(review): presumably the module is handed over to mEngine - confirm in the .cpp.
    // Default-initialised so it is never read as an indeterminate value.
    llvm::Module *mModule = nullptr;

    std::unique_ptr<llvm::ExecutionEngine> mEngine;
};
// Exposes a draw-span routine as a plain function pointer and owns the RenderProgram
// it is associated with.
class FixedFunction
{
public:
FixedFunction();
// Function pointer taking (int, uint32_t *); null until something assigns it.
// NOTE(review): presumably assigned by the constructor after code generation - confirm in fixedfunction.cpp.
void(*DrawSpan)(int, uint32_t *) = nullptr;
private:
// NOTE(review): presumably generates the code behind DrawSpan - the definition is not visible here.
void CodegenDrawSpan();
RenderProgram mProgram;
};
// Everything between this #if 0 and the matching #endif is compiled out.
// It declares GlslFixedFunction, a code generator interface built around GlslProgram/GlslCodeGen
// (triangle drawing, scanline rendering, texture and math helpers).
// NOTE(review): looks like reference code kept from an earlier GLSL-oriented implementation - confirm before reviving or deleting it.
#if 0
class GlslProgram;
class GlslCodeGen;
class GlslFixedFunction
{
public:
GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen);
void codegen();
static llvm::Type *get_sampler_struct(llvm::LLVMContext &context);
private:
void codegen_draw_triangles(int num_vertex_in, int num_vertex_out);
void codegen_calc_window_positions();
void codegen_calc_polygon_face_direction();
void codegen_calc_polygon_y_range();
void codegen_update_polygon_edge();
void codegen_texture();
void codegen_normalize();
void codegen_reflect();
void codegen_max();
void codegen_pow();
void codegen_dot();
void codegen_mix();
// Values shared between the scanline helpers below (passed around by reference).
struct OuterData
{
OuterData() : sampler() { }
SSAInt start;
SSAInt end;
SSAInt input_width;
SSAInt input_height;
SSAInt output_width;
SSAInt output_height;
SSAUBytePtr input_pixels;
SSAUBytePtr output_pixels_line;
SSAVec4fPtr sse_left_varying_in;
SSAVec4fPtr sse_right_varying_in;
int num_varyings;
SSAVec4f viewport_x;
SSAVec4f viewport_rcp_half_width;
SSAVec4f dx;
SSAVec4f dw;
SSAVec4f v1w;
SSAVec4f v1x;
llvm::Value *sampler;
};
void render_polygon(
SSAInt input_width,
SSAInt input_height,
SSAUBytePtr input_data,
SSAInt output_width,
SSAInt output_height,
SSAUBytePtr output_data,
SSAInt viewport_x,
SSAInt viewport_y,
SSAInt viewport_width,
SSAInt viewport_height,
SSAInt num_vertices,
std::vector<SSAVec4fPtr> fragment_ins,
SSAInt core,
SSAInt num_cores);
void codegen_render_scanline(int num_varyings);
void process_first_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
void process_last_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors);
void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest);
GlslProgram &program;
GlslCodeGen &vertex_codegen;
GlslCodeGen &fragment_codegen;
};
#endif

View File

@ -0,0 +1,46 @@
#pragma once

// Single include point for the LLVM headers used by the render compiler.
//
// Any min/max macros that are defined when this header is included are hidden while the LLVM
// headers are parsed and restored afterwards. push_macro/pop_macro save and restore the
// complete macro definition (parameter list included) and are no-ops for an undefined macro.
// The previous "#define llvm_min_bug min ... #define min llvm_min_bug" sequence could not do
// that: a macro stores its replacement tokens, not the definition of `min` at that moment, so
// after the header `min` merely expanded to the bare token `min`.
#pragma push_macro("min")
#pragma push_macro("max")
#undef min
#undef max

// The warning numbers below are specific to the Microsoft compiler. They are deliberately not
// re-enabled at the end of this header, matching the original behaviour.
#if defined(_MSC_VER)
#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned
#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible
#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list
#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning)
#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_SCL_SECURE_NO_WARNINGS. See documentation on how to use Visual C++ 'Checked Iterators'
#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data
#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once
#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception
#endif

#include <llvm/IR/DerivedTypes.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/MCJIT.h>
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Attributes.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/CodeGen/AsmPrinter.h>
#include <llvm/MC/MCAsmInfo.h>

// Bring back whatever min/max definitions were active before this header.
#pragma pop_macro("max")
#pragma pop_macro("min")

View File

@ -0,0 +1,97 @@
#pragma once
#include "ssa_vec4f.h"
#include "ssa_float.h"
#include "ssa_int.h"
// Viewport rectangle expressed as SSA values, plus derived quantities used to map
// normalized coordinates to window coordinates.
class SSAViewport
{
public:
// Stores x, y, width and height and derives:
//   right = x + width, bottom = y + height,
//   half_width/half_height = size * 0.5,
//   rcp_half_width/rcp_half_height = 1 / (size * 0.5).
// The SSAInt/SSAFloat operators used here build LLVM values through SSAScope, so a scope
// must be active when a viewport is constructed.
SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height)
: x(x), y(y), width(width), height(height), right(x + width), bottom(y + height),
half_width(SSAFloat(width) * 0.5f), half_height(SSAFloat(height) * 0.5f),
rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)),
rcp_half_height(1.0f / (SSAFloat(height) * 0.5f))
{
}
SSAInt x, y;
SSAInt width, height;
SSAInt right, bottom;
SSAFloat half_width;
SSAFloat half_height;
SSAFloat rcp_half_width;
SSAFloat rcp_half_height;
// Divides `clip` by SSAVec4f::shuffle(clip, 3, 3, 3, 3) (presumably its 4th component
// replicated into every lane - confirm in ssa_vec4f.h), writes the original 4th component
// back into element 3, and passes the result to normalized_to_window().
SSAVec4f clip_to_window(SSAVec4f clip) const
{
SSAFloat w = clip[3];
SSAVec4f normalized = SSAVec4f::insert_element(clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3), w, 3);
return normalized_to_window(normalized);
}
// Builds a vector with:
//   [0] = x + (normalized[0] + 1) * half_width
//   [1] = y + (normalized[1] + 1) * half_height
//   [2] = 0 - normalized[2]
//   [3] = normalized[3] (unchanged)
SSAVec4f normalized_to_window(SSAVec4f normalized) const
{
return SSAVec4f(
SSAFloat(x) + (normalized[0] + 1.0f) * half_width,
SSAFloat(y) + (normalized[1] + 1.0f) * half_height,
0.0f - normalized[2],
normalized[3]);
}
};
// Holds a viewport and two vertices (v1, v2) and computes an interpolation weight along the
// v1-v2 edge for a given window x or y coordinate.
class SSABarycentricWeight
{
public:
SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2);
// Weight for window column x; see the definition below for the formula.
SSAFloat from_window_x(SSAInt x) const;
// Weight for window row y; see the definition below for the formula.
SSAFloat from_window_y(SSAInt y) const;
SSAViewport viewport;
SSAVec4f v1;
SSAVec4f v2;
};
// Copies the viewport and both vertices into the members; no other work is done here.
inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport viewport, SSAVec4f v1, SSAVec4f v2)
: viewport(viewport), v1(v1), v2(v2)
{
}
// Returns 1 - t for window column x, where
//   xn = (x + 0.5 - viewport.x) * viewport.rcp_half_width - 1
//   t  = (xn * v1[3] - v1[0]) / ((v2[0] - v1[0]) - xn * (v2[3] - v1[3]))
inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const
{
    SSAFloat offset_in_viewport = SSAFloat(x) + 0.5f - SSAFloat(viewport.x);
    SSAFloat xn = offset_in_viewport * viewport.rcp_half_width - 1.0f;
    SSAFloat delta_x = v2[0] - v1[0];
    SSAFloat delta_w = v2[3] - v1[3];
    SSAFloat t = (xn * v1[3] - v1[0]) / (delta_x - xn * delta_w);
    return 1.0f - t;
}
// Returns 1 - t for window row y, where
//   yn = (y + 0.5 - viewport.y) * viewport.rcp_half_height - 1
//   t  = (yn * v1[3] - v1[1]) / ((v2[1] - v1[1]) - yn * (v2[3] - v1[3]))
inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const
{
    SSAFloat offset_in_viewport = SSAFloat(y) + 0.5f - SSAFloat(viewport.y);
    SSAFloat yn = offset_in_viewport * viewport.rcp_half_height - 1.0f;
    SSAFloat delta_y = v2[1] - v1[1];
    SSAFloat delta_w = v2[3] - v1[3];
    SSAFloat t = (yn * v1[3] - v1[1]) / (delta_y - yn * delta_w);
    return 1.0f - t;
}
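/*
Derivation of t as used by from_window_y (from_window_x is identical with x in place of y),
where dy = v2.y - v1.y and dw = v2.w - v1.w:

y = (v1.y + t * dy) / (v1.w + t * dw)
y * v1.w + y * t * dw = v1.y + t * dy
y * v1.w - v1.y = t * (dy - y * dw)
t = (y * v1.w - v1.y) / (dy - y * dw)
*/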

View File

@ -0,0 +1,91 @@
#include "ssa_bool.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates an SSABool that does not wrap any LLVM value yet.
SSABool::SSABool()
    : v(nullptr)
{
}

// Construction from a compile-time bool is not implemented; the draft is kept for reference.
/*
SSABool::SSABool(bool constant)
: v(0)
{
}
*/

// Wraps an already existing LLVM value without emitting anything.
SSABool::SSABool(llvm::Value *v)
    : v(v)
{
}

// LLVM type behind SSABool: the 1-bit integer type of the current SSAScope context.
llvm::Type *SSABool::llvm_type()
{
    auto &&ctx = SSAScope::context();
    return llvm::Type::getInt1Ty(ctx);
}
// Emits an `and` of both operands. As with any overloaded operator&&, both sides have
// already been evaluated: there is no short-circuiting.
SSABool operator&&(const SSABool &a, const SSABool &b)
{
    llvm::Value *both = SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint());
    return SSABool(both);
}

// Emits an `or` of both operands (no short-circuiting, see operator&&).
SSABool operator||(const SSABool &a, const SSABool &b)
{
    llvm::Value *either = SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint());
    return SSABool(either);
}

// Emits the logical negation of the operand.
SSABool operator!(const SSABool &a)
{
    llvm::Value *negated = SSAScope::builder().CreateNot(a.v, SSAScope::hint());
    return SSABool(negated);
}
// Integer comparisons. The ordering comparisons are signed (ICmpS*).

// a < b
SSABool operator<(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a <= b
SSABool operator<=(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a == b
SSABool operator==(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a >= b
SSABool operator>=(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a > b
SSABool operator>(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}
/////////////////////////////////////////////////////////////////////////////
// Floating point comparisons. All of them use the ordered predicates (FCmpO*).

// a < b
SSABool operator<(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a <= b
SSABool operator<=(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a == b
SSABool operator==(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a >= b
SSABool operator>=(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

// a > b
SSABool operator>(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *cmp = SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint());
    return SSABool(cmp);
}

View File

@ -0,0 +1,37 @@
#pragma once
#include "ssa_int.h"
#include "ssa_float.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value that represents a boolean in generated code.
class SSABool
{
public:
// Leaves v null.
SSABool();
//SSABool(bool constant);
// Wraps an existing LLVM value.
explicit SSABool(llvm::Value *v);
// Named alternative to the explicit constructor.
static SSABool from_llvm(llvm::Value *v) { return SSABool(v); }
// LLVM type used for SSABool values.
static llvm::Type *llvm_type();
// The wrapped value; public and may be null for a default-constructed object.
llvm::Value *v;
};
// Logical operators. Being overloads, they evaluate both operands (no short-circuiting).
SSABool operator&&(const SSABool &a, const SSABool &b);
SSABool operator||(const SSABool &a, const SSABool &b);
SSABool operator!(const SSABool &a);
// Integer comparisons producing an SSABool.
SSABool operator<(const SSAInt &a, const SSAInt &b);
SSABool operator<=(const SSAInt &a, const SSAInt &b);
SSABool operator==(const SSAInt &a, const SSAInt &b);
SSABool operator>=(const SSAInt &a, const SSAInt &b);
SSABool operator>(const SSAInt &a, const SSAInt &b);
// Floating point comparisons producing an SSABool.
SSABool operator<(const SSAFloat &a, const SSAFloat &b);
SSABool operator<=(const SSAFloat &a, const SSAFloat &b);
SSABool operator==(const SSAFloat &a, const SSAFloat &b);
SSABool operator>=(const SSAFloat &a, const SSAFloat &b);
SSABool operator>(const SSAFloat &a, const SSAFloat &b);

View File

@ -0,0 +1,152 @@
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates an SSAFloat that does not wrap any LLVM value yet.
SSAFloat::SSAFloat()
    : v(nullptr)
{
}

// Wraps a floating point constant created in the current SSAScope context.
SSAFloat::SSAFloat(float constant)
    : v(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)))
{
}

// Converts a signed integer value to float (emits a signed-int-to-fp conversion).
SSAFloat::SSAFloat(SSAInt i)
    : v(SSAScope::builder().CreateSIToFP(i.v, llvm::Type::getFloatTy(SSAScope::context()), SSAScope::hint()))
{
}

// Wraps an already existing LLVM value without emitting anything.
SSAFloat::SSAFloat(llvm::Value *v)
    : v(v)
{
}

// LLVM type behind SSAFloat: the float type of the current SSAScope context.
llvm::Type *SSAFloat::llvm_type()
{
    auto &&ctx = SSAScope::context();
    return llvm::Type::getFloatTy(ctx);
}
// Math helpers implemented as calls to LLVM intrinsics. Each one requests the intrinsic
// overloaded on the float type and emits a call to it.

// Emits a call to llvm.sqrt on f.
SSAFloat SSAFloat::sqrt(SSAFloat f)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::sqrt, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, f.v, SSAScope::hint()));
}

// Emits a call to llvm.sin on val.
SSAFloat SSAFloat::sin(SSAFloat val)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::sin, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, val.v, SSAScope::hint()));
}

// Emits a call to llvm.cos on val.
SSAFloat SSAFloat::cos(SSAFloat val)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::cos, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, val.v, SSAScope::hint()));
}

// Emits a call to llvm.pow with (val, power). Only one overload type is passed even though
// the intrinsic takes two arguments.
SSAFloat SSAFloat::pow(SSAFloat val, SSAFloat power)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    std::vector<llvm::Value*> call_args{ val.v, power.v };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::pow, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, call_args, SSAScope::hint()));
}

// Emits a call to llvm.exp on val.
SSAFloat SSAFloat::exp(SSAFloat val)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::exp, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, val.v, SSAScope::hint()));
}

// Emits a call to llvm.log on val.
SSAFloat SSAFloat::log(SSAFloat val)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::log, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, val.v, SSAScope::hint()));
}

// Emits a call to llvm.fma with (a, b, c). Only one overload type is passed even though
// the intrinsic takes three arguments.
SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c)
{
    std::vector<llvm::Type *> overload_types{ SSAFloat::llvm_type() };
    std::vector<llvm::Value*> call_args{ a.v, b.v, c.v };
    auto callee = SSAScope::intrinsic(llvm::Intrinsic::fma, overload_types);
    return SSAFloat(SSAScope::builder().CreateCall(callee, call_args, SSAScope::hint()));
}
// Floating point arithmetic on two SSA values; each operator emits one instruction.

// a + b
SSAFloat operator+(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *sum = SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint());
    return SSAFloat(sum);
}

// a - b
SSAFloat operator-(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *difference = SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint());
    return SSAFloat(difference);
}

// a * b
SSAFloat operator*(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *product = SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint());
    return SSAFloat(product);
}

// a / b
SSAFloat operator/(const SSAFloat &a, const SSAFloat &b)
{
    llvm::Value *quotient = SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint());
    return SSAFloat(quotient);
}
// Mixed float/SSAFloat arithmetic: the plain float is first wrapped as a constant SSAFloat,
// then the SSAFloat/SSAFloat operator is applied.

SSAFloat operator+(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs + b;
}

SSAFloat operator-(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs - b;
}

SSAFloat operator*(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs * b;
}

SSAFloat operator/(float a, const SSAFloat &b)
{
    const SSAFloat lhs(a);
    return lhs / b;
}

SSAFloat operator+(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a + rhs;
}

SSAFloat operator-(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a - rhs;
}

SSAFloat operator*(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a * rhs;
}

SSAFloat operator/(const SSAFloat &a, float b)
{
    const SSAFloat rhs(b);
    return a / rhs;
}

View File

@ -0,0 +1,42 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAInt;
// Thin wrapper around an llvm::Value that represents a float in generated code.
class SSAFloat
{
public:
// Leaves v null.
SSAFloat();
// Implicit conversion from an SSA integer value.
SSAFloat(SSAInt i);
// Implicit conversion from a compile-time float constant.
SSAFloat(float constant);
// Wraps an existing LLVM value.
explicit SSAFloat(llvm::Value *v);
// Named alternative to the explicit constructor.
static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); }
// LLVM type used for SSAFloat values.
static llvm::Type *llvm_type();
// Math helpers operating on SSA values.
static SSAFloat sqrt(SSAFloat f);
static SSAFloat sin(SSAFloat val);
static SSAFloat cos(SSAFloat val);
static SSAFloat pow(SSAFloat val, SSAFloat power);
static SSAFloat exp(SSAFloat val);
static SSAFloat log(SSAFloat val);
static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c);
// The wrapped value; public and may be null for a default-constructed object.
llvm::Value *v;
};
// Arithmetic between two SSA floats.
SSAFloat operator+(const SSAFloat &a, const SSAFloat &b);
SSAFloat operator-(const SSAFloat &a, const SSAFloat &b);
SSAFloat operator*(const SSAFloat &a, const SSAFloat &b);
SSAFloat operator/(const SSAFloat &a, const SSAFloat &b);
// Arithmetic with a plain float on the left-hand side.
SSAFloat operator+(float a, const SSAFloat &b);
SSAFloat operator-(float a, const SSAFloat &b);
SSAFloat operator*(float a, const SSAFloat &b);
SSAFloat operator/(float a, const SSAFloat &b);
// Arithmetic with a plain float on the right-hand side.
SSAFloat operator+(const SSAFloat &a, float b);
SSAFloat operator-(const SSAFloat &a, float b);
SSAFloat operator*(const SSAFloat &a, float b);
SSAFloat operator/(const SSAFloat &a, float b);

View File

@ -0,0 +1,65 @@
#include "ssa_float_ptr.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates an SSAFloatPtr that does not wrap any LLVM value yet.
SSAFloatPtr::SSAFloatPtr()
    : v(nullptr)
{
}

// Wraps an already existing LLVM pointer value without emitting anything.
SSAFloatPtr::SSAFloatPtr(llvm::Value *v)
    : v(v)
{
}

// LLVM type behind SSAFloatPtr: pointer-to-float in the current SSAScope context.
llvm::Type *SSAFloatPtr::llvm_type()
{
    auto &&ctx = SSAScope::context();
    return llvm::Type::getFloatPtrTy(ctx);
}
// Returns a pointer advanced by `index` elements (emits a GEP); nothing is loaded.
SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const
{
    llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
    return SSAFloatPtr(element_ptr);
}

// Emits a non-volatile load of the float this pointer refers to.
SSAFloat SSAFloatPtr::load() const
{
    llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
    return SSAFloat(loaded);
}
// Reinterprets this pointer as a pointer to <4 x float> and emits a non-volatile load with
// no explicit alignment.
SSAVec4f SSAFloatPtr::load_vec4f() const
{
    auto &&builder = SSAScope::builder();
    llvm::PointerType *vec_ptr_type = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
    llvm::Value *vec_ptr = builder.CreateBitCast(v, vec_ptr_type, SSAScope::hint());
    return SSAVec4f::from_llvm(builder.CreateLoad(vec_ptr, false, SSAScope::hint()));
}

// Same as load_vec4f(), but the load instruction is created with an explicit alignment of 4
// bytes so the address only has to be float-aligned.
SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const
{
    auto &&builder = SSAScope::builder();
    llvm::PointerType *vec_ptr_type = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
    llvm::Value *vec_ptr = builder.CreateBitCast(v, vec_ptr_type, SSAScope::hint());
    llvm::LoadInst *load = new llvm::LoadInst(vec_ptr, SSAScope::hint(), false, 4);
    return SSAVec4f::from_llvm(builder.Insert(load, SSAScope::hint()));
}
void SSAFloatPtr::store(const SSAFloat &new_value)
{
SSAScope::builder().CreateStore(new_value.v, v, false);
}
void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value)
{
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16);
}
void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value)
{
/*llvm::Value *values[2] =
{
SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())),
new_value.v
};
SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()));
}

View File

@ -0,0 +1,27 @@
#pragma once
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_vec4f.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value that represents a pointer to float in generated code.
class SSAFloatPtr
{
public:
// Leaves v null.
SSAFloatPtr();
// Wraps an existing LLVM pointer value.
explicit SSAFloatPtr(llvm::Value *v);
// Named alternative to the explicit constructor.
static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); }
// LLVM type used for SSAFloatPtr values.
static llvm::Type *llvm_type();
// Pointer to the element `index` positions further on; does not load anything.
SSAFloatPtr operator[](SSAInt index) const;
// Loads one float.
SSAFloat load() const;
// Loads four consecutive floats as a vector; the *_unaligned variant is for addresses that
// are not vector-aligned.
SSAVec4f load_vec4f() const;
SSAVec4f load_unaligned_vec4f() const;
// Stores one float.
void store(const SSAFloat &new_value);
// Stores four consecutive floats; the *_unaligned variant is for addresses that are not
// vector-aligned.
void store_vec4f(const SSAVec4f &new_value);
void store_unaligned_vec4f(const SSAVec4f &new_value);
// The wrapped value; public and may be null for a default-constructed object.
llvm::Value *v;
};

View File

@ -0,0 +1,25 @@
#include "ssa_for_block.h"
#include "ssa_scope.h"
// Opens a loop: creates the "forbegin", "forloop" and "forend" basic blocks in the function
// that currently receives instructions, branches into "forbegin" and continues emitting there
// (this is where the caller builds the loop condition).
SSAForBlock::SSAForBlock()
    : if_basic_block(nullptr), loop_basic_block(nullptr), end_basic_block(nullptr)
{
    auto &&builder = SSAScope::builder();
    llvm::Function *owner = builder.GetInsertBlock()->getParent();

    // Creation order is kept as-is; it determines the order of the blocks in the function.
    if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forbegin", owner);
    loop_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forloop", owner);
    end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forend", owner);

    builder.CreateBr(if_basic_block);
    builder.SetInsertPoint(if_basic_block);
}
// Ends the condition part: branches to the loop body when true_condition holds, otherwise to
// the end block, then continues emitting inside the loop body.
void SSAForBlock::loop_block(SSABool true_condition)
{
    auto &&builder = SSAScope::builder();
    builder.CreateCondBr(true_condition.v, loop_basic_block, end_basic_block);
    builder.SetInsertPoint(loop_basic_block);
}

// Ends the loop body: jumps back to the condition block and continues emitting after the loop.
void SSAForBlock::end_block()
{
    auto &&builder = SSAScope::builder();
    builder.CreateBr(if_basic_block);
    builder.SetInsertPoint(end_basic_block);
}

View File

@ -0,0 +1,18 @@
#pragma once
#include "ssa_bool.h"
#include "r_compiler/llvm_include.h"
// Helper for emitting a loop. Usage order: construct, emit the condition, call loop_block(),
// emit the body, call end_block().
class SSAForBlock
{
public:
// Creates the loop's basic blocks and moves the insert point to the condition block.
SSAForBlock();
// Branches on true_condition into the body or past the loop; the body is emitted next.
void loop_block(SSABool true_condition);
// Branches back to the condition block; code after the loop is emitted next.
void end_block();
private:
// Condition block ("forbegin"), body block ("forloop") and exit block ("forend").
llvm::BasicBlock *if_basic_block;
llvm::BasicBlock *loop_basic_block;
llvm::BasicBlock *end_basic_block;
};

View File

@ -0,0 +1,55 @@
#include "ssa_function.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "ssa_value.h"
#include "r_compiler/llvm_include.h"
// Starts the description of a function called `name`. The return type defaults to void and no
// LLVM function exists until create_public() or create_private() is called.
SSAFunction::SSAFunction(const std::string name)
    : func(nullptr), name(name), return_type(llvm::Type::getVoidTy(SSAScope::context()))
{
}

// Replaces the return type used when the function is created.
void SSAFunction::set_return_type(llvm::Type *type)
{
    this->return_type = type;
}

// Appends one parameter type; parameters keep the order in which they were added.
void SSAFunction::add_parameter(llvm::Type *type)
{
    this->parameters.push_back(type);
}
void SSAFunction::create_public()
{
func = SSAScope::module()->getFunction(name.c_str());
if (func == 0)
{
llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false);
func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module());
//func->setCallingConv(llvm::CallingConv::X86_StdCall);
}
llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func);
SSAScope::builder().SetInsertPoint(entry);
}
void SSAFunction::create_private()
{
func = SSAScope::module()->getFunction(name.c_str());
if (func == 0)
{
llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false);
func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module());
func->addFnAttr(llvm::Attribute::AlwaysInline);
}
llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func);
SSAScope::builder().SetInsertPoint(entry);
}
// Returns the index-th argument of the created function, counting from zero. The index is not
// range-checked and func must already have been created.
SSAValue SSAFunction::parameter(int index)
{
    llvm::Function::arg_iterator position = func->arg_begin();
    for (int remaining = index; remaining > 0; --remaining)
        ++position;
    return SSAValue::from_llvm(static_cast<llvm::Argument*>(position));
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <string>
#include <vector>
namespace llvm { class Value; }
namespace llvm { class Type; }
namespace llvm { class Function; }
class SSAInt;
class SSAValue;
// Describes a function (name, return type, parameter types) and creates it in the current
// module on request.
class SSAFunction
{
public:
// Stores the name; the function itself is created by create_public()/create_private().
SSAFunction(const std::string name);
// Configure the signature. These only affect a function that has not been created yet.
void set_return_type(llvm::Type *type);
void add_parameter(llvm::Type *type);
// Create (or look up) the function and start emitting into a new "entry" block.
void create_public();
void create_private();
// Returns the argument at position `index` (zero-based) of the created function.
SSAValue parameter(int index);
// The LLVM function; null until one of the create_* functions has run.
llvm::Function *func;
private:
std::string name;
llvm::Type *return_type;
std::vector<llvm::Type *> parameters;
};

View File

@ -0,0 +1,30 @@
#include "ssa_if_block.h"
#include "ssa_scope.h"
// Creates an if helper with no basic blocks; they are created by if_block().
SSAIfBlock::SSAIfBlock()
    : if_basic_block(nullptr), else_basic_block(nullptr), end_basic_block(nullptr)
{
}

// Creates the "if" and "else" blocks in the current function, branches on true_condition and
// continues emitting in the "if" block. Until else_block() is called, the "else" block also
// serves as the end block.
void SSAIfBlock::if_block(SSABool true_condition)
{
    auto &&builder = SSAScope::builder();
    llvm::Function *owner = builder.GetInsertBlock()->getParent();

    if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "if", owner);
    else_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "else", owner);
    end_basic_block = else_basic_block;

    builder.CreateCondBr(true_condition.v, if_basic_block, else_basic_block);
    builder.SetInsertPoint(if_basic_block);
}
void SSAIfBlock::else_block()
{
end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent());
SSAScope::builder().CreateBr(end_basic_block);
SSAScope::builder().SetInsertPoint(else_basic_block);
}
void SSAIfBlock::end_block()
{
SSAScope::builder().CreateBr(end_basic_block);
SSAScope::builder().SetInsertPoint(end_basic_block);
}

View File

@ -0,0 +1,46 @@
#pragma once
#include "ssa_bool.h"
#include "ssa_phi.h"
#include "r_compiler/llvm_include.h"
// Helper for emitting an if / else construct. Usage order: if_block(), emit the true branch,
// optionally else_block() and the false branch, then end_block().
class SSAIfBlock
{
public:
SSAIfBlock();
// Branches on true_condition; the true branch is emitted next.
void if_block(SSABool true_condition);
// Ends the true branch; the false branch is emitted next.
void else_block();
// Ends the current branch; code after the if construct is emitted next.
void end_block();
private:
llvm::BasicBlock *if_basic_block;
llvm::BasicBlock *else_basic_block;
// Same block as else_basic_block unless else_block() was called.
llvm::BasicBlock *end_basic_block;
};
template<typename T>
T ssa_min(T a, T b)
{
SSAPhi<T> phi;
SSAIfBlock if_block;
if_block.if_block(a <= b);
phi.add_incoming(a);
if_block.else_block();
phi.add_incoming(b);
if_block.end_block();
return phi.create();
}
template<typename T>
T ssa_max(T a, T b)
{
SSAPhi<T> phi;
SSAIfBlock if_block;
if_block.if_block(a >= b);
phi.add_incoming(a);
if_block.else_block();
phi.add_incoming(b);
if_block.end_block();
return phi.create();
}

View File

@ -0,0 +1,117 @@
#include "ssa_int.h"
#include "ssa_float.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates an SSAInt that does not wrap any LLVM value yet.
SSAInt::SSAInt()
    : v(nullptr)
{
}

// Wraps a signed 32-bit integer constant created in the current SSAScope context.
SSAInt::SSAInt(int constant)
    : v(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)))
{
}

// Converts a float value to a signed 32-bit integer (emits an fp-to-signed-int conversion).
SSAInt::SSAInt(SSAFloat f)
    : v(SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()))
{
}

// Wraps an already existing LLVM value without emitting anything.
SSAInt::SSAInt(llvm::Value *v)
    : v(v)
{
}

// LLVM type behind SSAInt: the 32-bit integer type of the current SSAScope context.
llvm::Type *SSAInt::llvm_type()
{
    auto &&ctx = SSAScope::context();
    return llvm::Type::getInt32Ty(ctx);
}
// Integer arithmetic on two SSA values; each operator emits one instruction.
// Division and remainder are the signed variants.

// a + b
SSAInt operator+(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
    return SSAInt(sum);
}

// a - b
SSAInt operator-(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
    return SSAInt(difference);
}

// a * b
SSAInt operator*(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
    return SSAInt(product);
}

// a / b (signed)
SSAInt operator/(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
    return SSAInt(quotient);
}

// a % b (signed)
SSAInt operator%(const SSAInt &a, const SSAInt &b)
{
    llvm::Value *remainder = SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint());
    return SSAInt(remainder);
}
// Mixed int/SSAInt arithmetic: the plain int is first wrapped as a constant SSAInt, then the
// SSAInt/SSAInt operator is applied.

SSAInt operator+(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs + b;
}

SSAInt operator-(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs - b;
}

SSAInt operator*(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs * b;
}

SSAInt operator/(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs / b;
}

SSAInt operator%(int a, const SSAInt &b)
{
    const SSAInt lhs(a);
    return lhs % b;
}

SSAInt operator+(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a + rhs;
}

SSAInt operator-(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a - rhs;
}

SSAInt operator*(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a * rhs;
}

SSAInt operator/(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a / rhs;
}

SSAInt operator%(const SSAInt &a, int b)
{
    const SSAInt rhs(b);
    return a % rhs;
}
// Shifts `a` left by a compile-time number of bits.
SSAInt operator<<(const SSAInt &a, int bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint());
    return SSAInt(shifted);
}

// Shifts `a` right by a compile-time number of bits. This is a logical shift (LShr), not an
// arithmetic one, even though the other SSAInt operators treat values as signed.
SSAInt operator>>(const SSAInt &a, int bits)
{
    llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
    return SSAInt(shifted);
}

View File

@ -0,0 +1,41 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAFloat;
// Thin wrapper around an llvm::Value that represents a 32-bit integer in generated code.
class SSAInt
{
public:
// Leaves v null.
SSAInt();
// Implicit conversion from a compile-time int constant.
SSAInt(int constant);
// Implicit conversion from an SSA float value.
SSAInt(SSAFloat f);
// Wraps an existing LLVM value.
explicit SSAInt(llvm::Value *v);
// Named alternative to the explicit constructor.
static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); }
// LLVM type used for SSAInt values.
static llvm::Type *llvm_type();
// The wrapped value; public and may be null for a default-constructed object.
llvm::Value *v;
};
// Arithmetic between two SSA integers.
SSAInt operator+(const SSAInt &a, const SSAInt &b);
SSAInt operator-(const SSAInt &a, const SSAInt &b);
SSAInt operator*(const SSAInt &a, const SSAInt &b);
SSAInt operator/(const SSAInt &a, const SSAInt &b);
SSAInt operator%(const SSAInt &a, const SSAInt &b);
// Arithmetic with a plain int on the left-hand side.
SSAInt operator+(int a, const SSAInt &b);
SSAInt operator-(int a, const SSAInt &b);
SSAInt operator*(int a, const SSAInt &b);
SSAInt operator/(int a, const SSAInt &b);
SSAInt operator%(int a, const SSAInt &b);
// Arithmetic with a plain int on the right-hand side.
SSAInt operator+(const SSAInt &a, int b);
SSAInt operator-(const SSAInt &a, int b);
SSAInt operator*(const SSAInt &a, int b);
SSAInt operator/(const SSAInt &a, int b);
SSAInt operator%(const SSAInt &a, int b);
// Shifts by a compile-time bit count.
SSAInt operator<<(const SSAInt &a, int bits);
SSAInt operator>>(const SSAInt &a, int bits);

View File

@ -0,0 +1,58 @@
#include "ssa_int_ptr.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Default-construct with no underlying LLVM value.
SSAIntPtr::SSAIntPtr()
	: v(nullptr)
{
}

// Wrap an existing LLVM value.
SSAIntPtr::SSAIntPtr(llvm::Value *v)
	: v(v)
{
}
// LLVM type of this wrapper: pointer to 32-bit integer in the active scope's context.
llvm::Type *SSAIntPtr::llvm_type()
{
	llvm::LLVMContext &ctx = SSAScope::context();
	return llvm::Type::getInt32PtrTy(ctx);
}
// Returns a pointer offset by `index` elements (built with a GEP); this does
// not load from memory.
SSAIntPtr SSAIntPtr::operator[](SSAInt index) const
{
	llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
	return SSAIntPtr::from_llvm(element_ptr);
}
// Emits a non-volatile load of the pointed-to value.
SSAInt SSAIntPtr::load() const
{
	llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
	return SSAInt::from_llvm(loaded);
}
// Loads four consecutive 32-bit integers as one vector. No explicit alignment
// is passed to the load (compare load_unaligned_vec4i, which requests 4).
SSAVec4i SSAIntPtr::load_vec4i() const
{
	llvm::Type *int32_type = llvm::Type::getInt32Ty(SSAScope::context());
	llvm::PointerType *vec_ptr_type = llvm::VectorType::get(int32_type, 4)->getPointerTo();
	llvm::Value *vec_ptr = SSAScope::builder().CreateBitCast(v, vec_ptr_type, SSAScope::hint());
	llvm::Value *loaded = SSAScope::builder().CreateLoad(vec_ptr, false, SSAScope::hint());
	return SSAVec4i::from_llvm(loaded);
}
// Loads four consecutive 32-bit integers as one vector, with the load
// instruction created by hand so that an alignment of 4 can be specified.
SSAVec4i SSAIntPtr::load_unaligned_vec4i() const
{
	llvm::Type *int32_type = llvm::Type::getInt32Ty(SSAScope::context());
	llvm::PointerType *vec_ptr_type = llvm::VectorType::get(int32_type, 4)->getPointerTo();
	llvm::Value *vec_ptr = SSAScope::builder().CreateBitCast(v, vec_ptr_type, SSAScope::hint());
	llvm::LoadInst *load_inst = new llvm::LoadInst(vec_ptr, SSAScope::hint(), false, 4);
	return SSAVec4i::from_llvm(SSAScope::builder().Insert(load_inst, SSAScope::hint()));
}
// Emits a non-volatile store of new_value through this pointer.
void SSAIntPtr::store(const SSAInt &new_value)
{
	const bool is_volatile = false;
	SSAScope::builder().CreateStore(new_value.v, v, is_volatile);
}
void SSAIntPtr::store_vec4i(const SSAVec4i &new_value)
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16);
}
void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value)
{
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()));
}

View File

@ -0,0 +1,27 @@
#pragma once
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_vec4i.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Wrapper around an llvm::Value* that is used as a pointer to 32-bit integers.
class SSAIntPtr
{
public:
SSAIntPtr();
// Wraps an existing LLVM value; explicit so raw pointers never convert silently.
explicit SSAIntPtr(llvm::Value *v);
// Named factory equivalent to the explicit constructor above.
static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); }
// LLVM type of the wrapper (pointer to i32).
static llvm::Type *llvm_type();
// Pointer arithmetic: returns the pointer advanced by `index` elements.
SSAIntPtr operator[](SSAInt index) const;
// Scalar load of the pointed-to integer.
SSAInt load() const;
// Vector loads of four consecutive integers; the "unaligned" form requests alignment 4.
SSAVec4i load_vec4i() const;
SSAVec4i load_unaligned_vec4i() const;
// Scalar store through the pointer.
void store(const SSAInt &new_value);
// Vector stores of four consecutive integers; store_vec4i requests alignment 16.
void store_vec4i(const SSAVec4i &new_value);
void store_unaligned_vec4i(const SSAVec4i &new_value);
// Underlying LLVM pointer value.
llvm::Value *v;
};

View File

@ -0,0 +1,33 @@
#pragma once
#include "ssa_scope.h"
class SSAIfBlock;
template <typename SSAVariable>
class SSAPhi
{
public:
void add_incoming(SSAVariable var)
{
incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock()));
}
SSAVariable create()
{
llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint());
for (size_t i = 0; i < incoming.size(); i++)
phi_node->addIncoming(incoming[i].v, incoming[i].bb);
return SSAVariable::from_llvm(phi_node);
}
private:
struct Incoming
{
Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { }
llvm::Value *v;
llvm::BasicBlock *bb;
};
std::vector<Incoming> incoming;
};

View File

@ -0,0 +1,28 @@
#pragma once
#include "ssa_int.h"
#include "ssa_float_ptr.h"
class SSAPixelFormat4f
{
public:
SSAPixelFormat4f() { }
SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
SSAFloatPtr pixels() { return _pixels; }
SSAFloatPtr pixels() const { return _pixels; }
SSAVec4f get4f(SSAInt index) const
{
return _pixels[index * 4].load_vec4f();
}
void set4f(SSAInt index, const SSAVec4f &pixel)
{
_pixels[index * 4].store_vec4f(pixel);
}
protected:
SSAFloatPtr _pixels;
};

View File

@ -0,0 +1,28 @@
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte_ptr.h"
class SSAPixelFormat4ub
{
public:
SSAPixelFormat4ub() { }
SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
SSAUBytePtr pixels() { return _pixels; }
SSAUBytePtr pixels() const { return _pixels; }
SSAVec4f get4f(SSAInt index) const
{
return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f);
}
void set4f(SSAInt index, const SSAVec4f &pixel)
{
_pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f));
}
private:
SSAUBytePtr _pixels;
};

View File

@ -0,0 +1,35 @@
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte_ptr.h"
class SSAPixelFormat4ub_argb_rev
{
public:
SSAPixelFormat4ub_argb_rev() { }
SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
SSAUBytePtr pixels() { return _pixels; }
SSAUBytePtr pixels() const { return _pixels; }
/*
void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const
{
SSAVec8s p = _pixels[index * 4].load_vec8s();
out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3);
}
*/
SSAVec4f get4f(SSAInt index) const
{
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3);
}
void set4f(SSAInt index, const SSAVec4f &pixel)
{
_pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3)));
}
public:
SSAUBytePtr _pixels;
};

View File

@ -0,0 +1,28 @@
#pragma once
#include "ssa_int.h"
#include "ssa_ubyte_ptr.h"
class SSAPixelFormat4ub_rev
{
public:
SSAPixelFormat4ub_rev() { }
SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { }
SSAUBytePtr pixels() { return _pixels; }
SSAUBytePtr pixels() const { return _pixels; }
SSAVec4f get4f(SSAInt index) const
{
return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0);
}
void set4f(SSAInt index, const SSAVec4f &pixel)
{
_pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0)));
}
public:
SSAUBytePtr _pixels;
};

View File

@ -0,0 +1,39 @@
#pragma once
#include "ssa_ubyte.h"
#include "ssa_ubyte_ptr.h"
#include "ssa_float.h"
#include "ssa_float_ptr.h"
#include "ssa_int.h"
#include "ssa_pixeltype.h"
//#include "ssa_pixelformat1f.h"
//#include "ssa_pixelformat2f.h"
//#include "ssa_pixelformat3f.h"
#include "ssa_pixelformat4f.h"
//#include "ssa_pixelformat1ub.h"
//#include "ssa_pixelformat2ub.h"
//#include "ssa_pixelformat3ub.h"
//#include "ssa_pixelformat3ub_rev.h"
#include "ssa_pixelformat4ub.h"
//#include "ssa_pixelformat4ub_argb.h"
#include "ssa_pixelformat4ub_rev.h"
#include "ssa_pixelformat4ub_argb_rev.h"
//#include "ssa_pixelformat4ub_channel.h"
// Convenience aliases binding each pixel format to the pointer type it reads
// through. Aliases for formats whose headers are not included stay disabled.
//using SSAPixels1f = SSAPixelType<SSAPixelFormat1f, SSAFloatPtr>;
//using SSAPixels2f = SSAPixelType<SSAPixelFormat2f, SSAFloatPtr>;
//using SSAPixels3f = SSAPixelType<SSAPixelFormat3f, SSAFloatPtr>;
using SSAPixels4f = SSAPixelType<SSAPixelFormat4f, SSAFloatPtr>;
//using SSAPixels1ub = SSAPixelType<SSAPixelFormat1ub, SSAUBytePtr>;
//using SSAPixels2ub = SSAPixelType<SSAPixelFormat2ub, SSAUBytePtr>;
//using SSAPixels3ub = SSAPixelType<SSAPixelFormat3ub, SSAUBytePtr>;
using SSAPixels4ub = SSAPixelType<SSAPixelFormat4ub, SSAUBytePtr>;
//using SSAPixels4ub_argb = SSAPixelType<SSAPixelFormat4ub_argb, SSAUBytePtr>;
//using SSAPixels3ub_rev = SSAPixelType<SSAPixelFormat3ub_rev, SSAUBytePtr>;
using SSAPixels4ub_rev = SSAPixelType<SSAPixelFormat4ub_rev, SSAUBytePtr>;
using SSAPixels4ub_argb_rev = SSAPixelType<SSAPixelFormat4ub_argb_rev, SSAUBytePtr>;
//using SSAPixels4ub_channel = SSAPixelType<SSAPixelFormat4ub_channel, SSAUBytePtr>;

View File

@ -0,0 +1,498 @@
#pragma once
#include "ssa_int.h"
#include "ssa_float.h"
#include "ssa_vec4f.h"
#include "ssa_bool.h"
#include "ssa_if_block.h"
#include "ssa_phi.h"
template<typename PixelFormat, typename PixelType>
class SSAPixelType : public PixelFormat
{
public:
// Default constructor: leaves all members default-constructed.
SSAPixelType()
{
}

// Binds the pixel storage and precomputes the vector forms of the dimensions
// that the packed sampling paths use.
//   width, height - image dimensions
//   pixels        - pointer wrapper handed to the PixelFormat base
SSAPixelType(SSAInt width, SSAInt height, PixelType pixels)
: PixelFormat(pixels, width, height), _width(width), _height(height)
{
	_width32 = SSAVec4i(_width);
	SSAVec4i height32(_height);
	_widthps = SSAVec4f(_width32);
	_heightps = SSAVec4f(height32);
	_width16 = SSAVec8s(_width32, _width32);
	_widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4);
	// Shuffle the float vectors with the float shuffle. Going through
	// SSAVec4i::shuffle would round-trip the operands through integer vectors.
	_widthheightps = SSAVec4f::shuffle(_widthps, _heightps, 0, 0, 4, 4);
}
// Dimension accessors.
SSAInt width() const
{
	return _width;
}
SSAInt height() const
{
	return _height;
}
SSAInt size() const
{
	return _width * _height;
}

// Bounds checks for a linear index and for an (x, y) pair.
SSABool in_bounds(SSAInt i) const
{
	return i >= 0 && i < _width * _height;
}
SSABool in_bounds(SSAInt x, SSAInt y) const
{
	return x>= 0 && x < _width && y >= 0 && y < _height;
}
//void throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); }
//void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); }

// Texture coordinate -> pixel coordinate: scale by the dimension, then round.
SSAInt s_to_x(SSAFloat s) const
{
	SSAFloat scaled = s * SSAFloat(_width);
	return round(scaled);
}
SSAInt t_to_y(SSAFloat t) const
{
	SSAFloat scaled = t * SSAFloat(_height);
	return round(scaled);
}

// Per-axis wrap helpers; each forwards to the static helper with this image's dimension.
SSAInt clamp_x(SSAInt x) const
{
	return clamp(x, _width);
}
SSAInt clamp_y(SSAInt y) const
{
	return clamp(y, _height);
}
SSAInt repeat_x(SSAInt x) const
{
	return repeat(x,_width);
}
SSAInt repeat_y(SSAInt y) const
{
	return repeat(y, _height);
}
SSAInt mirror_x(SSAInt x) const
{
	return mirror(x, _width);
}
SSAInt mirror_y(SSAInt y) const
{
	return mirror(y, _height);
}
// Minimum of two integers, emitted as an if/else whose two arms feed a phi:
// a when a <= b, otherwise b.
static SSAInt int_min(SSAInt a, SSAInt b)
{
	SSAPhi<SSAInt> smaller;
	SSAIfBlock select;
	select.if_block(a <= b);
	smaller.add_incoming(a);
	select.else_block();
	smaller.add_incoming(b);
	select.end_block();
	return smaller.create();
}
// Maximum of two integers, emitted as an if/else whose two arms feed a phi:
// a when a >= b, otherwise b.
static SSAInt int_max(SSAInt a, SSAInt b)
{
	SSAPhi<SSAInt> larger;
	SSAIfBlock select;
	select.if_block(a >= b);
	larger.add_incoming(a);
	select.else_block();
	larger.add_incoming(b);
	select.end_block();
	return larger.create();
}
// Clamps v to [0, size - 1]: the upper bound is applied first, then the lower.
static SSAInt clamp(SSAInt v, SSAInt size)
{
	SSAInt upper_bounded = int_min(v, size - 1);
	return int_max(upper_bounded, 0);
}
// Repeat wrap for a scalar coordinate.
//   v >= 0 : v % size
//   v <  0 : size - 1 + v % size
// NOTE(review): if % truncates toward zero, the negative branch maps v = -1 to
// size - 2 rather than size - 1. This may be deliberate compensation for
// int_floor() truncating instead of flooring; confirm before changing.
static SSAInt repeat(SSAInt v, SSAInt size)
{
	SSAPhi<SSAInt> wrapped;
	SSAIfBlock sign_test;
	sign_test.if_block(v >= 0);
	wrapped.add_incoming(v % size);
	sign_test.else_block();
	wrapped.add_incoming(size - 1 + v % size);
	sign_test.end_block();
	return wrapped.create();
}
// Mirror wrap for a scalar coordinate: wrap v into a period of 2 * size with
// repeat(), then reflect the upper half of that period back onto [0, size).
static SSAInt mirror(SSAInt v, SSAInt size)
{
	SSAInt period = size * 2;
	v = repeat(v, period);
	SSAPhi<SSAInt> reflected;
	SSAIfBlock half_test;
	half_test.if_block(v < size);
	reflected.add_incoming(v);
	half_test.else_block();
	reflected.add_incoming(period - v - 1);
	half_test.end_block();
	return reflected.create();
}
// Rounds a float to an integer by adding 0.5 (v >= 0) or subtracting 0.5
// (v < 0) and then converting the result with SSAInt(SSAFloat).
static SSAInt round(SSAFloat v)
{
	SSAPhi<SSAFloat> biased;
	SSAIfBlock sign_test;
	sign_test.if_block(v >= 0.0f);
	biased.add_incoming(v + 0.5f);
	sign_test.else_block();
	biased.add_incoming(v - 0.5f);
	sign_test.end_block();
	return SSAInt(biased.create());
}
// To do: fix this:
// NOTE(review): this is only the SSAInt(SSAFloat) conversion. If that
// conversion truncates toward zero, the result is not a true floor for
// negative inputs; confirm.
static SSAInt int_floor(SSAFloat v)
{
	return SSAInt(v);
}

// Fractional part relative to int_floor(): v - int_floor(v).
static SSAFloat fract(SSAFloat v)
{
	SSAFloat whole = SSAFloat(int_floor(v));
	return v - whole;
}
// 2D pixel access: (x, y) is converted to the linear index x + y * width and
// forwarded to the PixelFormat base.
SSAVec4f get4f(SSAInt x, SSAInt y) const
{
	return PixelFormat::get4f(x + y * _width);
}
void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel)
{
	PixelFormat::set4f(x + y * _width, pixel);
}

// Reads with the coordinates wrapped by the clamp / repeat / mirror helpers.
SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const
{
	return get4f(clamp_x(x), clamp_y(y));
}
SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const
{
	return get4f(repeat_x(x), repeat_y(y));
}
SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const
{
	return get4f(mirror_x(x), mirror_y(y));
}
// Bilinear blend of four samples laid out as
//   samples[0] = (x, y), samples[1] = (x + 1, y),
//   samples[2] = (x, y + 1), samples[3] = (x + 1, y + 1)
// using the fractional parts of s * width - 0.5 and t * height - 0.5 as weights.
SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const
{
	SSAFloat frac_x = fract(s * SSAFloat(_width) - 0.5f);
	SSAFloat frac_y = fract(t * SSAFloat(_height) - 0.5f);
	SSAFloat inv_frac_x = 1.0f - frac_x;
	SSAFloat inv_frac_y = 1.0f - frac_y;
	return
		samples[0] * (inv_frac_x * inv_frac_y) +
		samples[1] * (frac_x * inv_frac_y) +
		samples[2] * (inv_frac_x * frac_y) +
		samples[3] * (frac_x * frac_y);
}
// Fetches the 2x2 neighbourhood around (s, t) with clamped coordinates.
// out_pixels receives (x, y), (x + 1, y), (x, y + 1), (x + 1, y + 1) in that order.
void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
{
	SSAInt left = int_floor(s * SSAFloat(_width) - 0.5f);
	SSAInt top = int_floor(t * SSAFloat(_height) - 0.5f);
	out_pixels[0] = get_clamp4f(left, top);
	out_pixels[1] = get_clamp4f(left + 1, top);
	out_pixels[2] = get_clamp4f(left, top + 1);
	out_pixels[3] = get_clamp4f(left + 1, top + 1);
	/*
	SSAInt x0 = clamp_x(x);
	SSAInt x1 = clamp_x(x + 1);
	SSAInt y0 = clamp_y(y);
	SSAInt y1 = clamp_y(y + 1);
	SSAInt offset0 = y0 * _width;
	SSAInt offset1 = y1 * _width;
	SSAPhi<SSAVec4f> phi0;
	SSAPhi<SSAVec4f> phi1;
	SSAPhi<SSAVec4f> phi2;
	SSAPhi<SSAVec4f> phi3;
	SSAIfBlock if0;
	if0.if_block(x0 + 1 == x1);
	phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
	phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
	phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
	phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
	if0.else_block();
	phi0.add_incoming(PixelFormat::get4f(x0 + offset0));
	phi1.add_incoming(PixelFormat::get4f(x1 + offset0));
	phi2.add_incoming(PixelFormat::get4f(x0 + offset1));
	phi3.add_incoming(PixelFormat::get4f(x1 + offset1));
	if0.end_block();
	out_pixels[0] = phi0.create();
	out_pixels[1] = phi1.create();
	out_pixels[2] = phi2.create();
	out_pixels[3] = phi3.create();
	*/
}
// Fetches the 2x2 neighbourhood around (s, t) with repeat-wrapped coordinates.
// out_pixels receives (x, y), (x + 1, y), (x, y + 1), (x + 1, y + 1) in that order.
void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
{
	SSAInt left = int_floor(s * SSAFloat(_width) - 0.5f);
	SSAInt top = int_floor(t * SSAFloat(_height) - 0.5f);
	out_pixels[0] = get_repeat4f(left, top);
	out_pixels[1] = get_repeat4f(left + 1, top);
	out_pixels[2] = get_repeat4f(left, top + 1);
	out_pixels[3] = get_repeat4f(left + 1, top + 1);
}
// Fetches the 2x2 neighbourhood around (s, t) with mirror-wrapped coordinates.
// out_pixels receives (x, y), (x + 1, y), (x, y + 1), (x + 1, y + 1) in that order.
void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const
{
	SSAInt left = int_floor(s * SSAFloat(_width) - 0.5f);
	SSAInt top = int_floor(t * SSAFloat(_height) - 0.5f);
	out_pixels[0] = get_mirror4f(left, top);
	out_pixels[1] = get_mirror4f(left + 1, top);
	out_pixels[2] = get_mirror4f(left, top + 1);
	out_pixels[3] = get_mirror4f(left + 1, top + 1);
}
// Nearest-pixel sampling: (s, t) is converted with s_to_x / t_to_y and read
// with the matching wrap mode.
SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const
{
	return get_clamp4f(s_to_x(s), t_to_y(t));
}
SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const
{
	return get_repeat4f(s_to_x(s), t_to_y(t));
}
SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const
{
	return get_mirror4f(s_to_x(s), t_to_y(t));
}
// Bilinear sampling: gather the 2x2 neighbourhood with the requested wrap
// mode, then blend it with linear_interpolate4f.
SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const
{
	SSAVec4f taps[4];
	gather_clamp4f(s, t, taps);
	return linear_interpolate4f(s, t, taps);
}
SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const
{
	SSAVec4f taps[4];
	gather_repeat4f(s, t, taps);
	return linear_interpolate4f(s, t, taps);
}
SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const
{
	SSAVec4f taps[4];
	gather_mirror4f(s, t, taps);
	return linear_interpolate4f(s, t, taps);
}
/////////////////////////////////////////////////////////////////////////
// Packed versions:
// Packed texture coordinate -> pixel coordinate: scale by the dimension, then round.
SSAVec4i s_to_x(SSAVec4f s) const
{
	return round(s * SSAVec4f(_width));
}
SSAVec4i t_to_y(SSAVec4f t) const
{
	return round(t * SSAVec4f(_height));
}

// Packed per-axis wrap helpers; each forwards to the static packed helper
// with this image's dimension.
SSAVec4i clamp_x(SSAVec4i x) const
{
	return clamp(x, _width);
}
SSAVec4i clamp_y(SSAVec4i y) const
{
	return clamp(y, _height);
}
SSAVec4i repeat_x(SSAVec4i x) const
{
	return repeat(x,_width);
}
SSAVec4i repeat_y(SSAVec4i y) const
{
	return repeat(y, _height);
}
SSAVec4i mirror_x(SSAVec4i x) const
{
	return mirror(x, _width);
}
SSAVec4i mirror_y(SSAVec4i y) const
{
	return mirror(y, _height);
}
// Packed clamp to [0, size - 1]: min with size - 1 first, then max with 0.
static SSAVec4i clamp(SSAVec4i v, SSAInt size)
{
	SSAVec4i upper_bounded = SSAVec4i::min_sse41(v, size - 1);
	return SSAVec4i::max_sse41(upper_bounded, 0);
}
// Packed repeat wrap. Currently this just clamps; the scalar algorithm is
// kept below as a disabled reference.
static SSAVec4i repeat(SSAVec4i v, SSAInt size)
{
	SSAVec4i clamped = clamp(v, size);
	return clamped;
	/*SSAPhi<SSAInt> phi;
	SSAIfBlock branch;
	branch.if_block(v >= 0);
	phi.add_incoming(v % size);
	branch.else_block();
	phi.add_incoming(size - 1 + v % size);
	branch.end_block();
	return phi.create();*/
}

// Packed mirror wrap. Currently this just clamps; the scalar algorithm is
// kept below as a disabled reference.
static SSAVec4i mirror(SSAVec4i v, SSAInt size)
{
	SSAVec4i clamped = clamp(v, size);
	return clamped;
	/*SSAInt size2 = size * 2;
	v = repeat(v, size2);
	SSAPhi<SSAInt> phi;
	SSAIfBlock branch;
	branch.if_block(v < size);
	phi.add_incoming(v);
	branch.else_block();
	phi.add_incoming(size2 - v - 1);
	branch.end_block();
	return phi.create();*/
}
// Packed rounding: adds 0.5 carrying the sign of each lane (the sign bit of v
// is OR-ed into the bit pattern of 0.5), then converts the sum to integers.
static SSAVec4i round(SSAVec4f v)
{
	// Maybe we should use the normal SSE round function instead, but that
	// requires the rounding mode to be set to round-to-nearest before the code runs.
	SSAVec4i sign_bits = (SSAVec4i::bitcast(v) & 0x80000000);
	SSAVec4i half_bits = SSAVec4i::bitcast(SSAVec4f(0.5f));
	SSAVec4f signed_half = SSAVec4f::bitcast(sign_bits | half_bits);
	return v + signed_half;
}
// Packed floor approximation: converts v to integers, then subtracts the bit
// pattern of v shifted right by 31.
// NOTE(review): presumably the shift is logical, so negative lanes yield 1 and
// the converted value is lowered by one; confirm against SSAVec4i's operator>>.
static SSAVec4i int_floor(SSAVec4f v)
{
	return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31);
}

// Packed fractional part relative to int_floor(): v - int_floor(v).
static SSAVec4f fract(SSAVec4f v)
{
	// return v - SSAVec4f::floor_sse4(v);
	SSAVec4f whole = SSAVec4f(int_floor(v));
	return v - whole;
}
// Shared implementation of the packed nearest_*4f samplers.
//   s, t    - packed texture coordinates
//   index   - which lane's pixel to fetch
//   wrap_x  - functor applied to the packed x coordinates
//   wrap_y  - functor applied to the packed y coordinates
// Row offsets (y * width) are formed from 16-bit multiplies (mulhi and
// operator*) recombined with combinelo, then the wrapped x is added.
template<typename WrapXFunctor, typename WrapYFunctor>
SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
{
	SSAVec4i x = int_floor(s * _widthps - 0.5f);
	SSAVec4i y = int_floor(t * _heightps - 0.5f);
	SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y));
	SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
	SSAVec8s offsetlo = y16 * _width16;
	// Apply the x wrap as well. Previously wrap_x was accepted but never
	// used, so an out-of-range x went straight into the offset.
	SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + wrap_x(x);
	return PixelFormat::get4f(offset[index]);
}
// Packed nearest sampling with clamp wrapping; `index` selects the lane.
SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
{
	return nearest_helper4f(
		s, t, index,
		[this](SSAVec4i v) -> SSAVec4i { return this->clamp_x(v); },
		[this](SSAVec4i v) -> SSAVec4i { return this->clamp_y(v); });
}

// Packed nearest sampling with repeat wrapping; `index` selects the lane.
SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
{
	return nearest_helper4f(
		s, t, index,
		[this](SSAVec4i v) -> SSAVec4i { return this->repeat_x(v); },
		[this](SSAVec4i v) -> SSAVec4i { return this->repeat_y(v); });
}

// Packed nearest sampling with mirror wrapping; `index` selects the lane.
SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
{
	return nearest_helper4f(
		s, t, index,
		[this](SSAVec4i v) -> SSAVec4i { return this->mirror_x(v); },
		[this](SSAVec4i v) -> SSAVec4i { return this->mirror_y(v); });
}
// Shared implementation of the packed gather_*4f functions: computes the four
// pixel offsets of the 2x2 neighbourhood for lane `index` of (s, t) and loads
// them into out_pixels[0..3].
//   wrap_x / wrap_y - functors applied to the packed x / y coordinates
template<typename WrapXFunctor, typename WrapYFunctor>
void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
{
// Top-left pixel coordinates of the neighbourhood.
SSAVec4i x = int_floor(s * _widthps - 0.5f);
SSAVec4i y = int_floor(t * _heightps - 0.5f);
// Row offsets (wrapped y * width) for both rows, via 16-bit multiplies.
// NOTE(review): which of line0/line1 corresponds to wrap_y(y) versus
// wrap_y(y + 1) depends on SSAVec8s's packing order and on
// combinehi/combinelo; confirm against ssa_vec8s.
SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y));
SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
SSAVec8s offsetlo = y16 * _width16;
SSAVec4i x0 = wrap_x(x);
SSAVec4i x1 = wrap_x(x + 1);
SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi);
SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi);
// Output order: (x0, line0), (x1, line0), (x0, line1), (x1, line1).
SSAVec4i offset0 = x0 + line0;
SSAVec4i offset1 = x1 + line0;
SSAVec4i offset2 = x0 + line1;
SSAVec4i offset3 = x1 + line1;
out_pixels[0] = PixelFormat::get4f(offset0[index]);
out_pixels[1] = PixelFormat::get4f(offset1[index]);
out_pixels[2] = PixelFormat::get4f(offset2[index]);
out_pixels[3] = PixelFormat::get4f(offset3[index]);
}
// Packed 2x2 gather with clamp wrapping; `index` selects the lane.
void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
	gather_helper4f(
		s, t, index, out_pixels,
		[this](SSAVec4i v) -> SSAVec4i { return this->clamp_x(v); },
		[this](SSAVec4i v) -> SSAVec4i { return this->clamp_y(v); });
}

// Packed 2x2 gather with repeat wrapping; `index` selects the lane.
void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
	gather_helper4f(
		s, t, index, out_pixels,
		[this](SSAVec4i v) -> SSAVec4i { return this->repeat_x(v); },
		[this](SSAVec4i v) -> SSAVec4i { return this->repeat_y(v); });
}

// Packed 2x2 gather with mirror wrapping; `index` selects the lane.
void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
{
	gather_helper4f(
		s, t, index, out_pixels,
		[this](SSAVec4i v) -> SSAVec4i { return this->mirror_x(v); },
		[this](SSAVec4i v) -> SSAVec4i { return this->mirror_y(v); });
}
// Packed bilinear sampling for lane `index`: gather the 2x2 neighbourhood with
// the requested wrap mode, then blend it. Only the clamp variant sets the
// "linearclamp" naming hint for the values it emits.
SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
{
	SSAScopeHint hint("linearclamp");
	SSAVec4f taps[4];
	gather_clamp4f(s, t, index, taps);
	return linear_interpolate4f(s, t, index, taps);
}
SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
{
	SSAVec4f taps[4];
	gather_repeat4f(s, t, index, taps);
	return linear_interpolate4f(s, t, index, taps);
}
SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
{
	SSAVec4f taps[4];
	gather_mirror4f(s, t, index, taps);
	return linear_interpolate4f(s, t, index, taps);
}
// Packed bilinear blend for lane `index`. The per-lane weights are computed
// for all lanes, then the selected lane's weight is broadcast with a shuffle
// before multiplying each sample.
SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const
{
	SSAVec4f frac_x = fract(s * _widthps - 0.5f);
	SSAVec4f frac_y = fract(t * _heightps - 0.5f);
	SSAVec4f inv_frac_x = 1.0f - frac_x;
	SSAVec4f inv_frac_y = 1.0f - frac_y;
	return
		samples[0] * SSAVec4f::shuffle(inv_frac_x * inv_frac_y, index, index, index, index) +
		samples[1] * SSAVec4f::shuffle(frac_x * inv_frac_y, index, index, index, index) +
		samples[2] * SSAVec4f::shuffle(inv_frac_x * frac_y, index, index, index, index) +
		samples[3] * SSAVec4f::shuffle(frac_x * frac_y, index, index, index, index);
}
/////////////////////////////////////////////////////////////////////////
// Clamps a packed coordinate vector against _widthheight - 1 (upper bound)
// and then against 0 (lower bound).
SSAVec4i clamp(SSAVec4i sstt) const
{
	SSAVec4i upper_bounded = SSAVec4i::min_sse41(sstt, _widthheight - 1);
	return SSAVec4i::max_sse41(upper_bounded, 0);
}
// Gathers the 2x2 neighbourhood for a single (s, t) pair stored in lanes 0 and
// 1 of `st`.
//   wrap - functor applied to the packed (x0, x1, y0, y1) coordinates
// out_pixels receives (x0, y0), (x1, y0), (x0, y1), (x1, y1) in that order.
template<typename WrapFunctor>
void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const
{
	SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
	// (x0, x1, y0, y1)
	SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1));
	// (x0, x1, y0 * width, y1 * width). Both operands are integer vectors,
	// so they are shuffled with the integer shuffle. Routing them through
	// SSAVec4f::shuffle would pass the offsets through float and lose
	// precision for values above 2^24.
	SSAVec4i xxoffset = SSAVec4i::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7);
	SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3);
	out_pixels[0] = PixelFormat::get4f(offsets[0]);
	out_pixels[1] = PixelFormat::get4f(offsets[1]);
	out_pixels[2] = PixelFormat::get4f(offsets[2]);
	out_pixels[3] = PixelFormat::get4f(offsets[3]);
}
// 2x2 gather for a single packed (s, t) pair with clamp wrapping.
void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const
{
	gather_helper4f(
		st, out_pixels,
		[this](SSAVec4i sstt) -> SSAVec4i { return this->clamp(sstt); });
}
// Bilinear sampling for a single packed (s, t) pair with clamp wrapping.
// Values emitted here carry the "linearclamp" naming hint.
SSAVec4f linear_clamp4f(SSAVec4f st) const
{
	SSAScopeHint hint("linearclamp");
	SSAVec4f taps[4];
	gather_clamp4f(st, taps);
	return linear_interpolate4f(st, taps);
}
// Bilinear blend for a single packed (s, t) pair. All four weights are
// produced by one vector multiply and then broadcast per sample.
SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const
{
	SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
	// (a, a, b, b): fractional position inside the pixel.
	SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f);
	SSAVec4f inv_aabb = 1.0f - aabb;
	// (a, b, 1 - a, 1 - b)
	SSAVec4f ab_inv_ab = SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6);
	// Multiplying by the rotation (b, 1 - a, 1 - b, a) gives
	// (a*b, (1 - a)*b, (1 - a)*(1 - b), a*(1 - b)).
	SSAVec4f weights = ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0);
	return
		samples[0] * SSAVec4f::shuffle(weights, 2, 2, 2, 2) +
		samples[1] * SSAVec4f::shuffle(weights, 3, 3, 3, 3) +
		samples[2] * SSAVec4f::shuffle(weights, 1, 1, 1, 1) +
		samples[3] * SSAVec4f::shuffle(weights, 0, 0, 0, 0);
}
public:
// Image dimensions as passed to the constructor.
SSAInt _width;
SSAInt _height;
// SSAVec4i constructed from _width.
SSAVec4i _width32;
// SSAVec8s constructed from two copies of _width32; used by the 16-bit row-offset multiplies.
SSAVec8s _width16;
// Float conversions of the packed width and height.
SSAVec4f _widthps;
SSAVec4f _heightps;
// Width/height vectors shuffled with indices (0, 0, 4, 4); used by the
// sampling path that takes a single packed (s, t) pair.
SSAVec4i _widthheight;
SSAVec4f _widthheightps;
};

View File

@ -0,0 +1,65 @@
#include "ssa_scope.h"
#include "ssa_int.h"
// Stores the LLVM objects used for code generation and registers this scope
// as the one the static accessors operate on.
SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder)
	: _context(context), _module(module), _builder(builder)
{
	SSAScope::instance = this;
}

// Unregisters the scope; the static accessors must not be used afterwards.
SSAScope::~SSAScope()
{
	SSAScope::instance = nullptr;
}
// Static accessors for the objects held by the registered scope. They
// dereference `instance` without a null check.
llvm::LLVMContext &SSAScope::context()
{
	SSAScope *active = instance;
	return *active->_context;
}
llvm::Module *SSAScope::module()
{
	SSAScope *active = instance;
	return active->_module;
}
llvm::IRBuilder<> &SSAScope::builder()
{
	SSAScope *active = instance;
	return *active->_builder;
}
// Returns the declaration of intrinsic `id` in the active module, creating it
// on first use.
//   id              - LLVM intrinsic identifier
//   parameter_types - overload types for overloaded intrinsics (empty otherwise)
// The lookup and the creation use the same name, including any type suffix.
// Looking up only the base name would never find an overloaded intrinsic that
// was declared earlier, and a second declaration would then be created.
llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types)
{
	const std::string name = llvm::Intrinsic::getName(id, parameter_types);
	llvm::Function *func = module()->getFunction(name);
	if (func == 0)
		func = llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, name, module());
	return func;
}
// Allocates stack storage for a single element of `type`.
llvm::Value *SSAScope::alloca(llvm::Type *type)
{
	const SSAInt one(1);
	return alloca(type, one);
}
// Allocates stack storage for `size` elements of `type`. The alloca is placed
// at the start of the current function's entry block, not at the builder's
// current position.
llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
{
	// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
	llvm::Function *current_function = SSAScope::builder().GetInsertBlock()->getParent();
	llvm::BasicBlock &entry_block = current_function->getEntryBlock();
	llvm::IRBuilder<> entry_builder(&entry_block, entry_block.begin());
	return entry_builder.CreateAlloca(type, size.v, hint());
}
const std::string &SSAScope::hint()
{
return instance->_hint;
}
// Replaces the naming hint; an empty hint is stored as "tmp".
void SSAScope::set_hint(const std::string &new_hint)
{
	instance->_hint = new_hint.empty() ? std::string("tmp") : new_hint;
}
// The scope the static accessors operate on; null when no SSAScope is alive.
SSAScope *SSAScope::instance = nullptr;

View File

@ -0,0 +1,41 @@
#pragma once
#include "r_compiler/llvm_include.h"
class SSAInt;
// Makes an LLVM context, module and IR builder available to the SSA wrapper
// classes through static accessors. The constructor registers the object in a
// single static `instance` pointer and the destructor clears it, so only one
// scope is addressable at a time.
class SSAScope
{
public:
// The pointers are stored as given; this class does not delete them.
SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder);
~SSAScope();
// Accessors for the registered scope's objects.
static llvm::LLVMContext &context();
static llvm::Module *module();
static llvm::IRBuilder<> &builder();
// Returns the module's declaration for an intrinsic, creating it if needed.
static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type*>());
// Stack allocation placed at the start of the current function's entry block;
// the one-argument form allocates a single element.
static llvm::Value *alloca(llvm::Type *type);
static llvm::Value *alloca(llvm::Type *type, SSAInt size);
// Name hint passed to the builder for created values; set_hint("") stores "tmp".
static const std::string &hint();
static void set_hint(const std::string &hint);
private:
static SSAScope *instance;
llvm::LLVMContext *_context;
llvm::Module *_module;
llvm::IRBuilder<> *_builder;
std::string _hint;
};
// Scoped guard for the SSAScope naming hint: remembers the hint in effect at
// construction and hands it back to SSAScope::set_hint on destruction.
class SSAScopeHint
{
public:
	// Captures the current hint without changing it.
	SSAScopeHint()
		: old_hint(SSAScope::hint())
	{
	}

	// Captures the current hint, then installs `hint`.
	SSAScopeHint(const std::string &hint)
		: old_hint(SSAScope::hint())
	{
		SSAScope::set_hint(hint);
	}

	~SSAScopeHint()
	{
		SSAScope::set_hint(old_hint);
	}

	// Installs a different hint for the remainder of the guard's lifetime.
	void set(const std::string &hint)
	{
		SSAScope::set_hint(hint);
	}

	// Restores the captured hint early.
	void clear()
	{
		SSAScope::set_hint(old_hint);
	}

private:
	std::string old_hint;
};

View File

@ -0,0 +1,25 @@
#pragma once
template<typename SSAVariable>
class SSAStack
{
public:
SSAStack()
: v(0)
{
v = SSAScope::alloca(SSAVariable::llvm_type());
}
SSAVariable load() const
{
return SSAVariable::from_llvm(SSAScope::builder().CreateLoad(v, SSAScope::hint()));
}
void store(const SSAVariable &new_value)
{
SSAScope::builder().CreateStore(new_value.v, v);
}
llvm::Value *v;
};

View File

@ -0,0 +1,18 @@
#include "ssa_struct_type.h"
#include "ssa_scope.h"
// Appends a field type; fields keep the order in which they are added.
void SSAStructType::add_parameter(llvm::Type *type)
{
	elements.emplace_back(type);
}
// Builds the non-packed LLVM struct type from the fields added so far.
llvm::Type *SSAStructType::llvm_type()
{
	const bool is_packed = false;
	return llvm::StructType::get(SSAScope::context(), elements, is_packed);
}
// Builds the packed LLVM struct type from the fields added so far.
llvm::Type *SSAStructType::llvm_type_packed()
{
	const bool is_packed = true;
	return llvm::StructType::get(SSAScope::context(), elements, is_packed);
}

View File

@ -0,0 +1,17 @@
#pragma once
#include <vector>
namespace llvm { class Type; }
// Incrementally collects field types and produces the corresponding LLVM
// struct type on request.
class SSAStructType
{
public:
// Appends one field type.
void add_parameter(llvm::Type *type);
// Struct type built from the collected fields, non-packed and packed respectively.
llvm::Type *llvm_type();
llvm::Type *llvm_type_packed();
private:
// Field types in insertion order.
std::vector<llvm::Type *> elements;
};

View File

@ -0,0 +1,95 @@
#include "ssa_ubyte.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Default-construct with no underlying LLVM value.
SSAUByte::SSAUByte()
	: v(nullptr)
{
}

// Creates an 8-bit unsigned integer constant.
SSAUByte::SSAUByte(unsigned char constant)
	: v(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false)))
{
}

// Wrap an existing LLVM value.
SSAUByte::SSAUByte(llvm::Value *v)
	: v(v)
{
}
// LLVM type of this wrapper: 8-bit integer in the active scope's context.
llvm::Type *SSAUByte::llvm_type()
{
	llvm::LLVMContext &ctx = SSAScope::context();
	return llvm::Type::getInt8Ty(ctx);
}
// SSAUByte (op) SSAUByte arithmetic, each emitting the matching integer instruction.
SSAUByte operator+(const SSAUByte &a, const SSAUByte &b)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
	return SSAUByte::from_llvm(sum);
}
SSAUByte operator-(const SSAUByte &a, const SSAUByte &b)
{
	llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
	return SSAUByte::from_llvm(difference);
}
SSAUByte operator*(const SSAUByte &a, const SSAUByte &b)
{
	llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
	return SSAUByte::from_llvm(product);
}
/*
SSAUByte operator/(const SSAUByte &a, const SSAUByte &b)
{
return SSAScope::builder().CreateDiv(a.v, b.v);
}
*/
// unsigned char (op) SSAUByte: the plain byte is wrapped in an SSAUByte and
// forwarded to the SSAUByte/SSAUByte operator.
SSAUByte operator+(unsigned char a, const SSAUByte &b)
{
	const SSAUByte lhs(a);
	return lhs + b;
}
SSAUByte operator-(unsigned char a, const SSAUByte &b)
{
	const SSAUByte lhs(a);
	return lhs - b;
}
SSAUByte operator*(unsigned char a, const SSAUByte &b)
{
	const SSAUByte lhs(a);
	return lhs * b;
}
/*
SSAUByte operator/(unsigned char a, const SSAUByte &b)
{
return SSAUByte(a) / b;
}
*/
// SSAUByte (op) unsigned char: the plain byte is wrapped in an SSAUByte and
// forwarded to the SSAUByte/SSAUByte operator.
SSAUByte operator+(const SSAUByte &a, unsigned char b)
{
	const SSAUByte rhs(b);
	return a + rhs;
}
SSAUByte operator-(const SSAUByte &a, unsigned char b)
{
	const SSAUByte rhs(b);
	return a - rhs;
}
SSAUByte operator*(const SSAUByte &a, unsigned char b)
{
	const SSAUByte rhs(b);
	return a * rhs;
}
/*
SSAUByte operator/(const SSAUByte &a, unsigned char b)
{
return a / SSAUByte(b);
}
*/
// Shift left by a bit count known when the IR is generated. No name hint is
// passed to the builder here.
SSAUByte operator<<(const SSAUByte &a, unsigned char bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateShl(a.v, bits);
	return SSAUByte::from_llvm(shifted);
}
// Logical (zero-filling) shift right by a bit count known when the IR is
// generated. No name hint is passed to the builder here.
SSAUByte operator>>(const SSAUByte &a, unsigned char bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits);
	return SSAUByte::from_llvm(shifted);
}

View File

@ -0,0 +1,35 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value* that the SSA helper layer treats as an
// unsigned 8-bit integer.
class SSAUByte
{
public:
SSAUByte();
// Non-explicit: lets plain byte constants convert implicitly (creates an 8-bit constant).
SSAUByte(unsigned char constant);
// Wraps an existing LLVM value; explicit so raw pointers never convert silently.
explicit SSAUByte(llvm::Value *v);
// Named factory equivalent to the explicit constructor above.
static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); }
// LLVM type of the wrapper (i8).
static llvm::Type *llvm_type();
// Underlying LLVM value; public and read directly by the operator overloads.
llvm::Value *v;
};
// SSAUByte (op) SSAUByte arithmetic. Division is not provided (it is disabled below).
SSAUByte operator+(const SSAUByte &a, const SSAUByte &b);
SSAUByte operator-(const SSAUByte &a, const SSAUByte &b);
SSAUByte operator*(const SSAUByte &a, const SSAUByte &b);
//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b);
// unsigned char (op) SSAUByte: the byte is wrapped in an SSAUByte and forwarded.
SSAUByte operator+(unsigned char a, const SSAUByte &b);
SSAUByte operator-(unsigned char a, const SSAUByte &b);
SSAUByte operator*(unsigned char a, const SSAUByte &b);
//SSAUByte operator/(unsigned char a, const SSAUByte &b);
// SSAUByte (op) unsigned char: the byte is wrapped in an SSAUByte and forwarded.
SSAUByte operator+(const SSAUByte &a, unsigned char b);
SSAUByte operator-(const SSAUByte &a, unsigned char b);
SSAUByte operator*(const SSAUByte &a, unsigned char b);
//SSAUByte operator/(const SSAUByte &a, unsigned char b);
// Shifts by a bit count fixed at IR-generation time; operator>> is a logical shift (CreateLShr).
SSAUByte operator<<(const SSAUByte &a, unsigned char bits);
SSAUByte operator>>(const SSAUByte &a, unsigned char bits);

View File

@ -0,0 +1,106 @@
#include "ssa_ubyte_ptr.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Default-construct with no underlying LLVM value.
SSAUBytePtr::SSAUBytePtr()
	: v(nullptr)
{
}

// Wrap an existing LLVM value.
SSAUBytePtr::SSAUBytePtr(llvm::Value *v)
	: v(v)
{
}
// LLVM type of this wrapper: pointer to 8-bit integer in the active scope's context.
llvm::Type *SSAUBytePtr::llvm_type()
{
	llvm::LLVMContext &ctx = SSAScope::context();
	return llvm::Type::getInt8PtrTy(ctx);
}
// Returns a pointer offset by `index` bytes (built with a GEP); this does not
// load from memory.
SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const
{
	llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
	return SSAUBytePtr::from_llvm(element_ptr);
}
// Emits a non-volatile load of the pointed-to byte.
SSAUByte SSAUBytePtr::load() const
{
	llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
	return SSAUByte::from_llvm(loaded);
}
// Loads four consecutive bytes and widens them to a 4 x i32 vector.
// The bytes are read as one 32-bit integer, placed in lane 0 of a vector whose
// other lanes are zero, and then widened in two steps (bytes to 16-bit, then
// 16-bit to 32-bit) using the shuffles noted below.
SSAVec4i SSAUBytePtr::load_vec4ub() const
{
	// _mm_cvtsi32_si128 as implemented by clang:
	llvm::Value *int_ptr = SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint());
	SSAInt packed = SSAInt::from_llvm(SSAScope::builder().CreateLoad(int_ptr, false, SSAScope::hint()));
	llvm::Value *vec = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), packed.v, SSAInt(0).v, SSAScope::hint());
	// Zero the remaining three lanes.
	for (int lane = 1; lane < 4; lane++)
		vec = SSAScope::builder().CreateInsertElement(vec, SSAInt(0).v, SSAInt(lane).v, SSAScope::hint());
	SSAVec4i v4i = SSAVec4i::from_llvm(vec);
	SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8
	return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16
	/*
	llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo();
	llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4);
	llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint());
	return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type));
	*/
}
// Loads sixteen bytes as one <16 x i8> vector. No explicit alignment is given to
// the load, so the pointer must satisfy the vector type's default alignment.
SSAVec16ub SSAUBytePtr::load_vec16ub() const
{
    llvm::Type *byte_type = llvm::Type::getInt8Ty(SSAScope::context());
    llvm::PointerType *vec_ptr_type = llvm::VectorType::get(byte_type, 16)->getPointerTo();
    llvm::Value *vec_ptr = SSAScope::builder().CreateBitCast(v, vec_ptr_type, SSAScope::hint());
    return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(vec_ptr, false, SSAScope::hint()));
}
// Loads sixteen bytes as one <16 x i8> vector using an explicit alignment of 4
// bytes instead of the vector type's default alignment.
SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const
{
    llvm::Type *byte_type = llvm::Type::getInt8Ty(SSAScope::context());
    llvm::PointerType *vec_ptr_type = llvm::VectorType::get(byte_type, 16)->getPointerTo();
    llvm::Value *vec_ptr = SSAScope::builder().CreateBitCast(v, vec_ptr_type, SSAScope::hint());

    // The instruction is built by hand so the alignment (4) can be passed in,
    // then handed to the builder for insertion.
    llvm::LoadInst *load = new llvm::LoadInst(vec_ptr, SSAScope::hint(), false, 4);
    return SSAVec16ub::from_llvm(SSAScope::builder().Insert(load, SSAScope::hint()));
}
// Emits a non-volatile store of `new_value` through the wrapped pointer.
void SSAUBytePtr::store(const SSAUByte &new_value)
{
    const bool is_volatile = false;
    SSAScope::builder().CreateStore(new_value.v, v, is_volatile);
}
void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
{
// Store using saturate:
SSAVec8s v8s(new_value, new_value);
SSAVec16ub v16ub(v8s, v8s);
llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16);
llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo();
std::vector<llvm::Constant*> constants;
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2)));
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3)));
llvm::Value *mask = llvm::ConstantVector::get(constants);
llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint());
SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false);
}
void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
{
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16);
// The following generates _mm_stream_si128, maybe!
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1));
// inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node);
}
void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
{
/*llvm::Value *values[2] =
{
SSAScope::builder().CreateBitCast(v, llvm::Type::getInt8PtrTy(SSAScope::context())),
new_value.v
};
SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_dq), values);*/
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()));
}

View File

@ -0,0 +1,32 @@
#pragma once
#include "ssa_ubyte.h"
#include "ssa_int.h"
#include "ssa_vec4i.h"
#include "ssa_vec8s.h"
#include "ssa_vec16ub.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Wrapper around an llvm::Value that is used as a pointer to bytes; offers
// indexing plus scalar and vector load/store helpers that emit LLVM IR.
class SSAUBytePtr
{
public:
// Creates a wrapper with a null value.
SSAUBytePtr();
// Wraps an existing LLVM pointer value; explicit to avoid silent conversions.
explicit SSAUBytePtr(llvm::Value *v);
// Named factory equivalent to the explicit constructor.
static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); }
// LLVM i8 pointer type.
static llvm::Type *llvm_type();
// Pointer offset by `index` elements (emits a GEP).
SSAUBytePtr operator[](SSAInt index) const;
// Loads a single byte.
SSAUByte load() const;
// Loads four bytes and widens them into the lanes of an SSAVec4i.
SSAVec4i load_vec4ub() const;
// NOTE(review): no definition of load_vec8s is visible in ssa_ubyte_ptr.cpp;
// calling it would presumably fail at link time - confirm or remove.
SSAVec8s load_vec8s() const;
// Loads sixteen bytes with the vector type's default alignment.
SSAVec16ub load_vec16ub() const;
// Loads sixteen bytes with an explicit, smaller alignment.
SSAVec16ub load_unaligned_vec16ub() const;
// Stores a single byte.
void store(const SSAUByte &new_value);
// Narrows the four lanes to bytes (saturating pack) and stores them.
void store_vec4ub(const SSAVec4i &new_value);
// Stores sixteen bytes with 16-byte alignment.
void store_vec16ub(const SSAVec16ub &new_value);
// Stores sixteen bytes to a pointer not required to be 16-byte aligned.
void store_unaligned_vec16ub(const SSAVec16ub &new_value);
// The wrapped LLVM pointer value.
llvm::Value *v;
};

View File

@ -0,0 +1,56 @@
#include "ssa_value.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Emits a non-volatile load through the wrapped value and wraps the result.
SSAValue SSAValue::load()
{
    llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false);
    return SSAValue::from_llvm(loaded);
}
// Emits a non-volatile store of `value` to the location held by this wrapper.
void SSAValue::store(llvm::Value *value)
{
    const bool is_volatile = false;
    SSAScope::builder().CreateStore(value, v, is_volatile);
}
// Starts an index chain with a host-side constant index; the constant is
// turned into an SSAInt and handled by the SSAInt overload.
SSAIndexLookup SSAValue::operator[](int index)
{
    return (*this)[SSAInt(index)];
}
// Starts an index chain: the lookup remembers this value as its base and
// `index` as its first (and so far only) index. No IR is emitted here.
SSAIndexLookup SSAValue::operator[](SSAInt index)
{
    SSAIndexLookup lookup;
    lookup.v = v;
    lookup.indexes.assign(1, index.v);
    return lookup;
}
/////////////////////////////////////////////////////////////////////////////
// Materializes the collected index chain as a single GEP on the base value.
SSAIndexLookup::operator SSAValue()
{
    llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, indexes);
    return SSAValue::from_llvm(element_ptr);
}
// Extends the index chain with a host-side constant index; the constant is
// turned into an SSAInt and handled by the SSAInt overload.
SSAIndexLookup SSAIndexLookup::operator[](int index)
{
    return (*this)[SSAInt(index)];
}
// Returns a copy of this lookup with `index` appended to the index chain.
// The receiver itself is left untouched.
SSAIndexLookup SSAIndexLookup::operator[](SSAInt index)
{
    SSAIndexLookup extended(*this); // copies both the base value and the indexes
    extended.indexes.push_back(index.v);
    return extended;
}

View File

@ -0,0 +1,53 @@
#pragma once
#include <vector>
namespace llvm { class Value; }
class SSAInt;
class SSAIndexLookup;
// Untyped wrapper around an llvm::Value pointer. It can be loaded from, stored
// to and indexed, and converts to any wrapper type that offers from_llvm().
class SSAValue
{
public:
// Creates a wrapper with a null value.
SSAValue() : v(0) { }
// Named factory: wraps an existing LLVM value.
static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; }
// Emits a load through the wrapped value.
SSAValue load();
// Emits a store of `v` (the argument) to the wrapped value.
void store(llvm::Value *v);
// Implicit conversion to any type exposing a static from_llvm(llvm::Value *);
// the wrapped pointer is passed through unchanged, no IR is emitted.
template<typename Type>
operator Type()
{
return Type::from_llvm(v);
}
// Begin an index chain; the GEP is emitted when the lookup is converted to SSAValue.
SSAIndexLookup operator[](int index);
SSAIndexLookup operator[](SSAInt index);
// The wrapped LLVM value.
llvm::Value *v;
};
// Accumulates a base value plus a list of indexes produced by chained
// operator[] calls; converting it to SSAValue emits the GEP.
class SSAIndexLookup
{
public:
// Creates a lookup with a null base and no indexes.
SSAIndexLookup() : v(0) { }
// Base value the indexes apply to.
llvm::Value *v;
// Indexes collected so far, in the order they were applied.
std::vector<llvm::Value *> indexes;
// Converts to SSAValue (emitting the GEP) and loads through it.
SSAValue load() { SSAValue value = *this; return value.load(); }
// Converts to SSAValue (emitting the GEP) and stores `v` through it.
void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); }
// NOTE(review): this generic conversion wraps the base value `v` directly and
// does not use `indexes`, unlike operator SSAValue() - confirm that is intended.
template<typename Type>
operator Type()
{
return Type::from_llvm(v);
}
// Emits a GEP of the base value with the collected indexes.
operator SSAValue();
// Return a copy of this lookup with one more index appended.
SSAIndexLookup operator[](int index);
SSAIndexLookup operator[](SSAInt index);
};

View File

@ -0,0 +1,155 @@
#include "ssa_vec16ub.h"
#include "ssa_vec8s.h"
#include "ssa_vec4i.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates a wrapper that does not reference any LLVM value yet.
SSAVec16ub::SSAVec16ub() : v(0) { }
// Builds a constant vector whose sixteen lanes all hold `constant`.
SSAVec16ub::SSAVec16ub(unsigned char constant)
    : v(0)
{
    llvm::Constant *lane = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false));
    std::vector<llvm::Constant*> lanes(16, lane);
    v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from sixteen individual byte constants; constantN
// becomes lane N.
SSAVec16ub::SSAVec16ub(
    unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7,
    unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15)
    : v(0)
{
    const unsigned char bytes[16] =
    {
        constant0, constant1, constant2, constant3, constant4, constant5, constant6, constant7,
        constant8, constant9, constant10, constant11, constant12, constant13, constant14, constant15
    };

    std::vector<llvm::Constant*> lanes;
    for (int lane = 0; lane < 16; lane++)
    {
        lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, bytes[lane], false)));
    }
    v = llvm::ConstantVector::get(lanes);
}
// Wraps an already existing LLVM value.
SSAVec16ub::SSAVec16ub(llvm::Value *v) : v(v) { }
// Packs two SSAVec8s vectors into one byte vector by calling the
// x86 SSE2 packuswb intrinsic with (s0, s1) as operands.
SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1)
    : v(0)
{
    llvm::Value *operands[2] = { s0.v, s1.v };
    v = SSAScope::builder().CreateCall(
        SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128),
        operands,
        SSAScope::hint());
}
// Returns the LLVM <16 x i8> vector type for the current SSAScope context.
llvm::Type *SSAVec16ub::llvm_type()
{
    llvm::Type *byte_type = llvm::Type::getInt8Ty(SSAScope::context());
    return llvm::VectorType::get(byte_type, 16);
}
// Reinterprets the bits of an SSAVec4i as a <16 x i8> vector (emits a bitcast).
SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32)
{
    llvm::Value *as_bytes = SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint());
    return SSAVec16ub::from_llvm(as_bytes);
}
// Single-input shuffle: forwards to the two-input overload with an undef
// vector as the second operand.
SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15)
{
    SSAVec16ub undef_operand = from_llvm(llvm::UndefValue::get(llvm_type()));
    return shuffle(
        i0, undef_operand,
        index0, index1, index2, index3, index4, index5, index6, index7,
        index8, index9, index10, index11, index12, index13, index14, index15);
}
// Two-input shuffle: result lane N takes the element selected by indexN, passed
// as an i32 mask constant to LLVM's shufflevector over (i0, i1).
SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15)
{
    const int selectors[16] =
    {
        index0, index1, index2, index3, index4, index5, index6, index7,
        index8, index9, index10, index11, index12, index13, index14, index15
    };

    std::vector<llvm::Constant*> mask_lanes;
    for (int lane = 0; lane < 16; lane++)
    {
        mask_lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, selectors[lane])));
    }
    llvm::Value *mask = llvm::ConstantVector::get(mask_lanes);

    llvm::Value *shuffled = SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint());
    return SSAVec16ub::from_llvm(shuffled);
}
// Lane-wise integer addition of two byte vectors.
SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b)
{
    llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
    return SSAVec16ub::from_llvm(sum);
}

// Lane-wise integer subtraction of two byte vectors.
SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b)
{
    llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
    return SSAVec16ub::from_llvm(difference);
}

// Lane-wise integer multiplication of two byte vectors.
SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b)
{
    llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
    return SSAVec16ub::from_llvm(product);
}
/*
SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b)
{
return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint());
}
*/
// Constant-on-the-left overloads: the byte is splatted into a constant vector
// and the vector/vector operator does the work.
SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b)
{
    SSAVec16ub lhs(a);
    return lhs + b;
}

SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b)
{
    SSAVec16ub lhs(a);
    return lhs - b;
}

SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b)
{
    SSAVec16ub lhs(a);
    return lhs * b;
}
/*
SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b)
{
return SSAVec16ub(a) / b;
}
*/
// Constant-on-the-right overloads: the byte is splatted into a constant vector
// and the vector/vector operator does the work.
SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b)
{
    SSAVec16ub rhs(b);
    return a + rhs;
}

SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b)
{
    SSAVec16ub rhs(b);
    return a - rhs;
}

SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b)
{
    SSAVec16ub rhs(b);
    return a * rhs;
}
/*
SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b)
{
return a / SSAVec16ub(b);
}
*/

View File

@ -0,0 +1,42 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec8s;
class SSAVec4i;
// Wrapper around an llvm::Value holding a <16 x i8> vector (see llvm_type()).
class SSAVec16ub
{
public:
// Creates a wrapper with a null value.
SSAVec16ub();
// Constant vector with all sixteen lanes set to `constant`.
SSAVec16ub(unsigned char constant);
// Constant vector built lane by lane; constantN becomes lane N.
SSAVec16ub(
unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7,
unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15);
// Wraps an existing LLVM value; explicit to avoid silent conversions.
explicit SSAVec16ub(llvm::Value *v);
// Packs two 8 x 16-bit vectors into bytes via the SSE2 packuswb intrinsic.
SSAVec16ub(SSAVec8s s0, SSAVec8s s1);
// Named factory equivalent to the explicit constructor.
static SSAVec16ub from_llvm(llvm::Value *v) { return SSAVec16ub(v); }
// LLVM <16 x i8> vector type.
static llvm::Type *llvm_type();
// Reinterprets the bits of a 4 x i32 vector as sixteen bytes.
static SSAVec16ub bitcast(SSAVec4i i32);
// Shuffle of a single input (second shufflevector operand is undef).
static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15);
// Shuffle of two inputs; indexN selects the source element for result lane N.
static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15);
// The wrapped LLVM value.
llvm::Value *v;
};
// Lane-wise arithmetic on byte vectors. The operator/ overloads are disabled
// here because their definitions are commented out in ssa_vec16ub.cpp; leaving
// them declared would turn any use into a link-time error instead of a
// compile-time one (ssa_ubyte.h disables its division operators the same way).
SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b);
SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b);
SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b);
//SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b);
// Constant on the left-hand side (splatted to all lanes).
SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b);
SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b);
SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b);
//SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b);
// Constant on the right-hand side (splatted to all lanes).
SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b);
SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b);
SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b);
//SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b);

View File

@ -0,0 +1,244 @@
#include "ssa_vec4f.h"
#include "ssa_vec4i.h"
#include "ssa_float.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates a wrapper that does not reference any LLVM value yet.
SSAVec4f::SSAVec4f() : v(0) { }
// Builds a constant vector whose four lanes all hold `constant`.
SSAVec4f::SSAVec4f(float constant)
    : v(0)
{
    llvm::Constant *lane = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant));
    std::vector<llvm::Constant*> lanes(4, lane);
    v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from four float constants; constantN becomes lane N.
SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3)
    : v(0)
{
    const float values[4] = { constant0, constant1, constant2, constant3 };

    std::vector<llvm::Constant*> lanes;
    for (int lane = 0; lane < 4; lane++)
    {
        lanes.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(values[lane])));
    }
    v = llvm::ConstantVector::get(lanes);
}
// Broadcasts a scalar SSAFloat into all four lanes: the scalar is bitcast to a
// one-element vector and shuffled with an all-zero index mask.
SSAVec4f::SSAVec4f(SSAFloat f)
    : v(0)
{
    llvm::Type *single_lane_type = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1);

    // Every result lane selects element 0 of the first shuffle operand.
    llvm::Constant *lane_zero = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0));
    std::vector<llvm::Constant*> mask_lanes(4, lane_zero);
    llvm::Value *mask = llvm::ConstantVector::get(mask_lanes);

    llvm::Value *as_vector = SSAScope::builder().CreateBitCast(f.v, single_lane_type, SSAScope::hint());
    v = SSAScope::builder().CreateShuffleVector(as_vector, llvm::UndefValue::get(single_lane_type), mask, SSAScope::hint());
}
// Assembles a vector from four scalars by inserting fN into lane N of an
// initially undef vector.
SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3)
    : v(0)
{
    llvm::Value *scalars[4] = { f0.v, f1.v, f2.v, f3.v };

    v = llvm::UndefValue::get(llvm_type());
    for (int lane = 0; lane < 4; lane++)
    {
        llvm::Value *lane_index = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)lane));
        v = SSAScope::builder().CreateInsertElement(v, scalars[lane], lane_index);
    }
}
// Wraps an already existing LLVM value.
SSAVec4f::SSAVec4f(llvm::Value *v) : v(v) { }
// Converts a 4 x i32 vector to floats by calling the x86 SSE2 cvtdq2ps intrinsic.
SSAVec4f::SSAVec4f(SSAVec4i i32)
    : v(0)
{
    // Generic alternative left disabled in the original:
    //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4);
    //return builder->CreateSIToFP(i32.v, m128type);
    v = SSAScope::builder().CreateCall(
        SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps),
        i32.v,
        SSAScope::hint());
}
// Returns the LLVM <4 x float> vector type for the current SSAScope context.
llvm::Type *SSAVec4f::llvm_type()
{
    llvm::Type *float_type = llvm::Type::getFloatTy(SSAScope::context());
    return llvm::VectorType::get(float_type, 4);
}
// Extracts the lane selected by `index` as a scalar.
SSAFloat SSAVec4f::operator[](SSAInt index) const
{
    llvm::Value *lane = SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint());
    return SSAFloat::from_llvm(lane);
}
// Returns a copy of `vec4f` in which the lane at the constant position `index`
// has been replaced by `value`.
SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index)
{
    llvm::Value *lane_index = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index));
    llvm::Value *updated = SSAScope::builder().CreateInsertElement(vec4f.v, value.v, lane_index);
    return from_llvm(updated);
}
// Reinterprets the bits of a 4 x i32 vector as a 4 x float vector. This is a
// pure bitcast; no numeric conversion takes place.
//
// i32: the integer vector whose bits are reinterpreted.
// Returns the same bits typed as SSAVec4f.
SSAVec4f SSAVec4f::bitcast(SSAVec4i i32)
{
    // The result must be wrapped as SSAVec4f. Wrapping it as SSAVec4i made the
    // return statement go through the converting SSAVec4f(SSAVec4i) constructor,
    // which emits an int-to-float conversion call on the already-float value.
    return SSAVec4f::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint()));
}
// Lane-wise square root via the llvm.sqrt intrinsic overloaded on <4 x float>.
//
// f: input vector.
// Returns a vector holding the square root of each lane.
SSAVec4f SSAVec4f::sqrt(SSAVec4f f)
{
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    // The call yields a <4 x float>, so wrap it as SSAVec4f. Wrapping it as
    // SSAFloat sent the return value through the SSAVec4f(SSAFloat) broadcast
    // constructor, which treats its argument as a scalar.
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint()));
    //return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_sqrt_ps), f.v, SSAScope::hint()));
}
// Lane-wise reciprocal computed by calling the x86 SSE rcp_ps intrinsic.
SSAVec4f SSAVec4f::rcp(SSAVec4f f)
{
    llvm::Value *reciprocal = SSAScope::builder().CreateCall(
        SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rcp_ps),
        f.v,
        SSAScope::hint());
    return SSAVec4f::from_llvm(reciprocal);
}
// Lane-wise sine via the llvm.sin intrinsic overloaded on <4 x float>.
//
// val: input vector.
// Returns a vector holding the sine of each lane.
SSAVec4f SSAVec4f::sin(SSAVec4f val)
{
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    // The call yields a <4 x float>, so wrap it as SSAVec4f rather than SSAFloat
    // (which would route the result through the scalar broadcast constructor).
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint()));
}
// Lane-wise cosine via the llvm.cos intrinsic overloaded on <4 x float>.
//
// val: input vector.
// Returns a vector holding the cosine of each lane.
SSAVec4f SSAVec4f::cos(SSAVec4f val)
{
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    // The call yields a <4 x float>, so wrap it as SSAVec4f rather than SSAFloat
    // (which would route the result through the scalar broadcast constructor).
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint()));
}
// Lane-wise power via the llvm.pow intrinsic overloaded on <4 x float>.
//
// val:   base vector.
// power: exponent vector.
// Returns a vector holding val[i] raised to power[i] in each lane.
SSAVec4f SSAVec4f::pow(SSAVec4f val, SSAVec4f power)
{
    // Only one overload type is supplied; both operands share it.
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    //params.push_back(SSAVec4f::llvm_type());
    std::vector<llvm::Value*> args;
    args.push_back(val.v);
    args.push_back(power.v);
    // The call yields a <4 x float>, so wrap it as SSAVec4f rather than SSAFloat
    // (which would route the result through the scalar broadcast constructor).
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint()));
}
// Lane-wise exponential via the llvm.exp intrinsic overloaded on <4 x float>.
//
// val: input vector.
// Returns a vector holding exp of each lane.
SSAVec4f SSAVec4f::exp(SSAVec4f val)
{
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    // The call yields a <4 x float>, so wrap it as SSAVec4f rather than SSAFloat
    // (which would route the result through the scalar broadcast constructor).
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint()));
}
// Lane-wise logarithm via the llvm.log intrinsic overloaded on <4 x float>.
//
// val: input vector.
// Returns a vector holding log of each lane.
SSAVec4f SSAVec4f::log(SSAVec4f val)
{
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    // The call yields a <4 x float>, so wrap it as SSAVec4f rather than SSAFloat
    // (which would route the result through the scalar broadcast constructor).
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint()));
}
// Lane-wise fused multiply-add via the llvm.fma intrinsic overloaded on
// <4 x float>; the operands are passed to the intrinsic in the order (a, b, c).
//
// Returns the intrinsic's <4 x float> result.
SSAVec4f SSAVec4f::fma(SSAVec4f a, SSAVec4f b, SSAVec4f c)
{
    // Only one overload type is supplied; all three operands share it.
    std::vector<llvm::Type *> params;
    params.push_back(SSAVec4f::llvm_type());
    //params.push_back(SSAVec4f::llvm_type());
    //params.push_back(SSAVec4f::llvm_type());
    std::vector<llvm::Value*> args;
    args.push_back(a.v);
    args.push_back(b.v);
    args.push_back(c.v);
    // The call yields a <4 x float>, so wrap it as SSAVec4f rather than SSAFloat
    // (which would route the result through the scalar broadcast constructor).
    return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint()));
}
// Transposes the 4x4 matrix formed by the four rows, in place, using eight
// two-input shuffles driven by _MM_SHUFFLE-style immediate masks.
void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3)
{
    const int low_halves = 0x44;   // _MM_SHUFFLE(1,0,1,0)
    const int high_halves = 0xEE;  // _MM_SHUFFLE(3,2,3,2)
    const int even_lanes = 0x88;   // _MM_SHUFFLE(2,0,2,0)
    const int odd_lanes = 0xDD;    // _MM_SHUFFLE(3,1,3,1)

    // First pass: pair up the low and high halves of neighbouring rows.
    SSAVec4f low01 = shuffle(row0, row1, low_halves);
    SSAVec4f high01 = shuffle(row0, row1, high_halves);
    SSAVec4f low23 = shuffle(row2, row3, low_halves);
    SSAVec4f high23 = shuffle(row2, row3, high_halves);

    // Second pass: pick the even / odd lanes to form the transposed rows.
    row0 = shuffle(low01, low23, even_lanes);
    row1 = shuffle(low01, low23, odd_lanes);
    row2 = shuffle(high01, high23, even_lanes);
    row3 = shuffle(high01, high23, odd_lanes);
}
// Single-input shuffle: forwards to the two-input overload with an undef
// vector as the second operand.
SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3)
{
    SSAVec4f undef_operand = from_llvm(llvm::UndefValue::get(llvm_type()));
    return shuffle(f0, undef_operand, index0, index1, index2, index3);
}
// Two-input shuffle: result lane N takes the element selected by indexN, passed
// as an i32 mask constant to LLVM's shufflevector over (f0, f1).
SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3)
{
    const int selectors[4] = { index0, index1, index2, index3 };

    std::vector<llvm::Constant*> mask_lanes;
    for (int lane = 0; lane < 4; lane++)
    {
        mask_lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, selectors[lane])));
    }
    llvm::Value *mask = llvm::ConstantVector::get(mask_lanes);

    llvm::Value *shuffled = SSAScope::builder().CreateShuffleVector(f0.v, f1.v, mask, SSAScope::hint());
    return SSAVec4f::from_llvm(shuffled);
}
// Shuffle driven by an 8-bit immediate holding four 2-bit selectors: the two
// low selectors pick lanes from f0, the two high selectors pick lanes from f1
// (hence the +4 offset into the concatenated operand pair).
SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask)
{
    const int lane0 = mask & 3;
    const int lane1 = (mask >> 2) & 3;
    const int lane2 = ((mask >> 4) & 3) + 4;
    const int lane3 = ((mask >> 6) & 3) + 4;
    return shuffle(f0, f1, lane0, lane1, lane2, lane3);
}
// Lane-wise floating point addition.
SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b)
{
    llvm::Value *sum = SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint());
    return SSAVec4f::from_llvm(sum);
}

// Lane-wise floating point subtraction.
SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b)
{
    llvm::Value *difference = SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint());
    return SSAVec4f::from_llvm(difference);
}

// Lane-wise floating point multiplication.
SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b)
{
    llvm::Value *product = SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint());
    return SSAVec4f::from_llvm(product);
}

// Lane-wise floating point division.
SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b)
{
    llvm::Value *quotient = SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint());
    return SSAVec4f::from_llvm(quotient);
}
// Constant-on-the-left overloads: the float is splatted into a constant vector
// and the vector/vector operator does the work.
SSAVec4f operator+(float a, const SSAVec4f &b)
{
    SSAVec4f lhs(a);
    return lhs + b;
}

SSAVec4f operator-(float a, const SSAVec4f &b)
{
    SSAVec4f lhs(a);
    return lhs - b;
}

SSAVec4f operator*(float a, const SSAVec4f &b)
{
    SSAVec4f lhs(a);
    return lhs * b;
}

SSAVec4f operator/(float a, const SSAVec4f &b)
{
    SSAVec4f lhs(a);
    return lhs / b;
}
// Constant-on-the-right overloads: the float is splatted into a constant vector
// and the vector/vector operator does the work.
SSAVec4f operator+(const SSAVec4f &a, float b)
{
    SSAVec4f rhs(b);
    return a + rhs;
}

SSAVec4f operator-(const SSAVec4f &a, float b)
{
    SSAVec4f rhs(b);
    return a - rhs;
}

SSAVec4f operator*(const SSAVec4f &a, float b)
{
    SSAVec4f rhs(b);
    return a * rhs;
}

SSAVec4f operator/(const SSAVec4f &a, float b)
{
    SSAVec4f rhs(b);
    return a / rhs;
}

View File

@ -0,0 +1,57 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4i;
class SSAFloat;
class SSAInt;
// Wrapper around an llvm::Value holding a <4 x float> vector (see llvm_type()).
class SSAVec4f
{
public:
// Creates a wrapper with a null value.
SSAVec4f();
// Constant vector with all four lanes set to `constant`.
SSAVec4f(float constant);
// Constant vector built lane by lane; constantN becomes lane N.
SSAVec4f(float constant0, float constant1, float constant2, float constant3);
// Broadcasts a scalar into all four lanes.
SSAVec4f(SSAFloat f);
// Builds a vector from four scalars; fN becomes lane N.
SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3);
// Wraps an existing LLVM value; explicit to avoid silent conversions.
explicit SSAVec4f(llvm::Value *v);
// Numeric int-to-float conversion (SSE2 cvtdq2ps intrinsic), not a bitcast.
SSAVec4f(SSAVec4i i32);
// Extracts one lane as a scalar.
SSAFloat operator[](SSAInt index) const;
// Returns vec4f with the lane at `index` replaced by `value`.
static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index);
// Reinterprets the bits of an integer vector as floats.
static SSAVec4f bitcast(SSAVec4i i32);
// Lane-wise math helpers, each emitted as an intrinsic call.
static SSAVec4f sqrt(SSAVec4f f);
static SSAVec4f rcp(SSAVec4f f);
static SSAVec4f sin(SSAVec4f val);
static SSAVec4f cos(SSAVec4f val);
static SSAVec4f pow(SSAVec4f val, SSAVec4f power);
static SSAVec4f exp(SSAVec4f val);
static SSAVec4f log(SSAVec4f val);
static SSAVec4f fma(SSAVec4f a, SSAVec4f b, SSAVec4f c);
// In-place transpose of the 4x4 matrix formed by the four rows.
static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3);
// Shuffle of one input (second shufflevector operand is undef).
static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3);
// Shuffle of two inputs; indexN selects the source element for result lane N.
static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3);
// Named factory equivalent to the explicit constructor.
static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); }
// LLVM <4 x float> vector type.
static llvm::Type *llvm_type();
// The wrapped LLVM value.
llvm::Value *v;
private:
// Shuffle driven by an 8-bit immediate of four 2-bit selectors (used by transpose).
static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask);
};
// Lane-wise floating point arithmetic between two vectors.
SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b);
// Constant on the left-hand side (splatted to all lanes).
SSAVec4f operator+(float a, const SSAVec4f &b);
SSAVec4f operator-(float a, const SSAVec4f &b);
SSAVec4f operator*(float a, const SSAVec4f &b);
SSAVec4f operator/(float a, const SSAVec4f &b);
// Constant on the right-hand side (splatted to all lanes).
SSAVec4f operator+(const SSAVec4f &a, float b);
SSAVec4f operator-(const SSAVec4f &a, float b);
SSAVec4f operator*(const SSAVec4f &a, float b);
SSAVec4f operator/(const SSAVec4f &a, float b);

View File

@ -0,0 +1,50 @@
#include "ssa_vec4f_ptr.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates a pointer wrapper that does not reference any LLVM value yet.
SSAVec4fPtr::SSAVec4fPtr() : v(0) { }

// Wraps an already existing LLVM pointer value.
SSAVec4fPtr::SSAVec4fPtr(llvm::Value *v) : v(v) { }
// Returns the LLVM pointer-to-<4 x float> type for the current SSAScope context.
llvm::Type *SSAVec4fPtr::llvm_type()
{
    llvm::Type *float_type = llvm::Type::getFloatTy(SSAScope::context());
    return llvm::VectorType::get(float_type, 4)->getPointerTo();
}
// Emits a GEP that offsets this pointer by `index` elements and wraps the result.
SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const
{
    llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
    return SSAVec4fPtr::from_llvm(element_ptr);
}
// Emits a non-volatile load through the wrapped pointer. No explicit alignment
// is given, so the pointer must satisfy the vector type's default alignment.
SSAVec4f SSAVec4fPtr::load() const
{
    llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
    return SSAVec4f::from_llvm(loaded);
}
// Loads a vector using an explicit alignment of 4 bytes instead of the vector
// type's default alignment.
SSAVec4f SSAVec4fPtr::load_unaligned() const
{
    // The instruction is built by hand so the alignment (4) can be passed in,
    // then handed to the builder for insertion.
    llvm::LoadInst *load = new llvm::LoadInst(v, SSAScope::hint(), false, 4);
    return SSAVec4f::from_llvm(SSAScope::builder().Insert(load, SSAScope::hint()));
}
// Emits a non-volatile store with an explicit 16-byte alignment.
void SSAVec4fPtr::store(const SSAVec4f &new_value)
{
    const unsigned alignment = 16;
    const bool is_volatile = false;
    SSAScope::builder().CreateAlignedStore(new_value.v, v, alignment, is_volatile);
}
// Stores a <4 x float> vector through a pointer that is not assumed to be
// 16-byte aligned.
//
// new_value: the vector to write.
void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value)
{
    // A store without an explicit alignment is given the ABI alignment of the
    // stored type, which for a 16-byte vector lets the backend pick an aligned
    // move. State the same 4-byte alignment that load_unaligned() uses so the
    // store is valid for any float-aligned address (this replaces the disabled
    // x86_sse_storeu_ps intrinsic call).
    SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
}

View File

@ -0,0 +1,24 @@
#pragma once
#include "ssa_int.h"
#include "ssa_vec4f.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Wrapper around an llvm::Value used as a pointer to <4 x float> vectors;
// offers indexing plus aligned and unaligned load/store helpers.
class SSAVec4fPtr
{
public:
// Creates a wrapper with a null value.
SSAVec4fPtr();
// Wraps an existing LLVM pointer value; explicit to avoid silent conversions.
explicit SSAVec4fPtr(llvm::Value *v);
// Named factory equivalent to the explicit constructor.
static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); }
// LLVM pointer-to-<4 x float> type.
static llvm::Type *llvm_type();
// Pointer offset by `index` elements (emits a GEP).
SSAVec4fPtr operator[](SSAInt index) const;
// Loads a vector with the vector type's default alignment.
SSAVec4f load() const;
// Loads a vector with an explicit 4-byte alignment.
SSAVec4f load_unaligned() const;
// Stores a vector with 16-byte alignment.
void store(const SSAVec4f &new_value);
// Stores a vector to a pointer not required to be 16-byte aligned.
void store_unaligned(const SSAVec4f &new_value);
// The wrapped LLVM pointer value.
llvm::Value *v;
};

View File

@ -0,0 +1,213 @@
#include "ssa_vec4i.h"
#include "ssa_vec4f.h"
#include "ssa_vec8s.h"
#include "ssa_vec16ub.h"
#include "ssa_int.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Creates a wrapper that does not reference any LLVM value yet.
SSAVec4i::SSAVec4i() : v(0) { }
// Builds a constant vector whose four lanes all hold the signed value `constant`.
SSAVec4i::SSAVec4i(int constant)
    : v(0)
{
    llvm::Constant *lane = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true));
    std::vector<llvm::Constant*> lanes(4, lane);
    v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from four signed constants; constantN becomes lane N.
SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3)
    : v(0)
{
    const int values[4] = { constant0, constant1, constant2, constant3 };

    std::vector<llvm::Constant*> lanes;
    for (int lane = 0; lane < 4; lane++)
    {
        lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, values[lane], true)));
    }
    v = llvm::ConstantVector::get(lanes);
}
// Wraps an already existing LLVM value.
SSAVec4i::SSAVec4i(llvm::Value *v) : v(v) { }
// Broadcasts a scalar SSAInt into all four lanes: the scalar is bitcast to a
// one-element vector and shuffled with an all-zero index mask.
SSAVec4i::SSAVec4i(SSAInt i)
    : v(0)
{
    llvm::Type *single_lane_type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1);

    // Every result lane selects element 0 of the first shuffle operand.
    llvm::Constant *lane_zero = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0));
    std::vector<llvm::Constant*> mask_lanes(4, lane_zero);
    llvm::Value *mask = llvm::ConstantVector::get(mask_lanes);

    llvm::Value *as_vector = SSAScope::builder().CreateBitCast(i.v, single_lane_type, SSAScope::hint());
    v = SSAScope::builder().CreateShuffleVector(as_vector, llvm::UndefValue::get(single_lane_type), mask, SSAScope::hint());
}
// Converts a 4 x float vector to integers by calling the x86 SSE2 cvttps2dq
// intrinsic.
SSAVec4i::SSAVec4i(SSAVec4f f32)
    : v(0)
{
    v = SSAScope::builder().CreateCall(
        SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq),
        f32.v,
        SSAScope::hint());
}
// Extracts the lane selected by `index` as a scalar.
SSAInt SSAVec4i::operator[](SSAInt index)
{
    llvm::Value *lane = SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint());
    return SSAInt::from_llvm(lane);
}
// Returns the LLVM <4 x i32> vector type for the current SSAScope context.
llvm::Type *SSAVec4i::llvm_type()
{
    llvm::Type *int32_type = llvm::Type::getInt32Ty(SSAScope::context());
    return llvm::VectorType::get(int32_type, 4);
}
// Reinterprets the bits of a float vector as <4 x i32> (emits a bitcast).
SSAVec4i SSAVec4i::bitcast(SSAVec4f f32)
{
    llvm::Value *as_int32 = SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint());
    return SSAVec4i::from_llvm(as_int32);
}

// Reinterprets the bits of an SSAVec8s as <4 x i32> (emits a bitcast).
SSAVec4i SSAVec4i::bitcast(SSAVec8s i16)
{
    llvm::Value *as_int32 = SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint());
    return SSAVec4i::from_llvm(as_int32);
}
// Single-input shuffle: forwards to the two-input overload with an undef
// vector as the second operand.
SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3)
{
    SSAVec4i undef_operand = from_llvm(llvm::UndefValue::get(llvm_type()));
    return shuffle(i0, undef_operand, index0, index1, index2, index3);
}
// Two-input shuffle: result lane N takes the element selected by indexN, passed
// as an i32 mask constant to LLVM's shufflevector over (i0, i1).
SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3)
{
    const int selectors[4] = { index0, index1, index2, index3 };

    std::vector<llvm::Constant*> mask_lanes;
    for (int lane = 0; lane < 4; lane++)
    {
        mask_lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, selectors[lane])));
    }
    llvm::Value *mask = llvm::ConstantVector::get(mask_lanes);

    llvm::Value *shuffled = SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint());
    return SSAVec4i::from_llvm(shuffled);
}
// Widens a 16-lane byte vector in two steps (bytes -> 16-bit lanes -> 32-bit
// lanes) and writes the four resulting vectors to out0..out3, lowest lanes first.
void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3)
{
	const SSAVec8s lower_half = SSAVec8s::extendlo(a);
	const SSAVec8s upper_half = SSAVec8s::extendhi(a);

	out0 = extendlo(lower_half);
	out1 = extendhi(lower_half);
	out2 = extendlo(upper_half);
	out3 = extendhi(upper_half);
}
// Interleaves the upper four 16-bit lanes of i16 with zero lanes and
// reinterprets the result as four 32-bit lanes.
SSAVec4i SSAVec4i::extendhi(SSAVec8s i16)
{
	// Ten arguments select the two-vector shuffle overload; the literal 0 becomes
	// an all-zero SSAVec8s through the implicit SSAVec8s(short) constructor.
	SSAVec8s interleaved = SSAVec8s::shuffle(i16, 0,
		4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); // lane pattern of _mm_unpackhi_epi16
	return SSAVec4i::bitcast(interleaved);
}
// Interleaves the lower four 16-bit lanes of i16 with zero lanes and
// reinterprets the result as four 32-bit lanes.
SSAVec4i SSAVec4i::extendlo(SSAVec8s i16)
{
	// Ten arguments select the two-vector shuffle overload; the literal 0 becomes
	// an all-zero SSAVec8s through the implicit SSAVec8s(short) constructor.
	SSAVec8s interleaved = SSAVec8s::shuffle(i16, 0,
		0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); // lane pattern of _mm_unpacklo_epi16
	return SSAVec4i::bitcast(interleaved);
}
// Interleaves the upper four 16-bit lanes of a and b (a first) and
// reinterprets the result as four 32-bit lanes.
SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b)
{
	SSAVec8s interleaved = SSAVec8s::shuffle(a, b,
		4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); // lane pattern of _mm_unpackhi_epi16
	return SSAVec4i::bitcast(interleaved);
}
// Interleaves the lower four 16-bit lanes of a and b (a first) and
// reinterprets the result as four 32-bit lanes.
SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b)
{
	SSAVec8s interleaved = SSAVec8s::shuffle(a, b,
		0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); // lane pattern of _mm_unpacklo_epi16
	return SSAVec4i::bitcast(interleaved);
}
// Emits a call to the x86_sse2_sqrt_pd intrinsic with f's underlying value as
// the only argument and wraps the result as an SSAVec4i.
// NOTE(review): sqrt_pd is the SSE2 packed-double square root, whereas this
// class wraps a vector of four 32-bit integers (see llvm_type()); the operand
// type looks mismatched - confirm the intended semantics before using this.
SSAVec4i SSAVec4i::sqrt(SSAVec4i f)
{
return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, SSAScope::hint()));
}
/*
SSAVec4i SSAVec4i::min_sse41(SSAVec4i a, SSAVec4i b)
{
llvm::Value *values[2] = { a.v, b.v };
return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pminsd), values, SSAScope::hint()));
}
SSAVec4i SSAVec4i::max_sse41(SSAVec4i a, SSAVec4i b)
{
llvm::Value *values[2] = { a.v, b.v };
return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pmaxsd), values, SSAScope::hint()));
}
*/
// Lane-wise integer addition (emits an add instruction).
SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(sum);
}
// Lane-wise integer subtraction (emits a sub instruction).
SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(difference);
}
// Lane-wise integer multiplication (emits a mul instruction).
SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(product);
}
// Lane-wise signed integer division (emits an sdiv instruction).
SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b)
{
	llvm::Value *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
	return SSAVec4i::from_llvm(quotient);
}
// Scalar + vector: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator+(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs + b;
}
// Scalar - vector: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator-(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs - b;
}
// Scalar * vector: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator*(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs * b;
}
// Scalar / vector: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator/(int a, const SSAVec4i &b)
{
	const SSAVec4i lhs(a);
	return lhs / b;
}
// Vector + scalar: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator+(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a + rhs;
}
// Vector - scalar: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator-(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a - rhs;
}
// Vector * scalar: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator*(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a * rhs;
}
// Vector / scalar: wraps the scalar with the SSAVec4i(int) constructor and
// defers to the vector/vector operator.
SSAVec4i operator/(const SSAVec4i &a, int b)
{
	const SSAVec4i rhs(b);
	return a / rhs;
}
// Shifts every 32-bit lane left by a compile-time bit count by calling the
// SSE2 pslli_d intrinsic with (vector, i32 count).
SSAVec4i operator<<(const SSAVec4i &a, int bits)
{
	llvm::Value *shift_count = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits));
	llvm::Value *call_args[2] = { a.v, shift_count };
	llvm::Value *shifted = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), call_args, SSAScope::hint());
	return SSAVec4i::from_llvm(shifted);
}
// Shifts every lane right by a compile-time bit count using a logical
// (zero-filling) shift, i.e. an lshr instruction.
SSAVec4i operator>>(const SSAVec4i &a, int bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
	return SSAVec4i::from_llvm(shifted);
}

View File

@ -0,0 +1,56 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4f;
class SSAVec8s;
class SSAVec16ub;
class SSAInt;
// Thin wrapper around an llvm::Value holding a vector of four 32-bit integers
// (see llvm_type()). Member functions emit LLVM IR through SSAScope::builder().
class SSAVec4i
{
public:
SSAVec4i();
// Constant vectors. Presumably the single-int form repeats the constant in
// every lane - confirm against the definition in ssa_vec4i.cpp.
SSAVec4i(int constant);
SSAVec4i(int constant0, int constant1, int constant2, int constant3);
SSAVec4i(SSAInt i);
// Wraps an existing LLVM value without emitting any instruction.
explicit SSAVec4i(llvm::Value *v);
// Converts a float vector via the SSE2 cvttps2dq intrinsic.
SSAVec4i(SSAVec4f f32);
// Extracts the lane selected by a runtime index.
SSAInt operator[](SSAInt index);
// Bit reinterpretation (no value conversion) from other 128-bit vector wrappers.
static SSAVec4i bitcast(SSAVec4f f32);
static SSAVec4i bitcast(SSAVec8s i16);
// Lane shuffles with constant indices; the one-vector form uses an undef second operand.
static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3);
static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3);
// Widen the upper / lower four 16-bit lanes to 32 bits by interleaving with zero lanes.
static SSAVec4i extendhi(SSAVec8s i16);
static SSAVec4i extendlo(SSAVec8s i16);
// Widens all sixteen byte lanes of a into four SSAVec4i outputs (out0 = lowest lanes).
static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3);
// Interleave the upper / lower four 16-bit lanes of v0 and v1.
static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1);
static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1);
// NOTE(review): implemented with the packed-double sqrt intrinsic; verify before use.
static SSAVec4i sqrt(SSAVec4i f);
//static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b);
//static SSAVec4i max_sse41(SSAVec4i a, SSAVec4i b);
// Named alternative to the explicit llvm::Value constructor.
static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); }
// LLVM type of the wrapped value: vector of four i32.
static llvm::Type *llvm_type();
// Underlying LLVM value (public; read directly by the other SSA wrapper classes).
llvm::Value *v;
};
// Lane-wise arithmetic on two vectors; operator/ emits signed division.
SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b);
// Scalar/vector forms: the int is wrapped with SSAVec4i(int) and the
// vector/vector operator is applied.
SSAVec4i operator+(int a, const SSAVec4i &b);
SSAVec4i operator-(int a, const SSAVec4i &b);
SSAVec4i operator*(int a, const SSAVec4i &b);
SSAVec4i operator/(int a, const SSAVec4i &b);
SSAVec4i operator+(const SSAVec4i &a, int b);
SSAVec4i operator-(const SSAVec4i &a, int b);
SSAVec4i operator*(const SSAVec4i &a, int b);
SSAVec4i operator/(const SSAVec4i &a, int b);
// Shifts by a compile-time bit count: << uses the SSE2 pslli_d intrinsic,
// >> emits a logical (zero-filling) shift.
SSAVec4i operator<<(const SSAVec4i &a, int bits);
SSAVec4i operator>>(const SSAVec4i &a, int bits);

View File

@ -0,0 +1,50 @@
#include "ssa_vec4i_ptr.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Default constructor: the wrapped LLVM value starts out null.
SSAVec4iPtr::SSAVec4iPtr()
	: v(nullptr)
{
}
// Wraps an existing LLVM value; no instruction is emitted.
SSAVec4iPtr::SSAVec4iPtr(llvm::Value *value)
	: v(value)
{
}
// Returns the LLVM type wrapped by this class: pointer to a vector of four i32.
llvm::Type *SSAVec4iPtr::llvm_type()
{
	llvm::Type *lane_type = llvm::Type::getInt32Ty(SSAScope::context());
	llvm::Type *vector_type = llvm::VectorType::get(lane_type, 4);
	return vector_type->getPointerTo();
}
// Pointer arithmetic: emits a GEP with the given index and returns the
// resulting element pointer.
SSAVec4iPtr SSAVec4iPtr::operator[](SSAInt index) const
{
	llvm::Value *element_ptr = SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint());
	return SSAVec4iPtr::from_llvm(element_ptr);
}
// Emits a non-volatile load through the wrapped pointer with no explicit
// alignment given to the builder.
SSAVec4i SSAVec4iPtr::load() const
{
	llvm::Value *loaded = SSAScope::builder().CreateLoad(v, false, SSAScope::hint());
	return SSAVec4i::from_llvm(loaded);
}
// Emits a non-volatile load with an explicit alignment of 4 bytes, so the
// pointer does not have to be 16-byte aligned.
SSAVec4i SSAVec4iPtr::load_unaligned() const
{
	llvm::LoadInst *unaligned_load = new llvm::LoadInst(v, SSAScope::hint(), false, 4);
	return SSAVec4i::from_llvm(SSAScope::builder().Insert(unaligned_load));
}
// Emits a non-volatile store of new_value through the wrapped pointer,
// declaring the destination to be 16-byte aligned.
void SSAVec4iPtr::store(const SSAVec4i &new_value)
{
	const unsigned alignment = 16;
	const bool is_volatile = false;
	SSAScope::builder().CreateAlignedStore(new_value.v, v, alignment, is_volatile);
}
// Emits a non-volatile store of new_value through the wrapped pointer without
// requiring 16-byte alignment of the destination.
//
// A plain CreateStore() leaves the alignment unspecified, which LLVM treats as
// the ABI alignment of the stored type (16 bytes for a vector of four i32), so
// the backend is free to use an aligned move. The alignment is therefore stated
// explicitly and matches the 4 bytes used by load_unaligned().
void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value)
{
	SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
}

View File

@ -0,0 +1,24 @@
#pragma once
#include "ssa_int.h"
#include "ssa_vec4i.h"
namespace llvm { class Value; }
namespace llvm { class Type; }
// Thin wrapper around an llvm::Value that is a pointer to a vector of four
// 32-bit integers (see llvm_type()). Loads and stores emit IR via SSAScope::builder().
class SSAVec4iPtr
{
public:
// Leaves v null.
SSAVec4iPtr();
// Wraps an existing LLVM value without emitting any instruction.
explicit SSAVec4iPtr(llvm::Value *v);
// Named alternative to the explicit llvm::Value constructor.
static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); }
// LLVM type of the wrapped value: pointer to a vector of four i32.
static llvm::Type *llvm_type();
// Emits a GEP with the given index and returns the resulting pointer.
SSAVec4iPtr operator[](SSAInt index) const;
// load() passes no explicit alignment to the builder; load_unaligned()
// requests an alignment of 4 bytes.
SSAVec4i load() const;
SSAVec4i load_unaligned() const;
// store() declares a 16-byte aligned destination; store_unaligned() is the
// store counterpart of load_unaligned().
void store(const SSAVec4i &new_value);
void store_unaligned(const SSAVec4i &new_value);
// Underlying LLVM pointer value.
llvm::Value *v;
};

View File

@ -0,0 +1,178 @@
#include "ssa_vec8s.h"
#include "ssa_vec4i.h"
#include "ssa_vec16ub.h"
#include "ssa_scope.h"
#include "r_compiler/llvm_include.h"
// Default constructor: the wrapped LLVM value starts out null.
SSAVec8s::SSAVec8s()
	: v(nullptr)
{
}
// Builds a constant vector whose eight 16-bit lanes all hold `constant`
// (encoded as a signed 16-bit APInt).
SSAVec8s::SSAVec8s(short constant)
	: v(nullptr)
{
	llvm::Constant *lane = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true));
	const std::vector<llvm::Constant*> lanes(8, lane);
	v = llvm::ConstantVector::get(lanes);
}
// Builds a constant vector from eight individual lane values, constant0 in
// lane 0 through constant7 in lane 7 (each encoded as a signed 16-bit APInt).
SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7)
	: v(nullptr)
{
	const short lane_values[8] =
	{
		constant0, constant1, constant2, constant3,
		constant4, constant5, constant6, constant7
	};

	std::vector<llvm::Constant*> lanes;
	lanes.reserve(8);
	for (short lane_value : lane_values)
		lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, lane_value, true)));

	v = llvm::ConstantVector::get(lanes);
}
// Wraps an existing LLVM value; no instruction is emitted.
SSAVec8s::SSAVec8s(llvm::Value *value)
	: v(value)
{
}
// Packs two vectors of four 32-bit integers into one vector of eight 16-bit
// lanes by calling the SSE2 packssdw intrinsic with (i0, i1).
SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1)
	: v(nullptr)
{
	llvm::Value *call_args[2] = { i0.v, i1.v };
	auto pack_fn = SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128);
	v = SSAScope::builder().CreateCall(pack_fn, call_args, SSAScope::hint());
}
// Returns the LLVM type wrapped by this class: a vector of eight 16-bit integers.
llvm::Type *SSAVec8s::llvm_type()
{
	llvm::Type *lane_type = llvm::Type::getInt16Ty(SSAScope::context());
	return llvm::VectorType::get(lane_type, 8);
}
// Reinterprets the bits of a byte vector as eight 16-bit lanes
// (emits a bitcast; no value conversion).
SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8)
{
	llvm::Value *reinterpreted = SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint());
	return SSAVec8s::from_llvm(reinterpreted);
}
// Single-source shuffle: forwards to the two-source overload with an undef
// vector as the second operand.
SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7)
{
	SSAVec8s undef_operand = from_llvm(llvm::UndefValue::get(llvm_type()));
	return shuffle(i0, undef_operand, index0, index1, index2, index3, index4, index5, index6, index7);
}
// Two-source shuffle: builds a constant i32 mask from the eight indices and
// emits a shufflevector instruction over i0 and i1.
SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7)
{
	const int lane_indices[8] =
	{
		index0, index1, index2, index3,
		index4, index5, index6, index7
	};

	std::vector<llvm::Constant*> mask_lanes;
	mask_lanes.reserve(8);
	for (int lane_index : lane_indices)
		mask_lanes.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, lane_index)));

	llvm::Value *shuffle_mask = llvm::ConstantVector::get(mask_lanes);
	llvm::Value *shuffled = SSAScope::builder().CreateShuffleVector(i0.v, i1.v, shuffle_mask, SSAScope::hint());
	return SSAVec8s::from_llvm(shuffled);
}
// Interleaves the upper eight byte lanes of `a` with the lanes of a second
// operand built from the literal 0, then reinterprets the result as eight
// 16-bit lanes.
SSAVec8s SSAVec8s::extendhi(SSAVec16ub a)
{
	// The second argument (0) is converted to an SSAVec16ub by that class;
	// presumably an all-zero vector - confirm against SSAVec16ub's constructors.
	SSAVec16ub interleaved = SSAVec16ub::shuffle(a, 0,
		8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11,
		12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); // lane pattern of _mm_unpackhi_epi8
	return SSAVec8s::bitcast(interleaved);
}
// Interleaves the lower eight byte lanes of `a` with the lanes of a second
// operand built from the literal 0, then reinterprets the result as eight
// 16-bit lanes.
SSAVec8s SSAVec8s::extendlo(SSAVec16ub a)
{
	// The second argument (0) is converted to an SSAVec16ub by that class;
	// presumably an all-zero vector - confirm against SSAVec16ub's constructors.
	SSAVec16ub interleaved = SSAVec16ub::shuffle(a, 0,
		0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3,
		4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); // lane pattern of _mm_unpacklo_epi8
	return SSAVec8s::bitcast(interleaved);
}
/*
SSAVec8s SSAVec8s::min_sse2(SSAVec8s a, SSAVec8s b)
{
llvm::Value *values[2] = { a.v, b.v };
return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmins_w), values, SSAScope::hint()));
}
SSAVec8s SSAVec8s::max_sse2(SSAVec8s a, SSAVec8s b)
{
llvm::Value *values[2] = { a.v, b.v };
return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmaxs_w), values, SSAScope::hint()));
}
*/
// Calls the SSE2 pmulh_w intrinsic with (a, b) and wraps the result.
SSAVec8s SSAVec8s::mulhi(SSAVec8s a, SSAVec8s b)
{
	llvm::Value *call_args[2] = { a.v, b.v };
	auto mulhi_fn = SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmulh_w);
	llvm::Value *high_products = SSAScope::builder().CreateCall(mulhi_fn, call_args, SSAScope::hint());
	return SSAVec8s::from_llvm(high_products);
}
// Lane-wise integer addition (emits an add instruction).
SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *sum = SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(sum);
}
// Lane-wise integer subtraction (emits a sub instruction).
SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *difference = SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(difference);
}
// Lane-wise integer multiplication (emits a mul instruction).
SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *product = SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(product);
}
// Lane-wise signed integer division (emits an sdiv instruction).
SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b)
{
	llvm::Value *quotient = SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint());
	return SSAVec8s::from_llvm(quotient);
}
// Scalar + vector: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then added lane-wise.
SSAVec8s operator+(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs + b;
}
// Scalar - vector: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then b is subtracted lane-wise.
SSAVec8s operator-(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs - b;
}
// Scalar * vector: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then multiplied lane-wise.
SSAVec8s operator*(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs * b;
}
// Scalar / vector: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then divided lane-wise by b.
SSAVec8s operator/(short a, const SSAVec8s &b)
{
	const SSAVec8s lhs(a);
	return lhs / b;
}
// Vector + scalar: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then added lane-wise.
SSAVec8s operator+(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a + rhs;
}
// Vector - scalar: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then subtracted lane-wise.
SSAVec8s operator-(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a - rhs;
}
// Vector * scalar: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then multiplied lane-wise.
SSAVec8s operator*(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a * rhs;
}
// Vector / scalar: the scalar is replicated into all eight lanes by the
// SSAVec8s(short) constructor, then a is divided lane-wise by it.
SSAVec8s operator/(const SSAVec8s &a, short b)
{
	const SSAVec8s rhs(b);
	return a / rhs;
}
// Shifts every 16-bit lane left by a compile-time bit count.
//
// Uses the SSE2 pslli_w intrinsic, whose operand is a vector of eight 16-bit
// lanes. The doubleword variant (pslli_d) takes a vector of four 32-bit lanes,
// so calling it with this class's value does not match the intrinsic's
// signature and would also shift bits across 16-bit lane boundaries.
SSAVec8s operator<<(const SSAVec8s &a, int bits)
{
	llvm::Value *shift_count = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits));
	llvm::Value *values[2] = { a.v, shift_count };
	return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_w), values, SSAScope::hint()));
}
// Shifts every lane right by a compile-time bit count using a logical
// (zero-filling) shift, i.e. an lshr instruction.
SSAVec8s operator>>(const SSAVec8s &a, int bits)
{
	llvm::Value *shifted = SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint());
	return SSAVec8s::from_llvm(shifted);
}

View File

@ -0,0 +1,48 @@
#pragma once
namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAVec4i;
class SSAVec16ub;
// Thin wrapper around an llvm::Value holding a vector of eight 16-bit integers
// (see llvm_type()). Member functions emit LLVM IR through SSAScope::builder().
class SSAVec8s
{
public:
// Leaves v null.
SSAVec8s();
// Constant vector with the same value in all eight lanes. Non-explicit, so a
// short (or literal) converts implicitly where an SSAVec8s is expected.
SSAVec8s(short constant);
// Constant vector with one value per lane (constant0 = lane 0).
SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7);
// Wraps an existing LLVM value without emitting any instruction.
explicit SSAVec8s(llvm::Value *v);
// Packs two 4 x i32 vectors into eight 16-bit lanes via the SSE2 packssdw intrinsic.
SSAVec8s(SSAVec4i i0, SSAVec4i i1);
// Bit reinterpretation (no value conversion) of a byte vector.
static SSAVec8s bitcast(SSAVec16ub i8);
// Lane shuffles with constant indices; the one-vector form uses an undef second operand.
static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
// Widen the upper / lower eight byte lanes of a to 16-bit lanes.
static SSAVec8s extendhi(SSAVec16ub a);
static SSAVec8s extendlo(SSAVec16ub a);
//static SSAVec8s min_sse2(SSAVec8s a, SSAVec8s b);
//static SSAVec8s max_sse2(SSAVec8s a, SSAVec8s b);
// Calls the SSE2 pmulh_w intrinsic on (a, b).
static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b);
// Named alternative to the explicit llvm::Value constructor.
static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); }
// LLVM type of the wrapped value: vector of eight i16.
static llvm::Type *llvm_type();
// Underlying LLVM value (public; read directly by the other SSA wrapper classes).
llvm::Value *v;
};
// Lane-wise arithmetic on two vectors; operator/ emits signed division.
SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b);
// Scalar/vector forms: the short is replicated into all lanes with
// SSAVec8s(short) and the vector/vector operator is applied.
SSAVec8s operator+(short a, const SSAVec8s &b);
SSAVec8s operator-(short a, const SSAVec8s &b);
SSAVec8s operator*(short a, const SSAVec8s &b);
SSAVec8s operator/(short a, const SSAVec8s &b);
SSAVec8s operator+(const SSAVec8s &a, short b);
SSAVec8s operator-(const SSAVec8s &a, short b);
SSAVec8s operator*(const SSAVec8s &a, short b);
SSAVec8s operator/(const SSAVec8s &a, short b);
// Shifts by a compile-time bit count: << calls an SSE2 shift intrinsic,
// >> emits a logical (zero-filling) shift.
SSAVec8s operator<<(const SSAVec8s &a, int bits);
SSAVec8s operator>>(const SSAVec8s &a, int bits);

View File

@ -38,6 +38,7 @@
#include "r_data/colormaps.h"
#include "r_plane.h"
#include "r_draw_rgba.h"
#include "r_compiler/fixedfunction/fixedfunction.h"
#include "gi.h"
#include "stats.h"
@ -299,6 +300,68 @@ void DrawerCommandQueue::StopThreads()
/////////////////////////////////////////////////////////////////////////////
// Drawer command that copies the global span-drawing state (ds_* / dc_*
// variables) when it is constructed and, when executed, draws one horizontal
// span by calling FixedFunction::DrawSpan.
class DrawSpanFFCommand : public DrawerCommand
{
// Snapshot of the ds_* / dc_* globals taken in the constructor. Execute() in
// this class only reads _x1, _x2, _y, _destorg and _ff; the remaining fields
// are captured but not used here.
fixed_t _xfrac;
fixed_t _yfrac;
fixed_t _xstep;
fixed_t _ystep;
int _x1;
int _x2;
int _y;
int _xbits;
int _ybits;
BYTE * RESTRICT _destorg;
const uint32_t * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
bool _nearest_filter;
uint32_t _srcalpha;
uint32_t _destalpha;
// Points at the function-local static FixedFunction created in the constructor.
FixedFunction *_ff;
public:
DrawSpanFFCommand()
{
_xfrac = ds_xfrac;
_yfrac = ds_yfrac;
_xstep = ds_xstep;
_ystep = ds_ystep;
_x1 = ds_x1;
_x2 = ds_x2;
_y = ds_y;
_xbits = ds_xbits;
_ybits = ds_ybits;
_destorg = dc_destorg;
_source = (const uint32_t*)ds_source;
_light = LightBgra::calc_light_multiplier(ds_light);
_shade_constants = ds_shade_constants;
// Must run after _source/_xbits/_ybits/_xstep/_ystep are assigned, since they
// are passed in here; presumably the helper may adjust them - confirm against
// SampleBgra::span_sampler_setup.
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
// One FixedFunction instance is shared by every DrawSpanFFCommand.
// NOTE(review): Execute() takes a DrawerThread, so DrawSpan may be reached
// from several threads - confirm FixedFunction::DrawSpan is safe for that.
static FixedFunction ff;
_ff = &ff;
}
// Draws the span on row _y from column _x1 to _x2 (inclusive) unless the
// given thread reports that it skips this row.
void Execute(DrawerThread *thread) override
{
if (thread->skipped_by_thread(_y))
return;
// Destination address: row offset from ylookup plus the starting column,
// applied to the frame buffer viewed as 32-bit pixels.
uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg;
int count = _x2 - _x1 + 1;
_ff->DrawSpan(count, dest);
}
};
/////////////////////////////////////////////////////////////////////////////
class DrawerColumnCommand : public DrawerCommand
{
public:
@ -2700,11 +2763,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
// Queues a span draw on the drawer command queue. The span is currently routed
// to DrawSpanFFCommand; the earlier dispatch to DrawSpanRGBACommand /
// DrawSpanRGBA_SSE_Command (selected by NO_SSE) is kept below, commented out.
void R_DrawSpan_rgba()
{
DrawerCommandQueue::QueueCommand<DrawSpanFFCommand>();
/*
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanRGBA_SSE_Command>();
#endif
*/
}
void R_DrawSpanMasked_rgba()