From 3dd8b593b6a9e35fefd4ce76490f963cccc70fb4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 26 Sep 2016 09:00:19 +0200 Subject: [PATCH] Use LLVM to JIT the code for one of the drawer functions --- src/CMakeLists.txt | 86 ++ .../fixedfunction/fixedfunction.cpp | 1046 +++++++++++++++++ src/r_compiler/fixedfunction/fixedfunction.h | 130 ++ src/r_compiler/llvm_include.h | 46 + src/r_compiler/ssa/ssa_barycentric_weight.h | 97 ++ src/r_compiler/ssa/ssa_bool.cpp | 91 ++ src/r_compiler/ssa/ssa_bool.h | 37 + src/r_compiler/ssa/ssa_float.cpp | 152 +++ src/r_compiler/ssa/ssa_float.h | 42 + src/r_compiler/ssa/ssa_float_ptr.cpp | 65 + src/r_compiler/ssa/ssa_float_ptr.h | 27 + src/r_compiler/ssa/ssa_for_block.cpp | 25 + src/r_compiler/ssa/ssa_for_block.h | 18 + src/r_compiler/ssa/ssa_function.cpp | 55 + src/r_compiler/ssa/ssa_function.h | 30 + src/r_compiler/ssa/ssa_if_block.cpp | 30 + src/r_compiler/ssa/ssa_if_block.h | 46 + src/r_compiler/ssa/ssa_int.cpp | 117 ++ src/r_compiler/ssa/ssa_int.h | 41 + src/r_compiler/ssa/ssa_int_ptr.cpp | 58 + src/r_compiler/ssa/ssa_int_ptr.h | 27 + src/r_compiler/ssa/ssa_phi.h | 33 + src/r_compiler/ssa/ssa_pixelformat4f.h | 28 + src/r_compiler/ssa/ssa_pixelformat4ub.h | 28 + .../ssa/ssa_pixelformat4ub_argb_rev.h | 35 + src/r_compiler/ssa/ssa_pixelformat4ub_rev.h | 28 + src/r_compiler/ssa/ssa_pixels.h | 39 + src/r_compiler/ssa/ssa_pixeltype.h | 498 ++++++++ src/r_compiler/ssa/ssa_scope.cpp | 65 + src/r_compiler/ssa/ssa_scope.h | 41 + src/r_compiler/ssa/ssa_stack.h | 25 + src/r_compiler/ssa/ssa_struct_type.cpp | 18 + src/r_compiler/ssa/ssa_struct_type.h | 17 + src/r_compiler/ssa/ssa_ubyte.cpp | 95 ++ src/r_compiler/ssa/ssa_ubyte.h | 35 + src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 106 ++ src/r_compiler/ssa/ssa_ubyte_ptr.h | 32 + src/r_compiler/ssa/ssa_value.cpp | 56 + src/r_compiler/ssa/ssa_value.h | 53 + src/r_compiler/ssa/ssa_vec16ub.cpp | 155 +++ src/r_compiler/ssa/ssa_vec16ub.h | 42 + src/r_compiler/ssa/ssa_vec4f.cpp | 244 ++++ 
src/r_compiler/ssa/ssa_vec4f.h | 57 + src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 50 + src/r_compiler/ssa/ssa_vec4f_ptr.h | 24 + src/r_compiler/ssa/ssa_vec4i.cpp | 213 ++++ src/r_compiler/ssa/ssa_vec4i.h | 56 + src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 50 + src/r_compiler/ssa/ssa_vec4i_ptr.h | 24 + src/r_compiler/ssa/ssa_vec8s.cpp | 178 +++ src/r_compiler/ssa/ssa_vec8s.h | 48 + src/r_draw_rgba.cpp | 66 ++ 52 files changed, 4705 insertions(+) create mode 100644 src/r_compiler/fixedfunction/fixedfunction.cpp create mode 100644 src/r_compiler/fixedfunction/fixedfunction.h create mode 100644 src/r_compiler/llvm_include.h create mode 100644 src/r_compiler/ssa/ssa_barycentric_weight.h create mode 100644 src/r_compiler/ssa/ssa_bool.cpp create mode 100644 src/r_compiler/ssa/ssa_bool.h create mode 100644 src/r_compiler/ssa/ssa_float.cpp create mode 100644 src/r_compiler/ssa/ssa_float.h create mode 100644 src/r_compiler/ssa/ssa_float_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_float_ptr.h create mode 100644 src/r_compiler/ssa/ssa_for_block.cpp create mode 100644 src/r_compiler/ssa/ssa_for_block.h create mode 100644 src/r_compiler/ssa/ssa_function.cpp create mode 100644 src/r_compiler/ssa/ssa_function.h create mode 100644 src/r_compiler/ssa/ssa_if_block.cpp create mode 100644 src/r_compiler/ssa/ssa_if_block.h create mode 100644 src/r_compiler/ssa/ssa_int.cpp create mode 100644 src/r_compiler/ssa/ssa_int.h create mode 100644 src/r_compiler/ssa/ssa_int_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_int_ptr.h create mode 100644 src/r_compiler/ssa/ssa_phi.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4f.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_rev.h create mode 100644 src/r_compiler/ssa/ssa_pixels.h create mode 100644 src/r_compiler/ssa/ssa_pixeltype.h create mode 100644 src/r_compiler/ssa/ssa_scope.cpp create mode 100644 
src/r_compiler/ssa/ssa_scope.h create mode 100644 src/r_compiler/ssa/ssa_stack.h create mode 100644 src/r_compiler/ssa/ssa_struct_type.cpp create mode 100644 src/r_compiler/ssa/ssa_struct_type.h create mode 100644 src/r_compiler/ssa/ssa_ubyte.cpp create mode 100644 src/r_compiler/ssa/ssa_ubyte.h create mode 100644 src/r_compiler/ssa/ssa_ubyte_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_ubyte_ptr.h create mode 100644 src/r_compiler/ssa/ssa_value.cpp create mode 100644 src/r_compiler/ssa/ssa_value.h create mode 100644 src/r_compiler/ssa/ssa_vec16ub.cpp create mode 100644 src/r_compiler/ssa/ssa_vec16ub.h create mode 100644 src/r_compiler/ssa/ssa_vec4f.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4f.h create mode 100644 src/r_compiler/ssa/ssa_vec4f_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4f_ptr.h create mode 100644 src/r_compiler/ssa/ssa_vec4i.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4i.h create mode 100644 src/r_compiler/ssa/ssa_vec4i_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4i_ptr.h create mode 100644 src/r_compiler/ssa/ssa_vec8s.cpp create mode 100644 src/r_compiler/ssa/ssa_vec8s.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d71170ee..4f9599b35 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -104,6 +104,15 @@ if( WIN32 ) endif() add_definitions( -D_WIN32 ) + + set( FMOD_SEARCH_PATHS + "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" + "C:/Program Files (x86)/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" + # This next one is for Randy. 
+ "E:/Software/Dev/FMOD/${WIN_TYPE}/api" + ) + set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) + set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) set( FMOD_SEARCH_PATHS "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" @@ -255,6 +264,57 @@ if( NOT NO_OPENAL ) endif() endif() +# C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +llvm_map_components_to_libnames(llvm_libs + analysis + asmparser + asmprinter + bitreader + bitwriter + codegen + core + executionengine + globalisel + instcombine + ipo + irreader + linker + lto + mc + mcdisassembler + mcjit + mcparser + mirparser + object + objectyaml + orcjit + passes + scalaropts + selectiondag + support + symbolize + tablegen + target + transformutils + vectorize + x86asmparser + x86asmprinter + x86codegen + x86desc + x86info + x86utils + aarch64asmparser + aarch64asmprinter + aarch64codegen + aarch64desc + aarch64info + aarch64utils) +include_directories(${LLVM_INCLUDE_DIRS}) +set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) + if( NOT NO_FMOD ) # Search for FMOD include files if( NOT WIN32 ) @@ -843,6 +903,9 @@ file( GLOB HEADER_FILES posix/*.h posix/cocoa/*.h posix/sdl/*.h + r_compiler/*.h + r_compiler/ssa/*.h + r_compiler/fixedfunction/*.h r_data/*.h resourcefiles/*.h sfmt/*.h @@ -1372,6 +1435,26 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp + r_compiler/ssa/ssa_bool.cpp + r_compiler/ssa/ssa_float.cpp + r_compiler/ssa/ssa_float_ptr.cpp + r_compiler/ssa/ssa_for_block.cpp + r_compiler/ssa/ssa_function.cpp + r_compiler/ssa/ssa_if_block.cpp + r_compiler/ssa/ssa_int.cpp + r_compiler/ssa/ssa_int_ptr.cpp + r_compiler/ssa/ssa_scope.cpp + r_compiler/ssa/ssa_struct_type.cpp + r_compiler/ssa/ssa_ubyte.cpp + r_compiler/ssa/ssa_ubyte_ptr.cpp + r_compiler/ssa/ssa_value.cpp + r_compiler/ssa/ssa_vec4f.cpp 
+ r_compiler/ssa/ssa_vec4f_ptr.cpp + r_compiler/ssa/ssa_vec4i.cpp + r_compiler/ssa/ssa_vec4i_ptr.cpp + r_compiler/ssa/ssa_vec8s.cpp + r_compiler/ssa/ssa_vec16ub.cpp + r_compiler/fixedfunction/fixedfunction.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp @@ -1587,6 +1670,9 @@ source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURREN source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$") source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+") source_group("Render Interface" FILES r_defs.h r_renderer.h r_sky.cpp r_sky.h r_state.h r_utility.cpp r_utility.h) +source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+") +source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+") +source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+") source_group("Resource Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/resourcefiles/.+") source_group("POSIX Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/.+") source_group("Cocoa Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/cocoa/.+") diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp new file mode 100644 index 000000000..347ba6de3 --- /dev/null +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -0,0 +1,1046 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" +#include 
"r_compiler/ssa/ssa_barycentric_weight.h" + +RenderProgram::RenderProgram() +{ + llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + I_FatalError(reason.c_str()); + }); + + //llvm::llvm_start_multithreaded(); + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + + std::string errorstring; + llvm::EngineBuilder engineBuilder(std::move(moduleOwner)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(llvm::Reloc::Static); + engineBuilder.setEngineKind(llvm::EngineKind::JIT); + mEngine.reset(engineBuilder.create()); + if (!mEngine) + I_FatalError(errorstring.c_str()); +} + +RenderProgram::~RenderProgram() +{ + mEngine.reset(); + mContext.reset(); + //llvm::llvm_stop_multithreaded(); +} + +void *RenderProgram::PointerToFunction(const char *name) +{ + llvm::Function *function = mModule->getFunction(name); + if (!function) + return nullptr; + return mEngine->getPointerToFunction(function); +} + +FixedFunction::FixedFunction() +{ + CodegenDrawSpan(); + mProgram.engine()->finalizeObject(); + + DrawSpan = mProgram.GetProcAddress("DrawSpan"); +} + +void FixedFunction::CodegenDrawSpan() +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function("DrawSpan"); + function.add_parameter(SSAInt::llvm_type()); + function.add_parameter(SSAUBytePtr::llvm_type()); + function.create_public(); + + SSAInt count = function.parameter(0); + SSAUBytePtr data = function.parameter(1); + SSAStack stack_index; + + stack_index.store(0); + SSAForBlock loop; + { + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + //SSAVec4i color(255, 255, 0, 255); + 
//data[index * 4].store_vec4ub(color); + data[index * 4].store(0); + data[index * 4 + 1].store(128); + data[index * 4 + 2].store(255); + data[index * 4 + 3].store(255); + stack_index.store(index + 1); + } + loop.end_block(); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); +} + +#if 0 + +GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) +: program(program), vertex_codegen(vertex_codegen), fragment_codegen(fragment_codegen) +{ +} + +llvm::Type *GlslFixedFunction::get_sampler_struct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); // width + elements.push_back(llvm::Type::getInt32Ty(context)); // height + elements.push_back(llvm::Type::getInt8PtrTy(context)); // data + return llvm::StructType::get(context, elements, false); +} + +void GlslFixedFunction::codegen() +{ + codegen_render_scanline(5); + codegen_calc_window_positions(); + codegen_calc_polygon_face_direction(); + codegen_calc_polygon_y_range(); + codegen_update_polygon_edge(); + codegen_draw_triangles(5, 5); + codegen_texture(); + codegen_normalize(); + codegen_reflect(); + codegen_max(); + codegen_pow(); + codegen_dot(); + codegen_mix(); +} + +void GlslFixedFunction::codegen_texture() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_texture"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(get_sampler_struct(program.context())); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAValue sampler_ptr = function.parameter(1); + SSAVec4f pos = function.parameter(2); + + SSAInt width = sampler_ptr[0][0].load(); + SSAInt height = sampler_ptr[0][1].load(); + SSAUBytePtr data = sampler_ptr[0][2].load(); + + 
SSAPixels4ub_argb_rev pixels(width, height, data); + //builder.CreateRet(pixels.linear_clamp4f(pos).v); + builder.CreateRet(pixels.linear_clamp4f(pos[0], pos[1]).v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_normalize() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_normalize"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f vec = function.parameter(1); + + // To do: this can probably be done a lot faster with _mm_rsqrt_ss + SSAVec4f vec2 = vec * vec; + SSAVec4f length3(SSAFloat::sqrt(vec2[0] + vec2[1] + vec2[2])); + SSAVec4f normalized = vec / length3; + builder.CreateRet(normalized.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_reflect() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_reflect"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f i = function.parameter(1); + SSAVec4f n = function.parameter(2); + + SSAVec4f c = i * n; + SSAFloat dot3 = c[0] + c[1] + c[2]; + SSAVec4f result = i - (2.0f * dot3) * n; + builder.CreateRet(result.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_max() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_max"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAFloat a = 
function.parameter(1); + SSAFloat b = function.parameter(2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + SSAFloat c = phi.create(); + + builder.CreateRet(c.v); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_pow() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_pow"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAFloat a = function.parameter(1); + SSAFloat b = function.parameter(2); + builder.CreateRet(a.v); + //builder.CreateRet(SSAFloat::pow(a, b).v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_dot() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_dot"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f a = function.parameter(1); + SSAVec4f b = function.parameter(2); + + SSAVec4f c = a * b; + SSAFloat dot3 = c[0] + c[1] + c[2]; + builder.CreateRet(dot3.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_mix() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_mix"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAVec4f v1 = 
function.parameter(1); + SSAVec4f v2 = function.parameter(2); + SSAFloat t = function.parameter(3); + + SSAVec4f b = t; + SSAVec4f a = 1.0f - b; + SSAVec4f mix = v1 * a + v2 * b; + builder.CreateRet(mix.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_draw_triangles(int num_vertex_in, int num_vertex_out) +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("draw_triangles"); + function.add_parameter(SSAInt::llvm_type()); // input_width + function.add_parameter(SSAInt::llvm_type()); // input_height + function.add_parameter(SSAUBytePtr::llvm_type()); // input_data + function.add_parameter(SSAInt::llvm_type()); // output_width + function.add_parameter(SSAInt::llvm_type()); // output_height + function.add_parameter(SSAUBytePtr::llvm_type()); // output_data + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAVec4fPtr::llvm_type()); // uniforms + function.add_parameter(SSAInt::llvm_type()); // first_vertex + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()->getPointerTo()); // vertex attributes + function.add_parameter(SSAInt::llvm_type()); // core + function.add_parameter(SSAInt::llvm_type()); // num_cores + function.create_public(); + + SSAInt input_width = function.parameter(0); + SSAInt input_height = function.parameter(1); + SSAUBytePtr input_data = function.parameter(2); + SSAInt output_width = function.parameter(3); + SSAInt output_height = function.parameter(4); + SSAUBytePtr output_data = function.parameter(5); + SSAInt viewport_x = function.parameter(6); + SSAInt viewport_y = function.parameter(7); + SSAInt viewport_width = function.parameter(8); + 
SSAInt viewport_height = function.parameter(9); + SSAVec4fPtr uniforms = function.parameter(10); + SSAInt first_vertex = function.parameter(11); + SSAInt num_vertices = function.parameter(12); + SSAValue vertex_in_ptr = function.parameter(13); + SSAInt core = function.parameter(14); + SSAInt num_cores = function.parameter(15); + + SSAStack stack_vertex_index; + SSAValue vertex_globals_ptr = SSAValue::from_llvm(SSAScope::alloca(vertex_codegen.get_global_struct_type())); + std::vector vertex_outs; + for (int i = 0; i < num_vertex_out; i++) + vertex_outs.push_back(SSAVec4fPtr::from_llvm(SSAScope::builder().CreateAlloca(SSAVec4f::llvm_type(), SSAInt(3).v))); + + int num_uniforms = 1; + { + llvm::Type *type = llvm::ArrayType::get(llvm::VectorType::get(llvm::Type::getFloatTy(program.context()), 4), 4); + llvm::Value *matrix = llvm::UndefValue::get(type); + for (int col = 0; col < 4; col++) + { + SSAVec4f column = uniforms[col].load_unaligned(); + std::vector indexes; + indexes.push_back(col); + matrix = builder.CreateInsertValue(matrix, column.v, indexes); + } + vertex_globals_ptr[0][0].store(matrix); + } + + stack_vertex_index.store(0); + SSAForBlock loop; + SSAInt vertex_index = stack_vertex_index.load(); + loop.loop_block(vertex_index + 2 < num_vertices); + for (int v = 0; v < 3; v++) + { + for (int i = 0; i < num_vertex_in; i++) + { + SSAValue attribute_ptr = vertex_in_ptr[i].load(); + SSAVec4f vertex_in = SSAVec4f::shuffle(SSAVec4fPtr(attribute_ptr)[first_vertex + vertex_index + v].load_unaligned(), 0, 1, 2, 3); + vertex_globals_ptr[0][num_uniforms + i].store(vertex_in.v); + } + SSAScope::builder().CreateCall(SSAScope::module()->getFunction((vertex_codegen.shader_prefix() + "main").c_str()), vertex_globals_ptr.v); + for (int i = 0; i < num_vertex_out; i++) + { + vertex_outs[i][v].store(vertex_globals_ptr[0][num_uniforms + num_vertex_in + i].load()); + } + } + + render_polygon(input_width, input_height, input_data, output_width, output_height, output_data, 
viewport_x, viewport_y, viewport_width, viewport_height, 3, vertex_outs, core, num_cores); + + stack_vertex_index.store(vertex_index + 3); + loop.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_window_positions() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_window_positions"); + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // gl_Position + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.create_private(); + SSAInt viewport_x = function.parameter(0); + SSAInt viewport_y = function.parameter(1); + SSAInt viewport_width = function.parameter(2); + SSAInt viewport_height = function.parameter(3); + SSAInt num_vertices = function.parameter(4); + SSAVec4fPtr clip_positions = function.parameter(5); + SSAVec4fPtr window_positions = function.parameter(6); + + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + SSAStack stack_transform_index; + stack_transform_index.store(0); + SSAForBlock loop_transform; + SSAInt transform_index = stack_transform_index.load(); + loop_transform.loop_block(transform_index < num_vertices); + { + SSAVec4f clip_pos = clip_positions[transform_index].load(); + SSAVec4f window_pos = viewport.clip_to_window(clip_pos); + window_positions[transform_index].store(window_pos); + + stack_transform_index.store(transform_index + 1); + } + loop_transform.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_polygon_face_direction() +{ + 
llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_polygon_face_direction"); + function.set_return_type(SSABool::llvm_type()); + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.create_private(); + SSAInt num_vertices = function.parameter(0); + SSAVec4fPtr window_positions = function.parameter(1); + + SSAStack stack_face_direction; + SSAStack stack_face_vertex_index; + stack_face_direction.store(0.0f); + stack_face_vertex_index.store(0); + SSAForBlock loop_face_direction; + SSAInt face_vertex_index = stack_face_vertex_index.load(); + loop_face_direction.loop_block(face_vertex_index < num_vertices); + { + SSAVec4f v0 = window_positions[face_vertex_index].load(); + SSAVec4f v1 = window_positions[(face_vertex_index + 1) % num_vertices].load(); + stack_face_direction.store(stack_face_direction.load() + v0[0] * v1[1] - v1[0] * v0[1]); + stack_face_vertex_index.store(face_vertex_index + 1); + } + loop_face_direction.end_block(); + SSABool front_facing_ccw = (stack_face_direction.load() >= 0.0f); + + builder.CreateRet(front_facing_ccw.v); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_polygon_y_range() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_polygon_y_range"); + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_start + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_end + function.create_private(); + SSAInt viewport_y = function.parameter(0); + SSAInt 
viewport_height = function.parameter(1); + SSAInt num_vertices = function.parameter(2); + SSAVec4fPtr window_positions = function.parameter(3); + SSAValue out_y_start = function.parameter(4); + SSAValue out_y_end = function.parameter(5); + + SSAStack y_start; + SSAStack y_end; + y_start.store(0x7fffffff); + y_end.store(0); + + SSAStack stack_minmax_index; + stack_minmax_index.store(0); + SSAForBlock loop_minmax; + SSAInt minmax_index = stack_minmax_index.load(); + loop_minmax.loop_block(minmax_index < num_vertices); + { + SSAInt y = SSAInt(window_positions[minmax_index].load()[1] + 0.5f); + y_start.store(ssa_min(y_start.load(), y)); + y_end.store(ssa_max(y_end.load(), y)); + stack_minmax_index.store(minmax_index + 1); + } + loop_minmax.end_block(); + + y_start.store(ssa_max(y_start.load(), viewport_y)); + y_end.store(ssa_min(y_end.load(), viewport_y + viewport_height)); + + out_y_start.store(y_start.load().v); + out_y_end.store(y_end.load().v); + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_update_polygon_edge() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("update_polygon_edge"); + function.add_parameter(SSAFloat::llvm_type()); // y_position + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout left_index + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout right_index + function.create_private(); + SSAFloat float_y = function.parameter(0); + SSAInt num_vertices = function.parameter(1); + SSAVec4fPtr window_positions = function.parameter(2); + SSAValue ptr_left_index = function.parameter(3); + SSAValue ptr_right_index = function.parameter(4); + + SSAStack max_iterate; + max_iterate.store(num_vertices); + SSAForBlock loop_left; + SSAInt left_index 
= ptr_left_index.load(); + SSAInt right_index = ptr_right_index.load(); + SSAInt next_left_index = (left_index + 1) % num_vertices; + SSAFloat left_y0 = window_positions[left_index].load()[1]; + SSAFloat left_y1 = window_positions[next_left_index].load()[1]; + SSABool in_range = (left_y0 >= float_y && left_y1 < float_y) || (left_y1 >= float_y && left_y0 < float_y); + loop_left.loop_block((left_index == right_index || !in_range) && max_iterate.load() > 0); + ptr_left_index.store(next_left_index.v); + max_iterate.store(max_iterate.load() - 1); + loop_left.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::render_polygon( + SSAInt input_width, + SSAInt input_height, + SSAUBytePtr input_data, + SSAInt output_width, + SSAInt output_height, + SSAUBytePtr output_data, + SSAInt viewport_x, + SSAInt viewport_y, + SSAInt viewport_width, + SSAInt viewport_height, + SSAInt num_vertices, + std::vector fragment_ins, + SSAInt core, + SSAInt num_cores) +{ + SSAVec4fPtr window_positions = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), num_vertices)); + SSAVec4fPtr left_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + SSAVec4fPtr right_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + + /////////////////////////////////// + + llvm::Value *calc_window_positions_args[] = { viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, num_vertices.v, fragment_ins[0].v, window_positions.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_window_positions"), calc_window_positions_args); + + llvm::Value *calc_polygon_face_direction_args[] = { num_vertices.v, window_positions.v }; + SSABool front_facing_ccw = SSABool::from_llvm(SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_face_direction"), calc_polygon_face_direction_args)); + + SSAIfBlock cull_if; 
+ cull_if.if_block(front_facing_ccw); + { + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + + SSAStack y_start; + SSAStack y_end; + + llvm::Value *calc_polygon_y_range_args[] = { viewport_y.v, viewport_height.v, num_vertices.v, window_positions.v, y_start.v, y_end.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_y_range"), calc_polygon_y_range_args); + + y_start.store((y_start.load() + num_cores - core - 1) / num_cores * num_cores + core); // find_first_line_for_core + + SSAStack stack_left_index; + SSAStack stack_right_index; + SSAStack stack_int_y; + stack_left_index.store(0); + stack_right_index.store(1); + stack_int_y.store(y_start.load()); + SSAForBlock scanlines_loop; + scanlines_loop.loop_block(stack_int_y.load() < y_end.load()); + { + SSAInt int_y = stack_int_y.load(); + SSAFloat float_y = SSAFloat(int_y) + 0.5f; + + llvm::Value *update_polygon_edge_args0[] = { float_y.v, num_vertices.v, window_positions.v, stack_left_index.v, stack_right_index.v }; + llvm::Value *update_polygon_edge_args1[] = { float_y.v, num_vertices.v, window_positions.v, stack_right_index.v, stack_left_index.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args0); + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args1); + + SSAInt left_index = stack_left_index.load(); + SSAInt right_index = stack_right_index.load(); + SSAInt next_left_index = (left_index + 1) % num_vertices; + SSAInt next_right_index = (right_index + 1) % num_vertices; + + SSABarycentricWeight left_weight(viewport, fragment_ins[0][left_index].load(), fragment_ins[0][next_left_index].load()); + SSABarycentricWeight right_weight(viewport, fragment_ins[0][right_index].load(), fragment_ins[0][next_right_index].load()); + + SSAFloat a = left_weight.from_window_y(int_y); + SSAFloat b = right_weight.from_window_y(int_y); + + SSAVec4f 
left_clip_pos = left_weight.v1 * a + left_weight.v2 * (1.0f - a); + SSAVec4f right_clip_pos = right_weight.v1 * b + right_weight.v2 * (1.0f - b); + + for (size_t i = 0; i + 1 < fragment_ins.size(); i++) + { + left_line_varyings[i].store(fragment_ins[i + 1][left_index].load() * a + fragment_ins[i + 1][next_left_index].load() * (1.0f - a)); + right_line_varyings[i].store(fragment_ins[i + 1][right_index].load() * b + fragment_ins[i + 1][next_right_index].load() * (1.0f - b)); + } + + llvm::Value *render_scanline_args[] = { output_width.v, output_height.v, output_data.v, viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, int_y.v, left_clip_pos.v, right_clip_pos.v, left_line_varyings.v, right_line_varyings.v, input_width.v, input_height.v, input_data.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("render_scanline"), render_scanline_args); + + stack_int_y.store(stack_int_y.load() + num_cores); + } + scanlines_loop.end_block(); + } + cull_if.end_block(); +} + +void GlslFixedFunction::codegen_render_scanline(int num_varyings) +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("render_scanline"); + function.add_parameter(SSAInt::llvm_type()); // output_width + function.add_parameter(SSAInt::llvm_type()); // output_height + function.add_parameter(SSAUBytePtr::llvm_type()); // output_data + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // y + function.add_parameter(SSAVec4f::llvm_type()); // left_clip_pos + function.add_parameter(SSAVec4f::llvm_type()); // right_clip_pos + function.add_parameter(SSAVec4fPtr::llvm_type()); // left_line_varyings + function.add_parameter(SSAVec4fPtr::llvm_type()); // 
right_line_varyings + function.add_parameter(SSAInt::llvm_type()); // input_width + function.add_parameter(SSAInt::llvm_type()); // input_height + function.add_parameter(SSAUBytePtr::llvm_type()); // input_data + function.create_private(); + SSAInt output_width = function.parameter(0); + SSAInt output_height = function.parameter(1); + SSAUBytePtr output_data = function.parameter(2); + SSAInt viewport_x = function.parameter(3); + SSAInt viewport_y = function.parameter(4); + SSAInt viewport_width = function.parameter(5); + SSAInt viewport_height = function.parameter(6); + SSAInt y = function.parameter(7); + SSAVec4f left_clip_pos = function.parameter(8); + SSAVec4f right_clip_pos = function.parameter(9); + SSAVec4fPtr left_line_varyings = function.parameter(10); + SSAVec4fPtr right_line_varyings = function.parameter(11); + SSAInt input_width = function.parameter(12); + SSAInt input_height = function.parameter(13); + SSAUBytePtr input_data = function.parameter(14); + + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + + SSAScopeHint hint; + + SSAStack stack_x; + SSAStack stack_xnormalized; + + //////////////////////////////// + // Prepare to render scanline: + + hint.set("prepare"); + OuterData outer_data; + + SSAVec4f left_window_pos = viewport.clip_to_window(left_clip_pos); + SSAVec4f right_window_pos = viewport.clip_to_window(right_clip_pos); + + SSAFloat x0 = left_window_pos[0]; + SSAFloat x1 = right_window_pos[0]; + SSAInt start(ssa_min(x0, x1)); + SSAInt end(ssa_max(x1, x0) + 0.5f); + + start = ssa_max(start, viewport.x); + end = ssa_min(end, viewport.right); + + SSABarycentricWeight weight_scanline(viewport, left_clip_pos, right_clip_pos); + + outer_data.start = start; + outer_data.end = end; + outer_data.input_width = input_width; + outer_data.input_height = input_height; + outer_data.output_width = output_width; + outer_data.output_height = output_height; + outer_data.input_pixels = input_data; + outer_data.output_pixels_line = 
output_data[output_width * y * 4]; + + outer_data.viewport_x = SSAFloat(viewport.x); + outer_data.viewport_rcp_half_width = viewport.rcp_half_width; + outer_data.dx = weight_scanline.v2[0] - weight_scanline.v1[0]; + outer_data.dw = weight_scanline.v2[3] - weight_scanline.v1[3]; + outer_data.v1w = weight_scanline.v1[3]; + outer_data.v1x = weight_scanline.v1[0]; + outer_data.sse_left_varying_in = left_line_varyings; + outer_data.sse_right_varying_in = right_line_varyings; + outer_data.num_varyings = num_varyings; + + outer_data.sampler = SSAScope::alloca(get_sampler_struct(SSAScope::context())); + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + llvm::Value *sampler_width_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1)); + llvm::Value *sampler_height_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2)); + llvm::Value *sampler_data_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + SSAScope::builder().CreateStore(outer_data.input_width.v, sampler_width_ptr, false); + SSAScope::builder().CreateStore(outer_data.input_height.v, sampler_height_ptr, false); + SSAScope::builder().CreateStore(outer_data.input_pixels.v, sampler_data_ptr, false); + + + SSAVec4i xposinit = SSAVec4i(outer_data.start) + SSAVec4i(0, 1, 2, 3); + stack_x.store(outer_data.start); + stack_xnormalized.store((SSAVec4f(xposinit) + 0.5f - outer_data.viewport_x) * outer_data.viewport_rcp_half_width - 1.0f); + + ///////////////////////////////////////////////////////////////////////// + // First pixels: + + hint.set("firstpixels"); + SSAIfBlock if_block; + if_block.if_block(outer_data.end - 
outer_data.start > 3); + process_first_pixels(outer_data, stack_x, stack_xnormalized); + if_block.end_block(); + + ///////////////////////////////////////////////////////////////////////// + // Start: for (SSAInt x = start; x < end; x += 4) + + hint.set("loopstart"); + + SSAForBlock for_block; + SSAInt x = stack_x.load(); + for_block.loop_block(x + 3 < outer_data.end); + + ///////////////////////////////////////////////////////////////////////// + // Loop body + { + SSAVec4f xnormalized = stack_xnormalized.load(); + + hint.set("blendload"); + SSAVec4i desti[4]; + SSAVec16ub dest_block = outer_data.output_pixels_line[x << 2].load_vec16ub(); + SSAVec4i::extend(dest_block, desti[0], desti[1], desti[2], desti[3]); + + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + + hint.set("blendstore"); + outer_data.output_pixels_line[x << 2].store_vec16ub(dest_block); + hint.clear(); + + xnormalized = xnormalized + 4.0f * outer_data.viewport_rcp_half_width; + stack_xnormalized.store(xnormalized); + } + ///////////////////////////////////////////////////////////////////////// + // End: for (SSAInt x = start; x < end; x += 4) + + hint.set("loopend"); + x = x + 4; + stack_x.store(x); + for_block.end_block(); + + ///////////////////////////////////////////////////////////////////////// + // Last pixels: + + hint.set("lastpixels"); + process_last_pixels(outer_data, stack_x, stack_xnormalized); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +{ + SSAInt x = stack_x.load(); + SSAVec4f xnormalized = stack_xnormalized.load(); + SSAInt offset = x << 2; + + // Find how many pixels we have left until we 16 byte align: + llvm::Value *output_line_align = SSAScope::builder().CreatePtrToInt(outer_data.output_pixels_line.v, llvm::Type::getInt32Ty(SSAScope::context())); + output_line_align = 
SSAScope::builder().CreateAdd(output_line_align, offset.v); + SSAInt left = 4 - (SSAInt::from_llvm(SSAScope::builder().CreateURem(output_line_align, SSAInt(16).v)) >> 2); + + SSAIfBlock if_block0; + if_block0.if_block(left == 3); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + outer_data.output_pixels_line[offset + 8].load_vec4ub(), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); + + stack_x.store(x + 3); + stack_xnormalized.store(xnormalized + 3.0f * outer_data.viewport_rcp_half_width); + } + if_block0.else_block(); + { + SSAIfBlock if_block1; + if_block1.if_block(left == 2); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + + stack_x.store(x + 2); + stack_xnormalized.store(xnormalized + 2.0f * outer_data.viewport_rcp_half_width); + } + if_block1.else_block(); + { + SSAIfBlock if_block2; + if_block2.if_block(left == 1); + { + SSAVec4i 
dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + + stack_x.store(x + 1); + stack_xnormalized.store(xnormalized + outer_data.viewport_rcp_half_width); + } + if_block2.end_block(); + } + if_block1.end_block(); + } + if_block0.end_block(); +} + +void GlslFixedFunction::process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +{ + SSAInt x = stack_x.load(); + SSAVec4f xnormalized = stack_xnormalized.load(); + + SSAInt left = outer_data.end - x; + SSAInt offset = x << 2; + SSAIfBlock if_block0; + SSAIfBlock if_block1; + SSAIfBlock if_block2; + if_block0.if_block(left == 3); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + outer_data.output_pixels_line[offset + 8].load_vec4ub(), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); + } + if_block0.else_block(); + if_block1.if_block(left == 2); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + 
SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + } + if_block1.else_block(); + if_block2.if_block(left == 1); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + } + if_block2.end_block(); + if_block1.end_block(); + if_block0.end_block(); +} + +void GlslFixedFunction::inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *frag_color) +{ + SSAScopeHint hint; + hint.set("varying"); + SSAVec4f a = (xnormalized * data.v1w - data.v1x) * SSAVec4f::rcp(data.dx - xnormalized * data.dw); + SSAVec4f one_minus_a = 1.0f - a; + + llvm::Value *globals_ptr[4]; + for (int i = 0; i < 4; i++) + { + globals_ptr[i] = SSAScope::alloca(fragment_codegen.get_global_struct_type()); + + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + llvm::Value *sampler_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + SSAScope::builder().CreateStore(data.sampler, sampler_ptr, false); + + for (int j = 0; j < data.num_varyings; j++) + { + 
SSAVec4f field_value = + data.sse_left_varying_in[j].load() * SSAVec4f::shuffle(one_minus_a, i, i, i, i) + + data.sse_right_varying_in[j].load() * SSAVec4f::shuffle(a, i, i, i, i); + index_list.clear(); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)j+1))); + llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + SSAScope::builder().CreateStore(field_value.v, field_ptr, false); + } + } + + hint.set("fragprogram"); + for (int i = 0; i < 4; i++) + { + SSAScope::builder().CreateCall(SSAScope::module()->getFunction((fragment_codegen.shader_prefix() + "main").c_str()), globals_ptr[i]); + + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)5))); + llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + frag_color[i] = SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(field_ptr, false)); + } +} +/* +void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) +{ + SSAVec4i desti[4]; + SSAVec4i::extend(dest, desti[0], desti[1], desti[2], desti[3]); + + // Pre-multiplied alpha blend: + for (int pixel_index = 0; pixel_index < 4; pixel_index++) + { + SSAVec4f src = SSAVec4f::shuffle(frag_color[pixel_index], 2, 1, 0, 3); + desti[pixel_index] = SSAVec4i(src * 255.0f); + SSAVec4f dest = SSAVec4f(desti[pixel_index]) * (1.0f / 255.0f); + SSAVec4f alpha = SSAVec4f::shuffle(dest, 3, 3, 3, 3); + SSAVec4f resultf = src + dest * (1.0f - alpha); + desti[pixel_index] = SSAVec4i(resultf * 255.0f); + } + + dest = SSAVec16ub(SSAVec8s(desti[0], desti[1]), SSAVec8s(desti[2], desti[3])); +} +*/ +void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) +{ + for (int i = 0; i < 4; i++) + frag_color[i] = 
SSAVec4f::shuffle(frag_color[i], 2, 1, 0, 3); + + // Pre-multiplied alpha blend: + SSAVec8s dest0 = SSAVec8s::extendlo(dest); + SSAVec8s dest1 = SSAVec8s::extendhi(dest); + + SSAVec8s src0(SSAVec4i(frag_color[0] * 255.0f), SSAVec4i(frag_color[1] * 255.0f)); + SSAVec8s src1(SSAVec4i(frag_color[2] * 255.0f), SSAVec4i(frag_color[3] * 255.0f)); + + // Extract and duplicate alpha components: + SSAVec8s alpha0 = SSAVec8s::shuffle(src0, 3, 3, 3, 3, 7, 7, 7, 7); + SSAVec8s alpha1 = SSAVec8s::shuffle(src1, 3, 3, 3, 3, 7, 7, 7, 7); + + // Convert from 0-255 to 0-256 range. NOTE(review): if max_sse2 is an element-wise maximum, the two calls below force alpha to at least 255; a clamp to 255 (a minimum) may have been intended - confirm: + alpha0 = SSAVec8s::max_sse2(alpha0, 255); + alpha1 = SSAVec8s::max_sse2(alpha1, 255); + alpha0 = alpha0 + (alpha0 >> 7); + alpha1 = alpha1 + (alpha1 >> 7); + + SSAVec8s result0 = src0 + ((dest0 * (256 - alpha0)) >> 8); + SSAVec8s result1 = src1 + ((dest1 * (256 - alpha1)) >> 8); + + dest = SSAVec16ub(result0, result1); +} + +#endif diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h new file mode 100644 index 000000000..4c81fc108 --- /dev/null +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -0,0 +1,130 @@ + +#pragma once + +#include "r_compiler/ssa/ssa_vec4f.h" +#include "r_compiler/ssa/ssa_vec4i.h" +#include "r_compiler/ssa/ssa_vec8s.h" +#include "r_compiler/ssa/ssa_vec16ub.h" +#include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_ubyte_ptr.h" +#include "r_compiler/ssa/ssa_vec4f_ptr.h" +#include "r_compiler/ssa/ssa_vec4i_ptr.h" +#include "r_compiler/ssa/ssa_pixels.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" +#include "r_compiler/llvm_include.h" + +class RenderProgram +{ +public: + RenderProgram(); + ~RenderProgram(); + + template + Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule; } + llvm::ExecutionEngine *engine() { return mEngine.get(); } + 
+private: + void *PointerToFunction(const char *name); + + std::unique_ptr mContext; + llvm::Module *mModule; + std::unique_ptr mEngine; +}; + +class FixedFunction +{ +public: + FixedFunction(); + + void(*DrawSpan)(int, uint32_t *) = nullptr; + +private: + void CodegenDrawSpan(); + + RenderProgram mProgram; +}; + +#if 0 + +class GlslProgram; +class GlslCodeGen; + +class GlslFixedFunction +{ +public: + GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen); + void codegen(); + static llvm::Type *get_sampler_struct(llvm::LLVMContext &context); + +private: + void codegen_draw_triangles(int num_vertex_in, int num_vertex_out); + void codegen_calc_window_positions(); + void codegen_calc_polygon_face_direction(); + void codegen_calc_polygon_y_range(); + void codegen_update_polygon_edge(); + void codegen_texture(); + void codegen_normalize(); + void codegen_reflect(); + void codegen_max(); + void codegen_pow(); + void codegen_dot(); + void codegen_mix(); + + struct OuterData + { + OuterData() : sampler() { } + + SSAInt start; + SSAInt end; + SSAInt input_width; + SSAInt input_height; + SSAInt output_width; + SSAInt output_height; + SSAUBytePtr input_pixels; + SSAUBytePtr output_pixels_line; + + SSAVec4fPtr sse_left_varying_in; + SSAVec4fPtr sse_right_varying_in; + int num_varyings; + SSAVec4f viewport_x; + SSAVec4f viewport_rcp_half_width; + SSAVec4f dx; + SSAVec4f dw; + SSAVec4f v1w; + SSAVec4f v1x; + + llvm::Value *sampler; + }; + + void render_polygon( + SSAInt input_width, + SSAInt input_height, + SSAUBytePtr input_data, + SSAInt output_width, + SSAInt output_height, + SSAUBytePtr output_data, + SSAInt viewport_x, + SSAInt viewport_y, + SSAInt viewport_width, + SSAInt viewport_height, + SSAInt num_vertices, + std::vector fragment_ins, + SSAInt core, + SSAInt num_cores); + + void codegen_render_scanline(int num_varyings); + void process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); + 
void process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); + void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors); + void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest); + + GlslProgram &program; + GlslCodeGen &vertex_codegen; + GlslCodeGen &fragment_codegen; +}; + +#endif diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h new file mode 100644 index 000000000..1eed549e1 --- /dev/null +++ b/src/r_compiler/llvm_include.h @@ -0,0 +1,46 @@ + +#pragma once + +#if defined(min) +#define llvm_min_bug min +#undef min +#endif +#if defined(max) +#define llvm_max_bug max +#undef max +#endif + +#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned +#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible +#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list +#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning) +#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_SCL_SECURE_NO_WARNINGS. 
See documentation on how to use Visual C++ 'Checked Iterators' +#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once +#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(llvm_min_bug) +#define min llvm_min_bug +#undef llvm_min_bug +#endif +#if defined(llvm_max_bug) +#define max llvm_max_bug +#undef llvm_max_bug +#endif diff --git a/src/r_compiler/ssa/ssa_barycentric_weight.h b/src/r_compiler/ssa/ssa_barycentric_weight.h new file mode 100644 index 000000000..52117ccc6 --- /dev/null +++ b/src/r_compiler/ssa/ssa_barycentric_weight.h @@ -0,0 +1,97 @@ + +#pragma once + +#include "ssa_vec4f.h" +#include "ssa_float.h" +#include "ssa_int.h" + +class SSAViewport +{ +public: + SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height) + : x(x), y(y), width(width), height(height), right(x + width), bottom(y + height), + half_width(SSAFloat(width) * 0.5f), half_height(SSAFloat(height) * 0.5f), + rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)), + rcp_half_height(1.0f / (SSAFloat(height) * 0.5f)) + { + } + + SSAInt x, y; + SSAInt width, height; + SSAInt right, bottom; + SSAFloat half_width; + SSAFloat half_height; + SSAFloat rcp_half_width; + SSAFloat rcp_half_height; + + SSAVec4f clip_to_window(SSAVec4f clip) const + { + SSAFloat w = clip[3]; + SSAVec4f normalized = SSAVec4f::insert_element(clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3), w, 3); + return normalized_to_window(normalized); + } + + SSAVec4f normalized_to_window(SSAVec4f normalized) const + { + return SSAVec4f( + SSAFloat(x) + 
(normalized[0] + 1.0f) * half_width, + SSAFloat(y) + (normalized[1] + 1.0f) * half_height, + 0.0f - normalized[2], + normalized[3]); + } +}; + +class SSABarycentricWeight +{ +public: + SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2); + SSAFloat from_window_x(SSAInt x) const; + SSAFloat from_window_y(SSAInt y) const; + + SSAViewport viewport; + SSAVec4f v1; + SSAVec4f v2; +}; + +inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport viewport, SSAVec4f v1, SSAVec4f v2) +: viewport(viewport), v1(v1), v2(v2) +{ +} + +inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const +{ +/* SSAFloat xnormalized = (x + 0.5f - viewport.x) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2.x-v1.x; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.x - xnormalized * v2.w) / (dx - xnormalized * dw); + return a;*/ + + SSAFloat xnormalized = (SSAFloat(x) + 0.5f - SSAFloat(viewport.x)) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2[0]-v1[0]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (xnormalized * v1[3] - v1[0]) / (dx - xnormalized * dw); + return 1.0f - t; +} + +inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const +{ +/* SSAFloat ynormalized = (y + 0.5f - viewport.y) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2.y-v1.y; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.y - ynormalized * v2.w) / (dy - ynormalized * dw); + return a;*/ + + SSAFloat ynormalized = (SSAFloat(y) + 0.5f - SSAFloat(viewport.y)) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2[1]-v1[1]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (ynormalized * v1[3] - v1[1]) / (dy - ynormalized * dw); + return 1.0f - t; +} + +/* + y = (v1.y + t * dy) / (v1.w + t * dw) + + y * v1.w + y * t * dw = v1.y + t * dy + y * v1.w - v1.y = t * (dy - y * dw) + t = (y * v1.w - v1.y) / (dy - y * dw) +*/ diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp new file mode 100644 index 000000000..101323911 --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ 
-0,0 +1,91 @@ + +#include "ssa_bool.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSABool::SSABool() +: v(0) +{ +} +/* +SSABool::SSABool(bool constant) +: v(0) +{ +} +*/ +SSABool::SSABool(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSABool::llvm_type() +{ + return llvm::Type::getInt1Ty(SSAScope::context()); +} + +SSABool operator&&(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSABool operator||(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} + +SSABool operator!(const SSABool &a) +{ + return SSABool::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); +} + +SSABool operator<(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); +} + +///////////////////////////////////////////////////////////////////////////// + +SSABool operator<(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAFloat &a, const SSAFloat &b) +{ + 
return SSABool::from_llvm(SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h new file mode 100644 index 000000000..2ef79e49b --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.h @@ -0,0 +1,37 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSABool +{ +public: + SSABool(); + //SSABool(bool constant); + explicit SSABool(llvm::Value *v); + static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSABool operator&&(const SSABool &a, const SSABool &b); +SSABool operator||(const SSABool &a, const SSABool &b); + +SSABool operator!(const SSABool &a); + +SSABool operator<(const SSAInt &a, const SSAInt &b); +SSABool operator<=(const SSAInt &a, const SSAInt &b); +SSABool operator==(const SSAInt &a, const SSAInt &b); +SSABool operator>=(const SSAInt &a, const SSAInt &b); +SSABool operator>(const SSAInt &a, const SSAInt &b); + +SSABool operator<(const SSAFloat &a, const SSAFloat &b); +SSABool operator<=(const SSAFloat &a, const SSAFloat &b); +SSABool operator==(const SSAFloat &a, const SSAFloat &b); +SSABool operator>=(const SSAFloat &a, const SSAFloat &b); +SSABool operator>(const SSAFloat &a, const SSAFloat &b); diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp new file mode 100644 index 000000000..87488af74 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -0,0 +1,152 @@ + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + 
+SSAFloat::SSAFloat() +: v(0) +{ +} + +SSAFloat::SSAFloat(float constant) +: v(0) +{ + v = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)); +} + +SSAFloat::SSAFloat(SSAInt i) +: v(0) +{ + v = SSAScope::builder().CreateSIToFP(i.v, llvm::Type::getFloatTy(SSAScope::context()), SSAScope::hint()); +} + +SSAFloat::SSAFloat(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloat::llvm_type() +{ + return llvm::Type::getFloatTy(SSAScope::context()); +} + +SSAFloat SSAFloat::sqrt(SSAFloat f) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::sin(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::cos(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::pow(SSAFloat val, SSAFloat power) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(val.v); + args.push_back(power.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAFloat SSAFloat::exp(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::log(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return 
SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); +} + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator+(float a, const SSAFloat &b) +{ + return SSAFloat(a) + b; +} + +SSAFloat operator-(float a, const SSAFloat &b) +{ + return SSAFloat(a) - b; +} + +SSAFloat operator*(float a, const SSAFloat &b) +{ + return SSAFloat(a) * b; +} + +SSAFloat operator/(float a, const SSAFloat &b) +{ + return SSAFloat(a) / b; +} + +SSAFloat operator+(const SSAFloat &a, float b) +{ + return a + SSAFloat(b); +} + +SSAFloat operator-(const SSAFloat &a, float b) +{ + return a - SSAFloat(b); +} + +SSAFloat operator*(const SSAFloat &a, float b) +{ + return a * SSAFloat(b); +} + +SSAFloat operator/(const SSAFloat &a, float b) +{ + return a / SSAFloat(b); +} + diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h new file mode 100644 index 000000000..2349ab877 --- 
/dev/null +++ b/src/r_compiler/ssa/ssa_float.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAInt; + +class SSAFloat +{ +public: + SSAFloat(); + SSAFloat(SSAInt i); + SSAFloat(float constant); + explicit SSAFloat(llvm::Value *v); + static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } + static llvm::Type *llvm_type(); + static SSAFloat sqrt(SSAFloat f); + static SSAFloat sin(SSAFloat val); + static SSAFloat cos(SSAFloat val); + static SSAFloat pow(SSAFloat val, SSAFloat power); + static SSAFloat exp(SSAFloat val); + static SSAFloat log(SSAFloat val); + static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); + + llvm::Value *v; +}; + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b); + +SSAFloat operator+(float a, const SSAFloat &b); +SSAFloat operator-(float a, const SSAFloat &b); +SSAFloat operator*(float a, const SSAFloat &b); +SSAFloat operator/(float a, const SSAFloat &b); + +SSAFloat operator+(const SSAFloat &a, float b); +SSAFloat operator-(const SSAFloat &a, float b); +SSAFloat operator*(const SSAFloat &a, float b); +SSAFloat operator/(const SSAFloat &a, float b); diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp new file mode 100644 index 000000000..4413c6e92 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -0,0 +1,65 @@ + +#include "ssa_float_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAFloatPtr::SSAFloatPtr() +: v(0) +{ +} + +SSAFloatPtr::SSAFloatPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloatPtr::llvm_type() +{ + return llvm::Type::getFloatPtrTy(SSAScope::context()); +} + +SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const +{ + return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, 
SSAScope::hint())); +} + +SSAFloat SSAFloatPtr::load() const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + // return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(get_intrinsic(llvm::Intrinsic::x86_sse2_loadu_dq), SSAScope::builder().CreateBitCast(v, llvm::PointerType::getUnqual(llvm::IntegerType::get(SSAScope::context(), 8))))); +} + +void SSAFloatPtr::store(const SSAFloat &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); +} + +void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + 
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h new file mode 100644 index 000000000..a4318e027 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloatPtr +{ +public: + SSAFloatPtr(); + explicit SSAFloatPtr(llvm::Value *v); + static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } + static llvm::Type *llvm_type(); + SSAFloatPtr operator[](SSAInt index) const; + SSAFloat load() const; + SSAVec4f load_vec4f() const; + SSAVec4f load_unaligned_vec4f() const; + void store(const SSAFloat &new_value); + void store_vec4f(const SSAVec4f &new_value); + void store_unaligned_vec4f(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp new file mode 100644 index 000000000..ce9328607 --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -0,0 +1,25 @@ + +#include "ssa_for_block.h" +#include "ssa_scope.h" + +SSAForBlock::SSAForBlock() +: if_basic_block(0), loop_basic_block(0), end_basic_block(0) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forbegin", SSAScope::builder().GetInsertBlock()->getParent()); + loop_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forloop", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forend", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAForBlock::loop_block(SSABool true_condition) +{ + SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); + 
SSAScope::builder().SetInsertPoint(loop_basic_block); +} + +void SSAForBlock::end_block() +{ + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h new file mode 100644 index 000000000..58803dee5 --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -0,0 +1,18 @@ + +#pragma once + +#include "ssa_bool.h" +#include "r_compiler/llvm_include.h" + +class SSAForBlock +{ +public: + SSAForBlock(); + void loop_block(SSABool true_condition); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *loop_basic_block; + llvm::BasicBlock *end_basic_block; +}; diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp new file mode 100644 index 000000000..aee4de5a9 --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -0,0 +1,55 @@ + +#include "ssa_function.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "ssa_value.h" +#include "r_compiler/llvm_include.h" + +SSAFunction::SSAFunction(const std::string name) +: name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())), func() +{ +} + +void SSAFunction::set_return_type(llvm::Type *type) +{ + return_type = type; +} + +void SSAFunction::add_parameter(llvm::Type *type) +{ + parameters.push_back(type); +} + +void SSAFunction::create_public() +{ + func = SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module()); + //func->setCallingConv(llvm::CallingConv::X86_StdCall); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} + +void SSAFunction::create_private() +{ + func = 
SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module()); + func->addFnAttr(llvm::Attribute::AlwaysInline); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} + +SSAValue SSAFunction::parameter(int index) +{ + llvm::Function::arg_iterator arg_it = func->arg_begin(); + for (int i = 0; i < index; i++) + ++arg_it; + return SSAValue::from_llvm(static_cast(arg_it)); +} diff --git a/src/r_compiler/ssa/ssa_function.h b/src/r_compiler/ssa/ssa_function.h new file mode 100644 index 000000000..f1969c35b --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.h @@ -0,0 +1,30 @@ + +#pragma once + +#include +#include + +namespace llvm { class Value; } +namespace llvm { class Type; } +namespace llvm { class Function; } + +class SSAInt; +class SSAValue; + +class SSAFunction +{ +public: + SSAFunction(const std::string name); + void set_return_type(llvm::Type *type); + void add_parameter(llvm::Type *type); + void create_public(); + void create_private(); + SSAValue parameter(int index); + + llvm::Function *func; + +private: + std::string name; + llvm::Type *return_type; + std::vector parameters; +}; diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp new file mode 100644 index 000000000..e2de9ecad --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -0,0 +1,30 @@ + +#include "ssa_if_block.h" +#include "ssa_scope.h" + +SSAIfBlock::SSAIfBlock() +: if_basic_block(0), else_basic_block(0), end_basic_block(0) +{ +} + +void SSAIfBlock::if_block(SSABool true_condition) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "if", SSAScope::builder().GetInsertBlock()->getParent()); + else_basic_block = 
llvm::BasicBlock::Create(SSAScope::context(), "else", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = else_basic_block; + SSAScope::builder().CreateCondBr(true_condition.v, if_basic_block, else_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAIfBlock::else_block() +{ + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(else_basic_block); +} + +void SSAIfBlock::end_block() +{ + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h new file mode 100644 index 000000000..98c534a86 --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -0,0 +1,46 @@ + +#pragma once + +#include "ssa_bool.h" +#include "ssa_phi.h" +#include "r_compiler/llvm_include.h" + +class SSAIfBlock +{ +public: + SSAIfBlock(); + void if_block(SSABool true_condition); + void else_block(); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *else_basic_block; + llvm::BasicBlock *end_basic_block; +}; + +template +T ssa_min(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a <= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} + +template +T ssa_max(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a >= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp new file mode 100644 index 000000000..9f3c54f50 --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -0,0 +1,117 @@ + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_scope.h" +#include 
"r_compiler/llvm_include.h" + +SSAInt::SSAInt() +: v(0) +{ +} + +SSAInt::SSAInt(int constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)); +} + +SSAInt::SSAInt(SSAFloat f) +: v(0) +{ + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); +} + +SSAInt::SSAInt(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAInt::llvm_type() +{ + return llvm::Type::getInt32Ty(SSAScope::context()); +} + +SSAInt operator+(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator-(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator*(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator/(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator%(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator+(int a, const SSAInt &b) +{ + return SSAInt(a) + b; +} + +SSAInt operator-(int a, const SSAInt &b) +{ + return SSAInt(a) - b; +} + +SSAInt operator*(int a, const SSAInt &b) +{ + return SSAInt(a) * b; +} + +SSAInt operator/(int a, const SSAInt &b) +{ + return SSAInt(a) / b; +} + +SSAInt operator%(int a, const SSAInt &b) +{ + return SSAInt(a) % b; +} + +SSAInt operator+(const SSAInt &a, int b) +{ + return a + SSAInt(b); +} + +SSAInt operator-(const SSAInt &a, int b) +{ + return a - SSAInt(b); +} + +SSAInt operator*(const SSAInt &a, int b) +{ + return a * SSAInt(b); +} + +SSAInt operator/(const SSAInt &a, int b) +{ + return a / SSAInt(b); +} + +SSAInt operator%(const SSAInt &a, int b) +{ + return a % SSAInt(b); +} + +SSAInt operator<<(const SSAInt 
namespace llvm
{
	class Value;
	class Type;
}

class SSAFloat;

// Thin wrapper around an llvm::Value that represents a 32-bit integer in the code being
// generated. Operators emit IR through SSAScope instead of computing a result.
class SSAInt
{
public:
	// Construction. The constructors taking int and SSAFloat are intentionally implicit;
	// only the raw llvm::Value wrapper is explicit.
	SSAInt();
	SSAInt(int constant);
	SSAInt(SSAFloat f);
	explicit SSAInt(llvm::Value *v);

	// Wraps an existing llvm value without emitting any instruction.
	static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); }

	// LLVM type used for values of this wrapper.
	static llvm::Type *llvm_type();

	// Underlying llvm value; public so sibling SSA wrappers can read it directly.
	llvm::Value *v;
};

// SSAInt op SSAInt. Division and remainder are signed.
SSAInt operator+(const SSAInt &a, const SSAInt &b);
SSAInt operator-(const SSAInt &a, const SSAInt &b);
SSAInt operator*(const SSAInt &a, const SSAInt &b);
SSAInt operator/(const SSAInt &a, const SSAInt &b);
SSAInt operator%(const SSAInt &a, const SSAInt &b);

// int op SSAInt: the int is converted to an SSAInt constant first.
SSAInt operator+(int a, const SSAInt &b);
SSAInt operator-(int a, const SSAInt &b);
SSAInt operator*(int a, const SSAInt &b);
SSAInt operator/(int a, const SSAInt &b);
SSAInt operator%(int a, const SSAInt &b);

// SSAInt op int: the int is converted to an SSAInt constant first.
SSAInt operator+(const SSAInt &a, int b);
SSAInt operator-(const SSAInt &a, int b);
SSAInt operator*(const SSAInt &a, int b);
SSAInt operator/(const SSAInt &a, int b);
SSAInt operator%(const SSAInt &a, int b);

// Shifts by a compile-time bit count. Note that >> is a LOGICAL shift (zero fill),
// not an arithmetic one, even though the other operators treat the value as signed.
SSAInt operator<<(const SSAInt &a, int bits);
SSAInt operator>>(const SSAInt &a, int bits);
*SSAIntPtr::llvm_type() +{ + return llvm::Type::getInt32PtrTy(SSAScope::context()); +} + +SSAIntPtr SSAIntPtr::operator[](SSAInt index) const +{ + return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAInt SSAIntPtr::load() const +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_unaligned_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAIntPtr::store(const SSAInt &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); +} + +void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h new file mode 100644 index 000000000..20e024a31 --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_int_ptr.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAIntPtr +{ +public: + SSAIntPtr(); + explicit SSAIntPtr(llvm::Value *v); + static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } + static llvm::Type *llvm_type(); + SSAIntPtr operator[](SSAInt index) const; + SSAInt load() const; + SSAVec4i load_vec4i() const; + SSAVec4i load_unaligned_vec4i() const; + void store(const SSAInt &new_value); + void store_vec4i(const SSAVec4i &new_value); + void store_unaligned_vec4i(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_phi.h b/src/r_compiler/ssa/ssa_phi.h new file mode 100644 index 000000000..89cbc8cf0 --- /dev/null +++ b/src/r_compiler/ssa/ssa_phi.h @@ -0,0 +1,33 @@ + +#pragma once + +#include "ssa_scope.h" + +class SSAIfBlock; + +template +class SSAPhi +{ +public: + void add_incoming(SSAVariable var) + { + incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock())); + } + + SSAVariable create() + { + llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint()); + for (size_t i = 0; i < incoming.size(); i++) + phi_node->addIncoming(incoming[i].v, incoming[i].bb); + return SSAVariable::from_llvm(phi_node); + } + +private: + struct Incoming + { + Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { } + llvm::Value *v; + llvm::BasicBlock *bb; + }; + std::vector incoming; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h new file mode 100644 index 000000000..507e95b5d --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4f.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float_ptr.h" + +class SSAPixelFormat4f +{ +public: + SSAPixelFormat4f() { } + SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt 
width, SSAInt height) : _pixels(pixels) { } + + SSAFloatPtr pixels() { return _pixels; } + SSAFloatPtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return _pixels[index * 4].load_vec4f(); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4f(pixel); + } + +protected: + SSAFloatPtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h new file mode 100644 index 000000000..fdf98c4aa --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub +{ +public: + SSAPixelFormat4ub() { } + SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f)); + } + +private: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h new file mode 100644 index 000000000..4601eeb3c --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h @@ -0,0 +1,35 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_argb_rev +{ +public: + SSAPixelFormat4ub_argb_rev() { } + SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } +/* + void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const + { + SSAVec8s p = _pixels[index * 4].load_vec8s(); + out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 
0, 3); + out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); + } +*/ + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h new file mode 100644 index 000000000..402480c49 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_rev +{ +public: + SSAPixelFormat4ub_rev() { } + SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixels.h b/src/r_compiler/ssa/ssa_pixels.h new file mode 100644 index 000000000..a4209d439 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixels.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "ssa_ubyte.h" +#include "ssa_ubyte_ptr.h" +#include "ssa_float.h" +#include "ssa_float_ptr.h" +#include "ssa_int.h" +#include "ssa_pixeltype.h" +//#include "ssa_pixelformat1f.h" +//#include "ssa_pixelformat2f.h" +//#include "ssa_pixelformat3f.h" +#include "ssa_pixelformat4f.h" +//#include "ssa_pixelformat1ub.h" +//#include "ssa_pixelformat2ub.h" +//#include "ssa_pixelformat3ub.h" +//#include "ssa_pixelformat3ub_rev.h" 
+#include "ssa_pixelformat4ub.h" +//#include "ssa_pixelformat4ub_argb.h" +#include "ssa_pixelformat4ub_rev.h" +#include "ssa_pixelformat4ub_argb_rev.h" +//#include "ssa_pixelformat4ub_channel.h" + +//typedef SSAPixelType SSAPixels1f; +//typedef SSAPixelType SSAPixels2f; +//typedef SSAPixelType SSAPixels3f; +typedef SSAPixelType SSAPixels4f; + +//typedef SSAPixelType SSAPixels1ub; +//typedef SSAPixelType SSAPixels2ub; +//typedef SSAPixelType SSAPixels3ub; +typedef SSAPixelType SSAPixels4ub; +//typedef SSAPixelType SSAPixels4ub_argb; + +//typedef SSAPixelType SSAPixels3ub_rev; +typedef SSAPixelType SSAPixels4ub_rev; +typedef SSAPixelType SSAPixels4ub_argb_rev; + +//typedef SSAPixelType SSAPixels4ub_channel; diff --git a/src/r_compiler/ssa/ssa_pixeltype.h b/src/r_compiler/ssa/ssa_pixeltype.h new file mode 100644 index 000000000..8614f171d --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixeltype.h @@ -0,0 +1,498 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_vec4f.h" +#include "ssa_bool.h" +#include "ssa_if_block.h" +#include "ssa_phi.h" + +template +class SSAPixelType : public PixelFormat +{ +public: + SSAPixelType() + { + } + + SSAPixelType(SSAInt width, SSAInt height, PixelType pixels) + : PixelFormat(pixels, width, height), _width(width), _height(height) + { + _width32 = SSAVec4i(_width); + SSAVec4i height32(_height); + _widthps = SSAVec4f(_width32); + _heightps = SSAVec4f(height32); + _width16 = SSAVec8s(_width32, _width32); + + _widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4); + _widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4); + } + + SSAInt width() const { return _width; } + SSAInt height() const { return _height; } + SSAInt size() const { return _width * _height; } + + SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; } + SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; } + //void throw_if_out_of_bounds(SSAInt i) const { 
if (!in_bounds(i)) throw clan::Exception("Out of bounds"); } + //void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); } + + SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); } + SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); } + SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); } + SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); } + SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); } + SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); } + SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); } + SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); } + + static SSAInt int_min(SSAInt a, SSAInt b) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a <= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt int_max(SSAInt a, SSAInt b) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt clamp(SSAInt v, SSAInt size) + { + return int_max(int_min(v, size - 1), 0); + } + + static SSAInt repeat(SSAInt v, SSAInt size) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming(size - 1 + v % size); + branch.end_block(); + return phi.create(); + } + + static SSAInt mirror(SSAInt v, SSAInt size) + { + SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); + branch.end_block(); + return phi.create(); + } + + static SSAInt round(SSAFloat v) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0.0f); + phi.add_incoming(v + 0.5f); + branch.else_block(); + 
phi.add_incoming(v - 0.5f); + branch.end_block(); + return SSAInt(phi.create()); + } + + // To do: fix this: + static SSAInt int_floor(SSAFloat v) + { + return SSAInt(v); + } + static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); } + + SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); } + void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); } + + SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); } + SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); } + SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); } + + SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const + { + SSAFloat a = fract(s * SSAFloat(_width) - 0.5f); + SSAFloat b = fract(t * SSAFloat(_height) - 0.5f); + SSAFloat inv_a = 1.0f - a; + SSAFloat inv_b = 1.0f - b; + return + samples[0] * (inv_a * inv_b) + + samples[1] * (a * inv_b) + + samples[2] * (inv_a * b) + + samples[3] * (a * b); + } + + void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_clamp4f(x, y); + out_pixels[1] = get_clamp4f(x + 1, y); + out_pixels[2] = get_clamp4f(x, y + 1); + out_pixels[3] = get_clamp4f(x + 1, y + 1); + /* + SSAInt x0 = clamp_x(x); + SSAInt x1 = clamp_x(x + 1); + SSAInt y0 = clamp_y(y); + SSAInt y1 = clamp_y(y + 1); + SSAInt offset0 = y0 * _width; + SSAInt offset1 = y1 * _width; + SSAPhi phi0; + SSAPhi phi1; + SSAPhi phi2; + SSAPhi phi3; + SSAIfBlock if0; + if0.if_block(x0 + 1 == x1); + phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.else_block(); + 
phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.end_block(); + out_pixels[0] = phi0.create(); + out_pixels[1] = phi1.create(); + out_pixels[2] = phi2.create(); + out_pixels[3] = phi3.create(); + */ + } + + void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_repeat4f(x, y); + out_pixels[1] = get_repeat4f(x + 1, y); + out_pixels[2] = get_repeat4f(x, y + 1); + out_pixels[3] = get_repeat4f(x + 1, y + 1); + } + + void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_mirror4f(x, y); + out_pixels[1] = get_mirror4f(x + 1, y); + out_pixels[2] = get_mirror4f(x, y + 1); + out_pixels[3] = get_mirror4f(x + 1, y + 1); + } + + SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); } + + SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_clamp4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + ///////////////////////////////////////////////////////////////////////// 
+ // Packed versions: + + SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); } + SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); } + SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); } + SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); } + SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); } + SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); } + SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); } + SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); } + + static SSAVec4i clamp(SSAVec4i v, SSAInt size) + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0); + } + + static SSAVec4i repeat(SSAVec4i v, SSAInt size) + { + return clamp(v, size); + /*SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming(size - 1 + v % size); + branch.end_block(); + return phi.create();*/ + } + + static SSAVec4i mirror(SSAVec4i v, SSAInt size) + { + return clamp(v, size); + /*SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); + branch.end_block(); + return phi.create();*/ + } + + static SSAVec4i round(SSAVec4f v) + { + // Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs) + SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000); + SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f))); + return v + signed_half; + } + + static SSAVec4i int_floor(SSAVec4f v) + { + return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31); + } + + static SSAVec4f fract(SSAVec4f v) + { + // return v - SSAVec4f::floor_sse4(v); + return v - SSAVec4f(int_floor(v)); + } + + template + SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, 
WrapXFunctor wrap_x, WrapYFunctor wrap_y) const + { + SSAVec4i x = int_floor(s * _widthps - 0.5f); + SSAVec4i y = int_floor(t * _heightps - 0.5f); + SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y)); + SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); + SSAVec8s offsetlo = y16 * _width16; + SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + x; + return PixelFormat::get4f(offset[index]); + } + + SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); + */ + } + + SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return 
self->mirror_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + template + void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const + { + SSAVec4i x = int_floor(s * _widthps - 0.5f); + SSAVec4i y = int_floor(t * _heightps - 0.5f); + SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y)); + SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); + SSAVec8s offsetlo = y16 * _width16; + SSAVec4i x0 = wrap_x(x); + SSAVec4i x1 = wrap_x(x + 1); + SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi); + SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi); + SSAVec4i offset0 = x0 + line0; + SSAVec4i offset1 = x1 + line0; + SSAVec4i offset2 = x0 + line1; + SSAVec4i offset3 = x1 + line1; + out_pixels[0] = PixelFormat::get4f(offset0[index]); + out_pixels[1] = PixelFormat::get4f(offset1[index]); + out_pixels[2] = PixelFormat::get4f(offset2[index]); + out_pixels[3] = PixelFormat::get4f(offset3[index]); + } + + void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); + */ + } + + void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) 
: self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const + { + SSAVec4f a = fract(s * _widthps - 0.5f); + SSAVec4f b = fract(t * _heightps - 0.5f); + SSAVec4f 
inv_a = 1.0f - a; + SSAVec4f inv_b = 1.0f - b; + return + samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) + + samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) + + samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) + + samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index); + } + + ///////////////////////////////////////////////////////////////////////// + + SSAVec4i clamp(SSAVec4i sstt) const + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0); + } + + template + void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1)); + SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7); + SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3); + out_pixels[0] = PixelFormat::get4f(offsets[0]); + out_pixels[1] = PixelFormat::get4f(offsets[1]); + out_pixels[2] = PixelFormat::get4f(offsets[2]); + out_pixels[3] = PixelFormat::get4f(offsets[3]); + } + + void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const + { + struct Wrap { Wrap(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i sstt) { return self->clamp(sstt); } const SSAPixelType *self; }; + return gather_helper4f(st, out_pixels, Wrap(this)); + } + + SSAVec4f linear_clamp4f(SSAVec4f st) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(st, samples); + return linear_interpolate4f(st, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f); + SSAVec4f inv_aabb = 1.0f - aabb; + SSAVec4f ab_inv_ab = SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6); + SSAVec4f ab__inv_a_b__inv_a_inv_b__a_invb = 
ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0);
+		return
+			samples[0] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 2, 2, 2, 2) +
+			samples[1] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 3, 3, 3, 3) +
+			samples[2] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 1, 1, 1, 1) +
+			samples[3] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 0, 0, 0, 0);
+	}
+
+public:
+	SSAInt _width;
+	SSAInt _height;
+	SSAVec4i _width32;
+	SSAVec8s _width16;
+	SSAVec4f _widthps;
+	SSAVec4f _heightps;
+
+	SSAVec4i _widthheight;
+	SSAVec4f _widthheightps;
+};
diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp
new file mode 100644
index 000000000..f9d16f188
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_scope.cpp
@@ -0,0 +1,65 @@
+
+#include "ssa_scope.h"
+#include "ssa_int.h"
+
+SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder)
+: _context(context), _module(module), _builder(builder)
+{
+	instance = this;
+}
+
+SSAScope::~SSAScope()
+{
+	instance = 0;
+}
+
+llvm::LLVMContext &SSAScope::context()
+{
+	return *instance->_context;
+}
+
+llvm::Module *SSAScope::module()
+{
+	return instance->_module;
+}
+
+llvm::IRBuilder<> &SSAScope::builder()
+{
+	return *instance->_builder;
+}
+
+llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types)
+{
+	// Returns the declaration of intrinsic `id` in the current module, adding it if absent.
+	// getDeclaration() looks the function up under the name mangled with parameter_types,
+	// so an overloaded intrinsic resolves to one declaration per overload on repeated calls.
+	return llvm::Intrinsic::getDeclaration(module(), id, parameter_types);
+}
+
+llvm::Value *SSAScope::alloca(llvm::Type *type)
+{
+	return alloca(type, SSAInt(1));
+}
+
+llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
+{
+	// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
+	llvm::BasicBlock &entry =
SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock(); + llvm::IRBuilder<> alloca_builder(&entry, entry.begin()); + return alloca_builder.CreateAlloca(type, size.v, hint()); +} + +const std::string &SSAScope::hint() +{ + return instance->_hint; +} + +void SSAScope::set_hint(const std::string &new_hint) +{ + if (new_hint.empty()) + instance->_hint = "tmp"; + else + instance->_hint = new_hint; +} + +SSAScope *SSAScope::instance = 0; diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h new file mode 100644 index 000000000..d184643ad --- /dev/null +++ b/src/r_compiler/ssa/ssa_scope.h @@ -0,0 +1,41 @@ + +#pragma once + +#include "r_compiler/llvm_include.h" + +class SSAInt; + +class SSAScope +{ +public: + SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder); + ~SSAScope(); + static llvm::LLVMContext &context(); + static llvm::Module *module(); + static llvm::IRBuilder<> &builder(); + static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); + static llvm::Value *alloca(llvm::Type *type); + static llvm::Value *alloca(llvm::Type *type, SSAInt size); + static const std::string &hint(); + static void set_hint(const std::string &hint); + +private: + static SSAScope *instance; + llvm::LLVMContext *_context; + llvm::Module *_module; + llvm::IRBuilder<> *_builder; + std::string _hint; +}; + +class SSAScopeHint +{ +public: + SSAScopeHint() : old_hint(SSAScope::hint()) { } + SSAScopeHint(const std::string &hint) : old_hint(SSAScope::hint()) { SSAScope::set_hint(hint); } + ~SSAScopeHint() { SSAScope::set_hint(old_hint); } + void set(const std::string &hint) { SSAScope::set_hint(hint); } + void clear() { SSAScope::set_hint(old_hint); } + +private: + std::string old_hint; +}; diff --git a/src/r_compiler/ssa/ssa_stack.h b/src/r_compiler/ssa/ssa_stack.h new file mode 100644 index 000000000..435530be1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_stack.h @@ -0,0 +1,25 
@@ + +#pragma once + +template +class SSAStack +{ +public: + SSAStack() + : v(0) + { + v = SSAScope::alloca(SSAVariable::llvm_type()); + } + + SSAVariable load() const + { + return SSAVariable::from_llvm(SSAScope::builder().CreateLoad(v, SSAScope::hint())); + } + + void store(const SSAVariable &new_value) + { + SSAScope::builder().CreateStore(new_value.v, v); + } + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp new file mode 100644 index 000000000..4a79768ce --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -0,0 +1,18 @@ + +#include "ssa_struct_type.h" +#include "ssa_scope.h" + +void SSAStructType::add_parameter(llvm::Type *type) +{ + elements.push_back(type); +} + +llvm::Type *SSAStructType::llvm_type() +{ + return llvm::StructType::get(SSAScope::context(), elements, false); +} + +llvm::Type *SSAStructType::llvm_type_packed() +{ + return llvm::StructType::get(SSAScope::context(), elements, true); +} diff --git a/src/r_compiler/ssa/ssa_struct_type.h b/src/r_compiler/ssa/ssa_struct_type.h new file mode 100644 index 000000000..67b056b32 --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.h @@ -0,0 +1,17 @@ + +#pragma once + +#include + +namespace llvm { class Type; } + +class SSAStructType +{ +public: + void add_parameter(llvm::Type *type); + llvm::Type *llvm_type(); + llvm::Type *llvm_type_packed(); + +private: + std::vector elements; +}; diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp new file mode 100644 index 000000000..04db4fd28 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -0,0 +1,95 @@ + +#include "ssa_ubyte.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAUByte::SSAUByte() +: v(0) +{ +} + +SSAUByte::SSAUByte(unsigned char constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false)); +} + +SSAUByte::SSAUByte(llvm::Value *v) +: v(v) +{ +} + +llvm::Type 
*SSAUByte::llvm_type() +{ + return llvm::Type::getInt8Ty(SSAScope::context()); +} + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAUByte operator/(const SSAUByte &a, const SSAUByte &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v); +} +*/ +SSAUByte operator+(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) + b; +} + +SSAUByte operator-(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) - b; +} + +SSAUByte operator*(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) * b; +} +/* +SSAUByte operator/(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) / b; +} +*/ +SSAUByte operator+(const SSAUByte &a, unsigned char b) +{ + return a + SSAUByte(b); +} + +SSAUByte operator-(const SSAUByte &a, unsigned char b) +{ + return a - SSAUByte(b); +} + +SSAUByte operator*(const SSAUByte &a, unsigned char b) +{ + return a * SSAUByte(b); +} +/* +SSAUByte operator/(const SSAUByte &a, unsigned char b) +{ + return a / SSAUByte(b); +} +*/ +SSAUByte operator<<(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateShl(a.v, bits)); +} + +SSAUByte operator>>(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLShr(a.v, bits)); +} diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h new file mode 100644 index 000000000..f1e12afba --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -0,0 +1,35 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAUByte +{ +public: + SSAUByte(); + 
SSAUByte(unsigned char constant); + explicit SSAUByte(llvm::Value *v); + static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b); +//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b); + +SSAUByte operator+(unsigned char a, const SSAUByte &b); +SSAUByte operator-(unsigned char a, const SSAUByte &b); +SSAUByte operator*(unsigned char a, const SSAUByte &b); +//SSAUByte operator/(unsigned char a, const SSAUByte &b); + +SSAUByte operator+(const SSAUByte &a, unsigned char b); +SSAUByte operator-(const SSAUByte &a, unsigned char b); +SSAUByte operator*(const SSAUByte &a, unsigned char b); +//SSAUByte operator/(const SSAUByte &a, unsigned char b); + +SSAUByte operator<<(const SSAUByte &a, unsigned char bits); +SSAUByte operator>>(const SSAUByte &a, unsigned char bits); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp new file mode 100644 index 000000000..825806148 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -0,0 +1,106 @@ + +#include "ssa_ubyte_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAUBytePtr::SSAUBytePtr() +: v(0) +{ +} + +SSAUBytePtr::SSAUBytePtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAUBytePtr::llvm_type() +{ + return llvm::Type::getInt8PtrTy(SSAScope::context()); +} + +SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const +{ + return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAUByte SSAUBytePtr::load() const +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAUBytePtr::load_vec4ub() const +{ + // _mm_cvtsi32_si128 as implemented by clang: + SSAInt i32 = 
SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); + llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); + SSAVec4i v4i = SSAVec4i::from_llvm(v); + + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 +/* + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); + llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type)); +*/ +} + +SSAVec16ub SSAUBytePtr::load_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, 
m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAUBytePtr::store(const SSAUByte &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) +{ + // Store using saturate: + SSAVec8s v8s(new_value, new_value); + SSAVec16ub v16ub(v8s, v8s); + + llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); + SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); +} + +void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); + + // The following generates _mm_stream_si128, maybe! 
+	// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1));
+	// inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node);
+}
+
+void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
+{
+	// Writes the sixteen bytes of new_value to the address held in v without
+	// requiring that address to be 16-byte aligned.
+	//
+	// A store created without an explicit alignment is treated by LLVM as having the
+	// ABI alignment of the stored type, which would allow an aligned vector store to be
+	// emitted for <16 x i8>. Stating an alignment of 1 makes no promise about the address.
+	llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
+	SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1);
+}
diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h
new file mode 100644
index 000000000..5b68ee1ad
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h
@@ -0,0 +1,32 @@
+
+#pragma once
+
+#include "ssa_ubyte.h"
+#include "ssa_int.h"
+#include "ssa_vec4i.h"
+#include "ssa_vec8s.h"
+#include "ssa_vec16ub.h"
+
+namespace llvm { class Value; }
+namespace llvm { class Type; }
+
+class SSAUBytePtr
+{
+public:
+	SSAUBytePtr();
+	explicit SSAUBytePtr(llvm::Value *v);
+	static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); }
+	static llvm::Type *llvm_type();
+	SSAUBytePtr operator[](SSAInt index) const;
+	SSAUByte load() const;
+	SSAVec4i load_vec4ub() const;
+	SSAVec8s load_vec8s() const;
+	SSAVec16ub load_vec16ub() const;
+	SSAVec16ub load_unaligned_vec16ub() const;
+	void store(const SSAUByte &new_value);
+	void store_vec4ub(const SSAVec4i &new_value);
+	void store_vec16ub(const SSAVec16ub &new_value);
+	void store_unaligned_vec16ub(const SSAVec16ub &new_value);
+
+	llvm::Value *v;
+};
diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp
new file mode 100644
index 000000000..877420fc5
--- /dev/null
+++
b/src/r_compiler/ssa/ssa_value.cpp @@ -0,0 +1,56 @@ + +#include "ssa_value.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAValue SSAValue::load() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false)); +} + +void SSAValue::store(llvm::Value *value) +{ + SSAScope::builder().CreateStore(value, v, false); +} + +SSAIndexLookup SSAValue::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAValue::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(index.v); + return result; +} + +///////////////////////////////////////////////////////////////////////////// + +SSAIndexLookup::operator SSAValue() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateGEP(v, indexes)); +} + +SSAIndexLookup SSAIndexLookup::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAIndexLookup::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(index.v); + return result; +} diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h new file mode 100644 index 000000000..ec156a452 --- /dev/null +++ b/src/r_compiler/ssa/ssa_value.h @@ -0,0 +1,53 @@ + +#pragma once + +#include + +namespace llvm { class Value; } + +class SSAInt; +class SSAIndexLookup; + +class SSAValue +{ +public: + SSAValue() : v(0) { } + + static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } + + SSAValue load(); + void store(llvm::Value *v); + + template + operator Type() + { + return Type::from_llvm(v); + } + + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); + + llvm::Value *v; +}; + +class SSAIndexLookup +{ +public: + SSAIndexLookup() : v(0) { } + + 
llvm::Value *v; + std::vector indexes; + + SSAValue load() { SSAValue value = *this; return value.load(); } + void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); } + + template + operator Type() + { + return Type::from_llvm(v); + } + + operator SSAValue(); + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); +}; diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp new file mode 100644 index 000000000..f18d68718 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -0,0 +1,155 @@ + +#include "ssa_vec16ub.h" +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec16ub::SSAVec16ub() +: v(0) +{ +} + +SSAVec16ub::SSAVec16ub(unsigned char constant) +: v(0) +{ + std::vector constants; + constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant0, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant1, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant2, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant3, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant4, false))); + 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant5, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant6, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant7, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant8, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant9, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant10, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant11, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant12, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant13, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant14, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant15, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub(llvm::Value *v) +: v(v) +{ +} + +SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) +: v(0) +{ + llvm::Value *values[2] = { s0.v, s1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec16ub::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); +} + +SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int 
index15) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7, index8, index9, index10, index11, index12, index13, index14, index15); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index8))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index9))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index10))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index11))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index12))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index13))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index14))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), 
llvm::APInt(32, index15))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint()); +} +*/ +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) + b; +} + +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) - b; +} + +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) * b; +} +/* +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) / b; +} +*/ +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b) +{ + return a + SSAVec16ub(b); +} + +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b) +{ + return a - SSAVec16ub(b); +} + +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b) +{ + return a * SSAVec16ub(b); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b) +{ + return a / SSAVec16ub(b); +} +*/ \ No newline at end of file diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h new file mode 100644 index 000000000..e4cfcdc87 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec8s; +class SSAVec4i; + +class SSAVec16ub +{ +public: + 
SSAVec16ub(); + SSAVec16ub(unsigned char constant); + SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); + explicit SSAVec16ub(llvm::Value *v); + SSAVec16ub(SSAVec8s s0, SSAVec8s s1); + static SSAVec16ub from_llvm(llvm::Value *v) { return SSAVec16ub(v); } + static llvm::Type *llvm_type(); + static SSAVec16ub bitcast(SSAVec4i i32); + static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + + llvm::Value *v; +}; + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b); + +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b); + +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char 
b); diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp new file mode 100644 index 000000000..e002018fe --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f.cpp @@ -0,0 +1,244 @@ + +#include "ssa_vec4f.h" +#include "ssa_vec4i.h" +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec4f::SSAVec4f() +: v(0) +{ +} + +SSAVec4f::SSAVec4f(float constant) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant0))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant1))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant2))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant3))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4f::SSAVec4f(SSAFloat f) +: v(0) +{ + llvm::Type *m1xfloattype = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(f.v, m1xfloattype, SSAScope::hint()), llvm::UndefValue::get(m1xfloattype), mask, SSAScope::hint()); +} + +SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3) +: 
v(0) +{ + v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(llvm_type()), f0.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + v = SSAScope::builder().CreateInsertElement(v, f1.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1))); + v = SSAScope::builder().CreateInsertElement(v, f2.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2))); + v = SSAScope::builder().CreateInsertElement(v, f3.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)3))); +} + +SSAVec4f::SSAVec4f(llvm::Value *v) +: v(v) +{ +} + +SSAVec4f::SSAVec4f(SSAVec4i i32) +: v(0) +{ + //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4); + //return builder->CreateSIToFP(i32.v, m128type); + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint()); +} + +llvm::Type *SSAVec4f::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4); +} + +SSAFloat SSAVec4f::operator[](SSAInt index) const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) +{ + return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index)))); +} + +SSAVec4f SSAVec4f::bitcast(SSAVec4i i32) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4f SSAVec4f::sqrt(SSAVec4f f) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); + //return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_sqrt_ps), f.v, 
SSAScope::hint())); +} + +SSAVec4f SSAVec4f::rcp(SSAVec4f f) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rcp_ps), f.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::sin(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::cos(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::pow(SSAVec4f val, SSAVec4f power) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + std::vector args; + args.push_back(val.v); + args.push_back(power.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::exp(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::log(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::fma(SSAVec4f a, SSAVec4f b, SSAVec4f c) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + std::vector args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, 
params), args, SSAScope::hint())); +} + +void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3) +{ + SSAVec4f tmp0 = shuffle(row0, row1, 0x44);//_MM_SHUFFLE(1,0,1,0)); + SSAVec4f tmp2 = shuffle(row0, row1, 0xEE);//_MM_SHUFFLE(3,2,3,2)); + SSAVec4f tmp1 = shuffle(row2, row3, 0x44);//_MM_SHUFFLE(1,0,1,0)); + SSAVec4f tmp3 = shuffle(row2, row3, 0xEE);//_MM_SHUFFLE(3,2,3,2)); + row0 = shuffle(tmp0, tmp1, 0x88);//_MM_SHUFFLE(2,0,2,0)); + row1 = shuffle(tmp0, tmp1, 0xDD);//_MM_SHUFFLE(3,1,3,1)); + row2 = shuffle(tmp2, tmp3, 0x88);//_MM_SHUFFLE(2,0,2,0)); + row3 = shuffle(tmp2, tmp3, 0xDD);//_MM_SHUFFLE(3,1,3,1)); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3) +{ + return shuffle(f0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec4f::from_llvm(SSAScope::builder().CreateShuffleVector(f0.v, f1.v, mask, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask) +{ + return shuffle(f0, f1, mask & 3, (mask >> 2) & 3, ((mask >> 4) & 3) + 4, ((mask >> 6) & 3) + 4); +} + +SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b) +{ + return 
SSAVec4f::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator+(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) + b; +} + +SSAVec4f operator-(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) - b; +} + +SSAVec4f operator*(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) * b; +} + +SSAVec4f operator/(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) / b; +} + +SSAVec4f operator+(const SSAVec4f &a, float b) +{ + return a + SSAVec4f(b); +} + +SSAVec4f operator-(const SSAVec4f &a, float b) +{ + return a - SSAVec4f(b); +} + +SSAVec4f operator*(const SSAVec4f &a, float b) +{ + return a * SSAVec4f(b); +} + +SSAVec4f operator/(const SSAVec4f &a, float b) +{ + return a / SSAVec4f(b); +} diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h new file mode 100644 index 000000000..5e3397e58 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f.h @@ -0,0 +1,57 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4i; +class SSAFloat; +class SSAInt; + +class SSAVec4f +{ +public: + SSAVec4f(); + SSAVec4f(float constant); + SSAVec4f(float constant0, float constant1, float constant2, float constant3); + SSAVec4f(SSAFloat f); + SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3); + explicit SSAVec4f(llvm::Value *v); + SSAVec4f(SSAVec4i i32); + SSAFloat operator[](SSAInt index) const; + static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); + static SSAVec4f bitcast(SSAVec4i i32); + static SSAVec4f sqrt(SSAVec4f f); + static SSAVec4f rcp(SSAVec4f f); + static SSAVec4f sin(SSAVec4f val); + static SSAVec4f cos(SSAVec4f val); + static 
#pragma once

namespace llvm
{
	class Value;
	class Type;
}

class SSAVec4i;
class SSAFloat;
class SSAInt;

// Wrapper for an LLVM SSA value of type <4 x float>.
// All operations emit IR into the current SSAScope.
class SSAVec4f
{
public:
	// Construction
	SSAVec4f();
	SSAVec4f(float constant);                                                     // same constant in every lane
	SSAVec4f(float constant0, float constant1, float constant2, float constant3); // one constant per lane
	SSAVec4f(SSAFloat f);                                                         // broadcast of a scalar
	SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3);                 // one scalar per lane
	explicit SSAVec4f(llvm::Value *v);                                            // wraps an existing value
	SSAVec4f(SSAVec4i i32);                                                       // numeric int to float conversion

	// Lane access
	SSAFloat operator[](SSAInt index) const;
	static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index);

	// Reinterprets the bits of an integer vector (no numeric conversion)
	static SSAVec4f bitcast(SSAVec4i i32);

	// Lane-wise math
	static SSAVec4f sqrt(SSAVec4f f);
	static SSAVec4f rcp(SSAVec4f f);
	static SSAVec4f sin(SSAVec4f val);
	static SSAVec4f cos(SSAVec4f val);
	static SSAVec4f pow(SSAVec4f val, SSAVec4f power);
	static SSAVec4f exp(SSAVec4f val);
	static SSAVec4f log(SSAVec4f val);
	static SSAVec4f fma(SSAVec4f a, SSAVec4f b, SSAVec4f c);

	// Lane rearrangement
	static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3);
	static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3);
	static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3);

	// LLVM interop
	static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); }
	static llvm::Type *llvm_type();

	llvm::Value *v;

private:
	// shufps style shuffle driven by an 8-bit immediate; used by transpose
	static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask);
};

// Lane-wise arithmetic between two vectors
SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b);
SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b);

// Constant on the left hand side
SSAVec4f operator+(float a, const SSAVec4f &b);
SSAVec4f operator-(float a, const SSAVec4f &b);
SSAVec4f operator*(float a, const SSAVec4f &b);
SSAVec4f operator/(float a, const SSAVec4f &b);

// Constant on the right hand side
SSAVec4f operator+(const SSAVec4f &a, float b);
SSAVec4f operator-(const SSAVec4f &a, float b);
SSAVec4f operator*(const SSAVec4f &a, float b);
SSAVec4f operator/(const SSAVec4f &a, float b);
index.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4fPtr::load() const +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAVec4fPtr::load_unaligned() const +{ + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAVec4fPtr::store(const SSAVec4f &new_value) +{ + SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); +} + +void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ + SSAScope::builder().CreateStore(new_value.v, v, false); +} diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h new file mode 100644 index 000000000..ab4e84190 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h @@ -0,0 +1,24 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4fPtr +{ +public: + SSAVec4fPtr(); + explicit SSAVec4fPtr(llvm::Value *v); + static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); } + static llvm::Type *llvm_type(); + SSAVec4fPtr operator[](SSAInt index) const; + SSAVec4f load() const; + SSAVec4f load_unaligned() const; + void store(const SSAVec4f &new_value); + void store_unaligned(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp new file mode 100644 index 000000000..80e07c8d4 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -0,0 +1,213 @@ + +#include "ssa_vec4i.h" +#include "ssa_vec4f.h" +#include "ssa_vec8s.h" +#include "ssa_vec16ub.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + 
+SSAVec4i::SSAVec4i() +: v(0) +{ +} + +SSAVec4i::SSAVec4i(int constant) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant0, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant1, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant2, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant3, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4i::SSAVec4i(llvm::Value *v) +: v(v) +{ +} + +SSAVec4i::SSAVec4i(SSAInt i) +: v(0) +{ + llvm::Type *m1xi32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint()); +} + +SSAVec4i::SSAVec4i(SSAVec4f f32) +: v(0) +{ + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); +} + +SSAInt SSAVec4i::operator[](SSAInt index) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); +} + +llvm::Type *SSAVec4i::llvm_type() +{ 
+ return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); +} + +SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4i SSAVec4i::bitcast(SSAVec8s i16) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); +} + +SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec4i::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3) +{ + SSAVec8s low = SSAVec8s::extendlo(a); + SSAVec8s high = SSAVec8s::extendhi(a); + out0 = extendlo(low); + out1 = extendhi(low); + out2 = extendlo(high); + out3 = extendhi(high); +} + +SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 +} + +SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 +} + +SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 
4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 +} + +SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 +} + +SSAVec4i SSAVec4i::sqrt(SSAVec4i f) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, SSAScope::hint())); +} + +/* +SSAVec4i SSAVec4i::min_sse41(SSAVec4i a, SSAVec4i b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pminsd), values, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::max_sse41(SSAVec4i a, SSAVec4i b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pmaxsd), values, SSAScope::hint())); +} +*/ + +SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator+(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) + b; +} + +SSAVec4i operator-(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) - b; +} + +SSAVec4i operator*(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) * b; +} + +SSAVec4i operator/(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) / b; +} + +SSAVec4i operator+(const SSAVec4i &a, int b) +{ + return a + SSAVec4i(b); +} + +SSAVec4i operator-(const SSAVec4i &a, int b) +{ + return a 
- SSAVec4i(b); +} + +SSAVec4i operator*(const SSAVec4i &a, int b) +{ + return a * SSAVec4i(b); +} + +SSAVec4i operator/(const SSAVec4i &a, int b) +{ + return a / SSAVec4i(b); +} + +SSAVec4i operator<<(const SSAVec4i &a, int bits) +{ + //return SSAScope::builder().CreateShl(a.v, bits); + llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); +} + +SSAVec4i operator>>(const SSAVec4i &a, int bits) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h new file mode 100644 index 000000000..d19f1d1aa --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -0,0 +1,56 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4f; +class SSAVec8s; +class SSAVec16ub; +class SSAInt; + +class SSAVec4i +{ +public: + SSAVec4i(); + SSAVec4i(int constant); + SSAVec4i(int constant0, int constant1, int constant2, int constant3); + SSAVec4i(SSAInt i); + explicit SSAVec4i(llvm::Value *v); + SSAVec4i(SSAVec4f f32); + SSAInt operator[](SSAInt index); + static SSAVec4i bitcast(SSAVec4f f32); + static SSAVec4i bitcast(SSAVec8s i16); + static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); + static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3); + static SSAVec4i extendhi(SSAVec8s i16); + static SSAVec4i extendlo(SSAVec8s i16); + static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3); + static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1); + static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1); + static SSAVec4i sqrt(SSAVec4i f); + //static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b); + //static 
#pragma once

namespace llvm
{
	class Value;
	class Type;
}

class SSAVec4f;
class SSAVec8s;
class SSAVec16ub;
class SSAInt;

// Wrapper for an LLVM SSA value of type <4 x i32>.
// All operations emit IR into the current SSAScope.
class SSAVec4i
{
public:
	// Construction
	SSAVec4i();
	SSAVec4i(int constant);                                               // same constant in every lane
	SSAVec4i(int constant0, int constant1, int constant2, int constant3); // one constant per lane
	SSAVec4i(SSAInt i);                                                   // broadcast of a scalar
	explicit SSAVec4i(llvm::Value *v);                                    // wraps an existing value
	SSAVec4i(SSAVec4f f32);                                               // numeric float to int conversion

	// Lane access
	SSAInt operator[](SSAInt index);

	// Bit reinterpretation (no numeric conversion)
	static SSAVec4i bitcast(SSAVec4f f32);
	static SSAVec4i bitcast(SSAVec8s i16);

	// Lane rearrangement
	static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3);
	static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3);

	// Widening: interleaves 16-bit lanes with zeros (extend*) or with a second vector (combine*)
	static SSAVec4i extendhi(SSAVec8s i16);
	static SSAVec4i extendlo(SSAVec8s i16);
	static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3);
	static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1);
	static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1);

	static SSAVec4i sqrt(SSAVec4i f);
	//static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b);
	//static SSAVec4i max_sse41(SSAVec4i a, SSAVec4i b);

	// LLVM interop
	static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); }
	static llvm::Type *llvm_type();

	llvm::Value *v;
};

// Lane-wise arithmetic between two vectors
SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b);
SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b);

// Constant on the left hand side
SSAVec4i operator+(int a, const SSAVec4i &b);
SSAVec4i operator-(int a, const SSAVec4i &b);
SSAVec4i operator*(int a, const SSAVec4i &b);
SSAVec4i operator/(int a, const SSAVec4i &b);

// Constant on the right hand side
SSAVec4i operator+(const SSAVec4i &a, int b);
SSAVec4i operator-(const SSAVec4i &a, int b);
SSAVec4i operator*(const SSAVec4i &a, int b);
SSAVec4i operator/(const SSAVec4i &a, int b);

// Per-lane shifts by a compile time bit count
SSAVec4i operator<<(const SSAVec4i &a, int bits);
SSAVec4i operator>>(const SSAVec4i &a, int bits);
SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); +} + +void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) +{ + /*llvm::Value *values[2] = + { + v, + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_pd), values);*/ + SSAScope::builder().CreateStore(new_value.v, v, false); +} diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h new file mode 100644 index 000000000..56937b1cc --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h @@ -0,0 +1,24 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4iPtr +{ +public: + SSAVec4iPtr(); + explicit SSAVec4iPtr(llvm::Value *v); + static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); } + static llvm::Type *llvm_type(); + SSAVec4iPtr operator[](SSAInt index) const; + SSAVec4i load() const; + SSAVec4i load_unaligned() const; + void store(const SSAVec4i &new_value); + void store_unaligned(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp b/src/r_compiler/ssa/ssa_vec8s.cpp new file mode 100644 index 000000000..d61a4c4a9 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec8s.cpp @@ -0,0 +1,178 @@ + +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_vec16ub.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec8s::SSAVec8s() +: v(0) +{ +} + +SSAVec8s::SSAVec8s(short constant) +: v(0) +{ + std::vector constants; + constants.resize(8, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant0, 
true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant1, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant2, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant3, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant4, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant5, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant6, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant7, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec8s::SSAVec8s(llvm::Value *v) +: v(v) +{ +} + +SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) +: v(0) +{ + llvm::Value *values[2] = { i0.v, i1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec8s::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 8); +} + +SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint())); +} + +SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7); +} + +SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec8s::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) +{ + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 +} + +SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) +{ + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 +} + +/* +SSAVec8s SSAVec8s::min_sse2(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmins_w), values, SSAScope::hint())); +} + +SSAVec8s SSAVec8s::max_sse2(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmaxs_w), values, SSAScope::hint())); +} +*/ + +SSAVec8s SSAVec8s::mulhi(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmulh_w), values, SSAScope::hint())); +} + +SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b) +{ + return 
SSAVec8s::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator+(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) + b; +} + +SSAVec8s operator-(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) - b; +} + +SSAVec8s operator*(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) * b; +} + +SSAVec8s operator/(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) / b; +} + +SSAVec8s operator+(const SSAVec8s &a, short b) +{ + return a + SSAVec8s(b); +} + +SSAVec8s operator-(const SSAVec8s &a, short b) +{ + return a - SSAVec8s(b); +} + +SSAVec8s operator*(const SSAVec8s &a, short b) +{ + return a * SSAVec8s(b); +} + +SSAVec8s operator/(const SSAVec8s &a, short b) +{ + return a / SSAVec8s(b); +} + +SSAVec8s operator<<(const SSAVec8s &a, int bits) +{ + //return SSAScope::builder().CreateShl(a.v, bits); + llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); +} + +SSAVec8s operator>>(const SSAVec8s &a, int bits) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h new file mode 100644 index 000000000..aded358dd --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec8s.h @@ -0,0 +1,48 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } 
#pragma once

namespace llvm
{
	class Value;
	class Type;
}

class SSAVec4i;
class SSAVec16ub;

// Wrapper for an LLVM SSA value of type <8 x i16>.
// All operations emit IR into the current SSAScope.
class SSAVec8s
{
public:
	// Construction
	SSAVec8s();
	SSAVec8s(short constant); // same constant in every lane
	SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7);
	explicit SSAVec8s(llvm::Value *v); // wraps an existing value
	SSAVec8s(SSAVec4i i0, SSAVec4i i1); // packs two 32-bit vectors into 16-bit lanes

	// Bit reinterpretation (no numeric conversion)
	static SSAVec8s bitcast(SSAVec16ub i8);

	// Lane rearrangement
	static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
	static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);

	// Widening of the upper / lower eight bytes by interleaving with zeros
	static SSAVec8s extendhi(SSAVec16ub a);
	static SSAVec8s extendlo(SSAVec16ub a);

	//static SSAVec8s min_sse2(SSAVec8s a, SSAVec8s b);
	//static SSAVec8s max_sse2(SSAVec8s a, SSAVec8s b);
	static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b);

	// LLVM interop
	static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); }
	static llvm::Type *llvm_type();

	llvm::Value *v;
};

// Lane-wise arithmetic between two vectors
SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b);
SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b);

// Constant on the left hand side
SSAVec8s operator+(short a, const SSAVec8s &b);
SSAVec8s operator-(short a, const SSAVec8s &b);
SSAVec8s operator*(short a, const SSAVec8s &b);
SSAVec8s operator/(short a, const SSAVec8s &b);

// Constant on the right hand side
SSAVec8s operator+(const SSAVec8s &a, short b);
SSAVec8s operator-(const SSAVec8s &a, short b);
SSAVec8s operator*(const SSAVec8s &a, short b);
SSAVec8s operator/(const SSAVec8s &a, short b);

// Per-lane shifts by a compile time bit count
SSAVec8s operator<<(const SSAVec8s &a, int bits);
SSAVec8s operator>>(const SSAVec8s &a, int bits);
#include "r_draw_rgba.h" +#include "r_compiler/fixedfunction/fixedfunction.h" #include "gi.h" #include "stats.h" @@ -299,6 +300,68 @@ void DrawerCommandQueue::StopThreads() ///////////////////////////////////////////////////////////////////////////// +class DrawSpanFFCommand : public DrawerCommand +{ + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE * RESTRICT _destorg; + + const uint32_t * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + bool _nearest_filter; + + uint32_t _srcalpha; + uint32_t _destalpha; + + FixedFunction *_ff; + +public: + DrawSpanFFCommand() + { + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + + _source = (const uint32_t*)ds_source; + _light = LightBgra::calc_light_multiplier(ds_light); + _shade_constants = ds_shade_constants; + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + + static FixedFunction ff; + _ff = &ff; + } + + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(_y)) + return; + + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = _x2 - _x1 + 1; + _ff->DrawSpan(count, dest); + } +}; + +///////////////////////////////////////////////////////////////////////////// + class DrawerColumnCommand : public DrawerCommand { public: @@ -2700,11 +2763,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { + DrawerCommandQueue::QueueCommand(); +/* #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else DrawerCommandQueue::QueueCommand(); #endif +*/ } void R_DrawSpanMasked_rgba()