From 3dd8b593b6a9e35fefd4ce76490f963cccc70fb4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 26 Sep 2016 09:00:19 +0200 Subject: [PATCH 01/15] Use LLVM to JIT the code for one of the drawer functions --- src/CMakeLists.txt | 86 ++ .../fixedfunction/fixedfunction.cpp | 1046 +++++++++++++++++ src/r_compiler/fixedfunction/fixedfunction.h | 130 ++ src/r_compiler/llvm_include.h | 46 + src/r_compiler/ssa/ssa_barycentric_weight.h | 97 ++ src/r_compiler/ssa/ssa_bool.cpp | 91 ++ src/r_compiler/ssa/ssa_bool.h | 37 + src/r_compiler/ssa/ssa_float.cpp | 152 +++ src/r_compiler/ssa/ssa_float.h | 42 + src/r_compiler/ssa/ssa_float_ptr.cpp | 65 + src/r_compiler/ssa/ssa_float_ptr.h | 27 + src/r_compiler/ssa/ssa_for_block.cpp | 25 + src/r_compiler/ssa/ssa_for_block.h | 18 + src/r_compiler/ssa/ssa_function.cpp | 55 + src/r_compiler/ssa/ssa_function.h | 30 + src/r_compiler/ssa/ssa_if_block.cpp | 30 + src/r_compiler/ssa/ssa_if_block.h | 46 + src/r_compiler/ssa/ssa_int.cpp | 117 ++ src/r_compiler/ssa/ssa_int.h | 41 + src/r_compiler/ssa/ssa_int_ptr.cpp | 58 + src/r_compiler/ssa/ssa_int_ptr.h | 27 + src/r_compiler/ssa/ssa_phi.h | 33 + src/r_compiler/ssa/ssa_pixelformat4f.h | 28 + src/r_compiler/ssa/ssa_pixelformat4ub.h | 28 + .../ssa/ssa_pixelformat4ub_argb_rev.h | 35 + src/r_compiler/ssa/ssa_pixelformat4ub_rev.h | 28 + src/r_compiler/ssa/ssa_pixels.h | 39 + src/r_compiler/ssa/ssa_pixeltype.h | 498 ++++++++ src/r_compiler/ssa/ssa_scope.cpp | 65 + src/r_compiler/ssa/ssa_scope.h | 41 + src/r_compiler/ssa/ssa_stack.h | 25 + src/r_compiler/ssa/ssa_struct_type.cpp | 18 + src/r_compiler/ssa/ssa_struct_type.h | 17 + src/r_compiler/ssa/ssa_ubyte.cpp | 95 ++ src/r_compiler/ssa/ssa_ubyte.h | 35 + src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 106 ++ src/r_compiler/ssa/ssa_ubyte_ptr.h | 32 + src/r_compiler/ssa/ssa_value.cpp | 56 + src/r_compiler/ssa/ssa_value.h | 53 + src/r_compiler/ssa/ssa_vec16ub.cpp | 155 +++ src/r_compiler/ssa/ssa_vec16ub.h | 42 + src/r_compiler/ssa/ssa_vec4f.cpp | 244 ++++ 
src/r_compiler/ssa/ssa_vec4f.h | 57 + src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 50 + src/r_compiler/ssa/ssa_vec4f_ptr.h | 24 + src/r_compiler/ssa/ssa_vec4i.cpp | 213 ++++ src/r_compiler/ssa/ssa_vec4i.h | 56 + src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 50 + src/r_compiler/ssa/ssa_vec4i_ptr.h | 24 + src/r_compiler/ssa/ssa_vec8s.cpp | 178 +++ src/r_compiler/ssa/ssa_vec8s.h | 48 + src/r_draw_rgba.cpp | 66 ++ 52 files changed, 4705 insertions(+) create mode 100644 src/r_compiler/fixedfunction/fixedfunction.cpp create mode 100644 src/r_compiler/fixedfunction/fixedfunction.h create mode 100644 src/r_compiler/llvm_include.h create mode 100644 src/r_compiler/ssa/ssa_barycentric_weight.h create mode 100644 src/r_compiler/ssa/ssa_bool.cpp create mode 100644 src/r_compiler/ssa/ssa_bool.h create mode 100644 src/r_compiler/ssa/ssa_float.cpp create mode 100644 src/r_compiler/ssa/ssa_float.h create mode 100644 src/r_compiler/ssa/ssa_float_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_float_ptr.h create mode 100644 src/r_compiler/ssa/ssa_for_block.cpp create mode 100644 src/r_compiler/ssa/ssa_for_block.h create mode 100644 src/r_compiler/ssa/ssa_function.cpp create mode 100644 src/r_compiler/ssa/ssa_function.h create mode 100644 src/r_compiler/ssa/ssa_if_block.cpp create mode 100644 src/r_compiler/ssa/ssa_if_block.h create mode 100644 src/r_compiler/ssa/ssa_int.cpp create mode 100644 src/r_compiler/ssa/ssa_int.h create mode 100644 src/r_compiler/ssa/ssa_int_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_int_ptr.h create mode 100644 src/r_compiler/ssa/ssa_phi.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4f.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h create mode 100644 src/r_compiler/ssa/ssa_pixelformat4ub_rev.h create mode 100644 src/r_compiler/ssa/ssa_pixels.h create mode 100644 src/r_compiler/ssa/ssa_pixeltype.h create mode 100644 src/r_compiler/ssa/ssa_scope.cpp create mode 100644 
src/r_compiler/ssa/ssa_scope.h create mode 100644 src/r_compiler/ssa/ssa_stack.h create mode 100644 src/r_compiler/ssa/ssa_struct_type.cpp create mode 100644 src/r_compiler/ssa/ssa_struct_type.h create mode 100644 src/r_compiler/ssa/ssa_ubyte.cpp create mode 100644 src/r_compiler/ssa/ssa_ubyte.h create mode 100644 src/r_compiler/ssa/ssa_ubyte_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_ubyte_ptr.h create mode 100644 src/r_compiler/ssa/ssa_value.cpp create mode 100644 src/r_compiler/ssa/ssa_value.h create mode 100644 src/r_compiler/ssa/ssa_vec16ub.cpp create mode 100644 src/r_compiler/ssa/ssa_vec16ub.h create mode 100644 src/r_compiler/ssa/ssa_vec4f.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4f.h create mode 100644 src/r_compiler/ssa/ssa_vec4f_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4f_ptr.h create mode 100644 src/r_compiler/ssa/ssa_vec4i.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4i.h create mode 100644 src/r_compiler/ssa/ssa_vec4i_ptr.cpp create mode 100644 src/r_compiler/ssa/ssa_vec4i_ptr.h create mode 100644 src/r_compiler/ssa/ssa_vec8s.cpp create mode 100644 src/r_compiler/ssa/ssa_vec8s.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d71170ee..4f9599b35 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -104,6 +104,15 @@ if( WIN32 ) endif() add_definitions( -D_WIN32 ) + + set( FMOD_SEARCH_PATHS + "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" + "C:/Program Files (x86)/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" + # This next one is for Randy. 
+ "E:/Software/Dev/FMOD/${WIN_TYPE}/api" + ) + set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) + set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) set( FMOD_SEARCH_PATHS "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" @@ -255,6 +264,57 @@ if( NOT NO_OPENAL ) endif() endif() +# C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +llvm_map_components_to_libnames(llvm_libs + analysis + asmparser + asmprinter + bitreader + bitwriter + codegen + core + executionengine + globalisel + instcombine + ipo + irreader + linker + lto + mc + mcdisassembler + mcjit + mcparser + mirparser + object + objectyaml + orcjit + passes + scalaropts + selectiondag + support + symbolize + tablegen + target + transformutils + vectorize + x86asmparser + x86asmprinter + x86codegen + x86desc + x86info + x86utils + aarch64asmparser + aarch64asmprinter + aarch64codegen + aarch64desc + aarch64info + aarch64utils) +include_directories(${LLVM_INCLUDE_DIRS}) +set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) + if( NOT NO_FMOD ) # Search for FMOD include files if( NOT WIN32 ) @@ -843,6 +903,9 @@ file( GLOB HEADER_FILES posix/*.h posix/cocoa/*.h posix/sdl/*.h + r_compiler/*.h + r_compiler/ssa/*.h + r_compiler/fixedfunction/*.h r_data/*.h resourcefiles/*.h sfmt/*.h @@ -1372,6 +1435,26 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp + r_compiler/ssa/ssa_bool.cpp + r_compiler/ssa/ssa_float.cpp + r_compiler/ssa/ssa_float_ptr.cpp + r_compiler/ssa/ssa_for_block.cpp + r_compiler/ssa/ssa_function.cpp + r_compiler/ssa/ssa_if_block.cpp + r_compiler/ssa/ssa_int.cpp + r_compiler/ssa/ssa_int_ptr.cpp + r_compiler/ssa/ssa_scope.cpp + r_compiler/ssa/ssa_struct_type.cpp + r_compiler/ssa/ssa_ubyte.cpp + r_compiler/ssa/ssa_ubyte_ptr.cpp + r_compiler/ssa/ssa_value.cpp + r_compiler/ssa/ssa_vec4f.cpp 
+ r_compiler/ssa/ssa_vec4f_ptr.cpp + r_compiler/ssa/ssa_vec4i.cpp + r_compiler/ssa/ssa_vec4i_ptr.cpp + r_compiler/ssa/ssa_vec8s.cpp + r_compiler/ssa/ssa_vec16ub.cpp + r_compiler/fixedfunction/fixedfunction.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp @@ -1587,6 +1670,9 @@ source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURREN source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$") source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+") source_group("Render Interface" FILES r_defs.h r_renderer.h r_sky.cpp r_sky.h r_state.h r_utility.cpp r_utility.h) +source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+") +source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+") +source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+") source_group("Resource Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/resourcefiles/.+") source_group("POSIX Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/.+") source_group("Cocoa Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/cocoa/.+") diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp new file mode 100644 index 000000000..347ba6de3 --- /dev/null +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -0,0 +1,1046 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" +#include 
"r_compiler/ssa/ssa_barycentric_weight.h" + +RenderProgram::RenderProgram() +{ + llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + I_FatalError(reason.c_str()); + }); + + //llvm::llvm_start_multithreaded(); + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + + std::string errorstring; + llvm::EngineBuilder engineBuilder(std::move(moduleOwner)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(llvm::Reloc::Static); + engineBuilder.setEngineKind(llvm::EngineKind::JIT); + mEngine.reset(engineBuilder.create()); + if (!mEngine) + I_FatalError(errorstring.c_str()); +} + +RenderProgram::~RenderProgram() +{ + mEngine.reset(); + mContext.reset(); + //llvm::llvm_stop_multithreaded(); +} + +void *RenderProgram::PointerToFunction(const char *name) +{ + llvm::Function *function = mModule->getFunction(name); + if (!function) + return nullptr; + return mEngine->getPointerToFunction(function); +} + +FixedFunction::FixedFunction() +{ + CodegenDrawSpan(); + mProgram.engine()->finalizeObject(); + + DrawSpan = mProgram.GetProcAddress("DrawSpan"); +} + +void FixedFunction::CodegenDrawSpan() +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function("DrawSpan"); + function.add_parameter(SSAInt::llvm_type()); + function.add_parameter(SSAUBytePtr::llvm_type()); + function.create_public(); + + SSAInt count = function.parameter(0); + SSAUBytePtr data = function.parameter(1); + SSAStack stack_index; + + stack_index.store(0); + SSAForBlock loop; + { + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + //SSAVec4i color(255, 255, 0, 255); + 
//data[index * 4].store_vec4ub(color); + data[index * 4].store(0); + data[index * 4 + 1].store(128); + data[index * 4 + 2].store(255); + data[index * 4 + 3].store(255); + stack_index.store(index + 1); + } + loop.end_block(); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); +} + +#if 0 + +GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) +: program(program), vertex_codegen(vertex_codegen), fragment_codegen(fragment_codegen) +{ +} + +llvm::Type *GlslFixedFunction::get_sampler_struct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt32Ty(context)); // width + elements.push_back(llvm::Type::getInt32Ty(context)); // height + elements.push_back(llvm::Type::getInt8PtrTy(context)); // data + return llvm::StructType::get(context, elements, false); +} + +void GlslFixedFunction::codegen() +{ + codegen_render_scanline(5); + codegen_calc_window_positions(); + codegen_calc_polygon_face_direction(); + codegen_calc_polygon_y_range(); + codegen_update_polygon_edge(); + codegen_draw_triangles(5, 5); + codegen_texture(); + codegen_normalize(); + codegen_reflect(); + codegen_max(); + codegen_pow(); + codegen_dot(); + codegen_mix(); +} + +void GlslFixedFunction::codegen_texture() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_texture"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(get_sampler_struct(program.context())); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAValue sampler_ptr = function.parameter(1); + SSAVec4f pos = function.parameter(2); + + SSAInt width = sampler_ptr[0][0].load(); + SSAInt height = sampler_ptr[0][1].load(); + SSAUBytePtr data = sampler_ptr[0][2].load(); + + 
SSAPixels4ub_argb_rev pixels(width, height, data); + //builder.CreateRet(pixels.linear_clamp4f(pos).v); + builder.CreateRet(pixels.linear_clamp4f(pos[0], pos[1]).v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_normalize() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_normalize"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f vec = function.parameter(1); + + // To do: this can probably be done a lot faster with _mm_rsqrt_ss + SSAVec4f vec2 = vec * vec; + SSAVec4f length3(SSAFloat::sqrt(vec2[0] + vec2[1] + vec2[2])); + SSAVec4f normalized = vec / length3; + builder.CreateRet(normalized.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_reflect() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_reflect"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f i = function.parameter(1); + SSAVec4f n = function.parameter(2); + + SSAVec4f c = i * n; + SSAFloat dot3 = c[0] + c[1] + c[2]; + SSAVec4f result = i - (2.0f * dot3) * n; + builder.CreateRet(result.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_max() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_max"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAFloat a = 
function.parameter(1); + SSAFloat b = function.parameter(2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + SSAFloat c = phi.create(); + + builder.CreateRet(c.v); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_pow() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_pow"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAFloat a = function.parameter(1); + SSAFloat b = function.parameter(2); + builder.CreateRet(a.v); + //builder.CreateRet(SSAFloat::pow(a, b).v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_dot() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_dot"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.create_private(); + + SSAVec4f a = function.parameter(1); + SSAVec4f b = function.parameter(2); + + SSAVec4f c = a * b; + SSAFloat dot3 = c[0] + c[1] + c[2]; + builder.CreateRet(dot3.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_mix() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("fragment_mix"); + function.add_parameter(fragment_codegen.get_global_struct_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAVec4f::llvm_type()); + function.add_parameter(SSAFloat::llvm_type()); + function.create_private(); + + SSAVec4f v1 = 
function.parameter(1); + SSAVec4f v2 = function.parameter(2); + SSAFloat t = function.parameter(3); + + SSAVec4f b = t; + SSAVec4f a = 1.0f - b; + SSAVec4f mix = v1 * a + v2 * b; + builder.CreateRet(mix.v); + + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_draw_triangles(int num_vertex_in, int num_vertex_out) +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("draw_triangles"); + function.add_parameter(SSAInt::llvm_type()); // input_width + function.add_parameter(SSAInt::llvm_type()); // input_height + function.add_parameter(SSAUBytePtr::llvm_type()); // input_data + function.add_parameter(SSAInt::llvm_type()); // output_width + function.add_parameter(SSAInt::llvm_type()); // output_height + function.add_parameter(SSAUBytePtr::llvm_type()); // output_data + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAVec4fPtr::llvm_type()); // uniforms + function.add_parameter(SSAInt::llvm_type()); // first_vertex + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()->getPointerTo()); // vertex attributes + function.add_parameter(SSAInt::llvm_type()); // core + function.add_parameter(SSAInt::llvm_type()); // num_cores + function.create_public(); + + SSAInt input_width = function.parameter(0); + SSAInt input_height = function.parameter(1); + SSAUBytePtr input_data = function.parameter(2); + SSAInt output_width = function.parameter(3); + SSAInt output_height = function.parameter(4); + SSAUBytePtr output_data = function.parameter(5); + SSAInt viewport_x = function.parameter(6); + SSAInt viewport_y = function.parameter(7); + SSAInt viewport_width = function.parameter(8); + 
SSAInt viewport_height = function.parameter(9); + SSAVec4fPtr uniforms = function.parameter(10); + SSAInt first_vertex = function.parameter(11); + SSAInt num_vertices = function.parameter(12); + SSAValue vertex_in_ptr = function.parameter(13); + SSAInt core = function.parameter(14); + SSAInt num_cores = function.parameter(15); + + SSAStack stack_vertex_index; + SSAValue vertex_globals_ptr = SSAValue::from_llvm(SSAScope::alloca(vertex_codegen.get_global_struct_type())); + std::vector vertex_outs; + for (int i = 0; i < num_vertex_out; i++) + vertex_outs.push_back(SSAVec4fPtr::from_llvm(SSAScope::builder().CreateAlloca(SSAVec4f::llvm_type(), SSAInt(3).v))); + + int num_uniforms = 1; + { + llvm::Type *type = llvm::ArrayType::get(llvm::VectorType::get(llvm::Type::getFloatTy(program.context()), 4), 4); + llvm::Value *matrix = llvm::UndefValue::get(type); + for (int col = 0; col < 4; col++) + { + SSAVec4f column = uniforms[col].load_unaligned(); + std::vector indexes; + indexes.push_back(col); + matrix = builder.CreateInsertValue(matrix, column.v, indexes); + } + vertex_globals_ptr[0][0].store(matrix); + } + + stack_vertex_index.store(0); + SSAForBlock loop; + SSAInt vertex_index = stack_vertex_index.load(); + loop.loop_block(vertex_index + 2 < num_vertices); + for (int v = 0; v < 3; v++) + { + for (int i = 0; i < num_vertex_in; i++) + { + SSAValue attribute_ptr = vertex_in_ptr[i].load(); + SSAVec4f vertex_in = SSAVec4f::shuffle(SSAVec4fPtr(attribute_ptr)[first_vertex + vertex_index + v].load_unaligned(), 0, 1, 2, 3); + vertex_globals_ptr[0][num_uniforms + i].store(vertex_in.v); + } + SSAScope::builder().CreateCall(SSAScope::module()->getFunction((vertex_codegen.shader_prefix() + "main").c_str()), vertex_globals_ptr.v); + for (int i = 0; i < num_vertex_out; i++) + { + vertex_outs[i][v].store(vertex_globals_ptr[0][num_uniforms + num_vertex_in + i].load()); + } + } + + render_polygon(input_width, input_height, input_data, output_width, output_height, output_data, 
viewport_x, viewport_y, viewport_width, viewport_height, 3, vertex_outs, core, num_cores); + + stack_vertex_index.store(vertex_index + 3); + loop.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_window_positions() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_window_positions"); + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // gl_Position + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.create_private(); + SSAInt viewport_x = function.parameter(0); + SSAInt viewport_y = function.parameter(1); + SSAInt viewport_width = function.parameter(2); + SSAInt viewport_height = function.parameter(3); + SSAInt num_vertices = function.parameter(4); + SSAVec4fPtr clip_positions = function.parameter(5); + SSAVec4fPtr window_positions = function.parameter(6); + + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + SSAStack stack_transform_index; + stack_transform_index.store(0); + SSAForBlock loop_transform; + SSAInt transform_index = stack_transform_index.load(); + loop_transform.loop_block(transform_index < num_vertices); + { + SSAVec4f clip_pos = clip_positions[transform_index].load(); + SSAVec4f window_pos = viewport.clip_to_window(clip_pos); + window_positions[transform_index].store(window_pos); + + stack_transform_index.store(transform_index + 1); + } + loop_transform.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_polygon_face_direction() +{ + 
llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_polygon_face_direction"); + function.set_return_type(SSABool::llvm_type()); + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.create_private(); + SSAInt num_vertices = function.parameter(0); + SSAVec4fPtr window_positions = function.parameter(1); + + SSAStack stack_face_direction; + SSAStack stack_face_vertex_index; + stack_face_direction.store(0.0f); + stack_face_vertex_index.store(0); + SSAForBlock loop_face_direction; + SSAInt face_vertex_index = stack_face_vertex_index.load(); + loop_face_direction.loop_block(face_vertex_index < num_vertices); + { + SSAVec4f v0 = window_positions[face_vertex_index].load(); + SSAVec4f v1 = window_positions[(face_vertex_index + 1) % num_vertices].load(); + stack_face_direction.store(stack_face_direction.load() + v0[0] * v1[1] - v1[0] * v0[1]); + stack_face_vertex_index.store(face_vertex_index + 1); + } + loop_face_direction.end_block(); + SSABool front_facing_ccw = (stack_face_direction.load() >= 0.0f); + + builder.CreateRet(front_facing_ccw.v); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_calc_polygon_y_range() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("calc_polygon_y_range"); + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_start + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_end + function.create_private(); + SSAInt viewport_y = function.parameter(0); + SSAInt 
viewport_height = function.parameter(1); + SSAInt num_vertices = function.parameter(2); + SSAVec4fPtr window_positions = function.parameter(3); + SSAValue out_y_start = function.parameter(4); + SSAValue out_y_end = function.parameter(5); + + SSAStack y_start; + SSAStack y_end; + y_start.store(0x7fffffff); + y_end.store(0); + + SSAStack stack_minmax_index; + stack_minmax_index.store(0); + SSAForBlock loop_minmax; + SSAInt minmax_index = stack_minmax_index.load(); + loop_minmax.loop_block(minmax_index < num_vertices); + { + SSAInt y = SSAInt(window_positions[minmax_index].load()[1] + 0.5f); + y_start.store(ssa_min(y_start.load(), y)); + y_end.store(ssa_max(y_end.load(), y)); + stack_minmax_index.store(minmax_index + 1); + } + loop_minmax.end_block(); + + y_start.store(ssa_max(y_start.load(), viewport_y)); + y_end.store(ssa_min(y_end.load(), viewport_y + viewport_height)); + + out_y_start.store(y_start.load().v); + out_y_end.store(y_end.load().v); + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::codegen_update_polygon_edge() +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("update_polygon_edge"); + function.add_parameter(SSAFloat::llvm_type()); // y_position + function.add_parameter(SSAInt::llvm_type()); // num_vertices + function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout left_index + function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout right_index + function.create_private(); + SSAFloat float_y = function.parameter(0); + SSAInt num_vertices = function.parameter(1); + SSAVec4fPtr window_positions = function.parameter(2); + SSAValue ptr_left_index = function.parameter(3); + SSAValue ptr_right_index = function.parameter(4); + + SSAStack max_iterate; + max_iterate.store(num_vertices); + SSAForBlock loop_left; + SSAInt left_index 
= ptr_left_index.load(); + SSAInt right_index = ptr_right_index.load(); + SSAInt next_left_index = (left_index + 1) % num_vertices; + SSAFloat left_y0 = window_positions[left_index].load()[1]; + SSAFloat left_y1 = window_positions[next_left_index].load()[1]; + SSABool in_range = (left_y0 >= float_y && left_y1 < float_y) || (left_y1 >= float_y && left_y0 < float_y); + loop_left.loop_block((left_index == right_index || !in_range) && max_iterate.load() > 0); + ptr_left_index.store(next_left_index.v); + max_iterate.store(max_iterate.load() - 1); + loop_left.end_block(); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::render_polygon( + SSAInt input_width, + SSAInt input_height, + SSAUBytePtr input_data, + SSAInt output_width, + SSAInt output_height, + SSAUBytePtr output_data, + SSAInt viewport_x, + SSAInt viewport_y, + SSAInt viewport_width, + SSAInt viewport_height, + SSAInt num_vertices, + std::vector fragment_ins, + SSAInt core, + SSAInt num_cores) +{ + SSAVec4fPtr window_positions = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), num_vertices)); + SSAVec4fPtr left_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + SSAVec4fPtr right_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + + /////////////////////////////////// + + llvm::Value *calc_window_positions_args[] = { viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, num_vertices.v, fragment_ins[0].v, window_positions.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_window_positions"), calc_window_positions_args); + + llvm::Value *calc_polygon_face_direction_args[] = { num_vertices.v, window_positions.v }; + SSABool front_facing_ccw = SSABool::from_llvm(SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_face_direction"), calc_polygon_face_direction_args)); + + SSAIfBlock cull_if; 
+ cull_if.if_block(front_facing_ccw); + { + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + + SSAStack y_start; + SSAStack y_end; + + llvm::Value *calc_polygon_y_range_args[] = { viewport_y.v, viewport_height.v, num_vertices.v, window_positions.v, y_start.v, y_end.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_y_range"), calc_polygon_y_range_args); + + y_start.store((y_start.load() + num_cores - core - 1) / num_cores * num_cores + core); // find_first_line_for_core + + SSAStack stack_left_index; + SSAStack stack_right_index; + SSAStack stack_int_y; + stack_left_index.store(0); + stack_right_index.store(1); + stack_int_y.store(y_start.load()); + SSAForBlock scanlines_loop; + scanlines_loop.loop_block(stack_int_y.load() < y_end.load()); + { + SSAInt int_y = stack_int_y.load(); + SSAFloat float_y = SSAFloat(int_y) + 0.5f; + + llvm::Value *update_polygon_edge_args0[] = { float_y.v, num_vertices.v, window_positions.v, stack_left_index.v, stack_right_index.v }; + llvm::Value *update_polygon_edge_args1[] = { float_y.v, num_vertices.v, window_positions.v, stack_right_index.v, stack_left_index.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args0); + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args1); + + SSAInt left_index = stack_left_index.load(); + SSAInt right_index = stack_right_index.load(); + SSAInt next_left_index = (left_index + 1) % num_vertices; + SSAInt next_right_index = (right_index + 1) % num_vertices; + + SSABarycentricWeight left_weight(viewport, fragment_ins[0][left_index].load(), fragment_ins[0][next_left_index].load()); + SSABarycentricWeight right_weight(viewport, fragment_ins[0][right_index].load(), fragment_ins[0][next_right_index].load()); + + SSAFloat a = left_weight.from_window_y(int_y); + SSAFloat b = right_weight.from_window_y(int_y); + + SSAVec4f 
left_clip_pos = left_weight.v1 * a + left_weight.v2 * (1.0f - a); + SSAVec4f right_clip_pos = right_weight.v1 * b + right_weight.v2 * (1.0f - b); + + for (size_t i = 0; i + 1 < fragment_ins.size(); i++) + { + left_line_varyings[i].store(fragment_ins[i + 1][left_index].load() * a + fragment_ins[i + 1][next_left_index].load() * (1.0f - a)); + right_line_varyings[i].store(fragment_ins[i + 1][right_index].load() * b + fragment_ins[i + 1][next_right_index].load() * (1.0f - b)); + } + + llvm::Value *render_scanline_args[] = { output_width.v, output_height.v, output_data.v, viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, int_y.v, left_clip_pos.v, right_clip_pos.v, left_line_varyings.v, right_line_varyings.v, input_width.v, input_height.v, input_data.v }; + SSAScope::builder().CreateCall(SSAScope::module()->getFunction("render_scanline"), render_scanline_args); + + stack_int_y.store(stack_int_y.load() + num_cores); + } + scanlines_loop.end_block(); + } + cull_if.end_block(); +} + +void GlslFixedFunction::codegen_render_scanline(int num_varyings) +{ + llvm::IRBuilder<> builder(program.context()); + SSAScope ssa_scope(&program.context(), program.module(), &builder); + + SSAFunction function("render_scanline"); + function.add_parameter(SSAInt::llvm_type()); // output_width + function.add_parameter(SSAInt::llvm_type()); // output_height + function.add_parameter(SSAUBytePtr::llvm_type()); // output_data + function.add_parameter(SSAInt::llvm_type()); // viewport_x + function.add_parameter(SSAInt::llvm_type()); // viewport_y + function.add_parameter(SSAInt::llvm_type()); // viewport_width + function.add_parameter(SSAInt::llvm_type()); // viewport_height + function.add_parameter(SSAInt::llvm_type()); // y + function.add_parameter(SSAVec4f::llvm_type()); // left_clip_pos + function.add_parameter(SSAVec4f::llvm_type()); // right_clip_pos + function.add_parameter(SSAVec4fPtr::llvm_type()); // left_line_varyings + function.add_parameter(SSAVec4fPtr::llvm_type()); // 
right_line_varyings + function.add_parameter(SSAInt::llvm_type()); // input_width + function.add_parameter(SSAInt::llvm_type()); // input_height + function.add_parameter(SSAUBytePtr::llvm_type()); // input_data + function.create_private(); + SSAInt output_width = function.parameter(0); + SSAInt output_height = function.parameter(1); + SSAUBytePtr output_data = function.parameter(2); + SSAInt viewport_x = function.parameter(3); + SSAInt viewport_y = function.parameter(4); + SSAInt viewport_width = function.parameter(5); + SSAInt viewport_height = function.parameter(6); + SSAInt y = function.parameter(7); + SSAVec4f left_clip_pos = function.parameter(8); + SSAVec4f right_clip_pos = function.parameter(9); + SSAVec4fPtr left_line_varyings = function.parameter(10); + SSAVec4fPtr right_line_varyings = function.parameter(11); + SSAInt input_width = function.parameter(12); + SSAInt input_height = function.parameter(13); + SSAUBytePtr input_data = function.parameter(14); + + SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); + + SSAScopeHint hint; + + SSAStack stack_x; + SSAStack stack_xnormalized; + + //////////////////////////////// + // Prepare to render scanline: + + hint.set("prepare"); + OuterData outer_data; + + SSAVec4f left_window_pos = viewport.clip_to_window(left_clip_pos); + SSAVec4f right_window_pos = viewport.clip_to_window(right_clip_pos); + + SSAFloat x0 = left_window_pos[0]; + SSAFloat x1 = right_window_pos[0]; + SSAInt start(ssa_min(x0, x1)); + SSAInt end(ssa_max(x1, x0) + 0.5f); + + start = ssa_max(start, viewport.x); + end = ssa_min(end, viewport.right); + + SSABarycentricWeight weight_scanline(viewport, left_clip_pos, right_clip_pos); + + outer_data.start = start; + outer_data.end = end; + outer_data.input_width = input_width; + outer_data.input_height = input_height; + outer_data.output_width = output_width; + outer_data.output_height = output_height; + outer_data.input_pixels = input_data; + outer_data.output_pixels_line = 
output_data[output_width * y * 4]; + + outer_data.viewport_x = SSAFloat(viewport.x); + outer_data.viewport_rcp_half_width = viewport.rcp_half_width; + outer_data.dx = weight_scanline.v2[0] - weight_scanline.v1[0]; + outer_data.dw = weight_scanline.v2[3] - weight_scanline.v1[3]; + outer_data.v1w = weight_scanline.v1[3]; + outer_data.v1x = weight_scanline.v1[0]; + outer_data.sse_left_varying_in = left_line_varyings; + outer_data.sse_right_varying_in = right_line_varyings; + outer_data.num_varyings = num_varyings; + + outer_data.sampler = SSAScope::alloca(get_sampler_struct(SSAScope::context())); + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + llvm::Value *sampler_width_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1)); + llvm::Value *sampler_height_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2)); + llvm::Value *sampler_data_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); + SSAScope::builder().CreateStore(outer_data.input_width.v, sampler_width_ptr, false); + SSAScope::builder().CreateStore(outer_data.input_height.v, sampler_height_ptr, false); + SSAScope::builder().CreateStore(outer_data.input_pixels.v, sampler_data_ptr, false); + + + SSAVec4i xposinit = SSAVec4i(outer_data.start) + SSAVec4i(0, 1, 2, 3); + stack_x.store(outer_data.start); + stack_xnormalized.store((SSAVec4f(xposinit) + 0.5f - outer_data.viewport_x) * outer_data.viewport_rcp_half_width - 1.0f); + + ///////////////////////////////////////////////////////////////////////// + // First pixels: + + hint.set("firstpixels"); + SSAIfBlock if_block; + if_block.if_block(outer_data.end - 
outer_data.start > 3); + process_first_pixels(outer_data, stack_x, stack_xnormalized); + if_block.end_block(); + + ///////////////////////////////////////////////////////////////////////// + // Start: for (SSAInt x = start; x < end; x += 4) + + hint.set("loopstart"); + + SSAForBlock for_block; + SSAInt x = stack_x.load(); + for_block.loop_block(x + 3 < outer_data.end); + + ///////////////////////////////////////////////////////////////////////// + // Loop body + { + SSAVec4f xnormalized = stack_xnormalized.load(); + + hint.set("blendload"); + SSAVec4i desti[4]; + SSAVec16ub dest_block = outer_data.output_pixels_line[x << 2].load_vec16ub(); + SSAVec4i::extend(dest_block, desti[0], desti[1], desti[2], desti[3]); + + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + + hint.set("blendstore"); + outer_data.output_pixels_line[x << 2].store_vec16ub(dest_block); + hint.clear(); + + xnormalized = xnormalized + 4.0f * outer_data.viewport_rcp_half_width; + stack_xnormalized.store(xnormalized); + } + ///////////////////////////////////////////////////////////////////////// + // End: for (SSAInt x = start; x < end; x += 4) + + hint.set("loopend"); + x = x + 4; + stack_x.store(x); + for_block.end_block(); + + ///////////////////////////////////////////////////////////////////////// + // Last pixels: + + hint.set("lastpixels"); + process_last_pixels(outer_data, stack_x, stack_xnormalized); + + builder.CreateRetVoid(); + llvm::verifyFunction(*function.func); +} + +void GlslFixedFunction::process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +{ + SSAInt x = stack_x.load(); + SSAVec4f xnormalized = stack_xnormalized.load(); + SSAInt offset = x << 2; + + // Find how many pixels we have left until we 16 byte align: + llvm::Value *output_line_align = SSAScope::builder().CreatePtrToInt(outer_data.output_pixels_line.v, llvm::Type::getInt32Ty(SSAScope::context())); + output_line_align = 
SSAScope::builder().CreateAdd(output_line_align, offset.v); + SSAInt left = 4 - (SSAInt::from_llvm(SSAScope::builder().CreateURem(output_line_align, SSAInt(16).v)) >> 2); + + SSAIfBlock if_block0; + if_block0.if_block(left == 3); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + outer_data.output_pixels_line[offset + 8].load_vec4ub(), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); + + stack_x.store(x + 3); + stack_xnormalized.store(xnormalized + 3.0f * outer_data.viewport_rcp_half_width); + } + if_block0.else_block(); + { + SSAIfBlock if_block1; + if_block1.if_block(left == 2); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + + stack_x.store(x + 2); + stack_xnormalized.store(xnormalized + 2.0f * outer_data.viewport_rcp_half_width); + } + if_block1.else_block(); + { + SSAIfBlock if_block2; + if_block2.if_block(left == 1); + { + SSAVec4i 
dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + + stack_x.store(x + 1); + stack_xnormalized.store(xnormalized + outer_data.viewport_rcp_half_width); + } + if_block2.end_block(); + } + if_block1.end_block(); + } + if_block0.end_block(); +} + +void GlslFixedFunction::process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +{ + SSAInt x = stack_x.load(); + SSAVec4f xnormalized = stack_xnormalized.load(); + + SSAInt left = outer_data.end - x; + SSAInt offset = x << 2; + SSAIfBlock if_block0; + SSAIfBlock if_block1; + SSAIfBlock if_block2; + if_block0.if_block(left == 3); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + outer_data.output_pixels_line[offset + 8].load_vec4ub(), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); + } + if_block0.else_block(); + if_block1.if_block(left == 2); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + outer_data.output_pixels_line[offset + 4].load_vec4ub(), + 
SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); + } + if_block1.else_block(); + if_block2.if_block(left == 1); + { + SSAVec4i dest[4] = + { + outer_data.output_pixels_line[offset].load_vec4ub(), + SSAVec4i(0), + SSAVec4i(0), + SSAVec4i(0) + }; + + // To do: do this in a less braindead way + SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); + SSAVec4f frag_colors[4]; + inner_block(outer_data, xnormalized, frag_colors); + blend(frag_colors, dest_block); + SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); + + outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); + } + if_block2.end_block(); + if_block1.end_block(); + if_block0.end_block(); +} + +void GlslFixedFunction::inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *frag_color) +{ + SSAScopeHint hint; + hint.set("varying"); + SSAVec4f a = (xnormalized * data.v1w - data.v1x) * SSAVec4f::rcp(data.dx - xnormalized * data.dw); + SSAVec4f one_minus_a = 1.0f - a; + + llvm::Value *globals_ptr[4]; + for (int i = 0; i < 4; i++) + { + globals_ptr[i] = SSAScope::alloca(fragment_codegen.get_global_struct_type()); + + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + llvm::Value *sampler_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + SSAScope::builder().CreateStore(data.sampler, sampler_ptr, false); + + for (int j = 0; j < data.num_varyings; j++) + { + 
SSAVec4f field_value = + data.sse_left_varying_in[j].load() * SSAVec4f::shuffle(one_minus_a, i, i, i, i) + + data.sse_right_varying_in[j].load() * SSAVec4f::shuffle(a, i, i, i, i); + index_list.clear(); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)j+1))); + llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + SSAScope::builder().CreateStore(field_value.v, field_ptr, false); + } + } + + hint.set("fragprogram"); + for (int i = 0; i < 4; i++) + { + SSAScope::builder().CreateCall(SSAScope::module()->getFunction((fragment_codegen.shader_prefix() + "main").c_str()), globals_ptr[i]); + + std::vector index_list; + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); + index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)5))); + llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); + frag_color[i] = SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(field_ptr, false)); + } +} +/* +void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) +{ + SSAVec4i desti[4]; + SSAVec4i::extend(dest, desti[0], desti[1], desti[2], desti[3]); + + // Pre-mulitiplied alpha blend: + for (int pixel_index = 0; pixel_index < 4; pixel_index++) + { + SSAVec4f src = SSAVec4f::shuffle(frag_color[pixel_index], 2, 1, 0, 3); + desti[pixel_index] = SSAVec4i(src * 255.0f); + SSAVec4f dest = SSAVec4f(desti[pixel_index]) * (1.0f / 255.0f); + SSAVec4f alpha = SSAVec4f::shuffle(dest, 3, 3, 3, 3); + SSAVec4f resultf = src + dest * (1.0f - alpha); + desti[pixel_index] = SSAVec4i(resultf * 255.0f); + } + + dest = SSAVec16ub(SSAVec8s(desti[0], desti[1]), SSAVec8s(desti[2], desti[3])); +} +*/ +void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) +{ + for (int i = 0; i < 4; i++) + frag_color[i] = 
SSAVec4f::shuffle(frag_color[i], 2, 1, 0, 3); + + // Pre-multiplied alpha blend: + SSAVec8s dest0 = SSAVec8s::extendlo(dest); + SSAVec8s dest1 = SSAVec8s::extendhi(dest); + + SSAVec8s src0(SSAVec4i(frag_color[0] * 255.0f), SSAVec4i(frag_color[1] * 255.0f)); + SSAVec8s src1(SSAVec4i(frag_color[2] * 255.0f), SSAVec4i(frag_color[3] * 255.0f)); + + // Extract and duplicate alpha components: + SSAVec8s alpha0 = SSAVec8s::shuffle(src0, 3, 3, 3, 3, 7, 7, 7, 7); + SSAVec8s alpha1 = SSAVec8s::shuffle(src1, 3, 3, 3, 3, 7, 7, 7, 7); + + // Clamp alpha to at most 255 (min, not max - max would pin every pixel to fully opaque), then convert from 0-255 to 0-256 range: + alpha0 = SSAVec8s::min_sse2(alpha0, 255); + alpha1 = SSAVec8s::min_sse2(alpha1, 255); + alpha0 = alpha0 + (alpha0 >> 7); + alpha1 = alpha1 + (alpha1 >> 7); + + SSAVec8s result0 = src0 + ((dest0 * (256 - alpha0)) >> 8); + SSAVec8s result1 = src1 + ((dest1 * (256 - alpha1)) >> 8); + + dest = SSAVec16ub(result0, result1); +} + +#endif diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h new file mode 100644 index 000000000..4c81fc108 --- /dev/null +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -0,0 +1,130 @@ + +#pragma once + +#include "r_compiler/ssa/ssa_vec4f.h" +#include "r_compiler/ssa/ssa_vec4i.h" +#include "r_compiler/ssa/ssa_vec8s.h" +#include "r_compiler/ssa/ssa_vec16ub.h" +#include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_ubyte_ptr.h" +#include "r_compiler/ssa/ssa_vec4f_ptr.h" +#include "r_compiler/ssa/ssa_vec4i_ptr.h" +#include "r_compiler/ssa/ssa_pixels.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" +#include "r_compiler/llvm_include.h" + +class RenderProgram +{ +public: + RenderProgram(); + ~RenderProgram(); + + template + Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule; } + llvm::ExecutionEngine *engine() { return mEngine.get(); } +
+private: + void *PointerToFunction(const char *name); + + std::unique_ptr mContext; + llvm::Module *mModule; + std::unique_ptr mEngine; +}; + +class FixedFunction +{ +public: + FixedFunction(); + + void(*DrawSpan)(int, uint32_t *) = nullptr; + +private: + void CodegenDrawSpan(); + + RenderProgram mProgram; +}; + +#if 0 + +class GlslProgram; +class GlslCodeGen; + +class GlslFixedFunction +{ +public: + GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen); + void codegen(); + static llvm::Type *get_sampler_struct(llvm::LLVMContext &context); + +private: + void codegen_draw_triangles(int num_vertex_in, int num_vertex_out); + void codegen_calc_window_positions(); + void codegen_calc_polygon_face_direction(); + void codegen_calc_polygon_y_range(); + void codegen_update_polygon_edge(); + void codegen_texture(); + void codegen_normalize(); + void codegen_reflect(); + void codegen_max(); + void codegen_pow(); + void codegen_dot(); + void codegen_mix(); + + struct OuterData + { + OuterData() : sampler() { } + + SSAInt start; + SSAInt end; + SSAInt input_width; + SSAInt input_height; + SSAInt output_width; + SSAInt output_height; + SSAUBytePtr input_pixels; + SSAUBytePtr output_pixels_line; + + SSAVec4fPtr sse_left_varying_in; + SSAVec4fPtr sse_right_varying_in; + int num_varyings; + SSAVec4f viewport_x; + SSAVec4f viewport_rcp_half_width; + SSAVec4f dx; + SSAVec4f dw; + SSAVec4f v1w; + SSAVec4f v1x; + + llvm::Value *sampler; + }; + + void render_polygon( + SSAInt input_width, + SSAInt input_height, + SSAUBytePtr input_data, + SSAInt output_width, + SSAInt output_height, + SSAUBytePtr output_data, + SSAInt viewport_x, + SSAInt viewport_y, + SSAInt viewport_width, + SSAInt viewport_height, + SSAInt num_vertices, + std::vector fragment_ins, + SSAInt core, + SSAInt num_cores); + + void codegen_render_scanline(int num_varyings); + void process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); + 
void process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); + void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors); + void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest); + + GlslProgram &program; + GlslCodeGen &vertex_codegen; + GlslCodeGen &fragment_codegen; +}; + +#endif diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h new file mode 100644 index 000000000..1eed549e1 --- /dev/null +++ b/src/r_compiler/llvm_include.h @@ -0,0 +1,46 @@ + +#pragma once + +#if defined(min) +#define llvm_min_bug min +#undef min +#endif +#if defined(max) +#define llvm_max_bug max +#undef max +#endif + +#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned +#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible +#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list +#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning) +#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_SCL_SECURE_NO_WARNINGS.
See documentation on how to use Visual C++ 'Checked Iterators' +#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once +#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(llvm_min_bug) +#define min llvm_min_bug +#undef llvm_min_bug +#endif +#if defined(llvm_max_bug) +#define max llvm_max_bug +#undef llvm_max_bug +#endif diff --git a/src/r_compiler/ssa/ssa_barycentric_weight.h b/src/r_compiler/ssa/ssa_barycentric_weight.h new file mode 100644 index 000000000..52117ccc6 --- /dev/null +++ b/src/r_compiler/ssa/ssa_barycentric_weight.h @@ -0,0 +1,97 @@ + +#pragma once + +#include "ssa_vec4f.h" +#include "ssa_float.h" +#include "ssa_int.h" + +class SSAViewport +{ +public: + SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height) + : x(x), y(y), width(width), height(height), right(x + width), bottom(y + height), + half_width(SSAFloat(width) * 0.5f), half_height(SSAFloat(height) * 0.5f), + rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)), + rcp_half_height(1.0f / (SSAFloat(height) * 0.5f)) + { + } + + SSAInt x, y; + SSAInt width, height; + SSAInt right, bottom; + SSAFloat half_width; + SSAFloat half_height; + SSAFloat rcp_half_width; + SSAFloat rcp_half_height; + + SSAVec4f clip_to_window(SSAVec4f clip) const + { + SSAFloat w = clip[3]; + SSAVec4f normalized = SSAVec4f::insert_element(clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3), w, 3); + return normalized_to_window(normalized); + } + + SSAVec4f normalized_to_window(SSAVec4f normalized) const + { + return SSAVec4f( + SSAFloat(x) + 
(normalized[0] + 1.0f) * half_width, + SSAFloat(y) + (normalized[1] + 1.0f) * half_height, + 0.0f - normalized[2], + normalized[3]); + } +}; + +class SSABarycentricWeight +{ +public: + SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2); + SSAFloat from_window_x(SSAInt x) const; + SSAFloat from_window_y(SSAInt y) const; + + SSAViewport viewport; + SSAVec4f v1; + SSAVec4f v2; +}; + +inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport viewport, SSAVec4f v1, SSAVec4f v2) +: viewport(viewport), v1(v1), v2(v2) +{ +} + +inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const +{ +/* SSAFloat xnormalized = (x + 0.5f - viewport.x) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2.x-v1.x; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.x - xnormalized * v2.w) / (dx - xnormalized * dw); + return a;*/ + + SSAFloat xnormalized = (SSAFloat(x) + 0.5f - SSAFloat(viewport.x)) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2[0]-v1[0]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (xnormalized * v1[3] - v1[0]) / (dx - xnormalized * dw); + return 1.0f - t; +} + +inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const +{ +/* SSAFloat ynormalized = (y + 0.5f - viewport.y) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2.y-v1.y; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.y - ynormalized * v2.w) / (dy - ynormalized * dw); + return a;*/ + + SSAFloat ynormalized = (SSAFloat(y) + 0.5f - SSAFloat(viewport.y)) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2[1]-v1[1]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (ynormalized * v1[3] - v1[1]) / (dy - ynormalized * dw); + return 1.0f - t; +} + +/* + y = (v1.y + t * dy) / (v1.w + t * dw) + + y * v1.w + y * t * dw = v1.y + t * dy + y * v1.w - v1.y = t * (dy - y * dw) + t = (y * v1.w - v1.y) / (dy - y * dw) +*/ diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp new file mode 100644 index 000000000..101323911 --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ 
-0,0 +1,91 @@ + +#include "ssa_bool.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSABool::SSABool() +: v(0) +{ +} +/* +SSABool::SSABool(bool constant) +: v(0) +{ +} +*/ +SSABool::SSABool(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSABool::llvm_type() +{ + return llvm::Type::getInt1Ty(SSAScope::context()); +} + +SSABool operator&&(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSABool operator||(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} + +SSABool operator!(const SSABool &a) +{ + return SSABool::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); +} + +SSABool operator<(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); +} + +///////////////////////////////////////////////////////////////////////////// + +SSABool operator<(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAFloat &a, const SSAFloat &b) +{ + 
return SSABool::from_llvm(SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h new file mode 100644 index 000000000..2ef79e49b --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.h @@ -0,0 +1,37 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSABool +{ +public: + SSABool(); + //SSABool(bool constant); + explicit SSABool(llvm::Value *v); + static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSABool operator&&(const SSABool &a, const SSABool &b); +SSABool operator||(const SSABool &a, const SSABool &b); + +SSABool operator!(const SSABool &a); + +SSABool operator<(const SSAInt &a, const SSAInt &b); +SSABool operator<=(const SSAInt &a, const SSAInt &b); +SSABool operator==(const SSAInt &a, const SSAInt &b); +SSABool operator>=(const SSAInt &a, const SSAInt &b); +SSABool operator>(const SSAInt &a, const SSAInt &b); + +SSABool operator<(const SSAFloat &a, const SSAFloat &b); +SSABool operator<=(const SSAFloat &a, const SSAFloat &b); +SSABool operator==(const SSAFloat &a, const SSAFloat &b); +SSABool operator>=(const SSAFloat &a, const SSAFloat &b); +SSABool operator>(const SSAFloat &a, const SSAFloat &b); diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp new file mode 100644 index 000000000..87488af74 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -0,0 +1,152 @@ + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + 
+SSAFloat::SSAFloat() +: v(0) +{ +} + +SSAFloat::SSAFloat(float constant) +: v(0) +{ + v = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)); +} + +SSAFloat::SSAFloat(SSAInt i) +: v(0) +{ + v = SSAScope::builder().CreateSIToFP(i.v, llvm::Type::getFloatTy(SSAScope::context()), SSAScope::hint()); +} + +SSAFloat::SSAFloat(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloat::llvm_type() +{ + return llvm::Type::getFloatTy(SSAScope::context()); +} + +SSAFloat SSAFloat::sqrt(SSAFloat f) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::sin(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::cos(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::pow(SSAFloat val, SSAFloat power) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(val.v); + args.push_back(power.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAFloat SSAFloat::exp(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::log(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return 
SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); +} + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator+(float a, const SSAFloat &b) +{ + return SSAFloat(a) + b; +} + +SSAFloat operator-(float a, const SSAFloat &b) +{ + return SSAFloat(a) - b; +} + +SSAFloat operator*(float a, const SSAFloat &b) +{ + return SSAFloat(a) * b; +} + +SSAFloat operator/(float a, const SSAFloat &b) +{ + return SSAFloat(a) / b; +} + +SSAFloat operator+(const SSAFloat &a, float b) +{ + return a + SSAFloat(b); +} + +SSAFloat operator-(const SSAFloat &a, float b) +{ + return a - SSAFloat(b); +} + +SSAFloat operator*(const SSAFloat &a, float b) +{ + return a * SSAFloat(b); +} + +SSAFloat operator/(const SSAFloat &a, float b) +{ + return a / SSAFloat(b); +} + diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h new file mode 100644 index 000000000..2349ab877 --- 
/dev/null +++ b/src/r_compiler/ssa/ssa_float.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAInt; + +class SSAFloat +{ +public: + SSAFloat(); + SSAFloat(SSAInt i); + SSAFloat(float constant); + explicit SSAFloat(llvm::Value *v); + static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } + static llvm::Type *llvm_type(); + static SSAFloat sqrt(SSAFloat f); + static SSAFloat sin(SSAFloat val); + static SSAFloat cos(SSAFloat val); + static SSAFloat pow(SSAFloat val, SSAFloat power); + static SSAFloat exp(SSAFloat val); + static SSAFloat log(SSAFloat val); + static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); + + llvm::Value *v; +}; + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b); + +SSAFloat operator+(float a, const SSAFloat &b); +SSAFloat operator-(float a, const SSAFloat &b); +SSAFloat operator*(float a, const SSAFloat &b); +SSAFloat operator/(float a, const SSAFloat &b); + +SSAFloat operator+(const SSAFloat &a, float b); +SSAFloat operator-(const SSAFloat &a, float b); +SSAFloat operator*(const SSAFloat &a, float b); +SSAFloat operator/(const SSAFloat &a, float b); diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp new file mode 100644 index 000000000..4413c6e92 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -0,0 +1,65 @@ + +#include "ssa_float_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAFloatPtr::SSAFloatPtr() +: v(0) +{ +} + +SSAFloatPtr::SSAFloatPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloatPtr::llvm_type() +{ + return llvm::Type::getFloatPtrTy(SSAScope::context()); +} + +SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const +{ + return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, 
SSAScope::hint())); +} + +SSAFloat SSAFloatPtr::load() const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); + // return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(get_intrinsic(llvm::Intrinsic::x86_sse2_loadu_dq), SSAScope::builder().CreateBitCast(v, llvm::PointerType::getUnqual(llvm::IntegerType::get(SSAScope::context(), 8))))); +} + +void SSAFloatPtr::store(const SSAFloat &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); +} + +void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + 
SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h new file mode 100644 index 000000000..a4318e027 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloatPtr +{ +public: + SSAFloatPtr(); + explicit SSAFloatPtr(llvm::Value *v); + static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } + static llvm::Type *llvm_type(); + SSAFloatPtr operator[](SSAInt index) const; + SSAFloat load() const; + SSAVec4f load_vec4f() const; + SSAVec4f load_unaligned_vec4f() const; + void store(const SSAFloat &new_value); + void store_vec4f(const SSAVec4f &new_value); + void store_unaligned_vec4f(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp new file mode 100644 index 000000000..ce9328607 --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -0,0 +1,25 @@ + +#include "ssa_for_block.h" +#include "ssa_scope.h" + +SSAForBlock::SSAForBlock() +: if_basic_block(0), loop_basic_block(0), end_basic_block(0) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forbegin", SSAScope::builder().GetInsertBlock()->getParent()); + loop_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forloop", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forend", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAForBlock::loop_block(SSABool true_condition) +{ + SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); + 
SSAScope::builder().SetInsertPoint(loop_basic_block); +} + +void SSAForBlock::end_block() +{ + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h new file mode 100644 index 000000000..58803dee5 --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -0,0 +1,18 @@ + +#pragma once + +#include "ssa_bool.h" +#include "r_compiler/llvm_include.h" + +class SSAForBlock +{ +public: + SSAForBlock(); + void loop_block(SSABool true_condition); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *loop_basic_block; + llvm::BasicBlock *end_basic_block; +}; diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp new file mode 100644 index 000000000..aee4de5a9 --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -0,0 +1,55 @@ + +#include "ssa_function.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "ssa_value.h" +#include "r_compiler/llvm_include.h" + +SSAFunction::SSAFunction(const std::string name) +: name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())), func() +{ +} + +void SSAFunction::set_return_type(llvm::Type *type) +{ + return_type = type; +} + +void SSAFunction::add_parameter(llvm::Type *type) +{ + parameters.push_back(type); +} + +void SSAFunction::create_public() +{ + func = SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module()); + //func->setCallingConv(llvm::CallingConv::X86_StdCall); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} + +void SSAFunction::create_private() +{ + func = 
SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module()); + func->addFnAttr(llvm::Attribute::AlwaysInline); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} + +SSAValue SSAFunction::parameter(int index) +{ + llvm::Function::arg_iterator arg_it = func->arg_begin(); + for (int i = 0; i < index; i++) + ++arg_it; + return SSAValue::from_llvm(static_cast(arg_it)); +} diff --git a/src/r_compiler/ssa/ssa_function.h b/src/r_compiler/ssa/ssa_function.h new file mode 100644 index 000000000..f1969c35b --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.h @@ -0,0 +1,30 @@ + +#pragma once + +#include +#include + +namespace llvm { class Value; } +namespace llvm { class Type; } +namespace llvm { class Function; } + +class SSAInt; +class SSAValue; + +class SSAFunction +{ +public: + SSAFunction(const std::string name); + void set_return_type(llvm::Type *type); + void add_parameter(llvm::Type *type); + void create_public(); + void create_private(); + SSAValue parameter(int index); + + llvm::Function *func; + +private: + std::string name; + llvm::Type *return_type; + std::vector parameters; +}; diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp new file mode 100644 index 000000000..e2de9ecad --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -0,0 +1,30 @@ + +#include "ssa_if_block.h" +#include "ssa_scope.h" + +SSAIfBlock::SSAIfBlock() +: if_basic_block(0), else_basic_block(0), end_basic_block(0) +{ +} + +void SSAIfBlock::if_block(SSABool true_condition) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "if", SSAScope::builder().GetInsertBlock()->getParent()); + else_basic_block = 
llvm::BasicBlock::Create(SSAScope::context(), "else", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = else_basic_block; + SSAScope::builder().CreateCondBr(true_condition.v, if_basic_block, else_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAIfBlock::else_block() +{ + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(else_basic_block); +} + +void SSAIfBlock::end_block() +{ + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h new file mode 100644 index 000000000..98c534a86 --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -0,0 +1,46 @@ + +#pragma once + +#include "ssa_bool.h" +#include "ssa_phi.h" +#include "r_compiler/llvm_include.h" + +class SSAIfBlock +{ +public: + SSAIfBlock(); + void if_block(SSABool true_condition); + void else_block(); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *else_basic_block; + llvm::BasicBlock *end_basic_block; +}; + +template +T ssa_min(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a <= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} + +template +T ssa_max(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a >= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp new file mode 100644 index 000000000..9f3c54f50 --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -0,0 +1,117 @@ + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_scope.h" +#include 
"r_compiler/llvm_include.h" + +SSAInt::SSAInt() +: v(0) +{ +} + +SSAInt::SSAInt(int constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)); +} + +SSAInt::SSAInt(SSAFloat f) +: v(0) +{ + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); +} + +SSAInt::SSAInt(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAInt::llvm_type() +{ + return llvm::Type::getInt32Ty(SSAScope::context()); +} + +SSAInt operator+(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator-(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator*(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator/(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator%(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator+(int a, const SSAInt &b) +{ + return SSAInt(a) + b; +} + +SSAInt operator-(int a, const SSAInt &b) +{ + return SSAInt(a) - b; +} + +SSAInt operator*(int a, const SSAInt &b) +{ + return SSAInt(a) * b; +} + +SSAInt operator/(int a, const SSAInt &b) +{ + return SSAInt(a) / b; +} + +SSAInt operator%(int a, const SSAInt &b) +{ + return SSAInt(a) % b; +} + +SSAInt operator+(const SSAInt &a, int b) +{ + return a + SSAInt(b); +} + +SSAInt operator-(const SSAInt &a, int b) +{ + return a - SSAInt(b); +} + +SSAInt operator*(const SSAInt &a, int b) +{ + return a * SSAInt(b); +} + +SSAInt operator/(const SSAInt &a, int b) +{ + return a / SSAInt(b); +} + +SSAInt operator%(const SSAInt &a, int b) +{ + return a % SSAInt(b); +} + +SSAInt operator<<(const SSAInt 
&a, int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); +} + +SSAInt operator>>(const SSAInt &a, int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h new file mode 100644 index 000000000..0be37ee7e --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.h @@ -0,0 +1,41 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloat; + +class SSAInt +{ +public: + SSAInt(); + SSAInt(int constant); + SSAInt(SSAFloat f); + explicit SSAInt(llvm::Value *v); + static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAInt operator+(const SSAInt &a, const SSAInt &b); +SSAInt operator-(const SSAInt &a, const SSAInt &b); +SSAInt operator*(const SSAInt &a, const SSAInt &b); +SSAInt operator/(const SSAInt &a, const SSAInt &b); +SSAInt operator%(const SSAInt &a, const SSAInt &b); + +SSAInt operator+(int a, const SSAInt &b); +SSAInt operator-(int a, const SSAInt &b); +SSAInt operator*(int a, const SSAInt &b); +SSAInt operator/(int a, const SSAInt &b); +SSAInt operator%(int a, const SSAInt &b); + +SSAInt operator+(const SSAInt &a, int b); +SSAInt operator-(const SSAInt &a, int b); +SSAInt operator*(const SSAInt &a, int b); +SSAInt operator/(const SSAInt &a, int b); +SSAInt operator%(const SSAInt &a, int b); + +SSAInt operator<<(const SSAInt &a, int bits); +SSAInt operator>>(const SSAInt &a, int bits); diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp new file mode 100644 index 000000000..dd0ca17f6 --- /dev/null +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -0,0 +1,58 @@ + +#include "ssa_int_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAIntPtr::SSAIntPtr() +: v(0) +{ +} + +SSAIntPtr::SSAIntPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type 
*SSAIntPtr::llvm_type() +{ + return llvm::Type::getInt32PtrTy(SSAScope::context()); +} + +SSAIntPtr SSAIntPtr::operator[](SSAInt index) const +{ + return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAInt SSAIntPtr::load() const +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_unaligned_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAIntPtr::store(const SSAInt &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); +} + +void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h new file mode 100644 index 000000000..20e024a31 --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_int_ptr.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAIntPtr +{ +public: + SSAIntPtr(); + explicit SSAIntPtr(llvm::Value *v); + static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } + static llvm::Type *llvm_type(); + SSAIntPtr operator[](SSAInt index) const; + SSAInt load() const; + SSAVec4i load_vec4i() const; + SSAVec4i load_unaligned_vec4i() const; + void store(const SSAInt &new_value); + void store_vec4i(const SSAVec4i &new_value); + void store_unaligned_vec4i(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_phi.h b/src/r_compiler/ssa/ssa_phi.h new file mode 100644 index 000000000..89cbc8cf0 --- /dev/null +++ b/src/r_compiler/ssa/ssa_phi.h @@ -0,0 +1,33 @@ + +#pragma once + +#include "ssa_scope.h" + +class SSAIfBlock; + +template +class SSAPhi +{ +public: + void add_incoming(SSAVariable var) + { + incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock())); + } + + SSAVariable create() + { + llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint()); + for (size_t i = 0; i < incoming.size(); i++) + phi_node->addIncoming(incoming[i].v, incoming[i].bb); + return SSAVariable::from_llvm(phi_node); + } + +private: + struct Incoming + { + Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { } + llvm::Value *v; + llvm::BasicBlock *bb; + }; + std::vector incoming; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h new file mode 100644 index 000000000..507e95b5d --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4f.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float_ptr.h" + +class SSAPixelFormat4f +{ +public: + SSAPixelFormat4f() { } + SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt 
width, SSAInt height) : _pixels(pixels) { } + + SSAFloatPtr pixels() { return _pixels; } + SSAFloatPtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return _pixels[index * 4].load_vec4f(); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4f(pixel); + } + +protected: + SSAFloatPtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h new file mode 100644 index 000000000..fdf98c4aa --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub +{ +public: + SSAPixelFormat4ub() { } + SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f)); + } + +private: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h new file mode 100644 index 000000000..4601eeb3c --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h @@ -0,0 +1,35 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_argb_rev +{ +public: + SSAPixelFormat4ub_argb_rev() { } + SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } +/* + void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const + { + SSAVec8s p = _pixels[index * 4].load_vec8s(); + out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 
0, 3); + out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); + } +*/ + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h new file mode 100644 index 000000000..402480c49 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_rev +{ +public: + SSAPixelFormat4ub_rev() { } + SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixels.h b/src/r_compiler/ssa/ssa_pixels.h new file mode 100644 index 000000000..a4209d439 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixels.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "ssa_ubyte.h" +#include "ssa_ubyte_ptr.h" +#include "ssa_float.h" +#include "ssa_float_ptr.h" +#include "ssa_int.h" +#include "ssa_pixeltype.h" +//#include "ssa_pixelformat1f.h" +//#include "ssa_pixelformat2f.h" +//#include "ssa_pixelformat3f.h" +#include "ssa_pixelformat4f.h" +//#include "ssa_pixelformat1ub.h" +//#include "ssa_pixelformat2ub.h" +//#include "ssa_pixelformat3ub.h" +//#include "ssa_pixelformat3ub_rev.h" 
+#include "ssa_pixelformat4ub.h" +//#include "ssa_pixelformat4ub_argb.h" +#include "ssa_pixelformat4ub_rev.h" +#include "ssa_pixelformat4ub_argb_rev.h" +//#include "ssa_pixelformat4ub_channel.h" + +//typedef SSAPixelType SSAPixels1f; +//typedef SSAPixelType SSAPixels2f; +//typedef SSAPixelType SSAPixels3f; +typedef SSAPixelType SSAPixels4f; + +//typedef SSAPixelType SSAPixels1ub; +//typedef SSAPixelType SSAPixels2ub; +//typedef SSAPixelType SSAPixels3ub; +typedef SSAPixelType SSAPixels4ub; +//typedef SSAPixelType SSAPixels4ub_argb; + +//typedef SSAPixelType SSAPixels3ub_rev; +typedef SSAPixelType SSAPixels4ub_rev; +typedef SSAPixelType SSAPixels4ub_argb_rev; + +//typedef SSAPixelType SSAPixels4ub_channel; diff --git a/src/r_compiler/ssa/ssa_pixeltype.h b/src/r_compiler/ssa/ssa_pixeltype.h new file mode 100644 index 000000000..8614f171d --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixeltype.h @@ -0,0 +1,498 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_vec4f.h" +#include "ssa_bool.h" +#include "ssa_if_block.h" +#include "ssa_phi.h" + +template +class SSAPixelType : public PixelFormat +{ +public: + SSAPixelType() + { + } + + SSAPixelType(SSAInt width, SSAInt height, PixelType pixels) + : PixelFormat(pixels, width, height), _width(width), _height(height) + { + _width32 = SSAVec4i(_width); + SSAVec4i height32(_height); + _widthps = SSAVec4f(_width32); + _heightps = SSAVec4f(height32); + _width16 = SSAVec8s(_width32, _width32); + + _widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4); + _widthheightps = SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4); + } + + SSAInt width() const { return _width; } + SSAInt height() const { return _height; } + SSAInt size() const { return _width * _height; } + + SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; } + SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; } + //void throw_if_out_of_bounds(SSAInt i) const { 
if (!in_bounds(i)) throw clan::Exception("Out of bounds"); } + //void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); } + + SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); } + SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); } + SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); } + SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); } + SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); } + SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); } + SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); } + SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); } + + static SSAInt int_min(SSAInt a, SSAInt b) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a <= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt int_max(SSAInt a, SSAInt b) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt clamp(SSAInt v, SSAInt size) + { + return int_max(int_min(v, size - 1), 0); + } + + static SSAInt repeat(SSAInt v, SSAInt size) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming(size - 1 + v % size); + branch.end_block(); + return phi.create(); + } + + static SSAInt mirror(SSAInt v, SSAInt size) + { + SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); + branch.end_block(); + return phi.create(); + } + + static SSAInt round(SSAFloat v) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0.0f); + phi.add_incoming(v + 0.5f); + branch.else_block(); + 
phi.add_incoming(v - 0.5f); + branch.end_block(); + return SSAInt(phi.create()); + } + + // To do: fix this: + static SSAInt int_floor(SSAFloat v) + { + return SSAInt(v); + } + static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); } + + SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); } + void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); } + + SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); } + SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); } + SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); } + + SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const + { + SSAFloat a = fract(s * SSAFloat(_width) - 0.5f); + SSAFloat b = fract(t * SSAFloat(_height) - 0.5f); + SSAFloat inv_a = 1.0f - a; + SSAFloat inv_b = 1.0f - b; + return + samples[0] * (inv_a * inv_b) + + samples[1] * (a * inv_b) + + samples[2] * (inv_a * b) + + samples[3] * (a * b); + } + + void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_clamp4f(x, y); + out_pixels[1] = get_clamp4f(x + 1, y); + out_pixels[2] = get_clamp4f(x, y + 1); + out_pixels[3] = get_clamp4f(x + 1, y + 1); + /* + SSAInt x0 = clamp_x(x); + SSAInt x1 = clamp_x(x + 1); + SSAInt y0 = clamp_y(y); + SSAInt y1 = clamp_y(y + 1); + SSAInt offset0 = y0 * _width; + SSAInt offset1 = y1 * _width; + SSAPhi phi0; + SSAPhi phi1; + SSAPhi phi2; + SSAPhi phi3; + SSAIfBlock if0; + if0.if_block(x0 + 1 == x1); + phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.else_block(); + 
phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.end_block(); + out_pixels[0] = phi0.create(); + out_pixels[1] = phi1.create(); + out_pixels[2] = phi2.create(); + out_pixels[3] = phi3.create(); + */ + } + + void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_repeat4f(x, y); + out_pixels[1] = get_repeat4f(x + 1, y); + out_pixels[2] = get_repeat4f(x, y + 1); + out_pixels[3] = get_repeat4f(x + 1, y + 1); + } + + void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_mirror4f(x, y); + out_pixels[1] = get_mirror4f(x + 1, y); + out_pixels[2] = get_mirror4f(x, y + 1); + out_pixels[3] = get_mirror4f(x + 1, y + 1); + } + + SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); } + + SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_clamp4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + ///////////////////////////////////////////////////////////////////////// 
+	// Packed versions:
+	// Same helpers as above, operating on four coordinates at once (SSAVec4f / SSAVec4i).
+	SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); }
+	SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); }
+	SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); }
+	SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); }
+	SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); }
+	SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); }
+	SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); }
+	SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); }
+	// Limits every lane of v to [0, size - 1].
+	static SSAVec4i clamp(SSAVec4i v, SSAInt size)
+	{
+		return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0);
+	}
+	// Not implemented for vectors yet: currently falls back to clamp(); the scalar algorithm is kept below for reference.
+	static SSAVec4i repeat(SSAVec4i v, SSAInt size)
+	{
+		return clamp(v, size);
+		/*SSAPhi<SSAVec4i> phi;
+		SSAIfBlock branch;
+		branch.if_block(v >= 0);
+		phi.add_incoming(v % size);
+		branch.else_block();
+		phi.add_incoming(size - 1 + v % size);
+		branch.end_block();
+		return phi.create();*/
+	}
+	// Not implemented for vectors yet: currently falls back to clamp(); the scalar algorithm is kept below for reference.
+	static SSAVec4i mirror(SSAVec4i v, SSAInt size)
+	{
+		return clamp(v, size);
+		/*SSAInt size2 = size * 2;
+		v = repeat(v, size2);
+
+		SSAPhi<SSAVec4i> phi;
+		SSAIfBlock branch;
+		branch.if_block(v < size);
+		phi.add_incoming(v);
+		branch.else_block();
+		phi.add_incoming(size2 - v - 1);
+		branch.end_block();
+		return phi.create();*/
+	}
+	// Adds 0.5 carrying the sign of each lane (built by OR-ing the sign bit into 0.5f), then converts to integers.
+	static SSAVec4i round(SSAVec4f v)
+	{
+		// Maybe we should use the normal round SSE function (but that requires the rounding mode is set to round to nearest before the code runs)
+		SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000);
+		SSAVec4f signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f)));
+		return v + signed_half;
+	}
+	// Integer conversion of each lane, adjusted by the lane's shifted-down sign bit.
+	static SSAVec4i int_floor(SSAVec4f v)
+	{
+		return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31);
+	}
+
+	static SSAVec4f fract(SSAVec4f v)
+	{
+		// return v - SSAVec4f::floor_sse4(v);
+		return v - SSAVec4f(int_floor(v));
+	}
+	// Samples the texel for lane `index` of (s, t); wrap_x / wrap_y map raw texel coordinates into the image.
+	template<typename WrapXFunctor, typename WrapYFunctor>
+	SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
+	{
+		SSAVec4i x = int_floor(s * _widthps - 0.5f);
+		SSAVec4i y = int_floor(t * _heightps - 0.5f);
+		SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y));
+		SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
+		SSAVec8s offsetlo = y16 * _width16;
+		SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + wrap_x(x); // x must be wrapped too, otherwise the offset can leave the row
+		return PixelFormat::get4f(offset[index]);
+	}
+	// Nearest sampling of lane `index` with clamped coordinates.
+	SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const
+	{
+		struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
+		struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
+		return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
+		/*
+		return nearest_helper4f(
+			s, t, index,
+			[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
+			[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
+		*/
+	}
+	// Nearest sampling of lane `index` with repeating coordinates.
+	SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const
+	{
+		struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; };
+		struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; };
+		return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
+		/*
+		return nearest_helper4f(
+			s, t, index,
+			[this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); },
+			[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); });
+		*/
+	}
+	// Nearest sampling of lane `index` with mirrored coordinates.
+	SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const
+	{
+		struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; };
+		struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; };
+		return nearest_helper4f(s, t, index, WrapX(this), WrapY(this));
+		/*
+		return nearest_helper4f(
+			s, t, index,
+			[this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); },
+			[this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); });
+		*/
+	}
+	// Fetches the 2x2 neighbourhood for lane `index` of (s, t) into out_pixels[0..3], wrapping x and y with the given functors.
+	template<typename WrapXFunctor, typename WrapYFunctor>
+	void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const
+	{
+		SSAVec4i x = int_floor(s * _widthps - 0.5f);
+		SSAVec4i y = int_floor(t * _heightps - 0.5f);
+		SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y));
+		SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16);
+		SSAVec8s offsetlo = y16 * _width16;
+		SSAVec4i x0 = wrap_x(x);
+		SSAVec4i x1 = wrap_x(x + 1);
+		SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi);
+		SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi);
+		SSAVec4i offset0 = x0 + line0;
+		SSAVec4i offset1 = x1 + line0;
+		SSAVec4i offset2 = x0 + line1;
+		SSAVec4i offset3 = x1 + line1;
+		out_pixels[0] = PixelFormat::get4f(offset0[index]);
+		out_pixels[1] = PixelFormat::get4f(offset1[index]);
+		out_pixels[2] = PixelFormat::get4f(offset2[index]);
+		out_pixels[3] = PixelFormat::get4f(offset3[index]);
+	}
+	// 2x2 gather for lane `index` with clamped coordinates.
+	void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
+	{
+		struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; };
+		struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; };
+		return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this));
+		/*
+		gather_helper4f(
+			s, t, index, out_pixels,
+			[this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); },
+			[this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); });
+		*/
+	}
+
+	void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const
+	{
+		struct WrapX { WrapX(const SSAPixelType *self) 
: self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const + { + SSAVec4f a = fract(s * _widthps - 0.5f); + SSAVec4f b = fract(t * _heightps - 0.5f); + SSAVec4f 
inv_a = 1.0f - a; + SSAVec4f inv_b = 1.0f - b; + return + samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) + + samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) + + samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) + + samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index); + } + + ///////////////////////////////////////////////////////////////////////// + + SSAVec4i clamp(SSAVec4i sstt) const + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0); + } + + template + void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1)); + SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7); + SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3); + out_pixels[0] = PixelFormat::get4f(offsets[0]); + out_pixels[1] = PixelFormat::get4f(offsets[1]); + out_pixels[2] = PixelFormat::get4f(offsets[2]); + out_pixels[3] = PixelFormat::get4f(offsets[3]); + } + + void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const + { + struct Wrap { Wrap(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i sstt) { return self->clamp(sstt); } const SSAPixelType *self; }; + return gather_helper4f(st, out_pixels, Wrap(this)); + } + + SSAVec4f linear_clamp4f(SSAVec4f st) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(st, samples); + return linear_interpolate4f(st, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f); + SSAVec4f inv_aabb = 1.0f - aabb; + SSAVec4f ab_inv_ab = SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6); + SSAVec4f ab__inv_a_b__inv_a_inv_b__a_invb = 
ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0); + return + samples[0] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 2, 2, 2, 2) + + samples[1] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 3, 3, 3, 3) + + samples[2] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 1, 1, 1, 1) + + samples[3] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 0, 0, 0, 0); + } + +public: + SSAInt _width; + SSAInt _height; + SSAVec4i _width32; + SSAVec8s _width16; + SSAVec4f _widthps; + SSAVec4f _heightps; + + SSAVec4i _widthheight; + SSAVec4f _widthheightps; +}; diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp new file mode 100644 index 000000000..f9d16f188 --- /dev/null +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -0,0 +1,65 @@ + +#include "ssa_scope.h" +#include "ssa_int.h" + +SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder) +: _context(context), _module(module), _builder(builder) +{ + instance = this; +} + +SSAScope::~SSAScope() +{ + instance = 0; +} + +llvm::LLVMContext &SSAScope::context() +{ + return *instance->_context; +} + +llvm::Module *SSAScope::module() +{ + return instance->_module; +} + +llvm::IRBuilder<> &SSAScope::builder() +{ + return *instance->_builder; +} + +llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types) +{ + llvm::Function *func = module()->getFunction(llvm::Intrinsic::getName(id, parameter_types)); // look up under the same mangled name that Function::Create registers below, so overloaded intrinsics are reused instead of re-declared + if (func == 0) + func = llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, llvm::Intrinsic::getName(id, parameter_types), module()); + return func; +} + +llvm::Value *SSAScope::alloca(llvm::Type *type) +{ + return alloca(type, SSAInt(1)); +} + +llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size) +{ + // Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work + llvm::BasicBlock &entry = 
SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock(); + llvm::IRBuilder<> alloca_builder(&entry, entry.begin()); + return alloca_builder.CreateAlloca(type, size.v, hint()); +} + +const std::string &SSAScope::hint() +{ + return instance->_hint; +} + +void SSAScope::set_hint(const std::string &new_hint) +{ + if (new_hint.empty()) + instance->_hint = "tmp"; + else + instance->_hint = new_hint; +} + +SSAScope *SSAScope::instance = 0; diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h new file mode 100644 index 000000000..d184643ad --- /dev/null +++ b/src/r_compiler/ssa/ssa_scope.h @@ -0,0 +1,41 @@ + +#pragma once + +#include "r_compiler/llvm_include.h" + +class SSAInt; + +class SSAScope +{ +public: + SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder); + ~SSAScope(); + static llvm::LLVMContext &context(); + static llvm::Module *module(); + static llvm::IRBuilder<> &builder(); + static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef parameter_types = llvm::ArrayRef()); + static llvm::Value *alloca(llvm::Type *type); + static llvm::Value *alloca(llvm::Type *type, SSAInt size); + static const std::string &hint(); + static void set_hint(const std::string &hint); + +private: + static SSAScope *instance; + llvm::LLVMContext *_context; + llvm::Module *_module; + llvm::IRBuilder<> *_builder; + std::string _hint; +}; + +class SSAScopeHint +{ +public: + SSAScopeHint() : old_hint(SSAScope::hint()) { } + SSAScopeHint(const std::string &hint) : old_hint(SSAScope::hint()) { SSAScope::set_hint(hint); } + ~SSAScopeHint() { SSAScope::set_hint(old_hint); } + void set(const std::string &hint) { SSAScope::set_hint(hint); } + void clear() { SSAScope::set_hint(old_hint); } + +private: + std::string old_hint; +}; diff --git a/src/r_compiler/ssa/ssa_stack.h b/src/r_compiler/ssa/ssa_stack.h new file mode 100644 index 000000000..435530be1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_stack.h @@ -0,0 +1,25 
@@ + +#pragma once + +template +class SSAStack +{ +public: + SSAStack() + : v(0) + { + v = SSAScope::alloca(SSAVariable::llvm_type()); + } + + SSAVariable load() const + { + return SSAVariable::from_llvm(SSAScope::builder().CreateLoad(v, SSAScope::hint())); + } + + void store(const SSAVariable &new_value) + { + SSAScope::builder().CreateStore(new_value.v, v); + } + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp new file mode 100644 index 000000000..4a79768ce --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -0,0 +1,18 @@ + +#include "ssa_struct_type.h" +#include "ssa_scope.h" + +void SSAStructType::add_parameter(llvm::Type *type) +{ + elements.push_back(type); +} + +llvm::Type *SSAStructType::llvm_type() +{ + return llvm::StructType::get(SSAScope::context(), elements, false); +} + +llvm::Type *SSAStructType::llvm_type_packed() +{ + return llvm::StructType::get(SSAScope::context(), elements, true); +} diff --git a/src/r_compiler/ssa/ssa_struct_type.h b/src/r_compiler/ssa/ssa_struct_type.h new file mode 100644 index 000000000..67b056b32 --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.h @@ -0,0 +1,17 @@ + +#pragma once + +#include + +namespace llvm { class Type; } + +class SSAStructType +{ +public: + void add_parameter(llvm::Type *type); + llvm::Type *llvm_type(); + llvm::Type *llvm_type_packed(); + +private: + std::vector elements; +}; diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp new file mode 100644 index 000000000..04db4fd28 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -0,0 +1,95 @@ + +#include "ssa_ubyte.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAUByte::SSAUByte() +: v(0) +{ +} + +SSAUByte::SSAUByte(unsigned char constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false)); +} + +SSAUByte::SSAUByte(llvm::Value *v) +: v(v) +{ +} + +llvm::Type 
*SSAUByte::llvm_type() +{ + return llvm::Type::getInt8Ty(SSAScope::context()); +} + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAUByte operator/(const SSAUByte &a, const SSAUByte &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v); +} +*/ +SSAUByte operator+(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) + b; +} + +SSAUByte operator-(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) - b; +} + +SSAUByte operator*(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) * b; +} +/* +SSAUByte operator/(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) / b; +} +*/ +SSAUByte operator+(const SSAUByte &a, unsigned char b) +{ + return a + SSAUByte(b); +} + +SSAUByte operator-(const SSAUByte &a, unsigned char b) +{ + return a - SSAUByte(b); +} + +SSAUByte operator*(const SSAUByte &a, unsigned char b) +{ + return a * SSAUByte(b); +} +/* +SSAUByte operator/(const SSAUByte &a, unsigned char b) +{ + return a / SSAUByte(b); +} +*/ +SSAUByte operator<<(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateShl(a.v, bits)); +} + +SSAUByte operator>>(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLShr(a.v, bits)); +} diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h new file mode 100644 index 000000000..f1e12afba --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -0,0 +1,35 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAUByte +{ +public: + SSAUByte(); + 
SSAUByte(unsigned char constant); + explicit SSAUByte(llvm::Value *v); + static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b); +//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b); + +SSAUByte operator+(unsigned char a, const SSAUByte &b); +SSAUByte operator-(unsigned char a, const SSAUByte &b); +SSAUByte operator*(unsigned char a, const SSAUByte &b); +//SSAUByte operator/(unsigned char a, const SSAUByte &b); + +SSAUByte operator+(const SSAUByte &a, unsigned char b); +SSAUByte operator-(const SSAUByte &a, unsigned char b); +SSAUByte operator*(const SSAUByte &a, unsigned char b); +//SSAUByte operator/(const SSAUByte &a, unsigned char b); + +SSAUByte operator<<(const SSAUByte &a, unsigned char bits); +SSAUByte operator>>(const SSAUByte &a, unsigned char bits); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp new file mode 100644 index 000000000..825806148 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -0,0 +1,106 @@ + +#include "ssa_ubyte_ptr.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAUBytePtr::SSAUBytePtr() +: v(0) +{ +} + +SSAUBytePtr::SSAUBytePtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAUBytePtr::llvm_type() +{ + return llvm::Type::getInt8PtrTy(SSAScope::context()); +} + +SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const +{ + return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAUByte SSAUBytePtr::load() const +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAUBytePtr::load_vec4ub() const +{ + // _mm_cvtsi32_si128 as implemented by clang: + SSAInt i32 = 
SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); + llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); + SSAVec4i v4i = SSAVec4i::from_llvm(v); + + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 +/* + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); + llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type)); +*/ +} + +SSAVec16ub SSAUBytePtr::load_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, 
m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAUBytePtr::store(const SSAUByte &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) +{ + // Store using saturate: + SSAVec8s v8s(new_value, new_value); + SSAVec16ub v16ub(v8s, v8s); + + llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); + SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); +} + +void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); + + // The following generates _mm_stream_si128, maybe! 
+ // llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); + // inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node); +} + +void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) +{ + /*llvm::Value *values[2] = + { + SSAScope::builder().CreateBitCast(v, llvm::Type::getInt8PtrTy(SSAScope::context())), + new_value.v + }; + SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_dq), values);*/ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1); // explicit align 1: a plain CreateStore assumes the vector type's ABI alignment, which defeats the purpose of an unaligned store +} diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h new file mode 100644 index 000000000..5b68ee1ad --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -0,0 +1,32 @@ + +#pragma once + +#include "ssa_ubyte.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" +#include "ssa_vec8s.h" +#include "ssa_vec16ub.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAUBytePtr +{ +public: + SSAUBytePtr(); + explicit SSAUBytePtr(llvm::Value *v); + static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); } + static llvm::Type *llvm_type(); + SSAUBytePtr operator[](SSAInt index) const; + SSAUByte load() const; + SSAVec4i load_vec4ub() const; + SSAVec8s load_vec8s() const; + SSAVec16ub load_vec16ub() const; + SSAVec16ub load_unaligned_vec16ub() const; + void store(const SSAUByte &new_value); + void store_vec4ub(const SSAVec4i &new_value); + void store_vec16ub(const SSAVec16ub &new_value); + void store_unaligned_vec16ub(const SSAVec16ub &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp new file mode 100644 index 000000000..877420fc5 --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_value.cpp @@ -0,0 +1,56 @@ + +#include "ssa_value.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAValue SSAValue::load() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false)); +} + +void SSAValue::store(llvm::Value *value) +{ + SSAScope::builder().CreateStore(value, v, false); +} + +SSAIndexLookup SSAValue::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAValue::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(index.v); + return result; +} + +///////////////////////////////////////////////////////////////////////////// + +SSAIndexLookup::operator SSAValue() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateGEP(v, indexes)); +} + +SSAIndexLookup SSAIndexLookup::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAIndexLookup::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(index.v); + return result; +} diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h new file mode 100644 index 000000000..ec156a452 --- /dev/null +++ b/src/r_compiler/ssa/ssa_value.h @@ -0,0 +1,53 @@ + +#pragma once + +#include + +namespace llvm { class Value; } + +class SSAInt; +class SSAIndexLookup; + +class SSAValue +{ +public: + SSAValue() : v(0) { } + + static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } + + SSAValue load(); + void store(llvm::Value *v); + + template + operator Type() + { + return Type::from_llvm(v); + } + + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); + + llvm::Value *v; +}; + +class SSAIndexLookup +{ +public: + SSAIndexLookup() : v(0) { } + + 
llvm::Value *v; + std::vector indexes; + + SSAValue load() { SSAValue value = *this; return value.load(); } + void store(llvm::Value *v) { SSAValue value = *this; return value.store(v); } + + template + operator Type() + { + return Type::from_llvm(v); + } + + operator SSAValue(); + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); +}; diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp new file mode 100644 index 000000000..f18d68718 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -0,0 +1,155 @@ + +#include "ssa_vec16ub.h" +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAVec16ub::SSAVec16ub() +: v(0) +{ +} + +SSAVec16ub::SSAVec16ub(unsigned char constant) +: v(0) +{ + std::vector constants; + constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant0, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant1, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant2, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant3, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant4, false))); + 
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant5, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant6, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant7, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant8, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant9, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant10, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant11, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant12, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant13, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant14, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant15, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub(llvm::Value *v) +: v(v) +{ +} + +SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) +: v(0) +{ + llvm::Value *values[2] = { s0.v, s1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec16ub::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); +} + +SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int 
index15) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7, index8, index9, index10, index11, index12, index13, index14, index15); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index8))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index9))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index10))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index11))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index12))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index13))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index14))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), 
llvm::APInt(32, index15))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint()); +} +*/ +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) + b; +} + +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) - b; +} + +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) * b; +} +/* +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) / b; +} +*/ +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b) +{ + return a + SSAVec16ub(b); +} + +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b) +{ + return a - SSAVec16ub(b); +} + +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b) +{ + return a * SSAVec16ub(b); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b) +{ + return a / SSAVec16ub(b); +} +*/ \ No newline at end of file diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h new file mode 100644 index 000000000..e4cfcdc87 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec8s; +class SSAVec4i; + +class SSAVec16ub +{ +public: + 
SSAVec16ub(); + SSAVec16ub(unsigned char constant); + SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); + explicit SSAVec16ub(llvm::Value *v); + SSAVec16ub(SSAVec8s s0, SSAVec8s s1); + static SSAVec16ub from_llvm(llvm::Value *v) { return SSAVec16ub(v); } + static llvm::Type *llvm_type(); + static SSAVec16ub bitcast(SSAVec4i i32); + static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + + llvm::Value *v; +}; + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b); + +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b); + +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char 
b);
diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp
new file mode 100644
index 000000000..e002018fe
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4f.cpp
@@ -0,0 +1,244 @@
+
+#include "ssa_vec4f.h"
+#include "ssa_vec4i.h"
+#include "ssa_float.h"
+#include "ssa_int.h"
+#include "ssa_scope.h"
+#include "r_compiler/llvm_include.h"
+
+SSAVec4f::SSAVec4f() // Default: wraps a null llvm::Value
+: v(0)
+{
+}
+
+SSAVec4f::SSAVec4f(float constant) // Constant vector with all four lanes equal to `constant`
+: v(0)
+{
+	std::vector<llvm::Constant*> constants;
+	constants.resize(4, llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)));
+	v = llvm::ConstantVector::get(constants);
+}
+
+SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3) // Constant vector, one value per lane
+: v(0)
+{
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant0)));
+	constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant1)));
+	constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant2)));
+	constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant3)));
+	v = llvm::ConstantVector::get(constants);
+}
+
+SSAVec4f::SSAVec4f(SSAFloat f) // Broadcast: bitcast the scalar to <1 x float>, then shuffle with an all-zero mask
+: v(0)
+{
+	llvm::Type *m1xfloattype = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1);
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	llvm::Value *mask = llvm::ConstantVector::get(constants);
+	v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(f.v, m1xfloattype, SSAScope::hint()), llvm::UndefValue::get(m1xfloattype), mask, SSAScope::hint());
+}
+
+SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3) // Builds the vector lane by lane, starting from undef
+: v(0)
+{
+	v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(llvm_type()), f0.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0)));
+	v = SSAScope::builder().CreateInsertElement(v, f1.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1)));
+	v = SSAScope::builder().CreateInsertElement(v, f2.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2)));
+	v = SSAScope::builder().CreateInsertElement(v, f3.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)3)));
+}
+
+SSAVec4f::SSAVec4f(llvm::Value *v) // Wraps an existing value without emitting any instruction
+: v(v)
+{
+}
+
+SSAVec4f::SSAVec4f(SSAVec4i i32) // Numeric int -> float conversion through the SSE2 cvtdq2ps intrinsic
+: v(0)
+{
+	//llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4);
+	//return builder->CreateSIToFP(i32.v, m128type);
+	v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint());
+}
+
+llvm::Type *SSAVec4f::llvm_type() // The LLVM type <4 x float>
+{
+	return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4);
+}
+
+SSAFloat SSAVec4f::operator[](SSAInt index) const // Extracts the lane selected by a runtime index
+{
+	return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) // Returns vec4f with lane `index` replaced by `value`
+{
+	return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index))));
+}
+
+SSAVec4f SSAVec4f::bitcast(SSAVec4i i32) // Reinterprets the bits of i32 as <4 x float>; no numeric conversion
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); // wrap as SSAVec4f: an SSAVec4i wrapper would go through the converting SSAVec4f(SSAVec4i) constructor
+}
+
+SSAVec4f SSAVec4f::sqrt(SSAVec4f f) // Per-lane square root via the generic llvm.sqrt intrinsic overloaded on <4 x float>
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); // result is a vector, so it must not be wrapped as SSAFloat
+	//return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_sqrt_ps), f.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::rcp(SSAVec4f f) // Per-lane reciprocal through the SSE rcp_ps intrinsic
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rcp_ps), f.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::sin(SSAVec4f val) // Per-lane llvm.sin
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::cos(SSAVec4f val) // Per-lane llvm.cos
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::pow(SSAVec4f val, SSAVec4f power) // Per-lane llvm.pow(val, power)
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	//params.push_back(SSAVec4f::llvm_type());
+	std::vector<llvm::Value *> args;
+	args.push_back(val.v);
+	args.push_back(power.v);
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::exp(SSAVec4f val) // Per-lane llvm.exp
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::log(SSAVec4f val) // Per-lane llvm.log
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::fma(SSAVec4f a, SSAVec4f b, SSAVec4f c) // Per-lane llvm.fma with operands (a, b, c)
+{
+	std::vector<llvm::Type *> params;
+	params.push_back(SSAVec4f::llvm_type());
+	//params.push_back(SSAVec4f::llvm_type());
+	//params.push_back(SSAVec4f::llvm_type());
+	std::vector<llvm::Value *> args;
+	args.push_back(a.v);
+	args.push_back(b.v);
+	args.push_back(c.v);
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint()));
+}
+
+void SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3) // In-place 4x4 transpose: afterwards row N holds lane N of each original row
+{
+	SSAVec4f tmp0 = shuffle(row0, row1, 0x44);//_MM_SHUFFLE(1,0,1,0));
+	SSAVec4f tmp2 = shuffle(row0, row1, 0xEE);//_MM_SHUFFLE(3,2,3,2));
+	SSAVec4f tmp1 = shuffle(row2, row3, 0x44);//_MM_SHUFFLE(1,0,1,0));
+	SSAVec4f tmp3 = shuffle(row2, row3, 0xEE);//_MM_SHUFFLE(3,2,3,2));
+	row0 = shuffle(tmp0, tmp1, 0x88);//_MM_SHUFFLE(2,0,2,0));
+	row1 = shuffle(tmp0, tmp1, 0xDD);//_MM_SHUFFLE(3,1,3,1));
+	row2 = shuffle(tmp2, tmp3, 0x88);//_MM_SHUFFLE(2,0,2,0));
+	row3 = shuffle(tmp2, tmp3, 0xDD);//_MM_SHUFFLE(3,1,3,1));
+}
+
+SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3) // Single-source shuffle; the second operand is undef
+{
+	return shuffle(f0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3);
+}
+
+SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3) // shufflevector with a constant mask built from the four indices
+{
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3)));
+	llvm::Value *mask = llvm::ConstantVector::get(constants);
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateShuffleVector(f0.v, f1.v, mask, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask) // Decodes an 8-bit mask of four 2-bit selectors: the low two pick from f0, the high two from f1 (hence + 4)
+{
+	return shuffle(f0, f1, mask & 3, (mask >> 2) & 3, ((mask >> 4) & 3) + 4, ((mask >> 6) & 3) + 4);
+}
+
+SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b) // Per-lane fadd
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b) // Per-lane fsub
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b) // Per-lane fmul
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b) // Per-lane fdiv
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4f operator+(float a, const SSAVec4f &b) // Scalar-constant overloads below splat the float via SSAVec4f(float)
+{
+	return SSAVec4f(a) + b;
+}
+
+SSAVec4f operator-(float a, const SSAVec4f &b)
+{
+	return SSAVec4f(a) - b;
+}
+
+SSAVec4f operator*(float a, const SSAVec4f &b)
+{
+	return SSAVec4f(a) * b;
+}
+
+SSAVec4f operator/(float a, const SSAVec4f &b)
+{
+	return SSAVec4f(a) / b;
+}
+
+SSAVec4f operator+(const SSAVec4f &a, float b)
+{
+	return a + SSAVec4f(b);
+}
+
+SSAVec4f operator-(const SSAVec4f &a, float b)
+{
+	return a - SSAVec4f(b);
+}
+
+SSAVec4f operator*(const SSAVec4f &a, float b)
+{
+	return a * SSAVec4f(b);
+}
+
+SSAVec4f operator/(const SSAVec4f &a, float b)
+{
+	return a / SSAVec4f(b);
+}
diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h
new file mode 100644
index 000000000..5e3397e58
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4f.h
@@ -0,0 +1,57 @@
+
+#pragma once
+
+namespace llvm { class Value; }
+namespace llvm { class Type; }
+
+class SSAVec4i;
+class SSAFloat;
+class SSAInt;
+
+class SSAVec4f // Thin wrapper around an llvm::Value of type <4 x float>
+{
+public:
+	SSAVec4f();
+	SSAVec4f(float constant);
+	SSAVec4f(float constant0, float constant1, float constant2, float constant3);
+	SSAVec4f(SSAFloat f); // implicit: broadcasts f to all lanes
+	SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3);
+	explicit SSAVec4f(llvm::Value *v);
+	SSAVec4f(SSAVec4i i32); // implicit: numeric int -> float conversion (use bitcast() to reinterpret)
+	SSAFloat operator[](SSAInt index) const;
+	static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index);
+	static SSAVec4f bitcast(SSAVec4i i32);
+	static SSAVec4f sqrt(SSAVec4f f);
+	static SSAVec4f rcp(SSAVec4f f);
+	static SSAVec4f sin(SSAVec4f val);
+	static SSAVec4f cos(SSAVec4f val);
+	static SSAVec4f pow(SSAVec4f val, SSAVec4f power);
+	static SSAVec4f exp(SSAVec4f val);
+	static SSAVec4f log(SSAVec4f val);
+	static SSAVec4f fma(SSAVec4f a, SSAVec4f b, SSAVec4f c);
+	static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3);
+	static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3);
+	static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3);
+	static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); }
+	static llvm::Type *llvm_type();
+
+	llvm::Value *v; // wrapped value; null for a default-constructed object
+
+private:
+	static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask); // 8-bit packed-selector form, used by transpose()
+};
+
+SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b);
+SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b);
+SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b);
+SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b);
+
+SSAVec4f operator+(float a, const SSAVec4f &b);
+SSAVec4f operator-(float a, const SSAVec4f &b);
+SSAVec4f operator*(float a, const SSAVec4f &b);
+SSAVec4f operator/(float a, const SSAVec4f &b);
+
+SSAVec4f operator+(const SSAVec4f &a, float b);
+SSAVec4f operator-(const SSAVec4f &a, float b);
+SSAVec4f operator*(const SSAVec4f &a, float b);
+SSAVec4f operator/(const SSAVec4f &a, float b);
diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp
new file mode 100644
index 000000000..e2df64167
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp
@@ -0,0 +1,50 @@
+
+#include "ssa_vec4f_ptr.h"
+#include "ssa_scope.h"
+#include "r_compiler/llvm_include.h"
+
+SSAVec4fPtr::SSAVec4fPtr() // Default: wraps a null llvm::Value
+: v(0)
+{
+}
+
+SSAVec4fPtr::SSAVec4fPtr(llvm::Value *v) // Wraps an existing pointer value
+: v(v)
+{
+}
+
+llvm::Type *SSAVec4fPtr::llvm_type() // Pointer to <4 x float>
+{
+	return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
+}
+
+SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const // GEP by `index`; the result is another pointer, nothing is loaded
+{
+	return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4fPtr::load() const // Load with no explicit alignment given
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
+}
+
+SSAVec4f SSAVec4fPtr::load_unaligned() const // Load with an explicit alignment of 4 bytes
+{
+	return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint()));
+}
+
+void SSAVec4fPtr::store(const SSAVec4f &new_value) // Store with an explicit alignment of 16 bytes
+{
+	SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false);
+}
+
+void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) // Store with an explicit alignment of 4 bytes, mirroring load_unaligned()
+{
+	/*llvm::Value *values[2] =
+	{
+		SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())),
+		new_value.v
+	};
+	SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/
+	SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); // a plain CreateStore carries no alignment, which LLVM reads as the type's ABI alignment
+}
diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h
new file mode 100644
index 000000000..ab4e84190
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h
@@ -0,0 +1,24 @@
+
+#pragma once
+
+#include "ssa_int.h"
+#include "ssa_vec4f.h"
+
+namespace llvm { class Value; }
+namespace llvm { class Type; }
+
+class SSAVec4fPtr // Thin wrapper around an llvm::Value that points to <4 x float>
+{
+public:
+	SSAVec4fPtr();
+	explicit SSAVec4fPtr(llvm::Value *v);
+	static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); }
+	static llvm::Type *llvm_type();
+	SSAVec4fPtr operator[](SSAInt index) const;
+	SSAVec4f load() const;
+	SSAVec4f load_unaligned() const;
+	void store(const SSAVec4f &new_value);
+	void store_unaligned(const SSAVec4f &new_value);
+
+	llvm::Value *v; // wrapped pointer value
+};
diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp
new file mode 100644
index 000000000..80e07c8d4
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4i.cpp
@@ -0,0 +1,213 @@
+
+#include "ssa_vec4i.h"
+#include "ssa_vec4f.h"
+#include "ssa_vec8s.h"
+#include "ssa_vec16ub.h"
+#include "ssa_int.h"
+#include "ssa_scope.h"
+#include "r_compiler/llvm_include.h"
+
+SSAVec4i::SSAVec4i() // Default: wraps a null llvm::Value
+: v(0)
+{
+}
+
+SSAVec4i::SSAVec4i(int constant) // Constant vector with all four lanes equal to `constant`
+: v(0)
+{
+	std::vector<llvm::Constant*> constants;
+	constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)));
+	v = llvm::ConstantVector::get(constants);
+}
+
+SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3) // Constant vector, one value per lane
+: v(0)
+{
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant0, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant1, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant2, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant3, true)));
+	v = llvm::ConstantVector::get(constants);
+}
+
+SSAVec4i::SSAVec4i(llvm::Value *v) // Wraps an existing value without emitting any instruction
+: v(v)
+{
+}
+
+SSAVec4i::SSAVec4i(SSAInt i) // Broadcast: bitcast the scalar to <1 x i32>, then shuffle with an all-zero mask
+: v(0)
+{
+	llvm::Type *m1xi32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1);
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+	llvm::Value *mask = llvm::ConstantVector::get(constants);
+	v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
+}
+
+SSAVec4i::SSAVec4i(SSAVec4f f32) // Numeric float -> int conversion through the SSE2 cvttps2dq intrinsic
+: v(0)
+{
+	v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint());
+}
+
+SSAInt SSAVec4i::operator[](SSAInt index) // Extracts the lane selected by a runtime index
+{
+	return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint()));
+}
+
+llvm::Type *SSAVec4i::llvm_type() // The LLVM type <4 x i32>
+{
+	return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4);
+}
+
+SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) // Reinterprets the bits of f32 as <4 x i32>; no numeric conversion
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint()));
+}
+
+SSAVec4i SSAVec4i::bitcast(SSAVec8s i16) // Reinterprets the bits of i16 as <4 x i32>
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint()));
+}
+
+SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3) // Single-source shuffle; the second operand is undef
+{
+	return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3);
+}
+
+SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3) // shufflevector with a constant mask built from the four indices
+{
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3)));
+	llvm::Value *mask = llvm::ConstantVector::get(constants);
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint()));
+}
+
+void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3) // Widens 16 bytes to four SSAVec4i in two steps (bytes -> shorts -> ints)
+{
+	SSAVec8s low = SSAVec8s::extendlo(a);
+	SSAVec8s high = SSAVec8s::extendhi(a);
+	out0 = extendlo(low);
+	out1 = extendhi(low);
+	out2 = extendlo(high);
+	out3 = extendhi(high);
+}
+
+SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) // Interleaves lanes 4..7 with a zero vector (the literal 0 converts through SSAVec8s(short))
+{
+	return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16
+}
+
+SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) // Interleaves lanes 0..3 with a zero vector
+{
+	return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16
+}
+
+SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b) // Interleaves lanes 4..7 of a and b
+{
+	return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16
+}
+
+SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b) // Interleaves lanes 0..3 of a and b
+{
+	return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16
+}
+
+SSAVec4i SSAVec4i::sqrt(SSAVec4i f) // NOTE(review): sqrt_pd looks like the packed-double intrinsic while f.v is <4 x i32> - confirm intent before use
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, SSAScope::hint()));
+}
+
+/*
+SSAVec4i SSAVec4i::min_sse41(SSAVec4i a, SSAVec4i b)
+{
+	llvm::Value *values[2] = { a.v, b.v };
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pminsd), values, SSAScope::hint()));
+}
+
+SSAVec4i SSAVec4i::max_sse41(SSAVec4i a, SSAVec4i b)
+{
+	llvm::Value *values[2] = { a.v, b.v };
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pmaxsd), values, SSAScope::hint()));
+}
+*/
+
+SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) // Per-lane add
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b) // Per-lane sub
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b) // Per-lane mul
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b) // Per-lane signed division (sdiv)
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec4i operator+(int a, const SSAVec4i &b) // Scalar-constant overloads below splat the int via SSAVec4i(int)
+{
+	return SSAVec4i(a) + b;
+}
+
+SSAVec4i operator-(int a, const SSAVec4i &b)
+{
+	return SSAVec4i(a) - b;
+}
+
+SSAVec4i operator*(int a, const SSAVec4i &b)
+{
+	return SSAVec4i(a) * b;
+}
+
+SSAVec4i operator/(int a, const SSAVec4i &b)
+{
+	return SSAVec4i(a) / b;
+}
+
+SSAVec4i operator+(const SSAVec4i &a, int b)
+{
+	return a + SSAVec4i(b);
+}
+
+SSAVec4i operator-(const SSAVec4i &a, int b)
+{
+	return a - SSAVec4i(b);
+}
+
+SSAVec4i operator*(const SSAVec4i &a, int b)
+{
+	return a * SSAVec4i(b);
+}
+
+SSAVec4i operator/(const SSAVec4i &a, int b)
+{
+	return a / SSAVec4i(b);
+}
+
+SSAVec4i operator<<(const SSAVec4i &a, int bits) // Left shift of each 32-bit lane by a constant count (SSE2 pslli.d)
+{
+	//return SSAScope::builder().CreateShl(a.v, bits);
+	llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) };
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint()));
+}
+
+SSAVec4i operator>>(const SSAVec4i &a, int bits) // Logical (zero-filling) right shift, even though operator/ is signed
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint()));
+}
diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h
new file mode 100644
index 000000000..d19f1d1aa
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4i.h
@@ -0,0 +1,56 @@
+
+#pragma once
+
+namespace llvm { class Value; }
+namespace llvm { class Type; }
+
+class SSAVec4f;
+class SSAVec8s;
+class SSAVec16ub;
+class SSAInt;
+
+class SSAVec4i // Thin wrapper around an llvm::Value of type <4 x i32>
+{
+public:
+	SSAVec4i();
+	SSAVec4i(int constant);
+	SSAVec4i(int constant0, int constant1, int constant2, int constant3);
+	SSAVec4i(SSAInt i); // implicit: broadcasts i to all lanes
+	explicit SSAVec4i(llvm::Value *v);
+	SSAVec4i(SSAVec4f f32); // implicit: numeric float -> int conversion (use bitcast() to reinterpret)
+	SSAInt operator[](SSAInt index);
+	static SSAVec4i bitcast(SSAVec4f f32);
+	static SSAVec4i bitcast(SSAVec8s i16);
+	static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3);
+	static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3);
+	static SSAVec4i extendhi(SSAVec8s i16);
+	static SSAVec4i extendlo(SSAVec8s i16);
+	static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3);
+	static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1);
+	static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1);
+	static SSAVec4i sqrt(SSAVec4i f);
+	//static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b);
+	//static SSAVec4i max_sse41(SSAVec4i a, SSAVec4i b);
+	static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); }
+	static llvm::Type *llvm_type();
+
+	llvm::Value *v; // wrapped value; null for a default-constructed object
+};
+
+SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b);
+SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b);
+SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b);
+SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b);
+
+SSAVec4i operator+(int a, const SSAVec4i &b);
+SSAVec4i operator-(int a, const SSAVec4i &b);
+SSAVec4i operator*(int a, const SSAVec4i &b);
+SSAVec4i operator/(int a, const SSAVec4i &b);
+
+SSAVec4i operator+(const SSAVec4i &a, int b);
+SSAVec4i operator-(const SSAVec4i &a, int b);
+SSAVec4i operator*(const SSAVec4i &a, int b);
+SSAVec4i operator/(const SSAVec4i &a, int b);
+
+SSAVec4i operator<<(const SSAVec4i &a, int bits);
+SSAVec4i operator>>(const SSAVec4i &a, int bits);
diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp
new file mode 100644
index 000000000..a28befb70
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp
@@ -0,0 +1,50 @@
+
+#include "ssa_vec4i_ptr.h"
+#include "ssa_scope.h"
+#include "r_compiler/llvm_include.h"
+
+SSAVec4iPtr::SSAVec4iPtr() // Default: wraps a null llvm::Value
+: v(0)
+{
+}
+
+SSAVec4iPtr::SSAVec4iPtr(llvm::Value *v) // Wraps an existing pointer value
+: v(v)
+{
+}
+
+llvm::Type *SSAVec4iPtr::llvm_type() // Pointer to <4 x i32>
+{
+	return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
+}
+
+SSAVec4iPtr SSAVec4iPtr::operator[](SSAInt index) const // GEP by `index`; the result is another pointer, nothing is loaded
+{
+	return SSAVec4iPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint()));
+}
+
+SSAVec4i SSAVec4iPtr::load() const // Load with no explicit alignment given
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint()));
+}
+
+SSAVec4i SSAVec4iPtr::load_unaligned() const // Load with an explicit alignment of 4 bytes
+{
+	return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4)));
+}
+
+void SSAVec4iPtr::store(const SSAVec4i &new_value) // Store with an explicit alignment of 16 bytes
+{
+	SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false);
+}
+
+void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) // Store with an explicit alignment of 4 bytes, mirroring load_unaligned()
+{
+	/*llvm::Value *values[2] =
+	{
+		v,
+		new_value.v
+	};
+	SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_pd), values);*/
+	SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); // a plain CreateStore carries no alignment, which LLVM reads as the type's ABI alignment
+}
diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h
new file mode 100644
index 000000000..56937b1cc
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h
@@ -0,0 +1,24 @@
+
+#pragma once
+
+#include "ssa_int.h"
+#include "ssa_vec4i.h"
+
+namespace llvm { class Value; }
+namespace llvm { class Type; }
+
+class SSAVec4iPtr // Thin wrapper around an llvm::Value that points to <4 x i32>
+{
+public:
+	SSAVec4iPtr();
+	explicit SSAVec4iPtr(llvm::Value *v);
+	static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); }
+	static llvm::Type *llvm_type();
+	SSAVec4iPtr operator[](SSAInt index) const;
+	SSAVec4i load() const;
+	SSAVec4i load_unaligned() const;
+	void store(const SSAVec4i &new_value);
+	void store_unaligned(const SSAVec4i &new_value);
+
+	llvm::Value *v; // wrapped pointer value
+};
diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp b/src/r_compiler/ssa/ssa_vec8s.cpp
new file mode 100644
index 000000000..d61a4c4a9
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec8s.cpp
@@ -0,0 +1,178 @@
+
+#include "ssa_vec8s.h"
+#include "ssa_vec4i.h"
+#include "ssa_vec16ub.h"
+#include "ssa_scope.h"
+#include "r_compiler/llvm_include.h"
+
+SSAVec8s::SSAVec8s() // Default: wraps a null llvm::Value
+: v(0)
+{
+}
+
+SSAVec8s::SSAVec8s(short constant) // Constant vector with all eight lanes equal to `constant`
+: v(0)
+{
+	std::vector<llvm::Constant*> constants;
+	constants.resize(8, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true)));
+	v = llvm::ConstantVector::get(constants);
+}
+
+SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7) // Constant vector, one value per lane
+: v(0)
+{
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant0, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant1, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant2, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant3, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant4, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant5, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant6, true)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant7, true)));
+	v = llvm::ConstantVector::get(constants);
+}
+
+SSAVec8s::SSAVec8s(llvm::Value *v) // Wraps an existing value without emitting any instruction
+: v(v)
+{
+}
+
+SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) // Packs two int vectors into eight shorts through the SSE2 packssdw intrinsic
+: v(0)
+{
+	llvm::Value *values[2] = { i0.v, i1.v };
+	v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint());
+}
+
+llvm::Type *SSAVec8s::llvm_type() // The LLVM type <8 x i16>
+{
+	return llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 8);
+}
+
+SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8) // Reinterprets the bits of i8 as <8 x i16>
+{
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint()));
+}
+
+SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) // Single-source shuffle; the second operand is undef
+{
+	return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7);
+}
+
+SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) // shufflevector with a constant mask built from the eight indices
+{
+	std::vector<llvm::Constant*> constants;
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6)));
+	constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7)));
+	llvm::Value *mask = llvm::ConstantVector::get(constants);
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint()));
+}
+
+SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) // Interleaves bytes 8..15 with a zero vector (the literal 0 converts through the SSAVec16ub constant constructor)
+{
+	return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8
+}
+
+SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) // Interleaves bytes 0..7 with a zero vector
+{
+	return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8
+}
+
+/*
+SSAVec8s SSAVec8s::min_sse2(SSAVec8s a, SSAVec8s b)
+{
+	llvm::Value *values[2] = { a.v, b.v };
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmins_w), values, SSAScope::hint()));
+}
+
+SSAVec8s SSAVec8s::max_sse2(SSAVec8s a, SSAVec8s b)
+{
+	llvm::Value *values[2] = { a.v, b.v };
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmaxs_w), values, SSAScope::hint()));
+}
+*/
+
+SSAVec8s SSAVec8s::mulhi(SSAVec8s a, SSAVec8s b) // Multiply through the SSE2 pmulh.w intrinsic
+{
+	llvm::Value *values[2] = { a.v, b.v };
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmulh_w), values, SSAScope::hint()));
+}
+
+SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b) // Per-lane add
+{
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b) // Per-lane sub
+{
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b) // Per-lane mul
+{
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b) // Per-lane signed division (sdiv)
+{
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint()));
+}
+
+SSAVec8s operator+(short a, const SSAVec8s &b) // Scalar-constant overloads below splat the short via SSAVec8s(short)
+{
+	return SSAVec8s(a) + b;
+}
+
+SSAVec8s operator-(short a, const SSAVec8s &b)
+{
+	return SSAVec8s(a) - b;
+}
+
+SSAVec8s operator*(short a, const SSAVec8s &b)
+{
+	return SSAVec8s(a) * b;
+}
+
+SSAVec8s operator/(short a, const SSAVec8s &b)
+{
+	return SSAVec8s(a) / b;
+}
+
+SSAVec8s operator+(const SSAVec8s &a, short b)
+{
+	return a + SSAVec8s(b);
+}
+
+SSAVec8s operator-(const SSAVec8s &a, short b)
+{
+	return a - SSAVec8s(b);
+}
+
+SSAVec8s operator*(const SSAVec8s &a, short b)
+{
+	return a * SSAVec8s(b);
+}
+
+SSAVec8s operator/(const SSAVec8s &a, short b)
+{
+	return a / SSAVec8s(b);
+}
+
+SSAVec8s operator<<(const SSAVec8s &a, int bits) // Left shift of each 16-bit lane by a constant count
+{
+	//return SSAScope::builder().CreateShl(a.v, bits);
+	llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) };
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_w), values, SSAScope::hint())); // pslli.w is the 16-bit-lane form; pslli.d operates on 32-bit lanes
+}
+
+SSAVec8s operator>>(const SSAVec8s &a, int bits) // Logical (zero-filling) right shift, even though operator/ is signed
+{
+	return SSAVec8s::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint()));
+}
diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h
new file mode 100644
index 000000000..aded358dd
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_vec8s.h
@@ -0,0 +1,48 @@
+
+#pragma once
+
+namespace llvm { class Value; }
+namespace llvm { class Type; }
+
+class SSAVec4i;
+class SSAVec16ub;
+
+class SSAVec8s // Thin wrapper around an llvm::Value holding eight 16-bit lanes
+{
+public:
+	SSAVec8s();
+	SSAVec8s(short constant); // implicit: all lanes set to `constant`
+	SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7);
+	explicit SSAVec8s(llvm::Value *v);
+	SSAVec8s(SSAVec4i i0, SSAVec4i i1);
+	static SSAVec8s bitcast(SSAVec16ub i8);
+	static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
+	static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7);
+	static SSAVec8s extendhi(SSAVec16ub a);
+	static SSAVec8s extendlo(SSAVec16ub a);
+	//static SSAVec8s min_sse2(SSAVec8s a, SSAVec8s b);
+	//static SSAVec8s max_sse2(SSAVec8s a, SSAVec8s b);
+	static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b);
+	static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); }
+	static llvm::Type *llvm_type();
+
+	llvm::Value *v; // wrapped value
+};
+
+SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b);
+SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b);
+SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b);
+SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b);
+
+SSAVec8s operator+(short a, const SSAVec8s &b);
+SSAVec8s operator-(short a, const SSAVec8s &b);
+SSAVec8s operator*(short a, const SSAVec8s &b);
+SSAVec8s operator/(short a, const SSAVec8s &b);
+
+SSAVec8s operator+(const SSAVec8s &a, short b);
+SSAVec8s operator-(const SSAVec8s &a, short b);
+SSAVec8s operator*(const SSAVec8s &a, short b);
+SSAVec8s operator/(const SSAVec8s &a, short b);
+
+SSAVec8s operator<<(const SSAVec8s &a, int bits);
+SSAVec8s operator>>(const SSAVec8s &a, int bits);
diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp
index 0d86ead47..d54bad7ae 100644
--- a/src/r_draw_rgba.cpp
+++ b/src/r_draw_rgba.cpp
@@ -38,6 +38,7 @@
 #include "r_data/colormaps.h"
 #include "r_plane.h"
 #include "r_draw_rgba.h"
+#include "r_compiler/fixedfunction/fixedfunction.h"
 #include "gi.h"
 #include "stats.h"

@@ -299,6 +300,68 @@ void DrawerCommandQueue::StopThreads()

 /////////////////////////////////////////////////////////////////////////////

+class DrawSpanFFCommand : public DrawerCommand // Span drawer command that forwards to FixedFunction::DrawSpan
+{
+	fixed_t _xfrac; // the fields below snapshot the ds_*/dc_* globals in the constructor
+	fixed_t _yfrac;
+	fixed_t _xstep;
+	fixed_t _ystep;
+	int _x1;
+	int _x2;
+	int _y;
+	int _xbits;
+	int _ybits;
+	BYTE * RESTRICT _destorg;
+
+	const uint32_t * RESTRICT _source;
+	uint32_t _light;
+	ShadeConstants _shade_constants;
+	bool _nearest_filter;
+
+	uint32_t _srcalpha;
+	uint32_t _destalpha;
+
+	FixedFunction *_ff; // points at the function-local static created in the constructor
+
+public:
+	DrawSpanFFCommand() // Captures the current span state; Execute() reads only _y, _x1, _x2, _destorg and _ff
+	{
+		_xfrac = ds_xfrac;
+		_yfrac = ds_yfrac;
+		_xstep = ds_xstep;
+		_ystep = ds_ystep;
+		_x1 = ds_x1;
+		_x2 = ds_x2;
+		_y = ds_y;
+		_xbits = ds_xbits;
+		_ybits = ds_ybits;
+		_destorg = dc_destorg;
+
+		_source = (const uint32_t*)ds_source;
+		_light = LightBgra::calc_light_multiplier(ds_light);
+		_shade_constants = ds_shade_constants;
+		_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
+
+		_srcalpha = dc_srcalpha >> (FRACBITS - 8);
+		_destalpha = dc_destalpha >> (FRACBITS - 8);
+
+		static FixedFunction ff; // a single FixedFunction instance shared by every command
+		_ff = &ff;
+	}
+
+	void Execute(DrawerThread *thread) override // Draws pixels _x1.._x2 (inclusive) of row _y unless the thread skips that row
+	{
+		if (thread->skipped_by_thread(_y))
+			return;
+
+		uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg;
+		int count = _x2 - _x1 + 1;
+		_ff->DrawSpan(count, dest);
+	}
+};
+
+/////////////////////////////////////////////////////////////////////////////
+
 class DrawerColumnCommand : public DrawerCommand
 {
 public:
@@ -2700,11 +2763,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba()

 void R_DrawSpan_rgba()
 {
+	DrawerCommandQueue::QueueCommand<DrawSpanFFCommand>(); // always queue the fixed-function span drawer; the previous selection is commented out below
+/*
 #ifdef NO_SSE
 	DrawerCommandQueue::QueueCommand();
 #else
 	DrawerCommandQueue::QueueCommand();
 #endif
+*/
 }

 void R_DrawSpanMasked_rgba()
From 4f2ae42ed59307a5e823cb97df110f0920be6b34 Mon Sep 17 00:00:00 2001
From: Magnus
Norddahl Date: Mon, 26 Sep 2016 09:04:29 +0200 Subject: [PATCH 02/15] Revert duplicate entry in CMakeLists.txt --- src/CMakeLists.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4f9599b35..09238ff57 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -104,15 +104,6 @@ if( WIN32 ) endif() add_definitions( -D_WIN32 ) - - set( FMOD_SEARCH_PATHS - "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" - "C:/Program Files (x86)/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" - # This next one is for Randy. - "E:/Software/Dev/FMOD/${WIN_TYPE}/api" - ) - set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) - set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) set( FMOD_SEARCH_PATHS "C:/Program Files/FMOD SoundSystem/FMOD Programmers API ${WIN_TYPE}/api" From d5c7a7ab76bb34e248a4dbb6ad3ca3800176da94 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Sep 2016 03:07:03 +0200 Subject: [PATCH 03/15] Make LLVM compile and optimize for the current CPU --- .../fixedfunction/fixedfunction.cpp | 86 ++++++++++++++----- src/r_compiler/fixedfunction/fixedfunction.h | 4 + src/r_compiler/llvm_include.h | 11 ++- 3 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index 347ba6de3..cc46b8d50 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -13,36 +13,75 @@ RenderProgram::RenderProgram() { - llvm::install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { - I_FatalError(reason.c_str()); + using namespace llvm; + + install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + I_FatalError("LLVM fatal error: %s", reason.c_str()); }); - //llvm::llvm_start_multithreaded(); - llvm::InitializeNativeTarget(); - llvm::InitializeNativeTargetAsmPrinter(); - 
llvm::InitializeNativeTargetAsmParser(); - - mContext = std::make_unique(); - - auto moduleOwner = std::make_unique("render", context()); - mModule = moduleOwner.get(); + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); std::string errorstring; - llvm::EngineBuilder engineBuilder(std::move(moduleOwner)); + + std::string targetTriple = sys::getProcessTriple(); + std::string cpuName = sys::getHostCPUName(); + StringMap cpuFeatures; + sys::getHostCPUFeatures(cpuFeatures); + std::string cpuFeaturesStr; + for (const auto &it : cpuFeatures) + { + if (!cpuFeaturesStr.empty()) + cpuFeaturesStr.push_back(','); + cpuFeaturesStr.push_back(it.getValue() ? '+' : '-'); + cpuFeaturesStr += it.getKey(); + } + + Printf("LLVM target triple: %s\n", targetTriple.c_str()); + Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + + const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); + if (!target) + I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); + + TargetOptions opt; + auto relocModel = Optional(Reloc::Static); + TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); + if (!machine) + I_FatalError("Could not create LLVM target machine"); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + mModule->setTargetTriple(targetTriple); + mModule->setDataLayout(machine->createDataLayout()); + + EngineBuilder engineBuilder(std::move(moduleOwner)); engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive); - engineBuilder.setRelocationModel(llvm::Reloc::Static); - engineBuilder.setEngineKind(llvm::EngineKind::JIT); - mEngine.reset(engineBuilder.create()); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); +
engineBuilder.setRelocationModel(Reloc::Static); + engineBuilder.setEngineKind(EngineKind::JIT); + mEngine.reset(engineBuilder.create(machine)); if (!mEngine) - I_FatalError(errorstring.c_str()); + I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); + + mModulePassManager = std::make_unique(); + mFunctionPassManager = std::make_unique(mModule); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); + passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); } RenderProgram::~RenderProgram() { mEngine.reset(); mContext.reset(); - //llvm::llvm_stop_multithreaded(); } void *RenderProgram::PointerToFunction(const char *name) @@ -57,6 +96,7 @@ FixedFunction::FixedFunction() { CodegenDrawSpan(); mProgram.engine()->finalizeObject(); + mProgram.modulePassManager()->run(*mProgram.module()); DrawSpan = mProgram.GetProcAddress("DrawSpan"); } @@ -81,12 +121,12 @@ void FixedFunction::CodegenDrawSpan() SSAInt index = stack_index.load(); loop.loop_block(index < count); - //SSAVec4i color(255, 255, 0, 255); - //data[index * 4].store_vec4ub(color); - data[index * 4].store(0); + SSAVec4i color(0, 128, 255, 255); + data[index * 4].store_vec4ub(color); + /*data[index * 4].store(0); data[index * 4 + 1].store(128); data[index * 4 + 2].store(255); - data[index * 4 + 3].store(255); + data[index * 4 + 3].store(255);*/ stack_index.store(index + 1); } loop.end_block(); @@ -95,6 +135,8 @@ void FixedFunction::CodegenDrawSpan() if (llvm::verifyFunction(*function.func)) I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); } #if 0 diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 4c81fc108..7ee68032e 100644 --- 
a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -26,6 +26,8 @@ public: llvm::LLVMContext &context() { return *mContext; } llvm::Module *module() { return mModule; } llvm::ExecutionEngine *engine() { return mEngine.get(); } + llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } + llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } private: void *PointerToFunction(const char *name); @@ -33,6 +35,8 @@ private: std::unique_ptr mContext; llvm::Module *mModule; std::unique_ptr mEngine; + std::unique_ptr mModulePassManager; + std::unique_ptr mFunctionPassManager; }; class FixedFunction diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index 1eed549e1..b916bad0e 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -20,19 +20,22 @@ #pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception #include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include +#include #include #include From 20f67ad40a96b90e17fe44e98a026ac6cc9dac7c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Sep 2016 22:53:20 +0200 Subject: [PATCH 04/15] Add SSAShort, shift, and, or, and fix unaligned store --- src/CMakeLists.txt | 1 + src/r_compiler/ssa/ssa_float_ptr.cpp | 11 +- src/r_compiler/ssa/ssa_int.cpp | 30 ++++++ src/r_compiler/ssa/ssa_int.h | 7 ++ src/r_compiler/ssa/ssa_int_ptr.cpp | 4 +- src/r_compiler/ssa/ssa_short.cpp | 148 +++++++++++++++++++++++++++ src/r_compiler/ssa/ssa_short.h | 49 +++++++++ src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 10 +- src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 10 +- src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 
10 +- 10 files changed, 245 insertions(+), 35 deletions(-) create mode 100644 src/r_compiler/ssa/ssa_short.cpp create mode 100644 src/r_compiler/ssa/ssa_short.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 09238ff57..4b81a24f4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1434,6 +1434,7 @@ set (PCH_SOURCES r_compiler/ssa/ssa_if_block.cpp r_compiler/ssa/ssa_int.cpp r_compiler/ssa/ssa_int_ptr.cpp + r_compiler/ssa/ssa_short.cpp r_compiler/ssa/ssa_scope.cpp r_compiler/ssa/ssa_struct_type.cpp r_compiler/ssa/ssa_ubyte.cpp diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 4413c6e92..6a1409271 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -38,7 +38,6 @@ SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); - // return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(get_intrinsic(llvm::Intrinsic::x86_sse2_loadu_dq), SSAScope::builder().CreateBitCast(v, llvm::PointerType::getUnqual(llvm::IntegerType::get(SSAScope::context(), 8))))); } void SSAFloatPtr::store(const SSAFloat &new_value) @@ -49,17 +48,11 @@ void SSAFloatPtr::store(const SSAFloat &new_value) void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) { llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 16); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); } void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f 
&new_value) { - /*llvm::Value *values[2] = - { - SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); } diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 9f3c54f50..674f44350 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -115,3 +115,33 @@ SSAInt operator>>(const SSAInt &a, int bits) { return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); } + +SSAInt operator<<(const SSAInt &a, const SSAInt &bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); +} + +SSAInt operator>>(const SSAInt &a, const SSAInt &bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); +} + +SSAInt operator&(const SSAInt &a, int b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); +} + +SSAInt operator&(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator|(const SSAInt &a, int b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); +} + +SSAInt operator|(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index 0be37ee7e..5e373c62e 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ 
b/src/r_compiler/ssa/ssa_int.h @@ -39,3 +39,10 @@ SSAInt operator%(const SSAInt &a, int b); SSAInt operator<<(const SSAInt &a, int bits); SSAInt operator>>(const SSAInt &a, int bits); +SSAInt operator<<(const SSAInt &a, const SSAInt &bits); +SSAInt operator>>(const SSAInt &a, const SSAInt &bits); + +SSAInt operator&(const SSAInt &a, int b); +SSAInt operator&(const SSAInt &a, const SSAInt &b); +SSAInt operator|(const SSAInt &a, int b); +SSAInt operator|(const SSAInt &a, const SSAInt &b); diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index dd0ca17f6..3c2637073 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -48,11 +48,11 @@ void SSAIntPtr::store(const SSAInt &new_value) void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 16); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); } void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) { llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); - SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); } diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp new file mode 100644 index 000000000..fc8de9449 --- /dev/null +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -0,0 +1,148 @@ + +#include "ssa_short.h" +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "r_compiler/llvm_include.h" + +SSAShort::SSAShort() 
+: v(0) +{ +} + +SSAShort::SSAShort(int constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true)); +} + +SSAShort::SSAShort(SSAFloat f) +: v(0) +{ + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt16Ty(SSAScope::context()), SSAScope::hint()); +} + +SSAShort::SSAShort(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAShort::llvm_type() +{ + return llvm::Type::getInt16Ty(SSAScope::context()); +} + +SSAShort operator+(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator-(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator*(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator/(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator%(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator+(int a, const SSAShort &b) +{ + return SSAShort(a) + b; +} + +SSAShort operator-(int a, const SSAShort &b) +{ + return SSAShort(a) - b; +} + +SSAShort operator*(int a, const SSAShort &b) +{ + return SSAShort(a) * b; +} + +SSAShort operator/(int a, const SSAShort &b) +{ + return SSAShort(a) / b; +} + +SSAShort operator%(int a, const SSAShort &b) +{ + return SSAShort(a) % b; +} + +SSAShort operator+(const SSAShort &a, int b) +{ + return a + SSAShort(b); +} + +SSAShort operator-(const SSAShort &a, int b) +{ + return a - SSAShort(b); +} + +SSAShort operator*(const SSAShort &a, int b) +{ + return a * SSAShort(b); +} + +SSAShort operator/(const SSAShort &a, int b) +{ + return a / SSAShort(b); +} + +SSAShort operator%(const SSAShort &a, int b) 
+{ + return a % SSAShort(b); +} + +SSAShort operator<<(const SSAShort &a, int bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); +} + +SSAShort operator>>(const SSAShort &a, int bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} + +SSAShort operator<<(const SSAShort &a, const SSAInt &bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); +} + +SSAShort operator>>(const SSAShort &a, const SSAInt &bits) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); +} + +SSAShort operator&(const SSAShort &a, int b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); +} + +SSAShort operator&(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSAShort operator|(const SSAShort &a, int b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); +} + +SSAShort operator|(const SSAShort &a, const SSAShort &b) +{ + return SSAShort::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h new file mode 100644 index 000000000..ae71a1336 --- /dev/null +++ b/src/r_compiler/ssa/ssa_short.h @@ -0,0 +1,49 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloat; +class SSAInt; + +class SSAShort +{ +public: + SSAShort(); + SSAShort(int constant); + SSAShort(SSAFloat f); + explicit SSAShort(llvm::Value *v); + static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAShort operator+(const SSAShort &a, const SSAShort &b); +SSAShort operator-(const SSAShort &a, const SSAShort &b); +SSAShort operator*(const SSAShort &a, const SSAShort &b); +SSAShort 
operator/(const SSAShort &a, const SSAShort &b); +SSAShort operator%(const SSAShort &a, const SSAShort &b); + +SSAShort operator+(int a, const SSAShort &b); +SSAShort operator-(int a, const SSAShort &b); +SSAShort operator*(int a, const SSAShort &b); +SSAShort operator/(int a, const SSAShort &b); +SSAShort operator%(int a, const SSAShort &b); + +SSAShort operator+(const SSAShort &a, int b); +SSAShort operator-(const SSAShort &a, int b); +SSAShort operator*(const SSAShort &a, int b); +SSAShort operator/(const SSAShort &a, int b); +SSAShort operator%(const SSAShort &a, int b); + +SSAShort operator<<(const SSAShort &a, int bits); +SSAShort operator>>(const SSAShort &a, int bits); +SSAShort operator<<(const SSAShort &a, const SSAInt &bits); +SSAShort operator>>(const SSAShort &a, const SSAInt &bits); + +SSAShort operator&(const SSAShort &a, int b); +SSAShort operator&(const SSAShort &a, const SSAShort &b); +SSAShort operator|(const SSAShort &a, int b); +SSAShort operator|(const SSAShort &a, const SSAShort &b); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 825806148..b2408066e 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -86,7 +86,7 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) { llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 16); + llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); // The following generates _mm_stream_si128, maybe! 
// llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); @@ -95,12 +95,6 @@ void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) { - /*llvm::Value *values[2] = - { - SSAScope::builder().CreateBitCast(v, llvm::Type::getInt8PtrTy(SSAScope::context())), - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_dq), values);*/ llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); - llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); } diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index e2df64167..6a197ec90 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -35,16 +35,10 @@ SSAVec4f SSAVec4fPtr::load_unaligned() const void SSAVec4fPtr::store(const SSAVec4f &new_value) { - SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); + SSAScope::builder().CreateStore(new_value.v, v, false); } void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) { - /*llvm::Value *values[2] = - { - SSAScope::builder().CreateBitCast(v, llvm::Type::getFloatPtrTy(SSAScope::context())), - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_storeu_ps), values);*/ - SSAScope::builder().CreateStore(new_value.v, v, false); + SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); } diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp index a28befb70..7138c30d2 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp +++ 
b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -35,16 +35,10 @@ SSAVec4i SSAVec4iPtr::load_unaligned() const void SSAVec4iPtr::store(const SSAVec4i &new_value) { - SSAScope::builder().CreateAlignedStore(new_value.v, v, 16, false); + SSAScope::builder().CreateStore(new_value.v, v, false); } void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) { - /*llvm::Value *values[2] = - { - v, - new_value.v - }; - SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_storeu_pd), values);*/ - SSAScope::builder().CreateStore(new_value.v, v, false); + SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); } From f9a7186550bf6b6e72f9770ba79e3789b7acf541 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Sep 2016 22:54:37 +0200 Subject: [PATCH 05/15] Improve DrawSpan codegen enough to do the simple shade for 64x64 flats --- .../fixedfunction/fixedfunction.cpp | 112 +++++++++++++++--- src/r_compiler/fixedfunction/fixedfunction.h | 26 +++- src/r_draw_rgba.cpp | 20 +++- 3 files changed, 139 insertions(+), 19 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index cc46b8d50..8f8b09f23 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -38,8 +38,8 @@ RenderProgram::RenderProgram() cpuFeaturesStr += it.getKey(); } - Printf("LLVM target triple: %s\n", targetTriple.c_str()); - Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + //Printf("LLVM target triple: %s\n", targetTriple.c_str()); + //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); if (!target) @@ -98,7 +98,7 @@ FixedFunction::FixedFunction() mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); - DrawSpan = mProgram.GetProcAddress("DrawSpan"); + DrawSpan = 
mProgram.GetProcAddress("DrawSpan"); } void FixedFunction::CodegenDrawSpan() @@ -107,29 +107,90 @@ void FixedFunction::CodegenDrawSpan() SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); SSAFunction function("DrawSpan"); - function.add_parameter(SSAInt::llvm_type()); - function.add_parameter(SSAUBytePtr::llvm_type()); + function.add_parameter(GetRenderArgsStruct(mProgram.context())); function.create_public(); - SSAInt count = function.parameter(0); - SSAUBytePtr data = function.parameter(1); - SSAStack stack_index; + SSAStack stack_index, stack_xfrac, stack_yfrac; + SSAValue args = function.parameter(0); + SSAUBytePtr destorg = args[0][0].load(); + SSAUBytePtr source = args[0][1].load(); + SSAInt destpitch = args[0][2].load(); + stack_xfrac.store(args[0][3].load()); + stack_yfrac.store(args[0][4].load()); + SSAInt xstep = args[0][5].load(); + SSAInt ystep = args[0][6].load(); + SSAInt x1 = args[0][7].load(); + SSAInt x2 = args[0][8].load(); + SSAInt y = args[0][9].load(); + SSAInt xbits = args[0][10].load(); + SSAInt ybits = args[0][11].load(); + SSAInt light = args[0][12].load(); + SSAInt srcalpha = args[0][13].load(); + SSAInt destalpha = args[0][14].load(); + + SSAInt count = x2 - x1 + 1; + SSAUBytePtr data = destorg[(x1 + y * destpitch) * 4]; + + SSAInt yshift = 32 - ybits; + SSAInt xshift = yshift - xbits; + SSAInt xmask = ((SSAInt(1) << xbits) - 1) << ybits; + //is_64x64 = xbits == 6 && ybits == 6; + + SSAInt sseLength = count / 4; stack_index.store(0); - SSAForBlock loop; { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < sseLength); + + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + { + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + // 64x64 is the most common case by far, so special case it. 
+ SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + colors[i] = source[spot64 * 4].load_vec4ub() * light / 256; + + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + } + + SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(ssecolors); + + stack_index.store(index + 1); + loop.end_block(); + } + + stack_index.store(sseLength * 4); + { + SSAForBlock loop; SSAInt index = stack_index.load(); loop.loop_block(index < count); - SSAVec4i color(0, 128, 255, 255); + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + // 64x64 is the most common case by far, so special case it. + SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + SSAVec4i color = source[spot64 * 4].load_vec4ub(); + color = color * light / 256; data[index * 4].store_vec4ub(color); - /*data[index * 4].store(0); - data[index * 4 + 1].store(128); - data[index * 4 + 2].store(255); - data[index * 4 + 3].store(255);*/ + stack_index.store(index + 1); + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + loop.end_block(); } - loop.end_block(); builder.CreateRetVoid(); @@ -139,6 +200,27 @@ void FixedFunction::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } +llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; 
+ elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + #if 0 GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 7ee68032e..3bbf05abe 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -6,6 +6,7 @@ #include "r_compiler/ssa/ssa_vec8s.h" #include "r_compiler/ssa/ssa_vec16ub.h" #include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_short.h" #include "r_compiler/ssa/ssa_ubyte_ptr.h" #include "r_compiler/ssa/ssa_vec4f_ptr.h" #include "r_compiler/ssa/ssa_vec4i_ptr.h" @@ -39,16 +40,39 @@ private: std::unique_ptr mFunctionPassManager; }; +struct RenderArgs +{ + uint32_t *destorg; + const uint32_t *source; + int32_t destpitch; + int32_t xfrac; + int32_t yfrac; + int32_t xstep; + int32_t ystep; + int32_t x1; + int32_t x2; + int32_t y; + int32_t xbits; + int32_t ybits; + uint32_t light; + uint32_t srcalpha; + uint32_t destalpha; + //ShadeConstants 
_shade_constants; + //int32_t nearest_filter; +}; + class FixedFunction { public: FixedFunction(); - void(*DrawSpan)(int, uint32_t *) = nullptr; + void(*DrawSpan)(const RenderArgs *) = nullptr; private: void CodegenDrawSpan(); + static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); + RenderProgram mProgram; }; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d54bad7ae..975739095 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -354,9 +354,23 @@ public: if (thread->skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - int count = _x2 - _x1 + 1; - _ff->DrawSpan(count, dest); + RenderArgs args; + args.destorg = (uint32_t *)_destorg; + args.source = _source; + args.destpitch = dc_pitch; + args.xfrac = _xfrac; + args.yfrac = _yfrac; + args.xstep = _xstep; + args.ystep = _ystep; + args.x1 = _x1; + args.x2 = _x2; + args.y = _y; + args.xbits = _xbits; + args.ybits = _ybits; + args.light = _light; + args.srcalpha = _srcalpha; + args.destalpha = _destalpha; + _ff->DrawSpan(&args); } }; From 576fed5afceebd8b6a08c7580fbd2cb25e25b2e5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Sep 2016 05:18:16 +0200 Subject: [PATCH 06/15] Add light, blend and sampler functions --- .../fixedfunction/fixedfunction.cpp | 123 +++++++++++++++++- src/r_compiler/fixedfunction/fixedfunction.h | 45 ++++++- src/r_compiler/ssa/ssa_vec4i.cpp | 10 ++ src/r_compiler/ssa/ssa_vec4i.h | 2 + 4 files changed, 173 insertions(+), 7 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index 8f8b09f23..d70248864 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -154,8 +154,7 @@ void FixedFunction::CodegenDrawSpan() SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - //*loop.dest = 
LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - colors[i] = source[spot64 * 4].load_vec4ub() * light / 256; + colors[i] = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); @@ -181,9 +180,7 @@ void FixedFunction::CodegenDrawSpan() SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - //*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); - SSAVec4i color = source[spot64 * 4].load_vec4ub(); - color = color * light / 256; + SSAVec4i color = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); data[index * 4].store_vec4ub(color); stack_index.store(index + 1); @@ -200,6 +197,122 @@ void FixedFunction::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } +SSAInt FixedFunction::calc_light_multiplier(SSAInt light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +SSAVec4i FixedFunction::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); +} + +SSAVec4i FixedFunction::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); +} + +SSAVec4i FixedFunction::shade_bgra_simple(SSAVec4i color, SSAInt light) +{ + color = color * light / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) +{ + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; + + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * 
constants.desaturate; + + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; + + color = (color * inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; + + return color.insert(3, alpha); +} + +SSAVec4i FixedFunction::blend_copy(SSAVec4i fg) +{ + return fg; +} + +SSAVec4i FixedFunction::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i FixedFunction::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0].load_vec4ub(); + SSAVec4i p01 = col0[y1].load_vec4ub(); + SSAVec4i p10 = col1[y0].load_vec4ub(); + SSAVec4i p11 = col1[y1].load_vec4ub(); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + +SSAVec4i 
FixedFunction::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) { std::vector elements; diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 3bbf05abe..40236d233 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -6,6 +6,7 @@ #include "r_compiler/ssa/ssa_vec8s.h" #include "r_compiler/ssa/ssa_vec16ub.h" #include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_int_ptr.h" #include "r_compiler/ssa/ssa_short.h" #include "r_compiler/ssa/ssa_ubyte_ptr.h" #include "r_compiler/ssa/ssa_vec4f_ptr.h" @@ -57,8 +58,30 @@ struct RenderArgs uint32_t light; uint32_t srcalpha; uint32_t destalpha; - //ShadeConstants _shade_constants; - //int32_t nearest_filter; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + 
+class SSAShadeConstants +{ +public: + SSAVec4i light; + SSAVec4i fade; + SSAInt desaturate; }; class FixedFunction @@ -71,6 +94,24 @@ public: private: void CodegenDrawSpan(); + // LightBgra + SSAInt calc_light_multiplier(SSAInt light); + SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); + SSAVec4i shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors); + SSAVec4i shade_bgra_simple(SSAVec4i color, SSAInt light); + SSAVec4i shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants); + + // BlendBgra + SSAVec4i blend_copy(SSAVec4i fg); + SSAVec4i blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); + + // SampleBgra + SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); + SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); + static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); RenderProgram mProgram; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 80e07c8d4..d8e31276c 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -60,6 +60,16 @@ SSAInt SSAVec4i::operator[](SSAInt index) return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); } +SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::insert(int index, SSAInt value) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, 
SSAScope::hint())); +} + llvm::Type *SSAVec4i::llvm_type() { return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index d19f1d1aa..a654a87ae 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -19,6 +19,8 @@ public: explicit SSAVec4i(llvm::Value *v); SSAVec4i(SSAVec4f f32); SSAInt operator[](SSAInt index); + SSAVec4i insert(SSAInt index, SSAInt value); + SSAVec4i insert(int index, SSAInt value); static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); From 3aea3a0beedcfcedf793a5858dba93645da46bef Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 28 Sep 2016 18:49:39 +0200 Subject: [PATCH 07/15] Fully implemented codegen for DrawSpan --- .../fixedfunction/fixedfunction.cpp | 1310 +++-------------- src/r_compiler/fixedfunction/fixedfunction.h | 140 +- src/r_compiler/ssa/ssa_short.cpp | 5 + src/r_compiler/ssa/ssa_short.h | 2 + src/r_compiler/ssa/ssa_vec4i.cpp | 12 + src/r_compiler/ssa/ssa_vec4i.h | 1 + src/r_draw_rgba.cpp | 92 +- 7 files changed, 337 insertions(+), 1225 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index d70248864..cc53a069a 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -92,6 +92,8 @@ void *RenderProgram::PointerToFunction(const char *name) return mEngine->getPointerToFunction(function); } +///////////////////////////////////////////////////////////////////////////// + FixedFunction::FixedFunction() { CodegenDrawSpan(); @@ -110,84 +112,8 @@ void FixedFunction::CodegenDrawSpan() function.add_parameter(GetRenderArgsStruct(mProgram.context())); function.create_public(); - SSAStack stack_index, stack_xfrac, stack_yfrac; - - SSAValue args = 
function.parameter(0); - SSAUBytePtr destorg = args[0][0].load(); - SSAUBytePtr source = args[0][1].load(); - SSAInt destpitch = args[0][2].load(); - stack_xfrac.store(args[0][3].load()); - stack_yfrac.store(args[0][4].load()); - SSAInt xstep = args[0][5].load(); - SSAInt ystep = args[0][6].load(); - SSAInt x1 = args[0][7].load(); - SSAInt x2 = args[0][8].load(); - SSAInt y = args[0][9].load(); - SSAInt xbits = args[0][10].load(); - SSAInt ybits = args[0][11].load(); - SSAInt light = args[0][12].load(); - SSAInt srcalpha = args[0][13].load(); - SSAInt destalpha = args[0][14].load(); - - SSAInt count = x2 - x1 + 1; - SSAUBytePtr data = destorg[(x1 + y * destpitch) * 4]; - - SSAInt yshift = 32 - ybits; - SSAInt xshift = yshift - xbits; - SSAInt xmask = ((SSAInt(1) << xbits) - 1) << ybits; - //is_64x64 = xbits == 6 && ybits == 6; - - SSAInt sseLength = count / 4; - stack_index.store(0); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - loop.loop_block(index < sseLength); - - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - { - SSAInt xfrac = stack_xfrac.load(); - SSAInt yfrac = stack_yfrac.load(); - - // 64x64 is the most common case by far, so special case it. - SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - colors[i] = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); - - stack_xfrac.store(xfrac + xstep); - stack_yfrac.store(yfrac + ystep); - } - - SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - data[index * 16].store_unaligned_vec16ub(ssecolors); - - stack_index.store(index + 1); - loop.end_block(); - } - - stack_index.store(sseLength * 4); - { - SSAForBlock loop; - SSAInt index = stack_index.load(); - loop.loop_block(index < count); - - SSAInt xfrac = stack_xfrac.load(); - SSAInt yfrac = stack_yfrac.load(); - - // 64x64 is the most common case by far, so special case it. 
- SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - //SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - SSAVec4i color = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light); - data[index * 4].store_vec4ub(color); - - stack_index.store(index + 1); - stack_xfrac.store(xfrac + xstep); - stack_yfrac.store(yfrac + ystep); - loop.end_block(); - } + DrawSpanCodegen codegen; + codegen.Generate(function.parameter(0)); builder.CreateRetVoid(); @@ -197,122 +123,6 @@ void FixedFunction::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } -SSAInt FixedFunction::calc_light_multiplier(SSAInt light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -SSAVec4i FixedFunction::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_simple(color, light); -} - -SSAVec4i FixedFunction::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_advanced(color, light, constants); -} - -SSAVec4i FixedFunction::shade_bgra_simple(SSAVec4i color, SSAInt light) -{ - color = color * light / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) -{ - SSAInt blue = color[0]; - SSAInt green = color[1]; - SSAInt red = color[2]; - SSAInt alpha = color[3]; - - SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - SSAVec4i inv_light = 256 - light; - SSAVec4i inv_desaturate = 256 - constants.desaturate; - - color = (color * inv_desaturate + intensity) / 256; - color = (constants.fade * inv_light + color * light) / 256; - color = (color * constants.light) / 256; - - return color.insert(3, alpha); -} - -SSAVec4i 
FixedFunction::blend_copy(SSAVec4i fg) -{ - return fg; -} - -SSAVec4i FixedFunction::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // // 255 -> 256 - SSAInt inv_alpha = 256 - alpha; - SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i FixedFunction::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0].load_vec4ub(); - SSAVec4i p01 = col0[y1].load_vec4ub(); - SSAVec4i p10 = col1[y0].load_vec4ub(); - SSAVec4i p11 = col1[y1].load_vec4ub(); - - SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i FixedFunction::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - 
SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); - SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) { std::vector elements; @@ -331,953 +141,297 @@ llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; return llvm::StructType::get(context, elements, false)->getPointerTo(); } -#if 0 +///////////////////////////////////////////////////////////////////////////// 
-GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen) -: program(program), vertex_codegen(vertex_codegen), fragment_codegen(fragment_codegen) +void DrawSpanCodegen::Generate(SSAValue args) { -} + destorg = args[0][0].load(); + source = args[0][1].load(); + destpitch = args[0][2].load(); + stack_xfrac.store(args[0][3].load()); + stack_yfrac.store(args[0][4].load()); + xstep = args[0][5].load(); + ystep = args[0][6].load(); + x1 = args[0][7].load(); + x2 = args[0][8].load(); + y = args[0][9].load(); + xbits = args[0][10].load(); + ybits = args[0][11].load(); + light = args[0][12].load(); + srcalpha = args[0][13].load(); + destalpha = args[0][14].load(); + SSAShort light_alpha = args[0][15].load(); + SSAShort light_red = args[0][16].load(); + SSAShort light_green = args[0][17].load(); + SSAShort light_blue = args[0][18].load(); + SSAShort fade_alpha = args[0][19].load(); + SSAShort fade_red = args[0][20].load(); + SSAShort fade_green = args[0][21].load(); + SSAShort fade_blue = args[0][22].load(); + SSAShort desaturate = args[0][23].load(); + SSAInt flags = args[0][24].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); -llvm::Type *GlslFixedFunction::get_sampler_struct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt32Ty(context)); // width - elements.push_back(llvm::Type::getInt32Ty(context)); // height - elements.push_back(llvm::Type::getInt8PtrTy(context)); // data - return llvm::StructType::get(context, elements, false); -} + count = x2 - x1 + 1; + data = destorg[(x1 + y * destpitch) * 4]; -void GlslFixedFunction::codegen() -{ - codegen_render_scanline(5); - codegen_calc_window_positions(); - 
codegen_calc_polygon_face_direction(); - codegen_calc_polygon_y_range(); - codegen_update_polygon_edge(); - codegen_draw_triangles(5, 5); - codegen_texture(); - codegen_normalize(); - codegen_reflect(); - codegen_max(); - codegen_pow(); - codegen_dot(); - codegen_mix(); -} + yshift = 32 - ybits; + xshift = yshift - xbits; + xmask = ((SSAInt(1) << xbits) - 1) << ybits; -void GlslFixedFunction::codegen_texture() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); + // 64x64 is the most common case by far, so special case it. + is_64x64 = xbits == 6 && ybits == 6; + is_simple_shade = (flags & RenderArgs::simple_shade) == RenderArgs::simple_shade; + is_nearest_filter = (flags & RenderArgs::nearest_filter) == RenderArgs::nearest_filter; - SSAFunction function("fragment_texture"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(get_sampler_struct(program.context())); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAValue sampler_ptr = function.parameter(1); - SSAVec4f pos = function.parameter(2); - - SSAInt width = sampler_ptr[0][0].load(); - SSAInt height = sampler_ptr[0][1].load(); - SSAUBytePtr data = sampler_ptr[0][2].load(); - - SSAPixels4ub_argb_rev pixels(width, height, data); - //builder.CreateRet(pixels.linear_clamp4f(pos).v); - builder.CreateRet(pixels.linear_clamp4f(pos[0], pos[1]).v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_normalize() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_normalize"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAVec4f vec = function.parameter(1); - - // To do: this can probably be done a lot faster with _mm_rsqrt_ss - SSAVec4f vec2 
= vec * vec; - SSAVec4f length3(SSAFloat::sqrt(vec2[0] + vec2[1] + vec2[2])); - SSAVec4f normalized = vec / length3; - builder.CreateRet(normalized.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_reflect() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_reflect"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAVec4f i = function.parameter(1); - SSAVec4f n = function.parameter(2); - - SSAVec4f c = i * n; - SSAFloat dot3 = c[0] + c[1] + c[2]; - SSAVec4f result = i - (2.0f * dot3) * n; - builder.CreateRet(result.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_max() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_max"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.create_private(); - - SSAFloat a = function.parameter(1); - SSAFloat b = function.parameter(2); - - SSAPhi phi; SSAIfBlock branch; - branch.if_block(a >= b); - phi.add_incoming(a); + branch.if_block(is_simple_shade); + LoopShade(true); branch.else_block(); - phi.add_incoming(b); + LoopShade(false); branch.end_block(); - SSAFloat c = phi.create(); - - builder.CreateRet(c.v); - llvm::verifyFunction(*function.func); } -void GlslFixedFunction::codegen_pow() +void DrawSpanCodegen::LoopShade(bool isSimpleShade) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_pow"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - 
function.add_parameter(SSAFloat::llvm_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.create_private(); - - SSAFloat a = function.parameter(1); - SSAFloat b = function.parameter(2); - builder.CreateRet(a.v); - //builder.CreateRet(SSAFloat::pow(a, b).v); - - llvm::verifyFunction(*function.func); + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + LoopFilter(isSimpleShade, true); + branch.else_block(); + LoopFilter(isSimpleShade, false); + branch.end_block(); } -void GlslFixedFunction::codegen_dot() +void DrawSpanCodegen::LoopFilter(bool isSimpleShade, bool isNearestFilter) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_dot"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.create_private(); - - SSAVec4f a = function.parameter(1); - SSAVec4f b = function.parameter(2); - - SSAVec4f c = a * b; - SSAFloat dot3 = c[0] + c[1] + c[2]; - builder.CreateRet(dot3.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_mix() -{ - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("fragment_mix"); - function.add_parameter(fragment_codegen.get_global_struct_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAVec4f::llvm_type()); - function.add_parameter(SSAFloat::llvm_type()); - function.create_private(); - - SSAVec4f v1 = function.parameter(1); - SSAVec4f v2 = function.parameter(2); - SSAFloat t = function.parameter(3); - - SSAVec4f b = t; - SSAVec4f a = 1.0f - b; - SSAVec4f mix = v1 * a + v2 * b; - builder.CreateRet(mix.v); - - llvm::verifyFunction(*function.func); -} - -void GlslFixedFunction::codegen_draw_triangles(int num_vertex_in, int num_vertex_out) -{ - 
llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("draw_triangles"); - function.add_parameter(SSAInt::llvm_type()); // input_width - function.add_parameter(SSAInt::llvm_type()); // input_height - function.add_parameter(SSAUBytePtr::llvm_type()); // input_data - function.add_parameter(SSAInt::llvm_type()); // output_width - function.add_parameter(SSAInt::llvm_type()); // output_height - function.add_parameter(SSAUBytePtr::llvm_type()); // output_data - function.add_parameter(SSAInt::llvm_type()); // viewport_x - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_width - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAVec4fPtr::llvm_type()); // uniforms - function.add_parameter(SSAInt::llvm_type()); // first_vertex - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()->getPointerTo()); // vertex attributes - function.add_parameter(SSAInt::llvm_type()); // core - function.add_parameter(SSAInt::llvm_type()); // num_cores - function.create_public(); - - SSAInt input_width = function.parameter(0); - SSAInt input_height = function.parameter(1); - SSAUBytePtr input_data = function.parameter(2); - SSAInt output_width = function.parameter(3); - SSAInt output_height = function.parameter(4); - SSAUBytePtr output_data = function.parameter(5); - SSAInt viewport_x = function.parameter(6); - SSAInt viewport_y = function.parameter(7); - SSAInt viewport_width = function.parameter(8); - SSAInt viewport_height = function.parameter(9); - SSAVec4fPtr uniforms = function.parameter(10); - SSAInt first_vertex = function.parameter(11); - SSAInt num_vertices = function.parameter(12); - SSAValue vertex_in_ptr = function.parameter(13); - SSAInt core = function.parameter(14); - SSAInt num_cores = function.parameter(15); - - SSAStack 
stack_vertex_index; - SSAValue vertex_globals_ptr = SSAValue::from_llvm(SSAScope::alloca(vertex_codegen.get_global_struct_type())); - std::vector vertex_outs; - for (int i = 0; i < num_vertex_out; i++) - vertex_outs.push_back(SSAVec4fPtr::from_llvm(SSAScope::builder().CreateAlloca(SSAVec4f::llvm_type(), SSAInt(3).v))); - - int num_uniforms = 1; + SSAIfBlock branch; + branch.if_block(is_64x64); { - llvm::Type *type = llvm::ArrayType::get(llvm::VectorType::get(llvm::Type::getFloatTy(program.context()), 4), 4); - llvm::Value *matrix = llvm::UndefValue::get(type); - for (int col = 0; col < 4; col++) - { - SSAVec4f column = uniforms[col].load_unaligned(); - std::vector indexes; - indexes.push_back(col); - matrix = builder.CreateInsertValue(matrix, column.v, indexes); - } - vertex_globals_ptr[0][0].store(matrix); + SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, true); + Loop(sseLength * 4, isSimpleShade, isNearestFilter, true); } - - stack_vertex_index.store(0); - SSAForBlock loop; - SSAInt vertex_index = stack_vertex_index.load(); - loop.loop_block(vertex_index + 2 < num_vertices); - for (int v = 0; v < 3; v++) + branch.else_block(); { - for (int i = 0; i < num_vertex_in; i++) + SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, false); + Loop(sseLength * 4, isSimpleShade, isNearestFilter, false); + } + branch.end_block(); +} + +SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64) +{ + SSAInt sseLength = count / 4; + stack_index.store(0); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < sseLength); + + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) { - SSAValue attribute_ptr = vertex_in_ptr[i].load(); - SSAVec4f vertex_in = SSAVec4f::shuffle(SSAVec4fPtr(attribute_ptr)[first_vertex + vertex_index + v].load_unaligned(), 0, 1, 2, 3); - vertex_globals_ptr[0][num_uniforms + i].store(vertex_in.v); + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + SSAVec4i 
fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); + if (isSimpleShade) + colors[i] = shade_bgra_simple(fg, light); + else + colors[i] = shade_bgra_advanced(fg, light, shade_constants); + + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); } - SSAScope::builder().CreateCall(SSAScope::module()->getFunction((vertex_codegen.shader_prefix() + "main").c_str()), vertex_globals_ptr.v); - for (int i = 0; i < num_vertex_out; i++) + + SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(ssecolors); + + stack_index.store(index + 1); + loop.end_block(); + } + return sseLength; +} + +void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64) +{ + stack_index.store(start); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); + SSAVec4i color; + if (isSimpleShade) + color = shade_bgra_simple(fg, light); + else + color = shade_bgra_advanced(fg, light, shade_constants); + + data[index * 4].store_vec4ub(color); + + stack_index.store(index + 1); + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + loop.end_block(); + } +} + +SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64) +{ + if (isNearestFilter) + { + SSAInt spot; + if (is64x64) + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + else + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + return source[spot * 4].load_vec4ub(); + } + else + { + if (is64x64) { - vertex_outs[i][v].store(vertex_globals_ptr[0][num_uniforms + num_vertex_in + i].load()); + return sample_linear(source, xfrac, yfrac, 26, 26); + } + else + { + return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); } } - - render_polygon(input_width, 
input_height, input_data, output_width, output_height, output_data, viewport_x, viewport_y, viewport_width, viewport_height, 3, vertex_outs, core, num_cores); - - stack_vertex_index.store(vertex_index + 3); - loop.end_block(); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); } -void GlslFixedFunction::codegen_calc_window_positions() +///////////////////////////////////////////////////////////////////////////// + +SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("calc_window_positions"); - function.add_parameter(SSAInt::llvm_type()); // viewport_x - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_width - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // gl_Position - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.create_private(); - SSAInt viewport_x = function.parameter(0); - SSAInt viewport_y = function.parameter(1); - SSAInt viewport_width = function.parameter(2); - SSAInt viewport_height = function.parameter(3); - SSAInt num_vertices = function.parameter(4); - SSAVec4fPtr clip_positions = function.parameter(5); - SSAVec4fPtr window_positions = function.parameter(6); - - SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); - SSAStack stack_transform_index; - stack_transform_index.store(0); - SSAForBlock loop_transform; - SSAInt transform_index = stack_transform_index.load(); - loop_transform.loop_block(transform_index < num_vertices); - { - SSAVec4f clip_pos = clip_positions[transform_index].load(); - SSAVec4f window_pos = viewport.clip_to_window(clip_pos); - window_positions[transform_index].store(window_pos); - - 
stack_transform_index.store(transform_index + 1); - } - loop_transform.end_block(); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + return 256 - (light >> (FRACBITS - 8)); } -void GlslFixedFunction::codegen_calc_polygon_face_direction() +SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("calc_polygon_face_direction"); - function.set_return_type(SSABool::llvm_type()); - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.create_private(); - SSAInt num_vertices = function.parameter(0); - SSAVec4fPtr window_positions = function.parameter(1); - - SSAStack stack_face_direction; - SSAStack stack_face_vertex_index; - stack_face_direction.store(0.0f); - stack_face_vertex_index.store(0); - SSAForBlock loop_face_direction; - SSAInt face_vertex_index = stack_face_vertex_index.load(); - loop_face_direction.loop_block(face_vertex_index < num_vertices); - { - SSAVec4f v0 = window_positions[face_vertex_index].load(); - SSAVec4f v1 = window_positions[(face_vertex_index + 1) % num_vertices].load(); - stack_face_direction.store(stack_face_direction.load() + v0[0] * v1[1] - v1[0] * v0[1]); - stack_face_vertex_index.store(face_vertex_index + 1); - } - loop_face_direction.end_block(); - SSABool front_facing_ccw = (stack_face_direction.load() >= 0.0f); - - builder.CreateRet(front_facing_ccw.v); - llvm::verifyFunction(*function.func); + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); } -void GlslFixedFunction::codegen_calc_polygon_y_range() +SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) { - llvm::IRBuilder<> 
builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("calc_polygon_y_range"); - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_start - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // out_y_end - function.create_private(); - SSAInt viewport_y = function.parameter(0); - SSAInt viewport_height = function.parameter(1); - SSAInt num_vertices = function.parameter(2); - SSAVec4fPtr window_positions = function.parameter(3); - SSAValue out_y_start = function.parameter(4); - SSAValue out_y_end = function.parameter(5); - - SSAStack y_start; - SSAStack y_end; - y_start.store(0x7fffffff); - y_end.store(0); - - SSAStack stack_minmax_index; - stack_minmax_index.store(0); - SSAForBlock loop_minmax; - SSAInt minmax_index = stack_minmax_index.load(); - loop_minmax.loop_block(minmax_index < num_vertices); - { - SSAInt y = SSAInt(window_positions[minmax_index].load()[1] + 0.5f); - y_start.store(ssa_min(y_start.load(), y)); - y_end.store(ssa_max(y_end.load(), y)); - stack_minmax_index.store(minmax_index + 1); - } - loop_minmax.end_block(); - - y_start.store(ssa_max(y_start.load(), viewport_y)); - y_end.store(ssa_min(y_end.load(), viewport_y + viewport_height)); - - out_y_start.store(y_start.load().v); - out_y_end.store(y_end.load().v); - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); } -void GlslFixedFunction::codegen_update_polygon_edge() +SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) { - llvm::IRBuilder<> builder(program.context()); - 
SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("update_polygon_edge"); - function.add_parameter(SSAFloat::llvm_type()); // y_position - function.add_parameter(SSAInt::llvm_type()); // num_vertices - function.add_parameter(SSAVec4fPtr::llvm_type()); // window_pos - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout left_index - function.add_parameter(SSAInt::llvm_type()->getPointerTo()); // inout right_index - function.create_private(); - SSAFloat float_y = function.parameter(0); - SSAInt num_vertices = function.parameter(1); - SSAVec4fPtr window_positions = function.parameter(2); - SSAValue ptr_left_index = function.parameter(3); - SSAValue ptr_right_index = function.parameter(4); - - SSAStack max_iterate; - max_iterate.store(num_vertices); - SSAForBlock loop_left; - SSAInt left_index = ptr_left_index.load(); - SSAInt right_index = ptr_right_index.load(); - SSAInt next_left_index = (left_index + 1) % num_vertices; - SSAFloat left_y0 = window_positions[left_index].load()[1]; - SSAFloat left_y1 = window_positions[next_left_index].load()[1]; - SSABool in_range = (left_y0 >= float_y && left_y1 < float_y) || (left_y1 >= float_y && left_y0 < float_y); - loop_left.loop_block((left_index == right_index || !in_range) && max_iterate.load() > 0); - ptr_left_index.store(next_left_index.v); - max_iterate.store(max_iterate.load() - 1); - loop_left.end_block(); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + color = color * light / 256; + return color.insert(3, 255); } -void GlslFixedFunction::render_polygon( - SSAInt input_width, - SSAInt input_height, - SSAUBytePtr input_data, - SSAInt output_width, - SSAInt output_height, - SSAUBytePtr output_data, - SSAInt viewport_x, - SSAInt viewport_y, - SSAInt viewport_width, - SSAInt viewport_height, - SSAInt num_vertices, - std::vector fragment_ins, - SSAInt core, - SSAInt num_cores) +SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt 
light, const SSAShadeConstants &constants) { - SSAVec4fPtr window_positions = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), num_vertices)); - SSAVec4fPtr left_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); - SSAVec4fPtr right_line_varyings = SSAVec4fPtr::from_llvm(SSAScope::alloca(SSAVec4f::llvm_type(), fragment_ins.size())); + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; - /////////////////////////////////// + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - llvm::Value *calc_window_positions_args[] = { viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, num_vertices.v, fragment_ins[0].v, window_positions.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_window_positions"), calc_window_positions_args); + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; - llvm::Value *calc_polygon_face_direction_args[] = { num_vertices.v, window_positions.v }; - SSABool front_facing_ccw = SSABool::from_llvm(SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_face_direction"), calc_polygon_face_direction_args)); + color = (color * inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; - SSAIfBlock cull_if; - cull_if.if_block(front_facing_ccw); - { - SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); - - SSAStack y_start; - SSAStack y_end; - - llvm::Value *calc_polygon_y_range_args[] = { viewport_y.v, viewport_height.v, num_vertices.v, window_positions.v, y_start.v, y_end.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("calc_polygon_y_range"), calc_polygon_y_range_args); - - y_start.store((y_start.load() + num_cores - core - 1) / num_cores * num_cores + core); // 
find_first_line_for_core - - SSAStack stack_left_index; - SSAStack stack_right_index; - SSAStack stack_int_y; - stack_left_index.store(0); - stack_right_index.store(1); - stack_int_y.store(y_start.load()); - SSAForBlock scanlines_loop; - scanlines_loop.loop_block(stack_int_y.load() < y_end.load()); - { - SSAInt int_y = stack_int_y.load(); - SSAFloat float_y = SSAFloat(int_y) + 0.5f; - - llvm::Value *update_polygon_edge_args0[] = { float_y.v, num_vertices.v, window_positions.v, stack_left_index.v, stack_right_index.v }; - llvm::Value *update_polygon_edge_args1[] = { float_y.v, num_vertices.v, window_positions.v, stack_right_index.v, stack_left_index.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args0); - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("update_polygon_edge"), update_polygon_edge_args1); - - SSAInt left_index = stack_left_index.load(); - SSAInt right_index = stack_right_index.load(); - SSAInt next_left_index = (left_index + 1) % num_vertices; - SSAInt next_right_index = (right_index + 1) % num_vertices; - - SSABarycentricWeight left_weight(viewport, fragment_ins[0][left_index].load(), fragment_ins[0][next_left_index].load()); - SSABarycentricWeight right_weight(viewport, fragment_ins[0][right_index].load(), fragment_ins[0][next_right_index].load()); - - SSAFloat a = left_weight.from_window_y(int_y); - SSAFloat b = right_weight.from_window_y(int_y); - - SSAVec4f left_clip_pos = left_weight.v1 * a + left_weight.v2 * (1.0f - a); - SSAVec4f right_clip_pos = right_weight.v1 * b + right_weight.v2 * (1.0f - b); - - for (size_t i = 0; i + 1 < fragment_ins.size(); i++) - { - left_line_varyings[i].store(fragment_ins[i + 1][left_index].load() * a + fragment_ins[i + 1][next_left_index].load() * (1.0f - a)); - right_line_varyings[i].store(fragment_ins[i + 1][right_index].load() * b + fragment_ins[i + 1][next_right_index].load() * (1.0f - b)); - } - - llvm::Value 
*render_scanline_args[] = { output_width.v, output_height.v, output_data.v, viewport_x.v, viewport_y.v, viewport_width.v, viewport_height.v, int_y.v, left_clip_pos.v, right_clip_pos.v, left_line_varyings.v, right_line_varyings.v, input_width.v, input_height.v, input_data.v }; - SSAScope::builder().CreateCall(SSAScope::module()->getFunction("render_scanline"), render_scanline_args); - - stack_int_y.store(stack_int_y.load() + num_cores); - } - scanlines_loop.end_block(); - } - cull_if.end_block(); + return color.insert(3, alpha); } -void GlslFixedFunction::codegen_render_scanline(int num_varyings) +SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) { - llvm::IRBuilder<> builder(program.context()); - SSAScope ssa_scope(&program.context(), program.module(), &builder); - - SSAFunction function("render_scanline"); - function.add_parameter(SSAInt::llvm_type()); // output_width - function.add_parameter(SSAInt::llvm_type()); // output_height - function.add_parameter(SSAUBytePtr::llvm_type()); // output_data - function.add_parameter(SSAInt::llvm_type()); // viewport_x - function.add_parameter(SSAInt::llvm_type()); // viewport_y - function.add_parameter(SSAInt::llvm_type()); // viewport_width - function.add_parameter(SSAInt::llvm_type()); // viewport_height - function.add_parameter(SSAInt::llvm_type()); // y - function.add_parameter(SSAVec4f::llvm_type()); // left_clip_pos - function.add_parameter(SSAVec4f::llvm_type()); // right_clip_pos - function.add_parameter(SSAVec4fPtr::llvm_type()); // left_line_varyings - function.add_parameter(SSAVec4fPtr::llvm_type()); // right_line_varyings - function.add_parameter(SSAInt::llvm_type()); // input_width - function.add_parameter(SSAInt::llvm_type()); // input_height - function.add_parameter(SSAUBytePtr::llvm_type()); // input_data - function.create_private(); - SSAInt output_width = function.parameter(0); - SSAInt output_height = function.parameter(1); - SSAUBytePtr output_data = function.parameter(2); - SSAInt viewport_x = 
function.parameter(3); - SSAInt viewport_y = function.parameter(4); - SSAInt viewport_width = function.parameter(5); - SSAInt viewport_height = function.parameter(6); - SSAInt y = function.parameter(7); - SSAVec4f left_clip_pos = function.parameter(8); - SSAVec4f right_clip_pos = function.parameter(9); - SSAVec4fPtr left_line_varyings = function.parameter(10); - SSAVec4fPtr right_line_varyings = function.parameter(11); - SSAInt input_width = function.parameter(12); - SSAInt input_height = function.parameter(13); - SSAUBytePtr input_data = function.parameter(14); - - SSAViewport viewport(viewport_x, viewport_y, viewport_width, viewport_height); - - SSAScopeHint hint; - - SSAStack stack_x; - SSAStack stack_xnormalized; - - //////////////////////////////// - // Prepare to render scanline: - - hint.set("prepare"); - OuterData outer_data; - - SSAVec4f left_window_pos = viewport.clip_to_window(left_clip_pos); - SSAVec4f right_window_pos = viewport.clip_to_window(right_clip_pos); - - SSAFloat x0 = left_window_pos[0]; - SSAFloat x1 = right_window_pos[0]; - SSAInt start(ssa_min(x0, x1)); - SSAInt end(ssa_max(x1, x0) + 0.5f); - - start = ssa_max(start, viewport.x); - end = ssa_min(end, viewport.right); - - SSABarycentricWeight weight_scanline(viewport, left_clip_pos, right_clip_pos); - - outer_data.start = start; - outer_data.end = end; - outer_data.input_width = input_width; - outer_data.input_height = input_height; - outer_data.output_width = output_width; - outer_data.output_height = output_height; - outer_data.input_pixels = input_data; - outer_data.output_pixels_line = output_data[output_width * y * 4]; - - outer_data.viewport_x = SSAFloat(viewport.x); - outer_data.viewport_rcp_half_width = viewport.rcp_half_width; - outer_data.dx = weight_scanline.v2[0] - weight_scanline.v1[0]; - outer_data.dw = weight_scanline.v2[3] - weight_scanline.v1[3]; - outer_data.v1w = weight_scanline.v1[3]; - outer_data.v1x = weight_scanline.v1[0]; - outer_data.sse_left_varying_in = 
left_line_varyings; - outer_data.sse_right_varying_in = right_line_varyings; - outer_data.num_varyings = num_varyings; - - outer_data.sampler = SSAScope::alloca(get_sampler_struct(SSAScope::context())); - std::vector index_list; - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - llvm::Value *sampler_width_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); - index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1)); - llvm::Value *sampler_height_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); - index_list[1] = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2)); - llvm::Value *sampler_data_ptr = SSAScope::builder().CreateGEP(outer_data.sampler, index_list); - SSAScope::builder().CreateStore(outer_data.input_width.v, sampler_width_ptr, false); - SSAScope::builder().CreateStore(outer_data.input_height.v, sampler_height_ptr, false); - SSAScope::builder().CreateStore(outer_data.input_pixels.v, sampler_data_ptr, false); - - - SSAVec4i xposinit = SSAVec4i(outer_data.start) + SSAVec4i(0, 1, 2, 3); - stack_x.store(outer_data.start); - stack_xnormalized.store((SSAVec4f(xposinit) + 0.5f - outer_data.viewport_x) * outer_data.viewport_rcp_half_width - 1.0f); - - ///////////////////////////////////////////////////////////////////////// - // First pixels: - - hint.set("firstpixels"); - SSAIfBlock if_block; - if_block.if_block(outer_data.end - outer_data.start > 3); - process_first_pixels(outer_data, stack_x, stack_xnormalized); - if_block.end_block(); - - ///////////////////////////////////////////////////////////////////////// - // Start: for (SSAInt x = start; x < end; x += 4) - - hint.set("loopstart"); - - SSAForBlock for_block; - SSAInt x = stack_x.load(); - for_block.loop_block(x + 3 < outer_data.end); - - 
///////////////////////////////////////////////////////////////////////// - // Loop body - { - SSAVec4f xnormalized = stack_xnormalized.load(); - - hint.set("blendload"); - SSAVec4i desti[4]; - SSAVec16ub dest_block = outer_data.output_pixels_line[x << 2].load_vec16ub(); - SSAVec4i::extend(dest_block, desti[0], desti[1], desti[2], desti[3]); - - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - - hint.set("blendstore"); - outer_data.output_pixels_line[x << 2].store_vec16ub(dest_block); - hint.clear(); - - xnormalized = xnormalized + 4.0f * outer_data.viewport_rcp_half_width; - stack_xnormalized.store(xnormalized); - } - ///////////////////////////////////////////////////////////////////////// - // End: for (SSAInt x = start; x < end; x += 4) - - hint.set("loopend"); - x = x + 4; - stack_x.store(x); - for_block.end_block(); - - ///////////////////////////////////////////////////////////////////////// - // Last pixels: - - hint.set("lastpixels"); - process_last_pixels(outer_data, stack_x, stack_xnormalized); - - builder.CreateRetVoid(); - llvm::verifyFunction(*function.func); + return fg; } -void GlslFixedFunction::process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { - SSAInt x = stack_x.load(); - SSAVec4f xnormalized = stack_xnormalized.load(); - SSAInt offset = x << 2; - - // Find how many pixels we have left until we 16 byte align: - llvm::Value *output_line_align = SSAScope::builder().CreatePtrToInt(outer_data.output_pixels_line.v, llvm::Type::getInt32Ty(SSAScope::context())); - output_line_align = SSAScope::builder().CreateAdd(output_line_align, offset.v); - SSAInt left = 4 - (SSAInt::from_llvm(SSAScope::builder().CreateURem(output_line_align, SSAInt(16).v)) >> 2); - - SSAIfBlock if_block0; - if_block0.if_block(left == 3); - { - SSAVec4i dest[4] = - { - 
outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - outer_data.output_pixels_line[offset + 8].load_vec4ub(), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); - - stack_x.store(x + 3); - stack_xnormalized.store(xnormalized + 3.0f * outer_data.viewport_rcp_half_width); - } - if_block0.else_block(); - { - SSAIfBlock if_block1; - if_block1.if_block(left == 2); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - - stack_x.store(x + 2); - stack_xnormalized.store(xnormalized + 2.0f * outer_data.viewport_rcp_half_width); - } - if_block1.else_block(); - { - SSAIfBlock if_block2; - if_block2.if_block(left == 1); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f 
frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - - stack_x.store(x + 1); - stack_xnormalized.store(xnormalized + outer_data.viewport_rcp_half_width); - } - if_block2.end_block(); - } - if_block1.end_block(); - } - if_block0.end_block(); + SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); } -void GlslFixedFunction::process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized) +SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { - SSAInt x = stack_x.load(); - SSAVec4f xnormalized = stack_xnormalized.load(); - - SSAInt left = outer_data.end - x; - SSAInt offset = x << 2; - SSAIfBlock if_block0; - SSAIfBlock if_block1; - SSAIfBlock if_block2; - if_block0.if_block(left == 3); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - outer_data.output_pixels_line[offset + 8].load_vec4ub(), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - outer_data.output_pixels_line[offset + 8].store_vec4ub(dest[2]); - } - if_block0.else_block(); - if_block1.if_block(left == 2); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - outer_data.output_pixels_line[offset + 4].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - 
SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - outer_data.output_pixels_line[offset + 4].store_vec4ub(dest[1]); - } - if_block1.else_block(); - if_block2.if_block(left == 1); - { - SSAVec4i dest[4] = - { - outer_data.output_pixels_line[offset].load_vec4ub(), - SSAVec4i(0), - SSAVec4i(0), - SSAVec4i(0) - }; - - // To do: do this in a less braindead way - SSAVec16ub dest_block(SSAVec8s(dest[0], dest[1]), SSAVec8s(dest[2], dest[3])); - SSAVec4f frag_colors[4]; - inner_block(outer_data, xnormalized, frag_colors); - blend(frag_colors, dest_block); - SSAVec4i::extend(dest_block, dest[0], dest[1], dest[2], dest[3]); - - outer_data.output_pixels_line[offset].store_vec4ub(dest[0]); - } - if_block2.end_block(); - if_block1.end_block(); - if_block0.end_block(); + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + return color.insert(3, 255); } -void GlslFixedFunction::inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *frag_color) +SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) { - SSAScopeHint hint; - hint.set("varying"); - SSAVec4f a = (xnormalized * data.v1w - data.v1x) * SSAVec4f::rcp(data.dx - xnormalized * data.dw); - SSAVec4f one_minus_a = 1.0f - a; - - llvm::Value *globals_ptr[4]; - for (int i = 0; i < 4; i++) - { - globals_ptr[i] = SSAScope::alloca(fragment_codegen.get_global_struct_type()); - - std::vector index_list; - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - llvm::Value *sampler_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); - 
SSAScope::builder().CreateStore(data.sampler, sampler_ptr, false); - - for (int j = 0; j < data.num_varyings; j++) - { - SSAVec4f field_value = - data.sse_left_varying_in[j].load() * SSAVec4f::shuffle(one_minus_a, i, i, i, i) + - data.sse_right_varying_in[j].load() * SSAVec4f::shuffle(a, i, i, i, i); - index_list.clear(); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)j+1))); - llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); - SSAScope::builder().CreateStore(field_value.v, field_ptr, false); - } - } - - hint.set("fragprogram"); - for (int i = 0; i < 4; i++) - { - SSAScope::builder().CreateCall(SSAScope::module()->getFunction((fragment_codegen.shader_prefix() + "main").c_str()), globals_ptr[i]); - - std::vector index_list; - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)0))); - index_list.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)5))); - llvm::Value *field_ptr = SSAScope::builder().CreateGEP(globals_ptr[i], index_list); - frag_color[i] = SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(field_ptr, false)); - } + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); } -/* -void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) + +SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) { - SSAVec4i desti[4]; - SSAVec4i::extend(dest, desti[0], desti[1], desti[2], desti[3]); - - // Pre-mulitiplied alpha blend: - for (int pixel_index = 0; pixel_index < 4; pixel_index++) - { - SSAVec4f src = SSAVec4f::shuffle(frag_color[pixel_index], 2, 1, 0, 3); - desti[pixel_index] = SSAVec4i(src * 255.0f); - SSAVec4f dest = SSAVec4f(desti[pixel_index]) * (1.0f / 255.0f); - SSAVec4f alpha = SSAVec4f::shuffle(dest, 3, 3, 3, 3); - SSAVec4f resultf = src + dest * 
(1.0f - alpha); - desti[pixel_index] = SSAVec4i(resultf * 255.0f); - } - - dest = SSAVec16ub(SSAVec8s(desti[0], desti[1]), SSAVec8s(desti[2], desti[3])); + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); } -*/ -void GlslFixedFunction::blend(SSAVec4f frag_color[4], SSAVec16ub &dest) + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) { - for (int i = 0; i < 4; i++) - frag_color[i] = SSAVec4f::shuffle(frag_color[i], 2, 1, 0, 3); + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; - // Pre-mulitiplied alpha blend: - SSAVec8s dest0 = SSAVec8s::extendlo(dest); - SSAVec8s dest1 = SSAVec8s::extendhi(dest); + SSAVec4i p00 = col0[y0].load_vec4ub(); + SSAVec4i p01 = col0[y1].load_vec4ub(); + SSAVec4i p10 = col1[y0].load_vec4ub(); + SSAVec4i p11 = col1[y1].load_vec4ub(); - SSAVec8s src0(SSAVec4i(frag_color[0] * 255.0f), SSAVec4i(frag_color[1] * 255.0f)); - SSAVec8s src1(SSAVec4i(frag_color[2] * 255.0f), SSAVec4i(frag_color[3] * 255.0f)); + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; - // Extract and duplicate alpha components: - SSAVec8s alpha0 = SSAVec8s::shuffle(src0, 3, 3, 3, 3, 7, 7, 7, 7); - SSAVec8s alpha1 = SSAVec8s::shuffle(src1, 3, 3, 3, 3, 7, 7, 7, 7); - - // Convert from 0-255 to 0-256 range: - alpha0 = SSAVec8s::max_sse2(alpha0, 255); - alpha1 = SSAVec8s::max_sse2(alpha1, 255); - alpha0 = alpha0 + (alpha0 >> 7); - alpha1 = alpha1 + (alpha1 >> 7); - - SSAVec8s result0 = src0 + ((dest0 * (256 - alpha0)) >> 8); - SSAVec8s result1 = src1 + ((dest1 * (256 - alpha1)) >> 8); - - dest = SSAVec16ub(result0, result1); + 
return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; } -#endif +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); + SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 40236d233..d9b8f042e 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -1,6 +1,7 @@ #pragma once +#include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_vec4f.h" #include "r_compiler/ssa/ssa_vec4i.h" #include "r_compiler/ssa/ssa_vec8s.h" @@ -84,16 +85,9 @@ public: SSAInt desaturate; }; -class FixedFunction +class DrawerCodegen { public: - FixedFunction(); - - void(*DrawSpan)(const RenderArgs *) = nullptr; - -private: - void CodegenDrawSpan(); - // LightBgra SSAInt calc_light_multiplier(SSAInt light); SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); @@ -111,89 +105,57 @@ private: // SampleBgra SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i 
sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); +}; + +class DrawSpanCodegen : public DrawerCodegen +{ +public: + void Generate(SSAValue args); + +private: + void LoopShade(bool isSimpleShade); + void LoopFilter(bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64); + SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + + SSAStack stack_index, stack_xfrac, stack_yfrac; + + SSAUBytePtr destorg; + SSAUBytePtr source; + SSAInt destpitch; + SSAInt xstep; + SSAInt ystep; + SSAInt x1; + SSAInt x2; + SSAInt y; + SSAInt xbits; + SSAInt ybits; + SSAInt light; + SSAInt srcalpha; + SSAInt destalpha; + SSAInt count; + SSAUBytePtr data; + SSAInt yshift; + SSAInt xshift; + SSAInt xmask; + SSABool is_64x64; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; +}; + +class FixedFunction +{ +public: + FixedFunction(); + + void(*DrawSpan)(const RenderArgs *) = nullptr; + +private: + void CodegenDrawSpan(); static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); RenderProgram mProgram; }; - -#if 0 - -class GlslProgram; -class GlslCodeGen; - -class GlslFixedFunction -{ -public: - GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen); - void codegen(); - static llvm::Type *get_sampler_struct(llvm::LLVMContext &context); - -private: - void codegen_draw_triangles(int num_vertex_in, int num_vertex_out); - void codegen_calc_window_positions(); - void codegen_calc_polygon_face_direction(); - void codegen_calc_polygon_y_range(); - void codegen_update_polygon_edge(); - void codegen_texture(); - void codegen_normalize(); - void codegen_reflect(); - void codegen_max(); - void codegen_pow(); - void codegen_dot(); - void codegen_mix(); - - struct OuterData - { - OuterData() : sampler() 
{ } - - SSAInt start; - SSAInt end; - SSAInt input_width; - SSAInt input_height; - SSAInt output_width; - SSAInt output_height; - SSAUBytePtr input_pixels; - SSAUBytePtr output_pixels_line; - - SSAVec4fPtr sse_left_varying_in; - SSAVec4fPtr sse_right_varying_in; - int num_varyings; - SSAVec4f viewport_x; - SSAVec4f viewport_rcp_half_width; - SSAVec4f dx; - SSAVec4f dw; - SSAVec4f v1w; - SSAVec4f v1x; - - llvm::Value *sampler; - }; - - void render_polygon( - SSAInt input_width, - SSAInt input_height, - SSAUBytePtr input_data, - SSAInt output_width, - SSAInt output_height, - SSAUBytePtr output_data, - SSAInt viewport_x, - SSAInt viewport_y, - SSAInt viewport_width, - SSAInt viewport_height, - SSAInt num_vertices, - std::vector fragment_ins, - SSAInt core, - SSAInt num_cores); - - void codegen_render_scanline(int num_varyings); - void process_first_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); - void process_last_pixels(OuterData &outer_data, SSAStack &stack_x, SSAStack &stack_xnormalized); - void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors); - void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest); - - GlslProgram &program; - GlslCodeGen &vertex_codegen; - GlslCodeGen &fragment_codegen; -}; - -#endif diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp index fc8de9449..3fa59b688 100644 --- a/src/r_compiler/ssa/ssa_short.cpp +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -32,6 +32,11 @@ llvm::Type *SSAShort::llvm_type() return llvm::Type::getInt16Ty(SSAScope::context()); } +SSAInt SSAShort::zext_int() +{ + return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); +} + SSAShort operator+(const SSAShort &a, const SSAShort &b) { return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h index ae71a1336..932aafc0e 100644 --- 
a/src/r_compiler/ssa/ssa_short.h +++ b/src/r_compiler/ssa/ssa_short.h @@ -17,6 +17,8 @@ public: static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } static llvm::Type *llvm_type(); + SSAInt zext_int(); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index d8e31276c..1eed7b269 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -49,6 +49,18 @@ SSAVec4i::SSAVec4i(SSAInt i) v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint()); } +SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true))); + v = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint()); +} + SSAVec4i::SSAVec4i(SSAVec4f f32) : v(0) { diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index a654a87ae..c1c9140d7 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -16,6 +16,7 @@ public: SSAVec4i(int constant); SSAVec4i(int constant0, int constant1, int constant2, int constant3); SSAVec4i(SSAInt i); + SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3); explicit SSAVec4i(llvm::Value *v); SSAVec4i(SSAVec4f f32); SSAInt operator[](SSAInt index); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 975739095..665e6b84e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -300,50 +300,43 @@ void DrawerCommandQueue::StopThreads() 
///////////////////////////////////////////////////////////////////////////// -class DrawSpanFFCommand : public DrawerCommand +class DrawSpanLLVMCommand : public DrawerCommand { - fixed_t _xfrac; - fixed_t _yfrac; - fixed_t _xstep; - fixed_t _ystep; - int _x1; - int _x2; - int _y; - int _xbits; - int _ybits; - BYTE * RESTRICT _destorg; - - const uint32_t * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - bool _nearest_filter; - - uint32_t _srcalpha; - uint32_t _destalpha; - + RenderArgs args; FixedFunction *_ff; public: - DrawSpanFFCommand() + DrawSpanLLVMCommand() { - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _xstep = ds_xstep; - _ystep = ds_ystep; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xbits = ds_xbits; - _ybits = ds_ybits; - _destorg = dc_destorg; - - _source = (const uint32_t*)ds_source; - _light = LightBgra::calc_light_multiplier(ds_light); - _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); - - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); + args.xfrac = ds_xfrac; + args.yfrac = ds_yfrac; + args.xstep = ds_xstep; + args.ystep = ds_ystep; + args.x1 = ds_x1; + args.x2 = ds_x2; + args.y = ds_y; + args.xbits = ds_xbits; + args.ybits = ds_ybits; + args.destorg = (uint32_t*)dc_destorg; + args.destpitch = dc_pitch; + args.source = (const uint32_t*)ds_source; + args.light = LightBgra::calc_light_multiplier(ds_light); + args.light_red = ds_shade_constants.light_red; + args.light_green = ds_shade_constants.light_green; + args.light_blue = ds_shade_constants.light_blue; + args.light_alpha = ds_shade_constants.light_alpha; + args.fade_red = ds_shade_constants.fade_red; + args.fade_green = ds_shade_constants.fade_green; + args.fade_blue = ds_shade_constants.fade_blue; + args.fade_alpha = ds_shade_constants.fade_alpha; + args.desaturate = ds_shade_constants.desaturate; + args.srcalpha = 
dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (ds_shade_constants.simple_shade) + args.flags |= RenderArgs::simple_shade; + if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) + args.flags |= RenderArgs::nearest_filter; static FixedFunction ff; _ff = &ff; @@ -351,25 +344,8 @@ public: void Execute(DrawerThread *thread) override { - if (thread->skipped_by_thread(_y)) + if (thread->skipped_by_thread(args.y)) return; - - RenderArgs args; - args.destorg = (uint32_t *)_destorg; - args.source = _source; - args.destpitch = dc_pitch; - args.xfrac = _xfrac; - args.yfrac = _yfrac; - args.xstep = _xstep; - args.ystep = _ystep; - args.x1 = _x1; - args.x2 = _x2; - args.y = _y; - args.xbits = _xbits; - args.ybits = _ybits; - args.light = _light; - args.srcalpha = _srcalpha; - args.destalpha = _destalpha; _ff->DrawSpan(&args); } }; @@ -2777,7 +2753,7 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); /* #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); From bfa291b02f9242f41bfddb1d6a93994ce9b1b6c3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 02:10:14 +0200 Subject: [PATCH 08/15] Create LLVMDrawers class as the external interface to the drawers --- src/CMakeLists.txt | 1 + .../fixedfunction/fixedfunction.cpp | 145 ----------- src/r_compiler/fixedfunction/fixedfunction.h | 76 +----- src/r_compiler/llvmdrawers.cpp | 232 ++++++++++++++++++ src/r_compiler/llvmdrawers.h | 52 ++++ src/r_draw_rgba.cpp | 8 +- src/r_swrenderer.cpp | 11 + src/r_swrenderer.h | 3 + 8 files changed, 302 insertions(+), 226 deletions(-) create mode 100644 src/r_compiler/llvmdrawers.cpp create mode 100644 src/r_compiler/llvmdrawers.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4b81a24f4..508951510 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt 
@@ -1426,6 +1426,7 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp + r_compiler/llvmdrawers.cpp r_compiler/ssa/ssa_bool.cpp r_compiler/ssa/ssa_float.cpp r_compiler/ssa/ssa_float_ptr.cpp diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index cc53a069a..fffd2c885 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -11,151 +11,6 @@ #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_barycentric_weight.h" -RenderProgram::RenderProgram() -{ - using namespace llvm; - - install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { - I_FatalError("LLVM fatal error: %s", reason.c_str()); - }); - - InitializeNativeTarget(); - InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetAsmParser(); - - std::string errorstring; - - std::string targetTriple = sys::getProcessTriple(); - std::string cpuName = sys::getHostCPUName(); - StringMap cpuFeatures; - sys::getHostCPUFeatures(cpuFeatures); - std::string cpuFeaturesStr; - for (const auto &it : cpuFeatures) - { - if (!cpuFeaturesStr.empty()) - cpuFeaturesStr.push_back(' '); - cpuFeaturesStr.push_back(it.getValue() ? 
'+' : '-'); - cpuFeaturesStr += it.getKey(); - } - - //Printf("LLVM target triple: %s\n", targetTriple.c_str()); - //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); - - const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); - if (!target) - I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); - - TargetOptions opt; - auto relocModel = Optional(Reloc::Static); - TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); - if (!machine) - I_FatalError("Could not create LLVM target machine"); - - mContext = std::make_unique(); - - auto moduleOwner = std::make_unique("render", context()); - mModule = moduleOwner.get(); - mModule->setTargetTriple(targetTriple); - mModule->setDataLayout(machine->createDataLayout()); - - EngineBuilder engineBuilder(std::move(moduleOwner)); - engineBuilder.setErrorStr(&errorstring); - engineBuilder.setOptLevel(CodeGenOpt::Aggressive); - engineBuilder.setRelocationModel(Reloc::Static); - engineBuilder.setEngineKind(EngineKind::JIT); - mEngine.reset(engineBuilder.create(machine)); - if (!mEngine) - I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); - - mModulePassManager = std::make_unique(); - mFunctionPassManager = std::make_unique(mModule); - - PassManagerBuilder passManagerBuilder; - passManagerBuilder.OptLevel = 3; - passManagerBuilder.SizeLevel = 0; - passManagerBuilder.Inliner = createFunctionInliningPass(); - passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); - passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); -} - -RenderProgram::~RenderProgram() -{ - mEngine.reset(); - mContext.reset(); -} - -void *RenderProgram::PointerToFunction(const char *name) -{ - llvm::Function *function = mModule->getFunction(name); - if (!function) - return nullptr; - return 
mEngine->getPointerToFunction(function); -} - -///////////////////////////////////////////////////////////////////////////// - -FixedFunction::FixedFunction() -{ - CodegenDrawSpan(); - mProgram.engine()->finalizeObject(); - mProgram.modulePassManager()->run(*mProgram.module()); - - DrawSpan = mProgram.GetProcAddress("DrawSpan"); -} - -void FixedFunction::CodegenDrawSpan() -{ - llvm::IRBuilder<> builder(mProgram.context()); - SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - - SSAFunction function("DrawSpan"); - function.add_parameter(GetRenderArgsStruct(mProgram.context())); - function.create_public(); - - DrawSpanCodegen codegen; - codegen.Generate(function.parameter(0)); - - builder.CreateRetVoid(); - - if (llvm::verifyFunction(*function.func)) - I_FatalError("verifyFunction failed for " __FUNCTION__); - - mProgram.functionPassManager()->run(*function.func); -} - -llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context) -{ - std::vector elements; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; - elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; - elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; - 
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; - elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; - elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; - return llvm::StructType::get(context, elements, false)->getPointerTo(); -} - -///////////////////////////////////////////////////////////////////////////// - void DrawSpanCodegen::Generate(SSAValue args) { destorg = args[0][0].load(); diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index d9b8f042e..4b5bfc8b7 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -1,6 +1,7 @@ #pragma once +#include "r_compiler/llvmdrawers.h" #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_vec4f.h" #include "r_compiler/ssa/ssa_vec4i.h" @@ -17,66 +18,6 @@ #include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/llvm_include.h" -class RenderProgram -{ -public: - RenderProgram(); - ~RenderProgram(); - - template - Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } - - llvm::LLVMContext &context() { return *mContext; } - llvm::Module *module() { return mModule; } - 
llvm::ExecutionEngine *engine() { return mEngine.get(); } - llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } - llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } - -private: - void *PointerToFunction(const char *name); - - std::unique_ptr mContext; - llvm::Module *mModule; - std::unique_ptr mEngine; - std::unique_ptr mModulePassManager; - std::unique_ptr mFunctionPassManager; -}; - -struct RenderArgs -{ - uint32_t *destorg; - const uint32_t *source; - int32_t destpitch; - int32_t xfrac; - int32_t yfrac; - int32_t xstep; - int32_t ystep; - int32_t x1; - int32_t x2; - int32_t y; - int32_t xbits; - int32_t ybits; - uint32_t light; - uint32_t srcalpha; - uint32_t destalpha; - - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - uint32_t flags; - enum Flags - { - simple_shade = 1, - nearest_filter = 2 - }; -}; - class SSAShadeConstants { public: @@ -144,18 +85,3 @@ private: SSABool is_nearest_filter; SSAShadeConstants shade_constants; }; - -class FixedFunction -{ -public: - FixedFunction(); - - void(*DrawSpan)(const RenderArgs *) = nullptr; - -private: - void CodegenDrawSpan(); - - static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); - - RenderProgram mProgram; -}; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp new file mode 100644 index 000000000..408213707 --- /dev/null +++ b/src/r_compiler/llvmdrawers.cpp @@ -0,0 +1,232 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" 
+#include "r_compiler/ssa/ssa_value.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" + +class LLVMProgram +{ +public: + LLVMProgram(); + ~LLVMProgram(); + + void StopLogFatalErrors(); + + template + Func *GetProcAddress(const char *name) { return reinterpret_cast(PointerToFunction(name)); } + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule; } + llvm::ExecutionEngine *engine() { return mEngine.get(); } + llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } + llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } + +private: + void *PointerToFunction(const char *name); + + std::unique_ptr mContext; + llvm::Module *mModule; + std::unique_ptr mEngine; + std::unique_ptr mModulePassManager; + std::unique_ptr mFunctionPassManager; +}; + +class LLVMDrawersImpl : public LLVMDrawers +{ +public: + LLVMDrawersImpl(); + +private: + void CodegenDrawSpan(); + static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); + + LLVMProgram mProgram; +}; + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawers *LLVMDrawers::Singleton = nullptr; + +void LLVMDrawers::Create() +{ + if (!Singleton) + Singleton = new LLVMDrawersImpl(); +} + +void LLVMDrawers::Destroy() +{ + delete Singleton; + Singleton = nullptr; +} + +LLVMDrawers *LLVMDrawers::Instance() +{ + return Singleton; +} + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawersImpl::LLVMDrawersImpl() +{ + CodegenDrawSpan(); + mProgram.engine()->finalizeObject(); + mProgram.modulePassManager()->run(*mProgram.module()); + + DrawSpan = mProgram.GetProcAddress("DrawSpan"); + + mProgram.StopLogFatalErrors(); +} + +void LLVMDrawersImpl::CodegenDrawSpan() +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function("DrawSpan"); + 
function.add_parameter(GetRenderArgsStruct(mProgram.context())); + function.create_public(); + + DrawSpanCodegen codegen; + codegen.Generate(function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); +} + +llvm::Type *LLVMDrawersImpl::GetRenderArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + 
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + +///////////////////////////////////////////////////////////////////////////// + +namespace { static bool LogFatalErrors = false; } + +LLVMProgram::LLVMProgram() +{ + using namespace llvm; + + // We have to be extra careful about this because both LLVM and ZDoom made + // the very unwise decision to hook atexit. To top it off, LLVM decided + // to log something in the atexit handler.. + LogFatalErrors = true; + + install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + if (LogFatalErrors) + I_FatalError("LLVM fatal error: %s", reason.c_str()); + }); + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + std::string errorstring; + + std::string targetTriple = sys::getProcessTriple(); + std::string cpuName = sys::getHostCPUName(); + StringMap cpuFeatures; + sys::getHostCPUFeatures(cpuFeatures); + std::string cpuFeaturesStr; + for (const auto &it : cpuFeatures) + { + if (!cpuFeaturesStr.empty()) + cpuFeaturesStr.push_back(' '); + cpuFeaturesStr.push_back(it.getValue() ? 
'+' : '-'); + cpuFeaturesStr += it.getKey(); + } + + //Printf("LLVM target triple: %s\n", targetTriple.c_str()); + //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + + const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); + if (!target) + I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); + + TargetOptions opt; + auto relocModel = Optional(Reloc::Static); + TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); + if (!machine) + I_FatalError("Could not create LLVM target machine"); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + mModule->setTargetTriple(targetTriple); + mModule->setDataLayout(machine->createDataLayout()); + + EngineBuilder engineBuilder(std::move(moduleOwner)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(Reloc::Static); + engineBuilder.setEngineKind(EngineKind::JIT); + mEngine.reset(engineBuilder.create(machine)); + if (!mEngine) + I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); + + mModulePassManager = std::make_unique(); + mFunctionPassManager = std::make_unique(mModule); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); + passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); +} + +LLVMProgram::~LLVMProgram() +{ + mEngine.reset(); + mContext.reset(); +} + +void *LLVMProgram::PointerToFunction(const char *name) +{ + llvm::Function *function = mModule->getFunction(name); + if (!function) + return nullptr; + return mEngine->getPointerToFunction(function); 
+} + +void LLVMProgram::StopLogFatalErrors() +{ + LogFatalErrors = false; +} diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h new file mode 100644 index 000000000..2ad6c3d52 --- /dev/null +++ b/src/r_compiler/llvmdrawers.h @@ -0,0 +1,52 @@ + +#pragma once + +struct RenderArgs +{ + uint32_t *destorg; + const uint32_t *source; + int32_t destpitch; + int32_t xfrac; + int32_t yfrac; + int32_t xstep; + int32_t ystep; + int32_t x1; + int32_t x2; + int32_t y; + int32_t xbits; + int32_t ybits; + uint32_t light; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + +class LLVMDrawers +{ +public: + virtual ~LLVMDrawers() { } + + static void Create(); + static void Destroy(); + static LLVMDrawers *Instance(); + + void(*DrawSpan)(const RenderArgs *) = nullptr; + +private: + static LLVMDrawers *Singleton; +}; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 665e6b84e..9c2cd6293 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -38,7 +38,7 @@ #include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" -#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/llvmdrawers.h" #include "gi.h" #include "stats.h" @@ -303,7 +303,6 @@ void DrawerCommandQueue::StopThreads() class DrawSpanLLVMCommand : public DrawerCommand { RenderArgs args; - FixedFunction *_ff; public: DrawSpanLLVMCommand() @@ -337,16 +336,13 @@ public: args.flags |= RenderArgs::simple_shade; if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) args.flags |= RenderArgs::nearest_filter; - - static FixedFunction ff; - _ff = &ff; } void Execute(DrawerThread *thread) override { if 
(thread->skipped_by_thread(args.y)) return; - _ff->DrawSpan(&args); + LLVMDrawers::Instance()->DrawSpan(&args); } }; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be847502..b9a9ea7fd 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,6 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" #include "r_draw_rgba.h" +#include "r_compiler/llvmdrawers.h" EXTERN_CVAR(Bool, r_shadercolormaps) @@ -51,6 +52,16 @@ void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +FSoftwareRenderer::FSoftwareRenderer() +{ + LLVMDrawers::Create(); +} + +FSoftwareRenderer::~FSoftwareRenderer() +{ + LLVMDrawers::Destroy(); +} + //========================================================================== // // DCanvas :: Init diff --git a/src/r_swrenderer.h b/src/r_swrenderer.h index f9d5609a0..fc3ec2551 100644 --- a/src/r_swrenderer.h +++ b/src/r_swrenderer.h @@ -5,6 +5,9 @@ struct FSoftwareRenderer : public FRenderer { + FSoftwareRenderer(); + ~FSoftwareRenderer(); + // Can be overridden so that the colormaps for sector color/fade won't be built. 
virtual bool UsesColormap() const override; From efd22346d8ec29dace77fe3b788c59f98b4ab340 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 02:26:36 +0200 Subject: [PATCH 09/15] Fix linear sampling bug --- src/r_compiler/fixedfunction/fixedfunction.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index fffd2c885..c205bc45e 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -256,10 +256,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAInt y0 = frac_y0 >> FRACBITS; SSAInt y1 = frac_y1 >> FRACBITS; - SSAVec4i p00 = col0[y0].load_vec4ub(); - SSAVec4i p01 = col0[y1].load_vec4ub(); - SSAVec4i p10 = col1[y0].load_vec4ub(); - SSAVec4i p11 = col1[y1].load_vec4ub(); + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); SSAInt inv_b = texturefracx; SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; @@ -278,10 +278,10 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt SSAInt x = xfrac >> xbits; SSAInt y = yfrac >> ybits; - SSAVec4i p00 = texture[(y & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)].load_vec4ub(); - SSAVec4i p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); - SSAVec4i p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)].load_vec4ub(); + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 
1) & xmask) << yshift)) * 4].load_vec4ub(); SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; From e5f3c119cdf4c547be763ceacedf629752938014 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 04:01:42 +0200 Subject: [PATCH 10/15] Codegen all DrawSpan variants --- .../fixedfunction/fixedfunction.cpp | 94 +++++++++++++------ src/r_compiler/fixedfunction/fixedfunction.h | 25 ++++- src/r_compiler/llvmdrawers.cpp | 29 ++++-- src/r_compiler/llvmdrawers.h | 9 +- src/r_draw_rgba.cpp | 87 +++++++++++++++-- 5 files changed, 193 insertions(+), 51 deletions(-) diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp index c205bc45e..fc5402a42 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/fixedfunction.cpp @@ -11,7 +11,7 @@ #include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_barycentric_weight.h" -void DrawSpanCodegen::Generate(SSAValue args) +void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) { destorg = args[0][0].load(); source = args[0][1].load(); @@ -51,44 +51,44 @@ void DrawSpanCodegen::Generate(SSAValue args) // 64x64 is the most common case by far, so special case it. 
is_64x64 = xbits == 6 && ybits == 6; - is_simple_shade = (flags & RenderArgs::simple_shade) == RenderArgs::simple_shade; - is_nearest_filter = (flags & RenderArgs::nearest_filter) == RenderArgs::nearest_filter; + is_simple_shade = (flags & DrawSpanArgs::simple_shade) == DrawSpanArgs::simple_shade; + is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == DrawSpanArgs::nearest_filter; SSAIfBlock branch; branch.if_block(is_simple_shade); - LoopShade(true); + LoopShade(variant, true); branch.else_block(); - LoopShade(false); + LoopShade(variant, false); branch.end_block(); } -void DrawSpanCodegen::LoopShade(bool isSimpleShade) +void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade) { SSAIfBlock branch; branch.if_block(is_nearest_filter); - LoopFilter(isSimpleShade, true); + LoopFilter(variant, isSimpleShade, true); branch.else_block(); - LoopFilter(isSimpleShade, false); + LoopFilter(variant, isSimpleShade, false); branch.end_block(); } -void DrawSpanCodegen::LoopFilter(bool isSimpleShade, bool isNearestFilter) +void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter) { SSAIfBlock branch; branch.if_block(is_64x64); { - SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, true); - Loop(sseLength * 4, isSimpleShade, isNearestFilter, true); + SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true); + Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true); } branch.else_block(); { - SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, false); - Loop(sseLength * 4, isSimpleShade, isNearestFilter, false); + SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false); + Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false); } branch.end_block(); } -SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64) +SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool 
is64x64) { SSAInt sseLength = count / 4; stack_index.store(0); @@ -97,24 +97,31 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is SSAInt index = stack_index.load(); loop.loop_block(index < sseLength); + SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + SSAVec4i colors[4]; for (int i = 0; i < 4; i++) { SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); - SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); - if (isSimpleShade) - colors[i] = shade_bgra_simple(fg, light); - else - colors[i] = shade_bgra_advanced(fg, light, shade_constants); + colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant); stack_xfrac.store(xfrac + xstep); stack_yfrac.store(yfrac + ystep); } - SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - data[index * 16].store_unaligned_vec16ub(ssecolors); + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(color); stack_index.store(index + 1); loop.end_block(); @@ -122,7 +129,7 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is return sseLength; } -void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64) +void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) { stack_index.store(start); { @@ -133,13 +140,8 @@ void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilte SSAInt xfrac = stack_xfrac.load(); SSAInt yfrac = stack_yfrac.load(); - SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); - SSAVec4i color; - if (isSimpleShade) - 
color = shade_bgra_simple(fg, light); - else - color = shade_bgra_advanced(fg, light, shade_constants); - + SSAVec4i bgcolor = data[index * 4].load_vec4ub(); + SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); data[index * 4].store_vec4ub(color); stack_index.store(index + 1); @@ -173,6 +175,32 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte } } +SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light); + else + return shade_bgra_advanced(fg, light, shade_constants); +} + +SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant) +{ + switch (variant) + { + default: + case DrawSpanVariant::Opaque: + return blend_copy(fg); + case DrawSpanVariant::Masked: + return blend_alpha_blend(fg, bg); + case DrawSpanVariant::Translucent: + case DrawSpanVariant::AddClamp: + return blend_add(fg, bg, srcalpha, destalpha); + case DrawSpanVariant::MaskedTranslucent: + case DrawSpanVariant::MaskedAddClamp: + return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + } +} + ///////////////////////////////////////////////////////////////////////////// SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) @@ -249,6 +277,14 @@ SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) return color.insert(3, 255); } +SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); + SSAInt inv_alpha = 256 - alpha; + return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; +} + SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) { SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h index 4b5bfc8b7..1c58740d5 100644 
--- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/fixedfunction.h @@ -43,22 +43,37 @@ public: SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); + // Calculates the final alpha values to be used when combined with the source texture alpha channel + SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); + // SampleBgra SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); }; +enum class DrawSpanVariant +{ + Opaque, + Masked, + Translucent, + MaskedTranslucent, + AddClamp, + MaskedAddClamp +}; + class DrawSpanCodegen : public DrawerCodegen { public: - void Generate(SSAValue args); + void Generate(DrawSpanVariant variant, SSAValue args); private: - void LoopShade(bool isSimpleShade); - void LoopFilter(bool isSimpleShade, bool isNearestFilter); - SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64); - void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void LoopShade(DrawSpanVariant variant, bool isSimpleShade); + void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); SSAStack stack_index, stack_xfrac, stack_yfrac; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 408213707..fb4a6d023 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp 
@@ -44,8 +44,8 @@ public: LLVMDrawersImpl(); private: - void CodegenDrawSpan(); - static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); + void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -75,26 +75,37 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - CodegenDrawSpan(); + CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); + CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); + CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); + CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); + CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); + CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); - DrawSpan = mProgram.GetProcAddress("DrawSpan"); + DrawSpan = mProgram.GetProcAddress("DrawSpan"); + DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); + DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); + DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); + DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); + DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); mProgram.StopLogFatalErrors(); } -void LLVMDrawersImpl::CodegenDrawSpan() +void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); - SSAFunction function("DrawSpan"); - function.add_parameter(GetRenderArgsStruct(mProgram.context())); + SSAFunction function(name); + function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); function.create_public(); DrawSpanCodegen codegen; - codegen.Generate(function.parameter(0)); + 
codegen.Generate(variant, function.parameter(0)); builder.CreateRetVoid(); @@ -104,7 +115,7 @@ void LLVMDrawersImpl::CodegenDrawSpan() mProgram.functionPassManager()->run(*function.func); } -llvm::Type *LLVMDrawersImpl::GetRenderArgsStruct(llvm::LLVMContext &context) +llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 2ad6c3d52..53e64032f 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,7 +1,7 @@ #pragma once -struct RenderArgs +struct DrawSpanArgs { uint32_t *destorg; const uint32_t *source; @@ -45,7 +45,12 @@ public: static void Destroy(); static LLVMDrawers *Instance(); - void(*DrawSpan)(const RenderArgs *) = nullptr; + void(*DrawSpan)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; private: static LLVMDrawers *Singleton; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9c2cd6293..8a0a6871a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -302,7 +302,8 @@ void DrawerCommandQueue::StopThreads() class DrawSpanLLVMCommand : public DrawerCommand { - RenderArgs args; +protected: + DrawSpanArgs args; public: DrawSpanLLVMCommand() @@ -333,9 +334,9 @@ public: args.destalpha = dc_destalpha >> (FRACBITS - 8); args.flags = 0; if (ds_shade_constants.simple_shade) - args.flags |= RenderArgs::simple_shade; + args.flags |= DrawSpanArgs::simple_shade; if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) - args.flags |= RenderArgs::nearest_filter; + args.flags |= 
DrawSpanArgs::nearest_filter; } void Execute(DrawerThread *thread) override @@ -346,6 +347,61 @@ public: } }; +class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMasked(&args); + } +}; + +class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanTranslucent(&args); + } +}; + +class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args); + } +}; + +class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanAddClamp(&args); + } +}; + +class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args); + } +}; + ///////////////////////////////////////////////////////////////////////////// class DrawerColumnCommand : public DrawerCommand @@ -2749,39 +2805,58 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { +#if !defined(NO_LLVM) DrawerCommandQueue::QueueCommand(); -/* -#ifdef NO_SSE +#elif defined(NO_SSE) DrawerCommandQueue::QueueCommand(); #else DrawerCommandQueue::QueueCommand(); #endif -*/ } void R_DrawSpanMasked_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanTranslucent_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else 
DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMaskedTranslucent_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanAddClamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMaskedAddClamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_FillSpan_rgba() From 7be25112699a71f0c504607ec90e154bb4fa3a52 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 05:21:43 +0200 Subject: [PATCH 11/15] Add codegen files for walls and columns --- src/CMakeLists.txt | 5 +- .../fixedfunction/drawcolumncodegen.cpp | 15 ++ .../fixedfunction/drawcolumncodegen.h | 26 ++++ .../fixedfunction/drawercodegen.cpp | 135 ++++++++++++++++++ .../{fixedfunction.h => drawercodegen.h} | 50 ------- ...{fixedfunction.cpp => drawspancodegen.cpp} | 129 +---------------- .../fixedfunction/drawspancodegen.h | 54 +++++++ .../fixedfunction/drawwallcodegen.cpp | 15 ++ .../fixedfunction/drawwallcodegen.h | 26 ++++ src/r_compiler/llvmdrawers.cpp | 4 +- 10 files changed, 279 insertions(+), 180 deletions(-) create mode 100644 src/r_compiler/fixedfunction/drawcolumncodegen.cpp create mode 100644 src/r_compiler/fixedfunction/drawcolumncodegen.h create mode 100644 src/r_compiler/fixedfunction/drawercodegen.cpp rename src/r_compiler/fixedfunction/{fixedfunction.h => drawercodegen.h} (61%) rename src/r_compiler/fixedfunction/{fixedfunction.cpp => drawspancodegen.cpp} (57%) create mode 100644 src/r_compiler/fixedfunction/drawspancodegen.h create mode 100644 src/r_compiler/fixedfunction/drawwallcodegen.cpp create mode 100644 src/r_compiler/fixedfunction/drawwallcodegen.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 508951510..41829b996 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1447,7 +1447,10 @@ set 
(PCH_SOURCES r_compiler/ssa/ssa_vec4i_ptr.cpp r_compiler/ssa/ssa_vec8s.cpp r_compiler/ssa/ssa_vec16ub.cpp - r_compiler/fixedfunction/fixedfunction.cpp + r_compiler/fixedfunction/drawercodegen.cpp + r_compiler/fixedfunction/drawspancodegen.cpp + r_compiler/fixedfunction/drawwallcodegen.cpp + r_compiler/fixedfunction/drawcolumncodegen.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp new file mode 100644 index 000000000..4594e2290 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -0,0 +1,15 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/drawcolumncodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args) +{ +} diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h new file mode 100644 index 000000000..0749def7f --- /dev/null +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -0,0 +1,26 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawColumnVariant +{ + Opaque, + Fuzz, + Add, + Translated, + TlatedAdd, + Shaded, + AddClamp, + AddClampTranslated, + SubClamp, + SubClampTranslated, + RevSubClamp, + RevSubClampTranslated +}; + +class DrawColumnCodegen : public DrawerCodegen +{ +public: + void Generate(DrawColumnVariant variant, SSAValue args); +}; diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp new file mode 100644 index 000000000..5da858e27 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp 
@@ -0,0 +1,135 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/drawercodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); +} + +SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); +} + +SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) +{ + color = color * light / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) +{ + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; + + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; + + color = (color * inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; + + return color.insert(3, alpha); +} + +SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) +{ + return fg; +} + +SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i 
color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); +} + +SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); + SSAInt inv_alpha = 256 - alpha; + return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; +} + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << 
yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/drawercodegen.h similarity index 61% rename from src/r_compiler/fixedfunction/fixedfunction.h rename to src/r_compiler/fixedfunction/drawercodegen.h index 1c58740d5..9e0706ed1 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -50,53 +50,3 @@ public: SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); }; - -enum class DrawSpanVariant -{ - Opaque, - Masked, - Translucent, - MaskedTranslucent, - AddClamp, - MaskedAddClamp -}; - -class DrawSpanCodegen : public DrawerCodegen -{ -public: - void Generate(DrawSpanVariant variant, SSAValue args); - -private: - void LoopShade(DrawSpanVariant variant, bool isSimpleShade); - void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); - SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); - void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); - SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool 
is64x64); - SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); - SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); - - SSAStack stack_index, stack_xfrac, stack_yfrac; - - SSAUBytePtr destorg; - SSAUBytePtr source; - SSAInt destpitch; - SSAInt xstep; - SSAInt ystep; - SSAInt x1; - SSAInt x2; - SSAInt y; - SSAInt xbits; - SSAInt ybits; - SSAInt light; - SSAInt srcalpha; - SSAInt destalpha; - SSAInt count; - SSAUBytePtr data; - SSAInt yshift; - SSAInt xshift; - SSAInt xmask; - SSABool is_64x64; - SSABool is_simple_shade; - SSABool is_nearest_filter; - SSAShadeConstants shade_constants; -}; diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp similarity index 57% rename from src/r_compiler/fixedfunction/fixedfunction.cpp rename to src/r_compiler/fixedfunction/drawspancodegen.cpp index fc5402a42..1623c38f2 100644 --- a/src/r_compiler/fixedfunction/fixedfunction.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -1,6 +1,6 @@ #include "i_system.h" -#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" #include "r_compiler/ssa/ssa_for_block.h" @@ -9,7 +9,6 @@ #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -#include "r_compiler/ssa/ssa_barycentric_weight.h" void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) { @@ -200,129 +199,3 @@ SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant varian return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); } } - -///////////////////////////////////////////////////////////////////////////// - -SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr 
basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_simple(color, light); -} - -SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) -{ - SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; - return shade_bgra_advanced(color, light, constants); -} - -SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) -{ - color = color * light / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) -{ - SSAInt blue = color[0]; - SSAInt green = color[1]; - SSAInt red = color[2]; - SSAInt alpha = color[3]; - - SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - SSAVec4i inv_light = 256 - light; - SSAVec4i inv_desaturate = 256 - constants.desaturate; - - color = (color * inv_desaturate + intensity) / 256; - color = (constants.fade * inv_light + color * light) / 256; - color = (color * constants.light) / 256; - - return color.insert(3, alpha); -} - -SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) -{ - return fg; -} - -SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) -{ - SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; - return color.insert(3, 255); -} - -SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); // // 255 -> 256 - SSAInt 
inv_alpha = 256 - alpha; - SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; - return color.insert(3, 255); -} - -SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) -{ - SSAInt alpha = fg[3]; - alpha = alpha + (alpha >> 7); - SSAInt inv_alpha = 256 - alpha; - return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; -} - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) -{ - SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; - SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; - SSAInt y0 = frac_y0 >> FRACBITS; - SSAInt y1 = frac_y1 >> FRACBITS; - - SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); - SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); - SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); - SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); - - SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} - -SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) -{ - SSAInt xshift = (32 - xbits); - SSAInt yshift = (32 - ybits); - SSAInt xmask = (SSAInt(1) << xshift) - 1; - SSAInt ymask = (SSAInt(1) << yshift) - 1; - SSAInt x = xfrac >> xbits; - SSAInt y = yfrac >> ybits; - - SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); - SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); - - SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; - SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; - SSAInt a = 16 - inv_a; - SSAInt b = 16 - inv_b; - - return (p00 * (a * b) + p01 * (inv_a * b) + p10 
* (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; -} diff --git a/src/r_compiler/fixedfunction/drawspancodegen.h b/src/r_compiler/fixedfunction/drawspancodegen.h new file mode 100644 index 000000000..20869ac2f --- /dev/null +++ b/src/r_compiler/fixedfunction/drawspancodegen.h @@ -0,0 +1,54 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawSpanVariant +{ + Opaque, + Masked, + Translucent, + MaskedTranslucent, + AddClamp, + MaskedAddClamp +}; + +class DrawSpanCodegen : public DrawerCodegen +{ +public: + void Generate(DrawSpanVariant variant, SSAValue args); + +private: + void LoopShade(DrawSpanVariant variant, bool isSimpleShade); + void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); + + SSAStack stack_index, stack_xfrac, stack_yfrac; + + SSAUBytePtr destorg; + SSAUBytePtr source; + SSAInt destpitch; + SSAInt xstep; + SSAInt ystep; + SSAInt x1; + SSAInt x2; + SSAInt y; + SSAInt xbits; + SSAInt ybits; + SSAInt light; + SSAInt srcalpha; + SSAInt destalpha; + SSAInt count; + SSAUBytePtr data; + SSAInt yshift; + SSAInt xshift; + SSAInt xmask; + SSABool is_64x64; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; +}; diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp new file mode 100644 index 000000000..0e94c11ed --- /dev/null +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -0,0 +1,15 @@ + +#include "i_system.h" +#include "r_compiler/fixedfunction/drawwallcodegen.h" +#include "r_compiler/ssa/ssa_function.h" 
+#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args) +{ +} diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h new file mode 100644 index 000000000..f514ca8ca --- /dev/null +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -0,0 +1,26 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawWallVariant +{ + Opaque1, // vlinec1 + Opaque4, // vlinec4 + Masked1, // mvlinec1 + Masked4, // mvlinec4 + Add1, // tmvline1_add + Add4, // tmvline4_add + AddClamp1, // tmvline1_addclamp + AddClamp4, // tmvline4_addclamp + SubClamp1, // tmvline1_subclamp + SubClamp4, // tmvline4_subclamp + RevSubClamp1, // tmvline1_revsubclamp + RevSubClamp4, // tmvline4_revsubclamp +}; + +class DrawWallCodegen : public DrawerCodegen +{ +public: + void Generate(DrawWallVariant variant, SSAValue args); +}; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index fb4a6d023..320bfb653 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -1,6 +1,8 @@ #include "i_system.h" -#include "r_compiler/fixedfunction/fixedfunction.h" +#include "r_compiler/fixedfunction/drawspancodegen.h" +#include "r_compiler/fixedfunction/drawwallcodegen.h" +#include "r_compiler/fixedfunction/drawcolumncodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" #include "r_compiler/ssa/ssa_for_block.h" From afab45674ba32901d789a631d858757862650d3d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 29 Sep 2016 07:38:33 +0200 Subject: [PATCH 12/15] Added half of wall codegen --- .../fixedfunction/drawwallcodegen.cpp | 154 +++++++++++++++++- 
.../fixedfunction/drawwallcodegen.h | 49 ++++-- src/r_compiler/llvmdrawers.cpp | 70 +++++++- src/r_compiler/llvmdrawers.h | 46 ++++++ 4 files changed, 304 insertions(+), 15 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 0e94c11ed..65b2224b5 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -10,6 +10,158 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args) +void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args) { + dest = args[0][0].load(); + source[0] = args[0][1].load(); + source[1] = args[0][2].load(); + source[2] = args[0][3].load(); + source[3] = args[0][4].load(); + source2[0] = args[0][5].load(); + source2[1] = args[0][6].load(); + source2[2] = args[0][7].load(); + source2[3] = args[0][8].load(); + pitch = args[0][9].load(); + count = args[0][10].load(); + dest_y = args[0][11].load(); + texturefrac[0] = args[0][12].load(); + texturefrac[1] = args[0][13].load(); + texturefrac[2] = args[0][14].load(); + texturefrac[3] = args[0][15].load(); + texturefracx[0] = args[0][16].load(); + texturefracx[1] = args[0][17].load(); + texturefracx[2] = args[0][18].load(); + texturefracx[3] = args[0][19].load(); + iscale[0] = args[0][20].load(); + iscale[1] = args[0][21].load(); + iscale[2] = args[0][22].load(); + iscale[3] = args[0][23].load(); + textureheight[0] = args[0][24].load(); + textureheight[1] = args[0][25].load(); + textureheight[2] = args[0][26].load(); + textureheight[3] = args[0][27].load(); + light[0] = args[0][28].load(); + light[1] = args[0][29].load(); + light[2] = args[0][30].load(); + light[3] = args[0][31].load(); + srcalpha = args[0][32].load(); + destalpha = args[0][33].load(); + SSAShort light_alpha = args[0][34].load(); + SSAShort light_red = args[0][35].load(); + 
SSAShort light_green = args[0][36].load(); + SSAShort light_blue = args[0][37].load(); + SSAShort fade_alpha = args[0][38].load(); + SSAShort fade_red = args[0][39].load(); + SSAShort fade_green = args[0][40].load(); + SSAShort fade_blue = args[0][41].load(); + SSAShort desaturate = args[0][42].load(); + SSAInt flags = args[0][43].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; + is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; + + /* + count = thread->count_for_thread(command->_dest_y, command->_count); + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + texturefracx = command->_texturefracx; + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + height = command->_textureheight; + one = ((0x80000000 + height - 1) / height) * 2 + 1; + */ + int numColumns = fourColumns ? 
4 : 1; + for (int i = 0; i < numColumns; i++) + { + stack_frac[i].store(texturefrac[i] + iscale[i]);// * skipped_by_thread(dest_y); + fracstep[i] = iscale[i];// * num_cores; + one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; + } + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + LoopShade(variant, fourColumns, true); + branch.else_block(); + LoopShade(variant, fourColumns, false); + branch.end_block(); +} + +void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade) +{ + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + Loop(variant, fourColumns, isSimpleShade, true); + branch.else_block(); + Loop(variant, fourColumns, isSimpleShade, false); + branch.end_block(); +} + +void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter) +{ + int numColumns = fourColumns ? 4 : 1; + + stack_index.store(0); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt frac[4]; + for (int i = 0; i < numColumns; i++) + frac[i] = stack_frac[i].load(); + + SSAInt offset = (dest_y + index) * pitch * 4; + + if (fourColumns) + { + + } + else + { + SSAVec4i bgcolor = dest[offset].load_vec4ub(); + SSAVec4i color = Blend(Shade(Sample(frac[0], isNearestFilter), 0, isSimpleShade), bgcolor, variant); + dest[offset].store_vec4ub(color); + } + + stack_index.store(index + 1); + for (int i = 0; i < numColumns; i++) + stack_frac[i].store(frac[i] + fracstep[i]); + loop.end_block(); + } +} + +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) +{ + // int sample_index() { return ((frac >> FRACBITS) * height) >> FRACBITS; } + return SSAVec4i(0); +} + +SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light[index]); + else + return shade_bgra_advanced(fg, light[index], shade_constants); +} + +SSAVec4i 
DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant) +{ + switch (variant) + { + default: + case DrawWallVariant::Opaque: + return blend_copy(fg); + case DrawWallVariant::Masked: + return blend_alpha_blend(fg, bg); + case DrawWallVariant::Add: + case DrawWallVariant::AddClamp: + return blend_add(fg, bg, srcalpha, destalpha); + case DrawWallVariant::SubClamp: + return blend_sub(fg, bg, srcalpha, destalpha); + case DrawWallVariant::RevSubClamp: + return blend_revsub(fg, bg, srcalpha, destalpha); + } } diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index f514ca8ca..eafc8cf69 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -5,22 +5,45 @@ enum class DrawWallVariant { - Opaque1, // vlinec1 - Opaque4, // vlinec4 - Masked1, // mvlinec1 - Masked4, // mvlinec4 - Add1, // tmvline1_add - Add4, // tmvline4_add - AddClamp1, // tmvline1_addclamp - AddClamp4, // tmvline4_addclamp - SubClamp1, // tmvline1_subclamp - SubClamp4, // tmvline4_subclamp - RevSubClamp1, // tmvline1_revsubclamp - RevSubClamp4, // tmvline4_revsubclamp + Opaque, + Masked, + Add, + AddClamp, + SubClamp, + RevSubClamp }; class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, SSAValue args); + void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args); + +private: + void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); + void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, bool isNearestFilter); + SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); + + SSAStack stack_index, stack_frac[4]; + + SSAUBytePtr dest; + SSAUBytePtr source[4]; + SSAUBytePtr source2[4]; + SSAInt pitch; + SSAInt count; + SSAInt dest_y; + SSAInt texturefrac[4]; + 
SSAInt texturefracx[4]; + SSAInt iscale[4]; + SSAInt textureheight[4]; + SSAInt light[4]; + SSAInt srcalpha; + SSAInt destalpha; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; + + SSAInt fracstep[4]; + SSAInt one[4]; }; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 320bfb653..57c3293bb 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -47,7 +47,10 @@ public: private: void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -83,6 +86,18 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); + CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); + CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); + CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); + CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); + CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); + CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); @@ -93,6 +108,18 @@ 
LLVMDrawersImpl::LLVMDrawersImpl() DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); + vlinec1 = mProgram.GetProcAddress("vlinec1"); + vlinec4 = mProgram.GetProcAddress("vlinec4"); + mvlinec1 = mProgram.GetProcAddress("mvlinec1"); + mvlinec4 = mProgram.GetProcAddress("mvlinec4"); + tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); + tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); + tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); + tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); + tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); + tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); + tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); + tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); mProgram.StopLogFatalErrors(); } @@ -117,11 +144,31 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) mProgram.functionPassManager()->run(*function.func); } +void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.create_public(); + + DrawWallCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); +} + llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { std::vector elements; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; - 
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; @@ -148,6 +195,27 @@ llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) return llvm::StructType::get(context, elements, false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 8; i++) + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 25; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + ///////////////////////////////////////////////////////////////////////////// namespace { static bool LogFatalErrors = false; } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 53e64032f..92f7e9440 100644 --- 
a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,6 +1,39 @@ #pragma once +struct DrawWallArgs +{ + uint32_t *dest; + const uint32_t *source[4]; + const uint32_t *source2[4]; + int32_t pitch; + int32_t count; + int32_t dest_y; + uint32_t texturefrac[4]; + uint32_t texturefracx[4]; + uint32_t iscale[4]; + uint32_t textureheight[4]; + uint32_t light[4]; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + struct DrawSpanArgs { uint32_t *destorg; @@ -52,6 +85,19 @@ public: void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; + void(*vlinec1)(const DrawWallArgs *) = nullptr; + void(*vlinec4)(const DrawWallArgs *) = nullptr; + void(*mvlinec1)(const DrawWallArgs *) = nullptr; + void(*mvlinec4)(const DrawWallArgs *) = nullptr; + void(*tmvline1_add)(const DrawWallArgs *) = nullptr; + void(*tmvline4_add)(const DrawWallArgs *) = nullptr; + void(*tmvline1_addclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline4_addclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline1_subclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline4_subclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline1_revsubclamp)(const DrawWallArgs *) = nullptr; + void(*tmvline4_revsubclamp)(const DrawWallArgs *) = nullptr; + private: static LLVMDrawers *Singleton; }; From 28bb5da181535e5639d655efc44a0b177be5fa72 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Sep 2016 07:27:25 +0200 Subject: [PATCH 13/15] Hooked up LLVM wall drawers --- .../fixedfunction/drawercodegen.cpp | 29 +- src/r_compiler/fixedfunction/drawercodegen.h | 21 ++ .../fixedfunction/drawwallcodegen.cpp | 59 ++-- .../fixedfunction/drawwallcodegen.h | 5 +- 
src/r_compiler/llvmdrawers.cpp | 36 ++- src/r_compiler/llvmdrawers.h | 32 ++- src/r_compiler/ssa/ssa_int.cpp | 11 + src/r_compiler/ssa/ssa_int.h | 3 + src/r_draw_rgba.cpp | 261 ++++++++++++++++++ 9 files changed, 410 insertions(+), 47 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 5da858e27..2cba50121 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -10,6 +10,31 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" +SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread) +{ + return line < thread.pass_start_y || line >= thread.pass_end_y || !(line % thread.num_cores == thread.core); +} + +SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) +{ + SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, 0); + SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; + return pass_skip + core_skip; +} + +SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) +{ + SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, 0); + count = SSAInt::MIN(count, lines_until_pass_end); + SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; + return SSAInt::MAX(c, 0); +} + +SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) +{ + return dest[skipped_by_thread(first_line, thread) * pitch * 4]; +} + SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) { return 256 - (light >> (FRACBITS - 8)); @@ -105,8 +130,8 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 
15; - SSAInt a = 16 - inv_a; + SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = 16 - a; SSAInt b = 16 - inv_b; return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 9e0706ed1..17b36234d 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -18,6 +18,15 @@ #include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/llvm_include.h" +class SSAWorkerThread +{ +public: + SSAInt core; + SSAInt num_cores; + SSAInt pass_start_y; + SSAInt pass_end_y; +}; + class SSAShadeConstants { public: @@ -29,6 +38,18 @@ public: class DrawerCodegen { public: + // Checks if a line is rendered by this thread + SSABool line_skipped_by_thread(SSAInt line, SSAWorkerThread thread); + + // The number of lines to skip to reach the first line to be rendered by this thread + SSAInt skipped_by_thread(SSAInt first_line, SSAWorkerThread thread); + + // The number of lines to be rendered by this thread + SSAInt count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread); + + // Calculate the dest address for the first line to be rendered by this thread + SSAUBytePtr dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread); + // LightBgra SSAInt calc_light_multiplier(SSAInt light); SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 65b2224b5..0ca537723 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -10,7 +10,7 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args) +void 
DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(); source[0] = args[0][1].load(); @@ -60,24 +60,24 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); + thread.core = thread_data[0][0].load(); + thread.num_cores = thread_data[0][1].load(); + thread.pass_start_y = thread_data[0][2].load(); + thread.pass_end_y = thread_data[0][3].load(); + is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; - /* - count = thread->count_for_thread(command->_dest_y, command->_count); - fracstep = command->_iscale * thread->num_cores; - frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); - texturefracx = command->_texturefracx; - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - height = command->_textureheight; - one = ((0x80000000 + height - 1) / height) * 2 + 1; - */ + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + + pitch = pitch * thread.num_cores; + int numColumns = fourColumns ? 
4 : 1; for (int i = 0; i < numColumns; i++) { - stack_frac[i].store(texturefrac[i] + iscale[i]);// * skipped_by_thread(dest_y); - fracstep[i] = iscale[i];// * num_cores; + stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); + fracstep[i] = iscale[i] * thread.num_cores; one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; } @@ -113,16 +113,32 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim for (int i = 0; i < numColumns; i++) frac[i] = stack_frac[i].load(); - SSAInt offset = (dest_y + index) * pitch * 4; + SSAInt offset = index * pitch * 4; if (fourColumns) { + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant); + + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + dest[offset].store_unaligned_vec16ub(color); } else { SSAVec4i bgcolor = dest[offset].load_vec4ub(); - SSAVec4i color = Blend(Shade(Sample(frac[0], isNearestFilter), 0, isSimpleShade), bgcolor, variant); + SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); dest[offset].store_vec4ub(color); } @@ -133,10 +149,17 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim } } -SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) { - // int sample_index() { return ((frac >> FRACBITS) * height) >> FRACBITS; } - return SSAVec4i(0); + if (isNearestFilter) + { + SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; + 
return source[index][sample_index * 4].load_vec4ub(); + } + else + { + return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + } } SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index eafc8cf69..0e1cce5fc 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -16,12 +16,12 @@ enum class DrawWallVariant class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args); + void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); private: void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); - SSAVec4i Sample(SSAInt frac, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); @@ -43,6 +43,7 @@ private: SSABool is_simple_shade; SSABool is_nearest_filter; SSAShadeConstants shade_constants; + SSAWorkerThread thread; SSAInt fracstep[4]; SSAInt one[4]; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 57c3293bb..60727744c 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -51,6 +51,7 @@ private: static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -108,18 +109,18 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); 
DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); - vlinec1 = mProgram.GetProcAddress("vlinec1"); - vlinec4 = mProgram.GetProcAddress("vlinec4"); - mvlinec1 = mProgram.GetProcAddress("mvlinec1"); - mvlinec4 = mProgram.GetProcAddress("mvlinec4"); - tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); - tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); - tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); - tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); - tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); - tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); - tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); - tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); + vlinec1 = mProgram.GetProcAddress("vlinec1"); + vlinec4 = mProgram.GetProcAddress("vlinec4"); + mvlinec1 = mProgram.GetProcAddress("mvlinec1"); + mvlinec4 = mProgram.GetProcAddress("mvlinec4"); + tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); + tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); + tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); + tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); + tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); + tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); + tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); + tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); mProgram.StopLogFatalErrors(); } @@ -151,10 +152,11 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, SSAFunction function(name); function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); function.create_public(); DrawWallCodegen codegen; - codegen.Generate(variant, columns == 
4, function.parameter(0)); + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); @@ -216,6 +218,14 @@ llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) return llvm::StructType::get(context, elements, false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + ///////////////////////////////////////////////////////////////////////////// namespace { static bool LogFatalErrors = false; } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 92f7e9440..b1039cf49 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,6 +1,14 @@ #pragma once +struct WorkerThreadData +{ + int32_t core; + int32_t num_cores; + int32_t pass_start_y; + int32_t pass_end_y; +}; + struct DrawWallArgs { uint32_t *dest; @@ -85,18 +93,18 @@ public: void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; - void(*vlinec1)(const DrawWallArgs *) = nullptr; - void(*vlinec4)(const DrawWallArgs *) = nullptr; - void(*mvlinec1)(const DrawWallArgs *) = nullptr; - void(*mvlinec4)(const DrawWallArgs *) = nullptr; - void(*tmvline1_add)(const DrawWallArgs *) = nullptr; - void(*tmvline4_add)(const DrawWallArgs *) = nullptr; - void(*tmvline1_addclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_addclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline1_subclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_subclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline1_revsubclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_revsubclamp)(const DrawWallArgs *) = nullptr; + void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + 
void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; private: static LLVMDrawers *Singleton; diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 674f44350..8d5a32e4c 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -1,6 +1,7 @@ #include "ssa_int.h" #include "ssa_float.h" +#include "ssa_bool.h" #include "ssa_scope.h" #include "r_compiler/llvm_include.h" @@ -31,6 +32,16 @@ llvm::Type *SSAInt::llvm_type() return llvm::Type::getInt32Ty(SSAScope::context()); } +SSAInt SSAInt::MIN(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); +} + +SSAInt SSAInt::MAX(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index 5e373c62e..d928c41f2 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h 
@@ -16,6 +16,9 @@ public: static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } static llvm::Type *llvm_type(); + static SSAInt MIN(SSAInt a, SSAInt b); + static SSAInt MAX(SSAInt a, SSAInt b); + llvm::Value *v; }; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 8a0a6871a..c76c2c3c5 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -404,6 +404,219 @@ public: ///////////////////////////////////////////////////////////////////////////// +class DrawWall4LLVMCommand : public DrawerCommand +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall4LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.count = dc_count; + args.pitch = dc_pitch; + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + for (int i = 0; i < 4; i++) + { + args.texturefrac[i] = vplce[i]; + args.iscale[i] = vince[i]; + args.texturefracx[i] = buftexturefracx[i]; + args.textureheight[i] = bufheight[i]; + args.source[i] = (const uint32_t *)bufplce[i]; + args.source2[i] = (const uint32_t *)bufplce2[i]; + args.light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) + 
args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec4(&args, &d); + } +}; + +class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec4(&args, &d); + } +}; + +class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_add(&args, &d); + } +}; + +class DrawWallAddClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d); + } +}; + +class DrawWall1LLVMCommand : public DrawerCommand +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall1LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.pitch = dc_pitch; + args.count = dc_count; + args.texturefrac[0] = dc_texturefrac; + args.texturefracx[0] = dc_texturefracx; + args.iscale[0] = dc_iscale; + args.textureheight[0] = 
dc_textureheight; + args.source[0] = (const uint32 *)dc_source; + args.source2[0] = (const uint32 *)dc_source2; + args.light[0] = LightBgra::calc_light_multiplier(dc_light); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec1(&args, &d); + } +}; + +class DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec1(&args, &d); + } +}; + +class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_add(&args, &d); + } +}; + +class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); + } +}; + 
+class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); + } +}; + +///////////////////////////////////////////////////////////////////////////// + class DrawerColumnCommand : public DrawerCommand { public: @@ -2901,7 +3114,11 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DWORD vlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } @@ -2920,72 +3137,116 @@ void queue_wallcommand() void vlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void mvlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) 
vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } From 8765cf2016e4a747a7031b0df5c2522d7e4c9bb5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 1 Oct 2016 06:51:55 +0200 Subject: [PATCH 14/15] Change Windows build to use a precompiled version of LLVM --- .gitignore | 1 + src/CMakeLists.txt | 98 +++++++++++++++++----------------- src/r_compiler/llvmdrawers.cpp | 5 +- 3 files changed, 51 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 1b078ed63..7cc9d9860 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ /build_vc2015-32 /build_vc2015-64 /build +/llvm diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 41829b996..37dd9b5a8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -255,56 +255,44 @@ if( NOT NO_OPENAL ) endif() endif() -# C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm -find_package(LLVM REQUIRED CONFIG) -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") -message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") -llvm_map_components_to_libnames(llvm_libs - analysis - asmparser - asmprinter - bitreader - bitwriter - codegen - core - executionengine - globalisel - instcombine - ipo - irreader - linker - lto - mc - mcdisassembler - 
mcjit - mcparser - mirparser - object - objectyaml - orcjit - passes - scalaropts - selectiondag - support - symbolize - tablegen - target - transformutils - vectorize - x86asmparser - x86asmprinter - x86codegen - x86desc - x86info - x86utils - aarch64asmparser - aarch64asmprinter - aarch64codegen - aarch64desc - aarch64info - aarch64utils) -include_directories(${LLVM_INCLUDE_DIRS}) -set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) +set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo + irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld + object instcombine linker analysis selectiondag scalaropts vectorize executionengine + mc mcdisassembler mcparser mcjit target ) +set( LLVM_COMPONENTS_X86 x86asmprinter x86info x86desc x86utils x86codegen ) +set( LLVM_COMPONENTS_X64 aarch64asmprinter aarch64info aarch64desc aarch64utils aarch64codegen ) + +# Path where it looks for the LLVM compiled files on Windows +set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) + +if( NOT WIN32 ) + # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm + find_package(LLVM REQUIRED CONFIG) + message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ${LLVM_COMPONENTS_X64} ) + include_directories( ${LLVM_INCLUDE_DIRS} ) + set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) +else() + include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) + if( X64 ) + include_directories( "${LLVM_PRECOMPILED_DIR}/64bit/include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/llvm/64bit" ) + set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X64} ) + else() + include_directories( "${LLVM_PRECOMPILED_DIR}/32bit/include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit" ) + set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ) + endif() + 
foreach(buildtype IN ITEMS RELEASE DEBUG) + set( llvm_libs_${buildtype} "${llvm_libs_base}/${buildtype}" ) + set( LLVM_${buildtype}_LIBS "" ) + foreach( llvm_module ${LLVM_ALL_COMPONENTS} ) + find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) + set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) + endforeach( llvm_module ) + endforeach(buildtype) +endif() if( NOT NO_FMOD ) # Search for FMOD include files @@ -1513,6 +1501,16 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") endif() target_link_libraries( zdoom ${ZDOOM_LIBS} gdtoa dumb lzma ) + +if( WIN32 ) + foreach(debuglib ${LLVM_DEBUG_LIBS}) + target_link_libraries( zdoom debug ${debuglib} ) + endforeach(debuglib) + foreach(releaselib ${LLVM_RELEASE_LIBS}) + target_link_libraries( zdoom optimized ${releaselib} ) + endforeach(releaselib) +endif() + include_directories( . g_doom g_heretic diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 60727744c..4f59419ad 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -246,7 +246,6 @@ LLVMProgram::LLVMProgram() InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); - InitializeNativeTargetAsmParser(); std::string errorstring; @@ -263,8 +262,8 @@ LLVMProgram::LLVMProgram() cpuFeaturesStr += it.getKey(); } - //Printf("LLVM target triple: %s\n", targetTriple.c_str()); - //Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + DPrintf(DMSG_SPAMMY, "LLVM target triple: %s\n", targetTriple.c_str()); + DPrintf(DMSG_SPAMMY, "LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); if (!target) From c960742dbd197b9a73ba2ab0b1175e8e5d10655f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 1 Oct 2016 11:47:21 +0200 Subject: [PATCH 15/15] Fix 64 bit compile errors --- 
src/CMakeLists.txt | 24 +++++++++---------- .../fixedfunction/drawcolumncodegen.cpp | 1 + .../fixedfunction/drawercodegen.cpp | 7 +++--- .../fixedfunction/drawspancodegen.cpp | 11 +++++---- .../fixedfunction/drawwallcodegen.cpp | 7 +++--- src/r_compiler/llvm_include.h | 10 ++++++++ src/r_compiler/llvmdrawers.cpp | 1 + src/r_compiler/ssa/ssa_bool.cpp | 2 +- src/r_compiler/ssa/ssa_float.cpp | 2 +- src/r_compiler/ssa/ssa_float.h | 2 +- src/r_compiler/ssa/ssa_float_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_float_ptr.h | 1 + src/r_compiler/ssa/ssa_for_block.cpp | 1 + src/r_compiler/ssa/ssa_for_block.h | 1 - src/r_compiler/ssa/ssa_function.cpp | 2 +- src/r_compiler/ssa/ssa_if_block.cpp | 1 + src/r_compiler/ssa/ssa_if_block.h | 1 - src/r_compiler/ssa/ssa_int.cpp | 2 +- src/r_compiler/ssa/ssa_int.h | 2 +- src/r_compiler/ssa/ssa_int_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_int_ptr.h | 1 + src/r_compiler/ssa/ssa_scope.cpp | 1 + src/r_compiler/ssa/ssa_scope.h | 2 -- src/r_compiler/ssa/ssa_short.cpp | 2 +- src/r_compiler/ssa/ssa_short.h | 2 +- src/r_compiler/ssa/ssa_struct_type.cpp | 1 + src/r_compiler/ssa/ssa_ubyte.cpp | 2 +- src/r_compiler/ssa/ssa_ubyte.h | 2 +- src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 4 ++-- src/r_compiler/ssa/ssa_ubyte_ptr.h | 1 + src/r_compiler/ssa/ssa_value.cpp | 2 +- src/r_compiler/ssa/ssa_vec16ub.cpp | 2 +- src/r_compiler/ssa/ssa_vec16ub.h | 4 ++-- src/r_compiler/ssa/ssa_vec4f.cpp | 7 +++++- src/r_compiler/ssa/ssa_vec4f.h | 5 ++-- src/r_compiler/ssa/ssa_vec4f_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_vec4i.cpp | 18 ++++++++++---- src/r_compiler/ssa/ssa_vec4i.h | 8 ++++--- src/r_compiler/ssa/ssa_vec4i_ptr.cpp | 2 +- src/r_compiler/ssa/ssa_vec4i_ptr.h | 1 + src/r_compiler/ssa/ssa_vec8s.cpp | 6 ++--- src/r_compiler/ssa/ssa_vec8s.h | 4 ++-- 42 files changed, 99 insertions(+), 62 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 37dd9b5a8..ac6de85f6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -258,9 +258,7 @@ endif() set( 
LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld object instcombine linker analysis selectiondag scalaropts vectorize executionengine - mc mcdisassembler mcparser mcjit target ) -set( LLVM_COMPONENTS_X86 x86asmprinter x86info x86desc x86utils x86codegen ) -set( LLVM_COMPONENTS_X64 aarch64asmprinter aarch64info aarch64desc aarch64utils aarch64codegen ) + mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) # Path where it looks for the LLVM compiled files on Windows set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) @@ -270,24 +268,22 @@ if( NOT WIN32 ) find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ${LLVM_COMPONENTS_X64} ) + llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ) include_directories( ${LLVM_INCLUDE_DIRS} ) set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) else() include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) if( X64 ) - include_directories( "${LLVM_PRECOMPILED_DIR}/64bit/include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/llvm/64bit" ) - set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X64} ) + include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" ) else() - include_directories( "${LLVM_PRECOMPILED_DIR}/32bit/include" ) - set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit" ) - set( LLVM_ALL_COMPONENTS ${LLVM_COMPONENTS} ${LLVM_COMPONENTS_X86} ) + include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" ) endif() foreach(buildtype IN ITEMS RELEASE DEBUG) - set( llvm_libs_${buildtype} "${llvm_libs_base}/${buildtype}" ) + set( llvm_libs_${buildtype} 
"${llvm_libs_base}${buildtype}" ) set( LLVM_${buildtype}_LIBS "" ) - foreach( llvm_module ${LLVM_ALL_COMPONENTS} ) + foreach( llvm_module ${LLVM_COMPONENTS} ) find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} ${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) endforeach( llvm_module ) @@ -1492,6 +1488,10 @@ set_source_files_properties( sc_man.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRE set_source_files_properties( ${NOT_COMPILED_SOURCE_FILES} PROPERTIES HEADER_FILE_ONLY TRUE ) if ( WIN32 ) set_source_files_properties( win32/fb_d3d9.cpp win32/fb_d3d9_wipe.cpp PROPERTIES COMPILE_FLAGS ${ZD_FASTMATH_FLAG} ) + + # Suppress C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data + # For some reason using #pragma warning(disable: 4244) is not working.. + set_source_files_properties( ${PCH_SOURCES} PROPERTIES COMPILE_FLAGS /wd4244 ) endif() diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 4594e2290..67d801162 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawcolumncodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 2cba50121..822a81141 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawercodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" @@ -17,17 +18,17 @@ SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line,
SSAWorkerThread threa SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) { - SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, 0); + SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, SSAInt(0)); SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; return pass_skip + core_skip; } SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) { - SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, 0); + SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, SSAInt(0)); count = SSAInt::MIN(count, lines_until_pass_end); SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; - return SSAInt::MAX(c, 0); + return SSAInt::MAX(c, SSAInt(0)); } SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp index 1623c38f2..70ecb0abd 100644 --- a/src/r_compiler/fixedfunction/drawspancodegen.cpp +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" @@ -49,9 +50,9 @@ void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) xmask = ((SSAInt(1) << xbits) - 1) << ybits; // 64x64 is the most common case by far, so special case it. 
- is_64x64 = xbits == 6 && ybits == 6; - is_simple_shade = (flags & DrawSpanArgs::simple_shade) == DrawSpanArgs::simple_shade; - is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == DrawSpanArgs::nearest_filter; + is_64x64 = xbits == SSAInt(6) && ybits == SSAInt(6); + is_simple_shade = (flags & DrawSpanArgs::simple_shade) == SSAInt(DrawSpanArgs::simple_shade); + is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == SSAInt(DrawSpanArgs::nearest_filter); SSAIfBlock branch; branch.if_block(is_simple_shade); @@ -90,7 +91,7 @@ void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bo SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) { SSAInt sseLength = count / 4; - stack_index.store(0); + stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); @@ -165,7 +166,7 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte { if (is64x64) { - return sample_linear(source, xfrac, yfrac, 26, 26); + return sample_linear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); } else { diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 0ca537723..55b17dafe 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include "r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawwallcodegen.h" #include "r_compiler/ssa/ssa_function.h" #include "r_compiler/ssa/ssa_scope.h" @@ -65,8 +66,8 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal thread.pass_start_y = thread_data[0][2].load(); thread.pass_end_y = thread_data[0][3].load(); - is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; - is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; + is_simple_shade = (flags 
& DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade); + is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter); count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); @@ -103,7 +104,7 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim { int numColumns = fourColumns ? 4 : 1; - stack_index.store(0); + stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h index b916bad0e..d1550f38a 100644 --- a/src/r_compiler/llvm_include.h +++ b/src/r_compiler/llvm_include.h @@ -1,6 +1,8 @@ #pragma once +#ifdef _MSC_VER + #if defined(min) #define llvm_min_bug min #undef min @@ -18,6 +20,10 @@ #pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data #pragma warning(disable: 4141) // warning C4141: 'inline': used more than once #pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception +#pragma warning(disable: 4267) // warning C4267: 'return': conversion from 'size_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4244) // warning C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data + +#endif #include #include @@ -39,6 +45,8 @@ #include #include +#ifdef _MSC_VER + #if defined(llvm_min_bug) #define min llvm_min_bug #undef llvm_min_bug @@ -47,3 +55,5 @@ #define max llvm_max_bug #undef llvm_max_bug #endif + +#endif diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 4f59419ad..7691af35b 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -1,5 +1,6 @@ #include "i_system.h" +#include 
"r_compiler/llvm_include.h" #include "r_compiler/fixedfunction/drawspancodegen.h" #include "r_compiler/fixedfunction/drawwallcodegen.h" #include "r_compiler/fixedfunction/drawcolumncodegen.h" diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp index 101323911..bfd9ba5ab 100644 --- a/src/r_compiler/ssa/ssa_bool.cpp +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_bool.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSABool::SSABool() : v(0) diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp index 87488af74..4ec5c516d 100644 --- a/src/r_compiler/ssa/ssa_float.cpp +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -1,8 +1,8 @@ +#include "r_compiler/llvm_include.h" #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAFloat::SSAFloat() : v(0) diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h index 2349ab877..0edbcfcba 100644 --- a/src/r_compiler/ssa/ssa_float.h +++ b/src/r_compiler/ssa/ssa_float.h @@ -11,7 +11,7 @@ class SSAFloat public: SSAFloat(); SSAFloat(SSAInt i); - SSAFloat(float constant); + explicit SSAFloat(float constant); explicit SSAFloat(llvm::Value *v); static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } static llvm::Type *llvm_type(); diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp index 6a1409271..582821ca0 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.cpp +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_float_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAFloatPtr::SSAFloatPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h index a4318e027..f29b2de3f 100644 --- a/src/r_compiler/ssa/ssa_float_ptr.h +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -16,6 +16,7 @@ public: static 
SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } static llvm::Type *llvm_type(); SSAFloatPtr operator[](SSAInt index) const; + SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAFloat load() const; SSAVec4f load_vec4f() const; SSAVec4f load_unaligned_vec4f() const; diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp index ce9328607..f7cd6ad0b 100644 --- a/src/r_compiler/ssa/ssa_for_block.cpp +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_for_block.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h index 58803dee5..4c1952c14 100644 --- a/src/r_compiler/ssa/ssa_for_block.h +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -2,7 +2,6 @@ #pragma once #include "ssa_bool.h" -#include "r_compiler/llvm_include.h" class SSAForBlock { diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp index aee4de5a9..a326beaf7 100644 --- a/src/r_compiler/ssa/ssa_function.cpp +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_function.h" #include "ssa_int.h" #include "ssa_scope.h" #include "ssa_value.h" -#include "r_compiler/llvm_include.h" SSAFunction::SSAFunction(const std::string name) : name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())), func() diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp index e2de9ecad..7187a0759 100644 --- a/src/r_compiler/ssa/ssa_if_block.cpp +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_if_block.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h index 98c534a86..4f0c8a26b 100644 --- a/src/r_compiler/ssa/ssa_if_block.h +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -3,7 +3,6 @@ #include "ssa_bool.h" 
#include "ssa_phi.h" -#include "r_compiler/llvm_include.h" class SSAIfBlock { diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 8d5a32e4c..3d9cb22bd 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_int.h" #include "ssa_float.h" #include "ssa_bool.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAInt::SSAInt() : v(0) diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index d928c41f2..c0f46e4b6 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -10,7 +10,7 @@ class SSAInt { public: SSAInt(); - SSAInt(int constant); + explicit SSAInt(int constant); SSAInt(SSAFloat f); explicit SSAInt(llvm::Value *v); static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp index 3c2637073..974645d08 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.cpp +++ b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_int_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAIntPtr::SSAIntPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h index 20e024a31..c75ed6a8d 100644 --- a/src/r_compiler/ssa/ssa_int_ptr.h +++ b/src/r_compiler/ssa/ssa_int_ptr.h @@ -16,6 +16,7 @@ public: static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } static llvm::Type *llvm_type(); SSAIntPtr operator[](SSAInt index) const; + SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAInt load() const; SSAVec4i load_vec4i() const; SSAVec4i load_unaligned_vec4i() const; diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp index f9d16f188..e5d34a203 100644 --- a/src/r_compiler/ssa/ssa_scope.cpp +++ b/src/r_compiler/ssa/ssa_scope.cpp @@ -1,4 +1,5 @@ +#include 
"r_compiler/llvm_include.h" #include "ssa_scope.h" #include "ssa_int.h" diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h index d184643ad..ad080fde6 100644 --- a/src/r_compiler/ssa/ssa_scope.h +++ b/src/r_compiler/ssa/ssa_scope.h @@ -1,8 +1,6 @@ #pragma once -#include "r_compiler/llvm_include.h" - class SSAInt; class SSAScope diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp index 3fa59b688..017f3002a 100644 --- a/src/r_compiler/ssa/ssa_short.cpp +++ b/src/r_compiler/ssa/ssa_short.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_short.h" #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAShort::SSAShort() : v(0) diff --git a/src/r_compiler/ssa/ssa_short.h b/src/r_compiler/ssa/ssa_short.h index 932aafc0e..4a5343402 100644 --- a/src/r_compiler/ssa/ssa_short.h +++ b/src/r_compiler/ssa/ssa_short.h @@ -11,7 +11,7 @@ class SSAShort { public: SSAShort(); - SSAShort(int constant); + explicit SSAShort(int constant); SSAShort(SSAFloat f); explicit SSAShort(llvm::Value *v); static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp index 4a79768ce..d4ae2acb1 100644 --- a/src/r_compiler/ssa/ssa_struct_type.cpp +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -1,4 +1,5 @@ +#include "r_compiler/llvm_include.h" #include "ssa_struct_type.h" #include "ssa_scope.h" diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp index 04db4fd28..3204d064d 100644 --- a/src/r_compiler/ssa/ssa_ubyte.cpp +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_ubyte.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAUByte::SSAUByte() : v(0) diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h index f1e12afba..ef878b325 100644 --- 
a/src/r_compiler/ssa/ssa_ubyte.h +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -8,7 +8,7 @@ class SSAUByte { public: SSAUByte(); - SSAUByte(unsigned char constant); + explicit SSAUByte(unsigned char constant); explicit SSAUByte(llvm::Value *v); static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } static llvm::Type *llvm_type(); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index b2408066e..98bf27c46 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_ubyte_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAUBytePtr::SSAUBytePtr() : v(0) @@ -38,7 +38,7 @@ SSAVec4i SSAUBytePtr::load_vec4ub() const v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); SSAVec4i v4i = SSAVec4i::from_llvm(v); - SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 /* llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h index 5b68ee1ad..c084068bc 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.h +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -18,6 +18,7 @@ public: static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); } static llvm::Type *llvm_type(); SSAUBytePtr operator[](SSAInt index) const; + SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAUByte load() const; SSAVec4i load_vec4ub() const; SSAVec8s 
load_vec8s() const; diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp index 877420fc5..c37b7f4c1 100644 --- a/src/r_compiler/ssa/ssa_value.cpp +++ b/src/r_compiler/ssa/ssa_value.cpp @@ -1,8 +1,8 @@ +#include "r_compiler/llvm_include.h" #include "ssa_value.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAValue SSAValue::load() { diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp index f18d68718..4a077382e 100644 --- a/src/r_compiler/ssa/ssa_vec16ub.cpp +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec16ub.h" #include "ssa_vec8s.h" #include "ssa_vec4i.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec16ub::SSAVec16ub() : v(0) diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h index e4cfcdc87..8f48c0c49 100644 --- a/src/r_compiler/ssa/ssa_vec16ub.h +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -11,8 +11,8 @@ class SSAVec16ub { public: SSAVec16ub(); - SSAVec16ub(unsigned char constant); - SSAVec16ub( + explicit SSAVec16ub(unsigned char constant); + explicit SSAVec16ub( unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); explicit SSAVec16ub(llvm::Value *v); diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp index e002018fe..dc6f9a716 100644 --- a/src/r_compiler/ssa/ssa_vec4f.cpp +++ b/src/r_compiler/ssa/ssa_vec4f.cpp @@ -1,10 +1,10 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4f.h" #include "ssa_vec4i.h" #include "ssa_float.h" #include "ssa_int.h" #include "ssa_scope.h" 
-#include "r_compiler/llvm_include.h" SSAVec4f::SSAVec4f() : v(0) @@ -75,6 +75,11 @@ SSAFloat SSAVec4f::operator[](SSAInt index) const return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); } +SSAFloat SSAVec4f::operator[](int index) const +{ + return (*this)[SSAInt(index)]; +} + SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) { return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index)))); diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h index 5e3397e58..6d4ae6335 100644 --- a/src/r_compiler/ssa/ssa_vec4f.h +++ b/src/r_compiler/ssa/ssa_vec4f.h @@ -12,13 +12,14 @@ class SSAVec4f { public: SSAVec4f(); - SSAVec4f(float constant); - SSAVec4f(float constant0, float constant1, float constant2, float constant3); + explicit SSAVec4f(float constant); + explicit SSAVec4f(float constant0, float constant1, float constant2, float constant3); SSAVec4f(SSAFloat f); SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3); explicit SSAVec4f(llvm::Value *v); SSAVec4f(SSAVec4i i32); SSAFloat operator[](SSAInt index) const; + SSAFloat operator[](int index) const; static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); static SSAVec4f bitcast(SSAVec4i i32); static SSAVec4f sqrt(SSAVec4f f); diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp index 6a197ec90..e0ed8bc86 100644 --- a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4f_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4fPtr::SSAVec4fPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 1eed7b269..3b508412f 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -1,11 
+1,11 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4i.h" #include "ssa_vec4f.h" #include "ssa_vec8s.h" #include "ssa_vec16ub.h" #include "ssa_int.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4i::SSAVec4i() : v(0) @@ -67,11 +67,16 @@ SSAVec4i::SSAVec4i(SSAVec4f f32) v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); } -SSAInt SSAVec4i::operator[](SSAInt index) +SSAInt SSAVec4i::operator[](SSAInt index) const { return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); } +SSAInt SSAVec4i::operator[](int index) const +{ + return (*this)[SSAInt(index)]; +} + SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) { return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint())); @@ -82,6 +87,11 @@ SSAVec4i SSAVec4i::insert(int index, SSAInt value) return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint())); } +SSAVec4i SSAVec4i::insert(int index, int value) +{ + return insert(index, SSAInt(value)); +} + llvm::Type *SSAVec4i::llvm_type() { return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); @@ -125,12 +135,12 @@ void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &ou SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) { - return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 } SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) { - return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, 0, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 } SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b) diff --git 
a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index c1c9140d7..89cda1646 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -13,15 +13,17 @@ class SSAVec4i { public: SSAVec4i(); - SSAVec4i(int constant); - SSAVec4i(int constant0, int constant1, int constant2, int constant3); + explicit SSAVec4i(int constant); + explicit SSAVec4i(int constant0, int constant1, int constant2, int constant3); SSAVec4i(SSAInt i); SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3); explicit SSAVec4i(llvm::Value *v); SSAVec4i(SSAVec4f f32); - SSAInt operator[](SSAInt index); + SSAInt operator[](SSAInt index) const; + SSAInt operator[](int index) const; SSAVec4i insert(SSAInt index, SSAInt value); SSAVec4i insert(int index, SSAInt value); + SSAVec4i insert(int index, int value); static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp index 7138c30d2..f75ccd43f 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -1,7 +1,7 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec4i_ptr.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec4iPtr::SSAVec4iPtr() : v(0) diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h index 56937b1cc..257b4e34f 100644 --- a/src/r_compiler/ssa/ssa_vec4i_ptr.h +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h @@ -15,6 +15,7 @@ public: static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); } static llvm::Type *llvm_type(); SSAVec4iPtr operator[](SSAInt index) const; + SSAVec4iPtr operator[](int index) const { return (*this)[SSAInt(index)]; } SSAVec4i load() const; SSAVec4i load_unaligned() const; void store(const SSAVec4i &new_value); diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp 
b/src/r_compiler/ssa/ssa_vec8s.cpp index d61a4c4a9..6016b551f 100644 --- a/src/r_compiler/ssa/ssa_vec8s.cpp +++ b/src/r_compiler/ssa/ssa_vec8s.cpp @@ -1,9 +1,9 @@ +#include "r_compiler/llvm_include.h" #include "ssa_vec8s.h" #include "ssa_vec4i.h" #include "ssa_vec16ub.h" #include "ssa_scope.h" -#include "r_compiler/llvm_include.h" SSAVec8s::SSAVec8s() : v(0) @@ -77,12 +77,12 @@ SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, i SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) { - return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 } SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) { - return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, 0, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 } /* diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h index aded358dd..40263773b 100644 --- a/src/r_compiler/ssa/ssa_vec8s.h +++ b/src/r_compiler/ssa/ssa_vec8s.h @@ -11,8 +11,8 @@ class SSAVec8s { public: SSAVec8s(); - SSAVec8s(short constant); - SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); + explicit SSAVec8s(short constant); + explicit SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); explicit SSAVec8s(llvm::Value *v); SSAVec8s(SSAVec4i i0, SSAVec4i i1); static SSAVec8s bitcast(SSAVec16ub i8);