diff --git a/.gitignore b/.gitignore index 1b078ed632..7cc9d98607 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ /build_vc2015-32 /build_vc2015-64 /build +/llvm diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a276b65fde..acbec1612b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -272,6 +272,41 @@ if( NOT NO_OPENAL ) endif() endif() +set( LLVM_COMPONENTS core support asmparser asmprinter bitreader codegen passes ipo + irreader transformutils instrumentation profiledata debuginfocodeview runtimedyld + object instcombine linker analysis selectiondag scalaropts vectorize executionengine + mc mcdisassembler mcparser mcjit target x86asmprinter x86info x86desc x86utils x86codegen ) + +# Path where it looks for the LLVM compiled files on Windows +set( LLVM_PRECOMPILED_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../llvm" ) + +if( NOT WIN32 ) + # Example LLVM_DIR folder: C:/Development/Environment/Src/llvm-3.9.0/build/lib/cmake/llvm + find_package(LLVM REQUIRED CONFIG) + message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + llvm_map_components_to_libnames( llvm_libs ${LLVM_COMPONENTS} ) + include_directories( ${LLVM_INCLUDE_DIRS} ) + set( ZDOOM_LIBS ${ZDOOM_LIBS} ${llvm_libs} ) +else() + include_directories( "${LLVM_PRECOMPILED_DIR}/include" ) + if( X64 ) + include_directories( "${LLVM_PRECOMPILED_DIR}/64bit-include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/64bit-" ) + else() + include_directories( "${LLVM_PRECOMPILED_DIR}/32bit-include" ) + set( llvm_libs_base "${LLVM_PRECOMPILED_DIR}/32bit-" ) + endif() + foreach(buildtype IN ITEMS RELEASE DEBUG) + set( llvm_libs_${buildtype} "${llvm_libs_base}${buildtype}" ) + set( LLVM_${buildtype}_LIBS "" ) + foreach( llvm_module ${LLVM_COMPONENTS} ) + find_library( LLVM_${llvm_module}_LIBRARY_${buildtype} LLVM${llvm_module} PATHS ${llvm_libs_${buildtype}} ) + set( LLVM_${buildtype}_LIBS ${LLVM_${buildtype}_LIBS} 
${LLVM_${llvm_module}_LIBRARY_${buildtype}} ) + endforeach( llvm_module ) + endforeach(buildtype) +endif() + if( NOT NO_FMOD ) # Search for FMOD include files if( NOT WIN32 ) @@ -860,6 +895,9 @@ file( GLOB HEADER_FILES posix/*.h posix/cocoa/*.h posix/sdl/*.h + r_compiler/*.h + r_compiler/ssa/*.h + r_compiler/fixedfunction/*.h r_data/*.h resourcefiles/*.h sfmt/*.h @@ -1393,6 +1431,31 @@ set (PCH_SOURCES fragglescript/t_spec.cpp fragglescript/t_variable.cpp fragglescript/t_cmd.cpp + r_compiler/llvmdrawers.cpp + r_compiler/ssa/ssa_bool.cpp + r_compiler/ssa/ssa_float.cpp + r_compiler/ssa/ssa_float_ptr.cpp + r_compiler/ssa/ssa_for_block.cpp + r_compiler/ssa/ssa_function.cpp + r_compiler/ssa/ssa_if_block.cpp + r_compiler/ssa/ssa_int.cpp + r_compiler/ssa/ssa_int_ptr.cpp + r_compiler/ssa/ssa_short.cpp + r_compiler/ssa/ssa_scope.cpp + r_compiler/ssa/ssa_struct_type.cpp + r_compiler/ssa/ssa_ubyte.cpp + r_compiler/ssa/ssa_ubyte_ptr.cpp + r_compiler/ssa/ssa_value.cpp + r_compiler/ssa/ssa_vec4f.cpp + r_compiler/ssa/ssa_vec4f_ptr.cpp + r_compiler/ssa/ssa_vec4i.cpp + r_compiler/ssa/ssa_vec4i_ptr.cpp + r_compiler/ssa/ssa_vec8s.cpp + r_compiler/ssa/ssa_vec16ub.cpp + r_compiler/fixedfunction/drawercodegen.cpp + r_compiler/fixedfunction/drawspancodegen.cpp + r_compiler/fixedfunction/drawwallcodegen.cpp + r_compiler/fixedfunction/drawcolumncodegen.cpp r_data/sprites.cpp r_data/voxels.cpp r_data/renderstyle.cpp @@ -1446,6 +1509,10 @@ set_source_files_properties( sc_man.cpp PROPERTIES OBJECT_DEPENDS "${CMAKE_CURRE set_source_files_properties( ${NOT_COMPILED_SOURCE_FILES} PROPERTIES HEADER_FILE_ONLY TRUE ) if ( WIN32 ) set_source_files_properties( win32/fb_d3d9.cpp win32/fb_d3d9_wipe.cpp PROPERTIES COMPILE_FLAGS ${ZD_FASTMATH_FLAG} ) + + # Supress C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data + # For some reason using #pragma warning(disable: 4244) is not working.. 
+ set_source_files_properties( ${PCH_SOURCES} PROPERTIES COMPILE_FLAGS /wd4244 ) endif() @@ -1455,6 +1522,16 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") endif() target_link_libraries( zdoom ${ZDOOM_LIBS} gdtoa dumb lzma ) + +if( WIN32 ) + foreach(debuglib ${LLVM_DEBUG_LIBS}) + target_link_libraries( zdoom debug ${debuglib} ) + endforeach(debuglib) + foreach(releaselib ${LLVM_RELEASE_LIBS}) + target_link_libraries( zdoom optimized ${releaselib} ) + endforeach(releaselib) +endif() + include_directories( . g_doom g_heretic @@ -1608,6 +1685,9 @@ source_group("Render Data\\Resource Headers" REGULAR_EXPRESSION "^${CMAKE_CURREN source_group("Render Data\\Resource Sources" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_data/.+\\.cpp$") source_group("Render Data\\Textures" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/textures/.+") source_group("Render Interface" FILES r_defs.h r_renderer.h r_sky.cpp r_sky.h r_state.h r_utility.cpp r_utility.h) +source_group("Render Compiler" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/.+") +source_group("Render Compiler\\SSA" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/ssa/.+") +source_group("Render Compiler\\Fixed Function" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/r_compiler/fixedfunction/.+") source_group("Resource Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/resourcefiles/.+") source_group("POSIX Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/.+") source_group("Cocoa Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/posix/cocoa/.+") diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp new file mode 100644 index 0000000000..67d801162b --- /dev/null +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -0,0 +1,16 @@ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawcolumncodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include 
"r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args) +{ +} diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h new file mode 100644 index 0000000000..0749def7f5 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -0,0 +1,26 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawColumnVariant +{ + Opaque, + Fuzz, + Add, + Translated, + TlatedAdd, + Shaded, + AddClamp, + AddClampTranslated, + SubClamp, + SubClampTranslated, + RevSubClamp, + RevSubClampTranslated +}; + +class DrawColumnCodegen : public DrawerCodegen +{ +public: + void Generate(DrawColumnVariant variant, SSAValue args); +}; diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp new file mode 100644 index 0000000000..822a811411 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -0,0 +1,161 @@ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawercodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread) +{ + return line < thread.pass_start_y || line >= thread.pass_end_y || !(line % thread.num_cores == thread.core); +} + +SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) +{ + SSAInt pass_skip = 
SSAInt::MAX(thread.pass_start_y - first_line, SSAInt(0)); + SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; + return pass_skip + core_skip; +} + +SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) +{ + SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, SSAInt(0)); + count = SSAInt::MIN(count, lines_until_pass_end); + SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; + return SSAInt::MAX(c, SSAInt(0)); +} + +SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) +{ + return dest[skipped_by_thread(first_line, thread) * pitch * 4]; +} + +SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +SSAVec4i DrawerCodegen::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_simple(color, light); +} + +SSAVec4i DrawerCodegen::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors) +{ + SSAVec4i color = basecolors[index * 4].load_vec4ub(); // = GPalette.BaseColors[index]; + return shade_bgra_advanced(color, light, constants); +} + +SSAVec4i DrawerCodegen::shade_bgra_simple(SSAVec4i color, SSAInt light) +{ + color = color * light / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants) +{ + SSAInt blue = color[0]; + SSAInt green = color[1]; + SSAInt red = color[2]; + SSAInt alpha = color[3]; + + SSAInt intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + SSAVec4i inv_light = 256 - light; + SSAVec4i inv_desaturate = 256 - constants.desaturate; + + color = (color * 
inv_desaturate + intensity) / 256; + color = (constants.fade * inv_light + color * light) / 256; + color = (color * constants.light) / 256; + + return color.insert(3, alpha); +} + +SSAVec4i DrawerCodegen::blend_copy(SSAVec4i fg) +{ + return fg; +} + +SSAVec4i DrawerCodegen::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha + bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (bg * destalpha - fg * srcalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha) +{ + SSAVec4i color = (fg * srcalpha - bg * destalpha) / 256; + return color.insert(3, 255); +} + +SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // 255 -> 256 + SSAInt inv_alpha = 256 - alpha; + SSAVec4i color = (fg * alpha + bg * inv_alpha) / 256; + return color.insert(3, 255); +} + +SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha) +{ + SSAInt alpha = fg[3]; + alpha = alpha + (alpha >> 7); // 255 -> 256, same adjustment as blend_alpha_blend + SSAInt inv_alpha = 256 - alpha; + return (destalpha * alpha + 256 * inv_alpha + 128) >> 8; +} + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) +{ + SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; + SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height; + SSAInt y0 = frac_y0 >> FRACBITS; + SSAInt y1 = frac_y1 >> FRACBITS; + + SSAVec4i p00 = col0[y0 * 4].load_vec4ub(); + SSAVec4i p01 = col0[y1 * 4].load_vec4ub(); + SSAVec4i p10 = col1[y0 * 4].load_vec4ub(); + SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); + + SSAInt inv_b = texturefracx; + SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; // 4-bit fraction: weight of the y1 samples (p01, p11) in the sum below + SSAInt a = 16 - inv_a; // remaining weight goes to the y0 samples (p00, p10), matching the texture overload of sample_linear + SSAInt b = 16 - inv_b; + + return
(p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} + +SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits) +{ + SSAInt xshift = (32 - xbits); + SSAInt yshift = (32 - ybits); + SSAInt xmask = (SSAInt(1) << xshift) - 1; + SSAInt ymask = (SSAInt(1) << yshift) - 1; + SSAInt x = xfrac >> xbits; + SSAInt y = yfrac >> ybits; + + SSAVec4i p00 = texture[((y & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p01 = texture[(((y + 1) & ymask) + ((x & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p10 = texture[((y & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + SSAVec4i p11 = texture[(((y + 1) & ymask) + (((x + 1) & xmask) << yshift)) * 4].load_vec4ub(); + + SSAInt inv_b = (xfrac >> (xbits - 4)) & 15; + SSAInt inv_a = (yfrac >> (ybits - 4)) & 15; + SSAInt a = 16 - inv_a; + SSAInt b = 16 - inv_b; + + return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; +} diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h new file mode 100644 index 0000000000..17b36234dc --- /dev/null +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -0,0 +1,73 @@ + +#pragma once + +#include "r_compiler/llvmdrawers.h" +#include "r_compiler/ssa/ssa_value.h" +#include "r_compiler/ssa/ssa_vec4f.h" +#include "r_compiler/ssa/ssa_vec4i.h" +#include "r_compiler/ssa/ssa_vec8s.h" +#include "r_compiler/ssa/ssa_vec16ub.h" +#include "r_compiler/ssa/ssa_int.h" +#include "r_compiler/ssa/ssa_int_ptr.h" +#include "r_compiler/ssa/ssa_short.h" +#include "r_compiler/ssa/ssa_ubyte_ptr.h" +#include "r_compiler/ssa/ssa_vec4f_ptr.h" +#include "r_compiler/ssa/ssa_vec4i_ptr.h" +#include "r_compiler/ssa/ssa_pixels.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" +#include "r_compiler/llvm_include.h" + +class SSAWorkerThread +{ +public: + SSAInt core; + 
SSAInt num_cores; + SSAInt pass_start_y; + SSAInt pass_end_y; +}; + +class SSAShadeConstants +{ +public: + SSAVec4i light; + SSAVec4i fade; + SSAInt desaturate; +}; + +class DrawerCodegen +{ +public: + // Checks if a line is rendered by this thread + SSABool line_skipped_by_thread(SSAInt line, SSAWorkerThread thread); + + // The number of lines to skip to reach the first line to be rendered by this thread + SSAInt skipped_by_thread(SSAInt first_line, SSAWorkerThread thread); + + // The number of lines to be rendered by this thread + SSAInt count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread); + + // Calculate the dest address for the first line to be rendered by this thread + SSAUBytePtr dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread); + + // LightBgra + SSAInt calc_light_multiplier(SSAInt light); + SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); + SSAVec4i shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors); + SSAVec4i shade_bgra_simple(SSAVec4i color, SSAInt light); + SSAVec4i shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants); + + // BlendBgra + SSAVec4i blend_copy(SSAVec4i fg); + SSAVec4i blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); + SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); + + // Calculates the final alpha values to be used when combined with the source texture alpha channel + SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha); + + // SampleBgra + SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); + SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt 
xbits, SSAInt ybits); +}; diff --git a/src/r_compiler/fixedfunction/drawspancodegen.cpp b/src/r_compiler/fixedfunction/drawspancodegen.cpp new file mode 100644 index 0000000000..70ecb0abd0 --- /dev/null +++ b/src/r_compiler/fixedfunction/drawspancodegen.cpp @@ -0,0 +1,202 @@ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawspancodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args) +{ + destorg = args[0][0].load(); + source = args[0][1].load(); + destpitch = args[0][2].load(); + stack_xfrac.store(args[0][3].load()); + stack_yfrac.store(args[0][4].load()); + xstep = args[0][5].load(); + ystep = args[0][6].load(); + x1 = args[0][7].load(); + x2 = args[0][8].load(); + y = args[0][9].load(); + xbits = args[0][10].load(); + ybits = args[0][11].load(); + light = args[0][12].load(); + srcalpha = args[0][13].load(); + destalpha = args[0][14].load(); + SSAShort light_alpha = args[0][15].load(); + SSAShort light_red = args[0][16].load(); + SSAShort light_green = args[0][17].load(); + SSAShort light_blue = args[0][18].load(); + SSAShort fade_alpha = args[0][19].load(); + SSAShort fade_red = args[0][20].load(); + SSAShort fade_green = args[0][21].load(); + SSAShort fade_blue = args[0][22].load(); + SSAShort desaturate = args[0][23].load(); + SSAInt flags = args[0][24].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = 
desaturate.zext_int(); + + count = x2 - x1 + 1; + data = destorg[(x1 + y * destpitch) * 4]; + + yshift = 32 - ybits; + xshift = yshift - xbits; + xmask = ((SSAInt(1) << xbits) - 1) << ybits; + + // 64x64 is the most common case by far, so special case it. + is_64x64 = xbits == SSAInt(6) && ybits == SSAInt(6); + is_simple_shade = (flags & DrawSpanArgs::simple_shade) == SSAInt(DrawSpanArgs::simple_shade); + is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == SSAInt(DrawSpanArgs::nearest_filter); + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + LoopShade(variant, true); + branch.else_block(); + LoopShade(variant, false); + branch.end_block(); +} + +void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade) +{ + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + LoopFilter(variant, isSimpleShade, true); + branch.else_block(); + LoopFilter(variant, isSimpleShade, false); + branch.end_block(); +} + +void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter) +{ + SSAIfBlock branch; + branch.if_block(is_64x64); + { + SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true); + Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true); + } + branch.else_block(); + { + SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false); + Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false); + } + branch.end_block(); +} + +SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) +{ + SSAInt sseLength = count / 4; + stack_index.store(SSAInt(0)); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < sseLength); + + SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + 
SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + { + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant); + + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + } + + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + data[index * 16].store_unaligned_vec16ub(color); + + stack_index.store(index + 1); + loop.end_block(); + } + return sseLength; +} + +void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64) +{ + stack_index.store(start); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt xfrac = stack_xfrac.load(); + SSAInt yfrac = stack_yfrac.load(); + + SSAVec4i bgcolor = data[index * 4].load_vec4ub(); + SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant); + data[index * 4].store_vec4ub(color); + + stack_index.store(index + 1); + stack_xfrac.store(xfrac + xstep); + stack_yfrac.store(yfrac + ystep); + loop.end_block(); + } +} + +SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64) +{ + if (isNearestFilter) + { + SSAInt spot; + if (is64x64) + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + else + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + return source[spot * 4].load_vec4ub(); + } + else + { + if (is64x64) + { + return sample_linear(source, xfrac, yfrac, SSAInt(26), SSAInt(26)); + } + else + { + return sample_linear(source, xfrac, yfrac, 32 - xbits, 32 - ybits); + } + } +} + +SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light); + else + return shade_bgra_advanced(fg, light, shade_constants); +} 
+ +SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant) +{ + switch (variant) + { + default: + case DrawSpanVariant::Opaque: + return blend_copy(fg); + case DrawSpanVariant::Masked: + return blend_alpha_blend(fg, bg); + case DrawSpanVariant::Translucent: + case DrawSpanVariant::AddClamp: + return blend_add(fg, bg, srcalpha, destalpha); + case DrawSpanVariant::MaskedTranslucent: + case DrawSpanVariant::MaskedAddClamp: + return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); + } +} diff --git a/src/r_compiler/fixedfunction/drawspancodegen.h b/src/r_compiler/fixedfunction/drawspancodegen.h new file mode 100644 index 0000000000..20869ac2ff --- /dev/null +++ b/src/r_compiler/fixedfunction/drawspancodegen.h @@ -0,0 +1,54 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawSpanVariant +{ + Opaque, + Masked, + Translucent, + MaskedTranslucent, + AddClamp, + MaskedAddClamp +}; + +class DrawSpanCodegen : public DrawerCodegen +{ +public: + void Generate(DrawSpanVariant variant, SSAValue args); + +private: + void LoopShade(DrawSpanVariant variant, bool isSimpleShade); + void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64); + SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant); + + SSAStack stack_index, stack_xfrac, stack_yfrac; + + SSAUBytePtr destorg; + SSAUBytePtr source; + SSAInt destpitch; + SSAInt xstep; + SSAInt ystep; + SSAInt x1; + SSAInt x2; + SSAInt y; + SSAInt xbits; + SSAInt ybits; + SSAInt light; + SSAInt srcalpha; + SSAInt destalpha; + SSAInt count; + SSAUBytePtr data; + SSAInt yshift; + SSAInt xshift; + SSAInt xmask; + 
SSABool is_64x64; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; +}; diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp new file mode 100644 index 0000000000..55b17dafee --- /dev/null +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -0,0 +1,191 @@ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawwallcodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" + +void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) +{ + dest = args[0][0].load(); + source[0] = args[0][1].load(); + source[1] = args[0][2].load(); + source[2] = args[0][3].load(); + source[3] = args[0][4].load(); + source2[0] = args[0][5].load(); + source2[1] = args[0][6].load(); + source2[2] = args[0][7].load(); + source2[3] = args[0][8].load(); + pitch = args[0][9].load(); + count = args[0][10].load(); + dest_y = args[0][11].load(); + texturefrac[0] = args[0][12].load(); + texturefrac[1] = args[0][13].load(); + texturefrac[2] = args[0][14].load(); + texturefrac[3] = args[0][15].load(); + texturefracx[0] = args[0][16].load(); + texturefracx[1] = args[0][17].load(); + texturefracx[2] = args[0][18].load(); + texturefracx[3] = args[0][19].load(); + iscale[0] = args[0][20].load(); + iscale[1] = args[0][21].load(); + iscale[2] = args[0][22].load(); + iscale[3] = args[0][23].load(); + textureheight[0] = args[0][24].load(); + textureheight[1] = args[0][25].load(); + textureheight[2] = args[0][26].load(); + textureheight[3] = args[0][27].load(); + light[0] = args[0][28].load(); + light[1] = args[0][29].load(); 
+ light[2] = args[0][30].load(); + light[3] = args[0][31].load(); + srcalpha = args[0][32].load(); + destalpha = args[0][33].load(); + SSAShort light_alpha = args[0][34].load(); + SSAShort light_red = args[0][35].load(); + SSAShort light_green = args[0][36].load(); + SSAShort light_blue = args[0][37].load(); + SSAShort fade_alpha = args[0][38].load(); + SSAShort fade_red = args[0][39].load(); + SSAShort fade_green = args[0][40].load(); + SSAShort fade_blue = args[0][41].load(); + SSAShort desaturate = args[0][42].load(); + SSAInt flags = args[0][43].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + thread.core = thread_data[0][0].load(); + thread.num_cores = thread_data[0][1].load(); + thread.pass_start_y = thread_data[0][2].load(); + thread.pass_end_y = thread_data[0][3].load(); + + is_simple_shade = (flags & DrawWallArgs::simple_shade) == SSAInt(DrawWallArgs::simple_shade); + is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == SSAInt(DrawWallArgs::nearest_filter); + + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + + pitch = pitch * thread.num_cores; + + int numColumns = fourColumns ? 
4 : 1; + for (int i = 0; i < numColumns; i++) + { + stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); + fracstep[i] = iscale[i] * thread.num_cores; + one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; + } + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + LoopShade(variant, fourColumns, true); + branch.else_block(); + LoopShade(variant, fourColumns, false); + branch.end_block(); +} + +void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade) +{ + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + Loop(variant, fourColumns, isSimpleShade, true); + branch.else_block(); + Loop(variant, fourColumns, isSimpleShade, false); + branch.end_block(); +} + +void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter) +{ + int numColumns = fourColumns ? 4 : 1; + + stack_index.store(SSAInt(0)); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt frac[4]; + for (int i = 0; i < numColumns; i++) + frac[i] = stack_frac[i].load(); + + SSAInt offset = index * pitch * 4; + + if (fourColumns) + { + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant); + + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + dest[offset].store_unaligned_vec16ub(color); + } + else + { + SSAVec4i bgcolor = dest[offset].load_vec4ub(); + SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); + dest[offset].store_vec4ub(color); + } + + 
stack_index.store(index + 1); + for (int i = 0; i < numColumns; i++) + stack_frac[i].store(frac[i] + fracstep[i]); + loop.end_block(); + } +} + +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) +{ + if (isNearestFilter) + { + SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; + return source[index][sample_index * 4].load_vec4ub(); + } + else + { + return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + } +} + +SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light[index]); + else + return shade_bgra_advanced(fg, light[index], shade_constants); +} + +SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant) +{ + switch (variant) + { + default: + case DrawWallVariant::Opaque: + return blend_copy(fg); + case DrawWallVariant::Masked: + return blend_alpha_blend(fg, bg); + case DrawWallVariant::Add: + case DrawWallVariant::AddClamp: + return blend_add(fg, bg, srcalpha, destalpha); + case DrawWallVariant::SubClamp: + return blend_sub(fg, bg, srcalpha, destalpha); + case DrawWallVariant::RevSubClamp: + return blend_revsub(fg, bg, srcalpha, destalpha); + } +} diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h new file mode 100644 index 0000000000..0e1cce5fcf --- /dev/null +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -0,0 +1,50 @@ + +#pragma once + +#include "drawercodegen.h" + +enum class DrawWallVariant +{ + Opaque, + Masked, + Add, + AddClamp, + SubClamp, + RevSubClamp +}; + +class DrawWallCodegen : public DrawerCodegen +{ +public: + void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); + +private: + void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); + void Loop(DrawWallVariant variant, bool fourColumns, bool 
isSimpleShade, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); + SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); + SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); + + SSAStack stack_index, stack_frac[4]; + + SSAUBytePtr dest; + SSAUBytePtr source[4]; + SSAUBytePtr source2[4]; + SSAInt pitch; + SSAInt count; + SSAInt dest_y; + SSAInt texturefrac[4]; + SSAInt texturefracx[4]; + SSAInt iscale[4]; + SSAInt textureheight[4]; + SSAInt light[4]; + SSAInt srcalpha; + SSAInt destalpha; + SSABool is_simple_shade; + SSABool is_nearest_filter; + SSAShadeConstants shade_constants; + SSAWorkerThread thread; + + SSAInt fracstep[4]; // per-iteration texture step; Generate() sets it to iscale[i] * thread.num_cores + SSAInt one[4]; // passed to sample_linear(), which adds it to the texture fraction to locate the second sample row +}; diff --git a/src/r_compiler/llvm_include.h b/src/r_compiler/llvm_include.h new file mode 100644 index 0000000000..d1550f38a5 --- /dev/null +++ b/src/r_compiler/llvm_include.h @@ -0,0 +1,59 @@ + +#pragma once + +#ifdef _MSC_VER + +#if defined(min) +#define llvm_min_bug min +#undef min +#endif +#if defined(max) +#define llvm_max_bug max +#undef max +#endif + +#pragma warning(disable: 4146) // warning C4146: unary minus operator applied to unsigned type, result still unsigned +#pragma warning(disable: 4624) // warning C4624: 'llvm::AugmentedUse' : destructor could not be generated because a base class destructor is inaccessible +#pragma warning(disable: 4355) // warning C4355: 'this' : used in base member initializer list +#pragma warning(disable: 4800) // warning C4800: 'const unsigned int' : forcing value to bool 'true' or 'false' (performance warning) +#pragma warning(disable: 4996) // warning C4996: 'std::_Copy_impl': Function call with parameters that may be unsafe - this call relies on the caller to check that the passed values are correct. To disable this warning, use -D_SCL_SECURE_NO_WARNINGS.
See documentation on how to use Visual C++ 'Checked Iterators' +#pragma warning(disable: 4244) // warning C4244: 'return' : conversion from 'uint64_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4141) // warning C4141: 'inline': used more than once +#pragma warning(disable: 4291) // warning C4291: 'void *llvm::User::operator new(std::size_t,unsigned int,unsigned int)': no matching operator delete found; memory will not be freed if initialization throws an exception +#pragma warning(disable: 4267) // warning C4267: 'return': conversion from 'size_t' to 'unsigned int', possible loss of data +#pragma warning(disable: 4244) // warning C4244: 'initializing': conversion from '__int64' to 'unsigned int', possible loss of data + +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _MSC_VER + +#if defined(llvm_min_bug) +#define min llvm_min_bug +#undef llvm_min_bug +#endif +#if defined(llvm_max_bug) +#define max llvm_max_bug +#undef llvm_max_bug +#endif + +#endif diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp new file mode 100644 index 0000000000..7691af35b1 --- /dev/null +++ b/src/r_compiler/llvmdrawers.cpp @@ -0,0 +1,323 @@ + +#include "i_system.h" +#include "r_compiler/llvm_include.h" +#include "r_compiler/fixedfunction/drawspancodegen.h" +#include "r_compiler/fixedfunction/drawwallcodegen.h" +#include "r_compiler/fixedfunction/drawcolumncodegen.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_scope.h" +#include "r_compiler/ssa/ssa_for_block.h" +#include "r_compiler/ssa/ssa_if_block.h" +#include "r_compiler/ssa/ssa_stack.h" +#include "r_compiler/ssa/ssa_function.h" +#include "r_compiler/ssa/ssa_struct_type.h" +#include "r_compiler/ssa/ssa_value.h" +#include "r_compiler/ssa/ssa_barycentric_weight.h" + +class LLVMProgram +{ +public: + 
LLVMProgram(); + ~LLVMProgram(); + + void StopLogFatalErrors(); + + template<typename Func> // Func is the function signature, e.g. void(const DrawSpanArgs *) + Func *GetProcAddress(const char *name) { return reinterpret_cast<Func *>(PointerToFunction(name)); } + + llvm::LLVMContext &context() { return *mContext; } + llvm::Module *module() { return mModule; } + llvm::ExecutionEngine *engine() { return mEngine.get(); } + llvm::legacy::PassManager *modulePassManager() { return mModulePassManager.get(); } + llvm::legacy::FunctionPassManager *functionPassManager() { return mFunctionPassManager.get(); } + +private: + void *PointerToFunction(const char *name); + + std::unique_ptr<llvm::LLVMContext> mContext; + llvm::Module *mModule; + std::unique_ptr<llvm::ExecutionEngine> mEngine; + std::unique_ptr<llvm::legacy::PassManager> mModulePassManager; + std::unique_ptr<llvm::legacy::FunctionPassManager> mFunctionPassManager; +}; + +class LLVMDrawersImpl : public LLVMDrawers +{ +public: + LLVMDrawersImpl(); + +private: + void CodegenDrawSpan(const char *name, DrawSpanVariant variant); + void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + + static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); + + LLVMProgram mProgram; +}; + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawers *LLVMDrawers::Singleton = nullptr; + +void LLVMDrawers::Create() +{ + if (!Singleton) + Singleton = new LLVMDrawersImpl(); +} + +void LLVMDrawers::Destroy() +{ + delete Singleton; + Singleton = nullptr; +} + +LLVMDrawers *LLVMDrawers::Instance() +{ + return Singleton; +} + +///////////////////////////////////////////////////////////////////////////// + +LLVMDrawersImpl::LLVMDrawersImpl() +{ + CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); + CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); + CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); + CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); + 
CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); + CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); + CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); + CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); + CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); + CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); + CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); + CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); + + mProgram.modulePassManager()->run(*mProgram.module()); // optimize the IR first: passes run after finalizeObject() never reach the emitted code + mProgram.engine()->finalizeObject(); + + DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan"); + DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked"); + DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent"); + DrawSpanMaskedTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMaskedTranslucent"); + DrawSpanAddClamp = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanAddClamp"); + DrawSpanMaskedAddClamp = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMaskedAddClamp"); + vlinec1 = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("vlinec1"); + vlinec4 = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("vlinec4"); + mvlinec1 = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("mvlinec1"); + mvlinec4 = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("mvlinec4"); + tmvline1_add = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline1_add"); + tmvline4_add = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline4_add"); + tmvline1_addclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline1_addclamp"); + tmvline4_addclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline4_addclamp"); + tmvline1_subclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline1_subclamp"); + tmvline4_subclamp = 
mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline4_subclamp"); + tmvline1_revsubclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline1_revsubclamp"); + tmvline4_revsubclamp = mProgram.GetProcAddress<void(const DrawWallArgs *, const WorkerThreadData *)>("tmvline4_revsubclamp"); + + mProgram.StopLogFatalErrors(); +} + +void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawSpanArgsStruct(mProgram.context())); + function.create_public(); + + DrawSpanCodegen codegen; + codegen.Generate(variant, function.parameter(0)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) // a true result is treated as a verification failure + I_FatalError("verifyFunction failed for %s", __FUNCTION__); // __FUNCTION__ is not a string literal outside MSVC, so it cannot be concatenated + + mProgram.functionPassManager()->run(*function.func); +} + +void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawWallCodegen codegen; + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) // a true result is treated as a verification failure + I_FatalError("verifyFunction failed for %s", __FUNCTION__); // __FUNCTION__ is not a string literal outside MSVC, so it cannot be concatenated + + mProgram.functionPassManager()->run(*function.func); +} + +llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) +{ + std::vector<llvm::Type *> elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac; 
+ elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + +llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) +{ + std::vector<llvm::Type *> elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; + for (int i = 0; i < 8; i++) // source[4], source2[4] + elements.push_back(llvm::Type::getInt8PtrTy(context)); + for (int i = 0; i < 25; i++) // pitch, count, dest_y, texturefrac[4], texturefracx[4], iscale[4], textureheight[4], light[4], srcalpha, destalpha + 
elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + +llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) +{ + std::vector<llvm::Type *> elements; + for (int i = 0; i < 4; i++) // core, num_cores, pass_start_y, pass_end_y + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + +///////////////////////////////////////////////////////////////////////////// + +namespace { static bool LogFatalErrors = false; } + +LLVMProgram::LLVMProgram() +{ + using namespace llvm; + + // We have to be extra careful about this because both LLVM and ZDoom made + // the very unwise decision to hook atexit. To top it off, LLVM decided + // to log something in the atexit handler.. 
+ LogFatalErrors = true; + + install_fatal_error_handler([](void *user_data, const std::string& reason, bool gen_crash_diag) { + if (LogFatalErrors) + I_FatalError("LLVM fatal error: %s", reason.c_str()); + }); + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + + std::string errorstring; + + std::string targetTriple = sys::getProcessTriple(); + std::string cpuName = sys::getHostCPUName(); + StringMap cpuFeatures; + sys::getHostCPUFeatures(cpuFeatures); + std::string cpuFeaturesStr; + for (const auto &it : cpuFeatures) + { + if (!cpuFeaturesStr.empty()) + cpuFeaturesStr.push_back(' '); + cpuFeaturesStr.push_back(it.getValue() ? '+' : '-'); + cpuFeaturesStr += it.getKey(); + } + + DPrintf(DMSG_SPAMMY, "LLVM target triple: %s\n", targetTriple.c_str()); + DPrintf(DMSG_SPAMMY, "LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str()); + + const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring); + if (!target) + I_FatalError("Could not find LLVM target: %s", errorstring.c_str()); + + TargetOptions opt; + auto relocModel = Optional(Reloc::Static); + TargetMachine *machine = target->createTargetMachine(targetTriple, cpuName, cpuFeaturesStr, opt, relocModel, CodeModel::Default, CodeGenOpt::Aggressive); + if (!machine) + I_FatalError("Could not create LLVM target machine"); + + mContext = std::make_unique(); + + auto moduleOwner = std::make_unique("render", context()); + mModule = moduleOwner.get(); + mModule->setTargetTriple(targetTriple); + mModule->setDataLayout(machine->createDataLayout()); + + EngineBuilder engineBuilder(std::move(moduleOwner)); + engineBuilder.setErrorStr(&errorstring); + engineBuilder.setOptLevel(CodeGenOpt::Aggressive); + engineBuilder.setRelocationModel(Reloc::Static); + engineBuilder.setEngineKind(EngineKind::JIT); + mEngine.reset(engineBuilder.create(machine)); + if (!mEngine) + I_FatalError("Could not create LLVM execution engine: %s", errorstring.c_str()); + + mModulePassManager = 
std::make_unique(); + mFunctionPassManager = std::make_unique(mModule); + + PassManagerBuilder passManagerBuilder; + passManagerBuilder.OptLevel = 3; + passManagerBuilder.SizeLevel = 0; + passManagerBuilder.Inliner = createFunctionInliningPass(); + passManagerBuilder.populateModulePassManager(*mModulePassManager.get()); + passManagerBuilder.populateFunctionPassManager(*mFunctionPassManager.get()); +} + +LLVMProgram::~LLVMProgram() +{ + mEngine.reset(); + mContext.reset(); +} + +void *LLVMProgram::PointerToFunction(const char *name) +{ + llvm::Function *function = mModule->getFunction(name); + if (!function) + return nullptr; + return mEngine->getPointerToFunction(function); +} + +void LLVMProgram::StopLogFatalErrors() +{ + LogFatalErrors = false; +} diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h new file mode 100644 index 0000000000..b1039cf496 --- /dev/null +++ b/src/r_compiler/llvmdrawers.h @@ -0,0 +1,111 @@ + +#pragma once + +struct WorkerThreadData +{ + int32_t core; + int32_t num_cores; + int32_t pass_start_y; + int32_t pass_end_y; +}; + +struct DrawWallArgs +{ + uint32_t *dest; + const uint32_t *source[4]; + const uint32_t *source2[4]; + int32_t pitch; + int32_t count; + int32_t dest_y; + uint32_t texturefrac[4]; + uint32_t texturefracx[4]; + uint32_t iscale[4]; + uint32_t textureheight[4]; + uint32_t light[4]; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + +struct DrawSpanArgs +{ + uint32_t *destorg; + const uint32_t *source; + int32_t destpitch; + int32_t xfrac; + int32_t yfrac; + int32_t xstep; + int32_t ystep; + int32_t x1; + int32_t x2; + int32_t y; + int32_t xbits; + int32_t ybits; + uint32_t light; + uint32_t srcalpha; + uint32_t destalpha; + 
+ uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1, + nearest_filter = 2 + }; +}; + +class LLVMDrawers +{ +public: + virtual ~LLVMDrawers() { } + + static void Create(); + static void Destroy(); + static LLVMDrawers *Instance(); + + void(*DrawSpan)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; + void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; + + void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + +private: + static LLVMDrawers *Singleton; +}; diff --git a/src/r_compiler/ssa/ssa_barycentric_weight.h b/src/r_compiler/ssa/ssa_barycentric_weight.h new file mode 100644 index 0000000000..52117ccc69 --- 
/dev/null +++ b/src/r_compiler/ssa/ssa_barycentric_weight.h @@ -0,0 +1,97 @@ + +#pragma once + +#include "ssa_vec4f.h" +#include "ssa_float.h" +#include "ssa_int.h" + +class SSAViewport +{ +public: + SSAViewport(SSAInt x, SSAInt y, SSAInt width, SSAInt height) + : x(x), y(y), width(width), height(height), right(x + width), bottom(y + height), + half_width(SSAFloat(width) * 0.5f), half_height(SSAFloat(height) * 0.5f), + rcp_half_width(1.0f / (SSAFloat(width) * 0.5f)), + rcp_half_height(1.0f / (SSAFloat(height) * 0.5f)) + { + } + + SSAInt x, y; + SSAInt width, height; + SSAInt right, bottom; + SSAFloat half_width; + SSAFloat half_height; + SSAFloat rcp_half_width; + SSAFloat rcp_half_height; + + SSAVec4f clip_to_window(SSAVec4f clip) const + { + SSAFloat w = clip[3]; + SSAVec4f normalized = SSAVec4f::insert_element(clip / SSAVec4f::shuffle(clip, 3, 3, 3, 3), w, 3); + return normalized_to_window(normalized); + } + + SSAVec4f normalized_to_window(SSAVec4f normalized) const + { + return SSAVec4f( + SSAFloat(x) + (normalized[0] + 1.0f) * half_width, + SSAFloat(y) + (normalized[1] + 1.0f) * half_height, + 0.0f - normalized[2], + normalized[3]); + } +}; + +class SSABarycentricWeight +{ +public: + SSABarycentricWeight(SSAViewport vp, SSAVec4f v1, SSAVec4f v2); + SSAFloat from_window_x(SSAInt x) const; + SSAFloat from_window_y(SSAInt y) const; + + SSAViewport viewport; + SSAVec4f v1; + SSAVec4f v2; +}; + +inline SSABarycentricWeight::SSABarycentricWeight(SSAViewport viewport, SSAVec4f v1, SSAVec4f v2) +: viewport(viewport), v1(v1), v2(v2) +{ +} + +inline SSAFloat SSABarycentricWeight::from_window_x(SSAInt x) const +{ +/* SSAFloat xnormalized = (x + 0.5f - viewport.x) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2.x-v1.x; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.x - xnormalized * v2.w) / (dx - xnormalized * dw); + return a;*/ + + SSAFloat xnormalized = (SSAFloat(x) + 0.5f - SSAFloat(viewport.x)) * viewport.rcp_half_width - 1.0f; + SSAFloat dx = v2[0]-v1[0]; + 
SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (xnormalized * v1[3] - v1[0]) / (dx - xnormalized * dw); + return 1.0f - t; +} + +inline SSAFloat SSABarycentricWeight::from_window_y(SSAInt y) const +{ +/* SSAFloat ynormalized = (y + 0.5f - viewport.y) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2.y-v1.y; + SSAFloat dw = v2.w-v1.w; + SSAFloat a = (v2.y - ynormalized * v2.w) / (dy - ynormalized * dw); + return a;*/ + + SSAFloat ynormalized = (SSAFloat(y) + 0.5f - SSAFloat(viewport.y)) * viewport.rcp_half_height - 1.0f; + SSAFloat dy = v2[1]-v1[1]; + SSAFloat dw = v2[3]-v1[3]; + SSAFloat t = (ynormalized * v1[3] - v1[1]) / (dy - ynormalized * dw); + return 1.0f - t; +} + +/* + y = (v1.y + t * dy) / (v1.w + t * dw) + + y * v1.w + y * t * dw = v1.y + t * dy + y * v1.w - v1.y = t * (dy - y * dw) + t = (y * v1.w - v1.y) / (dy - y * dw) +*/ diff --git a/src/r_compiler/ssa/ssa_bool.cpp b/src/r_compiler/ssa/ssa_bool.cpp new file mode 100644 index 0000000000..bfd9ba5abf --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.cpp @@ -0,0 +1,91 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_bool.h" +#include "ssa_scope.h" + +SSABool::SSABool() +: v(0) +{ +} +/* +SSABool::SSABool(bool constant) +: v(0) +{ +} +*/ +SSABool::SSABool(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSABool::llvm_type() +{ + return llvm::Type::getInt1Ty(SSAScope::context()); +} + +SSABool operator&&(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSABool operator||(const SSABool &a, const SSABool &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} + +SSABool operator!(const SSABool &a) +{ + return SSABool::from_llvm(SSAScope::builder().CreateNot(a.v, SSAScope::hint())); +} + +SSABool operator<(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAInt &a, const 
SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAInt &a, const SSAInt &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateICmpSGT(a.v, b.v, SSAScope::hint())); +} + +///////////////////////////////////////////////////////////////////////////// + +SSABool operator<(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLT(a.v, b.v, SSAScope::hint())); +} + +SSABool operator<=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOLE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator==(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOEQ(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>=(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGE(a.v, b.v, SSAScope::hint())); +} + +SSABool operator>(const SSAFloat &a, const SSAFloat &b) +{ + return SSABool::from_llvm(SSAScope::builder().CreateFCmpOGT(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_bool.h b/src/r_compiler/ssa/ssa_bool.h new file mode 100644 index 0000000000..2ef79e49b7 --- /dev/null +++ b/src/r_compiler/ssa/ssa_bool.h @@ -0,0 +1,37 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSABool +{ +public: + SSABool(); + //SSABool(bool constant); + explicit SSABool(llvm::Value *v); + static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + 
+SSABool operator&&(const SSABool &a, const SSABool &b); +SSABool operator||(const SSABool &a, const SSABool &b); + +SSABool operator!(const SSABool &a); + +SSABool operator<(const SSAInt &a, const SSAInt &b); +SSABool operator<=(const SSAInt &a, const SSAInt &b); +SSABool operator==(const SSAInt &a, const SSAInt &b); +SSABool operator>=(const SSAInt &a, const SSAInt &b); +SSABool operator>(const SSAInt &a, const SSAInt &b); + +SSABool operator<(const SSAFloat &a, const SSAFloat &b); +SSABool operator<=(const SSAFloat &a, const SSAFloat &b); +SSABool operator==(const SSAFloat &a, const SSAFloat &b); +SSABool operator>=(const SSAFloat &a, const SSAFloat &b); +SSABool operator>(const SSAFloat &a, const SSAFloat &b); diff --git a/src/r_compiler/ssa/ssa_float.cpp b/src/r_compiler/ssa/ssa_float.cpp new file mode 100644 index 0000000000..4ec5c516df --- /dev/null +++ b/src/r_compiler/ssa/ssa_float.cpp @@ -0,0 +1,152 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" + +SSAFloat::SSAFloat() +: v(0) +{ +} + +SSAFloat::SSAFloat(float constant) +: v(0) +{ + v = llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant)); +} + +SSAFloat::SSAFloat(SSAInt i) +: v(0) +{ + v = SSAScope::builder().CreateSIToFP(i.v, llvm::Type::getFloatTy(SSAScope::context()), SSAScope::hint()); +} + +SSAFloat::SSAFloat(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloat::llvm_type() +{ + return llvm::Type::getFloatTy(SSAScope::context()); +} + +SSAFloat SSAFloat::sqrt(SSAFloat f) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::sin(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); +} 
+ +SSAFloat SSAFloat::cos(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::pow(SSAFloat val, SSAFloat power) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(val.v); + args.push_back(power.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAFloat SSAFloat::exp(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::log(SSAFloat val) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAFloat SSAFloat::fma(SSAFloat a, SSAFloat b, SSAFloat c) +{ + std::vector params; + params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + //params.push_back(SSAFloat::llvm_type()); + std::vector args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); +} + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b) +{ + return 
SSAFloat::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b) +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAFloat operator+(float a, const SSAFloat &b) +{ + return SSAFloat(a) + b; +} + +SSAFloat operator-(float a, const SSAFloat &b) +{ + return SSAFloat(a) - b; +} + +SSAFloat operator*(float a, const SSAFloat &b) +{ + return SSAFloat(a) * b; +} + +SSAFloat operator/(float a, const SSAFloat &b) +{ + return SSAFloat(a) / b; +} + +SSAFloat operator+(const SSAFloat &a, float b) +{ + return a + SSAFloat(b); +} + +SSAFloat operator-(const SSAFloat &a, float b) +{ + return a - SSAFloat(b); +} + +SSAFloat operator*(const SSAFloat &a, float b) +{ + return a * SSAFloat(b); +} + +SSAFloat operator/(const SSAFloat &a, float b) +{ + return a / SSAFloat(b); +} + diff --git a/src/r_compiler/ssa/ssa_float.h b/src/r_compiler/ssa/ssa_float.h new file mode 100644 index 0000000000..0edbcfcba8 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAInt; + +class SSAFloat +{ +public: + SSAFloat(); + SSAFloat(SSAInt i); + explicit SSAFloat(float constant); + explicit SSAFloat(llvm::Value *v); + static SSAFloat from_llvm(llvm::Value *v) { return SSAFloat(v); } + static llvm::Type *llvm_type(); + static SSAFloat sqrt(SSAFloat f); + static SSAFloat sin(SSAFloat val); + static SSAFloat cos(SSAFloat val); + static SSAFloat pow(SSAFloat val, SSAFloat power); + static SSAFloat exp(SSAFloat val); + static SSAFloat log(SSAFloat val); + static SSAFloat fma(SSAFloat a, SSAFloat b, SSAFloat c); + + llvm::Value *v; +}; + +SSAFloat operator+(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator-(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator*(const SSAFloat &a, const SSAFloat &b); +SSAFloat operator/(const SSAFloat &a, const SSAFloat &b); + 
+SSAFloat operator+(float a, const SSAFloat &b); +SSAFloat operator-(float a, const SSAFloat &b); +SSAFloat operator*(float a, const SSAFloat &b); +SSAFloat operator/(float a, const SSAFloat &b); + +SSAFloat operator+(const SSAFloat &a, float b); +SSAFloat operator-(const SSAFloat &a, float b); +SSAFloat operator*(const SSAFloat &a, float b); +SSAFloat operator/(const SSAFloat &a, float b); diff --git a/src/r_compiler/ssa/ssa_float_ptr.cpp b/src/r_compiler/ssa/ssa_float_ptr.cpp new file mode 100644 index 0000000000..582821ca03 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.cpp @@ -0,0 +1,58 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_float_ptr.h" +#include "ssa_scope.h" + +SSAFloatPtr::SSAFloatPtr() +: v(0) +{ +} + +SSAFloatPtr::SSAFloatPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAFloatPtr::llvm_type() +{ + return llvm::Type::getFloatPtrTy(SSAScope::context()); +} + +SSAFloatPtr SSAFloatPtr::operator[](SSAInt index) const +{ + return SSAFloatPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAFloat SSAFloatPtr::load() const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4f SSAFloatPtr::load_unaligned_vec4f() const +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAFloatPtr::store(const SSAFloat &new_value) +{ + 
SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value) +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint())); +} + +void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value) +{ + llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4); +} diff --git a/src/r_compiler/ssa/ssa_float_ptr.h b/src/r_compiler/ssa/ssa_float_ptr.h new file mode 100644 index 0000000000..f29b2de3f7 --- /dev/null +++ b/src/r_compiler/ssa/ssa_float_ptr.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloatPtr +{ +public: + SSAFloatPtr(); + explicit SSAFloatPtr(llvm::Value *v); + static SSAFloatPtr from_llvm(llvm::Value *v) { return SSAFloatPtr(v); } + static llvm::Type *llvm_type(); + SSAFloatPtr operator[](SSAInt index) const; + SSAFloatPtr operator[](int index) const { return (*this)[SSAInt(index)]; } + SSAFloat load() const; + SSAVec4f load_vec4f() const; + SSAVec4f load_unaligned_vec4f() const; + void store(const SSAFloat &new_value); + void store_vec4f(const SSAVec4f &new_value); + void store_unaligned_vec4f(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_for_block.cpp b/src/r_compiler/ssa/ssa_for_block.cpp new file mode 100644 index 0000000000..f7cd6ad0bd --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.cpp @@ -0,0 +1,26 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_for_block.h" +#include "ssa_scope.h" + +SSAForBlock::SSAForBlock() +: 
if_basic_block(0), loop_basic_block(0), end_basic_block(0) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forbegin", SSAScope::builder().GetInsertBlock()->getParent()); + loop_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forloop", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "forend", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAForBlock::loop_block(SSABool true_condition) +{ + SSAScope::builder().CreateCondBr(true_condition.v, loop_basic_block, end_basic_block); + SSAScope::builder().SetInsertPoint(loop_basic_block); +} + +void SSAForBlock::end_block() +{ + SSAScope::builder().CreateBr(if_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_for_block.h b/src/r_compiler/ssa/ssa_for_block.h new file mode 100644 index 0000000000..4c1952c14e --- /dev/null +++ b/src/r_compiler/ssa/ssa_for_block.h @@ -0,0 +1,17 @@ + +#pragma once + +#include "ssa_bool.h" + +class SSAForBlock +{ +public: + SSAForBlock(); + void loop_block(SSABool true_condition); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *loop_basic_block; + llvm::BasicBlock *end_basic_block; +}; diff --git a/src/r_compiler/ssa/ssa_function.cpp b/src/r_compiler/ssa/ssa_function.cpp new file mode 100644 index 0000000000..a326beaf76 --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.cpp @@ -0,0 +1,55 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_function.h" +#include "ssa_int.h" +#include "ssa_scope.h" +#include "ssa_value.h" +// Stores the function name and defaults the return type to void; func stays null until create_public()/create_private() runs. Initializers are listed in member declaration order (func, name, return_type). +SSAFunction::SSAFunction(const std::string &name) +: func(), name(name), return_type(llvm::Type::getVoidTy(SSAScope::context())) +{ +} + +void SSAFunction::set_return_type(llvm::Type *type) +{ + return_type = type; +} + +void 
SSAFunction::add_parameter(llvm::Type *type) +{ + parameters.push_back(type); +} +// Reuses the module's function of this name if one exists, otherwise declares it with external linkage; then appends an "entry" block and points the builder at it. +void SSAFunction::create_public() +{ + func = SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::ExternalLinkage, name.c_str(), SSAScope::module()); + //func->setCallingConv(llvm::CallingConv::X86_StdCall); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} +// Same as create_public(), but a newly declared function gets private linkage and the AlwaysInline attribute. +void SSAFunction::create_private() +{ + func = SSAScope::module()->getFunction(name.c_str()); + if (func == 0) + { + llvm::FunctionType *function_type = llvm::FunctionType::get(return_type, parameters, false); + func = llvm::Function::Create(function_type, llvm::Function::PrivateLinkage, name.c_str(), SSAScope::module()); + func->addFnAttr(llvm::Attribute::AlwaysInline); + } + llvm::BasicBlock *entry = llvm::BasicBlock::Create(SSAScope::context(), "entry", func); + SSAScope::builder().SetInsertPoint(entry); +} +// Returns the index-th argument of func wrapped as an SSAValue. index is not range-checked and func must already have been created. +SSAValue SSAFunction::parameter(int index) +{ + llvm::Function::arg_iterator arg_it = func->arg_begin(); + for (int i = 0; i < index; i++) + ++arg_it; + return SSAValue::from_llvm(static_cast<llvm::Value *>(&*arg_it)); +} diff --git a/src/r_compiler/ssa/ssa_function.h b/src/r_compiler/ssa/ssa_function.h new file mode 100644 index 0000000000..f1969c35b5 --- /dev/null +++ b/src/r_compiler/ssa/ssa_function.h @@ -0,0 +1,30 @@ + +#pragma once + +#include <string> +#include <vector> + +namespace llvm { class Value; } +namespace llvm { class Type; } +namespace llvm { class Function; } + +class SSAInt; +class SSAValue; + +class SSAFunction +{ +public: + SSAFunction(const std::string &name); + void set_return_type(llvm::Type *type); + void add_parameter(llvm::Type *type); + void create_public(); + void create_private(); + SSAValue parameter(int index); + + llvm::Function *func; + +private: + 
std::string name; + llvm::Type *return_type; + std::vector<llvm::Type *> parameters; +}; diff --git a/src/r_compiler/ssa/ssa_if_block.cpp b/src/r_compiler/ssa/ssa_if_block.cpp new file mode 100644 index 0000000000..7187a0759c --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.cpp @@ -0,0 +1,31 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_if_block.h" +#include "ssa_scope.h" + +SSAIfBlock::SSAIfBlock() +: if_basic_block(0), else_basic_block(0), end_basic_block(0) +{ +} + +void SSAIfBlock::if_block(SSABool true_condition) +{ + if_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "if", SSAScope::builder().GetInsertBlock()->getParent()); + else_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "else", SSAScope::builder().GetInsertBlock()->getParent()); + end_basic_block = else_basic_block; + SSAScope::builder().CreateCondBr(true_condition.v, if_basic_block, else_basic_block); + SSAScope::builder().SetInsertPoint(if_basic_block); +} + +void SSAIfBlock::else_block() +{ + end_basic_block = llvm::BasicBlock::Create(SSAScope::context(), "end", SSAScope::builder().GetInsertBlock()->getParent()); + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(else_basic_block); +} + +void SSAIfBlock::end_block() +{ + SSAScope::builder().CreateBr(end_basic_block); + SSAScope::builder().SetInsertPoint(end_basic_block); +} diff --git a/src/r_compiler/ssa/ssa_if_block.h b/src/r_compiler/ssa/ssa_if_block.h new file mode 100644 index 0000000000..4f0c8a26bb --- /dev/null +++ b/src/r_compiler/ssa/ssa_if_block.h @@ -0,0 +1,45 @@ + +#pragma once + +#include "ssa_bool.h" +#include "ssa_phi.h" + +class SSAIfBlock +{ +public: + SSAIfBlock(); + void if_block(SSABool true_condition); + void else_block(); + void end_block(); + +private: + llvm::BasicBlock *if_basic_block; + llvm::BasicBlock *else_basic_block; + llvm::BasicBlock *end_basic_block; +}; +// Emits an if/else branch on a <= b and merges a (if side) and b (else side) through a phi node. +template<typename T> +T ssa_min(T a, T b) +{ + SSAPhi<T> phi; + SSAIfBlock if_block; + if_block.if_block(a <= b); + 
phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} + +template +T ssa_max(T a, T b) +{ + SSAPhi phi; + SSAIfBlock if_block; + if_block.if_block(a >= b); + phi.add_incoming(a); + if_block.else_block(); + phi.add_incoming(b); + if_block.end_block(); + return phi.create(); +} diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp new file mode 100644 index 0000000000..3d9cb22bdf --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -0,0 +1,158 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_bool.h" +#include "ssa_scope.h" + +SSAInt::SSAInt() +: v(0) +{ +} + +SSAInt::SSAInt(int constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true)); +} + +SSAInt::SSAInt(SSAFloat f) +: v(0) +{ + v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt32Ty(SSAScope::context()), SSAScope::hint()); +} + +SSAInt::SSAInt(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAInt::llvm_type() +{ + return llvm::Type::getInt32Ty(SSAScope::context()); +} + +SSAInt SSAInt::MIN(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); +} + +SSAInt SSAInt::MAX(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); +} + +SSAInt operator+(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator-(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator*(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator/(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, 
SSAScope::hint())); +} + +SSAInt operator%(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator+(int a, const SSAInt &b) +{ + return SSAInt(a) + b; +} + +SSAInt operator-(int a, const SSAInt &b) +{ + return SSAInt(a) - b; +} + +SSAInt operator*(int a, const SSAInt &b) +{ + return SSAInt(a) * b; +} + +SSAInt operator/(int a, const SSAInt &b) +{ + return SSAInt(a) / b; +} + +SSAInt operator%(int a, const SSAInt &b) +{ + return SSAInt(a) % b; +} + +SSAInt operator+(const SSAInt &a, int b) +{ + return a + SSAInt(b); +} + +SSAInt operator-(const SSAInt &a, int b) +{ + return a - SSAInt(b); +} + +SSAInt operator*(const SSAInt &a, int b) +{ + return a * SSAInt(b); +} + +SSAInt operator/(const SSAInt &a, int b) +{ + return a / SSAInt(b); +} + +SSAInt operator%(const SSAInt &a, int b) +{ + return a % SSAInt(b); +} + +SSAInt operator<<(const SSAInt &a, int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint())); +} + +SSAInt operator>>(const SSAInt &a, int bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} + +SSAInt operator<<(const SSAInt &a, const SSAInt &bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint())); +} + +SSAInt operator>>(const SSAInt &a, const SSAInt &bits) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint())); +} + +SSAInt operator&(const SSAInt &a, int b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint())); +} + +SSAInt operator&(const SSAInt &a, const SSAInt &b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint())); +} + +SSAInt operator|(const SSAInt &a, int b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint())); +} + +SSAInt operator|(const SSAInt &a, const SSAInt &b) +{ + return 
SSAInt::from_llvm(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h new file mode 100644 index 0000000000..c0f46e4b67 --- /dev/null +++ b/src/r_compiler/ssa/ssa_int.h @@ -0,0 +1,51 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloat; + +class SSAInt +{ +public: + SSAInt(); + explicit SSAInt(int constant); + SSAInt(SSAFloat f); + explicit SSAInt(llvm::Value *v); + static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } + static llvm::Type *llvm_type(); + + static SSAInt MIN(SSAInt a, SSAInt b); + static SSAInt MAX(SSAInt a, SSAInt b); + + llvm::Value *v; +}; + +SSAInt operator+(const SSAInt &a, const SSAInt &b); +SSAInt operator-(const SSAInt &a, const SSAInt &b); +SSAInt operator*(const SSAInt &a, const SSAInt &b); +SSAInt operator/(const SSAInt &a, const SSAInt &b); +SSAInt operator%(const SSAInt &a, const SSAInt &b); + +SSAInt operator+(int a, const SSAInt &b); +SSAInt operator-(int a, const SSAInt &b); +SSAInt operator*(int a, const SSAInt &b); +SSAInt operator/(int a, const SSAInt &b); +SSAInt operator%(int a, const SSAInt &b); + +SSAInt operator+(const SSAInt &a, int b); +SSAInt operator-(const SSAInt &a, int b); +SSAInt operator*(const SSAInt &a, int b); +SSAInt operator/(const SSAInt &a, int b); +SSAInt operator%(const SSAInt &a, int b); + +SSAInt operator<<(const SSAInt &a, int bits); +SSAInt operator>>(const SSAInt &a, int bits); +SSAInt operator<<(const SSAInt &a, const SSAInt &bits); +SSAInt operator>>(const SSAInt &a, const SSAInt &bits); + +SSAInt operator&(const SSAInt &a, int b); +SSAInt operator&(const SSAInt &a, const SSAInt &b); +SSAInt operator|(const SSAInt &a, int b); +SSAInt operator|(const SSAInt &a, const SSAInt &b); diff --git a/src/r_compiler/ssa/ssa_int_ptr.cpp b/src/r_compiler/ssa/ssa_int_ptr.cpp new file mode 100644 index 0000000000..974645d08c --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_int_ptr.cpp @@ -0,0 +1,58 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_int_ptr.h" +#include "ssa_scope.h" + +SSAIntPtr::SSAIntPtr() +: v(0) +{ +} + +SSAIntPtr::SSAIntPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAIntPtr::llvm_type() +{ + return llvm::Type::getInt32PtrTy(SSAScope::context()); +} + +SSAIntPtr SSAIntPtr::operator[](SSAInt index) const +{ + return SSAIntPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAInt SSAIntPtr::load() const +{ + return SSAInt::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec4i SSAIntPtr::load_unaligned_vec4i() const +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAIntPtr::store(const SSAInt &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAIntPtr::store_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint())); +} + +void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value) +{ + llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); + 
SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4); +} diff --git a/src/r_compiler/ssa/ssa_int_ptr.h b/src/r_compiler/ssa/ssa_int_ptr.h new file mode 100644 index 0000000000..c75ed6a8d5 --- /dev/null +++ b/src/r_compiler/ssa/ssa_int_ptr.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAIntPtr +{ +public: + SSAIntPtr(); + explicit SSAIntPtr(llvm::Value *v); + static SSAIntPtr from_llvm(llvm::Value *v) { return SSAIntPtr(v); } + static llvm::Type *llvm_type(); + SSAIntPtr operator[](SSAInt index) const; + SSAIntPtr operator[](int index) const { return (*this)[SSAInt(index)]; } + SSAInt load() const; + SSAVec4i load_vec4i() const; + SSAVec4i load_unaligned_vec4i() const; + void store(const SSAInt &new_value); + void store_vec4i(const SSAVec4i &new_value); + void store_unaligned_vec4i(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_phi.h b/src/r_compiler/ssa/ssa_phi.h new file mode 100644 index 0000000000..89cbc8cf05 --- /dev/null +++ b/src/r_compiler/ssa/ssa_phi.h @@ -0,0 +1,33 @@ + +#pragma once +#include <vector> +#include "ssa_scope.h" + +class SSAIfBlock; +// Collects (value, basic block) pairs as branches are emitted, then materialises them as one LLVM phi node of type SSAVariable::llvm_type(). +template<typename SSAVariable> +class SSAPhi +{ +public: + void add_incoming(SSAVariable var) // pairs var with the block the builder is currently inserting into + { + incoming.push_back(Incoming(var.v, SSAScope::builder().GetInsertBlock())); + } + + SSAVariable create() // emits the phi at the builder's current insert point and registers every recorded pair + { + llvm::PHINode *phi_node = SSAScope::builder().CreatePHI(SSAVariable::llvm_type(), (unsigned int)incoming.size(), SSAScope::hint()); + for (size_t i = 0; i < incoming.size(); i++) + phi_node->addIncoming(incoming[i].v, incoming[i].bb); + return SSAVariable::from_llvm(phi_node); + } + +private: + struct Incoming + { + Incoming(llvm::Value *v, llvm::BasicBlock *bb) : v(v), bb(bb) { } + llvm::Value *v; + llvm::BasicBlock *bb; + }; + std::vector<Incoming> incoming; +}; diff --git 
a/src/r_compiler/ssa/ssa_pixelformat4f.h b/src/r_compiler/ssa/ssa_pixelformat4f.h new file mode 100644 index 0000000000..507e95b5d1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4f.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float_ptr.h" + +class SSAPixelFormat4f +{ +public: + SSAPixelFormat4f() { } + SSAPixelFormat4f(SSAFloatPtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAFloatPtr pixels() { return _pixels; } + SSAFloatPtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return _pixels[index * 4].load_vec4f(); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4f(pixel); + } + +protected: + SSAFloatPtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub.h b/src/r_compiler/ssa/ssa_pixelformat4ub.h new file mode 100644 index 0000000000..fdf98c4aa6 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub +{ +public: + SSAPixelFormat4ub() { } + SSAPixelFormat4ub(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(pixel * 255.0f)); + } + +private: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h new file mode 100644 index 0000000000..4601eeb3c1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_argb_rev.h @@ -0,0 +1,35 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_argb_rev +{ +public: + SSAPixelFormat4ub_argb_rev() { } + 
SSAPixelFormat4ub_argb_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } +/* + void get4f(SSAInt index, SSAVec4f &out_pixel1, SSAVec4f &out_pixel2) const + { + SSAVec8s p = _pixels[index * 4].load_vec8s(); + out_pixel1 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendlo(p)) * (1.0f / 255.0f), 2, 1, 0, 3); + out_pixel2 = SSAVec4f::shuffle(SSAVec4f(SSAVec4i::extendhi(p)) * (1.0f / 255.0f), 2, 1, 0, 3); + } +*/ + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 2, 1, 0, 3); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 2, 1, 0, 3))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h new file mode 100644 index 0000000000..402480c49b --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixelformat4ub_rev.h @@ -0,0 +1,28 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_ubyte_ptr.h" + +class SSAPixelFormat4ub_rev +{ +public: + SSAPixelFormat4ub_rev() { } + SSAPixelFormat4ub_rev(SSAUBytePtr pixels, SSAInt width, SSAInt height) : _pixels(pixels) { } + + SSAUBytePtr pixels() { return _pixels; } + SSAUBytePtr pixels() const { return _pixels; } + + SSAVec4f get4f(SSAInt index) const + { + return SSAVec4f::shuffle(SSAVec4f(_pixels[index * 4].load_vec4ub()) * (1.0f / 255.0f), 3, 2, 1, 0); + } + + void set4f(SSAInt index, const SSAVec4f &pixel) + { + _pixels[index * 4].store_vec4ub(SSAVec4i(SSAVec4f::shuffle(pixel * 255.0f, 3, 2, 1, 0))); + } + +public: + SSAUBytePtr _pixels; +}; diff --git a/src/r_compiler/ssa/ssa_pixels.h b/src/r_compiler/ssa/ssa_pixels.h new file mode 100644 index 0000000000..a4209d439a --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixels.h @@ -0,0 +1,39 @@ + +#pragma once + +#include 
"ssa_ubyte.h" +#include "ssa_ubyte_ptr.h" +#include "ssa_float.h" +#include "ssa_float_ptr.h" +#include "ssa_int.h" +#include "ssa_pixeltype.h" +//#include "ssa_pixelformat1f.h" +//#include "ssa_pixelformat2f.h" +//#include "ssa_pixelformat3f.h" +#include "ssa_pixelformat4f.h" +//#include "ssa_pixelformat1ub.h" +//#include "ssa_pixelformat2ub.h" +//#include "ssa_pixelformat3ub.h" +//#include "ssa_pixelformat3ub_rev.h" +#include "ssa_pixelformat4ub.h" +//#include "ssa_pixelformat4ub_argb.h" +#include "ssa_pixelformat4ub_rev.h" +#include "ssa_pixelformat4ub_argb_rev.h" +//#include "ssa_pixelformat4ub_channel.h" + +//typedef SSAPixelType SSAPixels1f; +//typedef SSAPixelType SSAPixels2f; +//typedef SSAPixelType SSAPixels3f; +typedef SSAPixelType SSAPixels4f; + +//typedef SSAPixelType SSAPixels1ub; +//typedef SSAPixelType SSAPixels2ub; +//typedef SSAPixelType SSAPixels3ub; +typedef SSAPixelType SSAPixels4ub; +//typedef SSAPixelType SSAPixels4ub_argb; + +//typedef SSAPixelType SSAPixels3ub_rev; +typedef SSAPixelType SSAPixels4ub_rev; +typedef SSAPixelType SSAPixels4ub_argb_rev; + +//typedef SSAPixelType SSAPixels4ub_channel; diff --git a/src/r_compiler/ssa/ssa_pixeltype.h b/src/r_compiler/ssa/ssa_pixeltype.h new file mode 100644 index 0000000000..8614f171d1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_pixeltype.h @@ -0,0 +1,498 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_float.h" +#include "ssa_vec4f.h" +#include "ssa_bool.h" +#include "ssa_if_block.h" +#include "ssa_phi.h" + +template +class SSAPixelType : public PixelFormat +{ +public: + SSAPixelType() + { + } + + SSAPixelType(SSAInt width, SSAInt height, PixelType pixels) + : PixelFormat(pixels, width, height), _width(width), _height(height) + { + _width32 = SSAVec4i(_width); + SSAVec4i height32(_height); + _widthps = SSAVec4f(_width32); + _heightps = SSAVec4f(height32); + _width16 = SSAVec8s(_width32, _width32); + + _widthheight = SSAVec4i::shuffle(_width32, height32, 0, 0, 4, 4); + _widthheightps = 
SSAVec4i::shuffle(_widthps, _heightps, 0, 0, 4, 4); + } + + SSAInt width() const { return _width; } + SSAInt height() const { return _height; } + SSAInt size() const { return _width * _height; } + + SSABool in_bounds(SSAInt i) const { return i >= 0 && i < _width * _height; } + SSABool in_bounds(SSAInt x, SSAInt y) const { return x>= 0 && x < _width && y >= 0 && y < _height; } + //void throw_if_out_of_bounds(SSAInt i) const { if (!in_bounds(i)) throw clan::Exception("Out of bounds"); } + //void throw_if_out_of_bounds(SSAInt x, SSAInt y) const { if (!in_bounds(x, y)) throw clan::Exception("Out of bounds"); } + + SSAInt s_to_x(SSAFloat s) const { return round(s * SSAFloat(_width)); } + SSAInt t_to_y(SSAFloat t) const { return round(t * SSAFloat(_height)); } + SSAInt clamp_x(SSAInt x) const { return clamp(x, _width); } + SSAInt clamp_y(SSAInt y) const { return clamp(y, _height); } + SSAInt repeat_x(SSAInt x) const { return repeat(x,_width); } + SSAInt repeat_y(SSAInt y) const { return repeat(y, _height); } + SSAInt mirror_x(SSAInt x) const { return mirror(x, _width); } + SSAInt mirror_y(SSAInt y) const { return mirror(y, _height); } + + static SSAInt int_min(SSAInt a, SSAInt b) + { + SSAPhi<SSAInt> phi; + SSAIfBlock branch; + branch.if_block(a <= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + + static SSAInt int_max(SSAInt a, SSAInt b) + { + SSAPhi<SSAInt> phi; + SSAIfBlock branch; + branch.if_block(a >= b); + phi.add_incoming(a); + branch.else_block(); + phi.add_incoming(b); + branch.end_block(); + return phi.create(); + } + // Limits v to the range [0, size - 1]. + static SSAInt clamp(SSAInt v, SSAInt size) + { + return int_max(int_min(v, size - 1), SSAInt(0)); // SSAInt(int) is explicit, so the constant must be wrapped + } + // Wraps v into [0, size): non-negative values take v % size, negative values are shifted up by one period first. + static SSAInt repeat(SSAInt v, SSAInt size) + { + SSAPhi<SSAInt> phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming((size + v % size) % size); // signed remainder is in (-size, 0] here; the outer % maps an exact multiple back to 0 + branch.end_block(); + return phi.create(); + } + + static SSAInt mirror(SSAInt v, 
SSAInt size) + { + SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); + branch.end_block(); + return phi.create(); + } + + static SSAInt round(SSAFloat v) + { + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0.0f); + phi.add_incoming(v + 0.5f); + branch.else_block(); + phi.add_incoming(v - 0.5f); + branch.end_block(); + return SSAInt(phi.create()); + } + + // To do: fix this: + static SSAInt int_floor(SSAFloat v) + { + return SSAInt(v); + } + static SSAFloat fract(SSAFloat v) { return v - SSAFloat(int_floor(v)); } + + SSAVec4f get4f(SSAInt x, SSAInt y) const { return PixelFormat::get4f(x + y * _width); } + void set4f(SSAInt x, SSAInt y, const SSAVec4f &pixel) { PixelFormat::set4f(x + y * _width, pixel); } + + SSAVec4f get_clamp4f(SSAInt x, SSAInt y) const { return get4f(clamp_x(x), clamp_y(y)); } + SSAVec4f get_repeat4f(SSAInt x, SSAInt y) const { return get4f(repeat_x(x), repeat_y(y)); } + SSAVec4f get_mirror4f(SSAInt x, SSAInt y) const { return get4f(mirror_x(x), mirror_y(y)); } + + SSAVec4f linear_interpolate4f(SSAFloat s, SSAFloat t, const SSAVec4f *samples) const + { + SSAFloat a = fract(s * SSAFloat(_width) - 0.5f); + SSAFloat b = fract(t * SSAFloat(_height) - 0.5f); + SSAFloat inv_a = 1.0f - a; + SSAFloat inv_b = 1.0f - b; + return + samples[0] * (inv_a * inv_b) + + samples[1] * (a * inv_b) + + samples[2] * (inv_a * b) + + samples[3] * (a * b); + } + + void gather_clamp4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_clamp4f(x, y); + out_pixels[1] = get_clamp4f(x + 1, y); + out_pixels[2] = get_clamp4f(x, y + 1); + out_pixels[3] = get_clamp4f(x + 1, y + 1); + /* + SSAInt x0 = clamp_x(x); + SSAInt x1 = clamp_x(x + 1); + SSAInt y0 = clamp_y(y); + SSAInt y1 = clamp_y(y + 1); 
+ SSAInt offset0 = y0 * _width; + SSAInt offset1 = y1 * _width; + SSAPhi phi0; + SSAPhi phi1; + SSAPhi phi2; + SSAPhi phi3; + SSAIfBlock if0; + if0.if_block(x0 + 1 == x1); + phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.else_block(); + phi0.add_incoming(PixelFormat::get4f(x0 + offset0)); + phi1.add_incoming(PixelFormat::get4f(x1 + offset0)); + phi2.add_incoming(PixelFormat::get4f(x0 + offset1)); + phi3.add_incoming(PixelFormat::get4f(x1 + offset1)); + if0.end_block(); + out_pixels[0] = phi0.create(); + out_pixels[1] = phi1.create(); + out_pixels[2] = phi2.create(); + out_pixels[3] = phi3.create(); + */ + } + + void gather_repeat4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_repeat4f(x, y); + out_pixels[1] = get_repeat4f(x + 1, y); + out_pixels[2] = get_repeat4f(x, y + 1); + out_pixels[3] = get_repeat4f(x + 1, y + 1); + } + + void gather_mirror4f(SSAFloat s, SSAFloat t, SSAVec4f *out_pixels) const + { + SSAInt x = int_floor(s * SSAFloat(_width) - 0.5f); + SSAInt y = int_floor(t * SSAFloat(_height) - 0.5f); + out_pixels[0] = get_mirror4f(x, y); + out_pixels[1] = get_mirror4f(x + 1, y); + out_pixels[2] = get_mirror4f(x, y + 1); + out_pixels[3] = get_mirror4f(x + 1, y + 1); + } + + SSAVec4f nearest_clamp4f(SSAFloat s, SSAFloat t) const { return get_clamp4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_repeat4f(SSAFloat s, SSAFloat t) const { return get_repeat4f(s_to_x(s), t_to_y(t)); } + SSAVec4f nearest_mirror4f(SSAFloat s, SSAFloat t) const { return get_mirror4f(s_to_x(s), t_to_y(t)); } + + SSAVec4f linear_clamp4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_clamp4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + 
SSAVec4f linear_repeat4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + SSAVec4f linear_mirror4f(SSAFloat s, SSAFloat t) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, samples); + return linear_interpolate4f(s, t, samples); + } + + ///////////////////////////////////////////////////////////////////////// + // Packed versions: + + SSAVec4i s_to_x(SSAVec4f s) const { return round(s * SSAVec4f(_width)); } + SSAVec4i t_to_y(SSAVec4f t) const { return round(t * SSAVec4f(_height)); } + SSAVec4i clamp_x(SSAVec4i x) const { return clamp(x, _width); } + SSAVec4i clamp_y(SSAVec4i y) const { return clamp(y, _height); } + SSAVec4i repeat_x(SSAVec4i x) const { return repeat(x,_width); } + SSAVec4i repeat_y(SSAVec4i y) const { return repeat(y, _height); } + SSAVec4i mirror_x(SSAVec4i x) const { return mirror(x, _width); } + SSAVec4i mirror_y(SSAVec4i y) const { return mirror(y, _height); } + + static SSAVec4i clamp(SSAVec4i v, SSAInt size) + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(v, size - 1), 0); + } + + static SSAVec4i repeat(SSAVec4i v, SSAInt size) + { + return clamp(v, size); + /*SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v >= 0); + phi.add_incoming(v % size); + branch.else_block(); + phi.add_incoming(size - 1 + v % size); + branch.end_block(); + return phi.create();*/ + } + + static SSAVec4i mirror(SSAVec4i v, SSAInt size) + { + return clamp(v, size); + /*SSAInt size2 = size * 2; + v = repeat(v, size2); + + SSAPhi phi; + SSAIfBlock branch; + branch.if_block(v < size); + phi.add_incoming(v); + branch.else_block(); + phi.add_incoming(size2 - v - 1); + branch.end_block(); + return phi.create();*/ + } + + static SSAVec4i round(SSAVec4f v) + { + // Maybe we should use the normal round SSE function (but that requires the rounding mode is set the round to nearest before the code runs) + SSAVec4i signbit = (SSAVec4i::bitcast(v) & 0x80000000); + SSAVec4f 
signed_half = SSAVec4f::bitcast(signbit | SSAVec4i::bitcast(SSAVec4f(0.5f))); + return v + signed_half; + } + + static SSAVec4i int_floor(SSAVec4f v) + { + return SSAVec4i(v) - (SSAVec4i::bitcast(v) >> 31); + } + + static SSAVec4f fract(SSAVec4f v) + { + // return v - SSAVec4f::floor_sse4(v); + return v - SSAVec4f(int_floor(v)); + } + // Fetches the texel addressed by lane `index` of the packed coordinates; both x and y are passed through their wrap functor before the offset is formed. + template<typename WrapXFunctor, typename WrapYFunctor> + SSAVec4f nearest_helper4f(SSAVec4f s, SSAVec4f t, int index, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const + { + SSAVec4i x = wrap_x(int_floor(s * _widthps - 0.5f)); // x is wrapped like y; previously wrap_x was accepted but never applied + SSAVec4i y = int_floor(t * _heightps - 0.5f); + SSAVec8s y16 = SSAVec8s(wrap_y(y), wrap_y(y)); + SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); + SSAVec8s offsetlo = y16 * _width16; + SSAVec4i offset = SSAVec4i::combinelo(offsetlo, offsethi) + x; + return PixelFormat::get4f(offset[index]); + } + + SSAVec4f nearest_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); + */ + } + + SSAVec4f nearest_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + 
[this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + SSAVec4f nearest_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; + return nearest_helper4f(s, t, index, WrapX(this), WrapY(this)); + /* + return nearest_helper4f( + s, t, index, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + template + void gather_helper4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels, WrapXFunctor wrap_x, WrapYFunctor wrap_y) const + { + SSAVec4i x = int_floor(s * _widthps - 0.5f); + SSAVec4i y = int_floor(t * _heightps - 0.5f); + SSAVec8s y16 = SSAVec8s(wrap_y(y + 1), wrap_y(y)); + SSAVec8s offsethi = SSAVec8s::mulhi(y16, _width16); + SSAVec8s offsetlo = y16 * _width16; + SSAVec4i x0 = wrap_x(x); + SSAVec4i x1 = wrap_x(x + 1); + SSAVec4i line0 = SSAVec4i::combinehi(offsetlo, offsethi); + SSAVec4i line1 = SSAVec4i::combinelo(offsetlo, offsethi); + SSAVec4i offset0 = x0 + line0; + SSAVec4i offset1 = x1 + line0; + SSAVec4i offset2 = x0 + line1; + SSAVec4i offset3 = x1 + line1; + out_pixels[0] = PixelFormat::get4f(offset0[index]); + out_pixels[1] = PixelFormat::get4f(offset1[index]); + out_pixels[2] = PixelFormat::get4f(offset2[index]); + out_pixels[3] = PixelFormat::get4f(offset3[index]); + } + + void gather_clamp4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->clamp_y(v); } const 
SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return clamp_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return clamp_y(v); }); + */ + } + + void gather_repeat4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->repeat_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return repeat_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return repeat_y(v); }); + */ + } + + void gather_mirror4f(SSAVec4f s, SSAVec4f t, int index, SSAVec4f *out_pixels) const + { + struct WrapX { WrapX(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_x(v); } const SSAPixelType *self; }; + struct WrapY { WrapY(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i v) { return self->mirror_y(v); } const SSAPixelType *self; }; + return gather_helper4f(s, t, index, out_pixels, WrapX(this), WrapY(this)); + /* + gather_helper4f( + s, t, index, out_pixels, + [this](SSAVec4i v) -> SSAVec4i { return mirror_x(v); }, + [this](SSAVec4i v) -> SSAVec4i { return mirror_y(v); }); + */ + } + + SSAVec4f linear_clamp4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + gather_clamp4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_repeat4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_repeat4f(s, t, index, samples); + return linear_interpolate4f(s, t, 
index, samples); + } + + SSAVec4f linear_mirror4f(SSAVec4f s, SSAVec4f t, int index) const + { + SSAVec4f samples[4]; + gather_mirror4f(s, t, index, samples); + return linear_interpolate4f(s, t, index, samples); + } + + SSAVec4f linear_interpolate4f(SSAVec4f s, SSAVec4f t, int index, const SSAVec4f *samples) const + { + SSAVec4f a = fract(s * _widthps - 0.5f); + SSAVec4f b = fract(t * _heightps - 0.5f); + SSAVec4f inv_a = 1.0f - a; + SSAVec4f inv_b = 1.0f - b; + return + samples[0] * SSAVec4f::shuffle(inv_a * inv_b, index, index, index, index) + + samples[1] * SSAVec4f::shuffle(a * inv_b, index, index, index, index) + + samples[2] * SSAVec4f::shuffle(inv_a * b, index, index, index, index) + + samples[3] * SSAVec4f::shuffle(a * b, index, index, index, index); + } + + ///////////////////////////////////////////////////////////////////////// + + SSAVec4i clamp(SSAVec4i sstt) const + { + return SSAVec4i::max_sse41(SSAVec4i::min_sse41(sstt, _widthheight - 1), 0); + } + + template + void gather_helper4f(SSAVec4f st, SSAVec4f *out_pixels, WrapFunctor wrap) const + { + SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1); + SSAVec4i xxyy = wrap(int_floor(sstt * _widthheightps - 0.5f) + SSAVec4i(0, 1, 0, 1)); + SSAVec4i xxoffset = SSAVec4f::shuffle(xxyy, xxyy * _width32, 0, 1, 6, 7); + SSAVec4i offsets = SSAVec4i::shuffle(xxoffset, 0, 1, 0, 1) + SSAVec4i::shuffle(xxoffset, 2, 2, 3, 3); + out_pixels[0] = PixelFormat::get4f(offsets[0]); + out_pixels[1] = PixelFormat::get4f(offsets[1]); + out_pixels[2] = PixelFormat::get4f(offsets[2]); + out_pixels[3] = PixelFormat::get4f(offsets[3]); + } + + void gather_clamp4f(SSAVec4f st, SSAVec4f *out_pixels) const + { + struct Wrap { Wrap(const SSAPixelType *self) : self(self) { } SSAVec4i operator()(SSAVec4i sstt) { return self->clamp(sstt); } const SSAPixelType *self; }; + return gather_helper4f(st, out_pixels, Wrap(this)); + } + + SSAVec4f linear_clamp4f(SSAVec4f st) const + { + SSAScopeHint hint("linearclamp"); + SSAVec4f samples[4]; + 
gather_clamp4f(st, samples);
+		return linear_interpolate4f(st, samples);
+	}
+
+	SSAVec4f linear_interpolate4f(SSAVec4f st, const SSAVec4f *samples) const
+	{
+		SSAVec4f sstt = SSAVec4f::shuffle(st, 0, 0, 1, 1);
+		SSAVec4f aabb = fract(sstt * _widthheightps - 0.5f);
+		SSAVec4f inv_aabb = 1.0f - aabb;
+		SSAVec4f ab_inv_ab = SSAVec4f::shuffle(aabb, inv_aabb, 0, 2, 4, 6);
+		SSAVec4f ab__inv_a_b__inv_a_inv_b__a_invb = ab_inv_ab * SSAVec4f::shuffle(ab_inv_ab, 1, 2, 3, 0);
+		return
+			samples[0] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 2, 2, 2, 2) + // lane 2: inv_a * inv_b (lane order as spelled out in the variable name)
+			samples[1] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 3, 3, 3, 3) + // lane 3: a * inv_b
+			samples[2] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 1, 1, 1, 1) + // lane 1: inv_a * b
+			samples[3] * SSAVec4f::shuffle(ab__inv_a_b__inv_a_inv_b__a_invb, 0, 0, 0, 0); // lane 0: a * b
+	}
+
+public:
+	SSAInt _width;
+	SSAInt _height;
+	SSAVec4i _width32;
+	SSAVec8s _width16;
+	SSAVec4f _widthps;
+	SSAVec4f _heightps;
+
+	SSAVec4i _widthheight;
+	SSAVec4f _widthheightps;
+};
diff --git a/src/r_compiler/ssa/ssa_scope.cpp b/src/r_compiler/ssa/ssa_scope.cpp
new file mode 100644
index 0000000000..e5d34a2033
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_scope.cpp
@@ -0,0 +1,74 @@
+
+#include "r_compiler/llvm_include.h"
+#include "ssa_scope.h"
+#include "ssa_int.h"
+
+// Registers this scope as the active one; every static accessor below goes through 'instance'.
+SSAScope::SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder)
+: _context(context), _module(module), _builder(builder)
+{
+	instance = this;
+}
+
+SSAScope::~SSAScope()
+{
+	instance = 0;
+}
+
+llvm::LLVMContext &SSAScope::context()
+{
+	return *instance->_context;
+}
+
+llvm::Module *SSAScope::module()
+{
+	return instance->_module;
+}
+
+llvm::IRBuilder<> &SSAScope::builder()
+{
+	return *instance->_builder;
+}
+
+// Returns the module's declaration of the given intrinsic, creating it on first use.
+llvm::Function *SSAScope::intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types)
+{
+	// Look the function up under the same name it is created with. The name depends on
+	// parameter_types, so searching for getName(id) alone would miss a declaration that was
+	// created with a non-empty type list, and a second declaration would be added.
+	const std::string name = llvm::Intrinsic::getName(id, parameter_types);
+	llvm::Function *func = module()->getFunction(name);
+	if (func == 0)
+		func = llvm::Function::Create(llvm::Intrinsic::getType(context(), id, parameter_types), llvm::Function::ExternalLinkage, name, module());
+	return func;
+}
+
+// Allocates stack storage for a single value of the given type.
+llvm::Value *SSAScope::alloca(llvm::Type *type)
+{
+	return alloca(type, SSAInt(1));
+}
+
+llvm::Value *SSAScope::alloca(llvm::Type *type, SSAInt size)
+{
+	// Allocas must be created at top of entry block for the PromoteMemoryToRegisterPass to work
+	llvm::BasicBlock &entry = SSAScope::builder().GetInsertBlock()->getParent()->getEntryBlock();
+	llvm::IRBuilder<> alloca_builder(&entry, entry.begin());
+	return alloca_builder.CreateAlloca(type, size.v, hint());
+}
+
+const std::string &SSAScope::hint()
+{
+	return instance->_hint;
+}
+
+// Sets the name that the SSA wrappers pass to IRBuilder calls; an empty hint falls back to "tmp".
+void SSAScope::set_hint(const std::string &new_hint)
+{
+	if (new_hint.empty())
+		instance->_hint = "tmp";
+	else
+		instance->_hint = new_hint;
+}
+
+SSAScope *SSAScope::instance = 0;
diff --git a/src/r_compiler/ssa/ssa_scope.h b/src/r_compiler/ssa/ssa_scope.h
new file mode 100644
index 0000000000..ad080fde6c
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_scope.h
@@ -0,0 +1,41 @@
+
+#pragma once
+
+class SSAInt;
+
+// Gives the SSA wrapper classes static access to the LLVM context, module and IR builder of the most recently constructed scope.
+class SSAScope
+{
+public:
+	SSAScope(llvm::LLVMContext *context, llvm::Module *module, llvm::IRBuilder<> *builder);
+	~SSAScope();
+	static llvm::LLVMContext &context();
+	static llvm::Module *module();
+	static llvm::IRBuilder<> &builder();
+	static llvm::Function *intrinsic(llvm::Intrinsic::ID id, llvm::ArrayRef<llvm::Type *> parameter_types = llvm::ArrayRef<llvm::Type *>());
+	static llvm::Value *alloca(llvm::Type *type);
+	static llvm::Value *alloca(llvm::Type *type, SSAInt size);
+	static const std::string &hint();
+	static void set_hint(const std::string &hint);
+
+private:
+	static SSAScope *instance;
+	llvm::LLVMContext *_context;
+	llvm::Module *_module;
+	llvm::IRBuilder<> *_builder;
+	std::string _hint;
+};
+
+// Scoped hint: records the hint active at construction and puts it back on destruction.
+class SSAScopeHint
+{
+public:
+	SSAScopeHint() : old_hint(SSAScope::hint()) { }
+	SSAScopeHint(const std::string &hint) : old_hint(SSAScope::hint()) { SSAScope::set_hint(hint); }
+	~SSAScopeHint() { SSAScope::set_hint(old_hint); }
+	void set(const std::string &hint) { SSAScope::set_hint(hint); }
+	void clear() { SSAScope::set_hint(old_hint); }
+
+private:
+	std::string old_hint;
+};
diff --git a/src/r_compiler/ssa/ssa_short.cpp b/src/r_compiler/ssa/ssa_short.cpp
new file mode 100644
index 0000000000..017f3002a2
--- /dev/null
+++ b/src/r_compiler/ssa/ssa_short.cpp
@@ -0,0 +1,153 @@
+
+#include "r_compiler/llvm_include.h"
+#include "ssa_short.h"
+#include "ssa_float.h"
+#include "ssa_int.h"
+#include "ssa_scope.h"
+
+SSAShort::SSAShort()
+: v(0)
+{
+}
+
+SSAShort::SSAShort(int constant) // 16-bit constant, built as a signed APInt
+: v(0)
+{
+	v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true));
+}
+
+SSAShort::SSAShort(SSAFloat f) // float to signed 16-bit integer conversion
+: v(0)
+{
+	v = SSAScope::builder().CreateFPToSI(f.v, llvm::Type::getInt16Ty(SSAScope::context()), SSAScope::hint());
+}
+
+SSAShort::SSAShort(llvm::Value *v)
+: v(v)
+{
+}
+
+llvm::Type *SSAShort::llvm_type()
+{
+	return llvm::Type::getInt16Ty(SSAScope::context());
+}
+
+SSAInt SSAShort::zext_int() // zero-extends to SSAInt's type
+{
+	return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint()));
+}
+
+SSAShort operator+(const SSAShort &a, const SSAShort &b)
+{
+	return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
+}
+
+SSAShort operator-(const SSAShort &a, const SSAShort &b)
+{
+	return SSAShort::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint()));
+}
+
+SSAShort operator*(const SSAShort &a, const SSAShort &b)
+{
+	return SSAShort::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint()));
+}
+
+SSAShort operator/(const SSAShort &a, const SSAShort &b) // signed division
+{
+	return SSAShort::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint()));
+}
+
+SSAShort operator%(const SSAShort &a, const SSAShort &b) // signed remainder
+{
+	return SSAShort::from_llvm(SSAScope::builder().CreateSRem(a.v, b.v, SSAScope::hint()));
+}
+
+SSAShort operator+(int a, const SSAShort &b)
+{
+	return SSAShort(a) + b;
+}
+
+SSAShort operator-(int a, const SSAShort &b) // a becomes a 16-bit constant, then the SSAShort overload runs
+{
+	return operator-(SSAShort(a), b);
+}
+
+SSAShort operator*(int a, const SSAShort &b) // constant * value
+{
+	return operator*(SSAShort(a), b);
+}
+
+SSAShort operator/(int a, const SSAShort &b) // constant / value
+{
+	return operator/(SSAShort(a), b);
+}
+
+SSAShort operator%(int a, const SSAShort &b) // constant % value
+{
+	return operator%(SSAShort(a), b);
+}
+
+SSAShort operator+(const SSAShort &a, int b) // value + constant
+{
+	return operator+(a, SSAShort(b));
+}
+
+SSAShort operator-(const SSAShort &a, int b) // value - constant
+{
+	return operator-(a, SSAShort(b));
+}
+
+SSAShort operator*(const SSAShort &a, int b) // value * constant
+{
+	return operator*(a, SSAShort(b));
+}
+
+SSAShort operator/(const SSAShort &a, int b) // value / constant
+{
+	return operator/(a, SSAShort(b));
+}
+
+SSAShort operator%(const SSAShort &a, int b) // value % constant
+{
+	return operator%(a, SSAShort(b));
+}
+
+SSAShort operator<<(const SSAShort &a, int bits) // shift left by a compile-time amount
+{
+	return SSAShort(SSAScope::builder().CreateShl(a.v, bits, SSAScope::hint()));
+}
+
+SSAShort operator>>(const SSAShort &a, int bits) // logical (LShr) shift right by a compile-time amount
+{
+	return SSAShort(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint()));
+}
+
+SSAShort operator<<(const SSAShort &a, const SSAInt &bits) // shift left by a generated value
+{
+	return SSAShort(SSAScope::builder().CreateShl(a.v, bits.v, SSAScope::hint()));
+}
+
+SSAShort operator>>(const SSAShort &a, const SSAInt &bits) // logical (LShr) shift right by a generated value
+{
+	return SSAShort(SSAScope::builder().CreateLShr(a.v, bits.v, SSAScope::hint()));
+}
+
+SSAShort operator&(const SSAShort &a, int b) // bitwise and with an immediate
+{
+	return SSAShort(SSAScope::builder().CreateAnd(a.v, b, SSAScope::hint()));
+}
+
+SSAShort operator&(const SSAShort &a, const SSAShort &b) // bitwise and
+{
+	return SSAShort(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint()));
+}
+
+SSAShort operator|(const SSAShort &a, int b) // bitwise or with an immediate
+{
+	return SSAShort(SSAScope::builder().CreateOr(a.v, b, SSAScope::hint()));
+}
+
+SSAShort operator|(const SSAShort &a, const SSAShort &b) // bitwise or
+{
+	return SSAShort(SSAScope::builder().CreateOr(a.v, b.v, SSAScope::hint()));
+}
diff --git a/src/r_compiler/ssa/ssa_short.h 
b/src/r_compiler/ssa/ssa_short.h new file mode 100644 index 0000000000..4a53434026 --- /dev/null +++ b/src/r_compiler/ssa/ssa_short.h @@ -0,0 +1,51 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAFloat; +class SSAInt; + +class SSAShort +{ +public: + SSAShort(); + explicit SSAShort(int constant); + SSAShort(SSAFloat f); + explicit SSAShort(llvm::Value *v); + static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } + static llvm::Type *llvm_type(); + + SSAInt zext_int(); + + llvm::Value *v; +}; + +SSAShort operator+(const SSAShort &a, const SSAShort &b); +SSAShort operator-(const SSAShort &a, const SSAShort &b); +SSAShort operator*(const SSAShort &a, const SSAShort &b); +SSAShort operator/(const SSAShort &a, const SSAShort &b); +SSAShort operator%(const SSAShort &a, const SSAShort &b); + +SSAShort operator+(int a, const SSAShort &b); +SSAShort operator-(int a, const SSAShort &b); +SSAShort operator*(int a, const SSAShort &b); +SSAShort operator/(int a, const SSAShort &b); +SSAShort operator%(int a, const SSAShort &b); + +SSAShort operator+(const SSAShort &a, int b); +SSAShort operator-(const SSAShort &a, int b); +SSAShort operator*(const SSAShort &a, int b); +SSAShort operator/(const SSAShort &a, int b); +SSAShort operator%(const SSAShort &a, int b); + +SSAShort operator<<(const SSAShort &a, int bits); +SSAShort operator>>(const SSAShort &a, int bits); +SSAShort operator<<(const SSAShort &a, const SSAInt &bits); +SSAShort operator>>(const SSAShort &a, const SSAInt &bits); + +SSAShort operator&(const SSAShort &a, int b); +SSAShort operator&(const SSAShort &a, const SSAShort &b); +SSAShort operator|(const SSAShort &a, int b); +SSAShort operator|(const SSAShort &a, const SSAShort &b); diff --git a/src/r_compiler/ssa/ssa_stack.h b/src/r_compiler/ssa/ssa_stack.h new file mode 100644 index 0000000000..435530be1f --- /dev/null +++ b/src/r_compiler/ssa/ssa_stack.h @@ -0,0 +1,25 @@ + +#pragma once + +template +class 
SSAStack +{ +public: + SSAStack() + : v(0) + { + v = SSAScope::alloca(SSAVariable::llvm_type()); + } + + SSAVariable load() const + { + return SSAVariable::from_llvm(SSAScope::builder().CreateLoad(v, SSAScope::hint())); + } + + void store(const SSAVariable &new_value) + { + SSAScope::builder().CreateStore(new_value.v, v); + } + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_struct_type.cpp b/src/r_compiler/ssa/ssa_struct_type.cpp new file mode 100644 index 0000000000..d4ae2acb1c --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.cpp @@ -0,0 +1,19 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_struct_type.h" +#include "ssa_scope.h" + +void SSAStructType::add_parameter(llvm::Type *type) +{ + elements.push_back(type); +} + +llvm::Type *SSAStructType::llvm_type() +{ + return llvm::StructType::get(SSAScope::context(), elements, false); +} + +llvm::Type *SSAStructType::llvm_type_packed() +{ + return llvm::StructType::get(SSAScope::context(), elements, true); +} diff --git a/src/r_compiler/ssa/ssa_struct_type.h b/src/r_compiler/ssa/ssa_struct_type.h new file mode 100644 index 0000000000..67b056b325 --- /dev/null +++ b/src/r_compiler/ssa/ssa_struct_type.h @@ -0,0 +1,17 @@ + +#pragma once + +#include + +namespace llvm { class Type; } + +class SSAStructType +{ +public: + void add_parameter(llvm::Type *type); + llvm::Type *llvm_type(); + llvm::Type *llvm_type_packed(); + +private: + std::vector elements; +}; diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp new file mode 100644 index 0000000000..3204d064d1 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -0,0 +1,95 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_ubyte.h" +#include "ssa_scope.h" + +SSAUByte::SSAUByte() +: v(0) +{ +} + +SSAUByte::SSAUByte(unsigned char constant) +: v(0) +{ + v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false)); +} + +SSAUByte::SSAUByte(llvm::Value *v) +: v(v) +{ +} + +llvm::Type 
*SSAUByte::llvm_type() +{ + return llvm::Type::getInt8Ty(SSAScope::context()); +} + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAUByte operator/(const SSAUByte &a, const SSAUByte &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v); +} +*/ +SSAUByte operator+(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) + b; +} + +SSAUByte operator-(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) - b; +} + +SSAUByte operator*(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) * b; +} +/* +SSAUByte operator/(unsigned char a, const SSAUByte &b) +{ + return SSAUByte(a) / b; +} +*/ +SSAUByte operator+(const SSAUByte &a, unsigned char b) +{ + return a + SSAUByte(b); +} + +SSAUByte operator-(const SSAUByte &a, unsigned char b) +{ + return a - SSAUByte(b); +} + +SSAUByte operator*(const SSAUByte &a, unsigned char b) +{ + return a * SSAUByte(b); +} +/* +SSAUByte operator/(const SSAUByte &a, unsigned char b) +{ + return a / SSAUByte(b); +} +*/ +SSAUByte operator<<(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateShl(a.v, bits)); +} + +SSAUByte operator>>(const SSAUByte &a, unsigned char bits) +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLShr(a.v, bits)); +} diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h new file mode 100644 index 0000000000..ef878b3259 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -0,0 +1,35 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAUByte +{ +public: + SSAUByte(); + 
explicit SSAUByte(unsigned char constant); + explicit SSAUByte(llvm::Value *v); + static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAUByte operator+(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator-(const SSAUByte &a, const SSAUByte &b); +SSAUByte operator*(const SSAUByte &a, const SSAUByte &b); +//SSAUByte operator/(const SSAUByte &a, const SSAUByte &b); + +SSAUByte operator+(unsigned char a, const SSAUByte &b); +SSAUByte operator-(unsigned char a, const SSAUByte &b); +SSAUByte operator*(unsigned char a, const SSAUByte &b); +//SSAUByte operator/(unsigned char a, const SSAUByte &b); + +SSAUByte operator+(const SSAUByte &a, unsigned char b); +SSAUByte operator-(const SSAUByte &a, unsigned char b); +SSAUByte operator*(const SSAUByte &a, unsigned char b); +//SSAUByte operator/(const SSAUByte &a, unsigned char b); + +SSAUByte operator<<(const SSAUByte &a, unsigned char bits); +SSAUByte operator>>(const SSAUByte &a, unsigned char bits); diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp new file mode 100644 index 0000000000..98bf27c462 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -0,0 +1,100 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_ubyte_ptr.h" +#include "ssa_scope.h" + +SSAUBytePtr::SSAUBytePtr() +: v(0) +{ +} + +SSAUBytePtr::SSAUBytePtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAUBytePtr::llvm_type() +{ + return llvm::Type::getInt8PtrTy(SSAScope::context()); +} + +SSAUBytePtr SSAUBytePtr::operator[](SSAInt index) const +{ + return SSAUBytePtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAUByte SSAUBytePtr::load() const +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAUBytePtr::load_vec4ub() const +{ + // _mm_cvtsi32_si128 as implemented by clang: + SSAInt i32 = 
SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); + llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); + SSAVec4i v4i = SSAVec4i::from_llvm(v); + + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 + return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 +/* + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); + llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); + return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type)); +*/ +} + +SSAVec16ub SSAUBytePtr::load_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), false, SSAScope::hint())); +} + +SSAVec16ub SSAUBytePtr::load_unaligned_vec16ub() const +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + return SSAVec16ub::from_llvm(SSAScope::builder().Insert(new 
llvm::LoadInst(SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAUBytePtr::store(const SSAUByte &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value) +{ + // Store using saturate: + SSAVec8s v8s(new_value, new_value); + SSAVec16ub v16ub(v8s, v8s); + + llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); + llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), mask, SSAScope::hint()); + SSAScope::builder().CreateStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false); +} + +void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value) +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + llvm::StoreInst *inst = SSAScope::builder().CreateStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint())); + + // The following generates _mm_stream_si128, maybe! 
+ // llvm::MDNode *node = llvm::MDNode::get(SSAScope::context(), SSAScope::builder().getInt32(1)); + // inst->setMetadata(SSAScope::module()->getMDKindID("nontemporal"), node); +} + +void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value) +{ + llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo(); + llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4); +} diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.h b/src/r_compiler/ssa/ssa_ubyte_ptr.h new file mode 100644 index 0000000000..c084068bc7 --- /dev/null +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.h @@ -0,0 +1,33 @@ + +#pragma once + +#include "ssa_ubyte.h" +#include "ssa_int.h" +#include "ssa_vec4i.h" +#include "ssa_vec8s.h" +#include "ssa_vec16ub.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAUBytePtr +{ +public: + SSAUBytePtr(); + explicit SSAUBytePtr(llvm::Value *v); + static SSAUBytePtr from_llvm(llvm::Value *v) { return SSAUBytePtr(v); } + static llvm::Type *llvm_type(); + SSAUBytePtr operator[](SSAInt index) const; + SSAUBytePtr operator[](int index) const { return (*this)[SSAInt(index)]; } + SSAUByte load() const; + SSAVec4i load_vec4ub() const; + SSAVec8s load_vec8s() const; + SSAVec16ub load_vec16ub() const; + SSAVec16ub load_unaligned_vec16ub() const; + void store(const SSAUByte &new_value); + void store_vec4ub(const SSAVec4i &new_value); + void store_vec16ub(const SSAVec16ub &new_value); + void store_unaligned_vec16ub(const SSAVec16ub &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_value.cpp b/src/r_compiler/ssa/ssa_value.cpp new file mode 100644 index 0000000000..c37b7f4c1d --- /dev/null +++ b/src/r_compiler/ssa/ssa_value.cpp @@ -0,0 +1,56 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_value.h" +#include "ssa_int.h" +#include "ssa_scope.h" + +SSAValue 
SSAValue::load() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateLoad(v, false)); +} + +void SSAValue::store(llvm::Value *value) +{ + SSAScope::builder().CreateStore(value, v, false); +} + +SSAIndexLookup SSAValue::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAValue::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes.push_back(index.v); + return result; +} + +///////////////////////////////////////////////////////////////////////////// + +SSAIndexLookup::operator SSAValue() +{ + return SSAValue::from_llvm(SSAScope::builder().CreateGEP(v, indexes)); +} + +SSAIndexLookup SSAIndexLookup::operator[](int index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(SSAInt(index).v); + return result; +} + +SSAIndexLookup SSAIndexLookup::operator[](SSAInt index) +{ + SSAIndexLookup result; + result.v = v; + result.indexes = indexes; + result.indexes.push_back(index.v); + return result; +} diff --git a/src/r_compiler/ssa/ssa_value.h b/src/r_compiler/ssa/ssa_value.h new file mode 100644 index 0000000000..ec156a4529 --- /dev/null +++ b/src/r_compiler/ssa/ssa_value.h @@ -0,0 +1,53 @@ + +#pragma once + +#include + +namespace llvm { class Value; } + +class SSAInt; +class SSAIndexLookup; + +class SSAValue +{ +public: + SSAValue() : v(0) { } + + static SSAValue from_llvm(llvm::Value *v) { SSAValue val; val.v = v; return val; } + + SSAValue load(); + void store(llvm::Value *v); + + template + operator Type() + { + return Type::from_llvm(v); + } + + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); + + llvm::Value *v; +}; + +class SSAIndexLookup +{ +public: + SSAIndexLookup() : v(0) { } + + llvm::Value *v; + std::vector indexes; + + SSAValue load() { SSAValue value = *this; return value.load(); } + void store(llvm::Value *v) { SSAValue value = *this; return 
value.store(v); } + + template + operator Type() + { + return Type::from_llvm(v); + } + + operator SSAValue(); + SSAIndexLookup operator[](int index); + SSAIndexLookup operator[](SSAInt index); +}; diff --git a/src/r_compiler/ssa/ssa_vec16ub.cpp b/src/r_compiler/ssa/ssa_vec16ub.cpp new file mode 100644 index 0000000000..4a077382eb --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.cpp @@ -0,0 +1,155 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_vec16ub.h" +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_scope.h" + +SSAVec16ub::SSAVec16ub() +: v(0) +{ +} + +SSAVec16ub::SSAVec16ub(unsigned char constant) +: v(0) +{ + std::vector constants; + constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant0, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant1, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant2, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant3, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant4, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant5, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant6, 
false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant7, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant8, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant9, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant10, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant11, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant12, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant13, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant14, false))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(8, constant15, false))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec16ub::SSAVec16ub(llvm::Value *v) +: v(v) +{ +} + +SSAVec16ub::SSAVec16ub(SSAVec8s s0, SSAVec8s s1) +: v(0) +{ + llvm::Value *values[2] = { s0.v, s1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packuswb_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec16ub::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16); +} + +SSAVec16ub SSAVec16ub::bitcast(SSAVec4i i32) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7, index8, index9, index10, index11, index12, index13, 
index14, index15); +} + +SSAVec16ub SSAVec16ub::shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index8))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index9))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index10))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index11))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index12))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index13))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index14))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index15))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec16ub::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + 
+SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAVec16ub::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b) +{ + return SSAScope::builder().CreateDiv(a.v, b.v, SSAScope::hint()); +} +*/ +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) + b; +} + +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) - b; +} + +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) * b; +} +/* +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b) +{ + return SSAVec16ub(a) / b; +} +*/ +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b) +{ + return a + SSAVec16ub(b); +} + +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b) +{ + return a - SSAVec16ub(b); +} + +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b) +{ + return a * SSAVec16ub(b); +} +/* +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b) +{ + return a / SSAVec16ub(b); +} +*/ \ No newline at end of file diff --git a/src/r_compiler/ssa/ssa_vec16ub.h b/src/r_compiler/ssa/ssa_vec16ub.h new file mode 100644 index 0000000000..8f48c0c490 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec16ub.h @@ -0,0 +1,42 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec8s; +class SSAVec4i; + +class SSAVec16ub +{ +public: + SSAVec16ub(); + explicit SSAVec16ub(unsigned char constant); + explicit SSAVec16ub( + unsigned char constant0, unsigned char constant1, unsigned char constant2, unsigned char constant3, unsigned char 
constant4, unsigned char constant5, unsigned char constant6, unsigned char constant7, + unsigned char constant8, unsigned char constant9, unsigned char constant10, unsigned char constant11, unsigned char constant12, unsigned char constant13, unsigned char constant14, unsigned char constant15); + explicit SSAVec16ub(llvm::Value *v); + SSAVec16ub(SSAVec8s s0, SSAVec8s s1); + static SSAVec16ub from_llvm(llvm::Value *v) { return SSAVec16ub(v); } + static llvm::Type *llvm_type(); + static SSAVec16ub bitcast(SSAVec4i i32); + static SSAVec16ub shuffle(const SSAVec16ub &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + static SSAVec16ub shuffle(const SSAVec16ub &i0, const SSAVec16ub &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7, int index8, int index9, int index10, int index11, int index12, int index13, int index14, int index15); + + llvm::Value *v; +}; + +SSAVec16ub operator+(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator-(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator*(const SSAVec16ub &a, const SSAVec16ub &b); +SSAVec16ub operator/(const SSAVec16ub &a, const SSAVec16ub &b); + +SSAVec16ub operator+(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator-(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator*(unsigned char a, const SSAVec16ub &b); +SSAVec16ub operator/(unsigned char a, const SSAVec16ub &b); + +SSAVec16ub operator+(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator-(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator*(const SSAVec16ub &a, unsigned char b); +SSAVec16ub operator/(const SSAVec16ub &a, unsigned char b); diff --git a/src/r_compiler/ssa/ssa_vec4f.cpp b/src/r_compiler/ssa/ssa_vec4f.cpp new file mode 100644 index 0000000000..dc6f9a716b --- /dev/null +++ 
b/src/r_compiler/ssa/ssa_vec4f.cpp @@ -0,0 +1,249 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_vec4f.h" +#include "ssa_vec4i.h" +#include "ssa_float.h" +#include "ssa_int.h" +#include "ssa_scope.h" + +SSAVec4f::SSAVec4f() +: v(0) +{ +} + +SSAVec4f::SSAVec4f(float constant) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4f::SSAVec4f(float constant0, float constant1, float constant2, float constant3) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant0))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant1))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant2))); + constants.push_back(llvm::ConstantFP::get(SSAScope::context(), llvm::APFloat(constant3))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec4f::SSAVec4f(SSAFloat f) +: v(0) +{ + llvm::Type *m1xfloattype = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 1); + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(f.v, m1xfloattype, SSAScope::hint()), llvm::UndefValue::get(m1xfloattype), mask, SSAScope::hint()); +} + +SSAVec4f::SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3) +: v(0) +{ + v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(llvm_type()), f0.v, llvm::ConstantInt::get(SSAScope::context(), 
llvm::APInt(32, (uint64_t)0))); + v = SSAScope::builder().CreateInsertElement(v, f1.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)1))); + v = SSAScope::builder().CreateInsertElement(v, f2.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)2))); + v = SSAScope::builder().CreateInsertElement(v, f3.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)3))); +} + +SSAVec4f::SSAVec4f(llvm::Value *v) +: v(v) +{ +} + +SSAVec4f::SSAVec4f(SSAVec4i i32) +: v(0) +{ + //llvm::VectorType *m128type = llvm::VectorType::get(llvm::Type::getFloatTy(*context), 4); + //return builder->CreateSIToFP(i32.v, m128type); + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvtdq2ps), i32.v, SSAScope::hint()); +} + +llvm::Type *SSAVec4f::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4); +} + +SSAFloat SSAVec4f::operator[](SSAInt index) const +{ + return SSAFloat::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); +} + +SSAFloat SSAVec4f::operator[](int index) const +{ + return (*this)[SSAInt(index)]; +} + +SSAVec4f SSAVec4f::insert_element(SSAVec4f vec4f, SSAFloat value, int index) +{ + return from_llvm(SSAScope::builder().CreateInsertElement(vec4f.v, value.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)index)))); +} + +SSAVec4f SSAVec4f::bitcast(SSAVec4i i32) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4f SSAVec4f::sqrt(SSAVec4f f) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sqrt, params), f.v, SSAScope::hint())); + //return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_sqrt_ps), f.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::rcp(SSAVec4f f) +{ + 
return SSAVec4f::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse_rcp_ps), f.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::sin(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::sin, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::cos(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::cos, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::pow(SSAVec4f val, SSAVec4f power) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + std::vector args; + args.push_back(val.v); + args.push_back(power.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::pow, params), args, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::exp(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::exp, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::log(SSAVec4f val) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::log, params), val.v, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::fma(SSAVec4f a, SSAVec4f b, SSAVec4f c) +{ + std::vector params; + params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + //params.push_back(SSAVec4f::llvm_type()); + std::vector args; + args.push_back(a.v); + args.push_back(b.v); + args.push_back(c.v); + return SSAFloat::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::fma, params), args, SSAScope::hint())); +} + +void 
SSAVec4f::transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3) +{ + SSAVec4f tmp0 = shuffle(row0, row1, 0x44);//_MM_SHUFFLE(1,0,1,0)); + SSAVec4f tmp2 = shuffle(row0, row1, 0xEE);//_MM_SHUFFLE(3,2,3,2)); + SSAVec4f tmp1 = shuffle(row2, row3, 0x44);//_MM_SHUFFLE(1,0,1,0)); + SSAVec4f tmp3 = shuffle(row2, row3, 0xEE);//_MM_SHUFFLE(3,2,3,2)); + row0 = shuffle(tmp0, tmp1, 0x88);//_MM_SHUFFLE(2,0,2,0)); + row1 = shuffle(tmp0, tmp1, 0xDD);//_MM_SHUFFLE(3,1,3,1)); + row2 = shuffle(tmp2, tmp3, 0x88);//_MM_SHUFFLE(2,0,2,0)); + row3 = shuffle(tmp2, tmp3, 0xDD);//_MM_SHUFFLE(3,1,3,1)); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3) +{ + return shuffle(f0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec4f::from_llvm(SSAScope::builder().CreateShuffleVector(f0.v, f1.v, mask, SSAScope::hint())); +} + +SSAVec4f SSAVec4f::shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask) +{ + return shuffle(f0, f1, mask & 3, (mask >> 2) & 3, ((mask >> 4) & 3) + 4, ((mask >> 6) & 3) + 4); +} + +SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFSub(a.v, b.v, SSAScope::hint())); +} + 
+SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b) +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateFDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec4f operator+(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) + b; +} + +SSAVec4f operator-(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) - b; +} + +SSAVec4f operator*(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) * b; +} + +SSAVec4f operator/(float a, const SSAVec4f &b) +{ + return SSAVec4f(a) / b; +} + +SSAVec4f operator+(const SSAVec4f &a, float b) +{ + return a + SSAVec4f(b); +} + +SSAVec4f operator-(const SSAVec4f &a, float b) +{ + return a - SSAVec4f(b); +} + +SSAVec4f operator*(const SSAVec4f &a, float b) +{ + return a * SSAVec4f(b); +} + +SSAVec4f operator/(const SSAVec4f &a, float b) +{ + return a / SSAVec4f(b); +} diff --git a/src/r_compiler/ssa/ssa_vec4f.h b/src/r_compiler/ssa/ssa_vec4f.h new file mode 100644 index 0000000000..6d4ae63352 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f.h @@ -0,0 +1,58 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4i; +class SSAFloat; +class SSAInt; + +class SSAVec4f +{ +public: + SSAVec4f(); + explicit SSAVec4f(float constant); + explicit SSAVec4f(float constant0, float constant1, float constant2, float constant3); + SSAVec4f(SSAFloat f); + SSAVec4f(SSAFloat f0, SSAFloat f1, SSAFloat f2, SSAFloat f3); + explicit SSAVec4f(llvm::Value *v); + SSAVec4f(SSAVec4i i32); + SSAFloat operator[](SSAInt index) const; + SSAFloat operator[](int index) const; + static SSAVec4f insert_element(SSAVec4f vec4f, SSAFloat value, int index); + static SSAVec4f bitcast(SSAVec4i i32); + static SSAVec4f sqrt(SSAVec4f f); + static SSAVec4f rcp(SSAVec4f f); + static SSAVec4f sin(SSAVec4f val); + static SSAVec4f cos(SSAVec4f val); + static SSAVec4f pow(SSAVec4f val, 
SSAVec4f power); + static SSAVec4f exp(SSAVec4f val); + static SSAVec4f log(SSAVec4f val); + static SSAVec4f fma(SSAVec4f a, SSAVec4f b, SSAVec4f c); + static void transpose(SSAVec4f &row0, SSAVec4f &row1, SSAVec4f &row2, SSAVec4f &row3); + static SSAVec4f shuffle(const SSAVec4f &f0, int index0, int index1, int index2, int index3); + static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int index0, int index1, int index2, int index3); + static SSAVec4f from_llvm(llvm::Value *v) { return SSAVec4f(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; + +private: + static SSAVec4f shuffle(const SSAVec4f &f0, const SSAVec4f &f1, int mask); +}; + +SSAVec4f operator+(const SSAVec4f &a, const SSAVec4f &b); +SSAVec4f operator-(const SSAVec4f &a, const SSAVec4f &b); +SSAVec4f operator*(const SSAVec4f &a, const SSAVec4f &b); +SSAVec4f operator/(const SSAVec4f &a, const SSAVec4f &b); + +SSAVec4f operator+(float a, const SSAVec4f &b); +SSAVec4f operator-(float a, const SSAVec4f &b); +SSAVec4f operator*(float a, const SSAVec4f &b); +SSAVec4f operator/(float a, const SSAVec4f &b); + +SSAVec4f operator+(const SSAVec4f &a, float b); +SSAVec4f operator-(const SSAVec4f &a, float b); +SSAVec4f operator*(const SSAVec4f &a, float b); +SSAVec4f operator/(const SSAVec4f &a, float b); diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.cpp b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp new file mode 100644 index 0000000000..e0ed8bc868 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.cpp @@ -0,0 +1,44 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_vec4f_ptr.h" +#include "ssa_scope.h" + +SSAVec4fPtr::SSAVec4fPtr() +: v(0) +{ +} + +SSAVec4fPtr::SSAVec4fPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAVec4fPtr::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo(); +} + +SSAVec4fPtr SSAVec4fPtr::operator[](SSAInt index) const +{ + return SSAVec4fPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, 
SSAScope::hint())); +} + +SSAVec4f SSAVec4fPtr::load() const +{ + return SSAVec4f::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4f SSAVec4fPtr::load_unaligned() const +{ + return SSAVec4f::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4), SSAScope::hint())); +} + +void SSAVec4fPtr::store(const SSAVec4f &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value) +{ + SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); +} diff --git a/src/r_compiler/ssa/ssa_vec4f_ptr.h b/src/r_compiler/ssa/ssa_vec4f_ptr.h new file mode 100644 index 0000000000..ab4e841900 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4f_ptr.h @@ -0,0 +1,24 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_vec4f.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4fPtr +{ +public: + SSAVec4fPtr(); + explicit SSAVec4fPtr(llvm::Value *v); + static SSAVec4fPtr from_llvm(llvm::Value *v) { return SSAVec4fPtr(v); } + static llvm::Type *llvm_type(); + SSAVec4fPtr operator[](SSAInt index) const; + SSAVec4f load() const; + SSAVec4f load_unaligned() const; + void store(const SSAVec4f &new_value); + void store_unaligned(const SSAVec4f &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp new file mode 100644 index 0000000000..3b508412f3 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -0,0 +1,245 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_vec4i.h" +#include "ssa_vec4f.h" +#include "ssa_vec8s.h" +#include "ssa_vec16ub.h" +#include "ssa_int.h" +#include "ssa_scope.h" + +SSAVec4i::SSAVec4i() +: v(0) +{ +} + +SSAVec4i::SSAVec4i(int constant) +: v(0) +{ + std::vector constants; + constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant, true))); + v = llvm::ConstantVector::get(constants); +} + 
+// Constant <4 x i32> with one signed value per lane, lane 0 first.
+SSAVec4i::SSAVec4i(int constant0, int constant1, int constant2, int constant3)
+: v(0)
+{
+    // The element type must be spelled out: a bare `std::vector` cannot be
+    // deduced from an empty initializer.
+    std::vector<llvm::Constant *> constants;
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant0, true)));
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant1, true)));
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant2, true)));
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, constant3, true)));
+    v = llvm::ConstantVector::get(constants);
+}
+
+// Wraps an already-built LLVM value without emitting any instruction.
+SSAVec4i::SSAVec4i(llvm::Value *v)
+: v(v)
+{
+}
+
+// Broadcasts a scalar: the i32 is bitcast to <1 x i32> and a shuffle with an
+// all-zero mask copies element 0 into all four result lanes.
+SSAVec4i::SSAVec4i(SSAInt i)
+: v(0)
+{
+    llvm::Type *m1xi32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 1);
+    std::vector<llvm::Constant *> constants;
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+    constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
+    llvm::Value *mask = llvm::ConstantVector::get(constants);
+    v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
+}
+
+// Builds a vector from four scalars: starts from an all-zero constant vector
+// and overwrites lanes 0..3 in order.
+SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
+: v(0)
+{
+    std::vector<llvm::Constant *> constants;
+    constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
+    v = llvm::ConstantVector::get(constants);
+    v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
+    v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
+    v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
+    v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
+}
+
+SSAVec4i::SSAVec4i(SSAVec4f f32) +: v(0) +{ + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_cvttps2dq), f32.v, SSAScope::hint()); +} + +SSAInt SSAVec4i::operator[](SSAInt index) const +{ + return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint())); +} + +SSAInt SSAVec4i::operator[](int index) const +{ + return (*this)[SSAInt(index)]; +} + +SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::insert(int index, SSAInt value) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::insert(int index, int value) +{ + return insert(index, SSAInt(value)); +} + +llvm::Type *SSAVec4i::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); +} + +SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4i SSAVec4i::bitcast(SSAVec8s i16) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(i16.v, llvm_type(), SSAScope::hint())); +} + +SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, int index0, int index1, int index2, int index3) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3); +} + +SSAVec4i SSAVec4i::shuffle(const SSAVec4i &i0, const SSAVec4i &i1, int index0, int index1, int index2, int index3) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 
index3))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec4i::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +void SSAVec4i::extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3) +{ + SSAVec8s low = SSAVec8s::extendlo(a); + SSAVec8s high = SSAVec8s::extendhi(a); + out0 = extendlo(low); + out1 = extendhi(low); + out2 = extendlo(high); + out3 = extendhi(high); +} + +SSAVec4i SSAVec4i::extendhi(SSAVec8s i16) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 +} + +SSAVec4i SSAVec4i::extendlo(SSAVec8s i16) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(i16, SSAVec8s((short)0), 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 +} + +SSAVec4i SSAVec4i::combinehi(SSAVec8s a, SSAVec8s b) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7)); // _mm_unpackhi_epi16 +} + +SSAVec4i SSAVec4i::combinelo(SSAVec8s a, SSAVec8s b) +{ + return SSAVec4i::bitcast(SSAVec8s::shuffle(a, b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3)); // _mm_unpacklo_epi16 +} + +SSAVec4i SSAVec4i::sqrt(SSAVec4i f) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_sqrt_pd), f.v, SSAScope::hint())); +} + +/* +SSAVec4i SSAVec4i::min_sse41(SSAVec4i a, SSAVec4i b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pminsd), values, SSAScope::hint())); +} + +SSAVec4i SSAVec4i::max_sse41(SSAVec4i a, SSAVec4i b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse41_pmaxsd), values, SSAScope::hint())); +} +*/ + +SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, 
SSAScope::hint())); +} + +SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec4i operator+(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) + b; +} + +SSAVec4i operator-(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) - b; +} + +SSAVec4i operator*(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) * b; +} + +SSAVec4i operator/(int a, const SSAVec4i &b) +{ + return SSAVec4i(a) / b; +} + +SSAVec4i operator+(const SSAVec4i &a, int b) +{ + return a + SSAVec4i(b); +} + +SSAVec4i operator-(const SSAVec4i &a, int b) +{ + return a - SSAVec4i(b); +} + +SSAVec4i operator*(const SSAVec4i &a, int b) +{ + return a * SSAVec4i(b); +} + +SSAVec4i operator/(const SSAVec4i &a, int b) +{ + return a / SSAVec4i(b); +} + +SSAVec4i operator<<(const SSAVec4i &a, int bits) +{ + //return SSAScope::builder().CreateShl(a.v, bits); + llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; + return SSAVec4i::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_d), values, SSAScope::hint())); +} + +SSAVec4i operator>>(const SSAVec4i &a, int bits) +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h new file mode 100644 index 0000000000..89cda16465 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -0,0 +1,61 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4f; +class SSAVec8s; +class SSAVec16ub; +class SSAInt; + 
+class SSAVec4i +{ +public: + SSAVec4i(); + explicit SSAVec4i(int constant); + explicit SSAVec4i(int constant0, int constant1, int constant2, int constant3); + SSAVec4i(SSAInt i); + SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3); + explicit SSAVec4i(llvm::Value *v); + SSAVec4i(SSAVec4f f32); + SSAInt operator[](SSAInt index) const; + SSAInt operator[](int index) const; + SSAVec4i insert(SSAInt index, SSAInt value); + SSAVec4i insert(int index, SSAInt value); + SSAVec4i insert(int index, int value); + static SSAVec4i bitcast(SSAVec4f f32); + static SSAVec4i bitcast(SSAVec8s i16); + static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); + static SSAVec4i shuffle(const SSAVec4i &f0, const SSAVec4i &f1, int index0, int index1, int index2, int index3); + static SSAVec4i extendhi(SSAVec8s i16); + static SSAVec4i extendlo(SSAVec8s i16); + static void extend(SSAVec16ub a, SSAVec4i &out0, SSAVec4i &out1, SSAVec4i &out2, SSAVec4i &out3); + static SSAVec4i combinehi(SSAVec8s v0, SSAVec8s v1); + static SSAVec4i combinelo(SSAVec8s v0, SSAVec8s v1); + static SSAVec4i sqrt(SSAVec4i f); + //static SSAVec4i min_sse41(SSAVec4i a, SSAVec4i b); + //static SSAVec4i max_sse41(SSAVec4i a, SSAVec4i b); + static SSAVec4i from_llvm(llvm::Value *v) { return SSAVec4i(v); } + static llvm::Type *llvm_type(); + + llvm::Value *v; +}; + +SSAVec4i operator+(const SSAVec4i &a, const SSAVec4i &b); +SSAVec4i operator-(const SSAVec4i &a, const SSAVec4i &b); +SSAVec4i operator*(const SSAVec4i &a, const SSAVec4i &b); +SSAVec4i operator/(const SSAVec4i &a, const SSAVec4i &b); + +SSAVec4i operator+(int a, const SSAVec4i &b); +SSAVec4i operator-(int a, const SSAVec4i &b); +SSAVec4i operator*(int a, const SSAVec4i &b); +SSAVec4i operator/(int a, const SSAVec4i &b); + +SSAVec4i operator+(const SSAVec4i &a, int b); +SSAVec4i operator-(const SSAVec4i &a, int b); +SSAVec4i operator*(const SSAVec4i &a, int b); +SSAVec4i operator/(const SSAVec4i &a, int b); + +SSAVec4i 
operator<<(const SSAVec4i &a, int bits); +SSAVec4i operator>>(const SSAVec4i &a, int bits); diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.cpp b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp new file mode 100644 index 0000000000..f75ccd43fa --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.cpp @@ -0,0 +1,44 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_vec4i_ptr.h" +#include "ssa_scope.h" + +SSAVec4iPtr::SSAVec4iPtr() +: v(0) +{ +} + +SSAVec4iPtr::SSAVec4iPtr(llvm::Value *v) +: v(v) +{ +} + +llvm::Type *SSAVec4iPtr::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo(); +} + +SSAVec4iPtr SSAVec4iPtr::operator[](SSAInt index) const +{ + return SSAVec4iPtr::from_llvm(SSAScope::builder().CreateGEP(v, index.v, SSAScope::hint())); +} + +SSAVec4i SSAVec4iPtr::load() const +{ + return SSAVec4i::from_llvm(SSAScope::builder().CreateLoad(v, false, SSAScope::hint())); +} + +SSAVec4i SSAVec4iPtr::load_unaligned() const +{ + return SSAVec4i::from_llvm(SSAScope::builder().Insert(new llvm::LoadInst(v, SSAScope::hint(), false, 4))); +} + +void SSAVec4iPtr::store(const SSAVec4i &new_value) +{ + SSAScope::builder().CreateStore(new_value.v, v, false); +} + +void SSAVec4iPtr::store_unaligned(const SSAVec4i &new_value) +{ + SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false); +} diff --git a/src/r_compiler/ssa/ssa_vec4i_ptr.h b/src/r_compiler/ssa/ssa_vec4i_ptr.h new file mode 100644 index 0000000000..257b4e34f2 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec4i_ptr.h @@ -0,0 +1,25 @@ + +#pragma once + +#include "ssa_int.h" +#include "ssa_vec4i.h" + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4iPtr +{ +public: + SSAVec4iPtr(); + explicit SSAVec4iPtr(llvm::Value *v); + static SSAVec4iPtr from_llvm(llvm::Value *v) { return SSAVec4iPtr(v); } + static llvm::Type *llvm_type(); + SSAVec4iPtr operator[](SSAInt index) const; + SSAVec4iPtr operator[](int index) const { return 
(*this)[SSAInt(index)]; } + SSAVec4i load() const; + SSAVec4i load_unaligned() const; + void store(const SSAVec4i &new_value); + void store_unaligned(const SSAVec4i &new_value); + + llvm::Value *v; +}; diff --git a/src/r_compiler/ssa/ssa_vec8s.cpp b/src/r_compiler/ssa/ssa_vec8s.cpp new file mode 100644 index 0000000000..6016b551fe --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec8s.cpp @@ -0,0 +1,178 @@ + +#include "r_compiler/llvm_include.h" +#include "ssa_vec8s.h" +#include "ssa_vec4i.h" +#include "ssa_vec16ub.h" +#include "ssa_scope.h" + +SSAVec8s::SSAVec8s() +: v(0) +{ +} + +SSAVec8s::SSAVec8s(short constant) +: v(0) +{ + std::vector constants; + constants.resize(8, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec8s::SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7) +: v(0) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant0, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant1, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant2, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant3, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant4, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant5, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant6, true))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(16, constant7, true))); + v = llvm::ConstantVector::get(constants); +} + +SSAVec8s::SSAVec8s(llvm::Value *v) +: v(v) +{ +} + +SSAVec8s::SSAVec8s(SSAVec4i i0, SSAVec4i i1) +: v(0) +{ + llvm::Value *values[2] = { 
i0.v, i1.v }; + v = SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_packssdw_128), values, SSAScope::hint()); +} + +llvm::Type *SSAVec8s::llvm_type() +{ + return llvm::VectorType::get(llvm::Type::getInt16Ty(SSAScope::context()), 8); +} + +SSAVec8s SSAVec8s::bitcast(SSAVec16ub i8) +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateBitCast(i8.v, llvm_type(), SSAScope::hint())); +} + +SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) +{ + return shuffle(i0, from_llvm(llvm::UndefValue::get(llvm_type())), index0, index1, index2, index3, index4, index5, index6, index7); +} + +SSAVec8s SSAVec8s::shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7) +{ + std::vector constants; + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index0))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index1))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index2))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index3))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index4))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index5))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index6))); + constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, index7))); + llvm::Value *mask = llvm::ConstantVector::get(constants); + return SSAVec8s::from_llvm(SSAScope::builder().CreateShuffleVector(i0.v, i1.v, mask, SSAScope::hint())); +} + +SSAVec8s SSAVec8s::extendhi(SSAVec16ub a) +{ + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 
16+13, 14, 16+14, 15, 16+15)); // _mm_unpackhi_epi8 +} + +SSAVec8s SSAVec8s::extendlo(SSAVec16ub a) /* shuffles bytes 0..7 of a against an all-zero SSAVec16ub, then bitcasts to 8 x i16 */ +{ + return SSAVec8s::bitcast(SSAVec16ub::shuffle(a, SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 +} + +/* +SSAVec8s SSAVec8s::min_sse2(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmins_w), values, SSAScope::hint())); +} + +SSAVec8s SSAVec8s::max_sse2(SSAVec8s a, SSAVec8s b) +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmaxs_w), values, SSAScope::hint())); +} +*/ + +SSAVec8s SSAVec8s::mulhi(SSAVec8s a, SSAVec8s b) /* emits a call to the SSE2 pmulh.w intrinsic with a and b */ +{ + llvm::Value *values[2] = { a.v, b.v }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pmulh_w), values, SSAScope::hint())); +} + +SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b) /* vector add via CreateAdd */ +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b) /* vector subtract via CreateSub */ +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateSub(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b) /* vector multiply via CreateMul */ +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateMul(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b) /* signed division via CreateSDiv */ +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateSDiv(a.v, b.v, SSAScope::hint())); +} + +SSAVec8s operator+(short a, const SSAVec8s &b) /* scalar-left overloads: the scalar is first turned into an SSAVec8s constant */ +{ + return SSAVec8s(a) + b; +} + +SSAVec8s operator-(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) - b; +} + +SSAVec8s operator*(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) * b; +} + +SSAVec8s operator/(short a, const SSAVec8s &b) +{ + return SSAVec8s(a) / b; +} + +SSAVec8s 
operator+(const SSAVec8s &a, short b) /* scalar-right overloads: the scalar is first turned into an SSAVec8s constant */ +{ + return a + SSAVec8s(b); +} + +SSAVec8s operator-(const SSAVec8s &a, short b) +{ + return a - SSAVec8s(b); +} + +SSAVec8s operator*(const SSAVec8s &a, short b) +{ + return a * SSAVec8s(b); +} + +SSAVec8s operator/(const SSAVec8s &a, short b) +{ + return a / SSAVec8s(b); +} + +SSAVec8s operator<<(const SSAVec8s &a, int bits) /* left shift of every 16-bit lane by the same immediate count */ +{ + // SSAVec8s is <8 x i16>, so the word shift (pslli_w) is required; pslli_d is the <4 x i32> variant and does not match this operand type. + llvm::Value *values[2] = { a.v, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, (uint64_t)bits)) }; + return SSAVec8s::from_llvm(SSAScope::builder().CreateCall(SSAScope::intrinsic(llvm::Intrinsic::x86_sse2_pslli_w), values, SSAScope::hint())); +} + +SSAVec8s operator>>(const SSAVec8s &a, int bits) /* logical right shift (CreateLShr) */ +{ + return SSAVec8s::from_llvm(SSAScope::builder().CreateLShr(a.v, bits, SSAScope::hint())); +} diff --git a/src/r_compiler/ssa/ssa_vec8s.h b/src/r_compiler/ssa/ssa_vec8s.h new file mode 100644 index 0000000000..40263773b4 --- /dev/null +++ b/src/r_compiler/ssa/ssa_vec8s.h @@ -0,0 +1,48 @@ + +#pragma once + +namespace llvm { class Value; } +namespace llvm { class Type; } + +class SSAVec4i; +class SSAVec16ub; + +class SSAVec8s /* thin wrapper around an llvm::Value* holding a vector of 8 signed 16-bit lanes */ +{ +public: + SSAVec8s(); + explicit SSAVec8s(short constant); + explicit SSAVec8s(short constant0, short constant1, short constant2, short constant3, short constant4, short constant5, short constant6, short constant7); + explicit SSAVec8s(llvm::Value *v); + SSAVec8s(SSAVec4i i0, SSAVec4i i1); + static SSAVec8s bitcast(SSAVec16ub i8); + static SSAVec8s shuffle(const SSAVec8s &i0, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); + static SSAVec8s shuffle(const SSAVec8s &i0, const SSAVec8s &i1, int index0, int index1, int index2, int index3, int index4, int index5, int index6, int index7); + static SSAVec8s extendhi(SSAVec16ub a); + static SSAVec8s extendlo(SSAVec16ub a); + //static SSAVec8s min_sse2(SSAVec8s a, SSAVec8s b); + //static SSAVec8s max_sse2(SSAVec8s a, SSAVec8s 
b); + static SSAVec8s mulhi(SSAVec8s a, SSAVec8s b); + static SSAVec8s from_llvm(llvm::Value *v) { return SSAVec8s(v); } /* named alternative to the explicit llvm::Value* constructor */ + static llvm::Type *llvm_type(); + + llvm::Value *v; /* the wrapped LLVM value; 0 for a default-constructed object */ +}; + +SSAVec8s operator+(const SSAVec8s &a, const SSAVec8s &b); +SSAVec8s operator-(const SSAVec8s &a, const SSAVec8s &b); +SSAVec8s operator*(const SSAVec8s &a, const SSAVec8s &b); +SSAVec8s operator/(const SSAVec8s &a, const SSAVec8s &b); + +SSAVec8s operator+(short a, const SSAVec8s &b); +SSAVec8s operator-(short a, const SSAVec8s &b); +SSAVec8s operator*(short a, const SSAVec8s &b); +SSAVec8s operator/(short a, const SSAVec8s &b); + +SSAVec8s operator+(const SSAVec8s &a, short b); +SSAVec8s operator-(const SSAVec8s &a, short b); +SSAVec8s operator*(const SSAVec8s &a, short b); +SSAVec8s operator/(const SSAVec8s &a, short b); + +SSAVec8s operator<<(const SSAVec8s &a, int bits); +SSAVec8s operator>>(const SSAVec8s &a, int bits); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 127697356a..ae6c26cfcf 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -38,6 +38,7 @@ #include "r_data/colormaps.h" #include "r_plane.h" #include "r_draw_rgba.h" +#include "r_compiler/llvmdrawers.h" #include "gi.h" #include "stats.h" @@ -301,6 +302,323 @@ void DrawerCommandQueue::StopThreads() ///////////////////////////////////////////////////////////////////////////// +class DrawSpanLLVMCommand : public DrawerCommand /* copies the current ds_ and dc_ span state into a DrawSpanArgs when constructed; Execute later hands it to LLVMDrawers */ +{ +protected: + DrawSpanArgs args; /* protected so the derived span commands can pass the same args to other drawers */ + +public: + DrawSpanLLVMCommand() + { + args.xfrac = ds_xfrac; + args.yfrac = ds_yfrac; + args.xstep = ds_xstep; + args.ystep = ds_ystep; + args.x1 = ds_x1; + args.x2 = ds_x2; + args.y = ds_y; + args.xbits = ds_xbits; + args.ybits = ds_ybits; + args.destorg = (uint32_t*)dc_destorg; + args.destpitch = dc_pitch; + args.source = (const uint32_t*)ds_source; + args.light = LightBgra::calc_light_multiplier(ds_light); + args.light_red = ds_shade_constants.light_red; + args.light_green = ds_shade_constants.light_green; + args.light_blue = 
ds_shade_constants.light_blue; + args.light_alpha = ds_shade_constants.light_alpha; + args.fade_red = ds_shade_constants.fade_red; + args.fade_green = ds_shade_constants.fade_green; + args.fade_blue = ds_shade_constants.fade_blue; + args.fade_alpha = ds_shade_constants.fade_alpha; + args.desaturate = ds_shade_constants.desaturate; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); /* drops FRACBITS - 8 low bits of the alpha value */ + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; /* flag bits are OR-ed in below */ + if (ds_shade_constants.simple_shade) + args.flags |= DrawSpanArgs::simple_shade; + if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) /* nearest_filter is set when span_sampler_setup returns false */ + args.flags |= DrawSpanArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override /* returns early when thread->skipped_by_thread(args.y) is true, otherwise calls DrawSpan */ + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpan(&args); + } +}; + +class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand /* same captured args, dispatched to DrawSpanMasked */ +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMasked(&args); + } +}; + +class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand /* same captured args, dispatched to DrawSpanTranslucent */ +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanTranslucent(&args); + } +}; + +class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand /* same captured args, dispatched to DrawSpanMaskedTranslucent */ +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args); + } +}; + +class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand /* same captured args, dispatched to DrawSpanAddClamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + if (thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanAddClamp(&args); + } +}; + +class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand /* same captured args, dispatched to DrawSpanMaskedAddClamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + if 
(thread->skipped_by_thread(args.y)) + return; + LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args); + } +}; + +///////////////////////////////////////////////////////////////////////////// + +class DrawWall4LLVMCommand : public DrawerCommand /* captures wall-drawing state for four columns (indices 0..3) into a DrawWallArgs */ +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) /* copies core, num_cores and the pass_start_y/pass_end_y range out of the DrawerThread */ + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall4LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.count = dc_count; + args.pitch = dc_pitch; + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + for (int i = 0; i < 4; i++) /* per-column inputs */ + { + args.texturefrac[i] = vplce[i]; + args.iscale[i] = vince[i]; + args.texturefracx[i] = buftexturefracx[i]; + args.textureheight[i] = bufheight[i]; + args.source[i] = (const uint32_t *)bufplce[i]; + args.source2[i] = (const uint32_t *)bufplce2[i]; + args.light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) /* only column 0 is checked to decide on nearest_filter */ + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override /* no skipped_by_thread test here; the thread data is passed to vlinec4 instead */ + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec4(&args, &d); + } +}; + +class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand /* same captured args, dispatched to mvlinec4 */ +{ +public: + void 
Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec4(&args, &d); + } +}; + +class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand /* same captured args, dispatched to tmvline4_add */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_add(&args, &d); + } +}; + +class DrawWallAddClamp4LLVMCommand : public DrawWall4LLVMCommand /* same captured args, dispatched to tmvline4_addclamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand /* same captured args, dispatched to tmvline4_subclamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand /* same captured args, dispatched to tmvline4_revsubclamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d); + } +}; + +class DrawWall1LLVMCommand : public DrawerCommand /* single-column counterpart: only index 0 of the per-column arrays in args is filled */ +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) /* copies core, num_cores and the pass_start_y/pass_end_y range out of the DrawerThread */ + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall1LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.pitch = dc_pitch; + args.count = dc_count; + args.texturefrac[0] = dc_texturefrac; + args.texturefracx[0] = dc_texturefracx; + args.iscale[0] = dc_iscale; + args.textureheight[0] = dc_textureheight; + args.source[0] = (const uint32_t *)dc_source; + args.source2[0] = (const uint32_t *)dc_source2; + args.light[0] = LightBgra::calc_light_multiplier(dc_light); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = 
dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); /* drops FRACBITS - 8 low bits of the alpha value */ + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; /* flag bits are OR-ed in below */ + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) /* no second source pointer: request nearest_filter */ + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override /* builds the WorkerThreadData and calls vlinec1 */ + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec1(&args, &d); + } +}; + +class DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand /* same captured args, dispatched to mvlinec1 */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec1(&args, &d); + } +}; + +class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand /* same captured args, dispatched to tmvline1_add */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_add(&args, &d); + } +}; + +class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand /* same captured args, dispatched to tmvline1_addclamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand /* same captured args, dispatched to tmvline1_subclamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand /* same captured args, dispatched to tmvline1_revsubclamp */ +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); + } +}; + 
+///////////////////////////////////////////////////////////////////////////// + class DrawerColumnCommand : public DrawerCommand { public: @@ -2702,7 +3020,9 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { -#ifdef NO_SSE +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>(); +#elif defined(NO_SSE) DrawerCommandQueue::QueueCommand(); #else DrawerCommandQueue::QueueCommand(); @@ -2711,27 +3031,47 @@ void R_DrawSpan_rgba() void R_DrawSpanMasked_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawSpanMaskedLLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanTranslucent_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawSpanTranslucentLLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMaskedTranslucent_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawSpanMaskedTranslucentLLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanAddClamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawSpanAddClampLLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMaskedAddClamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawSpanMaskedAddClampLLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif } void R_FillSpan_rgba() @@ -2776,7 +3116,11 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DWORD vlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWall1LLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } @@ -2795,72 +3139,116 @@ void queue_wallcommand() void vlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWall4LLVMCommand>(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallMasked1LLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void mvlinec4_rgba() { +#if !defined(NO_LLVM) + 
DrawerCommandQueue::QueueCommand<DrawWallMasked4LLVMCommand>(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallAdd1LLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallAdd4LLVMCommand>(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallAddClamp1LLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallAddClamp4LLVMCommand>(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallSubClamp1LLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallSubClamp4LLVMCommand>(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallRevSubClamp1LLVMCommand>(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand<DrawWallRevSubClamp4LLVMCommand>(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 368b1c3fd8..372f23accc 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,6 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" #include "r_draw_rgba.h" +#include "r_compiler/llvmdrawers.h" EXTERN_CVAR(Bool, 
r_shadercolormaps) @@ -51,6 +52,16 @@ void R_SetupColormap(player_t *); void R_SetupFreelook(); void R_InitRenderer(); +FSoftwareRenderer::FSoftwareRenderer() /* calls LLVMDrawers::Create() when the software renderer is constructed */ +{ + LLVMDrawers::Create(); +} + +FSoftwareRenderer::~FSoftwareRenderer() /* calls LLVMDrawers::Destroy(), pairing with Create() in the constructor */ +{ + LLVMDrawers::Destroy(); +} + //========================================================================== // // DCanvas :: Init diff --git a/src/r_swrenderer.h b/src/r_swrenderer.h index f9d5609a0d..fc3ec25512 100644 --- a/src/r_swrenderer.h +++ b/src/r_swrenderer.h @@ -5,6 +5,9 @@ struct FSoftwareRenderer : public FRenderer { + FSoftwareRenderer(); + ~FSoftwareRenderer(); + // Can be overridden so that the colormaps for sector color/fade won't be built. virtual bool UsesColormap() const override;