From 970adff5e76322df6e81264be707e65b76b68bd2 Mon Sep 17 00:00:00 2001
From: "alexey.lysiuk"
Date: Fri, 14 Jul 2017 15:14:34 +0300
Subject: [PATCH] Use multiple threads for xBRZ upscaling

Implementation relies on Concurrency Runtime, Grand Central Dispatch aka libdispatch or OpenMP depending on their availability
---
 src/CMakeLists.txt              | 27 ++++++++++++++++
 src/gl/textures/gl_hqresize.cpp | 56 +++++++++++++++++++++++----------
 2 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c862d27e57..629c9e25e2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -265,6 +265,16 @@ else( X64 )
 	set( CMAKE_CXX_FLAGS ${SAFE_CMAKE_CXX_FLAGS} )
 endif( X64 )
 
+CHECK_CXX_SOURCE_COMPILES("#include <ppl.h>
+	int main() { concurrency::parallel_for(0, 1, 1, [](int) { } ); }"
+	HAVE_PARALLEL_FOR)
+
+if( NOT HAVE_PARALLEL_FOR )
+	CHECK_CXX_SOURCE_COMPILES("#include <dispatch/dispatch.h>
+		int main() { dispatch_apply(1, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^(size_t) { }); }"
+		HAVE_DISPATCH_APPLY)
+endif()
+
 # Set up flags for MSVC
 if (MSVC)
 	set( CMAKE_CXX_FLAGS "/MP ${CMAKE_CXX_FLAGS}" )
@@ -567,6 +577,23 @@ if( HAVE_MMX )
 	endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
 endif( HAVE_MMX )
 
+if( HAVE_PARALLEL_FOR )
+	add_definitions( -DHAVE_PARALLEL_FOR=1 )
+elseif( HAVE_DISPATCH_APPLY )
+	add_definitions( -DHAVE_DISPATCH_APPLY=1 )
+else()
+	option( NO_OPENMP "Disable usage of OpenMP" OFF )
+
+	if( NOT NO_OPENMP )
+		include( FindOpenMP )
+
+		if( OPENMP_FOUND )
+			set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+			set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+		endif( OPENMP_FOUND )
+	endif( NOT NO_OPENMP )
+endif()
+
 add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
 	COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
 	DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
diff --git 
a/src/gl/textures/gl_hqresize.cpp b/src/gl/textures/gl_hqresize.cpp
index 08a340199c..62825777c6 100644
--- a/src/gl/textures/gl_hqresize.cpp
+++ b/src/gl/textures/gl_hqresize.cpp
@@ -46,16 +46,44 @@
 #include "gl/xbr/xbrz.h"
 #include "gl/xbr/xbrz_old.h"
 
-#ifdef __APPLE__
-#	include <AvailabilityMacros.h>
-#	if MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
-#		define GZ_USE_LIBDISPATCH
-#	endif // MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
-#endif // __APPLE__
+#ifdef HAVE_PARALLEL_FOR
 
-#ifdef GZ_USE_LIBDISPATCH
-#	include <dispatch/dispatch.h>
-#endif // GZ_USE_LIBDISPATCH
+#include <ppl.h>
+
+template <typename Index, typename Function>
+inline void parallel_for(const Index count, const Index step, const Function& function)
+{
+	concurrency::parallel_for(0, count, step, function);
+}
+
+#elif defined HAVE_DISPATCH_APPLY
+
+#include <dispatch/dispatch.h>
+
+template <typename Index, typename Function>
+inline void parallel_for(const Index count, const Index step, const Function& function)
+{
+	const dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
+
+	dispatch_apply(count / step + 1, queue, ^(size_t sliceY)
+	{
+		function(sliceY * step);
+	});
+}
+
+#else
+
+template <typename Index, typename Function>
+inline void parallel_for(const Index count, const Index step, const Function& function)
+{
+#pragma omp parallel for
+	for (Index i = 0; i < count; i += step)
+	{
+		function(i);
+	}
+}
+
+#endif // HAVE_PARALLEL_FOR
 
 CUSTOM_CVAR(Int, gl_texture_hqresize, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
 {
@@ -87,7 +115,6 @@ CVAR (Flag, gl_texture_hqresize_textures, gl_texture_hqresize_targets, 1);
 CVAR (Flag, gl_texture_hqresize_sprites, gl_texture_hqresize_targets, 2);
 CVAR (Flag, gl_texture_hqresize_fonts, gl_texture_hqresize_targets, 4);
 
-#ifdef GZ_USE_LIBDISPATCH
 CVAR(Bool, gl_texture_hqresize_multithread, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
 
 CUSTOM_CVAR(Int, gl_texture_hqresize_mt_width, 16, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
@@ -101,7 +128,6 @@ CUSTOM_CVAR(Int, gl_texture_hqresize_mt_height, 4, CVAR_ARCHIVE | CVAR_GLOBALCON
 	if (self < 2) self = 2;
 	if (self > 1024) self = 1024;
 }
-#endif // GZ_USE_LIBDISPATCH
 
 
 static void scale2x ( uint32_t* inputBuffer, uint32_t* outputBuffer, int inWidth, int inHeight )
@@ -289,7 +315,6 @@ static unsigned char *xbrzHelper( void (*xbrzFunction) ( size_t, const uint32_t*
 
 	unsigned char * newBuffer = new unsigned char[outWidth*outHeight*4];
 
-#ifdef GZ_USE_LIBDISPATCH
 	const int thresholdWidth = gl_texture_hqresize_mt_width;
 	const int thresholdHeight = gl_texture_hqresize_mt_height;
 
@@ -297,16 +322,13 @@ static unsigned char *xbrzHelper( void (*xbrzFunction) ( size_t, const uint32_t*
 		&& inWidth > thresholdWidth
 		&& inHeight > thresholdHeight)
 	{
-		const dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
-
-		dispatch_apply(inHeight / thresholdHeight + 1, queue, ^(size_t sliceY)
+		parallel_for(inHeight, thresholdHeight, [=](int sliceY)
 		{
 			xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
-				inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), sliceY * thresholdHeight, (sliceY + 1) * thresholdHeight);
+				inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), sliceY, sliceY + thresholdHeight);
 		});
 	}
 	else
-#endif // GZ_USE_LIBDISPATCH
 	{
 		xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
 			inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), 0, std::numeric_limits<int>::max());