mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-26 22:11:43 +00:00
Use multiple threads for xBRZ upscaling
Implementation relies on Concurrency Runtime, Grand Central Dispatch aka libdispatch or OpenMP depending on their availability
This commit is contained in:
parent
ac811f99c0
commit
970adff5e7
2 changed files with 66 additions and 17 deletions
|
@ -265,6 +265,16 @@ else( X64 )
|
|||
set( CMAKE_CXX_FLAGS ${SAFE_CMAKE_CXX_FLAGS} )
|
||||
endif( X64 )
|
||||
|
||||
CHECK_CXX_SOURCE_COMPILES("#include <ppl.h>
|
||||
int main() { concurrency::parallel_for(0, 1, 1, [](int) { } ); }"
|
||||
HAVE_PARALLEL_FOR)
|
||||
|
||||
if( NOT HAVE_PARALLEL_FOR )
|
||||
CHECK_CXX_SOURCE_COMPILES("#include <dispatch/dispatch.h>
|
||||
int main() { dispatch_apply(1, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^(size_t) { }); }"
|
||||
HAVE_DISPATCH_APPLY)
|
||||
endif()
|
||||
|
||||
# Set up flags for MSVC
|
||||
if (MSVC)
|
||||
set( CMAKE_CXX_FLAGS "/MP ${CMAKE_CXX_FLAGS}" )
|
||||
|
@ -567,6 +577,23 @@ if( HAVE_MMX )
|
|||
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||
endif( HAVE_MMX )
|
||||
|
||||
if( HAVE_PARALLEL_FOR )
|
||||
add_definitions( -DHAVE_PARALLEL_FOR=1 )
|
||||
elseif( HAVE_DISPATCH_APPLY )
|
||||
add_definitions( -DHAVE_DISPATCH_APPLY=1 )
|
||||
else()
|
||||
option( NO_OPENMP "Disable usage of OpenMP" OFF )
|
||||
|
||||
if( NOT NO_OPENMP )
|
||||
include( FindOpenMP )
|
||||
|
||||
if( OPENMP_FOUND )
|
||||
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
|
||||
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
|
||||
endif( OPENMP_FOUND )
|
||||
endif( NOT NO_OPENMP )
|
||||
endif()
|
||||
|
||||
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
||||
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
||||
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
||||
|
|
|
@ -46,16 +46,44 @@
|
|||
#include "gl/xbr/xbrz.h"
|
||||
#include "gl/xbr/xbrz_old.h"
|
||||
|
||||
#ifdef __APPLE__
|
||||
# include <AvailabilityMacros.h>
|
||||
# if MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
|
||||
# define GZ_USE_LIBDISPATCH
|
||||
# endif // MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
|
||||
#endif // __APPLE__
|
||||
#ifdef HAVE_PARALLEL_FOR
|
||||
|
||||
#ifdef GZ_USE_LIBDISPATCH
|
||||
# include <dispatch/dispatch.h>
|
||||
#endif // GZ_USE_LIBDISPATCH
|
||||
#include <ppl.h>
|
||||
|
||||
template <typename Index, typename Function>
|
||||
inline void parallel_for(const Index count, const Index step, const Function& function)
|
||||
{
|
||||
concurrency::parallel_for(0, count, step, function);
|
||||
}
|
||||
|
||||
#elif defined HAVE_DISPATCH_APPLY
|
||||
|
||||
#include <dispatch/dispatch.h>
|
||||
|
||||
template <typename Index, typename Function>
|
||||
inline void parallel_for(const Index count, const Index step, const Function& function)
|
||||
{
|
||||
const dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||
|
||||
dispatch_apply(count / step + 1, queue, ^(size_t sliceY)
|
||||
{
|
||||
function(sliceY * step);
|
||||
});
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template <typename Index, typename Function>
|
||||
inline void parallel_for(const Index count, const Index step, const Function& function)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (Index i = 0; i < count; i += step)
|
||||
{
|
||||
function(i);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // HAVE_PARALLEL_FOR
|
||||
|
||||
CUSTOM_CVAR(Int, gl_texture_hqresize, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
|
||||
{
|
||||
|
@ -87,7 +115,6 @@ CVAR (Flag, gl_texture_hqresize_textures, gl_texture_hqresize_targets, 1);
|
|||
CVAR (Flag, gl_texture_hqresize_sprites, gl_texture_hqresize_targets, 2);
|
||||
CVAR (Flag, gl_texture_hqresize_fonts, gl_texture_hqresize_targets, 4);
|
||||
|
||||
#ifdef GZ_USE_LIBDISPATCH
|
||||
CVAR(Bool, gl_texture_hqresize_multithread, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||
|
||||
CUSTOM_CVAR(Int, gl_texture_hqresize_mt_width, 16, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
|
||||
|
@ -101,7 +128,6 @@ CUSTOM_CVAR(Int, gl_texture_hqresize_mt_height, 4, CVAR_ARCHIVE | CVAR_GLOBALCON
|
|||
if (self < 2) self = 2;
|
||||
if (self > 1024) self = 1024;
|
||||
}
|
||||
#endif // GZ_USE_LIBDISPATCH
|
||||
|
||||
|
||||
static void scale2x ( uint32_t* inputBuffer, uint32_t* outputBuffer, int inWidth, int inHeight )
|
||||
|
@ -289,7 +315,6 @@ static unsigned char *xbrzHelper( void (*xbrzFunction) ( size_t, const uint32_t*
|
|||
|
||||
unsigned char * newBuffer = new unsigned char[outWidth*outHeight*4];
|
||||
|
||||
#ifdef GZ_USE_LIBDISPATCH
|
||||
const int thresholdWidth = gl_texture_hqresize_mt_width;
|
||||
const int thresholdHeight = gl_texture_hqresize_mt_height;
|
||||
|
||||
|
@ -297,16 +322,13 @@ static unsigned char *xbrzHelper( void (*xbrzFunction) ( size_t, const uint32_t*
|
|||
&& inWidth > thresholdWidth
|
||||
&& inHeight > thresholdHeight)
|
||||
{
|
||||
const dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||
|
||||
dispatch_apply(inHeight / thresholdHeight + 1, queue, ^(size_t sliceY)
|
||||
parallel_for(inHeight, thresholdHeight, [=](int sliceY)
|
||||
{
|
||||
xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
|
||||
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), sliceY * thresholdHeight, (sliceY + 1) * thresholdHeight);
|
||||
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), sliceY, sliceY + thresholdHeight);
|
||||
});
|
||||
}
|
||||
else
|
||||
#endif // GZ_USE_LIBDISPATCH
|
||||
{
|
||||
xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
|
||||
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), 0, std::numeric_limits<int>::max());
|
||||
|
|
Loading…
Reference in a new issue