mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-30 07:41:22 +00:00
Use multiple threads for xBRZ upscaling
The implementation relies on the Concurrency Runtime, Grand Central Dispatch (aka libdispatch), or OpenMP, depending on availability.
This commit is contained in:
parent
ac811f99c0
commit
970adff5e7
2 changed files with 66 additions and 17 deletions
|
@ -265,6 +265,16 @@ else( X64 )
|
||||||
set( CMAKE_CXX_FLAGS ${SAFE_CMAKE_CXX_FLAGS} )
|
set( CMAKE_CXX_FLAGS ${SAFE_CMAKE_CXX_FLAGS} )
|
||||||
endif( X64 )
|
endif( X64 )
|
||||||
|
|
||||||
|
# Probe for the Concurrency Runtime: HAVE_PARALLEL_FOR is set when a program
# that includes <ppl.h> and calls concurrency::parallel_for() with a lambda
# compiles.
CHECK_CXX_SOURCE_COMPILES("#include <ppl.h>
|
||||||
|
int main() { concurrency::parallel_for(0, 1, 1, [](int) { } ); }"
|
||||||
|
HAVE_PARALLEL_FOR)
|
||||||
|
|
||||||
|
# The libdispatch probe runs only when HAVE_PARALLEL_FOR is not set.
# HAVE_DISPATCH_APPLY is set when a program that includes <dispatch/dispatch.h>
# and calls dispatch_apply() with a block literal on the default-priority
# global queue compiles.
if( NOT HAVE_PARALLEL_FOR )
|
||||||
|
CHECK_CXX_SOURCE_COMPILES("#include <dispatch/dispatch.h>
|
||||||
|
int main() { dispatch_apply(1, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^(size_t) { }); }"
|
||||||
|
HAVE_DISPATCH_APPLY)
|
||||||
|
endif()
|
||||||
|
|
||||||
# Set up flags for MSVC
|
# Set up flags for MSVC
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
set( CMAKE_CXX_FLAGS "/MP ${CMAKE_CXX_FLAGS}" )
|
set( CMAKE_CXX_FLAGS "/MP ${CMAKE_CXX_FLAGS}" )
|
||||||
|
@ -567,6 +577,23 @@ if( HAVE_MMX )
|
||||||
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE )
|
||||||
endif( HAVE_MMX )
|
endif( HAVE_MMX )
|
||||||
|
|
||||||
|
# Select the threading backend for the compiled sources. Exactly one branch is
# taken, in this order of preference: HAVE_PARALLEL_FOR, HAVE_DISPATCH_APPLY,
# then the OpenMP fallback.
if( HAVE_PARALLEL_FOR )
|
||||||
|
add_definitions( -DHAVE_PARALLEL_FOR=1 )
|
||||||
|
elseif( HAVE_DISPATCH_APPLY )
|
||||||
|
add_definitions( -DHAVE_DISPATCH_APPLY=1 )
|
||||||
|
else()
|
||||||
|
# NO_OPENMP is a user-facing switch (default OFF) that skips the OpenMP
# lookup below entirely.
option( NO_OPENMP "Disable usage of OpenMP" OFF )
|
||||||
|
|
||||||
|
if( NOT NO_OPENMP )
|
||||||
|
include( FindOpenMP )
|
||||||
|
|
||||||
|
# When OpenMP was found, append its flags to both the C and the C++
# compiler flags. No preprocessor definition is added in this branch.
if( OPENMP_FOUND )
|
||||||
|
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
|
||||||
|
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
|
||||||
|
endif( OPENMP_FOUND )
|
||||||
|
endif( NOT NO_OPENMP )
|
||||||
|
endif()
|
||||||
|
|
||||||
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h
|
||||||
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
COMMAND lemon -C${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y
|
||||||
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
DEPENDS lemon ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y )
|
||||||
|
|
|
@ -46,16 +46,44 @@
|
||||||
#include "gl/xbr/xbrz.h"
|
#include "gl/xbr/xbrz.h"
|
||||||
#include "gl/xbr/xbrz_old.h"
|
#include "gl/xbr/xbrz_old.h"
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef HAVE_PARALLEL_FOR
|
||||||
# include <AvailabilityMacros.h>
|
|
||||||
# if MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
|
|
||||||
# define GZ_USE_LIBDISPATCH
|
|
||||||
# endif // MAC_OS_X_VERSION_MAX_ALLOWED >= 1060
|
|
||||||
#endif // __APPLE__
|
|
||||||
|
|
||||||
#ifdef GZ_USE_LIBDISPATCH
|
#include <ppl.h>
|
||||||
# include <dispatch/dispatch.h>
|
|
||||||
#endif // GZ_USE_LIBDISPATCH
|
template <typename Index, typename Function>
|
||||||
|
inline void parallel_for(const Index count, const Index step, const Function& function)
|
||||||
|
{
|
||||||
|
concurrency::parallel_for(0, count, step, function);
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined HAVE_DISPATCH_APPLY
|
||||||
|
|
||||||
|
#include <dispatch/dispatch.h>
|
||||||
|
|
||||||
|
template <typename Index, typename Function>
|
||||||
|
inline void parallel_for(const Index count, const Index step, const Function& function)
|
||||||
|
{
|
||||||
|
const dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||||
|
|
||||||
|
dispatch_apply(count / step + 1, queue, ^(size_t sliceY)
|
||||||
|
{
|
||||||
|
function(sliceY * step);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Fallback variant of parallel_for, compiled when neither HAVE_PARALLEL_FOR nor
// HAVE_DISPATCH_APPLY is defined. Calls function(i) for i = 0, step,
// 2 * step, ... while i < count.
//
// count    - exclusive upper bound of the index range
// step     - distance between consecutive indices
// function - callable invoked with each index of the range
template <typename Index, typename Function>
|
||||||
|
inline void parallel_for(const Index count, const Index step, const Function& function)
|
||||||
|
{
|
||||||
|
// The pragma asks OpenMP to distribute the loop iterations over threads.
// NOTE(review): presumably the loop simply runs serially when the build has
// no OpenMP flags - confirm against the compilers in use.
#pragma omp parallel for
|
||||||
|
for (Index i = 0; i < count; i += step)
|
||||||
|
{
|
||||||
|
function(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HAVE_PARALLEL_FOR
|
||||||
|
|
||||||
CUSTOM_CVAR(Int, gl_texture_hqresize, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
|
CUSTOM_CVAR(Int, gl_texture_hqresize, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
|
||||||
{
|
{
|
||||||
|
@ -87,7 +115,6 @@ CVAR (Flag, gl_texture_hqresize_textures, gl_texture_hqresize_targets, 1);
|
||||||
CVAR (Flag, gl_texture_hqresize_sprites, gl_texture_hqresize_targets, 2);
|
CVAR (Flag, gl_texture_hqresize_sprites, gl_texture_hqresize_targets, 2);
|
||||||
CVAR (Flag, gl_texture_hqresize_fonts, gl_texture_hqresize_targets, 4);
|
CVAR (Flag, gl_texture_hqresize_fonts, gl_texture_hqresize_targets, 4);
|
||||||
|
|
||||||
#ifdef GZ_USE_LIBDISPATCH
|
|
||||||
CVAR(Bool, gl_texture_hqresize_multithread, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
CVAR(Bool, gl_texture_hqresize_multithread, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG);
|
||||||
|
|
||||||
CUSTOM_CVAR(Int, gl_texture_hqresize_mt_width, 16, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
|
CUSTOM_CVAR(Int, gl_texture_hqresize_mt_width, 16, CVAR_ARCHIVE | CVAR_GLOBALCONFIG)
|
||||||
|
@ -101,7 +128,6 @@ CUSTOM_CVAR(Int, gl_texture_hqresize_mt_height, 4, CVAR_ARCHIVE | CVAR_GLOBALCON
|
||||||
if (self < 2) self = 2;
|
if (self < 2) self = 2;
|
||||||
if (self > 1024) self = 1024;
|
if (self > 1024) self = 1024;
|
||||||
}
|
}
|
||||||
#endif // GZ_USE_LIBDISPATCH
|
|
||||||
|
|
||||||
|
|
||||||
static void scale2x ( uint32_t* inputBuffer, uint32_t* outputBuffer, int inWidth, int inHeight )
|
static void scale2x ( uint32_t* inputBuffer, uint32_t* outputBuffer, int inWidth, int inHeight )
|
||||||
|
@ -289,7 +315,6 @@ static unsigned char *xbrzHelper( void (*xbrzFunction) ( size_t, const uint32_t*
|
||||||
|
|
||||||
unsigned char * newBuffer = new unsigned char[outWidth*outHeight*4];
|
unsigned char * newBuffer = new unsigned char[outWidth*outHeight*4];
|
||||||
|
|
||||||
#ifdef GZ_USE_LIBDISPATCH
|
|
||||||
const int thresholdWidth = gl_texture_hqresize_mt_width;
|
const int thresholdWidth = gl_texture_hqresize_mt_width;
|
||||||
const int thresholdHeight = gl_texture_hqresize_mt_height;
|
const int thresholdHeight = gl_texture_hqresize_mt_height;
|
||||||
|
|
||||||
|
@ -297,16 +322,13 @@ static unsigned char *xbrzHelper( void (*xbrzFunction) ( size_t, const uint32_t*
|
||||||
&& inWidth > thresholdWidth
|
&& inWidth > thresholdWidth
|
||||||
&& inHeight > thresholdHeight)
|
&& inHeight > thresholdHeight)
|
||||||
{
|
{
|
||||||
const dispatch_queue_t queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
parallel_for(inHeight, thresholdHeight, [=](int sliceY)
|
||||||
|
|
||||||
dispatch_apply(inHeight / thresholdHeight + 1, queue, ^(size_t sliceY)
|
|
||||||
{
|
{
|
||||||
xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
|
xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
|
||||||
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), sliceY * thresholdHeight, (sliceY + 1) * thresholdHeight);
|
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), sliceY, sliceY + thresholdHeight);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif // GZ_USE_LIBDISPATCH
|
|
||||||
{
|
{
|
||||||
xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
|
xbrzFunction(N, reinterpret_cast<uint32_t*>(inputBuffer), reinterpret_cast<uint32_t*>(newBuffer),
|
||||||
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), 0, std::numeric_limits<int>::max());
|
inWidth, inHeight, xbrz::ARGB, xbrz::ScalerCfg(), 0, std::numeric_limits<int>::max());
|
||||||
|
|
Loading…
Reference in a new issue