From cdbd50800730b62be30d594a31ab91c662a3eb10 Mon Sep 17 00:00:00 2001 From: derselbst Date: Thu, 12 Apr 2018 18:26:12 +0200 Subject: [PATCH 01/15] add fluid_align_ptr() for aligning pointers --- src/utils/fluid_sys.h | 16 ++++++++++++++++ test/CMakeLists.txt | 1 + test/test_pointer_alignment.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) create mode 100644 test/test_pointer_alignment.c diff --git a/src/utils/fluid_sys.h b/src/utils/fluid_sys.h index 210225c1..821e24ce 100644 --- a/src/utils/fluid_sys.h +++ b/src/utils/fluid_sys.h @@ -602,4 +602,20 @@ void fluid_clear_fpe_i386(void); /* System control */ void fluid_msleep(unsigned int msecs); +/** + * Advances the given \c ptr to the next \c alignment byte boundary. + * Make sure you've allocated an extra of \c alignment bytes to avoid a buffer overflow. + * + * @return Returned pointer is guaranteed to be aligned to \c alignment boundary and in range \f[ ptr <= returned_ptr < ptr + alignment \f]. + */ +static FLUID_INLINE void* fluid_align_ptr(const void* ptr, unsigned int alignment) +{ + uintptr_t ptr_int = (uintptr_t)ptr; + unsigned int offset = ptr_int % alignment; + unsigned int add = offset == 0 ?
0 // is already aligned, don't advance, else buffer overrun + : alignment - offset; // advance the pointer to the next alignment boundary + ptr_int += add; + return (void*)ptr_int; +} + #endif /* _FLUID_SYS_H */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fa8e9c00..c48069da 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,6 +13,7 @@ ADD_FLUID_TEST(test_sample_cache) ADD_FLUID_TEST(test_sfont_loading) ADD_FLUID_TEST(test_sample_rate_change) ADD_FLUID_TEST(test_preset_sample_loading) +ADD_FLUID_TEST(test_pointer_alignment) if ( LIBSNDFILE_HASVORBIS ) ADD_FLUID_TEST(test_sf3_sfont_loading) diff --git a/test/test_pointer_alignment.c b/test/test_pointer_alignment.c new file mode 100644 index 00000000..a7447912 --- /dev/null +++ b/test/test_pointer_alignment.c @@ -0,0 +1,31 @@ + +#include "test.h" +#include "utils/fluid_sys.h" + + +// test for fluid_align_ptr() +int main(void) +{ + unsigned int align; + uintptr_t ptr, aligned_ptr; + + for(align = 32; align <= 4*1024u; align <<= 1) + { + for(ptr = 0; ptr <= (align<<10); ptr++) + { + char* tmp = fluid_align_ptr((char*)ptr, align); + aligned_ptr = (uintptr_t)tmp; + + // pointer must be aligned properly + TEST_ASSERT(aligned_ptr % align == 0); + + // aligned pointer must not be smaller than ptr + TEST_ASSERT(aligned_ptr >= ptr); + + // aligned pointer must not be bigger than alignment + TEST_ASSERT(aligned_ptr < ptr + align); + } + } + + return EXIT_SUCCESS; +} From e229f62020bc70c004b59b28756e5bbbabf28c0c Mon Sep 17 00:00:00 2001 From: derselbst Date: Wed, 25 Apr 2018 14:38:28 +0200 Subject: [PATCH 02/15] cmake: detect openMP --- CMakeLists.txt | 14 ++++++++++++++ cmake_admin/report.cmake | 6 ++++++ src/config.cmake | 3 +++ src/utils/fluidsynth_priv.h | 4 ++++ 4 files changed, 27 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a0825f9..c64ff4a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -500,6 +500,20 @@ else ( enable-readline ) unset ( READLINE_LIBS CACHE ) endif
( enable-readline ) +unset ( HAVE_OPENMP CACHE ) +find_package ( OpenMP QUIET ) +if ( OpenMP_FOUND OR OpenMP_C_FOUND ) + message(STATUS "Found OpenMP ${OpenMP_C_SPEC_DATE}") + # require at least OMP 4.0 + if ( ( NOT OpenMP_C_SPEC_DATE LESS "201307" ) OR NOT ( OpenMP_C_VERSION VERSION_LESS "4.0" ) ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) + set ( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}" ) + set ( LIBFLUID_LIBS "${OpenMP_C_LIBRARIES};${LIBFLUID_LIBS}" ) + set ( HAVE_OPENMP 1 ) + endif() +endif() + if(enable-tests) # manipulate some variables to setup a proper test env diff --git a/cmake_admin/report.cmake b/cmake_admin/report.cmake index bf2fbad0..0d3018cb 100644 --- a/cmake_admin/report.cmake +++ b/cmake_admin/report.cmake @@ -127,6 +127,12 @@ else ( WITH_PROFILING ) message ( "Profiling: no" ) endif ( WITH_PROFILING ) +if ( HAVE_OPENMP ) + message ( "OpenMP 4.0: yes" ) +else ( HAVE_OPENMP ) + message ( "OpenMP 4.0: no" ) +endif ( HAVE_OPENMP ) + if ( ENABLE_DEBUG ) message ( "Debug: yes" ) else ( ENABLE_DEBUG ) diff --git a/src/config.cmake b/src/config.cmake index 49403121..aa222676 100644 --- a/src/config.cmake +++ b/src/config.cmake @@ -76,6 +76,9 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_NETINET_TCP_H @HAVE_NETINET_TCP_H@ +/* Define if compiling with openMP to enable parallel audio rendering */ +#cmakedefine HAVE_OPENMP @HAVE_OPENMP@ + /* Define to 1 if you have the header file. 
*/ #cmakedefine HAVE_PTHREAD_H @HAVE_PTHREAD_H@ diff --git a/src/utils/fluidsynth_priv.h b/src/utils/fluidsynth_priv.h index d8c73c10..af5da6c5 100644 --- a/src/utils/fluidsynth_priv.h +++ b/src/utils/fluidsynth_priv.h @@ -102,6 +102,10 @@ #include #endif +#if HAVE_OPENMP +#include +#endif + #if HAVE_IO_H #include #endif From 7ebdabae0cd830e59eef7680ab335e1b9e7f185b Mon Sep 17 00:00:00 2001 From: derselbst Date: Wed, 25 Apr 2018 14:15:54 +0200 Subject: [PATCH 03/15] allocate mono voice mix buffer on the heap --- src/rvoice/fluid_rvoice_mixer.c | 19 ++++++++++++++----- src/utils/fluid_sys.h | 2 ++ src/utils/fluidsynth_priv.h | 3 ++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index 444f1e8f..e8a837f7 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -47,6 +47,8 @@ struct _fluid_mixer_buffers_t { fluid_atomic_int_t ready; /**< Atomic: buffers are ready for mixing */ + fluid_real_t* local_buf; + int buf_count; fluid_real_t** left_buf; fluid_real_t** right_buf; @@ -257,7 +259,7 @@ get_dest_buf(fluid_rvoice_buffers_t* buffers, int index, */ static void fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, - fluid_real_t* dsp_buf, int start, int samplecount, + fluid_real_t *FLUID_RESTRICT dsp_buf, int start, int samplecount, fluid_real_t** dest_bufs, int dest_bufcount) { int bufcount = buffers->count; @@ -284,8 +286,12 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, i++; } else { + #pragma omp simd aligned(dsp_buf:FLUID_DEFAULT_ALIGNMENT) + #pragma vector aligned(dsp_buf) for (dsp_i = start; dsp_i < samplecount; dsp_i++) + { buf[dsp_i] += amp * dsp_buf[dsp_i]; + } } } } @@ -303,7 +309,7 @@ fluid_mixer_buffers_render_one(fluid_mixer_buffers_t* buffers, int blockcount = buffers->mixer->current_blockcount; int i, result = 0, start = 0; - FLUID_DECLARE_VLA(fluid_real_t, local_buf, FLUID_BUFSIZE*blockcount); + fluid_real_t* local_buf = 
fluid_align_ptr(buffers->local_buf, FLUID_DEFAULT_ALIGNMENT); for (i=0; i < blockcount; i++) { int s = fluid_rvoice_write(rvoice, &local_buf[FLUID_BUFSIZE*i]); @@ -439,7 +445,6 @@ fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers) } - static int fluid_mixer_buffers_init(fluid_mixer_buffers_t* buffers, fluid_rvoice_mixer_t* mixer) { @@ -449,14 +454,16 @@ fluid_mixer_buffers_init(fluid_mixer_buffers_t* buffers, fluid_rvoice_mixer_t* m buffers->buf_count = buffers->mixer->buffers.buf_count; buffers->fx_buf_count = buffers->mixer->buffers.fx_buf_count; samplecount = FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT; - + + /* Local mono voice buf */ + buffers->local_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, samplecount, FLUID_DEFAULT_ALIGNMENT); /* Left and right audio buffers */ buffers->left_buf = FLUID_ARRAY(fluid_real_t*, buffers->buf_count); buffers->right_buf = FLUID_ARRAY(fluid_real_t*, buffers->buf_count); - if ((buffers->left_buf == NULL) || (buffers->right_buf == NULL)) { + if ((buffers->local_buf == NULL) || (buffers->left_buf == NULL) || (buffers->right_buf == NULL)) { FLUID_LOG(FLUID_ERR, "Out of memory"); return 0; } @@ -586,6 +593,8 @@ fluid_mixer_buffers_free(fluid_mixer_buffers_t* buffers) FLUID_FREE(buffers->finished_voices); /* free all the sample buffers */ + FLUID_FREE(buffers->local_buf); + if (buffers->left_buf != NULL) { for (i = 0; i < buffers->buf_count; i++) { if (buffers->left_buf[i] != NULL) { diff --git a/src/utils/fluid_sys.h b/src/utils/fluid_sys.h index 821e24ce..f2d2cae3 100644 --- a/src/utils/fluid_sys.h +++ b/src/utils/fluid_sys.h @@ -618,4 +618,6 @@ static FLUID_INLINE void* fluid_align_ptr(const void* ptr, unsigned int alignmen return (void*)ptr_int; } +#define FLUID_DEFAULT_ALIGNMENT (64U) + #endif /* _FLUID_SYS_H */ diff --git a/src/utils/fluidsynth_priv.h b/src/utils/fluidsynth_priv.h index af5da6c5..1fbf0896 100644 --- a/src/utils/fluidsynth_priv.h +++ b/src/utils/fluidsynth_priv.h @@ -249,7 +249,8 @@ typedef FILE* fluid_file; 
#define FLUID_MALLOC(_n) malloc(_n) #define FLUID_REALLOC(_p,_n) realloc(_p,_n) #define FLUID_NEW(_t) (_t*)malloc(sizeof(_t)) -#define FLUID_ARRAY(_t,_n) (_t*)malloc((_n)*sizeof(_t)) +#define FLUID_ARRAY_ALIGNED(_t,_n,_a) (_t*)malloc((_n)*sizeof(_t) + ((unsigned int)_a - 1u)) +#define FLUID_ARRAY(_t,_n) FLUID_ARRAY_ALIGNED(_t,_n,1u) #define FLUID_FREE(_p) free(_p) #define FLUID_FOPEN(_f,_m) fopen(_f,_m) #define FLUID_FCLOSE(_f) fclose(_f) From ba9da3b790928b8956ed0805adf54a73fa453bd1 Mon Sep 17 00:00:00 2001 From: derselbst Date: Thu, 26 Apr 2018 16:38:12 +0200 Subject: [PATCH 04/15] enable FLUID_ASSERT macro --- src/utils/fluid_settings.c | 8 ++++---- src/utils/fluidsynth_priv.h | 7 +++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/utils/fluid_settings.c b/src/utils/fluid_settings.c index 5d315533..0d0f17d2 100644 --- a/src/utils/fluid_settings.c +++ b/src/utils/fluid_settings.c @@ -116,7 +116,7 @@ delete_fluid_str_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT(node->type, FLUID_STR_TYPE); + FLUID_ASSERT(node->type == FLUID_STR_TYPE); FLUID_FREE(node->str.value); FLUID_FREE(node->str.def); @@ -169,7 +169,7 @@ delete_fluid_num_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT (node->type, FLUID_NUM_TYPE); + FLUID_ASSERT (node->type == FLUID_NUM_TYPE); FLUID_FREE(node); } @@ -205,7 +205,7 @@ delete_fluid_int_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT (node->type, FLUID_INT_TYPE); + FLUID_ASSERT (node->type == FLUID_INT_TYPE); FLUID_FREE(node); } @@ -243,7 +243,7 @@ delete_fluid_set_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT (node->type, FLUID_SET_TYPE); + FLUID_ASSERT (node->type == FLUID_SET_TYPE); delete_fluid_hashtable(node->set.hashtable); FLUID_FREE(node); } diff --git a/src/utils/fluidsynth_priv.h b/src/utils/fluidsynth_priv.h index 1fbf0896..7addb1f8 100644 --- 
a/src/utils/fluidsynth_priv.h +++ b/src/utils/fluidsynth_priv.h @@ -335,8 +335,11 @@ do { strncpy(_dst,_src,_n); \ #define M_LN10 2.3025850929940456840179914546844 #endif -#define FLUID_ASSERT(a,b) -#define FLUID_ASSERT_P(a,b) +#ifdef NDEBUG +#define FLUID_ASSERT(a) +#else +#define FLUID_ASSERT(a) g_assert(a) +#endif #define FLUID_LIKELY G_LIKELY #define FLUID_UNLIKELY G_UNLIKELY From f332f32a7c3b26b70231fabe09b16c729b96ef74 Mon Sep 17 00:00:00 2001 From: derselbst Date: Thu, 26 Apr 2018 16:47:06 +0200 Subject: [PATCH 05/15] remove optimization for centered stereo samples in favour of vectorized mixing loop. Was incredibly unlikely to happen anyway. --- src/rvoice/fluid_rvoice_mixer.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index e8a837f7..fdd23664 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -269,30 +269,16 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, for (i=0; i < bufcount; i++) { fluid_real_t* buf = get_dest_buf(buffers, i, dest_bufs, dest_bufcount); - fluid_real_t* next_buf; fluid_real_t amp = buffers->bufs[i].amp; if (buf == NULL || amp == 0.0f) continue; - /* Optimization for centered stereo samples - we can save one - multiplication per sample */ - next_buf = (i+1 >= bufcount ? 
NULL : get_dest_buf(buffers, i+1, dest_bufs, dest_bufcount)); - if (next_buf && buffers->bufs[i+1].amp == amp) { - for (dsp_i = start; dsp_i < samplecount; dsp_i++) { - fluid_real_t samp = amp * dsp_buf[dsp_i]; - buf[dsp_i] += samp; - next_buf[dsp_i] += samp; - } - i++; - } - else { #pragma omp simd aligned(dsp_buf:FLUID_DEFAULT_ALIGNMENT) #pragma vector aligned(dsp_buf) for (dsp_i = start; dsp_i < samplecount; dsp_i++) { buf[dsp_i] += amp * dsp_buf[dsp_i]; } - } } } From 89015494cbab74c516fe966cfff581112a49796b Mon Sep 17 00:00:00 2001 From: derselbst Date: Thu, 26 Apr 2018 17:31:05 +0200 Subject: [PATCH 06/15] vectorize mixing loop of fluid_rvoice_buffers_mix() --- src/rvoice/fluid_rvoice_mixer.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index fdd23664..decdd743 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -253,29 +253,31 @@ get_dest_buf(fluid_rvoice_buffers_t* buffers, int index, * * @param buffers Destination buffer(s) * @param dsp_buf Mono sample source - * @param samplecount Number of samples to process (no FLUID_BUFSIZE restriction) + * @param start_block Block to start mixing at + * @param sample_count number of samples to mix following \c start_block * @param dest_bufs Array of buffers to mixdown to * @param dest_bufcount Length of dest_bufs */ static void fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, - fluid_real_t *FLUID_RESTRICT dsp_buf, int start, int samplecount, + fluid_real_t *FLUID_RESTRICT dsp_buf, + int start_block, int sample_count, fluid_real_t** dest_bufs, int dest_bufcount) { int bufcount = buffers->count; int i, dsp_i; - if (!samplecount || !bufcount || !dest_bufcount) + if (sample_count <= 0 || dest_bufcount <= 0) return; for (i=0; i < bufcount; i++) { - fluid_real_t* buf = get_dest_buf(buffers, i, dest_bufs, dest_bufcount); + fluid_real_t *FLUID_RESTRICT buf = 
get_dest_buf(buffers, i, dest_bufs, dest_bufcount); fluid_real_t amp = buffers->bufs[i].amp; + if (buf == NULL || amp == 0.0f) continue; #pragma omp simd aligned(dsp_buf:FLUID_DEFAULT_ALIGNMENT) - #pragma vector aligned(dsp_buf) - for (dsp_i = start; dsp_i < samplecount; dsp_i++) + for (dsp_i = (start_block * FLUID_BUFSIZE); dsp_i < sample_count; dsp_i++) { buf[dsp_i] += amp * dsp_buf[dsp_i]; } @@ -293,24 +295,25 @@ fluid_mixer_buffers_render_one(fluid_mixer_buffers_t* buffers, unsigned int dest_bufcount) { int blockcount = buffers->mixer->current_blockcount; - int i, result = 0, start = 0; + int i, total_samples = 0, start_block = 0; fluid_real_t* local_buf = fluid_align_ptr(buffers->local_buf, FLUID_DEFAULT_ALIGNMENT); for (i=0; i < blockcount; i++) { int s = fluid_rvoice_write(rvoice, &local_buf[FLUID_BUFSIZE*i]); if (s == -1) { - start += FLUID_BUFSIZE; + start_block += s; s = FLUID_BUFSIZE; } - result += s; + total_samples += s; + if (s < FLUID_BUFSIZE) { - break; + break; } } - fluid_rvoice_buffers_mix(&rvoice->buffers, local_buf, start, result-start, dest_bufs, dest_bufcount); + fluid_rvoice_buffers_mix(&rvoice->buffers, local_buf, -start_block, total_samples-((-start_block)*FLUID_BUFSIZE), dest_bufs, dest_bufcount); - if (result < buffers->mixer->current_blockcount * FLUID_BUFSIZE) { + if (total_samples < buffers->mixer->current_blockcount * FLUID_BUFSIZE) { fluid_finish_rvoice(buffers, rvoice); } } From ed312b7accd3d028e381116bcd0982c7738a823c Mon Sep 17 00:00:00 2001 From: derselbst Date: Thu, 26 Apr 2018 21:33:38 +0200 Subject: [PATCH 07/15] refactor fluid_rvoice_mixer_process_fx() --- src/rvoice/fluid_rvoice_mixer.c | 63 +++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index decdd743..70dcf325 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -103,40 +103,49 @@ 
fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) { int i; fluid_profile_ref_var(prof_ref); - if (mixer->fx.with_reverb) { - if (mixer->fx.mix_fx_to_out) { - for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_revmodel_processmix(mixer->fx.reverb, - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL][i], - &mixer->buffers.left_buf[0][i], - &mixer->buffers.right_buf[0][i]); - } - else { - for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_revmodel_processreplace(mixer->fx.reverb, - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL][i], - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL][i], - &mixer->buffers.fx_right_buf[SYNTH_REVERB_CHANNEL][i]); + + fluid_real_t* in_rev = mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL]; + fluid_real_t* in_ch = mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL]; + + fluid_real_t *out_rev_l, *out_rev_r, *out_ch_l, *out_ch_r; + + void (*reverb_process_func)(fluid_revmodel_t* rev, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); + void (*chorus_process_func)(fluid_chorus_t* chorus, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); + + if (mixer->fx.mix_fx_to_out) + { + out_ch_l = out_rev_l = mixer->buffers.left_buf[0]; + out_ch_r = out_rev_r = mixer->buffers.right_buf[0]; + + reverb_process_func = fluid_revmodel_processmix; + chorus_process_func = fluid_chorus_processmix; + } + else + { + out_rev_l = mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL]; + out_rev_r = mixer->buffers.fx_right_buf[SYNTH_REVERB_CHANNEL]; + + out_ch_l = mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL]; + out_ch_r = mixer->buffers.fx_right_buf[SYNTH_CHORUS_CHANNEL]; + + reverb_process_func = fluid_revmodel_processreplace; + chorus_process_func = fluid_chorus_processreplace; + } + + + if (mixer->fx.with_reverb) { + for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) + reverb_process_func(mixer->fx.reverb, &in_rev[i], 
&out_rev_l[i], &out_rev_r[i]); + fluid_profile(FLUID_PROF_ONE_BLOCK_REVERB, prof_ref,0, mixer->current_blockcount * FLUID_BUFSIZE); } if (mixer->fx.with_chorus) { - if (mixer->fx.mix_fx_to_out) { for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_chorus_processmix(mixer->fx.chorus, - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL][i], - &mixer->buffers.left_buf[0][i], - &mixer->buffers.right_buf[0][i]); - } - else { - for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_chorus_processreplace(mixer->fx.chorus, - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL][i], - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL][i], - &mixer->buffers.fx_right_buf[SYNTH_CHORUS_CHANNEL][i]); - } + chorus_process_func(mixer->fx.chorus, &in_ch[i], &out_ch_l[i], &out_ch_r[i]); + fluid_profile(FLUID_PROF_ONE_BLOCK_CHORUS, prof_ref,0, mixer->current_blockcount * FLUID_BUFSIZE); } From 58008aae18bd36d8d824b103fc4ec0ca590a6713 Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 27 Apr 2018 17:19:56 +0200 Subject: [PATCH 08/15] convert rvoice_mixer mixdown buffer to 1D arrays --- src/bindings/fluid_ladspa.c | 7 +- src/bindings/fluid_ladspa.h | 2 +- src/rvoice/fluid_rvoice_mixer.c | 286 +++++++++++++++++--------------- src/rvoice/fluid_rvoice_mixer.h | 4 +- src/synth/fluid_synth.c | 52 +++--- 5 files changed, 189 insertions(+), 162 deletions(-) diff --git a/src/bindings/fluid_ladspa.c b/src/bindings/fluid_ladspa.c index c7faa2c7..bb79ba56 100644 --- a/src/bindings/fluid_ladspa.c +++ b/src/bindings/fluid_ladspa.c @@ -266,11 +266,12 @@ void delete_fluid_ladspa_fx(fluid_ladspa_fx_t *fx) * @param fx LADSPA fx instance * @param prefix common name prefix for the created nodes * @param num_buffers number of of buffers buffer array - * @param buffers array of pointers to buffers + * @param buffers array of sample buffers + * @param buf_stride number of samples contained in one buffer * @return FLUID_OK on success, otherwise 
FLUID_FAILED */ int fluid_ladspa_add_host_ports(fluid_ladspa_fx_t *fx, const char *prefix, - int num_buffers, fluid_real_t *buffers[]) + int num_buffers, fluid_real_t buffers[], int buf_stride) { int i; char name[99]; @@ -296,7 +297,7 @@ int fluid_ladspa_add_host_ports(fluid_ladspa_fx_t *fx, const char *prefix, if (new_fluid_ladspa_node(fx, name, FLUID_LADSPA_NODE_AUDIO | FLUID_LADSPA_NODE_HOST, - buffers[i]) == NULL) + &buffers[i * buf_stride]) == NULL) { LADSPA_API_RETURN(fx, FLUID_FAILED); } diff --git a/src/bindings/fluid_ladspa.h b/src/bindings/fluid_ladspa.h index e4669855..be607a89 100644 --- a/src/bindings/fluid_ladspa.h +++ b/src/bindings/fluid_ladspa.h @@ -31,6 +31,6 @@ int fluid_ladspa_set_sample_rate(fluid_ladspa_fx_t *fx, fluid_real_t sample_rate void fluid_ladspa_run(fluid_ladspa_fx_t *fx, int block_count, int block_size); int fluid_ladspa_add_host_ports(fluid_ladspa_fx_t *fx, const char *prefix, - int num_buffers, fluid_real_t *buffers[]); + int num_buffers, fluid_real_t buffers[], int buf_stride); #endif /* _FLUID_LADSPA_H */ diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index 70dcf325..88123c8d 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -50,12 +50,27 @@ struct _fluid_mixer_buffers_t { fluid_real_t* local_buf; int buf_count; - fluid_real_t** left_buf; - fluid_real_t** right_buf; - int fx_buf_count; - fluid_real_t** fx_left_buf; - fluid_real_t** fx_right_buf; + + /** buffer to store the left part of a stereo channel to. + * Specifically a two dimensional array, containing \c buf_count buffers + * (i.e. 
for each synth.audio-channels), of which each buffer contains + * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT audio items (=samples) + * @note the beginning of the array is aligned to the FLUID_DEFAULT_ALIGNMENT + * boundary, so make sure to access all those pointers using fluid_align_ptr() + */ + fluid_real_t* left_buf; + + /** ditto, but for right part of a stereo channel */ + fluid_real_t* right_buf; + + /** buffer to store the left part of a stereo effects channel to. + * Specifically a two dimensional array, containing \c fx_buf_count buffers + * (i.e. for each synth.effects-channels), of which each buffer contains + * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT audio items (=samples) + */ + fluid_real_t* fx_left_buf; + fluid_real_t* fx_right_buf; }; typedef struct _fluid_mixer_fx_t fluid_mixer_fx_t; @@ -104,18 +119,24 @@ fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) int i; fluid_profile_ref_var(prof_ref); - fluid_real_t* in_rev = mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL]; - fluid_real_t* in_ch = mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL]; + void (*reverb_process_func)(fluid_revmodel_t* rev, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); + void (*chorus_process_func)(fluid_chorus_t* chorus, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); fluid_real_t *out_rev_l, *out_rev_r, *out_ch_l, *out_ch_r; - void (*reverb_process_func)(fluid_revmodel_t* rev, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); - void (*chorus_process_func)(fluid_chorus_t* chorus, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); + fluid_real_t* in_rev = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t* in_ch = in_rev; + + in_rev = &in_rev[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + in_ch = &in_ch [SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; if (mixer->fx.mix_fx_to_out) { - out_ch_l =
out_rev_l = mixer->buffers.left_buf[0]; - out_ch_r = out_rev_r = mixer->buffers.right_buf[0]; + out_rev_l = fluid_align_ptr(mixer->buffers.left_buf, FLUID_DEFAULT_ALIGNMENT); + out_rev_r = fluid_align_ptr(mixer->buffers.right_buf, FLUID_DEFAULT_ALIGNMENT); + + out_ch_l = &out_rev_l[0 * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + out_ch_r = &out_rev_r[0 * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; reverb_process_func = fluid_revmodel_processmix; chorus_process_func = fluid_chorus_processmix; @@ -123,11 +144,14 @@ fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) } else { - out_rev_l = mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL]; - out_rev_r = mixer->buffers.fx_right_buf[SYNTH_REVERB_CHANNEL]; + out_ch_l = out_rev_l = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + out_ch_r = out_rev_r = fluid_align_ptr(mixer->buffers.fx_right_buf, FLUID_DEFAULT_ALIGNMENT); - out_ch_l = mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL]; - out_ch_r = mixer->buffers.fx_right_buf[SYNTH_CHORUS_CHANNEL]; + out_rev_l = &out_rev_l[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + out_rev_r = &out_rev_r[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + + out_ch_l = &out_ch_l[SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + out_ch_r = &out_ch_r[SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; reverb_process_func = fluid_revmodel_processreplace; chorus_process_func = fluid_chorus_processreplace; @@ -167,7 +191,7 @@ fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) static FLUID_INLINE int fluid_mixer_buffers_prepare(fluid_mixer_buffers_t* buffers, fluid_real_t** outbufs) { - fluid_real_t *reverb_buf, *chorus_buf; + fluid_real_t *base_ptr; int i; int with_reverb = buffers->mixer->fx.with_reverb; int with_chorus = buffers->mixer->fx.with_chorus; @@ -180,10 +204,11 @@ fluid_mixer_buffers_prepare(fluid_mixer_buffers_t* buffers, 
fluid_real_t** outbu with_reverb = (with_reverb | with_ladspa); with_chorus = (with_chorus | with_ladspa); #endif - reverb_buf = (with_reverb) ? buffers->fx_left_buf[SYNTH_REVERB_CHANNEL] : NULL; - chorus_buf = (with_chorus) ? buffers->fx_left_buf[SYNTH_CHORUS_CHANNEL] : NULL; - outbufs[buffers->buf_count*2 + SYNTH_REVERB_CHANNEL] = reverb_buf; - outbufs[buffers->buf_count*2 + SYNTH_CHORUS_CHANNEL] = chorus_buf; + + base_ptr = fluid_align_ptr(buffers->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + + outbufs[buffers->buf_count*2 + SYNTH_REVERB_CHANNEL] = (with_reverb) ? &base_ptr[SYNTH_REVERB_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] : NULL; + outbufs[buffers->buf_count*2 + SYNTH_CHORUS_CHANNEL] = (with_chorus) ? &base_ptr[SYNTH_CHORUS_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] : NULL; /* The output associated with a MIDI channel is wrapped around * using the number of audio groups as modulo divider. This is @@ -197,10 +222,14 @@ fluid_mixer_buffers_prepare(fluid_mixer_buffers_t* buffers, fluid_real_t** outbu * channels 1, 4, 7, 10 etc go to output 1; 2, 5, 8, 11 etc to * output 2, 3, 6, 9, 12 etc to output 3. 
*/ - + base_ptr = fluid_align_ptr(buffers->left_buf, FLUID_DEFAULT_ALIGNMENT); for (i = 0; i < buffers->buf_count; i++) { - outbufs[i*2] = buffers->left_buf[i]; - outbufs[i*2+1] = buffers->right_buf[i]; + outbufs[i*2] = &base_ptr[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; + } + + base_ptr = fluid_align_ptr(buffers->right_buf, FLUID_DEFAULT_ALIGNMENT); + for (i = 0; i < buffers->buf_count; i++) { + outbufs[i*2+1] = &base_ptr[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; } return buffers->buf_count*2 + 2; } @@ -278,6 +307,9 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, if (sample_count <= 0 || dest_bufcount <= 0) return; + FLUID_ASSERT((uintptr_t)dsp_buf % FLUID_DEFAULT_ALIGNMENT == 0); + FLUID_ASSERT((uintptr_t)(&dsp_buf[start_block * FLUID_BUFSIZE]) % FLUID_DEFAULT_ALIGNMENT == 0); + for (i=0; i < bufcount; i++) { fluid_real_t *FLUID_RESTRICT buf = get_dest_buf(buffers, i, dest_bufs, dest_bufcount); fluid_real_t amp = buffers->bufs[i].amp; @@ -285,7 +317,9 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, if (buf == NULL || amp == 0.0f) continue; - #pragma omp simd aligned(dsp_buf:FLUID_DEFAULT_ALIGNMENT) + FLUID_ASSERT((uintptr_t)buf % FLUID_DEFAULT_ALIGNMENT == 0); + + #pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT) for (dsp_i = (start_block * FLUID_BUFSIZE); dsp_i < sample_count; dsp_i++) { buf[dsp_i] += amp * dsp_buf[dsp_i]; @@ -425,24 +459,25 @@ fluid_render_loop_singlethread(fluid_rvoice_mixer_t* mixer) } } - static FLUID_INLINE void fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers) { - int i; - int size = buffers->mixer->current_blockcount * FLUID_BUFSIZE * sizeof(fluid_real_t); - /* TODO: Optimize by only zero out the buffers we actually use later on. 
*/ - for (i=0; i < buffers->buf_count; i++) { - FLUID_MEMSET(buffers->left_buf[i], 0, size); - FLUID_MEMSET(buffers->right_buf[i], 0, size); - } - for (i=0; i < buffers->fx_buf_count; i++) { - FLUID_MEMSET(buffers->fx_left_buf[i], 0, size); - FLUID_MEMSET(buffers->fx_right_buf[i], 0, size); - } + int size = FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE * sizeof(fluid_real_t); + fluid_real_t* buf; + + buf = fluid_align_ptr(buffers->left_buf, FLUID_DEFAULT_ALIGNMENT); + FLUID_MEMSET(buf, 0, buffers->buf_count * size); + + buf = fluid_align_ptr(buffers->right_buf, FLUID_DEFAULT_ALIGNMENT); + FLUID_MEMSET(buf, 0, buffers->buf_count * size); + + buf = fluid_align_ptr(buffers->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + FLUID_MEMSET(buf, 0, buffers->fx_buf_count * size); + + buf = fluid_align_ptr(buffers->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + FLUID_MEMSET(buf, 0, buffers->fx_buf_count * size); } - static int fluid_mixer_buffers_init(fluid_mixer_buffers_t* buffers, fluid_rvoice_mixer_t* mixer) { @@ -458,51 +493,24 @@ fluid_mixer_buffers_init(fluid_mixer_buffers_t* buffers, fluid_rvoice_mixer_t* m /* Left and right audio buffers */ - buffers->left_buf = FLUID_ARRAY(fluid_real_t*, buffers->buf_count); - buffers->right_buf = FLUID_ARRAY(fluid_real_t*, buffers->buf_count); + buffers->left_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); + buffers->right_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); if ((buffers->local_buf == NULL) || (buffers->left_buf == NULL) || (buffers->right_buf == NULL)) { FLUID_LOG(FLUID_ERR, "Out of memory"); return 0; } - FLUID_MEMSET(buffers->left_buf, 0, buffers->buf_count * sizeof(fluid_real_t*)); - FLUID_MEMSET(buffers->right_buf, 0, buffers->buf_count * sizeof(fluid_real_t*)); - - for (i = 0; i < buffers->buf_count; i++) { - - buffers->left_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - buffers->right_buf[i] = FLUID_ARRAY(fluid_real_t, 
samplecount); - - if ((buffers->left_buf[i] == NULL) || (buffers->right_buf[i] == NULL)) { - FLUID_LOG(FLUID_ERR, "Out of memory"); - return 0; - } - } - /* Effects audio buffers */ - buffers->fx_left_buf = FLUID_ARRAY(fluid_real_t*, buffers->fx_buf_count); - buffers->fx_right_buf = FLUID_ARRAY(fluid_real_t*, buffers->fx_buf_count); + buffers->fx_left_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->fx_buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); + buffers->fx_right_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->fx_buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); if ((buffers->fx_left_buf == NULL) || (buffers->fx_right_buf == NULL)) { FLUID_LOG(FLUID_ERR, "Out of memory"); return 0; } - FLUID_MEMSET(buffers->fx_left_buf, 0, buffers->fx_buf_count * sizeof(fluid_real_t*)); - FLUID_MEMSET(buffers->fx_right_buf, 0, buffers->fx_buf_count * sizeof(fluid_real_t*)); - - for (i = 0; i < buffers->fx_buf_count; i++) { - buffers->fx_left_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - buffers->fx_right_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - - if ((buffers->fx_left_buf[i] == NULL) || (buffers->fx_right_buf[i] == NULL)) { - FLUID_LOG(FLUID_ERR, "Out of memory"); - return 0; - } - } - buffers->finished_voices = NULL; if (fluid_mixer_buffers_update_polyphony(buffers, mixer->polyphony) == FLUID_FAILED) { @@ -592,42 +600,10 @@ fluid_mixer_buffers_free(fluid_mixer_buffers_t* buffers) /* free all the sample buffers */ FLUID_FREE(buffers->local_buf); - - if (buffers->left_buf != NULL) { - for (i = 0; i < buffers->buf_count; i++) { - if (buffers->left_buf[i] != NULL) { - FLUID_FREE(buffers->left_buf[i]); - } - } - FLUID_FREE(buffers->left_buf); - } - - if (buffers->right_buf != NULL) { - for (i = 0; i < buffers->buf_count; i++) { - if (buffers->right_buf[i] != NULL) { - FLUID_FREE(buffers->right_buf[i]); - } - } - FLUID_FREE(buffers->right_buf); - } - - if (buffers->fx_left_buf != NULL) { - for (i = 0; i < buffers->fx_buf_count; i++) { - if 
(buffers->fx_left_buf[i] != NULL) { - FLUID_FREE(buffers->fx_left_buf[i]); - } - } - FLUID_FREE(buffers->fx_left_buf); - } - - if (buffers->fx_right_buf != NULL) { - for (i = 0; i < buffers->fx_buf_count; i++) { - if (buffers->fx_right_buf[i] != NULL) { - FLUID_FREE(buffers->fx_right_buf[i]); - } - } - FLUID_FREE(buffers->fx_right_buf); - } + FLUID_FREE(buffers->left_buf); + FLUID_FREE(buffers->right_buf); + FLUID_FREE(buffers->fx_left_buf); + FLUID_FREE(buffers->fx_right_buf); } void delete_fluid_rvoice_mixer(fluid_rvoice_mixer_t* mixer) @@ -649,6 +625,7 @@ void delete_fluid_rvoice_mixer(fluid_rvoice_mixer_t* mixer) delete_fluid_revmodel(mixer->fx.reverb); if (mixer->fx.chorus) delete_fluid_chorus(mixer->fx.chorus); + FLUID_FREE(mixer->rvoices); FLUID_FREE(mixer); } @@ -666,18 +643,33 @@ void fluid_rvoice_mixer_set_ladspa(fluid_rvoice_mixer_t* mixer, { return; } + else + { + fluid_real_t* main_l = fluid_align_ptr(mixer->buffers.left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t* main_r = fluid_align_ptr(mixer->buffers.right_buf, FLUID_DEFAULT_ALIGNMENT); + + fluid_real_t* rev = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t* chor = rev; + + rev = &rev[SYNTH_REVERB_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; + chor = &chor[SYNTH_CHORUS_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; - fluid_ladspa_add_host_ports(ladspa_fx, "Main:L", audio_groups, - mixer->buffers.left_buf); + fluid_ladspa_add_host_ports(ladspa_fx, "Main:L", audio_groups, + main_l, + FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); - fluid_ladspa_add_host_ports(ladspa_fx, "Main:R", audio_groups, - mixer->buffers.right_buf); + fluid_ladspa_add_host_ports(ladspa_fx, "Main:R", audio_groups, + main_r, + FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); - fluid_ladspa_add_host_ports(ladspa_fx, "Reverb:Send", 1, - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL]); + fluid_ladspa_add_host_ports(ladspa_fx, "Reverb:Send", 1, + rev, + 
FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); - fluid_ladspa_add_host_ports(ladspa_fx, "Chorus:Send", 1, - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL]); + fluid_ladspa_add_host_ports(ladspa_fx, "Chorus:Send", 1, + chor, + FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); + } } #endif @@ -739,18 +731,18 @@ DECLARE_FLUID_RVOICE_FUNCTION(fluid_rvoice_mixer_reset_chorus) } int fluid_rvoice_mixer_get_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** left, fluid_real_t*** right) + fluid_real_t** left, fluid_real_t** right) { - *left = mixer->buffers.left_buf; - *right = mixer->buffers.right_buf; + *left = fluid_align_ptr(mixer->buffers.left_buf, FLUID_DEFAULT_ALIGNMENT); + *right = fluid_align_ptr(mixer->buffers.right_buf, FLUID_DEFAULT_ALIGNMENT); return mixer->buffers.buf_count; } int fluid_rvoice_mixer_get_fx_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** fx_left, fluid_real_t*** fx_right) + fluid_real_t** fx_left, fluid_real_t** fx_right) { - *fx_left = mixer->buffers.fx_left_buf; - *fx_right = mixer->buffers.fx_right_buf; + *fx_left = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + *fx_right = fluid_align_ptr(mixer->buffers.fx_right_buf, FLUID_DEFAULT_ALIGNMENT); return mixer->buffers.fx_buf_count; } @@ -828,29 +820,63 @@ fluid_mixer_thread_func (void* data) } static void -fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src) +fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dst, fluid_mixer_buffers_t* src) { int i,j; - int scount = dest->mixer->current_blockcount * FLUID_BUFSIZE; + int scount = dst->mixer->current_blockcount * FLUID_BUFSIZE; int minbuf; + fluid_real_t *FLUID_RESTRICT base_src; + fluid_real_t *FLUID_RESTRICT base_dst; - minbuf = dest->buf_count; + minbuf = dst->buf_count; if (minbuf > src->buf_count) minbuf = src->buf_count; - for (i=0; i < minbuf; i++) { - for (j=0; j < scount; j++) { - dest->left_buf[i][j] += src->left_buf[i][j]; - dest->right_buf[i][j] += src->right_buf[i][j]; + + 
base_src = fluid_align_ptr(src->left_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->left_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; + } + } + + base_src = fluid_align_ptr(src->right_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->right_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; } } - minbuf = dest->fx_buf_count; + minbuf = dst->fx_buf_count; if (minbuf > src->fx_buf_count) minbuf = src->fx_buf_count; - for (i=0; i < minbuf; i++) { - for (j=0; j < scount; j++) { - dest->fx_left_buf[i][j] += src->fx_left_buf[i][j]; - dest->fx_right_buf[i][j] += src->fx_right_buf[i][j]; + + base_src = fluid_align_ptr(src->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; + } + } + + base_src = fluid_align_ptr(src->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; } } } diff --git a/src/rvoice/fluid_rvoice_mixer.h b/src/rvoice/fluid_rvoice_mixer.h index 6bff3de7..f05697a2 100644 --- a/src/rvoice/fluid_rvoice_mixer.h +++ b/src/rvoice/fluid_rvoice_mixer.h @@ -33,9 +33,9 @@ typedef struct _fluid_rvoice_mixer_t fluid_rvoice_mixer_t; int fluid_rvoice_mixer_render(fluid_rvoice_mixer_t* mixer, int blockcount); int fluid_rvoice_mixer_get_bufs(fluid_rvoice_mixer_t* mixer, - 
fluid_real_t*** left, fluid_real_t*** right); + fluid_real_t** left, fluid_real_t** right); int fluid_rvoice_mixer_get_fx_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** fx_left, fluid_real_t*** fx_right); + fluid_real_t** fx_left, fluid_real_t** fx_right); int fluid_rvoice_mixer_get_bufcount(fluid_rvoice_mixer_t* mixer); #if WITH_PROFILING int fluid_rvoice_mixer_get_active_voices(fluid_rvoice_mixer_t* mixer); diff --git a/src/synth/fluid_synth.c b/src/synth/fluid_synth.c index 3c10a760..7d8956d9 100644 --- a/src/synth/fluid_synth.c +++ b/src/synth/fluid_synth.c @@ -2882,8 +2882,8 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, float** left, float** right, float** fx_left, float** fx_right) { - fluid_real_t** left_in, **fx_left_in; - fluid_real_t** right_in, **fx_right_in; + fluid_real_t* left_in, *fx_left_in; + fluid_real_t* right_in, *fx_right_in; double time = fluid_utime(); int i, num, available, count; #ifdef WITH_FLOAT @@ -2906,13 +2906,13 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, for (i = 0; i < synth->audio_channels; i++) { #ifdef WITH_FLOAT - FLUID_MEMCPY(left[i], left_in[i] + synth->cur, bytes); - FLUID_MEMCPY(right[i], right_in[i] + synth->cur, bytes); + FLUID_MEMCPY(left[i], &left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); + FLUID_MEMCPY(right[i], &right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); #else //WITH_FLOAT int j; for (j = 0; j < num; j++) { - left[i][j] = (float) left_in[i][j + synth->cur]; - right[i][j] = (float) right_in[i][j + synth->cur]; + left[i][j] = (float) left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; + right[i][j] = (float) right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; } #endif //WITH_FLOAT } @@ -2921,20 +2921,20 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, { #ifdef WITH_FLOAT if(fx_left != NULL) - FLUID_MEMCPY(fx_left[i], fx_left_in[i] + synth->cur, 
bytes); + FLUID_MEMCPY(fx_left[i], &fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); if(fx_right != NULL) - FLUID_MEMCPY(fx_right[i], fx_right_in[i] + synth->cur, bytes); + FLUID_MEMCPY(fx_right[i], &fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); #else //WITH_FLOAT int j; if(fx_left != NULL) { for (j = 0; j < num; j++) - fx_left[i][j] = (float) fx_left_in[i][j + synth->cur]; + fx_left[i][j] = (float) fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; } if(fx_right != NULL) { for (j = 0; j < num; j++) - fx_right[i][j] = (float) fx_right_in[i][j + synth->cur]; + fx_right[i][j] = (float) fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; } #endif //WITH_FLOAT } @@ -2957,13 +2957,13 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, for (i = 0; i < synth->audio_channels; i++) { #ifdef WITH_FLOAT - FLUID_MEMCPY(left[i] + count, left_in[i], bytes); - FLUID_MEMCPY(right[i] + count, right_in[i], bytes); + FLUID_MEMCPY(left[i] + count, &left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); + FLUID_MEMCPY(right[i] + count, &right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); #else //WITH_FLOAT int j; for (j = 0; j < num; j++) { - left[i][j + count] = (float) left_in[i][j]; - right[i][j + count] = (float) right_in[i][j]; + left[i][j + count] = (float) left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; + right[i][j + count] = (float) right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; } #endif //WITH_FLOAT } @@ -2972,20 +2972,20 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, { #ifdef WITH_FLOAT if(fx_left != NULL) - FLUID_MEMCPY(fx_left[i] + count, fx_left_in[i], bytes); + FLUID_MEMCPY(fx_left[i] + count, &fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); if(fx_right != NULL) - FLUID_MEMCPY(fx_right[i] + count, fx_right_in[i], 
bytes); + FLUID_MEMCPY(fx_right[i] + count, &fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); #else //WITH_FLOAT int j; if(fx_left != NULL) { for (j = 0; j < num; j++) - fx_left[i][j + count] = (float) fx_left_in[i][j]; + fx_left[i][j + count] = (float) fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; } if(fx_right != NULL) { for (j = 0; j < num; j++) - fx_right[i][j + count] = (float) fx_right_in[i][j]; + fx_right[i][j + count] = (float) fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; } #endif //WITH_FLOAT } @@ -3074,8 +3074,8 @@ fluid_synth_write_float(fluid_synth_t* synth, int len, int i, j, k, l; float* left_out = (float*) lout; float* right_out = (float*) rout; - fluid_real_t** left_in; - fluid_real_t** right_in; + fluid_real_t* left_in; + fluid_real_t* right_in; double time = fluid_utime(); float cpu_load; @@ -3095,8 +3095,8 @@ fluid_synth_write_float(fluid_synth_t* synth, int len, l = 0; } - left_out[j] = (float) left_in[0][l]; - right_out[k] = (float) right_in[0][l]; + left_out[j] = (float) left_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + l]; + right_out[k] = (float) right_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + l]; } synth->cur = l; @@ -3172,8 +3172,8 @@ fluid_synth_write_s16(fluid_synth_t* synth, int len, int i, j, k, cur; signed short* left_out = (signed short*) lout; signed short* right_out = (signed short*) rout; - fluid_real_t** left_in; - fluid_real_t** right_in; + fluid_real_t* left_in; + fluid_real_t* right_in; fluid_real_t left_sample; fluid_real_t right_sample; double time = fluid_utime(); @@ -3198,8 +3198,8 @@ fluid_synth_write_s16(fluid_synth_t* synth, int len, cur = 0; } - left_sample = roundi (left_in[0][cur] * 32766.0f + rand_table[0][di]); - right_sample = roundi (right_in[0][cur] * 32766.0f + rand_table[1][di]); + left_sample = roundi (left_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + cur] * 32766.0f + rand_table[0][di]); + 
right_sample = roundi (right_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + cur] * 32766.0f + rand_table[1][di]); di++; if (di >= DITHER_SIZE) di = 0; From b7cf79bc15935a7109314d3e84c1c9d486ccd922 Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 27 Apr 2018 17:49:22 +0200 Subject: [PATCH 09/15] vectorize mixdown loop of fluid_mixer_buffers_mix() --- src/rvoice/fluid_rvoice_mixer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index 88123c8d..6cbac0e3 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -836,6 +836,7 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dst, fluid_mixer_buffers_t* src) base_dst = fluid_align_ptr(dst->left_buf, FLUID_DEFAULT_ALIGNMENT); for (i=0; i < minbuf; i++) { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) for (j=0; j < scount; j++) { int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; @@ -847,6 +848,7 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dst, fluid_mixer_buffers_t* src) base_dst = fluid_align_ptr(dst->right_buf, FLUID_DEFAULT_ALIGNMENT); for (i=0; i < minbuf; i++) { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) for (j=0; j < scount; j++) { int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; @@ -862,6 +864,7 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dst, fluid_mixer_buffers_t* src) base_dst = fluid_align_ptr(dst->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); for (i=0; i < minbuf; i++) { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) for (j=0; j < scount; j++) { int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; @@ -873,6 +876,7 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dst, fluid_mixer_buffers_t* src) base_dst = fluid_align_ptr(dst->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); for (i=0; i < minbuf; i++) { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) for 
(j=0; j < scount; j++) { int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; From 1d9dfd2a2ab65d1ae4cfb58f5f695fdcbef09139 Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 27 Apr 2018 18:25:07 +0200 Subject: [PATCH 10/15] optimize fluid_mixer_buffers_zero() only zero needed parts of sample buffers --- src/rvoice/fluid_rvoice_mixer.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index 6cbac0e3..deb704f6 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -460,22 +460,28 @@ fluid_render_loop_singlethread(fluid_rvoice_mixer_t* mixer) } static FLUID_INLINE void -fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers) +fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers, int current_blockcount) { - int size = FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE * sizeof(fluid_real_t); - fluid_real_t* buf; + int i, size = current_blockcount * FLUID_BUFSIZE * sizeof(fluid_real_t); - buf = fluid_align_ptr(buffers->left_buf, FLUID_DEFAULT_ALIGNMENT); - FLUID_MEMSET(buf, 0, buffers->buf_count * size); + /* TODO: Optimize by only zero out the buffers we actually use later on. 
*/ + int buf_count = buffers->buf_count, fx_buf_count = buffers->fx_buf_count; - buf = fluid_align_ptr(buffers->right_buf, FLUID_DEFAULT_ALIGNMENT); - FLUID_MEMSET(buf, 0, buffers->buf_count * size); + fluid_real_t *FLUID_RESTRICT buf_l = fluid_align_ptr(buffers->left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t *FLUID_RESTRICT buf_r = fluid_align_ptr(buffers->right_buf, FLUID_DEFAULT_ALIGNMENT); - buf = fluid_align_ptr(buffers->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); - FLUID_MEMSET(buf, 0, buffers->fx_buf_count * size); + for (i=0; i < buf_count; i++) { + FLUID_MEMSET(&buf_l[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + FLUID_MEMSET(&buf_r[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + } - buf = fluid_align_ptr(buffers->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); - FLUID_MEMSET(buf, 0, buffers->fx_buf_count * size); + buf_l = fluid_align_ptr(buffers->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + buf_r = fluid_align_ptr(buffers->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + + for (i=0; i < fx_buf_count; i++) { + FLUID_MEMSET(&buf_l[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + FLUID_MEMSET(&buf_r[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + } } static int @@ -783,6 +789,7 @@ fluid_mixer_thread_func (void* data) int hasValidData = 0; FLUID_DECLARE_VLA(fluid_real_t*, bufs, buffers->buf_count*2 + buffers->fx_buf_count*2); int bufcount = 0; + int current_blockcount = buffers->mixer->current_blockcount; while (!fluid_atomic_int_get(&mixer->threads_should_terminate)) { fluid_rvoice_t* rvoice = fluid_mixer_get_mt_rvoice(mixer); @@ -807,7 +814,7 @@ fluid_mixer_thread_func (void* data) else { // else: if buffer is not zeroed, zero buffers if (!hasValidData) { - fluid_mixer_buffers_zero(buffers); + fluid_mixer_buffers_zero(buffers, current_blockcount); bufcount = fluid_mixer_buffers_prepare(buffers, bufs); hasValidData = 1; } @@ -1047,7 +1054,7 @@ fluid_rvoice_mixer_render(fluid_rvoice_mixer_t* mixer, int 
blockcount) mixer->current_blockcount = blockcount; // Zero buffers - fluid_mixer_buffers_zero(&mixer->buffers); + fluid_mixer_buffers_zero(&mixer->buffers, blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_CLEAR, prof_ref, mixer->active_voices, mixer->current_blockcount * FLUID_BUFSIZE); From ab255b71780739e3375e9d3045598fa1d7d317e2 Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 27 Apr 2018 20:54:30 +0200 Subject: [PATCH 11/15] avoid polling variables from global mem that never change --- src/rvoice/fluid_rvoice_mixer.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index deb704f6..2f95340d 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -335,15 +335,12 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, static FLUID_INLINE void fluid_mixer_buffers_render_one(fluid_mixer_buffers_t* buffers, fluid_rvoice_t* rvoice, fluid_real_t** dest_bufs, - unsigned int dest_bufcount) + unsigned int dest_bufcount, fluid_real_t* src_buf, int blockcount) { - int blockcount = buffers->mixer->current_blockcount; int i, total_samples = 0, start_block = 0; - fluid_real_t* local_buf = fluid_align_ptr(buffers->local_buf, FLUID_DEFAULT_ALIGNMENT); - for (i=0; i < blockcount; i++) { - int s = fluid_rvoice_write(rvoice, &local_buf[FLUID_BUFSIZE*i]); + int s = fluid_rvoice_write(rvoice, &src_buf[FLUID_BUFSIZE*i]); if (s == -1) { start_block += s; s = FLUID_BUFSIZE; @@ -354,7 +351,7 @@ fluid_mixer_buffers_render_one(fluid_mixer_buffers_t* buffers, break; } } - fluid_rvoice_buffers_mix(&rvoice->buffers, local_buf, -start_block, total_samples-((-start_block)*FLUID_BUFSIZE), dest_bufs, dest_bufcount); + fluid_rvoice_buffers_mix(&rvoice->buffers, src_buf, -start_block, total_samples-((-start_block)*FLUID_BUFSIZE), dest_bufs, dest_bufcount); if (total_samples < buffers->mixer->current_blockcount * FLUID_BUFSIZE) { 
fluid_finish_rvoice(buffers, rvoice); @@ -444,16 +441,19 @@ DECLARE_FLUID_RVOICE_FUNCTION(fluid_rvoice_mixer_set_polyphony) static void -fluid_render_loop_singlethread(fluid_rvoice_mixer_t* mixer) +fluid_render_loop_singlethread(fluid_rvoice_mixer_t* mixer, int blockcount) { int i; FLUID_DECLARE_VLA(fluid_real_t*, bufs, mixer->buffers.buf_count * 2 + mixer->buffers.fx_buf_count * 2); int bufcount = fluid_mixer_buffers_prepare(&mixer->buffers, bufs); + + fluid_real_t* local_buf = fluid_align_ptr(mixer->buffers.local_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_profile_ref_var(prof_ref); for (i=0; i < mixer->active_voices; i++) { fluid_mixer_buffers_render_one(&mixer->buffers, mixer->rvoices[i], bufs, - bufcount); + bufcount, local_buf, blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_VOICE, prof_ref,1, mixer->current_blockcount * FLUID_BUFSIZE); } @@ -790,6 +790,7 @@ fluid_mixer_thread_func (void* data) FLUID_DECLARE_VLA(fluid_real_t*, bufs, buffers->buf_count*2 + buffers->fx_buf_count*2); int bufcount = 0; int current_blockcount = buffers->mixer->current_blockcount; + fluid_real_t* local_buf = fluid_align_ptr(buffers->local_buf, FLUID_DEFAULT_ALIGNMENT); while (!fluid_atomic_int_get(&mixer->threads_should_terminate)) { fluid_rvoice_t* rvoice = fluid_mixer_get_mt_rvoice(mixer); @@ -819,7 +820,7 @@ fluid_mixer_thread_func (void* data) hasValidData = 1; } // then render voice to buffers - fluid_mixer_buffers_render_one(buffers, rvoice, bufs, bufcount); + fluid_mixer_buffers_render_one(buffers, rvoice, bufs, bufcount, local_buf, current_blockcount); } } @@ -924,7 +925,9 @@ static void fluid_render_loop_multithread(fluid_rvoice_mixer_t* mixer) { int i, bufcount; - //int scount = mixer->current_blockcount * FLUID_BUFSIZE; + int current_blockcount = mixer->current_blockcount; + fluid_real_t* local_buf = fluid_align_ptr(mixer->buffers.local_buf, FLUID_DEFAULT_ALIGNMENT); + FLUID_DECLARE_VLA(fluid_real_t*, bufs, mixer->buffers.buf_count * 2 + mixer->buffers.fx_buf_count * 2); // 
How many threads should we start this time? @@ -933,7 +936,7 @@ fluid_render_loop_multithread(fluid_rvoice_mixer_t* mixer) extra_threads = mixer->thread_count; if (extra_threads == 0) { // No extra threads? No thread overhead! - fluid_render_loop_singlethread(mixer); + fluid_render_loop_singlethread(mixer, current_blockcount); return; } @@ -954,9 +957,9 @@ fluid_render_loop_multithread(fluid_rvoice_mixer_t* mixer) fluid_rvoice_t* rvoice = fluid_mixer_get_mt_rvoice(mixer); if (rvoice != NULL) { fluid_profile_ref_var(prof_ref); - fluid_mixer_buffers_render_one(&mixer->buffers, rvoice, bufs, bufcount); + fluid_mixer_buffers_render_one(&mixer->buffers, rvoice, bufs, bufcount, local_buf, current_blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_VOICE, prof_ref,1, - mixer->current_blockcount * FLUID_BUFSIZE); + current_blockcount * FLUID_BUFSIZE); //test++; } else { @@ -1063,7 +1066,7 @@ fluid_rvoice_mixer_render(fluid_rvoice_mixer_t* mixer, int blockcount) fluid_render_loop_multithread(mixer); else #endif - fluid_render_loop_singlethread(mixer); + fluid_render_loop_singlethread(mixer, blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_VOICES, prof_ref, mixer->active_voices, mixer->current_blockcount * FLUID_BUFSIZE); From 5280aae7ac31e8bc3e7ae31da63d5a1652ae512e Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 4 May 2018 16:40:45 +0200 Subject: [PATCH 12/15] optimize fluid_align_ptr() for alignment powers of two --- src/utils/fluid_sys.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/utils/fluid_sys.h b/src/utils/fluid_sys.h index f2d2cae3..9f4a5f5a 100644 --- a/src/utils/fluid_sys.h +++ b/src/utils/fluid_sys.h @@ -606,15 +606,19 @@ void fluid_msleep(unsigned int msecs); * Advances the given \c ptr to the next \c alignment byte boundary. * Make sure you've allocated an extra of \c alignment bytes to avoid a buffer overflow. 
* + * @note \c alignment must be a power of two * @return Returned pointer is guarenteed to be aligned to \c alignment boundary and in range \f[ ptr <= returned_ptr < ptr + alignment \f]. */ static FLUID_INLINE void* fluid_align_ptr(const void* ptr, unsigned int alignment) { uintptr_t ptr_int = (uintptr_t)ptr; - unsigned int offset = ptr_int % alignment; - unsigned int add = offset == 0 ? 0 // is already aligned, dont advance, else buffer overrun - : alignment - offset; // advance the pointer to the next alignment boundary + unsigned int offset = ptr_int & (alignment-1); + unsigned int add = (alignment - offset) & (alignment-1); // advance the pointer to the next alignment boundary ptr_int += add; + + /* assert alignment is power of two */ + FLUID_ASSERT(!(alignment == 0) && !(alignment & (alignment - 1))); + return (void*)ptr_int; } From 7fba85296b3f5e7c2342dfeff166e5a583382bea Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 4 May 2018 20:05:45 +0200 Subject: [PATCH 13/15] fix compilation with intel compiler by adding -restrict compile flag --- CMakeLists.txt | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c64ff4a4..b104c1d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,16 +156,26 @@ if ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_ "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined" ) endif ( NOT APPLE AND NOT OS2 ) - if ( OS2 ) - set ( GNUCC_VISIBILITY_FLAG "" ) - else ( OS2 ) - set ( GNUCC_VISIBILITY_FLAG "-fvisibility=hidden" ) + # define some warning flags + set ( ADDITIONAL_FLAGS "-Wall -W -Wpointer-arith -Wno-cast-qual -Wstrict-prototypes -Wno-unused-parameter -Wdeclaration-after-statement" ) + + # no visibility support on OS2 + if ( NOT OS2 ) + set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -fvisibility=hidden" ) endif ( OS2 ) + + if ( CMAKE_C_COMPILER_ID STREQUAL "Intel" ) + # icc needs the restrict flag to recognize C99 restrict 
pointers + set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -restrict" ) + else () # not intel + # gcc and clang support bad function cast and alignment warnings; add them as well. + set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -Wbad-function-cast -Wcast-align" ) + endif (CMAKE_C_COMPILER_ID STREQUAL "Intel" ) - set ( GNUCC_WARNING_FLAGS "-Wall -W -Wpointer-arith -Wbad-function-cast -Wno-cast-qual -Wcast-align -Wstrict-prototypes -Wno-unused-parameter -Wdeclaration-after-statement" ) - set ( CMAKE_C_FLAGS_DEBUG "-g ${GNUCC_VISIBILITY_FLAG} -DDEBUG ${GNUCC_WARNING_FLAGS} -fsanitize=undefined ${CMAKE_C_FLAGS_DEBUG}" ) - set ( CMAKE_C_FLAGS_RELEASE "-O2 -fomit-frame-pointer -finline-functions ${GNUCC_VISIBILITY_FLAG} -DNDEBUG ${GNUCC_WARNING_FLAGS} ${CMAKE_C_FLAGS_RELEASE}" ) - set ( CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -fomit-frame-pointer -finline-functions ${GNUCC_VISIBILITY_FLAG} -DNDEBUG ${GNUCC_WARNING_FLAGS} ${CMAKE_C_FLAGS_RELWITHDEBINFO}" ) + set ( CMAKE_C_FLAGS_DEBUG "-g ${ADDITIONAL_FLAGS} -DDEBUG -fsanitize=undefined ${CMAKE_C_FLAGS_DEBUG}" ) + set ( CMAKE_C_FLAGS_RELEASE "-O2 -fomit-frame-pointer -finline-functions ${ADDITIONAL_FLAGS} -DNDEBUG ${CMAKE_C_FLAGS_RELEASE}" ) + set ( CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -fomit-frame-pointer -finline-functions ${ADDITIONAL_FLAGS} -DNDEBUG ${CMAKE_C_FLAGS_RELWITHDEBINFO}" ) + endif ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "Intel" ) # Windows From a153d0ad736fd9961e52ee8459d99b9f7903bcfe Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 4 May 2018 20:07:06 +0200 Subject: [PATCH 14/15] no need to link against openMP --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b104c1d9..a6292695 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -518,8 +518,10 @@ if ( OpenMP_FOUND OR OpenMP_C_FOUND ) if ( ( NOT OpenMP_C_SPEC_DATE LESS "201307" ) OR NOT ( OpenMP_C_VERSION VERSION_LESS "4.0" ) ) 
set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) - set ( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}" ) - set ( LIBFLUID_LIBS "${OpenMP_C_LIBRARIES};${LIBFLUID_LIBS}" ) + + # currently no need to link against openMP runtime lib(s). If need be, uncomment below. + # set ( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}" ) + # set ( LIBFLUID_LIBS "${OpenMP_C_LIBRARIES};${LIBFLUID_LIBS}" ) set ( HAVE_OPENMP 1 ) endif() endif() From d8f46f2bae545926768b6468169eddbd485a5717 Mon Sep 17 00:00:00 2001 From: derselbst Date: Fri, 4 May 2018 20:15:01 +0200 Subject: [PATCH 15/15] remove unused vars, clarify comments --- CMakeLists.txt | 2 +- src/rvoice/fluid_rvoice_mixer.c | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6292695..cac22dad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -162,7 +162,7 @@ if ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_ # no visibility support on OS2 if ( NOT OS2 ) set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -fvisibility=hidden" ) - endif ( OS2 ) + endif ( NOT OS2 ) if ( CMAKE_C_COMPILER_ID STREQUAL "Intel" ) # icc needs the restrict flag to recognize C99 restrict pointers diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index 2f95340d..1ac2036b 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -53,11 +53,13 @@ struct _fluid_mixer_buffers_t { int fx_buf_count; /** buffer to store the left part of a stereo channel to. - * Specifically a two dimensional array, containing \c buf_count buffers - * (i.e. 
for each synth.audio-channels), of which each buffer contains - * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT audio items (=samples) - * @note the beginning of the array is aligned to the FLUID_DEFAULT_ALIGNMENT - * boundary, so make sure to access all those pointers using fluid_align_ptr() + * Specifically a two dimensional array, containing \c buf_count sample buffers + * (i.e. for each synth.audio-channels), of which each contains + * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT audio items (=samples) + * @note Each sample buffer is aligned to the FLUID_DEFAULT_ALIGNMENT + * boundary provided that this pointer points to an aligned buffer. + * So make sure to access the sample buffer by first aligning this + * pointer using fluid_align_ptr() */ fluid_real_t* left_buf; @@ -117,7 +119,6 @@ static FLUID_INLINE void fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) { int i; - fluid_profile_ref_var(prof_ref); void (*reverb_process_func)(fluid_revmodel_t* rev, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); void (*chorus_process_func)(fluid_chorus_t* chorus, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); @@ -127,6 +128,8 @@ fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) fluid_real_t* in_rev = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); fluid_real_t* in_ch = in_rev; + fluid_profile_ref_var(prof_ref); + in_rev = &in_rev[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; in_ch = &in_ch [SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; @@ -487,7 +490,7 @@ fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers, int current_blockcount) static int fluid_mixer_buffers_init(fluid_mixer_buffers_t* buffers, fluid_rvoice_mixer_t* mixer) { - int i, samplecount; + int samplecount; buffers->mixer = mixer; buffers->buf_count = buffers->mixer->buffers.buf_count; @@ -600,8 +603,6 @@ new_fluid_rvoice_mixer(int buf_count, int fx_buf_count, 
fluid_real_t sample_rate static void fluid_mixer_buffers_free(fluid_mixer_buffers_t* buffers) { - int i; - FLUID_FREE(buffers->finished_voices); /* free all the sample buffers */