diff --git a/CMakeLists.txt b/CMakeLists.txt index 402074ba..ccade1cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,16 +156,26 @@ if ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_ "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-undefined" ) endif ( NOT APPLE AND NOT OS2 ) - if ( OS2 ) - set ( GNUCC_VISIBILITY_FLAG "" ) - else ( OS2 ) - set ( GNUCC_VISIBILITY_FLAG "-fvisibility=hidden" ) - endif ( OS2 ) + # define some warning flags + set ( ADDITIONAL_FLAGS "-Wall -W -Wpointer-arith -Wno-cast-qual -Wstrict-prototypes -Wno-unused-parameter -Wdeclaration-after-statement" ) + + # no visibility support on OS2 + if ( NOT OS2 ) + set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -fvisibility=hidden" ) + endif ( NOT OS2 ) + + if ( CMAKE_C_COMPILER_ID STREQUAL "Intel" ) + # icc needs the restrict flag to recognize C99 restrict pointers + set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -restrict" ) + else () # not intel + # gcc and clang support bad function cast and alignment warnings; add them as well. 
+ set ( ADDITIONAL_FLAGS "${ADDITIONAL_FLAGS} -Wbad-function-cast -Wcast-align" ) + endif (CMAKE_C_COMPILER_ID STREQUAL "Intel" ) - set ( GNUCC_WARNING_FLAGS "-Wall -W -Wpointer-arith -Wbad-function-cast -Wno-cast-qual -Wcast-align -Wstrict-prototypes -Wno-unused-parameter -Wdeclaration-after-statement" ) - set ( CMAKE_C_FLAGS_DEBUG "-g ${GNUCC_VISIBILITY_FLAG} -DDEBUG ${GNUCC_WARNING_FLAGS} -fsanitize=undefined ${CMAKE_C_FLAGS_DEBUG}" ) - set ( CMAKE_C_FLAGS_RELEASE "-O2 -fomit-frame-pointer -finline-functions ${GNUCC_VISIBILITY_FLAG} -DNDEBUG ${GNUCC_WARNING_FLAGS} ${CMAKE_C_FLAGS_RELEASE}" ) - set ( CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -fomit-frame-pointer -finline-functions ${GNUCC_VISIBILITY_FLAG} -DNDEBUG ${GNUCC_WARNING_FLAGS} ${CMAKE_C_FLAGS_RELWITHDEBINFO}" ) + set ( CMAKE_C_FLAGS_DEBUG "-g ${ADDITIONAL_FLAGS} -DDEBUG -fsanitize=undefined ${CMAKE_C_FLAGS_DEBUG}" ) + set ( CMAKE_C_FLAGS_RELEASE "-O2 -fomit-frame-pointer -finline-functions ${ADDITIONAL_FLAGS} -DNDEBUG ${CMAKE_C_FLAGS_RELEASE}" ) + set ( CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -fomit-frame-pointer -finline-functions ${ADDITIONAL_FLAGS} -DNDEBUG ${CMAKE_C_FLAGS_RELWITHDEBINFO}" ) + endif ( CMAKE_COMPILER_IS_GNUCC OR CMAKE_C_COMPILER_ID STREQUAL "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "Intel" ) # Windows @@ -500,6 +510,22 @@ else ( enable-readline ) unset ( READLINE_LIBS CACHE ) endif ( enable-readline ) +unset ( HAVE_OPENMP CACHE ) +find_package ( OpenMP QUIET ) +if ( OpenMP_FOUND OR OpenMP_C_FOUND ) + message(STATUS "Found OpenMP ${OpenMP_C_SPEC_DATE}") + # require at least OMP 4.0 + if ( ( NOT OpenMP_C_SPEC_DATE LESS "201307" ) OR NOT ( OpenMP_C_VERSION VERSION_LESS "4.0" ) ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) + set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) + + # currently no need to link against openMP runtime lib(s). If need be, uncomment below. 
+ # set ( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}" ) + # set ( LIBFLUID_LIBS "${OpenMP_C_LIBRARIES};${LIBFLUID_LIBS}" ) + set ( HAVE_OPENMP 1 ) + endif() +endif() + if(enable-tests) # manipulate some variables to setup a proper test env diff --git a/cmake_admin/report.cmake b/cmake_admin/report.cmake index bf2fbad0..0d3018cb 100644 --- a/cmake_admin/report.cmake +++ b/cmake_admin/report.cmake @@ -127,6 +127,12 @@ else ( WITH_PROFILING ) message ( "Profiling: no" ) endif ( WITH_PROFILING ) +if ( HAVE_OPENMP ) + message ( "OpenMP 4.0: yes" ) +else ( HAVE_OPENMP ) + message ( "OpenMP 4.0: no" ) +endif ( HAVE_OPENMP ) + if ( ENABLE_DEBUG ) message ( "Debug: yes" ) else ( ENABLE_DEBUG ) diff --git a/src/bindings/fluid_ladspa.c b/src/bindings/fluid_ladspa.c index c7faa2c7..bb79ba56 100644 --- a/src/bindings/fluid_ladspa.c +++ b/src/bindings/fluid_ladspa.c @@ -266,11 +266,12 @@ void delete_fluid_ladspa_fx(fluid_ladspa_fx_t *fx) * @param fx LADSPA fx instance * @param prefix common name prefix for the created nodes * @param num_buffers number of of buffers buffer array - * @param buffers array of pointers to buffers + * @param buffers array of sample buffers + * @param buf_stride number of samples contained in one buffer * @return FLUID_OK on success, otherwise FLUID_FAILED */ int fluid_ladspa_add_host_ports(fluid_ladspa_fx_t *fx, const char *prefix, - int num_buffers, fluid_real_t *buffers[]) + int num_buffers, fluid_real_t buffers[], int buf_stride) { int i; char name[99]; @@ -296,7 +297,7 @@ int fluid_ladspa_add_host_ports(fluid_ladspa_fx_t *fx, const char *prefix, if (new_fluid_ladspa_node(fx, name, FLUID_LADSPA_NODE_AUDIO | FLUID_LADSPA_NODE_HOST, - buffers[i]) == NULL) + &buffers[i * buf_stride]) == NULL) { LADSPA_API_RETURN(fx, FLUID_FAILED); } diff --git a/src/bindings/fluid_ladspa.h b/src/bindings/fluid_ladspa.h index e4669855..be607a89 100644 --- a/src/bindings/fluid_ladspa.h +++ b/src/bindings/fluid_ladspa.h @@ -31,6 +31,6 @@ 
int fluid_ladspa_set_sample_rate(fluid_ladspa_fx_t *fx, fluid_real_t sample_rate void fluid_ladspa_run(fluid_ladspa_fx_t *fx, int block_count, int block_size); int fluid_ladspa_add_host_ports(fluid_ladspa_fx_t *fx, const char *prefix, - int num_buffers, fluid_real_t *buffers[]); + int num_buffers, fluid_real_t buffers[], int buf_stride); #endif /* _FLUID_LADSPA_H */ diff --git a/src/config.cmake b/src/config.cmake index 49403121..aa222676 100644 --- a/src/config.cmake +++ b/src/config.cmake @@ -76,6 +76,9 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_NETINET_TCP_H @HAVE_NETINET_TCP_H@ +/* Define if compiling with openMP to enable parallel audio rendering */ +#cmakedefine HAVE_OPENMP @HAVE_OPENMP@ + /* Define to 1 if you have the header file. */ #cmakedefine HAVE_PTHREAD_H @HAVE_PTHREAD_H@ diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index e2511d43..62a9c8ba 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -47,13 +47,32 @@ struct _fluid_mixer_buffers_t { fluid_atomic_int_t ready; /**< Atomic: buffers are ready for mixing */ + fluid_real_t* local_buf; + int buf_count; - fluid_real_t** left_buf; - fluid_real_t** right_buf; - int fx_buf_count; - fluid_real_t** fx_left_buf; - fluid_real_t** fx_right_buf; + + /** buffer to store the left part of a stereo channel to. + * Specifically a two dimensional array, containing \c buf_count sample buffers + * (i.e. for each synth.audio-channels), of which each contains + * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT audio items (=samples) + * @note Each sample buffer is aligned to the FLUID_DEFAULT_ALIGNMENT + * boundary provided that this pointer points to an aligned buffer. 
+ * So make sure to access the sample buffer by first aligning this + * pointer using fluid_align_ptr() + */ + fluid_real_t* left_buf; + + /** dito, but for right part of a stereo channel */ + fluid_real_t* right_buf; + + /** buffer to store the left part of a stereo effects channel to. + * Specifically a two dimensional array, containing \c fx_buf_count buffers + * (i.e. for each synth.effects-channels), of which each buffer contains + * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT audio items (=samples) + */ + fluid_real_t* fx_left_buf; + fluid_real_t* fx_right_buf; }; typedef struct _fluid_mixer_fx_t fluid_mixer_fx_t; @@ -104,41 +123,60 @@ static FLUID_INLINE void fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) { int i; - fluid_profile_ref_var(prof_ref); - if (mixer->fx.with_reverb) { - if (mixer->fx.mix_fx_to_out) { - for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_revmodel_processmix(mixer->fx.reverb, - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL][i], - &mixer->buffers.left_buf[0][i], - &mixer->buffers.right_buf[0][i]); - } - else { - for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_revmodel_processreplace(mixer->fx.reverb, - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL][i], - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL][i], - &mixer->buffers.fx_right_buf[SYNTH_REVERB_CHANNEL][i]); + + void (*reverb_process_func)(fluid_revmodel_t* rev, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); + void (*chorus_process_func)(fluid_chorus_t* chorus, fluid_real_t *in, fluid_real_t *left_out, fluid_real_t *right_out); + + fluid_real_t *out_rev_l, *out_rev_r, *out_ch_l, *out_ch_r; + + fluid_real_t* in_rev = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t* in_ch = in_rev; + + fluid_profile_ref_var(prof_ref); + + in_rev = &in_rev[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + in_ch = &in_ch 
[SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + + if (mixer->fx.mix_fx_to_out) + { + out_rev_l = fluid_align_ptr(mixer->buffers.left_buf, FLUID_DEFAULT_ALIGNMENT); + out_rev_r = fluid_align_ptr(mixer->buffers.right_buf, FLUID_DEFAULT_ALIGNMENT); + + out_ch_l = &out_rev_l[0 * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + out_ch_r = &out_rev_r[0 * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + + reverb_process_func = fluid_revmodel_processmix; + chorus_process_func = fluid_chorus_processmix; + } + else + { + out_ch_l = out_rev_l = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + out_ch_r = out_rev_r = fluid_align_ptr(mixer->buffers.fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + + out_rev_l = &out_rev_l[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + out_rev_r = &out_rev_r[SYNTH_REVERB_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + + out_ch_l = &out_ch_l[SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + out_ch_r = &out_ch_r[SYNTH_CHORUS_CHANNEL * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE]; + + reverb_process_func = fluid_revmodel_processreplace; + chorus_process_func = fluid_chorus_processreplace; + } + + + if (mixer->fx.with_reverb) { + for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) + reverb_process_func(mixer->fx.reverb, &in_rev[i], &out_rev_l[i], &out_rev_r[i]); + fluid_profile(FLUID_PROF_ONE_BLOCK_REVERB, prof_ref,0, mixer->current_blockcount * FLUID_BUFSIZE); } if (mixer->fx.with_chorus) { - if (mixer->fx.mix_fx_to_out) { for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_chorus_processmix(mixer->fx.chorus, - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL][i], - &mixer->buffers.left_buf[0][i], - &mixer->buffers.right_buf[0][i]); - } - else { - for (i=0; i < mixer->current_blockcount * FLUID_BUFSIZE; i += FLUID_BUFSIZE) - fluid_chorus_processreplace(mixer->fx.chorus, - 
&mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL][i], - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL][i], - &mixer->buffers.fx_right_buf[SYNTH_CHORUS_CHANNEL][i]); - } + chorus_process_func(mixer->fx.chorus, &in_ch[i], &out_ch_l[i], &out_ch_r[i]); + fluid_profile(FLUID_PROF_ONE_BLOCK_CHORUS, prof_ref,0, mixer->current_blockcount * FLUID_BUFSIZE); } @@ -160,7 +198,7 @@ fluid_rvoice_mixer_process_fx(fluid_rvoice_mixer_t* mixer) static FLUID_INLINE int fluid_mixer_buffers_prepare(fluid_mixer_buffers_t* buffers, fluid_real_t** outbufs) { - fluid_real_t *reverb_buf, *chorus_buf; + fluid_real_t *base_ptr; int i; int with_reverb = buffers->mixer->fx.with_reverb; int with_chorus = buffers->mixer->fx.with_chorus; @@ -173,10 +211,11 @@ fluid_mixer_buffers_prepare(fluid_mixer_buffers_t* buffers, fluid_real_t** outbu with_reverb = (with_reverb | with_ladspa); with_chorus = (with_chorus | with_ladspa); #endif - reverb_buf = (with_reverb) ? buffers->fx_left_buf[SYNTH_REVERB_CHANNEL] : NULL; - chorus_buf = (with_chorus) ? buffers->fx_left_buf[SYNTH_CHORUS_CHANNEL] : NULL; - outbufs[buffers->buf_count*2 + SYNTH_REVERB_CHANNEL] = reverb_buf; - outbufs[buffers->buf_count*2 + SYNTH_CHORUS_CHANNEL] = chorus_buf; + + base_ptr = fluid_align_ptr(buffers->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + + outbufs[buffers->buf_count*2 + SYNTH_REVERB_CHANNEL] = (with_reverb) ? &base_ptr[SYNTH_REVERB_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] : NULL; + outbufs[buffers->buf_count*2 + SYNTH_CHORUS_CHANNEL] = (with_chorus) ? &base_ptr[SYNTH_CHORUS_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] : NULL; /* The output associated with a MIDI channel is wrapped around * using the number of audio groups as modulo divider. This is @@ -190,10 +229,14 @@ fluid_mixer_buffers_prepare(fluid_mixer_buffers_t* buffers, fluid_real_t** outbu * channels 1, 4, 7, 10 etc go to output 1; 2, 5, 8, 11 etc to * output 2, 3, 6, 9, 12 etc to output 3. 
*/ - + base_ptr = fluid_align_ptr(buffers->left_buf, FLUID_DEFAULT_ALIGNMENT); for (i = 0; i < buffers->buf_count; i++) { - outbufs[i*2] = buffers->left_buf[i]; - outbufs[i*2+1] = buffers->right_buf[i]; + outbufs[i*2] = &base_ptr[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; + } + + base_ptr = fluid_align_ptr(buffers->right_buf, FLUID_DEFAULT_ALIGNMENT); + for (i = 0; i < buffers->buf_count; i++) { + outbufs[i*2+1] = &base_ptr[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; } return buffers->buf_count*2 + 2; } @@ -255,42 +298,39 @@ get_dest_buf(fluid_rvoice_buffers_t* buffers, int index, * * @param buffers Destination buffer(s) * @param dsp_buf Mono sample source - * @param samplecount Number of samples to process (no FLUID_BUFSIZE restriction) + * @param start_block Block to start mixing at + * @param sample_count number of samples to mix following \c start_block * @param dest_bufs Array of buffers to mixdown to * @param dest_bufcount Length of dest_bufs */ static void fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, - fluid_real_t* dsp_buf, int start, int samplecount, + fluid_real_t *FLUID_RESTRICT dsp_buf, + int start_block, int sample_count, fluid_real_t** dest_bufs, int dest_bufcount) { int bufcount = buffers->count; int i, dsp_i; - if (!samplecount || !bufcount || !dest_bufcount) + if (sample_count <= 0 || dest_bufcount <= 0) return; + FLUID_ASSERT((uintptr_t)dsp_buf % FLUID_DEFAULT_ALIGNMENT == 0); + FLUID_ASSERT((uintptr_t)(&dsp_buf[start_block * FLUID_BUFSIZE]) % FLUID_DEFAULT_ALIGNMENT == 0); + for (i=0; i < bufcount; i++) { - fluid_real_t* buf = get_dest_buf(buffers, i, dest_bufs, dest_bufcount); - fluid_real_t* next_buf; + fluid_real_t *FLUID_RESTRICT buf = get_dest_buf(buffers, i, dest_bufs, dest_bufcount); fluid_real_t amp = buffers->bufs[i].amp; + if (buf == NULL || amp == 0.0f) continue; - /* Optimization for centered stereo samples - we can save one - multiplication per sample */ - next_buf = (i+1 >= bufcount ? 
NULL : get_dest_buf(buffers, i+1, dest_bufs, dest_bufcount)); - if (next_buf && buffers->bufs[i+1].amp == amp) { - for (dsp_i = start; dsp_i < samplecount; dsp_i++) { - fluid_real_t samp = amp * dsp_buf[dsp_i]; - buf[dsp_i] += samp; - next_buf[dsp_i] += samp; - } - i++; - } - else { - for (dsp_i = start; dsp_i < samplecount; dsp_i++) + FLUID_ASSERT((uintptr_t)buf % FLUID_DEFAULT_ALIGNMENT == 0); + + #pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT) + for (dsp_i = (start_block * FLUID_BUFSIZE); dsp_i < sample_count; dsp_i++) + { buf[dsp_i] += amp * dsp_buf[dsp_i]; - } + } } } @@ -302,27 +342,25 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t* buffers, static FLUID_INLINE void fluid_mixer_buffers_render_one(fluid_mixer_buffers_t* buffers, fluid_rvoice_t* rvoice, fluid_real_t** dest_bufs, - unsigned int dest_bufcount) + unsigned int dest_bufcount, fluid_real_t* src_buf, int blockcount) { - int blockcount = buffers->mixer->current_blockcount; - int i, result = 0, start = 0; - - FLUID_DECLARE_VLA(fluid_real_t, local_buf, FLUID_BUFSIZE*blockcount); + int i, total_samples = 0, start_block = 0; for (i=0; i < blockcount; i++) { - int s = fluid_rvoice_write(rvoice, &local_buf[FLUID_BUFSIZE*i]); + int s = fluid_rvoice_write(rvoice, &src_buf[FLUID_BUFSIZE*i]); if (s == -1) { - start += FLUID_BUFSIZE; + start_block += s; s = FLUID_BUFSIZE; } - result += s; + total_samples += s; + if (s < FLUID_BUFSIZE) { - break; + break; } } - fluid_rvoice_buffers_mix(&rvoice->buffers, local_buf, start, result-start, dest_bufs, dest_bufcount); + fluid_rvoice_buffers_mix(&rvoice->buffers, src_buf, -start_block, total_samples-((-start_block)*FLUID_BUFSIZE), dest_bufs, dest_bufcount); - if (result < buffers->mixer->current_blockcount * FLUID_BUFSIZE) { + if (total_samples < buffers->mixer->current_blockcount * FLUID_BUFSIZE) { fluid_finish_rvoice(buffers, rvoice); } } @@ -410,98 +448,82 @@ DECLARE_FLUID_RVOICE_FUNCTION(fluid_rvoice_mixer_set_polyphony) static void 
-fluid_render_loop_singlethread(fluid_rvoice_mixer_t* mixer) +fluid_render_loop_singlethread(fluid_rvoice_mixer_t* mixer, int blockcount) { int i; FLUID_DECLARE_VLA(fluid_real_t*, bufs, mixer->buffers.buf_count * 2 + mixer->buffers.fx_buf_count * 2); int bufcount = fluid_mixer_buffers_prepare(&mixer->buffers, bufs); + + fluid_real_t* local_buf = fluid_align_ptr(mixer->buffers.local_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_profile_ref_var(prof_ref); for (i=0; i < mixer->active_voices; i++) { fluid_mixer_buffers_render_one(&mixer->buffers, mixer->rvoices[i], bufs, - bufcount); + bufcount, local_buf, blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_VOICE, prof_ref,1, mixer->current_blockcount * FLUID_BUFSIZE); } } - static FLUID_INLINE void -fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers) +fluid_mixer_buffers_zero(fluid_mixer_buffers_t* buffers, int current_blockcount) { - int i; - int size = buffers->mixer->current_blockcount * FLUID_BUFSIZE * sizeof(fluid_real_t); - /* TODO: Optimize by only zero out the buffers we actually use later on. */ - for (i=0; i < buffers->buf_count; i++) { - FLUID_MEMSET(buffers->left_buf[i], 0, size); - FLUID_MEMSET(buffers->right_buf[i], 0, size); - } - for (i=0; i < buffers->fx_buf_count; i++) { - FLUID_MEMSET(buffers->fx_left_buf[i], 0, size); - FLUID_MEMSET(buffers->fx_right_buf[i], 0, size); - } + int i, size = current_blockcount * FLUID_BUFSIZE * sizeof(fluid_real_t); + + /* TODO: Optimize by only zero out the buffers we actually use later on. 
*/ + int buf_count = buffers->buf_count, fx_buf_count = buffers->fx_buf_count; + + fluid_real_t *FLUID_RESTRICT buf_l = fluid_align_ptr(buffers->left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t *FLUID_RESTRICT buf_r = fluid_align_ptr(buffers->right_buf, FLUID_DEFAULT_ALIGNMENT); + + for (i=0; i < buf_count; i++) { + FLUID_MEMSET(&buf_l[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + FLUID_MEMSET(&buf_r[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + } + + buf_l = fluid_align_ptr(buffers->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + buf_r = fluid_align_ptr(buffers->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + + for (i=0; i < fx_buf_count; i++) { + FLUID_MEMSET(&buf_l[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + FLUID_MEMSET(&buf_r[i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE], 0, size); + } } - - static int fluid_mixer_buffers_init(fluid_mixer_buffers_t* buffers, fluid_rvoice_mixer_t* mixer) { - int i, samplecount; + int samplecount; buffers->mixer = mixer; buffers->buf_count = buffers->mixer->buffers.buf_count; buffers->fx_buf_count = buffers->mixer->buffers.fx_buf_count; samplecount = FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT; - + + /* Local mono voice buf */ + buffers->local_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, samplecount, FLUID_DEFAULT_ALIGNMENT); /* Left and right audio buffers */ - buffers->left_buf = FLUID_ARRAY(fluid_real_t*, buffers->buf_count); - buffers->right_buf = FLUID_ARRAY(fluid_real_t*, buffers->buf_count); + buffers->left_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); + buffers->right_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); - if ((buffers->left_buf == NULL) || (buffers->right_buf == NULL)) { + if ((buffers->local_buf == NULL) || (buffers->left_buf == NULL) || (buffers->right_buf == NULL)) { FLUID_LOG(FLUID_ERR, "Out of memory"); return 0; } - 
FLUID_MEMSET(buffers->left_buf, 0, buffers->buf_count * sizeof(fluid_real_t*)); - FLUID_MEMSET(buffers->right_buf, 0, buffers->buf_count * sizeof(fluid_real_t*)); - - for (i = 0; i < buffers->buf_count; i++) { - - buffers->left_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - buffers->right_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - - if ((buffers->left_buf[i] == NULL) || (buffers->right_buf[i] == NULL)) { - FLUID_LOG(FLUID_ERR, "Out of memory"); - return 0; - } - } - /* Effects audio buffers */ - buffers->fx_left_buf = FLUID_ARRAY(fluid_real_t*, buffers->fx_buf_count); - buffers->fx_right_buf = FLUID_ARRAY(fluid_real_t*, buffers->fx_buf_count); + buffers->fx_left_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->fx_buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); + buffers->fx_right_buf = FLUID_ARRAY_ALIGNED(fluid_real_t, buffers->fx_buf_count * samplecount, FLUID_DEFAULT_ALIGNMENT); if ((buffers->fx_left_buf == NULL) || (buffers->fx_right_buf == NULL)) { FLUID_LOG(FLUID_ERR, "Out of memory"); return 0; } - FLUID_MEMSET(buffers->fx_left_buf, 0, buffers->fx_buf_count * sizeof(fluid_real_t*)); - FLUID_MEMSET(buffers->fx_right_buf, 0, buffers->fx_buf_count * sizeof(fluid_real_t*)); - - for (i = 0; i < buffers->fx_buf_count; i++) { - buffers->fx_left_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - buffers->fx_right_buf[i] = FLUID_ARRAY(fluid_real_t, samplecount); - - if ((buffers->fx_left_buf[i] == NULL) || (buffers->fx_right_buf[i] == NULL)) { - FLUID_LOG(FLUID_ERR, "Out of memory"); - return 0; - } - } - buffers->finished_voices = NULL; if (fluid_mixer_buffers_update_polyphony(buffers, mixer->polyphony) == FLUID_FAILED) { @@ -585,46 +607,14 @@ new_fluid_rvoice_mixer(int buf_count, int fx_buf_count, fluid_real_t sample_rate static void fluid_mixer_buffers_free(fluid_mixer_buffers_t* buffers) { - int i; - FLUID_FREE(buffers->finished_voices); /* free all the sample buffers */ - if (buffers->left_buf != NULL) { - for (i = 0; i < buffers->buf_count; i++) { 
- if (buffers->left_buf[i] != NULL) { - FLUID_FREE(buffers->left_buf[i]); - } - } - FLUID_FREE(buffers->left_buf); - } - - if (buffers->right_buf != NULL) { - for (i = 0; i < buffers->buf_count; i++) { - if (buffers->right_buf[i] != NULL) { - FLUID_FREE(buffers->right_buf[i]); - } - } - FLUID_FREE(buffers->right_buf); - } - - if (buffers->fx_left_buf != NULL) { - for (i = 0; i < buffers->fx_buf_count; i++) { - if (buffers->fx_left_buf[i] != NULL) { - FLUID_FREE(buffers->fx_left_buf[i]); - } - } - FLUID_FREE(buffers->fx_left_buf); - } - - if (buffers->fx_right_buf != NULL) { - for (i = 0; i < buffers->fx_buf_count; i++) { - if (buffers->fx_right_buf[i] != NULL) { - FLUID_FREE(buffers->fx_right_buf[i]); - } - } - FLUID_FREE(buffers->fx_right_buf); - } + FLUID_FREE(buffers->local_buf); + FLUID_FREE(buffers->left_buf); + FLUID_FREE(buffers->right_buf); + FLUID_FREE(buffers->fx_left_buf); + FLUID_FREE(buffers->fx_right_buf); } void delete_fluid_rvoice_mixer(fluid_rvoice_mixer_t* mixer) @@ -648,6 +638,7 @@ void delete_fluid_rvoice_mixer(fluid_rvoice_mixer_t* mixer) delete_fluid_revmodel(mixer->fx.reverb); if (mixer->fx.chorus) delete_fluid_chorus(mixer->fx.chorus); + FLUID_FREE(mixer->rvoices); FLUID_FREE(mixer); } @@ -665,18 +656,33 @@ void fluid_rvoice_mixer_set_ladspa(fluid_rvoice_mixer_t* mixer, { return; } + else + { + fluid_real_t* main_l = fluid_align_ptr(mixer->buffers.left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t* main_r = fluid_align_ptr(mixer->buffers.right_buf, FLUID_DEFAULT_ALIGNMENT); + + fluid_real_t* rev = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + fluid_real_t* chor = rev; + + rev = &rev[SYNTH_REVERB_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; + chor = &chor[SYNTH_CHORUS_CHANNEL * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT]; - fluid_ladspa_add_host_ports(ladspa_fx, "Main:L", audio_groups, - mixer->buffers.left_buf); + fluid_ladspa_add_host_ports(ladspa_fx, "Main:L", audio_groups, + main_l, + 
FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); - fluid_ladspa_add_host_ports(ladspa_fx, "Main:R", audio_groups, - mixer->buffers.right_buf); + fluid_ladspa_add_host_ports(ladspa_fx, "Main:R", audio_groups, + main_r, + FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); - fluid_ladspa_add_host_ports(ladspa_fx, "Reverb:Send", 1, - &mixer->buffers.fx_left_buf[SYNTH_REVERB_CHANNEL]); + fluid_ladspa_add_host_ports(ladspa_fx, "Reverb:Send", 1, + rev, + FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); - fluid_ladspa_add_host_ports(ladspa_fx, "Chorus:Send", 1, - &mixer->buffers.fx_left_buf[SYNTH_CHORUS_CHANNEL]); + fluid_ladspa_add_host_ports(ladspa_fx, "Chorus:Send", 1, + chor, + FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT); + } } #endif @@ -738,18 +744,18 @@ DECLARE_FLUID_RVOICE_FUNCTION(fluid_rvoice_mixer_reset_chorus) } int fluid_rvoice_mixer_get_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** left, fluid_real_t*** right) + fluid_real_t** left, fluid_real_t** right) { - *left = mixer->buffers.left_buf; - *right = mixer->buffers.right_buf; + *left = fluid_align_ptr(mixer->buffers.left_buf, FLUID_DEFAULT_ALIGNMENT); + *right = fluid_align_ptr(mixer->buffers.right_buf, FLUID_DEFAULT_ALIGNMENT); return mixer->buffers.buf_count; } int fluid_rvoice_mixer_get_fx_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** fx_left, fluid_real_t*** fx_right) + fluid_real_t** fx_left, fluid_real_t** fx_right) { - *fx_left = mixer->buffers.fx_left_buf; - *fx_right = mixer->buffers.fx_right_buf; + *fx_left = fluid_align_ptr(mixer->buffers.fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + *fx_right = fluid_align_ptr(mixer->buffers.fx_right_buf, FLUID_DEFAULT_ALIGNMENT); return mixer->buffers.fx_buf_count; } @@ -790,6 +796,8 @@ fluid_mixer_thread_func (void* data) int hasValidData = 0; FLUID_DECLARE_VLA(fluid_real_t*, bufs, buffers->buf_count*2 + buffers->fx_buf_count*2); int bufcount = 0; + int current_blockcount = buffers->mixer->current_blockcount; + fluid_real_t* local_buf = 
fluid_align_ptr(buffers->local_buf, FLUID_DEFAULT_ALIGNMENT); while (!fluid_atomic_int_get(&mixer->threads_should_terminate)) { fluid_rvoice_t* rvoice = fluid_mixer_get_mt_rvoice(mixer); @@ -814,12 +822,12 @@ fluid_mixer_thread_func (void* data) else { // else: if buffer is not zeroed, zero buffers if (!hasValidData) { - fluid_mixer_buffers_zero(buffers); + fluid_mixer_buffers_zero(buffers, current_blockcount); bufcount = fluid_mixer_buffers_prepare(buffers, bufs); hasValidData = 1; } // then render voice to buffers - fluid_mixer_buffers_render_one(buffers, rvoice, bufs, bufcount); + fluid_mixer_buffers_render_one(buffers, rvoice, bufs, bufcount, local_buf, current_blockcount); } } @@ -827,29 +835,67 @@ fluid_mixer_thread_func (void* data) } static void -fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src) +fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dst, fluid_mixer_buffers_t* src) { int i,j; - int scount = dest->mixer->current_blockcount * FLUID_BUFSIZE; + int scount = dst->mixer->current_blockcount * FLUID_BUFSIZE; int minbuf; + fluid_real_t *FLUID_RESTRICT base_src; + fluid_real_t *FLUID_RESTRICT base_dst; - minbuf = dest->buf_count; + minbuf = dst->buf_count; if (minbuf > src->buf_count) minbuf = src->buf_count; - for (i=0; i < minbuf; i++) { - for (j=0; j < scount; j++) { - dest->left_buf[i][j] += src->left_buf[i][j]; - dest->right_buf[i][j] += src->right_buf[i][j]; + + base_src = fluid_align_ptr(src->left_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->left_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; + } + } + + base_src = fluid_align_ptr(src->right_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->right_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + 
#pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; } } - minbuf = dest->fx_buf_count; + minbuf = dst->fx_buf_count; if (minbuf > src->fx_buf_count) minbuf = src->fx_buf_count; - for (i=0; i < minbuf; i++) { - for (j=0; j < scount; j++) { - dest->fx_left_buf[i][j] += src->fx_left_buf[i][j]; - dest->fx_right_buf[i][j] += src->fx_right_buf[i][j]; + + base_src = fluid_align_ptr(src->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->fx_left_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; + } + } + + base_src = fluid_align_ptr(src->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + base_dst = fluid_align_ptr(dst->fx_right_buf, FLUID_DEFAULT_ALIGNMENT); + for (i=0; i < minbuf; i++) + { + #pragma omp simd aligned(base_dst,base_src:FLUID_DEFAULT_ALIGNMENT) + for (j=0; j < scount; j++) + { + int dsp_i = i * FLUID_MIXER_MAX_BUFFERS_DEFAULT * FLUID_BUFSIZE + j; + base_dst[dsp_i] += base_src[dsp_i]; } } } @@ -886,7 +932,9 @@ static void fluid_render_loop_multithread(fluid_rvoice_mixer_t* mixer) { int i, bufcount; - //int scount = mixer->current_blockcount * FLUID_BUFSIZE; + int current_blockcount = mixer->current_blockcount; + fluid_real_t* local_buf = fluid_align_ptr(mixer->buffers.local_buf, FLUID_DEFAULT_ALIGNMENT); + FLUID_DECLARE_VLA(fluid_real_t*, bufs, mixer->buffers.buf_count * 2 + mixer->buffers.fx_buf_count * 2); // How many threads should we start this time? @@ -895,7 +943,7 @@ fluid_render_loop_multithread(fluid_rvoice_mixer_t* mixer) extra_threads = mixer->thread_count; if (extra_threads == 0) { // No extra threads? No thread overhead! 
- fluid_render_loop_singlethread(mixer); + fluid_render_loop_singlethread(mixer, current_blockcount); return; } @@ -916,9 +964,9 @@ fluid_render_loop_multithread(fluid_rvoice_mixer_t* mixer) fluid_rvoice_t* rvoice = fluid_mixer_get_mt_rvoice(mixer); if (rvoice != NULL) { fluid_profile_ref_var(prof_ref); - fluid_mixer_buffers_render_one(&mixer->buffers, rvoice, bufs, bufcount); + fluid_mixer_buffers_render_one(&mixer->buffers, rvoice, bufs, bufcount, local_buf, current_blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_VOICE, prof_ref,1, - mixer->current_blockcount * FLUID_BUFSIZE); + current_blockcount * FLUID_BUFSIZE); //test++; } else { @@ -1022,7 +1070,7 @@ fluid_rvoice_mixer_render(fluid_rvoice_mixer_t* mixer, int blockcount) mixer->current_blockcount = blockcount; // Zero buffers - fluid_mixer_buffers_zero(&mixer->buffers); + fluid_mixer_buffers_zero(&mixer->buffers, blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_CLEAR, prof_ref, mixer->active_voices, mixer->current_blockcount * FLUID_BUFSIZE); @@ -1031,7 +1079,7 @@ fluid_rvoice_mixer_render(fluid_rvoice_mixer_t* mixer, int blockcount) fluid_render_loop_multithread(mixer); else #endif - fluid_render_loop_singlethread(mixer); + fluid_render_loop_singlethread(mixer, blockcount); fluid_profile(FLUID_PROF_ONE_BLOCK_VOICES, prof_ref, mixer->active_voices, mixer->current_blockcount * FLUID_BUFSIZE); diff --git a/src/rvoice/fluid_rvoice_mixer.h b/src/rvoice/fluid_rvoice_mixer.h index 6bff3de7..f05697a2 100644 --- a/src/rvoice/fluid_rvoice_mixer.h +++ b/src/rvoice/fluid_rvoice_mixer.h @@ -33,9 +33,9 @@ typedef struct _fluid_rvoice_mixer_t fluid_rvoice_mixer_t; int fluid_rvoice_mixer_render(fluid_rvoice_mixer_t* mixer, int blockcount); int fluid_rvoice_mixer_get_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** left, fluid_real_t*** right); + fluid_real_t** left, fluid_real_t** right); int fluid_rvoice_mixer_get_fx_bufs(fluid_rvoice_mixer_t* mixer, - fluid_real_t*** fx_left, fluid_real_t*** fx_right); + fluid_real_t** 
fx_left, fluid_real_t** fx_right); int fluid_rvoice_mixer_get_bufcount(fluid_rvoice_mixer_t* mixer); #if WITH_PROFILING int fluid_rvoice_mixer_get_active_voices(fluid_rvoice_mixer_t* mixer); diff --git a/src/synth/fluid_synth.c b/src/synth/fluid_synth.c index 57d16254..74602515 100644 --- a/src/synth/fluid_synth.c +++ b/src/synth/fluid_synth.c @@ -2887,8 +2887,8 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, float** left, float** right, float** fx_left, float** fx_right) { - fluid_real_t** left_in, **fx_left_in; - fluid_real_t** right_in, **fx_right_in; + fluid_real_t* left_in, *fx_left_in; + fluid_real_t* right_in, *fx_right_in; double time = fluid_utime(); int i, num, available, count; #ifdef WITH_FLOAT @@ -2911,13 +2911,13 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, for (i = 0; i < synth->audio_channels; i++) { #ifdef WITH_FLOAT - FLUID_MEMCPY(left[i], left_in[i] + synth->cur, bytes); - FLUID_MEMCPY(right[i], right_in[i] + synth->cur, bytes); + FLUID_MEMCPY(left[i], &left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); + FLUID_MEMCPY(right[i], &right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); #else //WITH_FLOAT int j; for (j = 0; j < num; j++) { - left[i][j] = (float) left_in[i][j + synth->cur]; - right[i][j] = (float) right_in[i][j + synth->cur]; + left[i][j] = (float) left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; + right[i][j] = (float) right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; } #endif //WITH_FLOAT } @@ -2926,20 +2926,20 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, { #ifdef WITH_FLOAT if(fx_left != NULL) - FLUID_MEMCPY(fx_left[i], fx_left_in[i] + synth->cur, bytes); + FLUID_MEMCPY(fx_left[i], &fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); if(fx_right != NULL) - FLUID_MEMCPY(fx_right[i], fx_right_in[i] + synth->cur, bytes); + 
FLUID_MEMCPY(fx_right[i], &fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT] + synth->cur, bytes); #else //WITH_FLOAT int j; if(fx_left != NULL) { for (j = 0; j < num; j++) - fx_left[i][j] = (float) fx_left_in[i][j + synth->cur]; + fx_left[i][j] = (float) fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; } if(fx_right != NULL) { for (j = 0; j < num; j++) - fx_right[i][j] = (float) fx_right_in[i][j + synth->cur]; + fx_right[i][j] = (float) fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j + synth->cur]; } #endif //WITH_FLOAT } @@ -2962,13 +2962,13 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, for (i = 0; i < synth->audio_channels; i++) { #ifdef WITH_FLOAT - FLUID_MEMCPY(left[i] + count, left_in[i], bytes); - FLUID_MEMCPY(right[i] + count, right_in[i], bytes); + FLUID_MEMCPY(left[i] + count, &left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); + FLUID_MEMCPY(right[i] + count, &right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); #else //WITH_FLOAT int j; for (j = 0; j < num; j++) { - left[i][j + count] = (float) left_in[i][j]; - right[i][j + count] = (float) right_in[i][j]; + left[i][j + count] = (float) left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; + right[i][j + count] = (float) right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; } #endif //WITH_FLOAT } @@ -2977,20 +2977,20 @@ fluid_synth_nwrite_float(fluid_synth_t* synth, int len, { #ifdef WITH_FLOAT if(fx_left != NULL) - FLUID_MEMCPY(fx_left[i] + count, fx_left_in[i], bytes); + FLUID_MEMCPY(fx_left[i] + count, &fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); if(fx_right != NULL) - FLUID_MEMCPY(fx_right[i] + count, fx_right_in[i], bytes); + FLUID_MEMCPY(fx_right[i] + count, &fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT], bytes); #else //WITH_FLOAT int j; if(fx_left != NULL) { for (j = 0; j < num; j++) - fx_left[i][j + count] 
= (float) fx_left_in[i][j]; + fx_left[i][j + count] = (float) fx_left_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; } if(fx_right != NULL) { for (j = 0; j < num; j++) - fx_right[i][j + count] = (float) fx_right_in[i][j]; + fx_right[i][j + count] = (float) fx_right_in[i * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + j]; } #endif //WITH_FLOAT } @@ -3118,8 +3118,8 @@ fluid_synth_write_float(fluid_synth_t* synth, int len, int i, j, k, l; float* left_out = (float*) lout; float* right_out = (float*) rout; - fluid_real_t** left_in; - fluid_real_t** right_in; + fluid_real_t* left_in; + fluid_real_t* right_in; double time = fluid_utime(); float cpu_load; @@ -3139,8 +3139,8 @@ fluid_synth_write_float(fluid_synth_t* synth, int len, l = 0; } - left_out[j] = (float) left_in[0][l]; - right_out[k] = (float) right_in[0][l]; + left_out[j] = (float) left_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + l]; + right_out[k] = (float) right_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + l]; } synth->cur = l; @@ -3216,8 +3216,8 @@ fluid_synth_write_s16(fluid_synth_t* synth, int len, int i, j, k, cur; signed short* left_out = (signed short*) lout; signed short* right_out = (signed short*) rout; - fluid_real_t** left_in; - fluid_real_t** right_in; + fluid_real_t* left_in; + fluid_real_t* right_in; fluid_real_t left_sample; fluid_real_t right_sample; double time = fluid_utime(); @@ -3242,8 +3242,8 @@ fluid_synth_write_s16(fluid_synth_t* synth, int len, cur = 0; } - left_sample = roundi (left_in[0][cur] * 32766.0f + rand_table[0][di]); - right_sample = roundi (right_in[0][cur] * 32766.0f + rand_table[1][di]); + left_sample = roundi (left_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + cur] * 32766.0f + rand_table[0][di]); + right_sample = roundi (right_in[0 * FLUID_BUFSIZE * FLUID_MIXER_MAX_BUFFERS_DEFAULT + cur] * 32766.0f + rand_table[1][di]); di++; if (di >= DITHER_SIZE) di = 0; diff --git a/src/utils/fluid_settings.c 
b/src/utils/fluid_settings.c index 5d315533..0d0f17d2 100644 --- a/src/utils/fluid_settings.c +++ b/src/utils/fluid_settings.c @@ -116,7 +116,7 @@ delete_fluid_str_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT(node->type, FLUID_STR_TYPE); + FLUID_ASSERT(node->type == FLUID_STR_TYPE); FLUID_FREE(node->str.value); FLUID_FREE(node->str.def); @@ -169,7 +169,7 @@ delete_fluid_num_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT (node->type, FLUID_NUM_TYPE); + FLUID_ASSERT (node->type == FLUID_NUM_TYPE); FLUID_FREE(node); } @@ -205,7 +205,7 @@ delete_fluid_int_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT (node->type, FLUID_INT_TYPE); + FLUID_ASSERT (node->type == FLUID_INT_TYPE); FLUID_FREE(node); } @@ -243,7 +243,7 @@ delete_fluid_set_setting(fluid_setting_node_t* node) { fluid_return_if_fail(node != NULL); - FLUID_ASSERT (node->type, FLUID_SET_TYPE); + FLUID_ASSERT (node->type == FLUID_SET_TYPE); delete_fluid_hashtable(node->set.hashtable); FLUID_FREE(node); } diff --git a/src/utils/fluid_sys.h b/src/utils/fluid_sys.h index 498b961f..2bda36ae 100644 --- a/src/utils/fluid_sys.h +++ b/src/utils/fluid_sys.h @@ -602,4 +602,26 @@ void fluid_clear_fpe_i386(void); /* System control */ void fluid_msleep(unsigned int msecs); +/** + * Advances the given \c ptr to the next \c alignment byte boundary. + * Make sure you've allocated an extra \c alignment bytes to avoid a buffer overflow. + * + * @note \c alignment must be a power of two + * @return Returned pointer is guaranteed to be aligned to \c alignment boundary and in range \f[ ptr <= returned_ptr < ptr + alignment \f].
+ */ +static FLUID_INLINE void* fluid_align_ptr(const void* ptr, unsigned int alignment) +{ + uintptr_t ptr_int = (uintptr_t)ptr; + unsigned int offset = ptr_int & (alignment-1); + unsigned int add = (alignment - offset) & (alignment-1); // advance the pointer to the next alignment boundary + ptr_int += add; + + /* assert alignment is power of two */ + FLUID_ASSERT(!(alignment == 0) && !(alignment & (alignment - 1))); + + return (void*)ptr_int; +} + +#define FLUID_DEFAULT_ALIGNMENT (64U) + #endif /* _FLUID_SYS_H */ diff --git a/src/utils/fluidsynth_priv.h b/src/utils/fluidsynth_priv.h index d8c73c10..7addb1f8 100644 --- a/src/utils/fluidsynth_priv.h +++ b/src/utils/fluidsynth_priv.h @@ -102,6 +102,10 @@ #include #endif +#if HAVE_OPENMP +#include <omp.h> +#endif + #if HAVE_IO_H #include <io.h> #endif @@ -245,7 +249,8 @@ typedef FILE* fluid_file; #define FLUID_MALLOC(_n) malloc(_n) #define FLUID_REALLOC(_p,_n) realloc(_p,_n) #define FLUID_NEW(_t) (_t*)malloc(sizeof(_t)) -#define FLUID_ARRAY(_t,_n) (_t*)malloc((_n)*sizeof(_t)) +#define FLUID_ARRAY_ALIGNED(_t,_n,_a) (_t*)malloc((_n)*sizeof(_t) + ((unsigned int)_a - 1u)) +#define FLUID_ARRAY(_t,_n) FLUID_ARRAY_ALIGNED(_t,_n,1u) #define FLUID_FREE(_p) free(_p) #define FLUID_FOPEN(_f,_m) fopen(_f,_m) #define FLUID_FCLOSE(_f) fclose(_f) @@ -330,8 +335,11 @@ do { strncpy(_dst,_src,_n); \ #define M_LN10 2.3025850929940456840179914546844 #endif -#define FLUID_ASSERT(a,b) -#define FLUID_ASSERT_P(a,b) +#ifdef NDEBUG +#define FLUID_ASSERT(a) +#else +#define FLUID_ASSERT(a) g_assert(a) +#endif #define FLUID_LIKELY G_LIKELY #define FLUID_UNLIKELY G_UNLIKELY diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8e9d4c2a..77cf4a37 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,6 +13,7 @@ ADD_FLUID_TEST(test_sample_cache) ADD_FLUID_TEST(test_sfont_loading) ADD_FLUID_TEST(test_sample_rate_change) ADD_FLUID_TEST(test_preset_sample_loading) +ADD_FLUID_TEST(test_pointer_alignment) ADD_FLUID_TEST(test_seqbind_unregister)
ADD_FLUID_TEST(test_snprintf) diff --git a/test/test_pointer_alignment.c b/test/test_pointer_alignment.c new file mode 100644 index 00000000..a7447912 --- /dev/null +++ b/test/test_pointer_alignment.c @@ -0,0 +1,31 @@ + +#include "test.h" +#include "utils/fluid_sys.h" + + +// test for fluid_align_ptr() +int main(void) +{ + unsigned int align; + uintptr_t ptr, aligned_ptr; + + for(align = 32; align <= 4*1024u; align <<= 1) + { + for(ptr = 0; ptr <= (align<<10); ptr++) + { + char* tmp = fluid_align_ptr((char*)ptr, align); + aligned_ptr = (uintptr_t)tmp; + + // pointer must be aligned properly + TEST_ASSERT(aligned_ptr % align == 0); + + // aligned pointer must not be smaller than ptr + TEST_ASSERT(aligned_ptr >= ptr); + + // aligned pointer must be less than one full alignment step past ptr + TEST_ASSERT(aligned_ptr < ptr + align); + } + } + + return EXIT_SUCCESS; +}