From 8adaaa7079e7d4f37e00615b4024957daaca71c1 Mon Sep 17 00:00:00 2001 From: derselbst Date: Thu, 4 Mar 2021 19:23:20 +0100 Subject: [PATCH] Fix regression introduced in aebc4837ddfe0c9329211d9806a9c77bc83a4e1c If a voice has finished, sample_count may be smaller than FLUID_BUFSIZE, in which case audible artifacts would occur. Addresses #786 --- src/rvoice/fluid_rvoice_mixer.c | 49 +++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c index fd2cc956..234e38d8 100644 --- a/src/rvoice/fluid_rvoice_mixer.c +++ b/src/rvoice/fluid_rvoice_mixer.c @@ -414,27 +414,42 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t *buffers, * But it seems like having two separate loops is easier for compilers to understand, and therefore * auto-vectorizing the loops. */ - #pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT) - for(dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i++) + if(sample_count < FLUID_BUFSIZE) { - // We cannot simply increment current_amp by amp_incr during every iteration, as this would create a dependency and prevent vectorization. - buf[start_block * FLUID_BUFSIZE + dsp_i] += (current_amp + amp_incr * dsp_i) * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i]; - } - - if(target_amp > 0) - { - /* Note, that this loop could be unrolled by FLUID_BUFSIZE elements */ - #pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT) - for(dsp_i = FLUID_BUFSIZE; dsp_i < sample_count; dsp_i++) + // scalar loop variant, the voice will have finished afterwards + for(dsp_i = 0; dsp_i < sample_count; dsp_i++) { - // Index by blocks (not by samples) to let the compiler know that we always start accessing - // buf and dsp_buf at the FLUID_BUFSIZE*sizeof(fluid_real_t) byte boundary and never somewhere - // in between. - // A good compiler should understand: Aha, so I don't need to add a peel loop when vectorizing - // this loop. Great. - buf[start_block * FLUID_BUFSIZE + dsp_i] += target_amp * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i]; + buf[start_block * FLUID_BUFSIZE + dsp_i] += current_amp * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i]; + current_amp += amp_incr; } } + else + { + // here goes the vectorizable loop + #pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT) + for(dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i++) + { + // We cannot simply increment current_amp by amp_incr during every iteration, as this would create a dependency and prevent vectorization. + buf[start_block * FLUID_BUFSIZE + dsp_i] += (current_amp + amp_incr * dsp_i) * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i]; + } + + // we have reached the target_amp + if(target_amp > 0) + { + /* Note, that this loop could be unrolled by FLUID_BUFSIZE elements */ + #pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT) + for(dsp_i = FLUID_BUFSIZE; dsp_i < sample_count; dsp_i++) + { + // Index by blocks (not by samples) to let the compiler know that we always start accessing + // buf and dsp_buf at the FLUID_BUFSIZE*sizeof(fluid_real_t) byte boundary and never somewhere + // in between. + // A good compiler should understand: Aha, so I don't need to add a peel loop when vectorizing + // this loop. Great. + buf[start_block * FLUID_BUFSIZE + dsp_i] += target_amp * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i]; + } + } + } + buffers->bufs[i].current_amp = target_amp; } }