mirror of
https://github.com/ZDoom/fluidsynth.git
synced 2024-11-10 06:51:54 +00:00
Simplify control flow to allow auto-vectorization by compiler
This commit is contained in:
parent
25b0503ba7
commit
aebc4837dd
1 changed files with 26 additions and 19 deletions
|
@ -405,29 +405,36 @@ fluid_rvoice_buffers_mix(fluid_rvoice_buffers_t *buffers,
|
|||
|
||||
FLUID_ASSERT((uintptr_t)buf % FLUID_DEFAULT_ALIGNMENT == 0);
|
||||
|
||||
/* mixdown sample_count samples in the current buffer buf
|
||||
Note, that this loop could be unrolled by FLUID_BUFSIZE elements */
|
||||
/* Mixdown sample_count samples in the current buffer buf
|
||||
*
|
||||
* For the first FLUID_BUFSIZE samples, we linearly interpolate the buffers amplitude to
|
||||
* avoid clicks/pops when rapidly changing the channels panning (issue 768).
|
||||
*
|
||||
* We could have squashed this into one single loop by using an if clause within the loop body.
|
||||
* But it seems like having two separate loops is easier for compilers to understand, and therefore
|
||||
* auto-vectorizing the loops.
|
||||
*/
|
||||
#pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT)
|
||||
for(dsp_i = 0; dsp_i < sample_count; dsp_i++)
|
||||
for(dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i++)
|
||||
{
|
||||
fluid_real_t samp;
|
||||
if(dsp_i < FLUID_BUFSIZE)
|
||||
{
|
||||
samp = (current_amp + amp_incr * dsp_i) * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i];
|
||||
}
|
||||
else
|
||||
{
|
||||
samp = target_amp * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i];
|
||||
}
|
||||
|
||||
// Index by blocks (not by samples) to let the compiler know that we always start accessing
|
||||
// buf and dsp_buf at the FLUID_BUFSIZE*sizeof(fluid_real_t) byte boundary and never somewhere
|
||||
// in between.
|
||||
// A good compiler should understand: Aha, so I don't need to add a peel loop when vectorizing
|
||||
// this loop. Great.
|
||||
buf[start_block * FLUID_BUFSIZE + dsp_i] += samp;
|
||||
// We cannot simply increment current_amp by amp_incr during every iteration, as this would create a dependency and prevent vectorization.
|
||||
buf[start_block * FLUID_BUFSIZE + dsp_i] += (current_amp + amp_incr * dsp_i) * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i];
|
||||
}
|
||||
|
||||
if(target_amp > 0)
|
||||
{
|
||||
/* Note, that this loop could be unrolled by FLUID_BUFSIZE elements */
|
||||
#pragma omp simd aligned(dsp_buf,buf:FLUID_DEFAULT_ALIGNMENT)
|
||||
for(dsp_i = FLUID_BUFSIZE; dsp_i < sample_count; dsp_i++)
|
||||
{
|
||||
// Index by blocks (not by samples) to let the compiler know that we always start accessing
|
||||
// buf and dsp_buf at the FLUID_BUFSIZE*sizeof(fluid_real_t) byte boundary and never somewhere
|
||||
// in between.
|
||||
// A good compiler should understand: Aha, so I don't need to add a peel loop when vectorizing
|
||||
// this loop. Great.
|
||||
buf[start_block * FLUID_BUFSIZE + dsp_i] += target_amp * dsp_buf[start_block * FLUID_BUFSIZE + dsp_i];
|
||||
}
|
||||
}
|
||||
buffers->bufs[i].current_amp = target_amp;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue