I've split up the dsp core file in three

files: fluid_dsp_simple.c, fluid_dsp_float.c, and
fluid_dsp_sse.c. This improves the readability.
This commit is contained in:
Peter Hanappe 2004-03-30 10:07:32 +00:00
parent dd092ac0ad
commit 654ec72119
4 changed files with 794 additions and 0 deletions

View file

@ -1,3 +1,9 @@
2004-03-30 Peter Hanappe <peter@hanappe.com>
* src/fluid_dsp_core.c: I've split up the dsp core file in three
files: fluid_dsp_simple.c, fluid_dsp_float.c, and
fluid_dsp_sse.c. This improves the readability.
2004-03-29 Peter Hanappe <peter@hanappe.com> 2004-03-29 Peter Hanappe <peter@hanappe.com>
* src/fluid_jack.c (new_fluid_jack_audio_driver2): Testing the * src/fluid_jack.c (new_fluid_jack_audio_driver2): Testing the

View file

@ -0,0 +1,281 @@
/* FluidSynth - A Software Synthesizer
*
* Copyright (C) 2003 Peter Hanappe and others.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License
* as published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307, USA
*/
/* Purpose:
* Low-level voice processing:
*
* - interpolates (obtains values between the samples of the original waveform data)
* - filters (applies a lowpass filter with variable cutoff frequency and quality factor)
* - mixes the processed sample to left and right output using the pan setting
* - sends the processed sample to chorus and reverb
*
*
* This file does -not- generate an object file.
* Instead, it is #included in several places in fluid_voice.c.
* The motivation for this is
* - Calling it as a subroutine may be time consuming, especially with optimization off
* - The previous implementation as a macro was clumsy to handle
*
*
* Fluid_voice.c sets a couple of variables before #including this:
* - dsp_data: Pointer to the original waveform data
* - dsp_left_buf: The generated signal goes here, left channel
* - dsp_right_buf: right channel
* - dsp_reverb_buf: Send to reverb unit
* - dsp_chorus_buf: Send to chorus unit
* - dsp_start: Start processing at this output buffer index
* - dsp_end: End processing just before this output buffer index
* - dsp_a1: Coefficient for the filter
* - dsp_a2: same
* - dsp_b0: same
* - dsp_b1: same
* - dsp_b2: same
* - dsp_filter_flag: Set, the filter is needed (many sound fonts don't use
* the filter at all. If it is left at its default setting
* of roughly 20 kHz, there is no need to apply filterling.)
* - dsp_interp_method: Which interpolation method to use.
* - voice holds the voice structure
*
* Some variables are set and modified:
* - dsp_phase: The position in the original waveform data.
* This has an integer and a fractional part (between samples).
* - dsp_phase_incr: For each output sample, the position in the original
* waveform advances by dsp_phase_incr. This also has an integer
* part and a fractional part.
* If a sample is played at root pitch (no pitch change),
* dsp_phase_incr is integer=1 and fractional=0.
* - dsp_amp: The current amplitude envelope value.
* - dsp_amp_incr: The changing rate of the amplitude envelope.
*
* A couple of variables are used internally, their results are discarded:
* - dsp_i: Index through the output buffer
* - dsp_phase_fractional: The fractional part of dsp_phase
* - dsp_coeff: A table of four coefficients, depending on the fractional phase.
* Used to interpolate between samples.
* - dsp_process_buffer: Holds the processed signal between stages
* - dsp_centernode: delay line for the IIR filter
* - dsp_hist1: same
* - dsp_hist2: same
*
*/
/* Purpose:
* zap_almost_zero will return a number, as long as its
* absolute value is over a certain threshold. Otherwise 0. See
* fluid_rev.c for documentation (denormal numbers)
*/
# if defined(WITH_FLOAT)
# define zap_almost_zero(_sample) \
((((*(unsigned int*)&(_sample))&0x7f800000) < 0x08000000)? 0.0f : (_sample))
# else
/* 1e-20 was chosen as an arbitrary (small) threshold. */
#define zap_almost_zero(_sample) ((abs(_sample) < 1e-20)? 0.0f : (_sample))
#endif
/* Interpolation (find a value between two samples of the original waveform) */
if ((fluid_phase_fract(dsp_phase) == 0)
&& (fluid_phase_fract(dsp_phase_incr) == 0)
&& (fluid_phase_index(dsp_phase_incr) == 1)) {
/* Check for a special case: The current phase falls directly on an
* original sample. Also, the stepsize per output sample is exactly
* one sample, no fractional part. In other words: The sample is
* played back at normal phase and root pitch. => No interpolation
* needed.
*/
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
/* Mix to the buffer and advance the phase by one sample */
dsp_buf[dsp_i] = dsp_amp * dsp_data[fluid_phase_index_plusplus(dsp_phase)];
dsp_amp += dsp_amp_incr;
}
} else {
/* wave table interpolation: Choose the interpolation method */
switch(dsp_interp_method){
case FLUID_INTERP_NONE:
/* No interpolation. Just take the sample, which is closest to
* the playback pointer. Questionable quality, but very
* efficient. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_phase_index = fluid_phase_index(dsp_phase);
dsp_buf[dsp_i] = dsp_amp * dsp_data[dsp_phase_index];
/* increment phase and amplitude */
fluid_phase_incr(dsp_phase, dsp_phase_incr);
dsp_amp += dsp_amp_incr;
};
break;
case FLUID_INTERP_LINEAR:
/* Straight line interpolation. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_coeff = &interp_coeff_linear[fluid_phase_fract_to_tablerow(dsp_phase)];
dsp_phase_index = fluid_phase_index(dsp_phase);
dsp_buf[dsp_i] = (dsp_amp *
(dsp_coeff->a0 * dsp_data[dsp_phase_index]
+ dsp_coeff->a1 * dsp_data[dsp_phase_index+1]));
/* increment phase and amplitude */
fluid_phase_incr(dsp_phase, dsp_phase_incr);
dsp_amp += dsp_amp_incr;
};
break;
case FLUID_INTERP_4THORDER:
default:
/* Default interpolation loop using floats */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_coeff = &interp_coeff[fluid_phase_fract_to_tablerow(dsp_phase)];
dsp_phase_index = fluid_phase_index(dsp_phase);
dsp_buf[dsp_i] = (dsp_amp *
(dsp_coeff->a0 * dsp_data[dsp_phase_index]
+ dsp_coeff->a1 * dsp_data[dsp_phase_index+1]
+ dsp_coeff->a2 * dsp_data[dsp_phase_index+2]
+ dsp_coeff->a3 * dsp_data[dsp_phase_index+3]));
/* increment phase and amplitude */
fluid_phase_incr(dsp_phase, dsp_phase_incr);
dsp_amp += dsp_amp_incr;
}
break;
case FLUID_INTERP_7THORDER:
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
int fract = fluid_phase_fract_to_tablerow(dsp_phase);
dsp_phase_index = fluid_phase_index(dsp_phase);
dsp_buf[dsp_i] = (dsp_amp *
(sinc_table7[0][fract] * (fluid_real_t) dsp_data[dsp_phase_index]
+ sinc_table7[1][fract] * (fluid_real_t) dsp_data[dsp_phase_index+1]
+ sinc_table7[2][fract] * (fluid_real_t) dsp_data[dsp_phase_index+2]
+ sinc_table7[3][fract] * (fluid_real_t) dsp_data[dsp_phase_index+3]
+ sinc_table7[4][fract] * (fluid_real_t) dsp_data[dsp_phase_index+4]
+ sinc_table7[5][fract] * (fluid_real_t) dsp_data[dsp_phase_index+5]
+ sinc_table7[6][fract] * (fluid_real_t) dsp_data[dsp_phase_index+6]));
/* increment phase and amplitude */
fluid_phase_incr(dsp_phase, dsp_phase_incr);
dsp_amp += dsp_amp_incr;
}
break;
} /* switch interpolation method */
} /* If interpolation is needed */
/* filter (implement the voice filter according to Soundfont standard) */
if (dsp_use_filter_flag) {
/* Check for denormal number (too close to zero) once in a
* while. This is not a big concern here - why would someone play a
* sample with an empty tail? */
dsp_hist1 = zap_almost_zero(dsp_hist1);
/* Two versions of the filter loop. One, while the filter is
* changing towards its new setting. The other, if the filter
* doesn't change.
*/
if (dsp_filter_coeff_incr_count > 0) {
/* The increment is added to each filter coefficient
filter_coeff_incr_count times. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
/* The filter is implemented in Direct-II form. */
dsp_centernode = dsp_buf[dsp_i] - dsp_a1 * dsp_hist1 - dsp_a2 * dsp_hist2;
dsp_buf[dsp_i] = dsp_b02 * (dsp_centernode + dsp_hist2) + dsp_b1 * dsp_hist1;
dsp_hist2 = dsp_hist1;
dsp_hist1 = dsp_centernode;
if (dsp_filter_coeff_incr_count-- > 0){
dsp_a1 += dsp_a1_incr;
dsp_a2 += dsp_a2_incr;
dsp_b02 += dsp_b02_incr;
dsp_b1 += dsp_b1_incr;
}
} /* for dsp_i */
} else {
/* The filter parameters are constant. This is duplicated to save
* time. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
/* The filter is implemented in Direct-II form. */
dsp_centernode = dsp_buf[dsp_i] - dsp_a1 * dsp_hist1 - dsp_a2 * dsp_hist2;
dsp_buf[dsp_i] = dsp_b02 * (dsp_centernode + dsp_hist2) + dsp_b1 * dsp_hist1;
dsp_hist2 = dsp_hist1;
dsp_hist1 = dsp_centernode;
}
} /* if filter is fixed */
} /* if filter is enabled */
/* pan (Copy the signal to the left and right output buffer) The voice
* panning generator has a range of -500 .. 500. If it is centered,
* it's close to 0. voice->amp_left and voice->amp_right are then the
* same, and we can save one multiplication per voice and sample.
*/
if ((-0.5 < voice->pan) && (voice->pan < 0.5)) {
/* The voice is centered. Use voice->amp_left twice. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
float v = voice->amp_left * dsp_buf[dsp_i];
dsp_left_buf[dsp_i] += v;
dsp_right_buf[dsp_i] += v;
}
} else {
/* The voice is not centered. For stereo samples, one of the
* amplitudes will be zero. */
if (voice->amp_left != 0.0){
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_left_buf[dsp_i] += voice->amp_left * dsp_buf[dsp_i];
}
}
if (voice->amp_right != 0.0){
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_right_buf[dsp_i] += voice->amp_right * dsp_buf[dsp_i];
}
}
}
/* reverb send. Buffer may be NULL. */
if ((dsp_reverb_buf != NULL) && (voice->amp_reverb != 0.0)) {
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_reverb_buf[dsp_i] += voice->amp_reverb * dsp_buf[dsp_i];
}
}
/* chorus send. Buffer may be NULL. */
if ((dsp_chorus_buf != NULL) && (voice->amp_chorus != 0)) {
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_chorus_buf[dsp_i] += voice->amp_chorus * dsp_buf[dsp_i];
}
}

View file

@ -0,0 +1,120 @@
/* FluidSynth - A Software Synthesizer
*
* Copyright (C) 2003 Peter Hanappe and others.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License
* as published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307, USA
*/
/* Purpose:
* Low-level voice processing:
*
* - interpolates (obtains values between the samples of the original waveform data)
* - filters (applies a lowpass filter with variable cutoff frequency and quality factor)
* - mixes the processed sample to left and right output using the pan setting
* - sends the processed sample to chorus and reverb
*
*
* This file does -not- generate an object file.
* Instead, it is #included in several places in fluid_voice.c.
* The motivation for this is
* - Calling it as a subroutine may be time consuming, especially with optimization off
* - The previous implementation as a macro was clumsy to handle
*
*
* Fluid_voice.c sets a couple of variables before #including this:
* - dsp_data: Pointer to the original waveform data
* - dsp_left_buf: The generated signal goes here, left channel
* - dsp_right_buf: right channel
* - dsp_reverb_buf: Send to reverb unit
* - dsp_chorus_buf: Send to chorus unit
* - dsp_start: Start processing at this output buffer index
* - dsp_end: End processing just before this output buffer index
* - dsp_a1: Coefficient for the filter
* - dsp_a2: same
* - dsp_b0: same
* - dsp_b1: same
* - dsp_b2: same
* - dsp_filter_flag: Set, the filter is needed (many sound fonts don't use
* the filter at all. If it is left at its default setting
* of roughly 20 kHz, there is no need to apply filterling.)
* - dsp_interp_method: Which interpolation method to use.
* - voice holds the voice structure
*
* Some variables are set and modified:
* - dsp_phase: The position in the original waveform data.
* This has an integer and a fractional part (between samples).
* - dsp_phase_incr: For each output sample, the position in the original
* waveform advances by dsp_phase_incr. This also has an integer
* part and a fractional part.
* If a sample is played at root pitch (no pitch change),
* dsp_phase_incr is integer=1 and fractional=0.
* - dsp_amp: The current amplitude envelope value.
* - dsp_amp_incr: The changing rate of the amplitude envelope.
*
* A couple of variables are used internally, their results are discarded:
* - dsp_i: Index through the output buffer
* - dsp_phase_fractional: The fractional part of dsp_phase
* - dsp_coeff: A table of four coefficients, depending on the fractional phase.
* Used to interpolate between samples.
* - dsp_process_buffer: Holds the processed signal between stages
* - dsp_centernode: delay line for the IIR filter
* - dsp_hist1: same
* - dsp_hist2: same
*
*/
/* Nonoptimized DSP loop */
#warning "This code is meant for experiments only.";
/* wave table interpolation */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
dsp_coeff = &interp_coeff[fluid_phase_fract_to_tablerow(dsp_phase)];
dsp_phase_index = fluid_phase_index(dsp_phase);
dsp_sample = (dsp_amp *
(dsp_coeff->a0 * dsp_data[dsp_phase_index]
+ dsp_coeff->a1 * dsp_data[dsp_phase_index+1]
+ dsp_coeff->a2 * dsp_data[dsp_phase_index+2]
+ dsp_coeff->a3 * dsp_data[dsp_phase_index+3]));
/* increment phase and amplitude */
fluid_phase_incr(dsp_phase, dsp_phase_incr);
dsp_amp += dsp_amp_incr;
/* filter */
/* The filter is implemented in Direct-II form. */
dsp_centernode = dsp_sample - dsp_a1 * dsp_hist1 - dsp_a2 * dsp_hist2;
dsp_sample = dsp_b0 * dsp_centernode + dsp_b1 * dsp_hist1 + dsp_b2 * dsp_hist2;
dsp_hist2 = dsp_hist1;
dsp_hist1 = dsp_centernode;
/* pan */
dsp_left_buf[dsp_i] += voice->amp_left * dsp_sample;
dsp_right_buf[dsp_i] += voice->amp_right * dsp_sample;
/* reverb */
if (dsp_reverb_buf){
dsp_reverb_buf[dsp_i] += voice->amp_reverb * dsp_sample;
}
/* chorus */
if (dsp_chorus_buf){
dsp_chorus_buf[dsp_i] += voice->amp_chorus * dsp_sample;
}
}

View file

@ -0,0 +1,387 @@
/* FluidSynth - A Software Synthesizer
*
* Copyright (C) 2003 Peter Hanappe and others.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public License
* as published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
* 02111-1307, USA
*/
/* Purpose:
* Low-level voice processing:
*
* - interpolates (obtains values between the samples of the original waveform data)
* - filters (applies a lowpass filter with variable cutoff frequency and quality factor)
* - mixes the processed sample to left and right output using the pan setting
* - sends the processed sample to chorus and reverb
*
*
* This file does -not- generate an object file.
* Instead, it is #included in several places in fluid_voice.c.
* The motivation for this is
* - Calling it as a subroutine may be time consuming, especially with optimization off
* - The previous implementation as a macro was clumsy to handle
*
*
* Fluid_voice.c sets a couple of variables before #including this:
* - dsp_data: Pointer to the original waveform data
* - dsp_left_buf: The generated signal goes here, left channel
* - dsp_right_buf: right channel
* - dsp_reverb_buf: Send to reverb unit
* - dsp_chorus_buf: Send to chorus unit
* - dsp_start: Start processing at this output buffer index
* - dsp_end: End processing just before this output buffer index
* - dsp_a1: Coefficient for the filter
* - dsp_a2: same
* - dsp_b0: same
* - dsp_b1: same
* - dsp_b2: same
* - dsp_filter_flag: Set, the filter is needed (many sound fonts don't use
* the filter at all. If it is left at its default setting
* of roughly 20 kHz, there is no need to apply filterling.)
* - dsp_interp_method: Which interpolation method to use.
* - voice holds the voice structure
*
* Some variables are set and modified:
* - dsp_phase: The position in the original waveform data.
* This has an integer and a fractional part (between samples).
* - dsp_phase_incr: For each output sample, the position in the original
* waveform advances by dsp_phase_incr. This also has an integer
* part and a fractional part.
* If a sample is played at root pitch (no pitch change),
* dsp_phase_incr is integer=1 and fractional=0.
* - dsp_amp: The current amplitude envelope value.
* - dsp_amp_incr: The changing rate of the amplitude envelope.
*
* A couple of variables are used internally, their results are discarded:
* - dsp_i: Index through the output buffer
* - dsp_phase_fractional: The fractional part of dsp_phase
* - dsp_coeff: A table of four coefficients, depending on the fractional phase.
* Used to interpolate between samples.
* - dsp_process_buffer: Holds the processed signal between stages
* - dsp_centernode: delay line for the IIR filter
* - dsp_hist1: same
* - dsp_hist2: same
*
*/
/* Purpose:
* zap_almost_zero will return a number, as long as its
* absolute value is over a certain threshold. Otherwise 0. See
* fluid_rev.c for documentation (denormal numbers)
*/
# if defined(WITH_FLOAT)
# define zap_almost_zero(_sample) \
((((*(unsigned int*)&(_sample))&0x7f800000) < 0x08000000)? 0.0f : (_sample))
# else
/* 1e-20 was chosen as an arbitrary (small) threshold. */
#define zap_almost_zero(_sample) ((abs(_sample) < 1e-20)? 0.0f : (_sample))
#endif
/* Interpolation (find a value between two samples of the original waveform) */
if ((fluid_phase_fract(dsp_phase) == 0)
&& (fluid_phase_fract(dsp_phase_incr) == 0)
&& (fluid_phase_index(dsp_phase_incr) == 1)) {
/* Check for a special case: The current phase falls directly on an
* original sample. Also, the stepsize per output sample is exactly
* one sample, no fractional part. In other words: The sample is
* played back at normal phase and root pitch. => No interpolation
* needed.
*/
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
/* Mix to the buffer and advance the phase by one sample */
dsp_buf[dsp_i] = dsp_amp * dsp_data[fluid_phase_index_plusplus(dsp_phase)];
dsp_amp += dsp_amp_incr;
}
} else {
/* wave table interpolation: Choose the interpolation method */
/* !!! SSE interpolation is less efficient that normal interpolation. */
/* Initialize amplitude increase */
sse_b->sf[0] = sse_b->sf[1] = sse_b->sf[2] = sse_b->sf[3] = 4.*dsp_amp_incr;
/* Initialize amplitude => xmm7
* The amplitude is kept in xmm7 throughout the whole process
*/
sse_a->sf[0]=sse_a->sf[1]=sse_a->sf[2]=sse_a->sf[3]=dsp_amp;
sse_a->sf[1] += dsp_amp_incr;
sse_a->sf[2] += 2.* dsp_amp_incr;
sse_a->sf[3] += 3.* dsp_amp_incr;
movaps_m2r(*sse_a,xmm7);
/* Where to store the result */
sse_dest=(sse_t*)&dsp_buf[0];
for (dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i += 4) {
/* Note / fixme: The coefficients are first copied to
* sse_c, then to the xmm register. Can't get it through
* the compiler differently... */
/* Load the four source samples for the 1st output sample */
dsp_phase_index = fluid_phase_index(dsp_phase);
sse_a->sf[0]=(fluid_real_t)dsp_data[dsp_phase_index];
sse_a->sf[1]=(fluid_real_t)dsp_data[dsp_phase_index+1];
sse_a->sf[2]=(fluid_real_t)dsp_data[dsp_phase_index+2];
sse_a->sf[3]=(fluid_real_t)dsp_data[dsp_phase_index+3];
movaps_m2r(*sse_a,xmm0);
*sse_c=interp_coeff_sse[fluid_phase_fract_to_tablerow(dsp_phase)];
mulps_m2r(*sse_c,xmm0);
fluid_phase_incr(dsp_phase, dsp_phase_incr);
/* Load the four source samples for the 2nd output sample */
dsp_phase_index = fluid_phase_index(dsp_phase);
sse_a->sf[0]=(fluid_real_t)dsp_data[dsp_phase_index];
sse_a->sf[1]=(fluid_real_t)dsp_data[dsp_phase_index+1];
sse_a->sf[2]=(fluid_real_t)dsp_data[dsp_phase_index+2];
sse_a->sf[3]=(fluid_real_t)dsp_data[dsp_phase_index+3];
movaps_m2r(*sse_a,xmm1);
*sse_c=interp_coeff_sse[fluid_phase_fract_to_tablerow(dsp_phase)];
mulps_m2r(*sse_c,xmm1);
fluid_phase_incr(dsp_phase, dsp_phase_incr);
/* Load the four source samples for the 3rd output sample */
dsp_phase_index = fluid_phase_index(dsp_phase);
sse_a->sf[0]=(fluid_real_t)dsp_data[dsp_phase_index];
sse_a->sf[1]=(fluid_real_t)dsp_data[dsp_phase_index+1];
sse_a->sf[2]=(fluid_real_t)dsp_data[dsp_phase_index+2];
sse_a->sf[3]=(fluid_real_t)dsp_data[dsp_phase_index+3];
movaps_m2r(*sse_a,xmm2);
*sse_c=interp_coeff_sse[fluid_phase_fract_to_tablerow(dsp_phase)];
mulps_m2r(*sse_c,xmm2);
fluid_phase_incr(dsp_phase, dsp_phase_incr);
/* Load the four source samples for the 4th output sample */
dsp_phase_index = fluid_phase_index(dsp_phase);
sse_a->sf[0]=(fluid_real_t)dsp_data[dsp_phase_index];
sse_a->sf[1]=(fluid_real_t)dsp_data[dsp_phase_index+1];
sse_a->sf[2]=(fluid_real_t)dsp_data[dsp_phase_index+2];
sse_a->sf[3]=(fluid_real_t)dsp_data[dsp_phase_index+3];
movaps_m2r(*sse_a,xmm3);
*sse_c=interp_coeff_sse[fluid_phase_fract_to_tablerow(dsp_phase)];
mulps_m2r(*sse_c,xmm3);
fluid_phase_incr(dsp_phase, dsp_phase_incr);
#if 0
/*Testcase for horizontal add */
sse_a->sf[0]=0.1;sse_a->sf[1]=0.01;sse_a->sf[2]=0.001;sse_a->sf[3]=0.0001;
movaps_m2r(*sse_a,xmm0);
sse_a->sf[0]=0.2;sse_a->sf[1]=0.02;sse_a->sf[2]=0.002;sse_a->sf[3]=0.0002;
movaps_m2r(*sse_a,xmm1);
sse_a->sf[0]=0.3;sse_a->sf[1]=0.03;sse_a->sf[2]=0.003;sse_a->sf[3]=0.0003;
movaps_m2r(*sse_a,xmm2);
sse_a->sf[0]=0.4;sse_a->sf[1]=0.04;sse_a->sf[2]=0.004;sse_a->sf[3]=0.0004;
movaps_m2r(*sse_a,xmm3);
#endif /* #if 0 */
#if 1
/* Horizontal add
* xmm4[0]:=xmm0[0]+xmm1[0]+xmm2[0]+xmm3[0]
* xmm4[1]:=xmm0[1]+xmm1[1]+xmm2[1]+xmm3[1]
* etc.
* The only register, which is unused, is xmm7.
*/
movaps_r2r(xmm0,xmm5);
movaps_r2r(xmm2,xmm6);
movlhps_r2r(xmm1,xmm5);
movlhps_r2r(xmm3,xmm6);
movhlps_r2r(xmm0,xmm1);
movhlps_r2r(xmm2,xmm3);
addps_r2r(xmm1,xmm5);
addps_r2r(xmm3,xmm6);
movaps_r2r(xmm5,xmm4);
shufps_r2r(xmm6,xmm5,0xDD);
shufps_r2r(xmm6,xmm4,0x88);
addps_r2r(xmm5,xmm4);
/* movaps_r2m(xmm4,*sse_a); */
/* printf("xmm4 (Result): %f %f %f %f\n", */
/* sse_a->sf[0], sse_a->sf[1], */
/* sse_a->sf[2], sse_a->sf[3]); */
#else
/* Add using normal FPU */
movaps_r2m(xmm0,*sse_a);
sse_c->sf[0]=sse_a->sf[0]+sse_a->sf[1]+sse_a->sf[2]+sse_a->sf[3];
movaps_r2m(xmm1,*sse_a);
sse_c->sf[1]=sse_a->sf[0]+sse_a->sf[1]+sse_a->sf[2]+sse_a->sf[3];
movaps_r2m(xmm2,*sse_a);
sse_c->sf[2]=sse_a->sf[0]+sse_a->sf[1]+sse_a->sf[2]+sse_a->sf[3];
movaps_r2m(xmm3,*sse_a);
sse_c->sf[3]=sse_a->sf[0]+sse_a->sf[1]+sse_a->sf[2]+sse_a->sf[3];
movaps_m2r(*sse_c,xmm4);
#endif /* #if 1 */
/* end horizontal add. Result in xmm6. */
/* Multiply xmm4 with amplitude */
mulps_r2r(xmm7,xmm4);
/* Store the result */
movaps_r2m(xmm4,*sse_dest); // ++
/* Advance the position in the output buffer */
sse_dest++;
/* Change the amplitude */
addps_m2r(*sse_b,xmm7);
} /* for dsp_i in steps of four */
movaps_r2m(xmm7,*sse_a);
/* Retrieve the last amplitude value. */
dsp_amp=sse_a->sf[3];
} /* If interpolation is needed */
/* filter (implement the voice filter according to Soundfont standard) */
if (dsp_use_filter_flag) {
/* Check for denormal number (too close to zero) once in a
* while. This is not a big concern here - why would someone play a
* sample with an empty tail? */
dsp_hist1 = zap_almost_zero(dsp_hist1);
/* Two versions of the filter loop. One, while the filter is
* changing towards its new setting. The other, if the filter
* doesn't change.
*/
if (dsp_filter_coeff_incr_count > 0) {
/* The increment is added to each filter coefficient
filter_coeff_incr_count times. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
/* The filter is implemented in Direct-II form. */
dsp_centernode = dsp_buf[dsp_i] - dsp_a1 * dsp_hist1 - dsp_a2 * dsp_hist2;
dsp_buf[dsp_i] = dsp_b02 * (dsp_centernode + dsp_hist2) + dsp_b1 * dsp_hist1;
dsp_hist2 = dsp_hist1;
dsp_hist1 = dsp_centernode;
if (dsp_filter_coeff_incr_count-- > 0){
dsp_a1 += dsp_a1_incr;
dsp_a2 += dsp_a2_incr;
dsp_b02 += dsp_b02_incr;
dsp_b1 += dsp_b1_incr;
}
} /* for dsp_i */
} else {
/* The filter parameters are constant. This is duplicated to save
* time. */
for (dsp_i = dsp_start; dsp_i < dsp_end; dsp_i++) {
/* The filter is implemented in Direct-II form. */
dsp_centernode = dsp_buf[dsp_i] - dsp_a1 * dsp_hist1 - dsp_a2 * dsp_hist2;
dsp_buf[dsp_i] = dsp_b02 * (dsp_centernode + dsp_hist2) + dsp_b1 * dsp_hist1;
dsp_hist2 = dsp_hist1;
dsp_hist1 = dsp_centernode;
}
} /* if filter is fixed */
} /* if filter is enabled */
/* The following optimization will process a whole buffer using the
* SSE extension of the Pentium processor.
*/
if (voice->amp_left != 0.0) {
sse_a->sf[0]=voice->amp_left;
sse_a->sf[1]=voice->amp_left;
sse_a->sf[2]=voice->amp_left;
sse_a->sf[3]=voice->amp_left;
movaps_m2r(*sse_a,xmm0);
sse_src=(sse_t*)dsp_buf;
sse_dest=(sse_t*)&dsp_left_buf[0];
for (dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i+=4) {
movaps_m2r(*sse_src,xmm4); /* Load original sample */
mulps_r2r(xmm0,xmm4); /* Gain */
sse_src++;
addps_m2r(*sse_dest,xmm4); /* Mix with buf */
movaps_r2m(xmm4,*sse_dest); /* Store in buf */
sse_dest++;
}
}
if (voice->amp_right != 0.0){
sse_a->sf[0]=voice->amp_right;
sse_a->sf[1]=voice->amp_right;
sse_a->sf[2]=voice->amp_right;
sse_a->sf[3]=voice->amp_right;
movaps_m2r(*sse_a,xmm0);
sse_src=(sse_t*)dsp_buf;
sse_dest=(sse_t*)&dsp_right_buf[0];
for (dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i+=4) {
movaps_m2r(*sse_src,xmm4); /* Load original sample */
sse_src++;
mulps_r2r(xmm0,xmm4); /* Gain */
addps_m2r(*sse_dest,xmm4); /* Mix with buf */
movaps_r2m(xmm4,*sse_dest); /* Store in buf */
sse_dest++;
}
}
/* reverb send. Buffer may be NULL. */
if (dsp_reverb_buf && voice->amp_reverb != 0.0){
sse_a->sf[0]=voice->amp_reverb;
sse_a->sf[1]=voice->amp_reverb;
sse_a->sf[2]=voice->amp_reverb;
sse_a->sf[3]=voice->amp_reverb;
movaps_m2r(*sse_a,xmm0);
sse_src=(sse_t*)dsp_buf;
sse_dest=(sse_t*)&dsp_reverb_buf[0];
for (dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i+=4) {
movaps_m2r(*sse_src,xmm4); /* Load original sample */
sse_src++;
mulps_r2r(xmm0,xmm4); /* Gain */
addps_m2r(*sse_dest,xmm4); /* Mix with buf */
movaps_r2m(xmm4,*sse_dest); /* Store in buf */
sse_dest++;
}
}
/* chorus send. Buffer may be NULL. */
if (dsp_chorus_buf && voice->amp_chorus != 0){
sse_a->sf[0]=voice->amp_chorus;
sse_a->sf[1]=voice->amp_chorus;
sse_a->sf[2]=voice->amp_chorus;
sse_a->sf[3]=voice->amp_chorus;
movaps_m2r(*sse_a,xmm0);
sse_src=(sse_t*)dsp_buf;
sse_dest=(sse_t*)&dsp_chorus_buf[0];
for (dsp_i = 0; dsp_i < FLUID_BUFSIZE; dsp_i+=4) {
movaps_m2r(*sse_src,xmm4); /* Load original sample */
sse_src++;
mulps_r2r(xmm0,xmm4); /* Gain */
addps_m2r(*sse_dest,xmm4); /* Mix with buf */
movaps_r2m(xmm4,*sse_dest); /* Store in buf */
sse_dest++;
}
}