mirror of
https://github.com/ioquake/ioq3.git
synced 2024-11-10 07:11:46 +00:00
VoIP: attempt at changing voipPower calc and using it for VAD.
The Speex VAD sort of sucks, honestly, or I'm not using it right. Now trying this algorithm, after denoising:
http://lists.xiph.org/pipermail/speex-dev/2006-March/004269.html
I'll play around to find the threshold for considering a set of frames to be "voice" from there.
Also worth noting: we consider the power of the set of frames as a whole, so you need to sustain power for 0.25 seconds at a time, or it's not "voice."
This commit is contained in:
parent
fdfaff64f8
commit
d4ee08b985
2 changed files with 12 additions and 22 deletions
|
@ -937,10 +937,6 @@ void CL_FirstSnapshot( void ) {
|
||||||
speex_preprocess_ctl(clc.speexPreprocessor,
|
speex_preprocess_ctl(clc.speexPreprocessor,
|
||||||
SPEEX_PREPROCESS_SET_DENOISE, &i);
|
SPEEX_PREPROCESS_SET_DENOISE, &i);
|
||||||
|
|
||||||
i = (cl_voipUseVAD->integer != 0);
|
|
||||||
speex_preprocess_ctl(clc.speexPreprocessor,
|
|
||||||
SPEEX_PREPROCESS_SET_VAD, &i);
|
|
||||||
|
|
||||||
for (i = 0; i < MAX_CLIENTS; i++) {
|
for (i = 0; i < MAX_CLIENTS; i++) {
|
||||||
speex_bits_init(&clc.speexDecoderBits[i]);
|
speex_bits_init(&clc.speexDecoderBits[i]);
|
||||||
speex_bits_reset(&clc.speexDecoderBits[i]);
|
speex_bits_reset(&clc.speexDecoderBits[i]);
|
||||||
|
|
|
@ -283,11 +283,8 @@ void CL_CaptureVoip(void)
|
||||||
return; // packet is pending transmission, don't record more yet.
|
return; // packet is pending transmission, don't record more yet.
|
||||||
|
|
||||||
if (cl_voipUseVAD->modified) {
|
if (cl_voipUseVAD->modified) {
|
||||||
int useVadi = (int) useVad;
|
|
||||||
speex_preprocess_ctl(clc.speexPreprocessor,
|
|
||||||
SPEEX_PREPROCESS_SET_VAD, &useVadi);
|
|
||||||
cl_voipUseVAD->modified = qfalse;
|
|
||||||
Cvar_Set("cl_voipSend", (useVad) ? "1" : "0");
|
Cvar_Set("cl_voipSend", (useVad) ? "1" : "0");
|
||||||
|
cl_voipUseVAD->modified = qfalse;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((useVad) && (!cl_voipSend->integer))
|
if ((useVad) && (!cl_voipSend->integer))
|
||||||
|
@ -339,8 +336,7 @@ void CL_CaptureVoip(void)
|
||||||
// audio capture is always MONO16 (and that's what speex wants!).
|
// audio capture is always MONO16 (and that's what speex wants!).
|
||||||
// 2048 will cover 12 uncompressed frames in narrowband mode.
|
// 2048 will cover 12 uncompressed frames in narrowband mode.
|
||||||
static int16_t sampbuffer[2048];
|
static int16_t sampbuffer[2048];
|
||||||
qboolean isVoice = qfalse;
|
float voipPower = 0.0f;
|
||||||
int16_t voipPower = 0;
|
|
||||||
int speexFrames = 0;
|
int speexFrames = 0;
|
||||||
int wpos = 0;
|
int wpos = 0;
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
|
@ -359,19 +355,15 @@ void CL_CaptureVoip(void)
|
||||||
int16_t *sampptr = &sampbuffer[pos];
|
int16_t *sampptr = &sampbuffer[pos];
|
||||||
int i, bytes;
|
int i, bytes;
|
||||||
|
|
||||||
|
// preprocess samples to remove noise...
|
||||||
|
speex_preprocess_run(clc.speexPreprocessor, sampptr);
|
||||||
|
|
||||||
// check the "power" of this packet...
|
// check the "power" of this packet...
|
||||||
for (i = 0; i < clc.speexFrameSize; i++) {
|
for (i = 0; i < clc.speexFrameSize; i++) {
|
||||||
int16_t s = sampptr[i];
|
const float s = fabs((float) sampptr[i]);
|
||||||
if (s < 0)
|
voipPower += s * s;
|
||||||
s = -s;
|
|
||||||
if (s > voipPower)
|
|
||||||
voipPower = s; // !!! FIXME: this isn't very clever.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// preprocess samples to remove noise, check for voice...
|
|
||||||
if (speex_preprocess_run(clc.speexPreprocessor, sampptr))
|
|
||||||
isVoice = qtrue; // player is probably speaking.
|
|
||||||
|
|
||||||
// encode raw audio samples into Speex data...
|
// encode raw audio samples into Speex data...
|
||||||
speex_bits_reset(&clc.speexEncoderBits);
|
speex_bits_reset(&clc.speexEncoderBits);
|
||||||
speex_encode_int(clc.speexEncoder, sampptr,
|
speex_encode_int(clc.speexEncoder, sampptr,
|
||||||
|
@ -389,10 +381,12 @@ void CL_CaptureVoip(void)
|
||||||
speexFrames++;
|
speexFrames++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((useVad) && (!isVoice)) {
|
clc.voipPower = voipPower / (32768.0f * 32768.0f *
|
||||||
CL_VoipNewGeneration(); // no talk for at least 1/4 second.
|
((float) (clc.speexFrameSize * speexFrames)));
|
||||||
|
|
||||||
|
if ((useVad) && (clc.voipPower > 0.25f)) {
|
||||||
|
CL_VoipNewGeneration(); // no "talk" for at least 1/4 second.
|
||||||
} else {
|
} else {
|
||||||
clc.voipPower = ((float) voipPower) / 32767.0f;
|
|
||||||
clc.voipOutgoingDataSize = wpos;
|
clc.voipOutgoingDataSize = wpos;
|
||||||
clc.voipOutgoingDataFrames = speexFrames;
|
clc.voipOutgoingDataFrames = speexFrames;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue