libs-gui/Tools/speech_recognizer/PocketsphinxSpeechRecognitionEngine.m

234 lines
6.2 KiB
Mathematica
Raw Permalink Normal View History

2020-02-07 09:35:35 +00:00
/* Implementation of class PocketsphinxSpeechRecognitionEngine
Copyright (C) 2019 Free Software Foundation, Inc.
By: Gregory John Casamento
Date: Fri Dec 6 04:55:59 EST 2019
This file is part of the GNUstep Library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110 USA.
*/
2020-02-01 10:43:06 +00:00
#import "GSSpeechRecognitionEngine.h"
#import <Foundation/NSDistributedNotificationCenter.h>
2020-02-03 16:02:03 +00:00
#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>
2020-02-03 06:02:50 +00:00
#include <pocketsphinx/pocketsphinx.h>
2020-01-30 21:28:18 +00:00
/**
 * Implementation of a speech engine using pocketsphinx.  This should be the
 * default for resource-constrained platforms.
 */
2020-02-03 12:56:55 +00:00
/* Model directory path fragment.  NOTE(review): not referenced by any code
 * visible in this file -- confirm whether it is still needed. */
#define MODELDIR "/share/pocketsphinx/model"
/*
 * Option table passed to cmd_ln_parse_r() in -init.  It consists of the
 * stock POCKETSPHINX_OPTIONS followed by a few extra options; each extra
 * entry gives the option name, its type, its default value (NULL when
 * there is none) and a help string.  CMDLN_EMPTY_OPTION ends the table.
 */
static const arg_t cont_args_def[] = {
POCKETSPHINX_OPTIONS,
/* Argument file. */
{"-argfile",
ARG_STRING,
NULL,
"Argument file giving extra arguments."},
/* Audio input device; read back with cmd_ln_str_r() in -recognize. */
{"-adcdev",
ARG_STRING,
NULL,
"Name of audio device to use for input."},
/* Audio file input. */
{"-infile",
ARG_STRING,
NULL,
"Audio file to transcribe."},
/* Microphone input; -init passes "-inmic yes" explicitly. */
{"-inmic",
ARG_BOOLEAN,
"yes",
"Transcribe audio from microphone."},
/* Word timing output for file transcription. */
{"-time",
ARG_BOOLEAN,
"no",
"Print word times in file transcription."},
CMDLN_EMPTY_OPTION
};
/**
 * Speech recognition engine that decodes microphone audio with pocketsphinx.
 * The decoder is created in -init; -start spawns a thread that runs
 * -recognize, and -stop cancels that thread.
 */
@interface PocketsphinxSpeechRecognitionEngine : GSSpeechRecognitionEngine
{
  ps_decoder_t *ps;            // decoder returned by ps_init() in -init
  cmd_ln_t *config;            // options parsed by cmd_ln_parse_r() in -init
  /* NOTE(review): the five ivars below are not referenced by any method
   * visible in this file; they are kept so the instance layout is unchanged. */
  FILE *fh;
  char const *uttid;
  int16 buf[512];
  int rv;
  int32 score;
  NSThread *_listeningThread;  // created by -start, released and set to nil by -stop
  /* NOTE(review): not referenced by any method visible in this file. */
  id<NSSpeechRecognizerDelegate> _delegate;
}
@end
@implementation PocketsphinxSpeechRecognitionEngine
2020-02-03 12:56:55 +00:00
2020-01-30 21:28:18 +00:00
/**
 * Creates the pocketsphinx configuration and decoder for microphone input.
 *
 * Returns the initialized engine, or nil when the configuration cannot be
 * parsed or the decoder cannot be created.  On failure the receiver is
 * released, so the caller does not own anything.
 */
- (id)init
{
  if ((self = [super init]) != nil)
    {
      char *arg[3];

      /* Equivalent of the command line "<prog> -inmic yes". */
      arg[0] = "";
      arg[1] = "-inmic";
      arg[2] = "yes";
      config = cmd_ln_parse_r(NULL, cont_args_def, 3, arg, TRUE);
      if (config == NULL)
        {
          /* Do not hand a NULL configuration to the decoder. */
          NSLog(@"Could not parse speech recognition arguments");
          RELEASE(self);
          return nil;
        }

      // turn off pocketsphinx output
      err_set_logfp(NULL);
      err_set_debug_level(0);

      ps_default_search_args(config);
      ps = ps_init(config);
      if (ps == NULL)
        {
          cmd_ln_free_r(config);
          config = NULL;  // avoid a second free when the receiver is deallocated
          NSLog(@"Could not start server");
          RELEASE(self);  // the half-built instance is not returned, so drop it
          return nil;
        }

      _listeningThread = nil;
    }

  return self;
}

/**
 * Releases the listening thread reference and frees the decoder and the
 * configuration allocated in -init.
 */
- (void) dealloc
{
  [_listeningThread cancel];
  RELEASE(_listeningThread);
  _listeningThread = nil;

  if (ps != NULL)
    {
      ps_free(ps);
      ps = NULL;
    }
  if (config != NULL)
    {
      cmd_ln_free_r(config);
      config = NULL;
    }

  [super dealloc];
}
2020-02-03 06:02:50 +00:00
2020-02-03 16:02:03 +00:00
/**
 * Posts GSSpeechRecognizerDidRecognizeWordNotification on the default
 * distributed notification center, with the recognized text as the
 * notification object and no user info.
 */
- (void) _recognizedWord: (NSString *)word
{
  NSDistributedNotificationCenter *center;

  center = [NSDistributedNotificationCenter defaultCenter];
  [center postNotificationName: GSSpeechRecognizerDidRecognizeWordNotification
                        object: word
                      userInfo: nil];
}
/*
 * NOTE: This code is derived from continuous.c under pocketsphinx
 * which is MIT licensed
 *
 * Main utterance processing loop:
 *     while (YES) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 *
 * Entry point of the listening thread.  Opens the audio device named by
 * the "-adcdev" option at the "-samprate" rate, feeds the captured samples
 * to the decoder and, on each speech -> silence transition, forwards the
 * hypothesis to -_recognizedWord: on the main thread.  The loop runs until
 * the listening thread is cancelled or cleared, or an audio/decoder call
 * fails.  The audio device is closed on every exit path.
 */
- (void) recognize
{
  /* The thread entry point provides its own pool for autoreleased objects. */
  NSAutoreleasePool *pool = [NSAutoreleasePool new];
  ad_rec_t *ad;
  int16 adbuf[2048];
  BOOL utt_started, in_speech;
  BOOL utt_open = NO;   // YES while ps_start_utt() has no matching ps_end_utt()
  int32 k;
  char const *hyp;

  NSDebugLog(@"** Starting speech recognition loop");

  if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                        (int) cmd_ln_float32_r(config,
                                               "-samprate"))) == NULL)
    {
      NSLog(@"Failed to open audio device");
      RELEASE(pool);
      return;
    }

  if (ad_start_rec(ad) < 0)
    {
      NSLog(@"Failed to start recording");
      ad_close(ad);     // do not leak the opened device
      RELEASE(pool);
      return;
    }

  if (ps_start_utt(ps) < 0)
    {
      NSLog(@"Failed to start utterance");
      ad_close(ad);     // do not leak the opened device
      RELEASE(pool);
      return;
    }
  utt_open = YES;
  utt_started = NO;

  NSDebugLog(@"Ready.... <%@, %d>", _listeningThread, [_listeningThread isCancelled]);

  while([_listeningThread isCancelled] == NO && _listeningThread != nil)
    {
      if ((k = ad_read(ad, adbuf, 2048)) < 0)
        {
          NSLog(@"Failed to read audio");
          break;
        }

      ps_process_raw(ps, adbuf, k, FALSE, FALSE);
      in_speech = ps_get_in_speech(ps);

      if (in_speech && !utt_started)
        {
          utt_started = YES;
          NSDebugLog(@"Listening...");
        }

      if (!in_speech && utt_started)
        {
          /* speech -> silence transition, time to start new utterance */
          ps_end_utt(ps);
          utt_open = NO;

          hyp = ps_get_hyp(ps, NULL);
          if (hyp != NULL)
            {
              NSString *recognizedString = [NSString stringWithCString: hyp
                                                             encoding: NSUTF8StringEncoding];

              [self performSelectorOnMainThread: @selector(_recognizedWord:)
                                     withObject: recognizedString
                                  waitUntilDone: NO];
              NSDebugLog(@"Word: %s", hyp);
            }

          if (ps_start_utt(ps) < 0)
            {
              /* Without an open utterance the decoder cannot accept more
               * audio, so stop instead of looping on a broken decoder. */
              NSLog(@"Failed to start utterance");
              break;
            }
          utt_open = YES;
          utt_started = NO;

          NSDebugLog(@"Ready.... <%@, %d>", _listeningThread, [_listeningThread isCancelled]);

          /* Drop the temporaries of this utterance so that a long-running
           * loop does not accumulate them. */
          RELEASE(pool);
          pool = [NSAutoreleasePool new];
        }

      [NSThread sleepForTimeInterval: 0.01];
    }

  // Close everything...
  if (utt_open)
    {
      ps_end_utt(ps);
    }
  ad_close(ad);
  RELEASE(pool);
}
/**
 * Starts the background thread that runs -recognize.
 *
 * Does nothing when a listening thread already exists and has not
 * finished: a second loop would share the same decoder and audio device,
 * and overwriting the ivar would leak the first thread.  A thread that has
 * already finished is released before a new one is created.
 */
- (void) start
{
  if (_listeningThread != nil)
    {
      if ([_listeningThread isFinished] == NO)
        {
          NSLog(@"Speech recognition thread %@ is already running", _listeningThread);
          return;
        }
      /* The previous loop ended on its own; drop it before restarting. */
      RELEASE(_listeningThread);
      _listeningThread = nil;
    }

  _listeningThread =
    [[NSThread alloc] initWithTarget: self
                            selector: @selector(recognize)
                              object: nil];
  [_listeningThread setName: @"Speech Recognition Loop"];
  NSLog(@"Starting - Thread info for speech recognition server %@", _listeningThread);
  [_listeningThread start];
}
/**
 * Cancels the listening thread, gives up this object's reference to it and
 * resets the ivar to nil.  The loop in -recognize checks both the cancelled
 * flag and the ivar, so it exits on its next pass.
 */
- (void) stop
{
  NSThread *listener = _listeningThread;

  NSLog(@"Stop listening thread %@", listener);
  [listener cancel];
  RELEASE(listener);
  _listeningThread = nil;
}
2020-01-30 21:28:18 +00:00
@end
2020-02-01 10:43:06 +00:00
/**
 * Category supplying the pocketsphinx-backed default engine.
 */
@implementation GSSpeechRecognitionEngine (Pocketsphinx)

/**
 * Returns a new, autoreleased PocketsphinxSpeechRecognitionEngine, or nil
 * when its -init fails.
 */
+ (GSSpeechRecognitionEngine*)defaultSpeechRecognitionEngine
{
  PocketsphinxSpeechRecognitionEngine *engine;

  engine = [[PocketsphinxSpeechRecognitionEngine alloc] init];
  return AUTORELEASE(engine);
}

@end