2020-02-01 10:43:06 +00:00
|
|
|
#import "GSSpeechRecognitionEngine.h"

#import <Foundation/NSAutoreleasePool.h>
#import <Foundation/NSDistributedNotificationCenter.h>
#import <Foundation/NSThread.h>

#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>
#include <pocketsphinx/pocketsphinx.h>
|
2020-01-30 21:28:18 +00:00
|
|
|
|
|
|
|
/**
|
2020-02-01 10:43:06 +00:00
|
|
|
* Implementation of a speech engine using pocketsphinx. This should be the default
|
2020-01-30 21:28:18 +00:00
|
|
|
* for resource-constrained platforms.
|
|
|
|
*/
|
2020-02-03 12:56:55 +00:00
|
|
|
|
|
|
|
/* Model directory suffix; not referenced in the visible portion of this file. */
#define MODELDIR "/share/pocketsphinx/model"

/*
 * Command-line style option table handed to cmd_ln_parse_r() when the
 * engine is initialised.  It consists of the stock pocketsphinx options
 * followed by the extra options declared below.  Each extra entry is
 * { name, type, default value, description }.
 */
static const arg_t cont_args_def[] = {
  POCKETSPHINX_OPTIONS,

  /* Argument file. */
  {"-argfile", ARG_STRING,  NULL, "Argument file giving extra arguments."},

  /* Audio input selection. */
  {"-adcdev",  ARG_STRING,  NULL, "Name of audio device to use for input."},
  {"-infile",  ARG_STRING,  NULL, "Audio file to transcribe."},
  {"-inmic",   ARG_BOOLEAN, "no", "Transcribe audio from microphone."},

  /* Output formatting. */
  {"-time",    ARG_BOOLEAN, "no", "Print word times in file transcription."},

  CMDLN_EMPTY_OPTION
};
|
|
|
|
|
2020-02-01 10:57:32 +00:00
|
|
|
/*
 * Concrete GSSpeechRecognitionEngine subclass that keeps the pocketsphinx
 * decoder state as instance variables.
 */
@interface PocketsphinxSpeechRecognitionEngine : GSSpeechRecognitionEngine
{
  /* Decoder and the configuration it is created from (both set in -init). */
  ps_decoder_t *ps;
  cmd_ln_t *config;

  /*
   * Bookkeeping fields that are declared here but not referenced by the
   * methods of this class's implementation in this file.
   */
  FILE *fh;
  char const *uttid;
  int16 buf[512];
  int rv;
  int32 score;

  /* Handle for the background listening thread (nil when none is tracked). */
  NSThread *_listeningThread;

  /* Delegate reference; not referenced by the implementation in this file. */
  id<NSSpeechRecognizerDelegate> _delegate;
}
@end
|
|
|
|
|
2020-02-01 10:57:32 +00:00
|
|
|
@implementation PocketsphinxSpeechRecognitionEngine

+ (void) initialize
{
  /* Intentionally empty: no class-level setup is performed. */
}

/**
 * Creates the pocketsphinx configuration (microphone input enabled) and
 * the decoder.  Returns nil, after releasing the receiver, if either the
 * configuration or the decoder cannot be created.
 */
- (id) init
{
  if ((self = [super init]) != nil)
    {
      char *arg[3];

      /* Equivalent of the command line: <program> -inmic yes */
      arg[0] = "";
      arg[1] = "-inmic";
      arg[2] = "yes";

      config = cmd_ln_parse_r(NULL, cont_args_def, 3, arg, TRUE);
      if (config == NULL)
        {
          NSLog(@"Could not parse speech recognizer configuration");
          RELEASE(self);
          return nil;
        }
      ps_default_search_args(config);

      ps = ps_init(config);
      if (ps == NULL)
        {
          NSLog(@"Could not start server");
          /* -dealloc frees config; releasing self avoids leaking the
           * half-initialised instance. */
          RELEASE(self);
          return nil;
        }

      _listeningThread = nil;
    }
  return self;
}

/**
 * Asks any tracked listening thread to stop, then frees the decoder and
 * the configuration created in -init.
 */
- (void) dealloc
{
  if (_listeningThread != nil)
    {
      [_listeningThread cancel];
      RELEASE(_listeningThread);
      _listeningThread = nil;
    }
  if (ps != NULL)
    {
      ps_free(ps);
      ps = NULL;
    }
  if (config != NULL)
    {
      cmd_ln_free_r(config);
      config = NULL;
    }
  [super dealloc];
}

/**
 * Posts GSSpeechRecognizerDidRecognizeWordNotification on the default
 * distributed notification center with the recognized text as the
 * notification object.  -recognize schedules this on the main thread.
 */
- (void) _recognizedWord: (NSString *)word
{
  [[NSDistributedNotificationCenter defaultCenter]
    postNotificationName: GSSpeechRecognizerDidRecognizeWordNotification
                  object: word
                userInfo: nil];
}

/*
 * NOTE: This code is derived from continuous.c under pocketsphinx
 * which is MIT licensed
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 *
 * The loop runs until the current thread is cancelled (see -stopListening)
 * or an audio/decoder call fails.  On every exit path the audio device is
 * closed and any utterance still open on the decoder is ended.
 */
- (void) recognize
{
  NSAutoreleasePool *pool = [NSAutoreleasePool new];
  NSThread *thread = [NSThread currentThread];
  ad_rec_t *ad;
  int16 adbuf[2048];
  BOOL utt_started = NO;  /* speech has been heard in the open utterance */
  BOOL utt_open = NO;     /* ps_start_utt() succeeded and is not yet ended */

  ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                   (int) cmd_ln_float32_r(config, "-samprate"));
  if (ad == NULL)
    {
      NSLog(@"Failed to open audio device");
      RELEASE(pool);
      return;
    }

  if (ad_start_rec(ad) < 0)
    {
      NSLog(@"Failed to start recording");
      ad_close(ad);
      RELEASE(pool);
      return;
    }

  if (ps_start_utt(ps) < 0)
    {
      NSLog(@"Failed to start utterance");
      ad_close(ad);
      RELEASE(pool);
      return;
    }
  utt_open = YES;
  NSLog(@"Ready....");

  while ([thread isCancelled] == NO)
    {
      /* Per-iteration pool so temporaries created while logging and
       * building strings do not pile up for the life of the thread. */
      NSAutoreleasePool *loopPool = [NSAutoreleasePool new];
      BOOL keepGoing = YES;
      int32 k = ad_read(ad, adbuf, 2048);

      if (k < 0)
        {
          NSLog(@"Failed to read audio");
          keepGoing = NO;
        }
      else
        {
          BOOL in_speech;

          ps_process_raw(ps, adbuf, k, FALSE, FALSE);
          in_speech = ps_get_in_speech(ps);

          if (in_speech && !utt_started)
            {
              utt_started = YES;
              NSLog(@"Listening...");
            }

          if (!in_speech && utt_started)
            {
              char const *hyp;

              /* speech -> silence transition, time to start new utterance */
              ps_end_utt(ps);
              utt_open = NO;

              hyp = ps_get_hyp(ps, NULL);
              if (hyp != NULL)
                {
                  NSString *recognizedString
                    = [NSString stringWithCString: hyp
                                         encoding: NSUTF8StringEncoding];

                  /* The conversion yields nil for bytes that are not
                   * valid UTF-8; do not forward a nil word. */
                  if (recognizedString != nil)
                    {
                      [self performSelectorOnMainThread:
                              @selector(_recognizedWord:)
                                             withObject: recognizedString
                                          waitUntilDone: NO];
                    }
                  NSDebugLog(@"RECOGNIZED WORD: %s", hyp);
                }

              if (ps_start_utt(ps) < 0)
                {
                  NSLog(@"Failed to start utterance");
                  keepGoing = NO;
                }
              else
                {
                  utt_open = YES;
                  utt_started = NO;
                  NSLog(@"Ready....");
                }
            }
        }

      RELEASE(loopPool);
      if (keepGoing == NO)
        {
          break;
        }
      [NSThread sleepForTimeInterval: 0.01];
    }

  if (utt_open)
    {
      /* Leave the decoder without a dangling utterance so that a later
       * -recognize run can start a new one. */
      ps_end_utt(ps);
    }
  ad_close(ad);
  RELEASE(pool);
}

/**
 * Starts the recognition loop on a background thread.  Does nothing if a
 * previously started loop is still running; a loop that has already
 * finished (for example after an audio failure) is replaced.
 */
- (void) startListening
{
  if (_listeningThread != nil)
    {
      if ([_listeningThread isFinished] == NO)
        {
          return;
        }
      RELEASE(_listeningThread);
      _listeningThread = nil;
    }

  _listeningThread = [[NSThread alloc] initWithTarget: self
                                             selector: @selector(recognize)
                                               object: nil];
  [_listeningThread start];
}

/**
 * Requests that the recognition loop stop.  The loop notices the
 * cancellation at its next iteration and then closes the audio device.
 * NOTE(review): this does not wait for the loop to exit, so calling
 * -startListening immediately afterwards may briefly overlap with the
 * old loop winding down.
 */
- (void) stopListening
{
  if (_listeningThread != nil)
    {
      [_listeningThread cancel];
      RELEASE(_listeningThread);
      _listeningThread = nil;
    }
}

@end
|
|
|
|
|
2020-02-01 10:43:06 +00:00
|
|
|
@implementation GSSpeechRecognitionEngine (Pocketsphinx)

/**
 * Returns an autoreleased PocketsphinxSpeechRecognitionEngine, or nil
 * when -init of that class returns nil.
 */
+ (GSSpeechRecognitionEngine*) defaultSpeechRecognitionEngine
{
  PocketsphinxSpeechRecognitionEngine *engine;

  engine = [[PocketsphinxSpeechRecognitionEngine alloc] init];
  return AUTORELEASE(engine);
}

@end
|