libs-gui/Tools/speech_recognizer/PocketsphinxSpeechRecognitionEngine.m

#import "GSSpeechRecognitionEngine.h"
#include <sphinxbase/err.h>
#include <sphinxbase/ad.h>
#include <pocketsphinx/pocketsphinx.h>
/**
* Implementation of a speech engine using pocketsphinx. This should be the default
* for resource-constrained platforms.
*/
#define MODELDIR "/share/pocketsphinx/model"
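/* NB (assumption, not from the original source): pocketsphinx normally
 * installs its models under <install-prefix>/share/pocketsphinx/model, so the
 * absolute path above will usually need the installation prefix (e.g. /usr or
 * /usr/local) prepended, or MODELDIR adjusted, to match the target system. */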
@interface PocketsphinxSpeechRecognitionEngine : GSSpeechRecognitionEngine
{
  ps_decoder_t *ps;     /* pocketsphinx decoder handle */
  cmd_ln_t *config;     /* decoder configuration (acoustic model, LM, dictionary) */

  /* The following fields are currently unused by the live-audio loop below. */
  FILE *fh;
  char const *uttid;
  int16 buf[512];
  int rv;
  int32 score;

  NSThread *_listeningThread;
  id<NSSpeechRecognizerDelegate> _delegate;
}
@end
@implementation PocketsphinxSpeechRecognitionEngine
+ (void)initialize
{
}
- (id)init
{
  if ((self = [super init]) != nil)
    {
      config = cmd_ln_init(NULL, ps_args(), TRUE,
                           "-hmm", MODELDIR "/en-us/en-us",
                           "-lm", MODELDIR "/en-us/en-us.lm.bin",
                           "-dict", MODELDIR "/en-us/cmudict-en-us.dict",
                           NULL);
      ps = ps_init(config);
      if (ps == NULL)
        {
          NSLog(@"Pocketsphinx initialization failed; check that the en-us "
                @"model files are installed under %s", MODELDIR);
        }
      _listeningThread = nil;
    }
  return self;
}
- (void) _recognizedWord: (NSString *)word
{
  /* Invoked on the main thread for each recognized utterance (see
   * -recognize); currently a stub. */
}
/*
* Main utterance processing loop:
* for (;;) {
* start utterance and wait for speech to process
* decoding till end-of-utterance silence will be detected
* print utterance result;
* }
*/
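/* (The loop below is essentially the pocketsphinx "continuous" live-decode
 * example: read audio from the capture device, feed it to the decoder, and
 * segment utterances on the speech/silence transitions reported by
 * ps_get_in_speech().) */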
- (void) recognize
{
  ad_rec_t *ad;
  int16 adbuf[2048];
  uint8 utt_started, in_speech;
  int32 k;
  char const *hyp;

  if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                        (int) cmd_ln_float32_r(config, "-samprate"))) == NULL)
    E_FATAL("Failed to open audio device\n");
  if (ad_start_rec(ad) < 0)
    E_FATAL("Failed to start recording\n");
  if (ps_start_utt(ps) < 0)
    E_FATAL("Failed to start utterance\n");
  utt_started = FALSE;
  E_INFO("Ready....\n");

  for (;;)
    {
      if ((k = ad_read(ad, adbuf, 2048)) < 0)
        E_FATAL("Failed to read audio\n");
      ps_process_raw(ps, adbuf, k, FALSE, FALSE);
      in_speech = ps_get_in_speech(ps);
      if (in_speech && !utt_started)
        {
          utt_started = TRUE;
          E_INFO("Listening...\n");
        }
      if (!in_speech && utt_started)
        {
          /* speech -> silence transition, time to start new utterance */
          ps_end_utt(ps);
          hyp = ps_get_hyp(ps, NULL);
          if (hyp != NULL)
            {
              NSString *recognizedString =
                [NSString stringWithCString: hyp
                                   encoding: NSUTF8StringEncoding];
              [self performSelectorOnMainThread: @selector(_recognizedWord:)
                                     withObject: recognizedString
                                  waitUntilDone: NO];
              printf("%s\n", hyp);
              fflush(stdout);
            }
          if (ps_start_utt(ps) < 0)
            E_FATAL("Failed to start utterance\n");
          utt_started = FALSE;
          E_INFO("Ready....\n");
        }
      [NSThread sleepForTimeInterval: 0.01];
    }
  /* Not reached: the loop above runs until the thread exits. */
  ad_close(ad);
}
- (void) _startProcessing
{
}
- (void) startListening
{
[NSThread detachNewThreadSelector: @selector(recognize)
toTarget: self
withObject: nil];
}
- (void) stopListening
{
}
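/* Sketch of a possible -stopListening implementation (an assumption, not in
 * the original source): retain the thread created in -startListening in
 * _listeningThread, send it -cancel here, and have the -recognize loop check
 * [[NSThread currentThread] isCancelled] on each iteration so it can close
 * the audio device and return. */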
@end
@implementation GSSpeechRecognitionEngine (Pocketsphinx)
+ (GSSpeechRecognitionEngine*)defaultSpeechRecognitionEngine
{
return AUTORELEASE([[PocketsphinxSpeechRecognitionEngine alloc] init]);
}
@end
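
/*
 * Usage sketch (an assumption based on the API in this file, not part of the
 * original source): a client such as the speech recognition server would
 * typically do something like
 *
 *   GSSpeechRecognitionEngine *engine =
 *     [GSSpeechRecognitionEngine defaultSpeechRecognitionEngine];
 *   [engine startListening];   // spawns the background -recognize thread
 *   // ...
 *   [engine stopListening];
 *
 * Recognized utterances are forwarded to -_recognizedWord: on the main
 * thread.
 */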