-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathp5.speech.js
369 lines (314 loc) · 12.8 KB
/
p5.speech.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/*! p5.speech.js v0.0.1 2015-06-12 */
/* updated v0.0.2 2017-10-17 */
/**
* @module p5.speech
* @submodule p5.speech
* @for p5.speech
* @main
*/
/**
* p5.speech
* R. Luke DuBois ([email protected])
* ABILITY Lab / Brooklyn Experimental Media Center
* New York University
* The MIT License (MIT).
*
* https://github.com/IDMNYU/p5.js-speech
*
* Web Speech API: https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
* Web Speech Recognition API: https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html
*/
(function (root, factory) {
if (typeof define === 'function' && define.amd)
define('p5.speech', ['p5'], function (p5) { (factory(p5));});
else if (typeof exports === 'object')
factory(require('../p5'));
else
factory(root['p5']);
}(this, function (p5) {
// =============================================================================
// p5.Speech
// =============================================================================
/**
 * Base class for a Speech Synthesizer.
 *
 * Wraps the window's speechSynthesis engine together with one reusable
 * SpeechSynthesisUtterance, and exposes simple setters plus optional
 * callbacks (onLoad, onStart, onPause, onResume, onEnd).
 *
 * @class p5.Speech
 * @constructor
 * @param {String|Number} [_dv] initial voice, by name or by index into voices[]
 * @param {Function} [_callback] stored as onLoad; called once voices are loaded
 */
p5.Speech = function(_dv, _callback) {
  //
  // speech synthesizers consist of a single synthesis engine
  // per window instance, and a variable number of 'utterance'
  // objects, which can be cached and re-used for, e.g.
  // auditory UI.
  //
  // this implementation assumes a monolithic (one synth,
  // one phrase at a time) system.
  //

  // make a speech synthesizer (this will load voices):
  this.synth = window.speechSynthesis;

  // make an utterance to use with this synthesizer:
  this.utterance = new SpeechSynthesisUtterance();

  this.isLoaded = 0; // do we have voices yet?

  // do we queue new utterances upon firing speak()
  // or interrupt what's speaking:
  this.interrupt = false;

  // callback properties to be filled in within the p5 sketch
  // if the author needs custom callbacks:
  this.onLoad;   // fires when voices are loaded and synth is ready
  this.onStart;  // fires when an utterance begins...
  this.onPause;  // ...is paused...
  this.onResume; // ...resumes...
  this.onEnd;    // ...and ends.

  this.voices = []; // array of available voices (dependent on browser/OS)

  // first parameter of constructor is an initial voice selector
  this.initvoice;
  if(_dv !== undefined) this.initvoice = _dv;
  if(_callback !== undefined) this.onLoad = _callback;

  var that = this; // keep the instance reachable from the handlers below

  // forward utterance events to the user callbacks. these are bound
  // right away (they do not depend on the voice list), so they keep
  // working even if 'voiceschanged' never fires for this instance.
  // the user callback is looked up at event time, so it may be
  // assigned (or replaced) at any point after construction.
  this.utterance.onstart = function(e) {
    if(that.onStart != null) that.onStart(e);
  };
  this.utterance.onpause = function(e) {
    if(that.onPause != null) that.onPause(e);
  };
  this.utterance.onresume = function(e) {
    if(that.onResume != null) that.onResume(e);
  };
  this.utterance.onend = function(e) {
    if(that.onEnd != null) that.onEnd(e);
  };

  // copies the voice list into this object, applies the requested
  // initial voice and fires onLoad(). guarded so it runs only once
  // per instance, however many times it is triggered.
  var voicesReady = function() {
    if(that.isLoaded) return; // run only once
    that.voices = that.synth.getVoices();
    that.isLoaded = 1; // we're ready
    console.log("p5.Speech: voices loaded!");
    if(that.initvoice != undefined) {
      that.setVoice(that.initvoice); // set a custom initial voice
      console.log("p5.Speech: initial voice: " + that.initvoice);
    }
    // fire custom onLoad() callback, if it exists:
    if(that.onLoad != undefined) that.onLoad();
  };

  // 'voiceschanged' fires when the synthesizer has (re)loaded its
  // voices. register with addEventListener where available so that
  // several p5.Speech objects do not overwrite each other's handler;
  // fall back to the on-property otherwise.
  if(typeof this.synth.addEventListener === 'function') {
    this.synth.addEventListener('voiceschanged', voicesReady);
  } else {
    this.synth.onvoiceschanged = voicesReady;
  }

  // if the voice list is already populated there may be no further
  // 'voiceschanged' event to wait for. load on the next tick rather
  // than synchronously, so onLoad() never runs before the constructor
  // has returned to the sketch.
  if(this.synth.getVoices().length > 0) setTimeout(voicesReady, 0);
}; // end p5.Speech constructor

// listVoices() - dump voice names to javascript console:
p5.Speech.prototype.listVoices = function() {
  if(this.isLoaded) {
    for(var i = 0; i < this.voices.length; i++) {
      console.log(this.voices[i].name);
    }
  } else {
    console.log("p5.Speech: voices not loaded yet!");
  }
};

// setVoice() - assign voice to speech synthesizer, by name
// (using voices found in the voices[] array), or by index.
// an index is clamped to the bounds of voices[]; a name that matches
// nothing (or an empty voices[]) assigns undefined as the voice.
// arguments that are neither string nor number are ignored.
p5.Speech.prototype.setVoice = function(_v) {
  // type check so you can set by label or by index:
  if(typeof(_v) == 'string') {
    this.utterance.voice = this.voices.filter(function(v) { return v.name == _v; })[0];
  } else if(typeof(_v) == 'number') {
    this.utterance.voice = this.voices[Math.min(Math.max(_v, 0), this.voices.length - 1)];
  }
};

// volume of voice. clamped to the API range 0.0-1.0.
p5.Speech.prototype.setVolume = function(_v) {
  this.utterance.volume = Math.min(Math.max(_v, 0.0), 1.0);
};

// rate of voice. not all voices support this feature.
// clamped to 0.1-2.0.
p5.Speech.prototype.setRate = function(_v) {
  this.utterance.rate = Math.min(Math.max(_v, 0.1), 2.0);
};

// pitch of voice. not all voices support this feature.
// clamped to 0.01-2.0.
p5.Speech.prototype.setPitch = function(_v) {
  this.utterance.pitch = Math.min(Math.max(_v, 0.01), 2.0);
};

// sets the language of the voice.
p5.Speech.prototype.setLang = function(_lang) {
  this.utterance.lang = _lang;
};

// speak a phrase through the current synthesizer. when 'interrupt'
// is true, anything already speaking or queued is cancelled first.
p5.Speech.prototype.speak = function(_phrase) {
  if(this.interrupt) this.synth.cancel();
  this.utterance.text = _phrase;
  this.synth.speak(this.utterance);
};

// pause the synthesizer (marked "not working" by the original author):
p5.Speech.prototype.pause = function() {
  this.synth.pause();
};

// resume the synthesizer (marked "not working" by the original author):
p5.Speech.prototype.resume = function() {
  this.synth.resume();
};

// stop current utterance. implemented with cancel(), so it behaves
// the same as cancel() below:
p5.Speech.prototype.stop = function() {
  this.synth.cancel();
};

// kill synthesizer completely, clearing any queued utterances:
p5.Speech.prototype.cancel = function() {
  this.synth.cancel(); // KILL SYNTH
};

// setting callbacks with functions instead of assigning the
// onStart / onEnd / onPause / onResume properties directly:
p5.Speech.prototype.started = function(_cb) {
  this.onStart = _cb;
};

p5.Speech.prototype.ended = function(_cb) {
  this.onEnd = _cb;
};

p5.Speech.prototype.paused = function(_cb) {
  this.onPause = _cb;
};

p5.Speech.prototype.resumed = function(_cb) {
  this.onResume = _cb;
};
// =============================================================================
// p5.SpeechRec
// =============================================================================
/**
 * Base class for a Speech Recognizer.
 *
 * Wraps a browser speech recognition object and mirrors the most recent
 * recognition into simple properties (resultString, resultConfidence).
 *
 * @class p5.SpeechRec
 * @constructor
 * @param {String} [_lang] language tag assigned to the recognizer's 'lang'
 * @param {Function} [_callback] stored as onResult; called on each result
 */
p5.SpeechRec = function(_lang, _callback) {
  //
  // speech recognition consists of a recognizer object per
  // window instance that returns a JSON object containing
  // recognition. this JSON object grows when the recognizer
  // is in 'continuous' mode, with new recognized phrases
  // appended into an internal array.
  //
  // this implementation returns the full JSON, but also a set
  // of simple, query-ready properties containing the most
  // recently recognized speech.
  //

  // make a recognizer object. prefer the unprefixed constructor and
  // fall back to the webkit-prefixed one; with neither available,
  // 'rec' is an empty placeholder object and start() does nothing.
  var Recognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if(typeof Recognition === 'function') {
    this.rec = new Recognition();
  } else {
    this.rec = {};
    console.log("p5.SpeechRec: webkitSpeechRecognition not supported in this browser.");
  }

  // first parameter is language model (defaults to empty=U.S. English)
  // no list of valid models in API, but it must use BCP-47.
  // here's some hints:
  // http://stackoverflow.com/questions/14257598/what-are-language-codes-for-voice-recognition-languages-in-chromes-implementati
  if(_lang !== undefined) this.rec.lang = _lang;

  // callback properties to be filled in within the p5 sketch
  // if the author needs custom callbacks:
  this.onResult; // fires when something has been recognized
  this.onStart;  // fires when the recognition system is started...
  this.onError;  // ...has a problem (e.g. the mic is shut off)...
  this.onEnd;    // ...and ends (in non-continuous mode).
  if(_callback !== undefined) this.onResult = _callback;

  // recognizer properties:

  // continuous mode means the object keeps recognizing speech,
  // appending new tokens to the internal JSON.
  this.continuous = false;

  // interimResults means the object will report (i.e. fire its
  // onresult() callback) more frequently, rather than at pauses
  // in microphone input. this gets you quicker, but less accurate,
  // results.
  this.interimResults = false;

  // result data:

  // resultJSON:
  // the full event object handed to onresult(). its 'results' list
  // holds one entry per recognized phrase (appended to, not cleared,
  // in continuous mode); each entry holds alternatives, and each
  // alternative has a 'transcript' and a 'confidence' property.
  this.resultJSON;

  // resultValue:
  // copy of the event's 'returnValue' property.
  // NOTE(review): the original comments describe this as a success
  // flag for the recognition round-trip - confirm that the target
  // browsers actually populate it that way.
  this.resultValue;

  // resultString:
  // the 'transcript' of the most recently recognized speech as a simple
  // string. this will be blown out and replaced at every firing of the
  // onresult() callback.
  this.resultString;

  // resultConfidence:
  // the 'confidence' (0-1) of the most recently recognized speech, e.g.
  // that it reflects what was actually spoken. you can use this to filter
  // out potentially bogus recognition tokens.
  this.resultConfidence;

  var that = this; // keep the instance reachable from the handlers below

  // onresult() fires when the recognition engine has something to
  // report. we keep a reference to the whole event, and also copy
  // out the most recently detected phrase and confidence value.
  this.rec.onresult = function(e) {
    that.resultJSON = e;              // full JSON of callback event
    that.resultValue = e.returnValue; // was successful?

    // first alternative of the newest entry in the result list:
    var results = e.results;
    var latest = (results && results.length > 0) ? results[results.length - 1][0] : undefined;

    // nothing recognized in this event: keep the previous
    // resultString / resultConfidence and skip the user callback
    // instead of throwing on the missing entry.
    if(!latest) return;

    // store latest result in top-level object struct
    that.resultString = latest.transcript.trim();
    that.resultConfidence = latest.confidence;

    // the event is passed along, matching onStart / onError:
    if(that.onResult != undefined) that.onResult(e);
  };

  // fires when the recognition system starts (i.e. when you 'allow'
  // the mic to be used in the browser).
  this.rec.onstart = function(e) {
    if(that.onStart != undefined) that.onStart(e);
  };

  // fires on a client-side error.
  this.rec.onerror = function(e) {
    if(that.onError != undefined) that.onError(e);
  };

  // fires when the recognition finishes, in non-continuous mode.
  this.rec.onend = function(e) {
    if(that.onEnd != undefined) that.onEnd(e);
  };
}; // end p5.SpeechRec constructor

// start the speech recognition engine. this will prompt a
// security dialog in the browser asking for permission to
// use the microphone. this permission will persist throughout
// this one 'start' cycle. if you need to recognize speech more
// than once, use continuous mode rather than firing start()
// multiple times in a single script.
//
// _continuous / _interim, when given, replace this.continuous /
// this.interimResults before both are copied onto the recognizer.
// does nothing when no real recognizer could be created.
p5.SpeechRec.prototype.start = function(_continuous, _interim) {
  // test the object we actually hold rather than a window global, so
  // this works for whichever constructor the recognizer came from:
  if(typeof this.rec.start !== 'function') return;

  if(_continuous !== undefined) this.continuous = _continuous;
  if(_interim !== undefined) this.interimResults = _interim;
  this.rec.continuous = this.continuous;
  this.rec.interimResults = this.interimResults;
  this.rec.start();
};
}));
/*
todo:
* fix callbacks (pause, resume) in synthesizer.
* support speech grammar models for scoped auditory UI.
* support markdown, boundaries, etc for better synthesis tracking.
* support utterance parser for long phrases.
*/
// EOF