PageRenderTime 38ms CodeModel.GetById 5ms app.highlight 28ms RepoModel.GetById 2ms app.codeStats 0ms

/native/frameworks/espeakengine/jni/com_google_espeakengine.cpp

http://eyes-free.googlecode.com/
C++ | 484 lines | 346 code | 61 blank | 77 comment | 135 complexity | d789e59f418467c930feacdd3ae758fd MD5 | raw file
  1/*
  2 * Copyright (C) 2008 Google Inc.
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *      http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 * See the License for the specific language governing permissions and
 14 * limitations under the License.
 15 */
 16
 17#include <stdio.h>
 18#include <unistd.h>
 19
 20#define LOG_TAG "eSpeak Engine"
 21
 22#include <utils/Log.h>
 23#include <android_runtime/AndroidRuntime.h>
 24#include <speak_lib.h>
 25#include <tts/TtsEngine.h>
 26
 27
 28namespace android {
 29
 30const char * supportedLangIso3[] = {
 31"afr",
 32"bos",
 33"yue",
 34"cmn",
 35"zho",
 36"hrv",
 37"ces",
 38"nld",
 39"eng",
 40"epo",
 41"fin",
 42"fra",
 43"deu",
 44"ell",
 45"hin",
 46"hun",
 47"isl",
 48"ind",
 49"ita",
 50"kur",
 51"lat",
 52"mkd",
 53"nor",
 54"pol",
 55"por",
 56"ron",
 57"rus",
 58"srp",
 59"slk",
 60"spa",
 61"swa",
 62"swe",
 63"tam",
 64"tur",
 65"vie",
 66"cym"
 67 };
 68
 69
 70const char * supportedLang[] = { 
 71"af",
 72"bs",
 73"zh-rHK",
 74"zh",
 75"zh",
 76"hr",
 77"cz",
 78"nl",
 79"en",
 80"eo",
 81"fi",
 82"fr",
 83"de",
 84"el",
 85"hi",
 86"hu",
 87"is",
 88"id",
 89"it",
 90"ku",
 91"la",
 92"mk",
 93"no",
 94"pl",
 95"pt",
 96"ro",
 97"ru",
 98"sr",
 99"sk",
100"es",
101"sw",
102"sv",
103"ta",
104"tu",
105"vi",
106"cy"
107 };
108
109int languageCount = 36;
110
111// Callback to the TTS API
112synthDoneCB_t* ttsSynthDoneCBPointer;
113
114char* currentLanguage = "en-rUS";
115char* currentRate = "140";
116
117char currentLang[10];
118char currentCountry[10];
119char currentVariant[10];
120
121
122/* Functions internal to the eSpeak engine wrapper */
123static void setSpeechRate(int speechRate)
124{
125    espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0);
126}
127
128
129/* Functions exposed to the TTS API */
130
131/* Callback from espeak.  Should call back to the TTS API */
132static int eSpeakCallback(short *wav, int numsamples,
133				      espeak_EVENT *events) {
134    int8_t * castedWav = (int8_t *)wav;
135    size_t bufferSize = 0;
136    if (numsamples < 1){
137      size_t silenceBufferSize = 2;
138      int8_t *silence = new int8_t[silenceBufferSize]; // TODO: This will be a small memory leak, but do it this way for now because passing in an empty buffer can cause a crash.
139      silence[0] = 0;
140      silence[1] = 0;
141      ttsSynthDoneCBPointer(events->user_data, 22050, AudioSystem::PCM_16_BIT, 1, silence, silenceBufferSize, TTS_SYNTH_DONE);
142      return 1;
143    }
144    LOGI("eSpeak callback received! Sample count: %d", numsamples);
145    bufferSize = numsamples * sizeof(short);    
146    ttsSynthDoneCBPointer(events->user_data, 22050, AudioSystem::PCM_16_BIT, 1, castedWav, bufferSize, TTS_SYNTH_PENDING);
147    LOGI("eSpeak callback processed!");
148    return 0;  // continue synthesis (1 is to abort)
149}
150
151
152// Initializes the TTS engine and returns whether initialization succeeded
153tts_result TtsEngine::init(synthDoneCB_t synthDoneCBPtr)
154{
155    // TODO Make sure that the speech data is loaded in 
156    // the directory /sdcard/espeak-data before calling this.
157    int sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,
158                                       4096, "/sdcard", 0);
159
160    if (sampleRate <= 0) {
161        LOGI("eSpeak initialization failed!");
162        return TTS_FAILURE;
163    }
164    espeak_SetSynthCallback(eSpeakCallback);
165
166    int speechRate = 140;
167    espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0);
168
169    espeak_VOICE voice;
170    memset( &voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
171    const char *langNativeString = "en-us";   //Default to US English
172    voice.languages = langNativeString;
173    voice.variant = 0;
174    err = espeak_SetVoiceByProperties(&voice);
175
176    ttsSynthDoneCBPointer = synthDoneCBPtr;
177    return TTS_SUCCESS;
178}
179
180// Shutsdown the TTS engine
181tts_result TtsEngine::shutdown( void )
182{
183    espeak_Terminate();
184    return TTS_SUCCESS;
185}
186
187
188tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
189{   
190    return TTS_FAILURE;
191}
192
193// Language will be specified according to the Android conventions for 
194// localization as documented here: 
195// http://developer.android.com/guide/topics/resources/resources-i18n.html
196//
197// language will be a string of the form "xx" or "xx-rYY", where xx is a 
198// two letter ISO 639-1 language code in lowercase and rYY is a two letter ISO 
199// 3166-1-alpha-2 language code in uppercase preceded by a lowercase "r".
200// Note that the "-rYY" portion may be omitted if the region is unimportant.
201//
202tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
203{ 
204    LOGE("lang input param: %s   country input param: %s", lang, country);
205
206    char language[10];
207    int langIndex = -1;
208    for (int i = 0; i < languageCount; i ++)
209        {
210        if (strcmp(lang, supportedLangIso3[i]) == 0)
211            {
212            langIndex = i;
213            break;
214            }
215        }
216    if (langIndex < 0)
217        {
218        /* The language isn't supported.    */
219        LOGE("TtsEngine::setLanguage called with unsupported language");
220        return TTS_FAILURE;
221        }
222
223    
224    strcpy(currentLang, lang);
225    strcpy(currentCountry, country);
226
227    strcpy(language, supportedLang[langIndex]);
228
229    if (strcmp(language, "en") == 0){
230      if (strcmp(country, "USA") == 0){
231        strcpy(language, "en-rUS");
232      }
233      if (strcmp(country, "GBR") == 0){
234        strcpy(language, "en-rGB");
235      }
236    }
237
238    if (strcmp(language, "es") == 0){
239      if (strcmp(country, "MEX") == 0){
240        strcpy(language, "es-rMX");
241      }
242    }
243
244    LOGE("Language: %s", language);
245
246
247    espeak_VOICE voice;
248    memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
249    voice.variant = 0;
250    char espeakLangStr[6];
251    if ((strlen(language) != 2) && (strlen(language) != 6)){
252        LOGI("Error: Invalid language. Language must be in either xx or xx-rYY format.");
253        return TTS_VALUE_INVALID;
254    }
255    if (strcmp(language, "en-rUS") == 0){
256        strcpy(espeakLangStr, "en-us");
257    } else if (strcmp(language, "en-rGB") == 0){
258        strcpy(espeakLangStr, "en-uk");
259    } else if (strcmp(language, "es-rMX") == 0){
260        strcpy(espeakLangStr, "es-la");
261    } else if (strcmp(language, "zh-rHK") == 0){
262        strcpy(espeakLangStr, "zh");
263        voice.variant = 5;
264    } else {
265        espeakLangStr[0] = language[0];
266        espeakLangStr[1] = language[1];
267        espeakLangStr[2] = 0;
268        // Bail out and do nothing if the language is not supported by eSpeak
269        if ((strcmp(language, "af") != 0) && 
270            (strcmp(language, "bs") != 0) && 
271            (strcmp(language, "zh") != 0) && 
272            (strcmp(language, "hr") != 0) && 
273            (strcmp(language, "cz") != 0) && 
274            (strcmp(language, "nl") != 0) && 
275            (strcmp(language, "en") != 0) && 
276            (strcmp(language, "eo") != 0) && 
277            (strcmp(language, "fi") != 0) && 
278            (strcmp(language, "fr") != 0) && 
279            (strcmp(language, "de") != 0) && 
280            (strcmp(language, "el") != 0) && 
281            (strcmp(language, "hi") != 0) && 
282            (strcmp(language, "hu") != 0) && 
283            (strcmp(language, "is") != 0) && 
284            (strcmp(language, "id") != 0) && 
285            (strcmp(language, "it") != 0) && 
286            (strcmp(language, "ku") != 0) && 
287            (strcmp(language, "la") != 0) && 
288            (strcmp(language, "mk") != 0) && 
289            (strcmp(language, "no") != 0) && 
290            (strcmp(language, "pl") != 0) && 
291            (strcmp(language, "pt") != 0) && 
292            (strcmp(language, "ro") != 0) && 
293            (strcmp(language, "ru") != 0) && 
294            (strcmp(language, "sr") != 0) && 
295            (strcmp(language, "sk") != 0) && 
296            (strcmp(language, "es") != 0) && 
297            (strcmp(language, "sw") != 0) && 
298            (strcmp(language, "sv") != 0) && 
299            (strcmp(language, "ta") != 0) && 
300            (strcmp(language, "tr") != 0) && 
301            (strcmp(language, "vi") != 0) && 
302            (strcmp(language, "cy") != 0) ){
303            LOGI("Error: Unsupported language.");
304            return TTS_PROPERTY_UNSUPPORTED;
305        }
306        // Use American English as the default English
307        if (strcmp(language, "en") == 0) {
308            strcpy(espeakLangStr, "en-us");
309        }
310    }
311    voice.languages = espeakLangStr;
312    espeak_ERROR err = espeak_SetVoiceByProperties(&voice);
313    currentLanguage = new char [strlen(language)];
314    strcpy(currentLanguage, language);
315    return TTS_SUCCESS;
316}
317
318
319tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
320            const char *variant) {
321    // TODO: Make this account for data files!
322    for (int i = 0; i < languageCount; i ++)
323        {
324        if (strcmp(lang, supportedLangIso3[i]) == 0)
325            {
326            return TTS_LANG_AVAILABLE;
327            }
328        }
329    return TTS_LANG_NOT_SUPPORTED;
330}
331
332tts_result TtsEngine::getLanguage(char *language, char *country, char *variant)
333{
334    strcpy(language, currentLang);
335    strcpy(country, currentCountry);
336    strcpy(variant, "");
337    return TTS_SUCCESS;
338}
339
340
341/** setAudioFormat
342 * sets the audio format to use for synthesis, returns what is actually used.
343 * @encoding - reference to encoding format
344 * @rate - reference to sample rate
345 * @channels - reference to number of channels
346 * return tts_result
347 * */
348tts_result TtsEngine::setAudioFormat(AudioSystem::audio_format& encoding, uint32_t& rate,
349            int& channels)
350{
351    // TODO: Fix this!
352    return TTS_SUCCESS;
353}
354
355// Sets the property with the specified value
356//
357// TODO: add pitch property here
358tts_result TtsEngine::setProperty(const char *property, const char *value, const size_t size)
359{
360    int rate;
361    int pitch;
362    int volume;
363
364    /* Set a specific property for the engine.
365       Supported properties include: language (locale), rate, pitch, volume.    */
366    /* Sanity check */
367    if (property == NULL) {
368        LOGE("setProperty called with property NULL");
369        return TTS_PROPERTY_UNSUPPORTED;
370    }
371
372    if (value == NULL) {
373        LOGE("setProperty called with value NULL");
374        return TTS_VALUE_INVALID;
375    }
376
377    if (strncmp(property, "language", 8) == 0) {
378        // TODO: Fix this
379        return TTS_SUCCESS;
380    } else if (strncmp(property, "rate", 4) == 0) {
381        rate = atoi(value);
382        espeak_SetParameter(espeakRATE, rate, 0);
383        // TODO: Fix this - use the return value here, don't just automatically return success!
384        return TTS_SUCCESS;
385    } else if (strncmp(property, "pitch", 5) == 0) {
386        // TODO: Fix this
387        return TTS_SUCCESS;
388    } else if (strncmp(property, "volume", 6) == 0) {
389        // TODO: Fix this
390        return TTS_SUCCESS;
391    }
392    return TTS_PROPERTY_UNSUPPORTED;
393}
394
395
396// Sets the property with the specified value
397//
398// TODO: add pitch property here
399tts_result TtsEngine::getProperty(const char *property, char *value, size_t *iosize)
400{
401    /* Get the property for the engine.
402       This property was previously set by setProperty or by default.       */
403    /* sanity check */
404    if (property == NULL) {
405        LOGE("getProperty called with property NULL");
406        return TTS_PROPERTY_UNSUPPORTED;
407    }
408
409    if (value == NULL) {
410        LOGE("getProperty called with value NULL");
411        return TTS_VALUE_INVALID;
412    }
413
414    if (strncmp(property, "language", 8) == 0) {
415        // TODO: Fix this
416        return TTS_SUCCESS;
417    } else if (strncmp(property, "rate", 4) == 0) {
418        // TODO: Fix this
419        return TTS_SUCCESS;
420    } else if (strncmp(property, "pitch", 5) == 0) {
421        // TODO: Fix this
422        return TTS_SUCCESS;
423    } else if (strncmp(property, "volume", 6) == 0) {
424        // TODO: Fix this
425        return TTS_SUCCESS;
426    }
427
428    /* Unknown property */
429    LOGE("Unsupported property");
430    return TTS_PROPERTY_UNSUPPORTED;
431}
432
433/** synthesizeText
434 *  Synthesizes a text string.
435 *  The text string could be annotated with SSML tags.
436 *  @text     - text to synthesize
437 *  @buffer   - buffer which will receive generated samples
438 *  @bufferSize - size of buffer
439 *  @userdata - pointer to user data which will be passed back to callback function
440 *  return tts_result
441*/
442tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
443{
444    espeak_SetSynthCallback(eSpeakCallback);
445
446    unsigned int unique_identifier;
447    espeak_ERROR err;
448
449    err = espeak_Synth(text,
450                       strlen(text),
451                       0,  // position
452                       POS_CHARACTER,
453                       0,  // end position (0 means no end position)
454                       espeakCHARS_UTF8,
455                       &unique_identifier,
456                       userdata);
457
458    err = espeak_Synchronize();
459    return TTS_SUCCESS;
460}
461
462// Synthesizes IPA text
463tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t bufferSize, void * userdata )
464{
465    // deprecated call
466    return TTS_FAILURE;
467}
468
469
470// Interrupts synthesis
471tts_result TtsEngine::stop()
472{
473    espeak_Cancel();
474    return TTS_SUCCESS;
475}
476
477
478
479TtsEngine* getTtsEngine()
480{
481    return new TtsEngine();
482}
483
484}; // namespace android