/native/frameworks/espeakengine/jni/com_google_espeakengine.cpp
C++ | 484 lines | 346 code | 61 blank | 77 comment | 135 complexity | d789e59f418467c930feacdd3ae758fd MD5 | raw file
1/* 2 * Copyright (C) 2008 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include <stdio.h> 18#include <unistd.h> 19 20#define LOG_TAG "eSpeak Engine" 21 22#include <utils/Log.h> 23#include <android_runtime/AndroidRuntime.h> 24#include <speak_lib.h> 25#include <tts/TtsEngine.h> 26 27 28namespace android { 29 30const char * supportedLangIso3[] = { 31"afr", 32"bos", 33"yue", 34"cmn", 35"zho", 36"hrv", 37"ces", 38"nld", 39"eng", 40"epo", 41"fin", 42"fra", 43"deu", 44"ell", 45"hin", 46"hun", 47"isl", 48"ind", 49"ita", 50"kur", 51"lat", 52"mkd", 53"nor", 54"pol", 55"por", 56"ron", 57"rus", 58"srp", 59"slk", 60"spa", 61"swa", 62"swe", 63"tam", 64"tur", 65"vie", 66"cym" 67 }; 68 69 70const char * supportedLang[] = { 71"af", 72"bs", 73"zh-rHK", 74"zh", 75"zh", 76"hr", 77"cz", 78"nl", 79"en", 80"eo", 81"fi", 82"fr", 83"de", 84"el", 85"hi", 86"hu", 87"is", 88"id", 89"it", 90"ku", 91"la", 92"mk", 93"no", 94"pl", 95"pt", 96"ro", 97"ru", 98"sr", 99"sk", 100"es", 101"sw", 102"sv", 103"ta", 104"tu", 105"vi", 106"cy" 107 }; 108 109int languageCount = 36; 110 111// Callback to the TTS API 112synthDoneCB_t* ttsSynthDoneCBPointer; 113 114char* currentLanguage = "en-rUS"; 115char* currentRate = "140"; 116 117char currentLang[10]; 118char currentCountry[10]; 119char currentVariant[10]; 120 121 122/* Functions internal to the eSpeak engine wrapper */ 123static void setSpeechRate(int speechRate) 124{ 125 espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0); 126} 127 128 129/* Functions exposed to the TTS API */ 130 131/* Callback from espeak. Should call back to the TTS API */ 132static int eSpeakCallback(short *wav, int numsamples, 133 espeak_EVENT *events) { 134 int8_t * castedWav = (int8_t *)wav; 135 size_t bufferSize = 0; 136 if (numsamples < 1){ 137 size_t silenceBufferSize = 2; 138 int8_t *silence = new int8_t[silenceBufferSize]; // TODO: This will be a small memory leak, but do it this way for now because passing in an empty buffer can cause a crash. 139 silence[0] = 0; 140 silence[1] = 0; 141 ttsSynthDoneCBPointer(events->user_data, 22050, AudioSystem::PCM_16_BIT, 1, silence, silenceBufferSize, TTS_SYNTH_DONE); 142 return 1; 143 } 144 LOGI("eSpeak callback received! Sample count: %d", numsamples); 145 bufferSize = numsamples * sizeof(short); 146 ttsSynthDoneCBPointer(events->user_data, 22050, AudioSystem::PCM_16_BIT, 1, castedWav, bufferSize, TTS_SYNTH_PENDING); 147 LOGI("eSpeak callback processed!"); 148 return 0; // continue synthesis (1 is to abort) 149} 150 151 152// Initializes the TTS engine and returns whether initialization succeeded 153tts_result TtsEngine::init(synthDoneCB_t synthDoneCBPtr) 154{ 155 // TODO Make sure that the speech data is loaded in 156 // the directory /sdcard/espeak-data before calling this. 157 int sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 158 4096, "/sdcard", 0); 159 160 if (sampleRate <= 0) { 161 LOGI("eSpeak initialization failed!"); 162 return TTS_FAILURE; 163 } 164 espeak_SetSynthCallback(eSpeakCallback); 165 166 int speechRate = 140; 167 espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0); 168 169 espeak_VOICE voice; 170 memset( &voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first 171 const char *langNativeString = "en-us"; //Default to US English 172 voice.languages = langNativeString; 173 voice.variant = 0; 174 err = espeak_SetVoiceByProperties(&voice); 175 176 ttsSynthDoneCBPointer = synthDoneCBPtr; 177 return TTS_SUCCESS; 178} 179 180// Shutsdown the TTS engine 181tts_result TtsEngine::shutdown( void ) 182{ 183 espeak_Terminate(); 184 return TTS_SUCCESS; 185} 186 187 188tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) 189{ 190 return TTS_FAILURE; 191} 192 193// Language will be specified according to the Android conventions for 194// localization as documented here: 195// http://developer.android.com/guide/topics/resources/resources-i18n.html 196// 197// language will be a string of the form "xx" or "xx-rYY", where xx is a 198// two letter ISO 639-1 language code in lowercase and rYY is a two letter ISO 199// 3166-1-alpha-2 language code in uppercase preceded by a lowercase "r". 200// Note that the "-rYY" portion may be omitted if the region is unimportant. 201// 202tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant ) 203{ 204 LOGE("lang input param: %s country input param: %s", lang, country); 205 206 char language[10]; 207 int langIndex = -1; 208 for (int i = 0; i < languageCount; i ++) 209 { 210 if (strcmp(lang, supportedLangIso3[i]) == 0) 211 { 212 langIndex = i; 213 break; 214 } 215 } 216 if (langIndex < 0) 217 { 218 /* The language isn't supported. */ 219 LOGE("TtsEngine::setLanguage called with unsupported language"); 220 return TTS_FAILURE; 221 } 222 223 224 strcpy(currentLang, lang); 225 strcpy(currentCountry, country); 226 227 strcpy(language, supportedLang[langIndex]); 228 229 if (strcmp(language, "en") == 0){ 230 if (strcmp(country, "USA") == 0){ 231 strcpy(language, "en-rUS"); 232 } 233 if (strcmp(country, "GBR") == 0){ 234 strcpy(language, "en-rGB"); 235 } 236 } 237 238 if (strcmp(language, "es") == 0){ 239 if (strcmp(country, "MEX") == 0){ 240 strcpy(language, "es-rMX"); 241 } 242 } 243 244 LOGE("Language: %s", language); 245 246 247 espeak_VOICE voice; 248 memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first 249 voice.variant = 0; 250 char espeakLangStr[6]; 251 if ((strlen(language) != 2) && (strlen(language) != 6)){ 252 LOGI("Error: Invalid language. Language must be in either xx or xx-rYY format."); 253 return TTS_VALUE_INVALID; 254 } 255 if (strcmp(language, "en-rUS") == 0){ 256 strcpy(espeakLangStr, "en-us"); 257 } else if (strcmp(language, "en-rGB") == 0){ 258 strcpy(espeakLangStr, "en-uk"); 259 } else if (strcmp(language, "es-rMX") == 0){ 260 strcpy(espeakLangStr, "es-la"); 261 } else if (strcmp(language, "zh-rHK") == 0){ 262 strcpy(espeakLangStr, "zh"); 263 voice.variant = 5; 264 } else { 265 espeakLangStr[0] = language[0]; 266 espeakLangStr[1] = language[1]; 267 espeakLangStr[2] = 0; 268 // Bail out and do nothing if the language is not supported by eSpeak 269 if ((strcmp(language, "af") != 0) && 270 (strcmp(language, "bs") != 0) && 271 (strcmp(language, "zh") != 0) && 272 (strcmp(language, "hr") != 0) && 273 (strcmp(language, "cz") != 0) && 274 (strcmp(language, "nl") != 0) && 275 (strcmp(language, "en") != 0) && 276 (strcmp(language, "eo") != 0) && 277 (strcmp(language, "fi") != 0) && 278 (strcmp(language, "fr") != 0) && 279 (strcmp(language, "de") != 0) && 280 (strcmp(language, "el") != 0) && 281 (strcmp(language, "hi") != 0) && 282 (strcmp(language, "hu") != 0) && 283 (strcmp(language, "is") != 0) && 284 (strcmp(language, "id") != 0) && 285 (strcmp(language, "it") != 0) && 286 (strcmp(language, "ku") != 0) && 287 (strcmp(language, "la") != 0) && 288 (strcmp(language, "mk") != 0) && 289 (strcmp(language, "no") != 0) && 290 (strcmp(language, "pl") != 0) && 291 (strcmp(language, "pt") != 0) && 292 (strcmp(language, "ro") != 0) && 293 (strcmp(language, "ru") != 0) && 294 (strcmp(language, "sr") != 0) && 295 (strcmp(language, "sk") != 0) && 296 (strcmp(language, "es") != 0) && 297 (strcmp(language, "sw") != 0) && 298 (strcmp(language, "sv") != 0) && 299 (strcmp(language, "ta") != 0) && 300 (strcmp(language, "tr") != 0) && 301 (strcmp(language, "vi") != 0) && 302 (strcmp(language, "cy") != 0) ){ 303 LOGI("Error: Unsupported language."); 304 return TTS_PROPERTY_UNSUPPORTED; 305 } 306 // Use American English as the default English 307 if (strcmp(language, "en") == 0) { 308 strcpy(espeakLangStr, "en-us"); 309 } 310 } 311 voice.languages = espeakLangStr; 312 espeak_ERROR err = espeak_SetVoiceByProperties(&voice); 313 currentLanguage = new char [strlen(language)]; 314 strcpy(currentLanguage, language); 315 return TTS_SUCCESS; 316} 317 318 319tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country, 320 const char *variant) { 321 // TODO: Make this account for data files! 322 for (int i = 0; i < languageCount; i ++) 323 { 324 if (strcmp(lang, supportedLangIso3[i]) == 0) 325 { 326 return TTS_LANG_AVAILABLE; 327 } 328 } 329 return TTS_LANG_NOT_SUPPORTED; 330} 331 332tts_result TtsEngine::getLanguage(char *language, char *country, char *variant) 333{ 334 strcpy(language, currentLang); 335 strcpy(country, currentCountry); 336 strcpy(variant, ""); 337 return TTS_SUCCESS; 338} 339 340 341/** setAudioFormat 342 * sets the audio format to use for synthesis, returns what is actually used. 343 * @encoding - reference to encoding format 344 * @rate - reference to sample rate 345 * @channels - reference to number of channels 346 * return tts_result 347 * */ 348tts_result TtsEngine::setAudioFormat(AudioSystem::audio_format& encoding, uint32_t& rate, 349 int& channels) 350{ 351 // TODO: Fix this! 352 return TTS_SUCCESS; 353} 354 355// Sets the property with the specified value 356// 357// TODO: add pitch property here 358tts_result TtsEngine::setProperty(const char *property, const char *value, const size_t size) 359{ 360 int rate; 361 int pitch; 362 int volume; 363 364 /* Set a specific property for the engine. 365 Supported properties include: language (locale), rate, pitch, volume. */ 366 /* Sanity check */ 367 if (property == NULL) { 368 LOGE("setProperty called with property NULL"); 369 return TTS_PROPERTY_UNSUPPORTED; 370 } 371 372 if (value == NULL) { 373 LOGE("setProperty called with value NULL"); 374 return TTS_VALUE_INVALID; 375 } 376 377 if (strncmp(property, "language", 8) == 0) { 378 // TODO: Fix this 379 return TTS_SUCCESS; 380 } else if (strncmp(property, "rate", 4) == 0) { 381 rate = atoi(value); 382 espeak_SetParameter(espeakRATE, rate, 0); 383 // TODO: Fix this - use the return value here, don't just automatically return success! 384 return TTS_SUCCESS; 385 } else if (strncmp(property, "pitch", 5) == 0) { 386 // TODO: Fix this 387 return TTS_SUCCESS; 388 } else if (strncmp(property, "volume", 6) == 0) { 389 // TODO: Fix this 390 return TTS_SUCCESS; 391 } 392 return TTS_PROPERTY_UNSUPPORTED; 393} 394 395 396// Sets the property with the specified value 397// 398// TODO: add pitch property here 399tts_result TtsEngine::getProperty(const char *property, char *value, size_t *iosize) 400{ 401 /* Get the property for the engine. 402 This property was previously set by setProperty or by default. */ 403 /* sanity check */ 404 if (property == NULL) { 405 LOGE("getProperty called with property NULL"); 406 return TTS_PROPERTY_UNSUPPORTED; 407 } 408 409 if (value == NULL) { 410 LOGE("getProperty called with value NULL"); 411 return TTS_VALUE_INVALID; 412 } 413 414 if (strncmp(property, "language", 8) == 0) { 415 // TODO: Fix this 416 return TTS_SUCCESS; 417 } else if (strncmp(property, "rate", 4) == 0) { 418 // TODO: Fix this 419 return TTS_SUCCESS; 420 } else if (strncmp(property, "pitch", 5) == 0) { 421 // TODO: Fix this 422 return TTS_SUCCESS; 423 } else if (strncmp(property, "volume", 6) == 0) { 424 // TODO: Fix this 425 return TTS_SUCCESS; 426 } 427 428 /* Unknown property */ 429 LOGE("Unsupported property"); 430 return TTS_PROPERTY_UNSUPPORTED; 431} 432 433/** synthesizeText 434 * Synthesizes a text string. 435 * The text string could be annotated with SSML tags. 436 * @text - text to synthesize 437 * @buffer - buffer which will receive generated samples 438 * @bufferSize - size of buffer 439 * @userdata - pointer to user data which will be passed back to callback function 440 * return tts_result 441*/ 442tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata ) 443{ 444 espeak_SetSynthCallback(eSpeakCallback); 445 446 unsigned int unique_identifier; 447 espeak_ERROR err; 448 449 err = espeak_Synth(text, 450 strlen(text), 451 0, // position 452 POS_CHARACTER, 453 0, // end position (0 means no end position) 454 espeakCHARS_UTF8, 455 &unique_identifier, 456 userdata); 457 458 err = espeak_Synchronize(); 459 return TTS_SUCCESS; 460} 461 462// Synthesizes IPA text 463tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t bufferSize, void * userdata ) 464{ 465 // deprecated call 466 return TTS_FAILURE; 467} 468 469 470// Interrupts synthesis 471tts_result TtsEngine::stop() 472{ 473 espeak_Cancel(); 474 return TTS_SUCCESS; 475} 476 477 478 479TtsEngine* getTtsEngine() 480{ 481 return new TtsEngine(); 482} 483 484}; // namespace android