/native/frameworks/espeakengine/jni/com_google_espeakengine.cpp

http://eyes-free.googlecode.com/ · C++ · 484 lines · 346 code · 61 blank · 77 comment · 135 complexity · d789e59f418467c930feacdd3ae758fd MD5 · raw file

  1. /*
  2. * Copyright (C) 2008 Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <stdio.h>
  17. #include <unistd.h>
  18. #define LOG_TAG "eSpeak Engine"
  19. #include <utils/Log.h>
  20. #include <android_runtime/AndroidRuntime.h>
  21. #include <speak_lib.h>
  22. #include <tts/TtsEngine.h>
  23. namespace android {
  // Three-letter language codes accepted by setLanguage() and
  // isLanguageAvailable(). Entry i pairs with supportedLang[i], so both tables
  // must stay in the same order and have the same length.
  const char * supportedLangIso3[] = {
      "afr", "bos", "yue", "cmn", "zho", "hrv",
      "ces", "nld", "eng", "epo", "fin", "fra",
      "deu", "ell", "hin", "hun", "isl", "ind",
      "ita", "kur", "lat", "mkd", "nor", "pol",
      "por", "ron", "rus", "srp", "slk", "spa",
      "swa", "swe", "tam", "tur", "vie", "cym"
  };

  // Android-style language names, parallel to supportedLangIso3.
  //  - "cz" (Czech) is kept exactly as spelled because setLanguage() matches on
  //    that string.
  //  - Turkish is "tr": setLanguage()'s list of accepted two-letter codes checks
  //    for "tr", so the former "tu" entry made "tur" impossible to select.
  const char * supportedLang[] = {
      "af", "bs", "zh-rHK", "zh", "zh", "hr",
      "cz", "nl", "en", "eo", "fi", "fr",
      "de", "el", "hi", "hu", "is", "id",
      "it", "ku", "la", "mk", "no", "pl",
      "pt", "ro", "ru", "sr", "sk", "es",
      "sw", "sv", "ta", "tr", "vi", "cy"
  };

  // Compile-time guard: a negative array size (a build error) results if the
  // two tables ever get out of step.
  typedef char supportedLangTablesMustMatch[
      (sizeof(supportedLangIso3) == sizeof(supportedLang)) ? 1 : -1];

  // Number of entries in each table, derived from the table itself so it cannot
  // drift when languages are added or removed.
  int languageCount = sizeof(supportedLangIso3) / sizeof(supportedLangIso3[0]);
  101. // Callback to the TTS API
  102. synthDoneCB_t* ttsSynthDoneCBPointer;
  103. char* currentLanguage = "en-rUS";
  104. char* currentRate = "140";
  105. char currentLang[10];
  106. char currentCountry[10];
  107. char currentVariant[10];
  108. /* Functions internal to the eSpeak engine wrapper */
  109. static void setSpeechRate(int speechRate)
  110. {
  111. espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0);
  112. }
  113. /* Functions exposed to the TTS API */
  114. /* Callback from espeak. Should call back to the TTS API */
  115. static int eSpeakCallback(short *wav, int numsamples,
  116. espeak_EVENT *events) {
  117. int8_t * castedWav = (int8_t *)wav;
  118. size_t bufferSize = 0;
  119. if (numsamples < 1){
  120. size_t silenceBufferSize = 2;
  121. int8_t *silence = new int8_t[silenceBufferSize]; // TODO: This will be a small memory leak, but do it this way for now because passing in an empty buffer can cause a crash.
  122. silence[0] = 0;
  123. silence[1] = 0;
  124. ttsSynthDoneCBPointer(events->user_data, 22050, AudioSystem::PCM_16_BIT, 1, silence, silenceBufferSize, TTS_SYNTH_DONE);
  125. return 1;
  126. }
  127. LOGI("eSpeak callback received! Sample count: %d", numsamples);
  128. bufferSize = numsamples * sizeof(short);
  129. ttsSynthDoneCBPointer(events->user_data, 22050, AudioSystem::PCM_16_BIT, 1, castedWav, bufferSize, TTS_SYNTH_PENDING);
  130. LOGI("eSpeak callback processed!");
  131. return 0; // continue synthesis (1 is to abort)
  132. }
  133. // Initializes the TTS engine and returns whether initialization succeeded
  134. tts_result TtsEngine::init(synthDoneCB_t synthDoneCBPtr)
  135. {
  136. // TODO Make sure that the speech data is loaded in
  137. // the directory /sdcard/espeak-data before calling this.
  138. int sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,
  139. 4096, "/sdcard", 0);
  140. if (sampleRate <= 0) {
  141. LOGI("eSpeak initialization failed!");
  142. return TTS_FAILURE;
  143. }
  144. espeak_SetSynthCallback(eSpeakCallback);
  145. int speechRate = 140;
  146. espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0);
  147. espeak_VOICE voice;
  148. memset( &voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
  149. const char *langNativeString = "en-us"; //Default to US English
  150. voice.languages = langNativeString;
  151. voice.variant = 0;
  152. err = espeak_SetVoiceByProperties(&voice);
  153. ttsSynthDoneCBPointer = synthDoneCBPtr;
  154. return TTS_SUCCESS;
  155. }
  156. // Shutsdown the TTS engine
  157. tts_result TtsEngine::shutdown( void )
  158. {
  159. espeak_Terminate();
  160. return TTS_SUCCESS;
  161. }
  162. tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant)
  163. {
  164. return TTS_FAILURE;
  165. }
  166. // Language will be specified according to the Android conventions for
  167. // localization as documented here:
  168. // http://developer.android.com/guide/topics/resources/resources-i18n.html
  169. //
  170. // language will be a string of the form "xx" or "xx-rYY", where xx is a
  171. // two letter ISO 639-1 language code in lowercase and rYY is a two letter ISO
  172. // 3166-1-alpha-2 language code in uppercase preceded by a lowercase "r".
  173. // Note that the "-rYY" portion may be omitted if the region is unimportant.
  174. //
  175. tts_result TtsEngine::setLanguage( const char * lang, const char * country, const char * variant )
  176. {
  177. LOGE("lang input param: %s country input param: %s", lang, country);
  178. char language[10];
  179. int langIndex = -1;
  180. for (int i = 0; i < languageCount; i ++)
  181. {
  182. if (strcmp(lang, supportedLangIso3[i]) == 0)
  183. {
  184. langIndex = i;
  185. break;
  186. }
  187. }
  188. if (langIndex < 0)
  189. {
  190. /* The language isn't supported. */
  191. LOGE("TtsEngine::setLanguage called with unsupported language");
  192. return TTS_FAILURE;
  193. }
  194. strcpy(currentLang, lang);
  195. strcpy(currentCountry, country);
  196. strcpy(language, supportedLang[langIndex]);
  197. if (strcmp(language, "en") == 0){
  198. if (strcmp(country, "USA") == 0){
  199. strcpy(language, "en-rUS");
  200. }
  201. if (strcmp(country, "GBR") == 0){
  202. strcpy(language, "en-rGB");
  203. }
  204. }
  205. if (strcmp(language, "es") == 0){
  206. if (strcmp(country, "MEX") == 0){
  207. strcpy(language, "es-rMX");
  208. }
  209. }
  210. LOGE("Language: %s", language);
  211. espeak_VOICE voice;
  212. memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
  213. voice.variant = 0;
  214. char espeakLangStr[6];
  215. if ((strlen(language) != 2) && (strlen(language) != 6)){
  216. LOGI("Error: Invalid language. Language must be in either xx or xx-rYY format.");
  217. return TTS_VALUE_INVALID;
  218. }
  219. if (strcmp(language, "en-rUS") == 0){
  220. strcpy(espeakLangStr, "en-us");
  221. } else if (strcmp(language, "en-rGB") == 0){
  222. strcpy(espeakLangStr, "en-uk");
  223. } else if (strcmp(language, "es-rMX") == 0){
  224. strcpy(espeakLangStr, "es-la");
  225. } else if (strcmp(language, "zh-rHK") == 0){
  226. strcpy(espeakLangStr, "zh");
  227. voice.variant = 5;
  228. } else {
  229. espeakLangStr[0] = language[0];
  230. espeakLangStr[1] = language[1];
  231. espeakLangStr[2] = 0;
  232. // Bail out and do nothing if the language is not supported by eSpeak
  233. if ((strcmp(language, "af") != 0) &&
  234. (strcmp(language, "bs") != 0) &&
  235. (strcmp(language, "zh") != 0) &&
  236. (strcmp(language, "hr") != 0) &&
  237. (strcmp(language, "cz") != 0) &&
  238. (strcmp(language, "nl") != 0) &&
  239. (strcmp(language, "en") != 0) &&
  240. (strcmp(language, "eo") != 0) &&
  241. (strcmp(language, "fi") != 0) &&
  242. (strcmp(language, "fr") != 0) &&
  243. (strcmp(language, "de") != 0) &&
  244. (strcmp(language, "el") != 0) &&
  245. (strcmp(language, "hi") != 0) &&
  246. (strcmp(language, "hu") != 0) &&
  247. (strcmp(language, "is") != 0) &&
  248. (strcmp(language, "id") != 0) &&
  249. (strcmp(language, "it") != 0) &&
  250. (strcmp(language, "ku") != 0) &&
  251. (strcmp(language, "la") != 0) &&
  252. (strcmp(language, "mk") != 0) &&
  253. (strcmp(language, "no") != 0) &&
  254. (strcmp(language, "pl") != 0) &&
  255. (strcmp(language, "pt") != 0) &&
  256. (strcmp(language, "ro") != 0) &&
  257. (strcmp(language, "ru") != 0) &&
  258. (strcmp(language, "sr") != 0) &&
  259. (strcmp(language, "sk") != 0) &&
  260. (strcmp(language, "es") != 0) &&
  261. (strcmp(language, "sw") != 0) &&
  262. (strcmp(language, "sv") != 0) &&
  263. (strcmp(language, "ta") != 0) &&
  264. (strcmp(language, "tr") != 0) &&
  265. (strcmp(language, "vi") != 0) &&
  266. (strcmp(language, "cy") != 0) ){
  267. LOGI("Error: Unsupported language.");
  268. return TTS_PROPERTY_UNSUPPORTED;
  269. }
  270. // Use American English as the default English
  271. if (strcmp(language, "en") == 0) {
  272. strcpy(espeakLangStr, "en-us");
  273. }
  274. }
  275. voice.languages = espeakLangStr;
  276. espeak_ERROR err = espeak_SetVoiceByProperties(&voice);
  277. currentLanguage = new char [strlen(language)];
  278. strcpy(currentLanguage, language);
  279. return TTS_SUCCESS;
  280. }
  281. tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
  282. const char *variant) {
  283. // TODO: Make this account for data files!
  284. for (int i = 0; i < languageCount; i ++)
  285. {
  286. if (strcmp(lang, supportedLangIso3[i]) == 0)
  287. {
  288. return TTS_LANG_AVAILABLE;
  289. }
  290. }
  291. return TTS_LANG_NOT_SUPPORTED;
  292. }
  293. tts_result TtsEngine::getLanguage(char *language, char *country, char *variant)
  294. {
  295. strcpy(language, currentLang);
  296. strcpy(country, currentCountry);
  297. strcpy(variant, "");
  298. return TTS_SUCCESS;
  299. }
  300. /** setAudioFormat
  301. * sets the audio format to use for synthesis, returns what is actually used.
  302. * @encoding - reference to encoding format
  303. * @rate - reference to sample rate
  304. * @channels - reference to number of channels
  305. * return tts_result
  306. * */
  307. tts_result TtsEngine::setAudioFormat(AudioSystem::audio_format& encoding, uint32_t& rate,
  308. int& channels)
  309. {
  310. // TODO: Fix this!
  311. return TTS_SUCCESS;
  312. }
  313. // Sets the property with the specified value
  314. //
  315. // TODO: add pitch property here
  316. tts_result TtsEngine::setProperty(const char *property, const char *value, const size_t size)
  317. {
  318. int rate;
  319. int pitch;
  320. int volume;
  321. /* Set a specific property for the engine.
  322. Supported properties include: language (locale), rate, pitch, volume. */
  323. /* Sanity check */
  324. if (property == NULL) {
  325. LOGE("setProperty called with property NULL");
  326. return TTS_PROPERTY_UNSUPPORTED;
  327. }
  328. if (value == NULL) {
  329. LOGE("setProperty called with value NULL");
  330. return TTS_VALUE_INVALID;
  331. }
  332. if (strncmp(property, "language", 8) == 0) {
  333. // TODO: Fix this
  334. return TTS_SUCCESS;
  335. } else if (strncmp(property, "rate", 4) == 0) {
  336. rate = atoi(value);
  337. espeak_SetParameter(espeakRATE, rate, 0);
  338. // TODO: Fix this - use the return value here, don't just automatically return success!
  339. return TTS_SUCCESS;
  340. } else if (strncmp(property, "pitch", 5) == 0) {
  341. // TODO: Fix this
  342. return TTS_SUCCESS;
  343. } else if (strncmp(property, "volume", 6) == 0) {
  344. // TODO: Fix this
  345. return TTS_SUCCESS;
  346. }
  347. return TTS_PROPERTY_UNSUPPORTED;
  348. }
  349. // Sets the property with the specified value
  350. //
  351. // TODO: add pitch property here
  352. tts_result TtsEngine::getProperty(const char *property, char *value, size_t *iosize)
  353. {
  354. /* Get the property for the engine.
  355. This property was previously set by setProperty or by default. */
  356. /* sanity check */
  357. if (property == NULL) {
  358. LOGE("getProperty called with property NULL");
  359. return TTS_PROPERTY_UNSUPPORTED;
  360. }
  361. if (value == NULL) {
  362. LOGE("getProperty called with value NULL");
  363. return TTS_VALUE_INVALID;
  364. }
  365. if (strncmp(property, "language", 8) == 0) {
  366. // TODO: Fix this
  367. return TTS_SUCCESS;
  368. } else if (strncmp(property, "rate", 4) == 0) {
  369. // TODO: Fix this
  370. return TTS_SUCCESS;
  371. } else if (strncmp(property, "pitch", 5) == 0) {
  372. // TODO: Fix this
  373. return TTS_SUCCESS;
  374. } else if (strncmp(property, "volume", 6) == 0) {
  375. // TODO: Fix this
  376. return TTS_SUCCESS;
  377. }
  378. /* Unknown property */
  379. LOGE("Unsupported property");
  380. return TTS_PROPERTY_UNSUPPORTED;
  381. }
  382. /** synthesizeText
  383. * Synthesizes a text string.
  384. * The text string could be annotated with SSML tags.
  385. * @text - text to synthesize
  386. * @buffer - buffer which will receive generated samples
  387. * @bufferSize - size of buffer
  388. * @userdata - pointer to user data which will be passed back to callback function
  389. * return tts_result
  390. */
  391. tts_result TtsEngine::synthesizeText( const char * text, int8_t * buffer, size_t bufferSize, void * userdata )
  392. {
  393. espeak_SetSynthCallback(eSpeakCallback);
  394. unsigned int unique_identifier;
  395. espeak_ERROR err;
  396. err = espeak_Synth(text,
  397. strlen(text),
  398. 0, // position
  399. POS_CHARACTER,
  400. 0, // end position (0 means no end position)
  401. espeakCHARS_UTF8,
  402. &unique_identifier,
  403. userdata);
  404. err = espeak_Synchronize();
  405. return TTS_SUCCESS;
  406. }
  407. // Synthesizes IPA text
  408. tts_result TtsEngine::synthesizeIpa( const char * ipa, int8_t * buffer, size_t bufferSize, void * userdata )
  409. {
  410. // deprecated call
  411. return TTS_FAILURE;
  412. }
  413. // Interrupts synthesis
  414. tts_result TtsEngine::stop()
  415. {
  416. espeak_Cancel();
  417. return TTS_SUCCESS;
  418. }
  419. TtsEngine* getTtsEngine()
  420. {
  421. return new TtsEngine();
  422. }
  423. }; // namespace android