/native/external/espeak/src/speak_lib.h
C++ Header | 599 lines | 186 code | 60 blank | 353 comment | 0 complexity | 6db1f5703123e5bc5a6ffd32e1f304b8 MD5 | raw file
#ifndef SPEAK_LIB_H
#define SPEAK_LIB_H
/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/


/*************************************************************/
/* This is the header file for the library version of espeak */
/*                                                           */
/*************************************************************/

#include <stddef.h>   /* wchar_t, size_t: <wctype.h> is not required to define wchar_t */
#include <stdio.h>    /* FILE */
#include <wctype.h>

#define ESPEAK_API_REVISION  3
/*
Revision 2
   Added parameter "options" to eSpeakInitialize()

Revision 3
   Added espeakWORDGAP to  espeak_PARAMETER

Revision 4
   Added flags parameter to espeak_CompileDictionary()

NOTE(review): ESPEAK_API_REVISION is still 3 although the Revision 4 change
   (the flags parameter of espeak_CompileDictionary()) is already present in
   this header. Confirm whether the macro should be 4.
*/

         /********************/
         /*  Initialization  */
         /********************/


/* Kinds of event reported to the synthesis callback through espeak_EVENT. */
typedef enum {
	espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list.
	espeakEVENT_WORD = 1,            // Start of word
	espeakEVENT_SENTENCE,            // Start of sentence
	espeakEVENT_MARK,                // Mark
	espeakEVENT_PLAY,                // Audio element
	espeakEVENT_END,                 // End of sentence or clause
	espeakEVENT_MSG_TERMINATED,      // End of message
	espeakEVENT_PHONEME              // Phoneme, if enabled in espeak_Initialize()
} espeak_EVENT_TYPE;



/* One entry of the event list which is passed to the synthesis callback. */
typedef struct {
	espeak_EVENT_TYPE type;
	unsigned int unique_identifier; // message identifier (or 0 for key or character)
	int text_position;    // the number of characters from the start of the text
	int length;           // word length, in characters (for espeakEVENT_WORD)
	int audio_position;   // the time in mS within the generated speech output data
	int sample;           // sample id (internal use)
	void *user_data;      // pointer supplied by the calling program
	union {
		int number;        // used for WORD and SENTENCE events. For PHONEME events this is the phoneme mnemonic.
		const char *name;  // used for MARK and PLAY events.  UTF8 string
	} id;
} espeak_EVENT;
/*
   When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns.
   When the message is really processed, the callback function will be repeatedly called.


   In RETRIEVAL mode, the callback function supplies to the calling program the audio data and
   an event list terminated by 0 (LIST_TERMINATED).

   In PLAYBACK mode, the callback function is called as soon as an event happens.

   For example suppose that the following message is supplied to espeak_Synth:
   "hello, hello."


   * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :

   ** Block 1:
   <audio data> +
   List of events: SENTENCE + WORD + LIST_TERMINATED

   ** Block 2:
   <audio data> +
   List of events: WORD + END + LIST_TERMINATED

   ** Block 3:
   no audio data
   List of events: MSG_TERMINATED + LIST_TERMINATED


   * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:

   ** SENTENCE
   ** WORD (call when the sounds are actually played)
   ** WORD
   ** END (call when the end of sentence is actually played.)
   ** MSG_TERMINATED


   The MSG_TERMINATED event is the last event. It can inform the calling program to clear the
   user data related to the message.
   So if the synthesis must be stopped, the callback function is called for each pending message
   with the MSG_TERMINATED event.

   A MARK event indicates a <mark> element in the text.
   A PLAY event indicates an <audio> element in the text, for which the calling program should
   play the named sound file.
*/



/* Unit in which the "position" argument of espeak_Synth() is expressed. */
typedef enum {
	POS_CHARACTER = 1,
	POS_WORD,
	POS_SENTENCE
} espeak_POSITION_TYPE;


typedef enum {
	/* PLAYBACK mode: plays the audio data, supplies events to the calling program */
	AUDIO_OUTPUT_PLAYBACK,

	/* RETRIEVAL mode: supplies audio data and events to the calling program */
	AUDIO_OUTPUT_RETRIEVAL,

	/* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
	AUDIO_OUTPUT_SYNCHRONOUS,

	/* Synchronous playback */
	AUDIO_OUTPUT_SYNCH_PLAYBACK

} espeak_AUDIO_OUTPUT;


/* Status codes returned by most API functions. */
typedef enum {
	EE_OK = 0,
	EE_INTERNAL_ERROR = -1,
	EE_BUFFER_FULL = 1,
	EE_NOT_FOUND = 2
} espeak_ERROR;


/* Type of the function which is registered with espeak_SetSynthCallback().
   It is declared outside the extern "C" region below, as it always has been. */
typedef int (t_espeak_callback)(short *wav, int numsamples, espeak_EVENT *events);


/* All API functions below have C linkage. */
#ifdef __cplusplus
extern "C" {
#endif

int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
/* Must be called before any synthesis functions are called.
   output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.

   buflength:  The length in mS of sound buffers passed to the SynthCallback function.

   path: The directory which contains the espeak-data directory, or NULL for the default location.

   options: bit 0: 1=allow espeakEVENT_PHONEME events.


   Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
*/

void espeak_SetSynthCallback(t_espeak_callback *SynthCallback);
/* Must be called before any synthesis functions are called.
   This specifies a function in the calling program which is called when a buffer of
   speech sound data has been produced.


   The callback function is of the form:

int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);

   wav:  is the speech sound data which has been produced.
      NULL indicates that the synthesis has been completed.

   numsamples: is the number of entries in wav.  This number may vary, may be less than
      the value implied by the buflength parameter given in espeak_Initialize, and may
      sometimes be zero (which does NOT indicate end of synthesis).

   events: an array of espeak_EVENT items which indicate word and sentence events, and
      also the occurrence of <mark> and <audio> elements within the text.  The list of
      events is terminated by an event of type = 0.


   Callback returns: 0=continue synthesis,  1=abort synthesis.
*/

void espeak_SetUriCallback(int (*UriCallback)(int type, const char *uri, const char *base));
/* This function may be called before synthesis functions are used, in order to deal with
   <audio> tags.  It specifies a callback function which is called when an <audio> element is
   encountered and allows the calling program to indicate whether the sound file which
   is specified in the <audio> element is available and is to be played.

   The callback function is of the form:

int UriCallback(int type, const char *uri, const char *base);

   type:  type of callback event.  Currently only 1= <audio> element

   uri:   the "src" attribute from the <audio> element

   base:  the "xml:base" attribute (if any) from the <speak> element

   Return: 1=don't play the sound, but speak the text alternative.
           0=place a PLAY event in the event list at the point where the <audio> element
             occurs.  The calling program can then play the sound at that point.
*/


         /********************/
         /*    Synthesis     */
         /********************/


/* Character encoding of the text: exactly one of these goes in "flags". */
#define espeakCHARS_AUTO   0
#define espeakCHARS_UTF8   1
#define espeakCHARS_8BIT   2
#define espeakCHARS_WCHAR  3

/* Option bits which may be OR'd into "flags". */
#define espeakSSML        0x10
#define espeakPHONEMES    0x100
#define espeakENDPAUSE    0x1000
#define espeakKEEP_NAMEDATA 0x2000

espeak_ERROR espeak_Synth(const void *text,
	size_t size,
	unsigned int position,
	espeak_POSITION_TYPE position_type,
	unsigned int end_position,
	unsigned int flags,
	unsigned int *unique_identifier,
	void *user_data);
/* Synthesize speech for the specified text.  The speech sound data is passed to the calling
   program in buffers by means of the callback function specified by espeak_SetSynthCallback().
   The command is asynchronous: it is internally buffered and returns as soon as possible.
   If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound
   data are played by eSpeak.

   text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
      wide characters (wchar_t), or UTF8 encoding.  Which of these is determined by the "flags"
      parameter.

   size: Equal to (or greater than) the size of the text data, in bytes.  This is used in order
      to allocate internal storage space for the text.  This value is not used for
      AUDIO_OUTPUT_SYNCHRONOUS mode.

   position:  The position in the text where speaking starts. Zero indicates speak from the
      start of the text.

   position_type:  Determines whether "position" is a number of characters, words, or sentences.
      Values:
         POS_CHARACTER
         POS_WORD
         POS_SENTENCE

   end_position:  If set, this gives a character position at which speaking will stop.  A value
      of zero indicates no end position.

   flags:  These may be OR'd together:
      Type of character codes, one of:
         espeakCHARS_UTF8     UTF8 encoding
         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
         espeakCHARS_WCHAR    Wide characters (wchar_t)

      espeakSSML   Elements within < > are treated as SSML elements, or if not recognised are ignored.

      espeakPHONEMES  Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).

      espeakENDPAUSE  If set then a sentence pause is added at the end of the text.  If not set then
         this pause is suppressed.

   unique_identifier: message identifier; helpful for identifying later
     data supplied to the callback.

   user_data: pointer which will be passed to the callback function.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Synth_Mark(const void *text,
	size_t size,
	const char *index_mark,
	unsigned int end_position,
	unsigned int flags,
	unsigned int *unique_identifier,
	void *user_data);
/* Synthesize speech for the specified text.  Similar to espeak_Synth() but the start position is
   specified by the name of a <mark> element in the text.

   index_mark:  The "name" attribute of a <mark> element within the text which specifies the
      point at which synthesis starts.  UTF8 string.

   For the other parameters, see espeak_Synth()

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Key(const char *key_name);
/* Speak the name of a keyboard key.
   If key_name is a single character, it speaks the name of the character.
   Otherwise, it speaks key_name as a text string.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Char(wchar_t character);
/* Speak the name of the given character

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/




         /***********************/
         /*  Speech Parameters  */
         /***********************/

/* Identifiers accepted by espeak_SetParameter() and espeak_GetParameter(). */
typedef enum {
	espeakSILENCE = 0,      /* internal use */
	espeakRATE = 1,
	espeakVOLUME = 2,
	espeakPITCH = 3,
	espeakRANGE = 4,
	espeakPUNCTUATION = 5,
	espeakCAPITALS = 6,
	espeakWORDGAP = 7,
	espeakOPTIONS = 8,      // reserved for misc. options.  not yet used
	espeakINTONATION = 9,

	espeakRESERVED1 = 10,
	espeakRESERVED2 = 11,
	espeakEMPHASIS,         /* internal use */
	espeakLINELENGTH,       /* internal use */
	espeakVOICETYPE,        // internal, 1=mbrola
	N_SPEECH_PARAM          /* last enum */
} espeak_PARAMETER;

/* Values for the espeakPUNCTUATION parameter. */
typedef enum {
	espeakPUNCT_NONE = 0,
	espeakPUNCT_ALL = 1,
	espeakPUNCT_SOME = 2
} espeak_PUNCT_TYPE;

espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
/* Sets the value of the specified parameter.
   relative=0   Sets the absolute value of the parameter.
   relative=1   Sets a relative value of the parameter.

   parameter:
      espeakRATE:    speaking speed in words per minute.

      espeakVOLUME:  volume in range 0-100    0=silence

      espeakPITCH:   base pitch, range 0-100.  50=normal

      espeakRANGE:   pitch range, range 0-100. 0=monotone, 50=normal

      espeakPUNCTUATION:  which punctuation characters to announce:
         value in espeak_PUNCT_TYPE (none, all, some),
         see espeak_SetPunctuationList() to specify which characters are announced.

      espeakCAPITALS: announce capital letters by:
         0=none,
         1=sound icon,
         2=spelling,
         3 or higher, by raising pitch.  This value gives the amount in Hz by which the pitch
            of a word is raised to indicate it has a capital letter.

      espeakWORDGAP:  pause between words, units of 10mS (at the default speed)

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

int espeak_GetParameter(espeak_PARAMETER parameter, int current);
/* current=0  Returns the default value of the specified parameter.
   current=1  Returns the current value of the specified parameter, as set by SetParameter()
*/

espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
/* Specifies a list of punctuation characters whose names are to be spoken when the
   value of the Punctuation parameter is set to "some".

   punctlist:  A list of character codes, terminated by a zero character.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

void espeak_SetPhonemeTrace(int value, FILE *stream);
/* Controls the output of phoneme symbols for the text
   value=0  No phoneme output (default)
   value=1  Output the translated phoneme symbols for the text
   value=2  as (1), but also output a trace of how the translation was done (matching rules and list entries)

   stream   output stream for the phoneme symbols (and trace).  If stream=NULL then it uses stdout.
*/

void espeak_CompileDictionary(const char *path, FILE *log, int flags);
/* Compile pronunciation dictionary for a language which corresponds to the currently
   selected voice.  The required voice should be selected before calling this function.

   path:  The directory which contains the language's '_rules' and '_list' files.
          'path' should end with a path separator character ('/').
   log:   Stream for error reports and statistics information. If log=NULL then stderr will be used.

   flags:  Bit 0: include source line information for debug purposes (This is displayed with the
          -X command line option).
*/

         /***********************/
         /*   Voice Selection   */
         /***********************/


// voice table
typedef struct {
	const char *name;        // a given name for this voice. UTF8 string.
	const char *languages;   // list of pairs of (byte) priority + (string) language (and dialect qualifier)
	const char *identifier;  // the filename for this voice within espeak-data/voices
	unsigned char gender;    // 0=none 1=male, 2=female,
	unsigned char age;       // 0=not specified, or age in years
	unsigned char variant;   // only used when passed as a parameter to espeak_SetVoiceByProperties
	unsigned char xx1;       // for internal use
	int score;               // for internal use
	void *spare;             // for internal use
} espeak_VOICE;

/* Note: The espeak_VOICE structure is used for two purposes:
  1.  To return the details of the available voices.
  2.  As a parameter to  espeak_SetVoiceByProperties() in order to specify selection criteria.

   In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
   may be used, each language name in the list is terminated by a zero byte and is also preceded by
   a single byte which gives a "priority" number.  The list of languages is terminated by an
   additional zero byte.

   A language name consists of a language code, optionally followed by one or more qualifier (dialect)
   names separated by hyphens (eg. "en-uk").  A voice might, for example, have languages "en-uk" and
   "en".  Even without "en" listed, voice would still be selected for the "en" language (because
   "en-uk" is related) but at a lower priority.

   The priority byte indicates how the voice is preferred for the language. A low number indicates a
   more preferred voice, a higher number indicates a less preferred voice.

   In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
   priority byte.
*/

const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
/* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
   The list is terminated by a NULL pointer

   If voice_spec is NULL then all voices are listed.
   If voice_spec is given, then only the voices which are compatible with the voice_spec
   are listed, and they are listed in preference order.
*/

espeak_ERROR espeak_SetVoiceByName(const char *name);
/* Searches for a voice with a matching "name" field.  Language is not considered.
   "name" is a UTF8 string.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
/* An espeak_VOICE structure is used to pass criteria to select a voice.  Any of the following
   fields may be set:

   name     NULL, or a voice name

   languages  NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"

   gender   0=not specified, 1=male, 2=female

   age      0=not specified, or an age in years

   variant  After a list of candidates is produced, scored and sorted, "variant" is used to index
            that list and choose a voice.
            variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
*/

espeak_VOICE *espeak_GetCurrentVoice(void);
/* Returns the espeak_VOICE data for the currently selected voice.
   This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
*/

espeak_ERROR espeak_Cancel(void);
/* Stop immediately synthesis and audio output of the current text. When this
   function returns, the audio output is fully stopped and the synthesizer is ready to
   synthesize a new message.

   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/


int espeak_IsPlaying(void);
/* Returns 1 if audio is played, 0 otherwise.
*/

espeak_ERROR espeak_Synchronize(void);
/* This function returns when all data have been spoken.
   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Terminate(void);
/* last function to be called.
   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/


const char *espeak_Info(void *ptr);
/* Returns the version number string.
   The parameter is for future use, and should be set to NULL
*/

#ifdef __cplusplus
}
#endif

#endif