/native/external/espeak/docs/speak_lib.h
C++ Header | 591 lines | 185 code | 60 blank | 346 comment | 0 complexity | d6ae1d5f835117993932ec1efa625c44 MD5 | raw file
#ifndef SPEAK_LIB_H
#define SPEAK_LIB_H
/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/


/*************************************************************/
/* This is the header file for the library version of espeak */
/*                                                           */
/*************************************************************/

#include <stddef.h>   /* size_t, wchar_t (wchar_t is not guaranteed by <stdio.h> in C) */
#include <stdio.h>    /* FILE */

#define ESPEAK_API_REVISION  3
/*
Revision 2
   Added parameter "options" to espeak_Initialize()

Revision 3
   Added espeakWORDGAP to espeak_PARAMETER

*/

/* All declarations below have C linkage when this header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif

         /********************/
         /*  Initialization  */
         /********************/


typedef enum {
    espeakEVENT_LIST_TERMINATED = 0, /* Retrieval mode: terminates the event list. */
    espeakEVENT_WORD = 1,            /* Start of word */
    espeakEVENT_SENTENCE,            /* Start of sentence */
    espeakEVENT_MARK,                /* Mark */
    espeakEVENT_PLAY,                /* Audio element */
    espeakEVENT_END,                 /* End of sentence */
    espeakEVENT_MSG_TERMINATED,      /* End of message */
    espeakEVENT_PHONEME              /* Phoneme, if enabled in espeak_Initialize() */
} espeak_EVENT_TYPE;



typedef struct {
    espeak_EVENT_TYPE type;
    unsigned int unique_identifier; /* message identifier (or 0 for key or character) */
    int text_position;              /* the number of characters from the start of the text */
    int length;                     /* word length, in characters (for espeakEVENT_WORD) */
    int audio_position;             /* the time in mS within the generated speech output data */
    int sample;                     /* sample id (internal use) */
    void *user_data;                /* pointer supplied by the calling program */
    union {
        int number;        /* used for WORD and SENTENCE events. For PHONEME events this is the phoneme mnemonic. */
        const char *name;  /* used for MARK and PLAY events.  UTF8 string */
    } id;
} espeak_EVENT;
/*
   When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns.
   When the message is really processed, the callback function will be repeatedly called.


   In RETRIEVAL mode, the callback function supplies to the calling program the audio data and
   an event list terminated by 0 (LIST_TERMINATED).

   In PLAYBACK mode, the callback function is called as soon as an event happens.

   For example suppose that the following message is supplied to espeak_Synth:
   "hello, hello."


   * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :

   ** Block 1:
   <audio data> +
   List of events: SENTENCE + WORD + LIST_TERMINATED

   ** Block 2:
   <audio data> +
   List of events: WORD + END + LIST_TERMINATED

   ** Block 3:
   no audio data
   List of events: MSG_TERMINATED + LIST_TERMINATED


   * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:

   ** SENTENCE
   ** WORD (call when the sounds are actually played)
   ** WORD
   ** END (call when the end of sentence is actually played.)
   ** MSG_TERMINATED


   The MSG_TERMINATED event is the last event. It can inform the calling program to clear the
   user data related to the message.
   So if the synthesis must be stopped, the callback function is called for each pending message
   with the MSG_TERMINATED event.

   A MARK event indicates a <mark> element in the text.
   A PLAY event indicates an <audio> element in the text, for which the calling program should
   play the named sound file.
*/



typedef enum {
    POS_CHARACTER = 1,
    POS_WORD,
    POS_SENTENCE
} espeak_POSITION_TYPE;


typedef enum {
    /* PLAYBACK mode: plays the audio data, supplies events to the calling program */
    AUDIO_OUTPUT_PLAYBACK,

    /* RETRIEVAL mode: supplies audio data and events to the calling program */
    AUDIO_OUTPUT_RETRIEVAL,

    /* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
    AUDIO_OUTPUT_SYNCHRONOUS,

    /* Synchronous playback */
    AUDIO_OUTPUT_SYNCH_PLAYBACK

} espeak_AUDIO_OUTPUT;


typedef enum {
    EE_OK = 0,
    EE_INTERNAL_ERROR = -1,
    EE_BUFFER_FULL = 1,
    EE_NOT_FOUND = 2
} espeak_ERROR;


int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
/* Must be called before any synthesis functions are called.
   output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.

   buflength:  The length in mS of sound buffers passed to the SynthCallback function.

   path: The directory which contains the espeak-data directory, or NULL for the default location.

   options: bit 0: 1=allow espeakEVENT_PHONEME events.


   Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
*/

typedef int (t_espeak_callback)(short *wav, int numsamples, espeak_EVENT *events);

void espeak_SetSynthCallback(t_espeak_callback *SynthCallback);
/* Must be called before any synthesis functions are called.
   This specifies a function in the calling program which is called when a buffer of
   speech sound data has been produced.


   The callback function is of the form:

int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);

   wav:  is the speech sound data which has been produced.
      NULL indicates that the synthesis has been completed.

   numsamples: is the number of entries in wav.  This number may vary, may be less than
      the value implied by the buflength parameter given in espeak_Initialize, and may
      sometimes be zero (which does NOT indicate end of synthesis).

   events: an array of espeak_EVENT items which indicate word and sentence events, and
      also the occurrence of <mark> and <audio> elements within the text.  The list of
      events is terminated by an event of type = 0.


   Callback returns: 0=continue synthesis,  1=abort synthesis.
*/

void espeak_SetUriCallback(int (*UriCallback)(int type, const char *uri, const char *base));
/* This function may be called before synthesis functions are used, in order to deal with
   <audio> tags.  It specifies a callback function which is called when an <audio> element is
   encountered and allows the calling program to indicate whether the sound file which
   is specified in the <audio> element is available and is to be played.

   The callback function is of the form:

int UriCallback(int type, const char *uri, const char *base);

   type:  type of callback event.  Currently only 1= <audio> element

   uri:   the "src" attribute from the <audio> element

   base:  the "xml:base" attribute (if any) from the <speak> element

   Return: 1=don't play the sound, but speak the text alternative.
           0=place a PLAY event in the event list at the point where the <audio> element
             occurs.  The calling program can then play the sound at that point.
*/


         /********************/
         /*    Synthesis     */
         /********************/


#define espeakCHARS_AUTO   0
#define espeakCHARS_UTF8   1
#define espeakCHARS_8BIT   2
#define espeakCHARS_WCHAR  3

#define espeakSSML          0x10
#define espeakPHONEMES      0x100
#define espeakENDPAUSE      0x1000
#define espeakKEEP_NAMEDATA 0x2000

espeak_ERROR espeak_Synth(const void *text,
    size_t size,
    unsigned int position,
    espeak_POSITION_TYPE position_type,
    unsigned int end_position,
    unsigned int flags,
    unsigned int *unique_identifier,
    void *user_data);
/* Synthesize speech for the specified text.  The speech sound data is passed to the calling
   program in buffers by means of the callback function specified by espeak_SetSynthCallback().
   The command is asynchronous: it is internally buffered and returns as soon as possible.
   If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound
   data are played by eSpeak.

   text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
      wide characters (wchar_t), or UTF8 encoding.  Which of these is determined by the "flags"
      parameter.

   size: Equal to (or greater than) the size of the text data, in bytes.  This is used in order
      to allocate internal storage space for the text.  This value is not used for
      AUDIO_OUTPUT_SYNCHRONOUS mode.

   position:  The position in the text where speaking starts. Zero indicates speak from the
      start of the text.

   position_type:  Determines whether "position" is a number of characters, words, or sentences.
      Values: POS_CHARACTER, POS_WORD, POS_SENTENCE (see espeak_POSITION_TYPE).

   end_position:  If set, this gives a character position at which speaking will stop.  A value
      of zero indicates no end position.

   flags:  These may be OR'd together:
      Type of character codes, one of:
         espeakCHARS_UTF8     UTF8 encoding
         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
         espeakCHARS_WCHAR    Wide characters (wchar_t)

      espeakSSML   Elements within < > are treated as SSML elements, or if not recognised are ignored.

      espeakPHONEMES  Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).

      espeakENDPAUSE  If set then a sentence pause is added at the end of the text.  If not set then
         this pause is suppressed.

   unique_identifier: message identifier; helpful for identifying later
     data supplied to the callback.

   user_data: pointer which will be passed to the callback function.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Synth_Mark(const void *text,
    size_t size,
    const char *index_mark,
    unsigned int end_position,
    unsigned int flags,
    unsigned int *unique_identifier,
    void *user_data);
/* Synthesize speech for the specified text.  Similar to espeak_Synth() but the start position is
   specified by the name of a <mark> element in the text.

   index_mark:  The "name" attribute of a <mark> element within the text which specified the
      point at which synthesis starts.  UTF8 string.

   For the other parameters, see espeak_Synth()

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Key(const char *key_name);
/* Speak the name of a keyboard key.
   Currently this just speaks the "key_name" as given

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Char(wchar_t character);
/* Speak the name of the given character

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/




         /***********************/
         /*  Speech Parameters  */
         /***********************/

typedef enum {
    espeakSILENCE = 0,      /* internal use */
    espeakRATE = 1,
    espeakVOLUME = 2,
    espeakPITCH = 3,
    espeakRANGE = 4,
    espeakPUNCTUATION = 5,
    espeakCAPITALS = 6,
    espeakWORDGAP = 7,
    espeakOPTIONS = 8,      /* reserved for misc. options.  not yet used */
    espeakINTONATION = 9,

    espeakRESERVED1 = 10,
    espeakRESERVED2 = 11,
    espeakEMPHASIS,         /* internal use */
    espeakLINELENGTH,       /* internal use */
    espeakVOICETYPE,        /* internal, 1=mbrola */
    N_SPEECH_PARAM          /* last enum */
} espeak_PARAMETER;

typedef enum {
    espeakPUNCT_NONE = 0,
    espeakPUNCT_ALL = 1,
    espeakPUNCT_SOME = 2
} espeak_PUNCT_TYPE;

espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
/* Sets the value of the specified parameter.
   relative=0   Sets the absolute value of the parameter.
   relative=1   Sets a relative value of the parameter.

   parameter:
      espeakRATE:    speaking speed in words per minute.

      espeakVOLUME:  volume in range 0-100    0=silence

      espeakPITCH:   base pitch, range 0-100.  50=normal

      espeakRANGE:   pitch range, range 0-100. 0=monotone, 50=normal

      espeakPUNCTUATION:  which punctuation characters to announce:
         value in espeak_PUNCT_TYPE (none, all, some),
         see espeak_SetPunctuationList() to specify which characters are announced.

      espeakCAPITALS: announce capital letters by:
         0=none,
         1=sound icon,
         2=spelling,
         3 or higher, by raising pitch.  This value gives the amount in Hz by which the pitch
            of a word is raised to indicate it has a capital letter.

      espeakWORDGAP:  pause between words, units of 10mS (at the default speed)

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

int espeak_GetParameter(espeak_PARAMETER parameter, int current);
/* current=0  Returns the default value of the specified parameter.
   current=1  Returns the current value of the specified parameter, as set by espeak_SetParameter()
*/

espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
/* Specifies a list of punctuation characters whose names are to be spoken when the
   value of the Punctuation parameter is set to "some".

   punctlist:  A list of character codes, terminated by a zero character.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

void espeak_SetPhonemeTrace(int value, FILE *stream);
/* Controls the output of phoneme symbols for the text
   value=0  No phoneme output (default)
   value=1  Output the translated phoneme symbols for the text
   value=2  as (1), but also output a trace of how the translation was done (matching rules and list entries)

   stream   output stream for the phoneme symbols (and trace).  If stream=NULL then it uses stdout.
*/

void espeak_CompileDictionary(const char *path, FILE *log);
/* Compile pronunciation dictionary for a language which corresponds to the currently
   selected voice.  The required voice should be selected before calling this function.

   path:  The directory which contains the language's '_rules' and '_list' files.
          'path' should end with a path separator character ('/').
   log:   Stream for error reports and statistics information. If log=NULL then stderr will be used.
*/

         /***********************/
         /*   Voice Selection   */
         /***********************/


/* voice table */
typedef struct {
    const char *name;       /* a given name for this voice. UTF8 string. */
    const char *languages;  /* list of pairs of (byte) priority + (string) language (and dialect qualifier) */
    const char *identifier; /* the filename for this voice within espeak-data/voices */
    unsigned char gender;   /* 0=none 1=male, 2=female, */
    unsigned char age;      /* 0=not specified, or age in years */
    unsigned char variant;  /* only used when passed as a parameter to espeak_SetVoiceByProperties */
    unsigned char xx1;      /* for internal use */
    int score;              /* for internal use */
    void *spare;            /* for internal use */
} espeak_VOICE;

/* Note: The espeak_VOICE structure is used for two purposes:
  1.  To return the details of the available voices.
  2.  As a parameter to  espeak_SetVoiceByProperties() in order to specify selection criteria.

   In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
   may be used, each language name in the list is terminated by a zero byte and is also preceded by
   a single byte which gives a "priority" number.  The list of languages is terminated by an
   additional zero byte.

   A language name consists of a language code, optionally followed by one or more qualifier (dialect)
   names separated by hyphens (eg. "en-uk").  A voice might, for example, have languages "en-uk" and
   "en".  Even without "en" listed, voice would still be selected for the "en" language (because
   "en-uk" is related) but at a lower priority.

   The priority byte indicates how the voice is preferred for the language. A low number indicates a
   more preferred voice, a higher number indicates a less preferred voice.

   In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
   priority byte.
*/

const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
/* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
   The list is terminated by a NULL pointer

   If voice_spec is NULL then all voices are listed.
   If voice_spec is given, then only the voices which are compatible with the voice_spec
   are listed, and they are listed in preference order.
*/

espeak_ERROR espeak_SetVoiceByName(const char *name);
/* Searches for a voice with a matching "name" field.  Language is not considered.
   "name" is a UTF8 string.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
/* An espeak_VOICE structure is used to pass criteria to select a voice.  Any of the following
   fields may be set:

   name     NULL, or a voice name

   languages  NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"

   gender   0=not specified, 1=male, 2=female

   age      0=not specified, or an age in years

   variant  After a list of candidates is produced, scored and sorted, "variant" is used to index
            that list and choose a voice.
            variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
*/

espeak_VOICE *espeak_GetCurrentVoice(void);
/* Returns the espeak_VOICE data for the currently selected voice.
   This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
*/

espeak_ERROR espeak_Cancel(void);
/* Immediately stop synthesis and audio output of the current text. When this
   function returns, the audio output is fully stopped and the synthesizer is ready to
   synthesize a new message.

   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/


int espeak_IsPlaying(void);
/* Returns 1 if audio is played, 0 otherwise.
*/

espeak_ERROR espeak_Synchronize(void);
/* This function returns when all data have been spoken.
   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/

espeak_ERROR espeak_Terminate(void);
/* last function to be called.
   Return: EE_OK: operation achieved
           EE_INTERNAL_ERROR.
*/


const char *espeak_Info(void *ptr);
/* Returns the version number string.
   The parameter is for future use, and should be set to NULL
*/

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif /* SPEAK_LIB_H */