/* /native/external/espeak/platforms/windows/windows_dll/src/speak_lib.h
 * C++ Header | 593 lines | 186 code | 60 blank | 347 comment | 0 complexity | ad1c3fd58ab00ceb879a2bd1dae5c8c9 MD5 | raw file
 */
#ifndef SPEAK_LIB_H
#define SPEAK_LIB_H
/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/


/*************************************************************/
/* This is the header file for the library version of espeak */
/*                                                           */
/*************************************************************/

/* Decoration applied to every exported function.
   On Windows this is __declspec(dllexport), exactly as before.  It may be
   pre-defined by the including project to override it, and it expands to
   nothing on other platforms so that the header can still be parsed there. */
#ifndef ESPEAK_API
#  if defined(_WIN32) || defined(__CYGWIN__)
#    define ESPEAK_API __declspec(dllexport)
#  else
#    define ESPEAK_API
#  endif
#endif

#include <stddef.h>   /* size_t, wchar_t (used in the prototypes below) */
#include <stdio.h>    /* FILE */

#define ESPEAK_API_REVISION  3
/*
Revision 2
   Added parameter "options" to eSpeakInitialize()

Revision 3
   Added espeakWORDGAP to  espeak_PARAMETER

*/

         /********************/
         /*  Initialization  */
         /********************/


typedef enum {
  espeakEVENT_LIST_TERMINATED = 0, /* Retrieval mode: terminates the event list. */
  espeakEVENT_WORD = 1,            /* Start of word */
  espeakEVENT_SENTENCE,            /* Start of sentence */
  espeakEVENT_MARK,                /* Mark */
  espeakEVENT_PLAY,                /* Audio element */
  espeakEVENT_END,                 /* End of sentence */
  espeakEVENT_MSG_TERMINATED,      /* End of message */
  espeakEVENT_PHONEME              /* Phoneme, if enabled in espeak_Initialize() */
} espeak_EVENT_TYPE;



typedef struct {
	espeak_EVENT_TYPE type;
	unsigned int unique_identifier; /* message identifier (or 0 for key or character) */
	int text_position;    /* the number of characters from the start of the text */
	int length;           /* word length, in characters (for espeakEVENT_WORD) */
	int audio_position;   /* the time in mS within the generated speech output data */
	int sample;           /* sample id (internal use) */
	void *user_data;      /* pointer supplied by the calling program */
	union {
		int number;        /* used for WORD and SENTENCE events. For PHONEME events this is the phoneme mnemonic. */
		const char *name;  /* used for MARK and PLAY events.  UTF8 string */
	} id;
} espeak_EVENT;
/*
   When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns.
   When the message is really processed, the callback function will be repeatedly called.


   In RETRIEVAL mode, the callback function supplies to the calling program the audio data and
   an event list terminated by 0 (LIST_TERMINATED).

   In PLAYBACK mode, the callback function is called as soon as an event happens.

   For example suppose that the following message is supplied to espeak_Synth:
   "hello, hello."


   * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function :

   ** Block 1:
   <audio data> +
   List of events: SENTENCE + WORD + LIST_TERMINATED

   ** Block 2:
   <audio data> +
   List of events: WORD + END + LIST_TERMINATED

   ** Block 3:
   no audio data
   List of events: MSG_TERMINATED + LIST_TERMINATED


   * Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function:

   ** SENTENCE
   ** WORD (call when the sounds are actually played)
   ** WORD
   ** END (call when the end of sentence is actually played.)
   ** MSG_TERMINATED


   The MSG_TERMINATED event is the last event. It can inform the calling program to clear the
   user data related to the message.
   So if the synthesis must be stopped, the callback function is called for each pending message
   with the MSG_TERMINATED event.

   A MARK event indicates a <mark> element in the text.
   A PLAY event indicates an <audio> element in the text, for which the calling program should
   play the named sound file.
*/



typedef enum {
	POS_CHARACTER = 1,
	POS_WORD,
	POS_SENTENCE
} espeak_POSITION_TYPE;


typedef enum {
	/* PLAYBACK mode: plays the audio data, supplies events to the calling program */
	AUDIO_OUTPUT_PLAYBACK,

	/* RETRIEVAL mode: supplies audio data and events to the calling program */
	AUDIO_OUTPUT_RETRIEVAL,

	/* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */
	AUDIO_OUTPUT_SYNCHRONOUS,

	/* Synchronous playback */
	AUDIO_OUTPUT_SYNCH_PLAYBACK

} espeak_AUDIO_OUTPUT;


typedef enum {
	EE_OK=0,
	EE_INTERNAL_ERROR=-1,
	EE_BUFFER_FULL=1,
	EE_NOT_FOUND=2
} espeak_ERROR;


#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path, int options);
/* Must be called before any synthesis functions are called.
   output: the audio data can either be played by eSpeak or passed back by the SynthCallback function.

   buflength:  The length in mS of sound buffers passed to the SynthCallback function.

   path: The directory which contains the espeak-data directory, or NULL for the default location.

   options: bit 0: 1=allow espeakEVENT_PHONEME events.


   Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR).
*/

typedef int (t_espeak_callback)(short*, int, espeak_EVENT*);

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetSynthCallback(t_espeak_callback *SynthCallback);
/* Must be called before any synthesis functions are called.
   This specifies a function in the calling program which is called when a buffer of
   speech sound data has been produced.


   The callback function is of the form:

int SynthCallback(short *wav, int numsamples, espeak_EVENT *events);

   wav:  is the speech sound data which has been produced.
      NULL indicates that the synthesis has been completed.

   numsamples: is the number of entries in wav.  This number may vary, may be less than
      the value implied by the buflength parameter given in espeak_Initialize, and may
      sometimes be zero (which does NOT indicate end of synthesis).

   events: an array of espeak_EVENT items which indicate word and sentence events, and
      also the occurrence of <mark> and <audio> elements within the text.  The list of
      events is terminated by an event of type = 0.


   Callback returns: 0=continue synthesis,  1=abort synthesis.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*));
/* This function may be called before synthesis functions are used, in order to deal with
   <audio> tags.  It specifies a callback function which is called when an <audio> element is
   encountered and allows the calling program to indicate whether the sound file which
   is specified in the <audio> element is available and is to be played.

   The callback function is of the form:

int UriCallback(int type, const char *uri, const char *base);

   type:  type of callback event.  Currently only 1= <audio> element

   uri:   the "src" attribute from the <audio> element

   base:  the "xml:base" attribute (if any) from the <speak> element

   Return: 1=don't play the sound, but speak the text alternative.
           0=place a PLAY event in the event list at the point where the <audio> element
             occurs.  The calling program can then play the sound at that point.
*/


         /********************/
         /*    Synthesis     */
         /********************/


#define espeakCHARS_AUTO   0
#define espeakCHARS_UTF8   1
#define espeakCHARS_8BIT   2
#define espeakCHARS_WCHAR  3

#define espeakSSML        0x10
#define espeakPHONEMES    0x100
#define espeakENDPAUSE    0x1000
#define espeakKEEP_NAMEDATA 0x2000

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Synth(const void *text,
	size_t size,
	unsigned int position,
	espeak_POSITION_TYPE position_type,
	unsigned int end_position,
	unsigned int flags,
	unsigned int *unique_identifier,
	void *user_data);
/* Synthesize speech for the specified text.  The speech sound data is passed to the calling
   program in buffers by means of the callback function specified by espeak_SetSynthCallback().
   The command is asynchronous: it is internally buffered and returns as soon as possible.
   If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound
   data are played by eSpeak.

   text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters,
      wide characters (wchar_t), or UTF8 encoding.  Which of these is determined by the "flags"
      parameter.

   size: Equal to (or greater than) the size of the text data, in bytes.  This is used in order
      to allocate internal storage space for the text.  This value is not used for
      AUDIO_OUTPUT_SYNCHRONOUS mode.

   position:  The position in the text where speaking starts. Zero indicates speak from the
      start of the text.

   position_type:  Determines whether "position" is a number of characters, words, or sentences.
      Values: POS_CHARACTER, POS_WORD, POS_SENTENCE (see espeak_POSITION_TYPE).

   end_position:  If set, this gives a character position at which speaking will stop.  A value
      of zero indicates no end position.

   flags:  These may be OR'd together:
      Type of character codes, one of:
         espeakCHARS_UTF8     UTF8 encoding
         espeakCHARS_8BIT     The 8 bit ISO-8859 character set for the particular language.
         espeakCHARS_AUTO     8 bit or UTF8  (this is the default)
         espeakCHARS_WCHAR    Wide characters (wchar_t)

      espeakSSML   Elements within < > are treated as SSML elements, or if not recognised are ignored.

      espeakPHONEMES  Text within [[ ]] is treated as phonemes codes (in espeak's Kirshenbaum encoding).

      espeakENDPAUSE  If set then a sentence pause is added at the end of the text.  If not set then
         this pause is suppressed.

   unique_identifier: message identifier; helpful for identifying later
     data supplied to the callback.

   user_data: pointer which will be passed to the callback function.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Synth_Mark(const void *text,
	size_t size,
	const char *index_mark,
	unsigned int end_position,
	unsigned int flags,
	unsigned int *unique_identifier,
	void *user_data);
/* Synthesize speech for the specified text.  Similar to espeak_Synth() but the start position is
   specified by the name of a <mark> element in the text.

   index_mark:  The "name" attribute of a <mark> element within the text which specifies the
      point at which synthesis starts.  UTF8 string.

   For the other parameters, see espeak_Synth()

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Key(const char *key_name);
/* Speak the name of a keyboard key.
   Currently this just speaks the "key_name" as given

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Char(wchar_t character);
/* Speak the name of the given character

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

/* Note, there is no function to play a sound icon. This would be done by the calling program */



         /***********************/
         /*  Speech Parameters  */
         /***********************/

typedef enum {
  espeakSILENCE=0, /* internal use */
  espeakRATE=1,
  espeakVOLUME=2,
  espeakPITCH=3,
  espeakRANGE=4,
  espeakPUNCTUATION=5,
  espeakCAPITALS=6,
  espeakWORDGAP=7,
  espeakOPTIONS=8,   /* reserved for misc. options.  not yet used */
  espeakINTONATION=9,

  espeakRESERVED1=10,
  espeakRESERVED2=11,
  espeakEMPHASIS,   /* internal use */
  espeakLINELENGTH, /* internal use */
  espeakVOICETYPE,  /* internal, 1=mbrola */
  N_SPEECH_PARAM    /* last enum */
} espeak_PARAMETER;

typedef enum {
  espeakPUNCT_NONE=0,
  espeakPUNCT_ALL=1,
  espeakPUNCT_SOME=2
} espeak_PUNCT_TYPE;

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative);
/* Sets the value of the specified parameter.
   relative=0   Sets the absolute value of the parameter.
   relative=1   Sets a relative value of the parameter.

   parameter:
      espeakRATE:    speaking speed in word per minute.

      espeakVOLUME:  volume in range 0-100    0=silence

      espeakPITCH:   base pitch, range 0-100.  50=normal

      espeakRANGE:   pitch range, range 0-100. 0-monotone, 50=normal

      espeakPUNCTUATION:  which punctuation characters to announce:
         value in espeak_PUNCT_TYPE (none, all, some),
         see espeak_SetPunctuationList() to specify which characters are announced.

      espeakCAPITALS: announce capital letters by:
         0=none,
         1=sound icon,
         2=spelling,
         3 or higher, by raising pitch.  This value gives the amount in Hz by which the pitch
            of a word is raised to indicate it has a capital letter.

      espeakWORDGAP:  pause between words, units of 10mS (at the default speed)

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API int espeak_GetParameter(espeak_PARAMETER parameter, int current);
/* current=0  Returns the default value of the specified parameter.
   current=1  Returns the current value of the specified parameter, as set by SetParameter()
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist);
/* Specifies a list of punctuation characters whose names are to be spoken when the
   value of the Punctuation parameter is set to "some".

   punctlist:  A list of character codes, terminated by a zero character.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_SetPhonemeTrace(int value, FILE *stream);
/* Controls the output of phoneme symbols for the text
   value=0  No phoneme output (default)
   value=1  Output the translated phoneme symbols for the text
   value=2  as (1), but also output a trace of how the translation was done (matching rules and list entries)

   stream   output stream for the phoneme symbols (and trace).  If stream=NULL then it uses stdout.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API void espeak_CompileDictionary(const char *path, FILE *log);
/* Compile pronunciation dictionary for a language which corresponds to the currently
   selected voice.  The required voice should be selected before calling this function.

   path:  The directory which contains the language's '_rules' and '_list' files.
          'path' should end with a path separator character ('/').
   log:   Stream for error reports and statistics information. If log=NULL then stderr will be used.
*/

         /***********************/
         /*   Voice Selection   */
         /***********************/


/* voice table */
typedef struct {
	char *name;            /* a given name for this voice. UTF8 string. */
	char *languages;       /* list of pairs of (byte) priority + (string) language (and dialect qualifier) */
	char *identifier;      /* the filename for this voice within espeak-data/voices */
	unsigned char gender;  /* 0=none 1=male, 2=female, */
	unsigned char age;     /* 0=not specified, or age in years */
	unsigned char variant; /* only used when passed as a parameter to espeak_SetVoiceByProperties */
	unsigned char xx1;     /* for internal use */
	int score;             /* for internal use */
	void *spare;           /* for internal use */
} espeak_VOICE;

/* Note: The espeak_VOICE structure is used for two purposes:
  1.  To return the details of the available voices.
  2.  As a parameter to  espeak_SetVoiceByProperties() in order to specify selection criteria.

   In (1), the "languages" field consists of a list of (UTF8) language names for which this voice
   may be used, each language name in the list is terminated by a zero byte and is also preceded by
   a single byte which gives a "priority" number.  The list of languages is terminated by an
   additional zero byte.

   A language name consists of a language code, optionally followed by one or more qualifier (dialect)
   names separated by hyphens (eg. "en-uk").  A voice might, for example, have languages "en-uk" and
   "en".  Even without "en" listed, voice would still be selected for the "en" language (because
   "en-uk" is related) but at a lower priority.

   The priority byte indicates how the voice is preferred for the language. A low number indicates a
   more preferred voice, a higher number indicates a less preferred voice.

   In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding
   priority byte.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec);
/* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers.
   The list is terminated by a NULL pointer

   If voice_spec is NULL then all voices are listed.
   If voice_spec is given, then only the voices which are compatible with the voice_spec
   are listed, and they are listed in preference order.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetVoiceByName(const char *name);
/* Searches for a voice with a matching "name" field.  Language is not considered.
   "name" is a UTF8 string.

   Return: EE_OK: operation achieved
           EE_BUFFER_FULL: the command can not be buffered;
             you may try after a while to call the function again.
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec);
/* An espeak_VOICE structure is used to pass criteria to select a voice.  Any of the following
   fields may be set:

   name     NULL, or a voice name

   languages  NULL, or a single language string (with optional dialect), eg. "en-uk", or "en"

   gender   0=not specified, 1=male, 2=female

   age      0=not specified, or an age in years

   variant  After a list of candidates is produced, scored and sorted, "variant" is used to index
            that list and choose a voice.
            variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_VOICE *espeak_GetCurrentVoice(void);
/* Returns the espeak_VOICE data for the currently selected voice.
   This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s>
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Cancel(void);
/* Stop immediately synthesis and audio output of the current text. When this
   function returns, the audio output is fully stopped and the synthesizer is ready to
   synthesize a new message.

   Return: EE_OK: operation achieved
	   EE_INTERNAL_ERROR.
*/


#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API int espeak_IsPlaying(void);
/* Returns 1 if audio is played, 0 otherwise.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Synchronize(void);
/* This function returns when all data have been spoken.
   Return: EE_OK: operation achieved
	   EE_INTERNAL_ERROR.
*/

#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API espeak_ERROR espeak_Terminate(void);
/* last function to be called.
   Return: EE_OK: operation achieved
	   EE_INTERNAL_ERROR.
*/


#ifdef __cplusplus
extern "C"
#endif
ESPEAK_API const char *espeak_Info(void *ptr);
/* Returns the version number string.
   The parameter is for future use, and should be set to NULL
*/
#endif