PageRenderTime 161ms CodeModel.GetById 24ms app.highlight 122ms RepoModel.GetById 2ms app.codeStats 0ms

/native/external/espeak/src/readclause.cpp

http://eyes-free.googlecode.com/
C++ | 2338 lines | 1976 code | 262 blank | 100 comment | 324 complexity | 59e41bf86d1aec9ae0b37c183c043200 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/***************************************************************************
   2 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
   3 *   email: jonsd@users.sourceforge.net                                    *
   4 *                                                                         *
   5 *   This program is free software; you can redistribute it and/or modify  *
   6 *   it under the terms of the GNU General Public License as published by  *
   7 *   the Free Software Foundation; either version 3 of the License, or     *
   8 *   (at your option) any later version.                                   *
   9 *                                                                         *
  10 *   This program is distributed in the hope that it will be useful,       *
  11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  13 *   GNU General Public License for more details.                          *
  14 *                                                                         *
  15 *   You should have received a copy of the GNU General Public License     *
  16 *   along with this program; if not, write see:                           *
  17 *               <http://www.gnu.org/licenses/>.                           *
  18 ***************************************************************************/
  19
  20#include "StdAfx.h"
  21
  22#include <stdio.h>
  23#include <ctype.h>
  24#include <stdlib.h>
  25#include <string.h>
  26#include <wctype.h>
  27////#include <wchar.h>
  28#include <math.h>
  29
  30#include "speak_lib.h"
  31#include "speech.h"
  32#include "phoneme.h"
  33#include "synthesize.h"
  34#include "voice.h"
  35#include "translate.h"
  36
  37#ifdef PLATFORM_POSIX
  38#include <unistd.h>
  39#endif
  40
  41#include <locale.h>
  42#define N_XML_BUF   256
  43
  44
// ---- Input and SSML state shared by the character-reading functions in this file ----
  45const char *xmlbase = "";    // base URL from <speak>
  46
  47int namedata_ix=0;     // offset in namedata at which AddNameData() stores the next name
  48int n_namedata = 0;    // number of bytes currently allocated for namedata
  49char *namedata = NULL; // heap buffer of stored names, grown by AddNameData()
  50
  51
  52FILE *f_input = NULL;    // when not NULL, GetC_get() reads characters from this file
  53int ungot_char2 = 0;     // AnnouncePunctuation() stores here a punctuation character to be picked up next time
  54char *p_textinput;       // read position in 8-bit/UTF-8 text input (used when f_input is NULL)
  55wchar_t *p_wchar_input;  // read position in wchar_t text input (used when option_multibyte == espeakCHARS_WCHAR)
  56int ungot_char;          // one character pushed back by UngetC(); GetC() returns it first when it is non-zero
  57const char *ungot_word = NULL;
  58int end_of_input;        // set by GetC_get() when the text terminator is reached, cleared by GetC_unget()
  59
  60int ignore_text=0;   // set during <sub> ... </sub>  to ignore text which has been replaced by an alias
  61int clear_skipping_text = 0;  // next clause should clear the skipping_text flag
  62int count_characters = 0;     // incremented by GetC() for each character it reads (not for a pushed-back ungot_char)
  63int sayas_mode;
  64int ssml_ignore_l_angle = 0;  // set by AnnouncePunctuation() to '<' or '&' so the character is not treated as markup again
  65
  66static const char *punct_stop = ".:!?";    // pitch fall if followed by space
  67static const char *punct_close = ")]}>;'\"";  // always pitch fall unless followed by alnum
  68
  69// alter tone for announce punctuation or capitals
  70static const char *tone_punct_on = "\0016T";  // add reverberation, lower pitch
  71static const char *tone_punct_off = "\001T";
  68
  69// alter tone for announce punctuation or capitals
  70static const char *tone_punct_on = "\0016T";  // add reverberation, lower pitch
  71static const char *tone_punct_off = "\001T";
  72
  73// punctuation symbols that can end a clause
// The list is terminated by 0.  The position of a character in this list selects
// the matching entry of punct_attributes[] (which is indexed by position + 1).
  74const unsigned short punct_chars[] = {',','.','?','!',':',';',
  75  0x2013,  // en-dash
  76  0x2014,  // em-dash
  77  0x2026,  // ellipsis
  78
  79  0x037e,  // Greek question mark (looks like semicolon)
  80  0x0387,  // Greek semicolon, ano teleia
  81  0x0964,  // Devanagari Danda (fullstop)
  82  0x3001,  // ideograph comma
  83  0x3002,  // ideograph period
  84
  85  0xff01,  // fullwidth exclamation
  86  0xff0c,  // fullwidth comma
  87  0xff0e,  // fullwidth period
  88  0xff1a,  // fullwidth colon
  89  0xff1b,  // fullwidth semicolon
  90  0xff1f,  // fullwidth question mark
  91  
  92  0};
  93
  94
  95// indexed by (entry num. in punct_chars) + 1
  96// bits 0-7 pause x 10mS, bits 12-14 intonation type, bit 15 don't need following space or bracket
// Entry 0 is the value used when the index is 0.
// NOTE(review): presumably lookupwchar() returns 0 for a character that is not in punct_chars - confirm.
// The entries with +0x8000 set bit 15 (no following space or bracket needed).
  97static const unsigned int punct_attributes [] = { 0,
  98  CLAUSE_COMMA, CLAUSE_PERIOD, CLAUSE_QUESTION, CLAUSE_EXCLAMATION, CLAUSE_COLON, CLAUSE_SEMICOLON,
  99  CLAUSE_SEMICOLON,  // en-dash
 100  CLAUSE_SEMICOLON,  // em-dash
 101  CLAUSE_SEMICOLON,  // ellipsis
 102
 103  CLAUSE_QUESTION,   // Greek question mark
 104  CLAUSE_SEMICOLON,  // Greek semicolon
 105  CLAUSE_PERIOD+0x8000,     // Devanagari Danda (fullstop)
 106  CLAUSE_COMMA+0x8000,      // ideograph comma
 107  CLAUSE_PERIOD+0x8000,     // ideograph period
 108
 109  CLAUSE_EXCLAMATION+0x8000, // fullwidth exclamation
 110  CLAUSE_COMMA+0x8000,       // fullwidth comma
 111  CLAUSE_PERIOD+0x8000,      // fullwidth period
 112  CLAUSE_COLON+0x8000,       // fullwidth colon
 113  CLAUSE_SEMICOLON+0x8000,   // fullwidth semicolon
 114  CLAUSE_QUESTION+0x8000,    // fullwidth question mark
 115
 116  CLAUSE_SEMICOLON,  // spare
 117  0 };
 118
 119
 120// stack for language and voice properties
 121// frame 0 is for the defaults, before any ssml tags.
// VoiceFromStack() walks the frames from 0 upwards; a field which is 0 (or an
// empty string) in a frame leaves the value from the lower frames unchanged.
 122typedef struct {
 123	int tag_type;
 124	int voice_variant;
 125	int voice_gender;
 126	int voice_age;
 127	char voice_name[40];
 128	char language[20];
 129} SSML_STACK;
 130
 131#define N_SSML_STACK  20
 132int n_ssml_stack;                     // number of frames of ssml_stack[] examined by VoiceFromStack()
 133SSML_STACK ssml_stack[N_SSML_STACK];
 134
 135char current_voice_id[40] = {0};
 136
 137
// stack of speech parameter settings; a value of -1 in a frame means "not set here"
 138#define N_PARAM_STACK  20
 139int n_param_stack;                    // number of frames in use, see PushParamStack() and PopParamStack()
 140PARAM_STACK param_stack[N_PARAM_STACK];
 141
 142int speech_parameters[N_SPEECH_PARAM];     // current values, from param_stack
 143
// Default value for each speech parameter, one entry per parameter number.
 144const int param_defaults[N_SPEECH_PARAM] = {
 145   0,     // silence (internal use)
 146  170,    // rate wpm
 147  100,    // volume
 148   50,    // pitch
 149   50,    // range
 150   0,     // punctuation
 151   0,     // capital letters
 152   0,     // wordgap
 153   0,     // options
 154   0,     // intonation
 155   0,
 156   0,
 157   0,     // emphasis
 158   0,     // line length
 159   0,     // voice type
 160};
 161
 162
 163#ifdef NEED_WCHAR_FUNCTIONS
 164
 165// additional Latin characters beyond the Latin1 character set
 166#define MAX_WALPHA  0x233
 167// indexed by character - 0x100
 168// 0=not alphabetic, 0xff=lower case, other=value to add to upper case to convert to lower case
// Each row below covers 16 character codes; the trailing comment gives the code of the row's first entry.
// The table is read by the fallback iswalpha(), towlower(), iswupper() and iswlower() that follow.
 169static unsigned char walpha_tab[MAX_WALPHA-0xff] = {
 170      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 100
 171      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 110
 172      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 120
 173   0xff,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,   1,  // 130
 174   0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,  // 140
 175      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 150
 176      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 160
 177      1,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,  // 170
 178   0xff, 210,   1,0xff,   1,0xff, 206,   1,0xff, 205, 205,   1,0xff,0xff,  79, 202,  // 180
 179    203,   1,0xff, 205, 207,0xff, 211, 209,   1,0xff,0xff,0xff, 211, 213,0xff, 214,  // 190
 180      1,0xff,   1,0xff,   1,0xff, 218,   1,0xff, 218,0xff,0xff,   1,0xff, 218,   1,  // 1a0
 181   0xff, 217, 217,   1,0xff,   1,0xff, 219,   1,0xff,0xff,0xff,   1,0xff,0xff,0xff,  // 1b0
 182   0xff,0xff,0xff,0xff,   2,   1,0xff,   2,   1,0xff,   2,   1,0xff,   1,0xff,   1,  // 1c0
 183   0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,0xff,   1,0xff,  // 1d0
 184      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 1e0
 185   0xff,   2,   1,0xff,   1,0xff,0xff,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 1f0
 186      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 200
 187      1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 210
 188   0xff,   0,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,   1,0xff,  // 220
 189      1,0xff,   1,0xff };    // 230
 190
 191// use ctype.h functions for Latin1 (character < 0x100)
 192int iswalpha(int c)
 193{
 194	if(c < 0x100)
 195		return(isalpha(c));
 196	if((c > 0x3040) && (c <= 0xa700))
 197		return(1);  // japanese, chinese characters
 198	if(c > MAX_WALPHA)
 199		return(0);
 200	return(walpha_tab[c-0x100]);
 201}
 202
 203int iswdigit(int c)
 204{
 205	if(c < 0x100)
 206		return(isdigit(c));
 207	return(0);
 208}
 209
 210int iswalnum(int c)
 211{
 212	if(iswdigit(c))
 213		return(1);
 214	return(iswalpha(c));
 215}
 216
 217int towlower(int c)
 218{
 219	int x;
 220	if(c < 0x100)
 221		return(tolower(c));
 222	if((c > MAX_WALPHA) || ((x = walpha_tab[c-0x100])==0xff))
 223		return(c);  // already lower case
 224	return(c + x);  // convert to lower case
 225}
 226
 227int towupper(int c)
 228{
 229	// check whether the previous character code is the upper-case equivalent of this character
 230	if(tolower(c-1) == c)
 231		return(c-1);  // yes, use it
 232	return(c);  // no
 233}
 234
 235int iswupper(int c)
 236{
 237	int x;
 238	if(c < 0x100)
 239		return(isupper(c));
 240	if(((c > MAX_WALPHA) || (x = walpha_tab[c-0x100])==0) || (x == 0xff))
 241		return(0);
 242	return(1);
 243}
 244
 245int iswlower(int c)
 246{
 247	if(c < 0x100)
 248		return(islower(c));
 249	if((c > MAX_WALPHA) || (walpha_tab[c-0x100] != 0xff))
 250		return(0);
 251	return(1);
 252}
 253
 254int iswspace(int c)
 255{
 256	if(c < 0x100)
 257		return(isspace(c));
 258	return(0);
 259}
 260
 261int iswpunct(int c)
 262{
 263	if(c < 0x100)
 264		return(ispunct(c));
 265	return(0);
 266}
 267
 268const wchar_t *wcschr(const wchar_t *str, int c)
 269{
 270   while(*str != 0)
 271   {
 272      if(*str == c)
 273         return(str);
 274      str++;
 275   }
 276   return(NULL);
 277}
 278
 279const int wcslen(const wchar_t *str)
 280{
 281	int ix=0;
 282
 283	while(*str != 0)
 284	{
 285		ix++;
 286	}
 287	return(ix);
 288}
 289
 290float wcstod(const wchar_t *str, wchar_t **tailptr)
 291{
 292   int ix;
 293   char buf[80];
 294   while(isspace(*str)) str++;
 295   for(ix=0; ix<80; ix++)
 296   {
 297      buf[ix] = str[ix];
 298      if(isspace(buf[ix]))
 299         break;
 300   }
 301   *tailptr = (wchar_t *)&str[ix];
 302   return(atof(buf));
 303}
 304#endif
 305
 306int towlower2(unsigned int c)
 307{
 308	// check for non-standard upper to lower case conversions
 309	if(c == 'I')
 310	{
 311		if(translator->translator_name == L('t','r'))
 312		{
 313			c = 0x131;   // I -> ?
 314		}
 315	}
 316	return(towlower(c));
 317}
 318
 319static void GetC_unget(int c)
 320{//==========================
 321// This is only called with UTF8 input, not wchar input
 322	if(f_input != NULL)
 323		ungetc(c,f_input);
 324	else
 325	{
 326		p_textinput--;
 327		*p_textinput = c;
 328		end_of_input = 0;
 329	}
 330}
 331
 332int Eof(void)
 333{//==========
 334	if(ungot_char != 0)
 335		return(0);
 336
 337	if(f_input != 0)
 338		return(feof(f_input));
 339
 340	return(end_of_input);
 341}
 342
 343
 344static int GetC_get(void)
 345{//======================
 346	int c;
 347
 348	if(f_input != NULL)
 349	{
 350		c = fgetc(f_input);
 351		if(feof(f_input)) c = ' ';
 352		return(c & 0xff);
 353	}
 354
 355	if(option_multibyte == espeakCHARS_WCHAR)
 356	{
 357		if(*p_wchar_input == 0)
 358		{
 359			end_of_input = 1;
 360			return(0);
 361		}
 362
 363		if(!end_of_input)
 364			return(*p_wchar_input++);
 365	}
 366	else
 367	{
 368		if(*p_textinput == 0)
 369		{
 370			end_of_input = 1;
 371			return(0);
 372		}
 373	
 374		if(!end_of_input)
 375			return(*p_textinput++ & 0xff);
 376	}
 377	return(0);
 378}
 379
 380
 381static int GetC(void)
 382{//==================
 383// Returns a unicode wide character
 384// Performs UTF8 checking and conversion
// Order of sources: a character pushed back by UngetC(), then a byte held
// back in ungot2 by a previous failed UTF8 decode, then GetC_get().
// Bytes which do not form valid UTF8 are treated as an 8-bit character set.
 385
 386	int c;
 387	int c1;
 388	int c2;
 389	int cbuf[4];
 390	int ix;
 391	int n_bytes;
 392	unsigned char m;
 393	static int ungot2 = 0;
	// mask[n]: the bits of the first byte which carry data when n continuation bytes follow
 394	static const unsigned char mask[4] = {0xff,0x1f,0x0f,0x07};
	// mask2[] is only copied into m, and m is never read in this function
 395	static const unsigned char mask2[4] = {0,0x80,0x20,0x30};
 396
 397	if((c1 = ungot_char) != 0)
 398	{
		// return the pushed-back character unchanged (it is not counted again)
 399		ungot_char = 0;
 400		return(c1);
 401	}
 402
 403	if(ungot2 != 0)
 404	{
 405		c1 = ungot2;
 406		ungot2 = 0;
 407	}
 408	else
 409	{
 410		c1 = GetC_get();
 411	}
 412
 413	if(option_multibyte == espeakCHARS_WCHAR)
 414	{
 415		count_characters++;
 416		return(c1);   // wchar_t  text
 417	}
 418
 419	if((option_multibyte < 2) && (c1 & 0x80))
 420	{
 421		// multi-byte utf8 encoding, convert to unicode
 422		n_bytes = 0;
 423
		// n_bytes = number of continuation bytes expected after this first byte
		// (a 2-byte sequence whose first byte carries none of bits 1-4 is rejected,
		//  as is a 4-byte sequence whose low four bits are above 4)
 424		if(((c1 & 0xe0) == 0xc0) && ((c1 & 0x1e) != 0))
 425			n_bytes = 1;
 426		else
 427		if((c1 & 0xf0) == 0xe0)
 428			n_bytes = 2;
 429		else
 430		if(((c1 & 0xf8) == 0xf0) && ((c1 & 0x0f) <= 4))
 431			n_bytes = 3;
 432
 433		if((ix = n_bytes) > 0)
 434		{
 435			c = c1 & mask[ix];
 436			m = mask2[ix];
			// read the continuation bytes; cbuf[] is filled from index n_bytes down to 1
 437			while(ix > 0)
 438			{
 439				if((c2 = cbuf[ix] = GetC_get()) == 0)
 440				{
					// the input ended inside a multi-byte sequence
 441					if(option_multibyte==espeakCHARS_AUTO)
 442						option_multibyte=espeakCHARS_8BIT;   // change "auto" option to "no"
 443					GetC_unget(' ');
 444					break;
 445				}
 446
 447				if((c2 & 0xc0) != 0x80)
 448				{
 449					// This is not UTF8.  Change to 8-bit characterset.
					// If the first of two continuation bytes was accepted, keep it in
					// ungot2 so that the next call returns it before the byte pushed back here.
 450					if((n_bytes == 2) && (ix == 1))
 451						ungot2 = cbuf[2];
 452					GetC_unget(c2);
 453					break;
 454				}
 455				m = 0x80;
				// append the 6 data bits of this continuation byte
 456				c = (c << 6) + (c2 & 0x3f);
 457				ix--;
 458			}
 459			if(ix == 0)
 460			{
				// all the continuation bytes were valid
 461				count_characters++;
 462				return(c);
 463			}
 464		}
 465		// top-bit-set character is not utf8, drop through to 8bit charset case
 466		if((option_multibyte==espeakCHARS_AUTO) && !Eof())
 467			option_multibyte=espeakCHARS_8BIT;   // change "auto" option to "no"
 468	}
 469
 470	// 8 bit character set: codes 0xa0 and above are converted to unicode through the translator's charset_a0 table
 471	count_characters++;
 472	if(c1 >= 0xa0)
 473		return(translator->charset_a0[c1-0xa0]);
 474	return(c1);
 475}  // end of GetC
 476
 477
 478static void UngetC(int c)
 479{//======================
// Push back one decoded character.  GetC() returns it on its next call, provided it is non-zero.
// Only one character is held: a second call before the next GetC() replaces the first.
 480	ungot_char = c;
 481}
 482
 483
 484const char *WordToString2(unsigned int word)
 485{//========================================
 486// Convert a language mnemonic word into a string
 487	int  ix;
 488	static char buf[5];
 489	char *p;
 490
 491	p = buf;
 492	for(ix=3; ix>=0; ix--)
 493	{
 494		if((*p = word >> (ix*8)) != 0)
 495			p++;
 496	}
 497	*p = 0;
 498	return(buf);
 499}
 500
 501
 502const char *Translator::LookupSpecial(const char *string, char* text_out)
 503{//======================================================================
 504	unsigned int flags[2];
 505	char phonemes[55];
 506	char phonemes2[55];
 507	char *string1 = (char *)string;
 508
 509	if(LookupDictList(&string1,phonemes,flags,0,NULL))
 510	{
 511		SetWordStress(phonemes,flags[0],-1,0);
 512		DecodePhonemes(phonemes,phonemes2);
 513		sprintf(text_out,"[[%s]]",phonemes2);
 514		option_phoneme_input = 1;
 515		return(text_out);
 516	}
 517	return(NULL);
 518}
 519
 520
 521const char *Translator::LookupCharName(int c)
 522{//==========================================
 523// Find the phoneme string (in ascii) to speak the name of character c
 524// Used for punctuation characters and symbols
 525
 526	int ix;
 527	unsigned int flags[2];
 528	char single_letter[24];
 529	char phonemes[60];
 530	char phonemes2[60];
 531	const char *lang_name = NULL;
 532	char *string;
 533	static char buf[60];
 534
 535	buf[0] = 0;
 536	flags[0] = 0;
 537	flags[1] = 0;
 538	single_letter[0] = 0;
 539	single_letter[1] = '_';
 540	ix = utf8_out(c,&single_letter[2]);
 541	single_letter[2+ix]=0;
 542
 543	string = &single_letter[1];
 544	if(LookupDictList(&string, phonemes, flags, 0, NULL) == 0)
 545	{
 546		// try _* then *
 547		string = &single_letter[2];
 548		if(LookupDictList(&string, phonemes, flags, 0, NULL) == 0)
 549		{
 550			// now try the rules
 551			single_letter[1] = ' ';
 552			TranslateRules(&single_letter[2], phonemes, sizeof(phonemes), NULL,0,NULL);
 553		}
 554	}
 555
 556	if((phonemes[0] == 0) && (translator_name != L('e','n')))
 557	{
 558		// not found, try English
 559		SetTranslator2("en");
 560		string = &single_letter[1];
 561		single_letter[1] = '_';
 562		if(translator2->LookupDictList(&string, phonemes, flags, 0, NULL) == 0)
 563		{
 564			string = &single_letter[2];
 565			translator2->LookupDictList(&string, phonemes, flags, 0, NULL);
 566		}
 567		if(phonemes[0])
 568		{
 569			lang_name = "en";
 570		}
 571		else
 572		{
 573			SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 574		}
 575	}
 576
 577	if(phonemes[0])
 578	{
 579		if(lang_name)
 580		{
 581			translator2->SetWordStress(phonemes,flags[0],-1,0);
 582			DecodePhonemes(phonemes,phonemes2);
 583			sprintf(buf,"[[_^_%s %s _^_%s]]","en",phonemes2,WordToString2(translator_name));
 584			SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 585		}
 586		else
 587		{
 588			SetWordStress(phonemes,flags[0],-1,0);
 589			DecodePhonemes(phonemes,phonemes2);
 590			sprintf(buf,"[[%s]] ",phonemes2);
 591		}
 592	}
 593	else
 594	{
 595		strcpy(buf,"[[(X1)(X1)(X1)]]");
 596	}
 597
 598	option_phoneme_input = 1;
 599	return(buf);
 600}
 601
 602int Read4Bytes(FILE *f)
 603{//====================
 604// Read 4 bytes (least significant first) into a word
 605	int ix;
 606	unsigned char c;
 607	int acc=0;
 608
 609	for(ix=0; ix<4; ix++)
 610	{
 611		c = fgetc(f) & 0xff;
 612		acc += (c << (ix*8));
 613	}
 614	return(acc);
 615}
 616
 617
 618static int LoadSoundFile(const char *fname, int index)
 619{//===================================================
 620	FILE *f;
 621	char *p;
 622	int *ip;
 623	int  length;
 624	char fname_temp[100];
 625	char fname2[sizeof(path_home)+13+40];
 626	char command[sizeof(fname2)+sizeof(fname2)+40];
 627
 628	if(fname == NULL)
 629	{
 630		// filename is already in the table
 631		fname = soundicon_tab[index].filename;
 632	}
 633
 634	if(fname==NULL)
 635		return(1);
 636
 637	if(fname[0] != '/')
 638	{
 639		// a relative path, look in espeak-data/soundicons
 640		sprintf(fname2,"%s%csoundicons%c%s",path_home,PATHSEP,PATHSEP,fname);
 641		fname = fname2;
 642	}
 643
 644	f = NULL;
 645#ifdef PLATFORM_POSIX
 646	if((f = fopen(fname,"rb")) != NULL)
 647	{
 648		int ix;
 649		int fd_temp;
 650		const char *resample;
 651		int header[3];
 652
 653		fseek(f,20,SEEK_SET);
 654		for(ix=0; ix<3; ix++)
 655			header[ix] = Read4Bytes(f);
 656
 657		// if the sound file is not mono, 16 bit signed, at the correct sample rate, then convert it
 658		if((header[0] != 0x10001) || (header[1] != samplerate) || (header[2] != samplerate*2))
 659		{
 660			fclose(f);
 661			f = NULL;
 662
 663			if(header[2] == samplerate)
 664				resample = "";
 665			else
 666				resample = "polyphase";
 667
 668			strcpy(fname_temp,"/tmp/espeakXXXXXX");
 669			if((fd_temp = mkstemp(fname_temp)) >= 0)
 670			{
 671				close(fd_temp);
 672//			sprintf(fname_temp,"%s.wav",tmpnam(NULL));
 673				sprintf(command,"sox \"%s\" -r %d -w -s -c1 %s %s\n", fname, samplerate, fname_temp, resample);
 674				if(system(command) == 0)
 675				{
 676					fname = fname_temp;
 677				}
 678			}
 679		}
 680	}
 681#endif
 682
 683	if(f == NULL)
 684	{
 685		f = fopen(fname,"rb");
 686		if(f == NULL)
 687		{
 688			fprintf(stderr,"Can't read temp file: %s\n",fname);
 689			return(3);
 690		}
 691	}
 692
 693	length = GetFileLength(fname);
 694	fseek(f,0,SEEK_SET);
 695	if((p = (char *)realloc(soundicon_tab[index].data, length)) == NULL)
 696	{
 697		fclose(f);
 698		return(4);
 699	}
 700	fread(p,length,1,f);
 701	fclose(f);
 702	remove(fname_temp);
 703
 704	ip = (int *)(&p[40]);
 705	soundicon_tab[index].length = (*ip) / 2;  // length in samples
 706	soundicon_tab[index].data = p;
 707	return(0);
 708}  //  end of LoadSoundFile
 709
 710
 711static int LookupSoundicon(int c)
 712{//==============================
 713// Find the sound icon number for a punctuation chatacter
 714	int ix;
 715
 716	for(ix=N_SOUNDICON_SLOTS; ix<n_soundicon_tab; ix++)
 717	{
 718		if(soundicon_tab[ix].name == c)
 719		{
 720			if(soundicon_tab[ix].length == 0)
 721			{
 722				if(LoadSoundFile(NULL,ix)!=0)
 723					return(-1);  // sound file is not available
 724			}
 725			return(ix);
 726		}
 727	}
 728	return(-1);
 729}
 730
 731
 732static int LoadSoundFile2(const char *fname)
 733{//=========================================
 734// Load a sound file into one of the reserved slots in the sound icon table
 735// (if it'snot already loaded)
 736
 737	int ix;
 738	static int slot = -1;
 739
 740	for(ix=0; ix<n_soundicon_tab; ix++)
 741	{
 742		if(((soundicon_tab[ix].filename != NULL) && strcmp(fname, soundicon_tab[ix].filename) == 0))
 743			return(ix);   // already loaded
 744	}
 745
 746	// load the file into the next slot
 747	slot++;
 748	if(slot >= N_SOUNDICON_SLOTS)
 749		slot = 0;
 750
 751	if(LoadSoundFile(fname, slot) != 0)
 752		return(-1);
 753
 754	soundicon_tab[slot].filename = (char *)realloc(soundicon_tab[ix].filename, strlen(fname)+1);
 755	strcpy(soundicon_tab[slot].filename, fname);
 756	return(slot);
 757}
 758
 759
 760
 761int Translator::AnnouncePunctuation(int c1, int c2, char *buf, int bufix)
 762{//======================================================================
 763	// announce punctuation names
 764	// c1:  the punctuation character
 765	// c2:  the following character
 766
 767	int punct_count;
 768	const char *punctname;
 769	int found = 0;
 770	int soundicon;
 771	char *p;
 772
 773	if((soundicon = LookupSoundicon(c1)) >= 0)
 774	{
 775		// add an embedded command to play the soundicon
 776		sprintf(&buf[bufix],"\001%dI ",soundicon);
 777		UngetC(c2);
 778		found = 1;
 779	}
 780	else
 781	if((punctname = LookupCharName(c1)) != NULL)
 782	{
 783		found = 1;
 784		if(bufix==0)
 785		{
 786			punct_count=1;
 787			while(c2 == c1)
 788			{
 789				punct_count++;
 790				c2 = GetC();
 791			}
 792			UngetC(c2);
 793
 794			p = &buf[bufix];
 795			if(punct_count==1)
 796			{
 797				sprintf(p,"%s %s %s",tone_punct_on,punctname,tone_punct_off);
 798			}
 799			else
 800			if(punct_count < 4)
 801			{
 802				sprintf(p,"\001+10S%s",tone_punct_on);
 803				while(punct_count-- > 0)
 804					sprintf(buf,"%s %s",buf,punctname);
 805				sprintf(p,"%s %s\001-10S",buf,tone_punct_off);
 806			}
 807			else
 808			{
 809				sprintf(p,"%s %s %d %s %s [[______]]",
 810						tone_punct_on,punctname,punct_count,punctname,tone_punct_off);
 811				option_phoneme_input = 1;
 812			}
 813		}
 814		else
 815		{
 816			// end the clause now and pick up the punctuation next time
 817			UngetC(c2);
 818			if(option_ssml)
 819			{
 820				if((c1 == '<') || (c1 == '&'))
 821					ssml_ignore_l_angle = c1;  // this was &lt; which was converted to <, don't pick it up again as <
 822			}
 823			ungot_char2 = c1;
 824			buf[bufix] = ' ';
 825			buf[bufix+1] = 0;
 826		}
 827	}
 828
 829	if(found == 0)
 830		return(-1);
 831
 832	if(c1 == '-')
 833		return(CLAUSE_NONE);   // no pause
 834	if((strchr_w(punct_close,c1) != NULL) && !iswalnum(c2))
 835		return(CLAUSE_COLON);
 836	if(iswspace(c2) && strchr_w(punct_stop,c1)!=NULL)
 837		return(punct_attributes[lookupwchar(punct_chars,c1)]);
 838	
 839	return(CLAUSE_COMMA);
 840}  //  end of AnnouncePunctuation
 841
// Tag type numbers; these are the values held in the ssmltags[] table.
 842#define SSML_SPEAK     1
 843#define SSML_VOICE     2
 844#define SSML_PROSODY   3
 845#define SSML_SAYAS     4
 846#define SSML_MARK      5
 847#define SSML_SENTENCE  6
 848#define SSML_PARAGRAPH 7
 849#define SSML_PHONEME   8
 850#define SSML_SUB       9
 851#define SSML_STYLE    10
 852#define SSML_AUDIO    11
 853#define SSML_EMPHASIS 12
 854#define SSML_BREAK    13
 855#define SSML_METADATA 14
 856#define HTML_BREAK    15
 857#define SSML_CLOSE    0x10   // for a closing tag, OR this with the tag type
 858
 859// these tags have no effect if they are self-closing, eg. <voice />
// NOTE(review): presumably indexed by the tag type number above (1 = ignore) - confirm at the point of use.
 860static char ignore_if_self_closing[] = {0,1,1,1,1,0,0,0,0,1,1,0,1,0,1,0,0};
 861
 862
// Tag names and their tag type numbers.  The list ends with a NULL name.
// The first group are SSML tags, the second group are HTML tags which are
// given the type of a break or of a paragraph.
 863MNEM_TAB ssmltags[] = {
 864	{"speak", SSML_SPEAK},
 865	{"voice", SSML_VOICE},
 866	{"prosody", SSML_PROSODY},
 867	{"say-as", SSML_SAYAS},
 868	{"mark", SSML_MARK},
 869	{"s", SSML_SENTENCE},
 870	{"p", SSML_PARAGRAPH},
 871	{"phoneme", SSML_PHONEME},
 872	{"sub", SSML_SUB},
 873	{"tts:style", SSML_STYLE},
 874	{"audio", SSML_AUDIO},
 875	{"emphasis", SSML_EMPHASIS},
 876	{"break", SSML_BREAK},
 877	{"metadata", SSML_METADATA},
 878
 879	{"br", HTML_BREAK},
 880	{"li", HTML_BREAK},
 881	{"img", HTML_BREAK},
 882	{"td", HTML_BREAK},
 883	{"h1", SSML_PARAGRAPH},
 884	{"h2", SSML_PARAGRAPH},
 885	{"h3", SSML_PARAGRAPH},
 886	{"h4", SSML_PARAGRAPH},
 887	{"hr", SSML_PARAGRAPH},
 888	{NULL,0}};
 889
 890
 891
 892
 893static const char *VoiceFromStack()
 894{//================================
 895// Use the voice properties from the SSML stack to choose a voice, and switch
 896// to that voice if it's not the current voice
 897	int ix;
 898	SSML_STACK *sp;
 899	const char *v_id;
 900	int voice_name_specified;
 901	espeak_VOICE voice_select;
 902	char voice_name[40];
 903	char language[40];
 904
 905	strcpy(voice_name,ssml_stack[0].voice_name);
 906	strcpy(language,ssml_stack[0].language);
 907	voice_select.age = ssml_stack[0].voice_age;
 908	voice_select.gender = ssml_stack[0].voice_gender;
 909	voice_select.variant = ssml_stack[0].voice_variant;
 910	voice_select.identifier = NULL;
 911
 912	for(ix=0; ix<n_ssml_stack; ix++)
 913	{
 914		sp = &ssml_stack[ix];
 915		voice_name_specified = 0;
 916
 917		if((sp->voice_name[0] != 0) && (SelectVoiceByName(NULL,sp->voice_name) != NULL))
 918		{
 919			voice_name_specified = 1;
 920			strcpy(voice_name, sp->voice_name);
 921			language[0] = 0;
 922			voice_select.gender = 0;
 923			voice_select.age = 0;
 924			voice_select.variant = 0;
 925		}
 926		if(sp->language[0] != 0)
 927		{
 928			strcpy(language, sp->language);
 929			if(voice_name_specified == 0)
 930				voice_name[0] = 0;  // forget a previous voice name if a language is specified
 931		}
 932		if(sp->voice_gender != 0)
 933			voice_select.gender = sp->voice_gender;
 934		if(sp->voice_age != 0)
 935			voice_select.age = sp->voice_age;
 936		if(sp->voice_variant != 0)
 937			voice_select.variant = sp->voice_variant;
 938	}
 939
 940	voice_select.name = voice_name;
 941	voice_select.languages = language;
 942	v_id = SelectVoice(&voice_select);
 943	if(v_id == NULL)
 944		return("default");
 945	return(v_id);
 946}  // end of VoiceFromStack
 947
 948
 949
 950static void ProcessParamStack(char *outbuf, int &outix)
 951{//====================================================
 952// Set the speech parameters from the parameter stack
 953	int param;
 954	int ix;
 955	int value;
 956	char buf[20];
 957	int new_parameters[N_SPEECH_PARAM];
 958	static char cmd_letter[N_SPEECH_PARAM] = {0, 'S','A','P','R', 0, 0, 0, 0, 0, 0, 0, 'F'};  // embedded command letters
 959
 960
 961	for(param=0; param<N_SPEECH_PARAM; param++)
 962		new_parameters[param] = -1;
 963
 964	for(ix=0; ix<n_param_stack; ix++)
 965	{
 966		for(param=0; param<N_SPEECH_PARAM; param++)
 967		{
 968			if(param_stack[ix].parameter[param] >= 0)
 969				new_parameters[param] = param_stack[ix].parameter[param];
 970		}
 971	}
 972
 973	for(param=0; param<N_SPEECH_PARAM; param++)
 974	{
 975		if((value = new_parameters[param]) != speech_parameters[param])
 976		{
 977			buf[0] = 0;
 978
 979			switch(param)
 980			{
 981			case espeakPUNCTUATION:
 982				option_punctuation = value-1;
 983				break;
 984
 985			case espeakCAPITALS:
 986				option_capitals = value;
 987				break;
 988
 989			case espeakRATE:
 990			case espeakVOLUME:
 991			case espeakPITCH:
 992			case espeakRANGE:
 993			case espeakEMPHASIS:
 994				sprintf(buf,"%c%d%c",CTRL_EMBEDDED,value,cmd_letter[param]);
 995				break;
 996			}
 997
 998			speech_parameters[param] = new_parameters[param];
 999			strcpy(&outbuf[outix],buf);
1000			outix += strlen(buf);
1001		}
1002	}
1003}  // end of ProcessParamStack
1004
1005
1006static PARAM_STACK *PushParamStack(int tag_type)
1007{//=============================================
1008	int  ix;
1009	PARAM_STACK *sp;
1010
1011	sp = &param_stack[n_param_stack];
1012	if(n_param_stack < (N_PARAM_STACK-1))
1013		n_param_stack++;
1014
1015	sp->type = tag_type;
1016	for(ix=0; ix<N_SPEECH_PARAM; ix++)
1017	{
1018		sp->parameter[ix] = -1;
1019	}
1020	return(sp);
1021}  //  end of PushParamStack
1022
1023
1024static void PopParamStack(int tag_type, char *outbuf, int &outix)
1025{//==============================================================
1026	// unwind the stack up to and including the previous tag of this type
1027	int ix;
1028	int top = 0;
1029
1030	if(tag_type >= SSML_CLOSE)
1031		tag_type -= SSML_CLOSE;
1032
1033	for(ix=0; ix<n_param_stack; ix++)
1034	{
1035		if(param_stack[ix].type == tag_type)
1036		{
1037			top = ix;
1038		}
1039	}
1040	if(top > 0)
1041	{
1042		n_param_stack = top;
1043	}
1044	ProcessParamStack(outbuf, outix);
1045}  // end of PopParamStack
1046
1047
1048
static wchar_t *GetSsmlAttribute(wchar_t *pw, const char *name)
{//============================================================
// Gets the value string for an attribute.
// pw:    the text of the tag.  pw[-1] is read, so pw must not point at the
//        first character of its buffer.
// name:  the attribute name, which must be preceded by white space in the tag.
// Returns a pointer to the first character after the opening double quote of
// the value, an empty string if the value doesn't start with a double quote,
// or NULL if the attribute is not present
	int ix;
	static wchar_t empty[1] = {0};

	while(*pw != 0)
	{
		if(iswspace(pw[-1]))
		{
			ix = 0;
			// stop at the end of the name, so that the terminator of the name
			// is never compared with the terminator of the tag text
			while((name[ix] != 0) && (*pw == name[ix]))
			{
				pw++;
				ix++;
			}
			if(name[ix]==0)
			{
				// found the attribute, now get the value
				while(iswspace(*pw)) pw++;
				if(*pw == '=') pw++;
				while(iswspace(*pw)) pw++;
				if(*pw == '"')
					return(pw+1);
				else
					return(empty);
			}
			if(*pw == 0)
				break;   // a partial match reached the end of the text, don't step past it
		}
		pw++;
	}
	return(NULL);
}  //  end of GetSsmlAttribute
1082
1083
static int attrcmp(const wchar_t *string1, const char *string2)
{//============================================================
// Compare an attribute value with a string.
// Returns 0 if string1 consists of string2 followed by a closing double quote,
// otherwise 1 (also when string1 is NULL).
	if(string1 == NULL)
		return(1);

	// skip the characters which are the same in both strings
	while((*string1 != 0) && (*string1 == *string2))
	{
		string1++;
		string2++;
	}

	return(((*string1 == '"') && (*string2 == 0)) ? 0 : 1);
}
1098
1099
1100static int attrlookup(const wchar_t *string1, const MNEM_TAB *mtab)
1101{//================================================================
1102	int ix;
1103
1104	for(ix=0; mtab[ix].mnem != NULL; ix++)
1105	{
1106		if(attrcmp(string1,mtab[ix].mnem) == 0)
1107			return(mtab[ix].value);
1108	}
1109	return(mtab[ix].value);
1110}
1111
1112
static int attrnumber(const wchar_t *pw, int default_value, int type)
{//==================================================================
// Read an unsigned decimal number from the start of an attribute value.
// pw:             the attribute value, may be NULL
// default_value:  returned if pw is NULL or doesn't start with a digit
// type:           1 = a time; the number is multiplied by 1000 if it is
//                 followed by 's' or 'S' (seconds rather than ms)
	int value = 0;

	// the digits are tested by range: the ctype.h isdigit() is not defined
	// for wide character codes outside the unsigned char range
	if((pw == NULL) || (*pw < '0') || (*pw > '9'))
		return(default_value);

	while((*pw >= '0') && (*pw <= '9'))
	{
		value = value*10 + (*pw++ - '0');
	}
	if((type==1) && (towlower(*pw)=='s'))
	{
		// time: seconds rather than ms
		value *= 1000;
	}
	return(value);
}  // end of attrnumber
1131
1132
1133
1134static int attrcopy_utf8(char *buf, const wchar_t *pw, int len)
1135{//============================================================
1136// Convert attribute string into utf8, write to buf, and return its utf8 length
1137	unsigned int c;
1138	int ix = 0;
1139	int n;
1140	int prev_c = 0;
1141
1142	if(pw != NULL)
1143	{
1144		while((ix < (len-4)) && ((c = *pw++) != 0))
1145		{
1146			if((c=='"') && (prev_c != '\\'))
1147				break;   // " indicates end of attribute, unless preceded by backstroke
1148			n = utf8_out(c,&buf[ix]);
1149			ix += n;
1150			prev_c = c;
1151		}
1152	}
1153	buf[ix] = 0;
1154	return(ix);
1155}  // end of attrcopy_utf8
1156
1157
1158
1159static int attr_prosody_value(int param_type, const wchar_t *pw, int *value_out)
1160{//=============================================================================
1161	int sign = 0;
1162	wchar_t *tail;
1163	float value;
1164
1165	while(iswspace(*pw)) pw++;
1166	if(*pw == '+')
1167	{
1168		pw++;
1169		sign = 1;
1170	}
1171	if(*pw == '-')
1172	{
1173		pw++;	
1174		sign = -1;
1175	}
1176	value = wcstod(pw,&tail);
1177	if(tail == pw)
1178	{
1179		// failed to find a number, return 100%
1180		*value_out = 100;
1181		return(2);
1182	}
1183
1184	if(*tail == '%')
1185	{
1186		if(sign != 0)
1187			value = 100 + (sign * value);
1188		*value_out = (int)value;
1189		return(2);   // percentage
1190	}
1191
1192	if((tail[0]=='s') && (tail[1]=='t'))
1193	{
1194		double x;
1195		// convert from semitones to a  frequency percentage
1196		x = pow(double(2.0),double((value*sign)/12)) * 100;
1197		*value_out = (int)x;
1198		return(2);   // percentage
1199	}
1200
1201	if(param_type == espeakRATE)
1202	{
1203		*value_out = (int)(value * 100);
1204		return(2);   // percentage
1205	}
1206
1207	*value_out = (int)value;
1208	return(sign);   // -1, 0, or 1
1209}  // end of attr_prosody_value
1210
1211
1212int AddNameData(const char *name, int wide)
1213{//========================================
1214// Add the name to the namedata and return its position
1215	int ix;
1216	int len;
1217	void *vp;
1218
1219	if(wide)
1220	{
1221		len = (wcslen((const wchar_t *)name)+1)*sizeof(wchar_t);
1222		n_namedata = (n_namedata + sizeof(wchar_t) - 1) % sizeof(wchar_t);  // round to wchar_t boundary
1223	}
1224	else
1225	{
1226		len = strlen(name)+1;
1227	}
1228
1229	if(namedata_ix+len >= n_namedata)
1230	{
1231		// allocate more space for marker names
1232		if((vp = realloc(namedata, namedata_ix+len + 300)) == NULL)
1233			return(-1);  // failed to allocate, original data is unchanged but ignore this new name
1234
1235		namedata = (char *)vp;
1236		n_namedata = namedata_ix+len + 300;
1237	}
1238	memcpy(&namedata[ix = namedata_ix],name,len);
1239	namedata_ix += len;
1240	return(ix);
1241}  //  end of AddNameData
1242
1243
1244void SetVoiceStack(espeak_VOICE *v)
1245{//================================
1246	SSML_STACK *sp;
1247	sp = &ssml_stack[0];
1248
1249	if(v == NULL)
1250	{
1251		memset(sp,0,sizeof(ssml_stack[0]));
1252		return;
1253	}
1254	if(v->languages != NULL)
1255		strcpy(sp->language,v->languages);
1256	if(v->name != NULL)
1257		strcpy(sp->voice_name,v->name);
1258	sp->voice_variant = v->variant;
1259	sp->voice_age = v->age;
1260	sp->voice_gender = v->gender;
1261}
1262
1263
1264static int GetVoiceAttributes(wchar_t *pw, int tag_type)
1265{//=====================================================
1266// Determines whether voice attribute are specified in this tag, and if so, whether this means
1267// a voice change.
1268// If it's a closing tag, delete the top frame of the stack and determine whether this implies
1269// a voice change.
1270// Returns  CLAUSE_BIT_VOICE if there is a voice change
1271
1272	wchar_t *lang;
1273	wchar_t *gender;
1274	wchar_t *name;
1275	wchar_t *age;
1276	wchar_t *variant;
1277	const char *new_voice_id;
1278	SSML_STACK *ssml_sp;
1279
1280	static const MNEM_TAB mnem_gender[] = {
1281		{"male", 1},
1282		{"female", 2},
1283		{"neutral", 3},
1284		{NULL, 0}};
1285
1286	if(tag_type & SSML_CLOSE)
1287	{
1288		// delete a stack frame
1289		if(n_ssml_stack > 1)
1290		{
1291			n_ssml_stack--;
1292		}
1293	}
1294	else
1295	{
1296		// add a stack frame if any voice details are specified
1297		lang = GetSsmlAttribute(pw,"xml:lang");
1298
1299		if(tag_type != SSML_VOICE)
1300		{
1301			// only expect an xml:lang attribute
1302			name = NULL;
1303			variant = NULL;
1304			age = NULL;
1305			gender = NULL;
1306		}
1307		else
1308		{
1309			name = GetSsmlAttribute(pw,"name");
1310			variant = GetSsmlAttribute(pw,"variant");
1311			age = GetSsmlAttribute(pw,"age");
1312			gender = GetSsmlAttribute(pw,"gender");
1313		}
1314	
1315		if((tag_type != SSML_VOICE) && (lang==NULL))
1316			return(0);  // <s> or <p> without language spec, nothing to do
1317	
1318		ssml_sp = &ssml_stack[n_ssml_stack++];
1319
1320		attrcopy_utf8(ssml_sp->language,lang,sizeof(ssml_sp->language));
1321		attrcopy_utf8(ssml_sp->voice_name,name,sizeof(ssml_sp->voice_name));
1322		ssml_sp->voice_variant = attrnumber(variant,1,0)-1;
1323		ssml_sp->voice_age = attrnumber(age,0,0);
1324		ssml_sp->voice_gender = attrlookup(gender,mnem_gender);
1325		ssml_sp->tag_type = tag_type;
1326	}
1327
1328	new_voice_id = VoiceFromStack();
1329	if(strcmp(new_voice_id,current_voice_id) != 0)
1330	{
1331		// add an embedded command to change the voice
1332		strcpy(current_voice_id,new_voice_id);
1333		return(CLAUSE_BIT_VOICE);    // change of voice
1334	}
1335
1336	return(0);
1337}  //  end of GetVoiceAttributes
1338
1339
1340static void SetProsodyParameter(int param_type, wchar_t *attr1, PARAM_STACK *sp)
1341{//=============================================================================
1342	int value;
1343	int sign;
1344
1345	static const MNEM_TAB mnem_volume[] = {
1346		{"default",100},
1347		{"silent",0},
1348		{"x-soft",30},
1349		{"soft",65},
1350		{"medium",100},
1351		{"loud",150},
1352		{"x-loud",230},
1353		{NULL, -1}};
1354
1355	static const MNEM_TAB mnem_rate[] = {
1356		{"default",100},
1357		{"x-slow",60},
1358		{"slow",80},
1359		{"medium",100},
1360		{"fast",120},
1361		{"x-fast",150},
1362		{NULL, -1}};
1363
1364	static const MNEM_TAB mnem_pitch[] = {
1365		{"default",100},
1366		{"x-low",70},
1367		{"low",85},
1368		{"medium",100},
1369		{"high",110},
1370		{"x-high",120},
1371		{NULL, -1}};
1372
1373	static const MNEM_TAB mnem_range[] = {
1374		{"default",100},
1375		{"x-low",20},
1376		{"low",50},
1377		{"medium",100},
1378		{"high",140},
1379		{"x-high",180},
1380		{NULL, -1}};
1381
1382	static const MNEM_TAB *mnem_tabs[5] = {
1383		NULL, mnem_rate, mnem_volume, mnem_pitch, mnem_range };
1384
1385
1386	if((value = attrlookup(attr1,mnem_tabs[param_type])) >= 0)
1387	{
1388		// mnemonic specifies a value as a percentage of the base pitch/range/rate/volume
1389		sp->parameter[param_type] = (param_stack[0].parameter[param_type] * value)/100;
1390	}
1391	else
1392	{
1393		sign = attr_prosody_value(param_type,attr1,&value);
1394
1395		if(sign == 0)
1396			sp->parameter[param_type] = value;   // absolute value in Hz
1397		else
1398		if(sign == 2)
1399		{
1400			// change specified as percentage or in semitones
1401			sp->parameter[param_type] = (speech_parameters[param_type] * value)/100;
1402		}
1403		else
1404		{
1405			// change specified as plus or minus Hz
1406			sp->parameter[param_type] = speech_parameters[param_type] + (value*sign);
1407		}
1408	}
1409}  // end of SetProsodyParemeter
1410
1411
1412
1413static int ProcessSsmlTag(wchar_t *xml_buf, char *outbuf, int &outix, int n_outbuf, int self_closing)
1414{//==================================================================================================
1415// xml_buf is the tag and attributes with a zero terminator in place of the original '>'
1416// returns a clause terminator value.
1417
1418	unsigned int ix;
1419	int index;
1420	int c;
1421	int tag_type;
1422	int value;
1423	int value2;
1424	int value3;
1425	int voice_change_flag;
1426	wchar_t *px;
1427	wchar_t *attr1;
1428	wchar_t *attr2; 
1429	wchar_t *attr3;
1430	int terminator;
1431	char *uri;
1432	int param_type;
1433	char tag_name[40];
1434	char buf[80];
1435	PARAM_STACK *sp;
1436	SSML_STACK *ssml_sp;
1437
1438	static const MNEM_TAB mnem_punct[] = {
1439		{"none", 1},
1440		{"all", 2},
1441		{"some", 3},
1442		{NULL, -1}};
1443
1444	static const MNEM_TAB mnem_capitals[] = {
1445		{"no", 0},
1446		{"spelling", 2},
1447		{"icon", 1},
1448		{"pitch", 20},  // this is the amount by which to raise the pitch
1449		{NULL, -1}};
1450
1451	static const MNEM_TAB mnem_interpret_as[] = {
1452		{"characters",SAYAS_CHARS},
1453		{"tts:char",SAYAS_SINGLE_CHARS},
1454		{"tts:key",SAYAS_KEY},
1455		{"tts:digits",SAYAS_DIGITS},
1456		{"telephone",SAYAS_DIGITS1},
1457		{NULL, -1}};
1458
1459	static const MNEM_TAB mnem_sayas_format[] = {
1460		{"glyphs",1},
1461		{NULL, -1}};
1462
1463	static const MNEM_TAB mnem_break[] = {
1464		{"none",0},
1465		{"x-weak",1},
1466		{"weak",2},
1467		{"medium",3},
1468		{"strong",4},
1469		{"x-strong",5},
1470		{NULL,-1}};
1471
1472	static const MNEM_TAB mnem_emphasis[] = {
1473		{"none",1},
1474		{"reduced",2},
1475		{"moderate",3},
1476		{"strong",4},
1477		{NULL,-1}};
1478
1479	static const char *prosody_attr[5] = {
1480	 NULL, "rate", "volume", "pitch", "range" };
1481
1482	for(ix=0; ix<(sizeof(tag_name)-1); ix++)
1483	{
1484		if(((c = xml_buf[ix]) == 0) || iswspace(c))
1485			break;
1486		tag_name[ix] = tolower((char)c);
1487	}
1488	tag_name[ix] = 0;
1489
1490	px = &xml_buf[ix];   // the tag's attributes
1491	
1492	if(tag_name[0] == '/')
1493	{
1494		tag_type = LookupMnem(ssmltags,&tag_name[1]) + SSML_CLOSE;  // closing tag
1495	}
1496	else
1497	{
1498		tag_type = LookupMnem(ssmltags,tag_name);
1499
1500		if(self_closing && ignore_if_self_closing[tag_type])
1501			return(0);
1502	}
1503
1504	voice_change_flag = 0;
1505	terminator = CLAUSE_NONE;
1506	ssml_sp = &ssml_stack[n_ssml_stack-1];
1507
1508	switch(tag_type)
1509	{
1510	case SSML_STYLE:
1511		sp = PushParamStack(tag_type);
1512		attr1 = GetSsmlAttribute(px,"field");
1513		attr2 = GetSsmlAttribute(px,"mode");
1514
1515
1516		if(attrcmp(attr1,"punctuation")==0)
1517		{
1518			value = attrlookup(attr2,mnem_punct);
1519			sp->parameter[espeakPUNCTUATION] = value;
1520		}
1521		else
1522		if(attrcmp(attr1,"capital_letters")==0)
1523		{
1524			value = attrlookup(attr2,mnem_capitals);
1525			sp->parameter[espeakCAPITALS] = value;
1526		}
1527		ProcessParamStack(outbuf, outix);
1528		break;
1529
1530	case SSML_PROSODY:
1531		sp = PushParamStack(tag_type);
1532
1533		// look for attributes:  rate, volume, pitch, range
1534		for(param_type=espeakRATE; param_type <= espeakRANGE; param_type++)
1535		{
1536			if((attr1 = GetSsmlAttribute(px,prosody_attr[param_type])) != NULL)
1537			{
1538				SetProsodyParameter(param_type, attr1, sp);
1539			}
1540		}
1541
1542		ProcessParamStack(outbuf, outix);
1543		break;
1544
1545	case SSML_EMPHASIS:
1546		sp = PushParamStack(tag_type);
1547		value = 3;   // default is "moderate"
1548		if((attr1 = GetSsmlAttribute(px,"level")) != NULL)
1549		{
1550			value = attrlookup(attr1,mnem_emphasis);
1551		}
1552
1553		if(translator->langopts.tone_language == 1)
1554		{
1555			static unsigned char emphasis_to_pitch_range[] = {50,50,40,70,90,90};
1556			static unsigned char emphasis_to_volume[] = {100,100,70,110,140,140};
1557			// tone language (eg.Chinese) do emphasis by increasing the pitch range.
1558			sp->parameter[espeakRANGE] = emphasis_to_pitch_range[value];
1559			sp->parameter[espeakVOLUME] = emphasis_to_volume[value];
1560		}
1561		else
1562		{
1563			sp->parameter[espeakEMPHASIS] = value;
1564		}
1565		ProcessParamStack(outbuf, outix);
1566		break;
1567
1568	case SSML_STYLE + SSML_CLOSE:
1569	case SSML_PROSODY + SSML_CLOSE:
1570	case SSML_EMPHASIS + SSML_CLOSE:
1571		PopParamStack(tag_type, outbuf, outix);
1572		break;
1573
1574	case SSML_SAYAS:
1575		attr1 = GetSsmlAttribute(px,"interpret-as");
1576		attr2 = GetSsmlAttribute(px,"format");
1577		attr3 = GetSsmlAttribute(px,"detail");
1578		value = attrlookup(attr1,mnem_interpret_as);
1579		value2 = attrlookup(attr2,mnem_sayas_format);
1580		if(value2 == 1)
1581			value = SAYAS_GLYPHS;
1582
1583		value3 = attrnumber(attr3,0,0);
1584
1585		if(value == SAYAS_DIGITS)
1586		{
1587			if(value3 <= 1)
1588				value = SAYAS_DIGITS1;
1589			else
1590				value = SAYAS_DIGITS + value3;
1591		}
1592
1593		sprintf(buf,"%c%dY",CTRL_EMBEDDED,value);
1594		strcpy(&outbuf[outix],buf);
1595		outix += strlen(buf);
1596
1597		sayas_mode = value;   // punctuation doesn't end clause during SAY-AS
1598		break;
1599
1600	case SSML_SAYAS + SSML_CLOSE:
1601		outbuf[outix++] = CTRL_EMBEDDED;
1602		outbuf[outix++] = 'Y';
1603		sayas_mode = 0;
1604		break;
1605
1606	case SSML_SUB:
1607		if((attr1 = GetSsmlAttribute(px,"alias")) != NULL)
1608		{
1609			// use the alias  rather than the text
1610			ignore_text = 1;
1611			outix += attrcopy_utf8(&outbuf[outix],attr1,n_outbuf-outix);
1612		}
1613		break;
1614
1615	case SSML_METADATA:
1616		ignore_text = 1;
1617		break;
1618
1619	case SSML_SUB + SSML_CLOSE:
1620	case SSML_METADATA + SSML_CLOSE:
1621		ignore_text = 0;
1622		break;
1623
1624	case SSML_MARK:
1625		if((attr1 = GetSsmlAttribute(px,"name")) != NULL)
1626		{
1627			// add name to circular buffer of marker names
1628			attrcopy_utf8(buf,attr1,sizeof(buf));
1629
1630			if(strcmp(skip_marker,buf)==0)
1631			{
1632				// This is the marker we are waiting for before starting to speak
1633				clear_skipping_text = 1;
1634				skip_marker[0] = 0;
1635				return(CLAUSE_NONE);
1636			}
1637
1638			if((index = AddNameData(buf,0)) >= 0)
1639			{
1640				sprintf(buf,"%c%dM",CTRL_EMBEDDED,index);
1641				strcpy(&outbuf[outix],buf);
1642				outix += strlen(buf);
1643			}
1644		}
1645		break;
1646
1647	case SSML_AUDIO:
1648		sp = PushParamStack(tag_type);
1649
1650		if((attr1 = GetSsmlAttribute(px,"src")) != NULL)
1651		{
1652			char fname[256];
1653			attrcopy_utf8(buf,attr1,sizeof(buf));
1654
1655			if(uri_callback == NULL)
1656			{
1657				if((xmlbase != NULL) && (buf[0] != '/'))
1658				{
1659					sprintf(fname,"%s/%s",xmlbase,buf);
1660					index = LoadSoundFile2(fname);
1661				}
1662				else
1663				{
1664					index = LoadSoundFile2(buf);
1665				}
1666				if(index >= 0)
1667				{
1668					sprintf(buf,"%c%dI",CTRL_EMBEDDED,index);
1669					strcpy(&outbuf[outix],buf);
1670					outix += strlen(buf);
1671					sp->parameter[espeakSILENCE] = 1;
1672				}
1673			}
1674			else
1675			{
1676				if((index = AddNameData(buf,0)) >= 0)
1677				{
1678					uri = &namedata[index];
1679					if(uri_callback(1,uri,xmlbase) == 0)
1680					{
1681						sprintf(buf,"%c%dU",CTRL_EMBEDDED,index);
1682						strcpy(&outbuf[outix],buf);
1683						outix += strlen(buf);
1684						sp->parameter[espeakSILENCE] = 1;
1685					}
1686				}
1687			}
1688		}
1689		ProcessParamStack(outbuf, outix);
1690
1691		if(self_closing)
1692			PopParamStack(tag_type, outbuf, outix);
1693		return(CLAUSE_NONE);
1694
1695	case SSML_AUDIO + SSML_CLOSE:
1696		PopParamStack(tag_type, outbuf, outix);
1697		return(CLAUSE_NONE);
1698
1699	case SSML_BREAK:
1700		value = 21;
1701		terminator = CLAUSE_NONE;
1702
1703		if((attr1 = GetSsmlAttribute(px,"strength")) != NULL)
1704		{
1705			static int break_value[6] = {0,7,14,21,40,80};  // *10mS
1706			value = attrlookup(attr1,mnem_break);
1707			if(value < 3)
1708			{
1709				// adjust prepause on the following word
1710				sprintf(&outbuf[outix],"%c%dB",CTRL_EMBEDDED,value);
1711				outix += 3;
1712				terminator = 0;
1713			}
1714			value = break_value[value];
1715		}
1716		if((attr2 = GetSsmlAttribute(px,"time")) != NULL)
1717		{
1718			value = (attrnumber(attr2,0,1) * 25) / speed_factor1; // compensate for speaking speed to keep constant pause length
1719
1720			if(terminator == 0)
1721				terminator = CLAUSE_NONE;
1722		}
1723		if(terminator)
1724		{
1725			if(value > 0xfff)
1726				value = 0xfff;
1727			return(terminator + value);
1728		}
1729		break;
1730
1731	case SSML_SPEAK:
1732		if((attr1 = GetSsmlAttribute(px,"xml:base")) != NULL)
1733		{
1734			attrcopy_utf8(buf,attr1,sizeof(buf));
1735			if((index = AddNameData(buf,0)) >= 0)
1736			{
1737				xmlbase = &namedata[index];
1738			}
1739		}
1740		if(GetVoiceAttributes(px, tag_type) == 0)
1741			return(0);   // no voice change
1742		return(CLAUSE_VOICE);
1743
1744	case SSML_VOICE:
1745		if(GetVoiceAttributes(px, tag_type) == 0)
1746			return(0);   // no voice change
1747		return(CLAUSE_VOICE);
1748
1749	case SSML_SPEAK + SSML_CLOSE:
1750		// unwind stack until the previous <voice> or <speak> tag
1751		while((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_SPEAK))
1752		{
1753			n_ssml_stack--;
1754		}
1755		return(CLAUSE_PERIOD + GetVoiceAttributes(px, tag_type));
1756
1757	case SSML_VOICE + SSML_CLOSE:
1758		// unwind stack until the previous <voice> or <speak> tag
1759		while((n_ssml_stack > 1) && (ssml_stack[n_ssml_stack-1].tag_type != SSML_VOICE))
1760		{
1761			n_ssml_stack--;
1762		}
1763
1764terminator=0;  // ??  Sentence intonation, but no pause ??
1765		return(terminator + GetVoiceAttributes(px, tag_type));
1766
1767	case HTML_BREAK:
1768	case HTML_BREAK + SSML_CLOSE:
1769		return(CLAUSE_COLON);
1770
1771	case SSML_SENTENCE:
1772		if(ssml_sp->tag_type == SSML_SENTENCE)
1773		{
1774			// new sentence implies end-of-sentence
1775			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE);
1776		}
1777		voice_change_flag |= GetVoiceAttributes(px, tag_type);
1778		return(CLAUSE_PARAGRAPH + voice_change_flag);
1779
1780
1781	case SSML_PARAGRAPH:
1782		if(ssml_sp->tag_type == SSML_SENTENCE)
1783		{
1784			// new paragraph implies end-of-sentence or end-of-paragraph
1785			voice_change_flag = GetVoiceAttributes(px, SSML_SENTENCE+SSML_CLOSE);
1786		}
1787		if(ssml_sp->tag_type == SSML_PARAGRAPH)
1788		{
1789			// new paragraph implies end-of-sentence or end-of-paragraph
1790			voice_change_flag |= GetVoiceAttributes(px, SSML_PARAGRAPH+SSML_CLOSE);
1791		}
1792		voice_change_flag |= GetVoiceAttributes(px, tag_type);
1793		return(CLAUSE_PARAGRAPH + voice_change_flag);
1794
1795
1796	case SSML_SENTENCE + SSML_CLOSE:
1797		if(ssml_sp->tag_type == SSML_SENTENCE)
1798		{
1799			// end of a sentence which specified a language
1800			voice_change_flag = GetVoiceAttributes(px, tag_type);
1801		}
1802		return(CLAUSE_PERIOD + voice_change_flag);
1803
1804
1805	case SSML_PARAGRAPH + SSML_CLOSE:
1806		if((ssml_sp->tag_type == SSML_SENTENCE) || (ssml_sp->tag_type == SSML_PARAGRAPH))
1807		{
1808			// End of a paragraph which specified a language.
1809			// (End-of-paragraph also implies end-of-sentence)
1810			return(GetVoiceAttributes(px, tag_type) + CLAUSE_PARAGRAPH);
1811		}
1812		return(CLAUSE_PARAGRAPH);
1813	}
1814	return(0);
1815}  // end of ProcessSsmlTag
1816
1817
1818MNEM_TAB xml_char_mnemonics[] = {
1819	{"gt",'>'},
1820	{"lt",'<'},
1821	{"amp", '&'},
1822	{"quot", '"'},
1823	{"nbsp", ' '},
1824	{"apos", '\''},
1825	{NULL,-1}};
1826
1827
1828int Translator::ReadClause(FILE *f_in, char *buf, short *charix, int n_buf)
1829{//========================================================================
1830/* Find the end of the current clause.
1831	Write the clause into  buf
1832
1833	returns: clause type (bits 0-7: pause x10mS, bits 8-11 intonation type)
1834
1835	Also checks for blank line (paragraph) as end-of-clause indicator.
1836
1837	Does not end clause for:
1838		punctuation immediately followed by alphanumeric  eg.  1.23  !Speak  :path
1839		repeated punctuation, eg.   ...   !!!
1840*/
1841	int c1=' ';  // current character
1842	int c2;  // next character
1843	int cprev=' ';  // previous character
1844	int parag;
1845	int ix = 0;
1846	int j;
1847	int nl_count;
1848	int linelength = 0;
1849	int phoneme_mode = 0;
1850	int n_xml_buf;
1851	int terminator;
1852	int punct;
1853	int found;
1854	int any_alnum = 0;
1855	int self_closing;
1856	int punct_data;
1857	const char *p;
1858	wchar_t xml_buf[N_XML_BUF+1];
1859
1860#define N_XML_BUF2   20
1861	char xml_buf2[N_XML_BUF2+2];           // for &<name> and &<number> sequences
1862	static char ungot_string[N_XML_BUF2+4];
1863	static int ungot_string_ix = -1;
1864
1865	if(clear_skipping_text)
1866	{
1867		skipping_text = 0;
1868		clear_skipping_text = 0;
1869	}
1870
1871	clause_upper_count = 0;
1872	clause_lower_count = 0;
1873	end_of_input = 0;
1874
1875f_input = f_in;  // for GetC etc
1876
1877	if(ungot_word != NULL)
1878	{
1879		strcpy(buf,ungot_word);
1880		ix += strlen(ungot_word);
1881		ungot_word = NULL;
1882	}
1883
1884	if(ungot_char2 != 0)
1885	{
1886		c2 = ungot_char2;
1887	}
1888	else
1889	{
1890		c2 = GetC();
1891	}
1892
1893	while(!Eof() || (ungot_char != 0) || (ungot_char2 != 0) || (ungot_string_ix >= 0))
1894	{
1895		if(!iswalnum(c1))
1896		{
1897			if((end_character_position > 0) && (count_characters > end_character_position))
1898			{
1899				end_of_input = 1;
1900				return(CLAUSE_EOF);
1901			}
1902
1903			if((skip_characters > 0) && (count_characters > skip_characters))
1904			{
1905				// reached the specified start position
1906				// don't break a word
1907				clear_skipping_text = 1;
1908				skip_characters = 0;
1909				UngetC(c2);
1910				return(CLAUSE_NONE);
1911			}
1912		}
1913
1914		cprev = c1;
1915		c1 = c2;
1916
1917		if(ungot_string_ix >= 0)
1918		{
1919			if(ungot_string[ungot_string_ix] == 0)
1920				ungot_string_ix = -1;
1921		}
1922
1923		if((ungot_string_ix == 0) && (ungot_char2 == 0))
1924		{
1925			c1 = ungot_string[ungot_string_ix++];
1926		}
1927		if(ungot_string_ix >= 0)
1928		{
1929			c2 = ungot_string[ungot_string_ix++];
1930		}
1931		else
1932		{
1933			c2 = GetC();
1934
1935			if(Eof())
1936			{
1937				c2 = ' ';
1938			}
1939		}
1940		ungot_char2 = 0;
1941
1942		if((option_ssml) && (phoneme_mode==0))
1943		{
1944			if((ssml_ignore_l_angle != '&') && (c1 == '&') && ((c2=='#') || ((c2 >= 'a') && (c2 <= 'z'))))
1945			{
1946				n_xml_buf = 0;
1947				c1 = c2;
1948				while(!Eof() && (iswalnum(c1) || (c1=='#')) && (n_xml_buf < N_XML_BUF2))
1949				{
1950					xml_buf2[n_xml_buf++] = c1;
1951					c1 = GetC();
1952				}
1953				xml_buf2[n_xml_buf] = 0;
1954				c2 = GetC();
1955				sprintf(ungot_string,"%s%c%c",&xml_buf2[0],c1,c2);
1956
1957				if(c1 == ';')
1958				{
1959					if(xml_buf2[0] == '#')
1960					{
1961						// character code number
1962						if(xml_buf2[1] == 'x')
1963							found = sscanf(&xml_buf2[2],"%x",(unsigned int *)(&c1));
1964						else
1965							found = sscanf(&xml_buf2[1],"%d",&c1);
1966					}
1967					else
1968					{
1969						if((found = LookupMnem(xml_char_mnemonics,xml_buf2)) != -1)
1970						{
1971							c1 = found;
1972							if(c2 == 0)
1973								c2 = ' ';
1974						}
1975					}
1976				}
1977				else
1978				{
1979					found = -1;
1980				}
1981
1982				if(found <= 0)
1983				{
1984					ungot_string_ix = 0;
1985					c1 = '&';
1986					c2 = ' ';
1987				}
1988
1989				if((c1 <= 0x20) && ((sayas_mode == SAYAS_SINGLE_CHARS) || (sayas_mode == SAYAS_KEY)))
1990				{
1991					c1 += 0xe000;  // move into unicode private usage area
1992				}
1993			}
1994			else
1995			if((c1 == '<') && (ssml_ignore_l_angle != '<'))
1996			{
1997				// SSML Tag
1998				n_xml_buf = 0;
1999				c1 = c2;
2000				while(!Eof() && (c1 != '>') && (n_xml_buf < N_XML_BUF))
2001				{
2002					xml_buf[n_xml_buf++] = c1;
2003					c1 = GetC();
2004				}
2005				xml_buf[n_xml_buf] = 0;
2006				c2 = ' ';
2007	
2008				buf[ix++] = ' ';
2009	
2010				self_closing = 0;
2011				if(xml_buf[n_xml_buf-1] == '/')
2012				{
2013					// a self-closing tag
2014					xml_buf[n_xml_buf-1] = ' ';
2015					self_closing = 1;
2016				}
2017	
2018				terminator = ProcessSsmlTag(xml_buf,buf,ix,n_buf,self_closing);
2019	
2020				if(terminator != 0)
2021				{
2022					buf[ix] = ' ';
2023					buf[ix++] = 0;
2024	
2025					if(terminator & CLAUSE_BIT_VOICE)
2026					{
2027						// a change in voice, write the new voice name to the end of the buf
2028						p = current_voice_id;
2029						while((*p != 0) && (ix < (n_buf-1)))
2030						{
2031							buf[ix++] = *p++;
2032						}
2033						buf[ix++] = 0;
2034					}
2035					return(terminator);
2036				}
2037				continue;
2038			}
2039		}
2040		ssml_ignore_l_angle=0;
2041
2042		if(ignore_text)
2043			continue;
2044
2045		if((c2=='\n') && (option_linelength == -1))
2046		{
2047			// single-line mode, return immediately on NL
2048			if((punct = lookupwchar(punct_chars,c1)) == 0)
2049			{
2050				charix[ix] = count_characters - clause_start_char;
2051				ix += utf8_out(c1,&buf[ix]);
2052				terminator = CLAUSE_PERIOD;  // line doesn't end in punctuation, assume period
2053			}
2054			else
2055			{
2056				terminator = punct_attributes[punct];
2057			}
2058			buf[ix] = ' ';
2059			buf[ix+1] = 0;
2060			return(terminator);
2061		}
2062
2063		if((c1 == CTRL_EMBEDDED) || (c1 == ctrl_embedded))
2064		{
2065			// an embedded command. If it's a voice change, end the clause
2066			if(c2 == 'V')
2067			{
2068				buf[ix++] = 0;      // end the clause at this point
2069				while(!iswspace(c1 = GetC()) && !Eof() && (ix < (n_buf-1)))
2070					buf[ix++] = c1;  // add voice name to end of buffer, after the text
2071				buf[ix++] = 0;
2072				return(CLAUSE_VOICE);
2073			}
2074			else
2075			if(c2 == 'B')
2076			{
2077				// set the punctuation option from an embedded command
2078				//  B0     B1     B<punct list><space>
2079				strcpy(&buf[ix],"   ");
2080				ix += 3;
2081
2082				if((c2 = GetC()) == '0')
2083					option_punctuation = 0;
2084				else
2085				{
2086					option_punctuation = 1;
2087					option_punctlist[0] = 0;
2088					if(c2 != '1')
2089					{
2090						// a list of punctuation characters to be spoken, terminated by space
2091						j = 0;
2092						while(!iswspace(c2) && !Eof())
2093						{
2094							option_punctlist[j++] = c2;
2095							c2 = GetC();
2096							buf[ix++] = ' ';
2097						}
2098						option_punctlist[j] = 0;  // terminate punctuation list
2099						option_punctuation = 2;
2100					}
2101				}
2102				c2 = GetC();
2103				continue;
2104			}
2105		}
2106
2107		linelength++;
2108
2109		if(iswalnum(c1))
2110			any_alnum = 1;
2111		else
2112		if(iswspace(c1))
2113		{
2114			char *p_word;
2115
2116			if(translator_name == 0x6a626f)
2117			{
2118				// language jbo : lojban
2119				// treat "i" or ".i" as end-of-sentence
2120				p_word = &buf[ix-1];
2121				if(p_word[0] == 'i')
2122				{
2123					if(p_word[-1] == '.')
2124						p_word--;
2125					if(p_word[-1] == ' ')
2126					{
2127						ungot_word = "i ";
2128						UngetC(c2);
2129						p_word[0] = 0;
2130						return(CLAUSE_PERIOD);
2131					}
2132				}
2133			}
2134		}
2135
2136		if(iswupper(c1))
2137		{
2138			clause_upper_count++;
2139			if((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev))
2140			{
2141				char text_buf[40];
2142				char text_buf2[30];
2143				if(LookupSpecial("_cap",text_buf2) != NULL)
2144				{
2145					sprintf(text_buf,"%s%s%s",tone_punct_on,text_buf2,tone_punct_off);
2146					j = strlen(text_buf);
2147					if((ix + j) < n_buf)
2148					{
2149						strcpy(&buf[ix],text_buf);
2150						ix += j;
2151					}
2152				}
2153			}
2154		}
2155		else
2156		if(iswalpha(c1))
2157			clause_lower_count++;
2158
2159		if(option_phoneme_input)
2160		{
2161			if(phoneme_mode > 0)
2162				phoneme_mode--;
2163			else
2164			if((c1 == '[') && (c2 == '['))
2165				phoneme_mode = -1;     // input is phoneme mnemonics, so don't look for punctuation
2166			else
2167			if((c1 == ']') && (c2 == ']'))
2168				phoneme_mode = 2;      // set phoneme_mode to zero after the next two characters
2169		}
2170
2171		if(c1 == '\n')
2172		{
2173			parag = 0;
2174
2175			// count consecutive newlines, ignoring other spaces
2176			while(!Eof() && iswspace(c2))
2177			{
2178				if(c2 == '\n')
2179					parag++;
2180				c2 = GetC();
2181			}
2182			if(parag > 0)
2183			{
2184				// 2nd newline, assume paragraph
2185

Large files files are truncated, but you can click here to view the full file