PageRenderTime 107ms CodeModel.GetById 14ms app.highlight 82ms RepoModel.GetById 1ms app.codeStats 1ms

/native/external/espeak/src/numbers.cpp

http://eyes-free.googlecode.com/
C++ | 1397 lines | 1159 code | 166 blank | 72 comment | 303 complexity | eb0938b3bbca1fa7f551fddc169df735 MD5 | raw file
   1/***************************************************************************
   2 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
   3 *   email: jonsd@users.sourceforge.net                                    *
   4 *                                                                         *
   5 *   This program is free software; you can redistribute it and/or modify  *
   6 *   it under the terms of the GNU General Public License as published by  *
   7 *   the Free Software Foundation; either version 3 of the License, or     *
   8 *   (at your option) any later version.                                   *
   9 *                                                                         *
  10 *   This program is distributed in the hope that it will be useful,       *
  11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
  12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
  13 *   GNU General Public License for more details.                          *
  14 *                                                                         *
  15 *   You should have received a copy of the GNU General Public License     *
  16 *   along with this program; if not, see:                                 *
  17 *               <http://www.gnu.org/licenses/>.                           *
  18 ***************************************************************************/
  19
  20#include "StdAfx.h"
  21
  22#include <stdio.h>
  23#include <ctype.h>
  24#include <stdlib.h>
  25#include <string.h>
  26
  27#include <wctype.h>
  28////#include <wchar.h>
  29
  30#include "speak_lib.h"
  31#include "speech.h"
  32#include "phoneme.h"
  33#include "synthesize.h"
  34#include "voice.h"
  35#include "translate.h"
  36
  37
  38
  39#define M_NAME      0
  40#define M_SMALLCAP  1
  41#define M_TURNED    2
  42#define M_REVERSED  3
  43#define M_CURL      4
  44
  45#define M_ACUTE     5
  46#define M_BREVE     6
  47#define M_CARON     7
  48#define M_CEDILLA   8
  49#define M_CIRCUMFLEX 9
  50#define M_DIAERESIS 10
  51#define M_DOUBLE_ACUTE 11
  52#define M_DOT_ABOVE 12
  53#define M_GRAVE     13
  54#define M_MACRON    14
  55#define M_OGONEK    15
  56#define M_RING      16
  57#define M_STROKE    17
  58#define M_TILDE     18
  59
  60#define M_BAR       19
  61#define M_RETROFLEX 20
  62#define M_HOOK      21
  63
  64
  65#define M_MIDDLE_DOT  M_DOT_ABOVE  // duplicate of M_DOT_ABOVE
  66#define M_IMPLOSIVE   M_HOOK
  67
  68typedef struct {
  69const char *name;
  70int  flags;
  71} ACCENTS;
  72
  73// these are tokens to look up in the *_list file.
  74ACCENTS accents_tab[] = {
  75{"_lig", 1},
  76{"_smc", 1},  // smallcap
  77{"_tur", 1},  // turned
  78{"_rev", 1},  // reversed
  79{"_crl", 0},  // curl
  80
  81{"_acu", 0},  // acute
  82{"_brv", 0},  // breve
  83{"_hac", 0},  // caron/hacek
  84{"_ced", 0},  // cedilla
  85{"_cir", 0},  // circumflex
  86{"_dia", 0},  // diaeresis
  87{"_ac2", 0},  // double acute
  88{"_dot", 0},  // dot
  89{"_grv", 0},  // grave
  90{"_mcn", 0},  // macron
  91{"_ogo", 0},  // ogonek
  92{"_rng", 0},  // ring
  93{"_stk", 0},  // stroke
  94{"_tld", 0},  // tilde
  95
  96{"_bar", 0},  // bar
  97{"_rfx", 0},  // retroflex
  98{"_hok", 0},  // hook
  99};
 100
 101
 102#define CAPITAL  0
 103#define LETTER(ch,mod1,mod2) (ch-59)+(mod1 << 6)+(mod2 << 11)
 104#define LIGATURE(ch1,ch2,mod1) (ch1-59)+((ch2-59) << 6)+(mod1 << 12)+0x8000
 105
 106
 107#define L_ALPHA  60   // U+3B1
 108#define L_SCHWA  61   // U+259
 109#define L_OPEN_E 62   // U+25B
 110#define L_GAMMA  63   // U+3B3
 111#define L_IOTA   64   // U+3B9
 112#define L_OE     65   // U+153
 113#define L_OMEGA  66   // U+3C9
 114
 115#define L_PHI    67   // U+3C6
 116#define L_ESH    68   // U+283
 117#define L_UPSILON 69 // U+3C5
 118#define L_EZH     70 // U+292
 119#define L_GLOTTAL 71 // U+294
 120#define L_RTAP    72 // U+27E
 121
 122
 123static const short non_ascii_tab[] = {
 124    0, 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
 1250x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e };
 126
 127
 128// characters U+00e0 to U+017f
 129const unsigned short letter_accents_0e0[] = {
 130LETTER('a',M_GRAVE,0),   // U+00e0
 131LETTER('a',M_ACUTE,0),
 132LETTER('a',M_CIRCUMFLEX,0),
 133LETTER('a',M_TILDE,0),
 134LETTER('a',M_DIAERESIS,0),
 135LETTER('a',M_RING,0),
 136LIGATURE('a','e',0),
 137LETTER('c',M_CEDILLA,0),
 138LETTER('e',M_GRAVE,0),
 139LETTER('e',M_ACUTE,0),
 140LETTER('e',M_CIRCUMFLEX,0),
 141LETTER('e',M_DIAERESIS,0),
 142LETTER('i',M_GRAVE,0),
 143LETTER('i',M_ACUTE,0),
 144LETTER('i',M_CIRCUMFLEX,0),
 145LETTER('i',M_DIAERESIS,0),
 146LETTER('d',M_NAME,0),  // eth  // U+00f0
 147LETTER('n',M_TILDE,0),
 148LETTER('o',M_GRAVE,0),
 149LETTER('o',M_ACUTE,0),
 150LETTER('o',M_CIRCUMFLEX,0),
 151LETTER('o',M_TILDE,0),
 152LETTER('o',M_DIAERESIS,0),
 1530,     // division sign
 154LETTER('o',M_STROKE,0),
 155LETTER('u',M_GRAVE,0),
 156LETTER('u',M_ACUTE,0),
 157LETTER('u',M_CIRCUMFLEX,0),
 158LETTER('u',M_DIAERESIS,0),
 159LETTER('y',M_ACUTE,0),
 160LETTER('t',M_NAME,0),  // thorn
 161LETTER('y',M_DIAERESIS,0),
 162CAPITAL,                 // U+0100
 163LETTER('a',M_MACRON,0),
 164CAPITAL,
 165LETTER('a',M_BREVE,0),
 166CAPITAL,
 167LETTER('a',M_OGONEK,0),
 168CAPITAL,
 169LETTER('c',M_ACUTE,0),
 170CAPITAL,
 171LETTER('c',M_CIRCUMFLEX,0),
 172CAPITAL,
 173LETTER('c',M_DOT_ABOVE,0),
 174CAPITAL,
 175LETTER('c',M_CARON,0),
 176CAPITAL,
 177LETTER('d',M_CARON,0),
 178CAPITAL,                 // U+0110
 179LETTER('d',M_STROKE,0),
 180CAPITAL,
 181LETTER('e',M_MACRON,0),
 182CAPITAL,
 183LETTER('e',M_BREVE,0),
 184CAPITAL,
 185LETTER('e',M_DOT_ABOVE,0),
 186CAPITAL,
 187LETTER('e',M_OGONEK,0),
 188CAPITAL,
 189LETTER('e',M_CARON,0),
 190CAPITAL,
 191LETTER('g',M_CIRCUMFLEX,0),
 192CAPITAL,
 193LETTER('g',M_BREVE,0),
 194CAPITAL,                // U+0120
 195LETTER('g',M_DOT_ABOVE,0),
 196CAPITAL,
 197LETTER('g',M_CEDILLA,0),
 198CAPITAL,
 199LETTER('h',M_CIRCUMFLEX,0),
 200CAPITAL,
 201LETTER('h',M_STROKE,0),
 202CAPITAL,
 203LETTER('i',M_TILDE,0),
 204CAPITAL,
 205LETTER('i',M_MACRON,0),
 206CAPITAL,
 207LETTER('i',M_BREVE,0),
 208CAPITAL,
 209LETTER('i',M_OGONEK,0),
 210CAPITAL,               // U+0130
 211LETTER('i',M_NAME,0), // dotless i
 212CAPITAL,
 213LIGATURE('i','j',0),
 214CAPITAL,
 215LETTER('j',M_CIRCUMFLEX,0),
 216CAPITAL,
 217LETTER('k',M_CEDILLA,0),
 218LETTER('k',M_NAME,0),  // kra
 219CAPITAL,
 220LETTER('l',M_ACUTE,0),
 221CAPITAL,
 222LETTER('l',M_CEDILLA,0),
 223CAPITAL,
 224LETTER('l',M_CARON,0),
 225CAPITAL,
 226LETTER('l',M_MIDDLE_DOT,0),  // U+0140
 227CAPITAL,
 228LETTER('l',M_STROKE,0),
 229CAPITAL,
 230LETTER('n',M_ACUTE,0),
 231CAPITAL,
 232LETTER('n',M_CEDILLA,0),
 233CAPITAL,
 234LETTER('n',M_CARON,0),
 235LETTER('n',M_NAME,0),  // apostrophe n
 236CAPITAL,
 237LETTER('n',M_NAME,0),  // eng
 238CAPITAL,
 239LETTER('o',M_MACRON,0),
 240CAPITAL,
 241LETTER('o',M_BREVE,0),
 242CAPITAL,             // U+0150
 243LETTER('o',M_DOUBLE_ACUTE,0),
 244CAPITAL,
 245LIGATURE('o','e',0),
 246CAPITAL,
 247LETTER('r',M_ACUTE,0),
 248CAPITAL,
 249LETTER('r',M_CEDILLA,0),
 250CAPITAL,
 251LETTER('r',M_CARON,0),
 252CAPITAL,
 253LETTER('s',M_ACUTE,0),
 254CAPITAL,
 255LETTER('s',M_CIRCUMFLEX,0),
 256CAPITAL,
 257LETTER('s',M_CEDILLA,0),
 258CAPITAL,              // U+0160
 259LETTER('s',M_CARON,0),
 260CAPITAL,
 261LETTER('t',M_CEDILLA,0),
 262CAPITAL,
 263LETTER('t',M_CARON,0),
 264CAPITAL,
 265LETTER('t',M_STROKE,0),
 266CAPITAL,
 267LETTER('u',M_TILDE,0),
 268CAPITAL,
 269LETTER('u',M_MACRON,0),
 270CAPITAL,
 271LETTER('u',M_BREVE,0),
 272CAPITAL,
 273LETTER('u',M_RING,0),
 274CAPITAL,              // U+0170
 275LETTER('u',M_DOUBLE_ACUTE,0),
 276CAPITAL,
 277LETTER('u',M_OGONEK,0),
 278CAPITAL,
 279LETTER('w',M_CIRCUMFLEX,0),
 280CAPITAL,
 281LETTER('y',M_CIRCUMFLEX,0),
 282CAPITAL,   // Y-DIAERESIS
 283CAPITAL,
 284LETTER('z',M_ACUTE,0),
 285CAPITAL,
 286LETTER('z',M_DOT_ABOVE,0),
 287CAPITAL,
 288LETTER('z',M_CARON,0),
 289LETTER('s',M_NAME,0), // long-s  // U+17f
 290};
 291
 292
 293// characters U+0250 to U+029F
 294const unsigned short letter_accents_250[] = {
 295LETTER('a',M_TURNED,0),		// U+250
 296LETTER(L_ALPHA,0,0),
 297LETTER(L_ALPHA,M_TURNED,0),
 298LETTER('b',M_IMPLOSIVE,0),
 2990,  // open-o
 300LETTER('c',M_CURL,0),
 301LETTER('d',M_RETROFLEX,0),
 302LETTER('d',M_IMPLOSIVE,0),
 303LETTER('e',M_REVERSED,0),	// U+258
 3040,   // schwa
 305LETTER(L_SCHWA,M_HOOK,0),
 3060,   // open-e
 307LETTER(L_OPEN_E,M_REVERSED,0),
 308LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
 3090,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
 310LETTER('j',M_BAR,0),
 311LETTER('g',M_IMPLOSIVE,0),	// U+260
 312LETTER('g',0,0),
 313LETTER('g',M_SMALLCAP,0),
 314LETTER(L_GAMMA,0,0),
 3150,   // ramshorn
 316LETTER('h',M_TURNED,0),
 317LETTER('h',M_HOOK,0),
 3180,//LETTER(L_HENG,M_HOOK,0),
 319LETTER('i',M_BAR,0),		// U+268
 320LETTER(L_IOTA,0,0),
 321LETTER('i',M_SMALLCAP,0),
 322LETTER('l',M_TILDE,0),
 323LETTER('l',M_BAR,0),
 324LETTER('l',M_RETROFLEX,0),
 325LIGATURE('l','z',0),
 326LETTER('m',M_TURNED,0),
 3270,//LETTER('m',M_TURNED,M_LEG),	// U+270
 328LETTER('m',M_HOOK,0),
 3290,//LETTER('n',M_LEFTHOOK,0),
 330LETTER('n',M_RETROFLEX,0),
 331LETTER('n',M_SMALLCAP,0),
 332LETTER('o',M_BAR,0),
 333LIGATURE('o','e',M_SMALLCAP),
 3340,//LETTER(L_OMEGA,M_CLOSED,0),
 335LETTER(L_PHI,0,0),		// U+278
 336LETTER('r',M_TURNED,0),
 3370,//LETTER('r',M_TURNED,M_LEG),
 338LETTER('r',M_RETROFLEX,M_TURNED),
 3390,//LETTER('r',M_LEG,0),
 340LETTER('r',M_RETROFLEX,0),
 3410,  // r-tap
 342LETTER(L_RTAP,M_REVERSED,0),
 343LETTER('r',M_SMALLCAP,0),	// U+280
 344LETTER('r',M_TURNED,M_SMALLCAP),
 345LETTER('s',M_RETROFLEX,0),
 3460,  // esh
 3470,//LETTER('j',M_BAR,L_IMPLOSIVE),
 348LETTER(L_ESH,M_REVERSED,0),
 349LETTER(L_ESH,M_CURL,0),
 350LETTER('t',M_TURNED,0),
 351LETTER('t',M_RETROFLEX,0),	// U+288
 352LETTER('u',M_BAR,0),
 353LETTER(L_UPSILON,0,0),
 354LETTER('v',M_HOOK,0),
 355LETTER('v',M_TURNED,0),
 356LETTER('w',M_TURNED,0),
 357LETTER('y',M_TURNED,0),
 358LETTER('y',M_SMALLCAP,0),
 359LETTER('z',M_RETROFLEX,0),	// U+290
 360LETTER('z',M_CURL,0),
 3610,  // ezh
 362LETTER(L_EZH,M_CURL,0),
 3630,  // glottal stop
 364LETTER(L_GLOTTAL,M_REVERSED,0),
 365LETTER(L_GLOTTAL,M_TURNED,0),
 3660,//LETTER('c',M_LONG,0),
 3670,  // bilabial click		// U+298
 368LETTER('b',M_SMALLCAP,0),
 3690,//LETTER(L_OPEN_E,M_CLOSED,0),
 370LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
 371LETTER('h',M_SMALLCAP,0),
 372LETTER('j',M_CURL,0),
 373LETTER('k',M_TURNED,0),
 374LETTER('l',M_SMALLCAP,0),
 375LETTER('q',M_HOOK,0),      // U+2a0
 376LETTER(L_GLOTTAL,M_STROKE,0),
 377LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
 378LIGATURE('d','z',0),
 3790,   // dezh
 380LIGATURE('d','z',M_CURL),
 381LIGATURE('t','s',0),
 3820,   // tesh
 383LIGATURE('t','s',M_CURL),
 384};
 385
 386int Translator::LookupLetter2(unsigned int letter, char *ph_buf)
 387{//=============================================================
 388	int len;
 389	char single_letter[10];
 390
 391	single_letter[0] = 0;
 392	single_letter[1] = '_';
 393	len = utf8_out(letter, &single_letter[2]);
 394	single_letter[len+2] = ' ';
 395	single_letter[len+3] = 0;
 396
 397	if(Lookup(&single_letter[1],ph_buf) == 0)
 398	{
 399		single_letter[1] = ' ';
 400		if(Lookup(&single_letter[2],ph_buf) == 0)
 401		{
 402			TranslateRules(&single_letter[2], ph_buf, 20, NULL,0,NULL);
 403		}
 404	}
 405	return(ph_buf[0]);
 406}
 407
 408
 409void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf)
 410{//=====================================================================
 411	// lookup the character in the accents table
 412	int accent_data = 0;
 413	int accent1 = 0;
 414	int accent2 = 0;
 415	int basic_letter;
 416	int letter2=0;
 417	char ph_letter1[30];
 418	char ph_letter2[30];
 419	char ph_accent1[30];
 420	char ph_accent2[30];
 421
 422	ph_accent2[0] = 0;
 423
 424	if((letter >= 0xe0) && (letter < 0x17f))
 425	{
 426		accent_data = letter_accents_0e0[letter - 0xe0];
 427	}
 428	else
 429	if((letter >= 0x250) && (letter <= 0x2a8))
 430	{
 431		accent_data = letter_accents_250[letter - 0x250];
 432	} 
 433
 434	if(accent_data != 0)
 435	{
 436		basic_letter = (accent_data & 0x3f) + 59;
 437		if(basic_letter < 'a')
 438			basic_letter = non_ascii_tab[basic_letter-59];
 439
 440		if(accent_data & 0x8000)
 441		{
 442			letter2 = (accent_data >> 6) & 0x3f;
 443			letter2 += 59;
 444			accent2 = (accent_data >> 12) & 0x7;
 445		}
 446		else
 447		{
 448			accent1 = (accent_data >> 6) & 0x1f;
 449			accent2 = (accent_data >> 11) & 0xf;
 450		}
 451
 452
 453		if(Lookup(accents_tab[accent1].name, ph_accent1) != 0)
 454		{
 455
 456			if(LookupLetter2(basic_letter, ph_letter1) != 0)
 457			{
 458				if(accent2 != 0)
 459				{
 460					if(Lookup(accents_tab[accent2].name, ph_accent2) == 0)
 461					{
 462//						break;
 463					}
 464
 465					if(accents_tab[accent2].flags & 1)
 466					{
 467						strcpy(ph_buf,ph_accent2);
 468						ph_buf += strlen(ph_buf);
 469						ph_accent2[0] = 0;
 470					}
 471				}
 472				if(letter2 != 0)
 473				{
 474					//ligature
 475					LookupLetter2(letter2, ph_letter2);
 476					sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
 477				}
 478				else
 479				{
 480					if(accent1 == 0)
 481						strcpy(ph_buf, ph_letter1);
 482					else
 483					if((langopts.accents & 1) || (accents_tab[accent1].flags & 1))
 484						sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
 485					else
 486						sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
 487				}
 488			}
 489		}
 490	}
 491}  // end of LookupAccentedLetter
 492
 493
 494
 495void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
 496{//=============================================================================
 497	int len;
 498	unsigned char *p;
 499	static char single_letter[10] = {0,0};
 500	char ph_stress[2];
 501	unsigned int dict_flags[2];
 502	char ph_buf3[40];
 503	char *ptr;
 504
 505	ph_buf1[0] = 0;
 506	len = utf8_out(letter,&single_letter[2]);
 507	single_letter[len+2] = ' ';
 508
 509	if(next_byte == -1)
 510	{
 511		// speaking normal text, not individual characters
 512		if(Lookup(&single_letter[2],ph_buf1) != 0)
 513			return;
 514
 515		single_letter[1] = '_';
 516		if(Lookup(&single_letter[1],ph_buf3) != 0)
 517			return;   // the character is specified as _* so ignore it when speaking normal text
 518
 519		// check whether this character is specified for English
 520		SetTranslator2("en");
 521		if(translator2->Lookup(&single_letter[2], ph_buf3) != 0)
 522		{
 523			// yes, switch to English and re-translate the word
 524			sprintf(ph_buf1,"%c",phonSWITCH);
 525		}
 526		SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 527		return;
 528	}
 529
 530	if((letter <= 32) || iswspace(letter))
 531	{
 532		// lookup space as _&32 etc.
 533		sprintf(&single_letter[1],"_#%d ",letter);
 534		Lookup(&single_letter[1],ph_buf1);
 535		return;
 536	}
 537
 538	if(next_byte != ' ')
 539		next_byte = RULE_SPELLING;
 540	single_letter[3+len] = next_byte;   // follow by space-space if the end of the word, or space-0x31
 541
 542	single_letter[1] = '_';
 543
 544	// if the $accent flag is set for this letter, use the accents table (below)
 545	dict_flags[1] = 0;
 546	ptr = &single_letter[1];
 547	
 548	if(Lookup(&single_letter[1],ph_buf3) == 0)
 549	{
 550		single_letter[1] = ' ';
 551		if(Lookup(&single_letter[2],ph_buf3) == 0)
 552		{
 553			TranslateRules(&single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
 554		}
 555	}
 556
 557	if(ph_buf3[0] == 0)
 558	{
 559		LookupAccentedLetter(letter, ph_buf3);
 560	}
 561
 562	if(ph_buf3[0] == 0)
 563	{
 564		ph_buf1[0] = 0;
 565		return;
 566	}
 567	if(ph_buf3[0] == phonSWITCH)
 568	{
 569		strcpy(ph_buf1,ph_buf3);
 570		return;
 571	}
 572	// at a stress marker at the start of the letter name, unless one is already marked
 573	ph_stress[0] = phonSTRESS_P;
 574	ph_stress[1] = 0;
 575
 576	for(p=(unsigned char *)ph_buf3; *p != 0; p++)
 577	{
 578		if(phoneme_tab[*p]->type == phSTRESS)
 579			ph_stress[0] = 0;  // stress is already marked
 580	}
 581	sprintf(ph_buf1,"%s%s",ph_stress,ph_buf3);
 582}
 583
 584
 585
 586int Translator::TranslateLetter(char *word, char *phonemes, int control, int word_length)
 587{//======================================================================================
 588// get pronunciation for an isolated letter
 589// return number of bytes used by the letter
 590// control 2=say-as glyphs, 3-say-as chars
 591	int n_bytes;
 592	int letter;
 593	int len;
 594	int save_option_phonemes;
 595	char *p2;
 596	char *pbuf;
 597	char capital[20];
 598	char ph_buf[60];
 599	char ph_buf2[60];
 600	char hexbuf[6];
 601
 602	ph_buf[0] = 0;
 603	capital[0] = 0;
 604
 605	n_bytes = utf8_in(&letter,word,0);
 606
 607	if((letter & 0xfff00) == 0x0e000)
 608	{
 609		letter &= 0xff;   // uncode private usage area
 610	}
 611
 612	if(control > 2)
 613	{
 614		// include CAPITAL information
 615		if(iswupper(letter))
 616		{
 617			Lookup("_cap",capital);
 618		}
 619	}
 620	letter = towlower2(letter);
 621
 622	LookupLetter(letter, word[n_bytes], ph_buf);
 623
 624	if(ph_buf[0] == phonSWITCH)
 625	{
 626		strcpy(phonemes,ph_buf);
 627		return(0);
 628	}
 629
 630	if((ph_buf[0] == 0) && (translator_name != L('e','n')))
 631	{
 632		// speak as English, check whether there is a translation for this character
 633		SetTranslator2("en");
 634		save_option_phonemes = option_phonemes;
 635		option_phonemes = 0;
 636		translator2->LookupLetter(letter, word[n_bytes], ph_buf);
 637		SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
 638		option_phonemes = save_option_phonemes;
 639
 640		if(ph_buf[0] != 0)
 641		{
 642			sprintf(phonemes,"%cen",phonSWITCH);
 643			return(0);
 644		}
 645	}
 646
 647	if(ph_buf[0] == 0)
 648	{
 649		// character name not found
 650		if(iswalpha(letter))
 651			Lookup("_?A",ph_buf);
 652
 653		if((ph_buf[0]==0) && !iswspace(letter))
 654			Lookup("_??",ph_buf);
 655
 656		if(ph_buf[0] != 0)
 657		{
 658			// speak the hexadecimal number of the character code
 659			sprintf(hexbuf,"%x",letter);
 660			pbuf = ph_buf;
 661			for(p2 = hexbuf; *p2 != 0; p2++)
 662			{
 663				pbuf += strlen(pbuf);
 664				*pbuf++ = phonPAUSE_VSHORT;
 665				LookupLetter(*p2, 0, pbuf);
 666			}
 667		}
 668	}
 669
 670	len = strlen(phonemes);
 671	if(langopts.accents & 2)
 672		sprintf(ph_buf2,"%c%s%s",0xff,ph_buf,capital);
 673	else
 674		sprintf(ph_buf2,"%c%s%s",0xff,capital,ph_buf);  // the 0xff marker will be removed or replaced in SetSpellingStress()
 675	if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
 676	{
 677		strcpy(&phonemes[len],ph_buf2);
 678	}
 679	return(n_bytes);
 680}  // end of TranslateLetter
 681
 682
 683
 684void Translator::SetSpellingStress(char *phonemes, int control, int n_chars)
 685{//=========================================================================
 686// Individual letter names, reduce the stress of some.
 687	int ix;
 688	unsigned int c;
 689	int n_stress=0;
 690	int count;
 691	unsigned char buf[N_WORD_PHONEMES];
 692
 693	for(ix=0; (c = phonemes[ix]) != 0; ix++)
 694	{
 695		if(c == phonSTRESS_P)
 696		{
 697			n_stress++;
 698		}
 699		buf[ix] = c;
 700	}
 701	buf[ix] = 0;
 702
 703	count = 0;
 704	for(ix=0; (c = buf[ix]) != 0; ix++)
 705	{
 706		if((c == phonSTRESS_P) && (n_chars > 1))
 707		{
 708			count++;
 709
 710			if(langopts.spelling_stress == 1)
 711			{
 712				// stress on initial letter when spelling
 713				if(count > 1)
 714					c = phonSTRESS_3;
 715			}
 716			else
 717			{
 718				if(count != n_stress)
 719				{
 720					if(((count % 3) != 0) || (count == n_stress-1))
 721						c = phonSTRESS_3;   // reduce to secondary stress
 722				}
 723			}
 724		}
 725		else
 726		if(c == 0xff)
 727		{
 728			if((control < 2) || (ix==0))
 729				continue;   // don't insert pauses
 730
 731			if(control == 4)
 732				c = phonPAUSE;    // pause after each character
 733			if(((count % 3) == 0) || (control > 2))
 734				c = phonPAUSE_SHORT;  // pause following a primary stress
 735			else
 736				continue;       // remove marker
 737		}
 738		*phonemes++ = c;
 739	}
 740	if(control >= 2)
 741		*phonemes++ = phonPAUSE_NOLINK;
 742	*phonemes = 0;
 743}  // end of SetSpellingStress
 744
 745
 746
 747
 748int Translator::TranslateRoman(char *word, char *ph_out)
 749{//=====================================================
 750	int c;
 751	char *p;
 752	const char *p2;
 753	int acc;
 754	int prev;
 755	int value;
 756	int subtract;
 757	int repeat = 0;
 758	unsigned int flags;
 759	char number_chars[N_WORD_BYTES];
 760
 761	static const char *roman_numbers = "ixcmvld";
 762	static int roman_values[] = {1,10,100,1000,5,50,500};
 763 
 764	acc = 0;
 765	prev = 0;
 766	subtract = 0x7fff;
 767
 768	while((c = *word++) != ' ')
 769	{
 770		if((p2 = strchr(roman_numbers,c)) == NULL)
 771			return(0);
 772
 773		value = roman_values[p2 - roman_numbers];
 774		if(value == prev)
 775		{
 776			repeat++;
 777			if(repeat >= 3)
 778				return(0);
 779		}
 780		else
 781			repeat = 0;
 782
 783		if((prev==5) || (prev==50) || (prev==500))
 784		{
 785			if(value >= prev)
 786				return(0);
 787		}
 788		if((prev != 0) && (prev < value))
 789		{
 790			if(((acc % 10) != 0) || ((prev*10) < value))
 791				return(0);
 792			subtract = prev;
 793			value -= subtract;
 794		}
 795		else
 796		if(value >= subtract)
 797			return(0);
 798		else
 799			acc += prev;
 800		prev = value;
 801	}
 802	acc += prev;
 803	if(acc < 2)
 804		return(0);
 805
 806	if(acc > langopts.max_roman)
 807		return(0);
 808
 809	Lookup("_roman",ph_out);   // precede by "roman" if _rom is defined in *_list
 810	p = &ph_out[strlen(ph_out)];
 811
 812	sprintf(number_chars," %d ",acc);
 813	TranslateNumber(&number_chars[1],p,&flags,0);
 814	return(1);
 815}  // end of TranslateRoman
 816
 817
 818int Translator::LookupNum2(int value, int control, char *ph_out)
 819{//=============================================================
 820// Lookup a 2 digit number
 821// control bit 0: use special form of '1'
 822// control bit 2: use feminine form of '2'
 823
 824	int found;
 825	int ix;
 826	int units;
 827	int used_and=0;
 828	int next_phtype;
 829	char string[12];  // for looking up entries in de_list
 830	char ph_tens[50];
 831	char ph_digits[50];
 832	char ph_and[12];
 833
 834	if((value == 1) && (control & 1))
 835	{
 836		if(Lookup("_1a",ph_out) != 0)
 837			return(0);
 838	}
 839	// is there a special pronunciation for this 2-digit number
 840	found = 0;
 841	if(control & 4)
 842	{
 843		sprintf(string,"_%df",value);
 844		found = Lookup(string,ph_digits);
 845	}
 846	if(found == 0)
 847	{
 848		sprintf(string,"_%d",value);
 849		found = Lookup(string,ph_digits);
 850	}
 851
 852	// no, speak as tens+units
 853	if((control & 2) && (value < 10))
 854	{
 855		// speak leading zero
 856		Lookup("_0",ph_tens);
 857	}
 858	else
 859	{
 860		if(found)
 861		{
 862			strcpy(ph_out,ph_digits);
 863			return(0);
 864		}
 865
 866		if((value % 10) == 0)
 867		{
 868			sprintf(string,"_%d0",value / 10);
 869			found = Lookup(string,ph_tens);
 870		}
 871		if(!found)
 872		{
 873			sprintf(string,"_%dX",value / 10);
 874			Lookup(string,ph_tens);
 875		}
 876
 877		if((value % 10) == 0)
 878		{
 879			strcpy(ph_out,ph_tens);
 880			return(0);
 881		}
 882
 883		found = 0;
 884		units = (value % 10);
 885		if(control & 4)
 886		{
 887			// is there a variant form of this number?
 888			sprintf(string,"_%df",units);
 889			found = Lookup(string,ph_digits);
 890		}
 891		if(found == 0)
 892		{
 893			sprintf(string,"_%d",units);
 894			Lookup(string,ph_digits);
 895		}
 896	}
 897
 898	if(langopts.numbers & 0x30)
 899	{
 900		Lookup("_0and",ph_and);
 901		if(langopts.numbers & 0x10)
 902			sprintf(ph_out,"%s%s%s",ph_digits,ph_and,ph_tens);
 903		else
 904			sprintf(ph_out,"%s%s%s",ph_tens,ph_and,ph_digits);
 905		used_and = 1;
 906	}
 907	else
 908	{
 909		if(langopts.numbers & 0x200)
 910		{
 911			// remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
 912			if((ix = strlen(ph_tens)-1) >= 0)
 913			{
 914				if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
 915					next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
 916	
 917				if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
 918					ph_tens[ix] = 0;
 919			}
 920		}
 921		sprintf(ph_out,"%s%s",ph_tens,ph_digits);
 922	}
 923
 924	if(langopts.numbers & 0x100)
 925	{
 926		// only one primary stress
 927		found = 0;
 928		for(ix=strlen(ph_out)-1; ix>=0; ix--)
 929		{
 930			if(ph_out[ix] == phonSTRESS_P)
 931			{
 932				if(found)
 933					ph_out[ix] = phonSTRESS_3;
 934				else
 935					found = 1;
 936			}
 937		}
 938	}
 939	return(used_and);
 940}  // end of LookupNum2
 941
 942
 943int Translator::LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands)
 944{//=========================================================================================================
 945// Translate a 3 digit number
 946	int found;
 947	int hundreds;
 948	int x;
 949	char string[12];  // for looking up entries in **_list
 950	char buf1[100];
 951	char buf2[100];
 952	char ph_100[20];
 953	char ph_10T[20];
 954	char ph_digits[50];
 955	char ph_thousands[50];
 956	char ph_hundred_and[12];
 957	char ph_thousand_and[12];
 958	
 959	hundreds = value / 100;
 960	buf1[0] = 0;
 961
 962	if(hundreds > 0)
 963	{
 964		ph_thousands[0] = 0;
 965		ph_thousand_and[0] = 0;
 966
 967		Lookup("_0C",ph_100);
 968
 969		if((hundreds >= 10) && (((langopts.numbers & 0x0800) == 0) || (hundreds != 19)))
 970		{
 971			ph_digits[0] = 0;
 972
 973			if(LookupThousands(hundreds / 10, thousandplex+1, ph_10T) == 0)
 974			{
 975				x = 0;
 976				if(langopts.numbers2 & (1 << (thousandplex+1)))
 977					x = 4;
 978				LookupNum2(hundreds/10, x, ph_digits);
 979			}
 980
 981			if(langopts.numbers2 & 0x200)
 982				sprintf(ph_thousands,"%s%s%c",ph_10T,ph_digits,phonPAUSE_NOLINK);  // say "thousands" before its number, not after
 983			else
 984				sprintf(ph_thousands,"%s%s%c",ph_digits,ph_10T,phonPAUSE_NOLINK);
 985
 986			hundreds %= 10;
 987			if(hundreds == 0)
 988				ph_100[0] = 0;
 989			suppress_null = 1;
 990		}
 991
 992		ph_digits[0] = 0;
 993		if(hundreds > 0)
 994		{
 995			if((langopts.numbers & 0x100000) && (prev_thousands || (ph_thousands[0] != 0)))
 996			{
 997				Lookup("_0and",ph_thousand_and);
 998			}
 999
1000			suppress_null = 1;
1001
1002			found = 0;
1003			if((value % 1000) == 100)
1004			{
1005				// is there a special pronunciation for exactly 100 ?
1006				found = Lookup("_1C0",ph_digits);
1007			}
1008			if(!found)
1009			{
1010				sprintf(string,"_%dC",hundreds);
1011				found = Lookup(string,ph_digits);  // is there a specific pronunciation for n-hundred ?
1012			}
1013
1014			if(found)
1015			{
1016				ph_100[0] = 0;
1017			}
1018			else
1019			{
1020				if((hundreds > 1) || ((langopts.numbers & 0x400) == 0))
1021				{
1022					LookupNum2(hundreds,0,ph_digits);
1023				}
1024			}
1025		}
1026
1027		sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
1028	}
1029
1030	ph_hundred_and[0] = 0;
1031	if((langopts.numbers & 0x40) && ((value % 100) != 0))
1032	{
1033		if((value > 100) || (prev_thousands && (thousandplex==0)))
1034		{
1035			Lookup("_0and",ph_hundred_and);
1036		}
1037	}
1038
1039
1040	buf2[0] = 0;
1041	value = value % 100;
1042
1043	if(value == 0)
1044	{
1045		if(suppress_null == 0)
1046			Lookup("_0",buf2);
1047	}
1048	else
1049	{
1050		x = 0;
1051		if(thousandplex==0)
1052			x = 1;   // allow "eins" for 1 rather than "ein"
1053		else
1054		{
1055			if(langopts.numbers2 & (1 << thousandplex))
1056				x = 4;   // use variant (feminine) for before thousands and millions
1057		}
1058
1059		if(LookupNum2(value,x,buf2) != 0)
1060		{
1061			if(langopts.numbers & 0x80)
1062				ph_hundred_and[0] = 0;  // don't put 'and' after 'hundred' if there's 'and' between tens and units
1063		}
1064	}
1065
1066	sprintf(ph_out,"%s%s%s",buf1,ph_hundred_and,buf2);
1067
1068	return(0);
1069}  // end of LookupNum3
1070
1071
1072
1073static const char *M_Variant(int value)
1074{//====================================
1075	// returns M, or perhaps MA for some cases
1076	
1077	if(((value % 100)>20) || ((value % 100)<10))   // but not teens, 10 to 19
1078	{
1079		if ((translator->langopts.numbers2 & 0x40) &&
1080			((value % 10)>=2) &&
1081			((value % 10)<=4))
1082		{
1083		// for Polish language - two forms of plural!
1084			return("0MA");
1085		}
1086
1087		if((translator->langopts.numbers2 & 0x80) &&
1088			((value % 10)==1))
1089		{
1090			return("1MA");
1091		}
1092
1093	}
1094	return("0M");
1095}
1096
1097
1098int Translator::LookupThousands(int value, int thousandplex, char *ph_out)
1099{//=======================================================================
1100	int found;
1101	char string[12];
1102	char ph_of[12];
1103	char ph_thousands[40];
1104
1105	ph_of[0] = 0;
1106
1107	// first look fora match with the exact value of thousands
1108	sprintf(string,"_%dM%d",value,thousandplex);
1109
1110	if((found = Lookup(string,ph_thousands)) == 0)
1111	{
1112		if((value % 100) >= 20) 
1113		{
1114			Lookup("_0of",ph_of);
1115		}
1116
1117		sprintf(string,"_%s%d",M_Variant(value),thousandplex);
1118
1119		if(Lookup(string,ph_thousands) == 0)
1120		{
1121			// repeat "thousand" if higher order names are not available
1122			sprintf(string,"_%dM1",value);
1123			if((found = Lookup(string,ph_thousands)) == 0)
1124				Lookup("_0M1",ph_thousands);
1125		}
1126	}
1127	sprintf(ph_out,"%s%s",ph_of,ph_thousands);
1128	return(found);
1129}
1130
1131
1132int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, int wflags)
1133{//=========================================================================================
1134//  Number translation with various options
1135// the "word" may be up to 4 digits
1136// "words" of 3 digits may be preceded by another number "word" for thousands or millions
1137
1138	int n_digits;
1139	int value;
1140	int ix;
1141	unsigned char c;
1142	int suppress_null = 0;
1143	int decimal_point = 0;
1144	int thousandplex = 0;
1145	int thousands_inc = 0;
1146	int prev_thousands = 0;
1147	int this_value;
1148	static int prev_value;
1149	int decimal_count;
1150	int max_decimal_count;
1151	char string[12];  // for looking up entries in de_list
1152	char buf1[100];
1153	char ph_append[50];
1154	char ph_buf[200];
1155	char ph_buf2[50];
1156
1157	static const char str_pause[2] = {phonPAUSE_NOLINK,0};
1158
1159	for(ix=0; isdigit(word[ix]); ix++) ;
1160	n_digits = ix;
1161	value = this_value = atoi(word);
1162
1163	ph_append[0] = 0;
1164	ph_buf2[0] = 0;
1165
1166	// is there a previous thousands part (as a previous "word") ?
1167	if((n_digits == 3) && (word[-2] == langopts.thousands_sep) && isdigit(word[-3]))
1168	{
1169		prev_thousands = 1;
1170	}
1171	else
1172	if((langopts.thousands_sep == ' ') || (langopts.numbers & 0x1000))
1173	{
1174		// thousands groups can be separated by spaces
1175		if((n_digits == 3) && isdigit(word[-2]))
1176		{
1177			prev_thousands = 1;
1178		}
1179	}
1180
1181	if((word[0] == '0') && (prev_thousands == 0) && (word[1] != langopts.decimal_sep))
1182	{
1183		if((n_digits == 2) && (word[3] == ':') && isdigit(word[5]) && isspace(word[7]))
1184		{
1185			// looks like a time 02:30, omit the leading zero
1186		}
1187		else
1188		{
1189			return(0);     // number string with leading zero, speak as individual digits
1190		}
1191	}
1192
1193	if((langopts.numbers & 0x1000) && (word[n_digits] == ' '))
1194		thousands_inc = 1;
1195	else
1196	if(word[n_digits] == langopts.thousands_sep)
1197		thousands_inc = 2;
1198
1199	if(thousands_inc > 0)
1200	{
1201		// if the following "words" are three-digit groups, count them and add
1202		// a "thousand"/"million" suffix to this one
1203
1204		ix = n_digits + thousands_inc;
1205		while(isdigit(word[ix]) && isdigit(word[ix+1]) && isdigit(word[ix+2]))
1206		{
1207			thousandplex++;
1208			if(word[ix+3] == langopts.thousands_sep)
1209				ix += (3 + thousands_inc);
1210			else
1211				break;
1212		}
1213	}
1214
1215	if((value == 0) && prev_thousands)
1216	{
1217		suppress_null = 1;
1218	}
1219
1220	if((word[n_digits] == langopts.decimal_sep) && isdigit(word[n_digits+1]))
1221	{
1222		// this "word" ends with a decimal point
1223		Lookup("_dpt",ph_append);
1224		decimal_point = 1;
1225	}
1226	else
1227	if(suppress_null == 0)
1228	{
1229		if(thousands_inc > 0)
1230		{
1231			if((thousandplex > 0) && (value < 1000))
1232			{
1233				if(langopts.numbers2 & 0x100)
1234				{
1235					if((thousandplex == 1) && (value >= 100))
1236					{
1237						// special word for 100,000's
1238						char ph_buf3[20];
1239						sprintf(string,"_%dL",value / 100);
1240						if(Lookup(string,ph_buf2) == 0)
1241						{
1242							LookupNum2(value/100,0,ph_buf2);
1243							Lookup("_0L",ph_buf3);
1244							strcat(ph_buf2,ph_buf3);
1245						}
1246						value %= 100;
1247						if(value == 0)
1248							suppress_null = 1;
1249					}
1250				}
1251				if((suppress_null == 0) && (LookupThousands(value,thousandplex,ph_append)))
1252				{
1253					// found an exact match for N thousand
1254					value = 0;
1255					suppress_null = 1;
1256				}
1257			}
1258		}
1259	}
1260	else
1261	if((thousandplex > 1) && prev_thousands && (prev_value > 0))
1262	{
1263		sprintf(string,"_%s%d",M_Variant(value),thousandplex+1);
1264		if(Lookup(string,buf1)==0)
1265		{
1266			// speak this thousandplex if there was no word for the previous thousandplex
1267			sprintf(string,"_0M%d",thousandplex);
1268			Lookup(string,ph_append);
1269		}
1270	}
1271
1272	if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
1273	{
1274		Lookup("_.",ph_append);
1275	}
1276
1277	LookupNum3(value, ph_buf, suppress_null, thousandplex, prev_thousands);
1278	if((thousandplex > 0) && (langopts.numbers2 & 0x200))
1279		sprintf(ph_out,"%s%s%s",ph_append,ph_buf2,ph_buf);  // say "thousands" before its number
1280	else
1281		sprintf(ph_out,"%s%s%s",ph_buf2,ph_buf,ph_append);
1282
1283
1284	while(decimal_point)
1285	{
1286		n_digits++;
1287
1288		decimal_count = 0;
1289		while(isdigit(word[n_digits+decimal_count]))
1290			decimal_count++;
1291
1292		if(decimal_count > 1)
1293		{
1294			max_decimal_count = 2;
1295			switch(langopts.numbers & 0xe000)
1296			{
1297			case 0x8000:
1298				max_decimal_count = 5;
1299			case 0x4000:
1300				// French/Polish decimal fraction
1301				while(word[n_digits] == '0')
1302				{
1303					Lookup("_0",buf1);
1304					strcat(ph_out,buf1);
1305					decimal_count--;
1306					n_digits++;
1307				}
1308				if((decimal_count <= max_decimal_count) && isdigit(word[n_digits]))
1309				{
1310					LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
1311					strcat(ph_out,buf1);
1312					n_digits += decimal_count;
1313				}
1314				break;
1315
1316			case 0x2000:
1317				// Italian decimal fractions
1318				if((decimal_count < 4) || ((decimal_count==4) && (word[n_digits] != '0')))
1319				{
1320					LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
1321					strcat(ph_out,buf1);
1322					if(word[n_digits]=='0')
1323					{
1324						// decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
1325						sprintf(string,"_0Z%d",decimal_count);
1326						Lookup(string,buf1);
1327						strcat(ph_out,buf1);
1328					}
1329					n_digits += decimal_count;
1330				}
1331				break;
1332
1333			case 0x6000:
1334				// Romanian decimal fractions
1335				if((decimal_count <= 4) && (word[n_digits] != '0'))
1336				{
1337						LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
1338						strcat(ph_out,buf1);
1339						n_digits += decimal_count;
1340				}
1341				break;
1342			}
1343		}
1344
1345		while(isdigit(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
1346		{
1347			value = word[n_digits++] - '0';
1348			LookupNum2(value, 1, buf1);
1349			strcat(ph_out,buf1);
1350		}
1351
1352		// something after the decimal part ?
1353		if(Lookup("_dpt2",buf1))
1354			strcat(ph_out,buf1);
1355
1356		if(c == langopts.decimal_sep)
1357		{
1358			Lookup("_dpt",buf1);
1359			strcat(ph_out,buf1);
1360		}
1361		else
1362		{
1363			decimal_point = 0;
1364		}
1365	}
1366	if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
1367	{
1368		int next_char;
1369		char *p;
1370		p = &word[n_digits+1];
1371
1372		p += utf8_in(&next_char,p,0);
1373		if((langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
1374			utf8_in(&next_char,p,0);
1375
1376		if(!iswalpha(next_char))
1377			strcat(ph_out,str_pause);  // don't add pause for 100s,  6th, etc.
1378	}
1379
1380	*flags = FLAG_FOUND;
1381	prev_value = this_value;
1382	return(1);
1383}  // end of TranslateNumber_1
1384
1385
1386
1387int Translator::TranslateNumber(char *word1, char *ph_out, unsigned int *flags, int wflags)
1388{//=======================================================================================
1389	if(option_sayas == SAYAS_DIGITS1)
1390		return(0);  // speak digits individually
1391
1392	if((langopts.numbers & 0x3) == 1)
1393		return(TranslateNumber_1(word1,ph_out,flags,wflags));
1394
1395	return(0);
1396}  // end of TranslateNumber
1397