/native/external/espeak/src/numbers.cpp

http://eyes-free.googlecode.com/ · C++ · 1397 lines · 1159 code · 166 blank · 72 comment · 310 complexity · eb0938b3bbca1fa7f551fddc169df735 MD5 · raw file

  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: jonsd@users.sourceforge.net *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <ctype.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <wctype.h>
  25. ////#include <wchar.h>
  26. #include "speak_lib.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "voice.h"
  31. #include "translate.h"
  32. #define M_NAME 0
  33. #define M_SMALLCAP 1
  34. #define M_TURNED 2
  35. #define M_REVERSED 3
  36. #define M_CURL 4
  37. #define M_ACUTE 5
  38. #define M_BREVE 6
  39. #define M_CARON 7
  40. #define M_CEDILLA 8
  41. #define M_CIRCUMFLEX 9
  42. #define M_DIAERESIS 10
  43. #define M_DOUBLE_ACUTE 11
  44. #define M_DOT_ABOVE 12
  45. #define M_GRAVE 13
  46. #define M_MACRON 14
  47. #define M_OGONEK 15
  48. #define M_RING 16
  49. #define M_STROKE 17
  50. #define M_TILDE 18
  51. #define M_BAR 19
  52. #define M_RETROFLEX 20
  53. #define M_HOOK 21
  54. #define M_MIDDLE_DOT M_DOT_ABOVE // duplicate of M_DOT_ABOVE
  55. #define M_IMPLOSIVE M_HOOK
  56. typedef struct {
  57. const char *name;
  58. int flags;
  59. } ACCENTS;
  60. // these are tokens to look up in the *_list file.
  61. ACCENTS accents_tab[] = {
  62. {"_lig", 1},
  63. {"_smc", 1}, // smallcap
  64. {"_tur", 1}, // turned
  65. {"_rev", 1}, // reversed
  66. {"_crl", 0}, // curl
  67. {"_acu", 0}, // acute
  68. {"_brv", 0}, // breve
  69. {"_hac", 0}, // caron/hacek
  70. {"_ced", 0}, // cedilla
  71. {"_cir", 0}, // circumflex
  72. {"_dia", 0}, // diaeresis
  73. {"_ac2", 0}, // double acute
  74. {"_dot", 0}, // dot
  75. {"_grv", 0}, // grave
  76. {"_mcn", 0}, // macron
  77. {"_ogo", 0}, // ogonek
  78. {"_rng", 0}, // ring
  79. {"_stk", 0}, // stroke
  80. {"_tld", 0}, // tilde
  81. {"_bar", 0}, // bar
  82. {"_rfx", 0}, // retroflex
  83. {"_hok", 0}, // hook
  84. };
  85. #define CAPITAL 0
  86. #define LETTER(ch,mod1,mod2) (ch-59)+(mod1 << 6)+(mod2 << 11)
  87. #define LIGATURE(ch1,ch2,mod1) (ch1-59)+((ch2-59) << 6)+(mod1 << 12)+0x8000
  88. #define L_ALPHA 60 // U+3B1
  89. #define L_SCHWA 61 // U+259
  90. #define L_OPEN_E 62 // U+25B
  91. #define L_GAMMA 63 // U+3B3
  92. #define L_IOTA 64 // U+3B9
  93. #define L_OE 65 // U+153
  94. #define L_OMEGA 66 // U+3C9
  95. #define L_PHI 67 // U+3C6
  96. #define L_ESH 68 // U+283
  97. #define L_UPSILON 69 // U+3C5
  98. #define L_EZH 70 // U+292
  99. #define L_GLOTTAL 71 // U+294
  100. #define L_RTAP 72 // U+27E
  101. static const short non_ascii_tab[] = {
  102. 0, 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
  103. 0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e };
  104. // characters U+00e0 to U+017f
  105. const unsigned short letter_accents_0e0[] = {
  106. LETTER('a',M_GRAVE,0), // U+00e0
  107. LETTER('a',M_ACUTE,0),
  108. LETTER('a',M_CIRCUMFLEX,0),
  109. LETTER('a',M_TILDE,0),
  110. LETTER('a',M_DIAERESIS,0),
  111. LETTER('a',M_RING,0),
  112. LIGATURE('a','e',0),
  113. LETTER('c',M_CEDILLA,0),
  114. LETTER('e',M_GRAVE,0),
  115. LETTER('e',M_ACUTE,0),
  116. LETTER('e',M_CIRCUMFLEX,0),
  117. LETTER('e',M_DIAERESIS,0),
  118. LETTER('i',M_GRAVE,0),
  119. LETTER('i',M_ACUTE,0),
  120. LETTER('i',M_CIRCUMFLEX,0),
  121. LETTER('i',M_DIAERESIS,0),
  122. LETTER('d',M_NAME,0), // eth // U+00f0
  123. LETTER('n',M_TILDE,0),
  124. LETTER('o',M_GRAVE,0),
  125. LETTER('o',M_ACUTE,0),
  126. LETTER('o',M_CIRCUMFLEX,0),
  127. LETTER('o',M_TILDE,0),
  128. LETTER('o',M_DIAERESIS,0),
  129. 0, // division sign
  130. LETTER('o',M_STROKE,0),
  131. LETTER('u',M_GRAVE,0),
  132. LETTER('u',M_ACUTE,0),
  133. LETTER('u',M_CIRCUMFLEX,0),
  134. LETTER('u',M_DIAERESIS,0),
  135. LETTER('y',M_ACUTE,0),
  136. LETTER('t',M_NAME,0), // thorn
  137. LETTER('y',M_DIAERESIS,0),
  138. CAPITAL, // U+0100
  139. LETTER('a',M_MACRON,0),
  140. CAPITAL,
  141. LETTER('a',M_BREVE,0),
  142. CAPITAL,
  143. LETTER('a',M_OGONEK,0),
  144. CAPITAL,
  145. LETTER('c',M_ACUTE,0),
  146. CAPITAL,
  147. LETTER('c',M_CIRCUMFLEX,0),
  148. CAPITAL,
  149. LETTER('c',M_DOT_ABOVE,0),
  150. CAPITAL,
  151. LETTER('c',M_CARON,0),
  152. CAPITAL,
  153. LETTER('d',M_CARON,0),
  154. CAPITAL, // U+0110
  155. LETTER('d',M_STROKE,0),
  156. CAPITAL,
  157. LETTER('e',M_MACRON,0),
  158. CAPITAL,
  159. LETTER('e',M_BREVE,0),
  160. CAPITAL,
  161. LETTER('e',M_DOT_ABOVE,0),
  162. CAPITAL,
  163. LETTER('e',M_OGONEK,0),
  164. CAPITAL,
  165. LETTER('e',M_CARON,0),
  166. CAPITAL,
  167. LETTER('g',M_CIRCUMFLEX,0),
  168. CAPITAL,
  169. LETTER('g',M_BREVE,0),
  170. CAPITAL, // U+0120
  171. LETTER('g',M_DOT_ABOVE,0),
  172. CAPITAL,
  173. LETTER('g',M_CEDILLA,0),
  174. CAPITAL,
  175. LETTER('h',M_CIRCUMFLEX,0),
  176. CAPITAL,
  177. LETTER('h',M_STROKE,0),
  178. CAPITAL,
  179. LETTER('i',M_TILDE,0),
  180. CAPITAL,
  181. LETTER('i',M_MACRON,0),
  182. CAPITAL,
  183. LETTER('i',M_BREVE,0),
  184. CAPITAL,
  185. LETTER('i',M_OGONEK,0),
  186. CAPITAL, // U+0130
  187. LETTER('i',M_NAME,0), // dotless i
  188. CAPITAL,
  189. LIGATURE('i','j',0),
  190. CAPITAL,
  191. LETTER('j',M_CIRCUMFLEX,0),
  192. CAPITAL,
  193. LETTER('k',M_CEDILLA,0),
  194. LETTER('k',M_NAME,0), // kra
  195. CAPITAL,
  196. LETTER('l',M_ACUTE,0),
  197. CAPITAL,
  198. LETTER('l',M_CEDILLA,0),
  199. CAPITAL,
  200. LETTER('l',M_CARON,0),
  201. CAPITAL,
  202. LETTER('l',M_MIDDLE_DOT,0), // U+0140
  203. CAPITAL,
  204. LETTER('l',M_STROKE,0),
  205. CAPITAL,
  206. LETTER('n',M_ACUTE,0),
  207. CAPITAL,
  208. LETTER('n',M_CEDILLA,0),
  209. CAPITAL,
  210. LETTER('n',M_CARON,0),
  211. LETTER('n',M_NAME,0), // apostrophe n
  212. CAPITAL,
  213. LETTER('n',M_NAME,0), // eng
  214. CAPITAL,
  215. LETTER('o',M_MACRON,0),
  216. CAPITAL,
  217. LETTER('o',M_BREVE,0),
  218. CAPITAL, // U+0150
  219. LETTER('o',M_DOUBLE_ACUTE,0),
  220. CAPITAL,
  221. LIGATURE('o','e',0),
  222. CAPITAL,
  223. LETTER('r',M_ACUTE,0),
  224. CAPITAL,
  225. LETTER('r',M_CEDILLA,0),
  226. CAPITAL,
  227. LETTER('r',M_CARON,0),
  228. CAPITAL,
  229. LETTER('s',M_ACUTE,0),
  230. CAPITAL,
  231. LETTER('s',M_CIRCUMFLEX,0),
  232. CAPITAL,
  233. LETTER('s',M_CEDILLA,0),
  234. CAPITAL, // U+0160
  235. LETTER('s',M_CARON,0),
  236. CAPITAL,
  237. LETTER('t',M_CEDILLA,0),
  238. CAPITAL,
  239. LETTER('t',M_CARON,0),
  240. CAPITAL,
  241. LETTER('t',M_STROKE,0),
  242. CAPITAL,
  243. LETTER('u',M_TILDE,0),
  244. CAPITAL,
  245. LETTER('u',M_MACRON,0),
  246. CAPITAL,
  247. LETTER('u',M_BREVE,0),
  248. CAPITAL,
  249. LETTER('u',M_RING,0),
  250. CAPITAL, // U+0170
  251. LETTER('u',M_DOUBLE_ACUTE,0),
  252. CAPITAL,
  253. LETTER('u',M_OGONEK,0),
  254. CAPITAL,
  255. LETTER('w',M_CIRCUMFLEX,0),
  256. CAPITAL,
  257. LETTER('y',M_CIRCUMFLEX,0),
  258. CAPITAL, // Y-DIAERESIS
  259. CAPITAL,
  260. LETTER('z',M_ACUTE,0),
  261. CAPITAL,
  262. LETTER('z',M_DOT_ABOVE,0),
  263. CAPITAL,
  264. LETTER('z',M_CARON,0),
  265. LETTER('s',M_NAME,0), // long-s // U+17f
  266. };
  267. // characters U+0250 to U+029F
  268. const unsigned short letter_accents_250[] = {
  269. LETTER('a',M_TURNED,0), // U+250
  270. LETTER(L_ALPHA,0,0),
  271. LETTER(L_ALPHA,M_TURNED,0),
  272. LETTER('b',M_IMPLOSIVE,0),
  273. 0, // open-o
  274. LETTER('c',M_CURL,0),
  275. LETTER('d',M_RETROFLEX,0),
  276. LETTER('d',M_IMPLOSIVE,0),
  277. LETTER('e',M_REVERSED,0), // U+258
  278. 0, // schwa
  279. LETTER(L_SCHWA,M_HOOK,0),
  280. 0, // open-e
  281. LETTER(L_OPEN_E,M_REVERSED,0),
  282. LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
  283. 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
  284. LETTER('j',M_BAR,0),
  285. LETTER('g',M_IMPLOSIVE,0), // U+260
  286. LETTER('g',0,0),
  287. LETTER('g',M_SMALLCAP,0),
  288. LETTER(L_GAMMA,0,0),
  289. 0, // ramshorn
  290. LETTER('h',M_TURNED,0),
  291. LETTER('h',M_HOOK,0),
  292. 0,//LETTER(L_HENG,M_HOOK,0),
  293. LETTER('i',M_BAR,0), // U+268
  294. LETTER(L_IOTA,0,0),
  295. LETTER('i',M_SMALLCAP,0),
  296. LETTER('l',M_TILDE,0),
  297. LETTER('l',M_BAR,0),
  298. LETTER('l',M_RETROFLEX,0),
  299. LIGATURE('l','z',0),
  300. LETTER('m',M_TURNED,0),
  301. 0,//LETTER('m',M_TURNED,M_LEG), // U+270
  302. LETTER('m',M_HOOK,0),
  303. 0,//LETTER('n',M_LEFTHOOK,0),
  304. LETTER('n',M_RETROFLEX,0),
  305. LETTER('n',M_SMALLCAP,0),
  306. LETTER('o',M_BAR,0),
  307. LIGATURE('o','e',M_SMALLCAP),
  308. 0,//LETTER(L_OMEGA,M_CLOSED,0),
  309. LETTER(L_PHI,0,0), // U+278
  310. LETTER('r',M_TURNED,0),
  311. 0,//LETTER('r',M_TURNED,M_LEG),
  312. LETTER('r',M_RETROFLEX,M_TURNED),
  313. 0,//LETTER('r',M_LEG,0),
  314. LETTER('r',M_RETROFLEX,0),
  315. 0, // r-tap
  316. LETTER(L_RTAP,M_REVERSED,0),
  317. LETTER('r',M_SMALLCAP,0), // U+280
  318. LETTER('r',M_TURNED,M_SMALLCAP),
  319. LETTER('s',M_RETROFLEX,0),
  320. 0, // esh
  321. 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
  322. LETTER(L_ESH,M_REVERSED,0),
  323. LETTER(L_ESH,M_CURL,0),
  324. LETTER('t',M_TURNED,0),
  325. LETTER('t',M_RETROFLEX,0), // U+288
  326. LETTER('u',M_BAR,0),
  327. LETTER(L_UPSILON,0,0),
  328. LETTER('v',M_HOOK,0),
  329. LETTER('v',M_TURNED,0),
  330. LETTER('w',M_TURNED,0),
  331. LETTER('y',M_TURNED,0),
  332. LETTER('y',M_SMALLCAP,0),
  333. LETTER('z',M_RETROFLEX,0), // U+290
  334. LETTER('z',M_CURL,0),
  335. 0, // ezh
  336. LETTER(L_EZH,M_CURL,0),
  337. 0, // glottal stop
  338. LETTER(L_GLOTTAL,M_REVERSED,0),
  339. LETTER(L_GLOTTAL,M_TURNED,0),
  340. 0,//LETTER('c',M_LONG,0),
  341. 0, // bilabial click // U+298
  342. LETTER('b',M_SMALLCAP,0),
  343. 0,//LETTER(L_OPEN_E,M_CLOSED,0),
  344. LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
  345. LETTER('h',M_SMALLCAP,0),
  346. LETTER('j',M_CURL,0),
  347. LETTER('k',M_TURNED,0),
  348. LETTER('l',M_SMALLCAP,0),
  349. LETTER('q',M_HOOK,0), // U+2a0
  350. LETTER(L_GLOTTAL,M_STROKE,0),
  351. LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
  352. LIGATURE('d','z',0),
  353. 0, // dezh
  354. LIGATURE('d','z',M_CURL),
  355. LIGATURE('t','s',0),
  356. 0, // tesh
  357. LIGATURE('t','s',M_CURL),
  358. };
  359. int Translator::LookupLetter2(unsigned int letter, char *ph_buf)
  360. {//=============================================================
  361. int len;
  362. char single_letter[10];
  363. single_letter[0] = 0;
  364. single_letter[1] = '_';
  365. len = utf8_out(letter, &single_letter[2]);
  366. single_letter[len+2] = ' ';
  367. single_letter[len+3] = 0;
  368. if(Lookup(&single_letter[1],ph_buf) == 0)
  369. {
  370. single_letter[1] = ' ';
  371. if(Lookup(&single_letter[2],ph_buf) == 0)
  372. {
  373. TranslateRules(&single_letter[2], ph_buf, 20, NULL,0,NULL);
  374. }
  375. }
  376. return(ph_buf[0]);
  377. }
  378. void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf)
  379. {//=====================================================================
  380. // lookup the character in the accents table
  381. int accent_data = 0;
  382. int accent1 = 0;
  383. int accent2 = 0;
  384. int basic_letter;
  385. int letter2=0;
  386. char ph_letter1[30];
  387. char ph_letter2[30];
  388. char ph_accent1[30];
  389. char ph_accent2[30];
  390. ph_accent2[0] = 0;
  391. if((letter >= 0xe0) && (letter < 0x17f))
  392. {
  393. accent_data = letter_accents_0e0[letter - 0xe0];
  394. }
  395. else
  396. if((letter >= 0x250) && (letter <= 0x2a8))
  397. {
  398. accent_data = letter_accents_250[letter - 0x250];
  399. }
  400. if(accent_data != 0)
  401. {
  402. basic_letter = (accent_data & 0x3f) + 59;
  403. if(basic_letter < 'a')
  404. basic_letter = non_ascii_tab[basic_letter-59];
  405. if(accent_data & 0x8000)
  406. {
  407. letter2 = (accent_data >> 6) & 0x3f;
  408. letter2 += 59;
  409. accent2 = (accent_data >> 12) & 0x7;
  410. }
  411. else
  412. {
  413. accent1 = (accent_data >> 6) & 0x1f;
  414. accent2 = (accent_data >> 11) & 0xf;
  415. }
  416. if(Lookup(accents_tab[accent1].name, ph_accent1) != 0)
  417. {
  418. if(LookupLetter2(basic_letter, ph_letter1) != 0)
  419. {
  420. if(accent2 != 0)
  421. {
  422. if(Lookup(accents_tab[accent2].name, ph_accent2) == 0)
  423. {
  424. // break;
  425. }
  426. if(accents_tab[accent2].flags & 1)
  427. {
  428. strcpy(ph_buf,ph_accent2);
  429. ph_buf += strlen(ph_buf);
  430. ph_accent2[0] = 0;
  431. }
  432. }
  433. if(letter2 != 0)
  434. {
  435. //ligature
  436. LookupLetter2(letter2, ph_letter2);
  437. sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
  438. }
  439. else
  440. {
  441. if(accent1 == 0)
  442. strcpy(ph_buf, ph_letter1);
  443. else
  444. if((langopts.accents & 1) || (accents_tab[accent1].flags & 1))
  445. sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
  446. else
  447. sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
  448. }
  449. }
  450. }
  451. }
  452. } // end of LookupAccentedLetter
  453. void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
  454. {//=============================================================================
  455. int len;
  456. unsigned char *p;
  457. static char single_letter[10] = {0,0};
  458. char ph_stress[2];
  459. unsigned int dict_flags[2];
  460. char ph_buf3[40];
  461. char *ptr;
  462. ph_buf1[0] = 0;
  463. len = utf8_out(letter,&single_letter[2]);
  464. single_letter[len+2] = ' ';
  465. if(next_byte == -1)
  466. {
  467. // speaking normal text, not individual characters
  468. if(Lookup(&single_letter[2],ph_buf1) != 0)
  469. return;
  470. single_letter[1] = '_';
  471. if(Lookup(&single_letter[1],ph_buf3) != 0)
  472. return; // the character is specified as _* so ignore it when speaking normal text
  473. // check whether this character is specified for English
  474. SetTranslator2("en");
  475. if(translator2->Lookup(&single_letter[2], ph_buf3) != 0)
  476. {
  477. // yes, switch to English and re-translate the word
  478. sprintf(ph_buf1,"%c",phonSWITCH);
  479. }
  480. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  481. return;
  482. }
  483. if((letter <= 32) || iswspace(letter))
  484. {
  485. // lookup space as _&32 etc.
  486. sprintf(&single_letter[1],"_#%d ",letter);
  487. Lookup(&single_letter[1],ph_buf1);
  488. return;
  489. }
  490. if(next_byte != ' ')
  491. next_byte = RULE_SPELLING;
  492. single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31
  493. single_letter[1] = '_';
  494. // if the $accent flag is set for this letter, use the accents table (below)
  495. dict_flags[1] = 0;
  496. ptr = &single_letter[1];
  497. if(Lookup(&single_letter[1],ph_buf3) == 0)
  498. {
  499. single_letter[1] = ' ';
  500. if(Lookup(&single_letter[2],ph_buf3) == 0)
  501. {
  502. TranslateRules(&single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
  503. }
  504. }
  505. if(ph_buf3[0] == 0)
  506. {
  507. LookupAccentedLetter(letter, ph_buf3);
  508. }
  509. if(ph_buf3[0] == 0)
  510. {
  511. ph_buf1[0] = 0;
  512. return;
  513. }
  514. if(ph_buf3[0] == phonSWITCH)
  515. {
  516. strcpy(ph_buf1,ph_buf3);
  517. return;
  518. }
  519. // at a stress marker at the start of the letter name, unless one is already marked
  520. ph_stress[0] = phonSTRESS_P;
  521. ph_stress[1] = 0;
  522. for(p=(unsigned char *)ph_buf3; *p != 0; p++)
  523. {
  524. if(phoneme_tab[*p]->type == phSTRESS)
  525. ph_stress[0] = 0; // stress is already marked
  526. }
  527. sprintf(ph_buf1,"%s%s",ph_stress,ph_buf3);
  528. }
  529. int Translator::TranslateLetter(char *word, char *phonemes, int control, int word_length)
  530. {//======================================================================================
  531. // get pronunciation for an isolated letter
  532. // return number of bytes used by the letter
  533. // control 2=say-as glyphs, 3-say-as chars
  534. int n_bytes;
  535. int letter;
  536. int len;
  537. int save_option_phonemes;
  538. char *p2;
  539. char *pbuf;
  540. char capital[20];
  541. char ph_buf[60];
  542. char ph_buf2[60];
  543. char hexbuf[6];
  544. ph_buf[0] = 0;
  545. capital[0] = 0;
  546. n_bytes = utf8_in(&letter,word,0);
  547. if((letter & 0xfff00) == 0x0e000)
  548. {
  549. letter &= 0xff; // uncode private usage area
  550. }
  551. if(control > 2)
  552. {
  553. // include CAPITAL information
  554. if(iswupper(letter))
  555. {
  556. Lookup("_cap",capital);
  557. }
  558. }
  559. letter = towlower2(letter);
  560. LookupLetter(letter, word[n_bytes], ph_buf);
  561. if(ph_buf[0] == phonSWITCH)
  562. {
  563. strcpy(phonemes,ph_buf);
  564. return(0);
  565. }
  566. if((ph_buf[0] == 0) && (translator_name != L('e','n')))
  567. {
  568. // speak as English, check whether there is a translation for this character
  569. SetTranslator2("en");
  570. save_option_phonemes = option_phonemes;
  571. option_phonemes = 0;
  572. translator2->LookupLetter(letter, word[n_bytes], ph_buf);
  573. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  574. option_phonemes = save_option_phonemes;
  575. if(ph_buf[0] != 0)
  576. {
  577. sprintf(phonemes,"%cen",phonSWITCH);
  578. return(0);
  579. }
  580. }
  581. if(ph_buf[0] == 0)
  582. {
  583. // character name not found
  584. if(iswalpha(letter))
  585. Lookup("_?A",ph_buf);
  586. if((ph_buf[0]==0) && !iswspace(letter))
  587. Lookup("_??",ph_buf);
  588. if(ph_buf[0] != 0)
  589. {
  590. // speak the hexadecimal number of the character code
  591. sprintf(hexbuf,"%x",letter);
  592. pbuf = ph_buf;
  593. for(p2 = hexbuf; *p2 != 0; p2++)
  594. {
  595. pbuf += strlen(pbuf);
  596. *pbuf++ = phonPAUSE_VSHORT;
  597. LookupLetter(*p2, 0, pbuf);
  598. }
  599. }
  600. }
  601. len = strlen(phonemes);
  602. if(langopts.accents & 2)
  603. sprintf(ph_buf2,"%c%s%s",0xff,ph_buf,capital);
  604. else
  605. sprintf(ph_buf2,"%c%s%s",0xff,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  606. if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  607. {
  608. strcpy(&phonemes[len],ph_buf2);
  609. }
  610. return(n_bytes);
  611. } // end of TranslateLetter
  612. void Translator::SetSpellingStress(char *phonemes, int control, int n_chars)
  613. {//=========================================================================
  614. // Individual letter names, reduce the stress of some.
  615. int ix;
  616. unsigned int c;
  617. int n_stress=0;
  618. int count;
  619. unsigned char buf[N_WORD_PHONEMES];
  620. for(ix=0; (c = phonemes[ix]) != 0; ix++)
  621. {
  622. if(c == phonSTRESS_P)
  623. {
  624. n_stress++;
  625. }
  626. buf[ix] = c;
  627. }
  628. buf[ix] = 0;
  629. count = 0;
  630. for(ix=0; (c = buf[ix]) != 0; ix++)
  631. {
  632. if((c == phonSTRESS_P) && (n_chars > 1))
  633. {
  634. count++;
  635. if(langopts.spelling_stress == 1)
  636. {
  637. // stress on initial letter when spelling
  638. if(count > 1)
  639. c = phonSTRESS_3;
  640. }
  641. else
  642. {
  643. if(count != n_stress)
  644. {
  645. if(((count % 3) != 0) || (count == n_stress-1))
  646. c = phonSTRESS_3; // reduce to secondary stress
  647. }
  648. }
  649. }
  650. else
  651. if(c == 0xff)
  652. {
  653. if((control < 2) || (ix==0))
  654. continue; // don't insert pauses
  655. if(control == 4)
  656. c = phonPAUSE; // pause after each character
  657. if(((count % 3) == 0) || (control > 2))
  658. c = phonPAUSE_SHORT; // pause following a primary stress
  659. else
  660. continue; // remove marker
  661. }
  662. *phonemes++ = c;
  663. }
  664. if(control >= 2)
  665. *phonemes++ = phonPAUSE_NOLINK;
  666. *phonemes = 0;
  667. } // end of SetSpellingStress
  668. int Translator::TranslateRoman(char *word, char *ph_out)
  669. {//=====================================================
  670. int c;
  671. char *p;
  672. const char *p2;
  673. int acc;
  674. int prev;
  675. int value;
  676. int subtract;
  677. int repeat = 0;
  678. unsigned int flags;
  679. char number_chars[N_WORD_BYTES];
  680. static const char *roman_numbers = "ixcmvld";
  681. static int roman_values[] = {1,10,100,1000,5,50,500};
  682. acc = 0;
  683. prev = 0;
  684. subtract = 0x7fff;
  685. while((c = *word++) != ' ')
  686. {
  687. if((p2 = strchr(roman_numbers,c)) == NULL)
  688. return(0);
  689. value = roman_values[p2 - roman_numbers];
  690. if(value == prev)
  691. {
  692. repeat++;
  693. if(repeat >= 3)
  694. return(0);
  695. }
  696. else
  697. repeat = 0;
  698. if((prev==5) || (prev==50) || (prev==500))
  699. {
  700. if(value >= prev)
  701. return(0);
  702. }
  703. if((prev != 0) && (prev < value))
  704. {
  705. if(((acc % 10) != 0) || ((prev*10) < value))
  706. return(0);
  707. subtract = prev;
  708. value -= subtract;
  709. }
  710. else
  711. if(value >= subtract)
  712. return(0);
  713. else
  714. acc += prev;
  715. prev = value;
  716. }
  717. acc += prev;
  718. if(acc < 2)
  719. return(0);
  720. if(acc > langopts.max_roman)
  721. return(0);
  722. Lookup("_roman",ph_out); // precede by "roman" if _rom is defined in *_list
  723. p = &ph_out[strlen(ph_out)];
  724. sprintf(number_chars," %d ",acc);
  725. TranslateNumber(&number_chars[1],p,&flags,0);
  726. return(1);
  727. } // end of TranslateRoman
  728. int Translator::LookupNum2(int value, int control, char *ph_out)
  729. {//=============================================================
  730. // Lookup a 2 digit number
  731. // control bit 0: use special form of '1'
  732. // control bit 2: use feminine form of '2'
  733. int found;
  734. int ix;
  735. int units;
  736. int used_and=0;
  737. int next_phtype;
  738. char string[12]; // for looking up entries in de_list
  739. char ph_tens[50];
  740. char ph_digits[50];
  741. char ph_and[12];
  742. if((value == 1) && (control & 1))
  743. {
  744. if(Lookup("_1a",ph_out) != 0)
  745. return(0);
  746. }
  747. // is there a special pronunciation for this 2-digit number
  748. found = 0;
  749. if(control & 4)
  750. {
  751. sprintf(string,"_%df",value);
  752. found = Lookup(string,ph_digits);
  753. }
  754. if(found == 0)
  755. {
  756. sprintf(string,"_%d",value);
  757. found = Lookup(string,ph_digits);
  758. }
  759. // no, speak as tens+units
  760. if((control & 2) && (value < 10))
  761. {
  762. // speak leading zero
  763. Lookup("_0",ph_tens);
  764. }
  765. else
  766. {
  767. if(found)
  768. {
  769. strcpy(ph_out,ph_digits);
  770. return(0);
  771. }
  772. if((value % 10) == 0)
  773. {
  774. sprintf(string,"_%d0",value / 10);
  775. found = Lookup(string,ph_tens);
  776. }
  777. if(!found)
  778. {
  779. sprintf(string,"_%dX",value / 10);
  780. Lookup(string,ph_tens);
  781. }
  782. if((value % 10) == 0)
  783. {
  784. strcpy(ph_out,ph_tens);
  785. return(0);
  786. }
  787. found = 0;
  788. units = (value % 10);
  789. if(control & 4)
  790. {
  791. // is there a variant form of this number?
  792. sprintf(string,"_%df",units);
  793. found = Lookup(string,ph_digits);
  794. }
  795. if(found == 0)
  796. {
  797. sprintf(string,"_%d",units);
  798. Lookup(string,ph_digits);
  799. }
  800. }
  801. if(langopts.numbers & 0x30)
  802. {
  803. Lookup("_0and",ph_and);
  804. if(langopts.numbers & 0x10)
  805. sprintf(ph_out,"%s%s%s",ph_digits,ph_and,ph_tens);
  806. else
  807. sprintf(ph_out,"%s%s%s",ph_tens,ph_and,ph_digits);
  808. used_and = 1;
  809. }
  810. else
  811. {
  812. if(langopts.numbers & 0x200)
  813. {
  814. // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
  815. if((ix = strlen(ph_tens)-1) >= 0)
  816. {
  817. if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
  818. next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
  819. if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
  820. ph_tens[ix] = 0;
  821. }
  822. }
  823. sprintf(ph_out,"%s%s",ph_tens,ph_digits);
  824. }
  825. if(langopts.numbers & 0x100)
  826. {
  827. // only one primary stress
  828. found = 0;
  829. for(ix=strlen(ph_out)-1; ix>=0; ix--)
  830. {
  831. if(ph_out[ix] == phonSTRESS_P)
  832. {
  833. if(found)
  834. ph_out[ix] = phonSTRESS_3;
  835. else
  836. found = 1;
  837. }
  838. }
  839. }
  840. return(used_and);
  841. } // end of LookupNum2
  842. int Translator::LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands)
  843. {//=========================================================================================================
  844. // Translate a 3 digit number
  845. int found;
  846. int hundreds;
  847. int x;
  848. char string[12]; // for looking up entries in **_list
  849. char buf1[100];
  850. char buf2[100];
  851. char ph_100[20];
  852. char ph_10T[20];
  853. char ph_digits[50];
  854. char ph_thousands[50];
  855. char ph_hundred_and[12];
  856. char ph_thousand_and[12];
  857. hundreds = value / 100;
  858. buf1[0] = 0;
  859. if(hundreds > 0)
  860. {
  861. ph_thousands[0] = 0;
  862. ph_thousand_and[0] = 0;
  863. Lookup("_0C",ph_100);
  864. if((hundreds >= 10) && (((langopts.numbers & 0x0800) == 0) || (hundreds != 19)))
  865. {
  866. ph_digits[0] = 0;
  867. if(LookupThousands(hundreds / 10, thousandplex+1, ph_10T) == 0)
  868. {
  869. x = 0;
  870. if(langopts.numbers2 & (1 << (thousandplex+1)))
  871. x = 4;
  872. LookupNum2(hundreds/10, x, ph_digits);
  873. }
  874. if(langopts.numbers2 & 0x200)
  875. sprintf(ph_thousands,"%s%s%c",ph_10T,ph_digits,phonPAUSE_NOLINK); // say "thousands" before its number, not after
  876. else
  877. sprintf(ph_thousands,"%s%s%c",ph_digits,ph_10T,phonPAUSE_NOLINK);
  878. hundreds %= 10;
  879. if(hundreds == 0)
  880. ph_100[0] = 0;
  881. suppress_null = 1;
  882. }
  883. ph_digits[0] = 0;
  884. if(hundreds > 0)
  885. {
  886. if((langopts.numbers & 0x100000) && (prev_thousands || (ph_thousands[0] != 0)))
  887. {
  888. Lookup("_0and",ph_thousand_and);
  889. }
  890. suppress_null = 1;
  891. found = 0;
  892. if((value % 1000) == 100)
  893. {
  894. // is there a special pronunciation for exactly 100 ?
  895. found = Lookup("_1C0",ph_digits);
  896. }
  897. if(!found)
  898. {
  899. sprintf(string,"_%dC",hundreds);
  900. found = Lookup(string,ph_digits); // is there a specific pronunciation for n-hundred ?
  901. }
  902. if(found)
  903. {
  904. ph_100[0] = 0;
  905. }
  906. else
  907. {
  908. if((hundreds > 1) || ((langopts.numbers & 0x400) == 0))
  909. {
  910. LookupNum2(hundreds,0,ph_digits);
  911. }
  912. }
  913. }
  914. sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
  915. }
  916. ph_hundred_and[0] = 0;
  917. if((langopts.numbers & 0x40) && ((value % 100) != 0))
  918. {
  919. if((value > 100) || (prev_thousands && (thousandplex==0)))
  920. {
  921. Lookup("_0and",ph_hundred_and);
  922. }
  923. }
  924. buf2[0] = 0;
  925. value = value % 100;
  926. if(value == 0)
  927. {
  928. if(suppress_null == 0)
  929. Lookup("_0",buf2);
  930. }
  931. else
  932. {
  933. x = 0;
  934. if(thousandplex==0)
  935. x = 1; // allow "eins" for 1 rather than "ein"
  936. else
  937. {
  938. if(langopts.numbers2 & (1 << thousandplex))
  939. x = 4; // use variant (feminine) for before thousands and millions
  940. }
  941. if(LookupNum2(value,x,buf2) != 0)
  942. {
  943. if(langopts.numbers & 0x80)
  944. ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
  945. }
  946. }
  947. sprintf(ph_out,"%s%s%s",buf1,ph_hundred_and,buf2);
  948. return(0);
  949. } // end of LookupNum3
  950. static const char *M_Variant(int value)
  951. {//====================================
  952. // returns M, or perhaps MA for some cases
  953. if(((value % 100)>20) || ((value % 100)<10)) // but not teens, 10 to 19
  954. {
  955. if ((translator->langopts.numbers2 & 0x40) &&
  956. ((value % 10)>=2) &&
  957. ((value % 10)<=4))
  958. {
  959. // for Polish language - two forms of plural!
  960. return("0MA");
  961. }
  962. if((translator->langopts.numbers2 & 0x80) &&
  963. ((value % 10)==1))
  964. {
  965. return("1MA");
  966. }
  967. }
  968. return("0M");
  969. }
  970. int Translator::LookupThousands(int value, int thousandplex, char *ph_out)
  971. {//=======================================================================
  972. int found;
  973. char string[12];
  974. char ph_of[12];
  975. char ph_thousands[40];
  976. ph_of[0] = 0;
  977. // first look fora match with the exact value of thousands
  978. sprintf(string,"_%dM%d",value,thousandplex);
  979. if((found = Lookup(string,ph_thousands)) == 0)
  980. {
  981. if((value % 100) >= 20)
  982. {
  983. Lookup("_0of",ph_of);
  984. }
  985. sprintf(string,"_%s%d",M_Variant(value),thousandplex);
  986. if(Lookup(string,ph_thousands) == 0)
  987. {
  988. // repeat "thousand" if higher order names are not available
  989. sprintf(string,"_%dM1",value);
  990. if((found = Lookup(string,ph_thousands)) == 0)
  991. Lookup("_0M1",ph_thousands);
  992. }
  993. }
  994. sprintf(ph_out,"%s%s",ph_of,ph_thousands);
  995. return(found);
  996. }
  997. int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, int wflags)
  998. {//=========================================================================================
  999. // Number translation with various options
  1000. // the "word" may be up to 4 digits
  1001. // "words" of 3 digits may be preceded by another number "word" for thousands or millions
  1002. int n_digits;
  1003. int value;
  1004. int ix;
  1005. unsigned char c;
  1006. int suppress_null = 0;
  1007. int decimal_point = 0;
  1008. int thousandplex = 0;
  1009. int thousands_inc = 0;
  1010. int prev_thousands = 0;
  1011. int this_value;
  1012. static int prev_value;
  1013. int decimal_count;
  1014. int max_decimal_count;
  1015. char string[12]; // for looking up entries in de_list
  1016. char buf1[100];
  1017. char ph_append[50];
  1018. char ph_buf[200];
  1019. char ph_buf2[50];
  1020. static const char str_pause[2] = {phonPAUSE_NOLINK,0};
  1021. for(ix=0; isdigit(word[ix]); ix++) ;
  1022. n_digits = ix;
  1023. value = this_value = atoi(word);
  1024. ph_append[0] = 0;
  1025. ph_buf2[0] = 0;
  1026. // is there a previous thousands part (as a previous "word") ?
  1027. if((n_digits == 3) && (word[-2] == langopts.thousands_sep) && isdigit(word[-3]))
  1028. {
  1029. prev_thousands = 1;
  1030. }
  1031. else
  1032. if((langopts.thousands_sep == ' ') || (langopts.numbers & 0x1000))
  1033. {
  1034. // thousands groups can be separated by spaces
  1035. if((n_digits == 3) && isdigit(word[-2]))
  1036. {
  1037. prev_thousands = 1;
  1038. }
  1039. }
  1040. if((word[0] == '0') && (prev_thousands == 0) && (word[1] != langopts.decimal_sep))
  1041. {
  1042. if((n_digits == 2) && (word[3] == ':') && isdigit(word[5]) && isspace(word[7]))
  1043. {
  1044. // looks like a time 02:30, omit the leading zero
  1045. }
  1046. else
  1047. {
  1048. return(0); // number string with leading zero, speak as individual digits
  1049. }
  1050. }
  1051. if((langopts.numbers & 0x1000) && (word[n_digits] == ' '))
  1052. thousands_inc = 1;
  1053. else
  1054. if(word[n_digits] == langopts.thousands_sep)
  1055. thousands_inc = 2;
  1056. if(thousands_inc > 0)
  1057. {
  1058. // if the following "words" are three-digit groups, count them and add
  1059. // a "thousand"/"million" suffix to this one
  1060. ix = n_digits + thousands_inc;
  1061. while(isdigit(word[ix]) && isdigit(word[ix+1]) && isdigit(word[ix+2]))
  1062. {
  1063. thousandplex++;
  1064. if(word[ix+3] == langopts.thousands_sep)
  1065. ix += (3 + thousands_inc);
  1066. else
  1067. break;
  1068. }
  1069. }
  1070. if((value == 0) && prev_thousands)
  1071. {
  1072. suppress_null = 1;
  1073. }
  1074. if((word[n_digits] == langopts.decimal_sep) && isdigit(word[n_digits+1]))
  1075. {
  1076. // this "word" ends with a decimal point
  1077. Lookup("_dpt",ph_append);
  1078. decimal_point = 1;
  1079. }
  1080. else
  1081. if(suppress_null == 0)
  1082. {
  1083. if(thousands_inc > 0)
  1084. {
  1085. if((thousandplex > 0) && (value < 1000))
  1086. {
  1087. if(langopts.numbers2 & 0x100)
  1088. {
  1089. if((thousandplex == 1) && (value >= 100))
  1090. {
  1091. // special word for 100,000's
  1092. char ph_buf3[20];
  1093. sprintf(string,"_%dL",value / 100);
  1094. if(Lookup(string,ph_buf2) == 0)
  1095. {
  1096. LookupNum2(value/100,0,ph_buf2);
  1097. Lookup("_0L",ph_buf3);
  1098. strcat(ph_buf2,ph_buf3);
  1099. }
  1100. value %= 100;
  1101. if(value == 0)
  1102. suppress_null = 1;
  1103. }
  1104. }
  1105. if((suppress_null == 0) && (LookupThousands(value,thousandplex,ph_append)))
  1106. {
  1107. // found an exact match for N thousand
  1108. value = 0;
  1109. suppress_null = 1;
  1110. }
  1111. }
  1112. }
  1113. }
  1114. else
  1115. if((thousandplex > 1) && prev_thousands && (prev_value > 0))
  1116. {
  1117. sprintf(string,"_%s%d",M_Variant(value),thousandplex+1);
  1118. if(Lookup(string,buf1)==0)
  1119. {
  1120. // speak this thousandplex if there was no word for the previous thousandplex
  1121. sprintf(string,"_0M%d",thousandplex);
  1122. Lookup(string,ph_append);
  1123. }
  1124. }
  1125. if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
  1126. {
  1127. Lookup("_.",ph_append);
  1128. }
  1129. LookupNum3(value, ph_buf, suppress_null, thousandplex, prev_thousands);
  1130. if((thousandplex > 0) && (langopts.numbers2 & 0x200))
  1131. sprintf(ph_out,"%s%s%s",ph_append,ph_buf2,ph_buf); // say "thousands" before its number
  1132. else
  1133. sprintf(ph_out,"%s%s%s",ph_buf2,ph_buf,ph_append);
  1134. while(decimal_point)
  1135. {
  1136. n_digits++;
  1137. decimal_count = 0;
  1138. while(isdigit(word[n_digits+decimal_count]))
  1139. decimal_count++;
  1140. if(decimal_count > 1)
  1141. {
  1142. max_decimal_count = 2;
  1143. switch(langopts.numbers & 0xe000)
  1144. {
  1145. case 0x8000:
  1146. max_decimal_count = 5;
  1147. case 0x4000:
  1148. // French/Polish decimal fraction
  1149. while(word[n_digits] == '0')
  1150. {
  1151. Lookup("_0",buf1);
  1152. strcat(ph_out,buf1);
  1153. decimal_count--;
  1154. n_digits++;
  1155. }
  1156. if((decimal_count <= max_decimal_count) && isdigit(word[n_digits]))
  1157. {
  1158. LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
  1159. strcat(ph_out,buf1);
  1160. n_digits += decimal_count;
  1161. }
  1162. break;
  1163. case 0x2000:
  1164. // Italian decimal fractions
  1165. if((decimal_count < 4) || ((decimal_count==4) && (word[n_digits] != '0')))
  1166. {
  1167. LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
  1168. strcat(ph_out,buf1);
  1169. if(word[n_digits]=='0')
  1170. {
  1171. // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
  1172. sprintf(string,"_0Z%d",decimal_count);
  1173. Lookup(string,buf1);
  1174. strcat(ph_out,buf1);
  1175. }
  1176. n_digits += decimal_count;
  1177. }
  1178. break;
  1179. case 0x6000:
  1180. // Romanian decimal fractions
  1181. if((decimal_count <= 4) && (word[n_digits] != '0'))
  1182. {
  1183. LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
  1184. strcat(ph_out,buf1);
  1185. n_digits += decimal_count;
  1186. }
  1187. break;
  1188. }
  1189. }
  1190. while(isdigit(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
  1191. {
  1192. value = word[n_digits++] - '0';
  1193. LookupNum2(value, 1, buf1);
  1194. strcat(ph_out,buf1);
  1195. }
  1196. // something after the decimal part ?
  1197. if(Lookup("_dpt2",buf1))
  1198. strcat(ph_out,buf1);
  1199. if(c == langopts.decimal_sep)
  1200. {
  1201. Lookup("_dpt",buf1);
  1202. strcat(ph_out,buf1);
  1203. }
  1204. else
  1205. {
  1206. decimal_point = 0;
  1207. }
  1208. }
  1209. if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
  1210. {
  1211. int next_char;
  1212. char *p;
  1213. p = &word[n_digits+1];
  1214. p += utf8_in(&next_char,p,0);
  1215. if((langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
  1216. utf8_in(&next_char,p,0);
  1217. if(!iswalpha(next_char))
  1218. strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
  1219. }
  1220. *flags = FLAG_FOUND;
  1221. prev_value = this_value;
  1222. return(1);
  1223. } // end of TranslateNumber_1
  1224. int Translator::TranslateNumber(char *word1, char *ph_out, unsigned int *flags, int wflags)
  1225. {//=======================================================================================
  1226. if(option_sayas == SAYAS_DIGITS1)
  1227. return(0); // speak digits individually
  1228. if((langopts.numbers & 0x3) == 1)
  1229. return(TranslateNumber_1(word1,ph_out,flags,wflags));
  1230. return(0);
  1231. } // end of TranslateNumber