/native/external/espeak/src/numbers.cpp
http://eyes-free.googlecode.com/ · C++ · 1397 lines · 1159 code · 166 blank · 72 comment · 310 complexity · eb0938b3bbca1fa7f551fddc169df735 MD5 · raw file
- /***************************************************************************
- * Copyright (C) 2005 to 2007 by Jonathan Duddington *
- * email: jonsd@users.sourceforge.net *
- * *
- * This program is free software; you can redistribute it and/or modify *
- * it under the terms of the GNU General Public License as published by *
- * the Free Software Foundation; either version 3 of the License, or *
- * (at your option) any later version. *
- * *
- * This program is distributed in the hope that it will be useful, *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
- * GNU General Public License for more details. *
- * *
- * You should have received a copy of the GNU General Public License *
- * along with this program; if not, see: *
- * <http://www.gnu.org/licenses/>. *
- ***************************************************************************/
- #include "StdAfx.h"
- #include <stdio.h>
- #include <ctype.h>
- #include <stdlib.h>
- #include <string.h>
- #include <wctype.h>
- ////#include <wchar.h>
- #include "speak_lib.h"
- #include "speech.h"
- #include "phoneme.h"
- #include "synthesize.h"
- #include "voice.h"
- #include "translate.h"
// Accent / modifier codes.
// Each value is an index into accents_tab[] and is packed into the
// letter_accents_* tables through the LETTER() and LIGATURE() macros.
enum {
	M_NAME         = 0,
	M_SMALLCAP     = 1,
	M_TURNED       = 2,
	M_REVERSED     = 3,
	M_CURL         = 4,
	M_ACUTE        = 5,
	M_BREVE        = 6,
	M_CARON        = 7,
	M_CEDILLA      = 8,
	M_CIRCUMFLEX   = 9,
	M_DIAERESIS    = 10,
	M_DOUBLE_ACUTE = 11,
	M_DOT_ABOVE    = 12,
	M_GRAVE        = 13,
	M_MACRON       = 14,
	M_OGONEK       = 15,
	M_RING         = 16,
	M_STROKE       = 17,
	M_TILDE        = 18,
	M_BAR          = 19,
	M_RETROFLEX    = 20,
	M_HOOK         = 21,

	// aliases which share an existing table entry
	M_MIDDLE_DOT   = M_DOT_ABOVE,   // duplicate of M_DOT_ABOVE
	M_IMPLOSIVE    = M_HOOK
};
// Describes one accent/modifier.
struct ACCENTS {
	const char *name;   // token to look up in the *_list file
	int flags;          // bit 0: the modifier is spoken before the letter name
};

// Indexed by the M_* accent codes; the names are tokens to look up in the *_list file.
ACCENTS accents_tab[] = {
	{"_lig", 1},   //  0  M_NAME
	{"_smc", 1},   //  1  smallcap
	{"_tur", 1},   //  2  turned
	{"_rev", 1},   //  3  reversed
	{"_crl", 0},   //  4  curl
	{"_acu", 0},   //  5  acute
	{"_brv", 0},   //  6  breve
	{"_hac", 0},   //  7  caron/hacek
	{"_ced", 0},   //  8  cedilla
	{"_cir", 0},   //  9  circumflex
	{"_dia", 0},   // 10  diaeresis
	{"_ac2", 0},   // 11  double acute
	{"_dot", 0},   // 12  dot
	{"_grv", 0},   // 13  grave
	{"_mcn", 0},   // 14  macron
	{"_ogo", 0},   // 15  ogonek
	{"_rng", 0},   // 16  ring
	{"_stk", 0},   // 17  stroke
	{"_tld", 0},   // 18  tilde
	{"_bar", 0},   // 19  bar
	{"_rfx", 0},   // 20  retroflex
	{"_hok", 0},   // 21  hook
};
// Packed description of an accented letter (16 bits):
//   LETTER:    bits 0-5  base letter code (character value minus 59)
//              bits 6-10 first modifier (M_* code), bits 11-14 second modifier
//   LIGATURE:  bit 15 set, bits 0-5 first letter, bits 6-11 second letter,
//              bits 12-14 modifier
// The macro arguments and the whole expansion are parenthesised so that the
// macros stay correct when used with expression arguments or inside a larger
// expression.
#define CAPITAL  0
#define LETTER(ch,mod1,mod2)    (((ch)-59) + ((mod1) << 6) + ((mod2) << 11))
#define LIGATURE(ch1,ch2,mod1)  (((ch1)-59) + (((ch2)-59) << 6) + ((mod1) << 12) + 0x8000)

// Codes below 'a' which stand for non-ASCII base letters.
// (code - 59) is the index into non_ascii_tab[].
#define L_ALPHA    60   // U+3B1
#define L_SCHWA    61   // U+259
#define L_OPEN_E   62   // U+25B
#define L_GAMMA    63   // U+3B3
#define L_IOTA     64   // U+3B9
#define L_OE       65   // U+153
#define L_OMEGA    66   // U+3C9
#define L_PHI      67   // U+3C6
#define L_ESH      68   // U+283
#define L_UPSILON  69   // U+3C5
#define L_EZH      70   // U+292
#define L_GLOTTAL  71   // U+294
#define L_RTAP     72   // U+27E

// Unicode values for the L_* codes; entry 0 is unused.
static const short non_ascii_tab[] = {
	0, 0x3b1, 0x259, 0x25b, 0x3b3, 0x3b9, 0x153, 0x3c9,
	0x3c6, 0x283, 0x3c5, 0x292, 0x294, 0x27e };
- // characters U+00e0 to U+017f
- const unsigned short letter_accents_0e0[] = {
- LETTER('a',M_GRAVE,0), // U+00e0
- LETTER('a',M_ACUTE,0),
- LETTER('a',M_CIRCUMFLEX,0),
- LETTER('a',M_TILDE,0),
- LETTER('a',M_DIAERESIS,0),
- LETTER('a',M_RING,0),
- LIGATURE('a','e',0),
- LETTER('c',M_CEDILLA,0),
- LETTER('e',M_GRAVE,0),
- LETTER('e',M_ACUTE,0),
- LETTER('e',M_CIRCUMFLEX,0),
- LETTER('e',M_DIAERESIS,0),
- LETTER('i',M_GRAVE,0),
- LETTER('i',M_ACUTE,0),
- LETTER('i',M_CIRCUMFLEX,0),
- LETTER('i',M_DIAERESIS,0),
- LETTER('d',M_NAME,0), // eth // U+00f0
- LETTER('n',M_TILDE,0),
- LETTER('o',M_GRAVE,0),
- LETTER('o',M_ACUTE,0),
- LETTER('o',M_CIRCUMFLEX,0),
- LETTER('o',M_TILDE,0),
- LETTER('o',M_DIAERESIS,0),
- 0, // division sign
- LETTER('o',M_STROKE,0),
- LETTER('u',M_GRAVE,0),
- LETTER('u',M_ACUTE,0),
- LETTER('u',M_CIRCUMFLEX,0),
- LETTER('u',M_DIAERESIS,0),
- LETTER('y',M_ACUTE,0),
- LETTER('t',M_NAME,0), // thorn
- LETTER('y',M_DIAERESIS,0),
- CAPITAL, // U+0100
- LETTER('a',M_MACRON,0),
- CAPITAL,
- LETTER('a',M_BREVE,0),
- CAPITAL,
- LETTER('a',M_OGONEK,0),
- CAPITAL,
- LETTER('c',M_ACUTE,0),
- CAPITAL,
- LETTER('c',M_CIRCUMFLEX,0),
- CAPITAL,
- LETTER('c',M_DOT_ABOVE,0),
- CAPITAL,
- LETTER('c',M_CARON,0),
- CAPITAL,
- LETTER('d',M_CARON,0),
- CAPITAL, // U+0110
- LETTER('d',M_STROKE,0),
- CAPITAL,
- LETTER('e',M_MACRON,0),
- CAPITAL,
- LETTER('e',M_BREVE,0),
- CAPITAL,
- LETTER('e',M_DOT_ABOVE,0),
- CAPITAL,
- LETTER('e',M_OGONEK,0),
- CAPITAL,
- LETTER('e',M_CARON,0),
- CAPITAL,
- LETTER('g',M_CIRCUMFLEX,0),
- CAPITAL,
- LETTER('g',M_BREVE,0),
- CAPITAL, // U+0120
- LETTER('g',M_DOT_ABOVE,0),
- CAPITAL,
- LETTER('g',M_CEDILLA,0),
- CAPITAL,
- LETTER('h',M_CIRCUMFLEX,0),
- CAPITAL,
- LETTER('h',M_STROKE,0),
- CAPITAL,
- LETTER('i',M_TILDE,0),
- CAPITAL,
- LETTER('i',M_MACRON,0),
- CAPITAL,
- LETTER('i',M_BREVE,0),
- CAPITAL,
- LETTER('i',M_OGONEK,0),
- CAPITAL, // U+0130
- LETTER('i',M_NAME,0), // dotless i
- CAPITAL,
- LIGATURE('i','j',0),
- CAPITAL,
- LETTER('j',M_CIRCUMFLEX,0),
- CAPITAL,
- LETTER('k',M_CEDILLA,0),
- LETTER('k',M_NAME,0), // kra
- CAPITAL,
- LETTER('l',M_ACUTE,0),
- CAPITAL,
- LETTER('l',M_CEDILLA,0),
- CAPITAL,
- LETTER('l',M_CARON,0),
- CAPITAL,
- LETTER('l',M_MIDDLE_DOT,0), // U+0140
- CAPITAL,
- LETTER('l',M_STROKE,0),
- CAPITAL,
- LETTER('n',M_ACUTE,0),
- CAPITAL,
- LETTER('n',M_CEDILLA,0),
- CAPITAL,
- LETTER('n',M_CARON,0),
- LETTER('n',M_NAME,0), // apostrophe n
- CAPITAL,
- LETTER('n',M_NAME,0), // eng
- CAPITAL,
- LETTER('o',M_MACRON,0),
- CAPITAL,
- LETTER('o',M_BREVE,0),
- CAPITAL, // U+0150
- LETTER('o',M_DOUBLE_ACUTE,0),
- CAPITAL,
- LIGATURE('o','e',0),
- CAPITAL,
- LETTER('r',M_ACUTE,0),
- CAPITAL,
- LETTER('r',M_CEDILLA,0),
- CAPITAL,
- LETTER('r',M_CARON,0),
- CAPITAL,
- LETTER('s',M_ACUTE,0),
- CAPITAL,
- LETTER('s',M_CIRCUMFLEX,0),
- CAPITAL,
- LETTER('s',M_CEDILLA,0),
- CAPITAL, // U+0160
- LETTER('s',M_CARON,0),
- CAPITAL,
- LETTER('t',M_CEDILLA,0),
- CAPITAL,
- LETTER('t',M_CARON,0),
- CAPITAL,
- LETTER('t',M_STROKE,0),
- CAPITAL,
- LETTER('u',M_TILDE,0),
- CAPITAL,
- LETTER('u',M_MACRON,0),
- CAPITAL,
- LETTER('u',M_BREVE,0),
- CAPITAL,
- LETTER('u',M_RING,0),
- CAPITAL, // U+0170
- LETTER('u',M_DOUBLE_ACUTE,0),
- CAPITAL,
- LETTER('u',M_OGONEK,0),
- CAPITAL,
- LETTER('w',M_CIRCUMFLEX,0),
- CAPITAL,
- LETTER('y',M_CIRCUMFLEX,0),
- CAPITAL, // Y-DIAERESIS
- CAPITAL,
- LETTER('z',M_ACUTE,0),
- CAPITAL,
- LETTER('z',M_DOT_ABOVE,0),
- CAPITAL,
- LETTER('z',M_CARON,0),
- LETTER('s',M_NAME,0), // long-s // U+17f
- };
- // characters U+0250 to U+029F
- const unsigned short letter_accents_250[] = {
- LETTER('a',M_TURNED,0), // U+250
- LETTER(L_ALPHA,0,0),
- LETTER(L_ALPHA,M_TURNED,0),
- LETTER('b',M_IMPLOSIVE,0),
- 0, // open-o
- LETTER('c',M_CURL,0),
- LETTER('d',M_RETROFLEX,0),
- LETTER('d',M_IMPLOSIVE,0),
- LETTER('e',M_REVERSED,0), // U+258
- 0, // schwa
- LETTER(L_SCHWA,M_HOOK,0),
- 0, // open-e
- LETTER(L_OPEN_E,M_REVERSED,0),
- LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
- 0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
- LETTER('j',M_BAR,0),
- LETTER('g',M_IMPLOSIVE,0), // U+260
- LETTER('g',0,0),
- LETTER('g',M_SMALLCAP,0),
- LETTER(L_GAMMA,0,0),
- 0, // ramshorn
- LETTER('h',M_TURNED,0),
- LETTER('h',M_HOOK,0),
- 0,//LETTER(L_HENG,M_HOOK,0),
- LETTER('i',M_BAR,0), // U+268
- LETTER(L_IOTA,0,0),
- LETTER('i',M_SMALLCAP,0),
- LETTER('l',M_TILDE,0),
- LETTER('l',M_BAR,0),
- LETTER('l',M_RETROFLEX,0),
- LIGATURE('l','z',0),
- LETTER('m',M_TURNED,0),
- 0,//LETTER('m',M_TURNED,M_LEG), // U+270
- LETTER('m',M_HOOK,0),
- 0,//LETTER('n',M_LEFTHOOK,0),
- LETTER('n',M_RETROFLEX,0),
- LETTER('n',M_SMALLCAP,0),
- LETTER('o',M_BAR,0),
- LIGATURE('o','e',M_SMALLCAP),
- 0,//LETTER(L_OMEGA,M_CLOSED,0),
- LETTER(L_PHI,0,0), // U+278
- LETTER('r',M_TURNED,0),
- 0,//LETTER('r',M_TURNED,M_LEG),
- LETTER('r',M_RETROFLEX,M_TURNED),
- 0,//LETTER('r',M_LEG,0),
- LETTER('r',M_RETROFLEX,0),
- 0, // r-tap
- LETTER(L_RTAP,M_REVERSED,0),
- LETTER('r',M_SMALLCAP,0), // U+280
- LETTER('r',M_TURNED,M_SMALLCAP),
- LETTER('s',M_RETROFLEX,0),
- 0, // esh
- 0,//LETTER('j',M_BAR,L_IMPLOSIVE),
- LETTER(L_ESH,M_REVERSED,0),
- LETTER(L_ESH,M_CURL,0),
- LETTER('t',M_TURNED,0),
- LETTER('t',M_RETROFLEX,0), // U+288
- LETTER('u',M_BAR,0),
- LETTER(L_UPSILON,0,0),
- LETTER('v',M_HOOK,0),
- LETTER('v',M_TURNED,0),
- LETTER('w',M_TURNED,0),
- LETTER('y',M_TURNED,0),
- LETTER('y',M_SMALLCAP,0),
- LETTER('z',M_RETROFLEX,0), // U+290
- LETTER('z',M_CURL,0),
- 0, // ezh
- LETTER(L_EZH,M_CURL,0),
- 0, // glottal stop
- LETTER(L_GLOTTAL,M_REVERSED,0),
- LETTER(L_GLOTTAL,M_TURNED,0),
- 0,//LETTER('c',M_LONG,0),
- 0, // bilabial click // U+298
- LETTER('b',M_SMALLCAP,0),
- 0,//LETTER(L_OPEN_E,M_CLOSED,0),
- LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
- LETTER('h',M_SMALLCAP,0),
- LETTER('j',M_CURL,0),
- LETTER('k',M_TURNED,0),
- LETTER('l',M_SMALLCAP,0),
- LETTER('q',M_HOOK,0), // U+2a0
- LETTER(L_GLOTTAL,M_STROKE,0),
- LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
- LIGATURE('d','z',0),
- 0, // dezh
- LIGATURE('d','z',M_CURL),
- LIGATURE('t','s',0),
- 0, // tesh
- LIGATURE('t','s',M_CURL),
- };
- int Translator::LookupLetter2(unsigned int letter, char *ph_buf)
- {//=============================================================
- int len;
- char single_letter[10];
- single_letter[0] = 0;
- single_letter[1] = '_';
- len = utf8_out(letter, &single_letter[2]);
- single_letter[len+2] = ' ';
- single_letter[len+3] = 0;
- if(Lookup(&single_letter[1],ph_buf) == 0)
- {
- single_letter[1] = ' ';
- if(Lookup(&single_letter[2],ph_buf) == 0)
- {
- TranslateRules(&single_letter[2], ph_buf, 20, NULL,0,NULL);
- }
- }
- return(ph_buf[0]);
- }
- void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf)
- {//=====================================================================
- // lookup the character in the accents table
- int accent_data = 0;
- int accent1 = 0;
- int accent2 = 0;
- int basic_letter;
- int letter2=0;
- char ph_letter1[30];
- char ph_letter2[30];
- char ph_accent1[30];
- char ph_accent2[30];
- ph_accent2[0] = 0;
- if((letter >= 0xe0) && (letter < 0x17f))
- {
- accent_data = letter_accents_0e0[letter - 0xe0];
- }
- else
- if((letter >= 0x250) && (letter <= 0x2a8))
- {
- accent_data = letter_accents_250[letter - 0x250];
- }
- if(accent_data != 0)
- {
- basic_letter = (accent_data & 0x3f) + 59;
- if(basic_letter < 'a')
- basic_letter = non_ascii_tab[basic_letter-59];
- if(accent_data & 0x8000)
- {
- letter2 = (accent_data >> 6) & 0x3f;
- letter2 += 59;
- accent2 = (accent_data >> 12) & 0x7;
- }
- else
- {
- accent1 = (accent_data >> 6) & 0x1f;
- accent2 = (accent_data >> 11) & 0xf;
- }
- if(Lookup(accents_tab[accent1].name, ph_accent1) != 0)
- {
- if(LookupLetter2(basic_letter, ph_letter1) != 0)
- {
- if(accent2 != 0)
- {
- if(Lookup(accents_tab[accent2].name, ph_accent2) == 0)
- {
- // break;
- }
- if(accents_tab[accent2].flags & 1)
- {
- strcpy(ph_buf,ph_accent2);
- ph_buf += strlen(ph_buf);
- ph_accent2[0] = 0;
- }
- }
- if(letter2 != 0)
- {
- //ligature
- LookupLetter2(letter2, ph_letter2);
- sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
- }
- else
- {
- if(accent1 == 0)
- strcpy(ph_buf, ph_letter1);
- else
- if((langopts.accents & 1) || (accents_tab[accent1].flags & 1))
- sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);
- else
- sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
- }
- }
- }
- }
- } // end of LookupAccentedLetter
- void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
- {//=============================================================================
- int len;
- unsigned char *p;
- static char single_letter[10] = {0,0};
- char ph_stress[2];
- unsigned int dict_flags[2];
- char ph_buf3[40];
- char *ptr;
- ph_buf1[0] = 0;
- len = utf8_out(letter,&single_letter[2]);
- single_letter[len+2] = ' ';
- if(next_byte == -1)
- {
- // speaking normal text, not individual characters
- if(Lookup(&single_letter[2],ph_buf1) != 0)
- return;
- single_letter[1] = '_';
- if(Lookup(&single_letter[1],ph_buf3) != 0)
- return; // the character is specified as _* so ignore it when speaking normal text
- // check whether this character is specified for English
- SetTranslator2("en");
- if(translator2->Lookup(&single_letter[2], ph_buf3) != 0)
- {
- // yes, switch to English and re-translate the word
- sprintf(ph_buf1,"%c",phonSWITCH);
- }
- SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
- return;
- }
- if((letter <= 32) || iswspace(letter))
- {
- // lookup space as _&32 etc.
- sprintf(&single_letter[1],"_#%d ",letter);
- Lookup(&single_letter[1],ph_buf1);
- return;
- }
- if(next_byte != ' ')
- next_byte = RULE_SPELLING;
- single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31
- single_letter[1] = '_';
- // if the $accent flag is set for this letter, use the accents table (below)
- dict_flags[1] = 0;
- ptr = &single_letter[1];
-
- if(Lookup(&single_letter[1],ph_buf3) == 0)
- {
- single_letter[1] = ' ';
- if(Lookup(&single_letter[2],ph_buf3) == 0)
- {
- TranslateRules(&single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
- }
- }
- if(ph_buf3[0] == 0)
- {
- LookupAccentedLetter(letter, ph_buf3);
- }
- if(ph_buf3[0] == 0)
- {
- ph_buf1[0] = 0;
- return;
- }
- if(ph_buf3[0] == phonSWITCH)
- {
- strcpy(ph_buf1,ph_buf3);
- return;
- }
- // at a stress marker at the start of the letter name, unless one is already marked
- ph_stress[0] = phonSTRESS_P;
- ph_stress[1] = 0;
- for(p=(unsigned char *)ph_buf3; *p != 0; p++)
- {
- if(phoneme_tab[*p]->type == phSTRESS)
- ph_stress[0] = 0; // stress is already marked
- }
- sprintf(ph_buf1,"%s%s",ph_stress,ph_buf3);
- }
- int Translator::TranslateLetter(char *word, char *phonemes, int control, int word_length)
- {//======================================================================================
- // get pronunciation for an isolated letter
- // return number of bytes used by the letter
- // control 2=say-as glyphs, 3-say-as chars
- int n_bytes;
- int letter;
- int len;
- int save_option_phonemes;
- char *p2;
- char *pbuf;
- char capital[20];
- char ph_buf[60];
- char ph_buf2[60];
- char hexbuf[6];
- ph_buf[0] = 0;
- capital[0] = 0;
- n_bytes = utf8_in(&letter,word,0);
- if((letter & 0xfff00) == 0x0e000)
- {
- letter &= 0xff; // uncode private usage area
- }
- if(control > 2)
- {
- // include CAPITAL information
- if(iswupper(letter))
- {
- Lookup("_cap",capital);
- }
- }
- letter = towlower2(letter);
- LookupLetter(letter, word[n_bytes], ph_buf);
- if(ph_buf[0] == phonSWITCH)
- {
- strcpy(phonemes,ph_buf);
- return(0);
- }
- if((ph_buf[0] == 0) && (translator_name != L('e','n')))
- {
- // speak as English, check whether there is a translation for this character
- SetTranslator2("en");
- save_option_phonemes = option_phonemes;
- option_phonemes = 0;
- translator2->LookupLetter(letter, word[n_bytes], ph_buf);
- SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
- option_phonemes = save_option_phonemes;
- if(ph_buf[0] != 0)
- {
- sprintf(phonemes,"%cen",phonSWITCH);
- return(0);
- }
- }
- if(ph_buf[0] == 0)
- {
- // character name not found
- if(iswalpha(letter))
- Lookup("_?A",ph_buf);
- if((ph_buf[0]==0) && !iswspace(letter))
- Lookup("_??",ph_buf);
- if(ph_buf[0] != 0)
- {
- // speak the hexadecimal number of the character code
- sprintf(hexbuf,"%x",letter);
- pbuf = ph_buf;
- for(p2 = hexbuf; *p2 != 0; p2++)
- {
- pbuf += strlen(pbuf);
- *pbuf++ = phonPAUSE_VSHORT;
- LookupLetter(*p2, 0, pbuf);
- }
- }
- }
- len = strlen(phonemes);
- if(langopts.accents & 2)
- sprintf(ph_buf2,"%c%s%s",0xff,ph_buf,capital);
- else
- sprintf(ph_buf2,"%c%s%s",0xff,capital,ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
- if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
- {
- strcpy(&phonemes[len],ph_buf2);
- }
- return(n_bytes);
- } // end of TranslateLetter
- void Translator::SetSpellingStress(char *phonemes, int control, int n_chars)
- {//=========================================================================
- // Individual letter names, reduce the stress of some.
- int ix;
- unsigned int c;
- int n_stress=0;
- int count;
- unsigned char buf[N_WORD_PHONEMES];
- for(ix=0; (c = phonemes[ix]) != 0; ix++)
- {
- if(c == phonSTRESS_P)
- {
- n_stress++;
- }
- buf[ix] = c;
- }
- buf[ix] = 0;
- count = 0;
- for(ix=0; (c = buf[ix]) != 0; ix++)
- {
- if((c == phonSTRESS_P) && (n_chars > 1))
- {
- count++;
- if(langopts.spelling_stress == 1)
- {
- // stress on initial letter when spelling
- if(count > 1)
- c = phonSTRESS_3;
- }
- else
- {
- if(count != n_stress)
- {
- if(((count % 3) != 0) || (count == n_stress-1))
- c = phonSTRESS_3; // reduce to secondary stress
- }
- }
- }
- else
- if(c == 0xff)
- {
- if((control < 2) || (ix==0))
- continue; // don't insert pauses
- if(control == 4)
- c = phonPAUSE; // pause after each character
- if(((count % 3) == 0) || (control > 2))
- c = phonPAUSE_SHORT; // pause following a primary stress
- else
- continue; // remove marker
- }
- *phonemes++ = c;
- }
- if(control >= 2)
- *phonemes++ = phonPAUSE_NOLINK;
- *phonemes = 0;
- } // end of SetSpellingStress
- int Translator::TranslateRoman(char *word, char *ph_out)
- {//=====================================================
- int c;
- char *p;
- const char *p2;
- int acc;
- int prev;
- int value;
- int subtract;
- int repeat = 0;
- unsigned int flags;
- char number_chars[N_WORD_BYTES];
- static const char *roman_numbers = "ixcmvld";
- static int roman_values[] = {1,10,100,1000,5,50,500};
-
- acc = 0;
- prev = 0;
- subtract = 0x7fff;
- while((c = *word++) != ' ')
- {
- if((p2 = strchr(roman_numbers,c)) == NULL)
- return(0);
- value = roman_values[p2 - roman_numbers];
- if(value == prev)
- {
- repeat++;
- if(repeat >= 3)
- return(0);
- }
- else
- repeat = 0;
- if((prev==5) || (prev==50) || (prev==500))
- {
- if(value >= prev)
- return(0);
- }
- if((prev != 0) && (prev < value))
- {
- if(((acc % 10) != 0) || ((prev*10) < value))
- return(0);
- subtract = prev;
- value -= subtract;
- }
- else
- if(value >= subtract)
- return(0);
- else
- acc += prev;
- prev = value;
- }
- acc += prev;
- if(acc < 2)
- return(0);
- if(acc > langopts.max_roman)
- return(0);
- Lookup("_roman",ph_out); // precede by "roman" if _rom is defined in *_list
- p = &ph_out[strlen(ph_out)];
- sprintf(number_chars," %d ",acc);
- TranslateNumber(&number_chars[1],p,&flags,0);
- return(1);
- } // end of TranslateRoman
- int Translator::LookupNum2(int value, int control, char *ph_out)
- {//=============================================================
- // Lookup a 2 digit number
- // control bit 0: use special form of '1'
- // control bit 2: use feminine form of '2'
- int found;
- int ix;
- int units;
- int used_and=0;
- int next_phtype;
- char string[12]; // for looking up entries in de_list
- char ph_tens[50];
- char ph_digits[50];
- char ph_and[12];
- if((value == 1) && (control & 1))
- {
- if(Lookup("_1a",ph_out) != 0)
- return(0);
- }
- // is there a special pronunciation for this 2-digit number
- found = 0;
- if(control & 4)
- {
- sprintf(string,"_%df",value);
- found = Lookup(string,ph_digits);
- }
- if(found == 0)
- {
- sprintf(string,"_%d",value);
- found = Lookup(string,ph_digits);
- }
- // no, speak as tens+units
- if((control & 2) && (value < 10))
- {
- // speak leading zero
- Lookup("_0",ph_tens);
- }
- else
- {
- if(found)
- {
- strcpy(ph_out,ph_digits);
- return(0);
- }
- if((value % 10) == 0)
- {
- sprintf(string,"_%d0",value / 10);
- found = Lookup(string,ph_tens);
- }
- if(!found)
- {
- sprintf(string,"_%dX",value / 10);
- Lookup(string,ph_tens);
- }
- if((value % 10) == 0)
- {
- strcpy(ph_out,ph_tens);
- return(0);
- }
- found = 0;
- units = (value % 10);
- if(control & 4)
- {
- // is there a variant form of this number?
- sprintf(string,"_%df",units);
- found = Lookup(string,ph_digits);
- }
- if(found == 0)
- {
- sprintf(string,"_%d",units);
- Lookup(string,ph_digits);
- }
- }
- if(langopts.numbers & 0x30)
- {
- Lookup("_0and",ph_and);
- if(langopts.numbers & 0x10)
- sprintf(ph_out,"%s%s%s",ph_digits,ph_and,ph_tens);
- else
- sprintf(ph_out,"%s%s%s",ph_tens,ph_and,ph_digits);
- used_and = 1;
- }
- else
- {
- if(langopts.numbers & 0x200)
- {
- // remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
- if((ix = strlen(ph_tens)-1) >= 0)
- {
- if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS)
- next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type;
-
- if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
- ph_tens[ix] = 0;
- }
- }
- sprintf(ph_out,"%s%s",ph_tens,ph_digits);
- }
- if(langopts.numbers & 0x100)
- {
- // only one primary stress
- found = 0;
- for(ix=strlen(ph_out)-1; ix>=0; ix--)
- {
- if(ph_out[ix] == phonSTRESS_P)
- {
- if(found)
- ph_out[ix] = phonSTRESS_3;
- else
- found = 1;
- }
- }
- }
- return(used_and);
- } // end of LookupNum2
- int Translator::LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands)
- {//=========================================================================================================
- // Translate a 3 digit number
- int found;
- int hundreds;
- int x;
- char string[12]; // for looking up entries in **_list
- char buf1[100];
- char buf2[100];
- char ph_100[20];
- char ph_10T[20];
- char ph_digits[50];
- char ph_thousands[50];
- char ph_hundred_and[12];
- char ph_thousand_and[12];
-
- hundreds = value / 100;
- buf1[0] = 0;
- if(hundreds > 0)
- {
- ph_thousands[0] = 0;
- ph_thousand_and[0] = 0;
- Lookup("_0C",ph_100);
- if((hundreds >= 10) && (((langopts.numbers & 0x0800) == 0) || (hundreds != 19)))
- {
- ph_digits[0] = 0;
- if(LookupThousands(hundreds / 10, thousandplex+1, ph_10T) == 0)
- {
- x = 0;
- if(langopts.numbers2 & (1 << (thousandplex+1)))
- x = 4;
- LookupNum2(hundreds/10, x, ph_digits);
- }
- if(langopts.numbers2 & 0x200)
- sprintf(ph_thousands,"%s%s%c",ph_10T,ph_digits,phonPAUSE_NOLINK); // say "thousands" before its number, not after
- else
- sprintf(ph_thousands,"%s%s%c",ph_digits,ph_10T,phonPAUSE_NOLINK);
- hundreds %= 10;
- if(hundreds == 0)
- ph_100[0] = 0;
- suppress_null = 1;
- }
- ph_digits[0] = 0;
- if(hundreds > 0)
- {
- if((langopts.numbers & 0x100000) && (prev_thousands || (ph_thousands[0] != 0)))
- {
- Lookup("_0and",ph_thousand_and);
- }
- suppress_null = 1;
- found = 0;
- if((value % 1000) == 100)
- {
- // is there a special pronunciation for exactly 100 ?
- found = Lookup("_1C0",ph_digits);
- }
- if(!found)
- {
- sprintf(string,"_%dC",hundreds);
- found = Lookup(string,ph_digits); // is there a specific pronunciation for n-hundred ?
- }
- if(found)
- {
- ph_100[0] = 0;
- }
- else
- {
- if((hundreds > 1) || ((langopts.numbers & 0x400) == 0))
- {
- LookupNum2(hundreds,0,ph_digits);
- }
- }
- }
- sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
- }
- ph_hundred_and[0] = 0;
- if((langopts.numbers & 0x40) && ((value % 100) != 0))
- {
- if((value > 100) || (prev_thousands && (thousandplex==0)))
- {
- Lookup("_0and",ph_hundred_and);
- }
- }
- buf2[0] = 0;
- value = value % 100;
- if(value == 0)
- {
- if(suppress_null == 0)
- Lookup("_0",buf2);
- }
- else
- {
- x = 0;
- if(thousandplex==0)
- x = 1; // allow "eins" for 1 rather than "ein"
- else
- {
- if(langopts.numbers2 & (1 << thousandplex))
- x = 4; // use variant (feminine) for before thousands and millions
- }
- if(LookupNum2(value,x,buf2) != 0)
- {
- if(langopts.numbers & 0x80)
- ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
- }
- }
- sprintf(ph_out,"%s%s%s",buf1,ph_hundred_and,buf2);
- return(0);
- } // end of LookupNum3
- static const char *M_Variant(int value)
- {//====================================
- // returns M, or perhaps MA for some cases
-
- if(((value % 100)>20) || ((value % 100)<10)) // but not teens, 10 to 19
- {
- if ((translator->langopts.numbers2 & 0x40) &&
- ((value % 10)>=2) &&
- ((value % 10)<=4))
- {
- // for Polish language - two forms of plural!
- return("0MA");
- }
- if((translator->langopts.numbers2 & 0x80) &&
- ((value % 10)==1))
- {
- return("1MA");
- }
- }
- return("0M");
- }
- int Translator::LookupThousands(int value, int thousandplex, char *ph_out)
- {//=======================================================================
- int found;
- char string[12];
- char ph_of[12];
- char ph_thousands[40];
- ph_of[0] = 0;
- // first look fora match with the exact value of thousands
- sprintf(string,"_%dM%d",value,thousandplex);
- if((found = Lookup(string,ph_thousands)) == 0)
- {
- if((value % 100) >= 20)
- {
- Lookup("_0of",ph_of);
- }
- sprintf(string,"_%s%d",M_Variant(value),thousandplex);
- if(Lookup(string,ph_thousands) == 0)
- {
- // repeat "thousand" if higher order names are not available
- sprintf(string,"_%dM1",value);
- if((found = Lookup(string,ph_thousands)) == 0)
- Lookup("_0M1",ph_thousands);
- }
- }
- sprintf(ph_out,"%s%s",ph_of,ph_thousands);
- return(found);
- }
- int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, int wflags)
- {//=========================================================================================
- // Number translation with various options
- // the "word" may be up to 4 digits
- // "words" of 3 digits may be preceded by another number "word" for thousands or millions
- int n_digits;
- int value;
- int ix;
- unsigned char c;
- int suppress_null = 0;
- int decimal_point = 0;
- int thousandplex = 0;
- int thousands_inc = 0;
- int prev_thousands = 0;
- int this_value;
- static int prev_value;
- int decimal_count;
- int max_decimal_count;
- char string[12]; // for looking up entries in de_list
- char buf1[100];
- char ph_append[50];
- char ph_buf[200];
- char ph_buf2[50];
- static const char str_pause[2] = {phonPAUSE_NOLINK,0};
- for(ix=0; isdigit(word[ix]); ix++) ;
- n_digits = ix;
- value = this_value = atoi(word);
- ph_append[0] = 0;
- ph_buf2[0] = 0;
- // is there a previous thousands part (as a previous "word") ?
- if((n_digits == 3) && (word[-2] == langopts.thousands_sep) && isdigit(word[-3]))
- {
- prev_thousands = 1;
- }
- else
- if((langopts.thousands_sep == ' ') || (langopts.numbers & 0x1000))
- {
- // thousands groups can be separated by spaces
- if((n_digits == 3) && isdigit(word[-2]))
- {
- prev_thousands = 1;
- }
- }
- if((word[0] == '0') && (prev_thousands == 0) && (word[1] != langopts.decimal_sep))
- {
- if((n_digits == 2) && (word[3] == ':') && isdigit(word[5]) && isspace(word[7]))
- {
- // looks like a time 02:30, omit the leading zero
- }
- else
- {
- return(0); // number string with leading zero, speak as individual digits
- }
- }
- if((langopts.numbers & 0x1000) && (word[n_digits] == ' '))
- thousands_inc = 1;
- else
- if(word[n_digits] == langopts.thousands_sep)
- thousands_inc = 2;
- if(thousands_inc > 0)
- {
- // if the following "words" are three-digit groups, count them and add
- // a "thousand"/"million" suffix to this one
- ix = n_digits + thousands_inc;
- while(isdigit(word[ix]) && isdigit(word[ix+1]) && isdigit(word[ix+2]))
- {
- thousandplex++;
- if(word[ix+3] == langopts.thousands_sep)
- ix += (3 + thousands_inc);
- else
- break;
- }
- }
- if((value == 0) && prev_thousands)
- {
- suppress_null = 1;
- }
- if((word[n_digits] == langopts.decimal_sep) && isdigit(word[n_digits+1]))
- {
- // this "word" ends with a decimal point
- Lookup("_dpt",ph_append);
- decimal_point = 1;
- }
- else
- if(suppress_null == 0)
- {
- if(thousands_inc > 0)
- {
- if((thousandplex > 0) && (value < 1000))
- {
- if(langopts.numbers2 & 0x100)
- {
- if((thousandplex == 1) && (value >= 100))
- {
- // special word for 100,000's
- char ph_buf3[20];
- sprintf(string,"_%dL",value / 100);
- if(Lookup(string,ph_buf2) == 0)
- {
- LookupNum2(value/100,0,ph_buf2);
- Lookup("_0L",ph_buf3);
- strcat(ph_buf2,ph_buf3);
- }
- value %= 100;
- if(value == 0)
- suppress_null = 1;
- }
- }
- if((suppress_null == 0) && (LookupThousands(value,thousandplex,ph_append)))
- {
- // found an exact match for N thousand
- value = 0;
- suppress_null = 1;
- }
- }
- }
- }
- else
- if((thousandplex > 1) && prev_thousands && (prev_value > 0))
- {
- sprintf(string,"_%s%d",M_Variant(value),thousandplex+1);
- if(Lookup(string,buf1)==0)
- {
- // speak this thousandplex if there was no word for the previous thousandplex
- sprintf(string,"_0M%d",thousandplex);
- Lookup(string,ph_append);
- }
- }
- if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
- {
- Lookup("_.",ph_append);
- }
- LookupNum3(value, ph_buf, suppress_null, thousandplex, prev_thousands);
- if((thousandplex > 0) && (langopts.numbers2 & 0x200))
- sprintf(ph_out,"%s%s%s",ph_append,ph_buf2,ph_buf); // say "thousands" before its number
- else
- sprintf(ph_out,"%s%s%s",ph_buf2,ph_buf,ph_append);
- while(decimal_point)
- {
- n_digits++;
- decimal_count = 0;
- while(isdigit(word[n_digits+decimal_count]))
- decimal_count++;
- if(decimal_count > 1)
- {
- max_decimal_count = 2;
- switch(langopts.numbers & 0xe000)
- {
- case 0x8000:
- max_decimal_count = 5;
- case 0x4000:
- // French/Polish decimal fraction
- while(word[n_digits] == '0')
- {
- Lookup("_0",buf1);
- strcat(ph_out,buf1);
- decimal_count--;
- n_digits++;
- }
- if((decimal_count <= max_decimal_count) && isdigit(word[n_digits]))
- {
- LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
- strcat(ph_out,buf1);
- n_digits += decimal_count;
- }
- break;
- case 0x2000:
- // Italian decimal fractions
- if((decimal_count < 4) || ((decimal_count==4) && (word[n_digits] != '0')))
- {
- LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
- strcat(ph_out,buf1);
- if(word[n_digits]=='0')
- {
- // decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
- sprintf(string,"_0Z%d",decimal_count);
- Lookup(string,buf1);
- strcat(ph_out,buf1);
- }
- n_digits += decimal_count;
- }
- break;
- case 0x6000:
- // Romanian decimal fractions
- if((decimal_count <= 4) && (word[n_digits] != '0'))
- {
- LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
- strcat(ph_out,buf1);
- n_digits += decimal_count;
- }
- break;
- }
- }
- while(isdigit(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
- {
- value = word[n_digits++] - '0';
- LookupNum2(value, 1, buf1);
- strcat(ph_out,buf1);
- }
- // something after the decimal part ?
- if(Lookup("_dpt2",buf1))
- strcat(ph_out,buf1);
- if(c == langopts.decimal_sep)
- {
- Lookup("_dpt",buf1);
- strcat(ph_out,buf1);
- }
- else
- {
- decimal_point = 0;
- }
- }
- if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
- {
- int next_char;
- char *p;
- p = &word[n_digits+1];
- p += utf8_in(&next_char,p,0);
- if((langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
- utf8_in(&next_char,p,0);
- if(!iswalpha(next_char))
- strcat(ph_out,str_pause); // don't add pause for 100s, 6th, etc.
- }
- *flags = FLAG_FOUND;
- prev_value = this_value;
- return(1);
- } // end of TranslateNumber_1
- int Translator::TranslateNumber(char *word1, char *ph_out, unsigned int *flags, int wflags)
- {//=======================================================================================
- if(option_sayas == SAYAS_DIGITS1)
- return(0); // speak digits individually
- if((langopts.numbers & 0x3) == 1)
- return(TranslateNumber_1(word1,ph_out,flags,wflags));
- return(0);
- } // end of TranslateNumber