numbers.cpp - This C++ code is part of the eSpeak speech synthesis system; it translates numbers and individual letters into phoneme strings.

/native/external/espeak/src/numbers.cpp

http://eyes-free.googlecode.com/ · C++ · 1397 lines · 1159 code · 166 blank · 72 comment · 310 complexity · eb0938b3bbca1fa7f551fddc169df735 MD5 · raw file

/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/

#include "StdAfx.h"

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <wctype.h>
////#include <wchar.h>

#include "speak_lib.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
#include "voice.h"
#include "translate.h"



// Modifier (accent) codes.
// Each value is the index of the corresponding entry in accents_tab[], and is
// the modifier value packed into table entries by the LETTER() and LIGATURE()
// macros.  M_NAME (0) means "no modifier": LookupAccentedLetter() then uses
// the name of the base letter on its own.

// modifiers whose accents_tab[] entry has flags bit 0 set (except M_CURL)
#define M_NAME      0
#define M_SMALLCAP  1
#define M_TURNED    2
#define M_REVERSED  3
#define M_CURL      4

// diacritics
#define M_ACUTE     5
#define M_BREVE     6
#define M_CARON     7
#define M_CEDILLA   8
#define M_CIRCUMFLEX 9
#define M_DIAERESIS 10
#define M_DOUBLE_ACUTE 11
#define M_DOT_ABOVE 12
#define M_GRAVE     13
#define M_MACRON    14
#define M_OGONEK    15
#define M_RING      16
#define M_STROKE    17
#define M_TILDE     18

// further modifiers, used by the U+0250 table
#define M_BAR       19
#define M_RETROFLEX 20
#define M_HOOK      21


// aliases: these share the accents_tab[] entry of the code they map to
#define M_MIDDLE_DOT  M_DOT_ABOVE  // duplicate of M_DOT_ABOVE
#define M_IMPLOSIVE   M_HOOK

// One entry of accents_tab[]: how a letter modifier is named.
typedef struct {
const char *name;   // key which is looked up in the dictionary to get the modifier's spoken name
int  flags;         // bit 0: LookupAccentedLetter() speaks the modifier name before the letter name
} ACCENTS;

// These are tokens to look up in the *_list file.
// The table is indexed by the M_* modifier codes, so the order of the entries
// must stay in step with those definitions (entry 0 is M_NAME, entry 21 is M_HOOK).
ACCENTS accents_tab[] = {
{"_lig", 1},
{"_smc", 1},  // smallcap
{"_tur", 1},  // turned
{"_rev", 1},  // reversed
{"_crl", 0},  // curl

{"_acu", 0},  // acute
{"_brv", 0},  // breve
{"_hac", 0},  // caron/hacek
{"_ced", 0},  // cedilla
{"_cir", 0},  // circumflex
{"_dia", 0},  // diaeresis
{"_ac2", 0},  // double acute
{"_dot", 0},  // dot
{"_grv", 0},  // grave
{"_mcn", 0},  // macron
{"_ogo", 0},  // ogonek
{"_rng", 0},  // ring
{"_stk", 0},  // stroke
{"_tld", 0},  // tilde

{"_bar", 0},  // bar
{"_rfx", 0},  // retroflex
{"_hok", 0},  // hook
};


// Encoding of the entries in the letter_accents_* tables (16 bit values).
//
// CAPITAL (0) marks a code point for which no decomposition is stored.
//
// LETTER(ch,mod1,mod2):
//    bits 0-5    base letter, stored as (ch - 59)
//    bits 6-10   first modifier  (M_* code)
//    bits 11-14  second modifier (M_* code)
//
// LIGATURE(ch1,ch2,mod1):
//    bits 0-5    first letter,  stored as (ch1 - 59)
//    bits 6-11   second letter, stored as (ch2 - 59)
//    bits 12-14  modifier (M_* code)
//    bit  15     set, to distinguish a ligature from a single letter
//
// Every argument and each whole expansion is parenthesized so that the macros
// give the same value inside a larger expression and with compound arguments.
#define CAPITAL  0
#define LETTER(ch,mod1,mod2) (((ch)-59)+((mod1) << 6)+((mod2) << 11))
#define LIGATURE(ch1,ch2,mod1) (((ch1)-59)+(((ch2)-59) << 6)+((mod1) << 12)+0x8000)


// Codes for base letters which are not ASCII 'a'..'z'.  They are all below 'a',
// and (code - 59) is the index of the real character code in non_ascii_tab[].
#define L_ALPHA  60   // U+3B1
#define L_SCHWA  61   // U+259
#define L_OPEN_E 62   // U+25B
#define L_GAMMA  63   // U+3B3
#define L_IOTA   64   // U+3B9
#define L_OE     65   // U+153
#define L_OMEGA  66   // U+3C9

#define L_PHI    67   // U+3C6
#define L_ESH    68   // U+283
#define L_UPSILON 69 // U+3C5
#define L_EZH     70 // U+292
#define L_GLOTTAL 71 // U+294
#define L_RTAP    72 // U+27E


// Character codes of the non-ASCII base letters.
// Indexed by (L_xxx - 59); index 0 is not used by any L_xxx code.
static const short non_ascii_tab[] = {
	0,
	0x3b1,   // L_ALPHA
	0x259,   // L_SCHWA
	0x25b,   // L_OPEN_E
	0x3b3,   // L_GAMMA
	0x3b9,   // L_IOTA
	0x153,   // L_OE
	0x3c9,   // L_OMEGA
	0x3c6,   // L_PHI
	0x283,   // L_ESH
	0x3c5,   // L_UPSILON
	0x292,   // L_EZH
	0x294,   // L_GLOTTAL
	0x27e    // L_RTAP
};


// characters U+00e0 to U+017f
// One entry per code point, indexed by (character code - 0xe0); 160 entries.
// Each entry is built with LETTER() or LIGATURE(), or is 0 (CAPITAL, or a
// character which is not a letter), in which case LookupAccentedLetter()
// produces nothing for that code point.
const unsigned short letter_accents_0e0[] = {
LETTER('a',M_GRAVE,0),   // U+00e0
LETTER('a',M_ACUTE,0),
LETTER('a',M_CIRCUMFLEX,0),
LETTER('a',M_TILDE,0),
LETTER('a',M_DIAERESIS,0),
LETTER('a',M_RING,0),
LIGATURE('a','e',0),
LETTER('c',M_CEDILLA,0),
LETTER('e',M_GRAVE,0),
LETTER('e',M_ACUTE,0),
LETTER('e',M_CIRCUMFLEX,0),
LETTER('e',M_DIAERESIS,0),
LETTER('i',M_GRAVE,0),
LETTER('i',M_ACUTE,0),
LETTER('i',M_CIRCUMFLEX,0),
LETTER('i',M_DIAERESIS,0),
LETTER('d',M_NAME,0),  // eth  // U+00f0
LETTER('n',M_TILDE,0),
LETTER('o',M_GRAVE,0),
LETTER('o',M_ACUTE,0),
LETTER('o',M_CIRCUMFLEX,0),
LETTER('o',M_TILDE,0),
LETTER('o',M_DIAERESIS,0),
0,     // division sign
LETTER('o',M_STROKE,0),
LETTER('u',M_GRAVE,0),
LETTER('u',M_ACUTE,0),
LETTER('u',M_CIRCUMFLEX,0),
LETTER('u',M_DIAERESIS,0),
LETTER('y',M_ACUTE,0),
LETTER('t',M_NAME,0),  // thorn
LETTER('y',M_DIAERESIS,0),
CAPITAL,                 // U+0100
LETTER('a',M_MACRON,0),
CAPITAL,
LETTER('a',M_BREVE,0),
CAPITAL,
LETTER('a',M_OGONEK,0),
CAPITAL,
LETTER('c',M_ACUTE,0),
CAPITAL,
LETTER('c',M_CIRCUMFLEX,0),
CAPITAL,
LETTER('c',M_DOT_ABOVE,0),
CAPITAL,
LETTER('c',M_CARON,0),
CAPITAL,
LETTER('d',M_CARON,0),
CAPITAL,                 // U+0110
LETTER('d',M_STROKE,0),
CAPITAL,
LETTER('e',M_MACRON,0),
CAPITAL,
LETTER('e',M_BREVE,0),
CAPITAL,
LETTER('e',M_DOT_ABOVE,0),
CAPITAL,
LETTER('e',M_OGONEK,0),
CAPITAL,
LETTER('e',M_CARON,0),
CAPITAL,
LETTER('g',M_CIRCUMFLEX,0),
CAPITAL,
LETTER('g',M_BREVE,0),
CAPITAL,                // U+0120
LETTER('g',M_DOT_ABOVE,0),
CAPITAL,
LETTER('g',M_CEDILLA,0),
CAPITAL,
LETTER('h',M_CIRCUMFLEX,0),
CAPITAL,
LETTER('h',M_STROKE,0),
CAPITAL,
LETTER('i',M_TILDE,0),
CAPITAL,
LETTER('i',M_MACRON,0),
CAPITAL,
LETTER('i',M_BREVE,0),
CAPITAL,
LETTER('i',M_OGONEK,0),
CAPITAL,               // U+0130
LETTER('i',M_NAME,0), // dotless i
CAPITAL,
LIGATURE('i','j',0),
CAPITAL,
LETTER('j',M_CIRCUMFLEX,0),
CAPITAL,
LETTER('k',M_CEDILLA,0),
LETTER('k',M_NAME,0),  // kra
CAPITAL,
LETTER('l',M_ACUTE,0),
CAPITAL,
LETTER('l',M_CEDILLA,0),
CAPITAL,
LETTER('l',M_CARON,0),
CAPITAL,
LETTER('l',M_MIDDLE_DOT,0),  // U+0140
CAPITAL,
LETTER('l',M_STROKE,0),
CAPITAL,
LETTER('n',M_ACUTE,0),
CAPITAL,
LETTER('n',M_CEDILLA,0),
CAPITAL,
LETTER('n',M_CARON,0),
LETTER('n',M_NAME,0),  // apostrophe n
CAPITAL,
LETTER('n',M_NAME,0),  // eng
CAPITAL,
LETTER('o',M_MACRON,0),
CAPITAL,
LETTER('o',M_BREVE,0),
CAPITAL,             // U+0150
LETTER('o',M_DOUBLE_ACUTE,0),
CAPITAL,
LIGATURE('o','e',0),
CAPITAL,
LETTER('r',M_ACUTE,0),
CAPITAL,
LETTER('r',M_CEDILLA,0),
CAPITAL,
LETTER('r',M_CARON,0),
CAPITAL,
LETTER('s',M_ACUTE,0),
CAPITAL,
LETTER('s',M_CIRCUMFLEX,0),
CAPITAL,
LETTER('s',M_CEDILLA,0),
CAPITAL,              // U+0160
LETTER('s',M_CARON,0),
CAPITAL,
LETTER('t',M_CEDILLA,0),
CAPITAL,
LETTER('t',M_CARON,0),
CAPITAL,
LETTER('t',M_STROKE,0),
CAPITAL,
LETTER('u',M_TILDE,0),
CAPITAL,
LETTER('u',M_MACRON,0),
CAPITAL,
LETTER('u',M_BREVE,0),
CAPITAL,
LETTER('u',M_RING,0),
CAPITAL,              // U+0170
LETTER('u',M_DOUBLE_ACUTE,0),
CAPITAL,
LETTER('u',M_OGONEK,0),
CAPITAL,
LETTER('w',M_CIRCUMFLEX,0),
CAPITAL,
LETTER('y',M_CIRCUMFLEX,0),
CAPITAL,   // Y-DIAERESIS
CAPITAL,
LETTER('z',M_ACUTE,0),
CAPITAL,
LETTER('z',M_DOT_ABOVE,0),
CAPITAL,
LETTER('z',M_CARON,0),
LETTER('s',M_NAME,0), // long-s  // U+17f
};


// characters U+0250 to U+02A8
// One entry per code point, indexed by (character code - 0x250); 89 entries.
// An entry of 0 means that no decomposition is stored for that character
// (either it is a base letter itself, or it needs a modifier which has no
// M_* code, as shown by the commented-out LETTER() calls).
const unsigned short letter_accents_250[] = {
LETTER('a',M_TURNED,0),		// U+250
LETTER(L_ALPHA,0,0),
LETTER(L_ALPHA,M_TURNED,0),
LETTER('b',M_IMPLOSIVE,0),
0,  // open-o
LETTER('c',M_CURL,0),
LETTER('d',M_RETROFLEX,0),
LETTER('d',M_IMPLOSIVE,0),
LETTER('e',M_REVERSED,0),	// U+258
0,   // schwa
LETTER(L_SCHWA,M_HOOK,0),
0,   // open-e
LETTER(L_OPEN_E,M_REVERSED,0),
LETTER(L_OPEN_E,M_HOOK,M_REVERSED),
0,//LETTER(L_OPEN_E,M_CLOSED,M_REVERSED),
LETTER('j',M_BAR,0),
LETTER('g',M_IMPLOSIVE,0),	// U+260
LETTER('g',0,0),
LETTER('g',M_SMALLCAP,0),
LETTER(L_GAMMA,0,0),
0,   // ramshorn
LETTER('h',M_TURNED,0),
LETTER('h',M_HOOK,0),
0,//LETTER(L_HENG,M_HOOK,0),
LETTER('i',M_BAR,0),		// U+268
LETTER(L_IOTA,0,0),
LETTER('i',M_SMALLCAP,0),
LETTER('l',M_TILDE,0),
LETTER('l',M_BAR,0),
LETTER('l',M_RETROFLEX,0),
LIGATURE('l','z',0),
LETTER('m',M_TURNED,0),
0,//LETTER('m',M_TURNED,M_LEG),	// U+270
LETTER('m',M_HOOK,0),
0,//LETTER('n',M_LEFTHOOK,0),
LETTER('n',M_RETROFLEX,0),
LETTER('n',M_SMALLCAP,0),
LETTER('o',M_BAR,0),
LIGATURE('o','e',M_SMALLCAP),
0,//LETTER(L_OMEGA,M_CLOSED,0),
LETTER(L_PHI,0,0),		// U+278
LETTER('r',M_TURNED,0),
0,//LETTER('r',M_TURNED,M_LEG),
LETTER('r',M_RETROFLEX,M_TURNED),
0,//LETTER('r',M_LEG,0),
LETTER('r',M_RETROFLEX,0),
0,  // r-tap
LETTER(L_RTAP,M_REVERSED,0),
LETTER('r',M_SMALLCAP,0),	// U+280
LETTER('r',M_TURNED,M_SMALLCAP),
LETTER('s',M_RETROFLEX,0),
0,  // esh
0,//LETTER('j',M_BAR,L_IMPLOSIVE),
LETTER(L_ESH,M_REVERSED,0),
LETTER(L_ESH,M_CURL,0),
LETTER('t',M_TURNED,0),
LETTER('t',M_RETROFLEX,0),	// U+288
LETTER('u',M_BAR,0),
LETTER(L_UPSILON,0,0),
LETTER('v',M_HOOK,0),
LETTER('v',M_TURNED,0),
LETTER('w',M_TURNED,0),
LETTER('y',M_TURNED,0),
LETTER('y',M_SMALLCAP,0),
LETTER('z',M_RETROFLEX,0),	// U+290
LETTER('z',M_CURL,0),
0,  // ezh
LETTER(L_EZH,M_CURL,0),
0,  // glottal stop
LETTER(L_GLOTTAL,M_REVERSED,0),
LETTER(L_GLOTTAL,M_TURNED,0),
0,//LETTER('c',M_LONG,0),
0,  // bilabial click		// U+298
LETTER('b',M_SMALLCAP,0),
0,//LETTER(L_OPEN_E,M_CLOSED,0),
LETTER('g',M_IMPLOSIVE,M_SMALLCAP),
LETTER('h',M_SMALLCAP,0),
LETTER('j',M_CURL,0),
LETTER('k',M_TURNED,0),
LETTER('l',M_SMALLCAP,0),
LETTER('q',M_HOOK,0),      // U+2a0
LETTER(L_GLOTTAL,M_STROKE,0),
LETTER(L_GLOTTAL,M_STROKE,M_REVERSED),
LIGATURE('d','z',0),
0,   // dezh
LIGATURE('d','z',M_CURL),
LIGATURE('t','s',0),
0,   // tesh
LIGATURE('t','s',M_CURL),
};

int Translator::LookupLetter2(unsigned int letter, char *ph_buf)
{//=============================================================
	// Get the phonemes for the name of a single letter.
	// Three sources are tried in turn:
	//   1. the dictionary entry "_<letter>"
	//   2. the dictionary entry "<letter>"
	//   3. the translation rules, applied to the letter as a word
	// Returns the first byte of ph_buf, which the caller tests against zero.
	char key[10];
	char *underscore_key = &key[1];   // "_<letter> "
	char *plain_key = &key[2];        // "<letter> "
	int n_bytes;

	// layout of key[]:  0, '_', <utf-8 bytes of letter>, ' ', 0
	key[0] = 0;
	key[1] = '_';
	n_bytes = utf8_out(letter, plain_key);
	plain_key[n_bytes] = ' ';
	plain_key[n_bytes+1] = 0;

	if(Lookup(underscore_key,ph_buf) != 0)
		return(ph_buf[0]);

	// replace the underscore, so that the plain key is preceded by a space
	key[1] = ' ';
	if(Lookup(plain_key,ph_buf) != 0)
		return(ph_buf[0]);

	TranslateRules(plain_key, ph_buf, 20, NULL,0,NULL);
	return(ph_buf[0]);
}


void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf)
{//=====================================================================
	// Build the spoken name of an accented letter or a ligature from the
	// name(s) of its base letter(s) and the name(s) of its modifier(s), using
	// the letter_accents_0e0[] and letter_accents_250[] tables.
	//   letter:  character code to look up
	//   ph_buf:  receives the phoneme string.  Nothing is written to it if the
	//            letter has no table entry, or if the name of the first
	//            modifier or of the base letter cannot be found.
	int accent_data = 0;
	int accent1 = 0;
	int accent2 = 0;
	int basic_letter;
	int letter2=0;
	char ph_letter1[30];
	char ph_letter2[30];
	char ph_accent1[30];
	char ph_accent2[30];

	ph_accent2[0] = 0;

	// letter_accents_0e0[] has an entry for every code from U+00e0 up to and
	// including U+017f (long-s), so the upper bound here is inclusive
	if((letter >= 0xe0) && (letter <= 0x17f))
	{
		accent_data = letter_accents_0e0[letter - 0xe0];
	}
	else
	if((letter >= 0x250) && (letter <= 0x2a8))
	{
		accent_data = letter_accents_250[letter - 0x250];
	}

	if(accent_data != 0)
	{
		// bits 0-5: the base letter, either ASCII or an index into non_ascii_tab[]
		basic_letter = (accent_data & 0x3f) + 59;
		if(basic_letter < 'a')
			basic_letter = non_ascii_tab[basic_letter-59];

		if(accent_data & 0x8000)
		{
			// ligature: a second letter and one modifier; accent1 stays 0 ("_lig")
			letter2 = (accent_data >> 6) & 0x3f;
			letter2 += 59;
			accent2 = (accent_data >> 12) & 0x7;
		}
		else
		{
			// single letter with up to two modifiers
			accent1 = (accent_data >> 6) & 0x1f;
			accent2 = (accent_data >> 11) & 0xf;
		}


		if(Lookup(accents_tab[accent1].name, ph_accent1) != 0)
		{

			if(LookupLetter2(basic_letter, ph_letter1) != 0)
			{
				if(accent2 != 0)
				{
					// the result of this lookup is deliberately not checked
					Lookup(accents_tab[accent2].name, ph_accent2);

					if(accents_tab[accent2].flags & 1)
					{
						// this modifier is spoken first: write it now, and
						// clear it so that it is not appended again below
						strcpy(ph_buf,ph_accent2);
						ph_buf += strlen(ph_buf);
						ph_accent2[0] = 0;
					}
				}
				if(letter2 != 0)
				{
					//ligature
					LookupLetter2(letter2, ph_letter2);
					sprintf(ph_buf,"%s%c%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, phonSTRESS_P, ph_letter2, ph_accent2);
				}
				else
				{
					if(accent1 == 0)
						strcpy(ph_buf, ph_letter1);   // M_NAME: just the letter name
					else
					if((langopts.accents & 1) || (accents_tab[accent1].flags & 1))
						sprintf(ph_buf,"%s%c%c%s", ph_accent1, phonPAUSE_VSHORT, phonSTRESS_P, ph_letter1);   // modifier, then letter
					else
						sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);   // letter, then modifier
				}
			}
		}
	}
}  // end of LookupAccentedLetter



void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
{//=============================================================================
	// Get the phonemes for a single character.
	//   letter:     character code
	//   next_byte:  -1 when speaking normal text rather than spelling out
	//               characters; otherwise the byte which follows the letter
	//               in the word (' ' at the end of the word)
	//   ph_buf1:    receives the phoneme string.  It is empty if nothing was
	//               found, or starts with phonSWITCH to ask for a change of
	//               language.
	int len;
	unsigned char *p;
	static char single_letter[10] = {0,0};
	char ph_stress[2];
	char ph_buf3[40];

	ph_buf1[0] = 0;
	len = utf8_out(letter,&single_letter[2]);
	single_letter[len+2] = ' ';
	// the buffer is static, so terminate the key explicitly; otherwise bytes
	// left by an earlier, longer key would follow it
	single_letter[len+3] = 0;

	if(next_byte == -1)
	{
		// speaking normal text, not individual characters
		if(Lookup(&single_letter[2],ph_buf1) != 0)
			return;

		single_letter[1] = '_';
		if(Lookup(&single_letter[1],ph_buf3) != 0)
			return;   // the character is specified as _* so ignore it when speaking normal text

		// check whether this character is specified for English
		SetTranslator2("en");
		if(translator2->Lookup(&single_letter[2], ph_buf3) != 0)
		{
			// yes, switch to English and re-translate the word
			sprintf(ph_buf1,"%c",phonSWITCH);
		}
		SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
		return;
	}

	if((letter <= 32) || iswspace(letter))
	{
		// lookup space as _#32 etc.
		sprintf(&single_letter[1],"_#%d ",(int)letter);
		Lookup(&single_letter[1],ph_buf1);
		return;
	}

	if(next_byte != ' ')
		next_byte = RULE_SPELLING;
	single_letter[3+len] = next_byte;   // follow by space-space if the end of the word, or space-RULE_SPELLING
	single_letter[4+len] = 0;

	single_letter[1] = '_';

	// try "_<letter>", then "<letter>", then the translation rules
	if(Lookup(&single_letter[1],ph_buf3) == 0)
	{
		single_letter[1] = ' ';
		if(Lookup(&single_letter[2],ph_buf3) == 0)
		{
			TranslateRules(&single_letter[2], ph_buf3, sizeof(ph_buf3), NULL,FLAG_NO_TRACE,NULL);
		}
	}

	if(ph_buf3[0] == 0)
	{
		// still nothing: try to describe it as base letter + accent
		LookupAccentedLetter(letter, ph_buf3);
	}

	if(ph_buf3[0] == 0)
	{
		ph_buf1[0] = 0;
		return;
	}
	if(ph_buf3[0] == phonSWITCH)
	{
		strcpy(ph_buf1,ph_buf3);
		return;
	}
	// add a stress marker at the start of the letter name, unless one is already marked
	ph_stress[0] = phonSTRESS_P;
	ph_stress[1] = 0;

	for(p=(unsigned char *)ph_buf3; *p != 0; p++)
	{
		if(phoneme_tab[*p]->type == phSTRESS)
			ph_stress[0] = 0;  // stress is already marked
	}
	sprintf(ph_buf1,"%s%s",ph_stress,ph_buf3);
}



int Translator::TranslateLetter(char *word, char *phonemes, int control, int word_length)
{//======================================================================================
// get pronunciation for an isolated letter
// return number of bytes used by the letter
// control 2=say-as glyphs, 3-say-as chars
//
//   word:       UTF-8 text, starting at the letter to speak
//   phonemes:   the letter's phonemes are appended to the string already in
//               this buffer, preceded by a 0xff marker byte.  If a change of
//               language is needed the buffer is overwritten with a
//               phonSWITCH sequence instead, and 0 is returned.
//   word_length: not used by this function
	int n_bytes;
	int letter;
	int len;
	int save_option_phonemes;
	char *p2;
	char *pbuf;
	char capital[20];
	char ph_buf[60];
	char ph_buf2[sizeof(ph_buf)+sizeof(capital)+2];   // marker byte + ph_buf + capital
	char hexbuf[12];   // big enough for the hex digits of any int value

	ph_buf[0] = 0;
	capital[0] = 0;

	n_bytes = utf8_in(&letter,word,0);

	if((letter & 0xfff00) == 0x0e000)
	{
		letter &= 0xff;   // uncode private usage area
	}

	if(control > 2)
	{
		// include CAPITAL information
		if(iswupper(letter))
		{
			Lookup("_cap",capital);
		}
	}
	letter = towlower2(letter);

	LookupLetter(letter, word[n_bytes], ph_buf);

	if(ph_buf[0] == phonSWITCH)
	{
		strcpy(phonemes,ph_buf);
		return(0);
	}

	if((ph_buf[0] == 0) && (translator_name != L('e','n')))
	{
		// speak as English, check whether there is a translation for this character
		SetTranslator2("en");
		save_option_phonemes = option_phonemes;
		option_phonemes = 0;
		translator2->LookupLetter(letter, word[n_bytes], ph_buf);
		SelectPhonemeTable(voice->phoneme_tab_ix);  // revert to original phoneme table
		option_phonemes = save_option_phonemes;

		if(ph_buf[0] != 0)
		{
			sprintf(phonemes,"%cen",phonSWITCH);
			return(0);
		}
	}

	if(ph_buf[0] == 0)
	{
		// character name not found
		if(iswalpha(letter))
			Lookup("_?A",ph_buf);

		if((ph_buf[0]==0) && !iswspace(letter))
			Lookup("_??",ph_buf);

		if(ph_buf[0] != 0)
		{
			// speak the hexadecimal number of the character code
			sprintf(hexbuf,"%x",(unsigned int)letter);
			pbuf = ph_buf;
			for(p2 = hexbuf; *p2 != 0; p2++)
			{
				// append a short pause and then the name of the next hex digit
				pbuf += strlen(pbuf);
				*pbuf++ = phonPAUSE_VSHORT;
				LookupLetter(*p2, 0, pbuf);
			}
		}
	}

	len = strlen(phonemes);
	if(langopts.accents & 2)
		sprintf(ph_buf2,"%c%s%s",0xff,ph_buf,capital);
	else
		sprintf(ph_buf2,"%c%s%s",0xff,capital,ph_buf);  // the 0xff marker will be removed or replaced in SetSpellingStress()
	if((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
	{
		strcpy(&phonemes[len],ph_buf2);
	}
	return(n_bytes);
}  // end of TranslateLetter



void Translator::SetSpellingStress(char *phonemes, int control, int n_chars)
{//=========================================================================
// Individual letter names, reduce the stress of some.
	int ix;
	unsigned int c;
	int n_stress=0;
	int count;
	unsigned char buf[N_WORD_PHONEMES];

	for(ix=0; (c = phonemes[ix]) != 0; ix++)
	{
		if(c == phonSTRESS_P)
		{
			n_stress++;
		}
		buf[ix] = c;
	}
	buf[ix] = 0;

	count = 0;
	for(ix=0; (c = buf[ix]) != 0; ix++)
	{
		if((c == phonSTRESS_P) && (n_chars > 1))
		{
			count++;

			if(langopts.spelling_stress == 1)
			{
				// stress on initial letter when spelling
				if(count > 1)
					c = phonSTRESS_3;
			}
			else
			{
				if(count != n_stress)
				{
					if(((count % 3) != 0) || (count == n_stress-1))
						c = phonSTRESS_3;   // reduce to secondary stress
				}
			}
		}
		else
		if(c == 0xff)
		{
			if((control < 2) || (ix==0))
				continue;   // don't insert pauses

			if(control == 4)
				c = phonPAUSE;    // pause after each character
			if(((count % 3) == 0) || (control > 2))
				c = phonPAUSE_SHORT;  // pause following a primary stress
			else
				continue;       // remove marker
		}
		*phonemes++ = c;
	}
	if(control >= 2)
		*phonemes++ = phonPAUSE_NOLINK;
	*phonemes = 0;
}  // end of SetSpellingStress




int Translator::TranslateRoman(char *word, char *ph_out)
{//=====================================================
// Try to speak a word as a Roman number.
//   word:    lower case text; the number ends at the first space (or at the
//            end of the string)
//   ph_out:  receives the phonemes: the "_roman" dictionary entry (if there
//            is one) followed by the number
// Returns 1 if the word was spoken as a number.  Returns 0 if it is not a
// well-formed Roman number, or if its value is less than 2 or greater than
// langopts.max_roman.
	int c;
	char *p;
	const char *p2;
	int acc;
	int prev;
	int value;
	int subtract;
	int repeat = 0;
	unsigned int flags;
	char number_chars[N_WORD_BYTES];

	static const char *roman_numbers = "ixcmvld";
	static int roman_values[] = {1,10,100,1000,5,50,500};
 
	acc = 0;
	prev = 0;
	subtract = 0x7fff;

	// A NUL also ends the word.  It must not be passed to strchr(), which
	// would match the terminator of roman_numbers and give an index beyond
	// the end of roman_values[].
	while(((c = *word++) != ' ') && (c != 0))
	{
		if((p2 = strchr(roman_numbers,c)) == NULL)
			return(0);

		value = roman_values[p2 - roman_numbers];
		if(value == prev)
		{
			// no more than 3 of the same numeral in succession
			repeat++;
			if(repeat >= 3)
				return(0);
		}
		else
			repeat = 0;

		if((prev==5) || (prev==50) || (prev==500))
		{
			// v, l, d cannot be repeated or be followed by a larger numeral
			if(value >= prev)
				return(0);
		}
		if((prev != 0) && (prev < value))
		{
			// subtractive pair, e.g. "ix"
			if(((acc % 10) != 0) || ((prev*10) < value))
				return(0);
			subtract = prev;
			value -= subtract;
		}
		else
		if(value >= subtract)
			return(0);
		else
			acc += prev;
		prev = value;
	}
	acc += prev;
	if(acc < 2)
		return(0);

	if(acc > langopts.max_roman)
		return(0);

	Lookup("_roman",ph_out);   // precede by "roman" if _roman is defined in *_list
	p = &ph_out[strlen(ph_out)];

	// speak the value as an ordinary decimal number
	sprintf(number_chars," %d ",acc);
	TranslateNumber(&number_chars[1],p,&flags,0);
	return(1);
}  // end of TranslateRoman


int Translator::LookupNum2(int value, int control, char *ph_out)
{//=============================================================
// Lookup a 2 digit number
// control bit 0: use special form of '1'  (dictionary entry "_1a")
// control bit 1: speak a leading zero before a value less than 10
// control bit 2: use the variant form (dictionary entries "_<n>f") if there is one
//
// ph_out receives the phonemes.
// Returns 1 if the "_0and" word was put between the tens and the units,
// otherwise 0.

	int found;
	int ix;
	int units;
	int used_and=0;
	int next_phtype;
	char string[12];  // for looking up entries in de_list
	char ph_tens[50];
	char ph_digits[50];
	char ph_and[12];

	if((value == 1) && (control & 1))
	{
		if(Lookup("_1a",ph_out) != 0)
			return(0);
	}
	// is there a special pronunciation for this 2-digit number
	found = 0;
	if(control & 4)
	{
		sprintf(string,"_%df",value);
		found = Lookup(string,ph_digits);
	}
	if(found == 0)
	{
		sprintf(string,"_%d",value);
		found = Lookup(string,ph_digits);
	}

	// no, speak as tens+units
	if((control & 2) && (value < 10))
	{
		// speak leading zero
		Lookup("_0",ph_tens);
	}
	else
	{
		if(found)
		{
			strcpy(ph_out,ph_digits);
			return(0);
		}

		if((value % 10) == 0)
		{
			// a multiple of ten: look for its complete name first
			sprintf(string,"_%d0",value / 10);
			found = Lookup(string,ph_tens);
		}
		if(!found)
		{
			// the form of the tens which is used before a units digit
			sprintf(string,"_%dX",value / 10);
			Lookup(string,ph_tens);
		}

		if((value % 10) == 0)
		{
			strcpy(ph_out,ph_tens);
			return(0);
		}

		found = 0;
		units = (value % 10);
		if(control & 4)
		{
			// is there a variant form of this number?
			sprintf(string,"_%df",units);
			found = Lookup(string,ph_digits);
		}
		if(found == 0)
		{
			sprintf(string,"_%d",units);
			Lookup(string,ph_digits);
		}
	}

	if(langopts.numbers & 0x30)
	{
		// join tens and units with "and"; 0x10 puts the units first
		Lookup("_0and",ph_and);
		if(langopts.numbers & 0x10)
			sprintf(ph_out,"%s%s%s",ph_digits,ph_and,ph_tens);
		else
			sprintf(ph_out,"%s%s%s",ph_tens,ph_and,ph_digits);
		used_and = 1;
	}
	else
	{
		if(langopts.numbers & 0x200)
		{
			// remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
			if((ix = (int)strlen(ph_tens)-1) >= 0)
			{
				// Phoneme codes are indexes into phoneme_tab[], so they must be
				// read as unsigned bytes: a plain char may be signed, and a
				// code of 0x80 or more would then give an index out of range.
				if((next_phtype = phoneme_tab[(unsigned char)(ph_digits[0])]->type) == phSTRESS)
					next_phtype = phoneme_tab[(unsigned char)(ph_digits[1])]->type;
	
				if((phoneme_tab[(unsigned char)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL))
					ph_tens[ix] = 0;
			}
		}
		sprintf(ph_out,"%s%s",ph_tens,ph_digits);
	}

	if(langopts.numbers & 0x100)
	{
		// only one primary stress
		// (working backwards: the last one is kept, earlier ones are reduced)
		found = 0;
		for(ix=strlen(ph_out)-1; ix>=0; ix--)
		{
			if(ph_out[ix] == phonSTRESS_P)
			{
				if(found)
					ph_out[ix] = phonSTRESS_3;
				else
					found = 1;
			}
		}
	}
	return(used_and);
}  // end of LookupNum2


int Translator::LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands)
{//=========================================================================================================
// Translate a 3 digit number
//   value:          the number.  It may be 1000 or more, in which case the
//                   part above the hundreds is spoken as "<n> thousand"
//   ph_out:         receives the phonemes
//   suppress_null:  if non-zero, nothing is spoken for a final "00"
//   thousandplex:   which group of three digits this is (0 = the last group)
//   prev_thousands: non-zero if the caller found a thousands group before this one
// Always returns 0.
	int found;
	int hundreds;
	int x;
	char string[12];  // for looking up entries in **_list
	char buf1[100];
	char buf2[100];
	char ph_100[20];
	char ph_10T[20];
	char ph_digits[50];
	char ph_thousands[50];
	char ph_hundred_and[12];
	char ph_thousand_and[12];
	
	hundreds = value / 100;
	buf1[0] = 0;

	// part 1: thousands and hundreds, built in buf1
	if(hundreds > 0)
	{
		ph_thousands[0] = 0;
		ph_thousand_and[0] = 0;

		Lookup("_0C",ph_100);

		// value is 1000 or more; with option 0x0800 the value 19xx is not
		// split, so that it is spoken as a number of hundreds
		if((hundreds >= 10) && (((langopts.numbers & 0x0800) == 0) || (hundreds != 19)))
		{
			ph_digits[0] = 0;

			// if there is no exact dictionary entry for this number of
			// thousands, then speak the number followed by "thousand"
			if(LookupThousands(hundreds / 10, thousandplex+1, ph_10T) == 0)
			{
				x = 0;
				if(langopts.numbers2 & (1 << (thousandplex+1)))
					x = 4;
				LookupNum2(hundreds/10, x, ph_digits);
			}

			if(langopts.numbers2 & 0x200)
				sprintf(ph_thousands,"%s%s%c",ph_10T,ph_digits,phonPAUSE_NOLINK);  // say "thousands" before its number, not after
			else
				sprintf(ph_thousands,"%s%s%c",ph_digits,ph_10T,phonPAUSE_NOLINK);

			hundreds %= 10;
			if(hundreds == 0)
				ph_100[0] = 0;
			suppress_null = 1;
		}

		ph_digits[0] = 0;
		if(hundreds > 0)
		{
			if((langopts.numbers & 0x100000) && (prev_thousands || (ph_thousands[0] != 0)))
			{
				// "and" between the thousands and the hundreds
				Lookup("_0and",ph_thousand_and);
			}

			suppress_null = 1;

			found = 0;
			if((value % 1000) == 100)
			{
				// is there a special pronunciation for exactly 100 ?
				found = Lookup("_1C0",ph_digits);
			}
			if(!found)
			{
				sprintf(string,"_%dC",hundreds);
				found = Lookup(string,ph_digits);  // is there a specific pronunciation for n-hundred ?
			}

			if(found)
			{
				// ph_digits already includes the word for "hundred"
				ph_100[0] = 0;
			}
			else
			{
				// with option 0x400, "one" is not spoken before "hundred"
				if((hundreds > 1) || ((langopts.numbers & 0x400) == 0))
				{
					LookupNum2(hundreds,0,ph_digits);
				}
			}
		}

		sprintf(buf1,"%s%s%s%s",ph_thousands,ph_thousand_and,ph_digits,ph_100);
	}

	// part 2: optional "and" between the hundreds and the tens/units
	ph_hundred_and[0] = 0;
	if((langopts.numbers & 0x40) && ((value % 100) != 0))
	{
		if((value > 100) || (prev_thousands && (thousandplex==0)))
		{
			Lookup("_0and",ph_hundred_and);
		}
	}


	// part 3: tens and units, built in buf2
	buf2[0] = 0;
	value = value % 100;

	if(value == 0)
	{
		if(suppress_null == 0)
			Lookup("_0",buf2);
	}
	else
	{
		// choose the control flags for LookupNum2()
		x = 0;
		if(thousandplex==0)
			x = 1;   // allow "eins" for 1 rather than "ein"
		else
		{
			if(langopts.numbers2 & (1 << thousandplex))
				x = 4;   // use variant (feminine) for before thousands and millions
		}

		if(LookupNum2(value,x,buf2) != 0)
		{
			if(langopts.numbers & 0x80)
				ph_hundred_and[0] = 0;  // don't put 'and' after 'hundred' if there's 'and' between tens and units
		}
	}

	sprintf(ph_out,"%s%s%s",buf1,ph_hundred_and,buf2);

	return(0);
}  // end of LookupNum3



static const char *M_Variant(int value)
{//====================================
	// Choose which form of a "thousand"/"million" dictionary key to use for
	// this number: "0M" normally, or "0MA" / "1MA" when a language option
	// asks for a different form after certain final digits.
	// The caller puts '_' before the result and the thousandplex number after it.
	const int last_two = value % 100;
	const int last_digit = value % 10;

	// 10 to 20 always take the ordinary form
	if((last_two >= 10) && (last_two <= 20))
		return("0M");

	// for Polish language - two forms of plural!
	if((translator->langopts.numbers2 & 0x40) && (last_digit >= 2) && (last_digit <= 4))
		return("0MA");

	if((translator->langopts.numbers2 & 0x80) && (last_digit == 1))
		return("1MA");

	return("0M");
}


int Translator::LookupThousands(int value, int thousandplex, char *ph_out)
{//=======================================================================
	// Get the word for "thousand", "million" etc. which follows a number.
	//   value:         the number which the word follows
	//   thousandplex:  1 = thousands, 2 = millions, ...
	//   ph_out:        receives the phonemes
	// Returns non-zero if a dictionary entry was found which includes the
	// number itself ("_<value>M<n>"), so that the caller need not speak it.
	char key[12];
	char ph_prefix[12];
	char ph_name[40];
	int exact;

	ph_prefix[0] = 0;

	// first look for a match with the exact value of thousands
	sprintf(key,"_%dM%d",value,thousandplex);
	exact = Lookup(key,ph_name);

	if(exact == 0)
	{
		// optional word in front, when the last two digits are 20 or more
		if((value % 100) >= 20)
			Lookup("_0of",ph_prefix);

		// the general word for this thousandplex
		sprintf(key,"_%s%d",M_Variant(value),thousandplex);
		if(Lookup(key,ph_name) == 0)
		{
			// repeat "thousand" if higher order names are not available
			sprintf(key,"_%dM1",value);
			exact = Lookup(key,ph_name);
			if(exact == 0)
				Lookup("_0M1",ph_name);
		}
	}

	sprintf(ph_out,"%s%s",ph_prefix,ph_name);
	return(exact);
}


int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, int wflags)
{//=========================================================================================
//  Number translation with various options
// the "word" may be up to 4 digits
// "words" of 3 digits may be preceded by another number "word" for thousands or millions
//
//   word:    the digits; the text before and after them is also examined
//   ph_out:  receives the phonemes
//   flags:   set to FLAG_FOUND when the number has been translated
//   wflags:  not used by this function
// Returns 0 for a number with a leading zero (the caller then speaks the
// digits individually), otherwise 1.
//
// The text may contain bytes of 0x80 and above (UTF-8), so every char is
// converted to unsigned char before it is given to isdigit() or isspace().

	int n_digits;
	int value;
	int ix;
	unsigned char c;
	int suppress_null = 0;
	int decimal_point = 0;
	int thousandplex = 0;
	int thousands_inc = 0;
	int prev_thousands = 0;
	int this_value;
	static int prev_value;   // value of the number "word" translated by the previous call
	int decimal_count;
	int max_decimal_count;
	char string[12];  // for looking up entries in de_list
	char buf1[100];
	char ph_append[50];
	char ph_buf[200];
	char ph_buf2[50];

	static const char str_pause[2] = {phonPAUSE_NOLINK,0};

	for(ix=0; isdigit((unsigned char)word[ix]); ix++) ;
	n_digits = ix;
	value = this_value = atoi(word);

	ph_append[0] = 0;
	ph_buf2[0] = 0;

	// is there a previous thousands part (as a previous "word") ?
	if((n_digits == 3) && (word[-2] == langopts.thousands_sep) && isdigit((unsigned char)word[-3]))
	{
		prev_thousands = 1;
	}
	else
	if((langopts.thousands_sep == ' ') || (langopts.numbers & 0x1000))
	{
		// thousands groups can be separated by spaces
		if((n_digits == 3) && isdigit((unsigned char)word[-2]))
		{
			prev_thousands = 1;
		}
	}

	if((word[0] == '0') && (prev_thousands == 0) && (word[1] != langopts.decimal_sep))
	{
		if((n_digits == 2) && (word[3] == ':') && isdigit((unsigned char)word[5]) && isspace((unsigned char)word[7]))
		{
			// looks like a time 02:30, omit the leading zero
		}
		else
		{
			return(0);     // number string with leading zero, speak as individual digits
		}
	}

	// thousands_inc is the number of bytes between this group and the next one
	if((langopts.numbers & 0x1000) && (word[n_digits] == ' '))
		thousands_inc = 1;
	else
	if(word[n_digits] == langopts.thousands_sep)
		thousands_inc = 2;

	if(thousands_inc > 0)
	{
		// if the following "words" are three-digit groups, count them and add
		// a "thousand"/"million" suffix to this one

		ix = n_digits + thousands_inc;
		while(isdigit((unsigned char)word[ix]) && isdigit((unsigned char)word[ix+1]) && isdigit((unsigned char)word[ix+2]))
		{
			thousandplex++;
			if(word[ix+3] == langopts.thousands_sep)
				ix += (3 + thousands_inc);
			else
				break;
		}
	}

	if((value == 0) && prev_thousands)
	{
		suppress_null = 1;
	}

	if((word[n_digits] == langopts.decimal_sep) && isdigit((unsigned char)word[n_digits+1]))
	{
		// this "word" ends with a decimal point
		Lookup("_dpt",ph_append);
		decimal_point = 1;
	}
	else
	if(suppress_null == 0)
	{
		if(thousands_inc > 0)
		{
			if((thousandplex > 0) && (value < 1000))
			{
				if(langopts.numbers2 & 0x100)
				{
					if((thousandplex == 1) && (value >= 100))
					{
						// special word for 100,000's
						char ph_buf3[20];
						sprintf(string,"_%dL",value / 100);
						if(Lookup(string,ph_buf2) == 0)
						{
							LookupNum2(value/100,0,ph_buf2);
							Lookup("_0L",ph_buf3);
							strcat(ph_buf2,ph_buf3);
						}
						value %= 100;
						if(value == 0)
							suppress_null = 1;
					}
				}
				if((suppress_null == 0) && (LookupThousands(value,thousandplex,ph_append)))
				{
					// found an exact match for N thousand
					value = 0;
					suppress_null = 1;
				}
			}
		}
	}
	else
	if((thousandplex > 1) && prev_thousands && (prev_value > 0))
	{
		// this group is zero, but it follows a non-zero group
		sprintf(string,"_%s%d",M_Variant(value),thousandplex+1);
		if(Lookup(string,buf1)==0)
		{
			// speak this thousandplex if there was no word for the previous thousandplex
			sprintf(string,"_0M%d",thousandplex);
			Lookup(string,ph_append);
		}
	}

	if((ph_append[0] == 0) && (word[n_digits] == '.') && (thousandplex == 0))
	{
		Lookup("_.",ph_append);
	}

	LookupNum3(value, ph_buf, suppress_null, thousandplex, prev_thousands);
	if((thousandplex > 0) && (langopts.numbers2 & 0x200))
		sprintf(ph_out,"%s%s%s",ph_append,ph_buf2,ph_buf);  // say "thousands" before its number
	else
		sprintf(ph_out,"%s%s%s",ph_buf2,ph_buf,ph_append);


	// each pass of this loop speaks the digits which follow one decimal point
	while(decimal_point)
	{
		n_digits++;

		decimal_count = 0;
		while(isdigit((unsigned char)word[n_digits+decimal_count]))
			decimal_count++;

		if(decimal_count > 1)
		{
			max_decimal_count = 2;
			switch(langopts.numbers & 0xe000)
			{
			case 0x8000:
				max_decimal_count = 5;
				// fall through
			case 0x4000:
				// French/Polish decimal fraction
				while(word[n_digits] == '0')
				{
					Lookup("_0",buf1);
					strcat(ph_out,buf1);
					decimal_count--;
					n_digits++;
				}
				if((decimal_count <= max_decimal_count) && isdigit((unsigned char)word[n_digits]))
				{
					LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
					strcat(ph_out,buf1);
					n_digits += decimal_count;
				}
				break;

			case 0x2000:
				// Italian decimal fractions
				if((decimal_count < 4) || ((decimal_count==4) && (word[n_digits] != '0')))
				{
					LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
					strcat(ph_out,buf1);
					if(word[n_digits]=='0')
					{
						// decimal part has leading zeros, so add a "hundredths" or "thousandths" suffix
						sprintf(string,"_0Z%d",decimal_count);
						Lookup(string,buf1);
						strcat(ph_out,buf1);
					}
					n_digits += decimal_count;
				}
				break;

			case 0x6000:
				// Romanian decimal fractions
				if((decimal_count <= 4) && (word[n_digits] != '0'))
				{
						LookupNum3(atoi(&word[n_digits]),buf1,0,0,0);
						strcat(ph_out,buf1);
						n_digits += decimal_count;
				}
				break;
			}
		}

		// speak any digits which remain, one at a time
		while(isdigit(c = word[n_digits]) && (strlen(ph_out) < (N_WORD_PHONEMES - 10)))
		{
			value = word[n_digits++] - '0';
			LookupNum2(value, 1, buf1);
			strcat(ph_out,buf1);
		}

		// something after the decimal part ?
		if(Lookup("_dpt2",buf1))
			strcat(ph_out,buf1);

		if(c == langopts.decimal_sep)
		{
			// another decimal point follows, go round again
			Lookup("_dpt",buf1);
			strcat(ph_out,buf1);
		}
		else
		{
			decimal_point = 0;
		}
	}
	if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH))
	{
		int next_char;
		char *p;
		p = &word[n_digits+1];

		p += utf8_in(&next_char,p,0);
		if((langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
			utf8_in(&next_char,p,0);

		if(!iswalpha(next_char))
			strcat(ph_out,str_pause);  // don't add pause for 100s,  6th, etc.
	}

	*flags = FLAG_FOUND;
	prev_value = this_value;
	return(1);
}  // end of TranslateNumber_1



int Translator::TranslateNumber(char *word1, char *ph_out, unsigned int *flags, int wflags)
{//=======================================================================================
	// Translate a number into phonemes, using the number scheme selected by
	// the low two bits of langopts.numbers.
	// Returns 0 (not translated) if digits are to be spoken individually, or
	// if the scheme is not 1; otherwise returns the result of TranslateNumber_1().
	if((option_sayas != SAYAS_DIGITS1) && ((langopts.numbers & 0x3) == 1))
		return(TranslateNumber_1(word1,ph_out,flags,wflags));

	return(0);
}  // end of TranslateNumber
Summary ✨

This C++ code is part of a speech synthesis system. It translates numbers, Roman numerals and individual letters (including accented letters and ligatures) into phoneme strings, which later stages of the synthesizer turn into speech. Language-specific options control details such as the thousands and decimal separators, the order of tens and units, the use of "and", and how decimal fractions are spoken.
Alerts (3)

Complexity hotspot; line 1181 (total complexity: 6)
1181
Complexity hotspot; line 1183 (total complexity: 6)
1183
Complexity hotspot; line 1272 (total complexity: 6)
1272