/native/external/espeak/src/phonemelist.cpp

http://eyes-free.googlecode.com/ · C++ · 664 lines · 502 code · 96 blank · 66 comment · 221 complexity · 2a91be9ac15d9f2dd5bb6d97b662b3ba MD5 · raw file

  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: jonsd@users.sourceforge.net *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "translate.h"
  28. const unsigned char pause_phonemes[8] = {0, phonPAUSE_VSHORT, phonPAUSE_SHORT, phonPAUSE, phonPAUSE_LONG, phonGLOTTALSTOP, phonPAUSE_LONG, phonPAUSE_LONG};
  29. int Translator::ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch)
  30. {//======================================================================================================
  31. // Called for each phoneme in the phoneme list, to allow a language to make changes
  32. // ph The current phoneme
  33. return(0);
  34. }
  35. int Translator::SubstitutePhonemes(PHONEME_LIST2 *plist_out)
  36. {//=========================================================
  37. // Copy the phonemes list and perform any substitutions that are required for the
  38. // current voice
  39. int ix;
  40. int k;
  41. int replace_flags;
  42. int n_plist_out = 0;
  43. int word_end;
  44. int max_stress = -1;
  45. int switched_language = 0;
  46. int max_stress_posn=0;
  47. int n_syllables = 0;
  48. int syllable = 0;
  49. int syllable_stressed = 0;
  50. PHONEME_LIST2 *plist2;
  51. PHONEME_LIST2 *pl;
  52. PHONEME_TAB *next=NULL;
  53. for(ix=0; (ix < n_ph_list2) && (n_plist_out < N_PHONEME_LIST); ix++)
  54. {
  55. plist2 = &ph_list2[ix];
  56. if(plist2->phcode == phonSWITCH)
  57. switched_language ^= 1;
  58. // don't do any substitution if the language has been temporarily changed
  59. if(switched_language == 0)
  60. {
  61. if(ix < (n_ph_list2 -1))
  62. next = phoneme_tab[ph_list2[ix+1].phcode];
  63. word_end = 0;
  64. if((plist2+1)->sourceix || ((next != 0) && (next->type == phPAUSE)))
  65. word_end = 1; // this phoneme is the end of a word
  66. if(langopts.phoneme_change != 0)
  67. {
  68. // this language does changes to phonemes after translation
  69. if(plist2->sourceix)
  70. {
  71. // start of a word, find the stressed vowel
  72. syllable = 0;
  73. syllable_stressed = 0;
  74. n_syllables = 0;
  75. max_stress = -1;
  76. max_stress_posn = ix;
  77. for(k=ix; k < n_ph_list2; k++)
  78. {
  79. if(((pl = &ph_list2[k])->sourceix != 0) && (k > ix))
  80. break;
  81. pl->stress &= 0xf;
  82. if(phoneme_tab[pl->phcode]->type == phVOWEL)
  83. {
  84. n_syllables++;
  85. if(pl->stress > max_stress)
  86. {
  87. syllable_stressed = n_syllables;
  88. max_stress = pl->stress;
  89. max_stress_posn = k;
  90. }
  91. }
  92. }
  93. }
  94. if(phoneme_tab[plist2->phcode]->type == phVOWEL)
  95. {
  96. syllable++;
  97. }
  98. // make any language specific changes
  99. int flags;
  100. CHANGEPH ch;
  101. flags = 0;
  102. if(ix == max_stress_posn)
  103. flags |= 2;
  104. if(ix > max_stress_posn)
  105. flags |= 4;
  106. if(ph_list2[ix].synthflags & SFLAG_DICTIONARY)
  107. flags |= 8;
  108. ch.flags = flags | word_end;
  109. ch.stress = plist2->stress;
  110. ch.stress_highest = max_stress;
  111. ch.n_vowels = n_syllables;
  112. ch.vowel_this = syllable;
  113. ch.vowel_stressed = syllable_stressed;
  114. ChangePhonemes(ph_list2, n_ph_list2, ix, phoneme_tab[ph_list2[ix].phcode], &ch);
  115. }
  116. // check whether a Voice has specified that we should replace this phoneme
  117. for(k=0; k<n_replace_phonemes; k++)
  118. {
  119. if(plist2->phcode == replace_phonemes[k].old_ph)
  120. {
  121. replace_flags = replace_phonemes[k].type;
  122. if((replace_flags & 1) && (word_end == 0))
  123. continue; // this replacement only occurs at the end of a word
  124. if((replace_flags & 2) && ((plist2->stress & 0x7) > 3))
  125. continue; // this replacement doesn't occur in stressed syllables
  126. // substitute the replacement phoneme
  127. plist2->phcode = replace_phonemes[k].new_ph;
  128. break;
  129. }
  130. }
  131. if(plist2->phcode == 0)
  132. {
  133. continue; // phoneme has been replaced by NULL, so don't copy it
  134. }
  135. }
  136. // copy phoneme into the output list
  137. memcpy(&plist_out[n_plist_out++],plist2,sizeof(PHONEME_LIST2));
  138. }
  139. return(n_plist_out);
  140. } // end of SubstitutePhonemes
  141. void Translator::MakePhonemeList(int post_pause, int start_sentence)
  142. {//============================================================================================
  143. int ix=0;
  144. int j;
  145. int insert_ph = 0;
  146. PHONEME_LIST *phlist;
  147. PHONEME_TAB *ph;
  148. PHONEME_TAB *prev, *next, *next2;
  149. int unstress_count = 0;
  150. int word_stress = 0;
  151. int switched_language = 0;
  152. int max_stress;
  153. int voicing;
  154. int regression;
  155. int end_sourceix;
  156. int alternative;
  157. int first_vowel=0; // first vowel in a word
  158. PHONEME_LIST2 ph_list3[N_PHONEME_LIST];
  159. static PHONEME_LIST2 ph_list2_null = {0,0,0,0,0};
  160. PHONEME_LIST2 *plist2 = &ph_list2_null;
  161. PHONEME_LIST2 *plist2_inserted = NULL;
  162. phlist = phoneme_list;
  163. end_sourceix = ph_list2[n_ph_list2-1].sourceix;
  164. // is the last word of the clause unstressed ?
  165. max_stress = 0;
  166. for(j=n_ph_list2-3; j>=0; j--)
  167. {
  168. // start with the last phoneme (before the terminating pauses) and move forwards
  169. if((ph_list2[j].stress & 0x7f) > max_stress)
  170. max_stress = ph_list2[j].stress & 0x7f;
  171. if(ph_list2[j].sourceix != 0)
  172. break;
  173. }
  174. if(max_stress < 4)
  175. {
  176. // the last word is unstressed, look for a previous word that can be stressed
  177. while(--j >= 0)
  178. {
  179. if(ph_list2[j].synthflags & SFLAG_PROMOTE_STRESS) // dictionary flags indicated that this stress can be promoted
  180. {
  181. ph_list2[j].stress = 4; // promote to stressed
  182. break;
  183. }
  184. if(ph_list2[j].stress >= 4)
  185. {
  186. // found a stressed syllable, so stop looking
  187. break;
  188. }
  189. }
  190. }
  191. if((regression = langopts.param[LOPT_REGRESSIVE_VOICING]) != 0)
  192. {
  193. // set consonant clusters to all voiced or all unvoiced
  194. // Regressive
  195. int type;
  196. voicing = 0;
  197. for(j=n_ph_list2-1; j>=0; j--)
  198. {
  199. ph = phoneme_tab[ph_list2[j].phcode];
  200. if(ph == NULL)
  201. continue;
  202. if(ph->code == phonSWITCH)
  203. switched_language ^= 1;
  204. if(switched_language)
  205. continue;
  206. type = ph->type;
  207. if(regression & 0x2)
  208. {
  209. // LANG=Russian, [v] amd [v;] don't cause regression, or [R^]
  210. if((ph->mnemonic == 'v') || (ph->mnemonic == ((';'<<8)+'v')) || ((ph->mnemonic & 0xff)== 'R'))
  211. type = phLIQUID;
  212. }
  213. if((type==phSTOP) || type==(phFRICATIVE))
  214. {
  215. if(voicing==0)
  216. {
  217. voicing = 1;
  218. }
  219. else
  220. if((voicing==2) && ((ph->phflags & phALTERNATIVE)==phSWITCHVOICING))
  221. {
  222. ph_list2[j].phcode = ph->alternative_ph; // change to voiced equivalent
  223. }
  224. }
  225. else
  226. if((type==phVSTOP) || type==(phVFRICATIVE))
  227. {
  228. if(voicing==0)
  229. {
  230. voicing = 2;
  231. }
  232. else
  233. if((voicing==1) && ((ph->phflags & phALTERNATIVE)==phSWITCHVOICING))
  234. {
  235. ph_list2[j].phcode = ph->alternative_ph; // change to unvoiced equivalent
  236. }
  237. }
  238. else
  239. {
  240. if(regression & 0x8)
  241. {
  242. // LANG=Polish, propagate through liquids and nasals
  243. if((type == phPAUSE) || (type == phVOWEL))
  244. voicing = 0;
  245. }
  246. else
  247. {
  248. voicing = 0;
  249. }
  250. }
  251. if((regression & 0x4) && (ph_list2[j].sourceix))
  252. {
  253. // stop propagation at a word boundary
  254. voicing = 0;
  255. }
  256. }
  257. }
  258. n_ph_list2 = SubstitutePhonemes(ph_list3) - 2;
  259. // transfer all the phonemes of the clause into phoneme_list
  260. ph = phoneme_tab[phonPAUSE];
  261. switched_language = 0;
  262. for(j=0; insert_ph || ((j<n_ph_list2) && (ix < N_PHONEME_LIST-3)); j++)
  263. {
  264. prev = ph;
  265. plist2 = &ph_list3[j];
  266. if(insert_ph != 0)
  267. {
  268. // we have a (linking) phoneme which we need to insert here
  269. next = phoneme_tab[plist2->phcode]; // this phoneme, i.e. after the insert
  270. // re-use the previous entry for the inserted phoneme.
  271. // That's OK because we don't look backwards from plist2
  272. j--;
  273. plist2 = plist2_inserted = &ph_list3[j];
  274. memset(plist2, 0, sizeof(*plist2));
  275. plist2->phcode = insert_ph;
  276. ph = phoneme_tab[insert_ph];
  277. insert_ph = 0;
  278. }
  279. else
  280. {
  281. // otherwise get the next phoneme from the list
  282. ph = phoneme_tab[plist2->phcode];
  283. if(plist2->phcode == phonSWITCH)
  284. {
  285. // change phoneme table
  286. SelectPhonemeTable(plist2->tone_number);
  287. switched_language ^= SFLAG_SWITCHED_LANG;
  288. }
  289. next = phoneme_tab[(plist2+1)->phcode]; // the phoneme after this one
  290. }
  291. if(plist2->sourceix)
  292. {
  293. // start of a word
  294. int k;
  295. word_stress = 0;
  296. first_vowel = 1;
  297. // find the highest stress level in this word
  298. for(k=j+1; k < n_ph_list2; k++)
  299. {
  300. if(ph_list3[k].sourceix)
  301. break; // start of the next word
  302. if(ph_list3[k].stress > word_stress)
  303. word_stress = ph_list3[k].stress;
  304. }
  305. }
  306. if(ph == NULL) continue;
  307. if(ph->type == phVOWEL)
  308. {
  309. // check for consecutive unstressed syllables
  310. if(plist2->stress == 0)
  311. {
  312. // an unstressed vowel
  313. unstress_count++;
  314. if((unstress_count > 1) && ((unstress_count & 1)==0))
  315. {
  316. // in a sequence of unstressed syllables, reduce alternate syllables to 'diminished'
  317. // stress. But not for the last phoneme of a stressed word
  318. if((langopts.stress_flags & 0x2) || ((word_stress > 3) && ((plist2+1)->sourceix!=0)))
  319. {
  320. // An unstressed final vowel of a stressed word
  321. unstress_count=1; // try again for next syllable
  322. }
  323. else
  324. {
  325. plist2->stress = 1; // change stress to 'diminished'
  326. }
  327. }
  328. }
  329. else
  330. {
  331. unstress_count = 0;
  332. }
  333. }
  334. alternative = 0;
  335. if(ph->alternative_ph > 0)
  336. {
  337. switch(ph->phflags & phALTERNATIVE)
  338. {
  339. // This phoneme changes if vowel follows, or doesn't follow, depending on its phNOTFOLLOWS flag
  340. case phBEFORENOTVOWEL:
  341. if(next->type != phVOWEL)
  342. alternative = ph->alternative_ph;
  343. break;
  344. case phBEFORENOTVOWEL2: // LANG=tr
  345. if(((plist2+1)->sourceix != 0) ||
  346. ((next->type != phVOWEL) && ((phoneme_tab[(plist2+2)->phcode]->type != phVOWEL) || ((plist2+2)->sourceix != 0))))
  347. {
  348. alternative = ph->alternative_ph;
  349. }
  350. break;
  351. case phBEFOREVOWELPAUSE:
  352. if((next->type == phVOWEL) || (next->type == phPAUSE))
  353. alternative = ph->alternative_ph;
  354. break;
  355. case phBEFOREVOWEL:
  356. if(next->type == phVOWEL)
  357. alternative = ph->alternative_ph;
  358. break;
  359. }
  360. }
  361. if(ph->phflags & phBEFOREPAUSE)
  362. {
  363. if(next->type == phPAUSE)
  364. alternative = ph->link_out; // replace with the link_out phoneme
  365. }
  366. if(alternative == 1)
  367. continue; // NULL phoneme, discard
  368. if(alternative > 1)
  369. {
  370. PHONEME_TAB *ph2;
  371. ph2 = ph;
  372. ph = phoneme_tab[alternative];
  373. if(ph->type == phVOWEL)
  374. {
  375. plist2->synthflags |= SFLAG_SYLLABLE;
  376. if(ph2->type != phVOWEL)
  377. plist2->stress = 0; // change from non-vowel to vowel, make sure it's unstressed
  378. }
  379. else
  380. plist2->synthflags &= ~SFLAG_SYLLABLE;
  381. }
  382. if(langopts.param[LOPT_REDUCE_T])
  383. {
  384. if((ph->mnemonic == 't') && (plist2->sourceix == 0) && ((prev->type == phVOWEL) || (prev->mnemonic == 'n')))
  385. {
  386. if(((plist2+1)->sourceix == 0) && ((plist2+1)->stress < 3) && (next->type == phVOWEL))
  387. {
  388. ph = phoneme_tab[phonT_REDUCED];
  389. }
  390. }
  391. }
  392. while((ph->reduce_to != 0) && (!(plist2->synthflags & SFLAG_DICTIONARY) || (langopts.param[LOPT_REDUCE] & 1)))
  393. {
  394. int reduce_level;
  395. int stress_level;
  396. int reduce = 0;
  397. reduce_level = (ph->phflags >> 28) & 7;
  398. if(ph->type == phVOWEL)
  399. {
  400. stress_level = plist2->stress;
  401. }
  402. else
  403. {
  404. // consonant, get stress from the following vowel
  405. if(next->type == phVOWEL)
  406. stress_level = (plist2+1)->stress;
  407. else
  408. break;
  409. }
  410. if((stress_level == 1) && (first_vowel))
  411. stress_level = 0; // ignore 'dimished' stress on first syllable
  412. if(stress_level == 1)
  413. reduce = 1; // stress = 'reduced'
  414. if(stress_level < reduce_level)
  415. reduce =1;
  416. if((word_stress < 4) && (langopts.param[LOPT_REDUCE] & 0x2) && (stress_level >= word_stress))
  417. {
  418. // don't reduce the most stressed syllable in an unstressed word
  419. reduce = 0;
  420. }
  421. if(reduce)
  422. ph = phoneme_tab[ph->reduce_to];
  423. else
  424. break;
  425. }
  426. if(ph->type == phVOWEL)
  427. first_vowel = 0;
  428. if((plist2+1)->synthflags & SFLAG_LENGTHEN)
  429. {
  430. static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};
  431. if(strchr(types_double,next->type))
  432. {
  433. // lengthen this consonant by doubling it
  434. insert_ph = next->code;
  435. (plist2+1)->synthflags ^= SFLAG_LENGTHEN;
  436. }
  437. }
  438. if((plist2+1)->sourceix != 0)
  439. {
  440. int x;
  441. if(langopts.vowel_pause && (ph->type != phPAUSE))
  442. {
  443. if((ph->type != phVOWEL) && (langopts.vowel_pause & 0x200))
  444. {
  445. // add a pause after a word which ends in a consonant
  446. insert_ph = phonPAUSE_NOLINK;
  447. }
  448. if(next->type == phVOWEL)
  449. {
  450. if((x = langopts.vowel_pause & 0x0c) != 0)
  451. {
  452. // break before a word which starts with a vowel
  453. if(x == 0xc)
  454. insert_ph = phonPAUSE_NOLINK;
  455. else
  456. insert_ph = phonPAUSE_VSHORT;
  457. }
  458. if((ph->type == phVOWEL) && ((x = langopts.vowel_pause & 0x03) != 0))
  459. {
  460. // adjacent vowels over a word boundary
  461. if(x == 2)
  462. insert_ph = phonPAUSE_SHORT;
  463. else
  464. insert_ph = phonPAUSE_VSHORT;
  465. }
  466. if(((plist2+1)->stress >= 4) && (langopts.vowel_pause & 0x100))
  467. {
  468. // pause before a words which starts with a stressed vowel
  469. insert_ph = phonPAUSE_SHORT;
  470. }
  471. }
  472. }
  473. if(plist2 != plist2_inserted)
  474. {
  475. if((x = (langopts.word_gap & 0x7)) != 0)
  476. {
  477. insert_ph = pause_phonemes[x];
  478. }
  479. if(option_wordgap > 0)
  480. {
  481. insert_ph = phonPAUSE_LONG;
  482. }
  483. }
  484. }
  485. next2 = phoneme_tab[(plist2+2)->phcode];
  486. if((insert_ph == 0) && (ph->link_out != 0) && !(ph->phflags & phBEFOREPAUSE) && (((plist2+1)->synthflags & SFLAG_EMBEDDED)==0))
  487. {
  488. if(ph->phflags & phAPPENDPH)
  489. {
  490. // always append the specified phoneme, unless it already is the next phoneme
  491. if((ph->link_out != (plist2+1)->phcode) && (next->type == phVOWEL))
  492. // if(ph->link_out != (plist2+1)->phcode)
  493. {
  494. insert_ph = ph->link_out;
  495. }
  496. }
  497. else
  498. if(((langopts.word_gap & 8)==0) || ((plist2+1)->sourceix == 0))
  499. {
  500. // This phoneme can be linked to a following vowel by inserting a linking phoneme
  501. if(next->type == phVOWEL)
  502. insert_ph = ph->link_out;
  503. else
  504. if(next->code == phonPAUSE_SHORT)
  505. {
  506. // Pause followed by Vowel, replace the Short Pause with the linking phoneme,
  507. if(next2->type == phVOWEL)
  508. (plist2+1)->phcode = ph->link_out; // replace pause by linking phoneme
  509. }
  510. }
  511. }
  512. if(ph->phflags & phVOICED)
  513. {
  514. // check that a voiced consonant is preceded or followed by a vowel or liquid
  515. // and if not, add a short schwa
  516. // not yet implemented
  517. }
  518. phlist[ix].ph = ph;
  519. phlist[ix].type = ph->type;
  520. phlist[ix].env = PITCHfall; // default, can be changed in the "intonation" module
  521. phlist[ix].synthflags = plist2->synthflags | switched_language;
  522. phlist[ix].tone = plist2->stress & 0xf;
  523. phlist[ix].tone_ph = plist2->tone_number;
  524. phlist[ix].sourceix = 0;
  525. if(plist2->sourceix != 0)
  526. {
  527. phlist[ix].sourceix = plist2->sourceix;
  528. phlist[ix].newword = 1; // this phoneme is the start of a word
  529. if(start_sentence)
  530. {
  531. phlist[ix].newword = 5; // start of sentence + start of word
  532. start_sentence = 0;
  533. }
  534. }
  535. else
  536. {
  537. phlist[ix].newword = 0;
  538. }
  539. phlist[ix].length = ph->std_length;
  540. if((ph->code == phonPAUSE_LONG) && (option_wordgap > 0))
  541. {
  542. phlist[ix].ph = phoneme_tab[phonPAUSE_SHORT];
  543. phlist[ix].length = option_wordgap*14; // 10mS per unit at the default speed
  544. }
  545. if(ph->type==phVOWEL || ph->type==phLIQUID || ph->type==phNASAL || ph->type==phVSTOP || ph->type==phVFRICATIVE)
  546. {
  547. phlist[ix].length = 128; // length_mod
  548. phlist[ix].env = PITCHfall;
  549. }
  550. phlist[ix].prepause = 0;
  551. phlist[ix].amp = 20; // default, will be changed later
  552. phlist[ix].pitch1 = 0x400;
  553. phlist[ix].pitch2 = 0x400;
  554. ix++;
  555. }
  556. phlist[ix].newword = 2; // end of clause
  557. phlist[ix].type = phPAUSE; // terminate with 2 Pause phonemes
  558. phlist[ix].length = post_pause; // length of the pause, depends on the punctuation
  559. phlist[ix].sourceix = end_sourceix;
  560. phlist[ix].synthflags = 0;
  561. phlist[ix++].ph = phoneme_tab[phonPAUSE];
  562. phlist[ix].type = phPAUSE;
  563. phlist[ix].length = 0;
  564. phlist[ix].sourceix=0;
  565. phlist[ix].synthflags = 0;
  566. phlist[ix++].ph = phoneme_tab[phonPAUSE_SHORT];
  567. n_phoneme_list = ix;
  568. } // end of MakePhonemeList