/src/modules/Lexicon/lex_aux.cc

https://gitlab.com/generic-library/festival · C++ · 194 lines · 121 code · 20 blank · 53 comment · 39 complexity · 24d1aace24faa04f368e34739804be15 MD5 · raw file

  1. /*************************************************************************/
  2. /* */
  3. /* Centre for Speech Technology Research */
  4. /* University of Edinburgh, UK */
  5. /* Copyright (c) 1996,1997 */
  6. /* All Rights Reserved. */
  7. /* */
  8. /* Permission is hereby granted, free of charge, to use and distribute */
  9. /* this software and its documentation without restriction, including */
  10. /* without limitation the rights to use, copy, modify, merge, publish, */
  11. /* distribute, sublicense, and/or sell copies of this work, and to */
  12. /* permit persons to whom this work is furnished to do so, subject to */
  13. /* the following conditions: */
  14. /* 1. The code must retain the above copyright notice, this list of */
  15. /* conditions and the following disclaimer. */
  16. /* 2. Any modifications must be clearly marked as such. */
  17. /* 3. Original authors' names are not deleted. */
  18. /* 4. The authors' names are not used to endorse or promote products */
  19. /* derived from this software without specific prior written */
  20. /* permission. */
  21. /* */
  22. /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
  23. /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
  24. /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
  25. /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
  26. /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
  27. /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
  28. /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
  29. /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
  30. /* THIS SOFTWARE. */
  31. /* */
  32. /*************************************************************************/
  33. /* Author : Alan W Black */
  34. /* Date : April 1996 */
  35. /*-----------------------------------------------------------------------*/
  36. /* */
  37. /* Basic lexicon utilities */
  38. /* */
  39. /*=======================================================================*/
  40. #include <cstdio>
  41. #include "festival.h"
  42. #include "lexicon.h"
  43. #include "lexiconP.h"
  44. static void split_stress(LISP phones, LISP &phs, LISP &stresses);
  45. static char *v_stress(const char *ph,int &stress);
  46. static int syl_contains_vowel(LISP phones);
  47. static int syl_breakable(LISP syl, LISP rest);
  48. LISP lex_syllabify(LISP phones)
  49. {
  50. /* Given a simple list of phones, syllabify them and add stress */
  51. LISP syl,syls,p;
  52. int stress = 1;
  53. for (syl=NIL,syls=NIL,p=phones; p != NIL; p=cdr(p))
  54. {
  55. syl = cons(car(p),syl);
  56. if (syl_breakable(syl,cdr(p)))
  57. {
  58. syls = cons(cons(reverse(syl),cons(flocons(stress),NIL)),syls);
  59. stress = 0;
  60. syl = NIL;
  61. }
  62. }
  63. return reverse(syls);
  64. }
  65. LISP lex_syllabify_phstress(LISP phones)
  66. {
  67. /* Given a list of phones where vowels may have stress numeral, */
  68. /* as found in BEEP and CMU syllabify them */
  69. LISP syl,syls,p,phs,stresses,s;
  70. int stress = 0;
  71. const char *ph;
  72. split_stress(phones,phs,stresses);
  73. for (syl=NIL,syls=NIL,p=phs,s=stresses;
  74. p != NIL;
  75. p=cdr(p),s=cdr(s))
  76. {
  77. ph = get_c_string(car(p));
  78. if (!streq(ph,ph_silence()))
  79. syl = cons(car(p),syl);
  80. if (car(s) && (!streq(get_c_string(car(s)),"0")))
  81. stress = 1; // should worry about 2 stress too
  82. if (streq(ph,ph_silence()) || syl_breakable(syl,cdr(p)))
  83. {
  84. syls = cons(cons(reverse(syl),cons(flocons(stress),NIL)),syls);
  85. stress = 0;
  86. syl = NIL;
  87. }
  88. }
  89. return reverse(syls);
  90. }
  91. static void split_stress(LISP phones, LISP &phs, LISP &stresses)
  92. {
  93. // unpack the list of phones. When they come from certain types
  94. // of lexical entries (CMU, BEEP) vowels may have a 1 or 2 at their
  95. // end to denote stress.
  96. // This returns two list of equal length, one with the phones and
  97. // one with nils (for each phone) except when there is an explicit
  98. // stress number
  99. LISP p,np,ns;
  100. char *nph;
  101. int stress;
  102. for (p=phones,np=ns=NIL; p != NIL; p=cdr(p))
  103. {
  104. stress = 0;
  105. nph = v_stress(get_c_string(car(p)),stress);
  106. if (streq(nph,"-")) // a break of some sort
  107. np = cons(rintern(ph_silence()),np);
  108. else
  109. np = cons(rintern(nph),np);
  110. wfree(nph);
  111. if (stress != 0)
  112. ns = cons(flocons(stress),ns);
  113. else
  114. ns = cons(NIL,ns);
  115. }
  116. phs = reverse(np);
  117. stresses = reverse(ns);
  118. }
  119. static char *v_stress(const char *ph,int &stress)
  120. {
  121. // Checks to see if final character is a numeral, if so treats
  122. // is as stress value.
  123. char *nph;
  124. if ((strlen(ph) > 1) &&
  125. ((ph[strlen(ph)-1] == '1') ||
  126. (ph[strlen(ph)-1] == '2') ||
  127. (ph[strlen(ph)-1] == '0')))
  128. {
  129. stress = ph[strlen(ph)-1]-'0';
  130. nph = wstrdup(ph);
  131. nph[strlen(ph)-1] = '\0';
  132. return nph;
  133. }
  134. else
  135. return wstrdup(ph);
  136. }
  137. static int syl_breakable(LISP syl, LISP rest)
  138. {
  139. if (rest == NIL)
  140. return TRUE;
  141. else if (!syl_contains_vowel(rest))
  142. return FALSE; // must be a vowel remaining in rest
  143. else if (syl_contains_vowel(syl))
  144. {
  145. if (ph_is_vowel(get_c_string(car(rest))))
  146. return TRUE;
  147. else if (cdr(rest) == NIL)
  148. return FALSE;
  149. int p = ph_sonority(get_c_string(car(syl)));
  150. int n = ph_sonority(get_c_string(car(rest)));
  151. int nn = ph_sonority(get_c_string(car(cdr(rest))));
  152. if ((p <= n) && (n <= nn))
  153. return TRUE;
  154. else
  155. return FALSE;
  156. }
  157. else
  158. return FALSE;
  159. }
  160. static int syl_contains_vowel(LISP phones)
  161. {
  162. // So we can support "vowels" like ah2, oy2 (i.e. vowels with
  163. // stress markings) we need to make this a hack. Vowels are
  164. // assumed to start with one of aiueo
  165. LISP p;
  166. for (p=phones; p !=NIL; p=cdr(p))
  167. if (strchr("aiueoAIUEO",get_c_string(car(p))[0]) != NULL)
  168. return TRUE;
  169. else if (ph_is_vowel(get_c_string(car(p))))
  170. return TRUE;
  171. else if (ph_is_silence(get_c_string(car(p))))
  172. return FALSE;
  173. return FALSE;
  174. }