PageRenderTime 54ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/tts.scm

https://gitlab.com/generic-library/festival
Scheme | 304 lines | 228 code | 28 blank | 48 comment | 8 complexity | 5fb4441bc4f014d531df8f72bd36dd7c MD5 | raw file
  1. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  2. ;;; ;;
  3. ;;; Centre for Speech Technology Research ;;
  4. ;;; University of Edinburgh, UK ;;
  5. ;;; Copyright (c) 1996,1997 ;;
  6. ;;; All Rights Reserved. ;;
  7. ;;; ;;
  8. ;;; Permission is hereby granted, free of charge, to use and distribute ;;
  9. ;;; this software and its documentation without restriction, including ;;
  10. ;;; without limitation the rights to use, copy, modify, merge, publish, ;;
  11. ;;; distribute, sublicense, and/or sell copies of this work, and to ;;
  12. ;;; permit persons to whom this work is furnished to do so, subject to ;;
  13. ;;; the following conditions: ;;
  14. ;;; 1. The code must retain the above copyright notice, this list of ;;
  15. ;;; conditions and the following disclaimer. ;;
  16. ;;; 2. Any modifications must be clearly marked as such. ;;
  17. ;;; 3. Original authors' names are not deleted. ;;
  18. ;;; 4. The authors' names are not used to endorse or promote products ;;
  19. ;;; derived from this software without specific prior written ;;
  20. ;;; permission. ;;
  21. ;;; ;;
  22. ;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
  23. ;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
  24. ;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
  25. ;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
  26. ;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
  27. ;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
  28. ;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
  29. ;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
  30. ;;; THIS SOFTWARE. ;;
  31. ;;; ;;
  32. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  33. ;;;
  34. ;;; Various tts functions and hooks
  ;;; Once an utterance has been built, these hooks synthesize and play it
  (defvar tts_hooks
    (list utt.synth utt.play)
    "tts_hooks
  A function, or a list of functions, called during text to speech.
  tts_file chunks its input into utterances of type Token and applies
  this hook to each of them.  The default value holds utt.synth followed
  by utt.play.  [see TTS]")
  ;;; Decision tree used to find utterance breaks when running tts on files.
  ;;; Each node is (QUESTION YES-SUBTREE NO-SUBTREE); the leaves are ((1))
  ;;; and ((0)).
  (defvar eou_tree
    '((lisp_max_num_tokens > 200)
      ((1))
      ((n.whitespace matches ".*\n.*\n\\(.\\|\n\\)*") ;; significant break (2 nls)
       ((1))
       ((name matches "--+")
        ((1))
        ((punc matches ".*[\\?:!;].*")
         ((1))
         ((punc matches ".*\\..*")
          ((punc matches "..+") ;; longer punctuation string
           ((punc matches "\\..*,") ;; for U.S.S.R., like tokens
            ((0))
            ((1)))
           ;; Heuristics to tell an abbreviation's dot from a real period
           ((name matches "\\(.*\\..*\\|[A-Z][A-Za-z]?[A-Za-z]?\\|etc\\)") ;; an abbreviation
            ((n.whitespace is " ")
             ((0)) ;; after an abbreviation a single space isn't enough for a break
             ((n.name matches "[A-Z].*")
              ((1))
              ((0))))
            ((n.whitespace is " ") ;; it doesn't look like an abbreviation
             ((n.name matches "[A-Z].*") ;; single space and non-cap is no break
              ((1))
              ((0)))
             ((1)))))
          ((0)))))))
    "eou_tree
  End of utterance tree.  A decision tree used to decide whether the
  given token marks the end of an utterance.  Some questions use the n.
  features (n.whitespace, n.name), so it may look one token ahead.
  [see Utterance chunking]")
  (define (max_num_tokens x)
    "(max_num_tokens x)
  Counts back from item x through its predecessors (item.prev) until
  there are none left.  The counter starts at 1, so the value returned is
  one more than the number of items visited.  eou_tree tests this value
  (as lisp_max_num_tokens) to cap the length of an utterance: some input
  runs to thousands of words without a break, and nobody wants to wait on
  an utterance that long."
    (let ((count 1)
          (tok x))
      (while tok
        (set! tok (item.prev tok))
        (set! count (+ count 1)))
      count))
  ;;; The program used to parse stml files
  ;;; Needs version 1.0 to allow -D option to work
  (defvar sgml_parse_progname
    "nsgmls-1.0"
    "sgml_parse_progname
  Name of the program used to parse SGML files.  Typically this is
  nsgmls-1.0 from the sp SGML package.  [see XML/SGML requirements]")
  ;;; When PHRASE elements are specified in an utterance in STML
  ;;; no other method for phrase prediction is to be used, so we
  ;;; use the following tree
  (set! stml_phrase_cart_tree
        '((R:Token.parent.pbreak is B)
          ((B))                   ;; parent token's pbreak is B
          ((n.name is 0)
           ((B))                  ;; n.name is 0
           ((NB)))))
  (define (xxml_synth utt)
    "(xxml_synth UTT)
  Applies xxml_hooks (mode specific) and then tts_hooks to UTT.  Meant to
  be called from xxml element definitions that signal an utterance
  boundary.  Nothing is done when UTT is nil or has no Token relation.
  The local reference to UTT is cleared and (gc) is run afterwards, so
  the value returned is always nil."
    (if (and utt
             (utt.relation utt 'Token))   ;; there are tokens to speak
        (begin
          (apply_hooks xxml_hooks utt)
          (apply_hooks tts_hooks utt)
          (set! utt nil) ;; not enough ...
          (gc)
          utt)
        nil))
  (define (xxml_attval ATTNAME ATTLIST)
    "(xxml_attval ATTNAME ATTLIST)
  Returns the value of the first attribute in ATTLIST whose name is
  string-equal to ATTNAME, or nil if there is no such attribute.  Each
  entry of ATTLIST is a list whose first element is the name and whose
  second element is the value."
    (let ((remaining ATTLIST)
          (found nil)
          (value nil))
      (while (and remaining (not found))
        (if (string-equal ATTNAME (car (car remaining)))
            (begin
              (set! value (car (cdr (car remaining))))
              (set! found t))
            (set! remaining (cdr remaining))))
      value))
  (defvar xxml_word_features
    nil
    "xxml_word_features
  Assoc list of features to be added to the current word while in xxml
  parse mode.")

  (defvar xxml_token_hooks
    nil
    "xxml_token_hooks
  Functions to apply to each token.")

  (defvar xxml_hooks
    nil
    "xxml_hooks
  A function, or a list of functions, applied to an utterance parsed with
  xxML, before tts_hooks are applied.")

  (defvar xxml_elements
    nil
    "xxml_elements
  List of Scheme actions to perform on finding xxML tags.")

  (defvar xml_dtd_dir
    libdir
    "xml_dtd_dir
  The directory holding standard DTDs for the xml parser.")
  ;; Number used in the name of the next waveform file that
  ;; save_tts_output writes.
  (set! tts_fnum 1)

  (define (save_tts_output utt)
    "(save_tts_output UTT)
  Saves the waveform of UTT in the file tts_file_N.wav, where N is the
  current value of tts_fnum, reports the file name on stderr, increments
  tts_fnum and returns UTT."
    (let ((wavename (string-append "tts_file_" tts_fnum ".wav")))
      (format stderr "festival: saving waveform in %s\n" wavename)
      (utt.save.wave utt wavename)
      (set! tts_fnum (+ 1 tts_fnum))
      utt))
  (define (save_waves_during_tts)
    "(save_waves_during_tts)
  Save each waveform in the current directory in files \"tts_file_XXX.wav\".
  This appends save_tts_output to tts_hooks unless it is already there.
  Use (save_waves_during_tts_STOP) to stop saving waveforms."
    (if (member save_tts_output tts_hooks)
        nil                               ;; already installed
        (set! tts_hooks
              (append tts_hooks (list save_tts_output))))
    t)
  (define (save_waves_during_tts_STOP)
    "(save_waves_during_tts_STOP)
  Stop saving waveforms when doing tts, by removing save_tts_output from
  tts_hooks if it is present."
    (cond
     ((member save_tts_output tts_hooks)
      (set! tts_hooks (delq save_tts_output tts_hooks))))
    t)
  (define (tts file mode)
    "(tts FILE MODE)
  Convert FILE to speech.  MODE identifies any special treatment FILE
  needs; when MODE is nil the mode is looked up from the file name using
  auto-text-mode-alist.  This is simply a front end to tts_file that puts
  the system in async audio mode first.  [see TTS]"
    (audio_mode 'async)
    (tts_file file
              (if mode
                  mode
                  (tts_find_text_mode file auto-text-mode-alist)))
  ;;  (audio_mode 'sync) ;; Hmm this is probably bad
    )
  (define (tts_text string mode)
    "(tts_text STRING mode)
  Apply tts to STRING: segment it into utterances and apply tts_hooks to
  each one.  This is done naively, by writing STRING to a temporary file,
  calling tts_file on that file in async audio mode, and deleting the
  file afterwards.  Unlike SayText, which builds a single utterance for
  the whole text, this can produce several utterances."
    (let ((tmpfile (make_tmp_filename)))
      ;; dump the text where tts_file can read it
      (let ((fd (fopen tmpfile "wb")))
        (format fd "%s" string)
        (fclose fd))
      (audio_mode 'async)
      (tts_file tmpfile mode)
      (delete-file tmpfile)))
  (define (save_record_wave utt)
    "(save_record_wave UTT)
  Saves the waveform of UTT in a temporary file and records the file name
  at the front of wavefiles, so that the waveforms can be joined into a
  single waveform at the end.  Returns UTT."
    (let ((wavefile (make_tmp_filename)))
      (utt.save.wave utt wavefile)
      (set! wavefiles (cons wavefile wavefiles))
      utt))
  (define (combine_waves)
    "(combine_waves)
  Import every waveform file named in wavefiles into one new utterance,
  deleting each file once it has been imported, and return that
  utterance.  wavefiles is walked in reverse, i.e. in the order the
  names were recorded."
    (let ((wholeutt (Utterance Text ""))
          (pending (reverse wavefiles)))
      (while pending
        (utt.import.wave wholeutt (car pending) t)
        (delete-file (car pending))
        (set! pending (cdr pending)))
      wholeutt))
  (define (tts_textall string mode)
    "(tts_textall STRING MODE)
  Apply tts to STRING.  This function is specifically designed for use in
  server mode, so a single function call may synthesize the string and
  send the resulting waveform to the client.  This function name may be
  added to the server safe functions.

  If MODE is the string \"nil\" STRING is synthesized as one Text
  utterance.  Otherwise STRING is written to a temporary file and run
  through tts_file in MODE, with tts_hooks temporarily replaced so that
  each utterance's waveform is saved; the saved waveforms are then joined
  and sent as one.  tts_hooks is put back to its previous value once
  tts_file returns."
    (if (not (string-equal mode "nil"))
        ;; a mode has been specified so do something different
        (let ((tmpfile (make_tmp_filename))
              (saved_hooks tts_hooks)    ;; caller's hooks, restored below
              (fd))
          (set! fd (fopen tmpfile "wb"))
          (format fd "%s" string)
          (fclose fd)
          (set! tts_hooks (list utt.synth save_record_wave))
          (set! wavefiles nil)
          (tts_file tmpfile mode)
          ;; Don't leave the collecting hooks installed: later tts calls
          ;; in this process would otherwise never play or send anything.
          (set! tts_hooks saved_hooks)
          (delete-file tmpfile)
          (utt.send.wave.client (combine_waves)))
        ;; Simple fundamental mode
        (utt.send.wave.client
         (utt.synth
          (eval (list 'Utterance 'Text string))))))
  ;; Function to interface with app_festival for asterisk
  ;; See http://www.asterisk.org
  (define (tts_textasterisk string mode)
    "(tts_textasterisk STRING MODE)
  Apply tts to STRING.  This function is specifically designed for use in
  server mode, so a single function call may synthesize the string.
  STRING is synthesized as one Text utterance and the result is passed to
  utt.send.wave.asterisk; MODE is not used.  This function name may be
  added to the server safe functions."
    (let ((utt (eval (list 'Utterance 'Text string))))
      (utt.send.wave.asterisk (utt.synth utt))))
  (define (tts_return_to_client)
    "(tts_return_to_client)
  Called by clients that want the waveforms of their text samples
  returned asynchronously.  Unless utt.send.wave.client is already in
  tts_hooks, this removes utt.play from tts_hooks and appends
  utt.send.wave.client."
    (if (member utt.send.wave.client tts_hooks)
        nil                               ;; already set up
        (set! tts_hooks
              (append (delq utt.play tts_hooks)
                      (list utt.send.wave.client)))))
  (defvar tts_text_modes
    nil
    "tts_text_modes
  An a-list of text mode data for file type specific tts functions.
  See the manual for an example.  [see Text modes]")
  (define (tts_find_text_mode file alist)
    "(tts_find_text_mode FILE ALIST)
  Search ALIST for the first entry whose regular expression (its car,
  wrapped in leading and trailing \".*\") matches FILE, and return that
  entry's cdr.  Returns nil if nothing matches."
    (let ((entries alist)
          (found nil)
          (mode nil))
      (while (and entries (not found))
        (if (string-matches
             file
             (string-append ".*" (car (car entries)) ".*"))
            (begin
              (set! mode (cdr (car entries)))
              (set! found t))
            (set! entries (cdr entries))))
      mode))
  (defvar auto-text-mode-alist
    (list (cons "\\.sable$" 'sable)
          (cons "\\.ogi"    'ogimarkup)
          (cons "\\.email"  'email)
          (cons ""          'fundamental))  ;; empty pattern: last entry
    "auto-text-mode-alist
  Following Emacs' auto-mode-alist, this provides a mechanism for
  automatically selecting a TTS text mode based on the name of the file
  being analyzed.  Its format is exactly the same as Emacs': an alist of
  dotted pairs of regular expression and text mode name.")

  (provide 'tts)