/native/external/espeak/src/setlengths.cpp

http://eyes-free.googlecode.com/ · C++ · 634 lines · 491 code · 101 blank · 42 comment · 150 complexity · b109a09b160cc9f8c3726201a1696eda MD5 · raw file

  1. /***************************************************************************
  2. * Copyright (C) 2005 to 2007 by Jonathan Duddington *
  3. * email: jonsd@users.sourceforge.net *
  4. * *
  5. * This program is free software; you can redistribute it and/or modify *
  6. * it under the terms of the GNU General Public License as published by *
  7. * the Free Software Foundation; either version 3 of the License, or *
  8. * (at your option) any later version. *
  9. * *
  10. * This program is distributed in the hope that it will be useful, *
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  13. * GNU General Public License for more details. *
  14. * *
  15. * You should have received a copy of the GNU General Public License *
  16. * along with this program; if not, write see: *
  17. * <http://www.gnu.org/licenses/>. *
  18. ***************************************************************************/
  19. #include "StdAfx.h"
  20. #include <stdlib.h>
  21. #include <stdio.h>
  22. #include <wctype.h>
  23. #include "speak_lib.h"
  24. #include "speech.h"
  25. #include "phoneme.h"
  26. #include "synthesize.h"
  27. #include "voice.h"
  28. #include "translate.h"
  29. extern int GetAmplitude(void);
  // Converts words-per-minute to the internal speed factor.
  // Indexed by (wpm - 80); SetSpeed() clamps wpm to 80..369 before the lookup,
  // which matches the 290 entries below. The trailing comment on each row is
  // the wpm value of the row's first entry.
  // The table is only ever read, so it is const.
  static const unsigned char speed_lookup[290] = {
      250, 246, 243, 239, 236,   // 80
      233, 229, 226, 223, 220,   // 85
      217, 214, 211, 208, 205,   // 90
      202, 197, 194, 192, 190,   // 95
      187, 185, 183, 180, 178,   // 100
      176, 174, 172, 170, 168,   // 105
      166, 164, 161, 159, 158,   // 110
      156, 154, 152, 150, 148,   // 115
      146, 145, 143, 141, 137,   // 120
      136, 135, 133, 132, 131,   // 125
      129, 128, 127, 126, 125,   // 130
      124, 122, 121, 120, 119,   // 135
      117, 116, 115, 114, 113,   // 140
      112, 111, 110, 108, 107,   // 145
      106, 105, 104, 103, 102,   // 150
      101, 100, 99, 98, 97,      // 155
      96, 95, 93, 92, 92,        // 160
      91, 90, 89, 89, 88,        // 165
      87, 87, 86, 85, 85,        // 170
      84, 83, 83, 82, 81,        // 175
      80, 80, 79, 78, 78,        // 180
      77, 76, 76, 75, 73,        // 185
      72, 72, 71, 71, 70,        // 190
      70, 69, 69, 68, 67,        // 195
      67, 66, 66, 65, 65,        // 200
      64, 64, 63, 63, 62,        // 205
      62, 61, 60, 60, 59,        // 210
      59, 58, 58, 57, 57,        // 215
      56, 56, 55, 55, 55,        // 220
      54, 54, 53, 53, 52,        // 225
      52, 51, 51, 50, 50,        // 230
      49, 49, 49, 48, 48,        // 235
      47, 47, 46, 46, 46,        // 240
      45, 45, 44, 44, 43,        // 245
      43, 43, 42, 42, 41,        // 250
      41, 41, 40, 40, 39,        // 255
      39, 39, 38, 38, 38,        // 260
      37, 37, 37, 36, 36,        // 265
      35, 35, 35, 34, 34,        // 270
      34, 33, 33, 33, 32,        // 275
      32, 32, 32, 31, 31,        // 280
      31, 30, 30, 30, 29,        // 285
      29, 29, 29, 28, 28,        // 290
      28, 28, 27, 27, 27,        // 295
      26, 26, 26, 26, 25,        // 300
      25, 25, 22, 22, 22,        // 305
      22, 22, 22, 22, 22,        // 310
      21, 21, 21, 21, 21,        // 315
      21, 20, 20, 20, 20,        // 320
      20, 15, 15, 15, 15,        // 325
      15, 15, 15, 15, 16,        // 330
      16, 16, 16, 15, 15,        // 335
      15, 15, 15, 15, 15,        // 340
      15, 17, 17, 16, 16,        // 345
      15, 15, 14, 14, 13,        // 350
      13, 12, 12, 11, 11,        // 355
      10, 10, 9, 8, 8,           // 360
      7, 6, 5, 5, 4,             // 365
  };

  // speed_factor2 values for speeds 370 to 390 wpm, indexed by (wpm - 370).
  // Read-only, so const.
  static const unsigned char faster[] = {
      114, 112, 110, 109, 107, 105, 104, 102, 100, 98,   // 370-379
      96, 94, 92, 90, 88, 85, 83, 80, 78, 75, 72 };      // 380-390

  // Vowel-length scaling factors, recomputed by SetSpeed() when (control & 1)
  // and consumed by CalcLengths():
  //   speed1 - no further syllables follow in the word
  //   speed2 - exactly one further syllable follows
  //   speed3 - two or more further syllables follow
  static int speed1 = 130;
  static int speed2 = 121;
  static int speed3 = 118;
  98. void SetSpeed(int control)
  99. {//=======================
  100. int x;
  101. int s1;
  102. int wpm;
  103. int wpm2;
  104. wpm = embedded_value[EMBED_S];
  105. if(control == 2)
  106. wpm = embedded_value[EMBED_S2];
  107. wpm2 = wpm;
  108. if(wpm > 369) wpm = 369;
  109. if(wpm < 80) wpm = 80;
  110. x = speed_lookup[wpm-80];
  111. if(control & 1)
  112. {
  113. // set speed factors for different syllable positions within a word
  114. // these are used in CalcLengths()
  115. speed1 = (x * voice->speedf1)/256;
  116. speed2 = (x * voice->speedf2)/256;
  117. speed3 = (x * voice->speedf3)/256;
  118. }
  119. if(control & 2)
  120. {
  121. // these are used in synthesis file
  122. s1 = (x * voice->speedf1)/256;
  123. speed_factor1 = (256 * s1)/115; // full speed adjustment, used for pause length
  124. if(speed_factor1 < 15)
  125. speed_factor1 = 15;
  126. if(wpm >= 170)
  127. // speed_factor2 = 100 + (166*s1)/128; // reduced speed adjustment, used for playing recorded sounds
  128. speed_factor2 = 110 + (150*s1)/128; // reduced speed adjustment, used for playing recorded sounds
  129. else
  130. speed_factor2 = 128 + (128*s1)/130; // = 215 at 170 wpm
  131. if(wpm2 > 369)
  132. {
  133. if(wpm2 > 390)
  134. wpm2 = 390;
  135. speed_factor2 = faster[wpm2 - 370];
  136. }
  137. }
  138. speed_min_sample_len = 450;
  139. } // end of SetSpeed
  #ifdef deleted
  // Disabled: only compiled when the macro `deleted` is defined.
  void SetAmplitude(int amp)
  {//=======================
      // Maps an amplitude setting in 0..20 to option_amplitude;
      // values outside that range are ignored.
      static unsigned char amplitude_factor[] = {0,5,6,7,9,11,14,17,21,26, 32, 38,44,50,56,63,70,77,84,91,100 };

      if((amp < 0) || (amp > 20))
      {
          return;
      }
      option_amplitude = (amplitude_factor[amp] * 480)/256;
  }
  #endif
  150. void SetParameter(int parameter, int value, int relative)
  151. {//======================================================
  152. // parameter: reset-all, amp, pitch, speed, linelength, expression, capitals, number grouping
  153. // relative 0=absolute 1=relative
  154. int new_value = value;
  155. int default_value;
  156. if(relative)
  157. {
  158. if(parameter < 5)
  159. {
  160. default_value = param_defaults[parameter];
  161. new_value = default_value + (default_value * value)/100;
  162. }
  163. }
  164. param_stack[0].parameter[parameter] = new_value;
  165. switch(parameter)
  166. {
  167. case espeakRATE:
  168. embedded_value[EMBED_S] = new_value;
  169. embedded_value[EMBED_S2] = new_value;
  170. SetSpeed(3);
  171. break;
  172. case espeakVOLUME:
  173. embedded_value[EMBED_A] = new_value;
  174. GetAmplitude();
  175. break;
  176. case espeakPITCH:
  177. if(new_value > 99) new_value = 99;
  178. if(new_value < 0) new_value = 0;
  179. embedded_value[EMBED_P] = new_value;
  180. break;
  181. case espeakRANGE:
  182. if(new_value > 99) new_value = 99;
  183. embedded_value[EMBED_R] = new_value;
  184. break;
  185. case espeakLINELENGTH:
  186. option_linelength = new_value;
  187. break;
  188. case espeakWORDGAP:
  189. option_wordgap = new_value;
  190. break;
  191. case espeakINTONATION:
  192. if((new_value & 0xff) != 0)
  193. translator->langopts.intonation_group = new_value & 0xff;
  194. option_tone_flags = new_value;
  195. break;
  196. default:
  197. break;
  198. }
  199. } // end of SetParameter
  200. static void DoEmbedded2(int &embix)
  201. {//================================
  202. // There were embedded commands in the text at this point
  203. unsigned int word;
  204. do {
  205. word = embedded_list[embix++];
  206. if((word & 0x1f) == EMBED_S)
  207. {
  208. // speed
  209. SetEmbedded(word & 0x7f, word >> 8); // adjusts embedded_value[EMBED_S]
  210. SetSpeed(1);
  211. }
  212. } while((word & 0x80) == 0);
  213. }
  214. void Translator::CalcLengths()
  215. {//===========================
  216. int ix;
  217. int ix2;
  218. PHONEME_LIST *prev;
  219. PHONEME_LIST *next;
  220. PHONEME_LIST *next2;
  221. PHONEME_LIST *next3;
  222. PHONEME_LIST *p;
  223. PHONEME_LIST *p2;
  224. int stress;
  225. int type;
  226. static int more_syllables=0;
  227. int pre_sonorant=0;
  228. int pre_voiced=0;
  229. int last_pitch = 0;
  230. int pitch_start;
  231. int length_mod;
  232. int len;
  233. int env2;
  234. int end_of_clause;
  235. int embedded_ix = 0;
  236. int min_drop;
  237. int emphasized;
  238. unsigned char *pitch_env=NULL;
  239. for(ix=1; ix<n_phoneme_list; ix++)
  240. {
  241. prev = &phoneme_list[ix-1];
  242. p = &phoneme_list[ix];
  243. stress = p->tone & 0x7;
  244. emphasized = p->tone & 0x8;
  245. next = &phoneme_list[ix+1];
  246. if(p->synthflags & SFLAG_EMBEDDED)
  247. {
  248. DoEmbedded2(embedded_ix);
  249. }
  250. type = p->type;
  251. if(p->synthflags & SFLAG_SYLLABLE)
  252. type = phVOWEL;
  253. switch(type)
  254. {
  255. case phPAUSE:
  256. last_pitch = 0;
  257. break;
  258. case phSTOP:
  259. last_pitch = 0;
  260. if(prev->type == phFRICATIVE)
  261. p->prepause = 20;
  262. else
  263. if((more_syllables > 0) || (stress < 4))
  264. p->prepause = 40;
  265. else
  266. p->prepause = 60;
  267. if(prev->type == phSTOP)
  268. p->prepause = 60;
  269. if((langopts.word_gap & 0x10) && (p->newword))
  270. p->prepause = 60;
  271. if(p->synthflags & SFLAG_LENGTHEN)
  272. p->prepause += langopts.long_stop;
  273. break;
  274. case phVFRICATIVE:
  275. if(next->type==phVOWEL)
  276. {
  277. pre_voiced = 1;
  278. } // drop through
  279. case phFRICATIVE:
  280. if(p->newword)
  281. p->prepause = 15;
  282. if(next->type==phPAUSE && prev->type==phNASAL && !(p->ph->phflags&phFORTIS))
  283. p->prepause = 25;
  284. if(prev->ph->phflags & phBRKAFTER)
  285. p->prepause = 30;
  286. if((p->ph->phflags & phSIBILANT) && next->type==phSTOP && !next->newword)
  287. {
  288. if(prev->type == phVOWEL)
  289. p->length = 200; // ?? should do this if it's from a prefix
  290. else
  291. p->length = 150;
  292. }
  293. else
  294. p->length = 256;
  295. if((langopts.word_gap & 0x10) && (p->newword))
  296. p->prepause = 30;
  297. break;
  298. case phVSTOP:
  299. if(prev->type==phVFRICATIVE || prev->type==phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID))
  300. p->prepause = 30;
  301. if(next->type==phVOWEL || next->type==phLIQUID)
  302. {
  303. if((next->type==phVOWEL) || !next->newword)
  304. pre_voiced = 1;
  305. p->prepause = 40;
  306. if((prev->type == phPAUSE) || (prev->type == phVOWEL)) // || (prev->ph->mnemonic == ('/'*256+'r')))
  307. p->prepause = 0;
  308. else
  309. if(p->newword==0)
  310. {
  311. if(prev->type==phLIQUID)
  312. p->prepause = 20;
  313. if(prev->type==phNASAL)
  314. p->prepause = 12;
  315. if(prev->type==phSTOP && !(prev->ph->phflags & phFORTIS))
  316. p->prepause = 0;
  317. }
  318. }
  319. if((langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20))
  320. p->prepause = 20;
  321. break;
  322. case phLIQUID:
  323. case phNASAL:
  324. p->amp = stress_amps[1]; // unless changed later
  325. p->length = 256; // TEMPORARY
  326. min_drop = 0;
  327. if(p->newword)
  328. {
  329. if(prev->type==phLIQUID)
  330. p->prepause = 25;
  331. if(prev->type==phVOWEL)
  332. p->prepause = 12;
  333. }
  334. if(next->type==phVOWEL)
  335. {
  336. pre_sonorant = 1;
  337. }
  338. else
  339. if((prev->type==phVOWEL) || (prev->type == phLIQUID))
  340. {
  341. p->length = prev->length;
  342. p->pitch2 = last_pitch;
  343. if(p->pitch2 < 7)
  344. p->pitch2 = 7;
  345. p->pitch1 = p->pitch2 - 8;
  346. p->env = PITCHfall;
  347. pre_voiced = 0;
  348. if(p->type == phLIQUID)
  349. {
  350. p->length = speed1;
  351. //p->pitch1 = p->pitch2 - 20; // post vocalic [r/]
  352. }
  353. if(next->type == phVSTOP)
  354. {
  355. p->length = (p->length * 160)/100;
  356. }
  357. if(next->type == phVFRICATIVE)
  358. {
  359. p->length = (p->length * 120)/100;
  360. }
  361. }
  362. else
  363. {
  364. p->pitch2 = last_pitch;
  365. for(ix2=ix; ix2<n_phoneme_list; ix2++)
  366. {
  367. if(phoneme_list[ix2].type == phVOWEL)
  368. {
  369. p->pitch2 = phoneme_list[ix2].pitch2;
  370. break;
  371. }
  372. }
  373. p->pitch1 = p->pitch2-8;
  374. p->env = PITCHfall;
  375. pre_voiced = 0;
  376. }
  377. break;
  378. case phVOWEL:
  379. min_drop = 0;
  380. next2 = &phoneme_list[ix+2];
  381. next3 = &phoneme_list[ix+3];
  382. if(stress > 7) stress = 7;
  383. if(pre_sonorant)
  384. p->amp = stress_amps[stress]-1;
  385. else
  386. p->amp = stress_amps[stress];
  387. if(emphasized)
  388. p->amp = 25;
  389. if(ix >= (n_phoneme_list-3))
  390. {
  391. // last phoneme of a clause, limit its amplitude
  392. if(p->amp > langopts.param[LOPT_MAXAMP_EOC])
  393. p->amp = langopts.param[LOPT_MAXAMP_EOC];
  394. }
  395. // is the last syllable of a word ?
  396. more_syllables=0;
  397. end_of_clause = 0;
  398. for(p2 = p+1; p2->newword== 0; p2++)
  399. {
  400. if((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC))
  401. more_syllables++;
  402. if(p2->ph->code == phonPAUSE_CLAUSE)
  403. end_of_clause = 2;
  404. }
  405. if(p2->ph->code == phonPAUSE_CLAUSE)
  406. end_of_clause = 2;
  407. if((p2->newword & 2) && (more_syllables==0))
  408. {
  409. end_of_clause = 2;
  410. }
  411. // calc length modifier
  412. if(more_syllables==0)
  413. {
  414. len = langopts.length_mods0[next2->ph->length_mod *10+ next->ph->length_mod];
  415. if((next->newword) && (langopts.word_gap & 0x20))
  416. {
  417. // consider as a pause + first phoneme of the next word
  418. length_mod = (len + langopts.length_mods0[next->ph->length_mod *10+ 1])/2;
  419. }
  420. else
  421. length_mod = len;
  422. }
  423. else
  424. {
  425. length_mod = langopts.length_mods[next2->ph->length_mod *10+ next->ph->length_mod];
  426. if((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phFORTIS))
  427. length_mod -= 15;
  428. }
  429. if(more_syllables==0)
  430. length_mod *= speed1;
  431. else
  432. if(more_syllables==1)
  433. length_mod *= speed2;
  434. else
  435. length_mod *= speed3;
  436. length_mod = length_mod / 128;
  437. // if(length_mod < 9)
  438. // length_mod = 9; // restrict how much lengths can be reduced
  439. if(length_mod < 8)
  440. length_mod = 8; // restrict how much lengths can be reduced
  441. if(stress >= 7)
  442. {
  443. // tonic syllable, include a constant component so it doesn't decrease directly with speed
  444. length_mod += 20;
  445. if(emphasized)
  446. length_mod += 10;
  447. }
  448. else
  449. if(emphasized)
  450. {
  451. length_mod += 20;
  452. }
  453. if((len = stress_lengths[stress]) == 0)
  454. len = stress_lengths[6];
  455. length_mod = (length_mod * len)/128;
  456. if(end_of_clause == 2)
  457. {
  458. // this is the last syllable in the clause, lengthen it - more for short vowels
  459. length_mod = length_mod * (256 + (280 - p->ph->std_length)/3)/256;
  460. }
  461. if(p->type != phVOWEL)
  462. {
  463. length_mod = 256; // syllabic consonant
  464. min_drop = 8;
  465. }
  466. p->length = length_mod;
  467. // pre-vocalic part
  468. // set last-pitch
  469. env2 = p->env;
  470. if(env2 > 1) env2++; // version for use with preceding semi-vowel
  471. if(p->tone_ph != 0)
  472. {
  473. pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect);
  474. }
  475. else
  476. {
  477. pitch_env = envelope_data[env2];
  478. }
  479. pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256;
  480. if(pre_sonorant || pre_voiced)
  481. {
  482. // set pitch for pre-vocalic part
  483. if(pitch_start - last_pitch > 8) // was 9
  484. last_pitch = pitch_start - 8;
  485. prev->pitch1 = last_pitch;
  486. prev->pitch2 = pitch_start;
  487. if(last_pitch < pitch_start)
  488. {
  489. prev->env = PITCHrise;
  490. p->env = env2;
  491. }
  492. else
  493. {
  494. prev->env = PITCHfall;
  495. }
  496. prev->length = length_mod;
  497. prev->amp = p->amp;
  498. if((prev->type != phLIQUID) && (prev->amp > 18))
  499. prev->amp = 18;
  500. }
  501. // vowel & post-vocalic part
  502. next->synthflags &= ~SFLAG_SEQCONTINUE;
  503. if(next->type == phNASAL && next2->type != phVOWEL)
  504. next->synthflags |= SFLAG_SEQCONTINUE;
  505. if(next->type == phLIQUID)
  506. {
  507. next->synthflags |= SFLAG_SEQCONTINUE;
  508. if(next2->type == phVOWEL)
  509. {
  510. next->synthflags &= ~SFLAG_SEQCONTINUE;
  511. }
  512. if(next2->type != phVOWEL)
  513. {
  514. if(next->ph->mnemonic == ('/'*256+'r'))
  515. {
  516. next->synthflags &= ~SFLAG_SEQCONTINUE;
  517. // min_drop = 15;
  518. }
  519. }
  520. }
  521. if((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop))
  522. {
  523. p->pitch1 = p->pitch2 - min_drop;
  524. if(p->pitch1 < 0)
  525. p->pitch1 = 0;
  526. }
  527. last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256;
  528. pre_sonorant = 0;
  529. pre_voiced = 0;
  530. break;
  531. }
  532. }
  533. } // end of CalcLengths