/native/external/espeak/src/setlengths.cpp
C++ | 634 lines | 491 code | 101 blank | 42 comment | 147 complexity | b109a09b160cc9f8c3726201a1696eda MD5 | raw file
1/*************************************************************************** 2 * Copyright (C) 2005 to 2007 by Jonathan Duddington * 3 * email: jonsd@users.sourceforge.net * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 3 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write see: * 17 * <http://www.gnu.org/licenses/>. * 18 ***************************************************************************/ 19 20#include "StdAfx.h" 21 22#include <stdlib.h> 23#include <stdio.h> 24#include <wctype.h> 25 26#include "speak_lib.h" 27#include "speech.h" 28#include "phoneme.h" 29#include "synthesize.h" 30#include "voice.h" 31#include "translate.h" 32 33extern int GetAmplitude(void); 34 35 36// convert from words-per-minute to internal speed factor 37static unsigned char speed_lookup[290] = { 38 250, 246, 243, 239, 236, // 80 39 233, 229, 226, 223, 220, // 85 40 217, 214, 211, 208, 205, // 90 41 202, 197, 194, 192, 190, // 95 42 187, 185, 183, 180, 178, // 100 43 176, 174, 172, 170, 168, // 105 44 166, 164, 161, 159, 158, // 110 45 156, 154, 152, 150, 148, // 115 46 146, 145, 143, 141, 137, // 120 47 136, 135, 133, 132, 131, // 125 48 129, 128, 127, 126, 125, // 130 49 124, 122, 121, 120, 119, // 135 50 117, 116, 115, 114, 113, // 140 51 112, 111, 110, 108, 107, // 145 52 106, 105, 104, 103, 102, // 150 53 101, 100, 99, 98, 97, // 155 54 96, 95, 93, 92, 92, // 160 55 91, 90, 89, 89, 88, // 165 56 87, 87, 86, 85, 85, // 170 57 84, 83, 83, 82, 81, // 175 58 80, 80, 79, 78, 78, // 180 59 77, 76, 76, 75, 73, // 185 60 72, 72, 71, 71, 70, // 190 61 70, 69, 69, 68, 67, // 195 62 67, 66, 66, 65, 65, // 200 63 64, 64, 63, 63, 62, // 205 64 62, 61, 60, 60, 59, // 210 65 59, 58, 58, 57, 57, // 215 66 56, 56, 55, 55, 55, // 220 67 54, 54, 53, 53, 52, // 225 68 52, 51, 51, 50, 50, // 230 69 49, 49, 49, 48, 48, // 235 70 47, 47, 46, 46, 46, // 240 71 45, 45, 44, 44, 43, // 245 72 43, 43, 42, 42, 41, // 250 73 41, 41, 40, 40, 39, // 255 74 39, 39, 38, 38, 38, // 260 75 37, 37, 37, 36, 36, // 265 76 35, 35, 35, 34, 34, // 270 77 34, 33, 33, 33, 32, // 275 78 32, 32, 32, 31, 31, // 280 79 31, 30, 30, 30, 29, // 285 80 29, 29, 29, 28, 28, // 290 81 28, 28, 27, 27, 27, // 295 82 26, 26, 26, 26, 25, // 300 83 25, 25, 22, 22, 22, // 305 84 22, 22, 22, 22, 22, // 310 85 21, 21, 21, 21, 21, // 315 86 21, 20, 20, 20, 20, // 320 87 20, 15, 15, 15, 15, // 325 88 15, 15, 15, 15, 16, // 330 89 16, 16, 16, 15, 15, // 335 90 15, 15, 15, 15, 15, // 340 91 15, 17, 17, 16, 16, // 345 92 15, 15, 14, 14, 13, // 350 93 13, 12, 12, 11, 11, // 355 94 10, 10, 9, 8, 8, // 360 95 7, 6, 5, 5, 4, // 365 96}; 97 98// speed_factor2 adjustments for speeds 370 to 390 99static unsigned char faster[] = { 100114,112,110,109,107,105,104,102,100,98, // 370-379 10196,94,92,90,88,85,83,80,78,75,72 }; //380-390 102 103static int speed1 = 130; 104static int speed2 = 121; 105static int speed3 = 118; 106 107 108 109void SetSpeed(int control) 110{//======================= 111 int x; 112 int s1; 113 int wpm; 114 int wpm2; 115 116 wpm = embedded_value[EMBED_S]; 117 if(control == 2) 118 wpm = embedded_value[EMBED_S2]; 119 wpm2 = wpm; 120 121 if(wpm > 369) wpm = 369; 122 if(wpm < 80) wpm = 80; 123 124 x = speed_lookup[wpm-80]; 125 126 if(control & 1) 127 { 128 // set speed factors for different syllable positions within a word 129 // these are used in CalcLengths() 130 speed1 = (x * voice->speedf1)/256; 131 speed2 = (x * voice->speedf2)/256; 132 speed3 = (x * voice->speedf3)/256; 133 } 134 135 if(control & 2) 136 { 137 // these are used in synthesis file 138 s1 = (x * voice->speedf1)/256; 139 speed_factor1 = (256 * s1)/115; // full speed adjustment, used for pause length 140if(speed_factor1 < 15) 141 speed_factor1 = 15; 142 if(wpm >= 170) 143// speed_factor2 = 100 + (166*s1)/128; // reduced speed adjustment, used for playing recorded sounds 144 speed_factor2 = 110 + (150*s1)/128; // reduced speed adjustment, used for playing recorded sounds 145 else 146 speed_factor2 = 128 + (128*s1)/130; // = 215 at 170 wpm 147 148 if(wpm2 > 369) 149 { 150 if(wpm2 > 390) 151 wpm2 = 390; 152 speed_factor2 = faster[wpm2 - 370]; 153 } 154 } 155 156 speed_min_sample_len = 450; 157} // end of SetSpeed 158 159 160#ifdef deleted 161void SetAmplitude(int amp) 162{//======================= 163 static unsigned char amplitude_factor[] = {0,5,6,7,9,11,14,17,21,26, 32, 38,44,50,56,63,70,77,84,91,100 }; 164 165 if((amp >= 0) && (amp <= 20)) 166 { 167 option_amplitude = (amplitude_factor[amp] * 480)/256; 168 } 169} 170#endif 171 172 173 174void SetParameter(int parameter, int value, int relative) 175{//====================================================== 176// parameter: reset-all, amp, pitch, speed, linelength, expression, capitals, number grouping 177// relative 0=absolute 1=relative 178 179 int new_value = value; 180 int default_value; 181 182 if(relative) 183 { 184 if(parameter < 5) 185 { 186 default_value = param_defaults[parameter]; 187 new_value = default_value + (default_value * value)/100; 188 } 189 } 190 param_stack[0].parameter[parameter] = new_value; 191 192 switch(parameter) 193 { 194 case espeakRATE: 195 embedded_value[EMBED_S] = new_value; 196 embedded_value[EMBED_S2] = new_value; 197 SetSpeed(3); 198 break; 199 200 case espeakVOLUME: 201 embedded_value[EMBED_A] = new_value; 202 GetAmplitude(); 203 break; 204 205 case espeakPITCH: 206 if(new_value > 99) new_value = 99; 207 if(new_value < 0) new_value = 0; 208 embedded_value[EMBED_P] = new_value; 209 break; 210 211 case espeakRANGE: 212 if(new_value > 99) new_value = 99; 213 embedded_value[EMBED_R] = new_value; 214 break; 215 216 case espeakLINELENGTH: 217 option_linelength = new_value; 218 break; 219 220 case espeakWORDGAP: 221 option_wordgap = new_value; 222 break; 223 224 case espeakINTONATION: 225 if((new_value & 0xff) != 0) 226 translator->langopts.intonation_group = new_value & 0xff; 227 option_tone_flags = new_value; 228 break; 229 230 default: 231 break; 232 } 233} // end of SetParameter 234 235 236 237static void DoEmbedded2(int &embix) 238{//================================ 239 // There were embedded commands in the text at this point 240 241 unsigned int word; 242 243 do { 244 word = embedded_list[embix++]; 245 246 if((word & 0x1f) == EMBED_S) 247 { 248 // speed 249 SetEmbedded(word & 0x7f, word >> 8); // adjusts embedded_value[EMBED_S] 250 SetSpeed(1); 251 } 252 } while((word & 0x80) == 0); 253} 254 255 256void Translator::CalcLengths() 257{//=========================== 258 int ix; 259 int ix2; 260 PHONEME_LIST *prev; 261 PHONEME_LIST *next; 262 PHONEME_LIST *next2; 263 PHONEME_LIST *next3; 264 PHONEME_LIST *p; 265 PHONEME_LIST *p2; 266 267 int stress; 268 int type; 269 static int more_syllables=0; 270 int pre_sonorant=0; 271 int pre_voiced=0; 272 int last_pitch = 0; 273 int pitch_start; 274 int length_mod; 275 int len; 276 int env2; 277 int end_of_clause; 278 int embedded_ix = 0; 279 int min_drop; 280 int emphasized; 281 unsigned char *pitch_env=NULL; 282 283 for(ix=1; ix<n_phoneme_list; ix++) 284 { 285 prev = &phoneme_list[ix-1]; 286 p = &phoneme_list[ix]; 287 stress = p->tone & 0x7; 288 emphasized = p->tone & 0x8; 289 290 next = &phoneme_list[ix+1]; 291 292 if(p->synthflags & SFLAG_EMBEDDED) 293 { 294 DoEmbedded2(embedded_ix); 295 } 296 297 type = p->type; 298 if(p->synthflags & SFLAG_SYLLABLE) 299 type = phVOWEL; 300 301 switch(type) 302 { 303 case phPAUSE: 304 last_pitch = 0; 305 break; 306 307 case phSTOP: 308 last_pitch = 0; 309 if(prev->type == phFRICATIVE) 310 p->prepause = 20; 311 else 312 if((more_syllables > 0) || (stress < 4)) 313 p->prepause = 40; 314 else 315 p->prepause = 60; 316 317 if(prev->type == phSTOP) 318 p->prepause = 60; 319 320 if((langopts.word_gap & 0x10) && (p->newword)) 321 p->prepause = 60; 322 323 if(p->synthflags & SFLAG_LENGTHEN) 324 p->prepause += langopts.long_stop; 325 break; 326 327 case phVFRICATIVE: 328 if(next->type==phVOWEL) 329 { 330 pre_voiced = 1; 331 } // drop through 332 case phFRICATIVE: 333 if(p->newword) 334 p->prepause = 15; 335 336 if(next->type==phPAUSE && prev->type==phNASAL && !(p->ph->phflags&phFORTIS)) 337 p->prepause = 25; 338 339 if(prev->ph->phflags & phBRKAFTER) 340 p->prepause = 30; 341 342 if((p->ph->phflags & phSIBILANT) && next->type==phSTOP && !next->newword) 343 { 344 if(prev->type == phVOWEL) 345 p->length = 200; // ?? should do this if it's from a prefix 346 else 347 p->length = 150; 348 } 349 else 350 p->length = 256; 351 352 if((langopts.word_gap & 0x10) && (p->newword)) 353 p->prepause = 30; 354 355 break; 356 357 case phVSTOP: 358 if(prev->type==phVFRICATIVE || prev->type==phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID)) 359 p->prepause = 30; 360 361 if(next->type==phVOWEL || next->type==phLIQUID) 362 { 363 if((next->type==phVOWEL) || !next->newword) 364 pre_voiced = 1; 365 366 p->prepause = 40; 367 368 if((prev->type == phPAUSE) || (prev->type == phVOWEL)) // || (prev->ph->mnemonic == ('/'*256+'r'))) 369 p->prepause = 0; 370 else 371 if(p->newword==0) 372 { 373 if(prev->type==phLIQUID) 374 p->prepause = 20; 375 if(prev->type==phNASAL) 376 p->prepause = 12; 377 378 if(prev->type==phSTOP && !(prev->ph->phflags & phFORTIS)) 379 p->prepause = 0; 380 } 381 } 382 if((langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20)) 383 p->prepause = 20; 384 385 break; 386 387 case phLIQUID: 388 case phNASAL: 389 p->amp = stress_amps[1]; // unless changed later 390 p->length = 256; // TEMPORARY 391 min_drop = 0; 392 393 if(p->newword) 394 { 395 if(prev->type==phLIQUID) 396 p->prepause = 25; 397 if(prev->type==phVOWEL) 398 p->prepause = 12; 399 } 400 401 if(next->type==phVOWEL) 402 { 403 pre_sonorant = 1; 404 } 405 else 406 if((prev->type==phVOWEL) || (prev->type == phLIQUID)) 407 { 408 p->length = prev->length; 409 p->pitch2 = last_pitch; 410 if(p->pitch2 < 7) 411 p->pitch2 = 7; 412 p->pitch1 = p->pitch2 - 8; 413 p->env = PITCHfall; 414 pre_voiced = 0; 415 416 if(p->type == phLIQUID) 417 { 418 p->length = speed1; 419//p->pitch1 = p->pitch2 - 20; // post vocalic [r/] 420 } 421 422 if(next->type == phVSTOP) 423 { 424 p->length = (p->length * 160)/100; 425 } 426 if(next->type == phVFRICATIVE) 427 { 428 p->length = (p->length * 120)/100; 429 } 430 } 431 else 432 { 433 p->pitch2 = last_pitch; 434 for(ix2=ix; ix2<n_phoneme_list; ix2++) 435 { 436 if(phoneme_list[ix2].type == phVOWEL) 437 { 438 p->pitch2 = phoneme_list[ix2].pitch2; 439 break; 440 } 441 } 442 p->pitch1 = p->pitch2-8; 443 p->env = PITCHfall; 444 pre_voiced = 0; 445 } 446 break; 447 448 case phVOWEL: 449 min_drop = 0; 450 next2 = &phoneme_list[ix+2]; 451 next3 = &phoneme_list[ix+3]; 452 453 if(stress > 7) stress = 7; 454 455 if(pre_sonorant) 456 p->amp = stress_amps[stress]-1; 457 else 458 p->amp = stress_amps[stress]; 459 460 if(emphasized) 461 p->amp = 25; 462 463 if(ix >= (n_phoneme_list-3)) 464 { 465 // last phoneme of a clause, limit its amplitude 466 if(p->amp > langopts.param[LOPT_MAXAMP_EOC]) 467 p->amp = langopts.param[LOPT_MAXAMP_EOC]; 468 } 469 470 // is the last syllable of a word ? 471 more_syllables=0; 472 end_of_clause = 0; 473 for(p2 = p+1; p2->newword== 0; p2++) 474 { 475 if((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC)) 476 more_syllables++; 477 478 if(p2->ph->code == phonPAUSE_CLAUSE) 479 end_of_clause = 2; 480 } 481 if(p2->ph->code == phonPAUSE_CLAUSE) 482 end_of_clause = 2; 483 484 if((p2->newword & 2) && (more_syllables==0)) 485 { 486 end_of_clause = 2; 487 } 488 489 // calc length modifier 490 if(more_syllables==0) 491 { 492 len = langopts.length_mods0[next2->ph->length_mod *10+ next->ph->length_mod]; 493 494 if((next->newword) && (langopts.word_gap & 0x20)) 495 { 496 // consider as a pause + first phoneme of the next word 497 length_mod = (len + langopts.length_mods0[next->ph->length_mod *10+ 1])/2; 498 } 499 else 500 length_mod = len; 501 } 502 else 503 { 504 length_mod = langopts.length_mods[next2->ph->length_mod *10+ next->ph->length_mod]; 505 506 if((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phFORTIS)) 507 length_mod -= 15; 508 } 509 510 if(more_syllables==0) 511 length_mod *= speed1; 512 else 513 if(more_syllables==1) 514 length_mod *= speed2; 515 else 516 length_mod *= speed3; 517 518 length_mod = length_mod / 128; 519// if(length_mod < 9) 520// length_mod = 9; // restrict how much lengths can be reduced 521 if(length_mod < 8) 522 length_mod = 8; // restrict how much lengths can be reduced 523 524 if(stress >= 7) 525 { 526 // tonic syllable, include a constant component so it doesn't decrease directly with speed 527 length_mod += 20; 528 if(emphasized) 529 length_mod += 10; 530 } 531 else 532 if(emphasized) 533 { 534 length_mod += 20; 535 } 536 537 if((len = stress_lengths[stress]) == 0) 538 len = stress_lengths[6]; 539 540 length_mod = (length_mod * len)/128; 541 542 if(end_of_clause == 2) 543 { 544 // this is the last syllable in the clause, lengthen it - more for short vowels 545 length_mod = length_mod * (256 + (280 - p->ph->std_length)/3)/256; 546 } 547 548if(p->type != phVOWEL) 549{ 550 length_mod = 256; // syllabic consonant 551 min_drop = 8; 552} 553 p->length = length_mod; 554 555 // pre-vocalic part 556 // set last-pitch 557 env2 = p->env; 558 if(env2 > 1) env2++; // version for use with preceding semi-vowel 559 560 if(p->tone_ph != 0) 561 { 562 pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect); 563 } 564 else 565 { 566 pitch_env = envelope_data[env2]; 567 } 568 569 pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256; 570 571 if(pre_sonorant || pre_voiced) 572 { 573 // set pitch for pre-vocalic part 574 if(pitch_start - last_pitch > 8) // was 9 575 last_pitch = pitch_start - 8; 576 577 prev->pitch1 = last_pitch; 578 prev->pitch2 = pitch_start; 579 if(last_pitch < pitch_start) 580 { 581 prev->env = PITCHrise; 582 p->env = env2; 583 } 584 else 585 { 586 prev->env = PITCHfall; 587 } 588 589 prev->length = length_mod; 590 591 prev->amp = p->amp; 592 if((prev->type != phLIQUID) && (prev->amp > 18)) 593 prev->amp = 18; 594 } 595 596 // vowel & post-vocalic part 597 next->synthflags &= ~SFLAG_SEQCONTINUE; 598 if(next->type == phNASAL && next2->type != phVOWEL) 599 next->synthflags |= SFLAG_SEQCONTINUE; 600 601 if(next->type == phLIQUID) 602 { 603 next->synthflags |= SFLAG_SEQCONTINUE; 604 605 if(next2->type == phVOWEL) 606 { 607 next->synthflags &= ~SFLAG_SEQCONTINUE; 608 } 609 610 if(next2->type != phVOWEL) 611 { 612 if(next->ph->mnemonic == ('/'*256+'r')) 613 { 614 next->synthflags &= ~SFLAG_SEQCONTINUE; 615// min_drop = 15; 616 } 617 } 618 } 619 620 if((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop)) 621 { 622 p->pitch1 = p->pitch2 - min_drop; 623 if(p->pitch1 < 0) 624 p->pitch1 = 0; 625 } 626 627 last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256; 628 pre_sonorant = 0; 629 pre_voiced = 0; 630 break; 631 } 632 } 633} // end of CalcLengths 634