/native/external/espeak/src/synth_mbrola.cpp
C++ | 753 lines | 581 code | 127 blank | 45 comment | 131 complexity | d89e79291fddd0e500a5c60f1c9ee4f9 MD5 | raw file
1/*************************************************************************** 2 * Copyright (C) 2005 to 2007 by Jonathan Duddington * 3 * email: jonsd@users.sourceforge.net * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 3 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write see: * 17 * <http://www.gnu.org/licenses/>. * 18 ***************************************************************************/ 19 20#include "StdAfx.h" 21 22#include <stdio.h> 23#include <ctype.h> 24#include <wctype.h> 25#include <stdlib.h> 26#include <string.h> 27#include <math.h> 28 29#include "speak_lib.h" 30#include "speech.h" 31#include "phoneme.h" 32#include "synthesize.h" 33#include "translate.h" 34#include "voice.h" 35 36extern int Read4Bytes(FILE *f); 37extern void SetPitch2(voice_t *voice, int pitch1, int pitch2, int *pitch_base, int *pitch_range); 38 39#ifdef USE_MBROLA_LIB 40 41extern unsigned char *outbuf; 42 43#ifndef PLATFORM_WINDOWS 44 45#include "mbrolib.h" 46void * mb_handle; 47 48#else 49#include <windows.h> 50typedef void (WINAPI *PROCVV)(void); 51typedef void (WINAPI *PROCVI)(int); 52typedef void (WINAPI *PROCVF)(float); 53typedef int (WINAPI *PROCIV)(); 54typedef int (WINAPI *PROCIC) (char *); 55typedef int (WINAPI *PROCISI)(short *,int); 56typedef char* (WINAPI *PROCVCI)(char *,int); 57 58PROCIC init_MBR; 59PROCIC write_MBR; 60PROCIV flush_MBR; 61PROCISI read_MBR; 62PROCVV close_MBR; 63PROCVV reset_MBR; 64PROCIV lastError_MBR; 65PROCVCI lastErrorStr_MBR; 66PROCVI setNoError_MBR; 67PROCVI setFreq_MBR; 68PROCVF setVolumeRatio_MBR; 69 70 71 72HINSTANCE hinstDllMBR = NULL; 73 74 75BOOL load_MBR() 76{ 77 if(hinstDllMBR != NULL) 78 return TRUE; // already loaded 79 80 if (!(hinstDllMBR=LoadLibrary("mbrola.dll"))) 81 return FALSE; 82 init_MBR =(PROCIC) GetProcAddress(hinstDllMBR,"init_MBR"); 83 write_MBR =(PROCIC) GetProcAddress(hinstDllMBR,"write_MBR"); 84 flush_MBR =(PROCIV) GetProcAddress(hinstDllMBR,"flush_MBR"); 85 read_MBR =(PROCISI) GetProcAddress(hinstDllMBR,"read_MBR"); 86 close_MBR =(PROCVV) GetProcAddress(hinstDllMBR,"close_MBR"); 87 reset_MBR =(PROCVV) GetProcAddress(hinstDllMBR,"reset_MBR"); 88 lastError_MBR =(PROCIV) GetProcAddress(hinstDllMBR,"lastError_MBR"); 89 lastErrorStr_MBR =(PROCVCI) GetProcAddress(hinstDllMBR,"lastErrorStr_MBR"); 90 setNoError_MBR =(PROCVI) GetProcAddress(hinstDllMBR,"setNoError_MBR"); 91 setVolumeRatio_MBR =(PROCVF) GetProcAddress(hinstDllMBR,"setVolumeRatio_MBR"); 92 return TRUE; 93} 94 95 96void unload_MBR() 97{ 98 if (hinstDllMBR) 99 { 100 FreeLibrary (hinstDllMBR); 101 hinstDllMBR=NULL; 102 } 103} 104 105#endif // windows 106#endif // USE_MBROLA_LIB 107 108 109MBROLA_TAB *mbrola_tab = NULL; 110int mbrola_control = 0; 111 112 113 114 115espeak_ERROR LoadMbrolaTable(const char *mbrola_voice, const char *phtrans, int srate) 116{//=================================================================================== 117// Load a phoneme name translation table from espeak-data/mbrola 118 119 int size; 120 int ix; 121 int *pw; 122 FILE *f_in; 123 char path[sizeof(path_home)+15]; 124 125 mbrola_name[0] = 0; 126 if(mbrola_voice == NULL) 127 { 128 samplerate = samplerate_native; 129 SetParameter(espeakVOICETYPE,0,0); 130 return(EE_OK); 131 } 132 133 sprintf(path,"%s/mbrola/%s",path_home,mbrola_voice); 134#ifdef USE_MBROLA_LIB 135#ifdef PLATFORM_WINDOWS 136 if(load_MBR() == FALSE) // load mbrola.dll 137 return(EE_INTERNAL_ERROR); 138 139 if(init_MBR(path) != 0) // initialise the required mbrola voice 140 return(EE_NOT_FOUND); 141 142 setNoError_MBR(1); // don't stop on phoneme errors 143#else 144 mb_handle = mbrolib_init(srate); 145 mbrolib_parameter m_parameters; 146 147 if(mb_handle == NULL) 148 return(EE_INTERNAL_ERROR); 149 150 MBROLIB_ERROR a_status = mbrolib_set_voice(mb_handle, mbrola_voice); 151 if(a_status != MBROLIB_OK) 152 return(EE_NOT_FOUND); 153#endif // not windows 154#endif // USE_MBROLA_LIB 155 156 // read eSpeak's mbrola phoneme translation data, eg. en1_phtrans 157 sprintf(path,"%s/mbrola_ph/%s",path_home,phtrans); 158 size = GetFileLength(path); 159 if((f_in = fopen(path,"r")) == NULL) 160 return(EE_NOT_FOUND); 161 162 if((mbrola_tab = (MBROLA_TAB *)realloc(mbrola_tab,size)) == NULL) 163 { 164 fclose(f_in); 165 return(EE_INTERNAL_ERROR); 166 } 167 168 mbrola_control = Read4Bytes(f_in); 169 pw = (int *)mbrola_tab; 170 for(ix=4; ix<size; ix+=4) 171 { 172 *pw++ = Read4Bytes(f_in); 173 } 174 fread(mbrola_tab,size,1,f_in); 175 fclose(f_in); 176 177 178#ifdef USE_MBROLA_LIB 179#ifdef PLATFORM_WINDOWS 180 setVolumeRatio_MBR((float)(mbrola_control & 0xff) /16.0); 181#else 182 mbrolib_get_parameter(mb_handle,&m_parameters); 183 m_parameters.ignore_error = 1; 184 m_parameters.volume_ratio = (float)(mbrola_control & 0xff) /16.0; 185 mbrolib_set_parameter(mb_handle,&m_parameters); 186#endif // not windows 187#endif // USE_MBROLA_LIB 188 189 option_quiet = 1; 190 samplerate = srate; 191 if(srate == 22050) 192 SetParameter(espeakVOICETYPE,0,0); 193 else 194 SetParameter(espeakVOICETYPE,1,0); 195 strcpy(mbrola_name,mbrola_voice); 196 return(EE_OK); 197} // end of LoadMbrolaTable 198 199 200int GetMbrName(PHONEME_LIST *plist, PHONEME_TAB *ph, PHONEME_TAB *ph_prev, PHONEME_TAB *ph_next, int *name2, int *split, int *control) 201{//============================================================================================================== 202// Look up a phoneme in the mbrola phoneme name translation table 203// It may give none, 1, or 2 mbrola phonemes 204 int mnem = ph->mnemonic; 205 MBROLA_TAB *pr; 206 PHONEME_TAB *other_ph; 207 int found = 0; 208 209 // control 210 // bit 0 skip the next phoneme 211 // bit 1 match this and Previous phoneme 212 // bit 2 only at the start of a word 213 // bit 3 don't match two phonemes across a word boundary 214 215 pr = mbrola_tab; 216 while(pr->name != 0) 217 { 218 if(mnem == pr->name) 219 { 220 if(pr->next_phoneme == 0) 221 found = 1; 222 else 223 if((pr->next_phoneme == ':') && (plist->synthflags & SFLAG_LENGTHEN)) 224 { 225 found = 1; 226 } 227 else 228 { 229 if(pr->control & 2) 230 other_ph = ph_prev; 231 else 232 if((pr->control & 8) && ((plist+1)->newword)) 233 other_ph = phoneme_tab[phPAUSE]; // don't match the next phoneme over a word boundary 234 else 235 other_ph = ph_next; 236 237 if((pr->next_phoneme == other_ph->mnemonic) || 238 ((pr->next_phoneme == 2) && (other_ph->type == phVOWEL)) || 239 ((pr->next_phoneme == '_') && (other_ph->type == phPAUSE))) 240 { 241 found = 1; 242 } 243 } 244 245 if((pr->control & 4) && (plist->newword == 0)) // only at start of word 246 found = 0; 247 248 if(found) 249 { 250 *name2 = pr->mbr_name2; 251 *split = pr->percent; 252 *control = pr->control; 253 return(pr->mbr_name); 254 } 255 } 256 257 pr++; 258 } 259 *name2=0; 260 *split=0; 261 *control=0; 262 return(mnem); 263} 264 265 266static char *WritePitch(int env, int pitch1, int pitch2, int split, int final) 267{//=========================================================================== 268// final=1: only give the final pitch value. 269 int x; 270 int ix; 271 int pitch_base; 272 int pitch_range; 273 int p1,p2,p_end; 274 unsigned char *pitch_env; 275 int max = -1; 276 int min = 999; 277 int y_max=0; 278 int y_min=0; 279 int env100 = 80; // apply the pitch change only over this proportion of the mbrola phoneme(s) 280 int y2; 281 int y[4]; 282 int env_split; 283 char buf[50]; 284 static char output[50]; 285 286 output[0] = 0; 287 pitch_env = envelope_data[env]; 288 289 290 SetPitch2(voice, pitch1, pitch2, &pitch_base, &pitch_range); 291 292 293 env_split = (split * 128)/100; 294 if(env_split < 0) 295 env_split = 0-env_split; 296 297 // find max and min in the pitch envelope 298 for(x=0; x<128; x++) 299 { 300 if(pitch_env[x] > max) 301 { 302 max = pitch_env[x]; 303 y_max = x; 304 } 305 if(pitch_env[x] < min) 306 { 307 min = pitch_env[x]; 308 y_min = x; 309 } 310 } 311 // set an additional pitch point half way through the phoneme. 312 // but look for a maximum or a minimum and use that instead 313 y[2] = 64; 314 if((y_max > 0) && (y_max < 127)) 315 { 316 y[2] = y_max; 317 } 318 if((y_min > 0) && (y_min < 127)) 319 { 320 y[2] = y_min; 321 } 322 y[1] = y[2] / 2; 323 y[3] = y[2] + (127 - y[2])/2; 324 325 // set initial pitch 326 p1 = ((pitch_env[0]*pitch_range)>>8) + pitch_base; // Hz << 12 327 p_end = ((pitch_env[127]*pitch_range)>>8) + pitch_base; 328 329 330 if(split >= 0) 331 { 332 sprintf(buf," 0 %d",p1/4096); 333 strcat(output,buf); 334 } 335 336 // don't use intermediate pitch points for linear rise and fall 337 if(env > 1) 338 { 339 for(ix=1; ix<4; ix++) 340 { 341 p2 = ((pitch_env[y[ix]]*pitch_range)>>8) + pitch_base; 342 343 if(split > 0) 344 { 345 y2 = (y[ix] * env100)/env_split; 346 } 347 else 348 if(split < 0) 349 { 350 y2 = ((y[ix]-env_split) * env100)/env_split; 351 } 352 else 353 { 354 y2 = (y[ix] * env100)/128; 355 } 356 if((y2 > 0) && (y2 <= env100)) 357 { 358 sprintf(buf," %d %d",y2,p2/4096); 359 strcat(output,buf); 360 } 361 } 362 } 363 364 p_end = p_end/4096; 365 if(split <= 0) 366 { 367 sprintf(buf," %d %d",env100,p_end); 368 strcat(output,buf); 369 } 370 if(env100 < 100) 371 { 372 sprintf(buf," %d %d",100,p_end); 373 strcat(output,buf); 374 } 375 strcat(output,"\n"); 376 377 if(final) 378 sprintf(output,"\t100 %d\n",p_end); 379 return(output); 380} // end of WritePitch 381 382 383#ifdef USE_MBROLA_LIB 384 385static void MbrolaMarker(int type, int char_posn, int length, int value) 386{//===================================================================== 387 388 MarkerEvent(type,(char_posn & 0xffffff) | (length << 24),value,outbuf); 389 390} 391 392 393static void MbrolaEmbedded(int &embix, int sourceix) 394{//================================================= 395 // There were embedded commands in the text at this point 396 unsigned int word; // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command 397 unsigned int value; 398 int command; 399 int sign=0; 400 401 do { 402 word = embedded_list[embix++]; 403 value = word >> 8; 404 command = word & 0x1f; 405 406 if((word & 0x60) == 0x60) 407 sign = -1; 408 else 409 if((word & 0x60) == 0x40) 410 sign = 1; 411 412 if(command < N_EMBEDDED_VALUES) 413 { 414 if(sign == 0) 415 embedded_value[command] = value; 416 else 417 embedded_value[command] += (value * sign); 418 } 419 420 switch(command & 0x1f) 421 { 422 case EMBED_M: // named marker 423 MbrolaMarker(espeakEVENT_MARK, (sourceix & 0x7ff) + clause_start_char, 0, value); 424 break; 425 } 426 } while ((word & 0x80) == 0); 427} 428 429 430#ifdef PLATFORM_WINDOWS 431int MbrolaSynth(char *p_mbrola) 432{//============================ 433// p_mbrola is a string of mbrola pho lines - Windows 434 int len; 435 int finished; 436 int result=0; 437 438 if(synth_callback == NULL) 439 return(1); 440 441 if(p_mbrola == NULL) 442 flush_MBR(); 443 else 444 result = write_MBR(p_mbrola); 445 446 447 finished = 0; 448 while(!finished && ((len = read_MBR((short *)outbuf, outbuf_size/2)) > 0)) 449 { 450 out_ptr = outbuf + len*2; 451 452 if(event_list) 453 { 454 event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list 455 event_list[event_list_ix].user_data = 0; 456 } 457 count_samples += len; 458 finished = synth_callback((short *)outbuf, len, event_list); 459 event_list_ix=0; 460 } 461 462 if(finished) 463 { 464 // cancelled by user, discard any unused mbrola speech 465 flush_MBR(); 466 while((len = read_MBR((short *)outbuf, outbuf_size/2)) > 0); 467 } 468 return(finished); 469} // end of SynthMbrola 470#else 471 472int MbrolaSynth(char *p_mbrola) 473{//============================ 474// p_mbrola is a string of mbrola pho lines - Linux 475 476// This is wrong 477// It must be called from WavegenFill() 478 479 int len; 480 int finished; 481 int result=0; 482 483 if(synth_callback == NULL) 484 return(1); 485 486 if(p_mbrola == NULL) 487 mbrolib_flush(mb_handle); 488 else 489 result = mbrolib_write(mb_handle,p_mbrola,strlen(p_mbrola)); 490 491 492 finished = 0; 493 while(!finished && (mbrolib_read(mb_handle, (short *)out_ptr, (out_end - out_ptr)/2, &len) == MBROLIB_OK)) 494 { 495 if(len == 0) 496 break; 497 498 out_ptr += (len*2); 499 500 if(event_list) 501 { 502 event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list 503 event_list[event_list_ix].user_data = 0; 504 } 505 count_samples += len; 506 finished = synth_callback((short *)outbuf, len, event_list); 507 event_list_ix=0; 508 } 509 510 if(finished) 511 { 512 // cancelled by user, discard any unused mbrola speech 513 mbrolib_flush(mb_handle); 514 while(mbrolib_read(mb_handle, (short *)outbuf, outbuf_size/2, &len) == MBROLIB_OK) 515 { 516 if(len == 0) 517 break; 518 } 519 } 520 return(finished); 521} // end of SynthMbrola 522#endif // not windows 523#endif // USE_MBROLA_LIB 524 525 526 527void MbrolaTranslate(PHONEME_LIST *plist, int n_phonemes, FILE *f_mbrola) 528{//====================================================================== 529// Generate a mbrola pho file 530 unsigned int name; 531 int phix; 532 int len; 533 int len1; 534 PHONEME_TAB *ph; 535 PHONEME_TAB *ph_next; 536 PHONEME_TAB *ph_prev; 537 PHONEME_LIST *p; 538 PHONEME_LIST *next; 539 PHONEME_LIST *prev; 540 int pause = 0; 541 int released; 542 int name2; 543 int control; 544 int done; 545 int len_percent; 546 const char *final_pitch; 547 char buf[80]; 548 char mbr_buf[120]; 549 550#ifdef USE_MBROLA_LIB 551 int embedded_ix=0; 552 int word_count=0; 553 554 event_list_ix = 0; 555 out_ptr = outbuf; 556#ifdef PLATFORM_WINDOWS 557 setNoError_MBR(1); // don't stop on phoneme errors 558#endif 559#else 560// fprintf(f_mbrola,";; v=%.2f\n",(float)(mbrola_control & 0xff)/16.0); // ;; v= has no effect on mbrola 561#endif 562 563 for(phix=1; phix < n_phonemes; phix++) 564 { 565 mbr_buf[0] = 0; 566 567 p = &plist[phix]; 568 next = &plist[phix+1]; 569 prev = &plist[phix-1]; 570 ph = p->ph; 571 ph_prev = plist[phix-1].ph; 572 ph_next = plist[phix+1].ph; 573 574#ifdef USE_MBROLA_LIB 575 if(p->synthflags & SFLAG_EMBEDDED) 576 { 577 MbrolaEmbedded(embedded_ix, p->sourceix); 578 } 579 if(p->newword & 4) 580 MbrolaMarker(espeakEVENT_SENTENCE, (p->sourceix & 0x7ff) + clause_start_char, 0, count_sentences); 581 582 if(p->newword & 1) 583 MbrolaMarker(espeakEVENT_WORD, (p->sourceix & 0x7ff) + clause_start_char, p->sourceix >> 11, clause_start_word + word_count++); 584#endif 585 586 name = GetMbrName(p,ph,ph_prev,ph_next,&name2,&len_percent,&control); 587 if(control & 1) 588 phix++; 589 590 if(name == 0) 591 continue; // ignore this phoneme 592 593 if((ph->type == phPAUSE) && (name == ph->mnemonic)) 594 { 595 // a pause phoneme, which has not been changed by the translation 596 name = '_'; 597 len = (p->length * speed_factor1)/256; 598// if(len == 0) continue; 599 if(len == 0) 600 len = 1; 601 } 602 else 603 len = (80 * speed_factor2)/256; 604 605 sprintf(buf,"%s\t",WordToString(name)); 606 strcat(mbr_buf,buf); 607 608 if(name2 == '_') 609 { 610 // add a pause after this phoneme 611 pause = PauseLength(len_percent,0); 612 name2 = 0; 613 } 614 615 done = 0; 616 final_pitch = ""; 617 618 switch(ph->type) 619 { 620 case phVOWEL: 621 len = ph->std_length; 622 if(p->synthflags & SFLAG_LENGTHEN) 623 len += phoneme_tab[phonLENGTHEN]->std_length; // phoneme was followed by an extra : symbol 624 625 if(ph_next->type == phPAUSE) 626 len += 50; // lengthen vowels before a pause 627 len = (len * p->length)/256; 628 629 if(name2 == 0) 630 { 631 sprintf(buf,"%d\t%s", len, WritePitch(p->env,p->pitch1,p->pitch2,0,0)); 632 strcat(mbr_buf,buf); 633 } 634 else 635 { 636 len1 = (len * len_percent)/100; 637 sprintf(buf,"%d\t%s", len1, WritePitch(p->env,p->pitch1,p->pitch2,len_percent,0)); 638 strcat(mbr_buf,buf); 639 640 sprintf(buf,"%s\t%d\t%s", WordToString(name2), len-len1, WritePitch(p->env,p->pitch1,p->pitch2,-len_percent,0)); 641 strcat(mbr_buf,buf); 642 } 643 done = 1; 644 break; 645 646 case phSTOP: 647 released = 0; 648 if(next->type==phVOWEL) released = 1; 649 if(next->type==phLIQUID && !next->newword) released = 1; 650 651 if(released) 652 len = DoSample(p->ph,next->ph,2,0,-1); 653 else 654 len = DoSample(p->ph,phoneme_tab[phonPAUSE],2,0,-1); 655 len = (len * 1000)/samplerate; // convert to mS 656 len += PauseLength(p->prepause,1); 657 break; 658 659 case phVSTOP: 660 len = (80 * speed_factor2)/256; 661 break; 662 663 case phFRICATIVE: 664 len = 0; 665 if(p->synthflags & SFLAG_LENGTHEN) 666 len = DoSample(ph,ph_next,2,p->length,-1); // play it twice for [s:] etc. 667 len += DoSample(ph,ph_next,2,p->length,-1); 668 669 len = (len * 1000)/samplerate; // convert to mS 670 break; 671 672 case phNASAL: 673 if(next->type != phVOWEL) 674 { 675 len = DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,-1); 676 len = (len * 1000)/samplerate; 677 if(next->type == phPAUSE) 678 len += 50; 679 final_pitch = WritePitch(p->env,p->pitch1,p->pitch2,0,1); 680 } 681 break; 682 683 case phLIQUID: 684 if(next->type == phPAUSE) 685 { 686 len += 50; 687 final_pitch = WritePitch(p->env,p->pitch1,p->pitch2,0,1); 688 } 689 break; 690 } 691 692 if(!done) 693 { 694 if(name2 != 0) 695 { 696 len1 = (len * len_percent)/100; 697 sprintf(buf,"%d\n%s\t",len1,WordToString(name2)); 698 strcat(mbr_buf,buf); 699 len -= len1; 700 } 701 sprintf(buf,"%d%s\n",len,final_pitch); 702 strcat(mbr_buf,buf); 703 } 704 705 if(pause) 706 { 707 sprintf(buf,"_ \t%d\n",PauseLength(pause,0)); 708 strcat(mbr_buf,buf); 709 pause = 0; 710 } 711 712 if(f_mbrola) 713 { 714 fwrite(mbr_buf,1,strlen(mbr_buf),f_mbrola); // write .pho to a file 715 } 716 else 717 { 718#ifdef USE_MBROLA_LIB 719 if(MbrolaSynth(mbr_buf) != 0) 720 return; 721#endif 722 } 723 } 724 725#ifdef USE_MBROLA_LIB 726 MbrolaSynth(NULL); 727#endif 728} // end of MbrolaTranslate 729 730 731#ifdef TEST_MBROLA 732 733PHONEME_LIST mbrola_phlist; 734int mbrola_n_ph; 735int mbrola_phix; 736 737 738int MbrolaFill(int fill_zeros) 739{//=========================== 740} 741 742int MbrolaGenerate(PHONEME_LIST *phoneme_list, int *n_ph, int resume) 743{//================================================================== 744 if(resume == 0) 745 { 746 mbrola_phlist = phoneme_list; 747 mbrola_n_ph = n_ph; 748 mbrola_phix = 0; 749 } 750 751 resume(0); // finished phoneme list 752} 753#endif