PageRenderTime 62ms CodeModel.GetById 12ms app.highlight 44ms RepoModel.GetById 1ms app.codeStats 0ms

/native/external/espeak/src/synth_mbrola.cpp

http://eyes-free.googlecode.com/
C++ | 753 lines | 581 code | 127 blank | 45 comment | 131 complexity | d89e79291fddd0e500a5c60f1c9ee4f9 MD5 | raw file
  1/***************************************************************************
  2 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
  3 *   email: jonsd@users.sourceforge.net                                    *
  4 *                                                                         *
  5 *   This program is free software; you can redistribute it and/or modify  *
  6 *   it under the terms of the GNU General Public License as published by  *
  7 *   the Free Software Foundation; either version 3 of the License, or     *
  8 *   (at your option) any later version.                                   *
  9 *                                                                         *
 10 *   This program is distributed in the hope that it will be useful,       *
 11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 13 *   GNU General Public License for more details.                          *
 14 *                                                                         *
 15 *   You should have received a copy of the GNU General Public License     *
 16 *   along with this program; if not, see:                                 *
 17 *               <http://www.gnu.org/licenses/>.                           *
 18 ***************************************************************************/
 19
 20#include "StdAfx.h"
 21
 22#include <stdio.h>
 23#include <ctype.h>
 24#include <wctype.h>
 25#include <stdlib.h>
 26#include <string.h>
 27#include <math.h>
 28
 29#include "speak_lib.h"
 30#include "speech.h"
 31#include "phoneme.h"
 32#include "synthesize.h"
 33#include "translate.h"
 34#include "voice.h"
 35
 36extern int Read4Bytes(FILE *f);
 37extern void SetPitch2(voice_t *voice, int pitch1, int pitch2, int *pitch_base, int *pitch_range);
 38
 39#ifdef USE_MBROLA_LIB
 40
 41extern unsigned char *outbuf;
 42
 43#ifndef PLATFORM_WINDOWS
 44
 45#include "mbrolib.h"
 46void * mb_handle;
 47
 48#else
 49#include <windows.h>
 50typedef void (WINAPI *PROCVV)(void);
 51typedef void (WINAPI *PROCVI)(int);
 52typedef void (WINAPI *PROCVF)(float);
 53typedef int (WINAPI *PROCIV)();
 54typedef int (WINAPI *PROCIC) (char *);
 55typedef int (WINAPI *PROCISI)(short *,int);
 56typedef char* (WINAPI *PROCVCI)(char *,int);
 57
 58PROCIC		init_MBR;
 59PROCIC		write_MBR;
 60PROCIV		flush_MBR;
 61PROCISI		read_MBR;
 62PROCVV		close_MBR;
 63PROCVV		reset_MBR;
 64PROCIV		lastError_MBR;
 65PROCVCI		lastErrorStr_MBR;
 66PROCVI		setNoError_MBR;
 67PROCVI		setFreq_MBR;
 68PROCVF		setVolumeRatio_MBR;
 69
 70
 71
 72HINSTANCE	hinstDllMBR = NULL;
 73
 74
 75BOOL load_MBR()
 76{
 77	if(hinstDllMBR != NULL)
 78		return TRUE;   // already loaded 
 79
 80	if (!(hinstDllMBR=LoadLibrary("mbrola.dll")))
 81		return FALSE;
 82	init_MBR			=(PROCIC) GetProcAddress(hinstDllMBR,"init_MBR");
 83	write_MBR			=(PROCIC) GetProcAddress(hinstDllMBR,"write_MBR");
 84	flush_MBR			=(PROCIV) GetProcAddress(hinstDllMBR,"flush_MBR");
 85	read_MBR			=(PROCISI) GetProcAddress(hinstDllMBR,"read_MBR");
 86	close_MBR			=(PROCVV) GetProcAddress(hinstDllMBR,"close_MBR");
 87	reset_MBR			=(PROCVV) GetProcAddress(hinstDllMBR,"reset_MBR");
 88	lastError_MBR		=(PROCIV) GetProcAddress(hinstDllMBR,"lastError_MBR");
 89	lastErrorStr_MBR	=(PROCVCI) GetProcAddress(hinstDllMBR,"lastErrorStr_MBR");
 90	setNoError_MBR		=(PROCVI) GetProcAddress(hinstDllMBR,"setNoError_MBR");
 91	setVolumeRatio_MBR	=(PROCVF) GetProcAddress(hinstDllMBR,"setVolumeRatio_MBR");
 92	return TRUE;
 93}
 94
 95
 96void unload_MBR()
 97{
 98	if (hinstDllMBR)
 99	{
100		FreeLibrary (hinstDllMBR);
101		hinstDllMBR=NULL;
102	}
103}
104
105#endif   // windows
106#endif   // USE_MBROLA_LIB
107
108
109MBROLA_TAB *mbrola_tab = NULL;
110int mbrola_control = 0;
111
112
113
114
115espeak_ERROR LoadMbrolaTable(const char *mbrola_voice, const char *phtrans, int srate)
116{//===================================================================================
117// Load a phoneme name translation table from espeak-data/mbrola
118
119	int size;
120	int ix;
121	int *pw;
122	FILE *f_in;
123	char path[sizeof(path_home)+15];
124
125	mbrola_name[0] = 0;
126	if(mbrola_voice == NULL)
127	{
128		samplerate = samplerate_native;
129		SetParameter(espeakVOICETYPE,0,0);
130		return(EE_OK);
131	}
132
133	sprintf(path,"%s/mbrola/%s",path_home,mbrola_voice);
134#ifdef USE_MBROLA_LIB
135#ifdef PLATFORM_WINDOWS
136	if(load_MBR() == FALSE)     // load mbrola.dll
137		return(EE_INTERNAL_ERROR); 
138
139	if(init_MBR(path) != 0)      // initialise the required mbrola voice
140		return(EE_NOT_FOUND);
141
142	setNoError_MBR(1);     // don't stop on phoneme errors
143#else
144	mb_handle = mbrolib_init(srate);
145	mbrolib_parameter m_parameters;
146
147	if(mb_handle == NULL)
148		return(EE_INTERNAL_ERROR);
149
150  MBROLIB_ERROR a_status = mbrolib_set_voice(mb_handle, mbrola_voice);
151  if(a_status != MBROLIB_OK)
152		return(EE_NOT_FOUND);
153#endif   // not windows
154#endif   // USE_MBROLA_LIB
155
156	// read eSpeak's mbrola phoneme translation data, eg. en1_phtrans
157	sprintf(path,"%s/mbrola_ph/%s",path_home,phtrans);
158	size = GetFileLength(path);
159	if((f_in = fopen(path,"r")) == NULL)
160		return(EE_NOT_FOUND);
161
162	if((mbrola_tab = (MBROLA_TAB *)realloc(mbrola_tab,size)) == NULL)
163	{
164		fclose(f_in);
165		return(EE_INTERNAL_ERROR);
166	}
167
168	mbrola_control = Read4Bytes(f_in);
169	pw = (int *)mbrola_tab;
170	for(ix=4; ix<size; ix+=4)
171	{
172		*pw++ = Read4Bytes(f_in);
173	}
174	fread(mbrola_tab,size,1,f_in);
175	fclose(f_in);
176
177
178#ifdef USE_MBROLA_LIB
179#ifdef PLATFORM_WINDOWS
180	setVolumeRatio_MBR((float)(mbrola_control & 0xff) /16.0);
181#else
182	mbrolib_get_parameter(mb_handle,&m_parameters);
183	m_parameters.ignore_error = 1;
184	m_parameters.volume_ratio = (float)(mbrola_control & 0xff) /16.0;
185	mbrolib_set_parameter(mb_handle,&m_parameters);
186#endif    // not windows
187#endif    // USE_MBROLA_LIB
188
189	option_quiet = 1;
190	samplerate = srate;
191	if(srate == 22050)
192		SetParameter(espeakVOICETYPE,0,0);
193	else
194		SetParameter(espeakVOICETYPE,1,0);
195	strcpy(mbrola_name,mbrola_voice);
196	return(EE_OK);
197}  // end of LoadMbrolaTable
198
199
200int GetMbrName(PHONEME_LIST *plist, PHONEME_TAB *ph, PHONEME_TAB *ph_prev, PHONEME_TAB *ph_next, int *name2, int *split, int *control)
201{//==============================================================================================================
202// Look up a phoneme in the mbrola phoneme name translation table
203// It may give none, 1, or 2 mbrola phonemes
204	int mnem = ph->mnemonic;
205	MBROLA_TAB *pr;
206	PHONEME_TAB *other_ph;
207	int found = 0;
208
209	// control
210	// bit 0  skip the next phoneme
211	// bit 1  match this and Previous phoneme
212	// bit 2  only at the start of a word
213	// bit 3  don't match two phonemes across a word boundary
214
215	pr = mbrola_tab;
216	while(pr->name != 0)
217	{
218		if(mnem == pr->name)
219		{
220			if(pr->next_phoneme == 0)
221				found = 1;
222			else
223			if((pr->next_phoneme == ':') && (plist->synthflags & SFLAG_LENGTHEN))
224			{
225				found = 1;
226			}
227			else
228			{
229				if(pr->control & 2)
230					other_ph = ph_prev;
231				else
232				if((pr->control & 8) && ((plist+1)->newword))
233					other_ph = phoneme_tab[phPAUSE];  // don't match the next phoneme over a word boundary
234				else
235					other_ph = ph_next;
236
237				if((pr->next_phoneme == other_ph->mnemonic) ||
238					((pr->next_phoneme == 2) && (other_ph->type == phVOWEL)) ||
239					((pr->next_phoneme == '_') && (other_ph->type == phPAUSE)))
240				{
241					found = 1;
242				}
243			}
244
245			if((pr->control & 4) && (plist->newword == 0))  // only at start of word
246				found = 0;
247
248			if(found)
249			{
250				*name2 = pr->mbr_name2;
251				*split = pr->percent;
252				*control = pr->control;
253				return(pr->mbr_name);
254			}
255		}
256
257		pr++;
258	}
259	*name2=0;
260	*split=0;
261	*control=0;
262	return(mnem);
263}
264
265
266static char *WritePitch(int env, int pitch1, int pitch2, int split, int final)
267{//===========================================================================
268// final=1:  only give the final pitch value.
269	int x;
270	int ix;
271	int pitch_base;
272	int pitch_range;
273	int p1,p2,p_end;
274	unsigned char *pitch_env;
275	int max = -1;
276	int min = 999;
277	int y_max=0;
278	int y_min=0;
279	int env100 = 80;  // apply the pitch change only over this proportion of the mbrola phoneme(s)
280	int y2;
281	int y[4];
282	int env_split;
283	char buf[50];
284	static char output[50];
285
286	output[0] = 0;
287	pitch_env = envelope_data[env];
288
289
290	SetPitch2(voice, pitch1, pitch2, &pitch_base, &pitch_range);
291
292
293	env_split = (split * 128)/100;
294	if(env_split < 0)
295		env_split = 0-env_split;
296
297	// find max and min in the pitch envelope
298	for(x=0; x<128; x++)
299	{
300		if(pitch_env[x] > max)
301		{
302			max = pitch_env[x];
303			y_max = x;
304		}
305		if(pitch_env[x] < min)
306		{
307			min = pitch_env[x];
308			y_min = x;
309		}
310	}
311	// set an additional pitch point half way through the phoneme.
312	// but look for a maximum or a minimum and use that instead
313	y[2] = 64;
314	if((y_max > 0) && (y_max < 127))
315	{
316		y[2] = y_max;
317	}
318	if((y_min > 0) && (y_min < 127))
319	{
320		y[2] = y_min;
321	}
322	y[1] = y[2] / 2;
323	y[3] = y[2] + (127 - y[2])/2;
324
325	// set initial pitch
326	p1 = ((pitch_env[0]*pitch_range)>>8) + pitch_base;   // Hz << 12
327	p_end = ((pitch_env[127]*pitch_range)>>8) + pitch_base;
328
329
330	if(split >= 0)
331	{
332		sprintf(buf," 0 %d",p1/4096);
333		strcat(output,buf);
334	}
335
336	// don't use intermediate pitch points for linear rise and fall
337	if(env > 1)
338	{
339		for(ix=1; ix<4; ix++)
340		{
341			p2 = ((pitch_env[y[ix]]*pitch_range)>>8) + pitch_base;
342
343			if(split > 0)
344			{
345				y2 = (y[ix] * env100)/env_split;
346			}
347			else
348			if(split < 0)
349			{
350				y2 = ((y[ix]-env_split) * env100)/env_split;
351			}
352			else
353			{
354				y2 = (y[ix] * env100)/128;
355			}
356			if((y2 > 0) && (y2 <= env100))
357			{
358				sprintf(buf," %d %d",y2,p2/4096);
359				strcat(output,buf);
360			}
361		}
362	}
363
364	p_end = p_end/4096;
365	if(split <= 0)
366	{
367		sprintf(buf," %d %d",env100,p_end);
368		strcat(output,buf);
369	}
370	if(env100 < 100)
371	{
372		sprintf(buf," %d %d",100,p_end);
373		strcat(output,buf);
374	}
375	strcat(output,"\n");
376
377	if(final)
378		sprintf(output,"\t100 %d\n",p_end);
379	return(output);
380}  // end of WritePitch
381
382
383#ifdef USE_MBROLA_LIB
384
385static void MbrolaMarker(int type, int char_posn, int length, int value)
386{//=====================================================================
387
388	MarkerEvent(type,(char_posn & 0xffffff) | (length << 24),value,outbuf);
389
390}
391
392
393static void MbrolaEmbedded(int &embix, int sourceix)
394{//=================================================
395	// There were embedded commands in the text at this point
396	unsigned int word;  // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command
397	unsigned int value;
398	int command;
399	int sign=0;
400
401	do {
402		word = embedded_list[embix++];
403		value = word >> 8;
404		command = word & 0x1f;
405
406		if((word & 0x60) == 0x60)
407			sign = -1;
408		else
409		if((word & 0x60) == 0x40)
410			sign = 1;
411
412		if(command < N_EMBEDDED_VALUES)
413		{
414			if(sign == 0)
415				embedded_value[command] = value;
416			else
417				embedded_value[command] += (value * sign);
418		}
419
420		switch(command & 0x1f)
421		{
422		case EMBED_M:   // named marker
423			MbrolaMarker(espeakEVENT_MARK, (sourceix & 0x7ff) + clause_start_char, 0, value);
424			break;
425		}
426	} while ((word & 0x80) == 0);
427}
428
429
430#ifdef PLATFORM_WINDOWS
431int MbrolaSynth(char *p_mbrola)
432{//============================
433// p_mbrola is a string of mbrola pho lines - Windows
434	int len;
435	int finished;
436	int result=0;
437
438	if(synth_callback == NULL)
439		return(1);
440
441	if(p_mbrola == NULL)
442		flush_MBR();
443	else
444		result = write_MBR(p_mbrola);
445
446
447	finished = 0;
448	while(!finished && ((len = read_MBR((short *)outbuf, outbuf_size/2)) > 0))
449	{
450		out_ptr = outbuf + len*2;
451
452		if(event_list)
453		{
454			event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
455			event_list[event_list_ix].user_data = 0;
456		}
457		count_samples += len;
458		finished = synth_callback((short *)outbuf, len, event_list);
459		event_list_ix=0;
460	}
461
462	if(finished)
463	{
464		// cancelled by user, discard any unused mbrola speech
465		flush_MBR();
466		while((len = read_MBR((short *)outbuf, outbuf_size/2)) > 0);
467	}
468	return(finished);
469}  // end of SynthMbrola
470#else
471
472int MbrolaSynth(char *p_mbrola)
473{//============================
474// p_mbrola is a string of mbrola pho lines - Linux
475
476// This is wrong
477// It must be called from WavegenFill()
478
479	int len;
480	int finished;
481	int result=0;
482
483	if(synth_callback == NULL)
484		return(1);
485
486	if(p_mbrola == NULL)
487		mbrolib_flush(mb_handle);
488	else
489		result = mbrolib_write(mb_handle,p_mbrola,strlen(p_mbrola));
490
491
492	finished = 0;
493	while(!finished && (mbrolib_read(mb_handle, (short *)out_ptr, (out_end - out_ptr)/2, &len) == MBROLIB_OK))
494	{
495		if(len == 0)
496			break;
497
498		out_ptr += (len*2);
499
500		if(event_list)
501		{
502			event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
503			event_list[event_list_ix].user_data = 0;
504		}
505		count_samples += len;
506		finished = synth_callback((short *)outbuf, len, event_list);
507		event_list_ix=0;
508	}
509
510	if(finished)
511	{
512		// cancelled by user, discard any unused mbrola speech
513		mbrolib_flush(mb_handle);
514		while(mbrolib_read(mb_handle, (short *)outbuf, outbuf_size/2, &len) == MBROLIB_OK)
515		{
516			if(len == 0)
517				break;
518		}
519	}
520	return(finished);
521}  // end of SynthMbrola
522#endif  // not windows
523#endif  // USE_MBROLA_LIB
524
525
526
527void MbrolaTranslate(PHONEME_LIST *plist, int n_phonemes, FILE *f_mbrola)
528{//======================================================================
529// Generate a mbrola pho file
530	unsigned int name;
531	int phix;
532	int len;
533	int len1;
534	PHONEME_TAB *ph;
535	PHONEME_TAB *ph_next;
536	PHONEME_TAB *ph_prev;
537	PHONEME_LIST *p;
538	PHONEME_LIST *next;
539	PHONEME_LIST *prev;
540	int pause = 0;
541	int released;
542	int name2;
543	int control;
544	int done;
545	int len_percent;
546	const char *final_pitch;
547	char buf[80];
548	char mbr_buf[120];
549
550#ifdef USE_MBROLA_LIB
551	int embedded_ix=0;
552	int word_count=0;
553
554	event_list_ix = 0;
555	out_ptr = outbuf;
556#ifdef PLATFORM_WINDOWS
557	setNoError_MBR(1);     // don't stop on phoneme errors
558#endif
559#else
560//	fprintf(f_mbrola,";; v=%.2f\n",(float)(mbrola_control & 0xff)/16.0);   //  ;; v=  has no effect on mbrola
561#endif
562
563	for(phix=1; phix < n_phonemes; phix++)
564	{
565		mbr_buf[0] = 0;
566
567		p = &plist[phix];
568		next = &plist[phix+1];
569		prev = &plist[phix-1];
570		ph = p->ph;
571		ph_prev = plist[phix-1].ph;
572		ph_next = plist[phix+1].ph;
573
574#ifdef USE_MBROLA_LIB
575		if(p->synthflags & SFLAG_EMBEDDED)
576		{
577			MbrolaEmbedded(embedded_ix, p->sourceix);
578		}
579		if(p->newword & 4)
580			MbrolaMarker(espeakEVENT_SENTENCE, (p->sourceix & 0x7ff) + clause_start_char, 0, count_sentences);
581
582		if(p->newword & 1)
583			MbrolaMarker(espeakEVENT_WORD, (p->sourceix & 0x7ff) + clause_start_char, p->sourceix >> 11, clause_start_word + word_count++);
584#endif
585
586		name = GetMbrName(p,ph,ph_prev,ph_next,&name2,&len_percent,&control);
587		if(control & 1)
588			phix++;
589
590		if(name == 0)
591			continue;   // ignore this phoneme
592
593		if((ph->type == phPAUSE) && (name == ph->mnemonic))
594		{
595			// a pause phoneme, which has not been changed by the translation
596			name = '_';
597			len = (p->length * speed_factor1)/256;
598//			if(len == 0) continue;
599			if(len == 0)
600				len = 1;
601		}
602		else
603			len = (80 * speed_factor2)/256;
604
605		sprintf(buf,"%s\t",WordToString(name));
606		strcat(mbr_buf,buf);
607
608		if(name2 == '_')
609		{
610			// add a pause after this phoneme
611			pause = PauseLength(len_percent,0);
612			name2 = 0;
613		}
614
615		done = 0;
616		final_pitch = "";
617
618		switch(ph->type)
619		{
620		case phVOWEL:
621			len = ph->std_length;
622			if(p->synthflags & SFLAG_LENGTHEN)
623				len += phoneme_tab[phonLENGTHEN]->std_length;  // phoneme was followed by an extra : symbol
624
625			if(ph_next->type == phPAUSE)
626				len += 50;        // lengthen vowels before a pause
627			len = (len * p->length)/256;
628
629			if(name2 == 0)
630			{
631				sprintf(buf,"%d\t%s", len, WritePitch(p->env,p->pitch1,p->pitch2,0,0));
632				strcat(mbr_buf,buf);
633			}
634			else
635			{
636				len1 = (len * len_percent)/100;
637				sprintf(buf,"%d\t%s", len1, WritePitch(p->env,p->pitch1,p->pitch2,len_percent,0));
638				strcat(mbr_buf,buf);
639
640				sprintf(buf,"%s\t%d\t%s", WordToString(name2), len-len1, WritePitch(p->env,p->pitch1,p->pitch2,-len_percent,0));
641				strcat(mbr_buf,buf);
642			}
643			done = 1;
644			break;
645
646		case phSTOP:
647			released = 0;
648			if(next->type==phVOWEL) released = 1;
649			if(next->type==phLIQUID && !next->newword) released = 1;
650
651			if(released)
652				len = DoSample(p->ph,next->ph,2,0,-1);
653			else
654				len = DoSample(p->ph,phoneme_tab[phonPAUSE],2,0,-1);
655			len = (len * 1000)/samplerate;  // convert to mS
656			len += PauseLength(p->prepause,1);
657			break;
658
659		case phVSTOP:
660			len = (80 * speed_factor2)/256;
661			break;
662
663		case phFRICATIVE:
664			len = 0;
665			if(p->synthflags & SFLAG_LENGTHEN)
666				len = DoSample(ph,ph_next,2,p->length,-1);  // play it twice for [s:] etc.
667			len += DoSample(ph,ph_next,2,p->length,-1);
668
669			len = (len * 1000)/samplerate;  // convert to mS
670			break;
671
672		case phNASAL:
673			if(next->type != phVOWEL)
674			{
675				len = DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,-1);
676				len = (len * 1000)/samplerate;
677				if(next->type == phPAUSE)
678					len += 50;
679				final_pitch = WritePitch(p->env,p->pitch1,p->pitch2,0,1);
680			}
681			break;
682
683		case phLIQUID:
684			if(next->type == phPAUSE)
685			{
686				len += 50;
687				final_pitch = WritePitch(p->env,p->pitch1,p->pitch2,0,1);
688			}
689			break;
690		}
691
692		if(!done)
693		{
694			if(name2 != 0)
695			{
696				len1 = (len * len_percent)/100;
697				sprintf(buf,"%d\n%s\t",len1,WordToString(name2));
698				strcat(mbr_buf,buf);
699				len -= len1;
700			}
701			sprintf(buf,"%d%s\n",len,final_pitch);
702			strcat(mbr_buf,buf);
703		}
704
705		if(pause)
706		{
707			sprintf(buf,"_ \t%d\n",PauseLength(pause,0));
708			strcat(mbr_buf,buf);
709			pause = 0;
710		}
711
712		if(f_mbrola)
713		{
714			fwrite(mbr_buf,1,strlen(mbr_buf),f_mbrola);  // write .pho to a file
715		}
716		else
717		{
718#ifdef USE_MBROLA_LIB
719			if(MbrolaSynth(mbr_buf) != 0)
720				return;
721#endif
722		}
723	}
724
725#ifdef USE_MBROLA_LIB
726	MbrolaSynth(NULL);
727#endif
728}  // end of MbrolaTranslate
729
730
731#ifdef TEST_MBROLA
732
733PHONEME_LIST mbrola_phlist;
734int mbrola_n_ph;
735int mbrola_phix;
736
737
738int MbrolaFill(int fill_zeros)
739{//===========================
740}
741
742int MbrolaGenerate(PHONEME_LIST *phoneme_list, int *n_ph, int resume)
743{//==================================================================
744	if(resume == 0)
745	{
746		mbrola_phlist = phoneme_list;
747		mbrola_n_ph = n_ph;
748		mbrola_phix = 0;
749	}
750
751	resume(0);  // finished phoneme list
752}
753#endif