PageRenderTime 58ms CodeModel.GetById 15ms app.highlight 38ms RepoModel.GetById 1ms app.codeStats 0ms

/native/external/espeak/src/synthdata.cpp

http://eyes-free.googlecode.com/
C++ | 665 lines | 510 code | 102 blank | 53 comment | 128 complexity | 8e8f690ab23de9041723496028897721 MD5 | raw file
  1/***************************************************************************
  2 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
  3 *   email: jonsd@users.sourceforge.net                                    *
  4 *                                                                         *
  5 *   This program is free software; you can redistribute it and/or modify  *
  6 *   it under the terms of the GNU General Public License as published by  *
  7 *   the Free Software Foundation; either version 3 of the License, or     *
  8 *   (at your option) any later version.                                   *
  9 *                                                                         *
 10 *   This program is distributed in the hope that it will be useful,       *
 11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 13 *   GNU General Public License for more details.                          *
 14 *                                                                         *
 15 *   You should have received a copy of the GNU General Public License     *
 16 *   along with this program; if not, see:                                 *
 17 *               <http://www.gnu.org/licenses/>.                           *
 18 ***************************************************************************/
 19
 20
 21#include "StdAfx.h"
 22
 23#include <stdio.h>
 24#include <stdlib.h>
 25#include <ctype.h>
 26#include <wctype.h>
 27#include <string.h>
 28
 29
 30#include "speak_lib.h"
 31#include "speech.h"
 32#include "phoneme.h"
 33#include "synthesize.h"
 34#include "voice.h"
 35#include "translate.h"
 36#include "wave.h"
 37
 38const char *version_string = "1.39  08.Sep.08";
 39const int version_phdata  = 0x013900;
 40
 41int option_device_number = -1;
 42
 43// copy the current phoneme table into here
 44int n_phoneme_tab;
 45int current_phoneme_table;
 46PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
 47unsigned char phoneme_tab_flags[N_PHONEME_TAB];   // bit 0: not inherited
 48
 49unsigned int *phoneme_index=NULL;
 50char *spects_data=NULL;
 51unsigned char *wavefile_data=NULL;
 52static unsigned char *phoneme_tab_data = NULL;
 53
 54int n_phoneme_tables;
 55PHONEME_TAB_LIST phoneme_tab_list[N_PHONEME_TABS];
 56static int phoneme_tab_number = 0;
 57
 58int wavefile_ix;              // a wavefile to play along with the synthesis
 59int wavefile_amp;
 60int wavefile_ix2;
 61int wavefile_amp2;
 62
 63int seq_len_adjust;
 64int vowel_transition[4];
 65int vowel_transition0;
 66int vowel_transition1;
 67
 68void FormantTransitions(frameref_t *seq, int &n_frames, PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which);
 69int FormantTransition2(frameref_t *seq, int &n_frames, unsigned int data1, unsigned int data2, PHONEME_TAB *other_ph, int which);
 70
 71
 72const char *PhonemeTabName(void)
 73{//=============================
 74	return(phoneme_tab_list[phoneme_tab_number].name);
 75}
 76
 77
 78static int ReadPhFile(char **ptr, const char *fname)
 79{//=================================================
 80	FILE *f_in;
 81	char *p;
 82	unsigned int  length;
 83	char buf[sizeof(path_home)+40];
 84
 85	sprintf(buf,"%s%c%s",path_home,PATHSEP,fname);
 86	length = GetFileLength(buf);
 87	
 88	if((f_in = fopen(buf,"rb")) == NULL)
 89	{
 90		fprintf(stderr,"Can't read data file: '%s'\n",buf);
 91		return(1);
 92	}
 93
 94	if(*ptr != NULL)
 95		Free(*ptr);
 96		
 97	if((p = Alloc(length)) == NULL)
 98	{
 99		fclose(f_in);
100		return(-1);
101	}
102	if(fread(p,1,length,f_in) != length)
103	{
104		fclose(f_in);
105		return(-1);
106	}
107	*ptr = p;
108	fclose(f_in);
109	return(0);
110}  //  end of ReadPhFile
111
112
113int LoadPhData()
114{//=============
115	int ix;
116	int n_phonemes;
117	int version;
118	int result = 1;
119	unsigned char *p;
120
121	if(ReadPhFile((char **)(&phoneme_tab_data),"phontab") != 0)
122		return(-1);
123	if(ReadPhFile((char **)(&phoneme_index),"phonindex") != 0)
124		return(-1);
125	if(ReadPhFile((char **)(&spects_data),"phondata") != 0)
126		return(-1);
127   wavefile_data = (unsigned char *)spects_data;
128
129	// read the version number from the first 4 bytes of phondata
130	version = 0;
131	for(ix=0; ix<4; ix++)
132	{
133		version += (wavefile_data[ix] << (ix*8));
134	}
135
136	if(version != version_phdata)
137	{
138		result = version;
139	}
140
141	// set up phoneme tables
142	p = phoneme_tab_data;
143	n_phoneme_tables = p[0];
144	p+=4;
145
146	for(ix=0; ix<n_phoneme_tables; ix++)
147	{
148		n_phonemes = p[0];
149		phoneme_tab_list[ix].n_phonemes = p[0];
150		phoneme_tab_list[ix].includes = p[1];
151		p += 4;
152		memcpy(phoneme_tab_list[ix].name,p,N_PHONEME_TAB_NAME);
153		p += N_PHONEME_TAB_NAME;
154		phoneme_tab_list[ix].phoneme_tab_ptr = (PHONEME_TAB *)p;
155		p += (n_phonemes * sizeof(PHONEME_TAB));
156	}
157
158	if(phoneme_tab_number >= n_phoneme_tables)
159		phoneme_tab_number = 0;
160
161	return(result);
162}  //  end of LoadPhData
163
164
165void FreePhData(void)
166{//==================
167	Free(phoneme_tab_data);
168	Free(phoneme_index);
169	Free(spects_data);
170	phoneme_tab_data=NULL;
171	phoneme_index=NULL;
172	spects_data=NULL;
173}
174
175
176int LookupPh(const char *string)
177{//=============================
178	int  ix;
179	unsigned char c;
180	unsigned int  mnem;
181
182	// Pack up to 4 characters into a word
183	mnem = 0;
184	for(ix=0; ix<4; ix++)
185	{
186		if(string[ix]==0) break;
187		c = string[ix];
188		mnem |= (c << (ix*8));
189	}
190
191	for(ix=0; ix<n_phoneme_tab; ix++)
192	{
193		if(phoneme_tab[ix] == NULL)
194			continue;
195		if(phoneme_tab[ix]->mnemonic == mnem)
196			return(ix);
197	}
198	return(0);
199}
200
201
202
203
204static unsigned int LookupSound2(int index, unsigned int other_phcode, int control)
205{//================================================================================
206// control=1  get formant transition data only
207
208	unsigned int code;
209	unsigned int value, value2;
210	
211	while((value = phoneme_index[index++]) != 0)
212	{
213		if((code = (value & 0xff)) == other_phcode)
214		{
215			while(((value2 = phoneme_index[index]) != 0) && ((value2 & 0xff) < 8))
216			{
217				switch(value2 & 0xff)
218				{
219				case 0:
220					// next entry is a wavefile to be played along with the synthesis
221					if(control==0)
222					{
223						wavefile_ix = value2 >> 8;
224					}
225					break;
226				case 1:
227					if(control==0)
228					{
229						seq_len_adjust = value2 >> 8;
230					}
231					break;
232				case 2:
233					if(control==0)
234					{
235						seq_len_adjust = value2 >> 8;
236						seq_len_adjust = -seq_len_adjust;
237					}
238					break;
239				case 3:
240					if(control==0)
241					{
242						wavefile_amp = value2 >> 8;
243					}
244					break;
245				case 4:
246					// formant transition data, 2 words
247					vowel_transition[0] = value2 >> 8;
248					vowel_transition[1] = phoneme_index[index++ + 1];
249					break;
250				case 5:
251					// formant transition data, 2 words
252					vowel_transition[2] = value2 >> 8;
253					vowel_transition[3] = phoneme_index[index++ + 1];
254					break;
255				}
256				index++;
257			}
258			return(value >> 8);
259		}
260		else
261		if((code == 4) || (code == 5))
262		{
263			// formant transition data, ignore next word of data
264			index++;
265		}
266	}
267	return(3);   // not found
268}  //  end of LookupSound2
269
270
271unsigned int LookupSound(PHONEME_TAB *this_ph, PHONEME_TAB *other_ph, int which, int *match_level, int control)
272{//============================================================================================================
273	// follows,  1 other_ph preceeds this_ph,   2 other_ph follows this_ph
274   // control:  1= get formant transition data only
275	int spect_list;
276	int spect_list2;
277	int s_list;
278	unsigned char virtual_ph;
279	int  result;
280	int  level=0;
281	unsigned int  other_code;
282	unsigned int  other_virtual;
283	
284	if(control==0)
285	{
286		wavefile_ix = 0;
287		wavefile_amp = 32;
288		seq_len_adjust = 0;
289	}
290	memset(vowel_transition,0,sizeof(vowel_transition));
291	
292	other_code = other_ph->code;
293	if(phoneme_tab[other_code]->type == phPAUSE)
294		other_code = phonPAUSE_SHORT;       // use this version of Pause for matching
295
296	if(which==1)
297	{
298		spect_list = this_ph->after;
299		virtual_ph = this_ph->start_type;
300		spect_list2 = phoneme_tab[virtual_ph]->after;
301		other_virtual = other_ph->end_type;
302	}
303	else
304	{
305		spect_list = this_ph->before;
306		virtual_ph = this_ph->end_type;
307		spect_list2 = phoneme_tab[virtual_ph]->before;
308		other_virtual = other_ph->start_type;
309	}
310
311	result = 3;
312	// look for ph1-ph2 combination
313	if((s_list = spect_list) != 0)
314	{
315		if((result = LookupSound2(s_list,other_code,control)) != 3)
316		{
317			level = 2;
318		}
319		else
320		if(other_virtual != 0)
321		{
322			if((result = LookupSound2(spect_list,other_virtual,control)) != 3)
323			{
324				level = 1;
325			}
326		}
327	}
328	// not found, look in a virtual phoneme if one is given for this phoneme
329	if((result==3) && (virtual_ph != 0) && ((s_list = spect_list2) != 0))
330	{
331		if((result = LookupSound2(s_list,other_code,control)) != 3)
332		{
333			level = 1;
334		}
335		else
336		if(other_virtual != 0)
337		{
338			if((result = LookupSound2(spect_list2,other_virtual,control)) != 3)
339			{
340				level = 1;
341			}
342		}
343	}
344
345	if(match_level != NULL)
346		*match_level = level;
347	
348	if(result==0)
349		return(0);   // NULL was given in the phoneme source
350
351	// note: values = 1 indicates use the default for this phoneme, even though we found a match
352	// which set a secondary reference 
353	if(result >= 4)
354	{
355		// values 1-3 can be used for special codes
356		// 1 = DFT from the phoneme source file
357		return(result);
358	}
359	
360	// no match found for other_ph, return the default
361	return(LookupSound2(this_ph->spect,phonPAUSE,control));
362
363}  //  end of LookupSound
364
365
366
367frameref_t *LookupSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph,
368			int which, int *match_level, int *n_frames, PHONEME_LIST *plist)
369{//=========================================================================================================
370	int  ix;
371	int  nf;
372	int  nf1;
373	int  seq_break;
374	frameref_t *frames;
375	int  length1;
376	int  length_std;
377	int  length_factor;
378	SPECT_SEQ *seq;
379	SPECT_SEQ *seq2;
380	PHONEME_TAB *next2_ph;
381	static frameref_t frames_buf[N_SEQ_FRAMES];
382	
383	PHONEME_TAB *other_ph;
384	if(which == 1)
385		other_ph = prev_ph;
386	else
387		other_ph = next_ph;
388
389	if((ix = LookupSound(this_ph,other_ph,which,match_level,0)) < 4)
390		return(NULL);
391	seq = (SPECT_SEQ *)(&spects_data[ix]);
392	nf = seq->n_frames;
393
394
395	if(nf >= N_SEQ_FRAMES)
396		nf = N_SEQ_FRAMES - 1;
397
398	seq_break = 0;
399	length1 = 0;
400	for(ix=0; ix<nf; ix++)
401	{
402		frames_buf[ix].frame = &seq->frame[ix];
403		frames_buf[ix].frflags = seq->frame[ix].frflags;
404		frames_buf[ix].length = seq->frame[ix].length;
405		if(seq->frame[ix].frflags & FRFLAG_VOWEL_CENTRE)
406			seq_break = ix;
407	}
408	
409	frames = &frames_buf[0];
410	if(seq_break > 0)
411	{
412		if(which==1)
413		{
414			nf = seq_break + 1;
415		}
416		else
417		{
418			frames = &frames_buf[seq_break];  // body of vowel, skip past initial frames
419			nf -= seq_break;
420		}
421	}
422	
423	// do we need to modify a frame for blending with a consonant?
424	if(this_ph->type == phVOWEL)
425	{
426		if((which==2) && ((frames[nf-1].frflags & FRFLAG_BREAK) == 0))
427		{
428			// lookup formant transition for the following phoneme
429
430			if((*match_level == 0) || (next_ph->type == phNASAL))
431			{
432				LookupSound(next_ph,this_ph,1,NULL,1);
433				seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next_ph,which);
434			}
435			else
436			if(next_ph->phflags == phVOWEL2)
437			{
438				// not really a consonant, rather a coloured vowel
439				if(LookupSound(next_ph,this_ph,1,NULL,1) == 0)
440				{
441					next2_ph = plist[2].ph;
442					LookupSound(next2_ph,next_ph,1,NULL,1);
443					seq_len_adjust += FormantTransition2(frames,nf,vowel_transition[2],vowel_transition[3],next2_ph,which);
444				}
445			}
446		}
447		else
448		{
449			if(*match_level == 0)
450				seq_len_adjust = FormantTransition2(frames,nf,vowel_transition0,vowel_transition1,prev_ph,which);
451		}
452//		FormantTransitions(frames,nf,this_ph,other_ph,which);
453	}
454
455	nf1 = nf - 1;
456	for(ix=0; ix<nf1; ix++)
457		length1 += frames[ix].length;
458
459
460	if((wavefile_ix != 0) && ((wavefile_ix & 0x800000)==0))
461	{
462		// a secondary reference has been returned, which is not a wavefile
463		// add these spectra to the main sequence
464		seq2 = (SPECT_SEQ *)(&spects_data[wavefile_ix]);
465	
466		// first frame of the addition just sets the length of the last frame of the main seq
467		nf--;
468		for(ix=0; ix<seq2->n_frames; ix++)
469		{
470			frames[nf].length = seq2->frame[ix].length;
471			if(ix > 0)
472				frames[nf].frame = &seq2->frame[ix];
473			nf++;
474		}
475		wavefile_ix = 0;
476	}
477	
478	if((this_ph->type == phVOWEL) && (length1 > 0))
479	{
480		if(which==2)
481		{
482			// adjust the length of the main part to match the standard length specified for the vowel
483			//   less the front part of the vowel and any added suffix
484	
485			length_std = this_ph->std_length + seq_len_adjust - 45;
486			if(length_std < 10)
487				length_std = 10;
488			if(plist->synthflags & SFLAG_LENGTHEN)
489				length_std += phoneme_tab[phonLENGTHEN]->std_length;  // phoneme was followed by an extra : symbol
490
491// can adjust vowel length for stressed syllables here
492
493
494			length_factor = (length_std * 256)/ length1;
495			
496			for(ix=0; ix<nf1; ix++)
497			{
498				frames[ix].length = (frames[ix].length * length_factor)/256;
499			}
500		}
501		else
502		{
503			// front of a vowel
504			if(*match_level == 0)
505			{
506				// allow very short vowels to have shorter front parts
507				if(this_ph->std_length < 130)
508					frames[0].length = (frames[0].length * this_ph->std_length)/130;
509			}
510
511			if(seq_len_adjust != 0)
512			{
513				length_std = 0;
514				for(ix=0; ix<nf1; ix++)
515				{
516					length_std += frames[ix].length;
517				}
518				length_factor = ((length_std + seq_len_adjust) * 256)/length_std;
519				for(ix=0; ix<nf1; ix++)
520				{
521					frames[ix].length = (frames[ix].length * length_factor)/256;
522				}
523			}
524		}
525	}
526	
527	*n_frames = nf;
528	return(frames);
529}  //  end of LookupSpect
530
531
532unsigned char *LookupEnvelope(int ix)
533{//================================
534	if(ix==0)
535		return(NULL);
536	return((unsigned char *)&spects_data[phoneme_index[ix]]);
537}
538
539
540static void SetUpPhonemeTable(int number, int recursing)
541{//=====================================================
542	int ix;
543	int includes;
544	int ph_code;
545	PHONEME_TAB *phtab;
546
547	if(recursing==0)
548	{
549		memset(phoneme_tab_flags,0,sizeof(phoneme_tab_flags));
550	}
551
552	if((includes = phoneme_tab_list[number].includes) > 0)
553	{
554		// recursively include base phoneme tables
555		SetUpPhonemeTable(includes-1,1);
556	}
557
558	// now add the phonemes from this table
559	phtab = phoneme_tab_list[number].phoneme_tab_ptr;
560	for(ix=0; ix<phoneme_tab_list[number].n_phonemes; ix++)
561	{
562		ph_code = phtab[ix].code;
563		phoneme_tab[ph_code] = &phtab[ix];
564		if(ph_code > n_phoneme_tab)
565			n_phoneme_tab = ph_code;
566
567		if(recursing == 0)
568			phoneme_tab_flags[ph_code] |= 1;   // not inherited
569	}
570}  // end of SetUpPhonemeTable
571
572
573void SelectPhonemeTable(int number)
574{//================================
575	n_phoneme_tab = 0;
576	SetUpPhonemeTable(number,0);  // recursively for included phoneme tables
577	n_phoneme_tab++;
578	current_phoneme_table = number;
579}  //  end of SelectPhonemeTable
580
581
582int LookupPhonemeTable(const char *name)
583{//=====================================
584	int ix;
585
586	for(ix=0; ix<n_phoneme_tables; ix++)
587	{
588		if(strcmp(name,phoneme_tab_list[ix].name)==0)
589		{
590			phoneme_tab_number = ix;
591			break;
592		}
593	}
594	if(ix == n_phoneme_tables)
595		return(-1);
596
597	return(ix);
598}
599
600
601int SelectPhonemeTableName(const char *name)
602{//=========================================
603// Look up a phoneme set by name, and select it if it exists
604// Returns the phoneme table number
605	int ix;
606
607	if((ix = LookupPhonemeTable(name)) == -1)
608		return(-1);
609
610	SelectPhonemeTable(ix);
611	return(ix);
612}  //  end of DelectPhonemeTableName
613
614
615
616
617void LoadConfig(void)
618{//==================
619// Load configuration file, if one exists
620	char buf[sizeof(path_home)+10];
621	FILE *f;
622	int ix;
623	char c1;
624	char *p;
625	char string[200];
626
627	for(ix=0; ix<N_SOUNDICON_SLOTS; ix++)
628	{
629		soundicon_tab[ix].filename = NULL;
630		soundicon_tab[ix].data = NULL;
631	}
632
633	sprintf(buf,"%s%c%s",path_home,PATHSEP,"config");
634	if((f = fopen(buf,"r"))==NULL)
635	{
636		return;
637	}
638
639	while(fgets(buf,sizeof(buf),f)!=NULL)
640	{
641		if(memcmp(buf,"tone",4)==0)
642		{
643			ReadTonePoints(&buf[5],tone_points);
644		}
645		else
646		if(memcmp(buf,"pa_device",9)==0)
647		{
648			sscanf(&buf[7],"%d",&option_device_number);
649		}
650		else
651		if(memcmp(buf,"soundicon",9)==0)
652		{
653			ix = sscanf(&buf[10],"_%c %s",&c1,string);
654			if(ix==2)
655			{
656				soundicon_tab[n_soundicon_tab].name = c1;
657				p = Alloc(strlen(string)+1);
658				strcpy(p,string);
659				soundicon_tab[n_soundicon_tab].filename = p;
660				soundicon_tab[n_soundicon_tab++].length = 0;
661			}
662		}
663	}
664}  //  end of LoadConfig
665