PageRenderTime 57ms CodeModel.GetById 8ms app.highlight 42ms RepoModel.GetById 1ms app.codeStats 1ms

/native/external/espeak/src/setlengths.cpp

http://eyes-free.googlecode.com/
C++ | 634 lines | 491 code | 101 blank | 42 comment | 147 complexity | b109a09b160cc9f8c3726201a1696eda MD5 | raw file
  1/***************************************************************************
  2 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
  3 *   email: jonsd@users.sourceforge.net                                    *
  4 *                                                                         *
  5 *   This program is free software; you can redistribute it and/or modify  *
  6 *   it under the terms of the GNU General Public License as published by  *
  7 *   the Free Software Foundation; either version 3 of the License, or     *
  8 *   (at your option) any later version.                                   *
  9 *                                                                         *
 10 *   This program is distributed in the hope that it will be useful,       *
 11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 13 *   GNU General Public License for more details.                          *
 14 *                                                                         *
 15 *   You should have received a copy of the GNU General Public License     *
 16 *   along with this program; if not, see:                                 *
 17 *               <http://www.gnu.org/licenses/>.                           *
 18 ***************************************************************************/
 19
 20#include "StdAfx.h"
 21
 22#include <stdlib.h>
 23#include <stdio.h>
 24#include <wctype.h>
 25
 26#include "speak_lib.h"
 27#include "speech.h"
 28#include "phoneme.h"
 29#include "synthesize.h"
 30#include "voice.h"
 31#include "translate.h"
 32
 33extern int GetAmplitude(void);
 34
 35
 36// convert from words-per-minute to internal speed factor
 37static unsigned char speed_lookup[290] = {
 38	 250, 246, 243, 239, 236,	//  80
 39	 233, 229, 226, 223, 220,	//  85
 40	 217, 214, 211, 208, 205,	//  90
 41	 202, 197, 194, 192, 190,	//  95
 42	 187, 185, 183, 180, 178,	// 100
 43	 176, 174, 172, 170, 168,	// 105
 44	 166, 164, 161, 159, 158,	// 110
 45	 156, 154, 152, 150, 148,	// 115
 46	 146, 145, 143, 141, 137,	// 120
 47	 136, 135, 133, 132, 131,	// 125
 48	 129, 128, 127, 126, 125,	// 130
 49	 124, 122, 121, 120, 119,	// 135
 50	 117, 116, 115, 114, 113,	// 140
 51	 112, 111, 110, 108, 107,	// 145
 52	 106, 105, 104, 103, 102,	// 150
 53	 101, 100,  99,  98,  97,	// 155
 54	  96,  95,  93,  92,  92,	// 160
 55	  91,  90,  89,  89,  88,	// 165
 56	  87,  87,  86,  85,  85,	// 170
 57	  84,  83,  83,  82,  81,	// 175
 58	  80,  80,  79,  78,  78,	// 180
 59	  77,  76,  76,  75,  73,	// 185
 60	  72,  72,  71,  71,  70,	// 190
 61	  70,  69,  69,  68,  67,	// 195
 62	  67,  66,  66,  65,  65,	// 200
 63	  64,  64,  63,  63,  62,	// 205
 64	  62,  61,  60,  60,  59,	// 210
 65	  59,  58,  58,  57,  57,	// 215
 66	  56,  56,  55,  55,  55,	// 220
 67	  54,  54,  53,  53,  52,	// 225
 68	  52,  51,  51,  50,  50,	// 230
 69	  49,  49,  49,  48,  48,	// 235
 70	  47,  47,  46,  46,  46,	// 240
 71	  45,  45,  44,  44,  43,	// 245
 72	  43,  43,  42,  42,  41,	// 250
 73	  41,  41,  40,  40,  39,	// 255
 74	  39,  39,  38,  38,  38,	// 260
 75	  37,  37,  37,  36,  36,	// 265
 76	  35,  35,  35,  34,  34,	// 270
 77	  34,  33,  33,  33,  32,	// 275
 78	  32,  32,  32,  31,  31,	// 280
 79	  31,  30,  30,  30,  29,	// 285
 80	  29,  29,  29,  28,  28,	// 290
 81	  28,  28,  27,  27,  27,	// 295
 82	  26,  26,  26,  26,  25,	// 300
 83	  25,  25,  22,  22,  22,	// 305
 84	  22,  22,  22,  22,  22,	// 310
 85	  21,  21,  21,  21,  21,	// 315
 86	  21,  20,  20,  20,  20,	// 320
 87	  20,  15,  15,  15,  15,	// 325
 88	  15,  15,  15,  15,  16,	// 330
 89	  16,  16,  16,  15,  15,	// 335
 90	  15,  15,  15,  15,  15,	// 340
 91	  15,  17,  17,  16,  16,	// 345
 92	  15,  15,  14,  14,  13,	// 350
 93	  13,  12,  12,  11,  11,	// 355
 94	  10,  10,   9,   8,   8,	// 360
 95	   7,   6,   5,   5,   4,	// 365
 96};
 97
 98// speed_factor2 adjustments for speeds 370 to 390
 99static unsigned char faster[] = {
100114,112,110,109,107,105,104,102,100,98, // 370-379
10196,94,92,90,88,85,83,80,78,75,72 }; //380-390
102
103static int speed1 = 130;
104static int speed2 = 121;
105static int speed3 = 118;
106
107
108
109void SetSpeed(int control)
110{//=======================
111	int x;
112	int s1;
113	int wpm;
114	int wpm2;
115
116	wpm = embedded_value[EMBED_S];
117	if(control == 2)
118		wpm = embedded_value[EMBED_S2];
119	wpm2 = wpm;
120
121	if(wpm > 369) wpm = 369;
122	if(wpm < 80) wpm = 80;
123
124	x = speed_lookup[wpm-80];
125
126	if(control & 1)
127	{
128		// set speed factors for different syllable positions within a word
129		// these are used in CalcLengths()
130		speed1 = (x * voice->speedf1)/256;
131		speed2 = (x * voice->speedf2)/256;
132		speed3 = (x * voice->speedf3)/256;
133	}
134
135	if(control & 2)
136	{
137		// these are used in synthesis file
138		s1 = (x * voice->speedf1)/256;
139		speed_factor1 = (256 * s1)/115;      // full speed adjustment, used for pause length
140if(speed_factor1 < 15)
141	speed_factor1 = 15;
142		if(wpm >= 170)
143//			speed_factor2 = 100 + (166*s1)/128;  // reduced speed adjustment, used for playing recorded sounds
144			speed_factor2 = 110 + (150*s1)/128;  // reduced speed adjustment, used for playing recorded sounds
145		else
146			speed_factor2 = 128 + (128*s1)/130;  // = 215 at 170 wpm
147
148		if(wpm2 > 369)
149		{
150			if(wpm2 > 390)
151				wpm2 = 390;
152			speed_factor2 = faster[wpm2 - 370];
153		}
154	}
155
156	speed_min_sample_len = 450;
157}  //  end of SetSpeed
158
159
#ifdef deleted
// Only compiled when 'deleted' is defined.
// Sets option_amplitude from a volume step 'amp' in the range 0 to 20.
// Values outside that range are ignored.
void SetAmplitude(int amp)
{//=======================
	static const unsigned char amplitude_factor[] = {0,5,6,7,9,11,14,17,21,26, 32, 38,44,50,56,63,70,77,84,91,100 };

	if((amp >= 0) && (amp <= 20))
	{
		option_amplitude = (amplitude_factor[amp] * 480)/256;
	}
}
#endif
171
172
173
174void SetParameter(int parameter, int value, int relative)
175{//======================================================
176// parameter: reset-all, amp, pitch, speed, linelength, expression, capitals, number grouping
177// relative 0=absolute  1=relative
178
179	int new_value = value;
180	int default_value;
181
182	if(relative)
183	{
184		if(parameter < 5)
185		{
186			default_value = param_defaults[parameter];
187			new_value = default_value + (default_value * value)/100;
188		}
189	}
190	param_stack[0].parameter[parameter] = new_value;
191
192	switch(parameter)
193	{
194	case espeakRATE:
195		embedded_value[EMBED_S] = new_value;
196		embedded_value[EMBED_S2] = new_value;
197		SetSpeed(3);
198		break;
199
200	case espeakVOLUME:
201		embedded_value[EMBED_A] = new_value;
202		GetAmplitude();
203		break;
204
205	case espeakPITCH:
206		if(new_value > 99) new_value = 99;
207		if(new_value < 0) new_value = 0;
208		embedded_value[EMBED_P] = new_value;
209		break;
210
211	case espeakRANGE:
212		if(new_value > 99) new_value = 99;
213		embedded_value[EMBED_R] = new_value;
214		break;
215
216	case espeakLINELENGTH:
217		option_linelength = new_value;
218		break;
219
220	case espeakWORDGAP:
221		option_wordgap = new_value;
222		break;
223
224	case espeakINTONATION:
225		if((new_value & 0xff) != 0)
226			translator->langopts.intonation_group = new_value & 0xff;
227		option_tone_flags = new_value;
228		break;
229
230	default:
231		break;
232	}
233}  // end of SetParameter
234
235
236
237static void DoEmbedded2(int &embix)
238{//================================
239	// There were embedded commands in the text at this point
240
241	unsigned int word;
242
243	do {
244		word = embedded_list[embix++];
245
246		if((word & 0x1f) == EMBED_S)
247		{
248			// speed
249			SetEmbedded(word & 0x7f, word >> 8);   // adjusts embedded_value[EMBED_S]
250			SetSpeed(1);
251		}
252	} while((word & 0x80) == 0);
253}
254
255
256void Translator::CalcLengths()
257{//===========================
258	int ix;
259	int ix2;
260	PHONEME_LIST *prev;
261	PHONEME_LIST *next;
262	PHONEME_LIST *next2;
263	PHONEME_LIST *next3;
264	PHONEME_LIST *p;
265	PHONEME_LIST *p2;
266
267	int  stress;
268	int  type;
269	static int  more_syllables=0;
270	int  pre_sonorant=0;
271	int  pre_voiced=0;
272	int  last_pitch = 0;
273	int  pitch_start;
274	int  length_mod;
275	int  len;
276	int  env2;
277	int  end_of_clause;
278	int  embedded_ix = 0;
279	int  min_drop;
280	int emphasized;
281	unsigned char *pitch_env=NULL;
282
283	for(ix=1; ix<n_phoneme_list; ix++)
284	{
285		prev = &phoneme_list[ix-1];
286		p = &phoneme_list[ix];
287		stress = p->tone & 0x7;
288		emphasized = p->tone & 0x8;
289
290		next = &phoneme_list[ix+1];
291
292		if(p->synthflags & SFLAG_EMBEDDED)
293		{
294			DoEmbedded2(embedded_ix);
295		}
296
297		type = p->type;
298		if(p->synthflags & SFLAG_SYLLABLE)
299			type = phVOWEL;
300
301		switch(type)
302		{
303		case phPAUSE:
304			last_pitch = 0;
305			break;
306			
307		case phSTOP:
308			last_pitch = 0;
309			if(prev->type == phFRICATIVE)
310				p->prepause = 20;
311			else
312			if((more_syllables > 0) || (stress < 4))
313				p->prepause = 40;
314			else
315				p->prepause = 60;
316
317			if(prev->type == phSTOP)
318				p->prepause = 60;
319
320			if((langopts.word_gap & 0x10) && (p->newword))
321				p->prepause = 60;
322
323			if(p->synthflags & SFLAG_LENGTHEN)
324				p->prepause += langopts.long_stop;
325			break;
326
327		case phVFRICATIVE:
328			if(next->type==phVOWEL)
329			{
330				pre_voiced = 1;
331			}   // drop through
332		case phFRICATIVE:
333			if(p->newword)
334				p->prepause = 15;
335
336			if(next->type==phPAUSE && prev->type==phNASAL && !(p->ph->phflags&phFORTIS))
337				p->prepause = 25;
338
339			if(prev->ph->phflags & phBRKAFTER)
340				p->prepause = 30;
341
342			if((p->ph->phflags & phSIBILANT) && next->type==phSTOP && !next->newword)
343			{
344				if(prev->type == phVOWEL)
345					p->length = 200;      // ?? should do this if it's from a prefix
346				else
347					p->length = 150;
348			}
349			else
350				p->length = 256;
351
352			if((langopts.word_gap & 0x10) && (p->newword))
353				p->prepause = 30;
354
355			break;
356
357		case phVSTOP:
358			if(prev->type==phVFRICATIVE || prev->type==phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID))
359				p->prepause = 30;
360
361			if(next->type==phVOWEL || next->type==phLIQUID)
362			{
363				if((next->type==phVOWEL) || !next->newword)
364					pre_voiced = 1;
365
366				p->prepause = 40;
367
368				if((prev->type == phPAUSE) || (prev->type == phVOWEL)) // || (prev->ph->mnemonic == ('/'*256+'r')))
369					p->prepause = 0;
370				else
371				if(p->newword==0)
372				{
373					if(prev->type==phLIQUID)
374						p->prepause = 20;
375					if(prev->type==phNASAL)
376						p->prepause = 12;
377
378					if(prev->type==phSTOP && !(prev->ph->phflags & phFORTIS))
379						p->prepause = 0;
380				}
381			}
382			if((langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20))
383				p->prepause = 20;
384
385			break;
386
387		case phLIQUID:
388		case phNASAL:
389			p->amp = stress_amps[1];  // unless changed later
390			p->length = 256;  //  TEMPORARY
391			min_drop = 0;
392			
393			if(p->newword)
394			{
395				if(prev->type==phLIQUID)
396					p->prepause = 25;
397				if(prev->type==phVOWEL)
398					p->prepause = 12;
399			}
400
401			if(next->type==phVOWEL)
402			{
403				pre_sonorant = 1;
404			}
405			else
406			if((prev->type==phVOWEL) || (prev->type == phLIQUID))
407			{
408				p->length = prev->length;
409				p->pitch2 = last_pitch;
410				if(p->pitch2 < 7)
411					p->pitch2 = 7;
412				p->pitch1 = p->pitch2 - 8;
413				p->env = PITCHfall;
414				pre_voiced = 0;
415				
416				if(p->type == phLIQUID)
417				{
418					p->length = speed1;
419//p->pitch1 = p->pitch2 - 20;   // post vocalic [r/]
420				}
421
422				if(next->type == phVSTOP)
423				{
424					p->length = (p->length * 160)/100;
425				}
426				if(next->type == phVFRICATIVE)
427				{
428					p->length = (p->length * 120)/100;
429				}
430			}
431			else
432			{
433				p->pitch2 = last_pitch;
434				for(ix2=ix; ix2<n_phoneme_list; ix2++)
435				{
436					if(phoneme_list[ix2].type == phVOWEL)
437					{
438						p->pitch2 = phoneme_list[ix2].pitch2;
439						break;
440					}
441				}
442				p->pitch1 = p->pitch2-8;
443				p->env = PITCHfall;
444				pre_voiced = 0;
445			}
446			break;
447
448		case phVOWEL:
449			min_drop = 0;
450			next2 = &phoneme_list[ix+2];
451			next3 = &phoneme_list[ix+3];
452
453			if(stress > 7) stress = 7;
454
455			if(pre_sonorant)
456				p->amp = stress_amps[stress]-1;
457			else
458				p->amp = stress_amps[stress];
459
460			if(emphasized)
461				p->amp = 25;
462
463			if(ix >= (n_phoneme_list-3))
464			{
465				// last phoneme of a clause, limit its amplitude
466				if(p->amp > langopts.param[LOPT_MAXAMP_EOC])
467					p->amp = langopts.param[LOPT_MAXAMP_EOC];
468			}
469
470			// is the last syllable of a word ?
471			more_syllables=0;
472			end_of_clause = 0;
473			for(p2 = p+1; p2->newword== 0; p2++)
474			{
475				if((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC))
476					more_syllables++;
477
478				if(p2->ph->code == phonPAUSE_CLAUSE)
479					end_of_clause = 2;
480			}
481			if(p2->ph->code == phonPAUSE_CLAUSE)
482				end_of_clause = 2;
483
484			if((p2->newword & 2) && (more_syllables==0))
485			{
486				end_of_clause = 2;
487			}
488
489			// calc length modifier
490			if(more_syllables==0)
491			{
492				len = langopts.length_mods0[next2->ph->length_mod *10+ next->ph->length_mod];
493
494				if((next->newword) && (langopts.word_gap & 0x20))
495				{
496					// consider as a pause + first phoneme of the next word
497					length_mod = (len + langopts.length_mods0[next->ph->length_mod *10+ 1])/2;
498				}
499				else
500					length_mod = len;
501			}
502			else
503			{
504				length_mod = langopts.length_mods[next2->ph->length_mod *10+ next->ph->length_mod];
505
506				if((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phFORTIS))
507					length_mod -= 15;
508			}
509
510			if(more_syllables==0)
511				length_mod *= speed1;
512			else
513			if(more_syllables==1)
514				length_mod *= speed2;
515			else
516				length_mod *= speed3;
517
518			length_mod = length_mod / 128;
519//			if(length_mod < 9)
520//				length_mod = 9;     // restrict how much lengths can be reduced
521			if(length_mod < 8)
522				length_mod = 8;     // restrict how much lengths can be reduced
523
524			if(stress >= 7)
525			{
526				// tonic syllable, include a constant component so it doesn't decrease directly with speed
527				length_mod += 20;
528				if(emphasized)
529					length_mod += 10;
530			}
531			else
532			if(emphasized)
533			{
534				length_mod += 20;
535			}
536			
537			if((len = stress_lengths[stress]) == 0)
538				len = stress_lengths[6];
539
540			length_mod = (length_mod * len)/128;
541
542			if(end_of_clause == 2)
543			{
544				// this is the last syllable in the clause, lengthen it - more for short vowels
545				length_mod = length_mod * (256 + (280 - p->ph->std_length)/3)/256;
546			}
547
548if(p->type != phVOWEL)
549{
550	length_mod = 256;   // syllabic consonant
551	min_drop = 8;
552}
553			p->length = length_mod;
554
555			// pre-vocalic part
556			// set last-pitch
557			env2 = p->env;
558			if(env2 > 1) env2++;   // version for use with preceding semi-vowel
559
560			if(p->tone_ph != 0)
561			{
562				pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect);
563			}
564			else
565			{
566				pitch_env = envelope_data[env2];
567			}
568
569			pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256;
570
571			if(pre_sonorant || pre_voiced)
572			{
573				// set pitch for pre-vocalic part
574				if(pitch_start - last_pitch > 8)   // was 9
575					last_pitch = pitch_start - 8;
576
577				prev->pitch1 = last_pitch;
578				prev->pitch2 = pitch_start;
579				if(last_pitch < pitch_start)
580				{
581					prev->env = PITCHrise;
582					p->env = env2;
583				}
584				else
585				{
586					prev->env = PITCHfall;
587				}
588
589				prev->length = length_mod;
590
591				prev->amp = p->amp;
592				if((prev->type != phLIQUID) && (prev->amp > 18))
593					prev->amp = 18;
594			}
595
596			// vowel & post-vocalic part
597			next->synthflags &= ~SFLAG_SEQCONTINUE;
598			if(next->type == phNASAL && next2->type != phVOWEL)
599				next->synthflags |= SFLAG_SEQCONTINUE;
600				
601			if(next->type == phLIQUID)
602			{
603				next->synthflags |= SFLAG_SEQCONTINUE;
604					
605				if(next2->type == phVOWEL)
606				{
607					next->synthflags &= ~SFLAG_SEQCONTINUE;
608				}
609
610				if(next2->type != phVOWEL)
611				{
612					if(next->ph->mnemonic == ('/'*256+'r'))
613					{
614						next->synthflags &= ~SFLAG_SEQCONTINUE;
615//						min_drop = 15;
616					}
617				}
618			}
619
620			if((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop))
621			{
622				p->pitch1 = p->pitch2 - min_drop;
623				if(p->pitch1 < 0)
624					p->pitch1 = 0;
625			}
626
627			last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256;
628			pre_sonorant = 0;
629			pre_voiced = 0;
630			break;
631		}
632	}
633}  //  end of CalcLengths
634