PageRenderTime 69ms CodeModel.GetById 54ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/native/external/espeak/src/synthesize.h

http://eyes-free.googlecode.com/
C++ Header | 279 lines | 206 code | 47 blank | 26 comment | 0 complexity | 95fa4216af047146c1cc6e6e3cc2ca7c MD5 | raw file
  1/***************************************************************************
  2 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
  3 *   email: jonsd@users.sourceforge.net                                    *
  4 *                                                                         *
  5 *   This program is free software; you can redistribute it and/or modify  *
  6 *   it under the terms of the GNU General Public License as published by  *
  7 *   the Free Software Foundation; either version 3 of the License, or     *
  8 *   (at your option) any later version.                                   *
  9 *                                                                         *
 10 *   This program is distributed in the hope that it will be useful,       *
 11 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 12 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 13 *   GNU General Public License for more details.                          *
 14 *                                                                         *
 15 *   You should have received a copy of the GNU General Public License     *
 16 *   along with this program; if not, see:                                 *
 17 *               <http://www.gnu.org/licenses/>.                           *
 18 ***************************************************************************/
 19
 20
 21#define N_PHONEME_LIST  1000    // enough for source[N_TR_SOURCE] full of text, else it will truncate
 22
 23#define MAX_HARMONIC  400           // 400 * 50Hz = 20 kHz, more than enough
 24#define N_SEQ_FRAMES   25           // max frames in a spectrum sequence (real max is ablut 8)
 25
 26#define    PITCHfall   0
 27#define    PITCHrise   1
 28
 29// flags set for frames within a spectrum sequence
 30#define FRFLAG_VOWEL_CENTRE    0x02   // centre point of vowel
 31#define FRFLAG_LEN_MOD         0x04   // reduce effect of length adjustment
 32#define FRFLAG_BREAK_LF        0x08   // but keep f3 upwards
 33#define FRFLAG_BREAK           0x10   // don't merge with next frame
 34#define FRFLAG_BREAK_2         0x18   // FRFLAG_BREAK_LF or FRFLAG_BREAK
 35#define FRFLAG_FORMANT_RATE    0x20   // Flag5 allow increased rate of change of formant freq
 36#define FRFLAG_MODULATE        0x40   // Flag6 modulate amplitude of some cycles to give trill
 37#define FRFLAG_DEFER_WAV       0x80   // Flag7 defer mixing WAV until the next frame
 38#define FRFLAG_COPIED        0x8000   // This frame has been copied into temporary rw memory
 39
 40#define SFLAG_SEQCONTINUE      0x01   // a liquid or nasal after a vowel, but not followed by a vowel
 41#define SFLAG_EMBEDDED         0x02   // there are embedded commands before this phoneme
 42#define SFLAG_SYLLABLE         0x04   // vowel or syllabic consonant
 43#define SFLAG_LENGTHEN         0x08   // lengthen symbol : included after this phoneme
 44#define SFLAG_DICTIONARY       0x10   // the pronunciation of this word was listed in the xx_list dictionary
 45#define SFLAG_SWITCHED_LANG    0x20   // this word uses phonemes from a different language
 46#define SFLAG_PROMOTE_STRESS   0x40   // this unstressed word can be promoted to stressed
 47
 48// embedded command numbers
 49#define EMBED_P     1   // pitch
 50#define EMBED_S     2   // speed (used in setlengths)
 51#define EMBED_A     3   // amplitude/volume
 52#define EMBED_R     4   // pitch range/expression
 53#define EMBED_H     5   // echo/reverberation
 54#define EMBED_T     6   // different tone for announcing punctuation
 55#define EMBED_I     7   // sound icon
 56#define EMBED_S2    8   // speed (used in synthesize)
 57#define EMBED_Y     9   // say-as commands
 58#define EMBED_M    10   // mark name
 59#define EMBED_U    11   // audio uri
 60#define EMBED_B    12   // break
 61#define EMBED_F    13   // emphasis
 62
 63#define N_EMBEDDED_VALUES    14
 64extern int embedded_value[N_EMBEDDED_VALUES];
 65extern int embedded_default[N_EMBEDDED_VALUES];
 66
 67
 68#define N_PEAKS   9
 69#define N_MARKERS 8
 70
 71typedef struct {
 72   short pkfreq;
 73   short pkheight;
 74   short pkwidth;
 75   short pkright;
 76}  peak_t;
 77
 78typedef struct {
 79	short frflags;
 80	unsigned char length;
 81	unsigned char rms;
 82	short ffreq[9];
 83	unsigned char fheight[9];
 84	unsigned char fwidth[6];          // width/4
 85	unsigned char fright[6];          // width/4
 86} frame_t;
 87
 88
 89
 90// formant data used by wavegen
 91typedef struct {
 92	int freq;     // Hz<<16
 93	int height;   // height<<15
 94	int left;     // Hz<<16
 95	int right;    // Hz<<16
 96	DOUBLEX freq1; // floating point versions of the above
 97	DOUBLEX height1;
 98	DOUBLEX left1;
 99	DOUBLEX right1;
100	DOUBLEX freq_inc;    // increment by this every 64 samples
101	DOUBLEX height_inc;
102	DOUBLEX left_inc;
103	DOUBLEX right_inc;
104}  wavegen_peaks_t;
105
106typedef struct {
107	double a;
108	double b;
109	double c;
110	double x1;
111	double x2;
112}  RESONATOR;
113
114
115typedef struct {
116   short length;
117   unsigned char  n_frames;
118   unsigned char  flags;
119   frame_t  frame[N_SEQ_FRAMES];     // max. frames in a spectrum sequence
120} SPECT_SEQ;
121
122typedef struct {
123	short length;
124	short frflags;
125	frame_t *frame;
126} frameref_t;
127
128
129typedef struct {
130	PHONEME_TAB *ph;
131	unsigned char env;    // pitch envelope number
132	unsigned char tone;
133	unsigned char type;
134	unsigned char prepause;
135	unsigned char amp;
136	unsigned char tone_ph;   // tone phoneme to use with this vowel
137	unsigned char newword;   // bit 0=start of word, bit 1=end of clause, bit 2=start of sentence
138	unsigned char synthflags;
139	short length;  // length_mod
140	short pitch1;  // pitch, 0-4095 within the Voice's pitch range
141	short pitch2;
142	unsigned short sourceix;  // ix into the original source text string, only set at the start of a word
143} PHONEME_LIST;
144
145
146typedef struct {
147	int name;
148	int length;
149	char *data;
150	char *filename;
151} SOUND_ICON;
152
153typedef struct {
154	int  name;
155	unsigned int  next_phoneme;
156	int  mbr_name;
157	int  mbr_name2;
158	int  percent;         // percentage length of first component
159	int  control;
160} MBROLA_TAB;
161
162
163// phoneme table
164extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB];
165
166// list of phonemes in a clause
167extern int n_phoneme_list;
168extern PHONEME_LIST phoneme_list[N_PHONEME_LIST];
169extern unsigned int embedded_list[];
170
171extern unsigned char env_fall[128];
172extern unsigned char env_rise[128];
173extern unsigned char env_frise[128];
174
175#define MAX_PITCH_VALUE  101
176extern unsigned char pitch_adjust_tab[MAX_PITCH_VALUE+1];
177
178// queue of commands for wavegen
179#define WCMD_AMPLITUDE 1
180#define WCMD_PITCH	2
181#define WCMD_SPECT	3
182#define WCMD_SPECT2	4
183#define WCMD_PAUSE	5
184#define WCMD_WAVE    6
185#define WCMD_WAVE2   7
186#define WCMD_MARKER	8
187#define WCMD_VOICE   9
188#define WCMD_EMBEDDED 10
189
190
191#define N_WCMDQ   160
192#define MIN_WCMDQ  22   // need this many free entries before adding new phoneme
193
194extern long wcmdq[N_WCMDQ][4];
195extern int wcmdq_head;
196extern int wcmdq_tail;
197
198// from Wavegen file
199int  WcmdqFree();
200void WcmdqStop();
201int  WcmdqUsed();
202void WcmdqInc();
203int  WavegenOpenSound();
204int  WavegenCloseSound();
205int  WavegenInitSound();
206void WavegenInit(int rate, int wavemult_fact);
207float polint(float xa[],float ya[],int n,float x);
208int WavegenFill(int fill_zeros);
209void MarkerEvent(int type, unsigned int char_position, int value, unsigned char *out_ptr);
210
211
212extern unsigned char *wavefile_data;
213extern int samplerate;
214extern int samplerate_native;
215
216extern int wavefile_ix;
217extern int wavefile_amp;
218extern int wavefile_ix2;
219extern int wavefile_amp2;
220extern int vowel_transition[4];
221extern int vowel_transition0, vowel_transition1;
222
223extern char mbrola_name[20];
224
225// from synthdata file
226unsigned int LookupSound(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int *match_level, int control);
227frameref_t *LookupSpect(PHONEME_TAB *ph1, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph, int which, int *match_level, int *n_frames, PHONEME_LIST *plist);
228
229unsigned char *LookupEnvelope(int ix);
230int LoadPhData();
231
232void SynthesizeInit(void);
233int  Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume);
234void MakeWave2(PHONEME_LIST *p, int n_ph);
235int  SynthOnTimer(void);
236int  SpeakNextClause(FILE *f_text, const void *text_in, int control);
237int  SynthStatus(void);
238void SetSpeed(int control);
239void SetEmbedded(int control, int value);
240void SelectPhonemeTable(int number);
241int  SelectPhonemeTableName(const char *name);
242
243
244extern unsigned char *envelope_data[18];
245extern int formant_rate[];         // max rate of change of each formant
246extern int speed_factor1;
247extern int speed_factor2;
248extern int speed_min_sample_len;
249
250extern long count_samples;
251extern int outbuf_size;
252extern unsigned char *out_ptr;
253extern unsigned char *out_start;
254extern unsigned char *out_end;
255extern int event_list_ix;
256extern espeak_EVENT *event_list;
257extern t_espeak_callback* synth_callback;
258extern int option_log_frames;
259extern const char *version_string;
260extern const int version_phdata;
261
262#define N_SOUNDICON_TAB  80   // total entries in soundicon_tab
263#define N_SOUNDICON_SLOTS 4    // number of slots reserved for dynamic loading of audio files
264extern int n_soundicon_tab;
265extern SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];
266
267espeak_ERROR SetVoiceByName(const char *name);
268espeak_ERROR SetVoiceByProperties(espeak_VOICE *voice_selector);
269espeak_ERROR LoadMbrolaTable(const char *mbrola_voice, const char *phtrans, int srate);
270void SetParameter(int parameter, int value, int relative);
271void MbrolaTranslate(PHONEME_LIST *plist, int n_phonemes, FILE *f_mbrola);
272int MbrolaSynth(char *p_mbrola);
273int DoSample(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int length_mod, int amp);
274int DoSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph,
275		int which, PHONEME_LIST *plist, int modulation);
276int PauseLength(int pause, int control);
277int LookupPhonemeTable(const char *name);
278
279void InitBreath(void);