/native/external/espeak/src/synthesize.h
C++ Header | 279 lines | 206 code | 47 blank | 26 comment | 0 complexity | 95fa4216af047146c1cc6e6e3cc2ca7c MD5 | raw file
1/*************************************************************************** 2 * Copyright (C) 2005 to 2007 by Jonathan Duddington * 3 * email: jonsd@users.sourceforge.net * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 3 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write see: * 17 * <http://www.gnu.org/licenses/>. * 18 ***************************************************************************/ 19 20 21#define N_PHONEME_LIST 1000 // enough for source[N_TR_SOURCE] full of text, else it will truncate 22 23#define MAX_HARMONIC 400 // 400 * 50Hz = 20 kHz, more than enough 24#define N_SEQ_FRAMES 25 // max frames in a spectrum sequence (real max is ablut 8) 25 26#define PITCHfall 0 27#define PITCHrise 1 28 29// flags set for frames within a spectrum sequence 30#define FRFLAG_VOWEL_CENTRE 0x02 // centre point of vowel 31#define FRFLAG_LEN_MOD 0x04 // reduce effect of length adjustment 32#define FRFLAG_BREAK_LF 0x08 // but keep f3 upwards 33#define FRFLAG_BREAK 0x10 // don't merge with next frame 34#define FRFLAG_BREAK_2 0x18 // FRFLAG_BREAK_LF or FRFLAG_BREAK 35#define FRFLAG_FORMANT_RATE 0x20 // Flag5 allow increased rate of change of formant freq 36#define FRFLAG_MODULATE 0x40 // Flag6 modulate amplitude of some cycles to give trill 37#define FRFLAG_DEFER_WAV 0x80 // Flag7 defer mixing WAV until the next frame 38#define FRFLAG_COPIED 0x8000 // This frame has been copied into temporary rw memory 39 40#define SFLAG_SEQCONTINUE 0x01 // a liquid or nasal after a vowel, but not followed by a vowel 41#define SFLAG_EMBEDDED 0x02 // there are embedded commands before this phoneme 42#define SFLAG_SYLLABLE 0x04 // vowel or syllabic consonant 43#define SFLAG_LENGTHEN 0x08 // lengthen symbol : included after this phoneme 44#define SFLAG_DICTIONARY 0x10 // the pronunciation of this word was listed in the xx_list dictionary 45#define SFLAG_SWITCHED_LANG 0x20 // this word uses phonemes from a different language 46#define SFLAG_PROMOTE_STRESS 0x40 // this unstressed word can be promoted to stressed 47 48// embedded command numbers 49#define EMBED_P 1 // pitch 50#define EMBED_S 2 // speed (used in setlengths) 51#define EMBED_A 3 // amplitude/volume 52#define EMBED_R 4 // pitch range/expression 53#define EMBED_H 5 // echo/reverberation 54#define EMBED_T 6 // different tone for announcing punctuation 55#define EMBED_I 7 // sound icon 56#define EMBED_S2 8 // speed (used in synthesize) 57#define EMBED_Y 9 // say-as commands 58#define EMBED_M 10 // mark name 59#define EMBED_U 11 // audio uri 60#define EMBED_B 12 // break 61#define EMBED_F 13 // emphasis 62 63#define N_EMBEDDED_VALUES 14 64extern int embedded_value[N_EMBEDDED_VALUES]; 65extern int embedded_default[N_EMBEDDED_VALUES]; 66 67 68#define N_PEAKS 9 69#define N_MARKERS 8 70 71typedef struct { 72 short pkfreq; 73 short pkheight; 74 short pkwidth; 75 short pkright; 76} peak_t; 77 78typedef struct { 79 short frflags; 80 unsigned char length; 81 unsigned char rms; 82 short ffreq[9]; 83 unsigned char fheight[9]; 84 unsigned char fwidth[6]; // width/4 85 unsigned char fright[6]; // width/4 86} frame_t; 87 88 89 90// formant data used by wavegen 91typedef struct { 92 int freq; // Hz<<16 93 int height; // height<<15 94 int left; // Hz<<16 95 int right; // Hz<<16 96 DOUBLEX freq1; // floating point versions of the above 97 DOUBLEX height1; 98 DOUBLEX left1; 99 DOUBLEX right1; 100 DOUBLEX freq_inc; // increment by this every 64 samples 101 DOUBLEX height_inc; 102 DOUBLEX left_inc; 103 DOUBLEX right_inc; 104} wavegen_peaks_t; 105 106typedef struct { 107 double a; 108 double b; 109 double c; 110 double x1; 111 double x2; 112} RESONATOR; 113 114 115typedef struct { 116 short length; 117 unsigned char n_frames; 118 unsigned char flags; 119 frame_t frame[N_SEQ_FRAMES]; // max. frames in a spectrum sequence 120} SPECT_SEQ; 121 122typedef struct { 123 short length; 124 short frflags; 125 frame_t *frame; 126} frameref_t; 127 128 129typedef struct { 130 PHONEME_TAB *ph; 131 unsigned char env; // pitch envelope number 132 unsigned char tone; 133 unsigned char type; 134 unsigned char prepause; 135 unsigned char amp; 136 unsigned char tone_ph; // tone phoneme to use with this vowel 137 unsigned char newword; // bit 0=start of word, bit 1=end of clause, bit 2=start of sentence 138 unsigned char synthflags; 139 short length; // length_mod 140 short pitch1; // pitch, 0-4095 within the Voice's pitch range 141 short pitch2; 142 unsigned short sourceix; // ix into the original source text string, only set at the start of a word 143} PHONEME_LIST; 144 145 146typedef struct { 147 int name; 148 int length; 149 char *data; 150 char *filename; 151} SOUND_ICON; 152 153typedef struct { 154 int name; 155 unsigned int next_phoneme; 156 int mbr_name; 157 int mbr_name2; 158 int percent; // percentage length of first component 159 int control; 160} MBROLA_TAB; 161 162 163// phoneme table 164extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; 165 166// list of phonemes in a clause 167extern int n_phoneme_list; 168extern PHONEME_LIST phoneme_list[N_PHONEME_LIST]; 169extern unsigned int embedded_list[]; 170 171extern unsigned char env_fall[128]; 172extern unsigned char env_rise[128]; 173extern unsigned char env_frise[128]; 174 175#define MAX_PITCH_VALUE 101 176extern unsigned char pitch_adjust_tab[MAX_PITCH_VALUE+1]; 177 178// queue of commands for wavegen 179#define WCMD_AMPLITUDE 1 180#define WCMD_PITCH 2 181#define WCMD_SPECT 3 182#define WCMD_SPECT2 4 183#define WCMD_PAUSE 5 184#define WCMD_WAVE 6 185#define WCMD_WAVE2 7 186#define WCMD_MARKER 8 187#define WCMD_VOICE 9 188#define WCMD_EMBEDDED 10 189 190 191#define N_WCMDQ 160 192#define MIN_WCMDQ 22 // need this many free entries before adding new phoneme 193 194extern long wcmdq[N_WCMDQ][4]; 195extern int wcmdq_head; 196extern int wcmdq_tail; 197 198// from Wavegen file 199int WcmdqFree(); 200void WcmdqStop(); 201int WcmdqUsed(); 202void WcmdqInc(); 203int WavegenOpenSound(); 204int WavegenCloseSound(); 205int WavegenInitSound(); 206void WavegenInit(int rate, int wavemult_fact); 207float polint(float xa[],float ya[],int n,float x); 208int WavegenFill(int fill_zeros); 209void MarkerEvent(int type, unsigned int char_position, int value, unsigned char *out_ptr); 210 211 212extern unsigned char *wavefile_data; 213extern int samplerate; 214extern int samplerate_native; 215 216extern int wavefile_ix; 217extern int wavefile_amp; 218extern int wavefile_ix2; 219extern int wavefile_amp2; 220extern int vowel_transition[4]; 221extern int vowel_transition0, vowel_transition1; 222 223extern char mbrola_name[20]; 224 225// from synthdata file 226unsigned int LookupSound(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int *match_level, int control); 227frameref_t *LookupSpect(PHONEME_TAB *ph1, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph, int which, int *match_level, int *n_frames, PHONEME_LIST *plist); 228 229unsigned char *LookupEnvelope(int ix); 230int LoadPhData(); 231 232void SynthesizeInit(void); 233int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume); 234void MakeWave2(PHONEME_LIST *p, int n_ph); 235int SynthOnTimer(void); 236int SpeakNextClause(FILE *f_text, const void *text_in, int control); 237int SynthStatus(void); 238void SetSpeed(int control); 239void SetEmbedded(int control, int value); 240void SelectPhonemeTable(int number); 241int SelectPhonemeTableName(const char *name); 242 243 244extern unsigned char *envelope_data[18]; 245extern int formant_rate[]; // max rate of change of each formant 246extern int speed_factor1; 247extern int speed_factor2; 248extern int speed_min_sample_len; 249 250extern long count_samples; 251extern int outbuf_size; 252extern unsigned char *out_ptr; 253extern unsigned char *out_start; 254extern unsigned char *out_end; 255extern int event_list_ix; 256extern espeak_EVENT *event_list; 257extern t_espeak_callback* synth_callback; 258extern int option_log_frames; 259extern const char *version_string; 260extern const int version_phdata; 261 262#define N_SOUNDICON_TAB 80 // total entries in soundicon_tab 263#define N_SOUNDICON_SLOTS 4 // number of slots reserved for dynamic loading of audio files 264extern int n_soundicon_tab; 265extern SOUND_ICON soundicon_tab[N_SOUNDICON_TAB]; 266 267espeak_ERROR SetVoiceByName(const char *name); 268espeak_ERROR SetVoiceByProperties(espeak_VOICE *voice_selector); 269espeak_ERROR LoadMbrolaTable(const char *mbrola_voice, const char *phtrans, int srate); 270void SetParameter(int parameter, int value, int relative); 271void MbrolaTranslate(PHONEME_LIST *plist, int n_phonemes, FILE *f_mbrola); 272int MbrolaSynth(char *p_mbrola); 273int DoSample(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int length_mod, int amp); 274int DoSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph, 275 int which, PHONEME_LIST *plist, int modulation); 276int PauseLength(int pause, int control); 277int LookupPhonemeTable(const char *name); 278 279void InitBreath(void);