/native/external/espeak/src/wavegen.cpp
C++ | 1910 lines | 1420 code | 327 blank | 163 comment | 239 complexity | 5f54a1ae98eb91bfb24278e383b1ce88 MD5 | raw file
Large files are truncated, but you can click here to view the full file
1/*************************************************************************** 2 * Copyright (C) 2005 to 2007 by Jonathan Duddington * 3 * email: jonsd@users.sourceforge.net * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 3 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, see: * 17 * <http://www.gnu.org/licenses/>. * 18 ***************************************************************************/ 19 20#include "StdAfx.h" 21 22// this version keeps wavemult window as a constant fraction 23// of the cycle length - but that spreads out the HF peaks too much 24 25#include <stdio.h> 26#include <string.h> 27#include <stdlib.h> 28#include <math.h> 29 30 31#include "speak_lib.h" 32#include "speech.h" 33#include "phoneme.h" 34#include "synthesize.h" 35#include "voice.h" 36 37#ifdef USE_PORTAUDIO 38#include "portaudio.h" 39#undef USE_PORTAUDIO 40// determine portaudio version by looking for a #define which is not in V18 41#ifdef paNeverDropInput 42#define USE_PORTAUDIO 19 43#else 44#define USE_PORTAUDIO 18 45#endif 46#endif 47 48#define N_SINTAB 2048 49#include "sintab.h" 50 51 52#define PI 3.1415927 53#define PI2 6.283185307 54#define STEPSIZE 64 // 2.9mS at 22 kHz sample rate 55#define N_WAV_BUF 10 56 57static voice_t *wvoice; 58 59FILE *f_log = NULL; 60int option_waveout = 0; 61int option_harmonic1 = 11; // 10 62int option_log_frames = 0; 63static int flutter_amp = 64; 64 65static int general_amplitude = 60; 66static int 
consonant_amp = 26; // 24 67 68int embedded_value[N_EMBEDDED_VALUES]; 69 70static int PHASE_INC_FACTOR; 71int samplerate = 0; // this is set by Wavegeninit() 72int samplerate_native=0; 73extern int option_device_number; 74extern int option_quiet; 75 76static wavegen_peaks_t peaks[N_PEAKS]; 77static int peak_harmonic[N_PEAKS]; 78static int peak_height[N_PEAKS]; 79 80#define N_ECHO_BUF 5500 // max of 250mS at 22050 Hz 81static int echo_head; 82static int echo_tail; 83static int echo_length = 0; // period (in sample\) to ensure completion of echo at the end of speech, set in WavegenSetEcho() 84static int echo_amp = 0; 85static short echo_buf[N_ECHO_BUF]; 86 87static int voicing; 88RESONATOR rbreath[N_PEAKS]; 89 90static int harm_sqrt_n = 0; 91 92 93#define N_LOWHARM 30 94static int harm_inc[N_LOWHARM]; // only for these harmonics do we interpolate amplitude between steps 95static int *harmspect; 96static int hswitch=0; 97static int hspect[2][MAX_HARMONIC]; // 2 copies, we interpolate between then 98static int max_hval=0; 99 100static int nsamples=0; // number to do 101static int amplitude = 32; 102static int amplitude_v = 0; 103static int modulation_type = 0; 104static int glottal_flag = 0; 105static int glottal_reduce = 0; 106 107static unsigned char *mix_wavefile = NULL; // wave file to be added to synthesis 108static int n_mix_wavefile = 0; // length in bytes 109static int mix_wave_scale = 0; // 0=2 byte samples 110static int mix_wave_amp = 32; 111static int mix_wavefile_ix = 0; 112 113static int pitch; // pitch Hz*256 114static int pitch_ix; // index into pitch envelope (*256) 115static int pitch_inc; // increment to pitch_ix 116static unsigned char *pitch_env=NULL; 117static int pitch_base; // Hz*256 low, before modified by envelope 118static int pitch_range; // Hz*256 range of envelope 119static int amp_ix; 120static int amp_inc; 121static unsigned char *amplitude_env = NULL; 122 123static int samplecount=0; // number done 124static int samplecount_start=0; // 
count at start of this segment 125static int end_wave=0; // continue to end of wave cycle 126static int wavephase; 127static int phaseinc; 128static int cycle_samples; // number of samples in a cycle at current pitch 129static int cbytes; 130static int hf_factor; 131 132static double minus_pi_t; 133static double two_pi_t; 134 135 136unsigned char *out_ptr; 137unsigned char *out_start; 138unsigned char *out_end; 139int outbuf_size = 0; 140 141// the queue of operations passed to wavegen from sythesize 142long wcmdq[N_WCMDQ][4]; 143int wcmdq_head=0; 144int wcmdq_tail=0; 145 146// pitch,speed, 147int embedded_default[N_EMBEDDED_VALUES] = {0,50,170,100,50, 0,0, 0,170,0,0,0,0,0}; 148static int embedded_max[N_EMBEDDED_VALUES] = {0,0x7fff,400,300,99,99,99, 0,360,0,0,0,0,4}; 149 150#define N_CALLBACK_IX N_WAV_BUF-2 // adjust this delay to match display with the currently spoken word 151int current_source_index=0; 152 153extern FILE *f_wave; 154 155#if (USE_PORTAUDIO == 18) 156static PortAudioStream *pa_stream=NULL; 157#endif 158#if (USE_PORTAUDIO == 19) 159static PaStream *pa_stream=NULL; 160#endif 161 162/* default pitch envelope, a steady fall */ 163#define ENV_LEN 128 164 165 166/* 167unsigned char Pitch_env0[ENV_LEN] = { 168 255,253,251,249,247,245,243,241,239,237,235,233,231,229,227,225, 169 223,221,219,217,215,213,211,209,207,205,203,201,199,197,195,193, 170 191,189,187,185,183,181,179,177,175,173,171,169,167,165,163,161, 171 159,157,155,153,151,149,147,145,143,141,139,137,135,133,131,129, 172 127,125,123,121,119,117,115,113,111,109,107,105,103,101, 99, 97, 173 95, 93, 91, 89, 87, 85, 83, 81, 79, 77, 75, 73, 71, 69, 67, 65, 174 63, 61, 59, 57, 55, 53, 51, 49, 47, 45, 43, 41, 39, 37, 35, 33, 175 31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1 176}; 177*/ 178 179/* 180unsigned char Pitch_long[ENV_LEN] = { 181 254,249,250,251,252,253,254,254, 255,255,255,255,254,254,253,252, 182 251,250,249,247,244,242,238,234, 230,225,221,217,213,209,206,203, 183 
199,195,191,187,183,179,175,172, 168,165,162,159,156,153,150,148, 184 145,143,140,138,136,134,132,130, 128,126,123,120,117,114,111,107, 185 104,100,96,91, 86,82,77,73, 70,66,63,60, 58,55,53,51, 186 49,47,46,45, 43,42,40,38, 36,34,31,28, 26,24,22,20, 187 18,16,14,12, 11,10,9,8, 8,8,8,8, 9,8,8,8, 188 8,8,7,7, 6,6,6,5, 4,4,3,3, 2,1,1,0 189}; 190*/ 191 192// 1st index=roughness 193// 2nd index=modulation_type 194// value: bits 0-3 amplitude (16ths), bits 4-7 every n cycles 195#define N_ROUGHNESS 8 196static unsigned char modulation_tab[N_ROUGHNESS][8] = { 197 {0, 0x00, 0x00, 0x00, 0, 0x46, 0xf2, 0x29}, 198 {0, 0x2f, 0x00, 0x2f, 0, 0x45, 0xf2, 0x29}, 199 {0, 0x2f, 0x00, 0x2e, 0, 0x45, 0xf2, 0x28}, 200 {0, 0x2e, 0x00, 0x2d, 0, 0x34, 0xf2, 0x28}, 201 {0, 0x2d, 0x2d, 0x2c, 0, 0x34, 0xf2, 0x28}, 202 {0, 0x2b, 0x2b, 0x2b, 0, 0x34, 0xf2, 0x28}, 203 {0, 0x2a, 0x2a, 0x2a, 0, 0x34, 0xf2, 0x28}, 204 {0, 0x29, 0x29, 0x29, 0, 0x34, 0xf2, 0x28}, 205}; 206 207// Flutter table, to add natural variations to the pitch 208#define N_FLUTTER 0x170 209static int Flutter_inc; 210static const unsigned char Flutter_tab[N_FLUTTER] = { 211 0x80, 0x9b, 0xb5, 0xcb, 0xdc, 0xe8, 0xed, 0xec, 212 0xe6, 0xdc, 0xce, 0xbf, 0xb0, 0xa3, 0x98, 0x90, 213 0x8c, 0x8b, 0x8c, 0x8f, 0x92, 0x94, 0x95, 0x92, 214 0x8c, 0x83, 0x78, 0x69, 0x59, 0x49, 0x3c, 0x31, 215 0x2a, 0x29, 0x2d, 0x36, 0x44, 0x56, 0x69, 0x7d, 216 0x8f, 0x9f, 0xaa, 0xb1, 0xb2, 0xad, 0xa4, 0x96, 217 0x87, 0x78, 0x69, 0x5c, 0x53, 0x4f, 0x4f, 0x55, 218 0x5e, 0x6b, 0x7a, 0x88, 0x96, 0xa2, 0xab, 0xb0, 219 220 0xb1, 0xae, 0xa8, 0xa0, 0x98, 0x91, 0x8b, 0x88, 221 0x89, 0x8d, 0x94, 0x9d, 0xa8, 0xb2, 0xbb, 0xc0, 222 0xc1, 0xbd, 0xb4, 0xa5, 0x92, 0x7c, 0x63, 0x4a, 223 0x32, 0x1e, 0x0e, 0x05, 0x02, 0x05, 0x0f, 0x1e, 224 0x30, 0x44, 0x59, 0x6d, 0x7f, 0x8c, 0x96, 0x9c, 225 0x9f, 0x9f, 0x9d, 0x9b, 0x99, 0x99, 0x9c, 0xa1, 226 0xa9, 0xb3, 0xbf, 0xca, 0xd5, 0xdc, 0xe0, 0xde, 227 0xd8, 0xcc, 0xbb, 0xa6, 0x8f, 0x77, 0x60, 0x4b, 228 229 0x3a, 0x2e, 0x28, 0x29, 0x2f, 
0x3a, 0x48, 0x59, 230 0x6a, 0x7a, 0x86, 0x90, 0x94, 0x95, 0x91, 0x89, 231 0x80, 0x75, 0x6b, 0x62, 0x5c, 0x5a, 0x5c, 0x61, 232 0x69, 0x74, 0x80, 0x8a, 0x94, 0x9a, 0x9e, 0x9d, 233 0x98, 0x90, 0x86, 0x7c, 0x71, 0x68, 0x62, 0x60, 234 0x63, 0x6b, 0x78, 0x88, 0x9b, 0xaf, 0xc2, 0xd2, 235 0xdf, 0xe6, 0xe7, 0xe2, 0xd7, 0xc6, 0xb2, 0x9c, 236 0x84, 0x6f, 0x5b, 0x4b, 0x40, 0x39, 0x37, 0x38, 237 238 0x3d, 0x43, 0x4a, 0x50, 0x54, 0x56, 0x55, 0x52, 239 0x4d, 0x48, 0x42, 0x3f, 0x3e, 0x41, 0x49, 0x56, 240 0x67, 0x7c, 0x93, 0xab, 0xc3, 0xd9, 0xea, 0xf6, 241 0xfc, 0xfb, 0xf4, 0xe7, 0xd5, 0xc0, 0xaa, 0x94, 242 0x80, 0x71, 0x64, 0x5d, 0x5a, 0x5c, 0x61, 0x68, 243 0x70, 0x77, 0x7d, 0x7f, 0x7f, 0x7b, 0x74, 0x6b, 244 0x61, 0x57, 0x4e, 0x48, 0x46, 0x48, 0x4e, 0x59, 245 0x66, 0x75, 0x84, 0x93, 0x9f, 0xa7, 0xab, 0xaa, 246 247 0xa4, 0x99, 0x8b, 0x7b, 0x6a, 0x5b, 0x4e, 0x46, 248 0x43, 0x45, 0x4d, 0x5a, 0x6b, 0x7f, 0x92, 0xa6, 249 0xb8, 0xc5, 0xcf, 0xd3, 0xd2, 0xcd, 0xc4, 0xb9, 250 0xad, 0xa1, 0x96, 0x8e, 0x89, 0x87, 0x87, 0x8a, 251 0x8d, 0x91, 0x92, 0x91, 0x8c, 0x84, 0x78, 0x68, 252 0x55, 0x41, 0x2e, 0x1c, 0x0e, 0x05, 0x01, 0x05, 253 0x0f, 0x1f, 0x34, 0x4d, 0x68, 0x81, 0x9a, 0xb0, 254 0xc1, 0xcd, 0xd3, 0xd3, 0xd0, 0xc8, 0xbf, 0xb5, 255 256 0xab, 0xa4, 0x9f, 0x9c, 0x9d, 0xa0, 0xa5, 0xaa, 257 0xae, 0xb1, 0xb0, 0xab, 0xa3, 0x96, 0x87, 0x76, 258 0x63, 0x51, 0x42, 0x36, 0x2f, 0x2d, 0x31, 0x3a, 259 0x48, 0x59, 0x6b, 0x7e, 0x8e, 0x9c, 0xa6, 0xaa, 260 0xa9, 0xa3, 0x98, 0x8a, 0x7b, 0x6c, 0x5d, 0x52, 261 0x4a, 0x48, 0x4a, 0x50, 0x5a, 0x67, 0x75, 0x82 262}; 263 264// waveform shape table for HF peaks, formants 6,7,8 265#define N_WAVEMULT 512 266static int wavemult_offset=0; 267static int wavemult_max=0; 268 269// the presets are for 22050 Hz sample rate. 
270// A different rate will need to recalculate the presets in WavegenInit() 271static unsigned char wavemult[N_WAVEMULT] = { 272 0, 0, 0, 2, 3, 5, 8, 11, 14, 18, 22, 27, 32, 37, 43, 49, 273 55, 62, 69, 76, 83, 90, 98,105,113,121,128,136,144,152,159,166, 274 174,181,188,194,201,207,213,218,224,228,233,237,240,244,246,249, 275 251,252,253,253,253,253,252,251,249,246,244,240,237,233,228,224, 276 218,213,207,201,194,188,181,174,166,159,152,144,136,128,121,113, 277 105, 98, 90, 83, 76, 69, 62, 55, 49, 43, 37, 32, 27, 22, 18, 14, 278 11, 8, 5, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 279 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 283 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 284 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 285 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 286 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 287 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 291 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 292 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 294 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 295 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 296 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 297 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 299 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 300 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 301 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 302 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 303 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; 304 305 306// set from y = pow(2,x) * 128, x=-1 to 1 307unsigned char pitch_adjust_tab[MAX_PITCH_VALUE+1] = { 308 
64, 65, 66, 67, 68, 69, 70, 71, 309 72, 73, 74, 75, 76, 77, 78, 79, 310 80, 81, 82, 83, 84, 86, 87, 88, 311 89, 91, 92, 93, 94, 96, 97, 98, 312 100,101,103,104,105,107,108,110, 313 111,113,115,116,118,119,121,123, 314 124,126,128,130,132,133,135,137, 315 139,141,143,145,147,149,151,153, 316 155,158,160,162,164,167,169,171, 317 174,176,179,181,184,186,189,191, 318 194,197,199,202,205,208,211,214, 319 217,220,223,226,229,232,236,239, 320 242,246,249,252, 254,255 }; 321 322int WavegenFill(int fill_zeros); 323 324 325#ifdef LOG_FRAMES 326static void LogMarker(int type, int value) 327{//======================================= 328 if(option_log_frames == 0) 329 return; 330 331 if((type == espeakEVENT_PHONEME) || (type == espeakEVENT_SENTENCE)) 332 { 333 f_log=fopen("log-espeakedit","a"); 334 if(f_log) 335 { 336 if(type == espeakEVENT_PHONEME) 337 fprintf(f_log,"Phoneme [%s]\n",WordToString(value)); 338 else 339 fprintf(f_log,"\n"); 340 fclose(f_log); 341 f_log = NULL; 342 } 343 } 344} 345#endif 346 347void WcmdqStop() 348{//============= 349 wcmdq_head = 0; 350 wcmdq_tail = 0; 351#ifdef USE_PORTAUDIO 352 Pa_AbortStream(pa_stream); 353#endif 354} 355 356 357int WcmdqFree() 358{//============ 359 int i; 360 i = wcmdq_head - wcmdq_tail; 361 if(i <= 0) i += N_WCMDQ; 362 return(i); 363} 364 365int WcmdqUsed() 366{//============ 367 return(N_WCMDQ - WcmdqFree()); 368} 369 370 371void WcmdqInc() 372{//============ 373 wcmdq_tail++; 374 if(wcmdq_tail >= N_WCMDQ) wcmdq_tail=0; 375} 376 377static void WcmdqIncHead() 378{//======================= 379 wcmdq_head++; 380 if(wcmdq_head >= N_WCMDQ) wcmdq_head=0; 381} 382 383 384 385// data points from which to make the presets for pk_shape1 and pk_shape2 386#define PEAKSHAPEW 256 387static const float pk_shape_x[2][8] = { 388 {0,-0.6, 0.0, 0.6, 1.4, 2.5, 4.5, 5.5}, 389 {0,-0.6, 0.0, 0.6, 1.4, 2.0, 4.5, 5.5 }}; 390static const float pk_shape_y[2][8] = { 391 {0, 67, 81, 67, 31, 14, 0, -6} , 392 {0, 77, 81, 77, 31, 7, 0, -6 }}; 393 
394unsigned char pk_shape1[PEAKSHAPEW+1] = { 395 255,254,254,254,254,254,253,253,252,251,251,250,249,248,247,246, 396 245,244,242,241,239,238,236,234,233,231,229,227,225,223,220,218, 397 216,213,211,209,207,205,203,201,199,197,195,193,191,189,187,185, 398 183,180,178,176,173,171,169,166,164,161,159,156,154,151,148,146, 399 143,140,138,135,132,129,126,123,120,118,115,112,108,105,102, 99, 400 96, 95, 93, 91, 90, 88, 86, 85, 83, 82, 80, 79, 77, 76, 74, 73, 401 72, 70, 69, 68, 67, 66, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 402 55, 54, 53, 52, 52, 51, 50, 50, 49, 48, 48, 47, 47, 46, 46, 46, 403 45, 45, 45, 44, 44, 44, 44, 44, 44, 44, 43, 43, 43, 43, 44, 43, 404 42, 42, 41, 40, 40, 39, 38, 38, 37, 36, 36, 35, 35, 34, 33, 33, 405 32, 32, 31, 30, 30, 29, 29, 28, 28, 27, 26, 26, 25, 25, 24, 24, 406 23, 23, 22, 22, 21, 21, 20, 20, 19, 19, 18, 18, 18, 17, 17, 16, 407 16, 15, 15, 15, 14, 14, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, 408 10, 9, 9, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 5, 5, 409 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 410 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 411 0 }; 412 413static unsigned char pk_shape2[PEAKSHAPEW+1] = { 414 255,254,254,254,254,254,254,254,254,254,253,253,253,253,252,252, 415 252,251,251,251,250,250,249,249,248,248,247,247,246,245,245,244, 416 243,243,242,241,239,237,235,233,231,229,227,225,223,221,218,216, 417 213,211,208,205,203,200,197,194,191,187,184,181,178,174,171,167, 418 163,160,156,152,148,144,140,136,132,127,123,119,114,110,105,100, 419 96, 94, 91, 88, 86, 83, 81, 78, 76, 74, 71, 69, 66, 64, 62, 60, 420 57, 55, 53, 51, 49, 47, 44, 42, 40, 38, 36, 34, 32, 30, 29, 27, 421 25, 23, 21, 19, 18, 16, 14, 12, 11, 9, 7, 6, 4, 3, 1, 0, 422 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 423 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 424 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 425 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 426 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 427 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 428 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 429 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 430 0 }; 431 432static unsigned char *pk_shape; 433 434 435static void WavegenInitPkData(int which) 436{//===================================== 437// this is only needed to set up the presets for pk_shape1 and pk_shape2 438// These have already been pre-calculated and preset 439#ifdef deleted 440 int ix; 441 int p; 442 float x; 443 float y[PEAKSHAPEW]; 444 float maxy=0; 445 446 if(which==0) 447 pk_shape = pk_shape1; 448 else 449 pk_shape = pk_shape2; 450 451 p = 0; 452 for(ix=0;ix<PEAKSHAPEW;ix++) 453 { 454 x = (4.5*ix)/PEAKSHAPEW; 455 if(x >= pk_shape_x[which][p+3]) p++; 456 y[ix] = polint(&pk_shape_x[which][p],&pk_shape_y[which][p],3,x); 457 if(y[ix] > maxy) maxy = y[ix]; 458 } 459 for(ix=0;ix<PEAKSHAPEW;ix++) 460 { 461 p = (int)(y[ix]*255/maxy); 462 pk_shape[ix] = (p >= 0) ? p : 0; 463 } 464 pk_shape[PEAKSHAPEW]=0; 465#endif 466} // end of WavegenInitPkData 467 468 469 470#ifdef USE_PORTAUDIO 471// PortAudio interface 472 473static int userdata[4]; 474static PaError pa_init_err=0; 475static int out_channels=1; 476 477#if USE_PORTAUDIO == 18 478static int WaveCallback(void *inputBuffer, void *outputBuffer, 479 unsigned long framesPerBuffer, PaTimestamp outTime, void *userData ) 480#else 481static int WaveCallback(const void *inputBuffer, void *outputBuffer, 482 long unsigned int framesPerBuffer, const PaStreamCallbackTimeInfo *outTime, 483 PaStreamCallbackFlags flags, void *userData ) 484#endif 485{ 486 int ix; 487 int result; 488 unsigned char *p; 489 490 out_ptr = out_start = (unsigned char *)outputBuffer; 491 out_end = out_ptr + framesPerBuffer*2; 492 493#ifdef LIBRARY 494 event_list_ix = 0; 495#endif 496 497 result = WavegenFill(1); 498 499#ifdef LIBRARY 500 count_samples += framesPerBuffer; 501 if(synth_callback) 502 { 503 // synchronous-playback mode, allow the calling process to abort the speech 504 event_list[event_list_ix].type = 
espeakEVENT_LIST_TERMINATED; // indicates end of event list 505 event_list[event_list_ix].user_data = 0; 506 507 if(synth_callback(NULL,0,event_list) == 1) 508 { 509 SpeakNextClause(NULL,NULL,2); // stop speaking 510 result = 1; 511 } 512 } 513#endif 514 515#ifdef ARCH_BIG 516 { 517 // swap the order of bytes in each sound sample in the portaudio buffer 518 int c; 519 out_ptr = (unsigned char *)outputBuffer; 520 out_end = out_ptr + framesPerBuffer*2; 521 while(out_ptr < out_end) 522 { 523 c = out_ptr[0]; 524 out_ptr[0] = out_ptr[1]; 525 out_ptr[1] = c; 526 out_ptr += 2; 527 } 528 } 529#endif 530 531 if(out_channels == 2) 532 { 533 // sound output can only do stereo, not mono. Duplicate each sound sample to 534 // produce 2 channels. 535 out_ptr = (unsigned char *)outputBuffer; 536 for(ix=framesPerBuffer-1; ix>=0; ix--) 537 { 538 p = &out_ptr[ix*4]; 539 p[3] = p[1] = out_ptr[ix*2 + 1]; 540 p[2] = p[0] = out_ptr[ix*2]; 541 } 542 } 543 544#if USE_PORTAUDIO == 18 545#ifdef PLATFORM_WINDOWS 546 return(result); 547#endif 548 if(result != 0) 549 { 550 static int end_timer = 0; 551 if(end_timer == 0) 552 end_timer = 4; 553 if(end_timer > 0) 554 { 555 end_timer--; 556 if(end_timer == 0) 557 return(1); 558 } 559 } 560 return(0); 561#else 562 return(result); 563#endif 564 565} // end of WaveCallBack 566 567 568#if USE_PORTAUDIO == 19 569/* This is a fixed version of Pa_OpenDefaultStream() for use if the version in portaudio V19 570 is broken */ 571 572PaError Pa_OpenDefaultStream2( PaStream** stream, 573 int inputChannelCount, 574 int outputChannelCount, 575 PaSampleFormat sampleFormat, 576 double sampleRate, 577 unsigned long framesPerBuffer, 578 PaStreamCallback *streamCallback, 579 void *userData ) 580{ 581 PaError result; 582 PaStreamParameters hostApiOutputParameters; 583 584 if(option_device_number >= 0) 585 hostApiOutputParameters.device = option_device_number; 586 else 587 hostApiOutputParameters.device = Pa_GetDefaultOutputDevice(); 588 589 if( 
hostApiOutputParameters.device == paNoDevice ) 590 return paDeviceUnavailable; 591 592 hostApiOutputParameters.channelCount = outputChannelCount; 593 hostApiOutputParameters.sampleFormat = sampleFormat; 594 /* defaultHighOutputLatency is used below instead of 595 defaultLowOutputLatency because it is more important for the default 596 stream to work reliably than it is for it to work with the lowest 597 latency. 598 */ 599 hostApiOutputParameters.suggestedLatency = 600 Pa_GetDeviceInfo( hostApiOutputParameters.device )->defaultHighOutputLatency; 601 hostApiOutputParameters.hostApiSpecificStreamInfo = NULL; 602 603 result = Pa_OpenStream( 604 stream, NULL, &hostApiOutputParameters, sampleRate, framesPerBuffer, paNoFlag, streamCallback, userData ); 605 606 return(result); 607} 608#endif 609 610 611int WavegenOpenSound() 612{//=================== 613 PaError err, err2; 614 PaError active; 615 616 if(option_waveout || option_quiet) 617 { 618 // writing to WAV file, not to portaudio 619 return(0); 620 } 621 622#if USE_PORTAUDIO == 18 623 active = Pa_StreamActive(pa_stream); 624#else 625 active = Pa_IsStreamActive(pa_stream); 626#endif 627 628 if(active == 1) 629 return(0); 630 if(active < 0) 631 { 632 out_channels = 1; 633 634#if USE_PORTAUDIO == 18 635 err2 = Pa_OpenDefaultStream(&pa_stream,0,1,paInt16,samplerate,512,N_WAV_BUF,WaveCallback,(void *)userdata); 636 637 if(err2 == paInvalidChannelCount) 638 { 639 // failed to open with mono, try stereo 640 out_channels=2; 641 err2 = Pa_OpenDefaultStream(&pa_stream,0,2,paInt16,samplerate,512,N_WAV_BUF,WaveCallback,(void *)userdata); 642 } 643#else 644 err2 = Pa_OpenDefaultStream2(&pa_stream,0,1,paInt16,(double)samplerate,512,WaveCallback,(void *)userdata); 645 646 if(err2 == paInvalidChannelCount) 647 { 648 // failed to open with mono, try stereo 649 out_channels=2; 650 err2 = Pa_OpenDefaultStream(&pa_stream,0,2,paInt16,(double)samplerate,512,WaveCallback,(void *)userdata); 651 } 652#endif 653 } 654 err = 
Pa_StartStream(pa_stream); 655 656#if USE_PORTAUDIO == 19 657 if(err == paStreamIsNotStopped) 658 { 659 // not sure why we need this, but PA v19 seems to need it 660 err = Pa_StopStream(pa_stream); 661 err = Pa_StartStream(pa_stream); 662 } 663#endif 664 665 if(err != paNoError) 666 { 667 // exit speak if we can't open the sound device - this is OK if speak is being run for each utterance 668 exit(2); 669 } 670 671 return(0); 672} 673 674 675int WavegenCloseSound() 676{//==================== 677 PaError active; 678 679 // check whether speaking has finished, and close the stream 680// if(pa_stream != NULL) 681 if((pa_stream != NULL) && (WcmdqUsed() == 0)) // TEST, also check that queue is empty 682 { 683#if USE_PORTAUDIO == 18 684 active = Pa_StreamActive(pa_stream); 685#else 686 active = Pa_IsStreamActive(pa_stream); 687#endif 688 if(active == 0) 689 { 690 Pa_CloseStream(pa_stream); 691 pa_stream = NULL; 692 return(1); 693 } 694 } 695 return(0); 696} 697 698 699int WavegenInitSound() 700{//=================== 701 PaError err; 702 703 if(option_quiet) 704 return(0); 705 706 // PortAudio sound output library 707 err = Pa_Initialize(); 708 pa_init_err = err; 709 if(err != paNoError) 710 { 711 fprintf(stderr,"Failed to initialise the PortAudio sound\n"); 712 return(1); 713 } 714 return(0); 715} 716#else 717int WavegenOpenSound() 718{//=================== 719 return(0); 720} 721int WavegenCloseSound() 722{//==================== 723 return(0); 724} 725int WavegenInitSound() 726{//=================== 727 return(0); 728} 729#endif 730 731 732void WavegenInit(int rate, int wavemult_fact) 733{//========================================== 734 int ix; 735 double x; 736 737 if(wavemult_fact == 0) 738 wavemult_fact=60; // default 739 740 samplerate = samplerate_native = rate; 741 PHASE_INC_FACTOR = 0x8000000 / samplerate; // assumes pitch is Hz*32 742 Flutter_inc = (64 * samplerate)/rate; 743 samplecount = 0; 744 nsamples = 0; 745 wavephase = 0x7fffffff; 746 max_hval = 0; 747 
748 for(ix=0; ix<N_EMBEDDED_VALUES; ix++) 749 embedded_value[ix] = embedded_default[ix]; 750 751 752 // set up window to generate a spread of harmonics from a 753 // single peak for HF peaks 754 wavemult_max = (samplerate * wavemult_fact)/(256 * 50); 755 if(wavemult_max > N_WAVEMULT) wavemult_max = N_WAVEMULT; 756 757 wavemult_offset = wavemult_max/2; 758 759 if(samplerate != 22050) 760 { 761 // wavemult table has preset values for 22050 Hz, we only need to 762 // recalculate them if we have a different sample rate 763 for(ix=0; ix<wavemult_max; ix++) 764 { 765 x = 127*(1.0 - cos(PI2*ix/wavemult_max)); 766 wavemult[ix] = (int)x; 767 } 768 } 769 770 WavegenInitPkData(1); 771 WavegenInitPkData(0); 772 pk_shape = pk_shape2; // ph_shape2 773 774#ifdef LOG_FRAMES 775remove("log-espeakedit"); 776#endif 777} // end of WavegenInit 778 779 780int GetAmplitude(void) 781{//=================== 782 int amp; 783 784 // normal, none, reduced, moderate, strong 785 static const unsigned char amp_emphasis[5] = {16, 16, 10, 16, 22}; 786 787 amp = (embedded_value[EMBED_A])*60/100; 788 general_amplitude = amp * amp_emphasis[embedded_value[EMBED_F]] / 16; 789 return(general_amplitude); 790} 791 792 793static void WavegenSetEcho(void) 794{//============================= 795 int delay; 796 int amp; 797 798 voicing = wvoice->voicing; 799 delay = wvoice->echo_delay; 800 amp = wvoice->echo_amp; 801 802 if(delay >= N_ECHO_BUF) 803 delay = N_ECHO_BUF-1; 804 if(amp > 100) 805 amp = 100; 806 807 memset(echo_buf,0,sizeof(echo_buf)); 808 echo_tail = 0; 809 810 if(embedded_value[EMBED_H] > 0) 811 { 812 // set echo from an embedded command in the text 813 amp = embedded_value[EMBED_H]; 814 delay = 130; 815 } 816 if(embedded_value[EMBED_T] > 0) 817 { 818 // announcing punctuation 819 amp = embedded_value[EMBED_T] * 8; 820 delay = 60; 821 } 822 823 if(delay == 0) 824 amp = 0; 825 826 echo_head = (delay * samplerate)/1000; 827 echo_length = echo_head; // ensure completion of echo at the end of speech. 
Use 1 delay period? 828 if(amp == 0) 829 echo_length = 0; 830 if(amp > 20) 831 echo_length = echo_head * 2; // perhaps allow 2 echo periods if the echo is loud. 832 833 // echo_amp units are 1/256ths of the amplitude of the original sound. 834 echo_amp = amp; 835 // compensate (partially) for increase in amplitude due to echo 836 general_amplitude = GetAmplitude(); 837 general_amplitude = ((general_amplitude * (500-amp))/500); 838} // end of WavegenSetEcho 839 840 841 842int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control) 843{//============================================================================ 844// Calculate the amplitude of each harmonics from the formants 845// Only for formants 0 to 5 846 847// control 0=initial call, 1=every 64 cycles 848 849 // pitch and freqs are Hz<<16 850 851 int f; 852 wavegen_peaks_t *p; 853 int fp; // centre freq of peak 854 int fhi; // high freq of peak 855 int h; // harmonic number 856 int pk; 857 int hmax; 858 int hmax_samplerate; // highest harmonic allowed for the samplerate 859 int x; 860 int ix; 861 int h1; 862 863#ifdef SPECT_EDITOR 864 if(harm_sqrt_n > 0) 865 return(HarmToHarmspect(pitch,htab)); 866#endif 867 868 // initialise as much of *out as we will need 869 if(wvoice == NULL) 870 return(1); 871 hmax = (peaks[wvoice->n_harmonic_peaks].freq + peaks[wvoice->n_harmonic_peaks].right)/pitch; 872 if(hmax >= MAX_HARMONIC) 873 hmax = MAX_HARMONIC-1; 874 875 // restrict highest harmonic to half the samplerate 876 hmax_samplerate = (((samplerate * 19)/40) << 16)/pitch; // only 95% of Nyquist freq 877// hmax_samplerate = (samplerate << 16)/(pitch*2); 878 879 if(hmax > hmax_samplerate) 880 hmax = hmax_samplerate; 881 882 for(h=0;h<=hmax;h++) 883 htab[h]=0; 884 885 h=0; 886 for(pk=0; pk<=wvoice->n_harmonic_peaks; pk++) 887 { 888 p = &peaks[pk]; 889 if((p->height == 0) || (fp = p->freq)==0) 890 continue; 891 892 fhi = p->freq + p->right; 893 h = ((p->freq - p->left) / pitch) + 1; 894 if(h <= 0) h = 
1; 895 896 for(f=pitch*h; f < fp; f+=pitch) 897 { 898 htab[h++] += pk_shape[(fp-f)/(p->left>>8)] * p->height; 899 } 900 for(; f < fhi; f+=pitch) 901 { 902 htab[h++] += pk_shape[(f-fp)/(p->right>>8)] * p->height; 903 } 904 } 905 906 // find the nearest harmonic for HF peaks where we don't use shape 907 for(; pk<N_PEAKS; pk++) 908 { 909 x = peaks[pk].height >> 14; 910 peak_height[pk] = (x * x * 5)/2; 911 912 // find the nearest harmonic for HF peaks where we don't use shape 913 if(control == 0) 914 { 915 // set this initially, but make changes only at the quiet point 916 peak_harmonic[pk] = peaks[pk].freq / pitch; 917 } 918 // only use harmonics up to half the samplerate 919 if(peak_harmonic[pk] >= hmax_samplerate) 920 peak_height[pk] = 0; 921 } 922 923 // convert from the square-rooted values 924 f = 0; 925 for(h=0; h<=hmax; h++, f+=pitch) 926 { 927 x = htab[h] >> 15; 928 htab[h] = (x * x) >> 8; 929 930 if((ix = (f >> 19)) < N_TONE_ADJUST) 931 { 932 htab[h] = (htab[h] * wvoice->tone_adjust[ix]) >> 13; // index tone_adjust with Hz/8 933 } 934 } 935 936 // adjust the amplitude of the first harmonic, affects tonal quality 937 h1 = htab[1] * option_harmonic1; 938 htab[1] = h1/8; 939 940 941 // calc intermediate increments of LF harmonics 942 if(control & 1) 943 { 944 for(h=1; h<N_LOWHARM; h++) 945 { 946 harm_inc[h] = (htab[h] - harmspect[h]) >> 3; 947 } 948 } 949 950 return(hmax); // highest harmonic number 951} // end of PeaksToHarmspect 952 953 954 955static void AdvanceParameters() 956{//============================ 957// Called every 64 samples to increment the formant freq, height, and widths 958 959 int x; 960 int ix; 961 static int Flutter_ix = 0; 962 963 // advance the pitch 964 pitch_ix += pitch_inc; 965 if((ix = pitch_ix>>8) > 127) ix = 127; 966 x = pitch_env[ix] * pitch_range; 967 pitch = (x>>8) + pitch_base; 968 969 amp_ix += amp_inc; 970 971 /* add pitch flutter */ 972 if(Flutter_ix >= (N_FLUTTER*64)) 973 Flutter_ix = 0; 974 x = 
((int)(Flutter_tab[Flutter_ix >> 6])-0x80) * flutter_amp; 975 Flutter_ix += Flutter_inc; 976 pitch += x; 977 if(pitch < 102400) 978 pitch = 102400; // min pitch, 25 Hz (25 << 12) 979 980 if(samplecount == samplecount_start) 981 return; 982 983 for(ix=0; ix <= wvoice->n_harmonic_peaks; ix++) 984 { 985 peaks[ix].freq1 += peaks[ix].freq_inc; 986 peaks[ix].freq = int(peaks[ix].freq1); 987 peaks[ix].height1 += peaks[ix].height_inc; 988 if((peaks[ix].height = int(peaks[ix].height1)) < 0) 989 peaks[ix].height = 0; 990 peaks[ix].left1 += peaks[ix].left_inc; 991 peaks[ix].left = int(peaks[ix].left1); 992 peaks[ix].right1 += peaks[ix].right_inc; 993 peaks[ix].right = int(peaks[ix].right1); 994 } 995 for(;ix < N_PEAKS; ix++) 996 { 997 // formants 6,7,8 don't have a width parameter 998 peaks[ix].freq1 += peaks[ix].freq_inc; 999 peaks[ix].freq = int(peaks[ix].freq1); 1000 peaks[ix].height1 += peaks[ix].height_inc; 1001 if((peaks[ix].height = int(peaks[ix].height1)) < 0) 1002 peaks[ix].height = 0; 1003 } 1004 1005#ifdef SPECT_EDITOR 1006 if(harm_sqrt_n != 0) 1007 { 1008 // We are generating from a harmonic spectrum at a given pitch, not from formant peaks 1009 for(ix=0; ix<harm_sqrt_n; ix++) 1010 harm_sqrt[ix] += harm_sqrt_inc[ix]; 1011 } 1012#endif 1013} // end of AdvanceParameters 1014 1015 1016#ifndef PLATFORM_RISCOS 1017static double resonator(RESONATOR *r, double input) 1018{//================================================ 1019 double x; 1020 1021 x = r->a * input + r->b * r->x1 + r->c * r->x2; 1022 r->x2 = r->x1; 1023 r->x1 = x; 1024 1025 return x; 1026} 1027 1028 1029 1030static void setresonator(RESONATOR *rp, int freq, int bwidth, int init) 1031{//==================================================================== 1032// freq Frequency of resonator in Hz 1033// bwidth Bandwidth of resonator in Hz 1034// init Initialize internal data 1035 1036 double x; 1037 double arg; 1038 1039 if(init) 1040 { 1041 rp->x1 = 0; 1042 rp->x2 = 0; 1043 } 1044 1045 // x = exp(-pi * 
bwidth * t) 1046 arg = minus_pi_t * bwidth; 1047 x = exp(arg); 1048 1049 // c = -(x*x) 1050 rp->c = -(x * x); 1051 1052 // b = x * 2*cos(2 pi * freq * t) 1053 1054 arg = two_pi_t * freq; 1055 rp->b = x * cos(arg) * 2.0; 1056 1057 // a = 1.0 - b - c 1058 rp->a = 1.0 - rp->b - rp->c; 1059} // end if setresonator 1060#endif 1061 1062 1063void InitBreath(void) 1064{//================== 1065#ifndef PLATFORM_RISCOS 1066 int ix; 1067 1068 minus_pi_t = -PI / samplerate; 1069 two_pi_t = -2.0 * minus_pi_t; 1070 1071 for(ix=0; ix<N_PEAKS; ix++) 1072 { 1073 setresonator(&rbreath[ix],2000,200,1); 1074 } 1075#endif 1076} // end of InitBreath 1077 1078 1079 1080void SetBreath() 1081{//============= 1082#ifndef PLATFORM_RISCOS 1083 int pk; 1084 1085 if(wvoice->breath[0] == 0) 1086 return; 1087 1088 for(pk=1; pk<N_PEAKS; pk++) 1089 { 1090 if(wvoice->breath[pk] != 0) 1091 { 1092 // breath[0] indicates that some breath formants are needed 1093 // set the freq from the current ynthesis formant and the width from the voice data 1094 setresonator(&rbreath[pk], peaks[pk].freq >> 16, wvoice->breathw[pk],0); 1095 } 1096 } 1097#endif 1098} // end of SetBreath 1099 1100 1101int ApplyBreath(void) 1102{//================== 1103 int value = 0; 1104#ifndef PLATFORM_RISCOS 1105 int noise; 1106 int ix; 1107 int amp; 1108 1109 // use two random numbers, for alternate formants 1110 noise = (rand() & 0x3fff) - 0x2000; 1111 1112 for(ix=1; ix < N_PEAKS; ix++) 1113 { 1114 if((amp = wvoice->breath[ix]) != 0) 1115 { 1116 amp *= (peaks[ix].height >> 14); 1117 value += int(resonator(&rbreath[ix],noise) * amp); 1118 } 1119 } 1120#endif 1121 return (value); 1122} 1123 1124 1125 1126int Wavegen() 1127{//========== 1128 unsigned short waveph; 1129 unsigned short theta; 1130 int total; 1131 int h; 1132 int ix; 1133 int z, z1, z2; 1134 int echo; 1135 int ov; 1136 static int maxh, maxh2; 1137 int pk; 1138 signed char c; 1139 int sample; 1140 int amp; 1141 int modn_amp, modn_period; 1142 static int agc = 256; 1143 
static int h_switch_sign = 0; 1144 static int cycle_count = 0; 1145 static int amplitude2 = 0; // adjusted for pitch 1146 1147 // continue until the output buffer is full, or 1148 // the required number of samples have been produced 1149 1150 for(;;) 1151 { 1152 if((end_wave==0) && (samplecount==nsamples)) 1153 return(0); 1154 1155 if((samplecount & 0x3f) == 0) 1156 { 1157 // every 64 samples, adjust the parameters 1158 if(samplecount == 0) 1159 { 1160 hswitch = 0; 1161 harmspect = hspect[0]; 1162 maxh2 = PeaksToHarmspect(peaks,pitch<<4,hspect[0],0); 1163 1164 // adjust amplitude to compensate for fewer harmonics at higher pitch 1165 amplitude2 = (amplitude * pitch)/(100 << 11); 1166 1167 // switch sign of harmonics above about 900Hz, to reduce max peak amplitude 1168 h_switch_sign = 890 / (pitch >> 12); 1169 } 1170 else 1171 AdvanceParameters(); 1172 1173 // pitch is Hz<<12 1174 phaseinc = (pitch>>7) * PHASE_INC_FACTOR; 1175 cycle_samples = samplerate/(pitch >> 12); // sr/(pitch*2) 1176 hf_factor = pitch >> 11; 1177 1178 maxh = maxh2; 1179 harmspect = hspect[hswitch]; 1180 hswitch ^= 1; 1181 maxh2 = PeaksToHarmspect(peaks,pitch<<4,hspect[hswitch],1); 1182 1183 SetBreath(); 1184 } 1185 else 1186 if((samplecount & 0x07) == 0) 1187 { 1188 for(h=1; h<N_LOWHARM && h<=maxh2 && h<=maxh; h++) 1189 { 1190 harmspect[h] += harm_inc[h]; 1191 } 1192 1193 // bring automctic gain control back towards unity 1194 if(agc < 256) agc++; 1195 } 1196 1197 samplecount++; 1198 1199 if(wavephase > 0) 1200 { 1201 wavephase += phaseinc; 1202 if(wavephase < 0) 1203 { 1204 // sign has changed, reached a quiet point in the waveform 1205 cbytes = wavemult_offset - (cycle_samples)/2; 1206 if(samplecount > nsamples) 1207 return(0); 1208 1209 cycle_count++; 1210 1211 for(pk=wvoice->n_harmonic_peaks+1; pk<N_PEAKS; pk++) 1212 { 1213 // find the nearest harmonic for HF peaks where we don't use shape 1214 peak_harmonic[pk] = peaks[pk].freq / (pitch*16); 1215 } 1216 1217 // adjust amplitude to 
compensate for fewer harmonics at higher pitch 1218 amplitude2 = (amplitude * pitch)/(100 << 11); 1219 1220 if(glottal_flag > 0) 1221 { 1222 if(glottal_flag == 3) 1223 { 1224 if((nsamples-samplecount) < (cycle_samples*2)) 1225 { 1226 // Vowel before glottal-stop. 1227 // This is the start of the penultimate cycle, reduce its amplitude 1228 glottal_flag = 2; 1229 amplitude2 = (amplitude2 * glottal_reduce)/256; 1230 } 1231 } 1232 else 1233 if(glottal_flag == 4) 1234 { 1235 // Vowel following a glottal-stop. 1236 // This is the start of the second cycle, reduce its amplitude 1237 glottal_flag = 2; 1238 amplitude2 = (amplitude2 * glottal_reduce)/256; 1239 } 1240 else 1241 { 1242 glottal_flag--; 1243 } 1244 } 1245 1246 if(amplitude_env != NULL) 1247 { 1248 // amplitude envelope is only used for creaky voice effect on certain vowels/tones 1249 if((ix = amp_ix>>8) > 127) ix = 127; 1250 amp = amplitude_env[ix]; 1251 amplitude2 = (amplitude2 * amp)/255; 1252 if(amp < 255) 1253 modulation_type = 7; 1254 } 1255 1256 // introduce roughness into the sound by reducing the amplitude of 1257 modn_period = 0; 1258 if(voice->roughness < N_ROUGHNESS) 1259 { 1260 modn_period = modulation_tab[voice->roughness][modulation_type]; 1261 modn_amp = modn_period & 0xf; 1262 modn_period = modn_period >> 4; 1263 } 1264 1265 if(modn_period != 0) 1266 { 1267 if(modn_period==0xf) 1268 { 1269 // just once */ 1270 amplitude2 = (amplitude2 * modn_amp)/16; 1271 modulation_type = 0; 1272 } 1273 else 1274 { 1275 // reduce amplitude every [modn_period} cycles 1276 if((cycle_count % modn_period)==0) 1277 amplitude2 = (amplitude2 * modn_amp)/16; 1278 } 1279 } 1280 } 1281 } 1282 else 1283 { 1284 wavephase += phaseinc; 1285 } 1286 waveph = (unsigned short)(wavephase >> 16); 1287 total = 0; 1288 1289 // apply HF peaks, formants 6,7,8 1290 // add a single harmonic and then spread this my multiplying by a 1291 // window. 
This is to reduce the processing power needed to add the 1292 // higher frequence harmonics. 1293 cbytes++; 1294 if(cbytes >=0 && cbytes<wavemult_max) 1295 { 1296 for(pk=wvoice->n_harmonic_peaks+1; pk<N_PEAKS; pk++) 1297 { 1298 theta = peak_harmonic[pk] * waveph; 1299 total += (long)sin_tab[theta >> 5] * peak_height[pk]; 1300 } 1301 1302 // spread the peaks by multiplying by a window 1303 total = (long)(total / hf_factor) * wavemult[cbytes]; 1304 } 1305 1306 // apply main peaks, formants 0 to 5 1307#ifdef USE_ASSEMBLER_1 1308 // use an optimised routine for this loop, if available 1309 total += AddSineWaves(waveph, h_switch_sign, maxh, harmspect); // call an assembler code routine 1310#else 1311 theta = waveph; 1312 1313 for(h=1; h<=h_switch_sign; h++) 1314 { 1315 total += (int(sin_tab[theta >> 5]) * harmspect[h]); 1316 theta += waveph; 1317 } 1318 while(h<=maxh) 1319 { 1320 total -= (int(sin_tab[theta >> 5]) * harmspect[h]); 1321 theta += waveph; 1322 h++; 1323 } 1324#endif 1325 1326 if(voicing != 64) 1327 { 1328 total = (total >> 6) * voicing; 1329 } 1330 1331#ifndef PLATFORM_RISCOS 1332 if(wvoice->breath[0]) 1333 { 1334 total += ApplyBreath(); 1335 } 1336#endif 1337 1338 // mix with sampled wave if required 1339 z2 = 0; 1340 if(mix_wavefile_ix < n_mix_wavefile) 1341 { 1342 if(mix_wave_scale == 0) 1343 { 1344 // a 16 bit sample 1345 c = mix_wavefile[mix_wavefile_ix+1]; 1346 sample = mix_wavefile[mix_wavefile_ix] + (c * 256); 1347 mix_wavefile_ix += 2; 1348 } 1349 else 1350 { 1351 // a 8 bit sample, scaled 1352 sample = (signed char)mix_wavefile[mix_wavefile_ix++] * mix_wave_scale; 1353 } 1354 z2 = (sample * amplitude_v) >> 10; 1355 z2 = (z2 * mix_wave_amp)/32; 1356 } 1357 1358 z1 = z2 + (((total>>8) * amplitude2) >> 13); 1359 1360 echo = (echo_buf[echo_tail++] * echo_amp); 1361 z1 += echo >> 8; 1362 if(echo_tail >= N_ECHO_BUF) 1363 echo_tail=0; 1364 1365 z = (z1 * agc) >> 8; 1366 1367 // check for overflow, 16bit signed samples 1368 if(z >= 32768) 1369 { 1370 ov 
= 8388608/z1 - 1; // 8388608 is 2^23, i.e. max value * 256 1371 if(ov < agc) agc = ov; // set agc to number of 1/256ths to multiply the sample by 1372 z = (z1 * agc) >> 8; // reduce sample by agc value to prevent overflow 1373 } 1374 else 1375 if(z <= -32768) 1376 { 1377 ov = -8388608/z1 - 1; 1378 if(ov < agc) agc = ov; 1379 z = (z1 * agc) >> 8; 1380 } 1381 *out_ptr++ = z; 1382 *out_ptr++ = z >> 8; 1383 1384 echo_buf[echo_head++] = z; 1385 if(echo_head >= N_ECHO_BUF) 1386 echo_head = 0; 1387 1388 if(out_ptr >= out_end) 1389 return(1); 1390 } 1391 return(0); 1392} // end of Wavegen 1393 1394 1395static int PlaySilence(int length, int resume) 1396{//=========================================== 1397 static int n_samples; 1398 int value=0; 1399 1400 if(length == 0) 1401 return(0); 1402 1403 nsamples = 0; 1404 samplecount = 0; 1405 1406 if(resume==0) 1407 n_samples = length; 1408 1409 while(n_samples-- > 0) 1410 { 1411 value = (echo_buf[echo_tail++] * echo_amp) >> 8; 1412 1413 if(echo_tail >= N_ECHO_BUF) 1414 echo_tail = 0; 1415 1416 *out_ptr++ = value; 1417 *out_ptr++ = value >> 8; 1418 1419 echo_buf[echo_head++] = value; 1420 if(echo_head >= N_ECHO_BUF) 1421 echo_head = 0; 1422 1423 if(out_ptr >= out_end) 1424 return(1); 1425 } 1426 return(0); 1427} // end of PlaySilence 1428 1429 1430 1431static int PlayWave(int length, int resume, unsigned char *data, int scale, int amp) 1432{//================================================================================= 1433 static int n_samples; 1434 static int ix=0; 1435 int value; 1436 signed char c; 1437 1438 if(resume==0) 1439 { 1440 n_samples = length; 1441 ix = 0; 1442 } 1443 1444 nsamples = 0; 1445 samplecount = 0; 1446 1447 while(n_samples-- > 0) 1448 { 1449 if(scale == 0) 1450 { 1451 // 16 bits data 1452 c = data[ix+1]; 1453 value = data[ix] + (c * 256); 1454 ix+=2; 1455 } 1456 else 1457 { 1458 // 8 bit data, shift by the specified scale factor 1459 value = (signed char)data[ix++] * scale; 1460 } 1461 value *= 
(consonant_amp * general_amplitude); // reduce strength of consonant 1462 value = value >> 10; 1463 value = (value * amp)/32; 1464 1465 value += ((echo_buf[echo_tail++] * echo_amp) >> 8); 1466 1467 if(value > 32767) 1468 value = 32768; 1469 else 1470 if(value < -32768) 1471 value = -32768; 1472 1473 if(echo_tail >= N_ECHO_BUF) 1474 echo_tail = 0; 1475 1476 out_ptr[0] = value; 1477 out_ptr[1] = value >> 8; 1478 out_ptr+=2; 1479 1480 echo_buf[echo_head++] = (value*3)/4; 1481 if(echo_head >= N_ECHO_BUF) 1482 echo_head = 0; 1483 1484 if(out_ptr >= out_end) 1485 return(1); 1486 } 1487 return(0); 1488} 1489 1490 1491static int SetWithRange0(int value, int max) 1492{//========================================= 1493 if(value < 0) 1494 return(0); 1495 if(value > max) 1496 return(max); 1497 return(value); 1498} 1499 1500 1501void SetEmbedded(int control, int value) 1502{//===================================== 1503 // there was an embedded command in the text at this point 1504 int sign=0; 1505 int command; 1506 int ix; 1507 int factor; 1508 int pitch_value; 1509 1510 command = control & 0x1f; 1511 if((control & 0x60) == 0x60) 1512 sign = -1; 1513 else 1514 if((control & 0x60) == 0x40) 1515 sign = 1; 1516 1517 if(command < N_EMBEDDED_VALUES) 1518 { 1519 if(sign == 0) 1520 embedded_value[command] = value; 1521 else 1522 embedded_value[command] += (value * sign); 1523 embedded_value[command] = SetWithRange0(embedded_value[command],embedded_max[command]); 1524 } 1525 1526 switch(command) 1527 { 1528 case EMBED_T: 1529 WavegenSetEcho(); // and drop through to case P 1530 case EMBED_P: 1531 // adjust formants to give better results for a different voice pitch 1532 if((pitch_value = embedded_value[EMBED_P]) > MAX_PITCH_VALUE) 1533 pitch_value = MAX_PITCH_VALUE; 1534 1535 factor = 256 + (25 * (pitch_value - 50))/50; 1536 for(ix=0; ix<=5; ix++) 1537 { 1538 wvoice->freq[ix] = (wvoice->freq2[ix] * factor)/256; 1539 } 1540 factor = embedded_value[EMBED_T]*3; 1541 wvoice->height[0] = 
(wvoice->height2[0] * (256 - factor*2))/256; 1542 wvoice->height[1] = (wvoice->height2[1] * (256 - factor))/256; 1543 break; 1544 1545 case EMBED_A: // amplitude 1546 general_amplitude = GetAmplitude(); 1547 break; 1548 1549 case EMBED_F: // emphasiis 1550 general_amplitude = GetAmplitude(); 1551 break; 1552 1553 case EMBED_H: 1554 WavegenSetEcho(); 1555 break; 1556 } 1557} 1558 1559 1560void WavegenSetVoice(voice_t *v) 1561{//============================= 1562 static voice_t v2; 1563 1564 memcpy(&v2,v,sizeof(v2)); 1565 wvoice = &v2; 1566 1567 if(v->peak_shape==0) 1568 pk_shape = pk_shape1; 1569 else 1570 pk_shape = pk_shape2; 1571 1572 consonant_amp = (v->consonant_amp * 26) /100; 1573 if(samplerate <= 11000) 1574 { 1575 consonant_amp = consonant_amp*2; // emphasize consonants at low sample rates 1576 option_harmonic1 = 6; 1577 } 1578 WavegenSetEcho(); 1579} 1580 1581 1582static void SetAmplitude(int length, unsigned char *amp_env, int value) 1583{//==================================================================== 1584 amp_ix = 0; 1585 if(length==0) 1586 amp_inc = 0; 1587 else 1588 amp_inc = (256 * ENV_LEN * STEPSIZE)/length; 1589 1590 amplitude = (value * general_amplitude)/16; 1591 amplitude_v = (amplitude * wvoice->consonant_ampv * 15)/100; // for wave mixed with voiced sounds 1592 1593 amplitude_env = amp_env; 1594} 1595 1596 1597void SetPitch2(voice_t *voice, int pitch1, int pitch2, int *pitch_base, int *pitch_range) 1598{//====================================================================================== 1599 int x; 1600 int base; 1601 int range; 1602 int pitch_value; 1603 1604 if(pitch1 > pitch2) 1605 { 1606 x = pitch1; // swap values 1607 pitch1 = pitch2; 1608 pitch2 = x; 1609 } 1610 1611 if((pitch_value = embedded_value[EMBED_P]) > MAX_PITCH_VALUE) 1612 pitch_value = MAX_PITCH_VALUE; 1613 pitch_value -= embedded_value[EMBED_T]; // adjust tone for announcing punctuation 1614 if(pitch_value < 0) 1615 pitch_value = 0; 1616 1617 base = 
(voice->pitch_base * pitch_adjust_tab[pitch_value])/128; 1618 range = (voice->pitch_range * embedded_value[EMBED_R])/50; 1619 1620 // compensate for change in pitch when the range is narrowed or widened 1621 base -= (range - voice->pitch_range)*18; 1622 1623 *pitch_base = base + (pitch1 * range); 1624 *pitch_range = base + (pitch2 * range) - *pitch_base; 1625} 1626 1627 1628void SetPitch(int length, unsigned char *env, int pitch1, int pitch2) 1629{//================================================================== 1630// length in samples 1631 1632#ifdef LOG_FRAMES 1633if(option_log_frames) 1634{ 1635 f_log=fopen("log-espeakedit","a"); 1636 if(f_log != NULL) 1637 { 1638 fprintf(f_log," pitch %3d %3d %3dmS\n",pitch1,pitch2,(length*1000)/samplerate); 1639 fclose(f_log); 1640 f_log=NULL; 1641 } 1642} 1643#endif 1644 if((pitch_env = env)==NULL) 1645 pitch_env = env_fall; // default 1646 1647 pitch_ix = 0; 1648 if(length==0) 1649 pitch_inc = 0; 1650 else 1651 pitch_inc = (256 * ENV_LEN * STEPSIZE)/length; 1652 1653 SetPitch2(wvoice, pitch1, pitch2, &pitch_base, &pitch_range); 1654 // set initial pitch 1655 pitch = ((pitch_env[0]*pitch_range)>>8) + pitch_base; // Hz << 12 1656 1657 flutter_amp = wvoice->flutter; 1658 1659} // end of SetPitch 1660 1661 1662 1663 1664 1665void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2) 1666{//============================================================ 1667 int ix; 1668 DOUBLEX next; 1669 int length2; 1670 int length4; 1671 int qix; 1672 int cmd; 1673 voice_t *v; 1674 static int glottal_reduce_tab1[4] = {0x30, 0x30, 0x40, 0x50}; // vowel before [?], amp * 1/256 1675// static int glottal_reduce_tab1[4] = {0x30, 0x40, 0x50, 0x60}; // vowel before [?], amp * 1/256 1676 static int glottal_reduce_tab2[4] = {0x90, 0xa0, 0xb0, 0xc0}; // vowel after [?], amp * 1/256 1677 1678#ifdef LOG_FRAMES 1679if(option_log_frames) 1680{ 1681 f_log=fopen("log-espeakedit","a"); 1682 if(f_log != NULL) 1683 { 1684 fprintf(f_log,"%3dmS %3d %3d %4d 
%4d (%3d %3d %3d %3d) to %3d %3d %4d %4d (%3d %3d %3d %3d)\n",length*1000/samplerate, 1685 fr1->ffreq[0],fr1->ffreq[1],fr1->ffreq[2],fr1->ffreq[3], fr1->fheight[0],fr1->fheight[1],fr1->fheight[2],fr1->fheight[3], 1686 fr2->ffreq[0],fr2->ffreq[1],fr2->ffreq[2],fr2->ffreq[3], fr2->fheight[0],fr2->fheight[1],fr2->fheight[2],fr2->fheight[3] ); 1687 1688 fclose(f_log); 1689 f_log=NULL; 1690 } 1691} 1692#endif 1693 1694 harm_sqrt_n = 0; 1695 end_wave = 1; 1696 1697 // any additional information in the param1 ? 1698 modulation_type = modn & 0xff; 1699 1700 glottal_flag = 0; 1701 if(modn & 0x400) 1702 { 1703 glottal_flag = 3; // before a glottal stop 1704 glottal_reduce = glottal_reduce_tab1[(modn >> 8) & 3]; 1705 } 1706 if(modn & 0x800) 1707 { 1708 glottal_flag = 4; // after a glottal stop 1709 glottal_reduce = glottal_reduce_tab2[(modn >> 8) & 3]; 1710 } 1711 1712 for(qix=wcmdq_head+1;;qix++) 1713 { 1714 if(qix >= N_WCMDQ) qix = 0; 1715 if(qix == wcmdq_tail) break; 1716 1717 cmd = wcmdq[qix][0]; 1718 if(cmd==WCMD_SPECT) 1719 { 1720 end_wave = 0; // next wave generation is from another spectrum 1721 break; 1722 } 1723 if((cmd==WCMD_WAVE) || (cmd==WCMD_PAUSE)) 1724 break; // next is not from spectrum, so continue until end of wave cycle 1725 } 1726 1727 v = wvoice; 1728 1729 // round the length to a multiple of the stepsize 1730 length2 = (length + STEPSIZE/2) & ~0x3f; 1731 if(length2 == 0) 1732 length2 = STEPSIZE; 1733 1734 // add this length to any left over from the previous synth 1735 samplecount_start = samplecount; 1736 nsamples += length2; 1737 1738 length4 = length2/4; 1739 for(ix=0; ix<N_PEAKS; ix++) 1740 { 1741 peaks[ix].freq1 = (fr1->ffreq[ix] * v->freq[ix] + v->freqadd[ix]*256) << 8; 1742 peaks[ix].freq = int(peaks[ix].freq1); 1743 next = (fr2->ffreq[ix] * v->freq[ix] + v->freqadd[ix]*256) << 8; 1744 peaks[ix].freq_inc = ((next - peaks[ix].freq1) * (STEPSIZE/4)) / length4; // lower headroom for fixed point math 1745 1746 peaks[ix].height1 = (fr1->fheight[ix] * 
void Write4Bytes(FILE *f, int value)
{//=================================
// Write the low 32 bits of 'value' to file 'f' as 4 bytes, least significant first.
// The results of the fputc() calls are not checked.
	int remaining = 4;

	while(remaining-- > 0)
	{
		fputc(value & 0xff, f);
		value >>= 8;
	}
}
if(resume == 1) 1816 return(0); // not yet finished 1817 } 1818 1819 if(fill_zeros) 1820 { 1821 while(out_ptr < out_end) 1822 *out_ptr++ = 0; 1823 } 1824 return(1); // queue empty, close sound channel 1825 } 1826 1827 result = 0; 1828 q = wcmdq[wcmdq_head]; 1829 length = q[1]; 1830 1831 switch(q[0]) 1832 { 1833 case WCMD_PITCH: 1834 SetPitch(length,(unsigned char *)q[2],q[3] >> 16,q[3] & 0xffff); 1835 break; 1836 1837 case WCMD_PAUSE: 1838 if(resume==0) 1839 { 1840 echo_complete -= length; 1841 } 1842 n_mix_wavefile = 0; 1843 result = PlaySilence(length,resume); 1844 break; 1845 1846 case WCMD_WAVE: 1847 echo_complete = echo_length; 1848 n_mix_wavefile = 0; 1849 result = PlayWave(length,resume,(unsigned char*)q[2], q[3] & 0xff, q[3] >> 8); 1850 break; 1851 1852 case WCMD_WAVE2: 1853 // wave file to be played at the same time as synthesis 1854 mix_wave_amp = q[3] >> 8; 1855 mix_wave_scale = q[3] & 0xff; 1856 if(mix_wave_scale == 0) 1857 n_mix_wavefile = length*2; 1858 else 1859 n_mix_wavefile = length; 1860 mix_wavefile_ix = 0; 1861 mix_wavefile = (unsigned char *)q[2]; 1862 break; 1863 1864 case WCMD_SPECT2: // as WCMD_SPECT but stop any concurrent wave file 1865 n_mix_wavefile = 0; // ... 
and drop through to WCMD_SPECT case 1866 case WCMD_SPECT: 1867 echo_complete = echo_length; 1868 result = Wavegen2(length & 0xffff,q[1] >> 16,resume,(frame_t *)q[2],(frame_t *)q[3]); 1869 break; 1870 1871 case WCMD_MARKER: 1872 MarkerEvent(q[1],q[2],q[3],out_ptr); 1873#ifdef LOG_FRAMES 1874 LogMarker(q[1],q[3]); 1875#endif 1876 if(q[1] == 1) 1877 { 1878 current_source_index = q[2] & 0xffffff; 1879 } 1880 break; 1881 1882 case WCMD_AMPLITUDE: 1883 SetAmplitude(length,(unsigned char *)q[2],q[3]); 1884 break; 1885 1886 case WCMD_VOICE: 1887 WavegenSetVoice((voice_t *)q[1]); 1888 free((voice_t *)q[1]); 1889 break; 1890 1891 case WCMD_EMBEDDED: 1892 SetEmbedded(q[1],q[2]); 1893 break; 1894 } 1895 1896 if(result==0) 1897 { 1898 WcmdqIncHead(); 1899 resume=0; 1900 } 1901 else 1902 { 1903 resume=1; 1904 } 1905 } 1906 1907 retur…
Large files are truncated, but you can click here to view the full file