/src/middleware/stb_vorbis/stb_vorbis.c
C | 5143 lines | 3973 code | 572 blank | 598 comment | 1013 complexity | 2048a9cd8e2bddc7e14a8b87072fc020 MD5 | raw file
1#include "stb_vorbis.h" 2 3#ifndef STB_VORBIS_HEADER_ONLY 4 5// global configuration settings (e.g. set these in the project/makefile), 6// or just set them in this file at the top (although ideally the first few 7// should be visible when the header file is compiled too, although it's not 8// crucial) 9 10// STB_VORBIS_NO_PUSHDATA_API 11// does not compile the code for the various stb_vorbis_*_pushdata() 12// functions 13// #define STB_VORBIS_NO_PUSHDATA_API 14 15// STB_VORBIS_NO_PULLDATA_API 16// does not compile the code for the non-pushdata APIs 17// #define STB_VORBIS_NO_PULLDATA_API 18 19// STB_VORBIS_NO_STDIO 20// does not compile the code for the APIs that use FILE *s internally 21// or externally (implied by STB_VORBIS_NO_PULLDATA_API) 22// #define STB_VORBIS_NO_STDIO 23 24// STB_VORBIS_NO_INTEGER_CONVERSION 25// does not compile the code for converting audio sample data from 26// float to integer (implied by STB_VORBIS_NO_PULLDATA_API) 27// #define STB_VORBIS_NO_INTEGER_CONVERSION 28 29// STB_VORBIS_NO_FAST_SCALED_FLOAT 30// does not use a fast float-to-int trick to accelerate float-to-int on 31// most platforms which requires endianness be defined correctly. 32//#define STB_VORBIS_NO_FAST_SCALED_FLOAT 33 34 35// STB_VORBIS_MAX_CHANNELS [number] 36// globally define this to the maximum number of channels you need. 37// The spec does not put a restriction on channels except that 38// the count is stored in a byte, so 255 is the hard limit. 39// Reducing this saves about 16 bytes per value, so using 16 saves 40// (255-16)*16 or around 4KB. Plus anything other memory usage 41// I forgot to account for. Can probably go as low as 8 (7.1 audio), 42// 6 (5.1 audio), or 2 (stereo only). 43#ifndef STB_VORBIS_MAX_CHANNELS 44#define STB_VORBIS_MAX_CHANNELS 16 // enough for anyone? 45#endif 46 47// STB_VORBIS_PUSHDATA_CRC_COUNT [number] 48// after a flush_pushdata(), stb_vorbis begins scanning for the 49// next valid page, without backtracking. when it finds something 50// that looks like a page, it streams through it and verifies its 51// CRC32. Should that validation fail, it keeps scanning. But it's 52// possible that _while_ streaming through to check the CRC32 of 53// one candidate page, it sees another candidate page. This #define 54// determines how many "overlapping" candidate pages it can search 55// at once. Note that "real" pages are typically ~4KB to ~8KB, whereas 56// garbage pages could be as big as 64KB, but probably average ~16KB. 57// So don't hose ourselves by scanning an apparent 64KB page and 58// missing a ton of real ones in the interim; so minimum of 2 59#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT 60#define STB_VORBIS_PUSHDATA_CRC_COUNT 4 61#endif 62 63// STB_VORBIS_FAST_HUFFMAN_LENGTH [number] 64// sets the log size of the huffman-acceleration table. Maximum 65// supported value is 24. with larger numbers, more decodings are O(1), 66// but the table size is larger so worse cache missing, so you'll have 67// to probe (and try multiple ogg vorbis files) to find the sweet spot. 68#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH 69#define STB_VORBIS_FAST_HUFFMAN_LENGTH 10 70#endif 71 72// STB_VORBIS_FAST_BINARY_LENGTH [number] 73// sets the log size of the binary-search acceleration table. this 74// is used in similar fashion to the fast-huffman size to set initial 75// parameters for the binary search 76 77// STB_VORBIS_FAST_HUFFMAN_INT 78// The fast huffman tables are much more efficient if they can be 79// stored as 16-bit results instead of 32-bit results. This restricts 80// the codebooks to having only 65535 possible outcomes, though. 81// (At least, accelerated by the huffman table.) 82#ifndef STB_VORBIS_FAST_HUFFMAN_INT 83#define STB_VORBIS_FAST_HUFFMAN_SHORT 84#endif 85 86// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH 87// If the 'fast huffman' search doesn't succeed, then stb_vorbis falls 88// back on binary searching for the correct one. This requires storing 89// extra tables with the huffman codes in sorted order. Defining this 90// symbol trades off space for speed by forcing a linear search in the 91// non-fast case, except for "sparse" codebooks. 92// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH 93 94// STB_VORBIS_DIVIDES_IN_RESIDUE 95// stb_vorbis precomputes the result of the scalar residue decoding 96// that would otherwise require a divide per chunk. you can trade off 97// space for time by defining this symbol. 98// #define STB_VORBIS_DIVIDES_IN_RESIDUE 99 100// STB_VORBIS_DIVIDES_IN_CODEBOOK 101// vorbis VQ codebooks can be encoded two ways: with every case explicitly 102// stored, or with all elements being chosen from a small range of values, 103// and all values possible in all elements. By default, stb_vorbis expands 104// this latter kind out to look like the former kind for ease of decoding, 105// because otherwise an integer divide-per-vector-element is required to 106// unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can 107// trade off storage for speed. 108//#define STB_VORBIS_DIVIDES_IN_CODEBOOK 109 110// STB_VORBIS_CODEBOOK_SHORTS 111// The vorbis file format encodes VQ codebook floats as ax+b where a and 112// b are floating point per-codebook constants, and x is a 16-bit int. 113// Normally, stb_vorbis decodes them to floats rather than leaving them 114// as 16-bit ints and computing ax+b while decoding. This is a speed/space 115// tradeoff; you can save space by defining this flag. 116#ifndef STB_VORBIS_CODEBOOK_SHORTS 117#define STB_VORBIS_CODEBOOK_FLOATS 118#endif 119 120// STB_VORBIS_DIVIDE_TABLE 121// this replaces small integer divides in the floor decode loop with 122// table lookups. made less than 1% difference, so disabled by default. 123 124// STB_VORBIS_NO_INLINE_DECODE 125// disables the inlining of the scalar codebook fast-huffman decode. 126// might save a little codespace; useful for debugging 127// #define STB_VORBIS_NO_INLINE_DECODE 128 129// STB_VORBIS_NO_DEFER_FLOOR 130// Normally we only decode the floor without synthesizing the actual 131// full curve. We can instead synthesize the curve immediately. This 132// requires more memory and is very likely slower, so I don't think 133// you'd ever want to do it except for debugging. 134// #define STB_VORBIS_NO_DEFER_FLOOR 135 136 137 138 139////////////////////////////////////////////////////////////////////////////// 140 141#ifdef STB_VORBIS_NO_PULLDATA_API 142 #define STB_VORBIS_NO_INTEGER_CONVERSION 143 #define STB_VORBIS_NO_STDIO 144#endif 145 146#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO) 147 #define STB_VORBIS_NO_STDIO 1 148#endif 149 150#ifndef STB_VORBIS_NO_INTEGER_CONVERSION 151#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT 152 153 // only need endianness for fast-float-to-int, which we don't 154 // use for pushdata 155 156 #ifndef STB_VORBIS_BIG_ENDIAN 157 #define STB_VORBIS_ENDIAN 0 158 #else 159 #define STB_VORBIS_ENDIAN 1 160 #endif 161 162#endif 163#endif 164 165 166#ifndef STB_VORBIS_NO_STDIO 167#include <stdio.h> 168#endif 169 170#ifndef STB_VORBIS_NO_CRT 171#include <stdlib.h> 172#include <string.h> 173#include <assert.h> 174#include <math.h> 175#include <malloc.h> 176#else 177#define NULL 0 178#endif 179 180#ifndef _MSC_VER 181 #if __GNUC__ 182 #define __forceinline inline 183 #else 184 #define __forceinline 185 #endif 186#endif 187 188#if STB_VORBIS_MAX_CHANNELS > 256 189#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range" 190#endif 191 192#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24 193#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range" 194#endif 195 196 197#define MAX_BLOCKSIZE_LOG 13 // from specification 198#define MAX_BLOCKSIZE (1 << MAX_BLOCKSIZE_LOG) 199 200 201typedef unsigned char uint8; 202typedef signed char int8; 203typedef unsigned short uint16; 204typedef signed short int16; 205typedef unsigned int uint32; 206typedef signed int int32; 207 208#ifndef TRUE 209#define TRUE 1 210#define FALSE 0 211#endif 212 213#ifdef STB_VORBIS_CODEBOOK_FLOATS 214typedef float codetype; 215#else 216typedef uint16 codetype; 217#endif 218 219// @NOTE 220// 221// Some arrays below are tagged "//varies", which means it's actually 222// a variable-sized piece of data, but rather than malloc I assume it's 223// small enough it's better to just allocate it all together with the 224// main thing 225// 226// Most of the variables are specified with the smallest size I could pack 227// them into. It might give better performance to make them all full-sized 228// integers. It should be safe to freely rearrange the structures or change 229// the sizes larger--nothing relies on silently truncating etc., nor the 230// order of variables. 231 232#define FAST_HUFFMAN_TABLE_SIZE (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH) 233#define FAST_HUFFMAN_TABLE_MASK (FAST_HUFFMAN_TABLE_SIZE - 1) 234 235typedef struct 236{ 237 int dimensions, entries; 238 uint8 *codeword_lengths; 239 float minimum_value; 240 float delta_value; 241 uint8 value_bits; 242 uint8 lookup_type; 243 uint8 sequence_p; 244 uint8 sparse; 245 uint32 lookup_values; 246 codetype *multiplicands; 247 uint32 *codewords; 248 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT 249 int16 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; 250 #else 251 int32 fast_huffman[FAST_HUFFMAN_TABLE_SIZE]; 252 #endif 253 uint32 *sorted_codewords; 254 int *sorted_values; 255 int sorted_entries; 256} Codebook; 257 258typedef struct 259{ 260 uint8 order; 261 uint16 rate; 262 uint16 bark_map_size; 263 uint8 amplitude_bits; 264 uint8 amplitude_offset; 265 uint8 number_of_books; 266 uint8 book_list[16]; // varies 267} Floor0; 268 269typedef struct 270{ 271 uint8 partitions; 272 uint8 partition_class_list[32]; // varies 273 uint8 class_dimensions[16]; // varies 274 uint8 class_subclasses[16]; // varies 275 uint8 class_masterbooks[16]; // varies 276 int16 subclass_books[16][8]; // varies 277 uint16 Xlist[31*8+2]; // varies 278 uint8 sorted_order[31*8+2]; 279 uint8 neighbors[31*8+2][2]; 280 uint8 floor1_multiplier; 281 uint8 rangebits; 282 int values; 283} Floor1; 284 285typedef union 286{ 287 Floor0 floor0; 288 Floor1 floor1; 289} Floor; 290 291typedef struct 292{ 293 uint32 begin, end; 294 uint32 part_size; 295 uint8 classifications; 296 uint8 classbook; 297 uint8 **classdata; 298 int16 (*residue_books)[8]; 299} Residue; 300 301typedef struct 302{ 303 uint8 magnitude; 304 uint8 angle; 305 uint8 mux; 306} MappingChannel; 307 308typedef struct 309{ 310 uint16 coupling_steps; 311 MappingChannel *chan; 312 uint8 submaps; 313 uint8 submap_floor[15]; // varies 314 uint8 submap_residue[15]; // varies 315} Mapping; 316 317typedef struct 318{ 319 uint8 blockflag; 320 uint8 mapping; 321 uint16 windowtype; 322 uint16 transformtype; 323} Mode; 324 325typedef struct 326{ 327 uint32 goal_crc; // expected crc if match 328 int bytes_left; // bytes left in packet 329 uint32 crc_so_far; // running crc 330 int bytes_done; // bytes processed in _current_ chunk 331 uint32 sample_loc; // granule pos encoded in page 332} CRCscan; 333 334typedef struct 335{ 336 uint32 page_start, page_end; 337 uint32 after_previous_page_start; 338 uint32 first_decoded_sample; 339 uint32 last_decoded_sample; 340} ProbedPage; 341 342struct stb_vorbis 343{ 344 // user-accessible info 345 unsigned int sample_rate; 346 int channels; 347 348 unsigned int setup_memory_required; 349 unsigned int temp_memory_required; 350 unsigned int setup_temp_memory_required; 351 352 // input config 353#ifndef STB_VORBIS_NO_STDIO 354 FILE *f; 355 uint32 f_start; 356 int close_on_free; 357#endif 358#ifdef STB_VORBIS_USE_CALLBACKS 359 STREAM_DATA_CLLBACK data_callback; 360 STREAM_RESET_CLLBACK reset_callback; 361 void* user_data; 362 uint32 cb_offset; 363#endif 364 365 uint8 *stream; 366 uint8 *stream_start; 367 uint8 *stream_end; 368 369 uint32 stream_len; 370 371 uint8 push_mode; 372 373 uint32 first_audio_page_offset; 374 375 ProbedPage p_first, p_last; 376 377 // memory management 378 stb_vorbis_alloc alloc; 379 int setup_offset; 380 int temp_offset; 381 382 // run-time results 383 int eof; 384 enum STBVorbisError error; 385 386 // user-useful data 387 388 // header info 389 int blocksize[2]; 390 int blocksize_0, blocksize_1; 391 int codebook_count; 392 Codebook *codebooks; 393 int floor_count; 394 uint16 floor_types[64]; // varies 395 Floor *floor_config; 396 int residue_count; 397 uint16 residue_types[64]; // varies 398 Residue *residue_config; 399 int mapping_count; 400 Mapping *mapping; 401 int mode_count; 402 Mode mode_config[64]; // varies 403 404 uint32 total_samples; 405 406 // decode buffer 407 float *channel_buffers[STB_VORBIS_MAX_CHANNELS]; 408 float *outputs [STB_VORBIS_MAX_CHANNELS]; 409 410 float *previous_window[STB_VORBIS_MAX_CHANNELS]; 411 int previous_length; 412 413 #ifndef STB_VORBIS_NO_DEFER_FLOOR 414 int16 *finalY[STB_VORBIS_MAX_CHANNELS]; 415 #else 416 float *floor_buffers[STB_VORBIS_MAX_CHANNELS]; 417 #endif 418 419 uint32 current_loc; // sample location of next frame to decode 420 int current_loc_valid; 421 422 // per-blocksize precomputed data 423 424 // twiddle factors 425 float *A[2],*B[2],*C[2]; 426 float *window[2]; 427 uint16 *bit_reverse[2]; 428 429 // current page/packet/segment streaming info 430 uint32 serial; // stream serial number for verification 431 int last_page; 432 int segment_count; 433 uint8 segments[255]; 434 uint8 page_flag; 435 uint8 bytes_in_seg; 436 uint8 first_decode; 437 int next_seg; 438 int last_seg; // flag that we're on the last segment 439 int last_seg_which; // what was the segment number of the last seg? 440 uint32 acc; 441 int valid_bits; 442 int packet_bytes; 443 int end_seg_with_known_loc; 444 uint32 known_loc_for_packet; 445 int discard_samples_deferred; 446 uint32 samples_output; 447 448 // push mode scanning 449 int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching 450#ifndef STB_VORBIS_NO_PUSHDATA_API 451 CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT]; 452#endif 453 454 // sample-access 455 int channel_buffer_start; 456 int channel_buffer_end; 457}; 458 459extern int my_prof(int slot); 460//#define stb_prof my_prof 461 462#ifndef stb_prof 463#define stb_prof(x) 0 464#endif 465 466#if defined(STB_VORBIS_NO_PUSHDATA_API) 467 #define IS_PUSH_MODE(f) FALSE 468#elif defined(STB_VORBIS_NO_PULLDATA_API) 469 #define IS_PUSH_MODE(f) TRUE 470#else 471 #define IS_PUSH_MODE(f) ((f)->push_mode) 472#endif 473 474typedef struct stb_vorbis vorb; 475 476static int error(vorb *f, enum STBVorbisError e) 477{ 478 f->error = e; 479 if (!f->eof && e != VORBIS_need_more_data) { 480 f->error=e; // breakpoint for debugging 481 } 482 return 0; 483} 484 485 486// these functions are used for allocating temporary memory 487// while decoding. if you can afford the stack space, use 488// alloca(); otherwise, provide a temp buffer and it will 489// allocate out of those. 490 491#define array_size_required(count,size) (count*(sizeof(void *)+(size))) 492 493#define temp_alloc(f,size) (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size)) 494#ifdef dealloca 495#define temp_free(f,p) (f->alloc.alloc_buffer ? 0 : dealloca(size)) 496#else 497#define temp_free(f,p) 0 498#endif 499#define temp_alloc_save(f) ((f)->temp_offset) 500#define temp_alloc_restore(f,p) ((f)->temp_offset = (p)) 501 502#define temp_block_array(f,count,size) make_block_array(temp_alloc(f,array_size_required(count,size)), count, size) 503 504// given a sufficiently large block of memory, make an array of pointers to subblocks of it 505static void *make_block_array(void *mem, int count, int size) 506{ 507 int i; 508 void ** p = (void **) mem; 509 char *q = (char *) (p + count); 510 for (i=0; i < count; ++i) { 511 p[i] = q; 512 q += size; 513 } 514 return p; 515} 516 517static void *setup_malloc(vorb *f, int sz) 518{ 519 sz = (sz+3) & ~3; 520 f->setup_memory_required += sz; 521 if (f->alloc.alloc_buffer) { 522 void *p = (char *) f->alloc.alloc_buffer + f->setup_offset; 523 if (f->setup_offset + sz > f->temp_offset) return NULL; 524 f->setup_offset += sz; 525 return p; 526 } 527 return sz ? malloc(sz) : NULL; 528} 529 530static void setup_free(vorb *f, void *p) 531{ 532 if (f->alloc.alloc_buffer) return; // do nothing; setup mem is not a stack 533 free(p); 534} 535 536static void *setup_temp_malloc(vorb *f, int sz) 537{ 538 sz = (sz+3) & ~3; 539 if (f->alloc.alloc_buffer) { 540 if (f->temp_offset - sz < f->setup_offset) return NULL; 541 f->temp_offset -= sz; 542 return (char *) f->alloc.alloc_buffer + f->temp_offset; 543 } 544 return malloc(sz); 545} 546 547static void setup_temp_free(vorb *f, void *p, size_t sz) 548{ 549 if (f->alloc.alloc_buffer) { 550 f->temp_offset += (sz+3)&~3; 551 return; 552 } 553 free(p); 554} 555 556#define CRC32_POLY 0x04c11db7 // from spec 557 558static uint32 crc_table[256]; 559static void crc32_init(void) 560{ 561 int i,j; 562 uint32 s; 563 for(i=0; i < 256; i++) { 564 for (s=i<<24, j=0; j < 8; ++j) 565 s = (s << 1) ^ (s >= (1<<31) ? CRC32_POLY : 0); 566 crc_table[i] = s; 567 } 568} 569 570static __forceinline uint32 crc32_update(uint32 crc, uint8 byte) 571{ 572 return (crc << 8) ^ crc_table[byte ^ (crc >> 24)]; 573} 574 575 576// used in setup, and for huffman that doesn't go fast path 577static unsigned int bit_reverse(unsigned int n) 578{ 579 n = ((n & 0xAAAAAAAA) >> 1) | ((n & 0x55555555) << 1); 580 n = ((n & 0xCCCCCCCC) >> 2) | ((n & 0x33333333) << 2); 581 n = ((n & 0xF0F0F0F0) >> 4) | ((n & 0x0F0F0F0F) << 4); 582 n = ((n & 0xFF00FF00) >> 8) | ((n & 0x00FF00FF) << 8); 583 return (n >> 16) | (n << 16); 584} 585 586static float square(float x) 587{ 588 return x*x; 589} 590 591// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3 592// as required by the specification. fast(?) implementation from stb.h 593// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup 594static int ilog(int32 n) 595{ 596 static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 }; 597 598 // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29) 599 if (n < (1U << 14)) 600 if (n < (1U << 4)) return 0 + log2_4[n ]; 601 else if (n < (1U << 9)) return 5 + log2_4[n >> 5]; 602 else return 10 + log2_4[n >> 10]; 603 else if (n < (1U << 24)) 604 if (n < (1U << 19)) return 15 + log2_4[n >> 15]; 605 else return 20 + log2_4[n >> 20]; 606 else if (n < (1U << 29)) return 25 + log2_4[n >> 25]; 607 else if (n < (1U << 31)) return 30 + log2_4[n >> 30]; 608 else return 0; // signed n returns 0 609} 610 611#ifndef M_PI 612 #define M_PI 3.14159265358979323846264f // from CRC 613#endif 614 615// code length assigned to a value with no huffman encoding 616#define NO_CODE 255 617 618/////////////////////// LEAF SETUP FUNCTIONS ////////////////////////// 619// 620// these functions are only called at setup, and only a few times 621// per file 622 623static float float32_unpack(uint32 x) 624{ 625 // from the specification 626 uint32 mantissa = x & 0x1fffff; 627 uint32 sign = x & 0x80000000; 628 uint32 exp = (x & 0x7fe00000) >> 21; 629 double res = sign ? -(double)mantissa : (double)mantissa; 630 return (float) ldexp((float)res, exp-788); 631} 632 633 634// zlib & jpeg huffman tables assume that the output symbols 635// can either be arbitrarily arranged, or have monotonically 636// increasing frequencies--they rely on the lengths being sorted; 637// this makes for a very simple generation algorithm. 638// vorbis allows a huffman table with non-sorted lengths. This 639// requires a more sophisticated construction, since symbols in 640// order do not map to huffman codes "in order". 641static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values) 642{ 643 if (!c->sparse) { 644 c->codewords [symbol] = huff_code; 645 } else { 646 c->codewords [count] = huff_code; 647 c->codeword_lengths[count] = len; 648 values [count] = symbol; 649 } 650} 651 652static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values) 653{ 654 int i,k,m=0; 655 uint32 available[32]; 656 657 memset(available, 0, sizeof(available)); 658 // find the first entry 659 for (k=0; k < n; ++k) if (len[k] < NO_CODE) break; 660 if (k == n) { assert(c->sorted_entries == 0); return TRUE; } 661 // add to the list 662 add_entry(c, 0, k, m++, len[k], values); 663 // add all available leaves 664 for (i=1; i <= len[k]; ++i) 665 available[i] = 1 << (32-i); 666 // note that the above code treats the first case specially, 667 // but it's really the same as the following code, so they 668 // could probably be combined (except the initial code is 0, 669 // and I use 0 in available[] to mean 'empty') 670 for (i=k+1; i < n; ++i) { 671 uint32 res; 672 int z = len[i], y; 673 if (z == NO_CODE) continue; 674 // find lowest available leaf (should always be earliest, 675 // which is what the specification calls for) 676 // note that this property, and the fact we can never have 677 // more than one free leaf at a given level, isn't totally 678 // trivial to prove, but it seems true and the assert never 679 // fires, so! 680 while (z > 0 && !available[z]) --z; 681 if (z == 0) { assert(0); return FALSE; } 682 res = available[z]; 683 available[z] = 0; 684 add_entry(c, bit_reverse(res), i, m++, len[i], values); 685 // propogate availability up the tree 686 if (z != len[i]) { 687 for (y=len[i]; y > z; --y) { 688 assert(available[y] == 0); 689 available[y] = res + (1 << (32-y)); 690 } 691 } 692 } 693 return TRUE; 694} 695 696// accelerated huffman table allows fast O(1) match of all symbols 697// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH 698static void compute_accelerated_huffman(Codebook *c) 699{ 700 int i, len; 701 for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i) 702 c->fast_huffman[i] = -1; 703 704 len = c->sparse ? c->sorted_entries : c->entries; 705 #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT 706 if (len > 32767) len = 32767; // largest possible value we can encode! 707 #endif 708 for (i=0; i < len; ++i) { 709 if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) { 710 uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i]; 711 // set table entries for all bit combinations in the higher bits 712 while (z < FAST_HUFFMAN_TABLE_SIZE) { 713 c->fast_huffman[z] = i; 714 z += 1 << c->codeword_lengths[i]; 715 } 716 } 717 } 718} 719 720static int uint32_compare(const void *p, const void *q) 721{ 722 uint32 x = * (uint32 *) p; 723 uint32 y = * (uint32 *) q; 724 return x < y ? -1 : x > y; 725} 726 727static int include_in_sort(Codebook *c, uint8 len) 728{ 729 if (c->sparse) { assert(len != NO_CODE); return TRUE; } 730 if (len == NO_CODE) return FALSE; 731 if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE; 732 return FALSE; 733} 734 735// if the fast table above doesn't work, we want to binary 736// search them... need to reverse the bits 737static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values) 738{ 739 int i, len; 740 // build a list of all the entries 741 // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN. 742 // this is kind of a frivolous optimization--I don't see any performance improvement, 743 // but it's like 4 extra lines of code, so. 744 if (!c->sparse) { 745 int k = 0; 746 for (i=0; i < c->entries; ++i) 747 if (include_in_sort(c, lengths[i])) 748 c->sorted_codewords[k++] = bit_reverse(c->codewords[i]); 749 assert(k == c->sorted_entries); 750 } else { 751 for (i=0; i < c->sorted_entries; ++i) 752 c->sorted_codewords[i] = bit_reverse(c->codewords[i]); 753 } 754 755 qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare); 756 c->sorted_codewords[c->sorted_entries] = 0xffffffff; 757 758 len = c->sparse ? c->sorted_entries : c->entries; 759 // now we need to indicate how they correspond; we could either 760 // #1: sort a different data structure that says who they correspond to 761 // #2: for each sorted entry, search the original list to find who corresponds 762 // #3: for each original entry, find the sorted entry 763 // #1 requires extra storage, #2 is slow, #3 can use binary search! 764 for (i=0; i < len; ++i) { 765 int huff_len = c->sparse ? lengths[values[i]] : lengths[i]; 766 if (include_in_sort(c,huff_len)) { 767 uint32 code = bit_reverse(c->codewords[i]); 768 int x=0, n=c->sorted_entries; 769 while (n > 1) { 770 // invariant: sc[x] <= code < sc[x+n] 771 int m = x + (n >> 1); 772 if (c->sorted_codewords[m] <= code) { 773 x = m; 774 n -= (n>>1); 775 } else { 776 n >>= 1; 777 } 778 } 779 assert(c->sorted_codewords[x] == code); 780 if (c->sparse) { 781 c->sorted_values[x] = values[i]; 782 c->codeword_lengths[x] = huff_len; 783 } else { 784 c->sorted_values[x] = i; 785 } 786 } 787 } 788} 789 790// only run while parsing the header (3 times) 791static int vorbis_validate(uint8 *data) 792{ 793 static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' }; 794 return memcmp(data, vorbis, 6) == 0; 795} 796 797// called from setup only, once per code book 798// (formula implied by specification) 799static int lookup1_values(int entries, int dim) 800{ 801 int r = (int) floor(exp((float) log((float) entries) / dim)); 802 if ((int) floor(pow((float) r+1, dim)) <= entries) // (int) cast for MinGW warning; 803 ++r; // floor() to avoid _ftol() when non-CRT 804 assert(pow((float) r+1, dim) > entries); 805 assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above 806 return r; 807} 808 809// called twice per file 810static void compute_twiddle_factors(int n, float *A, float *B, float *C) 811{ 812 int n4 = n >> 2, n8 = n >> 3; 813 int k,k2; 814 815 for (k=k2=0; k < n4; ++k,k2+=2) { 816 A[k2 ] = (float) cos(4*k*M_PI/n); 817 A[k2+1] = (float) -sin(4*k*M_PI/n); 818 B[k2 ] = (float) cos((k2+1)*M_PI/n/2) * 0.5f; 819 B[k2+1] = (float) sin((k2+1)*M_PI/n/2) * 0.5f; 820 } 821 for (k=k2=0; k < n8; ++k,k2+=2) { 822 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n); 823 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n); 824 } 825} 826 827static void compute_window(int n, float *window) 828{ 829 int n2 = n >> 1, i; 830 for (i=0; i < n2; ++i) 831 window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI))); 832} 833 834static void compute_bitreverse(int n, uint16 *rev) 835{ 836 int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 837 int i, n8 = n >> 3; 838 for (i=0; i < n8; ++i) 839 rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2; 840} 841 842static int init_blocksize(vorb *f, int b, int n) 843{ 844 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3; 845 f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2); 846 f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2); 847 f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4); 848 if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem); 849 compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]); 850 f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2); 851 if (!f->window[b]) return error(f, VORBIS_outofmem); 852 compute_window(n, f->window[b]); 853 f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8); 854 if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem); 855 compute_bitreverse(n, f->bit_reverse[b]); 856 return TRUE; 857} 858 859static void neighbors(uint16 *x, int n, int *plow, int *phigh) 860{ 861 int low = -1; 862 int high = 65536; 863 int i; 864 for (i=0; i < n; ++i) { 865 if (x[i] > low && x[i] < x[n]) { *plow = i; low = x[i]; } 866 if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; } 867 } 868} 869 870// this has been repurposed so y is now the original index instead of y 871typedef struct 872{ 873 uint16 x,y; 874} Point; 875 876int point_compare(const void *p, const void *q) 877{ 878 Point *a = (Point *) p; 879 Point *b = (Point *) q; 880 return a->x < b->x ? -1 : a->x > b->x; 881} 882 883// 884/////////////////////// END LEAF SETUP FUNCTIONS ////////////////////////// 885 886 887#if defined(STB_VORBIS_NO_STDIO) 888 #define USE_MEMORY(z) TRUE 889#else 890 #define USE_MEMORY(z) ((z)->stream) 891#endif 892#ifdef STB_VORBIS_USE_CALLBACKS 893 894#define USE_CALLBACKS(z) ((z)->data_callback) 895 896int stb_read_from_callback(vorb* z, int size, uint8* ptr) 897{ 898 int read = z->data_callback(size,ptr,z->user_data); 899 if(read < 1 && size > 0) 900 z->eof = 1; 901 else 902 z->cb_offset+=read; 903 return read; 904} 905 906int stb_reset_callback(vorb* z) 907{ 908 int result = z->reset_callback(z->user_data); 909 if(result == -1) 910 z->eof = 1; 911 else 912 { 913 z->cb_offset = 0; 914 z->eof = 0; 915 } 916 return result; 917} 918 919#endif 920 921static uint8 get8(vorb *z) 922{ 923 if (USE_MEMORY(z)) { 924 if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; } 925 return *z->stream++; 926 } 927 928#ifdef STB_VORBIS_USE_CALLBACKS 929 if(USE_CALLBACKS(z)) 930 { 931 uint8 data; 932 int read = stb_read_from_callback(z,1,&data); 933 if(z->eof) 934 return 0; 935 else 936 return data; 937 } 938#endif 939 940 #ifndef STB_VORBIS_NO_STDIO 941 { 942 int c = fgetc(z->f); 943 if (c == EOF) { z->eof = TRUE; return 0; } 944 return c; 945 } 946 #endif 947} 948 949static uint32 get32(vorb *f) 950{ 951 uint32 x; 952 x = get8(f); 953 x += get8(f) << 8; 954 x += get8(f) << 16; 955 x += get8(f) << 24; 956 return x; 957} 958 959static int getn(vorb *z, uint8 *data, int n) 960{ 961 if (USE_MEMORY(z)) { 962 if (z->stream+n > z->stream_end) { z->eof = 1; return 0; } 963 memcpy(data, z->stream, n); 964 z->stream += n; 965 return 1; 966 } 967 968#ifdef STB_VORBIS_USE_CALLBACKS 969 if(USE_CALLBACKS(z)) 970 { 971 int read = stb_read_from_callback(z,n,data); 972 if(read < n) 973 { 974 z->eof = 1; 975 return 0; 976 } 977 else 978 return 1; 979 } 980#endif 981 982 #ifndef STB_VORBIS_NO_STDIO 983 if (fread(data, n, 1, z->f) == 1) 984 return 1; 985 else { 986 z->eof = 1; 987 return 0; 988 } 989 #endif 990} 991 992static void skip(vorb *z, int n) 993{ 994 if (USE_MEMORY(z)) { 995 z->stream += n; 996 if (z->stream >= z->stream_end) z->eof = 1; 997 return; 998 } 999#ifdef STB_VORBIS_USE_CALLBACKS 1000 if(USE_CALLBACKS(z)) 1001 { 1002 int read = stb_read_from_callback(z,n,NULL); 1003 if(read < n) 1004 z->eof = 1; 1005 return; 1006 } 1007#endif 1008 1009 1010 #ifndef STB_VORBIS_NO_STDIO 1011 { 1012 long x = ftell(z->f); 1013 fseek(z->f, x+n, SEEK_SET); 1014 } 1015 #endif 1016} 1017 1018static int set_file_offset(stb_vorbis *f, unsigned int loc) 1019{ 1020 #ifndef STB_VORBIS_NO_PUSHDATA_API 1021 if (f->push_mode) return 0; 1022 #endif 1023 f->eof = 0; 1024 if (USE_MEMORY(f)) { 1025 if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) { 1026 f->stream = f->stream_end; 1027 f->eof = 1; 1028 return 0; 1029 } else { 1030 f->stream = f->stream_start + loc; 1031 return 1; 1032 } 1033 } 1034 1035#ifdef STB_VORBIS_USE_CALLBACKS 1036 if(USE_CALLBACKS(f)) 1037 { 1038 int read = stb_reset_callback(f); 1039 if(read < 0) 1040 { 1041 f->eof = 1; 1042 return 0; 1043 } 1044 read = stb_read_from_callback(f,loc,NULL); 1045 if(read < loc) 1046 { 1047 f->eof = 1; 1048 return 0; 1049 } 1050 return 1; 1051 } 1052#endif 1053 1054 #ifndef STB_VORBIS_NO_STDIO 1055 if (loc + f->f_start < loc || loc >= 0x80000000) { 1056 loc = 0x7fffffff; 1057 f->eof = 1; 1058 } else { 1059 loc += f->f_start; 1060 } 1061 if (!fseek(f->f, loc, SEEK_SET)) 1062 return 1; 1063 f->eof = 1; 1064 fseek(f->f, f->f_start, SEEK_END); 1065 return 0; 1066 #endif 1067} 1068 1069 1070static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 }; 1071 1072static int capture_pattern(vorb *f) 1073{ 1074 if (0x4f != get8(f)) return FALSE; 1075 if (0x67 != get8(f)) return FALSE; 1076 if (0x67 != get8(f)) return FALSE; 1077 if (0x53 != get8(f)) return FALSE; 1078 return TRUE; 1079} 1080 1081#define PAGEFLAG_continued_packet 1 1082#define PAGEFLAG_first_page 2 1083#define PAGEFLAG_last_page 4 1084 1085static int start_page_no_capturepattern(vorb *f) 1086{ 1087 uint32 loc0,loc1,n,i; 1088 // stream structure version 1089 if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version); 1090 // header flag 1091 f->page_flag = get8(f); 1092 // absolute granule position 1093 loc0 = get32(f); 1094 loc1 = get32(f); 1095 // @TODO: validate loc0,loc1 as valid positions? 1096 // stream serial number -- vorbis doesn't interleave, so discard 1097 get32(f); 1098 //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number); 1099 // page sequence number 1100 n = get32(f); 1101 f->last_page = n; 1102 // CRC32 1103 get32(f); 1104 // page_segments 1105 f->segment_count = get8(f); 1106 if (!getn(f, f->segments, f->segment_count)) 1107 return error(f, VORBIS_unexpected_eof); 1108 // assume we _don't_ know any the sample position of any segments 1109 f->end_seg_with_known_loc = -2; 1110 if (loc0 != ~0 || loc1 != ~0) { 1111 // determine which packet is the last one that will complete 1112 for (i=f->segment_count-1; i >= 0; --i) 1113 if (f->segments[i] < 255) 1114 break; 1115 // 'i' is now the index of the _last_ segment of a packet that ends 1116 if (i >= 0) { 1117 f->end_seg_with_known_loc = i; 1118 f->known_loc_for_packet = loc0; 1119 } 1120 } 1121 if (f->first_decode) { 1122 int i,len; 1123 ProbedPage p; 1124 len = 0; 1125 for (i=0; i < f->segment_count; ++i) 1126 len += f->segments[i]; 1127 len += 27 + f->segment_count; 1128 p.page_start = f->first_audio_page_offset; 1129 p.page_end = p.page_start + len; 1130 p.after_previous_page_start = p.page_start; 1131 p.first_decoded_sample = 0; 1132 p.last_decoded_sample = loc0; 1133 f->p_first = p; 1134 } 1135 f->next_seg = 0; 1136 return TRUE; 1137} 1138 1139static int start_page(vorb *f) 1140{ 1141 if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern); 1142 return start_page_no_capturepattern(f); 1143} 1144 1145static int start_packet(vorb *f) 1146{ 1147 while (f->next_seg == -1) { 1148 if (!start_page(f)) return FALSE; 1149 if (f->page_flag & PAGEFLAG_continued_packet) 1150 return error(f, VORBIS_continued_packet_flag_invalid); 1151 } 1152 f->last_seg = FALSE; 1153 f->valid_bits = 0; 1154 f->packet_bytes = 0; 1155 f->bytes_in_seg = 0; 1156 // f->next_seg is now valid 1157 return TRUE; 1158} 1159 1160static int maybe_start_packet(vorb *f) 1161{ 1162 if (f->next_seg == -1) { 1163 int x = get8(f); 1164 if (f->eof) return FALSE; // EOF at page boundary is not an error! 1165 if (0x4f != x ) return error(f, VORBIS_missing_capture_pattern); 1166 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1167 if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1168 if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern); 1169 if (!start_page_no_capturepattern(f)) return FALSE; 1170 if (f->page_flag & PAGEFLAG_continued_packet) { 1171 // set up enough state that we can read this packet if we want, 1172 // e.g. during recovery 1173 f->last_seg = FALSE; 1174 f->bytes_in_seg = 0; 1175 return error(f, VORBIS_continued_packet_flag_invalid); 1176 } 1177 } 1178 return start_packet(f); 1179} 1180 1181static int next_segment(vorb *f) 1182{ 1183 int len; 1184 if (f->last_seg) return 0; 1185 if (f->next_seg == -1) { 1186 f->last_seg_which = f->segment_count-1; // in case start_page fails 1187 if (!start_page(f)) { f->last_seg = 1; return 0; } 1188 if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid); 1189 } 1190 len = f->segments[f->next_seg++]; 1191 if (len < 255) { 1192 f->last_seg = TRUE; 1193 f->last_seg_which = f->next_seg-1; 1194 } 1195 if (f->next_seg >= f->segment_count) 1196 f->next_seg = -1; 1197 assert(f->bytes_in_seg == 0); 1198 f->bytes_in_seg = len; 1199 return len; 1200} 1201 1202#define EOP (-1) 1203#define INVALID_BITS (-1) 1204 1205static int get8_packet_raw(vorb *f) 1206{ 1207 if (!f->bytes_in_seg) 1208 if (f->last_seg) return EOP; 1209 else if (!next_segment(f)) return EOP; 1210 assert(f->bytes_in_seg > 0); 1211 --f->bytes_in_seg; 1212 ++f->packet_bytes; 1213 return get8(f); 1214} 1215 1216static int get8_packet(vorb *f) 1217{ 1218 int x = get8_packet_raw(f); 1219 f->valid_bits = 0; 1220 return x; 1221} 1222 1223static void flush_packet(vorb *f) 1224{ 1225 while (get8_packet_raw(f) != EOP); 1226} 1227 1228// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important 1229// as the huffman decoder? 1230static uint32 get_bits(vorb *f, int n) 1231{ 1232 uint32 z; 1233 1234 if (f->valid_bits < 0) return 0; 1235 if (f->valid_bits < n) { 1236 if (n > 24) { 1237 // the accumulator technique below would not work correctly in this case 1238 z = get_bits(f, 24); 1239 z += get_bits(f, n-24) << 24; 1240 return z; 1241 } 1242 if (f->valid_bits == 0) f->acc = 0; 1243 while (f->valid_bits < n) { 1244 int z = get8_packet_raw(f); 1245 if (z == EOP) { 1246 f->valid_bits = INVALID_BITS; 1247 return 0; 1248 } 1249 f->acc += z << f->valid_bits; 1250 f->valid_bits += 8; 1251 } 1252 } 1253 if (f->valid_bits < 0) return 0; 1254 z = f->acc & ((1 << n)-1); 1255 f->acc >>= n; 1256 f->valid_bits -= n; 1257 return z; 1258} 1259 1260static int32 get_bits_signed(vorb *f, int n) 1261{ 1262 uint32 z = get_bits(f, n); 1263 if (z & (1 << (n-1))) 1264 z += ~((1 << n) - 1); 1265 return (int32) z; 1266} 1267 1268// @OPTIMIZE: primary accumulator for huffman 1269// expand the buffer to as many bits as possible without reading off end of packet 1270// it might be nice to allow f->valid_bits and f->acc to be stored in registers, 1271// e.g. cache them locally and decode locally 1272static __forceinline void prep_huffman(vorb *f) 1273{ 1274 if (f->valid_bits <= 24) { 1275 if (f->valid_bits == 0) f->acc = 0; 1276 do { 1277 int z; 1278 if (f->last_seg && !f->bytes_in_seg) return; 1279 z = get8_packet_raw(f); 1280 if (z == EOP) return; 1281 f->acc += z << f->valid_bits; 1282 f->valid_bits += 8; 1283 } while (f->valid_bits <= 24); 1284 } 1285} 1286 1287enum 1288{ 1289 VORBIS_packet_id = 1, 1290 VORBIS_packet_comment = 3, 1291 VORBIS_packet_setup = 5, 1292}; 1293 1294static int codebook_decode_scalar_raw(vorb *f, Codebook *c) 1295{ 1296 int i; 1297 prep_huffman(f); 1298 1299 assert(c->sorted_codewords || c->codewords); 1300 // cases to use binary search: sorted_codewords && !c->codewords 1301 // sorted_codewords && c->entries > 8 1302 if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) { 1303 // binary search 1304 uint32 code = bit_reverse(f->acc); 1305 int x=0, n=c->sorted_entries, len; 1306 1307 while (n > 1) { 1308 // invariant: sc[x] <= code < sc[x+n] 1309 int m = x + (n >> 1); 1310 if (c->sorted_codewords[m] <= code) { 1311 x = m; 1312 n -= (n>>1); 1313 } else { 1314 n >>= 1; 1315 } 1316 } 1317 // x is now the sorted index 1318 if (!c->sparse) x = c->sorted_values[x]; 1319 // x is now sorted index if sparse, or symbol otherwise 1320 len = c->codeword_lengths[x]; 1321 if (f->valid_bits >= len) { 1322 f->acc >>= len; 1323 f->valid_bits -= len; 1324 return x; 1325 } 1326 1327 f->valid_bits = 0; 1328 return -1; 1329 } 1330 1331 // if small, linear search 1332 assert(!c->sparse); 1333 for (i=0; i < c->entries; ++i) { 1334 if (c->codeword_lengths[i] == NO_CODE) continue; 1335 if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) { 1336 if (f->valid_bits >= c->codeword_lengths[i]) { 1337 f->acc >>= c->codeword_lengths[i]; 1338 f->valid_bits -= c->codeword_lengths[i]; 1339 return i; 1340 } 1341 f->valid_bits = 0; 1342 return -1; 1343 } 1344 } 1345 1346 error(f, VORBIS_invalid_stream); 1347 f->valid_bits = 0; 1348 return -1; 1349} 1350 1351static int codebook_decode_scalar(vorb *f, Codebook *c) 1352{ 1353 int i; 1354 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) 1355 prep_huffman(f); 1356 // fast huffman table lookup 1357 i = f->acc & FAST_HUFFMAN_TABLE_MASK; 1358 i = c->fast_huffman[i]; 1359 if (i >= 0) { 1360 f->acc >>= c->codeword_lengths[i]; 1361 f->valid_bits -= c->codeword_lengths[i]; 1362 if (f->valid_bits < 0) { f->valid_bits = 0; return -1; } 1363 return i; 1364 } 1365 return codebook_decode_scalar_raw(f,c); 1366} 1367 1368#ifndef STB_VORBIS_NO_INLINE_DECODE 1369 1370#define DECODE_RAW(var, f,c) \ 1371 if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) \ 1372 prep_huffman(f); \ 1373 var = f->acc & FAST_HUFFMAN_TABLE_MASK; \ 1374 var = c->fast_huffman[var]; \ 1375 if (var >= 0) { \ 1376 int n = c->codeword_lengths[var]; \ 1377 f->acc >>= n; \ 1378 f->valid_bits -= n; \ 1379 if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \ 1380 } else { \ 1381 var = codebook_decode_scalar_raw(f,c); \ 1382 } 1383 1384#else 1385 1386#define DECODE_RAW(var,f,c) var = codebook_decode_scalar(f,c); 1387 1388#endif 1389 1390#define DECODE(var,f,c) \ 1391 DECODE_RAW(var,f,c) \ 1392 if (c->sparse) var = c->sorted_values[var]; 1393 1394#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK 1395 #define DECODE_VQ(var,f,c) DECODE_RAW(var,f,c) 1396#else 1397 #define DECODE_VQ(var,f,c) DECODE(var,f,c) 1398#endif 1399 1400 1401 1402 1403 1404 1405// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case 1406// where we avoid one addition 1407#ifndef STB_VORBIS_CODEBOOK_FLOATS 1408 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off] * c->delta_value + c->minimum_value) 1409 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off] * c->delta_value) 1410 #define CODEBOOK_ELEMENT_BASE(c) (c->minimum_value) 1411#else 1412 #define CODEBOOK_ELEMENT(c,off) (c->multiplicands[off]) 1413 #define CODEBOOK_ELEMENT_FAST(c,off) (c->multiplicands[off]) 1414 #define CODEBOOK_ELEMENT_BASE(c) (0) 1415#endif 1416 1417static int codebook_decode_start(vorb *f, Codebook *c, int len) 1418{ 1419 int z = -1; 1420 1421 // type 0 is only legal in a scalar context 1422 if (c->lookup_type == 0) 1423 error(f, VORBIS_invalid_stream); 1424 else { 1425 DECODE_VQ(z,f,c); 1426 if (c->sparse) assert(z < c->sorted_entries); 1427 if (z < 0) { // check for EOP 1428 if (!f->bytes_in_seg) 1429 if (f->last_seg) 1430 return z; 1431 error(f, VORBIS_invalid_stream); 1432 } 1433 } 1434 return z; 1435} 1436 1437static int codebook_decode(vorb *f, Codebook *c, float *output, int len) 1438{ 1439 int i,z = codebook_decode_start(f,c,len); 1440 if (z < 0) return FALSE; 1441 if (len > c->dimensions) len = c->dimensions; 1442 1443#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK 1444 if (c->lookup_type == 1) { 1445 float last = CODEBOOK_ELEMENT_BASE(c); 1446 int div = 1; 1447 for (i=0; i < len; ++i) { 1448 int off = (z / div) % c->lookup_values; 1449 float val = CODEBOOK_ELEMENT_FAST(c,off) + last; 1450 output[i] += val; 1451 if (c->sequence_p) last = val + c->minimum_value; 1452 div *= c->lookup_values; 1453 } 1454 return TRUE; 1455 } 1456#endif 1457 1458 z *= c->dimensions; 1459 if (c->sequence_p) { 1460 float last = CODEBOOK_ELEMENT_BASE(c); 1461 for (i=0; i < len; ++i) { 1462 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1463 output[i] += val; 1464 last = val + c->minimum_value; 1465 } 1466 } else { 1467 float last = CODEBOOK_ELEMENT_BASE(c); 1468 for (i=0; i < len; ++i) { 1469 output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1470 } 1471 } 1472 1473 return TRUE; 1474} 1475 1476static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step) 1477{ 1478 int i,z = codebook_decode_start(f,c,len); 1479 float last = CODEBOOK_ELEMENT_BASE(c); 1480 if (z < 0) return FALSE; 1481 if (len > c->dimensions) len = c->dimensions; 1482 1483#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK 1484 if (c->lookup_type == 1) { 1485 int div = 1; 1486 for (i=0; i < len; ++i) { 1487 int off = (z / div) % c->lookup_values; 1488 float val = CODEBOOK_ELEMENT_FAST(c,off) + last; 1489 output[i*step] += val; 1490 if (c->sequence_p) last = val; 1491 div *= c->lookup_values; 1492 } 1493 return TRUE; 1494 } 1495#endif 1496 1497 z *= c->dimensions; 1498 for (i=0; i < len; ++i) { 1499 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1500 output[i*step] += val; 1501 if (c->sequence_p) last = val; 1502 } 1503 1504 return TRUE; 1505} 1506 1507static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode) 1508{ 1509 int c_inter = *c_inter_p; 1510 int p_inter = *p_inter_p; 1511 int i,z, effective = c->dimensions; 1512 1513 // type 0 is only legal in a scalar context 1514 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream); 1515 1516 while (total_decode > 0) { 1517 float last = CODEBOOK_ELEMENT_BASE(c); 1518 DECODE_VQ(z,f,c); 1519 #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK 1520 assert(!c->sparse || z < c->sorted_entries); 1521 #endif 1522 if (z < 0) { 1523 if (!f->bytes_in_seg) 1524 if (f->last_seg) return FALSE; 1525 return error(f, VORBIS_invalid_stream); 1526 } 1527 1528 // if this will take us off the end of the buffers, stop short! 1529 // we check by computing the length of the virtual interleaved 1530 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), 1531 // and the length we'll be using (effective) 1532 if (c_inter + p_inter*ch + effective > len * ch) { 1533 effective = len*ch - (p_inter*ch - c_inter); 1534 } 1535 1536 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK 1537 if (c->lookup_type == 1) { 1538 int div = 1; 1539 for (i=0; i < effective; ++i) { 1540 int off = (z / div) % c->lookup_values; 1541 float val = CODEBOOK_ELEMENT_FAST(c,off) + last; 1542 outputs[c_inter][p_inter] += val; 1543 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1544 if (c->sequence_p) last = val; 1545 div *= c->lookup_values; 1546 } 1547 } else 1548 #endif 1549 { 1550 z *= c->dimensions; 1551 if (c->sequence_p) { 1552 for (i=0; i < effective; ++i) { 1553 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1554 outputs[c_inter][p_inter] += val; 1555 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1556 last = val; 1557 } 1558 } else { 1559 for (i=0; i < effective; ++i) { 1560 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1561 outputs[c_inter][p_inter] += val; 1562 if (++c_inter == ch) { c_inter = 0; ++p_inter; } 1563 } 1564 } 1565 } 1566 1567 total_decode -= effective; 1568 } 1569 *c_inter_p = c_inter; 1570 *p_inter_p = p_inter; 1571 return TRUE; 1572} 1573 1574#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK 1575static int codebook_decode_deinterleave_repeat_2(vorb *f, Codebook *c, float **outputs, int *c_inter_p, int *p_inter_p, int len, int total_decode) 1576{ 1577 int c_inter = *c_inter_p; 1578 int p_inter = *p_inter_p; 1579 int i,z, effective = c->dimensions; 1580 1581 // type 0 is only legal in a scalar context 1582 if (c->lookup_type == 0) return error(f, VORBIS_invalid_stream); 1583 1584 while (total_decode > 0) { 1585 float last = CODEBOOK_ELEMENT_BASE(c); 1586 DECODE_VQ(z,f,c); 1587 1588 if (z < 0) { 1589 if (!f->bytes_in_seg) 1590 if (f->last_seg) return FALSE; 1591 return error(f, VORBIS_invalid_stream); 1592 } 1593 1594 // if this will take us off the end of the buffers, stop short! 1595 // we check by computing the length of the virtual interleaved 1596 // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter), 1597 // and the length we'll be using (effective) 1598 if (c_inter + p_inter*2 + effective > len * 2) { 1599 effective = len*2 - (p_inter*2 - c_inter); 1600 } 1601 1602 { 1603 z *= c->dimensions; 1604 stb_prof(11); 1605 if (c->sequence_p) { 1606 // haven't optimized this case because I don't have any examples 1607 for (i=0; i < effective; ++i) { 1608 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1609 outputs[c_inter][p_inter] += val; 1610 if (++c_inter == 2) { c_inter = 0; ++p_inter; } 1611 last = val; 1612 } 1613 } else { 1614 i=0; 1615 if (c_inter == 1) { 1616 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1617 outputs[c_inter][p_inter] += val; 1618 c_inter = 0; ++p_inter; 1619 ++i; 1620 } 1621 { 1622 float *z0 = outputs[0]; 1623 float *z1 = outputs[1]; 1624 for (; i+1 < effective;) { 1625 z0[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1626 z1[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i+1) + last; 1627 ++p_inter; 1628 i += 2; 1629 } 1630 } 1631 if (i < effective) { 1632 float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last; 1633 outputs[c_inter][p_inter] += val; 1634 if (++c_inter == 2) { c_inter = 0; ++p_inter; } 1635 } 1636 } 1637 } 1638 1639 total_decode -= effective; 1640 } 1641 *c_inter_p = c_inter; 1642 *p_inter_p = p_inter; 1643 return TRUE; 1644} 1645#endif 1646 1647static int predict_point(int x, int x0, int x1, int y0, int y1) 1648{ 1649 int dy = y1 - y0; 1650 int adx = x1 - x0; 1651 // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86? 1652 int err = abs(dy) * (x - x0); 1653 int off = err / adx; 1654 return dy < 0 ? y0 - off : y0 + off; 1655} 1656 1657// the following table is block-copied from the specification 1658static float inverse_db_table[256] = 1659{ 1660 1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 1661 1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 1662 1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 1663 2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, 1664 2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, 1665 3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, 1666 4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, 1667 6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, 1668 7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, 1669 1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, 1670 1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, 1671 1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, 1672 2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, 1673 2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, 1674 3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, 1675 4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, 1676 5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, 1677 7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, 1678 9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, 1679 1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, 1680 1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, 1681 2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, 1682 2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, 1683 3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, 1684 4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, 1685 5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, 1686 7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, 1687 9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, 1688 0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, 1689 0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, 1690 0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, 1691 0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, 1692 0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, 1693 0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, 1694 0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, 1695 0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, 1696 0.00092223983f, 0.00098217216f, 0.0010459992f, 0.0011139742f, 1697 0.0011863665f, 0.0012634633f, 0.0013455702f, 0.0014330129f, 1698 0.0015261382f, 0.0016253153f, 0.0017309374f, 0.0018434235f, 1699 0.0019632195f, 0.0020908006f, 0.0022266726f, 0.0023713743f, 1700 0.0025254795f, 0.0026895994f, 0.0028643847f, 0.0030505286f, 1701 0.0032487691f, 0.0034598925f, 0.0036847358f, 0.0039241906f, 1702 0.0041792066f, 0.0044507950f, 0.0047400328f, 0.0050480668f, 1703 0.0053761186f, 0.0057254891f, 0.0060975636f, 0.0064938176f, 1704 0.0069158225f, 0.0073652516f, 0.0078438871f, 0.0083536271f, 1705 0.0088964928f, 0.009474637f, 0.010090352f, 0.010746080f, 1706 0.011444421f, 0.012188144f, 0.012980198f, 0.013823725f, 1707 0.014722068f, 0.015678791f, 0.016697687f, 0.017782797f, 1708 0.018938423f, 0.020169149f, 0.021479854f, 0.022875735f, 1709 0.024362330f, 0.025945531f, 0.027631618f, 0.029427276f, 1710 0.031339626f, 0.033376252f, 0.035545228f, 0.037855157f, 1711 0.040315199f, 0.042935108f, 0.045725273f, 0.048696758f, 1712 0.051861348f, 0.055231591f, 0.058820850f, 0.062643361f, 1713 0.066714279f, 0.071049749f, 0.075666962f, 0.080584227f, 1714 0.085821044f, 0.091398179f, 0.097337747f, 0.10366330f, 1715 0.11039993f, 0.11757434f, 0.12521498f, 0.13335215f, 1716 0.14201813f, 0.15124727f, 0.16107617f, 0.17154380f, 1717 0.18269168f, 0.19456402f, 0.20720788f, 0.22067342f, 1718 0.23501402f, 0.25028656f, 0.26655159f, 0.28387361f, 1719 0.30232132f, 0.32196786f, 0.34289114f, 0.36517414f, 1720 0.38890521f, 0.41417847f, 0.44109412f, 0.46975890f, 1721 0.50028648f, 0.53279791f, 0.56742212f, 0.60429640f, 1722 0.64356699f, 0.68538959f, 0.72993007f, 0.77736504f, 1723 0.82788260f, 0.88168307f, 0.9389798f, 1.0f 1724}; 1725 1726 1727// @OPTIMIZE: if you want to replace this bresenham line-drawing routine, 1728// note that you must produce bit-identical output to decode correctly; 1729// this specific sequence of operations is specified in the spec (it's 1730// drawing integer-quantized frequency-space lines that the encoder 1731// expects to be exactly the same) 1732// ... also, isn't the whole point of Bresenham's algorithm to NOT 1733// have to divide in the setup? sigh. 1734#ifndef STB_VORBIS_NO_DEFER_FLOOR 1735#define LINE_OP(a,b) a *= b 1736#else 1737#define LINE_OP(a,b) a = b 1738#endif 1739 1740#ifdef STB_VORBIS_DIVIDE_TABLE 1741#define DIVTAB_NUMER 32 1742#define DIVTAB_DENOM 64 1743int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB 1744#endif 1745 1746static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n) 1747{ 1748 int dy = y1 - y0; 1749 int adx = x1 - x0; 1750 int ady = abs(dy); 1751 int base; 1752 int x=x0,y=y0; 1753 int err = 0; 1754 int sy; 1755 1756#ifdef STB_VORBIS_DIVIDE_TABLE 1757 if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) { 1758 if (dy < 0) { 1759 base = -integer_divide_table[ady][adx]; 1760 sy = base-1; 1761 } else { 1762 base = integer_divide_table[ady][adx]; 1763 sy = base+1; 1764 } 1765 } else { 1766 base = dy / adx; 1767 if (dy < 0) 1768 sy = base - 1; 1769 else 1770 sy = base+1; 1771 } 1772#else 1773 base = dy / adx; 1774 if (dy < 0) 1775 sy = base - 1; 1776 else 1777 sy = base+1; 1778#endif 1779 ady -= abs(base) * adx; 1780 if (x1 > n) x1 = n; 1781 LINE_OP(output[x], inverse_db_table[y]); 1782 for (++x; x < x1; ++x) { 1783 err += ady; 1784 if (err >= adx) { 1785 err -= adx; 1786 y += sy; 1787 } else 1788 y += base; 1789 LINE_OP(output[x], inverse_db_table[y]); 1790 } 1791} 1792 1793static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype) 1794{ 1795 int k; 1796 if (rtype == 0) { 1797 int step = n / book->dimensions; 1798 for (k=0; k < step; ++k) 1799 if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) 1800 return FALSE; 1801 } else { 1802 for (k=0; k < n; ) { 1803 if (!codebook_decode(f, book, target+offset, n-k)) 1804 return FALSE; 1805 k += book->dimensions; 1806 offset += book->dimensions; 1807 } 1808 } 1809 return TRUE; 1810} 1811 1812static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode) 1813{ 1814 int i,j,pass; 1815 Residue *r = f->residue_config + rn; 1816 int rtype = f->residue_types[rn]; 1817 int c = r->classbook; 1818 int classwords = f->codebooks[c].dimensions; 1819 int n_read = r->end - r->begin; 1820 int part_read = n_read / r->part_size; 1821 int temp_alloc_point = temp_alloc_save(f); 1822 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1823 uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata)); 1824 #else 1825 int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications)); 1826 #endif 1827 1828 stb_prof(2); 1829 for (i=0; i < ch; ++i) 1830 if (!do_not_decode[i]) 1831 memset(residue_buffers[i], 0, sizeof(float) * n); 1832 1833 if (rtype == 2 && ch != 1) { 1834 int len = ch * n; 1835 for (j=0; j < ch; ++j) 1836 if (!do_not_decode[j]) 1837 break; 1838 if (j == ch) 1839 goto done; 1840 1841 stb_prof(3); 1842 for (pass=0; pass < 8; ++pass) { 1843 int pcount = 0, class_set = 0; 1844 if (ch == 2) { 1845 stb_prof(13); 1846 while (pcount < part_read) { 1847 int z = r->begin + pcount*r->part_size; 1848 int c_inter = (z & 1), p_inter = z>>1; 1849 if (pass == 0) { 1850 Codebook *c = f->codebooks+r->classbook; 1851 int q; 1852 DECODE(q,f,c); 1853 if (q == EOP) goto done; 1854 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1855 part_classdata[0][class_set] = r->classdata[q]; 1856 #else 1857 for (i=classwords-1; i >= 0; --i) { 1858 classifications[0][i+pcount] = q % r->classifications; 1859 q /= r->classifications; 1860 } 1861 #endif 1862 } 1863 stb_prof(5); 1864 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1865 int z = r->begin + pcount*r->part_size; 1866 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1867 int c = part_classdata[0][class_set][i]; 1868 #else 1869 int c = classifications[0][pcount]; 1870 #endif 1871 int b = r->residue_books[c][pass]; 1872 if (b >= 0) { 1873 Codebook *book = f->codebooks + b; 1874 stb_prof(20); // accounts for X time 1875 #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK 1876 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) 1877 goto done; 1878 #else 1879 // saves 1% 1880 if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r->part_size)) 1881 goto done; 1882 #endif 1883 stb_prof(7); 1884 } else { 1885 z += r->part_size; 1886 c_inter = z & 1; 1887 p_inter = z >> 1; 1888 } 1889 } 1890 stb_prof(8); 1891 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1892 ++class_set; 1893 #endif 1894 } 1895 } else if (ch == 1) { 1896 while (pcount < part_read) { 1897 int z = r->begin + pcount*r->part_size; 1898 int c_inter = 0, p_inter = z; 1899 if (pass == 0) { 1900 Codebook *c = f->codebooks+r->classbook; 1901 int q; 1902 DECODE(q,f,c); 1903 if (q == EOP) goto done; 1904 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1905 part_classdata[0][class_set] = r->classdata[q]; 1906 #else 1907 for (i=classwords-1; i >= 0; --i) { 1908 classifications[0][i+pcount] = q % r->classifications; 1909 q /= r->classifications; 1910 } 1911 #endif 1912 } 1913 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1914 int z = r->begin + pcount*r->part_size; 1915 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1916 int c = part_classdata[0][class_set][i]; 1917 #else 1918 int c = classifications[0][pcount]; 1919 #endif 1920 int b = r->residue_books[c][pass]; 1921 if (b >= 0) { 1922 Codebook *book = f->codebooks + b; 1923 stb_prof(22); 1924 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) 1925 goto done; 1926 stb_prof(3); 1927 } else { 1928 z += r->part_size; 1929 c_inter = 0; 1930 p_inter = z; 1931 } 1932 } 1933 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1934 ++class_set; 1935 #endif 1936 } 1937 } else { 1938 while (pcount < part_read) { 1939 int z = r->begin + pcount*r->part_size; 1940 int c_inter = z % ch, p_inter = z/ch; 1941 if (pass == 0) { 1942 Codebook *c = f->codebooks+r->classbook; 1943 int q; 1944 DECODE(q,f,c); 1945 if (q == EOP) goto done; 1946 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1947 part_classdata[0][class_set] = r->classdata[q]; 1948 #else 1949 for (i=classwords-1; i >= 0; --i) { 1950 classifications[0][i+pcount] = q % r->classifications; 1951 q /= r->classifications; 1952 } 1953 #endif 1954 } 1955 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 1956 int z = r->begin + pcount*r->part_size; 1957 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1958 int c = part_classdata[0][class_set][i]; 1959 #else 1960 int c = classifications[0][pcount]; 1961 #endif 1962 int b = r->residue_books[c][pass]; 1963 if (b >= 0) { 1964 Codebook *book = f->codebooks + b; 1965 stb_prof(22); 1966 if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size)) 1967 goto done; 1968 stb_prof(3); 1969 } else { 1970 z += r->part_size; 1971 c_inter = z % ch; 1972 p_inter = z / ch; 1973 } 1974 } 1975 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1976 ++class_set; 1977 #endif 1978 } 1979 } 1980 } 1981 goto done; 1982 } 1983 stb_prof(9); 1984 1985 for (pass=0; pass < 8; ++pass) { 1986 int pcount = 0, class_set=0; 1987 while (pcount < part_read) { 1988 if (pass == 0) { 1989 for (j=0; j < ch; ++j) { 1990 if (!do_not_decode[j]) { 1991 Codebook *c = f->codebooks+r->classbook; 1992 int temp; 1993 DECODE(temp,f,c); 1994 if (temp == EOP) goto done; 1995 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 1996 part_classdata[j][class_set] = r->classdata[temp]; 1997 #else 1998 for (i=classwords-1; i >= 0; --i) { 1999 classifications[j][i+pcount] = temp % r->classifications; 2000 temp /= r->classifications; 2001 } 2002 #endif 2003 } 2004 } 2005 } 2006 for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) { 2007 for (j=0; j < ch; ++j) { 2008 if (!do_not_decode[j]) { 2009 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 2010 int c = part_classdata[j][class_set][i]; 2011 #else 2012 int c = classifications[j][pcount]; 2013 #endif 2014 int b = r->residue_books[c][pass]; 2015 if (b >= 0) { 2016 float *target = residue_buffers[j]; 2017 int offset = r->begin + pcount * r->part_size; 2018 int n = r->part_size; 2019 Codebook *book = f->codebooks + b; 2020 if (!residue_decode(f, book, target, offset, n, rtype)) 2021 goto done; 2022 } 2023 } 2024 } 2025 } 2026 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 2027 ++class_set; 2028 #endif 2029 } 2030 } 2031 done: 2032 stb_prof(0); 2033 temp_alloc_restore(f,temp_alloc_point); 2034} 2035 2036 2037#if 0 2038// slow way for debugging 2039void inverse_mdct_slow(float *buffer, int n) 2040{ 2041 int i,j; 2042 int n2 = n >> 1; 2043 float *x = (float *) malloc(sizeof(*x) * n2); 2044 memcpy(x, buffer, sizeof(*x) * n2); 2045 for (i=0; i < n; ++i) { 2046 float acc = 0; 2047 for (j=0; j < n2; ++j) 2048 // formula from paper: 2049 //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1)); 2050 // formula from wikipedia 2051 //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5)); 2052 // these are equivalent, except the formula from the paper inverts the multiplier! 2053 // however, what actually works is NO MULTIPLIER!?! 2054 //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5)); 2055 acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1)); 2056 buffer[i] = acc; 2057 } 2058 free(x); 2059} 2060#elif 0 2061// same as above, but just barely able to run in real time on modern machines 2062void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype) 2063{ 2064 float mcos[16384]; 2065 int i,j; 2066 int n2 = n >> 1, nmask = (n << 2) -1; 2067 float *x = (float *) malloc(sizeof(*x) * n2); 2068 memcpy(x, buffer, sizeof(*x) * n2); 2069 for (i=0; i < 4*n; ++i) 2070 mcos[i] = (float) cos(M_PI / 2 * i / n); 2071 2072 for (i=0; i < n; ++i) { 2073 float acc = 0; 2074 for (j=0; j < n2; ++j) 2075 acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask]; 2076 buffer[i] = acc; 2077 } 2078 free(x); 2079} 2080#else 2081// transform to use a slow dct-iv; this is STILL basically trivial, 2082// but only requires half as many ops 2083void dct_iv_slow(float *buffer, int n) 2084{ 2085 float mcos[16384]; 2086 float x[2048]; 2087 int i,j; 2088 int n2 = n >> 1, nmask = (n << 3) - 1; 2089 memcpy(x, buffer, sizeof(*x) * n); 2090 for (i=0; i < 8*n; ++i) 2091 mcos[i] = (float) cos(M_PI / 4 * i / n); 2092 for (i=0; i < n; ++i) { 2093 float acc = 0; 2094 for (j=0; j < n; ++j) 2095 acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask]; 2096 //acc += x[j] * cos(M_PI / n * (i + 0.5) * (j + 0.5)); 2097 buffer[i] = acc; 2098 } 2099 free(x); 2100} 2101 2102void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype) 2103{ 2104 int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4; 2105 float temp[4096]; 2106 2107 memcpy(temp, buffer, n2 * sizeof(float)); 2108 dct_iv_slow(temp, n2); // returns -c'-d, a-b' 2109 2110 for (i=0; i < n4 ; ++i) buffer[i] = temp[i+n4]; // a-b' 2111 for ( ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1]; // b-a', c+d' 2112 for ( ; i < n ; ++i) buffer[i] = -temp[i - n3_4]; // c'+d 2113} 2114#endif 2115 2116#ifndef LIBVORBIS_MDCT 2117#define LIBVORBIS_MDCT 0 2118#endif 2119 2120#if LIBVORBIS_MDCT 2121// directly call the vorbis MDCT using an interface documented 2122// by Jeff Roberts... useful for performance comparison 2123typedef struct 2124{ 2125 int n; 2126 int log2n; 2127 2128 float *trig; 2129 int *bitrev; 2130 2131 float scale; 2132} mdct_lookup; 2133 2134extern void mdct_init(mdct_lookup *lookup, int n); 2135extern void mdct_clear(mdct_lookup *l); 2136extern void mdct_backward(mdct_lookup *init, float *in, float *out); 2137 2138mdct_lookup M1,M2; 2139 2140void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) 2141{ 2142 mdct_lookup *M; 2143 if (M1.n == n) M = &M1; 2144 else if (M2.n == n) M = &M2; 2145 else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; } 2146 else { 2147 if (M2.n) __asm int 3; 2148 mdct_init(&M2, n); 2149 M = &M2; 2150 } 2151 2152 mdct_backward(M, buffer, buffer); 2153} 2154#endif 2155 2156 2157// the following were split out into separate functions while optimizing; 2158// they could be pushed back up but eh. __forceinline showed no change; 2159// they're probably already being inlined. 2160static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A) 2161{ 2162 float *ee0 = e + i_off; 2163 float *ee2 = ee0 + k_off; 2164 int i; 2165 2166 assert((n & 3) == 0); 2167 for (i=(n>>2); i > 0; --i) { 2168 float k00_20, k01_21; 2169 k00_20 = ee0[ 0] - ee2[ 0]; 2170 k01_21 = ee0[-1] - ee2[-1]; 2171 ee0[ 0] += ee2[ 0];//ee0[ 0] = ee0[ 0] + ee2[ 0]; 2172 ee0[-1] += ee2[-1];//ee0[-1] = ee0[-1] + ee2[-1]; 2173 ee2[ 0] = k00_20 * A[0] - k01_21 * A[1]; 2174 ee2[-1] = k01_21 * A[0] + k00_20 * A[1]; 2175 A += 8; 2176 2177 k00_20 = ee0[-2] - ee2[-2]; 2178 k01_21 = ee0[-3] - ee2[-3]; 2179 ee0[-2] += ee2[-2];//ee0[-2] = ee0[-2] + ee2[-2]; 2180 ee0[-3] += ee2[-3];//ee0[-3] = ee0[-3] + ee2[-3]; 2181 ee2[-2] = k00_20 * A[0] - k01_21 * A[1]; 2182 ee2[-3] = k01_21 * A[0] + k00_20 * A[1]; 2183 A += 8; 2184 2185 k00_20 = ee0[-4] - ee2[-4]; 2186 k01_21 = ee0[-5] - ee2[-5]; 2187 ee0[-4] += ee2[-4];//ee0[-4] = ee0[-4] + ee2[-4]; 2188 ee0[-5] += ee2[-5];//ee0[-5] = ee0[-5] + ee2[-5]; 2189 ee2[-4] = k00_20 * A[0] - k01_21 * A[1]; 2190 ee2[-5] = k01_21 * A[0] + k00_20 * A[1]; 2191 A += 8; 2192 2193 k00_20 = ee0[-6] - ee2[-6]; 2194 k01_21 = ee0[-7] - ee2[-7]; 2195 ee0[-6] += ee2[-6];//ee0[-6] = ee0[-6] + ee2[-6]; 2196 ee0[-7] += ee2[-7];//ee0[-7] = ee0[-7] + ee2[-7]; 2197 ee2[-6] = k00_20 * A[0] - k01_21 * A[1]; 2198 ee2[-7] = k01_21 * A[0] + k00_20 * A[1]; 2199 A += 8; 2200 ee0 -= 8; 2201 ee2 -= 8; 2202 } 2203} 2204 2205static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1) 2206{ 2207 int i; 2208 float k00_20, k01_21; 2209 2210 float *e0 = e + d0; 2211 float *e2 = e0 + k_off; 2212 2213 for (i=lim >> 2; i > 0; --i) { 2214 k00_20 = e0[-0] - e2[-0]; 2215 k01_21 = e0[-1] - e2[-1]; 2216 e0[-0] += e2[-0];//e0[-0] = e0[-0] + e2[-0]; 2217 e0[-1] += e2[-1];//e0[-1] = e0[-1] + e2[-1]; 2218 e2[-0] = (k00_20)*A[0] - (k01_21) * A[1]; 2219 e2[-1] = (k01_21)*A[0] + (k00_20) * A[1]; 2220 2221 A += k1; 2222 2223 k00_20 = e0[-2] - e2[-2]; 2224 k01_21 = e0[-3] - e2[-3]; 2225 e0[-2] += e2[-2];//e0[-2] = e0[-2] + e2[-2]; 2226 e0[-3] += e2[-3];//e0[-3] = e0[-3] + e2[-3]; 2227 e2[-2] = (k00_20)*A[0] - (k01_21) * A[1]; 2228 e2[-3] = (k01_21)*A[0] + (k00_20) * A[1]; 2229 2230 A += k1; 2231 2232 k00_20 = e0[-4] - e2[-4]; 2233 k01_21 = e0[-5] - e2[-5]; 2234 e0[-4] += e2[-4];//e0[-4] = e0[-4] + e2[-4]; 2235 e0[-5] += e2[-5];//e0[-5] = e0[-5] + e2[-5]; 2236 e2[-4] = (k00_20)*A[0] - (k01_21) * A[1]; 2237 e2[-5] = (k01_21)*A[0] + (k00_20) * A[1]; 2238 2239 A += k1; 2240 2241 k00_20 = e0[-6] - e2[-6]; 2242 k01_21 = e0[-7] - e2[-7]; 2243 e0[-6] += e2[-6];//e0[-6] = e0[-6] + e2[-6]; 2244 e0[-7] += e2[-7];//e0[-7] = e0[-7] + e2[-7]; 2245 e2[-6] = (k00_20)*A[0] - (k01_21) * A[1]; 2246 e2[-7] = (k01_21)*A[0] + (k00_20) * A[1]; 2247 2248 e0 -= 8; 2249 e2 -= 8; 2250 2251 A += k1; 2252 } 2253} 2254 2255static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0) 2256{ 2257 int i; 2258 float A0 = A[0]; 2259 float A1 = A[0+1]; 2260 float A2 = A[0+a_off]; 2261 float A3 = A[0+a_off+1]; 2262 float A4 = A[0+a_off*2+0]; 2263 float A5 = A[0+a_off*2+1]; 2264 float A6 = A[0+a_off*3+0]; 2265 float A7 = A[0+a_off*3+1]; 2266 2267 float k00,k11; 2268 2269 float *ee0 = e +i_off; 2270 float *ee2 = ee0+k_off; 2271 2272 for (i=n; i > 0; --i) { 2273 k00 = ee0[ 0] - ee2[ 0]; 2274 k11 = ee0[-1] - ee2[-1]; 2275 ee0[ 0] = ee0[ 0] + ee2[ 0]; 2276 ee0[-1] = ee0[-1] + ee2[-1]; 2277 ee2[ 0] = (k00) * A0 - (k11) * A1; 2278 ee2[-1] = (k11) * A0 + (k00) * A1; 2279 2280 k00 = ee0[-2] - ee2[-2]; 2281 k11 = ee0[-3] - ee2[-3]; 2282 ee0[-2] = ee0[-2] + ee2[-2]; 2283 ee0[-3] = ee0[-3] + ee2[-3]; 2284 ee2[-2] = (k00) * A2 - (k11) * A3; 2285 ee2[-3] = (k11) * A2 + (k00) * A3; 2286 2287 k00 = ee0[-4] - ee2[-4]; 2288 k11 = ee0[-5] - ee2[-5]; 2289 ee0[-4] = ee0[-4] + ee2[-4]; 2290 ee0[-5] = ee0[-5] + ee2[-5]; 2291 ee2[-4] = (k00) * A4 - (k11) * A5; 2292 ee2[-5] = (k11) * A4 + (k00) * A5; 2293 2294 k00 = ee0[-6] - ee2[-6]; 2295 k11 = ee0[-7] - ee2[-7]; 2296 ee0[-6] = ee0[-6] + ee2[-6]; 2297 ee0[-7] = ee0[-7] + ee2[-7]; 2298 ee2[-6] = (k00) * A6 - (k11) * A7; 2299 ee2[-7] = (k11) * A6 + (k00) * A7; 2300 2301 ee0 -= k0; 2302 ee2 -= k0; 2303 } 2304} 2305 2306static __forceinline void iter_54(float *z) 2307{ 2308 float k00,k11,k22,k33; 2309 float y0,y1,y2,y3; 2310 2311 k00 = z[ 0] - z[-4]; 2312 y0 = z[ 0] + z[-4]; 2313 y2 = z[-2] + z[-6]; 2314 k22 = z[-2] - z[-6]; 2315 2316 z[-0] = y0 + y2; // z0 + z4 + z2 + z6 2317 z[-2] = y0 - y2; // z0 + z4 - z2 - z6 2318 2319 // done with y0,y2 2320 2321 k33 = z[-3] - z[-7]; 2322 2323 z[-4] = k00 + k33; // z0 - z4 + z3 - z7 2324 z[-6] = k00 - k33; // z0 - z4 - z3 + z7 2325 2326 // done with k33 2327 2328 k11 = z[-1] - z[-5]; 2329 y1 = z[-1] + z[-5]; 2330 y3 = z[-3] + z[-7]; 2331 2332 z[-1] = y1 + y3; // z1 + z5 + z3 + z7 2333 z[-3] = y1 - y3; // z1 + z5 - z3 - z7 2334 z[-5] = k11 - k22; // z1 - z5 + z2 - z6 2335 z[-7] = k11 + k22; // z1 - z5 - z2 + z6 2336} 2337 2338static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n) 2339{ 2340 int k_off = -8; 2341 int a_off = base_n >> 3; 2342 float A2 = A[0+a_off]; 2343 float *z = e + i_off; 2344 float *base = z - 16 * n; 2345 2346 while (z > base) { 2347 float k00,k11; 2348 2349 k00 = z[-0] - z[-8]; 2350 k11 = z[-1] - z[-9]; 2351 z[-0] = z[-0] + z[-8]; 2352 z[-1] = z[-1] + z[-9]; 2353 z[-8] = k00; 2354 z[-9] = k11 ; 2355 2356 k00 = z[ -2] - z[-10]; 2357 k11 = z[ -3] - z[-11]; 2358 z[ -2] = z[ -2] + z[-10]; 2359 z[ -3] = z[ -3] + z[-11]; 2360 z[-10] = (k00+k11) * A2; 2361 z[-11] = (k11-k00) * A2; 2362 2363 k00 = z[-12] - z[ -4]; // reverse to avoid a unary negation 2364 k11 = z[ -5] - z[-13]; 2365 z[ -4] = z[ -4] + z[-12]; 2366 z[ -5] = z[ -5] + z[-13]; 2367 z[-12] = k11; 2368 z[-13] = k00; 2369 2370 k00 = z[-14] - z[ -6]; // reverse to avoid a unary negation 2371 k11 = z[ -7] - z[-15]; 2372 z[ -6] = z[ -6] + z[-14]; 2373 z[ -7] = z[ -7] + z[-15]; 2374 z[-14] = (k00+k11) * A2; 2375 z[-15] = (k00-k11) * A2; 2376 2377 iter_54(z); 2378 iter_54(z-8); 2379 z -= 16; 2380 } 2381} 2382 2383static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype) 2384{ 2385 int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; 2386 int n3_4 = n - n4, ld; 2387 // @OPTIMIZE: reduce register pressure by using fewer variables? 2388 int save_point = temp_alloc_save(f); 2389 float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2)); 2390 float *u=NULL,*v=NULL; 2391 // twiddle factors 2392 float *A = f->A[blocktype]; 2393 2394 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" 2395 // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function. 2396 2397 // kernel from paper 2398 2399 2400 // merged: 2401 // copy and reflect spectral data 2402 // step 0 2403 2404 // note that it turns out that the items added together during 2405 // this step are, in fact, being added to themselves (as reflected 2406 // by step 0). inexplicable inefficiency! this became obvious 2407 // once I combined the passes. 2408 2409 // so there's a missing 'times 2' here (for adding X to itself). 2410 // this propogates through linearly to the end, where the numbers 2411 // are 1/2 too small, and need to be compensated for. 2412 2413 { 2414 float *d,*e, *AA, *e_stop; 2415 d = &buf2[n2-2]; 2416 AA = A; 2417 e = &buffer[0]; 2418 e_stop = &buffer[n2]; 2419 while (e != e_stop) { 2420 d[1] = (e[0] * AA[0] - e[2]*AA[1]); 2421 d[0] = (e[0] * AA[1] + e[2]*AA[0]); 2422 d -= 2; 2423 AA += 2; 2424 e += 4; 2425 } 2426 2427 e = &buffer[n2-3]; 2428 while (d >= buf2) { 2429 d[1] = (-e[2] * AA[0] - -e[0]*AA[1]); 2430 d[0] = (-e[2] * AA[1] + -e[0]*AA[0]); 2431 d -= 2; 2432 AA += 2; 2433 e -= 4; 2434 } 2435 } 2436 2437 // now we use symbolic names for these, so that we can 2438 // possibly swap their meaning as we change which operations 2439 // are in place 2440 2441 u = buffer; 2442 v = buf2; 2443 2444 // step 2 (paper output is w, now u) 2445 // this could be in place, but the data ends up in the wrong 2446 // place... _somebody_'s got to swap it, so this is nominated 2447 { 2448 float *AA = &A[n2-8]; 2449 float *d0,*d1, *e0, *e1; 2450 2451 e0 = &v[n4]; 2452 e1 = &v[0]; 2453 2454 d0 = &u[n4]; 2455 d1 = &u[0]; 2456 2457 while (AA >= A) { 2458 float v40_20, v41_21; 2459 2460 v41_21 = e0[1] - e1[1]; 2461 v40_20 = e0[0] - e1[0]; 2462 d0[1] = e0[1] + e1[1]; 2463 d0[0] = e0[0] + e1[0]; 2464 d1[1] = v41_21*AA[4] - v40_20*AA[5]; 2465 d1[0] = v40_20*AA[4] + v41_21*AA[5]; 2466 2467 v41_21 = e0[3] - e1[3]; 2468 v40_20 = e0[2] - e1[2]; 2469 d0[3] = e0[3] + e1[3]; 2470 d0[2] = e0[2] + e1[2]; 2471 d1[3] = v41_21*AA[0] - v40_20*AA[1]; 2472 d1[2] = v40_20*AA[0] + v41_21*AA[1]; 2473 2474 AA -= 8; 2475 2476 d0 += 4; 2477 d1 += 4; 2478 e0 += 4; 2479 e1 += 4; 2480 } 2481 } 2482 2483 // step 3 2484 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 2485 2486 // optimized step 3: 2487 2488 // the original step3 loop can be nested r inside s or s inside r; 2489 // it's written originally as s inside r, but this is dumb when r 2490 // iterates many times, and s few. So I have two copies of it and 2491 // switch between them halfway. 2492 2493 // this is iteration 0 of step 3 2494 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A); 2495 imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A); 2496 2497 // this is iteration 1 of step 3 2498 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16); 2499 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16); 2500 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16); 2501 imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16); 2502 2503 l=2; 2504 for (; l < (ld-3)>>1; ++l) { 2505 int k0 = n >> (l+2), k0_2 = k0>>1; 2506 int lim = 1 << (l+1); 2507 int i; 2508 for (i=0; i < lim; ++i) 2509 imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3)); 2510 } 2511 2512 for (; l < ld-6; ++l) { 2513 int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1; 2514 int rlim = n >> (l+6), r; 2515 int lim = 1 << (l+1); 2516 int i_off; 2517 float *A0 = A; 2518 i_off = n2-1; 2519 for (r=rlim; r > 0; --r) { 2520 imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0); 2521 A0 += k1*4; 2522 i_off -= 8; 2523 } 2524 } 2525 2526 // iterations with count: 2527 // ld-6,-5,-4 all interleaved together 2528 // the big win comes from getting rid of needless flops 2529 // due to the constants on pass 5 & 4 being all 1 and 0; 2530 // combining them to be simultaneous to improve cache made little difference 2531 imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n); 2532 2533 // output is u 2534 2535 // step 4, 5, and 6 2536 // cannot be in-place because of step 5 2537 { 2538 uint16 *bitrev = f->bit_reverse[blocktype]; 2539 // weirdly, I'd have thought reading sequentially and writing 2540 // erratically would have been better than vice-versa, but in 2541 // fact that's not what my testing showed. (That is, with 2542 // j = bitreverse(i), do you read i and write j, or read j and write i.) 2543 2544 float *d0 = &v[n4-4]; 2545 float *d1 = &v[n2-4]; 2546 while (d0 >= v) { 2547 int k4; 2548 2549 k4 = bitrev[0]; 2550 d1[3] = u[k4+0]; 2551 d1[2] = u[k4+1]; 2552 d0[3] = u[k4+2]; 2553 d0[2] = u[k4+3]; 2554 2555 k4 = bitrev[1]; 2556 d1[1] = u[k4+0]; 2557 d1[0] = u[k4+1]; 2558 d0[1] = u[k4+2]; 2559 d0[0] = u[k4+3]; 2560 2561 d0 -= 4; 2562 d1 -= 4; 2563 bitrev += 2; 2564 } 2565 } 2566 // (paper output is u, now v) 2567 2568 2569 // data must be in buf2 2570 assert(v == buf2); 2571 2572 // step 7 (paper output is v, now v) 2573 // this is now in place 2574 { 2575 float *C = f->C[blocktype]; 2576 float *d, *e; 2577 2578 d = v; 2579 e = v + n2 - 4; 2580 2581 while (d < e) { 2582 float a02,a11,b0,b1,b2,b3; 2583 2584 a02 = d[0] - e[2]; 2585 a11 = d[1] + e[3]; 2586 2587 b0 = C[1]*a02 + C[0]*a11; 2588 b1 = C[1]*a11 - C[0]*a02; 2589 2590 b2 = d[0] + e[ 2]; 2591 b3 = d[1] - e[ 3]; 2592 2593 d[0] = b2 + b0; 2594 d[1] = b3 + b1; 2595 e[2] = b2 - b0; 2596 e[3] = b1 - b3; 2597 2598 a02 = d[2] - e[0]; 2599 a11 = d[3] + e[1]; 2600 2601 b0 = C[3]*a02 + C[2]*a11; 2602 b1 = C[3]*a11 - C[2]*a02; 2603 2604 b2 = d[2] + e[ 0]; 2605 b3 = d[3] - e[ 1]; 2606 2607 d[2] = b2 + b0; 2608 d[3] = b3 + b1; 2609 e[0] = b2 - b0; 2610 e[1] = b1 - b3; 2611 2612 C += 4; 2613 d += 4; 2614 e -= 4; 2615 } 2616 } 2617 2618 // data must be in buf2 2619 2620 2621 // step 8+decode (paper output is X, now buffer) 2622 // this generates pairs of data a la 8 and pushes them directly through 2623 // the decode kernel (pushing rather than pulling) to avoid having 2624 // to make another pass later 2625 2626 // this cannot POSSIBLY be in place, so we refer to the buffers directly 2627 2628 { 2629 float *d0,*d1,*d2,*d3; 2630 2631 float *B = f->B[blocktype] + n2 - 8; 2632 float *e = buf2 + n2 - 8; 2633 d0 = &buffer[0]; 2634 d1 = &buffer[n2-4]; 2635 d2 = &buffer[n2]; 2636 d3 = &buffer[n-4]; 2637 while (e >= v) { 2638 float p0,p1,p2,p3; 2639 2640 p3 = e[6]*B[7] - e[7]*B[6]; 2641 p2 = -e[6]*B[6] - e[7]*B[7]; 2642 2643 d0[0] = p3; 2644 d1[3] = - p3; 2645 d2[0] = p2; 2646 d3[3] = p2; 2647 2648 p1 = e[4]*B[5] - e[5]*B[4]; 2649 p0 = -e[4]*B[4] - e[5]*B[5]; 2650 2651 d0[1] = p1; 2652 d1[2] = - p1; 2653 d2[1] = p0; 2654 d3[2] = p0; 2655 2656 p3 = e[2]*B[3] - e[3]*B[2]; 2657 p2 = -e[2]*B[2] - e[3]*B[3]; 2658 2659 d0[2] = p3; 2660 d1[1] = - p3; 2661 d2[2] = p2; 2662 d3[1] = p2; 2663 2664 p1 = e[0]*B[1] - e[1]*B[0]; 2665 p0 = -e[0]*B[0] - e[1]*B[1]; 2666 2667 d0[3] = p1; 2668 d1[0] = - p1; 2669 d2[3] = p0; 2670 d3[0] = p0; 2671 2672 B -= 8; 2673 e -= 8; 2674 d0 += 4; 2675 d2 += 4; 2676 d1 -= 4; 2677 d3 -= 4; 2678 } 2679 } 2680 2681 temp_alloc_restore(f,save_point); 2682} 2683 2684#if 0 2685// this is the original version of the above code, if you want to optimize it from scratch 2686void inverse_mdct_naive(float *buffer, int n) 2687{ 2688 float s; 2689 float A[1 << 12], B[1 << 12], C[1 << 11]; 2690 int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l; 2691 int n3_4 = n - n4, ld; 2692 // how can they claim this only uses N words?! 2693 // oh, because they're only used sparsely, whoops 2694 float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13]; 2695 // set up twiddle factors 2696 2697 for (k=k2=0; k < n4; ++k,k2+=2) { 2698 A[k2 ] = (float) cos(4*k*M_PI/n); 2699 A[k2+1] = (float) -sin(4*k*M_PI/n); 2700 B[k2 ] = (float) cos((k2+1)*M_PI/n/2); 2701 B[k2+1] = (float) sin((k2+1)*M_PI/n/2); 2702 } 2703 for (k=k2=0; k < n8; ++k,k2+=2) { 2704 C[k2 ] = (float) cos(2*(k2+1)*M_PI/n); 2705 C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n); 2706 } 2707 2708 // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio" 2709 // Note there are bugs in that pseudocode, presumably due to them attempting 2710 // to rename the arrays nicely rather than representing the way their actual 2711 // implementation bounces buffers back and forth. As a result, even in the 2712 // "some formulars corrected" version, a direct implementation fails. These 2713 // are noted below as "paper bug". 2714 2715 // copy and reflect spectral data 2716 for (k=0; k < n2; ++k) u[k] = buffer[k]; 2717 for ( ; k < n ; ++k) u[k] = -buffer[n - k - 1]; 2718 // kernel from paper 2719 // step 1 2720 for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) { 2721 v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2] - (u[k4+2] - u[n-k4-3])*A[k2+1]; 2722 v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2]; 2723 } 2724 // step 2 2725 for (k=k4=0; k < n8; k+=1, k4+=4) { 2726 w[n2+3+k4] = v[n2+3+k4] + v[k4+3]; 2727 w[n2+1+k4] = v[n2+1+k4] + v[k4+1]; 2728 w[k4+3] = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4]; 2729 w[k4+1] = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4]; 2730 } 2731 // step 3 2732 ld = ilog(n) - 1; // ilog is off-by-one from normal definitions 2733 for (l=0; l < ld-3; ++l) { 2734 int k0 = n >> (l+2), k1 = 1 << (l+3); 2735 int rlim = n >> (l+4), r4, r; 2736 int s2lim = 1 << (l+2), s2; 2737 for (r=r4=0; r < rlim; r4+=4,++r) { 2738 for (s2=0; s2 < s2lim; s2+=2) { 2739 u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4]; 2740 u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4]; 2741 u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1] 2742 - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1]; 2743 u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1] 2744 + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1]; 2745 } 2746 } 2747 if (l+1 < ld-3) { 2748 // paper bug: ping-ponging of u&w here is omitted 2749 memcpy(w, u, sizeof(u)); 2750 } 2751 } 2752 2753 // step 4 2754 for (i=0; i < n8; ++i) { 2755 int j = bit_reverse(i) >> (32-ld+3); 2756 assert(j < n8); 2757 if (i == j) { 2758 // paper bug: original code probably swapped in place; if copying, 2759 // need to directly copy in this case 2760 int i8 = i << 3; 2761 v[i8+1] = u[i8+1]; 2762 v[i8+3] = u[i8+3]; 2763 v[i8+5] = u[i8+5]; 2764 v[i8+7] = u[i8+7]; 2765 } else if (i < j) { 2766 int i8 = i << 3, j8 = j << 3; 2767 v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1]; 2768 v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3]; 2769 v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5]; 2770 v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7]; 2771 } 2772 } 2773 // step 5 2774 for (k=0; k < n2; ++k) { 2775 w[k] = v[k*2+1]; 2776 } 2777 // step 6 2778 for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) { 2779 u[n-1-k2] = w[k4]; 2780 u[n-2-k2] = w[k4+1]; 2781 u[n3_4 - 1 - k2] = w[k4+2]; 2782 u[n3_4 - 2 - k2] = w[k4+3]; 2783 } 2784 // step 7 2785 for (k=k2=0; k < n8; ++k, k2 += 2) { 2786 v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2; 2787 v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2; 2788 v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2; 2789 v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2; 2790 } 2791 // step 8 2792 for (k=k2=0; k < n4; ++k,k2 += 2) { 2793 X[k] = v[k2+n2]*B[k2 ] + v[k2+1+n2]*B[k2+1]; 2794 X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2 ]; 2795 } 2796 2797 // decode kernel to output 2798 // determined the following value experimentally 2799 // (by first figuring out what made inverse_mdct_slow work); then matching that here 2800 // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?) 2801 s = 0.5; // theoretically would be n4 2802 2803 // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code, 2804 // so it needs to use the "old" B values to behave correctly, or else 2805 // set s to 1.0 ]]] 2806 for (i=0; i < n4 ; ++i) buffer[i] = s * X[i+n4]; 2807 for ( ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1]; 2808 for ( ; i < n ; ++i) buffer[i] = -s * X[i - n3_4]; 2809} 2810#endif 2811 2812static float *get_window(vorb *f, int len) 2813{ 2814 len <<= 1; 2815 if (len == f->blocksize_0) return f->window[0]; 2816 if (len == f->blocksize_1) return f->window[1]; 2817 assert(0); 2818 return NULL; 2819} 2820 2821#ifndef STB_VORBIS_NO_DEFER_FLOOR 2822typedef int16 YTYPE; 2823#else 2824typedef int YTYPE; 2825#endif 2826static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag) 2827{ 2828 int n2 = n >> 1; 2829 int s = map->chan[i].mux, floor; 2830 floor = map->submap_floor[s]; 2831 if (f->floor_types[floor] == 0) { 2832 return error(f, VORBIS_invalid_stream); 2833 } else { 2834 Floor1 *g = &f->floor_config[floor].floor1; 2835 int j,q; 2836 int lx = 0, ly = finalY[0] * g->floor1_multiplier; 2837 for (q=1; q < g->values; ++q) { 2838 j = g->sorted_order[q]; 2839 #ifndef STB_VORBIS_NO_DEFER_FLOOR 2840 if (finalY[j] >= 0) 2841 #else 2842 if (step2_flag[j]) 2843 #endif 2844 { 2845 int hy = finalY[j] * g->floor1_multiplier; 2846 int hx = g->Xlist[j]; 2847 draw_line(target, lx,ly, hx,hy, n2); 2848 lx = hx, ly = hy; 2849 } 2850 } 2851 if (lx < n2) 2852 // optimization of: draw_line(target, lx,ly, n,ly, n2); 2853 for (j=lx; j < n2; ++j) 2854 LINE_OP(target[j], inverse_db_table[ly]); 2855 } 2856 return TRUE; 2857} 2858 2859static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode) 2860{ 2861 Mode *m; 2862 int i, n, prev, next, window_center; 2863 f->channel_buffer_start = f->channel_buffer_end = 0; 2864 2865 retry: 2866 if (f->eof) return FALSE; 2867 if (!maybe_start_packet(f)) 2868 return FALSE; 2869 // check packet type 2870 if (get_bits(f,1) != 0) { 2871 if (IS_PUSH_MODE(f)) 2872 return error(f,VORBIS_bad_packet_type); 2873 while (EOP != get8_packet(f)); 2874 goto retry; 2875 } 2876 2877 if (f->alloc.alloc_buffer) 2878 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); 2879 2880 i = get_bits(f, ilog(f->mode_count-1)); 2881 if (i == EOP) return FALSE; 2882 if (i >= f->mode_count) return FALSE; 2883 *mode = i; 2884 m = f->mode_config + i; 2885 if (m->blockflag) { 2886 n = f->blocksize_1; 2887 prev = get_bits(f,1); 2888 next = get_bits(f,1); 2889 } else { 2890 prev = next = 0; 2891 n = f->blocksize_0; 2892 } 2893 2894// WINDOWING 2895 2896 window_center = n >> 1; 2897 if (m->blockflag && !prev) { 2898 *p_left_start = (n - f->blocksize_0) >> 2; 2899 *p_left_end = (n + f->blocksize_0) >> 2; 2900 } else { 2901 *p_left_start = 0; 2902 *p_left_end = window_center; 2903 } 2904 if (m->blockflag && !next) { 2905 *p_right_start = (n*3 - f->blocksize_0) >> 2; 2906 *p_right_end = (n*3 + f->blocksize_0) >> 2; 2907 } else { 2908 *p_right_start = window_center; 2909 *p_right_end = n; 2910 } 2911 return TRUE; 2912} 2913 2914static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left) 2915{ 2916 Mapping *map; 2917 int i,j,k,n,n2; 2918 int zero_channel[256]; 2919 int really_zero_channel[256]; 2920 int window_center; 2921 2922// WINDOWING 2923 2924 n = f->blocksize[m->blockflag]; 2925 window_center = n >> 1; 2926 2927 map = &f->mapping[m->mapping]; 2928 2929// FLOORS 2930 n2 = n >> 1; 2931 2932 stb_prof(1); 2933 for (i=0; i < f->channels; ++i) { 2934 int s = map->chan[i].mux, floor; 2935 zero_channel[i] = FALSE; 2936 floor = map->submap_floor[s]; 2937 if (f->floor_types[floor] == 0) { 2938 return error(f, VORBIS_invalid_stream); 2939 } else { 2940 Floor1 *g = &f->floor_config[floor].floor1; 2941 if (get_bits(f, 1)) { 2942 short *finalY; 2943 uint8 step2_flag[256]; 2944 static int range_list[4] = { 256, 128, 86, 64 }; 2945 int range = range_list[g->floor1_multiplier-1]; 2946 int offset = 2; 2947 finalY = f->finalY[i]; 2948 finalY[0] = get_bits(f, ilog(range)-1); 2949 finalY[1] = get_bits(f, ilog(range)-1); 2950 for (j=0; j < g->partitions; ++j) { 2951 int pclass = g->partition_class_list[j]; 2952 int cdim = g->class_dimensions[pclass]; 2953 int cbits = g->class_subclasses[pclass]; 2954 int csub = (1 << cbits)-1; 2955 int cval = 0; 2956 if (cbits) { 2957 Codebook *c = f->codebooks + g->class_masterbooks[pclass]; 2958 DECODE(cval,f,c); 2959 } 2960 for (k=0; k < cdim; ++k) { 2961 int book = g->subclass_books[pclass][cval & csub]; 2962 cval = cval >> cbits; 2963 if (book >= 0) { 2964 int temp; 2965 Codebook *c = f->codebooks + book; 2966 DECODE(temp,f,c); 2967 finalY[offset++] = temp; 2968 } else 2969 finalY[offset++] = 0; 2970 } 2971 } 2972 if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec 2973 step2_flag[0] = step2_flag[1] = 1; 2974 for (j=2; j < g->values; ++j) { 2975 int low, high, pred, highroom, lowroom, room, val; 2976 low = g->neighbors[j][0]; 2977 high = g->neighbors[j][1]; 2978 //neighbors(g->Xlist, j, &low, &high); 2979 pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]); 2980 val = finalY[j]; 2981 highroom = range - pred; 2982 lowroom = pred; 2983 if (highroom < lowroom) 2984 room = highroom * 2; 2985 else 2986 room = lowroom * 2; 2987 if (val) { 2988 step2_flag[low] = step2_flag[high] = 1; 2989 step2_flag[j] = 1; 2990 if (val >= room) 2991 if (highroom > lowroom) 2992 finalY[j] = val - lowroom + pred; 2993 else 2994 finalY[j] = pred - val + highroom - 1; 2995 else 2996 if (val & 1) 2997 finalY[j] = pred - ((val+1)>>1); 2998 else 2999 finalY[j] = pred + (val>>1); 3000 } else { 3001 step2_flag[j] = 0; 3002 finalY[j] = pred; 3003 } 3004 } 3005 3006#ifdef STB_VORBIS_NO_DEFER_FLOOR 3007 do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag); 3008#else 3009 // defer final floor computation until _after_ residue 3010 for (j=0; j < g->values; ++j) { 3011 if (!step2_flag[j]) 3012 finalY[j] = -1; 3013 } 3014#endif 3015 } else { 3016 error: 3017 zero_channel[i] = TRUE; 3018 } 3019 // So we just defer everything else to later 3020 3021 // at this point we've decoded the floor into buffer 3022 } 3023 } 3024 stb_prof(0); 3025 // at this point we've decoded all floors 3026 3027 if (f->alloc.alloc_buffer) 3028 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); 3029 3030 // re-enable coupled channels if necessary 3031 memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels); 3032 for (i=0; i < map->coupling_steps; ++i) 3033 if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) { 3034 zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE; 3035 } 3036 3037// RESIDUE DECODE 3038 for (i=0; i < map->submaps; ++i) { 3039 float *residue_buffers[STB_VORBIS_MAX_CHANNELS]; 3040 int r,t; 3041 uint8 do_not_decode[256]; 3042 int ch = 0; 3043 for (j=0; j < f->channels; ++j) { 3044 if (map->chan[j].mux == i) { 3045 if (zero_channel[j]) { 3046 do_not_decode[ch] = TRUE; 3047 residue_buffers[ch] = NULL; 3048 } else { 3049 do_not_decode[ch] = FALSE; 3050 residue_buffers[ch] = f->channel_buffers[j]; 3051 } 3052 ++ch; 3053 } 3054 } 3055 r = map->submap_residue[i]; 3056 t = f->residue_types[r]; 3057 decode_residue(f, residue_buffers, ch, n2, r, do_not_decode); 3058 } 3059 3060 if (f->alloc.alloc_buffer) 3061 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); 3062 3063// INVERSE COUPLING 3064 stb_prof(14); 3065 for (i = map->coupling_steps-1; i >= 0; --i) { 3066 int n2 = n >> 1; 3067 float *m = f->channel_buffers[map->chan[i].magnitude]; 3068 float *a = f->channel_buffers[map->chan[i].angle ]; 3069 for (j=0; j < n2; ++j) { 3070 float a2,m2; 3071 if (m[j] > 0) 3072 if (a[j] > 0) 3073 m2 = m[j], a2 = m[j] - a[j]; 3074 else 3075 a2 = m[j], m2 = m[j] + a[j]; 3076 else 3077 if (a[j] > 0) 3078 m2 = m[j], a2 = m[j] + a[j]; 3079 else 3080 a2 = m[j], m2 = m[j] - a[j]; 3081 m[j] = m2; 3082 a[j] = a2; 3083 } 3084 } 3085 3086 // finish decoding the floors 3087#ifndef STB_VORBIS_NO_DEFER_FLOOR 3088 stb_prof(15); 3089 for (i=0; i < f->channels; ++i) { 3090 if (really_zero_channel[i]) { 3091 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2); 3092 } else { 3093 do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL); 3094 } 3095 } 3096#else 3097 for (i=0; i < f->channels; ++i) { 3098 if (really_zero_channel[i]) { 3099 memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2); 3100 } else { 3101 for (j=0; j < n2; ++j) 3102 f->channel_buffers[i][j] *= f->floor_buffers[i][j]; 3103 } 3104 } 3105#endif 3106 3107// INVERSE MDCT 3108 stb_prof(16); 3109 for (i=0; i < f->channels; ++i) 3110 inverse_mdct(f->channel_buffers[i], n, f, m->blockflag); 3111 stb_prof(0); 3112 3113 // this shouldn't be necessary, unless we exited on an error 3114 // and want to flush to get to the next packet 3115 flush_packet(f); 3116 3117 if (f->first_decode) { 3118 // assume we start so first non-discarded sample is sample 0 3119 // this isn't to spec, but spec would require us to read ahead 3120 // and decode the size of all current frames--could be done, 3121 // but presumably it's not a commonly used feature 3122 f->current_loc = -n2; // start of first frame is positioned for discard 3123 // we might have to discard samples "from" the next frame too, 3124 // if we're lapping a large block then a small at the start? 3125 f->discard_samples_deferred = n - right_end; 3126 f->current_loc_valid = TRUE; 3127 f->first_decode = FALSE; 3128 } else if (f->discard_samples_deferred) { 3129 left_start += f->discard_samples_deferred; 3130 *p_left = left_start; 3131 f->discard_samples_deferred = 0; 3132 } else if (f->previous_length == 0 && f->current_loc_valid) { 3133 // we're recovering from a seek... that means we're going to discard 3134 // the samples from this packet even though we know our position from 3135 // the last page header, so we need to update the position based on 3136 // the discarded samples here 3137 // but wait, the code below is going to add this in itself even 3138 // on a discard, so we don't need to do it here... 3139 } 3140 3141 // check if we have ogg information about the sample # for this packet 3142 if (f->last_seg_which == f->end_seg_with_known_loc) { 3143 // if we have a valid current loc, and this is final: 3144 if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) { 3145 uint32 current_end = f->known_loc_for_packet - (n-right_end); 3146 // then let's infer the size of the (probably) short final frame 3147 if (current_end < f->current_loc + right_end) { 3148 if (current_end < f->current_loc) { 3149 // negative truncation, that's impossible! 3150 *len = 0; 3151 } else { 3152 *len = current_end - f->current_loc; 3153 } 3154 *len += left_start; 3155 f->current_loc += *len; 3156 return TRUE; 3157 } 3158 } 3159 // otherwise, just set our sample loc 3160 // guess that the ogg granule pos refers to the _middle_ of the 3161 // last frame? 3162 // set f->current_loc to the position of left_start 3163 f->current_loc = f->known_loc_for_packet - (n2-left_start); 3164 f->current_loc_valid = TRUE; 3165 } 3166 if (f->current_loc_valid) 3167 f->current_loc += (right_start - left_start); 3168 3169 if (f->alloc.alloc_buffer) 3170 assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset); 3171 *len = right_end; // ignore samples after the window goes to 0 3172 return TRUE; 3173} 3174 3175static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right) 3176{ 3177 int mode, left_end, right_end; 3178 if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0; 3179 return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left); 3180} 3181 3182static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right) 3183{ 3184 int prev,i,j; 3185 // we use right&left (the start of the right- and left-window sin()-regions) 3186 // to determine how much to return, rather than inferring from the rules 3187 // (same result, clearer code); 'left' indicates where our sin() window 3188 // starts, therefore where the previous window's right edge starts, and 3189 // therefore where to start mixing from the previous buffer. 'right' 3190 // indicates where our sin() ending-window starts, therefore that's where 3191 // we start saving, and where our returned-data ends. 3192 3193 // mixin from previous window 3194 if (f->previous_length) { 3195 int i,j, n = f->previous_length; 3196 float *w = get_window(f, n); 3197 for (i=0; i < f->channels; ++i) { 3198 for (j=0; j < n; ++j) 3199 f->channel_buffers[i][left+j] = 3200 f->channel_buffers[i][left+j]*w[ j] + 3201 f->previous_window[i][ j]*w[n-1-j]; 3202 } 3203 } 3204 3205 prev = f->previous_length; 3206 3207 // last half of this data becomes previous window 3208 f->previous_length = len - right; 3209 3210 // @OPTIMIZE: could avoid this copy by double-buffering the 3211 // output (flipping previous_window with channel_buffers), but 3212 // then previous_window would have to be 2x as large, and 3213 // channel_buffers couldn't be temp mem (although they're NOT 3214 // currently temp mem, they could be (unless we want to level 3215 // performance by spreading out the computation)) 3216 for (i=0; i < f->channels; ++i) 3217 for (j=0; right+j < len; ++j) 3218 f->previous_window[i][j] = f->channel_buffers[i][right+j]; 3219 3220 if (!prev) 3221 // there was no previous packet, so this data isn't valid... 3222 // this isn't entirely true, only the would-have-overlapped data 3223 // isn't valid, but this seems to be what the spec requires 3224 return 0; 3225 3226 // truncate a short frame 3227 if (len < right) right = len; 3228 3229 f->samples_output += right-left; 3230 3231 return right - left; 3232} 3233 3234static void vorbis_pump_first_frame(stb_vorbis *f) 3235{ 3236 int len, right, left; 3237 if (vorbis_decode_packet(f, &len, &left, &right)) 3238 vorbis_finish_frame(f, len, left, right); 3239} 3240 3241#ifndef STB_VORBIS_NO_PUSHDATA_API 3242static int is_whole_packet_present(stb_vorbis *f, int end_page) 3243{ 3244 // make sure that we have the packet available before continuing... 3245 // this requires a full ogg parse, but we know we can fetch from f->stream 3246 3247 // instead of coding this out explicitly, we could save the current read state, 3248 // read the next packet with get8() until end-of-packet, check f->eof, then 3249 // reset the state? but that would be slower, esp. since we'd have over 256 bytes 3250 // of state to restore (primarily the page segment table) 3251 3252 int s = f->next_seg, first = TRUE; 3253 uint8 *p = f->stream; 3254 3255 if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag 3256 for (; s < f->segment_count; ++s) { 3257 p += f->segments[s]; 3258 if (f->segments[s] < 255) // stop at first short segment 3259 break; 3260 } 3261 // either this continues, or it ends it... 3262 if (end_page) 3263 if (s < f->segment_count-1) return error(f, VORBIS_invalid_stream); 3264 if (s == f->segment_count) 3265 s = -1; // set 'crosses page' flag 3266 if (p > f->stream_end) return error(f, VORBIS_need_more_data); 3267 first = FALSE; 3268 } 3269 for (; s == -1;) { 3270 uint8 *q; 3271 int n; 3272 3273 // check that we have the page header ready 3274 if (p + 26 >= f->stream_end) return error(f, VORBIS_need_more_data); 3275 // validate the page 3276 if (memcmp(p, ogg_page_header, 4)) return error(f, VORBIS_invalid_stream); 3277 if (p[4] != 0) return error(f, VORBIS_invalid_stream); 3278 if (first) { // the first segment must NOT have 'continued_packet', later ones MUST 3279 if (f->previous_length) 3280 if ((p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream); 3281 // if no previous length, we're resynching, so we can come in on a continued-packet, 3282 // which we'll just drop 3283 } else { 3284 if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream); 3285 } 3286 n = p[26]; // segment counts 3287 q = p+27; // q points to segment table 3288 p = q + n; // advance past header 3289 // make sure we've read the segment table 3290 if (p > f->stream_end) return error(f, VORBIS_need_more_data); 3291 for (s=0; s < n; ++s) { 3292 p += q[s]; 3293 if (q[s] < 255) 3294 break; 3295 } 3296 if (end_page) 3297 if (s < n-1) return error(f, VORBIS_invalid_stream); 3298 if (s == f->segment_count) 3299 s = -1; // set 'crosses page' flag 3300 if (p > f->stream_end) return error(f, VORBIS_need_more_data); 3301 first = FALSE; 3302 } 3303 return TRUE; 3304} 3305#endif // !STB_VORBIS_NO_PUSHDATA_API 3306 3307static int start_decoder(vorb *f) 3308{ 3309 uint8 header[6], x,y; 3310 int len,i,j,k, max_submaps = 0; 3311 int longest_floorlist=0; 3312 3313 // first page, first packet 3314 3315 if (!start_page(f)) return FALSE; 3316 // validate page flag 3317 if (!(f->page_flag & PAGEFLAG_first_page)) return error(f, VORBIS_invalid_first_page); 3318 if (f->page_flag & PAGEFLAG_last_page) return error(f, VORBIS_invalid_first_page); 3319 if (f->page_flag & PAGEFLAG_continued_packet) return error(f, VORBIS_invalid_first_page); 3320 // check for expected packet length 3321 if (f->segment_count != 1) return error(f, VORBIS_invalid_first_page); 3322 if (f->segments[0] != 30) return error(f, VORBIS_invalid_first_page); 3323 // read packet 3324 // check packet header 3325 if (get8(f) != VORBIS_packet_id) return error(f, VORBIS_invalid_first_page); 3326 if (!getn(f, header, 6)) return error(f, VORBIS_unexpected_eof); 3327 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_first_page); 3328 // vorbis_version 3329 if (get32(f) != 0) return error(f, VORBIS_invalid_first_page); 3330 f->channels = get8(f); if (!f->channels) return error(f, VORBIS_invalid_first_page); 3331 if (f->channels > STB_VORBIS_MAX_CHANNELS) return error(f, VORBIS_too_many_channels); 3332 f->sample_rate = get32(f); if (!f->sample_rate) return error(f, VORBIS_invalid_first_page); 3333 get32(f); // bitrate_maximum 3334 get32(f); // bitrate_nominal 3335 get32(f); // bitrate_minimum 3336 x = get8(f); 3337 { int log0,log1; 3338 log0 = x & 15; 3339 log1 = x >> 4; 3340 f->blocksize_0 = 1 << log0; 3341 f->blocksize_1 = 1 << log1; 3342 if (log0 < 6 || log0 > 13) return error(f, VORBIS_invalid_setup); 3343 if (log1 < 6 || log1 > 13) return error(f, VORBIS_invalid_setup); 3344 if (log0 > log1) return error(f, VORBIS_invalid_setup); 3345 } 3346 3347 // framing_flag 3348 x = get8(f); 3349 if (!(x & 1)) return error(f, VORBIS_invalid_first_page); 3350 3351 // second packet! 3352 if (!start_page(f)) return FALSE; 3353 3354 if (!start_packet(f)) return FALSE; 3355 do { 3356 len = next_segment(f); 3357 skip(f, len); 3358 f->bytes_in_seg = 0; 3359 } while (len); 3360 3361 // third packet! 3362 if (!start_packet(f)) return FALSE; 3363 3364 #ifndef STB_VORBIS_NO_PUSHDATA_API 3365 if (IS_PUSH_MODE(f)) { 3366 if (!is_whole_packet_present(f, TRUE)) { 3367 // convert error in ogg header to write type 3368 if (f->error == VORBIS_invalid_stream) 3369 f->error = VORBIS_invalid_setup; 3370 return FALSE; 3371 } 3372 } 3373 #endif 3374 3375 crc32_init(); // always init it, to avoid multithread race conditions 3376 3377 if (get8_packet(f) != VORBIS_packet_setup) return error(f, VORBIS_invalid_setup); 3378 for (i=0; i < 6; ++i) header[i] = get8_packet(f); 3379 if (!vorbis_validate(header)) return error(f, VORBIS_invalid_setup); 3380 3381 // codebooks 3382 3383 f->codebook_count = get_bits(f,8) + 1; 3384 f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count); 3385 if (f->codebooks == NULL) return error(f, VORBIS_outofmem); 3386 memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count); 3387 for (i=0; i < f->codebook_count; ++i) { 3388 uint32 *values; 3389 int ordered, sorted_count; 3390 int total=0; 3391 uint8 *lengths; 3392 Codebook *c = f->codebooks+i; 3393 x = get_bits(f, 8); if (x != 0x42) return error(f, VORBIS_invalid_setup); 3394 x = get_bits(f, 8); if (x != 0x43) return error(f, VORBIS_invalid_setup); 3395 x = get_bits(f, 8); if (x != 0x56) return error(f, VORBIS_invalid_setup); 3396 x = get_bits(f, 8); 3397 c->dimensions = (get_bits(f, 8)<<8) + x; 3398 x = get_bits(f, 8); 3399 y = get_bits(f, 8); 3400 c->entries = (get_bits(f, 8)<<16) + (y<<8) + x; 3401 ordered = get_bits(f,1); 3402 c->sparse = ordered ? 0 : get_bits(f,1); 3403 3404 if (c->sparse) 3405 lengths = (uint8 *) setup_temp_malloc(f, c->entries); 3406 else 3407 lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries); 3408 3409 if (!lengths) return error(f, VORBIS_outofmem); 3410 3411 if (ordered) { 3412 int current_entry = 0; 3413 int current_length = get_bits(f,5) + 1; 3414 while (current_entry < c->entries) { 3415 int limit = c->entries - current_entry; 3416 int n = get_bits(f, ilog(limit)); 3417 if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); } 3418 memset(lengths + current_entry, current_length, n); 3419 current_entry += n; 3420 ++current_length; 3421 } 3422 } else { 3423 for (j=0; j < c->entries; ++j) { 3424 int present = c->sparse ? get_bits(f,1) : 1; 3425 if (present) { 3426 lengths[j] = get_bits(f, 5) + 1; 3427 ++total; 3428 } else { 3429 lengths[j] = NO_CODE; 3430 } 3431 } 3432 } 3433 3434 if (c->sparse && total >= c->entries >> 2) { 3435 // convert sparse items to non-sparse! 3436 if (c->entries > (int) f->setup_temp_memory_required) 3437 f->setup_temp_memory_required = c->entries; 3438 3439 c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries); 3440 memcpy(c->codeword_lengths, lengths, c->entries); 3441 setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs! 3442 lengths = c->codeword_lengths; 3443 c->sparse = 0; 3444 } 3445 3446 // compute the size of the sorted tables 3447 if (c->sparse) { 3448 sorted_count = total; 3449 //assert(total != 0); 3450 } else { 3451 sorted_count = 0; 3452 #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH 3453 for (j=0; j < c->entries; ++j) 3454 if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) 3455 ++sorted_count; 3456 #endif 3457 } 3458 3459 c->sorted_entries = sorted_count; 3460 values = NULL; 3461 3462 if (!c->sparse) { 3463 c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries); 3464 if (!c->codewords) return error(f, VORBIS_outofmem); 3465 } else { 3466 unsigned int size; 3467 if (c->sorted_entries) { 3468 c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries); 3469 if (!c->codeword_lengths) return error(f, VORBIS_outofmem); 3470 c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries); 3471 if (!c->codewords) return error(f, VORBIS_outofmem); 3472 values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries); 3473 if (!values) return error(f, VORBIS_outofmem); 3474 } 3475 size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries; 3476 if (size > f->setup_temp_memory_required) 3477 f->setup_temp_memory_required = size; 3478 } 3479 3480 if (!compute_codewords(c, lengths, c->entries, values)) { 3481 if (c->sparse) setup_temp_free(f, values, 0); 3482 return error(f, VORBIS_invalid_setup); 3483 } 3484 3485 if (c->sorted_entries) { 3486 // allocate an extra slot for sentinels 3487 c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1)); 3488 // allocate an extra slot at the front so that c->sorted_values[-1] is defined 3489 // so that we can catch that case without an extra if 3490 c->sorted_values = ( int *) setup_malloc(f, sizeof(*c->sorted_values ) * (c->sorted_entries+1)); 3491 if (c->sorted_values) { ++c->sorted_values; c->sorted_values[-1] = -1; } 3492 compute_sorted_huffman(c, lengths, values); 3493 } 3494 3495 if (c->sparse) { 3496 setup_temp_free(f, values, sizeof(*values)*c->sorted_entries); 3497 setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries); 3498 setup_temp_free(f, lengths, c->entries); 3499 c->codewords = NULL; 3500 } 3501 3502 compute_accelerated_huffman(c); 3503 3504 c->lookup_type = get_bits(f, 4); 3505 if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup); 3506 if (c->lookup_type > 0) { 3507 uint16 *mults; 3508 c->minimum_value = float32_unpack(get_bits(f, 32)); 3509 c->delta_value = float32_unpack(get_bits(f, 32)); 3510 c->value_bits = get_bits(f, 4)+1; 3511 c->sequence_p = get_bits(f,1); 3512 if (c->lookup_type == 1) { 3513 c->lookup_values = lookup1_values(c->entries, c->dimensions); 3514 } else { 3515 c->lookup_values = c->entries * c->dimensions; 3516 } 3517 mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values); 3518 if (mults == NULL) return error(f, VORBIS_outofmem); 3519 for (j=0; j < (int) c->lookup_values; ++j) { 3520 int q = get_bits(f, c->value_bits); 3521 if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); } 3522 mults[j] = q; 3523 } 3524 3525#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK 3526 if (c->lookup_type == 1) { 3527 int len, sparse = c->sparse; 3528 // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop 3529 if (sparse) { 3530 if (c->sorted_entries == 0) goto skip; 3531 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions); 3532 } else 3533 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries * c->dimensions); 3534 if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); } 3535 len = sparse ? c->sorted_entries : c->entries; 3536 for (j=0; j < len; ++j) { 3537 int z = sparse ? c->sorted_values[j] : j, div=1; 3538 for (k=0; k < c->dimensions; ++k) { 3539 int off = (z / div) % c->lookup_values; 3540 c->multiplicands[j*c->dimensions + k] = 3541 #ifndef STB_VORBIS_CODEBOOK_FLOATS 3542 mults[off]; 3543 #else 3544 mults[off]*c->delta_value + c->minimum_value; 3545 // in this case (and this case only) we could pre-expand c->sequence_p, 3546 // and throw away the decode logic for it; have to ALSO do 3547 // it in the case below, but it can only be done if 3548 // STB_VORBIS_CODEBOOK_FLOATS 3549 // !STB_VORBIS_DIVIDES_IN_CODEBOOK 3550 #endif 3551 div *= c->lookup_values; 3552 } 3553 } 3554 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); 3555 c->lookup_type = 2; 3556 } 3557 else 3558#endif 3559 { 3560 c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values); 3561 #ifndef STB_VORBIS_CODEBOOK_FLOATS 3562 memcpy(c->multiplicands, mults, sizeof(c->multiplicands[0]) * c->lookup_values); 3563 #else 3564 for (j=0; j < (int) c->lookup_values; ++j) 3565 c->multiplicands[j] = mults[j] * c->delta_value + c->minimum_value; 3566 setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values); 3567 #endif 3568 } 3569 skip:; 3570 3571 #ifdef STB_VORBIS_CODEBOOK_FLOATS 3572 if (c->lookup_type == 2 && c->sequence_p) { 3573 for (j=1; j < (int) c->lookup_values; ++j) 3574 c->multiplicands[j] = c->multiplicands[j-1]; 3575 c->sequence_p = 0; 3576 } 3577 #endif 3578 } 3579 } 3580 3581 // time domain transfers (notused) 3582 3583 x = get_bits(f, 6) + 1; 3584 for (i=0; i < x; ++i) { 3585 uint32 z = get_bits(f, 16); 3586 if (z != 0) return error(f, VORBIS_invalid_setup); 3587 } 3588 3589 // Floors 3590 f->floor_count = get_bits(f, 6)+1; 3591 f->floor_config = (Floor *) setup_malloc(f, f->floor_count * sizeof(*f->floor_config)); 3592 for (i=0; i < f->floor_count; ++i) { 3593 f->floor_types[i] = get_bits(f, 16); 3594 if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup); 3595 if (f->floor_types[i] == 0) { 3596 Floor0 *g = &f->floor_config[i].floor0; 3597 g->order = get_bits(f,8); 3598 g->rate = get_bits(f,16); 3599 g->bark_map_size = get_bits(f,16); 3600 g->amplitude_bits = get_bits(f,6); 3601 g->amplitude_offset = get_bits(f,8); 3602 g->number_of_books = get_bits(f,4) + 1; 3603 for (j=0; j < g->number_of_books; ++j) 3604 g->book_list[j] = get_bits(f,8); 3605 return error(f, VORBIS_feature_not_supported); 3606 } else { 3607 Point p[31*8+2]; 3608 Floor1 *g = &f->floor_config[i].floor1; 3609 int max_class = -1; 3610 g->partitions = get_bits(f, 5); 3611 for (j=0; j < g->partitions; ++j) { 3612 g->partition_class_list[j] = get_bits(f, 4); 3613 if (g->partition_class_list[j] > max_class) 3614 max_class = g->partition_class_list[j]; 3615 } 3616 for (j=0; j <= max_class; ++j) { 3617 g->class_dimensions[j] = get_bits(f, 3)+1; 3618 g->class_subclasses[j] = get_bits(f, 2); 3619 if (g->class_subclasses[j]) { 3620 g->class_masterbooks[j] = get_bits(f, 8); 3621 if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup); 3622 } 3623 for (k=0; k < 1 << g->class_subclasses[j]; ++k) { 3624 g->subclass_books[j][k] = get_bits(f,8)-1; 3625 if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); 3626 } 3627 } 3628 g->floor1_multiplier = get_bits(f,2)+1; 3629 g->rangebits = get_bits(f,4); 3630 g->Xlist[0] = 0; 3631 g->Xlist[1] = 1 << g->rangebits; 3632 g->values = 2; 3633 for (j=0; j < g->partitions; ++j) { 3634 int c = g->partition_class_list[j]; 3635 for (k=0; k < g->class_dimensions[c]; ++k) { 3636 g->Xlist[g->values] = get_bits(f, g->rangebits); 3637 ++g->values; 3638 } 3639 } 3640 // precompute the sorting 3641 for (j=0; j < g->values; ++j) { 3642 p[j].x = g->Xlist[j]; 3643 p[j].y = j; 3644 } 3645 qsort(p, g->values, sizeof(p[0]), point_compare); 3646 for (j=0; j < g->values; ++j) 3647 g->sorted_order[j] = (uint8) p[j].y; 3648 // precompute the neighbors 3649 for (j=2; j < g->values; ++j) { 3650 int low,hi; 3651 neighbors(g->Xlist, j, &low,&hi); 3652 g->neighbors[j][0] = low; 3653 g->neighbors[j][1] = hi; 3654 } 3655 3656 if (g->values > longest_floorlist) 3657 longest_floorlist = g->values; 3658 } 3659 } 3660 3661 // Residue 3662 f->residue_count = get_bits(f, 6)+1; 3663 f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(*f->residue_config)); 3664 for (i=0; i < f->residue_count; ++i) { 3665 uint8 residue_cascade[64]; 3666 Residue *r = f->residue_config+i; 3667 f->residue_types[i] = get_bits(f, 16); 3668 if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup); 3669 r->begin = get_bits(f, 24); 3670 r->end = get_bits(f, 24); 3671 r->part_size = get_bits(f,24)+1; 3672 r->classifications = get_bits(f,6)+1; 3673 r->classbook = get_bits(f,8); 3674 for (j=0; j < r->classifications; ++j) { 3675 uint8 high_bits=0; 3676 uint8 low_bits=get_bits(f,3); 3677 if (get_bits(f,1)) 3678 high_bits = get_bits(f,5); 3679 residue_cascade[j] = high_bits*8 + low_bits; 3680 } 3681 r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications); 3682 for (j=0; j < r->classifications; ++j) { 3683 for (k=0; k < 8; ++k) { 3684 if (residue_cascade[j] & (1 << k)) { 3685 r->residue_books[j][k] = get_bits(f, 8); 3686 if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup); 3687 } else { 3688 r->residue_books[j][k] = -1; 3689 } 3690 } 3691 } 3692 // precompute the classifications[] array to avoid inner-loop mod/divide 3693 // call it 'classdata' since we already have r->classifications 3694 r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries); 3695 if (!r->classdata) return error(f, VORBIS_outofmem); 3696 memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries); 3697 for (j=0; j < f->codebooks[r->classbook].entries; ++j) { 3698 int classwords = f->codebooks[r->classbook].dimensions; 3699 int temp = j; 3700 r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords); 3701 for (k=classwords-1; k >= 0; --k) { 3702 r->classdata[j][k] = temp % r->classifications; 3703 temp /= r->classifications; 3704 } 3705 } 3706 } 3707 3708 f->mapping_count = get_bits(f,6)+1; 3709 f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping)); 3710 for (i=0; i < f->mapping_count; ++i) { 3711 Mapping *m = f->mapping + i; 3712 int mapping_type = get_bits(f,16); 3713 if (mapping_type != 0) return error(f, VORBIS_invalid_setup); 3714 m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan)); 3715 if (get_bits(f,1)) 3716 m->submaps = get_bits(f,4); 3717 else 3718 m->submaps = 1; 3719 if (m->submaps > max_submaps) 3720 max_submaps = m->submaps; 3721 if (get_bits(f,1)) { 3722 m->coupling_steps = get_bits(f,8)+1; 3723 for (k=0; k < m->coupling_steps; ++k) { 3724 m->chan[k].magnitude = get_bits(f, ilog(f->channels)-1); 3725 m->chan[k].angle = get_bits(f, ilog(f->channels)-1); 3726 if (m->chan[k].magnitude >= f->channels) return error(f, VORBIS_invalid_setup); 3727 if (m->chan[k].angle >= f->channels) return error(f, VORBIS_invalid_setup); 3728 if (m->chan[k].magnitude == m->chan[k].angle) return error(f, VORBIS_invalid_setup); 3729 } 3730 } else 3731 m->coupling_steps = 0; 3732 3733 // reserved field 3734 if (get_bits(f,2)) return error(f, VORBIS_invalid_setup); 3735 if (m->submaps > 1) { 3736 for (j=0; j < f->channels; ++j) { 3737 m->chan[j].mux = get_bits(f, 4); 3738 if (m->chan[j].mux >= m->submaps) return error(f, VORBIS_invalid_setup); 3739 } 3740 } else 3741 // @SPECIFICATION: this case is missing from the spec 3742 for (j=0; j < f->channels; ++j) 3743 m->chan[j].mux = 0; 3744 3745 for (j=0; j < m->submaps; ++j) { 3746 get_bits(f,8); // discard 3747 m->submap_floor[j] = get_bits(f,8); 3748 m->submap_residue[j] = get_bits(f,8); 3749 if (m->submap_floor[j] >= f->floor_count) return error(f, VORBIS_invalid_setup); 3750 if (m->submap_residue[j] >= f->residue_count) return error(f, VORBIS_invalid_setup); 3751 } 3752 } 3753 3754 // Modes 3755 f->mode_count = get_bits(f, 6)+1; 3756 for (i=0; i < f->mode_count; ++i) { 3757 Mode *m = f->mode_config+i; 3758 m->blockflag = get_bits(f,1); 3759 m->windowtype = get_bits(f,16); 3760 m->transformtype = get_bits(f,16); 3761 m->mapping = get_bits(f,8); 3762 if (m->windowtype != 0) return error(f, VORBIS_invalid_setup); 3763 if (m->transformtype != 0) return error(f, VORBIS_invalid_setup); 3764 if (m->mapping >= f->mapping_count) return error(f, VORBIS_invalid_setup); 3765 } 3766 3767 flush_packet(f); 3768 3769 f->previous_length = 0; 3770 3771 for (i=0; i < f->channels; ++i) { 3772 f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1); 3773 f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2); 3774 f->finalY[i] = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist); 3775 #ifdef STB_VORBIS_NO_DEFER_FLOOR 3776 f->floor_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2); 3777 #endif 3778 } 3779 3780 if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE; 3781 if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE; 3782 f->blocksize[0] = f->blocksize_0; 3783 f->blocksize[1] = f->blocksize_1; 3784 3785#ifdef STB_VORBIS_DIVIDE_TABLE 3786 if (integer_divide_table[1][1]==0) 3787 for (i=0; i < DIVTAB_NUMER; ++i) 3788 for (j=1; j < DIVTAB_DENOM; ++j) 3789 integer_divide_table[i][j] = i / j; 3790#endif 3791 3792 // compute how much temporary memory is needed 3793 3794 // 1. 3795 { 3796 uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1); 3797 uint32 classify_mem; 3798 int i,max_part_read=0; 3799 for (i=0; i < f->residue_count; ++i) { 3800 Residue *r = f->residue_config + i; 3801 int n_read = r->end - r->begin; 3802 int part_read = n_read / r->part_size; 3803 if (part_read > max_part_read) 3804 max_part_read = part_read; 3805 } 3806 #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE 3807 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *)); 3808 #else 3809 classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *)); 3810 #endif 3811 3812 f->temp_memory_required = classify_mem; 3813 if (imdct_mem > f->temp_memory_required) 3814 f->temp_memory_required = imdct_mem; 3815 } 3816 3817 f->first_decode = TRUE; 3818 3819 if (f->alloc.alloc_buffer) { 3820 assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes); 3821 // check if there's enough temp memory so we don't error later 3822 if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset) 3823 return error(f, VORBIS_outofmem); 3824 } 3825 3826 f->first_audio_page_offset = stb_vorbis_get_file_offset(f); 3827 3828 return TRUE; 3829} 3830 3831static void vorbis_deinit(stb_vorbis *p) 3832{ 3833 int i,j; 3834 for (i=0; i < p->residue_count; ++i) { 3835 Residue *r = p->residue_config+i; 3836 if (r->classdata) { 3837 for (j=0; j < p->codebooks[r->classbook].entries; ++j) 3838 setup_free(p, r->classdata[j]); 3839 setup_free(p, r->classdata); 3840 } 3841 setup_free(p, r->residue_books); 3842 } 3843 3844 if (p->codebooks) { 3845 for (i=0; i < p->codebook_count; ++i) { 3846 Codebook *c = p->codebooks + i; 3847 setup_free(p, c->codeword_lengths); 3848 setup_free(p, c->multiplicands); 3849 setup_free(p, c->codewords); 3850 setup_free(p, c->sorted_codewords); 3851 // c->sorted_values[-1] is the first entry in the array 3852 setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL); 3853 } 3854 setup_free(p, p->codebooks); 3855 } 3856 setup_free(p, p->floor_config); 3857 setup_free(p, p->residue_config); 3858 for (i=0; i < p->mapping_count; ++i) 3859 setup_free(p, p->mapping[i].chan); 3860 setup_free(p, p->mapping); 3861 for (i=0; i < p->channels; ++i) { 3862 setup_free(p, p->channel_buffers[i]); 3863 setup_free(p, p->previous_window[i]); 3864 #ifdef STB_VORBIS_NO_DEFER_FLOOR 3865 setup_free(p, p->floor_buffers[i]); 3866 #endif 3867 setup_free(p, p->finalY[i]); 3868 } 3869 for (i=0; i < 2; ++i) { 3870 setup_free(p, p->A[i]); 3871 setup_free(p, p->B[i]); 3872 setup_free(p, p->C[i]); 3873 setup_free(p, p->window[i]); 3874 } 3875 #ifndef STB_VORBIS_NO_STDIO 3876 if (p->close_on_free) fclose(p->f); 3877 #endif 3878} 3879 3880void stb_vorbis_close(stb_vorbis *p) 3881{ 3882 if (p == NULL) return; 3883 vorbis_deinit(p); 3884 setup_free(p,p); 3885} 3886 3887static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z) 3888{ 3889 memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start 3890 if (z) { 3891 p->alloc = *z; 3892 p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3; 3893 p->temp_offset = p->alloc.alloc_buffer_length_in_bytes; 3894 } 3895 p->eof = 0; 3896 p->error = VORBIS__no_error; 3897 p->stream = NULL; 3898 p->codebooks = NULL; 3899 p->page_crc_tests = -1; 3900 #ifndef STB_VORBIS_NO_STDIO 3901 p->close_on_free = FALSE; 3902 p->f = NULL; 3903 #endif 3904} 3905 3906int stb_vorbis_get_sample_offset(stb_vorbis *f) 3907{ 3908 if (f->current_loc_valid) 3909 return f->current_loc; 3910 else 3911 return -1; 3912} 3913 3914stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f) 3915{ 3916 stb_vorbis_info d; 3917 d.channels = f->channels; 3918 d.sample_rate = f->sample_rate; 3919 d.setup_memory_required = f->setup_memory_required; 3920 d.setup_temp_memory_required = f->setup_temp_memory_required; 3921 d.temp_memory_required = f->temp_memory_required; 3922 d.max_frame_size = f->blocksize_1 >> 1; 3923 return d; 3924} 3925 3926int stb_vorbis_get_error(stb_vorbis *f) 3927{ 3928 int e = f->error; 3929 f->error = VORBIS__no_error; 3930 return e; 3931} 3932 3933static stb_vorbis * vorbis_alloc(stb_vorbis *f) 3934{ 3935 stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p)); 3936 return p; 3937} 3938 3939#ifndef STB_VORBIS_NO_PUSHDATA_API 3940 3941void stb_vorbis_flush_pushdata(stb_vorbis *f) 3942{ 3943 f->previous_length = 0; 3944 f->page_crc_tests = 0; 3945 f->discard_samples_deferred = 0; 3946 f->current_loc_valid = FALSE; 3947 f->first_decode = FALSE; 3948 f->samples_output = 0; 3949 f->channel_buffer_start = 0; 3950 f->channel_buffer_end = 0; 3951} 3952 3953static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len) 3954{ 3955 int i,n; 3956 for (i=0; i < f->page_crc_tests; ++i) 3957 f->scan[i].bytes_done = 0; 3958 3959 // if we have room for more scans, search for them first, because 3960 // they may cause us to stop early if their header is incomplete 3961 if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) { 3962 if (data_len < 4) return 0; 3963 data_len -= 3; // need to look for 4-byte sequence, so don't miss 3964 // one that straddles a boundary 3965 for (i=0; i < data_len; ++i) { 3966 if (data[i] == 0x4f) { 3967 if (0==memcmp(data+i, ogg_page_header, 4)) { 3968 int j,len; 3969 uint32 crc; 3970 // make sure we have the whole page header 3971 if (i+26 >= data_len || i+27+data[i+26] >= data_len) { 3972 // only read up to this page start, so hopefully we'll 3973 // have the whole page header start next time 3974 data_len = i; 3975 break; 3976 } 3977 // ok, we have it all; compute the length of the page 3978 len = 27 + data[i+26]; 3979 for (j=0; j < data[i+26]; ++j) 3980 len += data[i+27+j]; 3981 // scan everything up to the embedded crc (which we must 0) 3982 crc = 0; 3983 for (j=0; j < 22; ++j) 3984 crc = crc32_update(crc, data[i+j]); 3985 // now process 4 0-bytes 3986 for ( ; j < 26; ++j) 3987 crc = crc32_update(crc, 0); 3988 // len is the total number of bytes we need to scan 3989 n = f->page_crc_tests++; 3990 f->scan[n].bytes_left = len-j; 3991 f->scan[n].crc_so_far = crc; 3992 f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24); 3993 // if the last frame on a page is continued to the next, then 3994 // we can't recover the sample_loc immediately 3995 if (data[i+27+data[i+26]-1] == 255) 3996 f->scan[n].sample_loc = ~0; 3997 else 3998 f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24); 3999 f->scan[n].bytes_done = i+j; 4000 if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) 4001 break; 4002 // keep going if we still have room for more 4003 } 4004 } 4005 } 4006 } 4007 4008 for (i=0; i < f->page_crc_tests;) { 4009 uint32 crc; 4010 int j; 4011 int n = f->scan[i].bytes_done; 4012 int m = f->scan[i].bytes_left; 4013 if (m > data_len - n) m = data_len - n; 4014 // m is the bytes to scan in the current chunk 4015 crc = f->scan[i].crc_so_far; 4016 for (j=0; j < m; ++j) 4017 crc = crc32_update(crc, data[n+j]); 4018 f->scan[i].bytes_left -= m; 4019 f->scan[i].crc_so_far = crc; 4020 if (f->scan[i].bytes_left == 0) { 4021 // does it match? 4022 if (f->scan[i].crc_so_far == f->scan[i].goal_crc) { 4023 // Houston, we have page 4024 data_len = n+m; // consumption amount is wherever that scan ended 4025 f->page_crc_tests = -1; // drop out of page scan mode 4026 f->previous_length = 0; // decode-but-don't-output one frame 4027 f->next_seg = -1; // start a new page 4028 f->current_loc = f->scan[i].sample_loc; // set the current sample location 4029 // to the amount we'd have decoded had we decoded this page 4030 f->current_loc_valid = f->current_loc != ~0; 4031 return data_len; 4032 } 4033 // delete entry 4034 f->scan[i] = f->scan[--f->page_crc_tests]; 4035 } else { 4036 ++i; 4037 } 4038 } 4039 4040 return data_len; 4041} 4042 4043// return value: number of bytes we used 4044int stb_vorbis_decode_frame_pushdata( 4045 stb_vorbis *f, // the file we're decoding 4046 uint8 *data, int data_len, // the memory available for decoding 4047 int *channels, // place to write number of float * buffers 4048 float ***output, // place to write float ** array of float * buffers 4049 int *samples // place to write number of output samples 4050 ) 4051{ 4052 int i; 4053 int len,right,left; 4054 4055 if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); 4056 4057 if (f->page_crc_tests >= 0) { 4058 *samples = 0; 4059 return vorbis_search_for_page_pushdata(f, data, data_len); 4060 } 4061 4062 f->stream = data; 4063 f->stream_end = data + data_len; 4064 f->error = VORBIS__no_error; 4065 4066 // check that we have the entire packet in memory 4067 if (!is_whole_packet_present(f, FALSE)) { 4068 *samples = 0; 4069 return 0; 4070 } 4071 4072 if (!vorbis_decode_packet(f, &len, &left, &right)) { 4073 // save the actual error we encountered 4074 enum STBVorbisError error = f->error; 4075 if (error == VORBIS_bad_packet_type) { 4076 // flush and resynch 4077 f->error = VORBIS__no_error; 4078 while (get8_packet(f) != EOP) 4079 if (f->eof) break; 4080 *samples = 0; 4081 return f->stream - data; 4082 } 4083 if (error == VORBIS_continued_packet_flag_invalid) { 4084 if (f->previous_length == 0) { 4085 // we may be resynching, in which case it's ok to hit one 4086 // of these; just discard the packet 4087 f->error = VORBIS__no_error; 4088 while (get8_packet(f) != EOP) 4089 if (f->eof) break; 4090 *samples = 0; 4091 return f->stream - data; 4092 } 4093 } 4094 // if we get an error while parsing, what to do? 4095 // well, it DEFINITELY won't work to continue from where we are! 4096 stb_vorbis_flush_pushdata(f); 4097 // restore the error that actually made us bail 4098 f->error = error; 4099 *samples = 0; 4100 return 1; 4101 } 4102 4103 // success! 4104 len = vorbis_finish_frame(f, len, left, right); 4105 for (i=0; i < f->channels; ++i) 4106 f->outputs[i] = f->channel_buffers[i] + left; 4107 4108 if (channels) *channels = f->channels; 4109 *samples = len; 4110 *output = f->outputs; 4111 return f->stream - data; 4112} 4113 4114stb_vorbis *stb_vorbis_open_pushdata( 4115 unsigned char *data, int data_len, // the memory available for decoding 4116 int *data_used, // only defined if result is not NULL 4117 int *error, stb_vorbis_alloc *alloc) 4118{ 4119 stb_vorbis *f, p; 4120 vorbis_init(&p, alloc); 4121 p.stream = data; 4122 p.stream_end = data + data_len; 4123 p.push_mode = TRUE; 4124 if (!start_decoder(&p)) { 4125 if (p.eof) 4126 *error = VORBIS_need_more_data; 4127 else 4128 *error = p.error; 4129 return NULL; 4130 } 4131 f = vorbis_alloc(&p); 4132 if (f) { 4133 *f = p; 4134 *data_used = f->stream - data; 4135 *error = 0; 4136 return f; 4137 } else { 4138 vorbis_deinit(&p); 4139 return NULL; 4140 } 4141} 4142#endif // STB_VORBIS_NO_PUSHDATA_API 4143 4144unsigned int stb_vorbis_get_file_offset(stb_vorbis *f) 4145{ 4146 #ifndef STB_VORBIS_NO_PUSHDATA_API 4147 if (f->push_mode) return 0; 4148 #endif 4149 if (USE_MEMORY(f)) return f->stream - f->stream_start; 4150 4151#ifdef STB_VORBIS_USE_CALLBACKS 4152 if(USE_CALLBACKS(f)) 4153 return f->cb_offset; 4154#endif 4155 4156 #ifndef STB_VORBIS_NO_STDIO 4157 return ftell(f->f) - f->f_start; 4158 #endif 4159} 4160 4161#ifndef STB_VORBIS_NO_PULLDATA_API 4162// 4163// DATA-PULLING API 4164// 4165 4166static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last) 4167{ 4168 for(;;) { 4169 int n; 4170 if (f->eof) return 0; 4171 n = get8(f); 4172 if (n == 0x4f) { // page header 4173 unsigned int retry_loc = stb_vorbis_get_file_offset(f); 4174 int i; 4175 // check if we're off the end of a file_section stream 4176 if (retry_loc - 25 > f->stream_len) 4177 return 0; 4178 // check the rest of the header 4179 for (i=1; i < 4; ++i) 4180 if (get8(f) != ogg_page_header[i]) 4181 break; 4182 if (f->eof) return 0; 4183 if (i == 4) { 4184 uint8 header[27]; 4185 uint32 i, crc, goal, len; 4186 for (i=0; i < 4; ++i) 4187 header[i] = ogg_page_header[i]; 4188 for (; i < 27; ++i) 4189 header[i] = get8(f); 4190 if (f->eof) return 0; 4191 if (header[4] != 0) goto invalid; 4192 goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24); 4193 for (i=22; i < 26; ++i) 4194 header[i] = 0; 4195 crc = 0; 4196 for (i=0; i < 27; ++i) 4197 crc = crc32_update(crc, header[i]); 4198 len = 0; 4199 for (i=0; i < header[26]; ++i) { 4200 int s = get8(f); 4201 crc = crc32_update(crc, s); 4202 len += s; 4203 } 4204 if (len && f->eof) return 0; 4205 for (i=0; i < len; ++i) 4206 crc = crc32_update(crc, get8(f)); 4207 // finished parsing probable page 4208 if (crc == goal) { 4209 // we could now check that it's either got the last 4210 // page flag set, OR it's followed by the capture 4211 // pattern, but I guess TECHNICALLY you could have 4212 // a file with garbage between each ogg page and recover 4213 // from it automatically? So even though that paranoia 4214 // might decrease the chance of an invalid decode by 4215 // another 2^32, not worth it since it would hose those 4216 // invalid-but-useful files? 4217 if (end) 4218 *end = stb_vorbis_get_file_offset(f); 4219 if (last) 4220 if (header[5] & 0x04) 4221 *last = 1; 4222 else 4223 *last = 0; 4224 set_file_offset(f, retry_loc-1); 4225 return 1; 4226 } 4227 } 4228 invalid: 4229 // not a valid page, so rewind and look for next one 4230 set_file_offset(f, retry_loc); 4231 } 4232 } 4233} 4234 4235// seek is implemented with 'interpolation search'--this is like 4236// binary search, but we use the data values to estimate the likely 4237// location of the data item (plus a bit of a bias so when the 4238// estimation is wrong we don't waste overly much time) 4239 4240#define SAMPLE_unknown 0xffffffff 4241 4242 4243// ogg vorbis, in its insane infinite wisdom, only provides 4244// information about the sample at the END of the page. 4245// therefore we COULD have the data we need in the current 4246// page, and not know it. we could just use the end location 4247// as our only knowledge for bounds, seek back, and eventually 4248// the binary search finds it. or we can try to be smart and 4249// not waste time trying to locate more pages. we try to be 4250// smart, since this data is already in memory anyway, so 4251// doing needless I/O would be crazy! 4252static int vorbis_analyze_page(stb_vorbis *f, ProbedPage *z) 4253{ 4254 uint8 header[27], lacing[255]; 4255 uint8 packet_type[255]; 4256 int num_packet, packet_start, previous =0; 4257 int i,len; 4258 uint32 samples; 4259 4260 // record where the page starts 4261 z->page_start = stb_vorbis_get_file_offset(f); 4262 4263 // parse the header 4264 getn(f, header, 27); 4265 assert(header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S'); 4266 getn(f, lacing, header[26]); 4267 4268 // determine the length of the payload 4269 len = 0; 4270 for (i=0; i < header[26]; ++i) 4271 len += lacing[i]; 4272 4273 // this implies where the page ends 4274 z->page_end = z->page_start + 27 + header[26] + len; 4275 4276 // read the last-decoded sample out of the data 4277 z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 16); 4278 4279 if (header[5] & 4) { 4280 // if this is the last page, it's not possible to work 4281 // backwards to figure out the first sample! whoops! fuck. 4282 z->first_decoded_sample = SAMPLE_unknown; 4283 set_file_offset(f, z->page_start); 4284 return 1; 4285 } 4286 4287 // scan through the frames to determine the sample-count of each one... 4288 // our goal is the sample # of the first fully-decoded sample on the 4289 // page, which is the first decoded sample of the 2nd page 4290 4291 num_packet=0; 4292 4293 packet_start = ((header[5] & 1) == 0); 4294 4295 for (i=0; i < header[26]; ++i) { 4296 if (packet_start) { 4297 uint8 n,b,m; 4298 if (lacing[i] == 0) goto bail; // trying to read from zero-length packet 4299 n = get8(f); 4300 // if bottom bit is non-zero, we've got corruption 4301 if (n & 1) goto bail; 4302 n >>= 1; 4303 b = ilog(f->mode_count-1); 4304 m = n >> b; 4305 n &= (1 << b)-1; 4306 if (n >= f->mode_count) goto bail; 4307 if (num_packet == 0 && f->mode_config[n].blockflag) 4308 previous = (m & 1); 4309 packet_type[num_packet++] = f->mode_config[n].blockflag; 4310 skip(f, lacing[i]-1); 4311 } else 4312 skip(f, lacing[i]); 4313 packet_start = (lacing[i] < 255); 4314 } 4315 4316 // now that we know the sizes of all the pages, we can start determining 4317 // how much sample data there is. 4318 4319 samples = 0; 4320 4321 // for the last packet, we step by its whole length, because the definition 4322 // is that we encoded the end sample loc of the 'last packet completed', 4323 // where 'completed' refers to packets being split, and we are left to guess 4324 // what 'end sample loc' means. we assume it means ignoring the fact that 4325 // the last half of the data is useless without windowing against the next 4326 // packet... (so it's not REALLY complete in that sense) 4327 if (num_packet > 1) 4328 samples += f->blocksize[packet_type[num_packet-1]]; 4329 4330 for (i=num_packet-2; i >= 1; --i) { 4331 // now, for this packet, how many samples do we have that 4332 // do not overlap the following packet? 4333 if (packet_type[i] == 1) 4334 if (packet_type[i+1] == 1) 4335 samples += f->blocksize_1 >> 1; 4336 else 4337 samples += ((f->blocksize_1 - f->blocksize_0) >> 2) + (f->blocksize_0 >> 1); 4338 else 4339 samples += f->blocksize_0 >> 1; 4340 } 4341 // now, at this point, we've rewound to the very beginning of the 4342 // _second_ packet. if we entirely discard the first packet after 4343 // a seek, this will be exactly the right sample number. HOWEVER! 4344 // we can't as easily compute this number for the LAST page. The 4345 // only way to get the sample offset of the LAST page is to use 4346 // the end loc from the previous page. But what that returns us 4347 // is _exactly_ the place where we get our first non-overlapped 4348 // sample. (I think. Stupid spec for being ambiguous.) So for 4349 // consistency it's better to do that here, too. However, that 4350 // will then require us to NOT discard all of the first frame we 4351 // decode, in some cases, which means an even weirder frame size 4352 // and extra code. what a fucking pain. 4353 4354 // we're going to discard the first packet if we 4355 // start the seek here, so we don't care about it. (we could actually 4356 // do better; if the first packet is long, and the previous packet 4357 // is short, there's actually data in the first half of the first 4358 // packet that doesn't need discarding... but not worth paying the 4359 // effort of tracking that of that here and in the seeking logic) 4360 // except crap, if we infer it from the _previous_ packet's end 4361 // location, we DO need to use that definition... and we HAVE to 4362 // infer the start loc of the LAST packet from the previous packet's 4363 // end location. fuck you, ogg vorbis. 4364 4365 z->first_decoded_sample = z->last_decoded_sample - samples; 4366 4367 // restore file state to where we were 4368 set_file_offset(f, z->page_start); 4369 return 1; 4370 4371 // restore file state to where we were 4372 bail: 4373 set_file_offset(f, z->page_start); 4374 return 0; 4375} 4376 4377static int vorbis_seek_frame_from_page(stb_vorbis *f, uint32 page_start, uint32 first_sample, uint32 target_sample, int fine) 4378{ 4379 int left_start, left_end, right_start, right_end, mode,i; 4380 int frame=0; 4381 uint32 frame_start; 4382 int frames_to_skip, data_to_skip; 4383 4384 // first_sample is the sample # of the first sample that doesn't 4385 // overlap the previous page... note that this requires us to 4386 // _partially_ discard the first packet! bleh. 4387 set_file_offset(f, page_start); 4388 4389 f->next_seg = -1; // force page resync 4390 4391 frame_start = first_sample; 4392 // frame start is where the previous packet's last decoded sample 4393 // was, which corresponds to left_end... EXCEPT if the previous 4394 // packet was long and this packet is short? Probably a bug here. 4395 4396 4397 // now, we can start decoding frames... we'll only FAKE decode them, 4398 // until we find the frame that contains our sample; then we'll rewind, 4399 // and try again 4400 for (;;) { 4401 int start; 4402 4403 if (!vorbis_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode)) 4404 return error(f, VORBIS_seek_failed); 4405 4406 if (frame == 0) 4407 start = left_end; 4408 else 4409 start = left_start; 4410 4411 // the window starts at left_start; the last valid sample we generate 4412 // before the next frame's window start is right_start-1 4413 if (target_sample < frame_start + right_start-start) 4414 break; 4415 4416 flush_packet(f); 4417 if (f->eof) 4418 return error(f, VORBIS_seek_failed); 4419 4420 frame_start += right_start - start; 4421 4422 ++frame; 4423 } 4424 4425 // ok, at this point, the sample we want is contained in frame #'frame' 4426 4427 // to decode frame #'frame' normally, we have to decode the 4428 // previous frame first... but if it's the FIRST frame of the page 4429 // we can't. if it's the first frame, it means it falls in the part 4430 // of the first frame that doesn't overlap either of the other frames. 4431 // so, if we have to handle that case for the first frame, we might 4432 // as well handle it for all of them, so: 4433 if (target_sample > frame_start + (left_end - left_start)) { 4434 // so what we want to do is go ahead and just immediately decode 4435 // this frame, but then make it so the next get_frame_float() uses 4436 // this already-decoded data? or do we want to go ahead and rewind, 4437 // and leave a flag saying to skip the first N data? let's do that 4438 frames_to_skip = frame; // if this is frame #1, skip 1 frame (#0) 4439 data_to_skip = left_end - left_start; 4440 } else { 4441 // otherwise, we want to skip frames 0, 1, 2, ... frame-2 4442 // (which means frame-2+1 total frames) then decode frame-1, 4443 // then leave frame pending 4444 frames_to_skip = frame - 1; 4445 assert(frames_to_skip >= 0); 4446 data_to_skip = -1; 4447 } 4448 4449 set_file_offset(f, page_start); 4450 f->next_seg = - 1; // force page resync 4451 4452 for (i=0; i < frames_to_skip; ++i) { 4453 maybe_start_packet(f); 4454 flush_packet(f); 4455 } 4456 4457 if (data_to_skip >= 0) { 4458 int i,j,n = f->blocksize_0 >> 1; 4459 f->discard_samples_deferred = data_to_skip; 4460 for (i=0; i < f->channels; ++i) 4461 for (j=0; j < n; ++j) 4462 f->previous_window[i][j] = 0; 4463 f->previous_length = n; 4464 frame_start += data_to_skip; 4465 } else { 4466 f->previous_length = 0; 4467 vorbis_pump_first_frame(f); 4468 } 4469 4470 // at this point, the NEXT decoded frame will generate the desired sample 4471 if (fine) { 4472 // so if we're doing sample accurate streaming, we want to go ahead and decode it! 4473 if (target_sample != frame_start) { 4474 int n; 4475 stb_vorbis_get_frame_float(f, &n, NULL); 4476 assert(target_sample > frame_start); 4477 assert(f->channel_buffer_start + (int) (target_sample-frame_start) < f->channel_buffer_end); 4478 f->channel_buffer_start += (target_sample - frame_start); 4479 } 4480 } 4481 4482 return 0; 4483} 4484 4485static int vorbis_seek_base(stb_vorbis *f, unsigned int sample_number, int fine) 4486{ 4487 ProbedPage p[2],q; 4488 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); 4489 4490 // do we know the location of the last page? 4491 if (f->p_last.page_start == 0) { 4492 uint32 z = stb_vorbis_stream_length_in_samples(f); 4493 if (z == 0) return error(f, VORBIS_cant_find_last_page); 4494 } 4495 4496 p[0] = f->p_first; 4497 p[1] = f->p_last; 4498 4499 if (sample_number >= f->p_last.last_decoded_sample) 4500 sample_number = f->p_last.last_decoded_sample-1; 4501 4502 if (sample_number < f->p_first.last_decoded_sample) { 4503 vorbis_seek_frame_from_page(f, p[0].page_start, 0, sample_number, fine); 4504 return 0; 4505 } else { 4506 int attempts=0; 4507 while (p[0].page_end < p[1].page_start) { 4508 uint32 probe; 4509 uint32 start_offset, end_offset; 4510 uint32 start_sample, end_sample; 4511 4512 // copy these into local variables so we can tweak them 4513 // if any are unknown 4514 start_offset = p[0].page_end; 4515 end_offset = p[1].after_previous_page_start; // an address known to seek to page p[1] 4516 start_sample = p[0].last_decoded_sample; 4517 end_sample = p[1].last_decoded_sample; 4518 4519 // currently there is no such tweaking logic needed/possible? 4520 if (start_sample == SAMPLE_unknown || end_sample == SAMPLE_unknown) 4521 return error(f, VORBIS_seek_failed); 4522 4523 // now we want to lerp between these for the target samples... 4524 4525 // step 1: we need to bias towards the page start... 4526 if (start_offset + 4000 < end_offset) 4527 end_offset -= 4000; 4528 4529 // now compute an interpolated search loc 4530 probe = start_offset + (int) floor((float) (end_offset - start_offset) / (end_sample - start_sample) * (sample_number - start_sample)); 4531 4532 // next we need to bias towards binary search... 4533 // code is a little wonky to allow for full 32-bit unsigned values 4534 if (attempts >= 4) { 4535 uint32 probe2 = start_offset + ((end_offset - start_offset) >> 1); 4536 if (attempts >= 8) 4537 probe = probe2; 4538 else if (probe < probe2) 4539 probe = probe + ((probe2 - probe) >> 1); 4540 else 4541 probe = probe2 + ((probe - probe2) >> 1); 4542 } 4543 ++attempts; 4544 4545 set_file_offset(f, probe); 4546 if (!vorbis_find_page(f, NULL, NULL)) return error(f, VORBIS_seek_failed); 4547 if (!vorbis_analyze_page(f, &q)) return error(f, VORBIS_seek_failed); 4548 q.after_previous_page_start = probe; 4549 4550 // it's possible we've just found the last page again 4551 if (q.page_start == p[1].page_start) { 4552 p[1] = q; 4553 continue; 4554 } 4555 4556 if (sample_number < q.last_decoded_sample) 4557 p[1] = q; 4558 else 4559 p[0] = q; 4560 } 4561 4562 if (p[0].last_decoded_sample <= sample_number && sample_number < p[1].last_decoded_sample) { 4563 vorbis_seek_frame_from_page(f, p[1].page_start, p[0].last_decoded_sample, sample_number, fine); 4564 return 0; 4565 } 4566 return error(f, VORBIS_seek_failed); 4567 } 4568} 4569 4570int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number) 4571{ 4572 return vorbis_seek_base(f, sample_number, FALSE); 4573} 4574 4575int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number) 4576{ 4577 return vorbis_seek_base(f, sample_number, TRUE); 4578} 4579 4580void stb_vorbis_seek_start(stb_vorbis *f) 4581{ 4582 if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; } 4583 set_file_offset(f, f->first_audio_page_offset); 4584 f->previous_length = 0; 4585 f->first_decode = TRUE; 4586 f->next_seg = -1; 4587 vorbis_pump_first_frame(f); 4588} 4589 4590unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f) 4591{ 4592 unsigned int restore_offset, previous_safe; 4593 unsigned int end, last_page_loc; 4594 4595 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); 4596 if (!f->total_samples) { 4597 int last; 4598 uint32 lo,hi; 4599 char header[6]; 4600 4601 // first, store the current decode position so we can restore it 4602 restore_offset = stb_vorbis_get_file_offset(f); 4603 4604 // now we want to seek back 64K from the end (the last page must 4605 // be at most a little less than 64K, but let's allow a little slop) 4606 if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset) 4607 previous_safe = f->stream_len - 65536; 4608 else 4609 previous_safe = f->first_audio_page_offset; 4610 4611 set_file_offset(f, previous_safe); 4612 // previous_safe is now our candidate 'earliest known place that seeking 4613 // to will lead to the final page' 4614 4615 if (!vorbis_find_page(f, &end, (int unsigned *)&last)) { 4616 // if we can't find a page, we're hosed! 4617 f->error = VORBIS_cant_find_last_page; 4618 f->total_samples = 0xffffffff; 4619 goto done; 4620 } 4621 4622 // check if there are more pages 4623 last_page_loc = stb_vorbis_get_file_offset(f); 4624 4625 // stop when the last_page flag is set, not when we reach eof; 4626 // this allows us to stop short of a 'file_section' end without 4627 // explicitly checking the length of the section 4628 while (!last) { 4629 set_file_offset(f, end); 4630 if (!vorbis_find_page(f, &end, (int unsigned *)&last)) { 4631 // the last page we found didn't have the 'last page' flag 4632 // set. whoops! 4633 break; 4634 } 4635 previous_safe = last_page_loc+1; 4636 last_page_loc = stb_vorbis_get_file_offset(f); 4637 } 4638 4639 set_file_offset(f, last_page_loc); 4640 4641 // parse the header 4642 getn(f, (unsigned char *)header, 6); 4643 // extract the absolute granule position 4644 lo = get32(f); 4645 hi = get32(f); 4646 if (lo == 0xffffffff && hi == 0xffffffff) { 4647 f->error = VORBIS_cant_find_last_page; 4648 f->total_samples = SAMPLE_unknown; 4649 goto done; 4650 } 4651 if (hi) 4652 lo = 0xfffffffe; // saturate 4653 f->total_samples = lo; 4654 4655 f->p_last.page_start = last_page_loc; 4656 f->p_last.page_end = end; 4657 f->p_last.last_decoded_sample = lo; 4658 f->p_last.first_decoded_sample = SAMPLE_unknown; 4659 f->p_last.after_previous_page_start = previous_safe; 4660 4661 done: 4662 set_file_offset(f, restore_offset); 4663 } 4664 return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples; 4665} 4666 4667float stb_vorbis_stream_length_in_seconds(stb_vorbis *f) 4668{ 4669 return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate; 4670} 4671 4672 4673 4674int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output) 4675{ 4676 int len, right,left,i; 4677 if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing); 4678 4679 if (!vorbis_decode_packet(f, &len, &left, &right)) { 4680 f->channel_buffer_start = f->channel_buffer_end = 0; 4681 return 0; 4682 } 4683 4684 len = vorbis_finish_frame(f, len, left, right); 4685 for (i=0; i < f->channels; ++i) 4686 f->outputs[i] = f->channel_buffers[i] + left; 4687 4688 f->channel_buffer_start = left; 4689 f->channel_buffer_end = left+len; 4690 4691 if (channels) *channels = f->channels; 4692 if (output) *output = f->outputs; 4693 return len; 4694} 4695 4696#ifndef STB_VORBIS_NO_STDIO 4697 4698stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, stb_vorbis_alloc *alloc, unsigned int length) 4699{ 4700 stb_vorbis *f, p; 4701 vorbis_init(&p, alloc); 4702 p.f = file; 4703 p.f_start = ftell(file); 4704 p.stream_len = length; 4705 p.close_on_free = close_on_free; 4706 if (start_decoder(&p)) { 4707 f = vorbis_alloc(&p); 4708 if (f) { 4709 *f = p; 4710 vorbis_pump_first_frame(f); 4711 return f; 4712 } 4713 } 4714 if (error) *error = p.error; 4715 vorbis_deinit(&p); 4716 return NULL; 4717} 4718 4719stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, stb_vorbis_alloc *alloc) 4720{ 4721 unsigned int len, start; 4722 start = ftell(file); 4723 fseek(file, 0, SEEK_END); 4724 len = ftell(file) - start; 4725 fseek(file, start, SEEK_SET); 4726 return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len); 4727} 4728 4729stb_vorbis * stb_vorbis_open_filename(char *filename, int *error, stb_vorbis_alloc *alloc) 4730{ 4731 FILE *f = fopen(filename, "rb"); 4732 if (f) 4733 return stb_vorbis_open_file(f, TRUE, error, alloc); 4734 if (error) *error = VORBIS_file_open_failure; 4735 return NULL; 4736} 4737#endif // STB_VORBIS_NO_STDIO 4738 4739stb_vorbis * stb_vorbis_open_memory(unsigned char *data, int len, int *error, stb_vorbis_alloc *alloc) 4740{ 4741 stb_vorbis *f, p; 4742 if (data == NULL) return NULL; 4743 vorbis_init(&p, alloc); 4744 p.stream = data; 4745 p.stream_end = data + len; 4746 p.stream_start = p.stream; 4747 p.stream_len = len; 4748 p.push_mode = FALSE; 4749 if (start_decoder(&p)) { 4750 f = vorbis_alloc(&p); 4751 if (f) { 4752 *f = p; 4753 vorbis_pump_first_frame(f); 4754 return f; 4755 } 4756 } 4757 if (error) *error = p.error; 4758 vorbis_deinit(&p); 4759 return NULL; 4760} 4761 4762#ifndef STB_VORBIS_NO_INTEGER_CONVERSION 4763#define PLAYBACK_MONO 1 4764#define PLAYBACK_LEFT 2 4765#define PLAYBACK_RIGHT 4 4766 4767#define L (PLAYBACK_LEFT | PLAYBACK_MONO) 4768#define C (PLAYBACK_LEFT | PLAYBACK_RIGHT | PLAYBACK_MONO) 4769#define R (PLAYBACK_RIGHT | PLAYBACK_MONO) 4770 4771static int8 channel_position[7][6] = 4772{ 4773 { 0 }, 4774 { C }, 4775 { L, R }, 4776 { L, C, R }, 4777 { L, R, L, R }, 4778 { L, C, R, L, R }, 4779 { L, C, R, L, R, C }, 4780}; 4781 4782 4783#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT 4784 typedef union { 4785 float f; 4786 int i; 4787 } float_conv; 4788 typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4]; 4789 #define FASTDEF(x) float_conv x 4790 // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round 4791 #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT)) 4792 #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22)) 4793 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s)) 4794 #define check_endianness() 4795#else 4796 #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s)))) 4797 #define check_endianness() 4798 #define FASTDEF(x)