PageRenderTime 215ms CodeModel.GetById 117ms app.highlight 77ms RepoModel.GetById 1ms app.codeStats 1ms

/src/middleware/stb_vorbis/stb_vorbis.c

https://bitbucket.org/vivkin/gam3b00bs/
C | 5143 lines | 3973 code | 572 blank | 598 comment | 1013 complexity | 2048a9cd8e2bddc7e14a8b87072fc020 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1#include "stb_vorbis.h"
   2
   3#ifndef STB_VORBIS_HEADER_ONLY
   4
   5// global configuration settings (e.g. set these in the project/makefile),
   6// or just set them in this file at the top (although ideally the first few
   7// should be visible when the header file is compiled too, although it's not
   8// crucial)
   9
  10// STB_VORBIS_NO_PUSHDATA_API
  11//     does not compile the code for the various stb_vorbis_*_pushdata()
  12//     functions
  13// #define STB_VORBIS_NO_PUSHDATA_API
  14
  15// STB_VORBIS_NO_PULLDATA_API
  16//     does not compile the code for the non-pushdata APIs
  17// #define STB_VORBIS_NO_PULLDATA_API
  18
  19// STB_VORBIS_NO_STDIO
  20//     does not compile the code for the APIs that use FILE *s internally
  21//     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
  22// #define STB_VORBIS_NO_STDIO
  23
  24// STB_VORBIS_NO_INTEGER_CONVERSION
  25//     does not compile the code for converting audio sample data from
  26//     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
  27// #define STB_VORBIS_NO_INTEGER_CONVERSION
  28
  29// STB_VORBIS_NO_FAST_SCALED_FLOAT
  30//      does not use a fast float-to-int trick to accelerate float-to-int on
  31//      most platforms which requires endianness be defined correctly.
  32//#define STB_VORBIS_NO_FAST_SCALED_FLOAT
  33
  34
  35// STB_VORBIS_MAX_CHANNELS [number]
  36//     globally define this to the maximum number of channels you need.
  37//     The spec does not put a restriction on channels except that
  38//     the count is stored in a byte, so 255 is the hard limit.
  39//     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB. Plus any other memory usage
  41//     I forgot to account for. Can probably go as low as 8 (7.1 audio),
  42//     6 (5.1 audio), or 2 (stereo only).
  43#ifndef STB_VORBIS_MAX_CHANNELS
  44#define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
  45#endif
  46
  47// STB_VORBIS_PUSHDATA_CRC_COUNT [number]
  48//     after a flush_pushdata(), stb_vorbis begins scanning for the
  49//     next valid page, without backtracking. when it finds something
  50//     that looks like a page, it streams through it and verifies its
  51//     CRC32. Should that validation fail, it keeps scanning. But it's
  52//     possible that _while_ streaming through to check the CRC32 of
  53//     one candidate page, it sees another candidate page. This #define
  54//     determines how many "overlapping" candidate pages it can search
  55//     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
  56//     garbage pages could be as big as 64KB, but probably average ~16KB.
  57//     So don't hose ourselves by scanning an apparent 64KB page and
  58//     missing a ton of real ones in the interim; so minimum of 2
  59#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
  60#define STB_VORBIS_PUSHDATA_CRC_COUNT  4
  61#endif
  62
  63// STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
  64//     sets the log size of the huffman-acceleration table.  Maximum
  65//     supported value is 24. with larger numbers, more decodings are O(1),
  66//     but the table size is larger so worse cache missing, so you'll have
  67//     to probe (and try multiple ogg vorbis files) to find the sweet spot.
  68#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
  69#define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
  70#endif
  71
  72// STB_VORBIS_FAST_BINARY_LENGTH [number]
  73//     sets the log size of the binary-search acceleration table. this
  74//     is used in similar fashion to the fast-huffman size to set initial
  75//     parameters for the binary search
  76
  77// STB_VORBIS_FAST_HUFFMAN_INT
  78//     The fast huffman tables are much more efficient if they can be
  79//     stored as 16-bit results instead of 32-bit results. This restricts
  80//     the codebooks to having only 65535 possible outcomes, though.
  81//     (At least, accelerated by the huffman table.)
  82#ifndef STB_VORBIS_FAST_HUFFMAN_INT
  83#define STB_VORBIS_FAST_HUFFMAN_SHORT
  84#endif
  85
  86// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
  87//     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
  88//     back on binary searching for the correct one. This requires storing
  89//     extra tables with the huffman codes in sorted order. Defining this
  90//     symbol trades off space for speed by forcing a linear search in the
  91//     non-fast case, except for "sparse" codebooks.
  92// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
  93
  94// STB_VORBIS_DIVIDES_IN_RESIDUE
  95//     stb_vorbis precomputes the result of the scalar residue decoding
  96//     that would otherwise require a divide per chunk. you can trade off
  97//     space for time by defining this symbol.
  98// #define STB_VORBIS_DIVIDES_IN_RESIDUE
  99
 100// STB_VORBIS_DIVIDES_IN_CODEBOOK
 101//     vorbis VQ codebooks can be encoded two ways: with every case explicitly
 102//     stored, or with all elements being chosen from a small range of values,
 103//     and all values possible in all elements. By default, stb_vorbis expands
 104//     this latter kind out to look like the former kind for ease of decoding,
 105//     because otherwise an integer divide-per-vector-element is required to
 106//     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
 107//     trade off storage for speed.
 108//#define STB_VORBIS_DIVIDES_IN_CODEBOOK
 109
 110// STB_VORBIS_CODEBOOK_SHORTS
 111//     The vorbis file format encodes VQ codebook floats as ax+b where a and
 112//     b are floating point per-codebook constants, and x is a 16-bit int.
 113//     Normally, stb_vorbis decodes them to floats rather than leaving them
 114//     as 16-bit ints and computing ax+b while decoding. This is a speed/space
 115//     tradeoff; you can save space by defining this flag.
 116#ifndef STB_VORBIS_CODEBOOK_SHORTS
 117#define STB_VORBIS_CODEBOOK_FLOATS
 118#endif
 119
 120// STB_VORBIS_DIVIDE_TABLE
 121//     this replaces small integer divides in the floor decode loop with
 122//     table lookups. made less than 1% difference, so disabled by default.
 123
 124// STB_VORBIS_NO_INLINE_DECODE
 125//     disables the inlining of the scalar codebook fast-huffman decode.
 126//     might save a little codespace; useful for debugging
 127// #define STB_VORBIS_NO_INLINE_DECODE
 128
 129// STB_VORBIS_NO_DEFER_FLOOR
 130//     Normally we only decode the floor without synthesizing the actual
 131//     full curve. We can instead synthesize the curve immediately. This
 132//     requires more memory and is very likely slower, so I don't think
 133//     you'd ever want to do it except for debugging.
 134// #define STB_VORBIS_NO_DEFER_FLOOR
 135
 136
 137
 138
 139//////////////////////////////////////////////////////////////////////////////
 140
 141#ifdef STB_VORBIS_NO_PULLDATA_API
 142   #define STB_VORBIS_NO_INTEGER_CONVERSION
 143   #define STB_VORBIS_NO_STDIO
 144#endif
 145
 146#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
 147   #define STB_VORBIS_NO_STDIO 1
 148#endif
 149
 150#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
 151#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
 152
 153   // only need endianness for fast-float-to-int, which we don't
 154   // use for pushdata
 155
 156   #ifndef STB_VORBIS_BIG_ENDIAN
 157     #define STB_VORBIS_ENDIAN  0
 158   #else
 159     #define STB_VORBIS_ENDIAN  1
 160   #endif
 161
 162#endif
 163#endif
 164
 165
 166#ifndef STB_VORBIS_NO_STDIO
 167#include <stdio.h>
 168#endif
 169
 170#ifndef STB_VORBIS_NO_CRT
 171#include <stdlib.h>
 172#include <string.h>
 173#include <assert.h>
 174#include <math.h>
 175#include <malloc.h>
 176#else
 177#define NULL 0
 178#endif
 179
 180#ifndef _MSC_VER
 181   #if __GNUC__
 182      #define __forceinline inline
 183   #else
 184      #define __forceinline
 185   #endif
 186#endif
 187
 188#if STB_VORBIS_MAX_CHANNELS > 256
 189#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
 190#endif
 191
 192#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
 193#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
 194#endif
 195
 196
 197#define MAX_BLOCKSIZE_LOG  13   // from specification
 198#define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
 199
 200
 201typedef unsigned char  uint8;
 202typedef   signed char   int8;
 203typedef unsigned short uint16;
 204typedef   signed short  int16;
 205typedef unsigned int   uint32;
 206typedef   signed int    int32;
 207
 208#ifndef TRUE
 209#define TRUE 1
 210#define FALSE 0
 211#endif
 212
 213#ifdef STB_VORBIS_CODEBOOK_FLOATS
 214typedef float codetype;
 215#else
 216typedef uint16 codetype;
 217#endif
 218
 219// @NOTE
 220//
 221// Some arrays below are tagged "//varies", which means it's actually
 222// a variable-sized piece of data, but rather than malloc I assume it's
 223// small enough it's better to just allocate it all together with the
 224// main thing
 225//
 226// Most of the variables are specified with the smallest size I could pack
 227// them into. It might give better performance to make them all full-sized
 228// integers. It should be safe to freely rearrange the structures or change
 229// the sizes larger--nothing relies on silently truncating etc., nor the
 230// order of variables.
 231
 232#define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
 233#define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
 234
 235typedef struct
 236{
 237   int dimensions, entries;
 238   uint8 *codeword_lengths;
 239   float  minimum_value;
 240   float  delta_value;
 241   uint8  value_bits;
 242   uint8  lookup_type;
 243   uint8  sequence_p;
 244   uint8  sparse;
 245   uint32 lookup_values;
 246   codetype *multiplicands;
 247   uint32 *codewords;
 248   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
 249    int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
 250   #else
 251    int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
 252   #endif
 253   uint32 *sorted_codewords;
 254   int    *sorted_values;
 255   int     sorted_entries;
 256} Codebook;
 257
 258typedef struct
 259{
 260   uint8 order;
 261   uint16 rate;
 262   uint16 bark_map_size;
 263   uint8 amplitude_bits;
 264   uint8 amplitude_offset;
 265   uint8 number_of_books;
 266   uint8 book_list[16]; // varies
 267} Floor0;
 268
 269typedef struct
 270{
 271   uint8 partitions;
 272   uint8 partition_class_list[32]; // varies
 273   uint8 class_dimensions[16]; // varies
 274   uint8 class_subclasses[16]; // varies
 275   uint8 class_masterbooks[16]; // varies
 276   int16 subclass_books[16][8]; // varies
 277   uint16 Xlist[31*8+2]; // varies
 278   uint8 sorted_order[31*8+2];
 279   uint8 neighbors[31*8+2][2];
 280   uint8 floor1_multiplier;
 281   uint8 rangebits;
 282   int values;
 283} Floor1;
 284
 285typedef union
 286{
 287   Floor0 floor0;
 288   Floor1 floor1;
 289} Floor;
 290
 291typedef struct
 292{
 293   uint32 begin, end;
 294   uint32 part_size;
 295   uint8 classifications;
 296   uint8 classbook;
 297   uint8 **classdata;
 298   int16 (*residue_books)[8];
 299} Residue;
 300
 301typedef struct
 302{
 303   uint8 magnitude;
 304   uint8 angle;
 305   uint8 mux;
 306} MappingChannel;
 307
 308typedef struct
 309{
 310   uint16 coupling_steps;
 311   MappingChannel *chan;
 312   uint8  submaps;
 313   uint8  submap_floor[15]; // varies
 314   uint8  submap_residue[15]; // varies
 315} Mapping;
 316
 317typedef struct
 318{
 319   uint8 blockflag;
 320   uint8 mapping;
 321   uint16 windowtype;
 322   uint16 transformtype;
 323} Mode;
 324
 325typedef struct
 326{
 327   uint32  goal_crc;    // expected crc if match
 328   int     bytes_left;  // bytes left in packet
 329   uint32  crc_so_far;  // running crc
 330   int     bytes_done;  // bytes processed in _current_ chunk
 331   uint32  sample_loc;  // granule pos encoded in page
 332} CRCscan;
 333
 334typedef struct
 335{
 336   uint32 page_start, page_end;
 337   uint32 after_previous_page_start;
 338   uint32 first_decoded_sample;
 339   uint32 last_decoded_sample;
 340} ProbedPage;
 341
 342struct stb_vorbis
 343{
 344  // user-accessible info
 345   unsigned int sample_rate;
 346   int channels;
 347
 348   unsigned int setup_memory_required;
 349   unsigned int temp_memory_required;
 350   unsigned int setup_temp_memory_required;
 351
 352  // input config
 353#ifndef STB_VORBIS_NO_STDIO
 354   FILE *f;
 355   uint32 f_start;
 356   int close_on_free;
 357#endif
 358#ifdef STB_VORBIS_USE_CALLBACKS
 359	STREAM_DATA_CLLBACK data_callback;
 360	STREAM_RESET_CLLBACK reset_callback;
 361	void* user_data;
 362	uint32 cb_offset;
 363#endif
 364
 365   uint8 *stream;
 366   uint8 *stream_start;
 367   uint8 *stream_end;
 368
 369   uint32 stream_len;
 370
 371   uint8  push_mode;
 372
 373   uint32 first_audio_page_offset;
 374
 375   ProbedPage p_first, p_last;
 376
 377  // memory management
 378   stb_vorbis_alloc alloc;
 379   int setup_offset;
 380   int temp_offset;
 381
 382  // run-time results
 383   int eof;
 384   enum STBVorbisError error;
 385
 386  // user-useful data
 387
 388  // header info
 389   int blocksize[2];
 390   int blocksize_0, blocksize_1;
 391   int codebook_count;
 392   Codebook *codebooks;
 393   int floor_count;
 394   uint16 floor_types[64]; // varies
 395   Floor *floor_config;
 396   int residue_count;
 397   uint16 residue_types[64]; // varies
 398   Residue *residue_config;
 399   int mapping_count;
 400   Mapping *mapping;
 401   int mode_count;
 402   Mode mode_config[64];  // varies
 403
 404   uint32 total_samples;
 405
 406  // decode buffer
 407   float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
 408   float *outputs        [STB_VORBIS_MAX_CHANNELS];
 409
 410   float *previous_window[STB_VORBIS_MAX_CHANNELS];
 411   int previous_length;
 412
 413   #ifndef STB_VORBIS_NO_DEFER_FLOOR
 414   int16 *finalY[STB_VORBIS_MAX_CHANNELS];
 415   #else
 416   float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
 417   #endif
 418
 419   uint32 current_loc; // sample location of next frame to decode
 420   int    current_loc_valid;
 421
 422  // per-blocksize precomputed data
 423   
 424   // twiddle factors
 425   float *A[2],*B[2],*C[2];
 426   float *window[2];
 427   uint16 *bit_reverse[2];
 428
 429  // current page/packet/segment streaming info
 430   uint32 serial; // stream serial number for verification
 431   int last_page;
 432   int segment_count;
 433   uint8 segments[255];
 434   uint8 page_flag;
 435   uint8 bytes_in_seg;
 436   uint8 first_decode;
 437   int next_seg;
 438   int last_seg;  // flag that we're on the last segment
 439   int last_seg_which; // what was the segment number of the last seg?
 440   uint32 acc;
 441   int valid_bits;
 442   int packet_bytes;
 443   int end_seg_with_known_loc;
 444   uint32 known_loc_for_packet;
 445   int discard_samples_deferred;
 446   uint32 samples_output;
 447
 448  // push mode scanning
 449   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
 450#ifndef STB_VORBIS_NO_PUSHDATA_API
 451   CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
 452#endif
 453
 454  // sample-access
 455   int channel_buffer_start;
 456   int channel_buffer_end;
 457};
 458
 459extern int my_prof(int slot);
 460//#define stb_prof my_prof
 461
 462#ifndef stb_prof
 463#define stb_prof(x)  0
 464#endif
 465
 466#if defined(STB_VORBIS_NO_PUSHDATA_API)
 467   #define IS_PUSH_MODE(f)   FALSE
 468#elif defined(STB_VORBIS_NO_PULLDATA_API)
 469   #define IS_PUSH_MODE(f)   TRUE
 470#else
 471   #define IS_PUSH_MODE(f)   ((f)->push_mode)
 472#endif
 473
 474typedef struct stb_vorbis vorb;
 475
 476static int error(vorb *f, enum STBVorbisError e)
 477{
 478   f->error = e;
 479   if (!f->eof && e != VORBIS_need_more_data) {
 480      f->error=e; // breakpoint for debugging
 481   }
 482   return 0;
 483}
 484
 485
 486// these functions are used for allocating temporary memory
 487// while decoding. if you can afford the stack space, use
 488// alloca(); otherwise, provide a temp buffer and it will
 489// allocate out of those.
 490
 491#define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
 492
 493#define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
 494#ifdef dealloca
 495#define temp_free(f,p)                  (f->alloc.alloc_buffer ? 0 : dealloca(size))
 496#else
 497#define temp_free(f,p)                  0
 498#endif
 499#define temp_alloc_save(f)              ((f)->temp_offset)
 500#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
 501
 502#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
 503
 504// given a sufficiently large block of memory, make an array of pointers to subblocks of it
 505static void *make_block_array(void *mem, int count, int size)
 506{
 507   int i;
 508   void ** p = (void **) mem;
 509   char *q = (char *) (p + count);
 510   for (i=0; i < count; ++i) {
 511      p[i] = q;
 512      q += size;
 513   }
 514   return p;
 515}
 516
 517static void *setup_malloc(vorb *f, int sz)
 518{
 519   sz = (sz+3) & ~3;
 520   f->setup_memory_required += sz;
 521   if (f->alloc.alloc_buffer) {
 522      void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
 523      if (f->setup_offset + sz > f->temp_offset) return NULL;
 524      f->setup_offset += sz;
 525      return p;
 526   }
 527   return sz ? malloc(sz) : NULL;
 528}
 529
 530static void setup_free(vorb *f, void *p)
 531{
 532   if (f->alloc.alloc_buffer) return; // do nothing; setup mem is not a stack
 533   free(p);
 534}
 535
 536static void *setup_temp_malloc(vorb *f, int sz)
 537{
 538   sz = (sz+3) & ~3;
 539   if (f->alloc.alloc_buffer) {
 540      if (f->temp_offset - sz < f->setup_offset) return NULL;
 541      f->temp_offset -= sz;
 542      return (char *) f->alloc.alloc_buffer + f->temp_offset;
 543   }
 544   return malloc(sz);
 545}
 546
 547static void setup_temp_free(vorb *f, void *p, size_t sz)
 548{
 549   if (f->alloc.alloc_buffer) {
 550      f->temp_offset += (sz+3)&~3;
 551      return;
 552   }
 553   free(p);
 554}
 555
 556#define CRC32_POLY    0x04c11db7   // from spec
 557
 558static uint32 crc_table[256];
 559static void crc32_init(void)
 560{
 561   int i,j;
 562   uint32 s;
 563   for(i=0; i < 256; i++) {
 564      for (s=i<<24, j=0; j < 8; ++j)
 565         s = (s << 1) ^ (s >= (1<<31) ? CRC32_POLY : 0);
 566      crc_table[i] = s;
 567   }
 568}
 569
 570static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
 571{
 572   return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
 573}
 574
 575
 576// used in setup, and for huffman that doesn't go fast path
 577static unsigned int bit_reverse(unsigned int n)
 578{
 579  n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
 580  n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
 581  n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
 582  n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
 583  return (n >> 16) | (n << 16);
 584}
 585
 586static float square(float x)
 587{
 588   return x*x;
 589}
 590
 591// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
 592// as required by the specification. fast(?) implementation from stb.h
 593// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
 594static int ilog(int32 n)
 595{
 596   static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
 597
 598   // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
 599   if (n < (1U << 14))
 600        if (n < (1U <<  4))        return     0 + log2_4[n      ];
 601        else if (n < (1U <<  9))      return  5 + log2_4[n >>  5];
 602             else                     return 10 + log2_4[n >> 10];
 603   else if (n < (1U << 24))
 604             if (n < (1U << 19))      return 15 + log2_4[n >> 15];
 605             else                     return 20 + log2_4[n >> 20];
 606        else if (n < (1U << 29))      return 25 + log2_4[n >> 25];
 607             else if (n < (1U << 31)) return 30 + log2_4[n >> 30];
 608                  else                return 0; // signed n returns 0
 609}
 610
 611#ifndef M_PI
 612  #define M_PI  3.14159265358979323846264f  // from CRC
 613#endif
 614
 615// code length assigned to a value with no huffman encoding
 616#define NO_CODE   255
 617
 618/////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
 619//
 620// these functions are only called at setup, and only a few times
 621// per file
 622
 623static float float32_unpack(uint32 x)
 624{
 625   // from the specification
 626   uint32 mantissa = x & 0x1fffff;
 627   uint32 sign = x & 0x80000000;
 628   uint32 exp = (x & 0x7fe00000) >> 21;
 629   double res = sign ? -(double)mantissa : (double)mantissa;
 630   return (float) ldexp((float)res, exp-788);
 631}
 632
 633
 634// zlib & jpeg huffman tables assume that the output symbols
 635// can either be arbitrarily arranged, or have monotonically
 636// increasing frequencies--they rely on the lengths being sorted;
 637// this makes for a very simple generation algorithm.
 638// vorbis allows a huffman table with non-sorted lengths. This
 639// requires a more sophisticated construction, since symbols in
 640// order do not map to huffman codes "in order".
 641static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
 642{
 643   if (!c->sparse) {
 644      c->codewords      [symbol] = huff_code;
 645   } else {
 646      c->codewords       [count] = huff_code;
 647      c->codeword_lengths[count] = len;
 648      values             [count] = symbol;
 649   }
 650}
 651
 652static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
 653{
 654   int i,k,m=0;
 655   uint32 available[32];
 656
 657   memset(available, 0, sizeof(available));
 658   // find the first entry
 659   for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
 660   if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
 661   // add to the list
 662   add_entry(c, 0, k, m++, len[k], values);
 663   // add all available leaves
 664   for (i=1; i <= len[k]; ++i)
 665      available[i] = 1 << (32-i);
 666   // note that the above code treats the first case specially,
 667   // but it's really the same as the following code, so they
 668   // could probably be combined (except the initial code is 0,
 669   // and I use 0 in available[] to mean 'empty')
 670   for (i=k+1; i < n; ++i) {
 671      uint32 res;
 672      int z = len[i], y;
 673      if (z == NO_CODE) continue;
 674      // find lowest available leaf (should always be earliest,
 675      // which is what the specification calls for)
 676      // note that this property, and the fact we can never have
 677      // more than one free leaf at a given level, isn't totally
 678      // trivial to prove, but it seems true and the assert never
 679      // fires, so!
 680      while (z > 0 && !available[z]) --z;
 681      if (z == 0) { assert(0); return FALSE; }
 682      res = available[z];
 683      available[z] = 0;
 684      add_entry(c, bit_reverse(res), i, m++, len[i], values);
 685      // propogate availability up the tree
 686      if (z != len[i]) {
 687         for (y=len[i]; y > z; --y) {
 688            assert(available[y] == 0);
 689            available[y] = res + (1 << (32-y));
 690         }
 691      }
 692   }
 693   return TRUE;
 694}
 695
 696// accelerated huffman table allows fast O(1) match of all symbols
 697// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
 698static void compute_accelerated_huffman(Codebook *c)
 699{
 700   int i, len;
 701   for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
 702      c->fast_huffman[i] = -1;
 703
 704   len = c->sparse ? c->sorted_entries : c->entries;
 705   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
 706   if (len > 32767) len = 32767; // largest possible value we can encode!
 707   #endif
 708   for (i=0; i < len; ++i) {
 709      if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
 710         uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
 711         // set table entries for all bit combinations in the higher bits
 712         while (z < FAST_HUFFMAN_TABLE_SIZE) {
 713             c->fast_huffman[z] = i;
 714             z += 1 << c->codeword_lengths[i];
 715         }
 716      }
 717   }
 718}
 719
 720static int uint32_compare(const void *p, const void *q)
 721{
 722   uint32 x = * (uint32 *) p;
 723   uint32 y = * (uint32 *) q;
 724   return x < y ? -1 : x > y;
 725}
 726
 727static int include_in_sort(Codebook *c, uint8 len)
 728{
 729   if (c->sparse) { assert(len != NO_CODE); return TRUE; }
 730   if (len == NO_CODE) return FALSE;
 731   if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
 732   return FALSE;
 733}
 734
 735// if the fast table above doesn't work, we want to binary
 736// search them... need to reverse the bits
 737static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
 738{
 739   int i, len;
 740   // build a list of all the entries
 741   // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
 742   // this is kind of a frivolous optimization--I don't see any performance improvement,
 743   // but it's like 4 extra lines of code, so.
 744   if (!c->sparse) {
 745      int k = 0;
 746      for (i=0; i < c->entries; ++i)
 747         if (include_in_sort(c, lengths[i])) 
 748            c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
 749      assert(k == c->sorted_entries);
 750   } else {
 751      for (i=0; i < c->sorted_entries; ++i)
 752         c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
 753   }
 754
 755   qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
 756   c->sorted_codewords[c->sorted_entries] = 0xffffffff;
 757
 758   len = c->sparse ? c->sorted_entries : c->entries;
 759   // now we need to indicate how they correspond; we could either
 760   //   #1: sort a different data structure that says who they correspond to
 761   //   #2: for each sorted entry, search the original list to find who corresponds
 762   //   #3: for each original entry, find the sorted entry
 763   // #1 requires extra storage, #2 is slow, #3 can use binary search!
 764   for (i=0; i < len; ++i) {
 765      int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
 766      if (include_in_sort(c,huff_len)) {
 767         uint32 code = bit_reverse(c->codewords[i]);
 768         int x=0, n=c->sorted_entries;
 769         while (n > 1) {
 770            // invariant: sc[x] <= code < sc[x+n]
 771            int m = x + (n >> 1);
 772            if (c->sorted_codewords[m] <= code) {
 773               x = m;
 774               n -= (n>>1);
 775            } else {
 776               n >>= 1;
 777            }
 778         }
 779         assert(c->sorted_codewords[x] == code);
 780         if (c->sparse) {
 781            c->sorted_values[x] = values[i];
 782            c->codeword_lengths[x] = huff_len;
 783         } else {
 784            c->sorted_values[x] = i;
 785         }
 786      }
 787   }
 788}
 789
 790// only run while parsing the header (3 times)
 791static int vorbis_validate(uint8 *data)
 792{
 793   static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
 794   return memcmp(data, vorbis, 6) == 0;
 795}
 796
 797// called from setup only, once per code book
 798// (formula implied by specification)
 799static int lookup1_values(int entries, int dim)
 800{
 801   int r = (int) floor(exp((float) log((float) entries) / dim));
 802   if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
 803      ++r;                                              // floor() to avoid _ftol() when non-CRT
 804   assert(pow((float) r+1, dim) > entries);
 805   assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
 806   return r;
 807}
 808
 809// called twice per file
 810static void compute_twiddle_factors(int n, float *A, float *B, float *C)
 811{
 812   int n4 = n >> 2, n8 = n >> 3;
 813   int k,k2;
 814
 815   for (k=k2=0; k < n4; ++k,k2+=2) {
 816      A[k2  ] = (float)  cos(4*k*M_PI/n);
 817      A[k2+1] = (float) -sin(4*k*M_PI/n);
 818      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
 819      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
 820   }
 821   for (k=k2=0; k < n8; ++k,k2+=2) {
 822      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
 823      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
 824   }
 825}
 826
 827static void compute_window(int n, float *window)
 828{
 829   int n2 = n >> 1, i;
 830   for (i=0; i < n2; ++i)
 831      window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
 832}
 833
 834static void compute_bitreverse(int n, uint16 *rev)
 835{
 836   int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
 837   int i, n8 = n >> 3;
 838   for (i=0; i < n8; ++i)
 839      rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
 840}
 841
 842static int init_blocksize(vorb *f, int b, int n)
 843{
 844   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
 845   f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
 846   f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
 847   f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
 848   if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
 849   compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
 850   f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
 851   if (!f->window[b]) return error(f, VORBIS_outofmem);
 852   compute_window(n, f->window[b]);
 853   f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
 854   if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
 855   compute_bitreverse(n, f->bit_reverse[b]);
 856   return TRUE;
 857}
 858
 859static void neighbors(uint16 *x, int n, int *plow, int *phigh)
 860{
 861   int low = -1;
 862   int high = 65536;
 863   int i;
 864   for (i=0; i < n; ++i) {
 865      if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
 866      if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
 867   }
 868}
 869
 870// this has been repurposed so y is now the original index instead of y
 871typedef struct
 872{
 873   uint16 x,y;
 874} Point;
 875
 876int point_compare(const void *p, const void *q)
 877{
 878   Point *a = (Point *) p;
 879   Point *b = (Point *) q;
 880   return a->x < b->x ? -1 : a->x > b->x;
 881}
 882
 883//
 884/////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
 885
 886
 887#if defined(STB_VORBIS_NO_STDIO)
 888   #define USE_MEMORY(z)    TRUE
 889#else
 890   #define USE_MEMORY(z)    ((z)->stream)
 891#endif
 892#ifdef STB_VORBIS_USE_CALLBACKS
 893
 894#define USE_CALLBACKS(z)  ((z)->data_callback)
 895
 896int stb_read_from_callback(vorb* z, int size, uint8* ptr)
 897{
 898	int read = z->data_callback(size,ptr,z->user_data);
 899	if(read < 1 && size > 0)
 900		z->eof = 1;
 901	else
 902		z->cb_offset+=read;
 903	return read;
 904}	
 905
 906int stb_reset_callback(vorb* z)
 907{
 908	int result = z->reset_callback(z->user_data);
 909	if(result == -1)
 910		z->eof = 1;
 911	else
 912	{	
 913		z->cb_offset = 0;
 914		z->eof = 0;
 915	}
 916	return result;
 917}
 918
 919#endif 
 920
 921static uint8 get8(vorb *z)
 922{
 923   if (USE_MEMORY(z)) {
 924      if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
 925      return *z->stream++;
 926   }
 927
 928#ifdef STB_VORBIS_USE_CALLBACKS
 929   if(USE_CALLBACKS(z))
 930   {
 931		uint8 data;
 932		int read = stb_read_from_callback(z,1,&data);
 933		if(z->eof)
 934			return 0;
 935		else
 936			return data;
 937   }
 938#endif
 939   
 940   #ifndef STB_VORBIS_NO_STDIO
 941   {
 942   int c = fgetc(z->f);
 943   if (c == EOF) { z->eof = TRUE; return 0; }
 944   return c;
 945   }
 946   #endif
 947}
 948
 949static uint32 get32(vorb *f)
 950{
 951   uint32 x;
 952   x = get8(f);
 953   x += get8(f) << 8;
 954   x += get8(f) << 16;
 955   x += get8(f) << 24;
 956   return x;
 957}
 958
 959static int getn(vorb *z, uint8 *data, int n)
 960{
 961   if (USE_MEMORY(z)) {
 962      if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
 963      memcpy(data, z->stream, n);
 964      z->stream += n;
 965      return 1;
 966   }
 967
 968#ifdef STB_VORBIS_USE_CALLBACKS
 969   if(USE_CALLBACKS(z))
 970   {
 971		int read = stb_read_from_callback(z,n,data);
 972		if(read < n)
 973		{
 974			z->eof = 1;
 975			return 0;
 976		}
 977		else
 978			return 1;
 979   }
 980#endif
 981
 982   #ifndef STB_VORBIS_NO_STDIO   
 983   if (fread(data, n, 1, z->f) == 1)
 984      return 1;
 985   else {
 986      z->eof = 1;
 987      return 0;
 988   }
 989   #endif
 990}
 991
 992static void skip(vorb *z, int n)
 993{
 994   if (USE_MEMORY(z)) {
 995      z->stream += n;
 996      if (z->stream >= z->stream_end) z->eof = 1;
 997      return;
 998   }
 999#ifdef STB_VORBIS_USE_CALLBACKS
1000 if(USE_CALLBACKS(z))
1001   {
1002		int read = stb_read_from_callback(z,n,NULL);
1003		if(read < n)
1004			z->eof = 1;
1005		return;
1006   }
1007#endif
1008   
1009
1010   #ifndef STB_VORBIS_NO_STDIO
1011   {
1012      long x = ftell(z->f);
1013      fseek(z->f, x+n, SEEK_SET);
1014   }
1015   #endif
1016}
1017
1018static int set_file_offset(stb_vorbis *f, unsigned int loc)
1019{
1020   #ifndef STB_VORBIS_NO_PUSHDATA_API
1021   if (f->push_mode) return 0;
1022   #endif
1023   f->eof = 0;
1024   if (USE_MEMORY(f)) {
1025      if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1026         f->stream = f->stream_end;
1027         f->eof = 1;
1028         return 0;
1029      } else {
1030         f->stream = f->stream_start + loc;
1031         return 1;
1032      }
1033   }
1034
1035#ifdef STB_VORBIS_USE_CALLBACKS
1036	if(USE_CALLBACKS(f))
1037	{
1038		int read = stb_reset_callback(f);
1039		if(read < 0)
1040		{
1041			f->eof = 1;
1042			return 0;
1043		}
1044		read = stb_read_from_callback(f,loc,NULL);
1045		if(read < loc)
1046		{
1047			f->eof = 1;
1048			return 0;
1049		}
1050		return 1;
1051	}
1052#endif
1053   
1054   #ifndef STB_VORBIS_NO_STDIO
1055   if (loc + f->f_start < loc || loc >= 0x80000000) {
1056      loc = 0x7fffffff;
1057      f->eof = 1;
1058   } else {
1059      loc += f->f_start;
1060   }
1061   if (!fseek(f->f, loc, SEEK_SET))
1062      return 1;
1063   f->eof = 1;
1064   fseek(f->f, f->f_start, SEEK_END);
1065   return 0;
1066   #endif
1067}
1068
1069
// The four bytes 0x4f 0x67 0x67 0x53 ("OggS" in ASCII) -- the same byte
// sequence that capture_pattern() checks for.
static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1071
1072static int capture_pattern(vorb *f)
1073{
1074   if (0x4f != get8(f)) return FALSE;
1075   if (0x67 != get8(f)) return FALSE;
1076   if (0x67 != get8(f)) return FALSE;
1077   if (0x53 != get8(f)) return FALSE;
1078   return TRUE;
1079}
1080
// Bit masks tested against the page header flag byte stored in f->page_flag.
#define PAGEFLAG_continued_packet   1
#define PAGEFLAG_first_page         2
#define PAGEFLAG_last_page          4
1084
1085static int start_page_no_capturepattern(vorb *f)
1086{
1087   uint32 loc0,loc1,n,i;
1088   // stream structure version
1089   if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1090   // header flag
1091   f->page_flag = get8(f);
1092   // absolute granule position
1093   loc0 = get32(f); 
1094   loc1 = get32(f);
1095   // @TODO: validate loc0,loc1 as valid positions?
1096   // stream serial number -- vorbis doesn't interleave, so discard
1097   get32(f);
1098   //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1099   // page sequence number
1100   n = get32(f);
1101   f->last_page = n;
1102   // CRC32
1103   get32(f);
1104   // page_segments
1105   f->segment_count = get8(f);
1106   if (!getn(f, f->segments, f->segment_count))
1107      return error(f, VORBIS_unexpected_eof);
1108   // assume we _don't_ know any the sample position of any segments
1109   f->end_seg_with_known_loc = -2;
1110   if (loc0 != ~0 || loc1 != ~0) {
1111      // determine which packet is the last one that will complete
1112      for (i=f->segment_count-1; i >= 0; --i)
1113         if (f->segments[i] < 255)
1114            break;
1115      // 'i' is now the index of the _last_ segment of a packet that ends
1116      if (i >= 0) {
1117         f->end_seg_with_known_loc = i;
1118         f->known_loc_for_packet   = loc0;
1119      }
1120   }
1121   if (f->first_decode) {
1122      int i,len;
1123      ProbedPage p;
1124      len = 0;
1125      for (i=0; i < f->segment_count; ++i)
1126         len += f->segments[i];
1127      len += 27 + f->segment_count;
1128      p.page_start = f->first_audio_page_offset;
1129      p.page_end = p.page_start + len;
1130      p.after_previous_page_start = p.page_start;
1131      p.first_decoded_sample = 0;
1132      p.last_decoded_sample = loc0;
1133      f->p_first = p;
1134   }
1135   f->next_seg = 0;
1136   return TRUE;
1137}
1138
1139static int start_page(vorb *f)
1140{
1141   if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1142   return start_page_no_capturepattern(f);
1143}
1144
1145static int start_packet(vorb *f)
1146{
1147   while (f->next_seg == -1) {
1148      if (!start_page(f)) return FALSE;
1149      if (f->page_flag & PAGEFLAG_continued_packet)
1150         return error(f, VORBIS_continued_packet_flag_invalid);
1151   }
1152   f->last_seg = FALSE;
1153   f->valid_bits = 0;
1154   f->packet_bytes = 0;
1155   f->bytes_in_seg = 0;
1156   // f->next_seg is now valid
1157   return TRUE;
1158}
1159
1160static int maybe_start_packet(vorb *f)
1161{
1162   if (f->next_seg == -1) {
1163      int x = get8(f);
1164      if (f->eof) return FALSE; // EOF at page boundary is not an error!
1165      if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1166      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1167      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1168      if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1169      if (!start_page_no_capturepattern(f)) return FALSE;
1170      if (f->page_flag & PAGEFLAG_continued_packet) {
1171         // set up enough state that we can read this packet if we want,
1172         // e.g. during recovery
1173         f->last_seg = FALSE;
1174         f->bytes_in_seg = 0;
1175         return error(f, VORBIS_continued_packet_flag_invalid);
1176      }
1177   }
1178   return start_packet(f);
1179}
1180
1181static int next_segment(vorb *f)
1182{
1183   int len;
1184   if (f->last_seg) return 0;
1185   if (f->next_seg == -1) {
1186      f->last_seg_which = f->segment_count-1; // in case start_page fails
1187      if (!start_page(f)) { f->last_seg = 1; return 0; }
1188      if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1189   }
1190   len = f->segments[f->next_seg++];
1191   if (len < 255) {
1192      f->last_seg = TRUE;
1193      f->last_seg_which = f->next_seg-1;
1194   }
1195   if (f->next_seg >= f->segment_count)
1196      f->next_seg = -1;
1197   assert(f->bytes_in_seg == 0);
1198   f->bytes_in_seg = len;
1199   return len;
1200}
1201
// EOP: value returned by get8_packet_raw() when the packet has no more bytes.
#define EOP    (-1)
// INVALID_BITS: value get_bits() stores in f->valid_bits after running off
// the end of the packet; later get_bits() calls then return 0.
#define INVALID_BITS  (-1)
1204
1205static int get8_packet_raw(vorb *f)
1206{
1207   if (!f->bytes_in_seg)
1208      if (f->last_seg) return EOP;
1209      else if (!next_segment(f)) return EOP;
1210   assert(f->bytes_in_seg > 0);
1211   --f->bytes_in_seg;
1212   ++f->packet_bytes;
1213   return get8(f);
1214}
1215
1216static int get8_packet(vorb *f)
1217{
1218   int x = get8_packet_raw(f);
1219   f->valid_bits = 0;
1220   return x;
1221}
1222
1223static void flush_packet(vorb *f)
1224{
1225   while (get8_packet_raw(f) != EOP);
1226}
1227
1228// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1229// as the huffman decoder?
1230static uint32 get_bits(vorb *f, int n)
1231{
1232   uint32 z;
1233
1234   if (f->valid_bits < 0) return 0;
1235   if (f->valid_bits < n) {
1236      if (n > 24) {
1237         // the accumulator technique below would not work correctly in this case
1238         z = get_bits(f, 24);
1239         z += get_bits(f, n-24) << 24;
1240         return z;
1241      }
1242      if (f->valid_bits == 0) f->acc = 0;
1243      while (f->valid_bits < n) {
1244         int z = get8_packet_raw(f);
1245         if (z == EOP) {
1246            f->valid_bits = INVALID_BITS;
1247            return 0;
1248         }
1249         f->acc += z << f->valid_bits;
1250         f->valid_bits += 8;
1251      }
1252   }
1253   if (f->valid_bits < 0) return 0;
1254   z = f->acc & ((1 << n)-1);
1255   f->acc >>= n;
1256   f->valid_bits -= n;
1257   return z;
1258}
1259
1260static int32 get_bits_signed(vorb *f, int n)
1261{
1262   uint32 z = get_bits(f, n);
1263   if (z & (1 << (n-1)))
1264      z += ~((1 << n) - 1);
1265   return (int32) z;
1266}
1267
1268// @OPTIMIZE: primary accumulator for huffman
1269// expand the buffer to as many bits as possible without reading off end of packet
1270// it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1271// e.g. cache them locally and decode locally
1272static __forceinline void prep_huffman(vorb *f)
1273{
1274   if (f->valid_bits <= 24) {
1275      if (f->valid_bits == 0) f->acc = 0;
1276      do {
1277         int z;
1278         if (f->last_seg && !f->bytes_in_seg) return;
1279         z = get8_packet_raw(f);
1280         if (z == EOP) return;
1281         f->acc += z << f->valid_bits;
1282         f->valid_bits += 8;
1283      } while (f->valid_bits <= 24);
1284   }
1285}
1286
// Packet type identifiers. NOTE(review): going by the names these are the
// identification, comment and setup header packet types -- confirm against
// the code that reads the headers.
enum
{
   VORBIS_packet_id = 1,
   VORBIS_packet_comment = 3,
   VORBIS_packet_setup = 5,
};
1293
1294static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1295{
1296   int i;
1297   prep_huffman(f);
1298
1299   assert(c->sorted_codewords || c->codewords);
1300   // cases to use binary search: sorted_codewords && !c->codewords
1301   //                             sorted_codewords && c->entries > 8
1302   if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1303      // binary search
1304      uint32 code = bit_reverse(f->acc);
1305      int x=0, n=c->sorted_entries, len;
1306
1307      while (n > 1) {
1308         // invariant: sc[x] <= code < sc[x+n]
1309         int m = x + (n >> 1);
1310         if (c->sorted_codewords[m] <= code) {
1311            x = m;
1312            n -= (n>>1);
1313         } else {
1314            n >>= 1;
1315         }
1316      }
1317      // x is now the sorted index
1318      if (!c->sparse) x = c->sorted_values[x];
1319      // x is now sorted index if sparse, or symbol otherwise
1320      len = c->codeword_lengths[x];
1321      if (f->valid_bits >= len) {
1322         f->acc >>= len;
1323         f->valid_bits -= len;
1324         return x;
1325      }
1326
1327      f->valid_bits = 0;
1328      return -1;
1329   }
1330
1331   // if small, linear search
1332   assert(!c->sparse);
1333   for (i=0; i < c->entries; ++i) {
1334      if (c->codeword_lengths[i] == NO_CODE) continue;
1335      if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1336         if (f->valid_bits >= c->codeword_lengths[i]) {
1337            f->acc >>= c->codeword_lengths[i];
1338            f->valid_bits -= c->codeword_lengths[i];
1339            return i;
1340         }
1341         f->valid_bits = 0;
1342         return -1;
1343      }
1344   }
1345
1346   error(f, VORBIS_invalid_stream);
1347   f->valid_bits = 0;
1348   return -1;
1349}
1350
1351static int codebook_decode_scalar(vorb *f, Codebook *c)
1352{
1353   int i;
1354   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1355      prep_huffman(f);
1356   // fast huffman table lookup
1357   i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1358   i = c->fast_huffman[i];
1359   if (i >= 0) {
1360      f->acc >>= c->codeword_lengths[i];
1361      f->valid_bits -= c->codeword_lengths[i];
1362      if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1363      return i;
1364   }
1365   return codebook_decode_scalar_raw(f,c);
1366}
1367
#ifndef STB_VORBIS_NO_INLINE_DECODE

// DECODE_RAW(var,f,c): inline expansion of codebook_decode_scalar(). Refills
// the accumulator if needed, tries the fast huffman table, and falls back to
// codebook_decode_scalar_raw() when the table entry is negative. The result
// is left in var (-1 when the accumulator ran short of bits).
// NOTE: expands to several statements, so use it only inside a braced block.
#define DECODE_RAW(var, f,c)                                  \
   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
      prep_huffman(f);                                        \
   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
   var = c->fast_huffman[var];                                \
   if (var >= 0) {                                            \
      int n = c->codeword_lengths[var];                       \
      f->acc >>= n;                                           \
      f->valid_bits -= n;                                     \
      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
   } else {                                                   \
      var = codebook_decode_scalar_raw(f,c);                  \
   }

#else

// Out-of-line variant: just call codebook_decode_scalar().
#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);

#endif

// DECODE(var,f,c): DECODE_RAW followed by a translation through
// c->sorted_values when the codebook is sparse.
#define DECODE(var,f,c)                                       \
   DECODE_RAW(var,f,c)                                        \
   if (c->sparse) var = c->sorted_values[var];

// DECODE_VQ(var,f,c): decode used by the vector-lookup routines. It skips the
// sparse translation unless STB_VORBIS_DIVIDES_IN_CODEBOOK is defined.
#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
#else
  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
#endif
1399
1400
1401
1402
1403
1404
// Accessors for codebook vector elements.
//   CODEBOOK_ELEMENT(c,off)      - the full value of element 'off'
//   CODEBOOK_ELEMENT_FAST(c,off) - the element without the base term
//   CODEBOOK_ELEMENT_BASE(c)     - the base term to add to a _FAST value
// Without STB_VORBIS_CODEBOOK_FLOATS the multiplicand is scaled by
// delta_value and offset by minimum_value on every access. With it the
// multiplicand is used as-is and the base is 0.
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
#ifndef STB_VORBIS_CODEBOOK_FLOATS
   #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off] * c->delta_value + c->minimum_value)
   #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off] * c->delta_value)
   #define CODEBOOK_ELEMENT_BASE(c)         (c->minimum_value)
#else
   #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
   #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
   #define CODEBOOK_ELEMENT_BASE(c)         (0)
#endif
1416
1417static int codebook_decode_start(vorb *f, Codebook *c, int len)
1418{
1419   int z = -1;
1420
1421   // type 0 is only legal in a scalar context
1422   if (c->lookup_type == 0)
1423      error(f, VORBIS_invalid_stream);
1424   else {
1425      DECODE_VQ(z,f,c);
1426      if (c->sparse) assert(z < c->sorted_entries);
1427      if (z < 0) {  // check for EOP
1428         if (!f->bytes_in_seg)
1429            if (f->last_seg)
1430               return z;
1431         error(f, VORBIS_invalid_stream);
1432      }
1433   }
1434   return z;
1435}
1436
1437static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1438{
1439   int i,z = codebook_decode_start(f,c,len);
1440   if (z < 0) return FALSE;
1441   if (len > c->dimensions) len = c->dimensions;
1442
1443#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1444   if (c->lookup_type == 1) {
1445      float last = CODEBOOK_ELEMENT_BASE(c);
1446      int div = 1;
1447      for (i=0; i < len; ++i) {
1448         int off = (z / div) % c->lookup_values;
1449         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1450         output[i] += val;
1451         if (c->sequence_p) last = val + c->minimum_value;
1452         div *= c->lookup_values;
1453      }
1454      return TRUE;
1455   }
1456#endif
1457
1458   z *= c->dimensions;
1459   if (c->sequence_p) {
1460      float last = CODEBOOK_ELEMENT_BASE(c);
1461      for (i=0; i < len; ++i) {
1462         float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1463         output[i] += val;
1464         last = val + c->minimum_value;
1465      }
1466   } else {
1467      float last = CODEBOOK_ELEMENT_BASE(c);
1468      for (i=0; i < len; ++i) {
1469         output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1470      }
1471   }
1472
1473   return TRUE;
1474}
1475
1476static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1477{
1478   int i,z = codebook_decode_start(f,c,len);
1479   float last = CODEBOOK_ELEMENT_BASE(c);
1480   if (z < 0) return FALSE;
1481   if (len > c->dimensions) len = c->dimensions;
1482
1483#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1484   if (c->lookup_type == 1) {
1485      int div = 1;
1486      for (i=0; i < len; ++i) {
1487         int off = (z / div) % c->lookup_values;
1488         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1489         output[i*step] += val;
1490         if (c->sequence_p) last = val;
1491         div *= c->lookup_values;
1492      }
1493      return TRUE;
1494   }
1495#endif
1496
1497   z *= c->dimensions;
1498   for (i=0; i < len; ++i) {
1499      float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1500      output[i*step] += val;
1501      if (c->sequence_p) last = val;
1502   }
1503
1504   return TRUE;
1505}
1506
1507static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1508{
1509   int c_inter = *c_inter_p;
1510   int p_inter = *p_inter_p;
1511   int i,z, effective = c->dimensions;
1512
1513   // type 0 is only legal in a scalar context
1514   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1515
1516   while (total_decode > 0) {
1517      float last = CODEBOOK_ELEMENT_BASE(c);
1518      DECODE_VQ(z,f,c);
1519      #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1520      assert(!c->sparse || z < c->sorted_entries);
1521      #endif
1522      if (z < 0) {
1523         if (!f->bytes_in_seg)
1524            if (f->last_seg) return FALSE;
1525         return error(f, VORBIS_invalid_stream);
1526      }
1527
1528      // if this will take us off the end of the buffers, stop short!
1529      // we check by computing the length of the virtual interleaved
1530      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1531      // and the length we'll be using (effective)
1532      if (c_inter + p_inter*ch + effective > len * ch) {
1533         effective = len*ch - (p_inter*ch - c_inter);
1534      }
1535
1536   #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1537      if (c->lookup_type == 1) {
1538         int div = 1;
1539         for (i=0; i < effective; ++i) {
1540            int off = (z / div) % c->lookup_values;
1541            float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1542            outputs[c_inter][p_inter] += val;
1543            if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1544            if (c->sequence_p) last = val;
1545            div *= c->lookup_values;
1546         }
1547      } else
1548   #endif
1549      {
1550         z *= c->dimensions;
1551         if (c->sequence_p) {
1552            for (i=0; i < effective; ++i) {
1553               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1554               outputs[c_inter][p_inter] += val;
1555               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1556               last = val;
1557            }
1558         } else {
1559            for (i=0; i < effective; ++i) {
1560               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1561               outputs[c_inter][p_inter] += val;
1562               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1563            }
1564         }
1565      }
1566
1567      total_decode -= effective;
1568   }
1569   *c_inter_p = c_inter;
1570   *p_inter_p = p_inter;
1571   return TRUE;
1572}
1573
1574#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1575static int codebook_decode_deinterleave_repeat_2(vorb *f, Codebook *c, float **outputs, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1576{
1577   int c_inter = *c_inter_p;
1578   int p_inter = *p_inter_p;
1579   int i,z, effective = c->dimensions;
1580
1581   // type 0 is only legal in a scalar context
1582   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1583
1584   while (total_decode > 0) {
1585      float last = CODEBOOK_ELEMENT_BASE(c);
1586      DECODE_VQ(z,f,c);
1587
1588      if (z < 0) {
1589         if (!f->bytes_in_seg)
1590            if (f->last_seg) return FALSE;
1591         return error(f, VORBIS_invalid_stream);
1592      }
1593
1594      // if this will take us off the end of the buffers, stop short!
1595      // we check by computing the length of the virtual interleaved
1596      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1597      // and the length we'll be using (effective)
1598      if (c_inter + p_inter*2 + effective > len * 2) {
1599         effective = len*2 - (p_inter*2 - c_inter);
1600      }
1601
1602      {
1603         z *= c->dimensions;
1604         stb_prof(11);
1605         if (c->sequence_p) {
1606            // haven't optimized this case because I don't have any examples
1607            for (i=0; i < effective; ++i) {
1608               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1609               outputs[c_inter][p_inter] += val;
1610               if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1611               last = val;
1612            }
1613         } else {
1614            i=0;
1615            if (c_inter == 1) {
1616               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1617               outputs[c_inter][p_inter] += val;
1618               c_inter = 0; ++p_inter;
1619               ++i;
1620            }
1621            {
1622               float *z0 = outputs[0];
1623               float *z1 = outputs[1];
1624               for (; i+1 < effective;) {
1625                  z0[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1626                  z1[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i+1) + last;
1627                  ++p_inter;
1628                  i += 2;
1629               }
1630            }
1631            if (i < effective) {
1632               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1633               outputs[c_inter][p_inter] += val;
1634               if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1635            }
1636         }
1637      }
1638
1639      total_decode -= effective;
1640   }
1641   *c_inter_p = c_inter;
1642   *p_inter_p = p_inter;
1643   return TRUE;
1644}
1645#endif
1646
// Integer linear interpolation: the y value at x on the line through
// (x0,y0) and (x1,y1). The offset from y0 is computed from |y1-y0| with
// truncating division and then applied in the direction of the slope.
// x1 must differ from x0.
static int predict_point(int x, int x0, int x1, int y0, int y1)
{
   const int rise = y1 - y0;
   const int run  = x1 - x0;
   // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
   const int scaled = abs(rise) * (x - x0);
   const int delta  = scaled / run;

   if (rise < 0)
      return y0 - delta;
   return y0 + delta;
}
1656
1657// the following table is block-copied from the specification
1658static float inverse_db_table[256] =
1659{
1660  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 
1661  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 
1662  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 
1663  2.2670913e-07f, 2.41

Large files files are truncated, but you can click here to view the full file