PageRenderTime 374ms CodeModel.GetById 34ms app.highlight 300ms RepoModel.GetById 1ms app.codeStats 2ms

/src/middleware/stb_vorbis/stb_vorbis.c

https://bitbucket.org/vivkin/gam3b00bs/
C | 5143 lines | 3973 code | 572 blank | 598 comment | 1013 complexity | 2048a9cd8e2bddc7e14a8b87072fc020 MD5 | raw file
   1#include "stb_vorbis.h"
   2
   3#ifndef STB_VORBIS_HEADER_ONLY
   4
   5// global configuration settings (e.g. set these in the project/makefile),
   6// or just set them in this file at the top (although ideally the first few
   7// should be visible when the header file is compiled too, although it's not
   8// crucial)
   9
  10// STB_VORBIS_NO_PUSHDATA_API
  11//     does not compile the code for the various stb_vorbis_*_pushdata()
  12//     functions
  13// #define STB_VORBIS_NO_PUSHDATA_API
  14
  15// STB_VORBIS_NO_PULLDATA_API
  16//     does not compile the code for the non-pushdata APIs
  17// #define STB_VORBIS_NO_PULLDATA_API
  18
  19// STB_VORBIS_NO_STDIO
  20//     does not compile the code for the APIs that use FILE *s internally
  21//     or externally (implied by STB_VORBIS_NO_PULLDATA_API)
  22// #define STB_VORBIS_NO_STDIO
  23
  24// STB_VORBIS_NO_INTEGER_CONVERSION
  25//     does not compile the code for converting audio sample data from
  26//     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
  27// #define STB_VORBIS_NO_INTEGER_CONVERSION
  28
  29// STB_VORBIS_NO_FAST_SCALED_FLOAT
  30//      does not use a fast float-to-int trick to accelerate float-to-int on
  31//      most platforms which requires endianness be defined correctly.
  32//#define STB_VORBIS_NO_FAST_SCALED_FLOAT
  33
  34
  35// STB_VORBIS_MAX_CHANNELS [number]
  36//     globally define this to the maximum number of channels you need.
  37//     The spec does not put a restriction on channels except that
  38//     the count is stored in a byte, so 255 is the hard limit.
//     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB, plus any other memory usage
//     I forgot to account for. Can probably go as low as 8 (7.1 audio),
//     6 (5.1 audio), or 2 (stereo only).
  43#ifndef STB_VORBIS_MAX_CHANNELS
  44#define STB_VORBIS_MAX_CHANNELS    16  // enough for anyone?
  45#endif
  46
  47// STB_VORBIS_PUSHDATA_CRC_COUNT [number]
  48//     after a flush_pushdata(), stb_vorbis begins scanning for the
  49//     next valid page, without backtracking. when it finds something
  50//     that looks like a page, it streams through it and verifies its
  51//     CRC32. Should that validation fail, it keeps scanning. But it's
  52//     possible that _while_ streaming through to check the CRC32 of
  53//     one candidate page, it sees another candidate page. This #define
  54//     determines how many "overlapping" candidate pages it can search
  55//     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
  56//     garbage pages could be as big as 64KB, but probably average ~16KB.
  57//     So don't hose ourselves by scanning an apparent 64KB page and
  58//     missing a ton of real ones in the interim; so minimum of 2
  59#ifndef STB_VORBIS_PUSHDATA_CRC_COUNT
  60#define STB_VORBIS_PUSHDATA_CRC_COUNT  4
  61#endif
  62
  63// STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
  64//     sets the log size of the huffman-acceleration table.  Maximum
  65//     supported value is 24. with larger numbers, more decodings are O(1),
  66//     but the table size is larger so worse cache missing, so you'll have
  67//     to probe (and try multiple ogg vorbis files) to find the sweet spot.
  68#ifndef STB_VORBIS_FAST_HUFFMAN_LENGTH
  69#define STB_VORBIS_FAST_HUFFMAN_LENGTH   10
  70#endif
  71
  72// STB_VORBIS_FAST_BINARY_LENGTH [number]
  73//     sets the log size of the binary-search acceleration table. this
  74//     is used in similar fashion to the fast-huffman size to set initial
  75//     parameters for the binary search
  76
  77// STB_VORBIS_FAST_HUFFMAN_INT
  78//     The fast huffman tables are much more efficient if they can be
  79//     stored as 16-bit results instead of 32-bit results. This restricts
  80//     the codebooks to having only 65535 possible outcomes, though.
  81//     (At least, accelerated by the huffman table.)
  82#ifndef STB_VORBIS_FAST_HUFFMAN_INT
  83#define STB_VORBIS_FAST_HUFFMAN_SHORT
  84#endif
  85
  86// STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
  87//     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
  88//     back on binary searching for the correct one. This requires storing
  89//     extra tables with the huffman codes in sorted order. Defining this
  90//     symbol trades off space for speed by forcing a linear search in the
  91//     non-fast case, except for "sparse" codebooks.
  92// #define STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
  93
  94// STB_VORBIS_DIVIDES_IN_RESIDUE
  95//     stb_vorbis precomputes the result of the scalar residue decoding
  96//     that would otherwise require a divide per chunk. you can trade off
  97//     space for time by defining this symbol.
  98// #define STB_VORBIS_DIVIDES_IN_RESIDUE
  99
 100// STB_VORBIS_DIVIDES_IN_CODEBOOK
 101//     vorbis VQ codebooks can be encoded two ways: with every case explicitly
 102//     stored, or with all elements being chosen from a small range of values,
 103//     and all values possible in all elements. By default, stb_vorbis expands
 104//     this latter kind out to look like the former kind for ease of decoding,
 105//     because otherwise an integer divide-per-vector-element is required to
 106//     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
 107//     trade off storage for speed.
 108//#define STB_VORBIS_DIVIDES_IN_CODEBOOK
 109
 110// STB_VORBIS_CODEBOOK_SHORTS
 111//     The vorbis file format encodes VQ codebook floats as ax+b where a and
 112//     b are floating point per-codebook constants, and x is a 16-bit int.
 113//     Normally, stb_vorbis decodes them to floats rather than leaving them
 114//     as 16-bit ints and computing ax+b while decoding. This is a speed/space
 115//     tradeoff; you can save space by defining this flag.
 116#ifndef STB_VORBIS_CODEBOOK_SHORTS
 117#define STB_VORBIS_CODEBOOK_FLOATS
 118#endif
 119
 120// STB_VORBIS_DIVIDE_TABLE
 121//     this replaces small integer divides in the floor decode loop with
 122//     table lookups. made less than 1% difference, so disabled by default.
 123
 124// STB_VORBIS_NO_INLINE_DECODE
 125//     disables the inlining of the scalar codebook fast-huffman decode.
 126//     might save a little codespace; useful for debugging
 127// #define STB_VORBIS_NO_INLINE_DECODE
 128
 129// STB_VORBIS_NO_DEFER_FLOOR
 130//     Normally we only decode the floor without synthesizing the actual
 131//     full curve. We can instead synthesize the curve immediately. This
 132//     requires more memory and is very likely slower, so I don't think
 133//     you'd ever want to do it except for debugging.
 134// #define STB_VORBIS_NO_DEFER_FLOOR
 135
 136
 137
 138
 139//////////////////////////////////////////////////////////////////////////////
 140
 141#ifdef STB_VORBIS_NO_PULLDATA_API
 142   #define STB_VORBIS_NO_INTEGER_CONVERSION
 143   #define STB_VORBIS_NO_STDIO
 144#endif
 145
 146#if defined(STB_VORBIS_NO_CRT) && !defined(STB_VORBIS_NO_STDIO)
 147   #define STB_VORBIS_NO_STDIO 1
 148#endif
 149
 150#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
 151#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
 152
 153   // only need endianness for fast-float-to-int, which we don't
 154   // use for pushdata
 155
 156   #ifndef STB_VORBIS_BIG_ENDIAN
 157     #define STB_VORBIS_ENDIAN  0
 158   #else
 159     #define STB_VORBIS_ENDIAN  1
 160   #endif
 161
 162#endif
 163#endif
 164
 165
 166#ifndef STB_VORBIS_NO_STDIO
 167#include <stdio.h>
 168#endif
 169
 170#ifndef STB_VORBIS_NO_CRT
 171#include <stdlib.h>
 172#include <string.h>
 173#include <assert.h>
 174#include <math.h>
 175#include <malloc.h>
 176#else
 177#define NULL 0
 178#endif
 179
 180#ifndef _MSC_VER
 181   #if __GNUC__
 182      #define __forceinline inline
 183   #else
 184      #define __forceinline
 185   #endif
 186#endif
 187
 188#if STB_VORBIS_MAX_CHANNELS > 256
 189#error "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range"
 190#endif
 191
 192#if STB_VORBIS_FAST_HUFFMAN_LENGTH > 24
 193#error "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range"
 194#endif
 195
 196
 197#define MAX_BLOCKSIZE_LOG  13   // from specification
 198#define MAX_BLOCKSIZE      (1 << MAX_BLOCKSIZE_LOG)
 199
 200
 201typedef unsigned char  uint8;
 202typedef   signed char   int8;
 203typedef unsigned short uint16;
 204typedef   signed short  int16;
 205typedef unsigned int   uint32;
 206typedef   signed int    int32;
 207
 208#ifndef TRUE
 209#define TRUE 1
 210#define FALSE 0
 211#endif
 212
 213#ifdef STB_VORBIS_CODEBOOK_FLOATS
 214typedef float codetype;
 215#else
 216typedef uint16 codetype;
 217#endif
 218
 219// @NOTE
 220//
 221// Some arrays below are tagged "//varies", which means it's actually
 222// a variable-sized piece of data, but rather than malloc I assume it's
 223// small enough it's better to just allocate it all together with the
 224// main thing
 225//
 226// Most of the variables are specified with the smallest size I could pack
 227// them into. It might give better performance to make them all full-sized
 228// integers. It should be safe to freely rearrange the structures or change
 229// the sizes larger--nothing relies on silently truncating etc., nor the
 230// order of variables.
 231
 232#define FAST_HUFFMAN_TABLE_SIZE   (1 << STB_VORBIS_FAST_HUFFMAN_LENGTH)
 233#define FAST_HUFFMAN_TABLE_MASK   (FAST_HUFFMAN_TABLE_SIZE - 1)
 234
 235typedef struct
 236{
 237   int dimensions, entries;
 238   uint8 *codeword_lengths;
 239   float  minimum_value;
 240   float  delta_value;
 241   uint8  value_bits;
 242   uint8  lookup_type;
 243   uint8  sequence_p;
 244   uint8  sparse;
 245   uint32 lookup_values;
 246   codetype *multiplicands;
 247   uint32 *codewords;
 248   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
 249    int16  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
 250   #else
 251    int32  fast_huffman[FAST_HUFFMAN_TABLE_SIZE];
 252   #endif
 253   uint32 *sorted_codewords;
 254   int    *sorted_values;
 255   int     sorted_entries;
 256} Codebook;
 257
 258typedef struct
 259{
 260   uint8 order;
 261   uint16 rate;
 262   uint16 bark_map_size;
 263   uint8 amplitude_bits;
 264   uint8 amplitude_offset;
 265   uint8 number_of_books;
 266   uint8 book_list[16]; // varies
 267} Floor0;
 268
 269typedef struct
 270{
 271   uint8 partitions;
 272   uint8 partition_class_list[32]; // varies
 273   uint8 class_dimensions[16]; // varies
 274   uint8 class_subclasses[16]; // varies
 275   uint8 class_masterbooks[16]; // varies
 276   int16 subclass_books[16][8]; // varies
 277   uint16 Xlist[31*8+2]; // varies
 278   uint8 sorted_order[31*8+2];
 279   uint8 neighbors[31*8+2][2];
 280   uint8 floor1_multiplier;
 281   uint8 rangebits;
 282   int values;
 283} Floor1;
 284
 285typedef union
 286{
 287   Floor0 floor0;
 288   Floor1 floor1;
 289} Floor;
 290
 291typedef struct
 292{
 293   uint32 begin, end;
 294   uint32 part_size;
 295   uint8 classifications;
 296   uint8 classbook;
 297   uint8 **classdata;
 298   int16 (*residue_books)[8];
 299} Residue;
 300
 301typedef struct
 302{
 303   uint8 magnitude;
 304   uint8 angle;
 305   uint8 mux;
 306} MappingChannel;
 307
 308typedef struct
 309{
 310   uint16 coupling_steps;
 311   MappingChannel *chan;
 312   uint8  submaps;
 313   uint8  submap_floor[15]; // varies
 314   uint8  submap_residue[15]; // varies
 315} Mapping;
 316
 317typedef struct
 318{
 319   uint8 blockflag;
 320   uint8 mapping;
 321   uint16 windowtype;
 322   uint16 transformtype;
 323} Mode;
 324
 325typedef struct
 326{
 327   uint32  goal_crc;    // expected crc if match
 328   int     bytes_left;  // bytes left in packet
 329   uint32  crc_so_far;  // running crc
 330   int     bytes_done;  // bytes processed in _current_ chunk
 331   uint32  sample_loc;  // granule pos encoded in page
 332} CRCscan;
 333
 334typedef struct
 335{
 336   uint32 page_start, page_end;
 337   uint32 after_previous_page_start;
 338   uint32 first_decoded_sample;
 339   uint32 last_decoded_sample;
 340} ProbedPage;
 341
 342struct stb_vorbis
 343{
 344  // user-accessible info
 345   unsigned int sample_rate;
 346   int channels;
 347
 348   unsigned int setup_memory_required;
 349   unsigned int temp_memory_required;
 350   unsigned int setup_temp_memory_required;
 351
 352  // input config
 353#ifndef STB_VORBIS_NO_STDIO
 354   FILE *f;
 355   uint32 f_start;
 356   int close_on_free;
 357#endif
 358#ifdef STB_VORBIS_USE_CALLBACKS
 359	STREAM_DATA_CLLBACK data_callback;
 360	STREAM_RESET_CLLBACK reset_callback;
 361	void* user_data;
 362	uint32 cb_offset;
 363#endif
 364
 365   uint8 *stream;
 366   uint8 *stream_start;
 367   uint8 *stream_end;
 368
 369   uint32 stream_len;
 370
 371   uint8  push_mode;
 372
 373   uint32 first_audio_page_offset;
 374
 375   ProbedPage p_first, p_last;
 376
 377  // memory management
 378   stb_vorbis_alloc alloc;
 379   int setup_offset;
 380   int temp_offset;
 381
 382  // run-time results
 383   int eof;
 384   enum STBVorbisError error;
 385
 386  // user-useful data
 387
 388  // header info
 389   int blocksize[2];
 390   int blocksize_0, blocksize_1;
 391   int codebook_count;
 392   Codebook *codebooks;
 393   int floor_count;
 394   uint16 floor_types[64]; // varies
 395   Floor *floor_config;
 396   int residue_count;
 397   uint16 residue_types[64]; // varies
 398   Residue *residue_config;
 399   int mapping_count;
 400   Mapping *mapping;
 401   int mode_count;
 402   Mode mode_config[64];  // varies
 403
 404   uint32 total_samples;
 405
 406  // decode buffer
 407   float *channel_buffers[STB_VORBIS_MAX_CHANNELS];
 408   float *outputs        [STB_VORBIS_MAX_CHANNELS];
 409
 410   float *previous_window[STB_VORBIS_MAX_CHANNELS];
 411   int previous_length;
 412
 413   #ifndef STB_VORBIS_NO_DEFER_FLOOR
 414   int16 *finalY[STB_VORBIS_MAX_CHANNELS];
 415   #else
 416   float *floor_buffers[STB_VORBIS_MAX_CHANNELS];
 417   #endif
 418
 419   uint32 current_loc; // sample location of next frame to decode
 420   int    current_loc_valid;
 421
 422  // per-blocksize precomputed data
 423   
 424   // twiddle factors
 425   float *A[2],*B[2],*C[2];
 426   float *window[2];
 427   uint16 *bit_reverse[2];
 428
 429  // current page/packet/segment streaming info
 430   uint32 serial; // stream serial number for verification
 431   int last_page;
 432   int segment_count;
 433   uint8 segments[255];
 434   uint8 page_flag;
 435   uint8 bytes_in_seg;
 436   uint8 first_decode;
 437   int next_seg;
 438   int last_seg;  // flag that we're on the last segment
 439   int last_seg_which; // what was the segment number of the last seg?
 440   uint32 acc;
 441   int valid_bits;
 442   int packet_bytes;
 443   int end_seg_with_known_loc;
 444   uint32 known_loc_for_packet;
 445   int discard_samples_deferred;
 446   uint32 samples_output;
 447
 448  // push mode scanning
 449   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
 450#ifndef STB_VORBIS_NO_PUSHDATA_API
 451   CRCscan scan[STB_VORBIS_PUSHDATA_CRC_COUNT];
 452#endif
 453
 454  // sample-access
 455   int channel_buffer_start;
 456   int channel_buffer_end;
 457};
 458
 459extern int my_prof(int slot);
 460//#define stb_prof my_prof
 461
 462#ifndef stb_prof
 463#define stb_prof(x)  0
 464#endif
 465
 466#if defined(STB_VORBIS_NO_PUSHDATA_API)
 467   #define IS_PUSH_MODE(f)   FALSE
 468#elif defined(STB_VORBIS_NO_PULLDATA_API)
 469   #define IS_PUSH_MODE(f)   TRUE
 470#else
 471   #define IS_PUSH_MODE(f)   ((f)->push_mode)
 472#endif
 473
 474typedef struct stb_vorbis vorb;
 475
 476static int error(vorb *f, enum STBVorbisError e)
 477{
 478   f->error = e;
 479   if (!f->eof && e != VORBIS_need_more_data) {
 480      f->error=e; // breakpoint for debugging
 481   }
 482   return 0;
 483}
 484
 485
 486// these functions are used for allocating temporary memory
 487// while decoding. if you can afford the stack space, use
 488// alloca(); otherwise, provide a temp buffer and it will
 489// allocate out of those.
 490
 491#define array_size_required(count,size)  (count*(sizeof(void *)+(size)))
 492
 493#define temp_alloc(f,size)              (f->alloc.alloc_buffer ? setup_temp_malloc(f,size) : alloca(size))
 494#ifdef dealloca
 495#define temp_free(f,p)                  (f->alloc.alloc_buffer ? 0 : dealloca(size))
 496#else
 497#define temp_free(f,p)                  0
 498#endif
 499#define temp_alloc_save(f)              ((f)->temp_offset)
 500#define temp_alloc_restore(f,p)         ((f)->temp_offset = (p))
 501
 502#define temp_block_array(f,count,size)  make_block_array(temp_alloc(f,array_size_required(count,size)), count, size)
 503
 504// given a sufficiently large block of memory, make an array of pointers to subblocks of it
 505static void *make_block_array(void *mem, int count, int size)
 506{
 507   int i;
 508   void ** p = (void **) mem;
 509   char *q = (char *) (p + count);
 510   for (i=0; i < count; ++i) {
 511      p[i] = q;
 512      q += size;
 513   }
 514   return p;
 515}
 516
 517static void *setup_malloc(vorb *f, int sz)
 518{
 519   sz = (sz+3) & ~3;
 520   f->setup_memory_required += sz;
 521   if (f->alloc.alloc_buffer) {
 522      void *p = (char *) f->alloc.alloc_buffer + f->setup_offset;
 523      if (f->setup_offset + sz > f->temp_offset) return NULL;
 524      f->setup_offset += sz;
 525      return p;
 526   }
 527   return sz ? malloc(sz) : NULL;
 528}
 529
 530static void setup_free(vorb *f, void *p)
 531{
 532   if (f->alloc.alloc_buffer) return; // do nothing; setup mem is not a stack
 533   free(p);
 534}
 535
 536static void *setup_temp_malloc(vorb *f, int sz)
 537{
 538   sz = (sz+3) & ~3;
 539   if (f->alloc.alloc_buffer) {
 540      if (f->temp_offset - sz < f->setup_offset) return NULL;
 541      f->temp_offset -= sz;
 542      return (char *) f->alloc.alloc_buffer + f->temp_offset;
 543   }
 544   return malloc(sz);
 545}
 546
 547static void setup_temp_free(vorb *f, void *p, size_t sz)
 548{
 549   if (f->alloc.alloc_buffer) {
 550      f->temp_offset += (sz+3)&~3;
 551      return;
 552   }
 553   free(p);
 554}
 555
 556#define CRC32_POLY    0x04c11db7   // from spec
 557
 558static uint32 crc_table[256];
 559static void crc32_init(void)
 560{
 561   int i,j;
 562   uint32 s;
 563   for(i=0; i < 256; i++) {
 564      for (s=i<<24, j=0; j < 8; ++j)
 565         s = (s << 1) ^ (s >= (1<<31) ? CRC32_POLY : 0);
 566      crc_table[i] = s;
 567   }
 568}
 569
 570static __forceinline uint32 crc32_update(uint32 crc, uint8 byte)
 571{
 572   return (crc << 8) ^ crc_table[byte ^ (crc >> 24)];
 573}
 574
 575
 576// used in setup, and for huffman that doesn't go fast path
 577static unsigned int bit_reverse(unsigned int n)
 578{
 579  n = ((n & 0xAAAAAAAA) >>  1) | ((n & 0x55555555) << 1);
 580  n = ((n & 0xCCCCCCCC) >>  2) | ((n & 0x33333333) << 2);
 581  n = ((n & 0xF0F0F0F0) >>  4) | ((n & 0x0F0F0F0F) << 4);
 582  n = ((n & 0xFF00FF00) >>  8) | ((n & 0x00FF00FF) << 8);
 583  return (n >> 16) | (n << 16);
 584}
 585
 586static float square(float x)
 587{
 588   return x*x;
 589}
 590
 591// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
 592// as required by the specification. fast(?) implementation from stb.h
 593// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
 594static int ilog(int32 n)
 595{
 596   static signed char log2_4[16] = { 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4 };
 597
 598   // 2 compares if n < 16, 3 compares otherwise (4 if signed or n > 1<<29)
 599   if (n < (1U << 14))
 600        if (n < (1U <<  4))        return     0 + log2_4[n      ];
 601        else if (n < (1U <<  9))      return  5 + log2_4[n >>  5];
 602             else                     return 10 + log2_4[n >> 10];
 603   else if (n < (1U << 24))
 604             if (n < (1U << 19))      return 15 + log2_4[n >> 15];
 605             else                     return 20 + log2_4[n >> 20];
 606        else if (n < (1U << 29))      return 25 + log2_4[n >> 25];
 607             else if (n < (1U << 31)) return 30 + log2_4[n >> 30];
 608                  else                return 0; // signed n returns 0
 609}
 610
 611#ifndef M_PI
 612  #define M_PI  3.14159265358979323846264f  // from CRC
 613#endif
 614
 615// code length assigned to a value with no huffman encoding
 616#define NO_CODE   255
 617
 618/////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
 619//
 620// these functions are only called at setup, and only a few times
 621// per file
 622
 623static float float32_unpack(uint32 x)
 624{
 625   // from the specification
 626   uint32 mantissa = x & 0x1fffff;
 627   uint32 sign = x & 0x80000000;
 628   uint32 exp = (x & 0x7fe00000) >> 21;
 629   double res = sign ? -(double)mantissa : (double)mantissa;
 630   return (float) ldexp((float)res, exp-788);
 631}
 632
 633
 634// zlib & jpeg huffman tables assume that the output symbols
 635// can either be arbitrarily arranged, or have monotonically
 636// increasing frequencies--they rely on the lengths being sorted;
 637// this makes for a very simple generation algorithm.
 638// vorbis allows a huffman table with non-sorted lengths. This
 639// requires a more sophisticated construction, since symbols in
 640// order do not map to huffman codes "in order".
 641static void add_entry(Codebook *c, uint32 huff_code, int symbol, int count, int len, uint32 *values)
 642{
 643   if (!c->sparse) {
 644      c->codewords      [symbol] = huff_code;
 645   } else {
 646      c->codewords       [count] = huff_code;
 647      c->codeword_lengths[count] = len;
 648      values             [count] = symbol;
 649   }
 650}
 651
 652static int compute_codewords(Codebook *c, uint8 *len, int n, uint32 *values)
 653{
 654   int i,k,m=0;
 655   uint32 available[32];
 656
 657   memset(available, 0, sizeof(available));
 658   // find the first entry
 659   for (k=0; k < n; ++k) if (len[k] < NO_CODE) break;
 660   if (k == n) { assert(c->sorted_entries == 0); return TRUE; }
 661   // add to the list
 662   add_entry(c, 0, k, m++, len[k], values);
 663   // add all available leaves
 664   for (i=1; i <= len[k]; ++i)
 665      available[i] = 1 << (32-i);
 666   // note that the above code treats the first case specially,
 667   // but it's really the same as the following code, so they
 668   // could probably be combined (except the initial code is 0,
 669   // and I use 0 in available[] to mean 'empty')
 670   for (i=k+1; i < n; ++i) {
 671      uint32 res;
 672      int z = len[i], y;
 673      if (z == NO_CODE) continue;
 674      // find lowest available leaf (should always be earliest,
 675      // which is what the specification calls for)
 676      // note that this property, and the fact we can never have
 677      // more than one free leaf at a given level, isn't totally
 678      // trivial to prove, but it seems true and the assert never
 679      // fires, so!
 680      while (z > 0 && !available[z]) --z;
 681      if (z == 0) { assert(0); return FALSE; }
 682      res = available[z];
 683      available[z] = 0;
 684      add_entry(c, bit_reverse(res), i, m++, len[i], values);
 685      // propogate availability up the tree
 686      if (z != len[i]) {
 687         for (y=len[i]; y > z; --y) {
 688            assert(available[y] == 0);
 689            available[y] = res + (1 << (32-y));
 690         }
 691      }
 692   }
 693   return TRUE;
 694}
 695
 696// accelerated huffman table allows fast O(1) match of all symbols
 697// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
 698static void compute_accelerated_huffman(Codebook *c)
 699{
 700   int i, len;
 701   for (i=0; i < FAST_HUFFMAN_TABLE_SIZE; ++i)
 702      c->fast_huffman[i] = -1;
 703
 704   len = c->sparse ? c->sorted_entries : c->entries;
 705   #ifdef STB_VORBIS_FAST_HUFFMAN_SHORT
 706   if (len > 32767) len = 32767; // largest possible value we can encode!
 707   #endif
 708   for (i=0; i < len; ++i) {
 709      if (c->codeword_lengths[i] <= STB_VORBIS_FAST_HUFFMAN_LENGTH) {
 710         uint32 z = c->sparse ? bit_reverse(c->sorted_codewords[i]) : c->codewords[i];
 711         // set table entries for all bit combinations in the higher bits
 712         while (z < FAST_HUFFMAN_TABLE_SIZE) {
 713             c->fast_huffman[z] = i;
 714             z += 1 << c->codeword_lengths[i];
 715         }
 716      }
 717   }
 718}
 719
 720static int uint32_compare(const void *p, const void *q)
 721{
 722   uint32 x = * (uint32 *) p;
 723   uint32 y = * (uint32 *) q;
 724   return x < y ? -1 : x > y;
 725}
 726
 727static int include_in_sort(Codebook *c, uint8 len)
 728{
 729   if (c->sparse) { assert(len != NO_CODE); return TRUE; }
 730   if (len == NO_CODE) return FALSE;
 731   if (len > STB_VORBIS_FAST_HUFFMAN_LENGTH) return TRUE;
 732   return FALSE;
 733}
 734
 735// if the fast table above doesn't work, we want to binary
 736// search them... need to reverse the bits
 737static void compute_sorted_huffman(Codebook *c, uint8 *lengths, uint32 *values)
 738{
 739   int i, len;
 740   // build a list of all the entries
 741   // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
 742   // this is kind of a frivolous optimization--I don't see any performance improvement,
 743   // but it's like 4 extra lines of code, so.
 744   if (!c->sparse) {
 745      int k = 0;
 746      for (i=0; i < c->entries; ++i)
 747         if (include_in_sort(c, lengths[i])) 
 748            c->sorted_codewords[k++] = bit_reverse(c->codewords[i]);
 749      assert(k == c->sorted_entries);
 750   } else {
 751      for (i=0; i < c->sorted_entries; ++i)
 752         c->sorted_codewords[i] = bit_reverse(c->codewords[i]);
 753   }
 754
 755   qsort(c->sorted_codewords, c->sorted_entries, sizeof(c->sorted_codewords[0]), uint32_compare);
 756   c->sorted_codewords[c->sorted_entries] = 0xffffffff;
 757
 758   len = c->sparse ? c->sorted_entries : c->entries;
 759   // now we need to indicate how they correspond; we could either
 760   //   #1: sort a different data structure that says who they correspond to
 761   //   #2: for each sorted entry, search the original list to find who corresponds
 762   //   #3: for each original entry, find the sorted entry
 763   // #1 requires extra storage, #2 is slow, #3 can use binary search!
 764   for (i=0; i < len; ++i) {
 765      int huff_len = c->sparse ? lengths[values[i]] : lengths[i];
 766      if (include_in_sort(c,huff_len)) {
 767         uint32 code = bit_reverse(c->codewords[i]);
 768         int x=0, n=c->sorted_entries;
 769         while (n > 1) {
 770            // invariant: sc[x] <= code < sc[x+n]
 771            int m = x + (n >> 1);
 772            if (c->sorted_codewords[m] <= code) {
 773               x = m;
 774               n -= (n>>1);
 775            } else {
 776               n >>= 1;
 777            }
 778         }
 779         assert(c->sorted_codewords[x] == code);
 780         if (c->sparse) {
 781            c->sorted_values[x] = values[i];
 782            c->codeword_lengths[x] = huff_len;
 783         } else {
 784            c->sorted_values[x] = i;
 785         }
 786      }
 787   }
 788}
 789
 790// only run while parsing the header (3 times)
 791static int vorbis_validate(uint8 *data)
 792{
 793   static uint8 vorbis[6] = { 'v', 'o', 'r', 'b', 'i', 's' };
 794   return memcmp(data, vorbis, 6) == 0;
 795}
 796
 797// called from setup only, once per code book
 798// (formula implied by specification)
 799static int lookup1_values(int entries, int dim)
 800{
 801   int r = (int) floor(exp((float) log((float) entries) / dim));
 802   if ((int) floor(pow((float) r+1, dim)) <= entries)   // (int) cast for MinGW warning;
 803      ++r;                                              // floor() to avoid _ftol() when non-CRT
 804   assert(pow((float) r+1, dim) > entries);
 805   assert((int) floor(pow((float) r, dim)) <= entries); // (int),floor() as above
 806   return r;
 807}
 808
 809// called twice per file
 810static void compute_twiddle_factors(int n, float *A, float *B, float *C)
 811{
 812   int n4 = n >> 2, n8 = n >> 3;
 813   int k,k2;
 814
 815   for (k=k2=0; k < n4; ++k,k2+=2) {
 816      A[k2  ] = (float)  cos(4*k*M_PI/n);
 817      A[k2+1] = (float) -sin(4*k*M_PI/n);
 818      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2) * 0.5f;
 819      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2) * 0.5f;
 820   }
 821   for (k=k2=0; k < n8; ++k,k2+=2) {
 822      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
 823      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
 824   }
 825}
 826
 827static void compute_window(int n, float *window)
 828{
 829   int n2 = n >> 1, i;
 830   for (i=0; i < n2; ++i)
 831      window[i] = (float) sin(0.5 * M_PI * square((float) sin((i - 0 + 0.5) / n2 * 0.5 * M_PI)));
 832}
 833
 834static void compute_bitreverse(int n, uint16 *rev)
 835{
 836   int ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
 837   int i, n8 = n >> 3;
 838   for (i=0; i < n8; ++i)
 839      rev[i] = (bit_reverse(i) >> (32-ld+3)) << 2;
 840}
 841
 842static int init_blocksize(vorb *f, int b, int n)
 843{
 844   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3;
 845   f->A[b] = (float *) setup_malloc(f, sizeof(float) * n2);
 846   f->B[b] = (float *) setup_malloc(f, sizeof(float) * n2);
 847   f->C[b] = (float *) setup_malloc(f, sizeof(float) * n4);
 848   if (!f->A[b] || !f->B[b] || !f->C[b]) return error(f, VORBIS_outofmem);
 849   compute_twiddle_factors(n, f->A[b], f->B[b], f->C[b]);
 850   f->window[b] = (float *) setup_malloc(f, sizeof(float) * n2);
 851   if (!f->window[b]) return error(f, VORBIS_outofmem);
 852   compute_window(n, f->window[b]);
 853   f->bit_reverse[b] = (uint16 *) setup_malloc(f, sizeof(uint16) * n8);
 854   if (!f->bit_reverse[b]) return error(f, VORBIS_outofmem);
 855   compute_bitreverse(n, f->bit_reverse[b]);
 856   return TRUE;
 857}
 858
 859static void neighbors(uint16 *x, int n, int *plow, int *phigh)
 860{
 861   int low = -1;
 862   int high = 65536;
 863   int i;
 864   for (i=0; i < n; ++i) {
 865      if (x[i] > low  && x[i] < x[n]) { *plow  = i; low = x[i]; }
 866      if (x[i] < high && x[i] > x[n]) { *phigh = i; high = x[i]; }
 867   }
 868}
 869
 870// this has been repurposed so y is now the original index instead of y
 871typedef struct
 872{
 873   uint16 x,y;
 874} Point;
 875
 876int point_compare(const void *p, const void *q)
 877{
 878   Point *a = (Point *) p;
 879   Point *b = (Point *) q;
 880   return a->x < b->x ? -1 : a->x > b->x;
 881}
 882
 883//
 884/////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
 885
 886
 887#if defined(STB_VORBIS_NO_STDIO)
 888   #define USE_MEMORY(z)    TRUE
 889#else
 890   #define USE_MEMORY(z)    ((z)->stream)
 891#endif
 892#ifdef STB_VORBIS_USE_CALLBACKS
 893
 894#define USE_CALLBACKS(z)  ((z)->data_callback)
 895
 896int stb_read_from_callback(vorb* z, int size, uint8* ptr)
 897{
 898	int read = z->data_callback(size,ptr,z->user_data);
 899	if(read < 1 && size > 0)
 900		z->eof = 1;
 901	else
 902		z->cb_offset+=read;
 903	return read;
 904}	
 905
 906int stb_reset_callback(vorb* z)
 907{
 908	int result = z->reset_callback(z->user_data);
 909	if(result == -1)
 910		z->eof = 1;
 911	else
 912	{	
 913		z->cb_offset = 0;
 914		z->eof = 0;
 915	}
 916	return result;
 917}
 918
 919#endif 
 920
 921static uint8 get8(vorb *z)
 922{
 923   if (USE_MEMORY(z)) {
 924      if (z->stream >= z->stream_end) { z->eof = TRUE; return 0; }
 925      return *z->stream++;
 926   }
 927
 928#ifdef STB_VORBIS_USE_CALLBACKS
 929   if(USE_CALLBACKS(z))
 930   {
 931		uint8 data;
 932		int read = stb_read_from_callback(z,1,&data);
 933		if(z->eof)
 934			return 0;
 935		else
 936			return data;
 937   }
 938#endif
 939   
 940   #ifndef STB_VORBIS_NO_STDIO
 941   {
 942   int c = fgetc(z->f);
 943   if (c == EOF) { z->eof = TRUE; return 0; }
 944   return c;
 945   }
 946   #endif
 947}
 948
 949static uint32 get32(vorb *f)
 950{
 951   uint32 x;
 952   x = get8(f);
 953   x += get8(f) << 8;
 954   x += get8(f) << 16;
 955   x += get8(f) << 24;
 956   return x;
 957}
 958
 959static int getn(vorb *z, uint8 *data, int n)
 960{
 961   if (USE_MEMORY(z)) {
 962      if (z->stream+n > z->stream_end) { z->eof = 1; return 0; }
 963      memcpy(data, z->stream, n);
 964      z->stream += n;
 965      return 1;
 966   }
 967
 968#ifdef STB_VORBIS_USE_CALLBACKS
 969   if(USE_CALLBACKS(z))
 970   {
 971		int read = stb_read_from_callback(z,n,data);
 972		if(read < n)
 973		{
 974			z->eof = 1;
 975			return 0;
 976		}
 977		else
 978			return 1;
 979   }
 980#endif
 981
 982   #ifndef STB_VORBIS_NO_STDIO   
 983   if (fread(data, n, 1, z->f) == 1)
 984      return 1;
 985   else {
 986      z->eof = 1;
 987      return 0;
 988   }
 989   #endif
 990}
 991
 992static void skip(vorb *z, int n)
 993{
 994   if (USE_MEMORY(z)) {
 995      z->stream += n;
 996      if (z->stream >= z->stream_end) z->eof = 1;
 997      return;
 998   }
 999#ifdef STB_VORBIS_USE_CALLBACKS
1000 if(USE_CALLBACKS(z))
1001   {
1002		int read = stb_read_from_callback(z,n,NULL);
1003		if(read < n)
1004			z->eof = 1;
1005		return;
1006   }
1007#endif
1008   
1009
1010   #ifndef STB_VORBIS_NO_STDIO
1011   {
1012      long x = ftell(z->f);
1013      fseek(z->f, x+n, SEEK_SET);
1014   }
1015   #endif
1016}
1017
1018static int set_file_offset(stb_vorbis *f, unsigned int loc)
1019{
1020   #ifndef STB_VORBIS_NO_PUSHDATA_API
1021   if (f->push_mode) return 0;
1022   #endif
1023   f->eof = 0;
1024   if (USE_MEMORY(f)) {
1025      if (f->stream_start + loc >= f->stream_end || f->stream_start + loc < f->stream_start) {
1026         f->stream = f->stream_end;
1027         f->eof = 1;
1028         return 0;
1029      } else {
1030         f->stream = f->stream_start + loc;
1031         return 1;
1032      }
1033   }
1034
1035#ifdef STB_VORBIS_USE_CALLBACKS
1036	if(USE_CALLBACKS(f))
1037	{
1038		int read = stb_reset_callback(f);
1039		if(read < 0)
1040		{
1041			f->eof = 1;
1042			return 0;
1043		}
1044		read = stb_read_from_callback(f,loc,NULL);
1045		if(read < loc)
1046		{
1047			f->eof = 1;
1048			return 0;
1049		}
1050		return 1;
1051	}
1052#endif
1053   
1054   #ifndef STB_VORBIS_NO_STDIO
1055   if (loc + f->f_start < loc || loc >= 0x80000000) {
1056      loc = 0x7fffffff;
1057      f->eof = 1;
1058   } else {
1059      loc += f->f_start;
1060   }
1061   if (!fseek(f->f, loc, SEEK_SET))
1062      return 1;
1063   f->eof = 1;
1064   fseek(f->f, f->f_start, SEEK_END);
1065   return 0;
1066   #endif
1067}
1068
1069
// page capture pattern: the ASCII bytes 'O' 'g' 'g' 'S'
static uint8 ogg_page_header[4] = { 0x4f, 0x67, 0x67, 0x53 };
1071
1072static int capture_pattern(vorb *f)
1073{
1074   if (0x4f != get8(f)) return FALSE;
1075   if (0x67 != get8(f)) return FALSE;
1076   if (0x67 != get8(f)) return FALSE;
1077   if (0x53 != get8(f)) return FALSE;
1078   return TRUE;
1079}
1080
// bit masks for the page header-flag byte stored in f->page_flag
// continued_packet: must be clear when a new packet starts on a page and
// must be set when a packet carries over onto the next page
#define PAGEFLAG_continued_packet   1
#define PAGEFLAG_first_page         2
#define PAGEFLAG_last_page          4
1084
1085static int start_page_no_capturepattern(vorb *f)
1086{
1087   uint32 loc0,loc1,n,i;
1088   // stream structure version
1089   if (0 != get8(f)) return error(f, VORBIS_invalid_stream_structure_version);
1090   // header flag
1091   f->page_flag = get8(f);
1092   // absolute granule position
1093   loc0 = get32(f); 
1094   loc1 = get32(f);
1095   // @TODO: validate loc0,loc1 as valid positions?
1096   // stream serial number -- vorbis doesn't interleave, so discard
1097   get32(f);
1098   //if (f->serial != get32(f)) return error(f, VORBIS_incorrect_stream_serial_number);
1099   // page sequence number
1100   n = get32(f);
1101   f->last_page = n;
1102   // CRC32
1103   get32(f);
1104   // page_segments
1105   f->segment_count = get8(f);
1106   if (!getn(f, f->segments, f->segment_count))
1107      return error(f, VORBIS_unexpected_eof);
1108   // assume we _don't_ know any the sample position of any segments
1109   f->end_seg_with_known_loc = -2;
1110   if (loc0 != ~0 || loc1 != ~0) {
1111      // determine which packet is the last one that will complete
1112      for (i=f->segment_count-1; i >= 0; --i)
1113         if (f->segments[i] < 255)
1114            break;
1115      // 'i' is now the index of the _last_ segment of a packet that ends
1116      if (i >= 0) {
1117         f->end_seg_with_known_loc = i;
1118         f->known_loc_for_packet   = loc0;
1119      }
1120   }
1121   if (f->first_decode) {
1122      int i,len;
1123      ProbedPage p;
1124      len = 0;
1125      for (i=0; i < f->segment_count; ++i)
1126         len += f->segments[i];
1127      len += 27 + f->segment_count;
1128      p.page_start = f->first_audio_page_offset;
1129      p.page_end = p.page_start + len;
1130      p.after_previous_page_start = p.page_start;
1131      p.first_decoded_sample = 0;
1132      p.last_decoded_sample = loc0;
1133      f->p_first = p;
1134   }
1135   f->next_seg = 0;
1136   return TRUE;
1137}
1138
1139static int start_page(vorb *f)
1140{
1141   if (!capture_pattern(f)) return error(f, VORBIS_missing_capture_pattern);
1142   return start_page_no_capturepattern(f);
1143}
1144
1145static int start_packet(vorb *f)
1146{
1147   while (f->next_seg == -1) {
1148      if (!start_page(f)) return FALSE;
1149      if (f->page_flag & PAGEFLAG_continued_packet)
1150         return error(f, VORBIS_continued_packet_flag_invalid);
1151   }
1152   f->last_seg = FALSE;
1153   f->valid_bits = 0;
1154   f->packet_bytes = 0;
1155   f->bytes_in_seg = 0;
1156   // f->next_seg is now valid
1157   return TRUE;
1158}
1159
1160static int maybe_start_packet(vorb *f)
1161{
1162   if (f->next_seg == -1) {
1163      int x = get8(f);
1164      if (f->eof) return FALSE; // EOF at page boundary is not an error!
1165      if (0x4f != x      ) return error(f, VORBIS_missing_capture_pattern);
1166      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1167      if (0x67 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1168      if (0x53 != get8(f)) return error(f, VORBIS_missing_capture_pattern);
1169      if (!start_page_no_capturepattern(f)) return FALSE;
1170      if (f->page_flag & PAGEFLAG_continued_packet) {
1171         // set up enough state that we can read this packet if we want,
1172         // e.g. during recovery
1173         f->last_seg = FALSE;
1174         f->bytes_in_seg = 0;
1175         return error(f, VORBIS_continued_packet_flag_invalid);
1176      }
1177   }
1178   return start_packet(f);
1179}
1180
1181static int next_segment(vorb *f)
1182{
1183   int len;
1184   if (f->last_seg) return 0;
1185   if (f->next_seg == -1) {
1186      f->last_seg_which = f->segment_count-1; // in case start_page fails
1187      if (!start_page(f)) { f->last_seg = 1; return 0; }
1188      if (!(f->page_flag & PAGEFLAG_continued_packet)) return error(f, VORBIS_continued_packet_flag_invalid);
1189   }
1190   len = f->segments[f->next_seg++];
1191   if (len < 255) {
1192      f->last_seg = TRUE;
1193      f->last_seg_which = f->next_seg-1;
1194   }
1195   if (f->next_seg >= f->segment_count)
1196      f->next_seg = -1;
1197   assert(f->bytes_in_seg == 0);
1198   f->bytes_in_seg = len;
1199   return len;
1200}
1201
// EOP: returned by the packet byte reader when the packet has no more data
#define EOP    (-1)
// INVALID_BITS: stored in f->valid_bits once a bit read has hit EOP
#define INVALID_BITS  (-1)
1204
1205static int get8_packet_raw(vorb *f)
1206{
1207   if (!f->bytes_in_seg)
1208      if (f->last_seg) return EOP;
1209      else if (!next_segment(f)) return EOP;
1210   assert(f->bytes_in_seg > 0);
1211   --f->bytes_in_seg;
1212   ++f->packet_bytes;
1213   return get8(f);
1214}
1215
1216static int get8_packet(vorb *f)
1217{
1218   int x = get8_packet_raw(f);
1219   f->valid_bits = 0;
1220   return x;
1221}
1222
1223static void flush_packet(vorb *f)
1224{
1225   while (get8_packet_raw(f) != EOP);
1226}
1227
1228// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
1229// as the huffman decoder?
1230static uint32 get_bits(vorb *f, int n)
1231{
1232   uint32 z;
1233
1234   if (f->valid_bits < 0) return 0;
1235   if (f->valid_bits < n) {
1236      if (n > 24) {
1237         // the accumulator technique below would not work correctly in this case
1238         z = get_bits(f, 24);
1239         z += get_bits(f, n-24) << 24;
1240         return z;
1241      }
1242      if (f->valid_bits == 0) f->acc = 0;
1243      while (f->valid_bits < n) {
1244         int z = get8_packet_raw(f);
1245         if (z == EOP) {
1246            f->valid_bits = INVALID_BITS;
1247            return 0;
1248         }
1249         f->acc += z << f->valid_bits;
1250         f->valid_bits += 8;
1251      }
1252   }
1253   if (f->valid_bits < 0) return 0;
1254   z = f->acc & ((1 << n)-1);
1255   f->acc >>= n;
1256   f->valid_bits -= n;
1257   return z;
1258}
1259
1260static int32 get_bits_signed(vorb *f, int n)
1261{
1262   uint32 z = get_bits(f, n);
1263   if (z & (1 << (n-1)))
1264      z += ~((1 << n) - 1);
1265   return (int32) z;
1266}
1267
1268// @OPTIMIZE: primary accumulator for huffman
1269// expand the buffer to as many bits as possible without reading off end of packet
1270// it might be nice to allow f->valid_bits and f->acc to be stored in registers,
1271// e.g. cache them locally and decode locally
1272static __forceinline void prep_huffman(vorb *f)
1273{
1274   if (f->valid_bits <= 24) {
1275      if (f->valid_bits == 0) f->acc = 0;
1276      do {
1277         int z;
1278         if (f->last_seg && !f->bytes_in_seg) return;
1279         z = get8_packet_raw(f);
1280         if (z == EOP) return;
1281         f->acc += z << f->valid_bits;
1282         f->valid_bits += 8;
1283      } while (f->valid_bits <= 24);
1284   }
1285}
1286
// packet-type codes for the id, comment and setup packets
// NOTE(review): presumably matched against the leading byte of each header
// packet; the code that uses them is outside this section -- confirm
enum
{
   VORBIS_packet_id = 1,
   VORBIS_packet_comment = 3,
   VORBIS_packet_setup = 5,
};
1293
1294static int codebook_decode_scalar_raw(vorb *f, Codebook *c)
1295{
1296   int i;
1297   prep_huffman(f);
1298
1299   assert(c->sorted_codewords || c->codewords);
1300   // cases to use binary search: sorted_codewords && !c->codewords
1301   //                             sorted_codewords && c->entries > 8
1302   if (c->entries > 8 ? c->sorted_codewords!=NULL : !c->codewords) {
1303      // binary search
1304      uint32 code = bit_reverse(f->acc);
1305      int x=0, n=c->sorted_entries, len;
1306
1307      while (n > 1) {
1308         // invariant: sc[x] <= code < sc[x+n]
1309         int m = x + (n >> 1);
1310         if (c->sorted_codewords[m] <= code) {
1311            x = m;
1312            n -= (n>>1);
1313         } else {
1314            n >>= 1;
1315         }
1316      }
1317      // x is now the sorted index
1318      if (!c->sparse) x = c->sorted_values[x];
1319      // x is now sorted index if sparse, or symbol otherwise
1320      len = c->codeword_lengths[x];
1321      if (f->valid_bits >= len) {
1322         f->acc >>= len;
1323         f->valid_bits -= len;
1324         return x;
1325      }
1326
1327      f->valid_bits = 0;
1328      return -1;
1329   }
1330
1331   // if small, linear search
1332   assert(!c->sparse);
1333   for (i=0; i < c->entries; ++i) {
1334      if (c->codeword_lengths[i] == NO_CODE) continue;
1335      if (c->codewords[i] == (f->acc & ((1 << c->codeword_lengths[i])-1))) {
1336         if (f->valid_bits >= c->codeword_lengths[i]) {
1337            f->acc >>= c->codeword_lengths[i];
1338            f->valid_bits -= c->codeword_lengths[i];
1339            return i;
1340         }
1341         f->valid_bits = 0;
1342         return -1;
1343      }
1344   }
1345
1346   error(f, VORBIS_invalid_stream);
1347   f->valid_bits = 0;
1348   return -1;
1349}
1350
1351static int codebook_decode_scalar(vorb *f, Codebook *c)
1352{
1353   int i;
1354   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)
1355      prep_huffman(f);
1356   // fast huffman table lookup
1357   i = f->acc & FAST_HUFFMAN_TABLE_MASK;
1358   i = c->fast_huffman[i];
1359   if (i >= 0) {
1360      f->acc >>= c->codeword_lengths[i];
1361      f->valid_bits -= c->codeword_lengths[i];
1362      if (f->valid_bits < 0) { f->valid_bits = 0; return -1; }
1363      return i;
1364   }
1365   return codebook_decode_scalar_raw(f,c);
1366}
1367
// Codeword decode macros. Each stores the decoded value in 'var'
// (-1 when decoding fails). They expand to several statements and are not
// wrapped in do/while, so use them only as a complete statement inside a
// braced block, and pass plain identifiers for var, f and c.
//
// DECODE_RAW: inline copy of codebook_decode_scalar() unless
//             STB_VORBIS_NO_INLINE_DECODE is defined, in which case it
//             simply calls that function.
#ifndef STB_VORBIS_NO_INLINE_DECODE

#define DECODE_RAW(var, f,c)                                  \
   if (f->valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH)        \
      prep_huffman(f);                                        \
   var = f->acc & FAST_HUFFMAN_TABLE_MASK;                    \
   var = c->fast_huffman[var];                                \
   if (var >= 0) {                                            \
      int n = c->codeword_lengths[var];                       \
      f->acc >>= n;                                           \
      f->valid_bits -= n;                                     \
      if (f->valid_bits < 0) { f->valid_bits = 0; var = -1; } \
   } else {                                                   \
      var = codebook_decode_scalar_raw(f,c);                  \
   }

#else

#define DECODE_RAW(var,f,c)    var = codebook_decode_scalar(f,c);

#endif

// DECODE: DECODE_RAW, then for sparse codebooks map the result through
// c->sorted_values.
// NOTE(review): the mapping is applied even when var is -1, so this
// relies on c->sorted_values[-1] being readable -- confirm at the
// allocation site.
#define DECODE(var,f,c)                                       \
   DECODE_RAW(var,f,c)                                        \
   if (c->sparse) var = c->sorted_values[var];

// DECODE_VQ: variant used by the vector decoders; skips the sorted_values
// mapping unless STB_VORBIS_DIVIDES_IN_CODEBOOK is defined.
#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
  #define DECODE_VQ(var,f,c)   DECODE_RAW(var,f,c)
#else
  #define DECODE_VQ(var,f,c)   DECODE(var,f,c)
#endif
1399
1400
1401
1402
1403
1404
// Accessors for codebook vector elements.
//   CODEBOOK_ELEMENT(c,off)      complete value of element 'off'
//   CODEBOOK_ELEMENT_FAST(c,off) the value without the base term
//   CODEBOOK_ELEMENT_BASE(c)     the base term on its own
// Without STB_VORBIS_CODEBOOK_FLOATS the value is computed as
// multiplicand * delta_value + minimum_value on every access. With it,
// c->multiplicands[off] is used directly and the base is 0.
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition
#ifndef STB_VORBIS_CODEBOOK_FLOATS
   #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off] * c->delta_value + c->minimum_value)
   #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off] * c->delta_value)
   #define CODEBOOK_ELEMENT_BASE(c)         (c->minimum_value)
#else
   #define CODEBOOK_ELEMENT(c,off)          (c->multiplicands[off])
   #define CODEBOOK_ELEMENT_FAST(c,off)     (c->multiplicands[off])
   #define CODEBOOK_ELEMENT_BASE(c)         (0)
#endif
1416
1417static int codebook_decode_start(vorb *f, Codebook *c, int len)
1418{
1419   int z = -1;
1420
1421   // type 0 is only legal in a scalar context
1422   if (c->lookup_type == 0)
1423      error(f, VORBIS_invalid_stream);
1424   else {
1425      DECODE_VQ(z,f,c);
1426      if (c->sparse) assert(z < c->sorted_entries);
1427      if (z < 0) {  // check for EOP
1428         if (!f->bytes_in_seg)
1429            if (f->last_seg)
1430               return z;
1431         error(f, VORBIS_invalid_stream);
1432      }
1433   }
1434   return z;
1435}
1436
1437static int codebook_decode(vorb *f, Codebook *c, float *output, int len)
1438{
1439   int i,z = codebook_decode_start(f,c,len);
1440   if (z < 0) return FALSE;
1441   if (len > c->dimensions) len = c->dimensions;
1442
1443#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1444   if (c->lookup_type == 1) {
1445      float last = CODEBOOK_ELEMENT_BASE(c);
1446      int div = 1;
1447      for (i=0; i < len; ++i) {
1448         int off = (z / div) % c->lookup_values;
1449         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1450         output[i] += val;
1451         if (c->sequence_p) last = val + c->minimum_value;
1452         div *= c->lookup_values;
1453      }
1454      return TRUE;
1455   }
1456#endif
1457
1458   z *= c->dimensions;
1459   if (c->sequence_p) {
1460      float last = CODEBOOK_ELEMENT_BASE(c);
1461      for (i=0; i < len; ++i) {
1462         float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1463         output[i] += val;
1464         last = val + c->minimum_value;
1465      }
1466   } else {
1467      float last = CODEBOOK_ELEMENT_BASE(c);
1468      for (i=0; i < len; ++i) {
1469         output[i] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1470      }
1471   }
1472
1473   return TRUE;
1474}
1475
1476static int codebook_decode_step(vorb *f, Codebook *c, float *output, int len, int step)
1477{
1478   int i,z = codebook_decode_start(f,c,len);
1479   float last = CODEBOOK_ELEMENT_BASE(c);
1480   if (z < 0) return FALSE;
1481   if (len > c->dimensions) len = c->dimensions;
1482
1483#ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1484   if (c->lookup_type == 1) {
1485      int div = 1;
1486      for (i=0; i < len; ++i) {
1487         int off = (z / div) % c->lookup_values;
1488         float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1489         output[i*step] += val;
1490         if (c->sequence_p) last = val;
1491         div *= c->lookup_values;
1492      }
1493      return TRUE;
1494   }
1495#endif
1496
1497   z *= c->dimensions;
1498   for (i=0; i < len; ++i) {
1499      float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1500      output[i*step] += val;
1501      if (c->sequence_p) last = val;
1502   }
1503
1504   return TRUE;
1505}
1506
1507static int codebook_decode_deinterleave_repeat(vorb *f, Codebook *c, float **outputs, int ch, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1508{
1509   int c_inter = *c_inter_p;
1510   int p_inter = *p_inter_p;
1511   int i,z, effective = c->dimensions;
1512
1513   // type 0 is only legal in a scalar context
1514   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1515
1516   while (total_decode > 0) {
1517      float last = CODEBOOK_ELEMENT_BASE(c);
1518      DECODE_VQ(z,f,c);
1519      #ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1520      assert(!c->sparse || z < c->sorted_entries);
1521      #endif
1522      if (z < 0) {
1523         if (!f->bytes_in_seg)
1524            if (f->last_seg) return FALSE;
1525         return error(f, VORBIS_invalid_stream);
1526      }
1527
1528      // if this will take us off the end of the buffers, stop short!
1529      // we check by computing the length of the virtual interleaved
1530      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1531      // and the length we'll be using (effective)
1532      if (c_inter + p_inter*ch + effective > len * ch) {
1533         effective = len*ch - (p_inter*ch - c_inter);
1534      }
1535
1536   #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1537      if (c->lookup_type == 1) {
1538         int div = 1;
1539         for (i=0; i < effective; ++i) {
1540            int off = (z / div) % c->lookup_values;
1541            float val = CODEBOOK_ELEMENT_FAST(c,off) + last;
1542            outputs[c_inter][p_inter] += val;
1543            if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1544            if (c->sequence_p) last = val;
1545            div *= c->lookup_values;
1546         }
1547      } else
1548   #endif
1549      {
1550         z *= c->dimensions;
1551         if (c->sequence_p) {
1552            for (i=0; i < effective; ++i) {
1553               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1554               outputs[c_inter][p_inter] += val;
1555               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1556               last = val;
1557            }
1558         } else {
1559            for (i=0; i < effective; ++i) {
1560               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1561               outputs[c_inter][p_inter] += val;
1562               if (++c_inter == ch) { c_inter = 0; ++p_inter; }
1563            }
1564         }
1565      }
1566
1567      total_decode -= effective;
1568   }
1569   *c_inter_p = c_inter;
1570   *p_inter_p = p_inter;
1571   return TRUE;
1572}
1573
1574#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
1575static int codebook_decode_deinterleave_repeat_2(vorb *f, Codebook *c, float **outputs, int *c_inter_p, int *p_inter_p, int len, int total_decode)
1576{
1577   int c_inter = *c_inter_p;
1578   int p_inter = *p_inter_p;
1579   int i,z, effective = c->dimensions;
1580
1581   // type 0 is only legal in a scalar context
1582   if (c->lookup_type == 0)   return error(f, VORBIS_invalid_stream);
1583
1584   while (total_decode > 0) {
1585      float last = CODEBOOK_ELEMENT_BASE(c);
1586      DECODE_VQ(z,f,c);
1587
1588      if (z < 0) {
1589         if (!f->bytes_in_seg)
1590            if (f->last_seg) return FALSE;
1591         return error(f, VORBIS_invalid_stream);
1592      }
1593
1594      // if this will take us off the end of the buffers, stop short!
1595      // we check by computing the length of the virtual interleaved
1596      // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
1597      // and the length we'll be using (effective)
1598      if (c_inter + p_inter*2 + effective > len * 2) {
1599         effective = len*2 - (p_inter*2 - c_inter);
1600      }
1601
1602      {
1603         z *= c->dimensions;
1604         stb_prof(11);
1605         if (c->sequence_p) {
1606            // haven't optimized this case because I don't have any examples
1607            for (i=0; i < effective; ++i) {
1608               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1609               outputs[c_inter][p_inter] += val;
1610               if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1611               last = val;
1612            }
1613         } else {
1614            i=0;
1615            if (c_inter == 1) {
1616               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1617               outputs[c_inter][p_inter] += val;
1618               c_inter = 0; ++p_inter;
1619               ++i;
1620            }
1621            {
1622               float *z0 = outputs[0];
1623               float *z1 = outputs[1];
1624               for (; i+1 < effective;) {
1625                  z0[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1626                  z1[p_inter] += CODEBOOK_ELEMENT_FAST(c,z+i+1) + last;
1627                  ++p_inter;
1628                  i += 2;
1629               }
1630            }
1631            if (i < effective) {
1632               float val = CODEBOOK_ELEMENT_FAST(c,z+i) + last;
1633               outputs[c_inter][p_inter] += val;
1634               if (++c_inter == 2) { c_inter = 0; ++p_inter; }
1635            }
1636         }
1637      }
1638
1639      total_decode -= effective;
1640   }
1641   *c_inter_p = c_inter;
1642   *p_inter_p = p_inter;
1643   return TRUE;
1644}
1645#endif
1646
// Integer interpolation along the line (x0,y0)-(x1,y1): returns the y
// value at x. The offset |dy|*(x-x0)/(x1-x0) is computed with truncating
// integer division and then applied in the direction of dy.
// Requires x1 != x0.
static int predict_point(int x, int x0, int x1, int y0, int y1)
{
   const int rise = y1 - y0;
   const int run = x1 - x0;
   const int offset = (abs(rise) * (x - x0)) / run;
   if (rise < 0)
      return y0 - offset;
   return y0 + offset;
}
1656
// the following table is block-copied from the specification
// 256 entries rising monotonically from about 1.06e-07 to 1.0;
// draw_line() indexes it with the integer floor value y to obtain the
// linear multiplier applied to each output sample.
static float inverse_db_table[256] =
{
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f, 
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f, 
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f, 
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f, 
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f, 
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f, 
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f, 
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f, 
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f, 
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f, 
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f, 
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f, 
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f, 
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f, 
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f, 
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f, 
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f, 
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f, 
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f, 
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f, 
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f, 
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f, 
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f, 
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f, 
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f, 
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f, 
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f, 
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f, 
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f, 
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f, 
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f, 
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f, 
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f, 
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f, 
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f, 
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f, 
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f, 
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f, 
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f, 
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f, 
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f, 
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f, 
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f, 
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f, 
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f, 
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f, 
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f, 
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f, 
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f, 
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f, 
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f, 
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f, 
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f, 
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f, 
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f, 
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f, 
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f, 
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f, 
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f, 
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f, 
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f, 
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f, 
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f, 
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
};
1725
1726
1727// @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1728// note that you must produce bit-identical output to decode correctly;
1729// this specific sequence of operations is specified in the spec (it's
1730// drawing integer-quantized frequency-space lines that the encoder
1731// expects to be exactly the same)
1732//     ... also, isn't the whole point of Bresenham's algorithm to NOT
1733// have to divide in the setup? sigh.
// LINE_OP(a,b): how draw_line() applies a floor value b to output sample a.
// By default the value is multiplied into the sample; with
// STB_VORBIS_NO_DEFER_FLOOR defined it overwrites the sample instead.
#ifndef STB_VORBIS_NO_DEFER_FLOOR
#define LINE_OP(a,b)   a *= b
#else
#define LINE_OP(a,b)   a = b
#endif
1739
// Optional lookup table that draw_line() reads as integer_divide_table[ady][adx]
// in place of a division when ady < DIVTAB_NUMER and adx < DIVTAB_DENOM.
// The table is only declared here; it is filled in elsewhere.
#ifdef STB_VORBIS_DIVIDE_TABLE
#define DIVTAB_NUMER   32
#define DIVTAB_DENOM   64
int8 integer_divide_table[DIVTAB_NUMER][DIVTAB_DENOM]; // 2KB
#endif
1745
1746static __forceinline void draw_line(float *output, int x0, int y0, int x1, int y1, int n)
1747{
1748   int dy = y1 - y0;
1749   int adx = x1 - x0;
1750   int ady = abs(dy);
1751   int base;
1752   int x=x0,y=y0;
1753   int err = 0;
1754   int sy;
1755
1756#ifdef STB_VORBIS_DIVIDE_TABLE
1757   if (adx < DIVTAB_DENOM && ady < DIVTAB_NUMER) {
1758      if (dy < 0) {
1759         base = -integer_divide_table[ady][adx];
1760         sy = base-1;
1761      } else {
1762         base =  integer_divide_table[ady][adx];
1763         sy = base+1;
1764      }
1765   } else {
1766      base = dy / adx;
1767      if (dy < 0)
1768         sy = base - 1;
1769      else
1770         sy = base+1;
1771   }
1772#else
1773   base = dy / adx;
1774   if (dy < 0)
1775      sy = base - 1;
1776   else
1777      sy = base+1;
1778#endif
1779   ady -= abs(base) * adx;
1780   if (x1 > n) x1 = n;
1781   LINE_OP(output[x], inverse_db_table[y]);
1782   for (++x; x < x1; ++x) {
1783      err += ady;
1784      if (err >= adx) {
1785         err -= adx;
1786         y += sy;
1787      } else
1788         y += base;
1789      LINE_OP(output[x], inverse_db_table[y]);
1790   }
1791}
1792
1793static int residue_decode(vorb *f, Codebook *book, float *target, int offset, int n, int rtype)
1794{
1795   int k;
1796   if (rtype == 0) {
1797      int step = n / book->dimensions;
1798      for (k=0; k < step; ++k)
1799         if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step))
1800            return FALSE;
1801   } else {
1802      for (k=0; k < n; ) {
1803         if (!codebook_decode(f, book, target+offset, n-k))
1804            return FALSE;
1805         k += book->dimensions;
1806         offset += book->dimensions;
1807      }
1808   }
1809   return TRUE;
1810}
1811
1812static void decode_residue(vorb *f, float *residue_buffers[], int ch, int n, int rn, uint8 *do_not_decode)
1813{
1814   int i,j,pass;
1815   Residue *r = f->residue_config + rn;
1816   int rtype = f->residue_types[rn];
1817   int c = r->classbook;
1818   int classwords = f->codebooks[c].dimensions;
1819   int n_read = r->end - r->begin;
1820   int part_read = n_read / r->part_size;
1821   int temp_alloc_point = temp_alloc_save(f);
1822   #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1823   uint8 ***part_classdata = (uint8 ***) temp_block_array(f,f->channels, part_read * sizeof(**part_classdata));
1824   #else
1825   int **classifications = (int **) temp_block_array(f,f->channels, part_read * sizeof(**classifications));
1826   #endif
1827
1828   stb_prof(2);
1829   for (i=0; i < ch; ++i)
1830      if (!do_not_decode[i])
1831         memset(residue_buffers[i], 0, sizeof(float) * n);
1832
1833   if (rtype == 2 && ch != 1) {
1834      int len = ch * n;
1835      for (j=0; j < ch; ++j)
1836         if (!do_not_decode[j])
1837            break;
1838      if (j == ch)
1839         goto done;
1840
1841      stb_prof(3);
1842      for (pass=0; pass < 8; ++pass) {
1843         int pcount = 0, class_set = 0;
1844         if (ch == 2) {
1845            stb_prof(13);
1846            while (pcount < part_read) {
1847               int z = r->begin + pcount*r->part_size;
1848               int c_inter = (z & 1), p_inter = z>>1;
1849               if (pass == 0) {
1850                  Codebook *c = f->codebooks+r->classbook;
1851                  int q;
1852                  DECODE(q,f,c);
1853                  if (q == EOP) goto done;
1854                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1855                  part_classdata[0][class_set] = r->classdata[q];
1856                  #else
1857                  for (i=classwords-1; i >= 0; --i) {
1858                     classifications[0][i+pcount] = q % r->classifications;
1859                     q /= r->classifications;
1860                  }
1861                  #endif
1862               }
1863               stb_prof(5);
1864               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1865                  int z = r->begin + pcount*r->part_size;
1866                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1867                  int c = part_classdata[0][class_set][i];
1868                  #else
1869                  int c = classifications[0][pcount];
1870                  #endif
1871                  int b = r->residue_books[c][pass];
1872                  if (b >= 0) {
1873                     Codebook *book = f->codebooks + b;
1874                     stb_prof(20);  // accounts for X time
1875                     #ifdef STB_VORBIS_DIVIDES_IN_CODEBOOK
1876                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1877                        goto done;
1878                     #else
1879                     // saves 1%
1880                     if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r->part_size))
1881                        goto done;
1882                     #endif
1883                     stb_prof(7);
1884                  } else {
1885                     z += r->part_size;
1886                     c_inter = z & 1;
1887                     p_inter = z >> 1;
1888                  }
1889               }
1890               stb_prof(8);
1891               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1892               ++class_set;
1893               #endif
1894            }
1895         } else if (ch == 1) {
1896            while (pcount < part_read) {
1897               int z = r->begin + pcount*r->part_size;
1898               int c_inter = 0, p_inter = z;
1899               if (pass == 0) {
1900                  Codebook *c = f->codebooks+r->classbook;
1901                  int q;
1902                  DECODE(q,f,c);
1903                  if (q == EOP) goto done;
1904                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1905                  part_classdata[0][class_set] = r->classdata[q];
1906                  #else
1907                  for (i=classwords-1; i >= 0; --i) {
1908                     classifications[0][i+pcount] = q % r->classifications;
1909                     q /= r->classifications;
1910                  }
1911                  #endif
1912               }
1913               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1914                  int z = r->begin + pcount*r->part_size;
1915                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1916                  int c = part_classdata[0][class_set][i];
1917                  #else
1918                  int c = classifications[0][pcount];
1919                  #endif
1920                  int b = r->residue_books[c][pass];
1921                  if (b >= 0) {
1922                     Codebook *book = f->codebooks + b;
1923                     stb_prof(22);
1924                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1925                        goto done;
1926                     stb_prof(3);
1927                  } else {
1928                     z += r->part_size;
1929                     c_inter = 0;
1930                     p_inter = z;
1931                  }
1932               }
1933               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1934               ++class_set;
1935               #endif
1936            }
1937         } else {
1938            while (pcount < part_read) {
1939               int z = r->begin + pcount*r->part_size;
1940               int c_inter = z % ch, p_inter = z/ch;
1941               if (pass == 0) {
1942                  Codebook *c = f->codebooks+r->classbook;
1943                  int q;
1944                  DECODE(q,f,c);
1945                  if (q == EOP) goto done;
1946                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1947                  part_classdata[0][class_set] = r->classdata[q];
1948                  #else
1949                  for (i=classwords-1; i >= 0; --i) {
1950                     classifications[0][i+pcount] = q % r->classifications;
1951                     q /= r->classifications;
1952                  }
1953                  #endif
1954               }
1955               for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
1956                  int z = r->begin + pcount*r->part_size;
1957                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1958                  int c = part_classdata[0][class_set][i];
1959                  #else
1960                  int c = classifications[0][pcount];
1961                  #endif
1962                  int b = r->residue_books[c][pass];
1963                  if (b >= 0) {
1964                     Codebook *book = f->codebooks + b;
1965                     stb_prof(22);
1966                     if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r->part_size))
1967                        goto done;
1968                     stb_prof(3);
1969                  } else {
1970                     z += r->part_size;
1971                     c_inter = z % ch;
1972                     p_inter = z / ch;
1973                  }
1974               }
1975               #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1976               ++class_set;
1977               #endif
1978            }
1979         }
1980      }
1981      goto done;
1982   }
1983   stb_prof(9);
1984
1985   for (pass=0; pass < 8; ++pass) {
1986      int pcount = 0, class_set=0;
1987      while (pcount < part_read) {
1988         if (pass == 0) {
1989            for (j=0; j < ch; ++j) {
1990               if (!do_not_decode[j]) {
1991                  Codebook *c = f->codebooks+r->classbook;
1992                  int temp;
1993                  DECODE(temp,f,c);
1994                  if (temp == EOP) goto done;
1995                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
1996                  part_classdata[j][class_set] = r->classdata[temp];
1997                  #else
1998                  for (i=classwords-1; i >= 0; --i) {
1999                     classifications[j][i+pcount] = temp % r->classifications;
2000                     temp /= r->classifications;
2001                  }
2002                  #endif
2003               }
2004            }
2005         }
2006         for (i=0; i < classwords && pcount < part_read; ++i, ++pcount) {
2007            for (j=0; j < ch; ++j) {
2008               if (!do_not_decode[j]) {
2009                  #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2010                  int c = part_classdata[j][class_set][i];
2011                  #else
2012                  int c = classifications[j][pcount];
2013                  #endif
2014                  int b = r->residue_books[c][pass];
2015                  if (b >= 0) {
2016                     float *target = residue_buffers[j];
2017                     int offset = r->begin + pcount * r->part_size;
2018                     int n = r->part_size;
2019                     Codebook *book = f->codebooks + b;
2020                     if (!residue_decode(f, book, target, offset, n, rtype))
2021                        goto done;
2022                  }
2023               }
2024            }
2025         }
2026         #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
2027         ++class_set;
2028         #endif
2029      }
2030   }
2031  done:
2032   stb_prof(0);
2033   temp_alloc_restore(f,temp_alloc_point);
2034}
2035
2036
2037#if 0
2038// slow way for debugging
2039void inverse_mdct_slow(float *buffer, int n)
2040{
2041   int i,j;
2042   int n2 = n >> 1;
2043   float *x = (float *) malloc(sizeof(*x) * n2);
2044   memcpy(x, buffer, sizeof(*x) * n2);
2045   for (i=0; i < n; ++i) {
2046      float acc = 0;
2047      for (j=0; j < n2; ++j)
2048         // formula from paper:
2049         //acc += n/4.0f * x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2050         // formula from wikipedia
2051         //acc += 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2052         // these are equivalent, except the formula from the paper inverts the multiplier!
2053         // however, what actually works is NO MULTIPLIER!?!
2054         //acc += 64 * 2.0f / n2 * x[j] * (float) cos(M_PI/n2 * (i + 0.5 + n2/2)*(j + 0.5));
2055         acc += x[j] * (float) cos(M_PI / 2 / n * (2 * i + 1 + n/2.0)*(2*j+1));
2056      buffer[i] = acc;
2057   }
2058   free(x);
2059}
2060#elif 0
2061// same as above, but just barely able to run in real time on modern machines
2062void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2063{
2064   float mcos[16384];
2065   int i,j;
2066   int n2 = n >> 1, nmask = (n << 2) -1;
2067   float *x = (float *) malloc(sizeof(*x) * n2);
2068   memcpy(x, buffer, sizeof(*x) * n2);
2069   for (i=0; i < 4*n; ++i)
2070      mcos[i] = (float) cos(M_PI / 2 * i / n);
2071
2072   for (i=0; i < n; ++i) {
2073      float acc = 0;
2074      for (j=0; j < n2; ++j)
2075         acc += x[j] * mcos[(2 * i + 1 + n2)*(2*j+1) & nmask];
2076      buffer[i] = acc;
2077   }
2078   free(x);
2079}
2080#else
2081// transform to use a slow dct-iv; this is STILL basically trivial,
2082// but only requires half as many ops
2083void dct_iv_slow(float *buffer, int n)
2084{
2085   float mcos[16384];
2086   float x[2048];
2087   int i,j;
2088   int n2 = n >> 1, nmask = (n << 3) - 1;
2089   memcpy(x, buffer, sizeof(*x) * n);
2090   for (i=0; i < 8*n; ++i)
2091      mcos[i] = (float) cos(M_PI / 4 * i / n);
2092   for (i=0; i < n; ++i) {
2093      float acc = 0;
2094      for (j=0; j < n; ++j)
2095         acc += x[j] * mcos[((2 * i + 1)*(2*j+1)) & nmask];
2096         //acc += x[j] * cos(M_PI / n * (i + 0.5) * (j + 0.5));
2097      buffer[i] = acc;
2098   }
2099   free(x);
2100}
2101
2102void inverse_mdct_slow(float *buffer, int n, vorb *f, int blocktype)
2103{
2104   int i, n4 = n >> 2, n2 = n >> 1, n3_4 = n - n4;
2105   float temp[4096];
2106
2107   memcpy(temp, buffer, n2 * sizeof(float));
2108   dct_iv_slow(temp, n2);  // returns -c'-d, a-b'
2109
2110   for (i=0; i < n4  ; ++i) buffer[i] = temp[i+n4];            // a-b'
2111   for (   ; i < n3_4; ++i) buffer[i] = -temp[n3_4 - i - 1];   // b-a', c+d'
2112   for (   ; i < n   ; ++i) buffer[i] = -temp[i - n3_4];       // c'+d
2113}
2114#endif
2115
2116#ifndef LIBVORBIS_MDCT
2117#define LIBVORBIS_MDCT 0
2118#endif
2119
#if LIBVORBIS_MDCT
// Directly call the libvorbis MDCT using an interface documented
// by Jeff Roberts... useful for performance comparison.

// Lookup state filled in by mdct_init(). The layout has to match the
// external implementation, so the field order must not be changed.
typedef struct 
{
  int n;          // transform size this lookup was initialised for
  int log2n;
  
  float *trig;
  int   *bitrev;

  float scale;
} mdct_lookup;

extern void mdct_init(mdct_lookup *lookup, int n);
extern void mdct_clear(mdct_lookup *l);
extern void mdct_backward(mdct_lookup *init, float *in, float *out);

// One lookup per transform size seen so far; n == 0 marks an unused slot.
// At most two different sizes are supported, and the lookups are never
// released with mdct_clear().
mdct_lookup M1,M2;

// Inverse MDCT of 'buffer' in place through mdct_backward().
// The lookup matching n is created on first use. 'f' and 'blocktype' are
// not used by this variant.
void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
{
   mdct_lookup *M;
   if (M1.n == n) M = &M1;
   else if (M2.n == n) M = &M2;
   else if (M1.n == 0) { mdct_init(&M1, n); M = &M1; }
   else { 
      // A third distinct size would overwrite the second slot. This used to
      // trap with an MSVC x86-only "__asm int 3"; assert is the portable
      // equivalent for a debug-time check.
      assert(M2.n == 0);
      mdct_init(&M2, n);
      M = &M2;
   }

   mdct_backward(M, buffer, buffer);
}
#endif
2155
2156
2157// the following were split out into separate functions while optimizing;
2158// they could be pushed back up but eh. __forceinline showed no change;
2159// they're probably already being inlined.
// Butterfly pass for iteration 0 of IMDCT step 3.
//
// Walks downward through two runs of floats: the "upper" run ends at
// e[i_off] and the "lower" run ends at e[i_off + k_off]. For each pair of
// adjacent floats, the upper pair receives upper + lower and the lower pair
// receives (upper - lower) rotated by the twiddle (A[0], A[1]). The twiddle
// pointer advances by 8 floats after every pair.
//
// n must be a multiple of 4; n/4 groups of four pairs (8 floats from each
// run) are processed.
static void imdct_step3_iter0_loop(int n, float *e, int i_off, int k_off, float *A)
{
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int group, pair;

   assert((n & 3) == 0);
   for (group = n >> 2; group > 0; --group) {
      for (pair = 0; pair < 4; ++pair) {
         float *hi = upper - 2*pair;
         float *lo = lower - 2*pair;
         float diff0 = hi[ 0] - lo[ 0];
         float diff1 = hi[-1] - lo[-1];

         hi[ 0] += lo[ 0];
         hi[-1] += lo[-1];
         lo[ 0] = diff0 * A[0] - diff1 * A[1];
         lo[-1] = diff1 * A[0] + diff0 * A[1];
         A += 8;
      }
      upper -= 8;
      lower -= 8;
   }
}
2204
// Butterfly pass for IMDCT step 3 with a caller-chosen twiddle stride.
//
// Same butterfly as imdct_step3_iter0_loop: the upper run ends at e[d0], the
// lower run ends at e[d0 + k_off]; the upper pair becomes upper + lower and
// the lower pair becomes (upper - lower) rotated by (A[0], A[1]). Here the
// twiddle pointer advances by k1 floats after every pair.
//
// lim/4 groups of four pairs (8 floats from each run) are processed.
static void imdct_step3_inner_r_loop(int lim, float *e, int d0, int k_off, float *A, int k1)
{
   float *upper = e + d0;
   float *lower = upper + k_off;
   int group, pair;

   for (group = lim >> 2; group > 0; --group) {
      for (pair = 0; pair < 4; ++pair) {
         float *hi = upper - 2*pair;
         float *lo = lower - 2*pair;
         float diff0 = hi[ 0] - lo[ 0];
         float diff1 = hi[-1] - lo[-1];

         hi[ 0] += lo[ 0];
         hi[-1] += lo[-1];
         lo[ 0] = (diff0)*A[0] - (diff1) * A[1];
         lo[-1] = (diff1)*A[0] + (diff0) * A[1];

         A += k1;
      }
      upper -= 8;
      lower -= 8;
   }
}
2254
// Butterfly pass for IMDCT step 3 that reuses the same four twiddles for
// every group.
//
// The four twiddle pairs are read once, before any data is modified, from
// A[0..1], A[a_off..a_off+1], A[2*a_off..+1] and A[3*a_off..+1]. Then n
// groups are processed; each group is four adjacent pairs (8 floats) of the
// upper run ending at e[i_off] and of the lower run ending at
// e[i_off + k_off]. The upper pair becomes upper + lower, the lower pair
// becomes (upper - lower) rotated by that pair's twiddle. Both runs move
// down by k0 floats between groups.
static void imdct_step3_inner_s_loop(int n, float *e, int i_off, int k_off, float *A, int a_off, int k0)
{
   float tw[8];
   float *upper = e + i_off;
   float *lower = upper + k_off;
   int group, pair;

   for (pair = 0; pair < 4; ++pair) {
      tw[2*pair  ] = A[0+a_off*pair+0];
      tw[2*pair+1] = A[0+a_off*pair+1];
   }

   for (group = n; group > 0; --group) {
      for (pair = 0; pair < 4; ++pair) {
         float *hi = upper - 2*pair;
         float *lo = lower - 2*pair;
         float diff0 = hi[ 0] - lo[ 0];
         float diff1 = hi[-1] - lo[-1];

         hi[ 0] = hi[ 0] + lo[ 0];
         hi[-1] = hi[-1] + lo[-1];
         lo[ 0] = (diff0) * tw[2*pair] - (diff1) * tw[2*pair+1];
         lo[-1] = (diff1) * tw[2*pair] + (diff0) * tw[2*pair+1];
      }
      upper -= k0;
      lower -= k0;
   }
}
2305
2306static __forceinline void iter_54(float *z)
2307{
2308   float k00,k11,k22,k33;
2309   float y0,y1,y2,y3;
2310
2311   k00  = z[ 0] - z[-4];
2312   y0   = z[ 0] + z[-4];
2313   y2   = z[-2] + z[-6];
2314   k22  = z[-2] - z[-6];
2315
2316   z[-0] = y0 + y2;      // z0 + z4 + z2 + z6
2317   z[-2] = y0 - y2;      // z0 + z4 - z2 - z6
2318
2319   // done with y0,y2
2320
2321   k33  = z[-3] - z[-7];
2322
2323   z[-4] = k00 + k33;    // z0 - z4 + z3 - z7
2324   z[-6] = k00 - k33;    // z0 - z4 - z3 + z7
2325
2326   // done with k33
2327
2328   k11  = z[-1] - z[-5];
2329   y1   = z[-1] + z[-5];
2330   y3   = z[-3] + z[-7];
2331
2332   z[-1] = y1 + y3;      // z1 + z5 + z3 + z7
2333   z[-3] = y1 - y3;      // z1 + z5 - z3 - z7
2334   z[-5] = k11 - k22;    // z1 - z5 + z2 - z6
2335   z[-7] = k11 + k22;    // z1 - z5 - z2 + z6
2336}
2337
// Last three iterations of IMDCT step 3 (counts ld-6, ld-5 and ld-4),
// interleaved into a single sweep.
//
// Processes n blocks of 16 floats, starting with the block that ends at
// e[i_off] and moving downward. In each block the upper 8 floats are
// butterflied against the lower 8, then iter_54() is applied to each half.
// Only one twiddle value is needed, A[base_n >> 3]; the other rotations
// reduce to copies and sign changes.
static void imdct_step3_inner_s_loop_ld654(int n, float *e, int i_off, float *A, int base_n)
{
   const int a_off = base_n >> 3;
   const float w = A[0+a_off];
   float *z = e + i_off;
   float *stop = z - 16 * n;

   for (; z > stop; z -= 16) {
      float *hi = z;       // upper 8 floats of the block: hi[0] .. hi[-7]
      float *lo = z - 8;   // lower 8 floats of the block: lo[0] .. lo[-7]
      float d0, d1;

      d0     = hi[ 0] - lo[ 0];
      d1     = hi[-1] - lo[-1];
      hi[ 0] = hi[ 0] + lo[ 0];
      hi[-1] = hi[-1] + lo[-1];
      lo[ 0] = d0;
      lo[-1] = d1;

      d0     = hi[-2] - lo[-2];
      d1     = hi[-3] - lo[-3];
      hi[-2] = hi[-2] + lo[-2];
      hi[-3] = hi[-3] + lo[-3];
      lo[-2] = (d0+d1) * w;
      lo[-3] = (d1-d0) * w;

      d0     = lo[-4] - hi[-4];  // reversed to avoid a unary negation
      d1     = hi[-5] - lo[-5];
      hi[-4] = hi[-4] + lo[-4];
      hi[-5] = hi[-5] + lo[-5];
      lo[-4] = d1;
      lo[-5] = d0;

      d0     = lo[-6] - hi[-6];  // reversed to avoid a unary negation
      d1     = hi[-7] - lo[-7];
      hi[-6] = hi[-6] + lo[-6];
      hi[-7] = hi[-7] + lo[-7];
      lo[-6] = (d0+d1) * w;
      lo[-7] = (d0-d1) * w;

      iter_54(hi);
      iter_54(lo);
   }
}
2382
2383static void inverse_mdct(float *buffer, int n, vorb *f, int blocktype)
2384{
2385   int n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2386   int n3_4 = n - n4, ld;
2387   // @OPTIMIZE: reduce register pressure by using fewer variables?
2388   int save_point = temp_alloc_save(f);
2389   float *buf2 = (float *) temp_alloc(f, n2 * sizeof(*buf2));
2390   float *u=NULL,*v=NULL;
2391   // twiddle factors
2392   float *A = f->A[blocktype];
2393
2394   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2395   // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.
2396
2397   // kernel from paper
2398
2399
2400   // merged:
2401   //   copy and reflect spectral data
2402   //   step 0
2403
2404   // note that it turns out that the items added together during
2405   // this step are, in fact, being added to themselves (as reflected
2406   // by step 0). inexplicable inefficiency! this became obvious
2407   // once I combined the passes.
2408
2409   // so there's a missing 'times 2' here (for adding X to itself).
2410   // this propogates through linearly to the end, where the numbers
2411   // are 1/2 too small, and need to be compensated for.
2412
2413   {
2414      float *d,*e, *AA, *e_stop;
2415      d = &buf2[n2-2];
2416      AA = A;
2417      e = &buffer[0];
2418      e_stop = &buffer[n2];
2419      while (e != e_stop) {
2420         d[1] = (e[0] * AA[0] - e[2]*AA[1]);
2421         d[0] = (e[0] * AA[1] + e[2]*AA[0]);
2422         d -= 2;
2423         AA += 2;
2424         e += 4;
2425      }
2426
2427      e = &buffer[n2-3];
2428      while (d >= buf2) {
2429         d[1] = (-e[2] * AA[0] - -e[0]*AA[1]);
2430         d[0] = (-e[2] * AA[1] + -e[0]*AA[0]);
2431         d -= 2;
2432         AA += 2;
2433         e -= 4;
2434      }
2435   }
2436
2437   // now we use symbolic names for these, so that we can
2438   // possibly swap their meaning as we change which operations
2439   // are in place
2440
2441   u = buffer;
2442   v = buf2;
2443
2444   // step 2    (paper output is w, now u)
2445   // this could be in place, but the data ends up in the wrong
2446   // place... _somebody_'s got to swap it, so this is nominated
2447   {
2448      float *AA = &A[n2-8];
2449      float *d0,*d1, *e0, *e1;
2450
2451      e0 = &v[n4];
2452      e1 = &v[0];
2453
2454      d0 = &u[n4];
2455      d1 = &u[0];
2456
2457      while (AA >= A) {
2458         float v40_20, v41_21;
2459
2460         v41_21 = e0[1] - e1[1];
2461         v40_20 = e0[0] - e1[0];
2462         d0[1]  = e0[1] + e1[1];
2463         d0[0]  = e0[0] + e1[0];
2464         d1[1]  = v41_21*AA[4] - v40_20*AA[5];
2465         d1[0]  = v40_20*AA[4] + v41_21*AA[5];
2466
2467         v41_21 = e0[3] - e1[3];
2468         v40_20 = e0[2] - e1[2];
2469         d0[3]  = e0[3] + e1[3];
2470         d0[2]  = e0[2] + e1[2];
2471         d1[3]  = v41_21*AA[0] - v40_20*AA[1];
2472         d1[2]  = v40_20*AA[0] + v41_21*AA[1];
2473
2474         AA -= 8;
2475
2476         d0 += 4;
2477         d1 += 4;
2478         e0 += 4;
2479         e1 += 4;
2480      }
2481   }
2482
2483   // step 3
2484   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2485
2486   // optimized step 3:
2487
2488   // the original step3 loop can be nested r inside s or s inside r;
2489   // it's written originally as s inside r, but this is dumb when r
2490   // iterates many times, and s few. So I have two copies of it and
2491   // switch between them halfway.
2492
2493   // this is iteration 0 of step 3
2494   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*0, -(n >> 3), A);
2495   imdct_step3_iter0_loop(n >> 4, u, n2-1-n4*1, -(n >> 3), A);
2496
2497   // this is iteration 1 of step 3
2498   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*0, -(n >> 4), A, 16);
2499   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*1, -(n >> 4), A, 16);
2500   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*2, -(n >> 4), A, 16);
2501   imdct_step3_inner_r_loop(n >> 5, u, n2-1 - n8*3, -(n >> 4), A, 16);
2502
2503   l=2;
2504   for (; l < (ld-3)>>1; ++l) {
2505      int k0 = n >> (l+2), k0_2 = k0>>1;
2506      int lim = 1 << (l+1);
2507      int i;
2508      for (i=0; i < lim; ++i)
2509         imdct_step3_inner_r_loop(n >> (l+4), u, n2-1 - k0*i, -k0_2, A, 1 << (l+3));
2510   }
2511
2512   for (; l < ld-6; ++l) {
2513      int k0 = n >> (l+2), k1 = 1 << (l+3), k0_2 = k0>>1;
2514      int rlim = n >> (l+6), r;
2515      int lim = 1 << (l+1);
2516      int i_off;
2517      float *A0 = A;
2518      i_off = n2-1;
2519      for (r=rlim; r > 0; --r) {
2520         imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
2521         A0 += k1*4;
2522         i_off -= 8;
2523      }
2524   }
2525
2526   // iterations with count:
2527   //   ld-6,-5,-4 all interleaved together
2528   //       the big win comes from getting rid of needless flops
2529   //         due to the constants on pass 5 & 4 being all 1 and 0;
2530   //       combining them to be simultaneous to improve cache made little difference
2531   imdct_step3_inner_s_loop_ld654(n >> 5, u, n2-1, A, n);
2532
2533   // output is u
2534
2535   // step 4, 5, and 6
2536   // cannot be in-place because of step 5
2537   {
2538      uint16 *bitrev = f->bit_reverse[blocktype];
2539      // weirdly, I'd have thought reading sequentially and writing
2540      // erratically would have been better than vice-versa, but in
2541      // fact that's not what my testing showed. (That is, with
2542      // j = bitreverse(i), do you read i and write j, or read j and write i.)
2543
2544      float *d0 = &v[n4-4];
2545      float *d1 = &v[n2-4];
2546      while (d0 >= v) {
2547         int k4;
2548
2549         k4 = bitrev[0];
2550         d1[3] = u[k4+0];
2551         d1[2] = u[k4+1];
2552         d0[3] = u[k4+2];
2553         d0[2] = u[k4+3];
2554
2555         k4 = bitrev[1];
2556         d1[1] = u[k4+0];
2557         d1[0] = u[k4+1];
2558         d0[1] = u[k4+2];
2559         d0[0] = u[k4+3];
2560         
2561         d0 -= 4;
2562         d1 -= 4;
2563         bitrev += 2;
2564      }
2565   }
2566   // (paper output is u, now v)
2567
2568
2569   // data must be in buf2
2570   assert(v == buf2);
2571
2572   // step 7   (paper output is v, now v)
2573   // this is now in place
2574   {
2575      float *C = f->C[blocktype];
2576      float *d, *e;
2577
2578      d = v;
2579      e = v + n2 - 4;
2580
2581      while (d < e) {
2582         float a02,a11,b0,b1,b2,b3;
2583
2584         a02 = d[0] - e[2];
2585         a11 = d[1] + e[3];
2586
2587         b0 = C[1]*a02 + C[0]*a11;
2588         b1 = C[1]*a11 - C[0]*a02;
2589
2590         b2 = d[0] + e[ 2];
2591         b3 = d[1] - e[ 3];
2592
2593         d[0] = b2 + b0;
2594         d[1] = b3 + b1;
2595         e[2] = b2 - b0;
2596         e[3] = b1 - b3;
2597
2598         a02 = d[2] - e[0];
2599         a11 = d[3] + e[1];
2600
2601         b0 = C[3]*a02 + C[2]*a11;
2602         b1 = C[3]*a11 - C[2]*a02;
2603
2604         b2 = d[2] + e[ 0];
2605         b3 = d[3] - e[ 1];
2606
2607         d[2] = b2 + b0;
2608         d[3] = b3 + b1;
2609         e[0] = b2 - b0;
2610         e[1] = b1 - b3;
2611
2612         C += 4;
2613         d += 4;
2614         e -= 4;
2615      }
2616   }
2617
2618   // data must be in buf2
2619
2620
2621   // step 8+decode   (paper output is X, now buffer)
2622   // this generates pairs of data a la 8 and pushes them directly through
2623   // the decode kernel (pushing rather than pulling) to avoid having
2624   // to make another pass later
2625
2626   // this cannot POSSIBLY be in place, so we refer to the buffers directly
2627
2628   {
2629      float *d0,*d1,*d2,*d3;
2630
2631      float *B = f->B[blocktype] + n2 - 8;
2632      float *e = buf2 + n2 - 8;
2633      d0 = &buffer[0];
2634      d1 = &buffer[n2-4];
2635      d2 = &buffer[n2];
2636      d3 = &buffer[n-4];
2637      while (e >= v) {
2638         float p0,p1,p2,p3;
2639
2640         p3 =  e[6]*B[7] - e[7]*B[6];
2641         p2 = -e[6]*B[6] - e[7]*B[7]; 
2642
2643         d0[0] =   p3;
2644         d1[3] = - p3;
2645         d2[0] =   p2;
2646         d3[3] =   p2;
2647
2648         p1 =  e[4]*B[5] - e[5]*B[4];
2649         p0 = -e[4]*B[4] - e[5]*B[5]; 
2650
2651         d0[1] =   p1;
2652         d1[2] = - p1;
2653         d2[1] =   p0;
2654         d3[2] =   p0;
2655
2656         p3 =  e[2]*B[3] - e[3]*B[2];
2657         p2 = -e[2]*B[2] - e[3]*B[3]; 
2658
2659         d0[2] =   p3;
2660         d1[1] = - p3;
2661         d2[2] =   p2;
2662         d3[1] =   p2;
2663
2664         p1 =  e[0]*B[1] - e[1]*B[0];
2665         p0 = -e[0]*B[0] - e[1]*B[1]; 
2666
2667         d0[3] =   p1;
2668         d1[0] = - p1;
2669         d2[3] =   p0;
2670         d3[0] =   p0;
2671
2672         B -= 8;
2673         e -= 8;
2674         d0 += 4;
2675         d2 += 4;
2676         d1 -= 4;
2677         d3 -= 4;
2678      }
2679   }
2680
2681   temp_alloc_restore(f,save_point);
2682}
2683
2684#if 0
2685// this is the original version of the above code, if you want to optimize it from scratch
2686void inverse_mdct_naive(float *buffer, int n)
2687{
2688   float s;
2689   float A[1 << 12], B[1 << 12], C[1 << 11];
2690   int i,k,k2,k4, n2 = n >> 1, n4 = n >> 2, n8 = n >> 3, l;
2691   int n3_4 = n - n4, ld;
2692   // how can they claim this only uses N words?!
2693   // oh, because they're only used sparsely, whoops
2694   float u[1 << 13], X[1 << 13], v[1 << 13], w[1 << 13];
2695   // set up twiddle factors
2696
2697   for (k=k2=0; k < n4; ++k,k2+=2) {
2698      A[k2  ] = (float)  cos(4*k*M_PI/n);
2699      A[k2+1] = (float) -sin(4*k*M_PI/n);
2700      B[k2  ] = (float)  cos((k2+1)*M_PI/n/2);
2701      B[k2+1] = (float)  sin((k2+1)*M_PI/n/2);
2702   }
2703   for (k=k2=0; k < n8; ++k,k2+=2) {
2704      C[k2  ] = (float)  cos(2*(k2+1)*M_PI/n);
2705      C[k2+1] = (float) -sin(2*(k2+1)*M_PI/n);
2706   }
2707
2708   // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
2709   // Note there are bugs in that pseudocode, presumably due to them attempting
2710   // to rename the arrays nicely rather than representing the way their actual
2711   // implementation bounces buffers back and forth. As a result, even in the
2712   // "some formulars corrected" version, a direct implementation fails. These
2713   // are noted below as "paper bug".
2714
2715   // copy and reflect spectral data
2716   for (k=0; k < n2; ++k) u[k] = buffer[k];
2717   for (   ; k < n ; ++k) u[k] = -buffer[n - k - 1];
2718   // kernel from paper
2719   // step 1
2720   for (k=k2=k4=0; k < n4; k+=1, k2+=2, k4+=4) {
2721      v[n-k4-1] = (u[k4] - u[n-k4-1]) * A[k2]   - (u[k4+2] - u[n-k4-3])*A[k2+1];
2722      v[n-k4-3] = (u[k4] - u[n-k4-1]) * A[k2+1] + (u[k4+2] - u[n-k4-3])*A[k2];
2723   }
2724   // step 2
2725   for (k=k4=0; k < n8; k+=1, k4+=4) {
2726      w[n2+3+k4] = v[n2+3+k4] + v[k4+3];
2727      w[n2+1+k4] = v[n2+1+k4] + v[k4+1];
2728      w[k4+3]    = (v[n2+3+k4] - v[k4+3])*A[n2-4-k4] - (v[n2+1+k4]-v[k4+1])*A[n2-3-k4];
2729      w[k4+1]    = (v[n2+1+k4] - v[k4+1])*A[n2-4-k4] + (v[n2+3+k4]-v[k4+3])*A[n2-3-k4];
2730   }
2731   // step 3
2732   ld = ilog(n) - 1; // ilog is off-by-one from normal definitions
2733   for (l=0; l < ld-3; ++l) {
2734      int k0 = n >> (l+2), k1 = 1 << (l+3);
2735      int rlim = n >> (l+4), r4, r;
2736      int s2lim = 1 << (l+2), s2;
2737      for (r=r4=0; r < rlim; r4+=4,++r) {
2738         for (s2=0; s2 < s2lim; s2+=2) {
2739            u[n-1-k0*s2-r4] = w[n-1-k0*s2-r4] + w[n-1-k0*(s2+1)-r4];
2740            u[n-3-k0*s2-r4] = w[n-3-k0*s2-r4] + w[n-3-k0*(s2+1)-r4];
2741            u[n-1-k0*(s2+1)-r4] = (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1]
2742                                - (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1+1];
2743            u[n-3-k0*(s2+1)-r4] = (w[n-3-k0*s2-r4] - w[n-3-k0*(s2+1)-r4]) * A[r*k1]
2744                                + (w[n-1-k0*s2-r4] - w[n-1-k0*(s2+1)-r4]) * A[r*k1+1];
2745         }
2746      }
2747      if (l+1 < ld-3) {
2748         // paper bug: ping-ponging of u&w here is omitted
2749         memcpy(w, u, sizeof(u));
2750      }
2751   }
2752
2753   // step 4
2754   for (i=0; i < n8; ++i) {
2755      int j = bit_reverse(i) >> (32-ld+3);
2756      assert(j < n8);
2757      if (i == j) {
2758         // paper bug: original code probably swapped in place; if copying,
2759         //            need to directly copy in this case
2760         int i8 = i << 3;
2761         v[i8+1] = u[i8+1];
2762         v[i8+3] = u[i8+3];
2763         v[i8+5] = u[i8+5];
2764         v[i8+7] = u[i8+7];
2765      } else if (i < j) {
2766         int i8 = i << 3, j8 = j << 3;
2767         v[j8+1] = u[i8+1], v[i8+1] = u[j8 + 1];
2768         v[j8+3] = u[i8+3], v[i8+3] = u[j8 + 3];
2769         v[j8+5] = u[i8+5], v[i8+5] = u[j8 + 5];
2770         v[j8+7] = u[i8+7], v[i8+7] = u[j8 + 7];
2771      }
2772   }
2773   // step 5
2774   for (k=0; k < n2; ++k) {
2775      w[k] = v[k*2+1];
2776   }
2777   // step 6
2778   for (k=k2=k4=0; k < n8; ++k, k2 += 2, k4 += 4) {
2779      u[n-1-k2] = w[k4];
2780      u[n-2-k2] = w[k4+1];
2781      u[n3_4 - 1 - k2] = w[k4+2];
2782      u[n3_4 - 2 - k2] = w[k4+3];
2783   }
2784   // step 7
2785   for (k=k2=0; k < n8; ++k, k2 += 2) {
2786      v[n2 + k2 ] = ( u[n2 + k2] + u[n-2-k2] + C[k2+1]*(u[n2+k2]-u[n-2-k2]) + C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
2787      v[n-2 - k2] = ( u[n2 + k2] + u[n-2-k2] - C[k2+1]*(u[n2+k2]-u[n-2-k2]) - C[k2]*(u[n2+k2+1]+u[n-2-k2+1]))/2;
2788      v[n2+1+ k2] = ( u[n2+1+k2] - u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
2789      v[n-1 - k2] = (-u[n2+1+k2] + u[n-1-k2] + C[k2+1]*(u[n2+1+k2]+u[n-1-k2]) - C[k2]*(u[n2+k2]-u[n-2-k2]))/2;
2790   }
2791   // step 8
2792   for (k=k2=0; k < n4; ++k,k2 += 2) {
2793      X[k]      = v[k2+n2]*B[k2  ] + v[k2+1+n2]*B[k2+1];
2794      X[n2-1-k] = v[k2+n2]*B[k2+1] - v[k2+1+n2]*B[k2  ];
2795   }
2796
2797   // decode kernel to output
2798   // determined the following value experimentally
2799   // (by first figuring out what made inverse_mdct_slow work); then matching that here
2800   // (probably vorbis encoder premultiplies by n or n/2, to save it on the decoder?)
2801   s = 0.5; // theoretically would be n4
2802
2803   // [[[ note! the s value of 0.5 is compensated for by the B[] in the current code,
2804   //     so it needs to use the "old" B values to behave correctly, or else
2805   //     set s to 1.0 ]]]
2806   for (i=0; i < n4  ; ++i) buffer[i] = s * X[i+n4];
2807   for (   ; i < n3_4; ++i) buffer[i] = -s * X[n3_4 - i - 1];
2808   for (   ; i < n   ; ++i) buffer[i] = -s * X[i - n3_4];
2809}
2810#endif
2811
2812static float *get_window(vorb *f, int len)
2813{
2814   len <<= 1;
2815   if (len == f->blocksize_0) return f->window[0];
2816   if (len == f->blocksize_1) return f->window[1];
2817   assert(0);
2818   return NULL;
2819}
2820
2821#ifndef STB_VORBIS_NO_DEFER_FLOOR
2822typedef int16 YTYPE;
2823#else
2824typedef int YTYPE;
2825#endif
2826static int do_floor(vorb *f, Mapping *map, int i, int n, float *target, YTYPE *finalY, uint8 *step2_flag)
2827{
2828   int n2 = n >> 1;
2829   int s = map->chan[i].mux, floor;
2830   floor = map->submap_floor[s];
2831   if (f->floor_types[floor] == 0) {
2832      return error(f, VORBIS_invalid_stream);
2833   } else {
2834      Floor1 *g = &f->floor_config[floor].floor1;
2835      int j,q;
2836      int lx = 0, ly = finalY[0] * g->floor1_multiplier;
2837      for (q=1; q < g->values; ++q) {
2838         j = g->sorted_order[q];
2839         #ifndef STB_VORBIS_NO_DEFER_FLOOR
2840         if (finalY[j] >= 0)
2841         #else
2842         if (step2_flag[j])
2843         #endif
2844         {
2845            int hy = finalY[j] * g->floor1_multiplier;
2846            int hx = g->Xlist[j];
2847            draw_line(target, lx,ly, hx,hy, n2);
2848            lx = hx, ly = hy;
2849         }
2850      }
2851      if (lx < n2)
2852         // optimization of: draw_line(target, lx,ly, n,ly, n2);
2853         for (j=lx; j < n2; ++j)
2854            LINE_OP(target[j], inverse_db_table[ly]);
2855   }
2856   return TRUE;
2857}
2858
2859static int vorbis_decode_initial(vorb *f, int *p_left_start, int *p_left_end, int *p_right_start, int *p_right_end, int *mode)
2860{
2861   Mode *m;
2862   int i, n, prev, next, window_center;
2863   f->channel_buffer_start = f->channel_buffer_end = 0;
2864
2865  retry:
2866   if (f->eof) return FALSE;
2867   if (!maybe_start_packet(f))
2868      return FALSE;
2869   // check packet type
2870   if (get_bits(f,1) != 0) {
2871      if (IS_PUSH_MODE(f))
2872         return error(f,VORBIS_bad_packet_type);
2873      while (EOP != get8_packet(f));
2874      goto retry;
2875   }
2876
2877   if (f->alloc.alloc_buffer)
2878      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
2879
2880   i = get_bits(f, ilog(f->mode_count-1));
2881   if (i == EOP) return FALSE;
2882   if (i >= f->mode_count) return FALSE;
2883   *mode = i;
2884   m = f->mode_config + i;
2885   if (m->blockflag) {
2886      n = f->blocksize_1;
2887      prev = get_bits(f,1);
2888      next = get_bits(f,1);
2889   } else {
2890      prev = next = 0;
2891      n = f->blocksize_0;
2892   }
2893
2894// WINDOWING
2895
2896   window_center = n >> 1;
2897   if (m->blockflag && !prev) {
2898      *p_left_start = (n - f->blocksize_0) >> 2;
2899      *p_left_end   = (n + f->blocksize_0) >> 2;
2900   } else {
2901      *p_left_start = 0;
2902      *p_left_end   = window_center;
2903   }
2904   if (m->blockflag && !next) {
2905      *p_right_start = (n*3 - f->blocksize_0) >> 2;
2906      *p_right_end   = (n*3 + f->blocksize_0) >> 2;
2907   } else {
2908      *p_right_start = window_center;
2909      *p_right_end   = n;
2910   }
2911   return TRUE;
2912}
2913
2914static int vorbis_decode_packet_rest(vorb *f, int *len, Mode *m, int left_start, int left_end, int right_start, int right_end, int *p_left)
2915{
2916   Mapping *map;
2917   int i,j,k,n,n2;
2918   int zero_channel[256];
2919   int really_zero_channel[256];
2920   int window_center;
2921
2922// WINDOWING
2923
2924   n = f->blocksize[m->blockflag];
2925   window_center = n >> 1;
2926
2927   map = &f->mapping[m->mapping];
2928
2929// FLOORS
2930   n2 = n >> 1;
2931
2932   stb_prof(1);
2933   for (i=0; i < f->channels; ++i) {
2934      int s = map->chan[i].mux, floor;
2935      zero_channel[i] = FALSE;
2936      floor = map->submap_floor[s];
2937      if (f->floor_types[floor] == 0) {
2938         return error(f, VORBIS_invalid_stream);
2939      } else {
2940         Floor1 *g = &f->floor_config[floor].floor1;
2941         if (get_bits(f, 1)) {
2942            short *finalY;
2943            uint8 step2_flag[256];
2944            static int range_list[4] = { 256, 128, 86, 64 };
2945            int range = range_list[g->floor1_multiplier-1];
2946            int offset = 2;
2947            finalY = f->finalY[i];
2948            finalY[0] = get_bits(f, ilog(range)-1);
2949            finalY[1] = get_bits(f, ilog(range)-1);
2950            for (j=0; j < g->partitions; ++j) {
2951               int pclass = g->partition_class_list[j];
2952               int cdim = g->class_dimensions[pclass];
2953               int cbits = g->class_subclasses[pclass];
2954               int csub = (1 << cbits)-1;
2955               int cval = 0;
2956               if (cbits) {
2957                  Codebook *c = f->codebooks + g->class_masterbooks[pclass];
2958                  DECODE(cval,f,c);
2959               }
2960               for (k=0; k < cdim; ++k) {
2961                  int book = g->subclass_books[pclass][cval & csub];
2962                  cval = cval >> cbits;
2963                  if (book >= 0) {
2964                     int temp;
2965                     Codebook *c = f->codebooks + book;
2966                     DECODE(temp,f,c);
2967                     finalY[offset++] = temp;
2968                  } else
2969                     finalY[offset++] = 0;
2970               }
2971            }
2972            if (f->valid_bits == INVALID_BITS) goto error; // behavior according to spec
2973            step2_flag[0] = step2_flag[1] = 1;
2974            for (j=2; j < g->values; ++j) {
2975               int low, high, pred, highroom, lowroom, room, val;
2976               low = g->neighbors[j][0];
2977               high = g->neighbors[j][1];
2978               //neighbors(g->Xlist, j, &low, &high);
2979               pred = predict_point(g->Xlist[j], g->Xlist[low], g->Xlist[high], finalY[low], finalY[high]);
2980               val = finalY[j];
2981               highroom = range - pred;
2982               lowroom = pred;
2983               if (highroom < lowroom)
2984                  room = highroom * 2;
2985               else
2986                  room = lowroom * 2;
2987               if (val) {
2988                  step2_flag[low] = step2_flag[high] = 1;
2989                  step2_flag[j] = 1;
2990                  if (val >= room)
2991                     if (highroom > lowroom)
2992                        finalY[j] = val - lowroom + pred;
2993                     else
2994                        finalY[j] = pred - val + highroom - 1;
2995                  else
2996                     if (val & 1)
2997                        finalY[j] = pred - ((val+1)>>1);
2998                     else
2999                        finalY[j] = pred + (val>>1);
3000               } else {
3001                  step2_flag[j] = 0;
3002                  finalY[j] = pred;
3003               }
3004            }
3005
3006#ifdef STB_VORBIS_NO_DEFER_FLOOR
3007            do_floor(f, map, i, n, f->floor_buffers[i], finalY, step2_flag);
3008#else
3009            // defer final floor computation until _after_ residue
3010            for (j=0; j < g->values; ++j) {
3011               if (!step2_flag[j])
3012                  finalY[j] = -1;
3013            }
3014#endif
3015         } else {
3016           error:
3017            zero_channel[i] = TRUE;
3018         }
3019         // So we just defer everything else to later
3020
3021         // at this point we've decoded the floor into buffer
3022      }
3023   }
3024   stb_prof(0);
3025   // at this point we've decoded all floors
3026
3027   if (f->alloc.alloc_buffer)
3028      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3029
3030   // re-enable coupled channels if necessary
3031   memcpy(really_zero_channel, zero_channel, sizeof(really_zero_channel[0]) * f->channels);
3032   for (i=0; i < map->coupling_steps; ++i)
3033      if (!zero_channel[map->chan[i].magnitude] || !zero_channel[map->chan[i].angle]) {
3034         zero_channel[map->chan[i].magnitude] = zero_channel[map->chan[i].angle] = FALSE;
3035      }
3036
3037// RESIDUE DECODE
3038   for (i=0; i < map->submaps; ++i) {
3039      float *residue_buffers[STB_VORBIS_MAX_CHANNELS];
3040      int r,t;
3041      uint8 do_not_decode[256];
3042      int ch = 0;
3043      for (j=0; j < f->channels; ++j) {
3044         if (map->chan[j].mux == i) {
3045            if (zero_channel[j]) {
3046               do_not_decode[ch] = TRUE;
3047               residue_buffers[ch] = NULL;
3048            } else {
3049               do_not_decode[ch] = FALSE;
3050               residue_buffers[ch] = f->channel_buffers[j];
3051            }
3052            ++ch;
3053         }
3054      }
3055      r = map->submap_residue[i];
3056      t = f->residue_types[r];
3057      decode_residue(f, residue_buffers, ch, n2, r, do_not_decode);
3058   }
3059
3060   if (f->alloc.alloc_buffer)
3061      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3062
3063// INVERSE COUPLING
3064   stb_prof(14);
3065   for (i = map->coupling_steps-1; i >= 0; --i) {
3066      int n2 = n >> 1;
3067      float *m = f->channel_buffers[map->chan[i].magnitude];
3068      float *a = f->channel_buffers[map->chan[i].angle    ];
3069      for (j=0; j < n2; ++j) {
3070         float a2,m2;
3071         if (m[j] > 0)
3072            if (a[j] > 0)
3073               m2 = m[j], a2 = m[j] - a[j];
3074            else
3075               a2 = m[j], m2 = m[j] + a[j];
3076         else
3077            if (a[j] > 0)
3078               m2 = m[j], a2 = m[j] + a[j];
3079            else
3080               a2 = m[j], m2 = m[j] - a[j];
3081         m[j] = m2;
3082         a[j] = a2;
3083      }
3084   }
3085
3086   // finish decoding the floors
3087#ifndef STB_VORBIS_NO_DEFER_FLOOR
3088   stb_prof(15);
3089   for (i=0; i < f->channels; ++i) {
3090      if (really_zero_channel[i]) {
3091         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3092      } else {
3093         do_floor(f, map, i, n, f->channel_buffers[i], f->finalY[i], NULL);
3094      }
3095   }
3096#else
3097   for (i=0; i < f->channels; ++i) {
3098      if (really_zero_channel[i]) {
3099         memset(f->channel_buffers[i], 0, sizeof(*f->channel_buffers[i]) * n2);
3100      } else {
3101         for (j=0; j < n2; ++j)
3102            f->channel_buffers[i][j] *= f->floor_buffers[i][j];
3103      }
3104   }
3105#endif
3106
3107// INVERSE MDCT
3108   stb_prof(16);
3109   for (i=0; i < f->channels; ++i)
3110      inverse_mdct(f->channel_buffers[i], n, f, m->blockflag);
3111   stb_prof(0);
3112
3113   // this shouldn't be necessary, unless we exited on an error
3114   // and want to flush to get to the next packet
3115   flush_packet(f);
3116
3117   if (f->first_decode) {
3118      // assume we start so first non-discarded sample is sample 0
3119      // this isn't to spec, but spec would require us to read ahead
3120      // and decode the size of all current frames--could be done,
3121      // but presumably it's not a commonly used feature
3122      f->current_loc = -n2; // start of first frame is positioned for discard
3123      // we might have to discard samples "from" the next frame too,
3124      // if we're lapping a large block then a small at the start?
3125      f->discard_samples_deferred = n - right_end;
3126      f->current_loc_valid = TRUE;
3127      f->first_decode = FALSE;
3128   } else if (f->discard_samples_deferred) {
3129      left_start += f->discard_samples_deferred;
3130      *p_left = left_start;
3131      f->discard_samples_deferred = 0;
3132   } else if (f->previous_length == 0 && f->current_loc_valid) {
3133      // we're recovering from a seek... that means we're going to discard
3134      // the samples from this packet even though we know our position from
3135      // the last page header, so we need to update the position based on
3136      // the discarded samples here
3137      // but wait, the code below is going to add this in itself even
3138      // on a discard, so we don't need to do it here...
3139   }
3140
3141   // check if we have ogg information about the sample # for this packet
3142   if (f->last_seg_which == f->end_seg_with_known_loc) {
3143      // if we have a valid current loc, and this is final:
3144      if (f->current_loc_valid && (f->page_flag & PAGEFLAG_last_page)) {
3145         uint32 current_end = f->known_loc_for_packet - (n-right_end);
3146         // then let's infer the size of the (probably) short final frame
3147         if (current_end < f->current_loc + right_end) {
3148            if (current_end < f->current_loc) {
3149               // negative truncation, that's impossible!
3150               *len = 0;
3151            } else {
3152               *len = current_end - f->current_loc;
3153            }
3154            *len += left_start;
3155            f->current_loc += *len;
3156            return TRUE;
3157         }
3158      }
3159      // otherwise, just set our sample loc
3160      // guess that the ogg granule pos refers to the _middle_ of the
3161      // last frame?
3162      // set f->current_loc to the position of left_start
3163      f->current_loc = f->known_loc_for_packet - (n2-left_start);
3164      f->current_loc_valid = TRUE;
3165   }
3166   if (f->current_loc_valid)
3167      f->current_loc += (right_start - left_start);
3168
3169   if (f->alloc.alloc_buffer)
3170      assert(f->alloc.alloc_buffer_length_in_bytes == f->temp_offset);
3171   *len = right_end;  // ignore samples after the window goes to 0
3172   return TRUE;
3173}
3174
3175static int vorbis_decode_packet(vorb *f, int *len, int *p_left, int *p_right)
3176{
3177   int mode, left_end, right_end;
3178   if (!vorbis_decode_initial(f, p_left, &left_end, p_right, &right_end, &mode)) return 0;
3179   return vorbis_decode_packet_rest(f, len, f->mode_config + mode, *p_left, left_end, *p_right, right_end, p_left);
3180}
3181
3182static int vorbis_finish_frame(stb_vorbis *f, int len, int left, int right)
3183{
3184   int prev,i,j;
3185   // we use right&left (the start of the right- and left-window sin()-regions)
3186   // to determine how much to return, rather than inferring from the rules
3187   // (same result, clearer code); 'left' indicates where our sin() window
3188   // starts, therefore where the previous window's right edge starts, and
3189   // therefore where to start mixing from the previous buffer. 'right'
3190   // indicates where our sin() ending-window starts, therefore that's where
3191   // we start saving, and where our returned-data ends.
3192
3193   // mixin from previous window
3194   if (f->previous_length) {
3195      int i,j, n = f->previous_length;
3196      float *w = get_window(f, n);
3197      for (i=0; i < f->channels; ++i) {
3198         for (j=0; j < n; ++j)
3199            f->channel_buffers[i][left+j] =
3200               f->channel_buffers[i][left+j]*w[    j] +
3201               f->previous_window[i][     j]*w[n-1-j];
3202      }
3203   }
3204
3205   prev = f->previous_length;
3206
3207   // last half of this data becomes previous window
3208   f->previous_length = len - right;
3209
3210   // @OPTIMIZE: could avoid this copy by double-buffering the
3211   // output (flipping previous_window with channel_buffers), but
3212   // then previous_window would have to be 2x as large, and
3213   // channel_buffers couldn't be temp mem (although they're NOT
3214   // currently temp mem, they could be (unless we want to level
3215   // performance by spreading out the computation))
3216   for (i=0; i < f->channels; ++i)
3217      for (j=0; right+j < len; ++j)
3218         f->previous_window[i][j] = f->channel_buffers[i][right+j];
3219
3220   if (!prev)
3221      // there was no previous packet, so this data isn't valid...
3222      // this isn't entirely true, only the would-have-overlapped data
3223      // isn't valid, but this seems to be what the spec requires
3224      return 0;
3225
3226   // truncate a short frame
3227   if (len < right) right = len;
3228
3229   f->samples_output += right-left;
3230
3231   return right - left;
3232}
3233
3234static void vorbis_pump_first_frame(stb_vorbis *f)
3235{
3236   int len, right, left;
3237   if (vorbis_decode_packet(f, &len, &left, &right))
3238      vorbis_finish_frame(f, len, left, right);
3239}
3240
3241#ifndef STB_VORBIS_NO_PUSHDATA_API
3242static int is_whole_packet_present(stb_vorbis *f, int end_page)
3243{
3244   // make sure that we have the packet available before continuing...
3245   // this requires a full ogg parse, but we know we can fetch from f->stream
3246
3247   // instead of coding this out explicitly, we could save the current read state,
3248   // read the next packet with get8() until end-of-packet, check f->eof, then
3249   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
3250   // of state to restore (primarily the page segment table)
3251
3252   int s = f->next_seg, first = TRUE;
3253   uint8 *p = f->stream;
3254
3255   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
3256      for (; s < f->segment_count; ++s) {
3257         p += f->segments[s];
3258         if (f->segments[s] < 255)               // stop at first short segment
3259            break;
3260      }
3261      // either this continues, or it ends it...
3262      if (end_page)
3263         if (s < f->segment_count-1)             return error(f, VORBIS_invalid_stream);
3264      if (s == f->segment_count)
3265         s = -1; // set 'crosses page' flag
3266      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3267      first = FALSE;
3268   }
3269   for (; s == -1;) {
3270      uint8 *q; 
3271      int n;
3272
3273      // check that we have the page header ready
3274      if (p + 26 >= f->stream_end)               return error(f, VORBIS_need_more_data);
3275      // validate the page
3276      if (memcmp(p, ogg_page_header, 4))         return error(f, VORBIS_invalid_stream);
3277      if (p[4] != 0)                             return error(f, VORBIS_invalid_stream);
3278      if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
3279         if (f->previous_length)
3280            if ((p[5] & PAGEFLAG_continued_packet))  return error(f, VORBIS_invalid_stream);
3281         // if no previous length, we're resynching, so we can come in on a continued-packet,
3282         // which we'll just drop
3283      } else {
3284         if (!(p[5] & PAGEFLAG_continued_packet)) return error(f, VORBIS_invalid_stream);
3285      }
3286      n = p[26]; // segment counts
3287      q = p+27;  // q points to segment table
3288      p = q + n; // advance past header
3289      // make sure we've read the segment table
3290      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3291      for (s=0; s < n; ++s) {
3292         p += q[s];
3293         if (q[s] < 255)
3294            break;
3295      }
3296      if (end_page)
3297         if (s < n-1)                            return error(f, VORBIS_invalid_stream);
3298      if (s == f->segment_count)
3299         s = -1; // set 'crosses page' flag
3300      if (p > f->stream_end)                     return error(f, VORBIS_need_more_data);
3301      first = FALSE;
3302   }
3303   return TRUE;
3304}
3305#endif // !STB_VORBIS_NO_PUSHDATA_API
3306
3307static int start_decoder(vorb *f)
3308{
3309   uint8 header[6], x,y;
3310   int len,i,j,k, max_submaps = 0;
3311   int longest_floorlist=0;
3312
3313   // first page, first packet
3314
3315   if (!start_page(f))                              return FALSE;
3316   // validate page flag
3317   if (!(f->page_flag & PAGEFLAG_first_page))       return error(f, VORBIS_invalid_first_page);
3318   if (f->page_flag & PAGEFLAG_last_page)           return error(f, VORBIS_invalid_first_page);
3319   if (f->page_flag & PAGEFLAG_continued_packet)    return error(f, VORBIS_invalid_first_page);
3320   // check for expected packet length
3321   if (f->segment_count != 1)                       return error(f, VORBIS_invalid_first_page);
3322   if (f->segments[0] != 30)                        return error(f, VORBIS_invalid_first_page);
3323   // read packet
3324   // check packet header
3325   if (get8(f) != VORBIS_packet_id)                 return error(f, VORBIS_invalid_first_page);
3326   if (!getn(f, header, 6))                         return error(f, VORBIS_unexpected_eof);
3327   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_first_page);
3328   // vorbis_version
3329   if (get32(f) != 0)                               return error(f, VORBIS_invalid_first_page);
3330   f->channels = get8(f); if (!f->channels)         return error(f, VORBIS_invalid_first_page);
3331   if (f->channels > STB_VORBIS_MAX_CHANNELS)       return error(f, VORBIS_too_many_channels);
3332   f->sample_rate = get32(f); if (!f->sample_rate)  return error(f, VORBIS_invalid_first_page);
3333   get32(f); // bitrate_maximum
3334   get32(f); // bitrate_nominal
3335   get32(f); // bitrate_minimum
3336   x = get8(f);
3337   { int log0,log1;
3338   log0 = x & 15;
3339   log1 = x >> 4;
3340   f->blocksize_0 = 1 << log0;
3341   f->blocksize_1 = 1 << log1;
3342   if (log0 < 6 || log0 > 13)                       return error(f, VORBIS_invalid_setup);
3343   if (log1 < 6 || log1 > 13)                       return error(f, VORBIS_invalid_setup);
3344   if (log0 > log1)                                 return error(f, VORBIS_invalid_setup);
3345   }
3346
3347   // framing_flag
3348   x = get8(f);
3349   if (!(x & 1))                                    return error(f, VORBIS_invalid_first_page);
3350
3351   // second packet!
3352   if (!start_page(f))                              return FALSE;
3353
3354   if (!start_packet(f))                            return FALSE;
3355   do {
3356      len = next_segment(f);
3357      skip(f, len);
3358      f->bytes_in_seg = 0;
3359   } while (len);
3360
3361   // third packet!
3362   if (!start_packet(f))                            return FALSE;
3363
3364   #ifndef STB_VORBIS_NO_PUSHDATA_API
3365   if (IS_PUSH_MODE(f)) {
3366      if (!is_whole_packet_present(f, TRUE)) {
3367         // convert error in ogg header to write type
3368         if (f->error == VORBIS_invalid_stream)
3369            f->error = VORBIS_invalid_setup;
3370         return FALSE;
3371      }
3372   }
3373   #endif
3374
3375   crc32_init(); // always init it, to avoid multithread race conditions
3376
3377   if (get8_packet(f) != VORBIS_packet_setup)       return error(f, VORBIS_invalid_setup);
3378   for (i=0; i < 6; ++i) header[i] = get8_packet(f);
3379   if (!vorbis_validate(header))                    return error(f, VORBIS_invalid_setup);
3380
3381   // codebooks
3382
3383   f->codebook_count = get_bits(f,8) + 1;
3384   f->codebooks = (Codebook *) setup_malloc(f, sizeof(*f->codebooks) * f->codebook_count);
3385   if (f->codebooks == NULL)                        return error(f, VORBIS_outofmem);
3386   memset(f->codebooks, 0, sizeof(*f->codebooks) * f->codebook_count);
3387   for (i=0; i < f->codebook_count; ++i) {
3388      uint32 *values;
3389      int ordered, sorted_count;
3390      int total=0;
3391      uint8 *lengths;
3392      Codebook *c = f->codebooks+i;
3393      x = get_bits(f, 8); if (x != 0x42)            return error(f, VORBIS_invalid_setup);
3394      x = get_bits(f, 8); if (x != 0x43)            return error(f, VORBIS_invalid_setup);
3395      x = get_bits(f, 8); if (x != 0x56)            return error(f, VORBIS_invalid_setup);
3396      x = get_bits(f, 8);
3397      c->dimensions = (get_bits(f, 8)<<8) + x;
3398      x = get_bits(f, 8);
3399      y = get_bits(f, 8);
3400      c->entries = (get_bits(f, 8)<<16) + (y<<8) + x;
3401      ordered = get_bits(f,1);
3402      c->sparse = ordered ? 0 : get_bits(f,1);
3403
3404      if (c->sparse)
3405         lengths = (uint8 *) setup_temp_malloc(f, c->entries);
3406      else
3407         lengths = c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3408
3409      if (!lengths) return error(f, VORBIS_outofmem);
3410
3411      if (ordered) {
3412         int current_entry = 0;
3413         int current_length = get_bits(f,5) + 1;
3414         while (current_entry < c->entries) {
3415            int limit = c->entries - current_entry;
3416            int n = get_bits(f, ilog(limit));
3417            if (current_entry + n > (int) c->entries) { return error(f, VORBIS_invalid_setup); }
3418            memset(lengths + current_entry, current_length, n);
3419            current_entry += n;
3420            ++current_length;
3421         }
3422      } else {
3423         for (j=0; j < c->entries; ++j) {
3424            int present = c->sparse ? get_bits(f,1) : 1;
3425            if (present) {
3426               lengths[j] = get_bits(f, 5) + 1;
3427               ++total;
3428            } else {
3429               lengths[j] = NO_CODE;
3430            }
3431         }
3432      }
3433
3434      if (c->sparse && total >= c->entries >> 2) {
3435         // convert sparse items to non-sparse!
3436         if (c->entries > (int) f->setup_temp_memory_required)
3437            f->setup_temp_memory_required = c->entries;
3438
3439         c->codeword_lengths = (uint8 *) setup_malloc(f, c->entries);
3440         memcpy(c->codeword_lengths, lengths, c->entries);
3441         setup_temp_free(f, lengths, c->entries); // note this is only safe if there have been no intervening temp mallocs!
3442         lengths = c->codeword_lengths;
3443         c->sparse = 0;
3444      }
3445
3446      // compute the size of the sorted tables
3447      if (c->sparse) {
3448         sorted_count = total;
3449         //assert(total != 0);
3450      } else {
3451         sorted_count = 0;
3452         #ifndef STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
3453         for (j=0; j < c->entries; ++j)
3454            if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE)
3455               ++sorted_count;
3456         #endif
3457      }
3458
3459      c->sorted_entries = sorted_count;
3460      values = NULL;
3461
3462      if (!c->sparse) {
3463         c->codewords = (uint32 *) setup_malloc(f, sizeof(c->codewords[0]) * c->entries);
3464         if (!c->codewords)                  return error(f, VORBIS_outofmem);
3465      } else {
3466         unsigned int size;
3467         if (c->sorted_entries) {
3468            c->codeword_lengths = (uint8 *) setup_malloc(f, c->sorted_entries);
3469            if (!c->codeword_lengths)           return error(f, VORBIS_outofmem);
3470            c->codewords = (uint32 *) setup_temp_malloc(f, sizeof(*c->codewords) * c->sorted_entries);
3471            if (!c->codewords)                  return error(f, VORBIS_outofmem);
3472            values = (uint32 *) setup_temp_malloc(f, sizeof(*values) * c->sorted_entries);
3473            if (!values)                        return error(f, VORBIS_outofmem);
3474         }
3475         size = c->entries + (sizeof(*c->codewords) + sizeof(*values)) * c->sorted_entries;
3476         if (size > f->setup_temp_memory_required)
3477            f->setup_temp_memory_required = size;
3478      }
3479
3480      if (!compute_codewords(c, lengths, c->entries, values)) {
3481         if (c->sparse) setup_temp_free(f, values, 0);
3482         return error(f, VORBIS_invalid_setup);
3483      }
3484
3485      if (c->sorted_entries) {
3486         // allocate an extra slot for sentinels
3487         c->sorted_codewords = (uint32 *) setup_malloc(f, sizeof(*c->sorted_codewords) * (c->sorted_entries+1));
3488         // allocate an extra slot at the front so that c->sorted_values[-1] is defined
3489         // so that we can catch that case without an extra if
3490         c->sorted_values    = ( int   *) setup_malloc(f, sizeof(*c->sorted_values   ) * (c->sorted_entries+1));
3491         if (c->sorted_values) { ++c->sorted_values; c->sorted_values[-1] = -1; }
3492         compute_sorted_huffman(c, lengths, values);
3493      }
3494
3495      if (c->sparse) {
3496         setup_temp_free(f, values, sizeof(*values)*c->sorted_entries);
3497         setup_temp_free(f, c->codewords, sizeof(*c->codewords)*c->sorted_entries);
3498         setup_temp_free(f, lengths, c->entries);
3499         c->codewords = NULL;
3500      }
3501
3502      compute_accelerated_huffman(c);
3503
3504      c->lookup_type = get_bits(f, 4);
3505      if (c->lookup_type > 2) return error(f, VORBIS_invalid_setup);
3506      if (c->lookup_type > 0) {
3507         uint16 *mults;
3508         c->minimum_value = float32_unpack(get_bits(f, 32));
3509         c->delta_value = float32_unpack(get_bits(f, 32));
3510         c->value_bits = get_bits(f, 4)+1;
3511         c->sequence_p = get_bits(f,1);
3512         if (c->lookup_type == 1) {
3513            c->lookup_values = lookup1_values(c->entries, c->dimensions);
3514         } else {
3515            c->lookup_values = c->entries * c->dimensions;
3516         }
3517         mults = (uint16 *) setup_temp_malloc(f, sizeof(mults[0]) * c->lookup_values);
3518         if (mults == NULL) return error(f, VORBIS_outofmem);
3519         for (j=0; j < (int) c->lookup_values; ++j) {
3520            int q = get_bits(f, c->value_bits);
3521            if (q == EOP) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_invalid_setup); }
3522            mults[j] = q;
3523         }
3524
3525#ifndef STB_VORBIS_DIVIDES_IN_CODEBOOK
3526         if (c->lookup_type == 1) {
3527            int len, sparse = c->sparse;
3528            // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3529            if (sparse) {
3530               if (c->sorted_entries == 0) goto skip;
3531               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->sorted_entries * c->dimensions);
3532            } else
3533               c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->entries        * c->dimensions);
3534            if (c->multiplicands == NULL) { setup_temp_free(f,mults,sizeof(mults[0])*c->lookup_values); return error(f, VORBIS_outofmem); }
3535            len = sparse ? c->sorted_entries : c->entries;
3536            for (j=0; j < len; ++j) {
3537               int z = sparse ? c->sorted_values[j] : j, div=1;
3538               for (k=0; k < c->dimensions; ++k) {
3539                  int off = (z / div) % c->lookup_values;
3540                  c->multiplicands[j*c->dimensions + k] =
3541                         #ifndef STB_VORBIS_CODEBOOK_FLOATS
3542                            mults[off];
3543                         #else
3544                            mults[off]*c->delta_value + c->minimum_value;
3545                            // in this case (and this case only) we could pre-expand c->sequence_p,
3546                            // and throw away the decode logic for it; have to ALSO do
3547                            // it in the case below, but it can only be done if
3548                            //    STB_VORBIS_CODEBOOK_FLOATS
3549                            //   !STB_VORBIS_DIVIDES_IN_CODEBOOK
3550                         #endif
3551                  div *= c->lookup_values;
3552               }
3553            }
3554            setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3555            c->lookup_type = 2;
3556         }
3557         else
3558#endif
3559         {
3560            c->multiplicands = (codetype *) setup_malloc(f, sizeof(c->multiplicands[0]) * c->lookup_values);
3561            #ifndef STB_VORBIS_CODEBOOK_FLOATS
3562            memcpy(c->multiplicands, mults, sizeof(c->multiplicands[0]) * c->lookup_values);
3563            #else
3564            for (j=0; j < (int) c->lookup_values; ++j)
3565               c->multiplicands[j] = mults[j] * c->delta_value + c->minimum_value;
3566            setup_temp_free(f, mults,sizeof(mults[0])*c->lookup_values);
3567            #endif
3568         }
3569        skip:;
3570
3571         #ifdef STB_VORBIS_CODEBOOK_FLOATS
3572         if (c->lookup_type == 2 && c->sequence_p) {
3573            for (j=1; j < (int) c->lookup_values; ++j)
3574               c->multiplicands[j] = c->multiplicands[j-1];
3575            c->sequence_p = 0;
3576         }
3577         #endif
3578      }
3579   }
3580
3581   // time domain transfers (notused)
3582
3583   x = get_bits(f, 6) + 1;
3584   for (i=0; i < x; ++i) {
3585      uint32 z = get_bits(f, 16);
3586      if (z != 0) return error(f, VORBIS_invalid_setup);
3587   }
3588
3589   // Floors
3590   f->floor_count = get_bits(f, 6)+1;
3591   f->floor_config = (Floor *)  setup_malloc(f, f->floor_count * sizeof(*f->floor_config));
3592   for (i=0; i < f->floor_count; ++i) {
3593      f->floor_types[i] = get_bits(f, 16);
3594      if (f->floor_types[i] > 1) return error(f, VORBIS_invalid_setup);
3595      if (f->floor_types[i] == 0) {
3596         Floor0 *g = &f->floor_config[i].floor0;
3597         g->order = get_bits(f,8);
3598         g->rate = get_bits(f,16);
3599         g->bark_map_size = get_bits(f,16);
3600         g->amplitude_bits = get_bits(f,6);
3601         g->amplitude_offset = get_bits(f,8);
3602         g->number_of_books = get_bits(f,4) + 1;
3603         for (j=0; j < g->number_of_books; ++j)
3604            g->book_list[j] = get_bits(f,8);
3605         return error(f, VORBIS_feature_not_supported);
3606      } else {
3607         Point p[31*8+2];
3608         Floor1 *g = &f->floor_config[i].floor1;
3609         int max_class = -1; 
3610         g->partitions = get_bits(f, 5);
3611         for (j=0; j < g->partitions; ++j) {
3612            g->partition_class_list[j] = get_bits(f, 4);
3613            if (g->partition_class_list[j] > max_class)
3614               max_class = g->partition_class_list[j];
3615         }
3616         for (j=0; j <= max_class; ++j) {
3617            g->class_dimensions[j] = get_bits(f, 3)+1;
3618            g->class_subclasses[j] = get_bits(f, 2);
3619            if (g->class_subclasses[j]) {
3620               g->class_masterbooks[j] = get_bits(f, 8);
3621               if (g->class_masterbooks[j] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3622            }
3623            for (k=0; k < 1 << g->class_subclasses[j]; ++k) {
3624               g->subclass_books[j][k] = get_bits(f,8)-1;
3625               if (g->subclass_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3626            }
3627         }
3628         g->floor1_multiplier = get_bits(f,2)+1;
3629         g->rangebits = get_bits(f,4);
3630         g->Xlist[0] = 0;
3631         g->Xlist[1] = 1 << g->rangebits;
3632         g->values = 2;
3633         for (j=0; j < g->partitions; ++j) {
3634            int c = g->partition_class_list[j];
3635            for (k=0; k < g->class_dimensions[c]; ++k) {
3636               g->Xlist[g->values] = get_bits(f, g->rangebits);
3637               ++g->values;
3638            }
3639         }
3640         // precompute the sorting
3641         for (j=0; j < g->values; ++j) {
3642            p[j].x = g->Xlist[j];
3643            p[j].y = j;
3644         }
3645         qsort(p, g->values, sizeof(p[0]), point_compare);
3646         for (j=0; j < g->values; ++j)
3647            g->sorted_order[j] = (uint8) p[j].y;
3648         // precompute the neighbors
3649         for (j=2; j < g->values; ++j) {
3650            int low,hi;
3651            neighbors(g->Xlist, j, &low,&hi);
3652            g->neighbors[j][0] = low;
3653            g->neighbors[j][1] = hi;
3654         }
3655
3656         if (g->values > longest_floorlist)
3657            longest_floorlist = g->values;
3658      }
3659   }
3660
3661   // Residue
3662   f->residue_count = get_bits(f, 6)+1;
3663   f->residue_config = (Residue *) setup_malloc(f, f->residue_count * sizeof(*f->residue_config));
3664   for (i=0; i < f->residue_count; ++i) {
3665      uint8 residue_cascade[64];
3666      Residue *r = f->residue_config+i;
3667      f->residue_types[i] = get_bits(f, 16);
3668      if (f->residue_types[i] > 2) return error(f, VORBIS_invalid_setup);
3669      r->begin = get_bits(f, 24);
3670      r->end = get_bits(f, 24);
3671      r->part_size = get_bits(f,24)+1;
3672      r->classifications = get_bits(f,6)+1;
3673      r->classbook = get_bits(f,8);
3674      for (j=0; j < r->classifications; ++j) {
3675         uint8 high_bits=0;
3676         uint8 low_bits=get_bits(f,3);
3677         if (get_bits(f,1))
3678            high_bits = get_bits(f,5);
3679         residue_cascade[j] = high_bits*8 + low_bits;
3680      }
3681      r->residue_books = (short (*)[8]) setup_malloc(f, sizeof(r->residue_books[0]) * r->classifications);
3682      for (j=0; j < r->classifications; ++j) {
3683         for (k=0; k < 8; ++k) {
3684            if (residue_cascade[j] & (1 << k)) {
3685               r->residue_books[j][k] = get_bits(f, 8);
3686               if (r->residue_books[j][k] >= f->codebook_count) return error(f, VORBIS_invalid_setup);
3687            } else {
3688               r->residue_books[j][k] = -1;
3689            }
3690         }
3691      }
3692      // precompute the classifications[] array to avoid inner-loop mod/divide
3693      // call it 'classdata' since we already have r->classifications
3694      r->classdata = (uint8 **) setup_malloc(f, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3695      if (!r->classdata) return error(f, VORBIS_outofmem);
3696      memset(r->classdata, 0, sizeof(*r->classdata) * f->codebooks[r->classbook].entries);
3697      for (j=0; j < f->codebooks[r->classbook].entries; ++j) {
3698         int classwords = f->codebooks[r->classbook].dimensions;
3699         int temp = j;
3700         r->classdata[j] = (uint8 *) setup_malloc(f, sizeof(r->classdata[j][0]) * classwords);
3701         for (k=classwords-1; k >= 0; --k) {
3702            r->classdata[j][k] = temp % r->classifications;
3703            temp /= r->classifications;
3704         }
3705      }
3706   }
3707
3708   f->mapping_count = get_bits(f,6)+1;
3709   f->mapping = (Mapping *) setup_malloc(f, f->mapping_count * sizeof(*f->mapping));
3710   for (i=0; i < f->mapping_count; ++i) {
3711      Mapping *m = f->mapping + i;      
3712      int mapping_type = get_bits(f,16);
3713      if (mapping_type != 0) return error(f, VORBIS_invalid_setup);
3714      m->chan = (MappingChannel *) setup_malloc(f, f->channels * sizeof(*m->chan));
3715      if (get_bits(f,1))
3716         m->submaps = get_bits(f,4);
3717      else
3718         m->submaps = 1;
3719      if (m->submaps > max_submaps)
3720         max_submaps = m->submaps;
3721      if (get_bits(f,1)) {
3722         m->coupling_steps = get_bits(f,8)+1;
3723         for (k=0; k < m->coupling_steps; ++k) {
3724            m->chan[k].magnitude = get_bits(f, ilog(f->channels)-1);
3725            m->chan[k].angle = get_bits(f, ilog(f->channels)-1);
3726            if (m->chan[k].magnitude >= f->channels)        return error(f, VORBIS_invalid_setup);
3727            if (m->chan[k].angle     >= f->channels)        return error(f, VORBIS_invalid_setup);
3728            if (m->chan[k].magnitude == m->chan[k].angle)   return error(f, VORBIS_invalid_setup);
3729         }
3730      } else
3731         m->coupling_steps = 0;
3732
3733      // reserved field
3734      if (get_bits(f,2)) return error(f, VORBIS_invalid_setup);
3735      if (m->submaps > 1) {
3736         for (j=0; j < f->channels; ++j) {
3737            m->chan[j].mux = get_bits(f, 4);
3738            if (m->chan[j].mux >= m->submaps)                return error(f, VORBIS_invalid_setup);
3739         }
3740      } else
3741         // @SPECIFICATION: this case is missing from the spec
3742         for (j=0; j < f->channels; ++j)
3743            m->chan[j].mux = 0;
3744
3745      for (j=0; j < m->submaps; ++j) {
3746         get_bits(f,8); // discard
3747         m->submap_floor[j] = get_bits(f,8);
3748         m->submap_residue[j] = get_bits(f,8);
3749         if (m->submap_floor[j] >= f->floor_count)      return error(f, VORBIS_invalid_setup);
3750         if (m->submap_residue[j] >= f->residue_count)  return error(f, VORBIS_invalid_setup);
3751      }
3752   }
3753
3754   // Modes
3755   f->mode_count = get_bits(f, 6)+1;
3756   for (i=0; i < f->mode_count; ++i) {
3757      Mode *m = f->mode_config+i;
3758      m->blockflag = get_bits(f,1);
3759      m->windowtype = get_bits(f,16);
3760      m->transformtype = get_bits(f,16);
3761      m->mapping = get_bits(f,8);
3762      if (m->windowtype != 0)                 return error(f, VORBIS_invalid_setup);
3763      if (m->transformtype != 0)              return error(f, VORBIS_invalid_setup);
3764      if (m->mapping >= f->mapping_count)     return error(f, VORBIS_invalid_setup);
3765   }
3766
3767   flush_packet(f);
3768
3769   f->previous_length = 0;
3770
3771   for (i=0; i < f->channels; ++i) {
3772      f->channel_buffers[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1);
3773      f->previous_window[i] = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
3774      f->finalY[i]          = (int16 *) setup_malloc(f, sizeof(int16) * longest_floorlist);
3775      #ifdef STB_VORBIS_NO_DEFER_FLOOR
3776      f->floor_buffers[i]   = (float *) setup_malloc(f, sizeof(float) * f->blocksize_1/2);
3777      #endif
3778   }
3779
3780   if (!init_blocksize(f, 0, f->blocksize_0)) return FALSE;
3781   if (!init_blocksize(f, 1, f->blocksize_1)) return FALSE;
3782   f->blocksize[0] = f->blocksize_0;
3783   f->blocksize[1] = f->blocksize_1;
3784
3785#ifdef STB_VORBIS_DIVIDE_TABLE
3786   if (integer_divide_table[1][1]==0)
3787      for (i=0; i < DIVTAB_NUMER; ++i)
3788         for (j=1; j < DIVTAB_DENOM; ++j)
3789            integer_divide_table[i][j] = i / j;
3790#endif
3791
3792   // compute how much temporary memory is needed
3793
3794   // 1.
3795   {
3796      uint32 imdct_mem = (f->blocksize_1 * sizeof(float) >> 1);
3797      uint32 classify_mem;
3798      int i,max_part_read=0;
3799      for (i=0; i < f->residue_count; ++i) {
3800         Residue *r = f->residue_config + i;
3801         int n_read = r->end - r->begin;
3802         int part_read = n_read / r->part_size;
3803         if (part_read > max_part_read)
3804            max_part_read = part_read;
3805      }
3806      #ifndef STB_VORBIS_DIVIDES_IN_RESIDUE
3807      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(uint8 *));
3808      #else
3809      classify_mem = f->channels * (sizeof(void*) + max_part_read * sizeof(int *));
3810      #endif
3811
3812      f->temp_memory_required = classify_mem;
3813      if (imdct_mem > f->temp_memory_required)
3814         f->temp_memory_required = imdct_mem;
3815   }
3816
3817   f->first_decode = TRUE;
3818
3819   if (f->alloc.alloc_buffer) {
3820      assert(f->temp_offset == f->alloc.alloc_buffer_length_in_bytes);
3821      // check if there's enough temp memory so we don't error later
3822      if (f->setup_offset + sizeof(*f) + f->temp_memory_required > (unsigned) f->temp_offset)
3823         return error(f, VORBIS_outofmem);
3824   }
3825
3826   f->first_audio_page_offset = stb_vorbis_get_file_offset(f);
3827
3828   return TRUE;
3829}
3830
3831static void vorbis_deinit(stb_vorbis *p)
3832{
3833   int i,j;
3834   for (i=0; i < p->residue_count; ++i) {
3835      Residue *r = p->residue_config+i;
3836      if (r->classdata) {
3837         for (j=0; j < p->codebooks[r->classbook].entries; ++j)
3838            setup_free(p, r->classdata[j]);
3839         setup_free(p, r->classdata);
3840      }
3841      setup_free(p, r->residue_books);
3842   }
3843
3844   if (p->codebooks) {
3845      for (i=0; i < p->codebook_count; ++i) {
3846         Codebook *c = p->codebooks + i;
3847         setup_free(p, c->codeword_lengths);
3848         setup_free(p, c->multiplicands);
3849         setup_free(p, c->codewords);
3850         setup_free(p, c->sorted_codewords);
3851         // c->sorted_values[-1] is the first entry in the array
3852         setup_free(p, c->sorted_values ? c->sorted_values-1 : NULL);
3853      }
3854      setup_free(p, p->codebooks);
3855   }
3856   setup_free(p, p->floor_config);
3857   setup_free(p, p->residue_config);
3858   for (i=0; i < p->mapping_count; ++i)
3859      setup_free(p, p->mapping[i].chan);
3860   setup_free(p, p->mapping);
3861   for (i=0; i < p->channels; ++i) {
3862      setup_free(p, p->channel_buffers[i]);
3863      setup_free(p, p->previous_window[i]);
3864      #ifdef STB_VORBIS_NO_DEFER_FLOOR
3865      setup_free(p, p->floor_buffers[i]);
3866      #endif
3867      setup_free(p, p->finalY[i]);
3868   }
3869   for (i=0; i < 2; ++i) {
3870      setup_free(p, p->A[i]);
3871      setup_free(p, p->B[i]);
3872      setup_free(p, p->C[i]);
3873      setup_free(p, p->window[i]);
3874   }
3875   #ifndef STB_VORBIS_NO_STDIO
3876   if (p->close_on_free) fclose(p->f);
3877   #endif
3878}
3879
3880void stb_vorbis_close(stb_vorbis *p)
3881{
3882   if (p == NULL) return;
3883   vorbis_deinit(p);
3884   setup_free(p,p);
3885}
3886
3887static void vorbis_init(stb_vorbis *p, stb_vorbis_alloc *z)
3888{
3889   memset(p, 0, sizeof(*p)); // NULL out all malloc'd pointers to start
3890   if (z) {
3891      p->alloc = *z;
3892      p->alloc.alloc_buffer_length_in_bytes = (p->alloc.alloc_buffer_length_in_bytes+3) & ~3;
3893      p->temp_offset = p->alloc.alloc_buffer_length_in_bytes;
3894   }
3895   p->eof = 0;
3896   p->error = VORBIS__no_error;
3897   p->stream = NULL;
3898   p->codebooks = NULL;
3899   p->page_crc_tests = -1;
3900   #ifndef STB_VORBIS_NO_STDIO
3901   p->close_on_free = FALSE;
3902   p->f = NULL;
3903   #endif
3904}
3905
3906int stb_vorbis_get_sample_offset(stb_vorbis *f)
3907{
3908   if (f->current_loc_valid)
3909      return f->current_loc;
3910   else
3911      return -1;
3912}
3913
3914stb_vorbis_info stb_vorbis_get_info(stb_vorbis *f)
3915{
3916   stb_vorbis_info d;
3917   d.channels = f->channels;
3918   d.sample_rate = f->sample_rate;
3919   d.setup_memory_required = f->setup_memory_required;
3920   d.setup_temp_memory_required = f->setup_temp_memory_required;
3921   d.temp_memory_required = f->temp_memory_required;
3922   d.max_frame_size = f->blocksize_1 >> 1;
3923   return d;
3924}
3925
3926int stb_vorbis_get_error(stb_vorbis *f)
3927{
3928   int e = f->error;
3929   f->error = VORBIS__no_error;
3930   return e;
3931}
3932
3933static stb_vorbis * vorbis_alloc(stb_vorbis *f)
3934{
3935   stb_vorbis *p = (stb_vorbis *) setup_malloc(f, sizeof(*p));
3936   return p;
3937}
3938
3939#ifndef STB_VORBIS_NO_PUSHDATA_API
3940
3941void stb_vorbis_flush_pushdata(stb_vorbis *f)
3942{
3943   f->previous_length = 0;
3944   f->page_crc_tests  = 0;
3945   f->discard_samples_deferred = 0;
3946   f->current_loc_valid = FALSE;
3947   f->first_decode = FALSE;
3948   f->samples_output = 0;
3949   f->channel_buffer_start = 0;
3950   f->channel_buffer_end = 0;
3951}
3952
3953static int vorbis_search_for_page_pushdata(vorb *f, uint8 *data, int data_len)
3954{
3955   int i,n;
3956   for (i=0; i < f->page_crc_tests; ++i)
3957      f->scan[i].bytes_done = 0;
3958
3959   // if we have room for more scans, search for them first, because
3960   // they may cause us to stop early if their header is incomplete
3961   if (f->page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3962      if (data_len < 4) return 0;
3963      data_len -= 3; // need to look for 4-byte sequence, so don't miss
3964                     // one that straddles a boundary
3965      for (i=0; i < data_len; ++i) {
3966         if (data[i] == 0x4f) {
3967            if (0==memcmp(data+i, ogg_page_header, 4)) {
3968               int j,len;
3969               uint32 crc;
3970               // make sure we have the whole page header
3971               if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3972                  // only read up to this page start, so hopefully we'll
3973                  // have the whole page header start next time
3974                  data_len = i;
3975                  break;
3976               }
3977               // ok, we have it all; compute the length of the page
3978               len = 27 + data[i+26];
3979               for (j=0; j < data[i+26]; ++j)
3980                  len += data[i+27+j];
3981               // scan everything up to the embedded crc (which we must 0)
3982               crc = 0;
3983               for (j=0; j < 22; ++j)
3984                  crc = crc32_update(crc, data[i+j]);
3985               // now process 4 0-bytes
3986               for (   ; j < 26; ++j)
3987                  crc = crc32_update(crc, 0);
3988               // len is the total number of bytes we need to scan
3989               n = f->page_crc_tests++;
3990               f->scan[n].bytes_left = len-j;
3991               f->scan[n].crc_so_far = crc;
3992               f->scan[n].goal_crc = data[i+22] + (data[i+23] << 8) + (data[i+24]<<16) + (data[i+25]<<24);
3993               // if the last frame on a page is continued to the next, then
3994               // we can't recover the sample_loc immediately
3995               if (data[i+27+data[i+26]-1] == 255)
3996                  f->scan[n].sample_loc = ~0;
3997               else
3998                  f->scan[n].sample_loc = data[i+6] + (data[i+7] << 8) + (data[i+ 8]<<16) + (data[i+ 9]<<24);
3999               f->scan[n].bytes_done = i+j;
4000               if (f->page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT)
4001                  break;
4002               // keep going if we still have room for more
4003            }
4004         }
4005      }
4006   }
4007
4008   for (i=0; i < f->page_crc_tests;) {
4009      uint32 crc;
4010      int j;
4011      int n = f->scan[i].bytes_done;
4012      int m = f->scan[i].bytes_left;
4013      if (m > data_len - n) m = data_len - n;
4014      // m is the bytes to scan in the current chunk
4015      crc = f->scan[i].crc_so_far;
4016      for (j=0; j < m; ++j)
4017         crc = crc32_update(crc, data[n+j]);
4018      f->scan[i].bytes_left -= m;
4019      f->scan[i].crc_so_far = crc;
4020      if (f->scan[i].bytes_left == 0) {
4021         // does it match?
4022         if (f->scan[i].crc_so_far == f->scan[i].goal_crc) {
4023            // Houston, we have page
4024            data_len = n+m; // consumption amount is wherever that scan ended
4025            f->page_crc_tests = -1; // drop out of page scan mode
4026            f->previous_length = 0; // decode-but-don't-output one frame
4027            f->next_seg = -1;       // start a new page
4028            f->current_loc = f->scan[i].sample_loc; // set the current sample location
4029                                    // to the amount we'd have decoded had we decoded this page
4030            f->current_loc_valid = f->current_loc != ~0;
4031            return data_len;
4032         }
4033         // delete entry
4034         f->scan[i] = f->scan[--f->page_crc_tests];
4035      } else {
4036         ++i;
4037      }
4038   }
4039
4040   return data_len;
4041}
4042
4043// return value: number of bytes we used
4044int stb_vorbis_decode_frame_pushdata(
4045         stb_vorbis *f,                 // the file we're decoding
4046         uint8 *data, int data_len,     // the memory available for decoding
4047         int *channels,                 // place to write number of float * buffers
4048         float ***output,               // place to write float ** array of float * buffers
4049         int *samples                   // place to write number of output samples
4050     )
4051{
4052   int i;
4053   int len,right,left;
4054
4055   if (!IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4056
4057   if (f->page_crc_tests >= 0) {
4058      *samples = 0;
4059      return vorbis_search_for_page_pushdata(f, data, data_len);
4060   }
4061
4062   f->stream     = data;
4063   f->stream_end = data + data_len;
4064   f->error      = VORBIS__no_error;
4065
4066   // check that we have the entire packet in memory
4067   if (!is_whole_packet_present(f, FALSE)) {
4068      *samples = 0;
4069      return 0;
4070   }
4071
4072   if (!vorbis_decode_packet(f, &len, &left, &right)) {
4073      // save the actual error we encountered
4074      enum STBVorbisError error = f->error;
4075      if (error == VORBIS_bad_packet_type) {
4076         // flush and resynch
4077         f->error = VORBIS__no_error;
4078         while (get8_packet(f) != EOP)
4079            if (f->eof) break;
4080         *samples = 0;
4081         return f->stream - data;
4082      }
4083      if (error == VORBIS_continued_packet_flag_invalid) {
4084         if (f->previous_length == 0) {
4085            // we may be resynching, in which case it's ok to hit one
4086            // of these; just discard the packet
4087            f->error = VORBIS__no_error;
4088            while (get8_packet(f) != EOP)
4089               if (f->eof) break;
4090            *samples = 0;
4091            return f->stream - data;
4092         }
4093      }
4094      // if we get an error while parsing, what to do?
4095      // well, it DEFINITELY won't work to continue from where we are!
4096      stb_vorbis_flush_pushdata(f);
4097      // restore the error that actually made us bail
4098      f->error = error;
4099      *samples = 0;
4100      return 1;
4101   }
4102
4103   // success!
4104   len = vorbis_finish_frame(f, len, left, right);
4105   for (i=0; i < f->channels; ++i)
4106      f->outputs[i] = f->channel_buffers[i] + left;
4107
4108   if (channels) *channels = f->channels;
4109   *samples = len;
4110   *output = f->outputs;
4111   return f->stream - data;
4112}
4113
4114stb_vorbis *stb_vorbis_open_pushdata(
4115         unsigned char *data, int data_len, // the memory available for decoding
4116         int *data_used,              // only defined if result is not NULL
4117         int *error, stb_vorbis_alloc *alloc)
4118{
4119   stb_vorbis *f, p;
4120   vorbis_init(&p, alloc);
4121   p.stream     = data;
4122   p.stream_end = data + data_len;
4123   p.push_mode  = TRUE;
4124   if (!start_decoder(&p)) {
4125      if (p.eof)
4126         *error = VORBIS_need_more_data;
4127      else
4128         *error = p.error;
4129      return NULL;
4130   }
4131   f = vorbis_alloc(&p);
4132   if (f) {
4133      *f = p;
4134      *data_used = f->stream - data;
4135      *error = 0;
4136      return f;
4137   } else {
4138      vorbis_deinit(&p);
4139      return NULL;
4140   }
4141}
4142#endif // STB_VORBIS_NO_PUSHDATA_API
4143
4144unsigned int stb_vorbis_get_file_offset(stb_vorbis *f)
4145{
4146   #ifndef STB_VORBIS_NO_PUSHDATA_API
4147   if (f->push_mode) return 0;
4148   #endif
4149   if (USE_MEMORY(f)) return f->stream - f->stream_start;
4150
4151#ifdef STB_VORBIS_USE_CALLBACKS
4152	if(USE_CALLBACKS(f))
4153		return f->cb_offset;
4154#endif
4155
4156   #ifndef STB_VORBIS_NO_STDIO
4157   return ftell(f->f) - f->f_start;
4158   #endif
4159}
4160
4161#ifndef STB_VORBIS_NO_PULLDATA_API
4162//
4163// DATA-PULLING API
4164//
4165
4166static uint32 vorbis_find_page(stb_vorbis *f, uint32 *end, uint32 *last)
4167{
4168   for(;;) {
4169      int n;
4170      if (f->eof) return 0;
4171      n = get8(f);
4172      if (n == 0x4f) { // page header
4173         unsigned int retry_loc = stb_vorbis_get_file_offset(f);
4174         int i;
4175         // check if we're off the end of a file_section stream
4176         if (retry_loc - 25 > f->stream_len)
4177            return 0;
4178         // check the rest of the header
4179         for (i=1; i < 4; ++i)
4180            if (get8(f) != ogg_page_header[i])
4181               break;
4182         if (f->eof) return 0;
4183         if (i == 4) {
4184            uint8 header[27];
4185            uint32 i, crc, goal, len;
4186            for (i=0; i < 4; ++i)
4187               header[i] = ogg_page_header[i];
4188            for (; i < 27; ++i)
4189               header[i] = get8(f);
4190            if (f->eof) return 0;
4191            if (header[4] != 0) goto invalid;
4192            goal = header[22] + (header[23] << 8) + (header[24]<<16) + (header[25]<<24);
4193            for (i=22; i < 26; ++i)
4194               header[i] = 0;
4195            crc = 0;
4196            for (i=0; i < 27; ++i)
4197               crc = crc32_update(crc, header[i]);
4198            len = 0;
4199            for (i=0; i < header[26]; ++i) {
4200               int s = get8(f);
4201               crc = crc32_update(crc, s);
4202               len += s;
4203            }
4204            if (len && f->eof) return 0;
4205            for (i=0; i < len; ++i)
4206               crc = crc32_update(crc, get8(f));
4207            // finished parsing probable page
4208            if (crc == goal) {
4209               // we could now check that it's either got the last
4210               // page flag set, OR it's followed by the capture
4211               // pattern, but I guess TECHNICALLY you could have
4212               // a file with garbage between each ogg page and recover
4213               // from it automatically? So even though that paranoia
4214               // might decrease the chance of an invalid decode by
4215               // another 2^32, not worth it since it would hose those
4216               // invalid-but-useful files?
4217               if (end)
4218                  *end = stb_vorbis_get_file_offset(f);
4219               if (last)
4220                  if (header[5] & 0x04)
4221                     *last = 1;
4222                  else
4223                     *last = 0;
4224               set_file_offset(f, retry_loc-1);
4225               return 1;
4226            }
4227         }
4228        invalid:
4229         // not a valid page, so rewind and look for next one
4230         set_file_offset(f, retry_loc);
4231      }
4232   }
4233}
4234
4235// seek is implemented with 'interpolation search'--this is like
4236// binary search, but we use the data values to estimate the likely
4237// location of the data item (plus a bit of a bias so when the
4238// estimation is wrong we don't waste overly much time)
4239
4240#define SAMPLE_unknown  0xffffffff
4241
4242
4243// ogg vorbis, in its insane infinite wisdom, only provides
4244// information about the sample at the END of the page.
4245// therefore we COULD have the data we need in the current
4246// page, and not know it. we could just use the end location
4247// as our only knowledge for bounds, seek back, and eventually
4248// the binary search finds it. or we can try to be smart and
4249// not waste time trying to locate more pages. we try to be
4250// smart, since this data is already in memory anyway, so
4251// doing needless I/O would be crazy!
4252static int vorbis_analyze_page(stb_vorbis *f, ProbedPage *z)
4253{
4254   uint8 header[27], lacing[255];
4255   uint8 packet_type[255];
4256   int num_packet, packet_start, previous =0;
4257   int i,len;
4258   uint32 samples;
4259
4260   // record where the page starts
4261   z->page_start = stb_vorbis_get_file_offset(f);
4262
4263   // parse the header
4264   getn(f, header, 27);
4265   assert(header[0] == 'O' && header[1] == 'g' && header[2] == 'g' && header[3] == 'S');
4266   getn(f, lacing, header[26]);
4267
4268   // determine the length of the payload
4269   len = 0;
4270   for (i=0; i < header[26]; ++i)
4271      len += lacing[i];
4272
4273   // this implies where the page ends
4274   z->page_end = z->page_start + 27 + header[26] + len;
4275
4276   // read the last-decoded sample out of the data
4277   z->last_decoded_sample = header[6] + (header[7] << 8) + (header[8] << 16) + (header[9] << 16);
4278
4279   if (header[5] & 4) {
4280      // if this is the last page, it's not possible to work
4281      // backwards to figure out the first sample! whoops! fuck.
4282      z->first_decoded_sample = SAMPLE_unknown;
4283      set_file_offset(f, z->page_start);
4284      return 1;
4285   }
4286
4287   // scan through the frames to determine the sample-count of each one...
4288   // our goal is the sample # of the first fully-decoded sample on the
4289   // page, which is the first decoded sample of the 2nd page
4290
4291   num_packet=0;
4292
4293   packet_start = ((header[5] & 1) == 0);
4294
4295   for (i=0; i < header[26]; ++i) {
4296      if (packet_start) {
4297         uint8 n,b,m;
4298         if (lacing[i] == 0) goto bail; // trying to read from zero-length packet
4299         n = get8(f);
4300         // if bottom bit is non-zero, we've got corruption
4301         if (n & 1) goto bail;
4302         n >>= 1;
4303         b = ilog(f->mode_count-1);
4304         m = n >> b;
4305         n &= (1 << b)-1;
4306         if (n >= f->mode_count) goto bail;
4307         if (num_packet == 0 && f->mode_config[n].blockflag)
4308            previous = (m & 1);
4309         packet_type[num_packet++] = f->mode_config[n].blockflag;
4310         skip(f, lacing[i]-1);
4311      } else
4312         skip(f, lacing[i]);
4313      packet_start = (lacing[i] < 255);
4314   }
4315
4316   // now that we know the sizes of all the pages, we can start determining
4317   // how much sample data there is.
4318
4319   samples = 0;
4320
4321   // for the last packet, we step by its whole length, because the definition
4322   // is that we encoded the end sample loc of the 'last packet completed',
4323   // where 'completed' refers to packets being split, and we are left to guess
4324   // what 'end sample loc' means. we assume it means ignoring the fact that
4325   // the last half of the data is useless without windowing against the next
4326   // packet... (so it's not REALLY complete in that sense)
4327   if (num_packet > 1)
4328      samples += f->blocksize[packet_type[num_packet-1]];
4329
4330   for (i=num_packet-2; i >= 1; --i) {
4331      // now, for this packet, how many samples do we have that
4332      // do not overlap the following packet?
4333      if (packet_type[i] == 1)
4334         if (packet_type[i+1] == 1)
4335            samples += f->blocksize_1 >> 1;
4336         else
4337            samples += ((f->blocksize_1 - f->blocksize_0) >> 2) + (f->blocksize_0 >> 1);
4338      else
4339         samples += f->blocksize_0 >> 1;
4340   }
4341   // now, at this point, we've rewound to the very beginning of the
4342   // _second_ packet. if we entirely discard the first packet after
4343   // a seek, this will be exactly the right sample number. HOWEVER!
4344   // we can't as easily compute this number for the LAST page. The
4345   // only way to get the sample offset of the LAST page is to use
4346   // the end loc from the previous page. But what that returns us
4347   // is _exactly_ the place where we get our first non-overlapped
4348   // sample. (I think. Stupid spec for being ambiguous.) So for
4349   // consistency it's better to do that here, too. However, that
4350   // will then require us to NOT discard all of the first frame we
4351   // decode, in some cases, which means an even weirder frame size
4352   // and extra code. what a fucking pain.
4353   
4354   // we're going to discard the first packet if we
4355   // start the seek here, so we don't care about it. (we could actually
4356   // do better; if the first packet is long, and the previous packet
4357   // is short, there's actually data in the first half of the first
4358   // packet that doesn't need discarding... but not worth paying the
4359   // effort of tracking that of that here and in the seeking logic)
4360   // except crap, if we infer it from the _previous_ packet's end
4361   // location, we DO need to use that definition... and we HAVE to
4362   // infer the start loc of the LAST packet from the previous packet's
4363   // end location. fuck you, ogg vorbis.
4364
4365   z->first_decoded_sample = z->last_decoded_sample - samples;
4366
4367   // restore file state to where we were
4368   set_file_offset(f, z->page_start);
4369   return 1;
4370
4371   // restore file state to where we were
4372  bail:
4373   set_file_offset(f, z->page_start);
4374   return 0;
4375}
4376
4377static int vorbis_seek_frame_from_page(stb_vorbis *f, uint32 page_start, uint32 first_sample, uint32 target_sample, int fine)
4378{
4379   int left_start, left_end, right_start, right_end, mode,i;
4380   int frame=0;
4381   uint32 frame_start;
4382   int frames_to_skip, data_to_skip;
4383
4384   // first_sample is the sample # of the first sample that doesn't
4385   // overlap the previous page... note that this requires us to
4386   // _partially_ discard the first packet! bleh.
4387   set_file_offset(f, page_start);
4388
4389   f->next_seg = -1;  // force page resync
4390
4391   frame_start = first_sample;
4392   // frame start is where the previous packet's last decoded sample
4393   // was, which corresponds to left_end... EXCEPT if the previous
4394   // packet was long and this packet is short? Probably a bug here.
4395
4396
4397   // now, we can start decoding frames... we'll only FAKE decode them,
4398   // until we find the frame that contains our sample; then we'll rewind,
4399   // and try again
4400   for (;;) {
4401      int start;
4402
4403      if (!vorbis_decode_initial(f, &left_start, &left_end, &right_start, &right_end, &mode))
4404         return error(f, VORBIS_seek_failed);
4405
4406      if (frame == 0)
4407         start = left_end;
4408      else
4409         start = left_start;
4410
4411      // the window starts at left_start; the last valid sample we generate
4412      // before the next frame's window start is right_start-1
4413      if (target_sample < frame_start + right_start-start)
4414         break;
4415
4416      flush_packet(f);
4417      if (f->eof)
4418         return error(f, VORBIS_seek_failed);
4419
4420      frame_start += right_start - start;
4421
4422      ++frame;
4423   }
4424
4425   // ok, at this point, the sample we want is contained in frame #'frame'
4426
4427   // to decode frame #'frame' normally, we have to decode the
4428   // previous frame first... but if it's the FIRST frame of the page
4429   // we can't. if it's the first frame, it means it falls in the part
4430   // of the first frame that doesn't overlap either of the other frames.
4431   // so, if we have to handle that case for the first frame, we might
4432   // as well handle it for all of them, so:
4433   if (target_sample > frame_start + (left_end - left_start)) {
4434      // so what we want to do is go ahead and just immediately decode
4435      // this frame, but then make it so the next get_frame_float() uses
4436      // this already-decoded data? or do we want to go ahead and rewind,
4437      // and leave a flag saying to skip the first N data? let's do that
4438      frames_to_skip = frame;  // if this is frame #1, skip 1 frame (#0)
4439      data_to_skip = left_end - left_start;
4440   } else {
4441      // otherwise, we want to skip frames 0, 1, 2, ... frame-2
4442      // (which means frame-2+1 total frames) then decode frame-1,
4443      // then leave frame pending
4444      frames_to_skip = frame - 1;
4445      assert(frames_to_skip >= 0);
4446      data_to_skip = -1;      
4447   }
4448
4449   set_file_offset(f, page_start);
4450   f->next_seg = - 1; // force page resync
4451
4452   for (i=0; i < frames_to_skip; ++i) {
4453      maybe_start_packet(f);
4454      flush_packet(f);
4455   }
4456
4457   if (data_to_skip >= 0) {
4458      int i,j,n = f->blocksize_0 >> 1;
4459      f->discard_samples_deferred = data_to_skip;
4460      for (i=0; i < f->channels; ++i)
4461         for (j=0; j < n; ++j)
4462            f->previous_window[i][j] = 0;
4463      f->previous_length = n;
4464      frame_start += data_to_skip;
4465   } else {
4466      f->previous_length = 0;
4467      vorbis_pump_first_frame(f);
4468   }
4469
4470   // at this point, the NEXT decoded frame will generate the desired sample
4471   if (fine) {
4472      // so if we're doing sample accurate streaming, we want to go ahead and decode it!
4473      if (target_sample != frame_start) {
4474         int n;
4475         stb_vorbis_get_frame_float(f, &n, NULL);
4476         assert(target_sample > frame_start);
4477         assert(f->channel_buffer_start + (int) (target_sample-frame_start) < f->channel_buffer_end);
4478         f->channel_buffer_start += (target_sample - frame_start);
4479      }
4480   }
4481
4482   return 0;
4483}
4484
4485static int vorbis_seek_base(stb_vorbis *f, unsigned int sample_number, int fine)
4486{
4487   ProbedPage p[2],q;
4488   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4489
4490   // do we know the location of the last page?
4491   if (f->p_last.page_start == 0) {
4492      uint32 z = stb_vorbis_stream_length_in_samples(f);
4493      if (z == 0) return error(f, VORBIS_cant_find_last_page);
4494   }
4495
4496   p[0] = f->p_first;
4497   p[1] = f->p_last;
4498
4499   if (sample_number >= f->p_last.last_decoded_sample)
4500      sample_number = f->p_last.last_decoded_sample-1;
4501
4502   if (sample_number < f->p_first.last_decoded_sample) {
4503      vorbis_seek_frame_from_page(f, p[0].page_start, 0, sample_number, fine);
4504      return 0;
4505   } else {
4506      int attempts=0;
4507      while (p[0].page_end < p[1].page_start) {
4508         uint32 probe;
4509         uint32 start_offset, end_offset;
4510         uint32 start_sample, end_sample;
4511
4512         // copy these into local variables so we can tweak them
4513         // if any are unknown
4514         start_offset = p[0].page_end;
4515         end_offset   = p[1].after_previous_page_start; // an address known to seek to page p[1]
4516         start_sample = p[0].last_decoded_sample;
4517         end_sample   = p[1].last_decoded_sample;
4518
4519         // currently there is no such tweaking logic needed/possible?
4520         if (start_sample == SAMPLE_unknown || end_sample == SAMPLE_unknown)
4521            return error(f, VORBIS_seek_failed);
4522
4523         // now we want to lerp between these for the target samples...
4524      
4525         // step 1: we need to bias towards the page start...
4526         if (start_offset + 4000 < end_offset)
4527            end_offset -= 4000;
4528
4529         // now compute an interpolated search loc
4530         probe = start_offset + (int) floor((float) (end_offset - start_offset) / (end_sample - start_sample) * (sample_number - start_sample));
4531
4532         // next we need to bias towards binary search...
4533         // code is a little wonky to allow for full 32-bit unsigned values
4534         if (attempts >= 4) {
4535            uint32 probe2 = start_offset + ((end_offset - start_offset) >> 1);
4536            if (attempts >= 8)
4537               probe = probe2;
4538            else if (probe < probe2)
4539               probe = probe + ((probe2 - probe) >> 1);
4540            else
4541               probe = probe2 + ((probe - probe2) >> 1);
4542         }
4543         ++attempts;
4544
4545         set_file_offset(f, probe);
4546         if (!vorbis_find_page(f, NULL, NULL))   return error(f, VORBIS_seek_failed);
4547         if (!vorbis_analyze_page(f, &q))        return error(f, VORBIS_seek_failed);
4548         q.after_previous_page_start = probe;
4549
4550         // it's possible we've just found the last page again
4551         if (q.page_start == p[1].page_start) {
4552            p[1] = q;
4553            continue;
4554         }
4555
4556         if (sample_number < q.last_decoded_sample)
4557            p[1] = q;
4558         else
4559            p[0] = q;
4560      }
4561
4562      if (p[0].last_decoded_sample <= sample_number && sample_number < p[1].last_decoded_sample) {
4563         vorbis_seek_frame_from_page(f, p[1].page_start, p[0].last_decoded_sample, sample_number, fine);
4564         return 0;
4565      }
4566      return error(f, VORBIS_seek_failed);
4567   }
4568}
4569
4570int stb_vorbis_seek_frame(stb_vorbis *f, unsigned int sample_number)
4571{
4572   return vorbis_seek_base(f, sample_number, FALSE);
4573}
4574
4575int stb_vorbis_seek(stb_vorbis *f, unsigned int sample_number)
4576{
4577   return vorbis_seek_base(f, sample_number, TRUE);
4578}
4579
4580void stb_vorbis_seek_start(stb_vorbis *f)
4581{
4582   if (IS_PUSH_MODE(f)) { error(f, VORBIS_invalid_api_mixing); return; }
4583   set_file_offset(f, f->first_audio_page_offset);
4584   f->previous_length = 0;
4585   f->first_decode = TRUE;
4586   f->next_seg = -1;
4587   vorbis_pump_first_frame(f);
4588}
4589
4590unsigned int stb_vorbis_stream_length_in_samples(stb_vorbis *f)
4591{
4592   unsigned int restore_offset, previous_safe;
4593   unsigned int end, last_page_loc;
4594
4595   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4596   if (!f->total_samples) {
4597      int last;
4598      uint32 lo,hi;
4599      char header[6];
4600
4601      // first, store the current decode position so we can restore it
4602      restore_offset = stb_vorbis_get_file_offset(f);
4603
4604      // now we want to seek back 64K from the end (the last page must
4605      // be at most a little less than 64K, but let's allow a little slop)
4606      if (f->stream_len >= 65536 && f->stream_len-65536 >= f->first_audio_page_offset)
4607         previous_safe = f->stream_len - 65536;
4608      else
4609         previous_safe = f->first_audio_page_offset;
4610
4611      set_file_offset(f, previous_safe);
4612      // previous_safe is now our candidate 'earliest known place that seeking
4613      // to will lead to the final page'
4614
4615      if (!vorbis_find_page(f, &end, (int unsigned *)&last)) {
4616         // if we can't find a page, we're hosed!
4617         f->error = VORBIS_cant_find_last_page;
4618         f->total_samples = 0xffffffff;
4619         goto done;
4620      }
4621
4622      // check if there are more pages
4623      last_page_loc = stb_vorbis_get_file_offset(f);
4624
4625      // stop when the last_page flag is set, not when we reach eof;
4626      // this allows us to stop short of a 'file_section' end without
4627      // explicitly checking the length of the section
4628      while (!last) {
4629         set_file_offset(f, end);
4630         if (!vorbis_find_page(f, &end, (int unsigned *)&last)) {
4631            // the last page we found didn't have the 'last page' flag
4632            // set. whoops!
4633            break;
4634         }
4635         previous_safe = last_page_loc+1;
4636         last_page_loc = stb_vorbis_get_file_offset(f);
4637      }
4638
4639      set_file_offset(f, last_page_loc);
4640
4641      // parse the header
4642      getn(f, (unsigned char *)header, 6);
4643      // extract the absolute granule position
4644      lo = get32(f);
4645      hi = get32(f);
4646      if (lo == 0xffffffff && hi == 0xffffffff) {
4647         f->error = VORBIS_cant_find_last_page;
4648         f->total_samples = SAMPLE_unknown;
4649         goto done;
4650      }
4651      if (hi)
4652         lo = 0xfffffffe; // saturate
4653      f->total_samples = lo;
4654
4655      f->p_last.page_start = last_page_loc;
4656      f->p_last.page_end   = end;
4657      f->p_last.last_decoded_sample = lo;
4658      f->p_last.first_decoded_sample = SAMPLE_unknown;
4659      f->p_last.after_previous_page_start = previous_safe;
4660
4661     done:
4662      set_file_offset(f, restore_offset);
4663   }
4664   return f->total_samples == SAMPLE_unknown ? 0 : f->total_samples;
4665}
4666
4667float stb_vorbis_stream_length_in_seconds(stb_vorbis *f)
4668{
4669   return stb_vorbis_stream_length_in_samples(f) / (float) f->sample_rate;
4670}
4671
4672
4673
4674int stb_vorbis_get_frame_float(stb_vorbis *f, int *channels, float ***output)
4675{
4676   int len, right,left,i;
4677   if (IS_PUSH_MODE(f)) return error(f, VORBIS_invalid_api_mixing);
4678
4679   if (!vorbis_decode_packet(f, &len, &left, &right)) {
4680      f->channel_buffer_start = f->channel_buffer_end = 0;
4681      return 0;
4682   }
4683
4684   len = vorbis_finish_frame(f, len, left, right);
4685   for (i=0; i < f->channels; ++i)
4686      f->outputs[i] = f->channel_buffers[i] + left;
4687
4688   f->channel_buffer_start = left;
4689   f->channel_buffer_end   = left+len;
4690
4691   if (channels) *channels = f->channels;
4692   if (output)   *output = f->outputs;
4693   return len;
4694}
4695
4696#ifndef STB_VORBIS_NO_STDIO
4697
4698stb_vorbis * stb_vorbis_open_file_section(FILE *file, int close_on_free, int *error, stb_vorbis_alloc *alloc, unsigned int length)
4699{
4700   stb_vorbis *f, p;
4701   vorbis_init(&p, alloc);
4702   p.f = file;
4703   p.f_start = ftell(file);
4704   p.stream_len   = length;
4705   p.close_on_free = close_on_free;
4706   if (start_decoder(&p)) {
4707      f = vorbis_alloc(&p);
4708      if (f) {
4709         *f = p;
4710         vorbis_pump_first_frame(f);
4711         return f;
4712      }
4713   }
4714   if (error) *error = p.error;
4715   vorbis_deinit(&p);
4716   return NULL;
4717}
4718
4719stb_vorbis * stb_vorbis_open_file(FILE *file, int close_on_free, int *error, stb_vorbis_alloc *alloc)
4720{
4721   unsigned int len, start;
4722   start = ftell(file);
4723   fseek(file, 0, SEEK_END);
4724   len = ftell(file) - start;
4725   fseek(file, start, SEEK_SET);
4726   return stb_vorbis_open_file_section(file, close_on_free, error, alloc, len);
4727}
4728
4729stb_vorbis * stb_vorbis_open_filename(char *filename, int *error, stb_vorbis_alloc *alloc)
4730{
4731   FILE *f = fopen(filename, "rb");
4732   if (f) 
4733      return stb_vorbis_open_file(f, TRUE, error, alloc);
4734   if (error) *error = VORBIS_file_open_failure;
4735   return NULL;
4736}
4737#endif // STB_VORBIS_NO_STDIO
4738
4739stb_vorbis * stb_vorbis_open_memory(unsigned char *data, int len, int *error, stb_vorbis_alloc *alloc)
4740{
4741   stb_vorbis *f, p;
4742   if (data == NULL) return NULL;
4743   vorbis_init(&p, alloc);
4744   p.stream = data;
4745   p.stream_end = data + len;
4746   p.stream_start = p.stream;
4747   p.stream_len = len;
4748   p.push_mode = FALSE;
4749   if (start_decoder(&p)) {
4750      f = vorbis_alloc(&p);
4751      if (f) {
4752         *f = p;
4753         vorbis_pump_first_frame(f);
4754         return f;
4755      }
4756   }
4757   if (error) *error = p.error;
4758   vorbis_deinit(&p);
4759   return NULL;
4760}
4761
4762#ifndef STB_VORBIS_NO_INTEGER_CONVERSION
4763#define PLAYBACK_MONO     1
4764#define PLAYBACK_LEFT     2
4765#define PLAYBACK_RIGHT    4
4766
4767#define L  (PLAYBACK_LEFT  | PLAYBACK_MONO)
4768#define C  (PLAYBACK_LEFT  | PLAYBACK_RIGHT | PLAYBACK_MONO)
4769#define R  (PLAYBACK_RIGHT | PLAYBACK_MONO)
4770
4771static int8 channel_position[7][6] =
4772{
4773   { 0 },
4774   { C },
4775   { L, R },
4776   { L, C, R },
4777   { L, R, L, R },
4778   { L, C, R, L, R },
4779   { L, C, R, L, R, C },
4780};
4781
4782
4783#ifndef STB_VORBIS_NO_FAST_SCALED_FLOAT
4784   typedef union {
4785      float f;
4786      int i;
4787   } float_conv;
4788   typedef char stb_vorbis_float_size_test[sizeof(float)==4 && sizeof(int) == 4];
4789   #define FASTDEF(x) float_conv x
4790   // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
4791   #define MAGIC(SHIFT) (1.5f * (1 << (23-SHIFT)) + 0.5f/(1 << SHIFT))
4792   #define ADDEND(SHIFT) (((150-SHIFT) << 23) + (1 << 22))
4793   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) (temp.f = (x) + MAGIC(s), temp.i - ADDEND(s))
4794   #define check_endianness()  
4795#else
4796   #define FAST_SCALED_FLOAT_TO_INT(temp,x,s) ((int) ((x) * (1 << (s))))
4797   #define check_endianness()
4798   #define FASTDEF(x)