/libavcodec/sonic.c

http://github.com/FFmpeg/FFmpeg · C · 1128 lines · 851 code · 211 blank · 66 comment · 173 complexity · 1e06fd051f506eedeff2a80aa12ef058 MD5 · raw file

  1. /*
  2. * Simple free lossless/lossy audio codec
  3. * Copyright (c) 2004 Alex Beregszaszi
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "get_bits.h"
  23. #include "golomb.h"
  24. #include "internal.h"
  25. #include "rangecoder.h"
  26. /**
  27. * @file
  28. * Simple free lossless/lossy audio codec
  29. * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
  30. * Written and designed by Alex Beregszaszi
  31. *
  32. * TODO:
  33. * - CABAC put/get_symbol
  34. * - independent quantizer for channels
  35. * - >2 channels support
  36. * - more decorrelation types
  37. * - more tap_quant tests
  38. * - selectable intlist writers/readers (bonk-style, golomb, cabac)
  39. */
  /* Upper bound on the channel count; sizes the per-channel pointer arrays
   * in SonicContext. */
  #define MAX_CHANNELS 2

  /* Inter-channel decorrelation modes handled by the switch statements in the
   * encoder and decoder.  Any other value (the encoder uses 3 for mono input)
   * matches no case and leaves the samples untouched. */
  #define MID_SIDE 0
  #define LEFT_SIDE 1
  #define RIGHT_SIDE 2
  44. typedef struct SonicContext {
  45. int version;
  46. int minor_version;
  47. int lossless, decorrelation;
  48. int num_taps, downsampling;
  49. double quantization;
  50. int channels, samplerate, block_align, frame_size;
  51. int *tap_quant;
  52. int *int_samples;
  53. int *coded_samples[MAX_CHANNELS];
  54. // for encoding
  55. int *tail;
  56. int tail_size;
  57. int *window;
  58. int window_size;
  59. // for decoding
  60. int *predictor_k;
  61. int *predictor_state[MAX_CHANNELS];
  62. } SonicContext;
  /* Fixed-point scale of the predictor coefficients: products of a coefficient
   * and a state value are scaled back with shift_down(..., LATTICE_SHIFT). */
  #define LATTICE_SHIFT 10
  /* In lossy mode input samples are scaled up by this many bits before coding
   * and scaled back down after decoding. */
  #define SAMPLE_SHIFT 4
  #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
  #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)

  /* Constants of the encoder's simple rate control. */
  #define BASE_QUANT 0.6
  #define RATE_VARIATION 3.0
  /**
   * Divide a by 2^b, rounding to nearest (halves round towards +infinity).
   *
   * The rounding offset is added in unsigned arithmetic so that values of a
   * close to INT_MAX wrap around instead of triggering signed-overflow
   * undefined behaviour.
   *
   * @param a value to scale
   * @param b number of bits to shift by, must be at least 1
   * @return the rounded, shifted value
   */
  static inline int shift(int a, int b)
  {
      return (int)(a + (1U << (b - 1))) >> b;
  }
  /**
   * Arithmetic right shift of a by b bits, with one added to the result
   * whenever a is negative.
   *
   * @param a value to scale
   * @param b number of bits to shift by
   * @return (a >> b) for non-negative a, (a >> b) + 1 otherwise
   */
  static inline int shift_down(int a, int b)
  {
      const int shifted = a >> b;

      return a < 0 ? shifted + 1 : shifted;
  }
  77. static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
  78. int i;
  79. #define put_rac(C,S,B) \
  80. do{\
  81. if(rc_stat){\
  82. rc_stat[*(S)][B]++;\
  83. rc_stat2[(S)-state][B]++;\
  84. }\
  85. put_rac(C,S,B);\
  86. }while(0)
  87. if(v){
  88. const int a= FFABS(v);
  89. const int e= av_log2(a);
  90. put_rac(c, state+0, 0);
  91. if(e<=9){
  92. for(i=0; i<e; i++){
  93. put_rac(c, state+1+i, 1); //1..10
  94. }
  95. put_rac(c, state+1+i, 0);
  96. for(i=e-1; i>=0; i--){
  97. put_rac(c, state+22+i, (a>>i)&1); //22..31
  98. }
  99. if(is_signed)
  100. put_rac(c, state+11 + e, v < 0); //11..21
  101. }else{
  102. for(i=0; i<e; i++){
  103. put_rac(c, state+1+FFMIN(i,9), 1); //1..10
  104. }
  105. put_rac(c, state+1+9, 0);
  106. for(i=e-1; i>=0; i--){
  107. put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
  108. }
  109. if(is_signed)
  110. put_rac(c, state+11 + 10, v < 0); //11..21
  111. }
  112. }else{
  113. put_rac(c, state+0, 1);
  114. }
  115. #undef put_rac
  116. }
  117. static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
  118. if(get_rac(c, state+0))
  119. return 0;
  120. else{
  121. int i, e, a;
  122. e= 0;
  123. while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
  124. e++;
  125. if (e > 31)
  126. return AVERROR_INVALIDDATA;
  127. }
  128. a= 1;
  129. for(i=e-1; i>=0; i--){
  130. a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
  131. }
  132. e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
  133. return (a^e)-e;
  134. }
  135. }
  136. #if 1
  137. static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  138. {
  139. int i;
  140. for (i = 0; i < entries; i++)
  141. put_symbol(c, state, buf[i], 1, NULL, NULL);
  142. return 1;
  143. }
  144. static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
  145. {
  146. int i;
  147. for (i = 0; i < entries; i++)
  148. buf[i] = get_symbol(c, state, 1);
  149. return 1;
  150. }
  151. #elif 1
  152. static inline int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  153. {
  154. int i;
  155. for (i = 0; i < entries; i++)
  156. set_se_golomb(pb, buf[i]);
  157. return 1;
  158. }
  159. static inline int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  160. {
  161. int i;
  162. for (i = 0; i < entries; i++)
  163. buf[i] = get_se_golomb(gb);
  164. return 1;
  165. }
  166. #else
  /* Divisor controlling how fast the run-length step of the bonk-style list
   * coder grows and shrinks (step +/- step / ADAPT_LEVEL). */
  #define ADAPT_LEVEL 8

  /**
   * Number of bits needed to represent x.
   *
   * @param x value to measure
   * @return position of the highest set bit plus one, 0 for x == 0
   */
  static int bits_to_store(uint64_t x)
  {
      int count;

      for (count = 0; x != 0; x >>= 1)
          count++;

      return count;
  }
  178. static void write_uint_max(PutBitContext *pb, unsigned int value, unsigned int max)
  179. {
  180. int i, bits;
  181. if (!max)
  182. return;
  183. bits = bits_to_store(max);
  184. for (i = 0; i < bits-1; i++)
  185. put_bits(pb, 1, value & (1 << i));
  186. if ( (value | (1 << (bits-1))) <= max)
  187. put_bits(pb, 1, value & (1 << (bits-1)));
  188. }
  189. static unsigned int read_uint_max(GetBitContext *gb, int max)
  190. {
  191. int i, bits, value = 0;
  192. if (!max)
  193. return 0;
  194. bits = bits_to_store(max);
  195. for (i = 0; i < bits-1; i++)
  196. if (get_bits1(gb))
  197. value += 1 << i;
  198. if ( (value | (1<<(bits-1))) <= max)
  199. if (get_bits1(gb))
  200. value += 1 << (bits-1);
  201. return value;
  202. }
  203. static int intlist_write(PutBitContext *pb, int *buf, int entries, int base_2_part)
  204. {
  205. int i, j, x = 0, low_bits = 0, max = 0;
  206. int step = 256, pos = 0, dominant = 0, any = 0;
  207. int *copy, *bits;
  208. copy = av_calloc(entries, sizeof(*copy));
  209. if (!copy)
  210. return AVERROR(ENOMEM);
  211. if (base_2_part)
  212. {
  213. int energy = 0;
  214. for (i = 0; i < entries; i++)
  215. energy += abs(buf[i]);
  216. low_bits = bits_to_store(energy / (entries * 2));
  217. if (low_bits > 15)
  218. low_bits = 15;
  219. put_bits(pb, 4, low_bits);
  220. }
  221. for (i = 0; i < entries; i++)
  222. {
  223. put_bits(pb, low_bits, abs(buf[i]));
  224. copy[i] = abs(buf[i]) >> low_bits;
  225. if (copy[i] > max)
  226. max = abs(copy[i]);
  227. }
  228. bits = av_calloc(entries*max, sizeof(*bits));
  229. if (!bits)
  230. {
  231. av_free(copy);
  232. return AVERROR(ENOMEM);
  233. }
  234. for (i = 0; i <= max; i++)
  235. {
  236. for (j = 0; j < entries; j++)
  237. if (copy[j] >= i)
  238. bits[x++] = copy[j] > i;
  239. }
  240. // store bitstream
  241. while (pos < x)
  242. {
  243. int steplet = step >> 8;
  244. if (pos + steplet > x)
  245. steplet = x - pos;
  246. for (i = 0; i < steplet; i++)
  247. if (bits[i+pos] != dominant)
  248. any = 1;
  249. put_bits(pb, 1, any);
  250. if (!any)
  251. {
  252. pos += steplet;
  253. step += step / ADAPT_LEVEL;
  254. }
  255. else
  256. {
  257. int interloper = 0;
  258. while (((pos + interloper) < x) && (bits[pos + interloper] == dominant))
  259. interloper++;
  260. // note change
  261. write_uint_max(pb, interloper, (step >> 8) - 1);
  262. pos += interloper + 1;
  263. step -= step / ADAPT_LEVEL;
  264. }
  265. if (step < 256)
  266. {
  267. step = 65536 / step;
  268. dominant = !dominant;
  269. }
  270. }
  271. // store signs
  272. for (i = 0; i < entries; i++)
  273. if (buf[i])
  274. put_bits(pb, 1, buf[i] < 0);
  275. av_free(bits);
  276. av_free(copy);
  277. return 0;
  278. }
  279. static int intlist_read(GetBitContext *gb, int *buf, int entries, int base_2_part)
  280. {
  281. int i, low_bits = 0, x = 0;
  282. int n_zeros = 0, step = 256, dominant = 0;
  283. int pos = 0, level = 0;
  284. int *bits = av_calloc(entries, sizeof(*bits));
  285. if (!bits)
  286. return AVERROR(ENOMEM);
  287. if (base_2_part)
  288. {
  289. low_bits = get_bits(gb, 4);
  290. if (low_bits)
  291. for (i = 0; i < entries; i++)
  292. buf[i] = get_bits(gb, low_bits);
  293. }
  294. // av_log(NULL, AV_LOG_INFO, "entries: %d, low bits: %d\n", entries, low_bits);
  295. while (n_zeros < entries)
  296. {
  297. int steplet = step >> 8;
  298. if (!get_bits1(gb))
  299. {
  300. for (i = 0; i < steplet; i++)
  301. bits[x++] = dominant;
  302. if (!dominant)
  303. n_zeros += steplet;
  304. step += step / ADAPT_LEVEL;
  305. }
  306. else
  307. {
  308. int actual_run = read_uint_max(gb, steplet-1);
  309. // av_log(NULL, AV_LOG_INFO, "actual run: %d\n", actual_run);
  310. for (i = 0; i < actual_run; i++)
  311. bits[x++] = dominant;
  312. bits[x++] = !dominant;
  313. if (!dominant)
  314. n_zeros += actual_run;
  315. else
  316. n_zeros++;
  317. step -= step / ADAPT_LEVEL;
  318. }
  319. if (step < 256)
  320. {
  321. step = 65536 / step;
  322. dominant = !dominant;
  323. }
  324. }
  325. // reconstruct unsigned values
  326. n_zeros = 0;
  327. for (i = 0; n_zeros < entries; i++)
  328. {
  329. while(1)
  330. {
  331. if (pos >= entries)
  332. {
  333. pos = 0;
  334. level += 1 << low_bits;
  335. }
  336. if (buf[pos] >= level)
  337. break;
  338. pos++;
  339. }
  340. if (bits[i])
  341. buf[pos] += 1 << low_bits;
  342. else
  343. n_zeros++;
  344. pos++;
  345. }
  346. av_free(bits);
  347. // read signs
  348. for (i = 0; i < entries; i++)
  349. if (buf[i] && get_bits1(gb))
  350. buf[i] = -buf[i];
  351. // av_log(NULL, AV_LOG_INFO, "zeros: %d pos: %d\n", n_zeros, pos);
  352. return 0;
  353. }
  354. #endif
  355. static void predictor_init_state(int *k, int *state, int order)
  356. {
  357. int i;
  358. for (i = order-2; i >= 0; i--)
  359. {
  360. int j, p, x = state[i];
  361. for (j = 0, p = i+1; p < order; j++,p++)
  362. {
  363. int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
  364. state[p] += shift_down(k[j]*x, LATTICE_SHIFT);
  365. x = tmp;
  366. }
  367. }
  368. }
  369. static int predictor_calc_error(int *k, int *state, int order, int error)
  370. {
  371. int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
  372. #if 1
  373. int *k_ptr = &(k[order-2]),
  374. *state_ptr = &(state[order-2]);
  375. for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
  376. {
  377. int k_value = *k_ptr, state_value = *state_ptr;
  378. x -= shift_down(k_value * state_value, LATTICE_SHIFT);
  379. state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
  380. }
  381. #else
  382. for (i = order-2; i >= 0; i--)
  383. {
  384. x -= shift_down(k[i] * state[i], LATTICE_SHIFT);
  385. state[i+1] = state[i] + shift_down(k[i] * x, LATTICE_SHIFT);
  386. }
  387. #endif
  388. // don't drift too far, to avoid overflows
  389. if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
  390. if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
  391. state[0] = x;
  392. return x;
  393. }
  394. #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
  395. // Heavily modified Levinson-Durbin algorithm which
  396. // copes better with quantization, and calculates the
  397. // actual whitened result as it goes.
  398. static int modified_levinson_durbin(int *window, int window_entries,
  399. int *out, int out_entries, int channels, int *tap_quant)
  400. {
  401. int i;
  402. int *state = av_calloc(window_entries, sizeof(*state));
  403. if (!state)
  404. return AVERROR(ENOMEM);
  405. memcpy(state, window, 4* window_entries);
  406. for (i = 0; i < out_entries; i++)
  407. {
  408. int step = (i+1)*channels, k, j;
  409. double xx = 0.0, xy = 0.0;
  410. #if 1
  411. int *x_ptr = &(window[step]);
  412. int *state_ptr = &(state[0]);
  413. j = window_entries - step;
  414. for (;j>0;j--,x_ptr++,state_ptr++)
  415. {
  416. double x_value = *x_ptr;
  417. double state_value = *state_ptr;
  418. xx += state_value*state_value;
  419. xy += x_value*state_value;
  420. }
  421. #else
  422. for (j = 0; j <= (window_entries - step); j++);
  423. {
  424. double stepval = window[step+j];
  425. double stateval = window[j];
  426. // xx += (double)window[j]*(double)window[j];
  427. // xy += (double)window[step+j]*(double)window[j];
  428. xx += stateval*stateval;
  429. xy += stepval*stateval;
  430. }
  431. #endif
  432. if (xx == 0.0)
  433. k = 0;
  434. else
  435. k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
  436. if (k > (LATTICE_FACTOR/tap_quant[i]))
  437. k = LATTICE_FACTOR/tap_quant[i];
  438. if (-k > (LATTICE_FACTOR/tap_quant[i]))
  439. k = -(LATTICE_FACTOR/tap_quant[i]);
  440. out[i] = k;
  441. k *= tap_quant[i];
  442. #if 1
  443. x_ptr = &(window[step]);
  444. state_ptr = &(state[0]);
  445. j = window_entries - step;
  446. for (;j>0;j--,x_ptr++,state_ptr++)
  447. {
  448. int x_value = *x_ptr;
  449. int state_value = *state_ptr;
  450. *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
  451. *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
  452. }
  453. #else
  454. for (j=0; j <= (window_entries - step); j++)
  455. {
  456. int stepval = window[step+j];
  457. int stateval=state[j];
  458. window[step+j] += shift_down(k * stateval, LATTICE_SHIFT);
  459. state[j] += shift_down(k * stepval, LATTICE_SHIFT);
  460. }
  461. #endif
  462. }
  463. av_free(state);
  464. return 0;
  465. }
  /**
   * Map a sample rate to the index stored in the stream header.
   *
   * @param samplerate sample rate in Hz
   * @return index 0..8 for a supported rate, AVERROR(EINVAL) otherwise
   */
  static inline int code_samplerate(int samplerate)
  {
      /* position in this table is the code written to the header */
      static const int supported_rates[] = {
          44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
      };
      const int count = sizeof(supported_rates) / sizeof(supported_rates[0]);
      int code;

      for (code = 0; code < count; code++)
          if (supported_rates[code] == samplerate)
              return code;

      return AVERROR(EINVAL);
  }
  482. static av_cold int sonic_encode_init(AVCodecContext *avctx)
  483. {
  484. SonicContext *s = avctx->priv_data;
  485. PutBitContext pb;
  486. int i;
  487. s->version = 2;
  488. if (avctx->channels > MAX_CHANNELS)
  489. {
  490. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  491. return AVERROR(EINVAL); /* only stereo or mono for now */
  492. }
  493. if (avctx->channels == 2)
  494. s->decorrelation = MID_SIDE;
  495. else
  496. s->decorrelation = 3;
  497. if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
  498. {
  499. s->lossless = 1;
  500. s->num_taps = 32;
  501. s->downsampling = 1;
  502. s->quantization = 0.0;
  503. }
  504. else
  505. {
  506. s->num_taps = 128;
  507. s->downsampling = 2;
  508. s->quantization = 1.0;
  509. }
  510. // max tap 2048
  511. if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
  512. av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
  513. return AVERROR_INVALIDDATA;
  514. }
  515. // generate taps
  516. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  517. if (!s->tap_quant)
  518. return AVERROR(ENOMEM);
  519. for (i = 0; i < s->num_taps; i++)
  520. s->tap_quant[i] = ff_sqrt(i+1);
  521. s->channels = avctx->channels;
  522. s->samplerate = avctx->sample_rate;
  523. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  524. s->frame_size = s->channels*s->block_align*s->downsampling;
  525. s->tail_size = s->num_taps*s->channels;
  526. s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
  527. if (!s->tail)
  528. return AVERROR(ENOMEM);
  529. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
  530. if (!s->predictor_k)
  531. return AVERROR(ENOMEM);
  532. for (i = 0; i < s->channels; i++)
  533. {
  534. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  535. if (!s->coded_samples[i])
  536. return AVERROR(ENOMEM);
  537. }
  538. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  539. s->window_size = ((2*s->tail_size)+s->frame_size);
  540. s->window = av_calloc(s->window_size, sizeof(*s->window));
  541. if (!s->window || !s->int_samples)
  542. return AVERROR(ENOMEM);
  543. avctx->extradata = av_mallocz(16);
  544. if (!avctx->extradata)
  545. return AVERROR(ENOMEM);
  546. init_put_bits(&pb, avctx->extradata, 16*8);
  547. put_bits(&pb, 2, s->version); // version
  548. if (s->version >= 1)
  549. {
  550. if (s->version >= 2) {
  551. put_bits(&pb, 8, s->version);
  552. put_bits(&pb, 8, s->minor_version);
  553. }
  554. put_bits(&pb, 2, s->channels);
  555. put_bits(&pb, 4, code_samplerate(s->samplerate));
  556. }
  557. put_bits(&pb, 1, s->lossless);
  558. if (!s->lossless)
  559. put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
  560. put_bits(&pb, 2, s->decorrelation);
  561. put_bits(&pb, 2, s->downsampling);
  562. put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
  563. put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
  564. flush_put_bits(&pb);
  565. avctx->extradata_size = put_bits_count(&pb)/8;
  566. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  567. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  568. avctx->frame_size = s->block_align*s->downsampling;
  569. return 0;
  570. }
  571. static av_cold int sonic_encode_close(AVCodecContext *avctx)
  572. {
  573. SonicContext *s = avctx->priv_data;
  574. int i;
  575. for (i = 0; i < s->channels; i++)
  576. av_freep(&s->coded_samples[i]);
  577. av_freep(&s->predictor_k);
  578. av_freep(&s->tail);
  579. av_freep(&s->tap_quant);
  580. av_freep(&s->window);
  581. av_freep(&s->int_samples);
  582. return 0;
  583. }
  584. static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  585. const AVFrame *frame, int *got_packet_ptr)
  586. {
  587. SonicContext *s = avctx->priv_data;
  588. RangeCoder c;
  589. int i, j, ch, quant = 0, x = 0;
  590. int ret;
  591. const short *samples = (const int16_t*)frame->data[0];
  592. uint8_t state[32];
  593. if ((ret = ff_alloc_packet2(avctx, avpkt, s->frame_size * 5 + 1000, 0)) < 0)
  594. return ret;
  595. ff_init_range_encoder(&c, avpkt->data, avpkt->size);
  596. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  597. memset(state, 128, sizeof(state));
  598. // short -> internal
  599. for (i = 0; i < s->frame_size; i++)
  600. s->int_samples[i] = samples[i];
  601. if (!s->lossless)
  602. for (i = 0; i < s->frame_size; i++)
  603. s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
  604. switch(s->decorrelation)
  605. {
  606. case MID_SIDE:
  607. for (i = 0; i < s->frame_size; i += s->channels)
  608. {
  609. s->int_samples[i] += s->int_samples[i+1];
  610. s->int_samples[i+1] -= shift(s->int_samples[i], 1);
  611. }
  612. break;
  613. case LEFT_SIDE:
  614. for (i = 0; i < s->frame_size; i += s->channels)
  615. s->int_samples[i+1] -= s->int_samples[i];
  616. break;
  617. case RIGHT_SIDE:
  618. for (i = 0; i < s->frame_size; i += s->channels)
  619. s->int_samples[i] -= s->int_samples[i+1];
  620. break;
  621. }
  622. memset(s->window, 0, 4* s->window_size);
  623. for (i = 0; i < s->tail_size; i++)
  624. s->window[x++] = s->tail[i];
  625. for (i = 0; i < s->frame_size; i++)
  626. s->window[x++] = s->int_samples[i];
  627. for (i = 0; i < s->tail_size; i++)
  628. s->window[x++] = 0;
  629. for (i = 0; i < s->tail_size; i++)
  630. s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
  631. // generate taps
  632. ret = modified_levinson_durbin(s->window, s->window_size,
  633. s->predictor_k, s->num_taps, s->channels, s->tap_quant);
  634. if (ret < 0)
  635. return ret;
  636. if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
  637. return ret;
  638. for (ch = 0; ch < s->channels; ch++)
  639. {
  640. x = s->tail_size+ch;
  641. for (i = 0; i < s->block_align; i++)
  642. {
  643. int sum = 0;
  644. for (j = 0; j < s->downsampling; j++, x += s->channels)
  645. sum += s->window[x];
  646. s->coded_samples[ch][i] = sum;
  647. }
  648. }
  649. // simple rate control code
  650. if (!s->lossless)
  651. {
  652. double energy1 = 0.0, energy2 = 0.0;
  653. for (ch = 0; ch < s->channels; ch++)
  654. {
  655. for (i = 0; i < s->block_align; i++)
  656. {
  657. double sample = s->coded_samples[ch][i];
  658. energy2 += sample*sample;
  659. energy1 += fabs(sample);
  660. }
  661. }
  662. energy2 = sqrt(energy2/(s->channels*s->block_align));
  663. energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
  664. // increase bitrate when samples are like a gaussian distribution
  665. // reduce bitrate when samples are like a two-tailed exponential distribution
  666. if (energy2 > energy1)
  667. energy2 += (energy2-energy1)*RATE_VARIATION;
  668. quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
  669. // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
  670. quant = av_clip(quant, 1, 65534);
  671. put_symbol(&c, state, quant, 0, NULL, NULL);
  672. quant *= SAMPLE_FACTOR;
  673. }
  674. // write out coded samples
  675. for (ch = 0; ch < s->channels; ch++)
  676. {
  677. if (!s->lossless)
  678. for (i = 0; i < s->block_align; i++)
  679. s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
  680. if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
  681. return ret;
  682. }
  683. // av_log(avctx, AV_LOG_DEBUG, "used bytes: %d\n", (put_bits_count(&pb)+7)/8);
  684. avpkt->size = ff_rac_terminate(&c, 0);
  685. *got_packet_ptr = 1;
  686. return 0;
  687. }
  688. #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
  689. #if CONFIG_SONIC_DECODER
  /* Sample rates selectable through the 4-bit index in the stream header;
   * the order matches the codes returned by code_samplerate(). */
  static const int samplerate_table[] = {
      44100, 22050, 11025,
      96000, 48000, 32000,
      24000, 16000,  8000,
  };
  692. static av_cold int sonic_decode_init(AVCodecContext *avctx)
  693. {
  694. SonicContext *s = avctx->priv_data;
  695. GetBitContext gb;
  696. int i;
  697. int ret;
  698. s->channels = avctx->channels;
  699. s->samplerate = avctx->sample_rate;
  700. if (!avctx->extradata)
  701. {
  702. av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
  703. return AVERROR_INVALIDDATA;
  704. }
  705. ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
  706. if (ret < 0)
  707. return ret;
  708. s->version = get_bits(&gb, 2);
  709. if (s->version >= 2) {
  710. s->version = get_bits(&gb, 8);
  711. s->minor_version = get_bits(&gb, 8);
  712. }
  713. if (s->version != 2)
  714. {
  715. av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
  716. return AVERROR_INVALIDDATA;
  717. }
  718. if (s->version >= 1)
  719. {
  720. int sample_rate_index;
  721. s->channels = get_bits(&gb, 2);
  722. sample_rate_index = get_bits(&gb, 4);
  723. if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
  724. av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
  725. return AVERROR_INVALIDDATA;
  726. }
  727. s->samplerate = samplerate_table[sample_rate_index];
  728. av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
  729. s->channels, s->samplerate);
  730. }
  731. if (s->channels > MAX_CHANNELS || s->channels < 1)
  732. {
  733. av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
  734. return AVERROR_INVALIDDATA;
  735. }
  736. avctx->channels = s->channels;
  737. s->lossless = get_bits1(&gb);
  738. if (!s->lossless)
  739. skip_bits(&gb, 3); // XXX FIXME
  740. s->decorrelation = get_bits(&gb, 2);
  741. if (s->decorrelation != 3 && s->channels != 2) {
  742. av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
  743. return AVERROR_INVALIDDATA;
  744. }
  745. s->downsampling = get_bits(&gb, 2);
  746. if (!s->downsampling) {
  747. av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
  748. return AVERROR_INVALIDDATA;
  749. }
  750. s->num_taps = (get_bits(&gb, 5)+1)<<5;
  751. if (get_bits1(&gb)) // XXX FIXME
  752. av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
  753. s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
  754. s->frame_size = s->channels*s->block_align*s->downsampling;
  755. // avctx->frame_size = s->block_align;
  756. if (s->num_taps * s->channels > s->frame_size) {
  757. av_log(avctx, AV_LOG_ERROR,
  758. "number of taps times channels (%d * %d) larger than frame size %d\n",
  759. s->num_taps, s->channels, s->frame_size);
  760. return AVERROR_INVALIDDATA;
  761. }
  762. av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
  763. s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
  764. // generate taps
  765. s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
  766. if (!s->tap_quant)
  767. return AVERROR(ENOMEM);
  768. for (i = 0; i < s->num_taps; i++)
  769. s->tap_quant[i] = ff_sqrt(i+1);
  770. s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
  771. for (i = 0; i < s->channels; i++)
  772. {
  773. s->predictor_state[i] = av_calloc(s->num_taps, sizeof(**s->predictor_state));
  774. if (!s->predictor_state[i])
  775. return AVERROR(ENOMEM);
  776. }
  777. for (i = 0; i < s->channels; i++)
  778. {
  779. s->coded_samples[i] = av_calloc(s->block_align, sizeof(**s->coded_samples));
  780. if (!s->coded_samples[i])
  781. return AVERROR(ENOMEM);
  782. }
  783. s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
  784. if (!s->int_samples)
  785. return AVERROR(ENOMEM);
  786. avctx->sample_fmt = AV_SAMPLE_FMT_S16;
  787. return 0;
  788. }
  789. static av_cold int sonic_decode_close(AVCodecContext *avctx)
  790. {
  791. SonicContext *s = avctx->priv_data;
  792. int i;
  793. av_freep(&s->int_samples);
  794. av_freep(&s->tap_quant);
  795. av_freep(&s->predictor_k);
  796. for (i = 0; i < s->channels; i++)
  797. {
  798. av_freep(&s->predictor_state[i]);
  799. av_freep(&s->coded_samples[i]);
  800. }
  801. return 0;
  802. }
  803. static int sonic_decode_frame(AVCodecContext *avctx,
  804. void *data, int *got_frame_ptr,
  805. AVPacket *avpkt)
  806. {
  807. const uint8_t *buf = avpkt->data;
  808. int buf_size = avpkt->size;
  809. SonicContext *s = avctx->priv_data;
  810. RangeCoder c;
  811. uint8_t state[32];
  812. int i, quant, ch, j, ret;
  813. int16_t *samples;
  814. AVFrame *frame = data;
  815. if (buf_size == 0) return 0;
  816. frame->nb_samples = s->frame_size / avctx->channels;
  817. if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  818. return ret;
  819. samples = (int16_t *)frame->data[0];
  820. // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
  821. memset(state, 128, sizeof(state));
  822. ff_init_range_decoder(&c, buf, buf_size);
  823. ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
  824. intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
  825. // dequantize
  826. for (i = 0; i < s->num_taps; i++)
  827. s->predictor_k[i] *= s->tap_quant[i];
  828. if (s->lossless)
  829. quant = 1;
  830. else
  831. quant = get_symbol(&c, state, 0) * SAMPLE_FACTOR;
  832. // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
  833. for (ch = 0; ch < s->channels; ch++)
  834. {
  835. int x = ch;
  836. predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
  837. intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
  838. for (i = 0; i < s->block_align; i++)
  839. {
  840. for (j = 0; j < s->downsampling - 1; j++)
  841. {
  842. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
  843. x += s->channels;
  844. }
  845. s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * quant);
  846. x += s->channels;
  847. }
  848. for (i = 0; i < s->num_taps; i++)
  849. s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
  850. }
  851. switch(s->decorrelation)
  852. {
  853. case MID_SIDE:
  854. for (i = 0; i < s->frame_size; i += s->channels)
  855. {
  856. s->int_samples[i+1] += shift(s->int_samples[i], 1);
  857. s->int_samples[i] -= s->int_samples[i+1];
  858. }
  859. break;
  860. case LEFT_SIDE:
  861. for (i = 0; i < s->frame_size; i += s->channels)
  862. s->int_samples[i+1] += s->int_samples[i];
  863. break;
  864. case RIGHT_SIDE:
  865. for (i = 0; i < s->frame_size; i += s->channels)
  866. s->int_samples[i] += s->int_samples[i+1];
  867. break;
  868. }
  869. if (!s->lossless)
  870. for (i = 0; i < s->frame_size; i++)
  871. s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
  872. // internal -> short
  873. for (i = 0; i < s->frame_size; i++)
  874. samples[i] = av_clip_int16(s->int_samples[i]);
  875. *got_frame_ptr = 1;
  876. return buf_size;
  877. }
  878. AVCodec ff_sonic_decoder = {
  879. .name = "sonic",
  880. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  881. .type = AVMEDIA_TYPE_AUDIO,
  882. .id = AV_CODEC_ID_SONIC,
  883. .priv_data_size = sizeof(SonicContext),
  884. .init = sonic_decode_init,
  885. .close = sonic_decode_close,
  886. .decode = sonic_decode_frame,
  887. .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL,
  888. };
  889. #endif /* CONFIG_SONIC_DECODER */
  890. #if CONFIG_SONIC_ENCODER
  891. AVCodec ff_sonic_encoder = {
  892. .name = "sonic",
  893. .long_name = NULL_IF_CONFIG_SMALL("Sonic"),
  894. .type = AVMEDIA_TYPE_AUDIO,
  895. .id = AV_CODEC_ID_SONIC,
  896. .priv_data_size = sizeof(SonicContext),
  897. .init = sonic_encode_init,
  898. .encode2 = sonic_encode_frame,
  899. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  900. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  901. .close = sonic_encode_close,
  902. };
  903. #endif
  904. #if CONFIG_SONIC_LS_ENCODER
  905. AVCodec ff_sonic_ls_encoder = {
  906. .name = "sonicls",
  907. .long_name = NULL_IF_CONFIG_SMALL("Sonic lossless"),
  908. .type = AVMEDIA_TYPE_AUDIO,
  909. .id = AV_CODEC_ID_SONIC_LS,
  910. .priv_data_size = sizeof(SonicContext),
  911. .init = sonic_encode_init,
  912. .encode2 = sonic_encode_frame,
  913. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE },
  914. .capabilities = AV_CODEC_CAP_EXPERIMENTAL,
  915. .close = sonic_encode_close,
  916. };
  917. #endif