/media/libjpeg/jcdctmgr.c

http://github.com/zpao/v8monkey · C · 642 lines · 413 code · 76 blank · 153 comment · 58 complexity · b6da7b64fea85e4819bff0947a3178ed MD5 · raw file

  1. /*
  2. * jcdctmgr.c
  3. *
  4. * Copyright (C) 1994-1996, Thomas G. Lane.
  5. * Copyright (C) 1999-2006, MIYASAKA Masaru.
  6. * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  7. * Copyright (C) 2011 D. R. Commander
  8. * This file is part of the Independent JPEG Group's software.
  9. * For conditions of distribution and use, see the accompanying README file.
  10. *
  11. * This file contains the forward-DCT management logic.
  12. * This code selects a particular DCT implementation to be used,
  13. * and it performs related housekeeping chores including coefficient
  14. * quantization.
  15. */
  16. #define JPEG_INTERNALS
  17. #include "jinclude.h"
  18. #include "jpeglib.h"
  19. #include "jdct.h" /* Private declarations for DCT subsystem */
  20. #include "jsimddct.h"
  21. /* Private subobject for this module */
  22. typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
  23. typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
  24. typedef JMETHOD(void, convsamp_method_ptr,
  25. (JSAMPARRAY sample_data, JDIMENSION start_col,
  26. DCTELEM * workspace));
  27. typedef JMETHOD(void, float_convsamp_method_ptr,
  28. (JSAMPARRAY sample_data, JDIMENSION start_col,
  29. FAST_FLOAT *workspace));
  30. typedef JMETHOD(void, quantize_method_ptr,
  31. (JCOEFPTR coef_block, DCTELEM * divisors,
  32. DCTELEM * workspace));
  33. typedef JMETHOD(void, float_quantize_method_ptr,
  34. (JCOEFPTR coef_block, FAST_FLOAT * divisors,
  35. FAST_FLOAT * workspace));
  36. METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
  37. typedef struct {
  38. struct jpeg_forward_dct pub; /* public fields */
  39. /* Pointer to the DCT routine actually in use */
  40. forward_DCT_method_ptr dct;
  41. convsamp_method_ptr convsamp;
  42. quantize_method_ptr quantize;
  43. /* The actual post-DCT divisors --- not identical to the quant table
  44. * entries, because of scaling (especially for an unnormalized DCT).
  45. * Each table is given in normal array order.
  46. */
  47. DCTELEM * divisors[NUM_QUANT_TBLS];
  48. /* work area for FDCT subroutine */
  49. DCTELEM * workspace;
  50. #ifdef DCT_FLOAT_SUPPORTED
  51. /* Same as above for the floating-point case. */
  52. float_DCT_method_ptr float_dct;
  53. float_convsamp_method_ptr float_convsamp;
  54. float_quantize_method_ptr float_quantize;
  55. FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  56. FAST_FLOAT * float_workspace;
  57. #endif
  58. } my_fdct_controller;
  59. typedef my_fdct_controller * my_fdct_ptr;
  60. /*
  61. * Find the highest bit in an integer through binary search.
  62. */
  63. LOCAL(int)
  64. flss (UINT16 val)
  65. {
  66. int bit;
  67. bit = 16;
  68. if (!val)
  69. return 0;
  70. if (!(val & 0xff00)) {
  71. bit -= 8;
  72. val <<= 8;
  73. }
  74. if (!(val & 0xf000)) {
  75. bit -= 4;
  76. val <<= 4;
  77. }
  78. if (!(val & 0xc000)) {
  79. bit -= 2;
  80. val <<= 2;
  81. }
  82. if (!(val & 0x8000)) {
  83. bit -= 1;
  84. val <<= 1;
  85. }
  86. return bit;
  87. }
  88. /*
  89. * Compute values to do a division using reciprocal.
  90. *
  91. * This implementation is based on an algorithm described in
  92. * "How to optimize for the Pentium family of microprocessors"
  93. * (http://www.agner.org/assem/).
  94. * More information about the basic algorithm can be found in
  95. * the paper "Integer Division Using Reciprocals" by Robert Alverson.
  96. *
  97. * The basic idea is to replace x/d by x * d^-1. In order to store
  98. * d^-1 with enough precision we shift it left a few places. It turns
  99. * out that this algorithm gives just enough precision, and also fits
  100. * into DCTELEM:
  101. *
  102. * b = (the number of significant bits in divisor) - 1
  103. * r = (word size) + b
  104. * f = 2^r / divisor
  105. *
  106. * f will not be an integer for most cases, so we need to compensate
  107. * for the rounding error introduced:
  108. *
  109. * no fractional part:
  110. *
  111. * result = input >> r
  112. *
  113. * fractional part of f < 0.5:
  114. *
  115. * round f down to nearest integer
  116. * result = ((input + 1) * f) >> r
  117. *
  118. * fractional part of f > 0.5:
  119. *
  120. * round f up to nearest integer
  121. * result = (input * f) >> r
  122. *
  123. * This is the original algorithm that gives truncated results. But we
  124. * want properly rounded results, so we replace "input" with
  125. * "input + divisor/2".
  126. *
  127. * In order to allow SIMD implementations we also tweak the values to
  128. * allow the same calculation to be made at all times:
  129. *
  130. * dctbl[0] = f rounded to nearest integer
  131. * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
  132. * dctbl[2] = 1 << ((word size) * 2 - r)
  133. * dctbl[3] = r - (word size)
  134. *
  135. * dctbl[2] is for stupid instruction sets where the shift operation
  136. * isn't member wise (e.g. MMX).
  137. *
  138. * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
  139. * is that most SIMD implementations have a "multiply and store top
  140. * half" operation.
  141. *
  142. * Lastly, we store each of the values in their own table instead
  143. * of in a consecutive manner, yet again in order to allow SIMD
  144. * routines.
  145. */
  146. LOCAL(int)
  147. compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
  148. {
  149. UDCTELEM2 fq, fr;
  150. UDCTELEM c;
  151. int b, r;
  152. b = flss(divisor) - 1;
  153. r = sizeof(DCTELEM) * 8 + b;
  154. fq = ((UDCTELEM2)1 << r) / divisor;
  155. fr = ((UDCTELEM2)1 << r) % divisor;
  156. c = divisor / 2; /* for rounding */
  157. if (fr == 0) { /* divisor is power of two */
  158. /* fq will be one bit too large to fit in DCTELEM, so adjust */
  159. fq >>= 1;
  160. r--;
  161. } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
  162. c++;
  163. } else { /* fractional part is > 0.5 */
  164. fq++;
  165. }
  166. dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
  167. dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
  168. dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
  169. dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
  170. if(r <= 16) return 0;
  171. else return 1;
  172. }
  173. /*
  174. * Initialize for a processing pass.
  175. * Verify that all referenced Q-tables are present, and set up
  176. * the divisor table for each one.
  177. * In the current implementation, DCT of all components is done during
  178. * the first pass, even if only some components will be output in the
  179. * first scan. Hence all components should be examined here.
  180. */
  181. METHODDEF(void)
  182. start_pass_fdctmgr (j_compress_ptr cinfo)
  183. {
  184. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  185. int ci, qtblno, i;
  186. jpeg_component_info *compptr;
  187. JQUANT_TBL * qtbl;
  188. DCTELEM * dtbl;
  189. for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
  190. ci++, compptr++) {
  191. qtblno = compptr->quant_tbl_no;
  192. /* Make sure specified quantization table is present */
  193. if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
  194. cinfo->quant_tbl_ptrs[qtblno] == NULL)
  195. ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
  196. qtbl = cinfo->quant_tbl_ptrs[qtblno];
  197. /* Compute divisors for this quant table */
  198. /* We may do this more than once for same table, but it's not a big deal */
  199. switch (cinfo->dct_method) {
  200. #ifdef DCT_ISLOW_SUPPORTED
  201. case JDCT_ISLOW:
  202. /* For LL&M IDCT method, divisors are equal to raw quantization
  203. * coefficients multiplied by 8 (to counteract scaling).
  204. */
  205. if (fdct->divisors[qtblno] == NULL) {
  206. fdct->divisors[qtblno] = (DCTELEM *)
  207. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  208. (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
  209. }
  210. dtbl = fdct->divisors[qtblno];
  211. for (i = 0; i < DCTSIZE2; i++) {
  212. if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
  213. && fdct->quantize == jsimd_quantize)
  214. fdct->quantize = quantize;
  215. }
  216. break;
  217. #endif
  218. #ifdef DCT_IFAST_SUPPORTED
  219. case JDCT_IFAST:
  220. {
  221. /* For AA&N IDCT method, divisors are equal to quantization
  222. * coefficients scaled by scalefactor[row]*scalefactor[col], where
  223. * scalefactor[0] = 1
  224. * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
  225. * We apply a further scale factor of 8.
  226. */
  227. #define CONST_BITS 14
  228. static const INT16 aanscales[DCTSIZE2] = {
  229. /* precomputed values scaled up by 14 bits */
  230. 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
  231. 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
  232. 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
  233. 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
  234. 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
  235. 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
  236. 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
  237. 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
  238. };
  239. SHIFT_TEMPS
  240. if (fdct->divisors[qtblno] == NULL) {
  241. fdct->divisors[qtblno] = (DCTELEM *)
  242. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  243. (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
  244. }
  245. dtbl = fdct->divisors[qtblno];
  246. for (i = 0; i < DCTSIZE2; i++) {
  247. if(!compute_reciprocal(
  248. DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
  249. (INT32) aanscales[i]),
  250. CONST_BITS-3), &dtbl[i])
  251. && fdct->quantize == jsimd_quantize)
  252. fdct->quantize = quantize;
  253. }
  254. }
  255. break;
  256. #endif
  257. #ifdef DCT_FLOAT_SUPPORTED
  258. case JDCT_FLOAT:
  259. {
  260. /* For float AA&N IDCT method, divisors are equal to quantization
  261. * coefficients scaled by scalefactor[row]*scalefactor[col], where
  262. * scalefactor[0] = 1
  263. * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
  264. * We apply a further scale factor of 8.
  265. * What's actually stored is 1/divisor so that the inner loop can
  266. * use a multiplication rather than a division.
  267. */
  268. FAST_FLOAT * fdtbl;
  269. int row, col;
  270. static const double aanscalefactor[DCTSIZE] = {
  271. 1.0, 1.387039845, 1.306562965, 1.175875602,
  272. 1.0, 0.785694958, 0.541196100, 0.275899379
  273. };
  274. if (fdct->float_divisors[qtblno] == NULL) {
  275. fdct->float_divisors[qtblno] = (FAST_FLOAT *)
  276. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  277. DCTSIZE2 * SIZEOF(FAST_FLOAT));
  278. }
  279. fdtbl = fdct->float_divisors[qtblno];
  280. i = 0;
  281. for (row = 0; row < DCTSIZE; row++) {
  282. for (col = 0; col < DCTSIZE; col++) {
  283. fdtbl[i] = (FAST_FLOAT)
  284. (1.0 / (((double) qtbl->quantval[i] *
  285. aanscalefactor[row] * aanscalefactor[col] * 8.0)));
  286. i++;
  287. }
  288. }
  289. }
  290. break;
  291. #endif
  292. default:
  293. ERREXIT(cinfo, JERR_NOT_COMPILED);
  294. break;
  295. }
  296. }
  297. }
  298. /*
  299. * Load data into workspace, applying unsigned->signed conversion.
  300. */
  301. METHODDEF(void)
  302. convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
  303. {
  304. register DCTELEM *workspaceptr;
  305. register JSAMPROW elemptr;
  306. register int elemr;
  307. workspaceptr = workspace;
  308. for (elemr = 0; elemr < DCTSIZE; elemr++) {
  309. elemptr = sample_data[elemr] + start_col;
  310. #if DCTSIZE == 8 /* unroll the inner loop */
  311. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  312. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  313. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  314. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  315. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  316. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  317. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  318. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  319. #else
  320. {
  321. register int elemc;
  322. for (elemc = DCTSIZE; elemc > 0; elemc--)
  323. *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
  324. }
  325. #endif
  326. }
  327. }
  328. /*
  329. * Quantize/descale the coefficients, and store into coef_blocks[].
  330. */
  331. METHODDEF(void)
  332. quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
  333. {
  334. int i;
  335. DCTELEM temp;
  336. UDCTELEM recip, corr, shift;
  337. UDCTELEM2 product;
  338. JCOEFPTR output_ptr = coef_block;
  339. for (i = 0; i < DCTSIZE2; i++) {
  340. temp = workspace[i];
  341. recip = divisors[i + DCTSIZE2 * 0];
  342. corr = divisors[i + DCTSIZE2 * 1];
  343. shift = divisors[i + DCTSIZE2 * 3];
  344. if (temp < 0) {
  345. temp = -temp;
  346. product = (UDCTELEM2)(temp + corr) * recip;
  347. product >>= shift + sizeof(DCTELEM)*8;
  348. temp = product;
  349. temp = -temp;
  350. } else {
  351. product = (UDCTELEM2)(temp + corr) * recip;
  352. product >>= shift + sizeof(DCTELEM)*8;
  353. temp = product;
  354. }
  355. output_ptr[i] = (JCOEF) temp;
  356. }
  357. }
  358. /*
  359. * Perform forward DCT on one or more blocks of a component.
  360. *
  361. * The input samples are taken from the sample_data[] array starting at
  362. * position start_row/start_col, and moving to the right for any additional
  363. * blocks. The quantized coefficients are returned in coef_blocks[].
  364. */
  365. METHODDEF(void)
  366. forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
  367. JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  368. JDIMENSION start_row, JDIMENSION start_col,
  369. JDIMENSION num_blocks)
  370. /* This version is used for integer DCT implementations. */
  371. {
  372. /* This routine is heavily used, so it's worth coding it tightly. */
  373. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  374. DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
  375. DCTELEM * workspace;
  376. JDIMENSION bi;
  377. /* Make sure the compiler doesn't look up these every pass */
  378. forward_DCT_method_ptr do_dct = fdct->dct;
  379. convsamp_method_ptr do_convsamp = fdct->convsamp;
  380. quantize_method_ptr do_quantize = fdct->quantize;
  381. workspace = fdct->workspace;
  382. sample_data += start_row; /* fold in the vertical offset once */
  383. for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
  384. /* Load data into workspace, applying unsigned->signed conversion */
  385. (*do_convsamp) (sample_data, start_col, workspace);
  386. /* Perform the DCT */
  387. (*do_dct) (workspace);
  388. /* Quantize/descale the coefficients, and store into coef_blocks[] */
  389. (*do_quantize) (coef_blocks[bi], divisors, workspace);
  390. }
  391. }
  392. #ifdef DCT_FLOAT_SUPPORTED
  393. METHODDEF(void)
  394. convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
  395. {
  396. register FAST_FLOAT *workspaceptr;
  397. register JSAMPROW elemptr;
  398. register int elemr;
  399. workspaceptr = workspace;
  400. for (elemr = 0; elemr < DCTSIZE; elemr++) {
  401. elemptr = sample_data[elemr] + start_col;
  402. #if DCTSIZE == 8 /* unroll the inner loop */
  403. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  404. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  405. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  406. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  407. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  408. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  409. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  410. *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  411. #else
  412. {
  413. register int elemc;
  414. for (elemc = DCTSIZE; elemc > 0; elemc--)
  415. *workspaceptr++ = (FAST_FLOAT)
  416. (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
  417. }
  418. #endif
  419. }
  420. }
  421. METHODDEF(void)
  422. quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
  423. {
  424. register FAST_FLOAT temp;
  425. register int i;
  426. register JCOEFPTR output_ptr = coef_block;
  427. for (i = 0; i < DCTSIZE2; i++) {
  428. /* Apply the quantization and scaling factor */
  429. temp = workspace[i] * divisors[i];
  430. /* Round to nearest integer.
  431. * Since C does not specify the direction of rounding for negative
  432. * quotients, we have to force the dividend positive for portability.
  433. * The maximum coefficient size is +-16K (for 12-bit data), so this
  434. * code should work for either 16-bit or 32-bit ints.
  435. */
  436. output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
  437. }
  438. }
  439. METHODDEF(void)
  440. forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
  441. JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
  442. JDIMENSION start_row, JDIMENSION start_col,
  443. JDIMENSION num_blocks)
  444. /* This version is used for floating-point DCT implementations. */
  445. {
  446. /* This routine is heavily used, so it's worth coding it tightly. */
  447. my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
  448. FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
  449. FAST_FLOAT * workspace;
  450. JDIMENSION bi;
  451. /* Make sure the compiler doesn't look up these every pass */
  452. float_DCT_method_ptr do_dct = fdct->float_dct;
  453. float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
  454. float_quantize_method_ptr do_quantize = fdct->float_quantize;
  455. workspace = fdct->float_workspace;
  456. sample_data += start_row; /* fold in the vertical offset once */
  457. for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
  458. /* Load data into workspace, applying unsigned->signed conversion */
  459. (*do_convsamp) (sample_data, start_col, workspace);
  460. /* Perform the DCT */
  461. (*do_dct) (workspace);
  462. /* Quantize/descale the coefficients, and store into coef_blocks[] */
  463. (*do_quantize) (coef_blocks[bi], divisors, workspace);
  464. }
  465. }
  466. #endif /* DCT_FLOAT_SUPPORTED */
  467. /*
  468. * Initialize FDCT manager.
  469. */
  470. GLOBAL(void)
  471. jinit_forward_dct (j_compress_ptr cinfo)
  472. {
  473. my_fdct_ptr fdct;
  474. int i;
  475. fdct = (my_fdct_ptr)
  476. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  477. SIZEOF(my_fdct_controller));
  478. cinfo->fdct = (struct jpeg_forward_dct *) fdct;
  479. fdct->pub.start_pass = start_pass_fdctmgr;
  480. /* First determine the DCT... */
  481. switch (cinfo->dct_method) {
  482. #ifdef DCT_ISLOW_SUPPORTED
  483. case JDCT_ISLOW:
  484. fdct->pub.forward_DCT = forward_DCT;
  485. if (jsimd_can_fdct_islow())
  486. fdct->dct = jsimd_fdct_islow;
  487. else
  488. fdct->dct = jpeg_fdct_islow;
  489. break;
  490. #endif
  491. #ifdef DCT_IFAST_SUPPORTED
  492. case JDCT_IFAST:
  493. fdct->pub.forward_DCT = forward_DCT;
  494. if (jsimd_can_fdct_ifast())
  495. fdct->dct = jsimd_fdct_ifast;
  496. else
  497. fdct->dct = jpeg_fdct_ifast;
  498. break;
  499. #endif
  500. #ifdef DCT_FLOAT_SUPPORTED
  501. case JDCT_FLOAT:
  502. fdct->pub.forward_DCT = forward_DCT_float;
  503. if (jsimd_can_fdct_float())
  504. fdct->float_dct = jsimd_fdct_float;
  505. else
  506. fdct->float_dct = jpeg_fdct_float;
  507. break;
  508. #endif
  509. default:
  510. ERREXIT(cinfo, JERR_NOT_COMPILED);
  511. break;
  512. }
  513. /* ...then the supporting stages. */
  514. switch (cinfo->dct_method) {
  515. #ifdef DCT_ISLOW_SUPPORTED
  516. case JDCT_ISLOW:
  517. #endif
  518. #ifdef DCT_IFAST_SUPPORTED
  519. case JDCT_IFAST:
  520. #endif
  521. #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
  522. if (jsimd_can_convsamp())
  523. fdct->convsamp = jsimd_convsamp;
  524. else
  525. fdct->convsamp = convsamp;
  526. if (jsimd_can_quantize())
  527. fdct->quantize = jsimd_quantize;
  528. else
  529. fdct->quantize = quantize;
  530. break;
  531. #endif
  532. #ifdef DCT_FLOAT_SUPPORTED
  533. case JDCT_FLOAT:
  534. if (jsimd_can_convsamp_float())
  535. fdct->float_convsamp = jsimd_convsamp_float;
  536. else
  537. fdct->float_convsamp = convsamp_float;
  538. if (jsimd_can_quantize_float())
  539. fdct->float_quantize = jsimd_quantize_float;
  540. else
  541. fdct->float_quantize = quantize_float;
  542. break;
  543. #endif
  544. default:
  545. ERREXIT(cinfo, JERR_NOT_COMPILED);
  546. break;
  547. }
  548. /* Allocate workspace memory */
  549. #ifdef DCT_FLOAT_SUPPORTED
  550. if (cinfo->dct_method == JDCT_FLOAT)
  551. fdct->float_workspace = (FAST_FLOAT *)
  552. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  553. SIZEOF(FAST_FLOAT) * DCTSIZE2);
  554. else
  555. #endif
  556. fdct->workspace = (DCTELEM *)
  557. (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
  558. SIZEOF(DCTELEM) * DCTSIZE2);
  559. /* Mark divisor tables unallocated */
  560. for (i = 0; i < NUM_QUANT_TBLS; i++) {
  561. fdct->divisors[i] = NULL;
  562. #ifdef DCT_FLOAT_SUPPORTED
  563. fdct->float_divisors[i] = NULL;
  564. #endif
  565. }
  566. }