PageRenderTime 56ms CodeModel.GetById 13ms app.highlight 37ms RepoModel.GetById 1ms app.codeStats 1ms

/media/libjpeg/jcdctmgr.c

http://github.com/zpao/v8monkey
C | 642 lines | 413 code | 76 blank | 153 comment | 58 complexity | b6da7b64fea85e4819bff0947a3178ed MD5 | raw file
  1/*
  2 * jcdctmgr.c
  3 *
  4 * Copyright (C) 1994-1996, Thomas G. Lane.
  5 * Copyright (C) 1999-2006, MIYASAKA Masaru.
  6 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
  7 * Copyright (C) 2011 D. R. Commander
  8 * This file is part of the Independent JPEG Group's software.
  9 * For conditions of distribution and use, see the accompanying README file.
 10 *
 11 * This file contains the forward-DCT management logic.
 12 * This code selects a particular DCT implementation to be used,
 13 * and it performs related housekeeping chores including coefficient
 14 * quantization.
 15 */
 16
 17#define JPEG_INTERNALS
 18#include "jinclude.h"
 19#include "jpeglib.h"
 20#include "jdct.h"		/* Private declarations for DCT subsystem */
 21#include "jsimddct.h"
 22
 23
 24/* Private subobject for this module */
 25
/* Function-pointer types for the three stages that forward_DCT() and
 * forward_DCT_float() run on every block: load samples, transform,
 * quantize.  Each stage exists in an integer (DCTELEM) and a
 * floating-point (FAST_FLOAT) flavor.
 */

/* Forward DCT applied in place to a workspace of coefficients. */
typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));

/* Sample loader: reads samples from the rows of sample_data, starting at
 * column start_col, and writes the converted values into workspace.
 */
typedef JMETHOD(void, convsamp_method_ptr,
                (JSAMPARRAY sample_data, JDIMENSION start_col,
                 DCTELEM * workspace));
typedef JMETHOD(void, float_convsamp_method_ptr,
                (JSAMPARRAY sample_data, JDIMENSION start_col,
                 FAST_FLOAT *workspace));

/* Quantizer: scales the transformed workspace by the per-coefficient
 * divisors table and stores the result into coef_block.
 */
typedef JMETHOD(void, quantize_method_ptr,
                (JCOEFPTR coef_block, DCTELEM * divisors,
                 DCTELEM * workspace));
typedef JMETHOD(void, float_quantize_method_ptr,
                (JCOEFPTR coef_block, FAST_FLOAT * divisors,
                 FAST_FLOAT * workspace));

/* Forward declaration: start_pass_fdctmgr() refers to the C quantizer
 * before its definition appears further down in this file.
 */
METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
 44
/* Private state of the forward-DCT manager.  The object is allocated in
 * jinit_forward_dct() and stored in cinfo->fdct; the other routines in
 * this file cast cinfo->fdct back to my_fdct_ptr, so "pub" must remain
 * the first member.
 */
typedef struct {
  struct jpeg_forward_dct pub;	/* public fields */

  /* Pointers to the integer DCT, sample-conversion and quantization
   * routines actually in use (SIMD or plain C, chosen at init time).
   */
  forward_DCT_method_ptr dct;
  convsamp_method_ptr convsamp;
  quantize_method_ptr quantize;

  /* The actual post-DCT divisors --- not identical to the quant table
   * entries, because of scaling (especially for an unnormalized DCT).
   * Each table is given in normal array order.
   * Each entry is NULL until start_pass_fdctmgr() allocates it; an
   * allocated table holds 4 * DCTSIZE2 elements (reciprocal, correction,
   * scale and shift sub-tables filled by compute_reciprocal()).
   */
  DCTELEM * divisors[NUM_QUANT_TBLS];

  /* work area for FDCT subroutine (DCTSIZE2 elements) */
  DCTELEM * workspace;

#ifdef DCT_FLOAT_SUPPORTED
  /* Same as above for the floating-point case.  A float divisor table
   * holds DCTSIZE2 precomputed reciprocals (1/divisor).
   */
  float_DCT_method_ptr float_dct;
  float_convsamp_method_ptr float_convsamp;
  float_quantize_method_ptr float_quantize;
  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
  FAST_FLOAT * float_workspace;
#endif
} my_fdct_controller;

/* Convenience pointer type used for the cast from cinfo->fdct. */
typedef my_fdct_controller * my_fdct_ptr;
 73
 74
 75/*
 76 * Find the highest bit in an integer through binary search.
 77 */
 78LOCAL(int)
 79flss (UINT16 val)
 80{
 81  int bit;
 82
 83  bit = 16;
 84
 85  if (!val)
 86    return 0;
 87
 88  if (!(val & 0xff00)) {
 89    bit -= 8;
 90    val <<= 8;
 91  }
 92  if (!(val & 0xf000)) {
 93    bit -= 4;
 94    val <<= 4;
 95  }
 96  if (!(val & 0xc000)) {
 97    bit -= 2;
 98    val <<= 2;
 99  }
100  if (!(val & 0x8000)) {
101    bit -= 1;
102    val <<= 1;
103  }
104
105  return bit;
106}
107
108/*
109 * Compute values to do a division using reciprocal.
110 *
111 * This implementation is based on an algorithm described in
112 *   "How to optimize for the Pentium family of microprocessors"
113 *   (http://www.agner.org/assem/).
114 * More information about the basic algorithm can be found in
115 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
116 *
117 * The basic idea is to replace x/d by x * d^-1. In order to store
118 * d^-1 with enough precision we shift it left a few places. It turns
119 * out that this algoright gives just enough precision, and also fits
120 * into DCTELEM:
121 *
122 *   b = (the number of significant bits in divisor) - 1
123 *   r = (word size) + b
124 *   f = 2^r / divisor
125 *
126 * f will not be an integer for most cases, so we need to compensate
127 * for the rounding error introduced:
128 *
129 *   no fractional part:
130 *
131 *       result = input >> r
132 *
133 *   fractional part of f < 0.5:
134 *
135 *       round f down to nearest integer
136 *       result = ((input + 1) * f) >> r
137 *
138 *   fractional part of f > 0.5:
139 *
140 *       round f up to nearest integer
141 *       result = (input * f) >> r
142 *
143 * This is the original algorithm that gives truncated results. But we
144 * want properly rounded results, so we replace "input" with
145 * "input + divisor/2".
146 *
147 * In order to allow SIMD implementations we also tweak the values to
148 * allow the same calculation to be made at all times:
149 * 
150 *   dctbl[0] = f rounded to nearest integer
151 *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
152 *   dctbl[2] = 1 << ((word size) * 2 - r)
153 *   dctbl[3] = r - (word size)
154 *
155 * dctbl[2] is for stupid instruction sets where the shift operation
156 * isn't member wise (e.g. MMX).
157 *
158 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
159 * is that most SIMD implementations have a "multiply and store top
160 * half" operation.
161 *
162 * Lastly, we store each of the values in their own table instead
163 * of in a consecutive manner, yet again in order to allow SIMD
164 * routines.
165 */
166LOCAL(int)
167compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
168{
169  UDCTELEM2 fq, fr;
170  UDCTELEM c;
171  int b, r;
172
173  b = flss(divisor) - 1;
174  r  = sizeof(DCTELEM) * 8 + b;
175
176  fq = ((UDCTELEM2)1 << r) / divisor;
177  fr = ((UDCTELEM2)1 << r) % divisor;
178
179  c = divisor / 2; /* for rounding */
180
181  if (fr == 0) { /* divisor is power of two */
182    /* fq will be one bit too large to fit in DCTELEM, so adjust */
183    fq >>= 1;
184    r--;
185  } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
186    c++;
187  } else { /* fractional part is > 0.5 */
188    fq++;
189  }
190
191  dtbl[DCTSIZE2 * 0] = (DCTELEM) fq;      /* reciprocal */
192  dtbl[DCTSIZE2 * 1] = (DCTELEM) c;       /* correction + roundfactor */
193  dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r));  /* scale */
194  dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
195
196  if(r <= 16) return 0;
197  else return 1;
198}
199
200/*
201 * Initialize for a processing pass.
202 * Verify that all referenced Q-tables are present, and set up
203 * the divisor table for each one.
204 * In the current implementation, DCT of all components is done during
205 * the first pass, even if only some components will be output in the
206 * first scan.  Hence all components should be examined here.
207 */
208
209METHODDEF(void)
210start_pass_fdctmgr (j_compress_ptr cinfo)
211{
212  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
213  int ci, qtblno, i;
214  jpeg_component_info *compptr;
215  JQUANT_TBL * qtbl;
216  DCTELEM * dtbl;
217
218  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
219       ci++, compptr++) {
220    qtblno = compptr->quant_tbl_no;
221    /* Make sure specified quantization table is present */
222    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
223	cinfo->quant_tbl_ptrs[qtblno] == NULL)
224      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
225    qtbl = cinfo->quant_tbl_ptrs[qtblno];
226    /* Compute divisors for this quant table */
227    /* We may do this more than once for same table, but it's not a big deal */
228    switch (cinfo->dct_method) {
229#ifdef DCT_ISLOW_SUPPORTED
230    case JDCT_ISLOW:
231      /* For LL&M IDCT method, divisors are equal to raw quantization
232       * coefficients multiplied by 8 (to counteract scaling).
233       */
234      if (fdct->divisors[qtblno] == NULL) {
235	fdct->divisors[qtblno] = (DCTELEM *)
236	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
237				      (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
238      }
239      dtbl = fdct->divisors[qtblno];
240      for (i = 0; i < DCTSIZE2; i++) {
241	if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
242	  && fdct->quantize == jsimd_quantize)
243	  fdct->quantize = quantize;
244      }
245      break;
246#endif
247#ifdef DCT_IFAST_SUPPORTED
248    case JDCT_IFAST:
249      {
250	/* For AA&N IDCT method, divisors are equal to quantization
251	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
252	 *   scalefactor[0] = 1
253	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
254	 * We apply a further scale factor of 8.
255	 */
256#define CONST_BITS 14
257	static const INT16 aanscales[DCTSIZE2] = {
258	  /* precomputed values scaled up by 14 bits */
259	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
260	  22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
261	  21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
262	  19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
263	  16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
264	  12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
265	   8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
266	   4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
267	};
268	SHIFT_TEMPS
269
270	if (fdct->divisors[qtblno] == NULL) {
271	  fdct->divisors[qtblno] = (DCTELEM *)
272	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
273					(DCTSIZE2 * 4) * SIZEOF(DCTELEM));
274	}
275	dtbl = fdct->divisors[qtblno];
276	for (i = 0; i < DCTSIZE2; i++) {
277	  if(!compute_reciprocal(
278	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
279				  (INT32) aanscales[i]),
280		    CONST_BITS-3), &dtbl[i])
281	    && fdct->quantize == jsimd_quantize)
282	    fdct->quantize = quantize;
283	}
284      }
285      break;
286#endif
287#ifdef DCT_FLOAT_SUPPORTED
288    case JDCT_FLOAT:
289      {
290	/* For float AA&N IDCT method, divisors are equal to quantization
291	 * coefficients scaled by scalefactor[row]*scalefactor[col], where
292	 *   scalefactor[0] = 1
293	 *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
294	 * We apply a further scale factor of 8.
295	 * What's actually stored is 1/divisor so that the inner loop can
296	 * use a multiplication rather than a division.
297	 */
298	FAST_FLOAT * fdtbl;
299	int row, col;
300	static const double aanscalefactor[DCTSIZE] = {
301	  1.0, 1.387039845, 1.306562965, 1.175875602,
302	  1.0, 0.785694958, 0.541196100, 0.275899379
303	};
304
305	if (fdct->float_divisors[qtblno] == NULL) {
306	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
307	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
308					DCTSIZE2 * SIZEOF(FAST_FLOAT));
309	}
310	fdtbl = fdct->float_divisors[qtblno];
311	i = 0;
312	for (row = 0; row < DCTSIZE; row++) {
313	  for (col = 0; col < DCTSIZE; col++) {
314	    fdtbl[i] = (FAST_FLOAT)
315	      (1.0 / (((double) qtbl->quantval[i] *
316		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
317	    i++;
318	  }
319	}
320      }
321      break;
322#endif
323    default:
324      ERREXIT(cinfo, JERR_NOT_COMPILED);
325      break;
326    }
327  }
328}
329
330
331/*
332 * Load data into workspace, applying unsigned->signed conversion.
333 */
334
335METHODDEF(void)
336convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
337{
338  register DCTELEM *workspaceptr;
339  register JSAMPROW elemptr;
340  register int elemr;
341
342  workspaceptr = workspace;
343  for (elemr = 0; elemr < DCTSIZE; elemr++) {
344    elemptr = sample_data[elemr] + start_col;
345
346#if DCTSIZE == 8		/* unroll the inner loop */
347    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
348    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
349    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
350    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
351    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
352    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
353    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
354    *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
355#else
356    {
357      register int elemc;
358      for (elemc = DCTSIZE; elemc > 0; elemc--)
359        *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
360    }
361#endif
362  }
363}
364
365
366/*
367 * Quantize/descale the coefficients, and store into coef_blocks[].
368 */
369
370METHODDEF(void)
371quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
372{
373  int i;
374  DCTELEM temp;
375  UDCTELEM recip, corr, shift;
376  UDCTELEM2 product;
377  JCOEFPTR output_ptr = coef_block;
378
379  for (i = 0; i < DCTSIZE2; i++) {
380    temp = workspace[i];
381    recip = divisors[i + DCTSIZE2 * 0];
382    corr =  divisors[i + DCTSIZE2 * 1];
383    shift = divisors[i + DCTSIZE2 * 3];
384
385    if (temp < 0) {
386      temp = -temp;
387      product = (UDCTELEM2)(temp + corr) * recip;
388      product >>= shift + sizeof(DCTELEM)*8;
389      temp = product;
390      temp = -temp;
391    } else {
392      product = (UDCTELEM2)(temp + corr) * recip;
393      product >>= shift + sizeof(DCTELEM)*8;
394      temp = product;
395    }
396
397    output_ptr[i] = (JCOEF) temp;
398  }
399}
400
401
402/*
403 * Perform forward DCT on one or more blocks of a component.
404 *
405 * The input samples are taken from the sample_data[] array starting at
406 * position start_row/start_col, and moving to the right for any additional
407 * blocks. The quantized coefficients are returned in coef_blocks[].
408 */
409
410METHODDEF(void)
411forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
412	     JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
413	     JDIMENSION start_row, JDIMENSION start_col,
414	     JDIMENSION num_blocks)
415/* This version is used for integer DCT implementations. */
416{
417  /* This routine is heavily used, so it's worth coding it tightly. */
418  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
419  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
420  DCTELEM * workspace;
421  JDIMENSION bi;
422
423  /* Make sure the compiler doesn't look up these every pass */
424  forward_DCT_method_ptr do_dct = fdct->dct;
425  convsamp_method_ptr do_convsamp = fdct->convsamp;
426  quantize_method_ptr do_quantize = fdct->quantize;
427  workspace = fdct->workspace;
428
429  sample_data += start_row;	/* fold in the vertical offset once */
430
431  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
432    /* Load data into workspace, applying unsigned->signed conversion */
433    (*do_convsamp) (sample_data, start_col, workspace);
434
435    /* Perform the DCT */
436    (*do_dct) (workspace);
437
438    /* Quantize/descale the coefficients, and store into coef_blocks[] */
439    (*do_quantize) (coef_blocks[bi], divisors, workspace);
440  }
441}
442
443
444#ifdef DCT_FLOAT_SUPPORTED
445
446
447METHODDEF(void)
448convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
449{
450  register FAST_FLOAT *workspaceptr;
451  register JSAMPROW elemptr;
452  register int elemr;
453
454  workspaceptr = workspace;
455  for (elemr = 0; elemr < DCTSIZE; elemr++) {
456    elemptr = sample_data[elemr] + start_col;
457#if DCTSIZE == 8		/* unroll the inner loop */
458    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
459    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
460    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
461    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
462    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
463    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
464    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
465    *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
466#else
467    {
468      register int elemc;
469      for (elemc = DCTSIZE; elemc > 0; elemc--)
470        *workspaceptr++ = (FAST_FLOAT)
471                          (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
472    }
473#endif
474  }
475}
476
477
478METHODDEF(void)
479quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
480{
481  register FAST_FLOAT temp;
482  register int i;
483  register JCOEFPTR output_ptr = coef_block;
484
485  for (i = 0; i < DCTSIZE2; i++) {
486    /* Apply the quantization and scaling factor */
487    temp = workspace[i] * divisors[i];
488
489    /* Round to nearest integer.
490     * Since C does not specify the direction of rounding for negative
491     * quotients, we have to force the dividend positive for portability.
492     * The maximum coefficient size is +-16K (for 12-bit data), so this
493     * code should work for either 16-bit or 32-bit ints.
494     */
495    output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
496  }
497}
498
499
500METHODDEF(void)
501forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
502		   JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
503		   JDIMENSION start_row, JDIMENSION start_col,
504		   JDIMENSION num_blocks)
505/* This version is used for floating-point DCT implementations. */
506{
507  /* This routine is heavily used, so it's worth coding it tightly. */
508  my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
509  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
510  FAST_FLOAT * workspace;
511  JDIMENSION bi;
512
513
514  /* Make sure the compiler doesn't look up these every pass */
515  float_DCT_method_ptr do_dct = fdct->float_dct;
516  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
517  float_quantize_method_ptr do_quantize = fdct->float_quantize;
518  workspace = fdct->float_workspace;
519
520  sample_data += start_row;	/* fold in the vertical offset once */
521
522  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
523    /* Load data into workspace, applying unsigned->signed conversion */
524    (*do_convsamp) (sample_data, start_col, workspace);
525
526    /* Perform the DCT */
527    (*do_dct) (workspace);
528
529    /* Quantize/descale the coefficients, and store into coef_blocks[] */
530    (*do_quantize) (coef_blocks[bi], divisors, workspace);
531  }
532}
533
534#endif /* DCT_FLOAT_SUPPORTED */
535
536
537/*
538 * Initialize FDCT manager.
539 */
540
541GLOBAL(void)
542jinit_forward_dct (j_compress_ptr cinfo)
543{
544  my_fdct_ptr fdct;
545  int i;
546
547  fdct = (my_fdct_ptr)
548    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
549				SIZEOF(my_fdct_controller));
550  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
551  fdct->pub.start_pass = start_pass_fdctmgr;
552
553  /* First determine the DCT... */
554  switch (cinfo->dct_method) {
555#ifdef DCT_ISLOW_SUPPORTED
556  case JDCT_ISLOW:
557    fdct->pub.forward_DCT = forward_DCT;
558    if (jsimd_can_fdct_islow())
559      fdct->dct = jsimd_fdct_islow;
560    else
561      fdct->dct = jpeg_fdct_islow;
562    break;
563#endif
564#ifdef DCT_IFAST_SUPPORTED
565  case JDCT_IFAST:
566    fdct->pub.forward_DCT = forward_DCT;
567    if (jsimd_can_fdct_ifast())
568      fdct->dct = jsimd_fdct_ifast;
569    else
570      fdct->dct = jpeg_fdct_ifast;
571    break;
572#endif
573#ifdef DCT_FLOAT_SUPPORTED
574  case JDCT_FLOAT:
575    fdct->pub.forward_DCT = forward_DCT_float;
576    if (jsimd_can_fdct_float())
577      fdct->float_dct = jsimd_fdct_float;
578    else
579      fdct->float_dct = jpeg_fdct_float;
580    break;
581#endif
582  default:
583    ERREXIT(cinfo, JERR_NOT_COMPILED);
584    break;
585  }
586
587  /* ...then the supporting stages. */
588  switch (cinfo->dct_method) {
589#ifdef DCT_ISLOW_SUPPORTED
590  case JDCT_ISLOW:
591#endif
592#ifdef DCT_IFAST_SUPPORTED
593  case JDCT_IFAST:
594#endif
595#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
596    if (jsimd_can_convsamp())
597      fdct->convsamp = jsimd_convsamp;
598    else
599      fdct->convsamp = convsamp;
600    if (jsimd_can_quantize())
601      fdct->quantize = jsimd_quantize;
602    else
603      fdct->quantize = quantize;
604    break;
605#endif
606#ifdef DCT_FLOAT_SUPPORTED
607  case JDCT_FLOAT:
608    if (jsimd_can_convsamp_float())
609      fdct->float_convsamp = jsimd_convsamp_float;
610    else
611      fdct->float_convsamp = convsamp_float;
612    if (jsimd_can_quantize_float())
613      fdct->float_quantize = jsimd_quantize_float;
614    else
615      fdct->float_quantize = quantize_float;
616    break;
617#endif
618  default:
619    ERREXIT(cinfo, JERR_NOT_COMPILED);
620    break;
621  }
622
623  /* Allocate workspace memory */
624#ifdef DCT_FLOAT_SUPPORTED
625  if (cinfo->dct_method == JDCT_FLOAT)
626    fdct->float_workspace = (FAST_FLOAT *)
627      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
628				  SIZEOF(FAST_FLOAT) * DCTSIZE2);
629  else
630#endif
631    fdct->workspace = (DCTELEM *)
632      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
633				  SIZEOF(DCTELEM) * DCTSIZE2);
634
635  /* Mark divisor tables unallocated */
636  for (i = 0; i < NUM_QUANT_TBLS; i++) {
637    fdct->divisors[i] = NULL;
638#ifdef DCT_FLOAT_SUPPORTED
639    fdct->float_divisors[i] = NULL;
640#endif
641  }
642}