blob: 156957ab6677311d13e250503a3875724261dfe8 [file] [log] [blame]
/*
 * jcdctmgr.c
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains the forward-DCT management logic.
 * This code selects a particular DCT implementation to be used,
 * and it performs related housekeeping chores including coefficient
 * quantization.
 */
15
16#define JPEG_INTERNALS
17#include "jinclude.h"
18#include "jpeglib.h"
19#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman59a39382009-03-09 13:15:56 +000020#include "jsimddct.h"
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000021
22
23/* Private subobject for this module */
24
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000025typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
26typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
27
28typedef JMETHOD(void, convsamp_method_ptr,
29 (JSAMPARRAY sample_data, JDIMENSION start_col,
30 DCTELEM * workspace));
31typedef JMETHOD(void, float_convsamp_method_ptr,
32 (JSAMPARRAY sample_data, JDIMENSION start_col,
33 FAST_FLOAT *workspace));
34
35typedef JMETHOD(void, quantize_method_ptr,
36 (JCOEFPTR coef_block, DCTELEM * divisors,
37 DCTELEM * workspace));
38typedef JMETHOD(void, float_quantize_method_ptr,
39 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
40 FAST_FLOAT * workspace));
41
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000042typedef struct {
43 struct jpeg_forward_dct pub; /* public fields */
44
45 /* Pointer to the DCT routine actually in use */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000046 forward_DCT_method_ptr dct;
47 convsamp_method_ptr convsamp;
48 quantize_method_ptr quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000049
50 /* The actual post-DCT divisors --- not identical to the quant table
51 * entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane489583f1996-02-07 00:00:00 +000052 * Each table is given in normal array order.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000053 */
54 DCTELEM * divisors[NUM_QUANT_TBLS];
55
Pierre Ossman35c47192009-03-09 13:29:37 +000056 /* work area for FDCT subroutine */
57 DCTELEM * workspace;
58
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000059#ifdef DCT_FLOAT_SUPPORTED
60 /* Same as above for the floating-point case. */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000061 float_DCT_method_ptr float_dct;
62 float_convsamp_method_ptr float_convsamp;
63 float_quantize_method_ptr float_quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000064 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman35c47192009-03-09 13:29:37 +000065 FAST_FLOAT * float_workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000066#endif
67} my_fdct_controller;
68
69typedef my_fdct_controller * my_fdct_ptr;
70
71
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000072/*
Pierre Ossmandedc42e2009-03-09 13:23:04 +000073 * Find the highest bit in an integer through binary search.
74 */
75LOCAL(int)
DRCfc5dc4f2009-10-01 22:26:14 +000076flss (UINT16 val)
Pierre Ossmandedc42e2009-03-09 13:23:04 +000077{
78 int bit;
79
80 bit = 16;
81
82 if (!val)
83 return 0;
84
85 if (!(val & 0xff00)) {
86 bit -= 8;
87 val <<= 8;
88 }
89 if (!(val & 0xf000)) {
90 bit -= 4;
91 val <<= 4;
92 }
93 if (!(val & 0xc000)) {
94 bit -= 2;
95 val <<= 2;
96 }
97 if (!(val & 0x8000)) {
98 bit -= 1;
99 val <<= 1;
100 }
101
102 return bit;
103}
104
105/*
106 * Compute values to do a division using reciprocal.
107 *
108 * This implementation is based on an algorithm described in
109 * "How to optimize for the Pentium family of microprocessors"
110 * (http://www.agner.org/assem/).
111 * More information about the basic algorithm can be found in
112 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
113 *
114 * The basic idea is to replace x/d by x * d^-1. In order to store
115 * d^-1 with enough precision we shift it left a few places. It turns
116 * out that this algoright gives just enough precision, and also fits
117 * into DCTELEM:
118 *
119 * b = (the number of significant bits in divisor) - 1
120 * r = (word size) + b
121 * f = 2^r / divisor
122 *
123 * f will not be an integer for most cases, so we need to compensate
124 * for the rounding error introduced:
125 *
126 * no fractional part:
127 *
128 * result = input >> r
129 *
130 * fractional part of f < 0.5:
131 *
132 * round f down to nearest integer
133 * result = ((input + 1) * f) >> r
134 *
135 * fractional part of f > 0.5:
136 *
137 * round f up to nearest integer
138 * result = (input * f) >> r
139 *
140 * This is the original algorithm that gives truncated results. But we
141 * want properly rounded results, so we replace "input" with
142 * "input + divisor/2".
143 *
144 * In order to allow SIMD implementations we also tweak the values to
145 * allow the same calculation to be made at all times:
146 *
147 * dctbl[0] = f rounded to nearest integer
148 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
149 * dctbl[2] = 1 << ((word size) * 2 - r)
150 * dctbl[3] = r - (word size)
151 *
152 * dctbl[2] is for stupid instruction sets where the shift operation
153 * isn't member wise (e.g. MMX).
154 *
155 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
156 * is that most SIMD implementations have a "multiply and store top
157 * half" operation.
158 *
159 * Lastly, we store each of the values in their own table instead
160 * of in a consecutive manner, yet again in order to allow SIMD
161 * routines.
162 */
163LOCAL(void)
164compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
165{
166 UDCTELEM2 fq, fr;
167 UDCTELEM c;
168 int b, r;
169
DRCfc5dc4f2009-10-01 22:26:14 +0000170 b = flss(divisor) - 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000171 r = sizeof(DCTELEM) * 8 + b;
172
173 fq = ((UDCTELEM2)1 << r) / divisor;
174 fr = ((UDCTELEM2)1 << r) % divisor;
175
176 c = divisor / 2; /* for rounding */
177
178 if (fr == 0) { /* divisor is power of two */
179 /* fq will be one bit too large to fit in DCTELEM, so adjust */
180 fq >>= 1;
181 r--;
182 } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
183 c++;
184 } else { /* fractional part is > 0.5 */
185 fq++;
186 }
187
188 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
189 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
190 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
191 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
192}
193
194/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000195 * Initialize for a processing pass.
196 * Verify that all referenced Q-tables are present, and set up
197 * the divisor table for each one.
198 * In the current implementation, DCT of all components is done during
199 * the first pass, even if only some components will be output in the
200 * first scan. Hence all components should be examined here.
201 */
202
Thomas G. Lane489583f1996-02-07 00:00:00 +0000203METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000204start_pass_fdctmgr (j_compress_ptr cinfo)
205{
206 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
207 int ci, qtblno, i;
208 jpeg_component_info *compptr;
209 JQUANT_TBL * qtbl;
210 DCTELEM * dtbl;
211
212 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
213 ci++, compptr++) {
214 qtblno = compptr->quant_tbl_no;
215 /* Make sure specified quantization table is present */
216 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
217 cinfo->quant_tbl_ptrs[qtblno] == NULL)
218 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
219 qtbl = cinfo->quant_tbl_ptrs[qtblno];
220 /* Compute divisors for this quant table */
221 /* We may do this more than once for same table, but it's not a big deal */
222 switch (cinfo->dct_method) {
223#ifdef DCT_ISLOW_SUPPORTED
224 case JDCT_ISLOW:
225 /* For LL&M IDCT method, divisors are equal to raw quantization
226 * coefficients multiplied by 8 (to counteract scaling).
227 */
228 if (fdct->divisors[qtblno] == NULL) {
229 fdct->divisors[qtblno] = (DCTELEM *)
230 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000231 (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000232 }
233 dtbl = fdct->divisors[qtblno];
234 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000235 compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000236 }
237 break;
238#endif
239#ifdef DCT_IFAST_SUPPORTED
240 case JDCT_IFAST:
241 {
242 /* For AA&N IDCT method, divisors are equal to quantization
243 * coefficients scaled by scalefactor[row]*scalefactor[col], where
244 * scalefactor[0] = 1
245 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
246 * We apply a further scale factor of 8.
247 */
248#define CONST_BITS 14
249 static const INT16 aanscales[DCTSIZE2] = {
Thomas G. Lane489583f1996-02-07 00:00:00 +0000250 /* precomputed values scaled up by 14 bits */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000251 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
252 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
253 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
254 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
255 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
256 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
257 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
258 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
259 };
260 SHIFT_TEMPS
261
262 if (fdct->divisors[qtblno] == NULL) {
263 fdct->divisors[qtblno] = (DCTELEM *)
264 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000265 (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000266 }
267 dtbl = fdct->divisors[qtblno];
268 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000269 compute_reciprocal(
Thomas G. Lane489583f1996-02-07 00:00:00 +0000270 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000271 (INT32) aanscales[i]),
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000272 CONST_BITS-3), &dtbl[i]);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000273 }
274 }
275 break;
276#endif
277#ifdef DCT_FLOAT_SUPPORTED
278 case JDCT_FLOAT:
279 {
280 /* For float AA&N IDCT method, divisors are equal to quantization
281 * coefficients scaled by scalefactor[row]*scalefactor[col], where
282 * scalefactor[0] = 1
283 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
284 * We apply a further scale factor of 8.
285 * What's actually stored is 1/divisor so that the inner loop can
286 * use a multiplication rather than a division.
287 */
288 FAST_FLOAT * fdtbl;
289 int row, col;
290 static const double aanscalefactor[DCTSIZE] = {
291 1.0, 1.387039845, 1.306562965, 1.175875602,
292 1.0, 0.785694958, 0.541196100, 0.275899379
293 };
294
295 if (fdct->float_divisors[qtblno] == NULL) {
296 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
297 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
298 DCTSIZE2 * SIZEOF(FAST_FLOAT));
299 }
300 fdtbl = fdct->float_divisors[qtblno];
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000301 i = 0;
302 for (row = 0; row < DCTSIZE; row++) {
303 for (col = 0; col < DCTSIZE; col++) {
304 fdtbl[i] = (FAST_FLOAT)
Thomas G. Lane489583f1996-02-07 00:00:00 +0000305 (1.0 / (((double) qtbl->quantval[i] *
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000306 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
307 i++;
308 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000309 }
310 }
311 break;
312#endif
313 default:
314 ERREXIT(cinfo, JERR_NOT_COMPILED);
315 break;
316 }
317 }
318}
319
320
321/*
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000322 * Load data into workspace, applying unsigned->signed conversion.
323 */
324
325METHODDEF(void)
326convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
327{
328 register DCTELEM *workspaceptr;
329 register JSAMPROW elemptr;
330 register int elemr;
331
332 workspaceptr = workspace;
333 for (elemr = 0; elemr < DCTSIZE; elemr++) {
334 elemptr = sample_data[elemr] + start_col;
335
336#if DCTSIZE == 8 /* unroll the inner loop */
337 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
338 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
339 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
340 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
341 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
342 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
343 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
344 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
345#else
346 {
347 register int elemc;
348 for (elemc = DCTSIZE; elemc > 0; elemc--)
349 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
350 }
351#endif
352 }
353}
354
355
356/*
357 * Quantize/descale the coefficients, and store into coef_blocks[].
358 */
359
360METHODDEF(void)
361quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
362{
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000363 int i;
364 DCTELEM temp;
365 UDCTELEM recip, corr, shift;
366 UDCTELEM2 product;
367 JCOEFPTR output_ptr = coef_block;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000368
369 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000370 temp = workspace[i];
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000371 recip = divisors[i + DCTSIZE2 * 0];
372 corr = divisors[i + DCTSIZE2 * 1];
373 shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000374
375 if (temp < 0) {
376 temp = -temp;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000377 product = (UDCTELEM2)(temp + corr) * recip;
378 product >>= shift + sizeof(DCTELEM)*8;
379 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000380 temp = -temp;
381 } else {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000382 product = (UDCTELEM2)(temp + corr) * recip;
383 product >>= shift + sizeof(DCTELEM)*8;
384 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000385 }
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000386
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000387 output_ptr[i] = (JCOEF) temp;
388 }
389}
390
391
392/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000393 * Perform forward DCT on one or more blocks of a component.
394 *
395 * The input samples are taken from the sample_data[] array starting at
396 * position start_row/start_col, and moving to the right for any additional
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000397 * blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000398 */
399
Thomas G. Lane489583f1996-02-07 00:00:00 +0000400METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000401forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
402 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
403 JDIMENSION start_row, JDIMENSION start_col,
404 JDIMENSION num_blocks)
405/* This version is used for integer DCT implementations. */
406{
407 /* This routine is heavily used, so it's worth coding it tightly. */
408 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000409 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000410 DCTELEM * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000411 JDIMENSION bi;
412
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000413 /* Make sure the compiler doesn't look up these every pass */
414 forward_DCT_method_ptr do_dct = fdct->dct;
415 convsamp_method_ptr do_convsamp = fdct->convsamp;
416 quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000417 workspace = fdct->workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000418
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000419 sample_data += start_row; /* fold in the vertical offset once */
420
421 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
422 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000423 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000424
425 /* Perform the DCT */
426 (*do_dct) (workspace);
427
428 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000429 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000430 }
431}
432
433
434#ifdef DCT_FLOAT_SUPPORTED
435
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000436
437METHODDEF(void)
438convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
439{
440 register FAST_FLOAT *workspaceptr;
441 register JSAMPROW elemptr;
442 register int elemr;
443
444 workspaceptr = workspace;
445 for (elemr = 0; elemr < DCTSIZE; elemr++) {
446 elemptr = sample_data[elemr] + start_col;
447#if DCTSIZE == 8 /* unroll the inner loop */
448 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
449 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
450 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
451 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
452 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
453 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
454 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
455 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
456#else
457 {
458 register int elemc;
459 for (elemc = DCTSIZE; elemc > 0; elemc--)
460 *workspaceptr++ = (FAST_FLOAT)
461 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
462 }
463#endif
464 }
465}
466
467
468METHODDEF(void)
469quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
470{
471 register FAST_FLOAT temp;
472 register int i;
473 register JCOEFPTR output_ptr = coef_block;
474
475 for (i = 0; i < DCTSIZE2; i++) {
476 /* Apply the quantization and scaling factor */
477 temp = workspace[i] * divisors[i];
478
479 /* Round to nearest integer.
480 * Since C does not specify the direction of rounding for negative
481 * quotients, we have to force the dividend positive for portability.
482 * The maximum coefficient size is +-16K (for 12-bit data), so this
483 * code should work for either 16-bit or 32-bit ints.
484 */
485 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
486 }
487}
488
489
Thomas G. Lane489583f1996-02-07 00:00:00 +0000490METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000491forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
492 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
493 JDIMENSION start_row, JDIMENSION start_col,
494 JDIMENSION num_blocks)
495/* This version is used for floating-point DCT implementations. */
496{
497 /* This routine is heavily used, so it's worth coding it tightly. */
498 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000499 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000500 FAST_FLOAT * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000501 JDIMENSION bi;
502
Pierre Ossman35c47192009-03-09 13:29:37 +0000503
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000504 /* Make sure the compiler doesn't look up these every pass */
505 float_DCT_method_ptr do_dct = fdct->float_dct;
506 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
507 float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000508 workspace = fdct->float_workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000509
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000510 sample_data += start_row; /* fold in the vertical offset once */
511
512 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
513 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000514 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000515
516 /* Perform the DCT */
517 (*do_dct) (workspace);
518
519 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000520 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000521 }
522}
523
524#endif /* DCT_FLOAT_SUPPORTED */
525
526
527/*
528 * Initialize FDCT manager.
529 */
530
Thomas G. Lane489583f1996-02-07 00:00:00 +0000531GLOBAL(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000532jinit_forward_dct (j_compress_ptr cinfo)
533{
534 my_fdct_ptr fdct;
535 int i;
536
537 fdct = (my_fdct_ptr)
538 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
539 SIZEOF(my_fdct_controller));
540 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
541 fdct->pub.start_pass = start_pass_fdctmgr;
542
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000543 /* First determine the DCT... */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000544 switch (cinfo->dct_method) {
545#ifdef DCT_ISLOW_SUPPORTED
546 case JDCT_ISLOW:
547 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000548 if (jsimd_can_fdct_islow())
549 fdct->dct = jsimd_fdct_islow;
550 else
551 fdct->dct = jpeg_fdct_islow;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000552 break;
553#endif
554#ifdef DCT_IFAST_SUPPORTED
555 case JDCT_IFAST:
556 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000557 if (jsimd_can_fdct_ifast())
558 fdct->dct = jsimd_fdct_ifast;
559 else
560 fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000561 break;
562#endif
563#ifdef DCT_FLOAT_SUPPORTED
564 case JDCT_FLOAT:
565 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman59a39382009-03-09 13:15:56 +0000566 if (jsimd_can_fdct_float())
567 fdct->float_dct = jsimd_fdct_float;
568 else
569 fdct->float_dct = jpeg_fdct_float;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000570 break;
571#endif
572 default:
573 ERREXIT(cinfo, JERR_NOT_COMPILED);
574 break;
575 }
576
577 /* ...then the supporting stages. */
578 switch (cinfo->dct_method) {
579#ifdef DCT_ISLOW_SUPPORTED
580 case JDCT_ISLOW:
581#endif
582#ifdef DCT_IFAST_SUPPORTED
583 case JDCT_IFAST:
584#endif
585#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
Pierre Ossman59a39382009-03-09 13:15:56 +0000586 if (jsimd_can_convsamp())
587 fdct->convsamp = jsimd_convsamp;
588 else
589 fdct->convsamp = convsamp;
590 if (jsimd_can_quantize())
591 fdct->quantize = jsimd_quantize;
592 else
593 fdct->quantize = quantize;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000594 break;
595#endif
596#ifdef DCT_FLOAT_SUPPORTED
597 case JDCT_FLOAT:
Pierre Ossman59a39382009-03-09 13:15:56 +0000598 if (jsimd_can_convsamp_float())
599 fdct->float_convsamp = jsimd_convsamp_float;
600 else
601 fdct->float_convsamp = convsamp_float;
602 if (jsimd_can_quantize_float())
603 fdct->float_quantize = jsimd_quantize_float;
604 else
605 fdct->float_quantize = quantize_float;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000606 break;
607#endif
608 default:
609 ERREXIT(cinfo, JERR_NOT_COMPILED);
610 break;
611 }
612
Pierre Ossman35c47192009-03-09 13:29:37 +0000613 /* Allocate workspace memory */
614#ifdef DCT_FLOAT_SUPPORTED
615 if (cinfo->dct_method == JDCT_FLOAT)
616 fdct->float_workspace = (FAST_FLOAT *)
617 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
618 SIZEOF(FAST_FLOAT) * DCTSIZE2);
619 else
620#endif
621 fdct->workspace = (DCTELEM *)
622 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
623 SIZEOF(DCTELEM) * DCTSIZE2);
624
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000625 /* Mark divisor tables unallocated */
626 for (i = 0; i < NUM_QUANT_TBLS; i++) {
627 fdct->divisors[i] = NULL;
628#ifdef DCT_FLOAT_SUPPORTED
629 fdct->float_divisors[i] = NULL;
630#endif
631 }
632}