blob: 45b9c0dca24738ec2f23db94f079dfddd0782aa2 [file] [log] [blame]
/*
 * jcdctmgr.c
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains the forward-DCT management logic.
 * This code selects a particular DCT implementation to be used,
 * and it performs related housekeeping chores including coefficient
 * quantization.
 */
13
14#define JPEG_INTERNALS
15#include "jinclude.h"
16#include "jpeglib.h"
17#include "jdct.h" /* Private declarations for DCT subsystem */
18
19
/* Private subobject for this module */
21
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000022typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
23typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
24
25typedef JMETHOD(void, convsamp_method_ptr,
26 (JSAMPARRAY sample_data, JDIMENSION start_col,
27 DCTELEM * workspace));
28typedef JMETHOD(void, float_convsamp_method_ptr,
29 (JSAMPARRAY sample_data, JDIMENSION start_col,
30 FAST_FLOAT *workspace));
31
32typedef JMETHOD(void, quantize_method_ptr,
33 (JCOEFPTR coef_block, DCTELEM * divisors,
34 DCTELEM * workspace));
35typedef JMETHOD(void, float_quantize_method_ptr,
36 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
37 FAST_FLOAT * workspace));
38
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000039typedef struct {
40 struct jpeg_forward_dct pub; /* public fields */
41
42 /* Pointer to the DCT routine actually in use */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000043 forward_DCT_method_ptr dct;
44 convsamp_method_ptr convsamp;
45 quantize_method_ptr quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000046
47 /* The actual post-DCT divisors --- not identical to the quant table
48 * entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane489583f1996-02-07 00:00:00 +000049 * Each table is given in normal array order.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000050 */
51 DCTELEM * divisors[NUM_QUANT_TBLS];
52
53#ifdef DCT_FLOAT_SUPPORTED
54 /* Same as above for the floating-point case. */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000055 float_DCT_method_ptr float_dct;
56 float_convsamp_method_ptr float_convsamp;
57 float_quantize_method_ptr float_quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000058 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
59#endif
60} my_fdct_controller;
61
62typedef my_fdct_controller * my_fdct_ptr;
63
64
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000065/*
66 * Initialize for a processing pass.
67 * Verify that all referenced Q-tables are present, and set up
68 * the divisor table for each one.
69 * In the current implementation, DCT of all components is done during
70 * the first pass, even if only some components will be output in the
71 * first scan. Hence all components should be examined here.
72 */
73
Thomas G. Lane489583f1996-02-07 00:00:00 +000074METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000075start_pass_fdctmgr (j_compress_ptr cinfo)
76{
77 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
78 int ci, qtblno, i;
79 jpeg_component_info *compptr;
80 JQUANT_TBL * qtbl;
81 DCTELEM * dtbl;
82
83 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
84 ci++, compptr++) {
85 qtblno = compptr->quant_tbl_no;
86 /* Make sure specified quantization table is present */
87 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
88 cinfo->quant_tbl_ptrs[qtblno] == NULL)
89 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
90 qtbl = cinfo->quant_tbl_ptrs[qtblno];
91 /* Compute divisors for this quant table */
92 /* We may do this more than once for same table, but it's not a big deal */
93 switch (cinfo->dct_method) {
94#ifdef DCT_ISLOW_SUPPORTED
95 case JDCT_ISLOW:
96 /* For LL&M IDCT method, divisors are equal to raw quantization
97 * coefficients multiplied by 8 (to counteract scaling).
98 */
99 if (fdct->divisors[qtblno] == NULL) {
100 fdct->divisors[qtblno] = (DCTELEM *)
101 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
102 DCTSIZE2 * SIZEOF(DCTELEM));
103 }
104 dtbl = fdct->divisors[qtblno];
105 for (i = 0; i < DCTSIZE2; i++) {
Thomas G. Lane489583f1996-02-07 00:00:00 +0000106 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000107 }
108 break;
109#endif
110#ifdef DCT_IFAST_SUPPORTED
111 case JDCT_IFAST:
112 {
113 /* For AA&N IDCT method, divisors are equal to quantization
114 * coefficients scaled by scalefactor[row]*scalefactor[col], where
115 * scalefactor[0] = 1
116 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
117 * We apply a further scale factor of 8.
118 */
119#define CONST_BITS 14
120 static const INT16 aanscales[DCTSIZE2] = {
Thomas G. Lane489583f1996-02-07 00:00:00 +0000121 /* precomputed values scaled up by 14 bits */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000122 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
123 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
124 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
125 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
126 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
127 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
128 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
129 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
130 };
131 SHIFT_TEMPS
132
133 if (fdct->divisors[qtblno] == NULL) {
134 fdct->divisors[qtblno] = (DCTELEM *)
135 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
136 DCTSIZE2 * SIZEOF(DCTELEM));
137 }
138 dtbl = fdct->divisors[qtblno];
139 for (i = 0; i < DCTSIZE2; i++) {
140 dtbl[i] = (DCTELEM)
Thomas G. Lane489583f1996-02-07 00:00:00 +0000141 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000142 (INT32) aanscales[i]),
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000143 CONST_BITS-3);
144 }
145 }
146 break;
147#endif
148#ifdef DCT_FLOAT_SUPPORTED
149 case JDCT_FLOAT:
150 {
151 /* For float AA&N IDCT method, divisors are equal to quantization
152 * coefficients scaled by scalefactor[row]*scalefactor[col], where
153 * scalefactor[0] = 1
154 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
155 * We apply a further scale factor of 8.
156 * What's actually stored is 1/divisor so that the inner loop can
157 * use a multiplication rather than a division.
158 */
159 FAST_FLOAT * fdtbl;
160 int row, col;
161 static const double aanscalefactor[DCTSIZE] = {
162 1.0, 1.387039845, 1.306562965, 1.175875602,
163 1.0, 0.785694958, 0.541196100, 0.275899379
164 };
165
166 if (fdct->float_divisors[qtblno] == NULL) {
167 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
168 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
169 DCTSIZE2 * SIZEOF(FAST_FLOAT));
170 }
171 fdtbl = fdct->float_divisors[qtblno];
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000172 i = 0;
173 for (row = 0; row < DCTSIZE; row++) {
174 for (col = 0; col < DCTSIZE; col++) {
175 fdtbl[i] = (FAST_FLOAT)
Thomas G. Lane489583f1996-02-07 00:00:00 +0000176 (1.0 / (((double) qtbl->quantval[i] *
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000177 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
178 i++;
179 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000180 }
181 }
182 break;
183#endif
184 default:
185 ERREXIT(cinfo, JERR_NOT_COMPILED);
186 break;
187 }
188 }
189}
190
191
192/*
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000193 * Load data into workspace, applying unsigned->signed conversion.
194 */
195
196METHODDEF(void)
197convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
198{
199 register DCTELEM *workspaceptr;
200 register JSAMPROW elemptr;
201 register int elemr;
202
203 workspaceptr = workspace;
204 for (elemr = 0; elemr < DCTSIZE; elemr++) {
205 elemptr = sample_data[elemr] + start_col;
206
207#if DCTSIZE == 8 /* unroll the inner loop */
208 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
209 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
210 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
211 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
212 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
213 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
214 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
215 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
216#else
217 {
218 register int elemc;
219 for (elemc = DCTSIZE; elemc > 0; elemc--)
220 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
221 }
222#endif
223 }
224}
225
226
227/*
228 * Quantize/descale the coefficients, and store into coef_blocks[].
229 */
230
231METHODDEF(void)
232quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
233{
234 register DCTELEM temp, qval;
235 register int i;
236 register JCOEFPTR output_ptr = coef_block;
237
238 for (i = 0; i < DCTSIZE2; i++) {
239 qval = divisors[i];
240 temp = workspace[i];
241
242 /* Divide the coefficient value by qval, ensuring proper rounding.
243 * Since C does not specify the direction of rounding for negative
244 * quotients, we have to force the dividend positive for portability.
245 *
246 * In most files, at least half of the output values will be zero
247 * (at default quantization settings, more like three-quarters...)
248 * so we should ensure that this case is fast. On many machines,
249 * a comparison is enough cheaper than a divide to make a special test
250 * a win. Since both inputs will be nonnegative, we need only test
251 * for a < b to discover whether a/b is 0.
252 * If your machine's division is fast enough, define FAST_DIVIDE.
253 */
254#ifdef FAST_DIVIDE
255#define DIVIDE_BY(a,b) a /= b
256#else
257#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
258#endif
259
260 if (temp < 0) {
261 temp = -temp;
262 temp += qval>>1; /* for rounding */
263 DIVIDE_BY(temp, qval);
264 temp = -temp;
265 } else {
266 temp += qval>>1; /* for rounding */
267 DIVIDE_BY(temp, qval);
268 }
269 output_ptr[i] = (JCOEF) temp;
270 }
271}
272
273
274/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000275 * Perform forward DCT on one or more blocks of a component.
276 *
277 * The input samples are taken from the sample_data[] array starting at
278 * position start_row/start_col, and moving to the right for any additional
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000279 * blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000280 */
281
Thomas G. Lane489583f1996-02-07 00:00:00 +0000282METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000283forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
284 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
285 JDIMENSION start_row, JDIMENSION start_col,
286 JDIMENSION num_blocks)
287/* This version is used for integer DCT implementations. */
288{
289 /* This routine is heavily used, so it's worth coding it tightly. */
290 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000291 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
292 DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */
293 JDIMENSION bi;
294
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000295 /* Make sure the compiler doesn't look up these every pass */
296 forward_DCT_method_ptr do_dct = fdct->dct;
297 convsamp_method_ptr do_convsamp = fdct->convsamp;
298 quantize_method_ptr do_quantize = fdct->quantize;
299
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000300 sample_data += start_row; /* fold in the vertical offset once */
301
302 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
303 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000304 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000305
306 /* Perform the DCT */
307 (*do_dct) (workspace);
308
309 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000310 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000311 }
312}
313
314
315#ifdef DCT_FLOAT_SUPPORTED
316
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000317
318METHODDEF(void)
319convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
320{
321 register FAST_FLOAT *workspaceptr;
322 register JSAMPROW elemptr;
323 register int elemr;
324
325 workspaceptr = workspace;
326 for (elemr = 0; elemr < DCTSIZE; elemr++) {
327 elemptr = sample_data[elemr] + start_col;
328#if DCTSIZE == 8 /* unroll the inner loop */
329 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
330 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
331 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
332 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
333 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
334 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
335 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
336 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
337#else
338 {
339 register int elemc;
340 for (elemc = DCTSIZE; elemc > 0; elemc--)
341 *workspaceptr++ = (FAST_FLOAT)
342 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
343 }
344#endif
345 }
346}
347
348
349METHODDEF(void)
350quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
351{
352 register FAST_FLOAT temp;
353 register int i;
354 register JCOEFPTR output_ptr = coef_block;
355
356 for (i = 0; i < DCTSIZE2; i++) {
357 /* Apply the quantization and scaling factor */
358 temp = workspace[i] * divisors[i];
359
360 /* Round to nearest integer.
361 * Since C does not specify the direction of rounding for negative
362 * quotients, we have to force the dividend positive for portability.
363 * The maximum coefficient size is +-16K (for 12-bit data), so this
364 * code should work for either 16-bit or 32-bit ints.
365 */
366 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
367 }
368}
369
370
Thomas G. Lane489583f1996-02-07 00:00:00 +0000371METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000372forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
373 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
374 JDIMENSION start_row, JDIMENSION start_col,
375 JDIMENSION num_blocks)
376/* This version is used for floating-point DCT implementations. */
377{
378 /* This routine is heavily used, so it's worth coding it tightly. */
379 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000380 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
381 FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
382 JDIMENSION bi;
383
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000384 /* Make sure the compiler doesn't look up these every pass */
385 float_DCT_method_ptr do_dct = fdct->float_dct;
386 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
387 float_quantize_method_ptr do_quantize = fdct->float_quantize;
388
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000389 sample_data += start_row; /* fold in the vertical offset once */
390
391 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
392 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000393 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000394
395 /* Perform the DCT */
396 (*do_dct) (workspace);
397
398 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000399 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000400 }
401}
402
403#endif /* DCT_FLOAT_SUPPORTED */
404
405
406/*
407 * Initialize FDCT manager.
408 */
409
Thomas G. Lane489583f1996-02-07 00:00:00 +0000410GLOBAL(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000411jinit_forward_dct (j_compress_ptr cinfo)
412{
413 my_fdct_ptr fdct;
414 int i;
415
416 fdct = (my_fdct_ptr)
417 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
418 SIZEOF(my_fdct_controller));
419 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
420 fdct->pub.start_pass = start_pass_fdctmgr;
421
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000422 /* First determine the DCT... */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000423 switch (cinfo->dct_method) {
424#ifdef DCT_ISLOW_SUPPORTED
425 case JDCT_ISLOW:
426 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000427 fdct->dct = jpeg_fdct_islow;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000428 break;
429#endif
430#ifdef DCT_IFAST_SUPPORTED
431 case JDCT_IFAST:
432 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000433 fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000434 break;
435#endif
436#ifdef DCT_FLOAT_SUPPORTED
437 case JDCT_FLOAT:
438 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000439 fdct->float_dct = jpeg_fdct_float;
440 break;
441#endif
442 default:
443 ERREXIT(cinfo, JERR_NOT_COMPILED);
444 break;
445 }
446
447 /* ...then the supporting stages. */
448 switch (cinfo->dct_method) {
449#ifdef DCT_ISLOW_SUPPORTED
450 case JDCT_ISLOW:
451#endif
452#ifdef DCT_IFAST_SUPPORTED
453 case JDCT_IFAST:
454#endif
455#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
456 fdct->convsamp = convsamp;
457 fdct->quantize = quantize;
458 break;
459#endif
460#ifdef DCT_FLOAT_SUPPORTED
461 case JDCT_FLOAT:
462 fdct->float_convsamp = convsamp_float;
463 fdct->float_quantize = quantize_float;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000464 break;
465#endif
466 default:
467 ERREXIT(cinfo, JERR_NOT_COMPILED);
468 break;
469 }
470
471 /* Mark divisor tables unallocated */
472 for (i = 0; i < NUM_QUANT_TBLS; i++) {
473 fdct->divisors[i] = NULL;
474#ifdef DCT_FLOAT_SUPPORTED
475 fdct->float_divisors[i] = NULL;
476#endif
477 }
478}