blob: 3234a01aa14ca91421f133b2f6ed62881e85aca9 [file] [log] [blame]
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +00001/*
2 * jcdctmgr.c
3 *
DRCa73e8702012-12-31 02:52:30 +00004 * This file was part of the Independent JPEG Group's software:
Thomas G. Lane489583f1996-02-07 00:00:00 +00005 * Copyright (C) 1994-1996, Thomas G. Lane.
DRCa6ef2822013-09-28 03:23:49 +00006 * libjpeg-turbo Modifications:
Pierre Ossmandedc42e2009-03-09 13:23:04 +00007 * Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman59a39382009-03-09 13:15:56 +00008 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRCa49c4e52011-02-18 20:50:08 +00009 * Copyright (C) 2011 D. R. Commander
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000010 * For conditions of distribution and use, see the accompanying README file.
11 *
12 * This file contains the forward-DCT management logic.
13 * This code selects a particular DCT implementation to be used,
14 * and it performs related housekeeping chores including coefficient
15 * quantization.
16 */
17
18#define JPEG_INTERNALS
19#include "jinclude.h"
20#include "jpeglib.h"
21#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman59a39382009-03-09 13:15:56 +000022#include "jsimddct.h"
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000023
24
25/* Private subobject for this module */
26
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000027typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data));
28typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data));
29
30typedef JMETHOD(void, convsamp_method_ptr,
31 (JSAMPARRAY sample_data, JDIMENSION start_col,
32 DCTELEM * workspace));
33typedef JMETHOD(void, float_convsamp_method_ptr,
34 (JSAMPARRAY sample_data, JDIMENSION start_col,
35 FAST_FLOAT *workspace));
36
37typedef JMETHOD(void, quantize_method_ptr,
38 (JCOEFPTR coef_block, DCTELEM * divisors,
39 DCTELEM * workspace));
40typedef JMETHOD(void, float_quantize_method_ptr,
41 (JCOEFPTR coef_block, FAST_FLOAT * divisors,
42 FAST_FLOAT * workspace));
43
DRCa49c4e52011-02-18 20:50:08 +000044METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
45
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000046typedef struct {
47 struct jpeg_forward_dct pub; /* public fields */
48
49 /* Pointer to the DCT routine actually in use */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000050 forward_DCT_method_ptr dct;
51 convsamp_method_ptr convsamp;
52 quantize_method_ptr quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000053
54 /* The actual post-DCT divisors --- not identical to the quant table
55 * entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane489583f1996-02-07 00:00:00 +000056 * Each table is given in normal array order.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000057 */
58 DCTELEM * divisors[NUM_QUANT_TBLS];
59
Pierre Ossman35c47192009-03-09 13:29:37 +000060 /* work area for FDCT subroutine */
61 DCTELEM * workspace;
62
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000063#ifdef DCT_FLOAT_SUPPORTED
64 /* Same as above for the floating-point case. */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000065 float_DCT_method_ptr float_dct;
66 float_convsamp_method_ptr float_convsamp;
67 float_quantize_method_ptr float_quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000068 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman35c47192009-03-09 13:29:37 +000069 FAST_FLOAT * float_workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000070#endif
71} my_fdct_controller;
72
73typedef my_fdct_controller * my_fdct_ptr;
74
75
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000076/*
Pierre Ossmandedc42e2009-03-09 13:23:04 +000077 * Find the highest bit in an integer through binary search.
78 */
79LOCAL(int)
DRCfc5dc4f2009-10-01 22:26:14 +000080flss (UINT16 val)
Pierre Ossmandedc42e2009-03-09 13:23:04 +000081{
82 int bit;
83
84 bit = 16;
85
86 if (!val)
87 return 0;
88
89 if (!(val & 0xff00)) {
90 bit -= 8;
91 val <<= 8;
92 }
93 if (!(val & 0xf000)) {
94 bit -= 4;
95 val <<= 4;
96 }
97 if (!(val & 0xc000)) {
98 bit -= 2;
99 val <<= 2;
100 }
101 if (!(val & 0x8000)) {
102 bit -= 1;
103 val <<= 1;
104 }
105
106 return bit;
107}
108
109/*
110 * Compute values to do a division using reciprocal.
111 *
112 * This implementation is based on an algorithm described in
113 * "How to optimize for the Pentium family of microprocessors"
114 * (http://www.agner.org/assem/).
115 * More information about the basic algorithm can be found in
116 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
117 *
118 * The basic idea is to replace x/d by x * d^-1. In order to store
119 * d^-1 with enough precision we shift it left a few places. It turns
120 * out that this algorithm gives just enough precision, and also fits
121 * into DCTELEM:
122 *
123 * b = (the number of significant bits in divisor) - 1
124 * r = (word size) + b
125 * f = 2^r / divisor
126 *
127 * f will not be an integer for most cases, so we need to compensate
128 * for the rounding error introduced:
129 *
130 * no fractional part:
131 *
132 * result = input >> r
133 *
134 * fractional part of f < 0.5:
135 *
136 * round f down to nearest integer
137 * result = ((input + 1) * f) >> r
138 *
139 * fractional part of f > 0.5:
140 *
141 * round f up to nearest integer
142 * result = (input * f) >> r
143 *
144 * This is the original algorithm that gives truncated results. But we
145 * want properly rounded results, so we replace "input" with
146 * "input + divisor/2".
147 *
148 * In order to allow SIMD implementations we also tweak the values to
149 * allow the same calculation to be made at all times:
150 *
151 * dctbl[0] = f rounded to nearest integer
152 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
153 * dctbl[2] = 1 << ((word size) * 2 - r)
154 * dctbl[3] = r - (word size)
155 *
156 * dctbl[2] is for stupid instruction sets where the shift operation
157 * isn't member wise (e.g. MMX).
158 *
159 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
160 * is that most SIMD implementations have a "multiply and store top
161 * half" operation.
162 *
163 * Lastly, we store each of the values in their own table instead
164 * of in a consecutive manner, yet again in order to allow SIMD
165 * routines.
166 */
DRCa49c4e52011-02-18 20:50:08 +0000167LOCAL(int)
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000168compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
169{
170 UDCTELEM2 fq, fr;
171 UDCTELEM c;
172 int b, r;
173
DRCfc5dc4f2009-10-01 22:26:14 +0000174 b = flss(divisor) - 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000175 r = sizeof(DCTELEM) * 8 + b;
176
177 fq = ((UDCTELEM2)1 << r) / divisor;
178 fr = ((UDCTELEM2)1 << r) % divisor;
179
180 c = divisor / 2; /* for rounding */
181
182 if (fr == 0) { /* divisor is power of two */
183 /* fq will be one bit too large to fit in DCTELEM, so adjust */
184 fq >>= 1;
185 r--;
DRCd65d99a2012-01-31 03:39:23 +0000186 } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000187 c++;
188 } else { /* fractional part is > 0.5 */
189 fq++;
190 }
191
192 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
193 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
194 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
195 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
DRCa49c4e52011-02-18 20:50:08 +0000196
197 if(r <= 16) return 0;
198 else return 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000199}
200
201/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000202 * Initialize for a processing pass.
203 * Verify that all referenced Q-tables are present, and set up
204 * the divisor table for each one.
205 * In the current implementation, DCT of all components is done during
206 * the first pass, even if only some components will be output in the
207 * first scan. Hence all components should be examined here.
208 */
209
Thomas G. Lane489583f1996-02-07 00:00:00 +0000210METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000211start_pass_fdctmgr (j_compress_ptr cinfo)
212{
213 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
214 int ci, qtblno, i;
215 jpeg_component_info *compptr;
216 JQUANT_TBL * qtbl;
217 DCTELEM * dtbl;
218
219 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
220 ci++, compptr++) {
221 qtblno = compptr->quant_tbl_no;
222 /* Make sure specified quantization table is present */
223 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
224 cinfo->quant_tbl_ptrs[qtblno] == NULL)
225 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
226 qtbl = cinfo->quant_tbl_ptrs[qtblno];
227 /* Compute divisors for this quant table */
228 /* We may do this more than once for same table, but it's not a big deal */
229 switch (cinfo->dct_method) {
230#ifdef DCT_ISLOW_SUPPORTED
231 case JDCT_ISLOW:
232 /* For LL&M IDCT method, divisors are equal to raw quantization
233 * coefficients multiplied by 8 (to counteract scaling).
234 */
235 if (fdct->divisors[qtblno] == NULL) {
236 fdct->divisors[qtblno] = (DCTELEM *)
237 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000238 (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000239 }
240 dtbl = fdct->divisors[qtblno];
241 for (i = 0; i < DCTSIZE2; i++) {
DRCa49c4e52011-02-18 20:50:08 +0000242 if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
243 && fdct->quantize == jsimd_quantize)
244 fdct->quantize = quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000245 }
246 break;
247#endif
248#ifdef DCT_IFAST_SUPPORTED
249 case JDCT_IFAST:
250 {
251 /* For AA&N IDCT method, divisors are equal to quantization
252 * coefficients scaled by scalefactor[row]*scalefactor[col], where
253 * scalefactor[0] = 1
254 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
255 * We apply a further scale factor of 8.
256 */
257#define CONST_BITS 14
258 static const INT16 aanscales[DCTSIZE2] = {
Thomas G. Lane489583f1996-02-07 00:00:00 +0000259 /* precomputed values scaled up by 14 bits */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000260 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
261 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
262 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
263 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
264 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
265 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
266 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
267 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
268 };
269 SHIFT_TEMPS
270
271 if (fdct->divisors[qtblno] == NULL) {
272 fdct->divisors[qtblno] = (DCTELEM *)
273 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000274 (DCTSIZE2 * 4) * SIZEOF(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000275 }
276 dtbl = fdct->divisors[qtblno];
277 for (i = 0; i < DCTSIZE2; i++) {
DRCa49c4e52011-02-18 20:50:08 +0000278 if(!compute_reciprocal(
Thomas G. Lane489583f1996-02-07 00:00:00 +0000279 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000280 (INT32) aanscales[i]),
DRCa49c4e52011-02-18 20:50:08 +0000281 CONST_BITS-3), &dtbl[i])
282 && fdct->quantize == jsimd_quantize)
283 fdct->quantize = quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000284 }
285 }
286 break;
287#endif
288#ifdef DCT_FLOAT_SUPPORTED
289 case JDCT_FLOAT:
290 {
291 /* For float AA&N IDCT method, divisors are equal to quantization
292 * coefficients scaled by scalefactor[row]*scalefactor[col], where
293 * scalefactor[0] = 1
294 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
295 * We apply a further scale factor of 8.
296 * What's actually stored is 1/divisor so that the inner loop can
297 * use a multiplication rather than a division.
298 */
299 FAST_FLOAT * fdtbl;
300 int row, col;
301 static const double aanscalefactor[DCTSIZE] = {
302 1.0, 1.387039845, 1.306562965, 1.175875602,
303 1.0, 0.785694958, 0.541196100, 0.275899379
304 };
305
306 if (fdct->float_divisors[qtblno] == NULL) {
307 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
308 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
309 DCTSIZE2 * SIZEOF(FAST_FLOAT));
310 }
311 fdtbl = fdct->float_divisors[qtblno];
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000312 i = 0;
313 for (row = 0; row < DCTSIZE; row++) {
314 for (col = 0; col < DCTSIZE; col++) {
315 fdtbl[i] = (FAST_FLOAT)
Thomas G. Lane489583f1996-02-07 00:00:00 +0000316 (1.0 / (((double) qtbl->quantval[i] *
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000317 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
318 i++;
319 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000320 }
321 }
322 break;
323#endif
324 default:
325 ERREXIT(cinfo, JERR_NOT_COMPILED);
326 break;
327 }
328 }
329}
330
331
332/*
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000333 * Load data into workspace, applying unsigned->signed conversion.
334 */
335
336METHODDEF(void)
337convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
338{
339 register DCTELEM *workspaceptr;
340 register JSAMPROW elemptr;
341 register int elemr;
342
343 workspaceptr = workspace;
344 for (elemr = 0; elemr < DCTSIZE; elemr++) {
345 elemptr = sample_data[elemr] + start_col;
346
347#if DCTSIZE == 8 /* unroll the inner loop */
348 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
349 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
350 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
351 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
352 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
353 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
354 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
355 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
356#else
357 {
358 register int elemc;
359 for (elemc = DCTSIZE; elemc > 0; elemc--)
360 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
361 }
362#endif
363 }
364}
365
366
367/*
368 * Quantize/descale the coefficients, and store into coef_blocks[].
369 */
370
371METHODDEF(void)
372quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
373{
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000374 int i;
375 DCTELEM temp;
376 UDCTELEM recip, corr, shift;
377 UDCTELEM2 product;
378 JCOEFPTR output_ptr = coef_block;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000379
380 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000381 temp = workspace[i];
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000382 recip = divisors[i + DCTSIZE2 * 0];
383 corr = divisors[i + DCTSIZE2 * 1];
384 shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000385
386 if (temp < 0) {
387 temp = -temp;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000388 product = (UDCTELEM2)(temp + corr) * recip;
389 product >>= shift + sizeof(DCTELEM)*8;
390 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000391 temp = -temp;
392 } else {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000393 product = (UDCTELEM2)(temp + corr) * recip;
394 product >>= shift + sizeof(DCTELEM)*8;
395 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000396 }
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000397
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000398 output_ptr[i] = (JCOEF) temp;
399 }
400}
401
402
403/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000404 * Perform forward DCT on one or more blocks of a component.
405 *
406 * The input samples are taken from the sample_data[] array starting at
407 * position start_row/start_col, and moving to the right for any additional
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000408 * blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000409 */
410
Thomas G. Lane489583f1996-02-07 00:00:00 +0000411METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000412forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
413 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
414 JDIMENSION start_row, JDIMENSION start_col,
415 JDIMENSION num_blocks)
416/* This version is used for integer DCT implementations. */
417{
418 /* This routine is heavily used, so it's worth coding it tightly. */
419 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000420 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000421 DCTELEM * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000422 JDIMENSION bi;
423
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000424 /* Make sure the compiler doesn't look up these every pass */
425 forward_DCT_method_ptr do_dct = fdct->dct;
426 convsamp_method_ptr do_convsamp = fdct->convsamp;
427 quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000428 workspace = fdct->workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000429
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000430 sample_data += start_row; /* fold in the vertical offset once */
431
432 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
433 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000434 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000435
436 /* Perform the DCT */
437 (*do_dct) (workspace);
438
439 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000440 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000441 }
442}
443
444
445#ifdef DCT_FLOAT_SUPPORTED
446
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000447
448METHODDEF(void)
449convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
450{
451 register FAST_FLOAT *workspaceptr;
452 register JSAMPROW elemptr;
453 register int elemr;
454
455 workspaceptr = workspace;
456 for (elemr = 0; elemr < DCTSIZE; elemr++) {
457 elemptr = sample_data[elemr] + start_col;
458#if DCTSIZE == 8 /* unroll the inner loop */
459 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
460 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
461 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
462 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
463 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
464 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
465 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
466 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
467#else
468 {
469 register int elemc;
470 for (elemc = DCTSIZE; elemc > 0; elemc--)
471 *workspaceptr++ = (FAST_FLOAT)
472 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
473 }
474#endif
475 }
476}
477
478
479METHODDEF(void)
480quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
481{
482 register FAST_FLOAT temp;
483 register int i;
484 register JCOEFPTR output_ptr = coef_block;
485
486 for (i = 0; i < DCTSIZE2; i++) {
487 /* Apply the quantization and scaling factor */
488 temp = workspace[i] * divisors[i];
489
490 /* Round to nearest integer.
491 * Since C does not specify the direction of rounding for negative
492 * quotients, we have to force the dividend positive for portability.
493 * The maximum coefficient size is +-16K (for 12-bit data), so this
494 * code should work for either 16-bit or 32-bit ints.
495 */
496 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
497 }
498}
499
500
Thomas G. Lane489583f1996-02-07 00:00:00 +0000501METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000502forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
503 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
504 JDIMENSION start_row, JDIMENSION start_col,
505 JDIMENSION num_blocks)
506/* This version is used for floating-point DCT implementations. */
507{
508 /* This routine is heavily used, so it's worth coding it tightly. */
509 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000510 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000511 FAST_FLOAT * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000512 JDIMENSION bi;
513
Pierre Ossman35c47192009-03-09 13:29:37 +0000514
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000515 /* Make sure the compiler doesn't look up these every pass */
516 float_DCT_method_ptr do_dct = fdct->float_dct;
517 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
518 float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000519 workspace = fdct->float_workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000520
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000521 sample_data += start_row; /* fold in the vertical offset once */
522
523 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
524 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000525 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000526
527 /* Perform the DCT */
528 (*do_dct) (workspace);
529
530 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000531 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000532 }
533}
534
535#endif /* DCT_FLOAT_SUPPORTED */
536
537
538/*
539 * Initialize FDCT manager.
540 */
541
Thomas G. Lane489583f1996-02-07 00:00:00 +0000542GLOBAL(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000543jinit_forward_dct (j_compress_ptr cinfo)
544{
545 my_fdct_ptr fdct;
546 int i;
547
548 fdct = (my_fdct_ptr)
549 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
550 SIZEOF(my_fdct_controller));
551 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
552 fdct->pub.start_pass = start_pass_fdctmgr;
553
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000554 /* First determine the DCT... */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000555 switch (cinfo->dct_method) {
556#ifdef DCT_ISLOW_SUPPORTED
557 case JDCT_ISLOW:
558 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000559 if (jsimd_can_fdct_islow())
560 fdct->dct = jsimd_fdct_islow;
561 else
562 fdct->dct = jpeg_fdct_islow;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000563 break;
564#endif
565#ifdef DCT_IFAST_SUPPORTED
566 case JDCT_IFAST:
567 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000568 if (jsimd_can_fdct_ifast())
569 fdct->dct = jsimd_fdct_ifast;
570 else
571 fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000572 break;
573#endif
574#ifdef DCT_FLOAT_SUPPORTED
575 case JDCT_FLOAT:
576 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman59a39382009-03-09 13:15:56 +0000577 if (jsimd_can_fdct_float())
578 fdct->float_dct = jsimd_fdct_float;
579 else
580 fdct->float_dct = jpeg_fdct_float;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000581 break;
582#endif
583 default:
584 ERREXIT(cinfo, JERR_NOT_COMPILED);
585 break;
586 }
587
588 /* ...then the supporting stages. */
589 switch (cinfo->dct_method) {
590#ifdef DCT_ISLOW_SUPPORTED
591 case JDCT_ISLOW:
592#endif
593#ifdef DCT_IFAST_SUPPORTED
594 case JDCT_IFAST:
595#endif
596#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
Pierre Ossman59a39382009-03-09 13:15:56 +0000597 if (jsimd_can_convsamp())
598 fdct->convsamp = jsimd_convsamp;
599 else
600 fdct->convsamp = convsamp;
601 if (jsimd_can_quantize())
602 fdct->quantize = jsimd_quantize;
603 else
604 fdct->quantize = quantize;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000605 break;
606#endif
607#ifdef DCT_FLOAT_SUPPORTED
608 case JDCT_FLOAT:
Pierre Ossman59a39382009-03-09 13:15:56 +0000609 if (jsimd_can_convsamp_float())
610 fdct->float_convsamp = jsimd_convsamp_float;
611 else
612 fdct->float_convsamp = convsamp_float;
613 if (jsimd_can_quantize_float())
614 fdct->float_quantize = jsimd_quantize_float;
615 else
616 fdct->float_quantize = quantize_float;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000617 break;
618#endif
619 default:
620 ERREXIT(cinfo, JERR_NOT_COMPILED);
621 break;
622 }
623
Pierre Ossman35c47192009-03-09 13:29:37 +0000624 /* Allocate workspace memory */
625#ifdef DCT_FLOAT_SUPPORTED
626 if (cinfo->dct_method == JDCT_FLOAT)
627 fdct->float_workspace = (FAST_FLOAT *)
628 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
629 SIZEOF(FAST_FLOAT) * DCTSIZE2);
630 else
631#endif
632 fdct->workspace = (DCTELEM *)
633 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
634 SIZEOF(DCTELEM) * DCTSIZE2);
635
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000636 /* Mark divisor tables unallocated */
637 for (i = 0; i < NUM_QUANT_TBLS; i++) {
638 fdct->divisors[i] = NULL;
639#ifdef DCT_FLOAT_SUPPORTED
640 fdct->float_divisors[i] = NULL;
641#endif
642 }
643}