blob: 78935729cfd6b2b7b7841e0997187c170d873382 [file] [log] [blame]
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +00001/*
2 * jcdctmgr.c
3 *
DRCa73e8702012-12-31 02:52:30 +00004 * This file was part of the Independent JPEG Group's software:
Thomas G. Lane489583f1996-02-07 00:00:00 +00005 * Copyright (C) 1994-1996, Thomas G. Lane.
DRCa6ef2822013-09-28 03:23:49 +00006 * libjpeg-turbo Modifications:
Pierre Ossmandedc42e2009-03-09 13:23:04 +00007 * Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman59a39382009-03-09 13:15:56 +00008 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRCaee4f722014-08-09 23:06:07 +00009 * Copyright (C) 2011, 2014 D. R. Commander
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000010 * For conditions of distribution and use, see the accompanying README file.
11 *
12 * This file contains the forward-DCT management logic.
13 * This code selects a particular DCT implementation to be used,
14 * and it performs related housekeeping chores including coefficient
15 * quantization.
16 */
17
18#define JPEG_INTERNALS
19#include "jinclude.h"
20#include "jpeglib.h"
DRCe5eaf372014-05-09 18:00:32 +000021#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman59a39382009-03-09 13:15:56 +000022#include "jsimddct.h"
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000023
24
25/* Private subobject for this module */
26
DRCbc56b752014-05-16 10:43:44 +000027typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
28typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000029
DRCbc56b752014-05-16 10:43:44 +000030typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
31 JDIMENSION start_col,
32 DCTELEM * workspace);
33typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
34 JDIMENSION start_col,
35 FAST_FLOAT *workspace);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000036
DRCbc56b752014-05-16 10:43:44 +000037typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM * divisors,
38 DCTELEM * workspace);
39typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
40 FAST_FLOAT * divisors,
41 FAST_FLOAT * workspace);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000042
DRCa49c4e52011-02-18 20:50:08 +000043METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
44
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000045typedef struct {
DRCe5eaf372014-05-09 18:00:32 +000046 struct jpeg_forward_dct pub; /* public fields */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000047
48 /* Pointer to the DCT routine actually in use */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000049 forward_DCT_method_ptr dct;
50 convsamp_method_ptr convsamp;
51 quantize_method_ptr quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000052
53 /* The actual post-DCT divisors --- not identical to the quant table
54 * entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane489583f1996-02-07 00:00:00 +000055 * Each table is given in normal array order.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000056 */
57 DCTELEM * divisors[NUM_QUANT_TBLS];
58
Pierre Ossman35c47192009-03-09 13:29:37 +000059 /* work area for FDCT subroutine */
60 DCTELEM * workspace;
61
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000062#ifdef DCT_FLOAT_SUPPORTED
63 /* Same as above for the floating-point case. */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000064 float_DCT_method_ptr float_dct;
65 float_convsamp_method_ptr float_convsamp;
66 float_quantize_method_ptr float_quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000067 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman35c47192009-03-09 13:29:37 +000068 FAST_FLOAT * float_workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000069#endif
70} my_fdct_controller;
71
72typedef my_fdct_controller * my_fdct_ptr;
73
74
DRCaee4f722014-08-09 23:06:07 +000075#if BITS_IN_JSAMPLE == 8
76
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000077/*
Pierre Ossmandedc42e2009-03-09 13:23:04 +000078 * Find the highest bit in an integer through binary search.
79 */
DRCaee4f722014-08-09 23:06:07 +000080
Pierre Ossmandedc42e2009-03-09 13:23:04 +000081LOCAL(int)
DRCfc5dc4f2009-10-01 22:26:14 +000082flss (UINT16 val)
Pierre Ossmandedc42e2009-03-09 13:23:04 +000083{
84 int bit;
85
86 bit = 16;
87
88 if (!val)
89 return 0;
90
91 if (!(val & 0xff00)) {
92 bit -= 8;
93 val <<= 8;
94 }
95 if (!(val & 0xf000)) {
96 bit -= 4;
97 val <<= 4;
98 }
99 if (!(val & 0xc000)) {
100 bit -= 2;
101 val <<= 2;
102 }
103 if (!(val & 0x8000)) {
104 bit -= 1;
105 val <<= 1;
106 }
107
108 return bit;
109}
110
DRCaee4f722014-08-09 23:06:07 +0000111
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000112/*
113 * Compute values to do a division using reciprocal.
114 *
115 * This implementation is based on an algorithm described in
116 * "How to optimize for the Pentium family of microprocessors"
117 * (http://www.agner.org/assem/).
118 * More information about the basic algorithm can be found in
119 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
120 *
121 * The basic idea is to replace x/d by x * d^-1. In order to store
122 * d^-1 with enough precision we shift it left a few places. It turns
123 * out that this algorithm gives just enough precision, and also fits
124 * into DCTELEM:
125 *
126 * b = (the number of significant bits in divisor) - 1
127 * r = (word size) + b
128 * f = 2^r / divisor
129 *
130 * f will not be an integer for most cases, so we need to compensate
131 * for the rounding error introduced:
132 *
133 * no fractional part:
134 *
135 * result = input >> r
136 *
137 * fractional part of f < 0.5:
138 *
139 * round f down to nearest integer
140 * result = ((input + 1) * f) >> r
141 *
142 * fractional part of f > 0.5:
143 *
144 * round f up to nearest integer
145 * result = (input * f) >> r
146 *
147 * This is the original algorithm that gives truncated results. But we
148 * want properly rounded results, so we replace "input" with
149 * "input + divisor/2".
150 *
151 * In order to allow SIMD implementations we also tweak the values to
152 * allow the same calculation to be made at all times:
DRCe5eaf372014-05-09 18:00:32 +0000153 *
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000154 * dctbl[0] = f rounded to nearest integer
155 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
156 * dctbl[2] = 1 << ((word size) * 2 - r)
157 * dctbl[3] = r - (word size)
158 *
159 * dctbl[2] is for stupid instruction sets where the shift operation
160 * isn't member wise (e.g. MMX).
161 *
162 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
163 * is that most SIMD implementations have a "multiply and store top
164 * half" operation.
165 *
166 * Lastly, we store each of the values in their own table instead
167 * of in a consecutive manner, yet again in order to allow SIMD
168 * routines.
169 */
DRCaee4f722014-08-09 23:06:07 +0000170
DRCa49c4e52011-02-18 20:50:08 +0000171LOCAL(int)
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000172compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
173{
174 UDCTELEM2 fq, fr;
175 UDCTELEM c;
176 int b, r;
177
DRCfc5dc4f2009-10-01 22:26:14 +0000178 b = flss(divisor) - 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000179 r = sizeof(DCTELEM) * 8 + b;
180
181 fq = ((UDCTELEM2)1 << r) / divisor;
182 fr = ((UDCTELEM2)1 << r) % divisor;
183
184 c = divisor / 2; /* for rounding */
185
186 if (fr == 0) { /* divisor is power of two */
187 /* fq will be one bit too large to fit in DCTELEM, so adjust */
188 fq >>= 1;
189 r--;
DRCd65d99a2012-01-31 03:39:23 +0000190 } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000191 c++;
192 } else { /* fractional part is > 0.5 */
193 fq++;
194 }
195
196 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
197 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
198 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
199 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
DRCa49c4e52011-02-18 20:50:08 +0000200
201 if(r <= 16) return 0;
202 else return 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000203}
204
DRCaee4f722014-08-09 23:06:07 +0000205#endif
206
207
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000208/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000209 * Initialize for a processing pass.
210 * Verify that all referenced Q-tables are present, and set up
211 * the divisor table for each one.
212 * In the current implementation, DCT of all components is done during
213 * the first pass, even if only some components will be output in the
214 * first scan. Hence all components should be examined here.
215 */
216
Thomas G. Lane489583f1996-02-07 00:00:00 +0000217METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000218start_pass_fdctmgr (j_compress_ptr cinfo)
219{
220 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
221 int ci, qtblno, i;
222 jpeg_component_info *compptr;
223 JQUANT_TBL * qtbl;
224 DCTELEM * dtbl;
225
226 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
227 ci++, compptr++) {
228 qtblno = compptr->quant_tbl_no;
229 /* Make sure specified quantization table is present */
230 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
DRCe5eaf372014-05-09 18:00:32 +0000231 cinfo->quant_tbl_ptrs[qtblno] == NULL)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000232 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
233 qtbl = cinfo->quant_tbl_ptrs[qtblno];
234 /* Compute divisors for this quant table */
235 /* We may do this more than once for same table, but it's not a big deal */
236 switch (cinfo->dct_method) {
237#ifdef DCT_ISLOW_SUPPORTED
238 case JDCT_ISLOW:
239 /* For LL&M IDCT method, divisors are equal to raw quantization
240 * coefficients multiplied by 8 (to counteract scaling).
241 */
242 if (fdct->divisors[qtblno] == NULL) {
DRCe5eaf372014-05-09 18:00:32 +0000243 fdct->divisors[qtblno] = (DCTELEM *)
244 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000245 (DCTSIZE2 * 4) * sizeof(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000246 }
247 dtbl = fdct->divisors[qtblno];
248 for (i = 0; i < DCTSIZE2; i++) {
DRCaee4f722014-08-09 23:06:07 +0000249#if BITS_IN_JSAMPLE == 8
DRCe5eaf372014-05-09 18:00:32 +0000250 if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
251 && fdct->quantize == jsimd_quantize)
252 fdct->quantize = quantize;
DRCaee4f722014-08-09 23:06:07 +0000253#else
254 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
255#endif
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000256 }
257 break;
258#endif
259#ifdef DCT_IFAST_SUPPORTED
260 case JDCT_IFAST:
261 {
DRCe5eaf372014-05-09 18:00:32 +0000262 /* For AA&N IDCT method, divisors are equal to quantization
263 * coefficients scaled by scalefactor[row]*scalefactor[col], where
264 * scalefactor[0] = 1
265 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
266 * We apply a further scale factor of 8.
267 */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000268#define CONST_BITS 14
DRCe5eaf372014-05-09 18:00:32 +0000269 static const INT16 aanscales[DCTSIZE2] = {
270 /* precomputed values scaled up by 14 bits */
271 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
272 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
273 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
274 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
275 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
276 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
277 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
278 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
279 };
280 SHIFT_TEMPS
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000281
DRCe5eaf372014-05-09 18:00:32 +0000282 if (fdct->divisors[qtblno] == NULL) {
283 fdct->divisors[qtblno] = (DCTELEM *)
284 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000285 (DCTSIZE2 * 4) * sizeof(DCTELEM));
DRCe5eaf372014-05-09 18:00:32 +0000286 }
287 dtbl = fdct->divisors[qtblno];
288 for (i = 0; i < DCTSIZE2; i++) {
DRCaee4f722014-08-09 23:06:07 +0000289#if BITS_IN_JSAMPLE == 8
DRCe5eaf372014-05-09 18:00:32 +0000290 if(!compute_reciprocal(
291 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
292 (INT32) aanscales[i]),
293 CONST_BITS-3), &dtbl[i])
294 && fdct->quantize == jsimd_quantize)
295 fdct->quantize = quantize;
DRCaee4f722014-08-09 23:06:07 +0000296#else
297 dtbl[i] = (DCTELEM)
298 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
299 (INT32) aanscales[i]),
300 CONST_BITS-3);
301#endif
DRCe5eaf372014-05-09 18:00:32 +0000302 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000303 }
304 break;
305#endif
306#ifdef DCT_FLOAT_SUPPORTED
307 case JDCT_FLOAT:
308 {
DRCe5eaf372014-05-09 18:00:32 +0000309 /* For float AA&N IDCT method, divisors are equal to quantization
310 * coefficients scaled by scalefactor[row]*scalefactor[col], where
311 * scalefactor[0] = 1
312 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
313 * We apply a further scale factor of 8.
314 * What's actually stored is 1/divisor so that the inner loop can
315 * use a multiplication rather than a division.
316 */
317 FAST_FLOAT * fdtbl;
318 int row, col;
319 static const double aanscalefactor[DCTSIZE] = {
320 1.0, 1.387039845, 1.306562965, 1.175875602,
321 1.0, 0.785694958, 0.541196100, 0.275899379
322 };
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000323
DRCe5eaf372014-05-09 18:00:32 +0000324 if (fdct->float_divisors[qtblno] == NULL) {
325 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
326 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000327 DCTSIZE2 * sizeof(FAST_FLOAT));
DRCe5eaf372014-05-09 18:00:32 +0000328 }
329 fdtbl = fdct->float_divisors[qtblno];
330 i = 0;
331 for (row = 0; row < DCTSIZE; row++) {
332 for (col = 0; col < DCTSIZE; col++) {
333 fdtbl[i] = (FAST_FLOAT)
334 (1.0 / (((double) qtbl->quantval[i] *
335 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
336 i++;
337 }
338 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000339 }
340 break;
341#endif
342 default:
343 ERREXIT(cinfo, JERR_NOT_COMPILED);
344 break;
345 }
346 }
347}
348
349
350/*
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000351 * Load data into workspace, applying unsigned->signed conversion.
352 */
353
354METHODDEF(void)
355convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
356{
357 register DCTELEM *workspaceptr;
358 register JSAMPROW elemptr;
359 register int elemr;
360
361 workspaceptr = workspace;
362 for (elemr = 0; elemr < DCTSIZE; elemr++) {
363 elemptr = sample_data[elemr] + start_col;
364
DRCe5eaf372014-05-09 18:00:32 +0000365#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000366 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
367 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
368 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
369 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
370 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
371 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
372 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
373 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
374#else
375 {
376 register int elemc;
377 for (elemc = DCTSIZE; elemc > 0; elemc--)
378 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
379 }
380#endif
381 }
382}
383
384
385/*
386 * Quantize/descale the coefficients, and store into coef_blocks[].
387 */
388
389METHODDEF(void)
390quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
391{
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000392 int i;
393 DCTELEM temp;
DRCaee4f722014-08-09 23:06:07 +0000394 JCOEFPTR output_ptr = coef_block;
395
396#if BITS_IN_JSAMPLE == 8
397
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000398 UDCTELEM recip, corr, shift;
399 UDCTELEM2 product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000400
401 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000402 temp = workspace[i];
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000403 recip = divisors[i + DCTSIZE2 * 0];
404 corr = divisors[i + DCTSIZE2 * 1];
405 shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000406
407 if (temp < 0) {
408 temp = -temp;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000409 product = (UDCTELEM2)(temp + corr) * recip;
410 product >>= shift + sizeof(DCTELEM)*8;
411 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000412 temp = -temp;
413 } else {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000414 product = (UDCTELEM2)(temp + corr) * recip;
415 product >>= shift + sizeof(DCTELEM)*8;
416 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000417 }
418 output_ptr[i] = (JCOEF) temp;
419 }
DRCaee4f722014-08-09 23:06:07 +0000420
421#else
422
423 register DCTELEM qval;
424
425 for (i = 0; i < DCTSIZE2; i++) {
426 qval = divisors[i];
427 temp = workspace[i];
428 /* Divide the coefficient value by qval, ensuring proper rounding.
429 * Since C does not specify the direction of rounding for negative
430 * quotients, we have to force the dividend positive for portability.
431 *
432 * In most files, at least half of the output values will be zero
433 * (at default quantization settings, more like three-quarters...)
434 * so we should ensure that this case is fast. On many machines,
435 * a comparison is enough cheaper than a divide to make a special test
436 * a win. Since both inputs will be nonnegative, we need only test
437 * for a < b to discover whether a/b is 0.
438 * If your machine's division is fast enough, define FAST_DIVIDE.
439 */
440#ifdef FAST_DIVIDE
441#define DIVIDE_BY(a,b) a /= b
442#else
443#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
444#endif
445 if (temp < 0) {
446 temp = -temp;
447 temp += qval>>1; /* for rounding */
448 DIVIDE_BY(temp, qval);
449 temp = -temp;
450 } else {
451 temp += qval>>1; /* for rounding */
452 DIVIDE_BY(temp, qval);
453 }
454 output_ptr[i] = (JCOEF) temp;
455 }
456
457#endif
458
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000459}
460
461
462/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000463 * Perform forward DCT on one or more blocks of a component.
464 *
465 * The input samples are taken from the sample_data[] array starting at
466 * position start_row/start_col, and moving to the right for any additional
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000467 * blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000468 */
469
Thomas G. Lane489583f1996-02-07 00:00:00 +0000470METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000471forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRCe5eaf372014-05-09 18:00:32 +0000472 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
473 JDIMENSION start_row, JDIMENSION start_col,
474 JDIMENSION num_blocks)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000475/* This version is used for integer DCT implementations. */
476{
477 /* This routine is heavily used, so it's worth coding it tightly. */
478 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000479 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000480 DCTELEM * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000481 JDIMENSION bi;
482
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000483 /* Make sure the compiler doesn't look up these every pass */
484 forward_DCT_method_ptr do_dct = fdct->dct;
485 convsamp_method_ptr do_convsamp = fdct->convsamp;
486 quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000487 workspace = fdct->workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000488
DRCe5eaf372014-05-09 18:00:32 +0000489 sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000490
491 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
492 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000493 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000494
495 /* Perform the DCT */
496 (*do_dct) (workspace);
497
498 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000499 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000500 }
501}
502
503
504#ifdef DCT_FLOAT_SUPPORTED
505
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000506
507METHODDEF(void)
508convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
509{
510 register FAST_FLOAT *workspaceptr;
511 register JSAMPROW elemptr;
512 register int elemr;
513
514 workspaceptr = workspace;
515 for (elemr = 0; elemr < DCTSIZE; elemr++) {
516 elemptr = sample_data[elemr] + start_col;
DRCe5eaf372014-05-09 18:00:32 +0000517#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000518 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
519 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
520 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
521 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
522 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
523 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
524 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
525 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
526#else
527 {
528 register int elemc;
529 for (elemc = DCTSIZE; elemc > 0; elemc--)
530 *workspaceptr++ = (FAST_FLOAT)
531 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
532 }
533#endif
534 }
535}
536
537
538METHODDEF(void)
539quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
540{
541 register FAST_FLOAT temp;
542 register int i;
543 register JCOEFPTR output_ptr = coef_block;
544
545 for (i = 0; i < DCTSIZE2; i++) {
546 /* Apply the quantization and scaling factor */
547 temp = workspace[i] * divisors[i];
548
549 /* Round to nearest integer.
550 * Since C does not specify the direction of rounding for negative
551 * quotients, we have to force the dividend positive for portability.
552 * The maximum coefficient size is +-16K (for 12-bit data), so this
553 * code should work for either 16-bit or 32-bit ints.
554 */
555 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
556 }
557}
558
559
Thomas G. Lane489583f1996-02-07 00:00:00 +0000560METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000561forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRCe5eaf372014-05-09 18:00:32 +0000562 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
563 JDIMENSION start_row, JDIMENSION start_col,
564 JDIMENSION num_blocks)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000565/* This version is used for floating-point DCT implementations. */
566{
567 /* This routine is heavily used, so it's worth coding it tightly. */
568 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000569 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000570 FAST_FLOAT * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000571 JDIMENSION bi;
572
Pierre Ossman35c47192009-03-09 13:29:37 +0000573
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000574 /* Make sure the compiler doesn't look up these every pass */
575 float_DCT_method_ptr do_dct = fdct->float_dct;
576 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
577 float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000578 workspace = fdct->float_workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000579
DRCe5eaf372014-05-09 18:00:32 +0000580 sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000581
582 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
583 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000584 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000585
586 /* Perform the DCT */
587 (*do_dct) (workspace);
588
589 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000590 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000591 }
592}
593
594#endif /* DCT_FLOAT_SUPPORTED */
595
596
597/*
598 * Initialize FDCT manager.
599 */
600
Thomas G. Lane489583f1996-02-07 00:00:00 +0000601GLOBAL(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000602jinit_forward_dct (j_compress_ptr cinfo)
603{
604 my_fdct_ptr fdct;
605 int i;
606
607 fdct = (my_fdct_ptr)
608 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000609 sizeof(my_fdct_controller));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000610 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
611 fdct->pub.start_pass = start_pass_fdctmgr;
612
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000613 /* First determine the DCT... */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000614 switch (cinfo->dct_method) {
615#ifdef DCT_ISLOW_SUPPORTED
616 case JDCT_ISLOW:
617 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000618 if (jsimd_can_fdct_islow())
619 fdct->dct = jsimd_fdct_islow;
620 else
621 fdct->dct = jpeg_fdct_islow;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000622 break;
623#endif
624#ifdef DCT_IFAST_SUPPORTED
625 case JDCT_IFAST:
626 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000627 if (jsimd_can_fdct_ifast())
628 fdct->dct = jsimd_fdct_ifast;
629 else
630 fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000631 break;
632#endif
633#ifdef DCT_FLOAT_SUPPORTED
634 case JDCT_FLOAT:
635 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman59a39382009-03-09 13:15:56 +0000636 if (jsimd_can_fdct_float())
637 fdct->float_dct = jsimd_fdct_float;
638 else
639 fdct->float_dct = jpeg_fdct_float;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000640 break;
641#endif
642 default:
643 ERREXIT(cinfo, JERR_NOT_COMPILED);
644 break;
645 }
646
647 /* ...then the supporting stages. */
648 switch (cinfo->dct_method) {
649#ifdef DCT_ISLOW_SUPPORTED
650 case JDCT_ISLOW:
651#endif
652#ifdef DCT_IFAST_SUPPORTED
653 case JDCT_IFAST:
654#endif
655#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
Pierre Ossman59a39382009-03-09 13:15:56 +0000656 if (jsimd_can_convsamp())
657 fdct->convsamp = jsimd_convsamp;
658 else
659 fdct->convsamp = convsamp;
660 if (jsimd_can_quantize())
661 fdct->quantize = jsimd_quantize;
662 else
663 fdct->quantize = quantize;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000664 break;
665#endif
666#ifdef DCT_FLOAT_SUPPORTED
667 case JDCT_FLOAT:
Pierre Ossman59a39382009-03-09 13:15:56 +0000668 if (jsimd_can_convsamp_float())
669 fdct->float_convsamp = jsimd_convsamp_float;
670 else
671 fdct->float_convsamp = convsamp_float;
672 if (jsimd_can_quantize_float())
673 fdct->float_quantize = jsimd_quantize_float;
674 else
675 fdct->float_quantize = quantize_float;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000676 break;
677#endif
678 default:
679 ERREXIT(cinfo, JERR_NOT_COMPILED);
680 break;
681 }
682
Pierre Ossman35c47192009-03-09 13:29:37 +0000683 /* Allocate workspace memory */
684#ifdef DCT_FLOAT_SUPPORTED
685 if (cinfo->dct_method == JDCT_FLOAT)
686 fdct->float_workspace = (FAST_FLOAT *)
687 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000688 sizeof(FAST_FLOAT) * DCTSIZE2);
Pierre Ossman35c47192009-03-09 13:29:37 +0000689 else
690#endif
691 fdct->workspace = (DCTELEM *)
692 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000693 sizeof(DCTELEM) * DCTSIZE2);
Pierre Ossman35c47192009-03-09 13:29:37 +0000694
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000695 /* Mark divisor tables unallocated */
696 for (i = 0; i < NUM_QUANT_TBLS; i++) {
697 fdct->divisors[i] = NULL;
698#ifdef DCT_FLOAT_SUPPORTED
699 fdct->float_divisors[i] = NULL;
700#endif
701 }
702}