blob: 2d256590ebff380ea368eb6c1bdf4736539e5d28 [file] [log] [blame]
/*
 * jcdctmgr.c
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
 * Copyright (C) 2011 D. R. Commander
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains the forward-DCT management logic.
 * This code selects a particular DCT implementation to be used,
 * and it performs related housekeeping chores including coefficient
 * quantization.
 */
17
18#define JPEG_INTERNALS
19#include "jinclude.h"
20#include "jpeglib.h"
DRCe5eaf372014-05-09 18:00:32 +000021#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman59a39382009-03-09 13:15:56 +000022#include "jsimddct.h"
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000023
24
25/* Private subobject for this module */
26
DRCbc56b752014-05-16 10:43:44 +000027typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
28typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000029
DRCbc56b752014-05-16 10:43:44 +000030typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
31 JDIMENSION start_col,
32 DCTELEM * workspace);
33typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
34 JDIMENSION start_col,
35 FAST_FLOAT *workspace);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000036
DRCbc56b752014-05-16 10:43:44 +000037typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM * divisors,
38 DCTELEM * workspace);
39typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
40 FAST_FLOAT * divisors,
41 FAST_FLOAT * workspace);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000042
DRCa49c4e52011-02-18 20:50:08 +000043METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
44
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000045typedef struct {
DRCe5eaf372014-05-09 18:00:32 +000046 struct jpeg_forward_dct pub; /* public fields */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000047
48 /* Pointer to the DCT routine actually in use */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000049 forward_DCT_method_ptr dct;
50 convsamp_method_ptr convsamp;
51 quantize_method_ptr quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000052
53 /* The actual post-DCT divisors --- not identical to the quant table
54 * entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane489583f1996-02-07 00:00:00 +000055 * Each table is given in normal array order.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000056 */
57 DCTELEM * divisors[NUM_QUANT_TBLS];
58
Pierre Ossman35c47192009-03-09 13:29:37 +000059 /* work area for FDCT subroutine */
60 DCTELEM * workspace;
61
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000062#ifdef DCT_FLOAT_SUPPORTED
63 /* Same as above for the floating-point case. */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000064 float_DCT_method_ptr float_dct;
65 float_convsamp_method_ptr float_convsamp;
66 float_quantize_method_ptr float_quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000067 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman35c47192009-03-09 13:29:37 +000068 FAST_FLOAT * float_workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000069#endif
70} my_fdct_controller;
71
72typedef my_fdct_controller * my_fdct_ptr;
73
74
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000075/*
Pierre Ossmandedc42e2009-03-09 13:23:04 +000076 * Find the highest bit in an integer through binary search.
77 */
78LOCAL(int)
DRCfc5dc4f2009-10-01 22:26:14 +000079flss (UINT16 val)
Pierre Ossmandedc42e2009-03-09 13:23:04 +000080{
81 int bit;
82
83 bit = 16;
84
85 if (!val)
86 return 0;
87
88 if (!(val & 0xff00)) {
89 bit -= 8;
90 val <<= 8;
91 }
92 if (!(val & 0xf000)) {
93 bit -= 4;
94 val <<= 4;
95 }
96 if (!(val & 0xc000)) {
97 bit -= 2;
98 val <<= 2;
99 }
100 if (!(val & 0x8000)) {
101 bit -= 1;
102 val <<= 1;
103 }
104
105 return bit;
106}
107
108/*
109 * Compute values to do a division using reciprocal.
110 *
111 * This implementation is based on an algorithm described in
112 * "How to optimize for the Pentium family of microprocessors"
113 * (http://www.agner.org/assem/).
114 * More information about the basic algorithm can be found in
115 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
116 *
117 * The basic idea is to replace x/d by x * d^-1. In order to store
118 * d^-1 with enough precision we shift it left a few places. It turns
119 * out that this algoright gives just enough precision, and also fits
120 * into DCTELEM:
121 *
122 * b = (the number of significant bits in divisor) - 1
123 * r = (word size) + b
124 * f = 2^r / divisor
125 *
126 * f will not be an integer for most cases, so we need to compensate
127 * for the rounding error introduced:
128 *
129 * no fractional part:
130 *
131 * result = input >> r
132 *
133 * fractional part of f < 0.5:
134 *
135 * round f down to nearest integer
136 * result = ((input + 1) * f) >> r
137 *
138 * fractional part of f > 0.5:
139 *
140 * round f up to nearest integer
141 * result = (input * f) >> r
142 *
143 * This is the original algorithm that gives truncated results. But we
144 * want properly rounded results, so we replace "input" with
145 * "input + divisor/2".
146 *
147 * In order to allow SIMD implementations we also tweak the values to
148 * allow the same calculation to be made at all times:
DRCe5eaf372014-05-09 18:00:32 +0000149 *
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000150 * dctbl[0] = f rounded to nearest integer
151 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
152 * dctbl[2] = 1 << ((word size) * 2 - r)
153 * dctbl[3] = r - (word size)
154 *
155 * dctbl[2] is for stupid instruction sets where the shift operation
156 * isn't member wise (e.g. MMX).
157 *
158 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
159 * is that most SIMD implementations have a "multiply and store top
160 * half" operation.
161 *
162 * Lastly, we store each of the values in their own table instead
163 * of in a consecutive manner, yet again in order to allow SIMD
164 * routines.
165 */
DRCa49c4e52011-02-18 20:50:08 +0000166LOCAL(int)
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000167compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
168{
169 UDCTELEM2 fq, fr;
170 UDCTELEM c;
171 int b, r;
172
DRCfc5dc4f2009-10-01 22:26:14 +0000173 b = flss(divisor) - 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000174 r = sizeof(DCTELEM) * 8 + b;
175
176 fq = ((UDCTELEM2)1 << r) / divisor;
177 fr = ((UDCTELEM2)1 << r) % divisor;
178
179 c = divisor / 2; /* for rounding */
180
181 if (fr == 0) { /* divisor is power of two */
182 /* fq will be one bit too large to fit in DCTELEM, so adjust */
183 fq >>= 1;
184 r--;
DRCd65d99a2012-01-31 03:39:23 +0000185 } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000186 c++;
187 } else { /* fractional part is > 0.5 */
188 fq++;
189 }
190
191 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
192 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
193 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
194 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
DRCa49c4e52011-02-18 20:50:08 +0000195
196 if(r <= 16) return 0;
197 else return 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000198}
199
200/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000201 * Initialize for a processing pass.
202 * Verify that all referenced Q-tables are present, and set up
203 * the divisor table for each one.
204 * In the current implementation, DCT of all components is done during
205 * the first pass, even if only some components will be output in the
206 * first scan. Hence all components should be examined here.
207 */
208
Thomas G. Lane489583f1996-02-07 00:00:00 +0000209METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000210start_pass_fdctmgr (j_compress_ptr cinfo)
211{
212 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
213 int ci, qtblno, i;
214 jpeg_component_info *compptr;
215 JQUANT_TBL * qtbl;
216 DCTELEM * dtbl;
217
218 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
219 ci++, compptr++) {
220 qtblno = compptr->quant_tbl_no;
221 /* Make sure specified quantization table is present */
222 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
DRCe5eaf372014-05-09 18:00:32 +0000223 cinfo->quant_tbl_ptrs[qtblno] == NULL)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000224 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
225 qtbl = cinfo->quant_tbl_ptrs[qtblno];
226 /* Compute divisors for this quant table */
227 /* We may do this more than once for same table, but it's not a big deal */
228 switch (cinfo->dct_method) {
229#ifdef DCT_ISLOW_SUPPORTED
230 case JDCT_ISLOW:
231 /* For LL&M IDCT method, divisors are equal to raw quantization
232 * coefficients multiplied by 8 (to counteract scaling).
233 */
234 if (fdct->divisors[qtblno] == NULL) {
DRCe5eaf372014-05-09 18:00:32 +0000235 fdct->divisors[qtblno] = (DCTELEM *)
236 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000237 (DCTSIZE2 * 4) * sizeof(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000238 }
239 dtbl = fdct->divisors[qtblno];
240 for (i = 0; i < DCTSIZE2; i++) {
DRCe5eaf372014-05-09 18:00:32 +0000241 if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
242 && fdct->quantize == jsimd_quantize)
243 fdct->quantize = quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000244 }
245 break;
246#endif
247#ifdef DCT_IFAST_SUPPORTED
248 case JDCT_IFAST:
249 {
DRCe5eaf372014-05-09 18:00:32 +0000250 /* For AA&N IDCT method, divisors are equal to quantization
251 * coefficients scaled by scalefactor[row]*scalefactor[col], where
252 * scalefactor[0] = 1
253 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
254 * We apply a further scale factor of 8.
255 */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000256#define CONST_BITS 14
DRCe5eaf372014-05-09 18:00:32 +0000257 static const INT16 aanscales[DCTSIZE2] = {
258 /* precomputed values scaled up by 14 bits */
259 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
260 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
261 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
262 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
263 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
264 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
265 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
266 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
267 };
268 SHIFT_TEMPS
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000269
DRCe5eaf372014-05-09 18:00:32 +0000270 if (fdct->divisors[qtblno] == NULL) {
271 fdct->divisors[qtblno] = (DCTELEM *)
272 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000273 (DCTSIZE2 * 4) * sizeof(DCTELEM));
DRCe5eaf372014-05-09 18:00:32 +0000274 }
275 dtbl = fdct->divisors[qtblno];
276 for (i = 0; i < DCTSIZE2; i++) {
277 if(!compute_reciprocal(
278 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
279 (INT32) aanscales[i]),
280 CONST_BITS-3), &dtbl[i])
281 && fdct->quantize == jsimd_quantize)
282 fdct->quantize = quantize;
283 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000284 }
285 break;
286#endif
287#ifdef DCT_FLOAT_SUPPORTED
288 case JDCT_FLOAT:
289 {
DRCe5eaf372014-05-09 18:00:32 +0000290 /* For float AA&N IDCT method, divisors are equal to quantization
291 * coefficients scaled by scalefactor[row]*scalefactor[col], where
292 * scalefactor[0] = 1
293 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
294 * We apply a further scale factor of 8.
295 * What's actually stored is 1/divisor so that the inner loop can
296 * use a multiplication rather than a division.
297 */
298 FAST_FLOAT * fdtbl;
299 int row, col;
300 static const double aanscalefactor[DCTSIZE] = {
301 1.0, 1.387039845, 1.306562965, 1.175875602,
302 1.0, 0.785694958, 0.541196100, 0.275899379
303 };
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000304
DRCe5eaf372014-05-09 18:00:32 +0000305 if (fdct->float_divisors[qtblno] == NULL) {
306 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
307 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000308 DCTSIZE2 * sizeof(FAST_FLOAT));
DRCe5eaf372014-05-09 18:00:32 +0000309 }
310 fdtbl = fdct->float_divisors[qtblno];
311 i = 0;
312 for (row = 0; row < DCTSIZE; row++) {
313 for (col = 0; col < DCTSIZE; col++) {
314 fdtbl[i] = (FAST_FLOAT)
315 (1.0 / (((double) qtbl->quantval[i] *
316 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
317 i++;
318 }
319 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000320 }
321 break;
322#endif
323 default:
324 ERREXIT(cinfo, JERR_NOT_COMPILED);
325 break;
326 }
327 }
328}
329
330
331/*
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000332 * Load data into workspace, applying unsigned->signed conversion.
333 */
334
335METHODDEF(void)
336convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
337{
338 register DCTELEM *workspaceptr;
339 register JSAMPROW elemptr;
340 register int elemr;
341
342 workspaceptr = workspace;
343 for (elemr = 0; elemr < DCTSIZE; elemr++) {
344 elemptr = sample_data[elemr] + start_col;
345
DRCe5eaf372014-05-09 18:00:32 +0000346#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000347 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
348 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
349 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
350 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
351 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
352 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
353 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
354 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
355#else
356 {
357 register int elemc;
358 for (elemc = DCTSIZE; elemc > 0; elemc--)
359 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
360 }
361#endif
362 }
363}
364
365
366/*
367 * Quantize/descale the coefficients, and store into coef_blocks[].
368 */
369
370METHODDEF(void)
371quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
372{
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000373 int i;
374 DCTELEM temp;
375 UDCTELEM recip, corr, shift;
376 UDCTELEM2 product;
377 JCOEFPTR output_ptr = coef_block;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000378
379 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000380 temp = workspace[i];
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000381 recip = divisors[i + DCTSIZE2 * 0];
382 corr = divisors[i + DCTSIZE2 * 1];
383 shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000384
385 if (temp < 0) {
386 temp = -temp;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000387 product = (UDCTELEM2)(temp + corr) * recip;
388 product >>= shift + sizeof(DCTELEM)*8;
389 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000390 temp = -temp;
391 } else {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000392 product = (UDCTELEM2)(temp + corr) * recip;
393 product >>= shift + sizeof(DCTELEM)*8;
394 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000395 }
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000396
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000397 output_ptr[i] = (JCOEF) temp;
398 }
399}
400
401
402/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000403 * Perform forward DCT on one or more blocks of a component.
404 *
405 * The input samples are taken from the sample_data[] array starting at
406 * position start_row/start_col, and moving to the right for any additional
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000407 * blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000408 */
409
Thomas G. Lane489583f1996-02-07 00:00:00 +0000410METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000411forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRCe5eaf372014-05-09 18:00:32 +0000412 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
413 JDIMENSION start_row, JDIMENSION start_col,
414 JDIMENSION num_blocks)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000415/* This version is used for integer DCT implementations. */
416{
417 /* This routine is heavily used, so it's worth coding it tightly. */
418 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000419 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000420 DCTELEM * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000421 JDIMENSION bi;
422
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000423 /* Make sure the compiler doesn't look up these every pass */
424 forward_DCT_method_ptr do_dct = fdct->dct;
425 convsamp_method_ptr do_convsamp = fdct->convsamp;
426 quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000427 workspace = fdct->workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000428
DRCe5eaf372014-05-09 18:00:32 +0000429 sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000430
431 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
432 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000433 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000434
435 /* Perform the DCT */
436 (*do_dct) (workspace);
437
438 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000439 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000440 }
441}
442
443
444#ifdef DCT_FLOAT_SUPPORTED
445
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000446
447METHODDEF(void)
448convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
449{
450 register FAST_FLOAT *workspaceptr;
451 register JSAMPROW elemptr;
452 register int elemr;
453
454 workspaceptr = workspace;
455 for (elemr = 0; elemr < DCTSIZE; elemr++) {
456 elemptr = sample_data[elemr] + start_col;
DRCe5eaf372014-05-09 18:00:32 +0000457#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000458 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
459 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
460 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
461 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
462 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
463 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
464 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
465 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
466#else
467 {
468 register int elemc;
469 for (elemc = DCTSIZE; elemc > 0; elemc--)
470 *workspaceptr++ = (FAST_FLOAT)
471 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
472 }
473#endif
474 }
475}
476
477
478METHODDEF(void)
479quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
480{
481 register FAST_FLOAT temp;
482 register int i;
483 register JCOEFPTR output_ptr = coef_block;
484
485 for (i = 0; i < DCTSIZE2; i++) {
486 /* Apply the quantization and scaling factor */
487 temp = workspace[i] * divisors[i];
488
489 /* Round to nearest integer.
490 * Since C does not specify the direction of rounding for negative
491 * quotients, we have to force the dividend positive for portability.
492 * The maximum coefficient size is +-16K (for 12-bit data), so this
493 * code should work for either 16-bit or 32-bit ints.
494 */
495 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
496 }
497}
498
499
Thomas G. Lane489583f1996-02-07 00:00:00 +0000500METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000501forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRCe5eaf372014-05-09 18:00:32 +0000502 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
503 JDIMENSION start_row, JDIMENSION start_col,
504 JDIMENSION num_blocks)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000505/* This version is used for floating-point DCT implementations. */
506{
507 /* This routine is heavily used, so it's worth coding it tightly. */
508 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000509 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000510 FAST_FLOAT * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000511 JDIMENSION bi;
512
Pierre Ossman35c47192009-03-09 13:29:37 +0000513
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000514 /* Make sure the compiler doesn't look up these every pass */
515 float_DCT_method_ptr do_dct = fdct->float_dct;
516 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
517 float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000518 workspace = fdct->float_workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000519
DRCe5eaf372014-05-09 18:00:32 +0000520 sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000521
522 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
523 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000524 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000525
526 /* Perform the DCT */
527 (*do_dct) (workspace);
528
529 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000530 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000531 }
532}
533
534#endif /* DCT_FLOAT_SUPPORTED */
535
536
537/*
538 * Initialize FDCT manager.
539 */
540
Thomas G. Lane489583f1996-02-07 00:00:00 +0000541GLOBAL(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000542jinit_forward_dct (j_compress_ptr cinfo)
543{
544 my_fdct_ptr fdct;
545 int i;
546
547 fdct = (my_fdct_ptr)
548 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000549 sizeof(my_fdct_controller));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000550 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
551 fdct->pub.start_pass = start_pass_fdctmgr;
552
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000553 /* First determine the DCT... */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000554 switch (cinfo->dct_method) {
555#ifdef DCT_ISLOW_SUPPORTED
556 case JDCT_ISLOW:
557 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000558 if (jsimd_can_fdct_islow())
559 fdct->dct = jsimd_fdct_islow;
560 else
561 fdct->dct = jpeg_fdct_islow;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000562 break;
563#endif
564#ifdef DCT_IFAST_SUPPORTED
565 case JDCT_IFAST:
566 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000567 if (jsimd_can_fdct_ifast())
568 fdct->dct = jsimd_fdct_ifast;
569 else
570 fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000571 break;
572#endif
573#ifdef DCT_FLOAT_SUPPORTED
574 case JDCT_FLOAT:
575 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman59a39382009-03-09 13:15:56 +0000576 if (jsimd_can_fdct_float())
577 fdct->float_dct = jsimd_fdct_float;
578 else
579 fdct->float_dct = jpeg_fdct_float;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000580 break;
581#endif
582 default:
583 ERREXIT(cinfo, JERR_NOT_COMPILED);
584 break;
585 }
586
587 /* ...then the supporting stages. */
588 switch (cinfo->dct_method) {
589#ifdef DCT_ISLOW_SUPPORTED
590 case JDCT_ISLOW:
591#endif
592#ifdef DCT_IFAST_SUPPORTED
593 case JDCT_IFAST:
594#endif
595#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
Pierre Ossman59a39382009-03-09 13:15:56 +0000596 if (jsimd_can_convsamp())
597 fdct->convsamp = jsimd_convsamp;
598 else
599 fdct->convsamp = convsamp;
600 if (jsimd_can_quantize())
601 fdct->quantize = jsimd_quantize;
602 else
603 fdct->quantize = quantize;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000604 break;
605#endif
606#ifdef DCT_FLOAT_SUPPORTED
607 case JDCT_FLOAT:
Pierre Ossman59a39382009-03-09 13:15:56 +0000608 if (jsimd_can_convsamp_float())
609 fdct->float_convsamp = jsimd_convsamp_float;
610 else
611 fdct->float_convsamp = convsamp_float;
612 if (jsimd_can_quantize_float())
613 fdct->float_quantize = jsimd_quantize_float;
614 else
615 fdct->float_quantize = quantize_float;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000616 break;
617#endif
618 default:
619 ERREXIT(cinfo, JERR_NOT_COMPILED);
620 break;
621 }
622
Pierre Ossman35c47192009-03-09 13:29:37 +0000623 /* Allocate workspace memory */
624#ifdef DCT_FLOAT_SUPPORTED
625 if (cinfo->dct_method == JDCT_FLOAT)
626 fdct->float_workspace = (FAST_FLOAT *)
627 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000628 sizeof(FAST_FLOAT) * DCTSIZE2);
Pierre Ossman35c47192009-03-09 13:29:37 +0000629 else
630#endif
631 fdct->workspace = (DCTELEM *)
632 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000633 sizeof(DCTELEM) * DCTSIZE2);
Pierre Ossman35c47192009-03-09 13:29:37 +0000634
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000635 /* Mark divisor tables unallocated */
636 for (i = 0; i < NUM_QUANT_TBLS; i++) {
637 fdct->divisors[i] = NULL;
638#ifdef DCT_FLOAT_SUPPORTED
639 fdct->float_divisors[i] = NULL;
640#endif
641 }
642}