blob: cef4b5e7520242808dca64420aa62436df6103bb [file] [log] [blame]
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +00001/*
2 * jcdctmgr.c
3 *
DRCa73e8702012-12-31 02:52:30 +00004 * This file was part of the Independent JPEG Group's software:
Thomas G. Lane489583f1996-02-07 00:00:00 +00005 * Copyright (C) 1994-1996, Thomas G. Lane.
DRCa6ef2822013-09-28 03:23:49 +00006 * libjpeg-turbo Modifications:
Pierre Ossmandedc42e2009-03-09 13:23:04 +00007 * Copyright (C) 1999-2006, MIYASAKA Masaru.
Pierre Ossman59a39382009-03-09 13:15:56 +00008 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
DRCb5a55e62015-08-29 18:05:43 -05009 * Copyright (C) 2011, 2014-2015 D. R. Commander
DRC7e3acc02015-10-10 10:25:46 -050010 * For conditions of distribution and use, see the accompanying README.ijg
11 * file.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000012 *
13 * This file contains the forward-DCT management logic.
14 * This code selects a particular DCT implementation to be used,
15 * and it performs related housekeeping chores including coefficient
16 * quantization.
17 */
18
19#define JPEG_INTERNALS
20#include "jinclude.h"
21#include "jpeglib.h"
DRCe5eaf372014-05-09 18:00:32 +000022#include "jdct.h" /* Private declarations for DCT subsystem */
Pierre Ossman59a39382009-03-09 13:15:56 +000023#include "jsimddct.h"
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000024
25
26/* Private subobject for this module */
27
DRCbc56b752014-05-16 10:43:44 +000028typedef void (*forward_DCT_method_ptr) (DCTELEM * data);
29typedef void (*float_DCT_method_ptr) (FAST_FLOAT * data);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000030
DRCbc56b752014-05-16 10:43:44 +000031typedef void (*convsamp_method_ptr) (JSAMPARRAY sample_data,
32 JDIMENSION start_col,
33 DCTELEM * workspace);
34typedef void (*float_convsamp_method_ptr) (JSAMPARRAY sample_data,
35 JDIMENSION start_col,
36 FAST_FLOAT *workspace);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000037
DRCbc56b752014-05-16 10:43:44 +000038typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM * divisors,
39 DCTELEM * workspace);
40typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
41 FAST_FLOAT * divisors,
42 FAST_FLOAT * workspace);
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000043
DRCa49c4e52011-02-18 20:50:08 +000044METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *);
45
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000046typedef struct {
DRCe5eaf372014-05-09 18:00:32 +000047 struct jpeg_forward_dct pub; /* public fields */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000048
49 /* Pointer to the DCT routine actually in use */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000050 forward_DCT_method_ptr dct;
51 convsamp_method_ptr convsamp;
52 quantize_method_ptr quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000053
54 /* The actual post-DCT divisors --- not identical to the quant table
55 * entries, because of scaling (especially for an unnormalized DCT).
Thomas G. Lane489583f1996-02-07 00:00:00 +000056 * Each table is given in normal array order.
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000057 */
58 DCTELEM * divisors[NUM_QUANT_TBLS];
59
Pierre Ossman35c47192009-03-09 13:29:37 +000060 /* work area for FDCT subroutine */
61 DCTELEM * workspace;
62
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000063#ifdef DCT_FLOAT_SUPPORTED
64 /* Same as above for the floating-point case. */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +000065 float_DCT_method_ptr float_dct;
66 float_convsamp_method_ptr float_convsamp;
67 float_quantize_method_ptr float_quantize;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000068 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
Pierre Ossman35c47192009-03-09 13:29:37 +000069 FAST_FLOAT * float_workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000070#endif
71} my_fdct_controller;
72
73typedef my_fdct_controller * my_fdct_ptr;
74
75
DRCaee4f722014-08-09 23:06:07 +000076#if BITS_IN_JSAMPLE == 8
77
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +000078/*
Pierre Ossmandedc42e2009-03-09 13:23:04 +000079 * Find the highest bit in an integer through binary search.
80 */
DRCaee4f722014-08-09 23:06:07 +000081
Pierre Ossmandedc42e2009-03-09 13:23:04 +000082LOCAL(int)
DRCfc5dc4f2009-10-01 22:26:14 +000083flss (UINT16 val)
Pierre Ossmandedc42e2009-03-09 13:23:04 +000084{
85 int bit;
86
87 bit = 16;
88
89 if (!val)
90 return 0;
91
92 if (!(val & 0xff00)) {
93 bit -= 8;
94 val <<= 8;
95 }
96 if (!(val & 0xf000)) {
97 bit -= 4;
98 val <<= 4;
99 }
100 if (!(val & 0xc000)) {
101 bit -= 2;
102 val <<= 2;
103 }
104 if (!(val & 0x8000)) {
105 bit -= 1;
106 val <<= 1;
107 }
108
109 return bit;
110}
111
DRCaee4f722014-08-09 23:06:07 +0000112
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000113/*
114 * Compute values to do a division using reciprocal.
115 *
116 * This implementation is based on an algorithm described in
117 * "How to optimize for the Pentium family of microprocessors"
118 * (http://www.agner.org/assem/).
119 * More information about the basic algorithm can be found in
120 * the paper "Integer Division Using Reciprocals" by Robert Alverson.
121 *
122 * The basic idea is to replace x/d by x * d^-1. In order to store
123 * d^-1 with enough precision we shift it left a few places. It turns
124 * out that this algoright gives just enough precision, and also fits
125 * into DCTELEM:
126 *
127 * b = (the number of significant bits in divisor) - 1
128 * r = (word size) + b
129 * f = 2^r / divisor
130 *
131 * f will not be an integer for most cases, so we need to compensate
132 * for the rounding error introduced:
133 *
134 * no fractional part:
135 *
136 * result = input >> r
137 *
138 * fractional part of f < 0.5:
139 *
140 * round f down to nearest integer
141 * result = ((input + 1) * f) >> r
142 *
143 * fractional part of f > 0.5:
144 *
145 * round f up to nearest integer
146 * result = (input * f) >> r
147 *
148 * This is the original algorithm that gives truncated results. But we
149 * want properly rounded results, so we replace "input" with
150 * "input + divisor/2".
151 *
152 * In order to allow SIMD implementations we also tweak the values to
153 * allow the same calculation to be made at all times:
DRCe5eaf372014-05-09 18:00:32 +0000154 *
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000155 * dctbl[0] = f rounded to nearest integer
156 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
157 * dctbl[2] = 1 << ((word size) * 2 - r)
158 * dctbl[3] = r - (word size)
159 *
160 * dctbl[2] is for stupid instruction sets where the shift operation
161 * isn't member wise (e.g. MMX).
162 *
163 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
164 * is that most SIMD implementations have a "multiply and store top
165 * half" operation.
166 *
167 * Lastly, we store each of the values in their own table instead
168 * of in a consecutive manner, yet again in order to allow SIMD
169 * routines.
170 */
DRCaee4f722014-08-09 23:06:07 +0000171
DRCa49c4e52011-02-18 20:50:08 +0000172LOCAL(int)
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000173compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
174{
175 UDCTELEM2 fq, fr;
176 UDCTELEM c;
177 int b, r;
178
DRCb5a55e62015-08-29 18:05:43 -0500179 if (divisor == 1) {
180 /* divisor == 1 means unquantized, so these reciprocal/correction/shift
181 * values will cause the C quantization algorithm to act like the
182 * identity function. Since only the C quantization algorithm is used in
183 * these cases, the scale value is irrelevant.
184 */
185 dtbl[DCTSIZE2 * 0] = (DCTELEM) 1; /* reciprocal */
186 dtbl[DCTSIZE2 * 1] = (DCTELEM) 0; /* correction */
187 dtbl[DCTSIZE2 * 2] = (DCTELEM) 1; /* scale */
188 dtbl[DCTSIZE2 * 3] = (DCTELEM) (-sizeof(DCTELEM) * 8); /* shift */
189 return 0;
190 }
191
DRCfc5dc4f2009-10-01 22:26:14 +0000192 b = flss(divisor) - 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000193 r = sizeof(DCTELEM) * 8 + b;
194
195 fq = ((UDCTELEM2)1 << r) / divisor;
196 fr = ((UDCTELEM2)1 << r) % divisor;
197
198 c = divisor / 2; /* for rounding */
199
200 if (fr == 0) { /* divisor is power of two */
201 /* fq will be one bit too large to fit in DCTELEM, so adjust */
202 fq >>= 1;
203 r--;
DRCd65d99a2012-01-31 03:39:23 +0000204 } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000205 c++;
206 } else { /* fractional part is > 0.5 */
207 fq++;
208 }
209
210 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */
211 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */
212 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */
213 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */
DRCa49c4e52011-02-18 20:50:08 +0000214
215 if(r <= 16) return 0;
216 else return 1;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000217}
218
DRCaee4f722014-08-09 23:06:07 +0000219#endif
220
221
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000222/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000223 * Initialize for a processing pass.
224 * Verify that all referenced Q-tables are present, and set up
225 * the divisor table for each one.
226 * In the current implementation, DCT of all components is done during
227 * the first pass, even if only some components will be output in the
228 * first scan. Hence all components should be examined here.
229 */
230
Thomas G. Lane489583f1996-02-07 00:00:00 +0000231METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000232start_pass_fdctmgr (j_compress_ptr cinfo)
233{
234 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
235 int ci, qtblno, i;
236 jpeg_component_info *compptr;
237 JQUANT_TBL * qtbl;
238 DCTELEM * dtbl;
239
240 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
241 ci++, compptr++) {
242 qtblno = compptr->quant_tbl_no;
243 /* Make sure specified quantization table is present */
244 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
DRCe5eaf372014-05-09 18:00:32 +0000245 cinfo->quant_tbl_ptrs[qtblno] == NULL)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000246 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
247 qtbl = cinfo->quant_tbl_ptrs[qtblno];
248 /* Compute divisors for this quant table */
249 /* We may do this more than once for same table, but it's not a big deal */
250 switch (cinfo->dct_method) {
251#ifdef DCT_ISLOW_SUPPORTED
252 case JDCT_ISLOW:
253 /* For LL&M IDCT method, divisors are equal to raw quantization
254 * coefficients multiplied by 8 (to counteract scaling).
255 */
256 if (fdct->divisors[qtblno] == NULL) {
DRCe5eaf372014-05-09 18:00:32 +0000257 fdct->divisors[qtblno] = (DCTELEM *)
258 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000259 (DCTSIZE2 * 4) * sizeof(DCTELEM));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000260 }
261 dtbl = fdct->divisors[qtblno];
262 for (i = 0; i < DCTSIZE2; i++) {
DRCaee4f722014-08-09 23:06:07 +0000263#if BITS_IN_JSAMPLE == 8
DRCe5eaf372014-05-09 18:00:32 +0000264 if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i])
265 && fdct->quantize == jsimd_quantize)
266 fdct->quantize = quantize;
DRCaee4f722014-08-09 23:06:07 +0000267#else
268 dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
269#endif
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000270 }
271 break;
272#endif
273#ifdef DCT_IFAST_SUPPORTED
274 case JDCT_IFAST:
275 {
DRCe5eaf372014-05-09 18:00:32 +0000276 /* For AA&N IDCT method, divisors are equal to quantization
277 * coefficients scaled by scalefactor[row]*scalefactor[col], where
278 * scalefactor[0] = 1
279 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
280 * We apply a further scale factor of 8.
281 */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000282#define CONST_BITS 14
DRCe5eaf372014-05-09 18:00:32 +0000283 static const INT16 aanscales[DCTSIZE2] = {
284 /* precomputed values scaled up by 14 bits */
285 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
286 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
287 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
288 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
289 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
290 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
291 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
292 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
293 };
294 SHIFT_TEMPS
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000295
DRCe5eaf372014-05-09 18:00:32 +0000296 if (fdct->divisors[qtblno] == NULL) {
297 fdct->divisors[qtblno] = (DCTELEM *)
298 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000299 (DCTSIZE2 * 4) * sizeof(DCTELEM));
DRCe5eaf372014-05-09 18:00:32 +0000300 }
301 dtbl = fdct->divisors[qtblno];
302 for (i = 0; i < DCTSIZE2; i++) {
DRCaee4f722014-08-09 23:06:07 +0000303#if BITS_IN_JSAMPLE == 8
DRCe5eaf372014-05-09 18:00:32 +0000304 if(!compute_reciprocal(
DRC1e32fe32015-10-14 17:32:39 -0500305 DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
306 (JLONG) aanscales[i]),
DRCe5eaf372014-05-09 18:00:32 +0000307 CONST_BITS-3), &dtbl[i])
308 && fdct->quantize == jsimd_quantize)
309 fdct->quantize = quantize;
DRCaee4f722014-08-09 23:06:07 +0000310#else
311 dtbl[i] = (DCTELEM)
DRC1e32fe32015-10-14 17:32:39 -0500312 DESCALE(MULTIPLY16V16((JLONG) qtbl->quantval[i],
313 (JLONG) aanscales[i]),
DRCaee4f722014-08-09 23:06:07 +0000314 CONST_BITS-3);
315#endif
DRCe5eaf372014-05-09 18:00:32 +0000316 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000317 }
318 break;
319#endif
320#ifdef DCT_FLOAT_SUPPORTED
321 case JDCT_FLOAT:
322 {
DRCe5eaf372014-05-09 18:00:32 +0000323 /* For float AA&N IDCT method, divisors are equal to quantization
324 * coefficients scaled by scalefactor[row]*scalefactor[col], where
325 * scalefactor[0] = 1
326 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
327 * We apply a further scale factor of 8.
328 * What's actually stored is 1/divisor so that the inner loop can
329 * use a multiplication rather than a division.
330 */
331 FAST_FLOAT * fdtbl;
332 int row, col;
333 static const double aanscalefactor[DCTSIZE] = {
334 1.0, 1.387039845, 1.306562965, 1.175875602,
335 1.0, 0.785694958, 0.541196100, 0.275899379
336 };
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000337
DRCe5eaf372014-05-09 18:00:32 +0000338 if (fdct->float_divisors[qtblno] == NULL) {
339 fdct->float_divisors[qtblno] = (FAST_FLOAT *)
340 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000341 DCTSIZE2 * sizeof(FAST_FLOAT));
DRCe5eaf372014-05-09 18:00:32 +0000342 }
343 fdtbl = fdct->float_divisors[qtblno];
344 i = 0;
345 for (row = 0; row < DCTSIZE; row++) {
346 for (col = 0; col < DCTSIZE; col++) {
347 fdtbl[i] = (FAST_FLOAT)
348 (1.0 / (((double) qtbl->quantval[i] *
349 aanscalefactor[row] * aanscalefactor[col] * 8.0)));
350 i++;
351 }
352 }
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000353 }
354 break;
355#endif
356 default:
357 ERREXIT(cinfo, JERR_NOT_COMPILED);
358 break;
359 }
360 }
361}
362
363
364/*
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000365 * Load data into workspace, applying unsigned->signed conversion.
366 */
367
368METHODDEF(void)
369convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace)
370{
371 register DCTELEM *workspaceptr;
372 register JSAMPROW elemptr;
373 register int elemr;
374
375 workspaceptr = workspace;
376 for (elemr = 0; elemr < DCTSIZE; elemr++) {
377 elemptr = sample_data[elemr] + start_col;
378
DRCe5eaf372014-05-09 18:00:32 +0000379#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000380 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
381 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
382 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
383 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
384 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
385 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
386 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
387 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
388#else
389 {
390 register int elemc;
391 for (elemc = DCTSIZE; elemc > 0; elemc--)
392 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE;
393 }
394#endif
395 }
396}
397
398
399/*
400 * Quantize/descale the coefficients, and store into coef_blocks[].
401 */
402
403METHODDEF(void)
404quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace)
405{
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000406 int i;
407 DCTELEM temp;
DRCaee4f722014-08-09 23:06:07 +0000408 JCOEFPTR output_ptr = coef_block;
409
410#if BITS_IN_JSAMPLE == 8
411
DRCb5a55e62015-08-29 18:05:43 -0500412 UDCTELEM recip, corr;
413 int shift;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000414 UDCTELEM2 product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000415
416 for (i = 0; i < DCTSIZE2; i++) {
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000417 temp = workspace[i];
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000418 recip = divisors[i + DCTSIZE2 * 0];
419 corr = divisors[i + DCTSIZE2 * 1];
420 shift = divisors[i + DCTSIZE2 * 3];
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000421
422 if (temp < 0) {
423 temp = -temp;
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000424 product = (UDCTELEM2)(temp + corr) * recip;
425 product >>= shift + sizeof(DCTELEM)*8;
426 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000427 temp = -temp;
428 } else {
Pierre Ossmandedc42e2009-03-09 13:23:04 +0000429 product = (UDCTELEM2)(temp + corr) * recip;
430 product >>= shift + sizeof(DCTELEM)*8;
431 temp = product;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000432 }
433 output_ptr[i] = (JCOEF) temp;
434 }
DRCaee4f722014-08-09 23:06:07 +0000435
436#else
437
438 register DCTELEM qval;
439
440 for (i = 0; i < DCTSIZE2; i++) {
441 qval = divisors[i];
442 temp = workspace[i];
443 /* Divide the coefficient value by qval, ensuring proper rounding.
444 * Since C does not specify the direction of rounding for negative
445 * quotients, we have to force the dividend positive for portability.
446 *
447 * In most files, at least half of the output values will be zero
448 * (at default quantization settings, more like three-quarters...)
449 * so we should ensure that this case is fast. On many machines,
450 * a comparison is enough cheaper than a divide to make a special test
451 * a win. Since both inputs will be nonnegative, we need only test
452 * for a < b to discover whether a/b is 0.
453 * If your machine's division is fast enough, define FAST_DIVIDE.
454 */
455#ifdef FAST_DIVIDE
456#define DIVIDE_BY(a,b) a /= b
457#else
458#define DIVIDE_BY(a,b) if (a >= b) a /= b; else a = 0
459#endif
460 if (temp < 0) {
461 temp = -temp;
462 temp += qval>>1; /* for rounding */
463 DIVIDE_BY(temp, qval);
DRCeca06372014-11-06 09:32:38 +0000464 temp = -temp;
DRCaee4f722014-08-09 23:06:07 +0000465 } else {
466 temp += qval>>1; /* for rounding */
467 DIVIDE_BY(temp, qval);
468 }
469 output_ptr[i] = (JCOEF) temp;
470 }
471
472#endif
473
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000474}
475
476
477/*
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000478 * Perform forward DCT on one or more blocks of a component.
479 *
480 * The input samples are taken from the sample_data[] array starting at
481 * position start_row/start_col, and moving to the right for any additional
Thomas G. Lanebc79e061995-08-02 00:00:00 +0000482 * blocks. The quantized coefficients are returned in coef_blocks[].
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000483 */
484
Thomas G. Lane489583f1996-02-07 00:00:00 +0000485METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000486forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRCe5eaf372014-05-09 18:00:32 +0000487 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
488 JDIMENSION start_row, JDIMENSION start_col,
489 JDIMENSION num_blocks)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000490/* This version is used for integer DCT implementations. */
491{
492 /* This routine is heavily used, so it's worth coding it tightly. */
493 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000494 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000495 DCTELEM * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000496 JDIMENSION bi;
497
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000498 /* Make sure the compiler doesn't look up these every pass */
499 forward_DCT_method_ptr do_dct = fdct->dct;
500 convsamp_method_ptr do_convsamp = fdct->convsamp;
501 quantize_method_ptr do_quantize = fdct->quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000502 workspace = fdct->workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000503
DRCe5eaf372014-05-09 18:00:32 +0000504 sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000505
506 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
507 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000508 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000509
510 /* Perform the DCT */
511 (*do_dct) (workspace);
512
513 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000514 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000515 }
516}
517
518
519#ifdef DCT_FLOAT_SUPPORTED
520
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000521
522METHODDEF(void)
523convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace)
524{
525 register FAST_FLOAT *workspaceptr;
526 register JSAMPROW elemptr;
527 register int elemr;
528
529 workspaceptr = workspace;
530 for (elemr = 0; elemr < DCTSIZE; elemr++) {
531 elemptr = sample_data[elemr] + start_col;
DRCe5eaf372014-05-09 18:00:32 +0000532#if DCTSIZE == 8 /* unroll the inner loop */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000533 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
534 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
535 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
536 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
537 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
538 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
539 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
540 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
541#else
542 {
543 register int elemc;
544 for (elemc = DCTSIZE; elemc > 0; elemc--)
545 *workspaceptr++ = (FAST_FLOAT)
546 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);
547 }
548#endif
549 }
550}
551
552
553METHODDEF(void)
554quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace)
555{
556 register FAST_FLOAT temp;
557 register int i;
558 register JCOEFPTR output_ptr = coef_block;
559
560 for (i = 0; i < DCTSIZE2; i++) {
561 /* Apply the quantization and scaling factor */
562 temp = workspace[i] * divisors[i];
563
564 /* Round to nearest integer.
565 * Since C does not specify the direction of rounding for negative
566 * quotients, we have to force the dividend positive for portability.
567 * The maximum coefficient size is +-16K (for 12-bit data), so this
568 * code should work for either 16-bit or 32-bit ints.
569 */
570 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384);
571 }
572}
573
574
Thomas G. Lane489583f1996-02-07 00:00:00 +0000575METHODDEF(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000576forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr,
DRCe5eaf372014-05-09 18:00:32 +0000577 JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
578 JDIMENSION start_row, JDIMENSION start_col,
579 JDIMENSION num_blocks)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000580/* This version is used for floating-point DCT implementations. */
581{
582 /* This routine is heavily used, so it's worth coding it tightly. */
583 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000584 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
Pierre Ossman35c47192009-03-09 13:29:37 +0000585 FAST_FLOAT * workspace;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000586 JDIMENSION bi;
587
Pierre Ossman35c47192009-03-09 13:29:37 +0000588
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000589 /* Make sure the compiler doesn't look up these every pass */
590 float_DCT_method_ptr do_dct = fdct->float_dct;
591 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
592 float_quantize_method_ptr do_quantize = fdct->float_quantize;
Pierre Ossmandc5db142009-03-13 12:17:26 +0000593 workspace = fdct->float_workspace;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000594
DRCe5eaf372014-05-09 18:00:32 +0000595 sample_data += start_row; /* fold in the vertical offset once */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000596
597 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
598 /* Load data into workspace, applying unsigned->signed conversion */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000599 (*do_convsamp) (sample_data, start_col, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000600
601 /* Perform the DCT */
602 (*do_dct) (workspace);
603
604 /* Quantize/descale the coefficients, and store into coef_blocks[] */
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000605 (*do_quantize) (coef_blocks[bi], divisors, workspace);
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000606 }
607}
608
609#endif /* DCT_FLOAT_SUPPORTED */
610
611
612/*
613 * Initialize FDCT manager.
614 */
615
Thomas G. Lane489583f1996-02-07 00:00:00 +0000616GLOBAL(void)
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000617jinit_forward_dct (j_compress_ptr cinfo)
618{
619 my_fdct_ptr fdct;
620 int i;
621
622 fdct = (my_fdct_ptr)
623 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000624 sizeof(my_fdct_controller));
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000625 cinfo->fdct = (struct jpeg_forward_dct *) fdct;
626 fdct->pub.start_pass = start_pass_fdctmgr;
627
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000628 /* First determine the DCT... */
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000629 switch (cinfo->dct_method) {
630#ifdef DCT_ISLOW_SUPPORTED
631 case JDCT_ISLOW:
632 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000633 if (jsimd_can_fdct_islow())
634 fdct->dct = jsimd_fdct_islow;
635 else
636 fdct->dct = jpeg_fdct_islow;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000637 break;
638#endif
639#ifdef DCT_IFAST_SUPPORTED
640 case JDCT_IFAST:
641 fdct->pub.forward_DCT = forward_DCT;
Pierre Ossman59a39382009-03-09 13:15:56 +0000642 if (jsimd_can_fdct_ifast())
643 fdct->dct = jsimd_fdct_ifast;
644 else
645 fdct->dct = jpeg_fdct_ifast;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000646 break;
647#endif
648#ifdef DCT_FLOAT_SUPPORTED
649 case JDCT_FLOAT:
650 fdct->pub.forward_DCT = forward_DCT_float;
Pierre Ossman59a39382009-03-09 13:15:56 +0000651 if (jsimd_can_fdct_float())
652 fdct->float_dct = jsimd_fdct_float;
653 else
654 fdct->float_dct = jpeg_fdct_float;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000655 break;
656#endif
657 default:
658 ERREXIT(cinfo, JERR_NOT_COMPILED);
659 break;
660 }
661
662 /* ...then the supporting stages. */
663 switch (cinfo->dct_method) {
664#ifdef DCT_ISLOW_SUPPORTED
665 case JDCT_ISLOW:
666#endif
667#ifdef DCT_IFAST_SUPPORTED
668 case JDCT_IFAST:
669#endif
670#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
Pierre Ossman59a39382009-03-09 13:15:56 +0000671 if (jsimd_can_convsamp())
672 fdct->convsamp = jsimd_convsamp;
673 else
674 fdct->convsamp = convsamp;
675 if (jsimd_can_quantize())
676 fdct->quantize = jsimd_quantize;
677 else
678 fdct->quantize = quantize;
Pierre Ossman49dcbfb2009-03-09 10:37:20 +0000679 break;
680#endif
681#ifdef DCT_FLOAT_SUPPORTED
682 case JDCT_FLOAT:
Pierre Ossman59a39382009-03-09 13:15:56 +0000683 if (jsimd_can_convsamp_float())
684 fdct->float_convsamp = jsimd_convsamp_float;
685 else
686 fdct->float_convsamp = convsamp_float;
687 if (jsimd_can_quantize_float())
688 fdct->float_quantize = jsimd_quantize_float;
689 else
690 fdct->float_quantize = quantize_float;
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000691 break;
692#endif
693 default:
694 ERREXIT(cinfo, JERR_NOT_COMPILED);
695 break;
696 }
697
Pierre Ossman35c47192009-03-09 13:29:37 +0000698 /* Allocate workspace memory */
699#ifdef DCT_FLOAT_SUPPORTED
700 if (cinfo->dct_method == JDCT_FLOAT)
701 fdct->float_workspace = (FAST_FLOAT *)
702 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000703 sizeof(FAST_FLOAT) * DCTSIZE2);
Pierre Ossman35c47192009-03-09 13:29:37 +0000704 else
705#endif
706 fdct->workspace = (DCTELEM *)
707 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
DRC5de454b2014-05-18 19:04:03 +0000708 sizeof(DCTELEM) * DCTSIZE2);
Pierre Ossman35c47192009-03-09 13:29:37 +0000709
Thomas G. Lane36a4ccc1994-09-24 00:00:00 +0000710 /* Mark divisor tables unallocated */
711 for (i = 0; i < NUM_QUANT_TBLS; i++) {
712 fdct->divisors[i] = NULL;
713#ifdef DCT_FLOAT_SUPPORTED
714 fdct->float_divisors[i] = NULL;
715#endif
716 }
717}