Blame - jidctflt.c - platform/external/libjpeg-turbo

blob: 324a2cbc4420850ab0eb11b6e3ef2f9ee1e60321 [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jidctflt.c
				3	*
DRC	715bb41	2014-05-11 10:09:07 +0000	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
Thomas G. Lane	5ead57a	1998-03-27 00:00:00 +0000	[diff] [blame]	5	* Copyright (C) 1994-1998, Thomas G. Lane.
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	6	* Modified 2010 by Guido Vollbeding.
DRC	715bb41	2014-05-11 10:09:07 +0000	[diff] [blame]	7	* libjpeg-turbo Modifications:
				8	* Copyright (C) 2014, D. R. Commander.
DRC	bf04316	2015-10-09 20:02:31 -0500	[diff] [blame]	9	* For conditions of distribution and use, see the accompanying README file.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	10	*
				11	* This file contains a floating-point implementation of the
				12	* inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
				13	* must also perform dequantization of the input coefficients.
				14	*
				15	* This implementation should be more accurate than either of the integer
				16	* IDCT implementations. However, it may not give the same results on all
				17	* machines because of differences in roundoff behavior. Speed will depend
				18	* on the hardware's floating point capacity.
				19	*
				20	* A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
				21	* on each row (or vice versa, but it's more convenient to emit a row at
				22	* a time). Direct algorithms are also available, but they are much more
				23	* complex and seem not to be any faster when reduced to code.
				24	*
				25	* This implementation is based on Arai, Agui, and Nakajima's algorithm for
				26	* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
				27	* Japanese, but the algorithm is described in the Pennebaker & Mitchell
				28	* JPEG textbook (see REFERENCES section in file README). The following code
				29	* is based directly on figure 4-8 in P&M.
				30	* While an 8-point DCT cannot be done in less than 11 multiplies, it is
				31	* possible to arrange the computation so that many of the multiplies are
				32	* simple scalings of the final outputs. These multiplies can then be
				33	* folded into the multiplications or divisions by the JPEG quantization
				34	* table entries. The AA&N method leaves only 5 multiplies and 29 adds
				35	* to be done in the DCT itself.
				36	* The primary disadvantage of this method is that with a fixed-point
				37	* implementation, accuracy is lost due to imprecise representation of the
				38	* scaled quantization values. However, that problem does not arise if
				39	* we use floating point arithmetic.
				40	*/
				41
				42	#define JPEG_INTERNALS
				43	#include "jinclude.h"
				44	#include "jpeglib.h"
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	45	#include "jdct.h" /* Private declarations for DCT subsystem */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	46
				47	#ifdef DCT_FLOAT_SUPPORTED
				48
				49
				50	/*
				51	* This module is specialized to the case DCTSIZE = 8.
				52	*/
				53
				54	#if DCTSIZE != 8
				55	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
				56	#endif
				57
				58
				59	/* Dequantize a coefficient by multiplying it by the multiplier-table
				60	* entry; produce a float result.
				61	*/
				62
				63	#define DEQUANTIZE(coef,quantval) (((FAST_FLOAT) (coef)) * (quantval))
				64
				65
				66	/*
				67	* Perform dequantization and inverse DCT on one block of coefficients.
				68	*/
				69
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	70	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	71	jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	72	JCOEFPTR coef_block,
				73	JSAMPARRAY output_buf, JDIMENSION output_col)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	74	{
				75	FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
				76	FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
				77	FAST_FLOAT z5, z10, z11, z12, z13;
				78	JCOEFPTR inptr;
				79	FLOAT_MULT_TYPE * quantptr;
				80	FAST_FLOAT * wsptr;
				81	JSAMPROW outptr;
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	82	JSAMPLE *range_limit = cinfo->sample_range_limit;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	83	int ctr;
				84	FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
DRC	715bb41	2014-05-11 10:09:07 +0000	[diff] [blame]	85	#define _0_125 ((FLOAT_MULT_TYPE)0.125)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	86
				87	/* Pass 1: process columns from input, store into work array. */
				88
				89	inptr = coef_block;
				90	quantptr = (FLOAT_MULT_TYPE *) compptr->dct_table;
				91	wsptr = workspace;
				92	for (ctr = DCTSIZE; ctr > 0; ctr--) {
				93	/* Due to quantization, we will usually find that many of the input
				94	* coefficients are zero, especially the AC terms. We can exploit this
				95	* by short-circuiting the IDCT calculation for any column in which all
				96	* the AC terms are zero. In that case each output is equal to the
				97	* DC coefficient (with scale factor as needed).
				98	* With typical images and quantization tables, half or more of the
				99	* column DCT calculations can be simplified this way.
				100	*/
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	101
Thomas G. Lane	5ead57a	1998-03-27 00:00:00 +0000	[diff] [blame]	102	if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	103	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
				104	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
				105	inptr[DCTSIZE*7] == 0) {
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	106	/* AC terms all zero */
DRC	715bb41	2014-05-11 10:09:07 +0000	[diff] [blame]	107	FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE*0],
				108	quantptr[DCTSIZE*0] * _0_125);
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	109
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	110	wsptr[DCTSIZE*0] = dcval;
				111	wsptr[DCTSIZE*1] = dcval;
				112	wsptr[DCTSIZE*2] = dcval;
				113	wsptr[DCTSIZE*3] = dcval;
				114	wsptr[DCTSIZE*4] = dcval;
				115	wsptr[DCTSIZE*5] = dcval;
				116	wsptr[DCTSIZE*6] = dcval;
				117	wsptr[DCTSIZE*7] = dcval;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	118
				119	inptr++; /* advance pointers to next column */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	120	quantptr++;
				121	wsptr++;
				122	continue;
				123	}
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	124
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	125	/* Even part */
				126
DRC	715bb41	2014-05-11 10:09:07 +0000	[diff] [blame]	127	tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0] * _0_125);
				128	tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2] * _0_125);
				129	tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4] * _0_125);
				130	tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6] * _0_125);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	131
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	132	tmp10 = tmp0 + tmp2; /* phase 3 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	133	tmp11 = tmp0 - tmp2;
				134
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	135	tmp13 = tmp1 + tmp3; /* phases 5-3 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	136	tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT) 1.414213562) - tmp13; /* 2*c4 */
				137
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	138	tmp0 = tmp10 + tmp13; /* phase 2 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	139	tmp3 = tmp10 - tmp13;
				140	tmp1 = tmp11 + tmp12;
				141	tmp2 = tmp11 - tmp12;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	142
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	143	/* Odd part */
				144
DRC	715bb41	2014-05-11 10:09:07 +0000	[diff] [blame]	145	tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1] * _0_125);
				146	tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3] * _0_125);
				147	tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5] * _0_125);
				148	tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7] * _0_125);
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	149
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	150	z13 = tmp6 + tmp5; /* phase 6 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	151	z10 = tmp6 - tmp5;
				152	z11 = tmp4 + tmp7;
				153	z12 = tmp4 - tmp7;
				154
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	155	tmp7 = z11 + z13; /* phase 5 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	156	tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */
				157
				158	z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	159	tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
				160	tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	161
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	162	tmp6 = tmp12 - tmp7; /* phase 2 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	163	tmp5 = tmp11 - tmp6;
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	164	tmp4 = tmp10 - tmp5;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	165
				166	wsptr[DCTSIZE*0] = tmp0 + tmp7;
				167	wsptr[DCTSIZE*7] = tmp0 - tmp7;
				168	wsptr[DCTSIZE*1] = tmp1 + tmp6;
				169	wsptr[DCTSIZE*6] = tmp1 - tmp6;
				170	wsptr[DCTSIZE*2] = tmp2 + tmp5;
				171	wsptr[DCTSIZE*5] = tmp2 - tmp5;
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	172	wsptr[DCTSIZE*3] = tmp3 + tmp4;
				173	wsptr[DCTSIZE*4] = tmp3 - tmp4;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	174
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	175	inptr++; /* advance pointers to next column */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	176	quantptr++;
				177	wsptr++;
				178	}
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	179
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	180	/* Pass 2: process rows from work array, store into output array. */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	181
				182	wsptr = workspace;
				183	for (ctr = 0; ctr < DCTSIZE; ctr++) {
				184	outptr = output_buf[ctr] + output_col;
				185	/* Rows of zeroes can be exploited in the same way as we did with columns.
				186	* However, the column calculation has created many nonzero AC terms, so
				187	* the simplification applies less often (typically 5% to 10% of the time).
				188	* And testing floats for zero is relatively expensive, so we don't bother.
				189	*/
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	190
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	191	/* Even part */
				192
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	193	/* Apply signed->unsigned and prepare float->int conversion */
				194	z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5);
				195	tmp10 = z5 + wsptr[4];
				196	tmp11 = z5 - wsptr[4];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	197
				198	tmp13 = wsptr[2] + wsptr[6];
				199	tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13;
				200
				201	tmp0 = tmp10 + tmp13;
				202	tmp3 = tmp10 - tmp13;
				203	tmp1 = tmp11 + tmp12;
				204	tmp2 = tmp11 - tmp12;
				205
				206	/* Odd part */
				207
				208	z13 = wsptr[5] + wsptr[3];
				209	z10 = wsptr[5] - wsptr[3];
				210	z11 = wsptr[1] + wsptr[7];
				211	z12 = wsptr[1] - wsptr[7];
				212
				213	tmp7 = z11 + z13;
				214	tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562);
				215
				216	z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	217	tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */
				218	tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	219
				220	tmp6 = tmp12 - tmp7;
				221	tmp5 = tmp11 - tmp6;
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	222	tmp4 = tmp10 - tmp5;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	223
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	224	/* Final output stage: float->int conversion and range-limit */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	225
Guido Vollbeding	f18f81b	2010-02-28 00:00:00 +0000	[diff] [blame]	226	outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK];
				227	outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK];
				228	outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK];
				229	outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK];
				230	outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK];
				231	outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK];
				232	outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK];
				233	outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK];
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	234
				235	wsptr += DCTSIZE; /* advance pointer to next row */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	236	}
				237	}
				238
				239	#endif /* DCT_FLOAT_SUPPORTED */