Blame - jidctfst.c - platform/external/libjpeg-turbo

blob: 10db739b86246ad36481ebfc51b38076deb4e375 [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jidctfst.c
				3	*
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	4	* This file was part of the Independent JPEG Group's software:
Thomas G. Lane	5ead57a	1998-03-27 00:00:00 +0000	[diff] [blame]	5	* Copyright (C) 1994-1998, Thomas G. Lane.
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	6	* libjpeg-turbo Modifications:
				7	* Copyright (C) 2015, D. R. Commander.
				8	* For conditions of distribution and use, see the accompanying README.ijg
				9	* file.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	10	*
				11	* This file contains a fast, not so accurate integer implementation of the
				12	* inverse DCT (Discrete Cosine Transform). In the IJG code, this routine
				13	* must also perform dequantization of the input coefficients.
				14	*
				15	* A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
				16	* on each row (or vice versa, but it's more convenient to emit a row at
				17	* a time). Direct algorithms are also available, but they are much more
				18	* complex and seem not to be any faster when reduced to code.
				19	*
				20	* This implementation is based on Arai, Agui, and Nakajima's algorithm for
				21	* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
				22	* Japanese, but the algorithm is described in the Pennebaker & Mitchell
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	23	* JPEG textbook (see REFERENCES section in file README.ijg). The following
				24	* code is based directly on figure 4-8 in P&M.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	25	* While an 8-point DCT cannot be done in less than 11 multiplies, it is
				26	* possible to arrange the computation so that many of the multiplies are
				27	* simple scalings of the final outputs. These multiplies can then be
				28	* folded into the multiplications or divisions by the JPEG quantization
				29	* table entries. The AA&N method leaves only 5 multiplies and 29 adds
				30	* to be done in the DCT itself.
				31	* The primary disadvantage of this method is that with fixed-point math,
				32	* accuracy is lost due to imprecise representation of the scaled
				33	* quantization values. The smaller the quantization table entry, the less
				34	* precise the scaled value, so this implementation does worse with high-
				35	* quality-setting files than with low-quality ones.
				36	*/
				37
				38	#define JPEG_INTERNALS
				39	#include "jinclude.h"
				40	#include "jpeglib.h"
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	41	#include "jdct.h" /* Private declarations for DCT subsystem */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	42
				43	#ifdef DCT_IFAST_SUPPORTED
				44
				45
				46	/*
				47	* This module is specialized to the case DCTSIZE = 8.
				48	*/
				49
				50	#if DCTSIZE != 8
				51	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
				52	#endif
				53
				54
				55	/* Scaling decisions are generally the same as in the LL&M algorithm;
				56	* see jidctint.c for more details. However, we choose to descale
				57	* (right shift) multiplication products as soon as they are formed,
				58	* rather than carrying additional fractional bits into subsequent additions.
				59	* This compromises accuracy slightly, but it lets us save a few shifts.
				60	* More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
				61	* everywhere except in the multiplications proper; this saves a good deal
				62	* of work on 16-bit-int machines.
				63	*
				64	* The dequantized coefficients are not integers because the AA&N scaling
				65	* factors have been incorporated. We represent them scaled up by PASS1_BITS,
				66	* so that the first and second IDCT rounds have the same input scaling.
				67	* For 8-bit JSAMPLEs, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
				68	* avoid a descaling shift; this compromises accuracy rather drastically
				69	* for small quantization table entries, but it saves a lot of shifts.
				70	* For 12-bit JSAMPLEs, there's no hope of using 16x16 multiplies anyway,
				71	* so we use a much larger scaling factor to preserve accuracy.
				72	*
				73	* A final compromise is to represent the multiplicative constants to only
				74	* 8 fractional bits, rather than 13. This saves some shifting work on some
				75	* machines, and may also reduce the cost of multiplication (since there
				76	* are fewer one-bits in the constants).
				77	*/
				78
				79	#if BITS_IN_JSAMPLE == 8
				80	#define CONST_BITS 8
				81	#define PASS1_BITS 2
				82	#else
				83	#define CONST_BITS 8
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	84	#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	85	#endif
				86
				87	/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
				88	* causing a lot of useless floating-point operations at run time.
				89	* To get around this we use the following pre-calculated constants.
				90	* If you change CONST_BITS you may want to add appropriate values.
				91	* (With a reasonable C compiler, you can just rely on the FIX() macro...)
				92	*/
				93
				94	#if CONST_BITS == 8
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	95	#define FIX_1_082392200 ((JLONG) 277) /* FIX(1.082392200) */
				96	#define FIX_1_414213562 ((JLONG) 362) /* FIX(1.414213562) */
				97	#define FIX_1_847759065 ((JLONG) 473) /* FIX(1.847759065) */
				98	#define FIX_2_613125930 ((JLONG) 669) /* FIX(2.613125930) */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	99	#else
				100	#define FIX_1_082392200 FIX(1.082392200)
				101	#define FIX_1_414213562 FIX(1.414213562)
				102	#define FIX_1_847759065 FIX(1.847759065)
				103	#define FIX_2_613125930 FIX(2.613125930)
				104	#endif
				105
				106
				107	/* We can gain a little more speed, with a further compromise in accuracy,
				108	* by omitting the addition in a descaling shift. This yields an incorrectly
				109	* rounded result half the time...
				110	*/
				111
				112	#ifndef USE_ACCURATE_ROUNDING
				113	#undef DESCALE
				114	#define DESCALE(x,n) RIGHT_SHIFT(x, n)
				115	#endif
				116
				117
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	118	/* Multiply a DCTELEM variable by a JLONG constant, and immediately
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	119	* descale to yield a DCTELEM result.
				120	*/
				121
				122	#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS))
				123
				124
				125	/* Dequantize a coefficient by multiplying it by the multiplier-table
				126	* entry; produce a DCTELEM result. For 8-bit data a 16x16->16
				127	* multiplication will do. For 12-bit data, the multiplier table is
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	128	* declared JLONG, so a 32-bit multiply will be used.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	129	*/
				130
				131	#if BITS_IN_JSAMPLE == 8
				132	#define DEQUANTIZE(coef,quantval) (((IFAST_MULT_TYPE) (coef)) * (quantval))
				133	#else
				134	#define DEQUANTIZE(coef,quantval) \
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	135	DESCALE((coef)*(quantval), IFAST_SCALE_BITS-PASS1_BITS)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	136	#endif
				137
				138
				139	/* Like DESCALE, but applies to a DCTELEM and produces an int.
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	140	* We assume that int right shift is unsigned if JLONG right shift is.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	141	*/
				142
				143	#ifdef RIGHT_SHIFT_IS_UNSIGNED
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	144	#define ISHIFT_TEMPS DCTELEM ishift_temp;
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	145	#if BITS_IN_JSAMPLE == 8
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	146	#define DCTELEMBITS 16 /* DCTELEM may be 16 or 32 bits */
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	147	#else
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	148	#define DCTELEMBITS 32 /* DCTELEM must be 32 bits */
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	149	#endif
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	150	#define IRIGHT_SHIFT(x,shft) \
Thomas G. Lane	bc79e06	1995-08-02 00:00:00 +0000	[diff] [blame]	151	((ishift_temp = (x)) < 0 ? \
				152	(ishift_temp >> (shft)) | ((~((DCTELEM) 0)) << (DCTELEMBITS-(shft))) : \
				153	(ishift_temp >> (shft)))
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	154	#else
				155	#define ISHIFT_TEMPS
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	156	#define IRIGHT_SHIFT(x,shft) ((x) >> (shft))
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	157	#endif
				158
				159	#ifdef USE_ACCURATE_ROUNDING
				160	#define IDESCALE(x,n) ((int) IRIGHT_SHIFT((x) + (1 << ((n)-1)), n))
				161	#else
				162	#define IDESCALE(x,n) ((int) IRIGHT_SHIFT(x, n))
				163	#endif
				164
				165
				166	/*
				167	* Perform dequantization and inverse DCT on one block of coefficients.
				168	*/
				169
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame]	170	GLOBAL(void)
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	171	jpeg_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr,
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	172	JCOEFPTR coef_block,
				173	JSAMPARRAY output_buf, JDIMENSION output_col)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	174	{
				175	DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
				176	DCTELEM tmp10, tmp11, tmp12, tmp13;
				177	DCTELEM z5, z10, z11, z12, z13;
				178	JCOEFPTR inptr;
Alex Naidis	6eb7d37	2016-10-16 23:10:08 +0200	[diff] [blame]	179	IFAST_MULT_TYPE *quantptr;
				180	int *wsptr;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	181	JSAMPROW outptr;
				182	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
				183	int ctr;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	184	int workspace[DCTSIZE2]; /* buffers data between passes */
				185	SHIFT_TEMPS /* for DESCALE */
				186	ISHIFT_TEMPS /* for IDESCALE */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	187
				188	/* Pass 1: process columns from input, store into work array. */
				189
				190	inptr = coef_block;
				191	quantptr = (IFAST_MULT_TYPE *) compptr->dct_table;
				192	wsptr = workspace;
				193	for (ctr = DCTSIZE; ctr > 0; ctr--) {
				194	/* Due to quantization, we will usually find that many of the input
				195	* coefficients are zero, especially the AC terms. We can exploit this
				196	* by short-circuiting the IDCT calculation for any column in which all
				197	* the AC terms are zero. In that case each output is equal to the
				198	* DC coefficient (with scale factor as needed).
				199	* With typical images and quantization tables, half or more of the
				200	* column DCT calculations can be simplified this way.
				201	*/
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	202
Thomas G. Lane	5ead57a	1998-03-27 00:00:00 +0000	[diff] [blame]	203	if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	204	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
				205	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
				206	inptr[DCTSIZE*7] == 0) {
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	207	/* AC terms all zero */
				208	int dcval = (int) DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
				209
				210	wsptr[DCTSIZE*0] = dcval;
				211	wsptr[DCTSIZE*1] = dcval;
				212	wsptr[DCTSIZE*2] = dcval;
				213	wsptr[DCTSIZE*3] = dcval;
				214	wsptr[DCTSIZE*4] = dcval;
				215	wsptr[DCTSIZE*5] = dcval;
				216	wsptr[DCTSIZE*6] = dcval;
				217	wsptr[DCTSIZE*7] = dcval;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	218
				219	inptr++; /* advance pointers to next column */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	220	quantptr++;
				221	wsptr++;
				222	continue;
				223	}
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	224
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	225	/* Even part */
				226
				227	tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
				228	tmp1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
				229	tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
				230	tmp3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
				231
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	232	tmp10 = tmp0 + tmp2; /* phase 3 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	233	tmp11 = tmp0 - tmp2;
				234
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	235	tmp13 = tmp1 + tmp3; /* phases 5-3 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	236	tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
				237
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	238	tmp0 = tmp10 + tmp13; /* phase 2 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	239	tmp3 = tmp10 - tmp13;
				240	tmp1 = tmp11 + tmp12;
				241	tmp2 = tmp11 - tmp12;
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	242
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	243	/* Odd part */
				244
				245	tmp4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
				246	tmp5 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
				247	tmp6 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
				248	tmp7 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
				249
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	250	z13 = tmp6 + tmp5; /* phase 6 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	251	z10 = tmp6 - tmp5;
				252	z11 = tmp4 + tmp7;
				253	z12 = tmp4 - tmp7;
				254
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	255	tmp7 = z11 + z13; /* phase 5 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	256	tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
				257
				258	z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
				259	tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
				260	tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
				261
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	262	tmp6 = tmp12 - tmp7; /* phase 2 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	263	tmp5 = tmp11 - tmp6;
				264	tmp4 = tmp10 + tmp5;
				265
				266	wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
				267	wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
				268	wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
				269	wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
				270	wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
				271	wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
				272	wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
				273	wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
				274
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	275	inptr++; /* advance pointers to next column */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	276	quantptr++;
				277	wsptr++;
				278	}
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	279
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	280	/* Pass 2: process rows from work array, store into output array. */
				281	/* Note that we must descale the results by a factor of 8 == 2**3, */
				282	/* and also undo the PASS1_BITS scaling. */
				283
				284	wsptr = workspace;
				285	for (ctr = 0; ctr < DCTSIZE; ctr++) {
				286	outptr = output_buf[ctr] + output_col;
				287	/* Rows of zeroes can be exploited in the same way as we did with columns.
				288	* However, the column calculation has created many nonzero AC terms, so
				289	* the simplification applies less often (typically 5% to 10% of the time).
				290	* On machines with very fast multiplication, it's possible that the
				291	* test takes more time than it's worth. In that case this section
				292	* may be commented out.
				293	*/
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	294
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	295	#ifndef NO_ZERO_ROW_TEST
Thomas G. Lane	5ead57a	1998-03-27 00:00:00 +0000	[diff] [blame]	296	if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	297	wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	298	/* AC terms all zero */
				299	JSAMPLE dcval = range_limit[IDESCALE(wsptr[0], PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	300	& RANGE_MASK];
				301
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	302	outptr[0] = dcval;
				303	outptr[1] = dcval;
				304	outptr[2] = dcval;
				305	outptr[3] = dcval;
				306	outptr[4] = dcval;
				307	outptr[5] = dcval;
				308	outptr[6] = dcval;
				309	outptr[7] = dcval;
				310
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	311	wsptr += DCTSIZE; /* advance pointer to next row */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	312	continue;
				313	}
				314	#endif
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	315
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	316	/* Even part */
				317
				318	tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
				319	tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
				320
				321	tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
				322	tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	323	- tmp13;
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	324
				325	tmp0 = tmp10 + tmp13;
				326	tmp3 = tmp10 - tmp13;
				327	tmp1 = tmp11 + tmp12;
				328	tmp2 = tmp11 - tmp12;
				329
				330	/* Odd part */
				331
				332	z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
				333	z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
				334	z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
				335	z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
				336
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	337	tmp7 = z11 + z13; /* phase 5 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	338	tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
				339
				340	z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
				341	tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
				342	tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; /* -2*(c2+c6) */
				343
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	344	tmp6 = tmp12 - tmp7; /* phase 2 */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	345	tmp5 = tmp11 - tmp6;
				346	tmp4 = tmp10 + tmp5;
				347
				348	/* Final output stage: scale down by a factor of 8 and range-limit */
				349
				350	outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	351	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	352	outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	353	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	354	outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	355	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	356	outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	357	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	358	outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	359	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	360	outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	361	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	362	outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	363	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	364	outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	365	& RANGE_MASK];
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	366
DRC	e5eaf37	2014-05-09 18:00:32 +0000	[diff] [blame]	367	wsptr += DCTSIZE; /* advance pointer to next row */
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	368	}
				369	}
				370
				371	#endif /* DCT_IFAST_SUPPORTED */