Blame - jidctred.c - platform/external/libjpeg-turbo

blob: 3ec649c2b905fd815df69b7c76ac396e6f802881 [file] [log] [blame]

Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jidctred.c
				3	*
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame^]	4	* Copyright (C) 1994-1996, Thomas G. Lane.
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	5	* This file is part of the Independent JPEG Group's software.
				6	* For conditions of distribution and use, see the accompanying README file.
				7	*
				8	* This file contains inverse-DCT routines that produce reduced-size output:
				9	* either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
				10	*
				11	* The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
				12	* algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step
				13	* with an 8-to-4 step that produces the four averages of two adjacent outputs
				14	* (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
				15	* These steps were derived by computing the corresponding values at the end
				16	* of the normal LL&M code, then simplifying as much as possible.
				17	*
				18	* 1x1 is trivial: just take the DC coefficient divided by 8.
				19	*
				20	* See jidctint.c for additional comments.
				21	*/
				22
				23	#define JPEG_INTERNALS
				24	#include "jinclude.h"
				25	#include "jpeglib.h"
				26	#include "jdct.h" /* Private declarations for DCT subsystem */
				27
				28	#ifdef IDCT_SCALING_SUPPORTED
				29
				30
				31	/*
				32	* This module is specialized to the case DCTSIZE = 8.
				33	*/
				34
				35	#if DCTSIZE != 8
				36	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
				37	#endif
				38
				39
				40	/* Scaling is the same as in jidctint.c. */
				41
				42	#if BITS_IN_JSAMPLE == 8
				43	#define CONST_BITS 13
				44	#define PASS1_BITS 2
				45	#else
				46	#define CONST_BITS 13
				47	#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
				48	#endif
				49
				50	/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
				51	* causing a lot of useless floating-point operations at run time.
				52	* To get around this we use the following pre-calculated constants.
				53	* If you change CONST_BITS you may want to add appropriate values.
				54	* (With a reasonable C compiler, you can just rely on the FIX() macro...)
				55	*/
				56
				57	#if CONST_BITS == 13
				58	#define FIX_0_211164243 ((INT32) 1730) /* FIX(0.211164243) */
				59	#define FIX_0_509795579 ((INT32) 4176) /* FIX(0.509795579) */
				60	#define FIX_0_601344887 ((INT32) 4926) /* FIX(0.601344887) */
				61	#define FIX_0_720959822 ((INT32) 5906) /* FIX(0.720959822) */
				62	#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */
				63	#define FIX_0_850430095 ((INT32) 6967) /* FIX(0.850430095) */
				64	#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */
				65	#define FIX_1_061594337 ((INT32) 8697) /* FIX(1.061594337) */
				66	#define FIX_1_272758580 ((INT32) 10426) /* FIX(1.272758580) */
				67	#define FIX_1_451774981 ((INT32) 11893) /* FIX(1.451774981) */
				68	#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */
				69	#define FIX_2_172734803 ((INT32) 17799) /* FIX(2.172734803) */
				70	#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */
				71	#define FIX_3_624509785 ((INT32) 29692) /* FIX(3.624509785) */
				72	#else
				73	#define FIX_0_211164243 FIX(0.211164243)
				74	#define FIX_0_509795579 FIX(0.509795579)
				75	#define FIX_0_601344887 FIX(0.601344887)
				76	#define FIX_0_720959822 FIX(0.720959822)
				77	#define FIX_0_765366865 FIX(0.765366865)
				78	#define FIX_0_850430095 FIX(0.850430095)
				79	#define FIX_0_899976223 FIX(0.899976223)
				80	#define FIX_1_061594337 FIX(1.061594337)
				81	#define FIX_1_272758580 FIX(1.272758580)
				82	#define FIX_1_451774981 FIX(1.451774981)
				83	#define FIX_1_847759065 FIX(1.847759065)
				84	#define FIX_2_172734803 FIX(2.172734803)
				85	#define FIX_2_562915447 FIX(2.562915447)
				86	#define FIX_3_624509785 FIX(3.624509785)
				87	#endif
				88
				89
				90	/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
				91	* For 8-bit samples with the recommended scaling, all the variable
				92	* and constant values involved are no more than 16 bits wide, so a
				93	* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
				94	* For 12-bit samples, a full 32-bit multiplication will be needed.
				95	*/
				96
				97	#if BITS_IN_JSAMPLE == 8
				98	#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
				99	#else
				100	#define MULTIPLY(var,const) ((var) * (const))
				101	#endif
				102
				103
				104	/* Dequantize a coefficient by multiplying it by the multiplier-table
				105	* entry; produce an int result. In this module, both inputs and result
				106	* are 16 bits or less, so either int or short multiply will work.
				107	*/
				108
				109	#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval))
				110
				111
				112	/*
				113	* Perform dequantization and inverse DCT on one block of coefficients,
				114	* producing a reduced-size 4x4 output block.
				115	*/
				116
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame^]	117	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	118	jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
				119	JCOEFPTR coef_block,
				120	JSAMPARRAY output_buf, JDIMENSION output_col)
				121	{
				122	INT32 tmp0, tmp2, tmp10, tmp12;
				123	INT32 z1, z2, z3, z4;
				124	JCOEFPTR inptr;
				125	ISLOW_MULT_TYPE * quantptr;
				126	int * wsptr;
				127	JSAMPROW outptr;
				128	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
				129	int ctr;
				130	int workspace[DCTSIZE4]; / buffers data between passes */
				131	SHIFT_TEMPS
				132
				133	/* Pass 1: process columns from input, store into work array. */
				134
				135	inptr = coef_block;
				136	quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
				137	wsptr = workspace;
				138	for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
				139	/* Don't bother to process column 4, because second pass won't use it */
				140	if (ctr == DCTSIZE-4)
				141	continue;
				142	if ((inptr[DCTSIZE1] \| inptr[DCTSIZE2] \| inptr[DCTSIZE*3] \|
				143	inptr[DCTSIZE5] \| inptr[DCTSIZE6] \| inptr[DCTSIZE*7]) == 0) {
				144	/* AC terms all zero; we need not examine term 4 for 4x4 output */
				145	int dcval = DEQUANTIZE(inptr[DCTSIZE0], quantptr[DCTSIZE0]) << PASS1_BITS;
				146
				147	wsptr[DCTSIZE*0] = dcval;
				148	wsptr[DCTSIZE*1] = dcval;
				149	wsptr[DCTSIZE*2] = dcval;
				150	wsptr[DCTSIZE*3] = dcval;
				151
				152	continue;
				153	}
				154
				155	/* Even part */
				156
				157	tmp0 = DEQUANTIZE(inptr[DCTSIZE0], quantptr[DCTSIZE0]);
				158	tmp0 <<= (CONST_BITS+1);
				159
				160	z2 = DEQUANTIZE(inptr[DCTSIZE2], quantptr[DCTSIZE2]);
				161	z3 = DEQUANTIZE(inptr[DCTSIZE6], quantptr[DCTSIZE6]);
				162
				163	tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865);
				164
				165	tmp10 = tmp0 + tmp2;
				166	tmp12 = tmp0 - tmp2;
				167
				168	/* Odd part */
				169
				170	z1 = DEQUANTIZE(inptr[DCTSIZE7], quantptr[DCTSIZE7]);
				171	z2 = DEQUANTIZE(inptr[DCTSIZE5], quantptr[DCTSIZE5]);
				172	z3 = DEQUANTIZE(inptr[DCTSIZE3], quantptr[DCTSIZE3]);
				173	z4 = DEQUANTIZE(inptr[DCTSIZE1], quantptr[DCTSIZE1]);
				174
				175	tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
				176	+ MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
				177	+ MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
				178	+ MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
				179
				180	tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
				181	+ MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
				182	+ MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
				183	+ MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
				184
				185	/* Final output stage */
				186
				187	wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1);
				188	wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1);
				189	wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1);
				190	wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1);
				191	}
				192
				193	/* Pass 2: process 4 rows from work array, store into output array. */
				194
				195	wsptr = workspace;
				196	for (ctr = 0; ctr < 4; ctr++) {
				197	outptr = output_buf[ctr] + output_col;
				198	/* It's not clear whether a zero row test is worthwhile here ... */
				199
				200	#ifndef NO_ZERO_ROW_TEST
				201	if ((wsptr[1] \| wsptr[2] \| wsptr[3] \| wsptr[5] \| wsptr[6] \|
				202	wsptr[7]) == 0) {
				203	/* AC terms all zero */
				204	JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
				205	& RANGE_MASK];
				206
				207	outptr[0] = dcval;
				208	outptr[1] = dcval;
				209	outptr[2] = dcval;
				210	outptr[3] = dcval;
				211
				212	wsptr += DCTSIZE; /* advance pointer to next row */
				213	continue;
				214	}
				215	#endif
				216
				217	/* Even part */
				218
				219	tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1);
				220
				221	tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065)
				222	+ MULTIPLY((INT32) wsptr[6], - FIX_0_765366865);
				223
				224	tmp10 = tmp0 + tmp2;
				225	tmp12 = tmp0 - tmp2;
				226
				227	/* Odd part */
				228
				229	z1 = (INT32) wsptr[7];
				230	z2 = (INT32) wsptr[5];
				231	z3 = (INT32) wsptr[3];
				232	z4 = (INT32) wsptr[1];
				233
				234	tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */
				235	+ MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */
				236	+ MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */
				237	+ MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */
				238
				239	tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */
				240	+ MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */
				241	+ MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */
				242	+ MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */
				243
				244	/* Final output stage */
				245
				246	outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2,
				247	CONST_BITS+PASS1_BITS+3+1)
				248	& RANGE_MASK];
				249	outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2,
				250	CONST_BITS+PASS1_BITS+3+1)
				251	& RANGE_MASK];
				252	outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0,
				253	CONST_BITS+PASS1_BITS+3+1)
				254	& RANGE_MASK];
				255	outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0,
				256	CONST_BITS+PASS1_BITS+3+1)
				257	& RANGE_MASK];
				258
				259	wsptr += DCTSIZE; /* advance pointer to next row */
				260	}
				261	}
				262
				263
				264	/*
				265	* Perform dequantization and inverse DCT on one block of coefficients,
				266	* producing a reduced-size 2x2 output block.
				267	*/
				268
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame^]	269	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	270	jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
				271	JCOEFPTR coef_block,
				272	JSAMPARRAY output_buf, JDIMENSION output_col)
				273	{
				274	INT32 tmp0, tmp10, z1;
				275	JCOEFPTR inptr;
				276	ISLOW_MULT_TYPE * quantptr;
				277	int * wsptr;
				278	JSAMPROW outptr;
				279	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
				280	int ctr;
				281	int workspace[DCTSIZE2]; / buffers data between passes */
				282	SHIFT_TEMPS
				283
				284	/* Pass 1: process columns from input, store into work array. */
				285
				286	inptr = coef_block;
				287	quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
				288	wsptr = workspace;
				289	for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
				290	/* Don't bother to process columns 2,4,6 */
				291	if (ctr == DCTSIZE-2 \|\| ctr == DCTSIZE-4 \|\| ctr == DCTSIZE-6)
				292	continue;
				293	if ((inptr[DCTSIZE1] \| inptr[DCTSIZE3] \|
				294	inptr[DCTSIZE5] \| inptr[DCTSIZE7]) == 0) {
				295	/* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
				296	int dcval = DEQUANTIZE(inptr[DCTSIZE0], quantptr[DCTSIZE0]) << PASS1_BITS;
				297
				298	wsptr[DCTSIZE*0] = dcval;
				299	wsptr[DCTSIZE*1] = dcval;
				300
				301	continue;
				302	}
				303
				304	/* Even part */
				305
				306	z1 = DEQUANTIZE(inptr[DCTSIZE0], quantptr[DCTSIZE0]);
				307	tmp10 = z1 << (CONST_BITS+2);
				308
				309	/* Odd part */
				310
				311	z1 = DEQUANTIZE(inptr[DCTSIZE7], quantptr[DCTSIZE7]);
				312	tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */
				313	z1 = DEQUANTIZE(inptr[DCTSIZE5], quantptr[DCTSIZE5]);
				314	tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */
				315	z1 = DEQUANTIZE(inptr[DCTSIZE3], quantptr[DCTSIZE3]);
				316	tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */
				317	z1 = DEQUANTIZE(inptr[DCTSIZE1], quantptr[DCTSIZE1]);
				318	tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
				319
				320	/* Final output stage */
				321
				322	wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2);
				323	wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2);
				324	}
				325
				326	/* Pass 2: process 2 rows from work array, store into output array. */
				327
				328	wsptr = workspace;
				329	for (ctr = 0; ctr < 2; ctr++) {
				330	outptr = output_buf[ctr] + output_col;
				331	/* It's not clear whether a zero row test is worthwhile here ... */
				332
				333	#ifndef NO_ZERO_ROW_TEST
				334	if ((wsptr[1] \| wsptr[3] \| wsptr[5] \| wsptr[7]) == 0) {
				335	/* AC terms all zero */
				336	JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3)
				337	& RANGE_MASK];
				338
				339	outptr[0] = dcval;
				340	outptr[1] = dcval;
				341
				342	wsptr += DCTSIZE; /* advance pointer to next row */
				343	continue;
				344	}
				345	#endif
				346
				347	/* Even part */
				348
				349	tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2);
				350
				351	/* Odd part */
				352
				353	tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */
				354	+ MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */
				355	+ MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */
				356	+ MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */
				357
				358	/* Final output stage */
				359
				360	outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0,
				361	CONST_BITS+PASS1_BITS+3+2)
				362	& RANGE_MASK];
				363	outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0,
				364	CONST_BITS+PASS1_BITS+3+2)
				365	& RANGE_MASK];
				366
				367	wsptr += DCTSIZE; /* advance pointer to next row */
				368	}
				369	}
				370
				371
				372	/*
				373	* Perform dequantization and inverse DCT on one block of coefficients,
				374	* producing a reduced-size 1x1 output block.
				375	*/
				376
Thomas G. Lane	489583f	1996-02-07 00:00:00 +0000	[diff] [blame^]	377	GLOBAL(void)
Thomas G. Lane	36a4ccc	1994-09-24 00:00:00 +0000	[diff] [blame]	378	jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
				379	JCOEFPTR coef_block,
				380	JSAMPARRAY output_buf, JDIMENSION output_col)
				381	{
				382	int dcval;
				383	ISLOW_MULT_TYPE * quantptr;
				384	JSAMPLE *range_limit = IDCT_range_limit(cinfo);
				385	SHIFT_TEMPS
				386
				387	/* We hardly need an inverse DCT routine for this: just take the
				388	* average pixel value, which is one-eighth of the DC coefficient.
				389	*/
				390	quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
				391	dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
				392	dcval = (int) DESCALE((INT32) dcval, 3);
				393
				394	output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
				395	}
				396
				397	#endif /* IDCT_SCALING_SUPPORTED */