Blame - jfwddct.c - platform/external/libjpeg-turbo

blob: 0ca0e7834ab0497ddc9a17287300fbc6d4a37620 [file] [log] [blame]

Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	1	/*
				2	* jfwddct.c
				3	*
Thomas G. Lane	4a6b730	1992-03-17 00:00:00 +0000	[diff] [blame^]	4	* Copyright (C) 1991, 1992, Thomas G. Lane.
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	5	* This file is part of the Independent JPEG Group's software.
				6	* For conditions of distribution and use, see the accompanying README file.
				7	*
				8	* This file contains the basic DCT (Discrete Cosine Transform)
				9	* transformation subroutine.
				10	*
				11	* This implementation is based on Appendix A.2 of the book
				12	* "Discrete Cosine Transform---Algorithms, Advantages, Applications"
				13	* by K.R. Rao and P. Yip (Academic Press, Inc, London, 1990).
				14	* It uses scaled fixed-point arithmetic instead of floating point.
				15	*/
				16
				17	#include "jinclude.h"
				18
Thomas G. Lane	4a6b730	1992-03-17 00:00:00 +0000	[diff] [blame^]	19	/*
				20	* This routine is specialized to the case DCTSIZE = 8.
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	21	*/
				22
Thomas G. Lane	4a6b730	1992-03-17 00:00:00 +0000	[diff] [blame^]	23	#if DCTSIZE != 8
				24	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	25	#endif
				26
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	27
				28	/* The poop on this scaling stuff is as follows:
				29	*
				30	* We have to do addition and subtraction of the integer inputs, which
				31	* is no problem, and multiplication by fractional constants, which is
				32	* a problem to do in integer arithmetic. We multiply all the constants
				33	* by DCT_SCALE and convert them to integer constants (thus retaining
				34	* LG2_DCT_SCALE bits of precision in the constants). After doing a
				35	* multiplication we have to divide the product by DCT_SCALE, with proper
				36	* rounding, to produce the correct output. The division can be implemented
				37	* cheaply as a right shift of LG2_DCT_SCALE bits. The DCT equations also
				38	* specify an additional division by 2 on the final outputs; this can be
				39	* folded into the right-shift by shifting one more bit (see UNFIXH).
				40	*
				41	* If you are planning to recode this in assembler, you might want to set
				42	* LG2_DCT_SCALE to 15. This loses a bit of precision, but then all the
				43	* multiplications are between 16-bit quantities (given 8-bit JSAMPLEs!)
				44	* so you could use a signed 16x16=>32 bit multiply instruction instead of
				45	* full 32x32 multiply. Unfortunately there's no way to describe such a
				46	* multiply portably in C, so we've gone for the extra bit of accuracy here.
				47	*/
				48
				49	#ifdef EIGHT_BIT_SAMPLES
				50	#define LG2_DCT_SCALE 16
				51	#else
				52	#define LG2_DCT_SCALE 15 /* lose a little precision to avoid overflow */
				53	#endif
				54
				55	#define ONE ((INT32) 1)
				56
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	57	#define DCT_SCALE (ONE << LG2_DCT_SCALE)
				58
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	59	/* In some places we shift the inputs left by a couple more bits, */
				60	/* so that they can be added to fractional results without too much */
				61	/* loss of precision. */
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	62	#define LG2_OVERSCALE 2
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	63	#define OVERSCALE (ONE << LG2_OVERSCALE)
				64	#define OVERSHIFT(x) ((x) <<= LG2_OVERSCALE)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	65
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	66	/* Scale a fractional constant by DCT_SCALE */
				67	#define FIX(x) ((INT32) ((x) * DCT_SCALE + 0.5))
				68
				69	/* Scale a fractional constant by DCT_SCALE/OVERSCALE */
				70	/* Such a constant can be multiplied with an overscaled input */
				71	/* to produce something that's scaled by DCT_SCALE */
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	72	#define FIXO(x) ((INT32) ((x) * DCT_SCALE / OVERSCALE + 0.5))
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	73
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	74	/* Descale and correctly round a value that's scaled by DCT_SCALE */
				75	#define UNFIX(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1)), LG2_DCT_SCALE)
				76
				77	/* Same with an additional division by 2, ie, correctly rounded UNFIX(x/2) */
				78	#define UNFIXH(x) RIGHT_SHIFT((x) + (ONE << LG2_DCT_SCALE), LG2_DCT_SCALE+1)
				79
				80	/* Take a value scaled by DCT_SCALE and round to integer scaled by OVERSCALE */
				81	#define UNFIXO(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1-LG2_OVERSCALE)),\
				82	LG2_DCT_SCALE-LG2_OVERSCALE)
				83
				84	/* Here are the constants we need */
				85	/* SIN_i_j is sine of ipi/j, scaled by DCT_SCALE /
				86	/* COS_i_j is cosine of ipi/j, scaled by DCT_SCALE /
				87
				88	#define SIN_1_4 FIX(0.707106781)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	89	#define COS_1_4 SIN_1_4
				90
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	91	#define SIN_1_8 FIX(0.382683432)
				92	#define COS_1_8 FIX(0.923879533)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	93	#define SIN_3_8 COS_1_8
				94	#define COS_3_8 SIN_1_8
				95
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	96	#define SIN_1_16 FIX(0.195090322)
				97	#define COS_1_16 FIX(0.980785280)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	98	#define SIN_7_16 COS_1_16
				99	#define COS_7_16 SIN_1_16
				100
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	101	#define SIN_3_16 FIX(0.555570233)
				102	#define COS_3_16 FIX(0.831469612)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	103	#define SIN_5_16 COS_3_16
				104	#define COS_5_16 SIN_3_16
				105
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	106	/* OSIN_i_j is sine of ipi/j, scaled by DCT_SCALE/OVERSCALE /
				107	/* OCOS_i_j is cosine of ipi/j, scaled by DCT_SCALE/OVERSCALE /
				108
				109	#define OSIN_1_4 FIXO(0.707106781)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	110	#define OCOS_1_4 OSIN_1_4
				111
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	112	#define OSIN_1_8 FIXO(0.382683432)
				113	#define OCOS_1_8 FIXO(0.923879533)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	114	#define OSIN_3_8 OCOS_1_8
				115	#define OCOS_3_8 OSIN_1_8
				116
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	117	#define OSIN_1_16 FIXO(0.195090322)
				118	#define OCOS_1_16 FIXO(0.980785280)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	119	#define OSIN_7_16 OCOS_1_16
				120	#define OCOS_7_16 OSIN_1_16
				121
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	122	#define OSIN_3_16 FIXO(0.555570233)
				123	#define OCOS_3_16 FIXO(0.831469612)
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	124	#define OSIN_5_16 OCOS_3_16
				125	#define OCOS_5_16 OSIN_3_16
				126
				127
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	128	/*
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	129	* Perform the forward DCT on one block of samples.
				130	*
Thomas G. Lane	bd543f0	1991-12-13 00:00:00 +0000	[diff] [blame]	131	* A 2-D DCT can be done by 1-D DCT on each row
				132	* followed by 1-D DCT on each column.
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	133	*/
				134
				135	GLOBAL void
				136	j_fwd_dct (DCTBLOCK data)
				137	{
Thomas G. Lane	4a6b730	1992-03-17 00:00:00 +0000	[diff] [blame^]	138	int pass, rowctr;
				139	register DCTELEM inptr, outptr;
				140	DCTBLOCK workspace;
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	141
Thomas G. Lane	4a6b730	1992-03-17 00:00:00 +0000	[diff] [blame^]	142	/* Each iteration of the inner loop performs one 8-point 1-D DCT.
				143	* It reads from a row of the input matrix and stores into a column
				144	* of the output matrix. In the first pass, we read from the data[] array
				145	* and store into the local workspace[]. In the second pass, we read from
				146	* the workspace[] array and store into data[], thus performing the
				147	* equivalent of a columnar DCT pass with no variable array indexing.
				148	*/
				149
				150	inptr = data; /* initialize pointers for first pass */
				151	outptr = workspace;
				152	for (pass = 1; pass >= 0; pass--) {
				153	for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
				154	/* many tmps have nonoverlapping lifetime -- flashy register colourers
				155	* should be able to do this lot very well
				156	*/
				157	INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
				158	INT32 tmp10, tmp11, tmp12, tmp13;
				159	INT32 tmp14, tmp15, tmp16, tmp17;
				160	INT32 tmp25, tmp26;
				161	SHIFT_TEMPS
				162
				163	tmp0 = inptr[7] + inptr[0];
				164	tmp1 = inptr[6] + inptr[1];
				165	tmp2 = inptr[5] + inptr[2];
				166	tmp3 = inptr[4] + inptr[3];
				167	tmp4 = inptr[3] - inptr[4];
				168	tmp5 = inptr[2] - inptr[5];
				169	tmp6 = inptr[1] - inptr[6];
				170	tmp7 = inptr[0] - inptr[7];
				171
				172	tmp10 = tmp3 + tmp0;
				173	tmp11 = tmp2 + tmp1;
				174	tmp12 = tmp1 - tmp2;
				175	tmp13 = tmp0 - tmp3;
				176
				177	outptr[ 0] = (DCTELEM) UNFIXH((tmp10 + tmp11) * SIN_1_4);
				178	outptr[DCTSIZE4] = (DCTELEM) UNFIXH((tmp10 - tmp11) COS_1_4);
				179
				180	outptr[DCTSIZE2] = (DCTELEM) UNFIXH(tmp13COS_1_8 + tmp12*SIN_1_8);
				181	outptr[DCTSIZE6] = (DCTELEM) UNFIXH(tmp13SIN_1_8 - tmp12*COS_1_8);
				182
				183	tmp16 = UNFIXO((tmp6 + tmp5) * SIN_1_4);
				184	tmp15 = UNFIXO((tmp6 - tmp5) * COS_1_4);
				185
				186	OVERSHIFT(tmp4);
				187	OVERSHIFT(tmp7);
				188
				189	/* tmp4, tmp7, tmp15, tmp16 are overscaled by OVERSCALE */
				190
				191	tmp14 = tmp4 + tmp15;
				192	tmp25 = tmp4 - tmp15;
				193	tmp26 = tmp7 - tmp16;
				194	tmp17 = tmp7 + tmp16;
				195
				196	outptr[DCTSIZE ] = (DCTELEM) UNFIXH(tmp17OCOS_1_16 + tmp14OSIN_1_16);
				197	outptr[DCTSIZE7] = (DCTELEM) UNFIXH(tmp17OCOS_7_16 - tmp14*OSIN_7_16);
				198	outptr[DCTSIZE5] = (DCTELEM) UNFIXH(tmp26OCOS_5_16 + tmp25*OSIN_5_16);
				199	outptr[DCTSIZE3] = (DCTELEM) UNFIXH(tmp26OCOS_3_16 - tmp25*OSIN_3_16);
				200
				201	inptr += DCTSIZE; /* advance inptr to next row */
				202	outptr++; /* advance outptr to next column */
				203	}
				204	/* end of pass; in case it was pass 1, set up for pass 2 */
				205	inptr = workspace;
				206	outptr = data;
				207	}
Thomas G. Lane	2cbeb8a	1991-10-07 00:00:00 +0000	[diff] [blame]	208	}