blob: 0ca0e7834ab0497ddc9a17287300fbc6d4a37620 [file] [log] [blame]
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +00001/*
2 * jfwddct.c
3 *
Thomas G. Lane4a6b7301992-03-17 00:00:00 +00004 * Copyright (C) 1991, 1992, Thomas G. Lane.
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +00005 * This file is part of the Independent JPEG Group's software.
6 * For conditions of distribution and use, see the accompanying README file.
7 *
8 * This file contains the basic DCT (Discrete Cosine Transform)
9 * transformation subroutine.
10 *
11 * This implementation is based on Appendix A.2 of the book
12 * "Discrete Cosine Transform---Algorithms, Advantages, Applications"
13 * by K.R. Rao and P. Yip (Academic Press, Inc, London, 1990).
14 * It uses scaled fixed-point arithmetic instead of floating point.
15 */
16
17#include "jinclude.h"
18
Thomas G. Lane4a6b7301992-03-17 00:00:00 +000019/*
20 * This routine is specialized to the case DCTSIZE = 8.
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000021 */
22
Thomas G. Lane4a6b7301992-03-17 00:00:00 +000023#if DCTSIZE != 8
24 Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000025#endif
26
Thomas G. Lanebd543f01991-12-13 00:00:00 +000027
28/* The poop on this scaling stuff is as follows:
29 *
30 * We have to do addition and subtraction of the integer inputs, which
31 * is no problem, and multiplication by fractional constants, which is
32 * a problem to do in integer arithmetic. We multiply all the constants
33 * by DCT_SCALE and convert them to integer constants (thus retaining
34 * LG2_DCT_SCALE bits of precision in the constants). After doing a
35 * multiplication we have to divide the product by DCT_SCALE, with proper
36 * rounding, to produce the correct output. The division can be implemented
37 * cheaply as a right shift of LG2_DCT_SCALE bits. The DCT equations also
38 * specify an additional division by 2 on the final outputs; this can be
39 * folded into the right-shift by shifting one more bit (see UNFIXH).
40 *
41 * If you are planning to recode this in assembler, you might want to set
42 * LG2_DCT_SCALE to 15. This loses a bit of precision, but then all the
43 * multiplications are between 16-bit quantities (given 8-bit JSAMPLEs!)
44 * so you could use a signed 16x16=>32 bit multiply instruction instead of
45 * full 32x32 multiply. Unfortunately there's no way to describe such a
46 * multiply portably in C, so we've gone for the extra bit of accuracy here.
47 */
48
49#ifdef EIGHT_BIT_SAMPLES
50#define LG2_DCT_SCALE 16
51#else
52#define LG2_DCT_SCALE 15 /* lose a little precision to avoid overflow */
53#endif
54
55#define ONE ((INT32) 1)
56
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000057#define DCT_SCALE (ONE << LG2_DCT_SCALE)
58
Thomas G. Lanebd543f01991-12-13 00:00:00 +000059/* In some places we shift the inputs left by a couple more bits, */
60/* so that they can be added to fractional results without too much */
61/* loss of precision. */
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000062#define LG2_OVERSCALE 2
Thomas G. Lanebd543f01991-12-13 00:00:00 +000063#define OVERSCALE (ONE << LG2_OVERSCALE)
64#define OVERSHIFT(x) ((x) <<= LG2_OVERSCALE)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000065
Thomas G. Lanebd543f01991-12-13 00:00:00 +000066/* Scale a fractional constant by DCT_SCALE */
67#define FIX(x) ((INT32) ((x) * DCT_SCALE + 0.5))
68
69/* Scale a fractional constant by DCT_SCALE/OVERSCALE */
70/* Such a constant can be multiplied with an overscaled input */
71/* to produce something that's scaled by DCT_SCALE */
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000072#define FIXO(x) ((INT32) ((x) * DCT_SCALE / OVERSCALE + 0.5))
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000073
Thomas G. Lanebd543f01991-12-13 00:00:00 +000074/* Descale and correctly round a value that's scaled by DCT_SCALE */
75#define UNFIX(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1)), LG2_DCT_SCALE)
76
77/* Same with an additional division by 2, ie, correctly rounded UNFIX(x/2) */
78#define UNFIXH(x) RIGHT_SHIFT((x) + (ONE << LG2_DCT_SCALE), LG2_DCT_SCALE+1)
79
80/* Take a value scaled by DCT_SCALE and round to integer scaled by OVERSCALE */
81#define UNFIXO(x) RIGHT_SHIFT((x) + (ONE << (LG2_DCT_SCALE-1-LG2_OVERSCALE)),\
82 LG2_DCT_SCALE-LG2_OVERSCALE)
83
84/* Here are the constants we need */
85/* SIN_i_j is sine of i*pi/j, scaled by DCT_SCALE */
86/* COS_i_j is cosine of i*pi/j, scaled by DCT_SCALE */
87
88#define SIN_1_4 FIX(0.707106781)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000089#define COS_1_4 SIN_1_4
90
Thomas G. Lanebd543f01991-12-13 00:00:00 +000091#define SIN_1_8 FIX(0.382683432)
92#define COS_1_8 FIX(0.923879533)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000093#define SIN_3_8 COS_1_8
94#define COS_3_8 SIN_1_8
95
Thomas G. Lanebd543f01991-12-13 00:00:00 +000096#define SIN_1_16 FIX(0.195090322)
97#define COS_1_16 FIX(0.980785280)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +000098#define SIN_7_16 COS_1_16
99#define COS_7_16 SIN_1_16
100
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000101#define SIN_3_16 FIX(0.555570233)
102#define COS_3_16 FIX(0.831469612)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000103#define SIN_5_16 COS_3_16
104#define COS_5_16 SIN_3_16
105
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000106/* OSIN_i_j is sine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
107/* OCOS_i_j is cosine of i*pi/j, scaled by DCT_SCALE/OVERSCALE */
108
109#define OSIN_1_4 FIXO(0.707106781)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000110#define OCOS_1_4 OSIN_1_4
111
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000112#define OSIN_1_8 FIXO(0.382683432)
113#define OCOS_1_8 FIXO(0.923879533)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000114#define OSIN_3_8 OCOS_1_8
115#define OCOS_3_8 OSIN_1_8
116
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000117#define OSIN_1_16 FIXO(0.195090322)
118#define OCOS_1_16 FIXO(0.980785280)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000119#define OSIN_7_16 OCOS_1_16
120#define OCOS_7_16 OSIN_1_16
121
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000122#define OSIN_3_16 FIXO(0.555570233)
123#define OCOS_3_16 FIXO(0.831469612)
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000124#define OSIN_5_16 OCOS_3_16
125#define OCOS_5_16 OSIN_3_16
126
127
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000128/*
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000129 * Perform the forward DCT on one block of samples.
130 *
Thomas G. Lanebd543f01991-12-13 00:00:00 +0000131 * A 2-D DCT can be done by 1-D DCT on each row
132 * followed by 1-D DCT on each column.
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000133 */
134
135GLOBAL void
136j_fwd_dct (DCTBLOCK data)
137{
Thomas G. Lane4a6b7301992-03-17 00:00:00 +0000138 int pass, rowctr;
139 register DCTELEM *inptr, *outptr;
140 DCTBLOCK workspace;
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000141
Thomas G. Lane4a6b7301992-03-17 00:00:00 +0000142 /* Each iteration of the inner loop performs one 8-point 1-D DCT.
143 * It reads from a *row* of the input matrix and stores into a *column*
144 * of the output matrix. In the first pass, we read from the data[] array
145 * and store into the local workspace[]. In the second pass, we read from
146 * the workspace[] array and store into data[], thus performing the
147 * equivalent of a columnar DCT pass with no variable array indexing.
148 */
149
150 inptr = data; /* initialize pointers for first pass */
151 outptr = workspace;
152 for (pass = 1; pass >= 0; pass--) {
153 for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
154 /* many tmps have nonoverlapping lifetime -- flashy register colourers
155 * should be able to do this lot very well
156 */
157 INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
158 INT32 tmp10, tmp11, tmp12, tmp13;
159 INT32 tmp14, tmp15, tmp16, tmp17;
160 INT32 tmp25, tmp26;
161 SHIFT_TEMPS
162
163 tmp0 = inptr[7] + inptr[0];
164 tmp1 = inptr[6] + inptr[1];
165 tmp2 = inptr[5] + inptr[2];
166 tmp3 = inptr[4] + inptr[3];
167 tmp4 = inptr[3] - inptr[4];
168 tmp5 = inptr[2] - inptr[5];
169 tmp6 = inptr[1] - inptr[6];
170 tmp7 = inptr[0] - inptr[7];
171
172 tmp10 = tmp3 + tmp0;
173 tmp11 = tmp2 + tmp1;
174 tmp12 = tmp1 - tmp2;
175 tmp13 = tmp0 - tmp3;
176
177 outptr[ 0] = (DCTELEM) UNFIXH((tmp10 + tmp11) * SIN_1_4);
178 outptr[DCTSIZE*4] = (DCTELEM) UNFIXH((tmp10 - tmp11) * COS_1_4);
179
180 outptr[DCTSIZE*2] = (DCTELEM) UNFIXH(tmp13*COS_1_8 + tmp12*SIN_1_8);
181 outptr[DCTSIZE*6] = (DCTELEM) UNFIXH(tmp13*SIN_1_8 - tmp12*COS_1_8);
182
183 tmp16 = UNFIXO((tmp6 + tmp5) * SIN_1_4);
184 tmp15 = UNFIXO((tmp6 - tmp5) * COS_1_4);
185
186 OVERSHIFT(tmp4);
187 OVERSHIFT(tmp7);
188
189 /* tmp4, tmp7, tmp15, tmp16 are overscaled by OVERSCALE */
190
191 tmp14 = tmp4 + tmp15;
192 tmp25 = tmp4 - tmp15;
193 tmp26 = tmp7 - tmp16;
194 tmp17 = tmp7 + tmp16;
195
196 outptr[DCTSIZE ] = (DCTELEM) UNFIXH(tmp17*OCOS_1_16 + tmp14*OSIN_1_16);
197 outptr[DCTSIZE*7] = (DCTELEM) UNFIXH(tmp17*OCOS_7_16 - tmp14*OSIN_7_16);
198 outptr[DCTSIZE*5] = (DCTELEM) UNFIXH(tmp26*OCOS_5_16 + tmp25*OSIN_5_16);
199 outptr[DCTSIZE*3] = (DCTELEM) UNFIXH(tmp26*OCOS_3_16 - tmp25*OSIN_3_16);
200
201 inptr += DCTSIZE; /* advance inptr to next row */
202 outptr++; /* advance outptr to next column */
203 }
204 /* end of pass; in case it was pass 1, set up for pass 2 */
205 inptr = workspace;
206 outptr = data;
207 }
Thomas G. Lane2cbeb8a1991-10-07 00:00:00 +0000208}