blob: 7319bdfffc9cdcb7ab1f4e9d9318dd0a1bafceaf [file] [log] [blame]
Matthieu Delahaye197fc092014-01-28 16:06:57 -06001#include "rs_idct.h"
2#include "rs_allocation.rsh"
3#include <string.h>
4
5static void idct4_1d(const int16_t *input, int16_t *output) {
6 int16_t step[4];
7 int temp1, temp2;
8 // stage 1
9 temp1 = (input[0] + input[2]) * cospi_16_64;
10 temp2 = (input[0] - input[2]) * cospi_16_64;
11 step[0] = dct_const_round_shift(temp1);
12 step[1] = dct_const_round_shift(temp2);
13 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
14 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
15 step[2] = dct_const_round_shift(temp1);
16 step[3] = dct_const_round_shift(temp2);
17
18 // stage 2
19 output[0] = step[0] + step[3];
20 output[1] = step[1] + step[2];
21 output[2] = step[1] - step[2];
22 output[3] = step[0] - step[3];
23}
24
Jason Samsd22e2e22014-02-11 16:59:22 -080025static void idct4x4_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -060026 int i, j;
27 int a1;
28 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
29 * cospi_16_64);
30 out = dct_const_round_shift(out * cospi_16_64);
31 a1 = ROUND_POWER_OF_TWO(out, 4);
32
33 uint8_t result;
34 for (i = 0; i < 4; ++i) {
35 for (j = 0; j < 4; ++j) {
36 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
37 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
38 }
39 }
40}
41
Jason Samsd22e2e22014-02-11 16:59:22 -080042static void idct4x4_16(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -060043 int16_t out[4 * 4];
44 int16_t *outptr = out;
45 int i, j;
46 int16_t temp_in[4], temp_out[4];
47
48 int16_t in[4 * 4];
49 int16_t *inptr = in;
50 for (i = 0; i < 4; ++i) {
51 for (j = 0; j < 4; ++j) {
52 in[j + i * 4] = rsGetElementAt_short(input, j + xoff, i + yoff);
53 }
54 }
55
56 // Rows
57 for (i = 0; i < 4; ++i) {
58 idct4_1d(inptr, outptr);
59 inptr += 4;
60 outptr += 4;
61 }
62
63 // Columns
64 uint8_t result;
65 for (i = 0; i < 4; ++i) {
66 for (j = 0; j < 4; ++j) {
67 temp_in[j] = out[j * 4 + i];
68 }
69 idct4_1d(temp_in, temp_out);
70 for (j = 0; j < 4; ++j) {
71 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
72 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
73 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
74 }
75 }
76}
77
78static void idct8_1d(const int16_t *input, int16_t *output) {
79 int16_t step1[8], step2[8];
80 int temp1, temp2;
81 // stage 1
82 step1[0] = input[0];
83 step1[2] = input[4];
84 step1[1] = input[2];
85 step1[3] = input[6];
86 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
87 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
88 step1[4] = dct_const_round_shift(temp1);
89 step1[7] = dct_const_round_shift(temp2);
90 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
91 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
92 step1[5] = dct_const_round_shift(temp1);
93 step1[6] = dct_const_round_shift(temp2);
94
95 // stage 2 & stage 3 - even half
96 idct4_1d(step1, step1);
97
98 // stage 2 - odd half
99 step2[4] = step1[4] + step1[5];
100 step2[5] = step1[4] - step1[5];
101 step2[6] = -step1[6] + step1[7];
102 step2[7] = step1[6] + step1[7];
103
104 // stage 3 -odd half
105 step1[4] = step2[4];
106 temp1 = (step2[6] - step2[5]) * cospi_16_64;
107 temp2 = (step2[5] + step2[6]) * cospi_16_64;
108 step1[5] = dct_const_round_shift(temp1);
109 step1[6] = dct_const_round_shift(temp2);
110 step1[7] = step2[7];
111
112 // stage 4
113 output[0] = step1[0] + step1[7];
114 output[1] = step1[1] + step1[6];
115 output[2] = step1[2] + step1[5];
116 output[3] = step1[3] + step1[4];
117 output[4] = step1[3] - step1[4];
118 output[5] = step1[2] - step1[5];
119 output[6] = step1[1] - step1[6];
120 output[7] = step1[0] - step1[7];
121}
122
Jason Samsd22e2e22014-02-11 16:59:22 -0800123static void idct8x8_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600124 int i, j;
125 int a1;
126 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
127 * cospi_16_64);
128 out = dct_const_round_shift(out * cospi_16_64);
129 a1 = ROUND_POWER_OF_TWO(out, 5);
130
131 uint8_t result;
132 for (i = 0; i < 8; ++i) {
133 for (j = 0; j < 8; ++j) {
134 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
135 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
136 }
137 }
138}
139
Jason Samsd22e2e22014-02-11 16:59:22 -0800140static void idct8x8_10(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600141 int16_t out[8 * 8] = { 0 };
142 int16_t *outptr = out;
143 int i, j;
144 int16_t temp_in[8], temp_out[8];
145
146 int16_t in[8 * 8];
147 int16_t *inptr = in;
148 for (i = 0; i < 8; ++i) {
149 for (j = 0; j < 8; ++j) {
150 in[j + i * 8] = rsGetElementAt_short(input, j + xoff, i + yoff);
151 }
152 }
153
154 // First transform rows
155 // only first 4 row has non-zero coefs
156 for (i = 0; i < 4; ++i) {
157 idct8_1d(inptr, outptr);
158 inptr += 8;
159 outptr += 8;
160 }
161
162 // Then transform columns
163 uint8_t result;
164 for (i = 0; i < 8; ++i) {
165 for (j = 0; j < 8; ++j)
166 temp_in[j] = out[j * 8 + i];
167 idct8_1d(temp_in, temp_out);
168 for (j = 0; j < 8; ++j) {
169 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
170 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
171 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
172 }
173 }
174}
175
Jason Samsd22e2e22014-02-11 16:59:22 -0800176static void idct8x8_64(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600177 int16_t out[8 * 8];
178 int16_t *outptr = out;
179 int i, j;
180 int16_t temp_in[8], temp_out[8];
181
182 int16_t in[8 * 8];
183 int16_t *inptr = in;
184 for (i = 0; i < 8; ++i) {
185 for (j = 0; j < 8; ++j) {
186 in[j + i * 8] = rsGetElementAt_short(input, j + xoff, i + yoff);
187 }
188 }
189
190 // First transform rows
191 for (i = 0; i < 8; ++i) {
192 idct8_1d(inptr, outptr);
193 inptr += 8;
194 outptr += 8;
195 }
196
197 // Then transform columns
198 uint8_t result;
199 for (i = 0; i < 8; ++i) {
200 for (j = 0; j < 8; ++j)
201 temp_in[j] = out[j * 8 + i];
202 idct8_1d(temp_in, temp_out);
203 for (j = 0; j < 8; ++j) {
204 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
205 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
206 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
207 }
208 }
209}
210
/*
 * 16-point one-dimensional inverse DCT.
 *
 * Reads input[0..15] and writes output[0..15]. The computation is a chain of
 * seven stages that alternate between the step1 and step2 scratch arrays:
 * each stage reads only the array filled by the previous stage and writes the
 * other one, so the order of the stages must not be changed.
 *
 * Scratch values are int16_t, so every sum or difference is narrowed to
 * 16 bits when stored. Products with the cospi_* constants are accumulated in
 * the int temporaries temp1/temp2 and reduced with dct_const_round_shift()
 * before being stored.
 *
 * input is read only in stage 1, before anything is written to output.
 */
static void idct16_1d(const int16_t *input, int16_t *output) {
    int16_t step1[16], step2[16];
    int temp1, temp2;

    // stage 1: load the inputs in permuted order. Each index is written as
    // k/2, where k is the index used on the matching line of idct32_1d.
    step1[0] = input[0/2];
    step1[1] = input[16/2];
    step1[2] = input[8/2];
    step1[3] = input[24/2];
    step1[4] = input[4/2];
    step1[5] = input[20/2];
    step1[6] = input[12/2];
    step1[7] = input[28/2];
    step1[8] = input[2/2];
    step1[9] = input[18/2];
    step1[10] = input[10/2];
    step1[11] = input[26/2];
    step1[12] = input[6/2];
    step1[13] = input[22/2];
    step1[14] = input[14/2];
    step1[15] = input[30/2];

    // stage 2: elements 0..7 pass through unchanged.
    step2[0] = step1[0];
    step2[1] = step1[1];
    step2[2] = step1[2];
    step2[3] = step1[3];
    step2[4] = step1[4];
    step2[5] = step1[5];
    step2[6] = step1[6];
    step2[7] = step1[7];

    // Pairs (8,15), (9,14), (10,13), (11,12) are multiplied by cospi
    // constants and rounded.
    temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
    temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
    step2[8] = dct_const_round_shift(temp1);
    step2[15] = dct_const_round_shift(temp2);

    temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
    temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
    step2[9] = dct_const_round_shift(temp1);
    step2[14] = dct_const_round_shift(temp2);

    temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
    temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
    step2[10] = dct_const_round_shift(temp1);
    step2[13] = dct_const_round_shift(temp2);

    temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
    temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
    step2[11] = dct_const_round_shift(temp1);
    step2[12] = dct_const_round_shift(temp2);

    // stage 3: elements 0..3 pass through.
    step1[0] = step2[0];
    step1[1] = step2[1];
    step1[2] = step2[2];
    step1[3] = step2[3];

    // Pairs (4,7) and (5,6) are multiplied by cospi constants and rounded.
    temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
    temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
    step1[4] = dct_const_round_shift(temp1);
    step1[7] = dct_const_round_shift(temp2);
    temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
    temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
    step1[5] = dct_const_round_shift(temp1);
    step1[6] = dct_const_round_shift(temp2);

    // Elements 8..15: sums and differences of adjacent pairs.
    step1[8] = step2[8] + step2[9];
    step1[9] = step2[8] - step2[9];
    step1[10] = -step2[10] + step2[11];
    step1[11] = step2[10] + step2[11];
    step1[12] = step2[12] + step2[13];
    step1[13] = step2[12] - step2[13];
    step1[14] = -step2[14] + step2[15];
    step1[15] = step2[14] + step2[15];

    // stage 4: pairs (0,1) and (2,3) are multiplied by cospi constants and
    // rounded; elements 4..7 are sums and differences of adjacent pairs.
    temp1 = (step1[0] + step1[1]) * cospi_16_64;
    temp2 = (step1[0] - step1[1]) * cospi_16_64;
    step2[0] = dct_const_round_shift(temp1);
    step2[1] = dct_const_round_shift(temp2);
    temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
    temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
    step2[2] = dct_const_round_shift(temp1);
    step2[3] = dct_const_round_shift(temp2);
    step2[4] = step1[4] + step1[5];
    step2[5] = step1[4] - step1[5];
    step2[6] = -step1[6] + step1[7];
    step2[7] = step1[6] + step1[7];

    // Elements 8, 11, 12 and 15 pass through; pairs (9,14) and (10,13) are
    // multiplied by cospi constants and rounded.
    step2[8] = step1[8];
    step2[15] = step1[15];
    temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
    temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
    step2[9] = dct_const_round_shift(temp1);
    step2[14] = dct_const_round_shift(temp2);
    temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
    temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
    step2[10] = dct_const_round_shift(temp1);
    step2[13] = dct_const_round_shift(temp2);
    step2[11] = step1[11];
    step2[12] = step1[12];

    // stage 5: elements 0..3 are sums/differences of (0,3) and (1,2);
    // 4 and 7 pass through; (5,6) is scaled by cospi_16_64 and rounded.
    step1[0] = step2[0] + step2[3];
    step1[1] = step2[1] + step2[2];
    step1[2] = step2[1] - step2[2];
    step1[3] = step2[0] - step2[3];
    step1[4] = step2[4];
    temp1 = (step2[6] - step2[5]) * cospi_16_64;
    temp2 = (step2[5] + step2[6]) * cospi_16_64;
    step1[5] = dct_const_round_shift(temp1);
    step1[6] = dct_const_round_shift(temp2);
    step1[7] = step2[7];

    // Elements 8..15: sums and differences within groups of four.
    step1[8] = step2[8] + step2[11];
    step1[9] = step2[9] + step2[10];
    step1[10] = step2[9] - step2[10];
    step1[11] = step2[8] - step2[11];
    step1[12] = -step2[12] + step2[15];
    step1[13] = -step2[13] + step2[14];
    step1[14] = step2[13] + step2[14];
    step1[15] = step2[12] + step2[15];

    // stage 6: elements 0..7 are sums/differences of (k, 7 - k);
    // 8, 9, 14 and 15 pass through; (10,13) and (11,12) are scaled by
    // cospi_16_64 and rounded.
    step2[0] = step1[0] + step1[7];
    step2[1] = step1[1] + step1[6];
    step2[2] = step1[2] + step1[5];
    step2[3] = step1[3] + step1[4];
    step2[4] = step1[3] - step1[4];
    step2[5] = step1[2] - step1[5];
    step2[6] = step1[1] - step1[6];
    step2[7] = step1[0] - step1[7];
    step2[8] = step1[8];
    step2[9] = step1[9];
    temp1 = (-step1[10] + step1[13]) * cospi_16_64;
    temp2 = (step1[10] + step1[13]) * cospi_16_64;
    step2[10] = dct_const_round_shift(temp1);
    step2[13] = dct_const_round_shift(temp2);
    temp1 = (-step1[11] + step1[12]) * cospi_16_64;
    temp2 = (step1[11] + step1[12]) * cospi_16_64;
    step2[11] = dct_const_round_shift(temp1);
    step2[12] = dct_const_round_shift(temp2);
    step2[14] = step1[14];
    step2[15] = step1[15];

    // stage 7: for k in 0..7, output[k] = step2[k] + step2[15 - k] and
    // output[15 - k] = step2[k] - step2[15 - k].
    output[0] = step2[0] + step2[15];
    output[1] = step2[1] + step2[14];
    output[2] = step2[2] + step2[13];
    output[3] = step2[3] + step2[12];
    output[4] = step2[4] + step2[11];
    output[5] = step2[5] + step2[10];
    output[6] = step2[6] + step2[9];
    output[7] = step2[7] + step2[8];
    output[8] = step2[7] - step2[8];
    output[9] = step2[6] - step2[9];
    output[10] = step2[5] - step2[10];
    output[11] = step2[4] - step2[11];
    output[12] = step2[3] - step2[12];
    output[13] = step2[2] - step2[13];
    output[14] = step2[1] - step2[14];
    output[15] = step2[0] - step2[15];
}
375
Jason Samsd22e2e22014-02-11 16:59:22 -0800376static void idct16x16_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600377 int i, j;
378 int a1;
379 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
380 * cospi_16_64);
381 out = dct_const_round_shift(out * cospi_16_64);
382 a1 = ROUND_POWER_OF_TWO(out, 6);
383
384 uint8_t result;
385 for (i = 0; i < 16; ++i) {
386 for (j = 0; j < 16; ++j) {
387 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
388 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
389 }
390 }
391}
392
Jason Samsd22e2e22014-02-11 16:59:22 -0800393static void idct16x16_10(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600394 int16_t out[16 * 16] = { 0 };
395 int16_t *outptr = out;
396 int i, j;
397 int16_t temp_in[16], temp_out[16];
398
399 int16_t in[16 * 16];
400 int16_t *inptr = in;
401 for (i = 0; i < 16; ++i) {
402 for (j = 0; j < 16; ++j) {
403 in[j + i * 16] = rsGetElementAt_short(input, j + xoff, i + yoff);
404 }
405 }
406
407 // First transform rows. Since all non-zero dct coefficients are in
408 // upper-left 4x4 area, we only need to calculate first 4 rows here.
409 for (i = 0; i < 4; ++i) {
410 idct16_1d(inptr, outptr);
411 inptr += 16;
412 outptr += 16;
413 }
414
415 // Then transform columns
416 uint8_t result;
417 for (i = 0; i < 16; ++i) {
418 for (j = 0; j < 16; ++j)
419 temp_in[j] = out[j * 16 + i];
420 idct16_1d(temp_in, temp_out);
421 for (j = 0; j < 16; ++j) {
422 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
423 + rsGetElementAt_uchar(dest, i + xoff,j + yoff));
424 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
425 }
426 }
427}
428
Jason Samsd22e2e22014-02-11 16:59:22 -0800429static void idct16x16_256(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600430 int16_t out[16 * 16];
431 int16_t *outptr = out;
432 int i, j;
433 int16_t temp_in[16], temp_out[16];
434
435 int16_t in[16 * 16];
436 int16_t *inptr = in;
437 for (i = 0; i < 16; ++i) {
438 for (j = 0; j < 16; ++j) {
439 in[j + i * 16] = rsGetElementAt_short(input, j + xoff, i + yoff);
440 }
441 }
442
443 // First transform rows
444 for (i = 0; i < 16; ++i) {
445 idct16_1d(inptr, outptr);
446 inptr += 16;
447 outptr += 16;
448 }
449
450 // Then transform columns
451 uint8_t result;
452 for (i = 0; i < 16; ++i) {
453 for (j = 0; j < 16; ++j)
454 temp_in[j] = out[j * 16 + i];
455 idct16_1d(temp_in, temp_out);
456 for (j = 0; j < 16; ++j) {
457 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
458 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
459 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
460 }
461 }
462}
463
/*
 * 32-point one-dimensional inverse DCT.
 *
 * Reads input[0..31] and writes output[0..31]. The computation is a chain of
 * stages that alternate between the step1 and step2 scratch arrays: each
 * stage reads only the array filled by the previous stage and writes the
 * other one, so the order of the stages must not be changed.
 *
 * Scratch values are int16_t, so every sum or difference is narrowed to
 * 16 bits when stored. Products with the cospi_* constants are accumulated in
 * the int temporaries temp1/temp2 and reduced with dct_const_round_shift()
 * before being stored.
 *
 * input is read only in stage 1, before anything is written to output.
 */
static void idct32_1d(const int16_t *input, int16_t *output) {
    int16_t step1[32], step2[32];
    int temp1, temp2;

    // stage 1: step1[0..15] are copies of the even-indexed inputs in
    // permuted order.
    step1[0] = input[0];
    step1[1] = input[16];
    step1[2] = input[8];
    step1[3] = input[24];
    step1[4] = input[4];
    step1[5] = input[20];
    step1[6] = input[12];
    step1[7] = input[28];
    step1[8] = input[2];
    step1[9] = input[18];
    step1[10] = input[10];
    step1[11] = input[26];
    step1[12] = input[6];
    step1[13] = input[22];
    step1[14] = input[14];
    step1[15] = input[30];

    // step1[16..31] are built from pairs of odd-indexed inputs; each pair
    // fills one low slot (16..23) and its mirror slot (31..24).
    temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
    temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
    step1[16] = dct_const_round_shift(temp1);
    step1[31] = dct_const_round_shift(temp2);

    temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
    temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
    step1[17] = dct_const_round_shift(temp1);
    step1[30] = dct_const_round_shift(temp2);

    temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
    temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
    step1[18] = dct_const_round_shift(temp1);
    step1[29] = dct_const_round_shift(temp2);

    temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
    temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
    step1[19] = dct_const_round_shift(temp1);
    step1[28] = dct_const_round_shift(temp2);

    temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
    temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
    step1[20] = dct_const_round_shift(temp1);
    step1[27] = dct_const_round_shift(temp2);

    temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
    temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
    step1[21] = dct_const_round_shift(temp1);
    step1[26] = dct_const_round_shift(temp2);

    temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
    temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
    step1[22] = dct_const_round_shift(temp1);
    step1[25] = dct_const_round_shift(temp2);

    temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
    temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
    step1[23] = dct_const_round_shift(temp1);
    step1[24] = dct_const_round_shift(temp2);

    // stage 2: elements 0..7 pass through unchanged.
    step2[0] = step1[0];
    step2[1] = step1[1];
    step2[2] = step1[2];
    step2[3] = step1[3];
    step2[4] = step1[4];
    step2[5] = step1[5];
    step2[6] = step1[6];
    step2[7] = step1[7];

    // Pairs (8,15), (9,14), (10,13), (11,12) are multiplied by cospi
    // constants and rounded.
    temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
    temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
    step2[8] = dct_const_round_shift(temp1);
    step2[15] = dct_const_round_shift(temp2);

    temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
    temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
    step2[9] = dct_const_round_shift(temp1);
    step2[14] = dct_const_round_shift(temp2);

    temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
    temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
    step2[10] = dct_const_round_shift(temp1);
    step2[13] = dct_const_round_shift(temp2);

    temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
    temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
    step2[11] = dct_const_round_shift(temp1);
    step2[12] = dct_const_round_shift(temp2);

    // Elements 16..31: sums and differences of adjacent pairs.
    step2[16] = step1[16] + step1[17];
    step2[17] = step1[16] - step1[17];
    step2[18] = -step1[18] + step1[19];
    step2[19] = step1[18] + step1[19];
    step2[20] = step1[20] + step1[21];
    step2[21] = step1[20] - step1[21];
    step2[22] = -step1[22] + step1[23];
    step2[23] = step1[22] + step1[23];
    step2[24] = step1[24] + step1[25];
    step2[25] = step1[24] - step1[25];
    step2[26] = -step1[26] + step1[27];
    step2[27] = step1[26] + step1[27];
    step2[28] = step1[28] + step1[29];
    step2[29] = step1[28] - step1[29];
    step2[30] = -step1[30] + step1[31];
    step2[31] = step1[30] + step1[31];

    // stage 3: elements 0..3 pass through.
    step1[0] = step2[0];
    step1[1] = step2[1];
    step1[2] = step2[2];
    step1[3] = step2[3];

    // Pairs (4,7) and (5,6) are multiplied by cospi constants and rounded.
    temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
    temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
    step1[4] = dct_const_round_shift(temp1);
    step1[7] = dct_const_round_shift(temp2);
    temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
    temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
    step1[5] = dct_const_round_shift(temp1);
    step1[6] = dct_const_round_shift(temp2);

    // Elements 8..15: sums and differences of adjacent pairs.
    step1[8] = step2[8] + step2[9];
    step1[9] = step2[8] - step2[9];
    step1[10] = -step2[10] + step2[11];
    step1[11] = step2[10] + step2[11];
    step1[12] = step2[12] + step2[13];
    step1[13] = step2[12] - step2[13];
    step1[14] = -step2[14] + step2[15];
    step1[15] = step2[14] + step2[15];

    // Elements 16..31: pairs (17,30), (18,29), (21,26), (22,25) are
    // multiplied by cospi constants and rounded; 16, 19, 20, 23, 24, 27, 28
    // and 31 pass through.
    step1[16] = step2[16];
    step1[31] = step2[31];
    temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
    temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
    step1[17] = dct_const_round_shift(temp1);
    step1[30] = dct_const_round_shift(temp2);
    temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
    temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
    step1[18] = dct_const_round_shift(temp1);
    step1[29] = dct_const_round_shift(temp2);
    step1[19] = step2[19];
    step1[20] = step2[20];
    temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
    temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
    step1[21] = dct_const_round_shift(temp1);
    step1[26] = dct_const_round_shift(temp2);
    temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
    temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
    step1[22] = dct_const_round_shift(temp1);
    step1[25] = dct_const_round_shift(temp2);
    step1[23] = step2[23];
    step1[24] = step2[24];
    step1[27] = step2[27];
    step1[28] = step2[28];

    // stage 4: pairs (0,1) and (2,3) are multiplied by cospi constants and
    // rounded; elements 4..7 are sums and differences of adjacent pairs.
    temp1 = (step1[0] + step1[1]) * cospi_16_64;
    temp2 = (step1[0] - step1[1]) * cospi_16_64;
    step2[0] = dct_const_round_shift(temp1);
    step2[1] = dct_const_round_shift(temp2);
    temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
    temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
    step2[2] = dct_const_round_shift(temp1);
    step2[3] = dct_const_round_shift(temp2);
    step2[4] = step1[4] + step1[5];
    step2[5] = step1[4] - step1[5];
    step2[6] = -step1[6] + step1[7];
    step2[7] = step1[6] + step1[7];

    // Elements 8, 11, 12 and 15 pass through; pairs (9,14) and (10,13) are
    // multiplied by cospi constants and rounded.
    step2[8] = step1[8];
    step2[15] = step1[15];
    temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
    temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
    step2[9] = dct_const_round_shift(temp1);
    step2[14] = dct_const_round_shift(temp2);
    temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
    temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
    step2[10] = dct_const_round_shift(temp1);
    step2[13] = dct_const_round_shift(temp2);
    step2[11] = step1[11];
    step2[12] = step1[12];

    // Elements 16..23: sums and differences within groups of four.
    step2[16] = step1[16] + step1[19];
    step2[17] = step1[17] + step1[18];
    step2[18] = step1[17] - step1[18];
    step2[19] = step1[16] - step1[19];
    step2[20] = -step1[20] + step1[23];
    step2[21] = -step1[21] + step1[22];
    step2[22] = step1[21] + step1[22];
    step2[23] = step1[20] + step1[23];

    // Elements 24..31: sums and differences within groups of four.
    step2[24] = step1[24] + step1[27];
    step2[25] = step1[25] + step1[26];
    step2[26] = step1[25] - step1[26];
    step2[27] = step1[24] - step1[27];
    step2[28] = -step1[28] + step1[31];
    step2[29] = -step1[29] + step1[30];
    step2[30] = step1[29] + step1[30];
    step2[31] = step1[28] + step1[31];

    // stage 5: elements 0..3 are sums/differences of (0,3) and (1,2);
    // 4 and 7 pass through; (5,6) is scaled by cospi_16_64 and rounded.
    step1[0] = step2[0] + step2[3];
    step1[1] = step2[1] + step2[2];
    step1[2] = step2[1] - step2[2];
    step1[3] = step2[0] - step2[3];
    step1[4] = step2[4];
    temp1 = (step2[6] - step2[5]) * cospi_16_64;
    temp2 = (step2[5] + step2[6]) * cospi_16_64;
    step1[5] = dct_const_round_shift(temp1);
    step1[6] = dct_const_round_shift(temp2);
    step1[7] = step2[7];

    // Elements 8..15: sums and differences within groups of four.
    step1[8] = step2[8] + step2[11];
    step1[9] = step2[9] + step2[10];
    step1[10] = step2[9] - step2[10];
    step1[11] = step2[8] - step2[11];
    step1[12] = -step2[12] + step2[15];
    step1[13] = -step2[13] + step2[14];
    step1[14] = step2[13] + step2[14];
    step1[15] = step2[12] + step2[15];

    // Elements 16..31: pairs (18,29), (19,28), (20,27), (21,26) are
    // multiplied by cospi_8_64 / cospi_24_64 and rounded; 16, 17, 22, 23,
    // 24, 25, 30 and 31 pass through.
    step1[16] = step2[16];
    step1[17] = step2[17];
    temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
    temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
    step1[18] = dct_const_round_shift(temp1);
    step1[29] = dct_const_round_shift(temp2);
    temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
    temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
    step1[19] = dct_const_round_shift(temp1);
    step1[28] = dct_const_round_shift(temp2);
    temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
    temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
    step1[20] = dct_const_round_shift(temp1);
    step1[27] = dct_const_round_shift(temp2);
    temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
    temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
    step1[21] = dct_const_round_shift(temp1);
    step1[26] = dct_const_round_shift(temp2);
    step1[22] = step2[22];
    step1[23] = step2[23];
    step1[24] = step2[24];
    step1[25] = step2[25];
    step1[30] = step2[30];
    step1[31] = step2[31];

    // stage 6: elements 0..7 are sums/differences of (k, 7 - k);
    // 8, 9, 14 and 15 pass through; (10,13) and (11,12) are scaled by
    // cospi_16_64 and rounded.
    step2[0] = step1[0] + step1[7];
    step2[1] = step1[1] + step1[6];
    step2[2] = step1[2] + step1[5];
    step2[3] = step1[3] + step1[4];
    step2[4] = step1[3] - step1[4];
    step2[5] = step1[2] - step1[5];
    step2[6] = step1[1] - step1[6];
    step2[7] = step1[0] - step1[7];
    step2[8] = step1[8];
    step2[9] = step1[9];
    temp1 = (-step1[10] + step1[13]) * cospi_16_64;
    temp2 = (step1[10] + step1[13]) * cospi_16_64;
    step2[10] = dct_const_round_shift(temp1);
    step2[13] = dct_const_round_shift(temp2);
    temp1 = (-step1[11] + step1[12]) * cospi_16_64;
    temp2 = (step1[11] + step1[12]) * cospi_16_64;
    step2[11] = dct_const_round_shift(temp1);
    step2[12] = dct_const_round_shift(temp2);
    step2[14] = step1[14];
    step2[15] = step1[15];

    // Elements 16..23: sums and differences of (16 + k, 23 - k).
    step2[16] = step1[16] + step1[23];
    step2[17] = step1[17] + step1[22];
    step2[18] = step1[18] + step1[21];
    step2[19] = step1[19] + step1[20];
    step2[20] = step1[19] - step1[20];
    step2[21] = step1[18] - step1[21];
    step2[22] = step1[17] - step1[22];
    step2[23] = step1[16] - step1[23];

    // Elements 24..31: sums and differences of (24 + k, 31 - k).
    step2[24] = -step1[24] + step1[31];
    step2[25] = -step1[25] + step1[30];
    step2[26] = -step1[26] + step1[29];
    step2[27] = -step1[27] + step1[28];
    step2[28] = step1[27] + step1[28];
    step2[29] = step1[26] + step1[29];
    step2[30] = step1[25] + step1[30];
    step2[31] = step1[24] + step1[31];

    // stage 7: elements 0..15 are sums/differences of (k, 15 - k).
    step1[0] = step2[0] + step2[15];
    step1[1] = step2[1] + step2[14];
    step1[2] = step2[2] + step2[13];
    step1[3] = step2[3] + step2[12];
    step1[4] = step2[4] + step2[11];
    step1[5] = step2[5] + step2[10];
    step1[6] = step2[6] + step2[9];
    step1[7] = step2[7] + step2[8];
    step1[8] = step2[7] - step2[8];
    step1[9] = step2[6] - step2[9];
    step1[10] = step2[5] - step2[10];
    step1[11] = step2[4] - step2[11];
    step1[12] = step2[3] - step2[12];
    step1[13] = step2[2] - step2[13];
    step1[14] = step2[1] - step2[14];
    step1[15] = step2[0] - step2[15];

    // Elements 16..19 and 28..31 pass through; pairs (20,27), (21,26),
    // (22,25), (23,24) are scaled by cospi_16_64 and rounded.
    step1[16] = step2[16];
    step1[17] = step2[17];
    step1[18] = step2[18];
    step1[19] = step2[19];
    temp1 = (-step2[20] + step2[27]) * cospi_16_64;
    temp2 = (step2[20] + step2[27]) * cospi_16_64;
    step1[20] = dct_const_round_shift(temp1);
    step1[27] = dct_const_round_shift(temp2);
    temp1 = (-step2[21] + step2[26]) * cospi_16_64;
    temp2 = (step2[21] + step2[26]) * cospi_16_64;
    step1[21] = dct_const_round_shift(temp1);
    step1[26] = dct_const_round_shift(temp2);
    temp1 = (-step2[22] + step2[25]) * cospi_16_64;
    temp2 = (step2[22] + step2[25]) * cospi_16_64;
    step1[22] = dct_const_round_shift(temp1);
    step1[25] = dct_const_round_shift(temp2);
    temp1 = (-step2[23] + step2[24]) * cospi_16_64;
    temp2 = (step2[23] + step2[24]) * cospi_16_64;
    step1[23] = dct_const_round_shift(temp1);
    step1[24] = dct_const_round_shift(temp2);
    step1[28] = step2[28];
    step1[29] = step2[29];
    step1[30] = step2[30];
    step1[31] = step2[31];

    // final stage: for k in 0..15, output[k] = step1[k] + step1[31 - k] and
    // output[31 - k] = step1[k] - step1[31 - k].
    output[0] = step1[0] + step1[31];
    output[1] = step1[1] + step1[30];
    output[2] = step1[2] + step1[29];
    output[3] = step1[3] + step1[28];
    output[4] = step1[4] + step1[27];
    output[5] = step1[5] + step1[26];
    output[6] = step1[6] + step1[25];
    output[7] = step1[7] + step1[24];
    output[8] = step1[8] + step1[23];
    output[9] = step1[9] + step1[22];
    output[10] = step1[10] + step1[21];
    output[11] = step1[11] + step1[20];
    output[12] = step1[12] + step1[19];
    output[13] = step1[13] + step1[18];
    output[14] = step1[14] + step1[17];
    output[15] = step1[15] + step1[16];
    output[16] = step1[15] - step1[16];
    output[17] = step1[14] - step1[17];
    output[18] = step1[13] - step1[18];
    output[19] = step1[12] - step1[19];
    output[20] = step1[11] - step1[20];
    output[21] = step1[10] - step1[21];
    output[22] = step1[9] - step1[22];
    output[23] = step1[8] - step1[23];
    output[24] = step1[7] - step1[24];
    output[25] = step1[6] - step1[25];
    output[26] = step1[5] - step1[26];
    output[27] = step1[4] - step1[27];
    output[28] = step1[3] - step1[28];
    output[29] = step1[2] - step1[29];
    output[30] = step1[1] - step1[30];
    output[31] = step1[0] - step1[31];
}
830
Jason Samsd22e2e22014-02-11 16:59:22 -0800831static void idct32x32_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600832 int i, j;
833 int a1;
834 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
835 * cospi_16_64);
836 out = dct_const_round_shift(out * cospi_16_64);
837 a1 = ROUND_POWER_OF_TWO(out, 6);
838 uint8_t result;
839 for (i = 0; i < 32; ++i) {
840 for (j = 0; j < 32; ++j) {
841 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
842 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
843 }
844 }
845}
846
Jason Samsd22e2e22014-02-11 16:59:22 -0800847static void idct32x32_34(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600848 int16_t out[32 * 32] = { 0 };
849 int16_t *outptr = out;
850 int i, j;
851 int16_t temp_in[32], temp_out[32];
852
853 int16_t in[32 * 32];
854 int16_t *inptr = in;
855 for (i = 0; i < 32; ++i) {
856 for (j = 0; j < 32; ++j) {
857 in[j + i * 32] = rsGetElementAt_short(input, j + xoff, i + yoff);
858 }
859 }
860
861 // Rows
862 // only upper-left 8x8 has non-zero coeff
863 for (i = 0; i < 8; ++i) {
864 idct32_1d(inptr, outptr);
865 inptr += 32;
866 outptr += 32;
867 }
868
869 // Columns
870 uint8_t result;
871 for (i = 0; i < 32; ++i) {
872 for (j = 0; j < 32; ++j)
873 temp_in[j] = out[j * 32 + i];
874 idct32_1d(temp_in, temp_out);
875 for (j = 0; j < 32; ++j) {
876 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
877 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
878 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
879 }
880 }
881}
882
Jason Samsd22e2e22014-02-11 16:59:22 -0800883static void idct32x32_1024(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600884 int16_t out[32 * 32];
885 int16_t *outptr = out;
886 int i, j;
887 int16_t temp_in[32], temp_out[32];
888
889 int16_t in[32 * 32];
890 int16_t *inptr = in;
891 for (i = 0; i < 32; ++i) {
892 for (j = 0; j < 32; ++j) {
893 in[j + i * 32] = rsGetElementAt_short(input, j + xoff, i + yoff);
894 }
895 }
896
897 // Rows
898 for (i = 0; i < 32; ++i) {
899 int16_t zero_coeff[16];
900 for (j = 0; j < 16; ++j)
901 zero_coeff[j] = inptr[2 * j] | inptr[2 * j + 1];
902 for (j = 0; j < 8; ++j)
903 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
904 for (j = 0; j < 4; ++j)
905 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
906 for (j = 0; j < 2; ++j)
907 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
908
909 if (zero_coeff[0] | zero_coeff[1])
910 idct32_1d(inptr, outptr);
911 else
912 memset(outptr, 0, sizeof(int16_t) * 32);
913 inptr += 32;
914 outptr += 32;
915 }
916
917 // Columns
918 uint8_t result;
919 for (i = 0; i < 32; ++i) {
920 for (j = 0; j < 32; ++j)
921 temp_in[j] = out[j * 32 + i];
922 idct32_1d(temp_in, temp_out);
923 for (j = 0; j < 32; ++j) {
924 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
925 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
926 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
927 }
928 }
929}
930
Jason Samsd22e2e22014-02-11 16:59:22 -0800931extern void rsIdct4x4(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600932 int xoff, int yoff) {
933 if (eob > 1) {
934 idct4x4_16(input, dest, xoff, yoff);
935 } else {
936 idct4x4_1(input, dest, xoff, yoff);
937 }
938}
939
Jason Samsd22e2e22014-02-11 16:59:22 -0800940extern void rsIdct8x8(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600941 int xoff, int yoff) {
942 if (eob == 1)
943 // DC only DCT coefficient
944 idct8x8_1(input, dest, xoff, yoff);
945 else if (eob <= 10)
946 idct8x8_10(input, dest, xoff, yoff);
947 else
948 idct8x8_64(input, dest, xoff, yoff);
949}
950
Jason Samsd22e2e22014-02-11 16:59:22 -0800951extern void rsIdct16x16(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600952 int xoff, int yoff) {
953 if (eob == 1)
954 /* DC only DCT coefficient. */
955 idct16x16_1(input, dest, xoff, yoff);
956 else if (eob <= 10)
957 idct16x16_10(input, dest, xoff, yoff);
958 else
959 idct16x16_256(input, dest, xoff, yoff);
960}
961
Jason Samsd22e2e22014-02-11 16:59:22 -0800962extern void rsIdct32x32(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600963 int xoff, int yoff) {
964 if (eob == 1)
965 idct32x32_1(input, dest, xoff, yoff);
966 else if (eob <= 34)
967 // non-zero coeff only in upper-left 8x8
968 idct32x32_34(input, dest, xoff, yoff);
969 else
970 idct32x32_1024(input, dest, xoff, yoff);
971}