#include "rs_idct.h"
#include "rs_allocation.rsh"

4static void idct4_1d(const int16_t *input, int16_t *output) {
5 int16_t step[4];
6 int temp1, temp2;
7 // stage 1
8 temp1 = (input[0] + input[2]) * cospi_16_64;
9 temp2 = (input[0] - input[2]) * cospi_16_64;
10 step[0] = dct_const_round_shift(temp1);
11 step[1] = dct_const_round_shift(temp2);
12 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
13 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
14 step[2] = dct_const_round_shift(temp1);
15 step[3] = dct_const_round_shift(temp2);
16
17 // stage 2
18 output[0] = step[0] + step[3];
19 output[1] = step[1] + step[2];
20 output[2] = step[1] - step[2];
21 output[3] = step[0] - step[3];
22}
23
Jason Samsd22e2e22014-02-11 16:59:22 -080024static void idct4x4_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -060025 int i, j;
26 int a1;
27 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
28 * cospi_16_64);
29 out = dct_const_round_shift(out * cospi_16_64);
30 a1 = ROUND_POWER_OF_TWO(out, 4);
31
32 uint8_t result;
33 for (i = 0; i < 4; ++i) {
34 for (j = 0; j < 4; ++j) {
35 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
36 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
37 }
38 }
39}
40
Jason Samsd22e2e22014-02-11 16:59:22 -080041static void idct4x4_16(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -060042 int16_t out[4 * 4];
43 int16_t *outptr = out;
44 int i, j;
45 int16_t temp_in[4], temp_out[4];
46
47 int16_t in[4 * 4];
48 int16_t *inptr = in;
49 for (i = 0; i < 4; ++i) {
50 for (j = 0; j < 4; ++j) {
51 in[j + i * 4] = rsGetElementAt_short(input, j + xoff, i + yoff);
52 }
53 }
54
55 // Rows
56 for (i = 0; i < 4; ++i) {
57 idct4_1d(inptr, outptr);
58 inptr += 4;
59 outptr += 4;
60 }
61
62 // Columns
63 uint8_t result;
64 for (i = 0; i < 4; ++i) {
65 for (j = 0; j < 4; ++j) {
66 temp_in[j] = out[j * 4 + i];
67 }
68 idct4_1d(temp_in, temp_out);
69 for (j = 0; j < 4; ++j) {
70 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
71 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
72 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
73 }
74 }
75}
76
77static void idct8_1d(const int16_t *input, int16_t *output) {
78 int16_t step1[8], step2[8];
79 int temp1, temp2;
80 // stage 1
81 step1[0] = input[0];
82 step1[2] = input[4];
83 step1[1] = input[2];
84 step1[3] = input[6];
85 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
86 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
87 step1[4] = dct_const_round_shift(temp1);
88 step1[7] = dct_const_round_shift(temp2);
89 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
90 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
91 step1[5] = dct_const_round_shift(temp1);
92 step1[6] = dct_const_round_shift(temp2);
93
94 // stage 2 & stage 3 - even half
95 idct4_1d(step1, step1);
96
97 // stage 2 - odd half
98 step2[4] = step1[4] + step1[5];
99 step2[5] = step1[4] - step1[5];
100 step2[6] = -step1[6] + step1[7];
101 step2[7] = step1[6] + step1[7];
102
103 // stage 3 -odd half
104 step1[4] = step2[4];
105 temp1 = (step2[6] - step2[5]) * cospi_16_64;
106 temp2 = (step2[5] + step2[6]) * cospi_16_64;
107 step1[5] = dct_const_round_shift(temp1);
108 step1[6] = dct_const_round_shift(temp2);
109 step1[7] = step2[7];
110
111 // stage 4
112 output[0] = step1[0] + step1[7];
113 output[1] = step1[1] + step1[6];
114 output[2] = step1[2] + step1[5];
115 output[3] = step1[3] + step1[4];
116 output[4] = step1[3] - step1[4];
117 output[5] = step1[2] - step1[5];
118 output[6] = step1[1] - step1[6];
119 output[7] = step1[0] - step1[7];
120}
121
Jason Samsd22e2e22014-02-11 16:59:22 -0800122static void idct8x8_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600123 int i, j;
124 int a1;
125 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
126 * cospi_16_64);
127 out = dct_const_round_shift(out * cospi_16_64);
128 a1 = ROUND_POWER_OF_TWO(out, 5);
129
130 uint8_t result;
131 for (i = 0; i < 8; ++i) {
132 for (j = 0; j < 8; ++j) {
133 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
134 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
135 }
136 }
137}
138
Jason Samsd22e2e22014-02-11 16:59:22 -0800139static void idct8x8_10(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600140 int16_t out[8 * 8] = { 0 };
141 int16_t *outptr = out;
142 int i, j;
143 int16_t temp_in[8], temp_out[8];
144
145 int16_t in[8 * 8];
146 int16_t *inptr = in;
147 for (i = 0; i < 8; ++i) {
148 for (j = 0; j < 8; ++j) {
149 in[j + i * 8] = rsGetElementAt_short(input, j + xoff, i + yoff);
150 }
151 }
152
153 // First transform rows
154 // only first 4 row has non-zero coefs
155 for (i = 0; i < 4; ++i) {
156 idct8_1d(inptr, outptr);
157 inptr += 8;
158 outptr += 8;
159 }
160
161 // Then transform columns
162 uint8_t result;
163 for (i = 0; i < 8; ++i) {
164 for (j = 0; j < 8; ++j)
165 temp_in[j] = out[j * 8 + i];
166 idct8_1d(temp_in, temp_out);
167 for (j = 0; j < 8; ++j) {
168 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
169 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
170 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
171 }
172 }
173}
174
Jason Samsd22e2e22014-02-11 16:59:22 -0800175static void idct8x8_64(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600176 int16_t out[8 * 8];
177 int16_t *outptr = out;
178 int i, j;
179 int16_t temp_in[8], temp_out[8];
180
181 int16_t in[8 * 8];
182 int16_t *inptr = in;
183 for (i = 0; i < 8; ++i) {
184 for (j = 0; j < 8; ++j) {
185 in[j + i * 8] = rsGetElementAt_short(input, j + xoff, i + yoff);
186 }
187 }
188
189 // First transform rows
190 for (i = 0; i < 8; ++i) {
191 idct8_1d(inptr, outptr);
192 inptr += 8;
193 outptr += 8;
194 }
195
196 // Then transform columns
197 uint8_t result;
198 for (i = 0; i < 8; ++i) {
199 for (j = 0; j < 8; ++j)
200 temp_in[j] = out[j * 8 + i];
201 idct8_1d(temp_in, temp_out);
202 for (j = 0; j < 8; ++j) {
203 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
204 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
205 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
206 }
207 }
208}
209
210static void idct16_1d(const int16_t *input, int16_t *output) {
211 int16_t step1[16], step2[16];
212 int temp1, temp2;
213
214 // stage 1
215 step1[0] = input[0/2];
216 step1[1] = input[16/2];
217 step1[2] = input[8/2];
218 step1[3] = input[24/2];
219 step1[4] = input[4/2];
220 step1[5] = input[20/2];
221 step1[6] = input[12/2];
222 step1[7] = input[28/2];
223 step1[8] = input[2/2];
224 step1[9] = input[18/2];
225 step1[10] = input[10/2];
226 step1[11] = input[26/2];
227 step1[12] = input[6/2];
228 step1[13] = input[22/2];
229 step1[14] = input[14/2];
230 step1[15] = input[30/2];
231
232 // stage 2
233 step2[0] = step1[0];
234 step2[1] = step1[1];
235 step2[2] = step1[2];
236 step2[3] = step1[3];
237 step2[4] = step1[4];
238 step2[5] = step1[5];
239 step2[6] = step1[6];
240 step2[7] = step1[7];
241
242 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
243 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
244 step2[8] = dct_const_round_shift(temp1);
245 step2[15] = dct_const_round_shift(temp2);
246
247 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
248 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
249 step2[9] = dct_const_round_shift(temp1);
250 step2[14] = dct_const_round_shift(temp2);
251
252 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
253 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
254 step2[10] = dct_const_round_shift(temp1);
255 step2[13] = dct_const_round_shift(temp2);
256
257 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
258 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
259 step2[11] = dct_const_round_shift(temp1);
260 step2[12] = dct_const_round_shift(temp2);
261
262 // stage 3
263 step1[0] = step2[0];
264 step1[1] = step2[1];
265 step1[2] = step2[2];
266 step1[3] = step2[3];
267
268 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
269 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
270 step1[4] = dct_const_round_shift(temp1);
271 step1[7] = dct_const_round_shift(temp2);
272 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
273 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
274 step1[5] = dct_const_round_shift(temp1);
275 step1[6] = dct_const_round_shift(temp2);
276
277 step1[8] = step2[8] + step2[9];
278 step1[9] = step2[8] - step2[9];
279 step1[10] = -step2[10] + step2[11];
280 step1[11] = step2[10] + step2[11];
281 step1[12] = step2[12] + step2[13];
282 step1[13] = step2[12] - step2[13];
283 step1[14] = -step2[14] + step2[15];
284 step1[15] = step2[14] + step2[15];
285
286 // stage 4
287 temp1 = (step1[0] + step1[1]) * cospi_16_64;
288 temp2 = (step1[0] - step1[1]) * cospi_16_64;
289 step2[0] = dct_const_round_shift(temp1);
290 step2[1] = dct_const_round_shift(temp2);
291 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
292 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
293 step2[2] = dct_const_round_shift(temp1);
294 step2[3] = dct_const_round_shift(temp2);
295 step2[4] = step1[4] + step1[5];
296 step2[5] = step1[4] - step1[5];
297 step2[6] = -step1[6] + step1[7];
298 step2[7] = step1[6] + step1[7];
299
300 step2[8] = step1[8];
301 step2[15] = step1[15];
302 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
303 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
304 step2[9] = dct_const_round_shift(temp1);
305 step2[14] = dct_const_round_shift(temp2);
306 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
307 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
308 step2[10] = dct_const_round_shift(temp1);
309 step2[13] = dct_const_round_shift(temp2);
310 step2[11] = step1[11];
311 step2[12] = step1[12];
312
313 // stage 5
314 step1[0] = step2[0] + step2[3];
315 step1[1] = step2[1] + step2[2];
316 step1[2] = step2[1] - step2[2];
317 step1[3] = step2[0] - step2[3];
318 step1[4] = step2[4];
319 temp1 = (step2[6] - step2[5]) * cospi_16_64;
320 temp2 = (step2[5] + step2[6]) * cospi_16_64;
321 step1[5] = dct_const_round_shift(temp1);
322 step1[6] = dct_const_round_shift(temp2);
323 step1[7] = step2[7];
324
325 step1[8] = step2[8] + step2[11];
326 step1[9] = step2[9] + step2[10];
327 step1[10] = step2[9] - step2[10];
328 step1[11] = step2[8] - step2[11];
329 step1[12] = -step2[12] + step2[15];
330 step1[13] = -step2[13] + step2[14];
331 step1[14] = step2[13] + step2[14];
332 step1[15] = step2[12] + step2[15];
333
334 // stage 6
335 step2[0] = step1[0] + step1[7];
336 step2[1] = step1[1] + step1[6];
337 step2[2] = step1[2] + step1[5];
338 step2[3] = step1[3] + step1[4];
339 step2[4] = step1[3] - step1[4];
340 step2[5] = step1[2] - step1[5];
341 step2[6] = step1[1] - step1[6];
342 step2[7] = step1[0] - step1[7];
343 step2[8] = step1[8];
344 step2[9] = step1[9];
345 temp1 = (-step1[10] + step1[13]) * cospi_16_64;
346 temp2 = (step1[10] + step1[13]) * cospi_16_64;
347 step2[10] = dct_const_round_shift(temp1);
348 step2[13] = dct_const_round_shift(temp2);
349 temp1 = (-step1[11] + step1[12]) * cospi_16_64;
350 temp2 = (step1[11] + step1[12]) * cospi_16_64;
351 step2[11] = dct_const_round_shift(temp1);
352 step2[12] = dct_const_round_shift(temp2);
353 step2[14] = step1[14];
354 step2[15] = step1[15];
355
356 // stage 7
357 output[0] = step2[0] + step2[15];
358 output[1] = step2[1] + step2[14];
359 output[2] = step2[2] + step2[13];
360 output[3] = step2[3] + step2[12];
361 output[4] = step2[4] + step2[11];
362 output[5] = step2[5] + step2[10];
363 output[6] = step2[6] + step2[9];
364 output[7] = step2[7] + step2[8];
365 output[8] = step2[7] - step2[8];
366 output[9] = step2[6] - step2[9];
367 output[10] = step2[5] - step2[10];
368 output[11] = step2[4] - step2[11];
369 output[12] = step2[3] - step2[12];
370 output[13] = step2[2] - step2[13];
371 output[14] = step2[1] - step2[14];
372 output[15] = step2[0] - step2[15];
373}
374
Jason Samsd22e2e22014-02-11 16:59:22 -0800375static void idct16x16_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600376 int i, j;
377 int a1;
378 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
379 * cospi_16_64);
380 out = dct_const_round_shift(out * cospi_16_64);
381 a1 = ROUND_POWER_OF_TWO(out, 6);
382
383 uint8_t result;
384 for (i = 0; i < 16; ++i) {
385 for (j = 0; j < 16; ++j) {
386 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
387 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
388 }
389 }
390}
391
Jason Samsd22e2e22014-02-11 16:59:22 -0800392static void idct16x16_10(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600393 int16_t out[16 * 16] = { 0 };
394 int16_t *outptr = out;
395 int i, j;
396 int16_t temp_in[16], temp_out[16];
397
398 int16_t in[16 * 16];
399 int16_t *inptr = in;
400 for (i = 0; i < 16; ++i) {
401 for (j = 0; j < 16; ++j) {
402 in[j + i * 16] = rsGetElementAt_short(input, j + xoff, i + yoff);
403 }
404 }
405
406 // First transform rows. Since all non-zero dct coefficients are in
407 // upper-left 4x4 area, we only need to calculate first 4 rows here.
408 for (i = 0; i < 4; ++i) {
409 idct16_1d(inptr, outptr);
410 inptr += 16;
411 outptr += 16;
412 }
413
414 // Then transform columns
415 uint8_t result;
416 for (i = 0; i < 16; ++i) {
417 for (j = 0; j < 16; ++j)
418 temp_in[j] = out[j * 16 + i];
419 idct16_1d(temp_in, temp_out);
420 for (j = 0; j < 16; ++j) {
421 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
422 + rsGetElementAt_uchar(dest, i + xoff,j + yoff));
423 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
424 }
425 }
426}
427
Jason Samsd22e2e22014-02-11 16:59:22 -0800428static void idct16x16_256(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600429 int16_t out[16 * 16];
430 int16_t *outptr = out;
431 int i, j;
432 int16_t temp_in[16], temp_out[16];
433
434 int16_t in[16 * 16];
435 int16_t *inptr = in;
436 for (i = 0; i < 16; ++i) {
437 for (j = 0; j < 16; ++j) {
438 in[j + i * 16] = rsGetElementAt_short(input, j + xoff, i + yoff);
439 }
440 }
441
442 // First transform rows
443 for (i = 0; i < 16; ++i) {
444 idct16_1d(inptr, outptr);
445 inptr += 16;
446 outptr += 16;
447 }
448
449 // Then transform columns
450 uint8_t result;
451 for (i = 0; i < 16; ++i) {
452 for (j = 0; j < 16; ++j)
453 temp_in[j] = out[j * 16 + i];
454 idct16_1d(temp_in, temp_out);
455 for (j = 0; j < 16; ++j) {
456 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
457 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
458 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
459 }
460 }
461}
462
463static void idct32_1d(const int16_t *input, int16_t *output) {
464 int16_t step1[32], step2[32];
465 int temp1, temp2;
466
467 // stage 1
468 step1[0] = input[0];
469 step1[1] = input[16];
470 step1[2] = input[8];
471 step1[3] = input[24];
472 step1[4] = input[4];
473 step1[5] = input[20];
474 step1[6] = input[12];
475 step1[7] = input[28];
476 step1[8] = input[2];
477 step1[9] = input[18];
478 step1[10] = input[10];
479 step1[11] = input[26];
480 step1[12] = input[6];
481 step1[13] = input[22];
482 step1[14] = input[14];
483 step1[15] = input[30];
484
485 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
486 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
487 step1[16] = dct_const_round_shift(temp1);
488 step1[31] = dct_const_round_shift(temp2);
489
490 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
491 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
492 step1[17] = dct_const_round_shift(temp1);
493 step1[30] = dct_const_round_shift(temp2);
494
495 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
496 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
497 step1[18] = dct_const_round_shift(temp1);
498 step1[29] = dct_const_round_shift(temp2);
499
500 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
501 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
502 step1[19] = dct_const_round_shift(temp1);
503 step1[28] = dct_const_round_shift(temp2);
504
505 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
506 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
507 step1[20] = dct_const_round_shift(temp1);
508 step1[27] = dct_const_round_shift(temp2);
509
510 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
511 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
512 step1[21] = dct_const_round_shift(temp1);
513 step1[26] = dct_const_round_shift(temp2);
514
515 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
516 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
517 step1[22] = dct_const_round_shift(temp1);
518 step1[25] = dct_const_round_shift(temp2);
519
520 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
521 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
522 step1[23] = dct_const_round_shift(temp1);
523 step1[24] = dct_const_round_shift(temp2);
524
525 // stage 2
526 step2[0] = step1[0];
527 step2[1] = step1[1];
528 step2[2] = step1[2];
529 step2[3] = step1[3];
530 step2[4] = step1[4];
531 step2[5] = step1[5];
532 step2[6] = step1[6];
533 step2[7] = step1[7];
534
535 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
536 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
537 step2[8] = dct_const_round_shift(temp1);
538 step2[15] = dct_const_round_shift(temp2);
539
540 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
541 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
542 step2[9] = dct_const_round_shift(temp1);
543 step2[14] = dct_const_round_shift(temp2);
544
545 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
546 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
547 step2[10] = dct_const_round_shift(temp1);
548 step2[13] = dct_const_round_shift(temp2);
549
550 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
551 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
552 step2[11] = dct_const_round_shift(temp1);
553 step2[12] = dct_const_round_shift(temp2);
554
555 step2[16] = step1[16] + step1[17];
556 step2[17] = step1[16] - step1[17];
557 step2[18] = -step1[18] + step1[19];
558 step2[19] = step1[18] + step1[19];
559 step2[20] = step1[20] + step1[21];
560 step2[21] = step1[20] - step1[21];
561 step2[22] = -step1[22] + step1[23];
562 step2[23] = step1[22] + step1[23];
563 step2[24] = step1[24] + step1[25];
564 step2[25] = step1[24] - step1[25];
565 step2[26] = -step1[26] + step1[27];
566 step2[27] = step1[26] + step1[27];
567 step2[28] = step1[28] + step1[29];
568 step2[29] = step1[28] - step1[29];
569 step2[30] = -step1[30] + step1[31];
570 step2[31] = step1[30] + step1[31];
571
572 // stage 3
573 step1[0] = step2[0];
574 step1[1] = step2[1];
575 step1[2] = step2[2];
576 step1[3] = step2[3];
577
578 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
579 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
580 step1[4] = dct_const_round_shift(temp1);
581 step1[7] = dct_const_round_shift(temp2);
582 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
583 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
584 step1[5] = dct_const_round_shift(temp1);
585 step1[6] = dct_const_round_shift(temp2);
586
587 step1[8] = step2[8] + step2[9];
588 step1[9] = step2[8] - step2[9];
589 step1[10] = -step2[10] + step2[11];
590 step1[11] = step2[10] + step2[11];
591 step1[12] = step2[12] + step2[13];
592 step1[13] = step2[12] - step2[13];
593 step1[14] = -step2[14] + step2[15];
594 step1[15] = step2[14] + step2[15];
595
596 step1[16] = step2[16];
597 step1[31] = step2[31];
598 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
599 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
600 step1[17] = dct_const_round_shift(temp1);
601 step1[30] = dct_const_round_shift(temp2);
602 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
603 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
604 step1[18] = dct_const_round_shift(temp1);
605 step1[29] = dct_const_round_shift(temp2);
606 step1[19] = step2[19];
607 step1[20] = step2[20];
608 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
609 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
610 step1[21] = dct_const_round_shift(temp1);
611 step1[26] = dct_const_round_shift(temp2);
612 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
613 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
614 step1[22] = dct_const_round_shift(temp1);
615 step1[25] = dct_const_round_shift(temp2);
616 step1[23] = step2[23];
617 step1[24] = step2[24];
618 step1[27] = step2[27];
619 step1[28] = step2[28];
620
621 // stage 4
622 temp1 = (step1[0] + step1[1]) * cospi_16_64;
623 temp2 = (step1[0] - step1[1]) * cospi_16_64;
624 step2[0] = dct_const_round_shift(temp1);
625 step2[1] = dct_const_round_shift(temp2);
626 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
627 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
628 step2[2] = dct_const_round_shift(temp1);
629 step2[3] = dct_const_round_shift(temp2);
630 step2[4] = step1[4] + step1[5];
631 step2[5] = step1[4] - step1[5];
632 step2[6] = -step1[6] + step1[7];
633 step2[7] = step1[6] + step1[7];
634
635 step2[8] = step1[8];
636 step2[15] = step1[15];
637 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
638 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
639 step2[9] = dct_const_round_shift(temp1);
640 step2[14] = dct_const_round_shift(temp2);
641 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
642 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
643 step2[10] = dct_const_round_shift(temp1);
644 step2[13] = dct_const_round_shift(temp2);
645 step2[11] = step1[11];
646 step2[12] = step1[12];
647
648 step2[16] = step1[16] + step1[19];
649 step2[17] = step1[17] + step1[18];
650 step2[18] = step1[17] - step1[18];
651 step2[19] = step1[16] - step1[19];
652 step2[20] = -step1[20] + step1[23];
653 step2[21] = -step1[21] + step1[22];
654 step2[22] = step1[21] + step1[22];
655 step2[23] = step1[20] + step1[23];
656
657 step2[24] = step1[24] + step1[27];
658 step2[25] = step1[25] + step1[26];
659 step2[26] = step1[25] - step1[26];
660 step2[27] = step1[24] - step1[27];
661 step2[28] = -step1[28] + step1[31];
662 step2[29] = -step1[29] + step1[30];
663 step2[30] = step1[29] + step1[30];
664 step2[31] = step1[28] + step1[31];
665
666 // stage 5
667 step1[0] = step2[0] + step2[3];
668 step1[1] = step2[1] + step2[2];
669 step1[2] = step2[1] - step2[2];
670 step1[3] = step2[0] - step2[3];
671 step1[4] = step2[4];
672 temp1 = (step2[6] - step2[5]) * cospi_16_64;
673 temp2 = (step2[5] + step2[6]) * cospi_16_64;
674 step1[5] = dct_const_round_shift(temp1);
675 step1[6] = dct_const_round_shift(temp2);
676 step1[7] = step2[7];
677
678 step1[8] = step2[8] + step2[11];
679 step1[9] = step2[9] + step2[10];
680 step1[10] = step2[9] - step2[10];
681 step1[11] = step2[8] - step2[11];
682 step1[12] = -step2[12] + step2[15];
683 step1[13] = -step2[13] + step2[14];
684 step1[14] = step2[13] + step2[14];
685 step1[15] = step2[12] + step2[15];
686
687 step1[16] = step2[16];
688 step1[17] = step2[17];
689 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
690 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
691 step1[18] = dct_const_round_shift(temp1);
692 step1[29] = dct_const_round_shift(temp2);
693 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
694 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
695 step1[19] = dct_const_round_shift(temp1);
696 step1[28] = dct_const_round_shift(temp2);
697 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
698 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
699 step1[20] = dct_const_round_shift(temp1);
700 step1[27] = dct_const_round_shift(temp2);
701 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
702 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
703 step1[21] = dct_const_round_shift(temp1);
704 step1[26] = dct_const_round_shift(temp2);
705 step1[22] = step2[22];
706 step1[23] = step2[23];
707 step1[24] = step2[24];
708 step1[25] = step2[25];
709 step1[30] = step2[30];
710 step1[31] = step2[31];
711
712 // stage 6
713 step2[0] = step1[0] + step1[7];
714 step2[1] = step1[1] + step1[6];
715 step2[2] = step1[2] + step1[5];
716 step2[3] = step1[3] + step1[4];
717 step2[4] = step1[3] - step1[4];
718 step2[5] = step1[2] - step1[5];
719 step2[6] = step1[1] - step1[6];
720 step2[7] = step1[0] - step1[7];
721 step2[8] = step1[8];
722 step2[9] = step1[9];
723 temp1 = (-step1[10] + step1[13]) * cospi_16_64;
724 temp2 = (step1[10] + step1[13]) * cospi_16_64;
725 step2[10] = dct_const_round_shift(temp1);
726 step2[13] = dct_const_round_shift(temp2);
727 temp1 = (-step1[11] + step1[12]) * cospi_16_64;
728 temp2 = (step1[11] + step1[12]) * cospi_16_64;
729 step2[11] = dct_const_round_shift(temp1);
730 step2[12] = dct_const_round_shift(temp2);
731 step2[14] = step1[14];
732 step2[15] = step1[15];
733
734 step2[16] = step1[16] + step1[23];
735 step2[17] = step1[17] + step1[22];
736 step2[18] = step1[18] + step1[21];
737 step2[19] = step1[19] + step1[20];
738 step2[20] = step1[19] - step1[20];
739 step2[21] = step1[18] - step1[21];
740 step2[22] = step1[17] - step1[22];
741 step2[23] = step1[16] - step1[23];
742
743 step2[24] = -step1[24] + step1[31];
744 step2[25] = -step1[25] + step1[30];
745 step2[26] = -step1[26] + step1[29];
746 step2[27] = -step1[27] + step1[28];
747 step2[28] = step1[27] + step1[28];
748 step2[29] = step1[26] + step1[29];
749 step2[30] = step1[25] + step1[30];
750 step2[31] = step1[24] + step1[31];
751
752 // stage 7
753 step1[0] = step2[0] + step2[15];
754 step1[1] = step2[1] + step2[14];
755 step1[2] = step2[2] + step2[13];
756 step1[3] = step2[3] + step2[12];
757 step1[4] = step2[4] + step2[11];
758 step1[5] = step2[5] + step2[10];
759 step1[6] = step2[6] + step2[9];
760 step1[7] = step2[7] + step2[8];
761 step1[8] = step2[7] - step2[8];
762 step1[9] = step2[6] - step2[9];
763 step1[10] = step2[5] - step2[10];
764 step1[11] = step2[4] - step2[11];
765 step1[12] = step2[3] - step2[12];
766 step1[13] = step2[2] - step2[13];
767 step1[14] = step2[1] - step2[14];
768 step1[15] = step2[0] - step2[15];
769
770 step1[16] = step2[16];
771 step1[17] = step2[17];
772 step1[18] = step2[18];
773 step1[19] = step2[19];
774 temp1 = (-step2[20] + step2[27]) * cospi_16_64;
775 temp2 = (step2[20] + step2[27]) * cospi_16_64;
776 step1[20] = dct_const_round_shift(temp1);
777 step1[27] = dct_const_round_shift(temp2);
778 temp1 = (-step2[21] + step2[26]) * cospi_16_64;
779 temp2 = (step2[21] + step2[26]) * cospi_16_64;
780 step1[21] = dct_const_round_shift(temp1);
781 step1[26] = dct_const_round_shift(temp2);
782 temp1 = (-step2[22] + step2[25]) * cospi_16_64;
783 temp2 = (step2[22] + step2[25]) * cospi_16_64;
784 step1[22] = dct_const_round_shift(temp1);
785 step1[25] = dct_const_round_shift(temp2);
786 temp1 = (-step2[23] + step2[24]) * cospi_16_64;
787 temp2 = (step2[23] + step2[24]) * cospi_16_64;
788 step1[23] = dct_const_round_shift(temp1);
789 step1[24] = dct_const_round_shift(temp2);
790 step1[28] = step2[28];
791 step1[29] = step2[29];
792 step1[30] = step2[30];
793 step1[31] = step2[31];
794
795 // final stage
796 output[0] = step1[0] + step1[31];
797 output[1] = step1[1] + step1[30];
798 output[2] = step1[2] + step1[29];
799 output[3] = step1[3] + step1[28];
800 output[4] = step1[4] + step1[27];
801 output[5] = step1[5] + step1[26];
802 output[6] = step1[6] + step1[25];
803 output[7] = step1[7] + step1[24];
804 output[8] = step1[8] + step1[23];
805 output[9] = step1[9] + step1[22];
806 output[10] = step1[10] + step1[21];
807 output[11] = step1[11] + step1[20];
808 output[12] = step1[12] + step1[19];
809 output[13] = step1[13] + step1[18];
810 output[14] = step1[14] + step1[17];
811 output[15] = step1[15] + step1[16];
812 output[16] = step1[15] - step1[16];
813 output[17] = step1[14] - step1[17];
814 output[18] = step1[13] - step1[18];
815 output[19] = step1[12] - step1[19];
816 output[20] = step1[11] - step1[20];
817 output[21] = step1[10] - step1[21];
818 output[22] = step1[9] - step1[22];
819 output[23] = step1[8] - step1[23];
820 output[24] = step1[7] - step1[24];
821 output[25] = step1[6] - step1[25];
822 output[26] = step1[5] - step1[26];
823 output[27] = step1[4] - step1[27];
824 output[28] = step1[3] - step1[28];
825 output[29] = step1[2] - step1[29];
826 output[30] = step1[1] - step1[30];
827 output[31] = step1[0] - step1[31];
828}
829
Jason Samsd22e2e22014-02-11 16:59:22 -0800830static void idct32x32_1(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600831 int i, j;
832 int a1;
833 int16_t out = dct_const_round_shift(rsGetElementAt_short(input, xoff, yoff)
834 * cospi_16_64);
835 out = dct_const_round_shift(out * cospi_16_64);
836 a1 = ROUND_POWER_OF_TWO(out, 6);
837 uint8_t result;
838 for (i = 0; i < 32; ++i) {
839 for (j = 0; j < 32; ++j) {
840 result = clip_pixel(rsGetElementAt_uchar(dest, j + xoff, i + yoff) + a1);
841 rsSetElementAt_uchar(dest, result, j + xoff, i + yoff);
842 }
843 }
844}
845
Jason Samsd22e2e22014-02-11 16:59:22 -0800846static void idct32x32_34(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600847 int16_t out[32 * 32] = { 0 };
848 int16_t *outptr = out;
849 int i, j;
850 int16_t temp_in[32], temp_out[32];
851
852 int16_t in[32 * 32];
853 int16_t *inptr = in;
854 for (i = 0; i < 32; ++i) {
855 for (j = 0; j < 32; ++j) {
856 in[j + i * 32] = rsGetElementAt_short(input, j + xoff, i + yoff);
857 }
858 }
859
860 // Rows
861 // only upper-left 8x8 has non-zero coeff
862 for (i = 0; i < 8; ++i) {
863 idct32_1d(inptr, outptr);
864 inptr += 32;
865 outptr += 32;
866 }
867
868 // Columns
869 uint8_t result;
870 for (i = 0; i < 32; ++i) {
871 for (j = 0; j < 32; ++j)
872 temp_in[j] = out[j * 32 + i];
873 idct32_1d(temp_in, temp_out);
874 for (j = 0; j < 32; ++j) {
875 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
876 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
877 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
878 }
879 }
880}
881
Jason Samsd22e2e22014-02-11 16:59:22 -0800882static void idct32x32_1024(const rs_allocation input, rs_allocation dest, int xoff, int yoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600883 int16_t out[32 * 32];
884 int16_t *outptr = out;
885 int i, j;
886 int16_t temp_in[32], temp_out[32];
887
888 int16_t in[32 * 32];
889 int16_t *inptr = in;
890 for (i = 0; i < 32; ++i) {
891 for (j = 0; j < 32; ++j) {
892 in[j + i * 32] = rsGetElementAt_short(input, j + xoff, i + yoff);
893 }
894 }
895
896 // Rows
897 for (i = 0; i < 32; ++i) {
898 int16_t zero_coeff[16];
899 for (j = 0; j < 16; ++j)
900 zero_coeff[j] = inptr[2 * j] | inptr[2 * j + 1];
901 for (j = 0; j < 8; ++j)
902 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
903 for (j = 0; j < 4; ++j)
904 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
905 for (j = 0; j < 2; ++j)
906 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
907
Jason Samsc3f27482014-02-13 13:16:43 -0800908 if (zero_coeff[0] | zero_coeff[1]) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600909 idct32_1d(inptr, outptr);
Jason Samsc3f27482014-02-13 13:16:43 -0800910 } else {
911 for (int ct = 0; ct < 32; ct++) {
912 outptr[ct] = 0;
913 }
914 }
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600915 inptr += 32;
916 outptr += 32;
917 }
918
919 // Columns
920 uint8_t result;
921 for (i = 0; i < 32; ++i) {
922 for (j = 0; j < 32; ++j)
923 temp_in[j] = out[j * 32 + i];
924 idct32_1d(temp_in, temp_out);
925 for (j = 0; j < 32; ++j) {
926 result = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
927 + rsGetElementAt_uchar(dest, i + xoff, j + yoff));
928 rsSetElementAt_uchar(dest, result, i + xoff, j + yoff);
929 }
930 }
931}
932
Jason Samsd22e2e22014-02-11 16:59:22 -0800933extern void rsIdct4x4(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600934 int xoff, int yoff) {
935 if (eob > 1) {
936 idct4x4_16(input, dest, xoff, yoff);
937 } else {
938 idct4x4_1(input, dest, xoff, yoff);
939 }
940}
941
Jason Samsd22e2e22014-02-11 16:59:22 -0800942extern void rsIdct8x8(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600943 int xoff, int yoff) {
944 if (eob == 1)
945 // DC only DCT coefficient
946 idct8x8_1(input, dest, xoff, yoff);
947 else if (eob <= 10)
948 idct8x8_10(input, dest, xoff, yoff);
949 else
950 idct8x8_64(input, dest, xoff, yoff);
951}
952
Jason Samsd22e2e22014-02-11 16:59:22 -0800953extern void rsIdct16x16(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600954 int xoff, int yoff) {
955 if (eob == 1)
956 /* DC only DCT coefficient. */
957 idct16x16_1(input, dest, xoff, yoff);
958 else if (eob <= 10)
959 idct16x16_10(input, dest, xoff, yoff);
960 else
961 idct16x16_256(input, dest, xoff, yoff);
962}
963
Jason Samsd22e2e22014-02-11 16:59:22 -0800964extern void rsIdct32x32(const rs_allocation input, rs_allocation dest, int eob,
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600965 int xoff, int yoff) {
966 if (eob == 1)
967 idct32x32_1(input, dest, xoff, yoff);
968 else if (eob <= 34)
969 // non-zero coeff only in upper-left 8x8
970 idct32x32_34(input, dest, xoff, yoff);
971 else
972 idct32x32_1024(input, dest, xoff, yoff);
973}