blob: 0698e1a87787015f42340beef1b5c61b9fef198e [file] [log] [blame]
Matthieu Delahaye197fc092014-01-28 16:06:57 -06001#include "rs_fadst.h"
2#include "rs_allocation.rsh"
3
Jason Samsd22e2e22014-02-11 16:59:22 -08004extern void rsFadst4(const rs_allocation input, rs_allocation output, int32_t xoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -06005 int x0, x1, x2, x3;
6 int s0, s1, s2, s3, s4, s5, s6, s7;
7
8 x0 = rsGetElementAt_short(input, xoff);
9 x1 = rsGetElementAt_short(input, xoff + 1);
10 x2 = rsGetElementAt_short(input, xoff + 2);
11 x3 = rsGetElementAt_short(input, xoff + 3);
12
13 if (!(x0 | x1 | x2 | x3)) {
14 rsSetElementAt_short(output, 0, xoff);
15 rsSetElementAt_short(output, 0, xoff + 1);
16 rsSetElementAt_short(output, 0, xoff + 2);
17 rsSetElementAt_short(output, 0, xoff + 3);
18 return;
19 }
20
21 s0 = sinpi_1_9 * x0;
22 s1 = sinpi_4_9 * x0;
23 s2 = sinpi_2_9 * x1;
24 s3 = sinpi_1_9 * x1;
25 s4 = sinpi_3_9 * x2;
26 s5 = sinpi_4_9 * x3;
27 s6 = sinpi_2_9 * x3;
28 s7 = x0 + x1 - x3;
29
30 x0 = s0 + s2 + s5;
31 x1 = sinpi_3_9 * s7;
32 x2 = s1 - s3 + s6;
33 x3 = s4;
34
35 s0 = x0 + x3;
36 s1 = x1;
37 s2 = x2 - x3;
38 s3 = x2 - x0 + x3;
39
40 rsSetElementAt_short(output, fdct_round_shift(s0), xoff);
41 rsSetElementAt_short(output, fdct_round_shift(s1), xoff + 1);
42 rsSetElementAt_short(output, fdct_round_shift(s2), xoff + 2);
43 rsSetElementAt_short(output, fdct_round_shift(s3), xoff + 3);
44}
45
Jason Samsd22e2e22014-02-11 16:59:22 -080046extern void rsFadst8(const rs_allocation input, rs_allocation output, int32_t xoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -060047 int s0, s1, s2, s3, s4, s5, s6, s7;
48 int16_t outArr[8];
49
50 int x0 = rsGetElementAt_short(input, xoff + 7);
51 int x1 = rsGetElementAt_short(input, xoff);
52 int x2 = rsGetElementAt_short(input, xoff + 5);
53 int x3 = rsGetElementAt_short(input, xoff + 2);
54 int x4 = rsGetElementAt_short(input, xoff + 3);
55 int x5 = rsGetElementAt_short(input, xoff + 4);
56 int x6 = rsGetElementAt_short(input, xoff + 1);
57 int x7 = rsGetElementAt_short(input, xoff + 6);
58
59 // stage 1
60 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
61 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
62 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
63 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
64 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
65 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
66 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
67 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
68
69 x0 = fdct_round_shift(s0 + s4);
70 x1 = fdct_round_shift(s1 + s5);
71 x2 = fdct_round_shift(s2 + s6);
72 x3 = fdct_round_shift(s3 + s7);
73 x4 = fdct_round_shift(s0 - s4);
74 x5 = fdct_round_shift(s1 - s5);
75 x6 = fdct_round_shift(s2 - s6);
76 x7 = fdct_round_shift(s3 - s7);
77
78 // stage 2
79 s0 = x0;
80 s1 = x1;
81 s2 = x2;
82 s3 = x3;
83 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
84 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
85 s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
86 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
87
88 x0 = s0 + s2;
89 x1 = s1 + s3;
90 x2 = s0 - s2;
91 x3 = s1 - s3;
92 x4 = fdct_round_shift(s4 + s6);
93 x5 = fdct_round_shift(s5 + s7);
94 x6 = fdct_round_shift(s4 - s6);
95 x7 = fdct_round_shift(s5 - s7);
96
97 // stage 3
98 s2 = cospi_16_64 * (x2 + x3);
99 s3 = cospi_16_64 * (x2 - x3);
100 s6 = cospi_16_64 * (x6 + x7);
101 s7 = cospi_16_64 * (x6 - x7);
102
103 x2 = fdct_round_shift(s2);
104 x3 = fdct_round_shift(s3);
105 x6 = fdct_round_shift(s6);
106 x7 = fdct_round_shift(s7);
107
108 outArr[0] = x0;
109 outArr[1] = - x4;
110 outArr[2] = x6;
111 outArr[3] = - x2;
112 outArr[4] = x3;
113 outArr[5] = - x7;
114 outArr[6] = x5;
115 outArr[7] = - x1;
116
117 int i;
118 for (i = 0; i < 8; ++i) {
119 rsSetElementAt_short(output, outArr[i], xoff + i);
120 }
121}
122
Jason Samsd22e2e22014-02-11 16:59:22 -0800123extern void rsFadst16(const rs_allocation input, rs_allocation output, int32_t xoff) {
Matthieu Delahaye197fc092014-01-28 16:06:57 -0600124 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
125 int16_t outArr[16];
126
127 int x0 = rsGetElementAt_short(input, xoff + 15);
128 int x1 = rsGetElementAt_short(input, xoff);
129 int x2 = rsGetElementAt_short(input, xoff + 13);
130 int x3 = rsGetElementAt_short(input, xoff + 2);
131 int x4 = rsGetElementAt_short(input, xoff + 11);
132 int x5 = rsGetElementAt_short(input, xoff + 4);
133 int x6 = rsGetElementAt_short(input, xoff + 9);
134 int x7 = rsGetElementAt_short(input, xoff + 6);
135 int x8 = rsGetElementAt_short(input, xoff + 7);
136 int x9 = rsGetElementAt_short(input, xoff + 8);
137 int x10 = rsGetElementAt_short(input, xoff + 5);
138 int x11 = rsGetElementAt_short(input, xoff + 10);
139 int x12 = rsGetElementAt_short(input, xoff + 3);
140 int x13 = rsGetElementAt_short(input, xoff + 12);
141 int x14 = rsGetElementAt_short(input, xoff + 1);
142 int x15 = rsGetElementAt_short(input, xoff + 14);
143
144 // stage 1
145 s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
146 s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
147 s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
148 s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
149 s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
150 s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
151 s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
152 s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
153 s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
154 s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
155 s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
156 s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
157 s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
158 s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
159 s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
160 s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
161
162 x0 = fdct_round_shift(s0 + s8);
163 x1 = fdct_round_shift(s1 + s9);
164 x2 = fdct_round_shift(s2 + s10);
165 x3 = fdct_round_shift(s3 + s11);
166 x4 = fdct_round_shift(s4 + s12);
167 x5 = fdct_round_shift(s5 + s13);
168 x6 = fdct_round_shift(s6 + s14);
169 x7 = fdct_round_shift(s7 + s15);
170 x8 = fdct_round_shift(s0 - s8);
171 x9 = fdct_round_shift(s1 - s9);
172 x10 = fdct_round_shift(s2 - s10);
173 x11 = fdct_round_shift(s3 - s11);
174 x12 = fdct_round_shift(s4 - s12);
175 x13 = fdct_round_shift(s5 - s13);
176 x14 = fdct_round_shift(s6 - s14);
177 x15 = fdct_round_shift(s7 - s15);
178
179 // stage 2
180 s0 = x0;
181 s1 = x1;
182 s2 = x2;
183 s3 = x3;
184 s4 = x4;
185 s5 = x5;
186 s6 = x6;
187 s7 = x7;
188 s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
189 s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
190 s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
191 s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
192 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
193 s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
194 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
195 s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
196
197 x0 = s0 + s4;
198 x1 = s1 + s5;
199 x2 = s2 + s6;
200 x3 = s3 + s7;
201 x4 = s0 - s4;
202 x5 = s1 - s5;
203 x6 = s2 - s6;
204 x7 = s3 - s7;
205 x8 = fdct_round_shift(s8 + s12);
206 x9 = fdct_round_shift(s9 + s13);
207 x10 = fdct_round_shift(s10 + s14);
208 x11 = fdct_round_shift(s11 + s15);
209 x12 = fdct_round_shift(s8 - s12);
210 x13 = fdct_round_shift(s9 - s13);
211 x14 = fdct_round_shift(s10 - s14);
212 x15 = fdct_round_shift(s11 - s15);
213
214 // stage 3
215 s0 = x0;
216 s1 = x1;
217 s2 = x2;
218 s3 = x3;
219 s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
220 s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
221 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
222 s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
223 s8 = x8;
224 s9 = x9;
225 s10 = x10;
226 s11 = x11;
227 s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
228 s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
229 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
230 s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
231
232 x0 = s0 + s2;
233 x1 = s1 + s3;
234 x2 = s0 - s2;
235 x3 = s1 - s3;
236 x4 = fdct_round_shift(s4 + s6);
237 x5 = fdct_round_shift(s5 + s7);
238 x6 = fdct_round_shift(s4 - s6);
239 x7 = fdct_round_shift(s5 - s7);
240 x8 = s8 + s10;
241 x9 = s9 + s11;
242 x10 = s8 - s10;
243 x11 = s9 - s11;
244 x12 = fdct_round_shift(s12 + s14);
245 x13 = fdct_round_shift(s13 + s15);
246 x14 = fdct_round_shift(s12 - s14);
247 x15 = fdct_round_shift(s13 - s15);
248
249 // stage 4
250 s2 = (- cospi_16_64) * (x2 + x3);
251 s3 = cospi_16_64 * (x2 - x3);
252 s6 = cospi_16_64 * (x6 + x7);
253 s7 = cospi_16_64 * (- x6 + x7);
254 s10 = cospi_16_64 * (x10 + x11);
255 s11 = cospi_16_64 * (- x10 + x11);
256 s14 = (- cospi_16_64) * (x14 + x15);
257 s15 = cospi_16_64 * (x14 - x15);
258
259 x2 = fdct_round_shift(s2);
260 x3 = fdct_round_shift(s3);
261 x6 = fdct_round_shift(s6);
262 x7 = fdct_round_shift(s7);
263 x10 = fdct_round_shift(s10);
264 x11 = fdct_round_shift(s11);
265 x14 = fdct_round_shift(s14);
266 x15 = fdct_round_shift(s15);
267
268 outArr[0] = x0;
269 outArr[1] = - x8;
270 outArr[2] = x12;
271 outArr[3] = - x4;
272 outArr[4] = x6;
273 outArr[5] = x14;
274 outArr[6] = x10;
275 outArr[7] = x2;
276 outArr[8] = x3;
277 outArr[9] = x11;
278 outArr[10] = x15;
279 outArr[11] = x7;
280 outArr[12] = x5;
281 outArr[13] = - x13;
282 outArr[14] = x9;
283 outArr[15] = - x1;
284
285 int i;
286 for (i = 0; i < 16; ++i) {
287 rsSetElementAt_short(output, outArr[i], xoff + i);
288 }
289}