blob: 1ce54ae47516572e6a0e9439c6c52da7df006ea7 [file] [log] [blame]
/*
 *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "libyuv/row.h"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000012
fbarchard@google.comca410052012-10-14 06:01:19 +000013#ifdef __cplusplus
14namespace libyuv {
15extern "C" {
16#endif
17
18#if !defined(YUV_DISABLE_ASM) && defined(__mips__)
fbarchard@google.com834b7482012-12-11 17:59:26 +000019#ifdef HAS_COPYROW_MIPS
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +000020extern "C" void memcpy_MIPS(uint8* dst, const uint8* src, int count);
21void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
22 memcpy_MIPS(dst, src, count);
23}
fbarchard@google.com834b7482012-12-11 17:59:26 +000024#endif // HAS_COPYROW_MIPS
25#endif // __mips__
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +000026
fbarchard@google.com834b7482012-12-11 17:59:26 +000027// MIPS DSPR2 functions
28#if !defined(YUV_DISABLE_ASM) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +000029void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
30 int width) {
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000031 __asm__ __volatile__ (
32 ".set push \n"
33 ".set noreorder \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000034 "srl $t4, %[width], 4 \n" // multiplies of 16
35 "blez $t4, 2f \n"
36 " andi %[width], %[width], 0xf \n" // residual
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000037
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000038 "1: \n"
39 "addiu $t4, $t4, -1 \n"
40 "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
41 "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
42 "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
43 "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
44 "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
45 "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
46 "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
47 "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
48 "addiu %[src_uv], %[src_uv], 32 \n"
49 "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
50 "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
51 "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
52 "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
53 "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
54 "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
55 "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
56 "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
57 "sw $t9, 0(%[dst_v]) \n"
58 "sw $t0, 0(%[dst_u]) \n"
59 "sw $t1, 4(%[dst_v]) \n"
60 "sw $t2, 4(%[dst_u]) \n"
61 "sw $t3, 8(%[dst_v]) \n"
62 "sw $t5, 8(%[dst_u]) \n"
63 "sw $t6, 12(%[dst_v]) \n"
64 "sw $t7, 12(%[dst_u]) \n"
65 "addiu %[dst_v], %[dst_v], 16 \n"
66 "bgtz $t4, 1b \n"
67 " addiu %[dst_u], %[dst_u], 16 \n"
68
69 "beqz %[width], 3f \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000070 " nop \n"
71
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000072 "2: \n"
73 "lbu $t0, 0(%[src_uv]) \n"
74 "lbu $t1, 1(%[src_uv]) \n"
75 "addiu %[src_uv], %[src_uv], 2 \n"
76 "addiu %[width], %[width], -1 \n"
77 "sb $t0, 0(%[dst_u]) \n"
78 "sb $t1, 0(%[dst_v]) \n"
79 "addiu %[dst_u], %[dst_u], 1 \n"
80 "bgtz %[width], 2b \n"
81 " addiu %[dst_v], %[dst_v], 1 \n"
82
83 "3: \n"
84 ".set pop \n"
85 : [src_uv] "+r" (src_uv),
86 [width] "+r" (width),
87 [dst_u] "+r" (dst_u),
88 [dst_v] "+r" (dst_v)
89 :
90 : "t0", "t1", "t2", "t3",
91 "t4", "t5", "t6", "t7", "t8", "t9"
92 );
93}
94
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +000095void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
96 uint8* dst_v, int width) {
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000097 __asm__ __volatile__ (
98 ".set push \n"
99 ".set noreorder \n"
100 "srl $t4, %[width], 4 \n" // multiplies of 16
101 "blez $t4, 2f \n"
102 " andi %[width], %[width], 0xf \n" // residual
103
104 "1: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000105 "addiu $t4, $t4, -1 \n"
106 "lwr $t0, 0(%[src_uv]) \n"
107 "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
108 "lwr $t1, 4(%[src_uv]) \n"
109 "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
110 "lwr $t2, 8(%[src_uv]) \n"
111 "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
112 "lwr $t3, 12(%[src_uv]) \n"
113 "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
114 "lwr $t5, 16(%[src_uv]) \n"
115 "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
116 "lwr $t6, 20(%[src_uv]) \n"
117 "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
118 "lwr $t7, 24(%[src_uv]) \n"
119 "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
120 "lwr $t8, 28(%[src_uv]) \n"
121 "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000122 "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
123 "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
124 "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
125 "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
126 "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
127 "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
128 "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
129 "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
130 "addiu %[src_uv], %[src_uv], 32 \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000131 "swr $t9, 0(%[dst_v]) \n"
132 "swl $t9, 3(%[dst_v]) \n"
133 "swr $t0, 0(%[dst_u]) \n"
134 "swl $t0, 3(%[dst_u]) \n"
135 "swr $t1, 4(%[dst_v]) \n"
136 "swl $t1, 7(%[dst_v]) \n"
137 "swr $t2, 4(%[dst_u]) \n"
138 "swl $t2, 7(%[dst_u]) \n"
139 "swr $t3, 8(%[dst_v]) \n"
140 "swl $t3, 11(%[dst_v]) \n"
141 "swr $t5, 8(%[dst_u]) \n"
142 "swl $t5, 11(%[dst_u]) \n"
143 "swr $t6, 12(%[dst_v]) \n"
144 "swl $t6, 15(%[dst_v]) \n"
145 "swr $t7, 12(%[dst_u]) \n"
146 "swl $t7, 15(%[dst_u]) \n"
147 "addiu %[dst_u], %[dst_u], 16 \n"
148 "bgtz $t4, 1b \n"
149 " addiu %[dst_v], %[dst_v], 16 \n"
150
151 "beqz %[width], 3f \n"
152 " nop \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000153
fbarchard@google.comdb694ed2012-10-17 21:54:04 +0000154 "2: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000155 "lbu $t0, 0(%[src_uv]) \n"
156 "lbu $t1, 1(%[src_uv]) \n"
157 "addiu %[src_uv], %[src_uv], 2 \n"
158 "addiu %[width], %[width], -1 \n"
159 "sb $t0, 0(%[dst_u]) \n"
160 "sb $t1, 0(%[dst_v]) \n"
161 "addiu %[dst_u], %[dst_u], 1 \n"
162 "bgtz %[width], 2b \n"
163 " addiu %[dst_v], %[dst_v], 1 \n"
164
fbarchard@google.comdb694ed2012-10-17 21:54:04 +0000165 "3: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000166 ".set pop \n"
167 : [src_uv] "+r" (src_uv),
168 [width] "+r" (width),
169 [dst_u] "+r" (dst_u),
170 [dst_v] "+r" (dst_v)
171 :
172 : "t0", "t1", "t2", "t3",
173 "t4", "t5", "t6", "t7", "t8", "t9"
fbarchard@google.comca410052012-10-14 06:01:19 +0000174 );
175}
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000176
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000177void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
178 __asm__ __volatile__ (
179 ".set push \n"
180 ".set noreorder \n"
181
182 "srl $t4, %[width], 4 \n" // multiplies of 16
183 "andi $t5, %[width], 0xf \n"
184 "blez $t4, 2f \n"
185 " addu %[src], %[src], %[width] \n" // src += width
186
187 "1: \n"
188 "lw $t0, -16(%[src]) \n" // |3|2|1|0|
189 "lw $t1, -12(%[src]) \n" // |7|6|5|4|
190 "lw $t2, -8(%[src]) \n" // |11|10|9|8|
191 "lw $t3, -4(%[src]) \n" // |15|14|13|12|
192 "wsbh $t0, $t0 \n" // |2|3|0|1|
193 "wsbh $t1, $t1 \n" // |6|7|4|5|
194 "wsbh $t2, $t2 \n" // |10|11|8|9|
195 "wsbh $t3, $t3 \n" // |14|15|12|13|
196 "rotr $t0, $t0, 16 \n" // |0|1|2|3|
197 "rotr $t1, $t1, 16 \n" // |4|5|6|7|
198 "rotr $t2, $t2, 16 \n" // |8|9|10|11|
199 "rotr $t3, $t3, 16 \n" // |12|13|14|15|
200 "addiu %[src], %[src], -16 \n"
201 "addiu $t4, $t4, -1 \n"
202 "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
203 "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
204 "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
205 "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
206 "bgtz $t4, 1b \n"
207 " addiu %[dst], %[dst], 16 \n"
208 "beqz $t5, 3f \n"
209 " nop \n"
210
211 "2: \n"
212 "lbu $t0, -1(%[src]) \n"
213 "addiu $t5, $t5, -1 \n"
214 "addiu %[src], %[src], -1 \n"
215 "sb $t0, 0(%[dst]) \n"
216 "bgez $t5, 2b \n"
217 " addiu %[dst], %[dst], 1 \n"
218
219 "3: \n"
220 ".set pop \n"
221 : [src] "+r" (src), [dst] "+r" (dst)
222 : [width] "r" (width)
223 : "t0", "t1", "t2", "t3", "t4", "t5"
224 );
225}
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000226
fbarchard@google.combdf7cb52012-11-05 23:40:11 +0000227void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000228 int width) {
229 int x = 0;
230 int y = 0;
231 __asm__ __volatile__ (
232 ".set push \n"
233 ".set noreorder \n"
234
235 "addu $t4, %[width], %[width] \n"
236 "srl %[x], %[width], 4 \n"
237 "andi %[y], %[width], 0xf \n"
238 "blez %[x], 2f \n"
239 " addu %[src_uv], %[src_uv], $t4 \n"
240
241 "1: \n"
242 "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
243 "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
244 "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
245 "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
246 "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
247 "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
248 "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
249 "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
250
251 "rotr $t0, $t0, 16 \n" // |1|0|3|2|
252 "rotr $t1, $t1, 16 \n" // |5|4|7|6|
253 "rotr $t2, $t2, 16 \n" // |9|8|11|10|
254 "rotr $t3, $t3, 16 \n" // |13|12|15|14|
255 "rotr $t4, $t4, 16 \n" // |17|16|19|18|
256 "rotr $t6, $t6, 16 \n" // |21|20|23|22|
257 "rotr $t7, $t7, 16 \n" // |25|24|27|26|
258 "rotr $t8, $t8, 16 \n" // |29|28|31|30|
259 "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
260 "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
261 "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
262 "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
263 "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
264 "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
265 "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
266 "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
267 "addiu %[src_uv], %[src_uv], -32 \n"
268 "addiu %[x], %[x], -1 \n"
269 "swr $t4, 0(%[dst_u]) \n"
270 "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
271 "swr $t6, 0(%[dst_v]) \n"
272 "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
273 "swr $t2, 4(%[dst_u]) \n"
274 "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
275 "swr $t3, 4(%[dst_v]) \n"
276 "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
277 "swr $t0, 8(%[dst_u]) \n"
278 "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
279 "swr $t1, 8(%[dst_v]) \n"
280 "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
281 "swr $t9, 12(%[dst_u]) \n"
282 "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
283 "swr $t5, 12(%[dst_v]) \n"
284 "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
285 "addiu %[dst_v], %[dst_v], 16 \n"
286 "bgtz %[x], 1b \n"
287 " addiu %[dst_u], %[dst_u], 16 \n"
288 "beqz %[y], 3f \n"
289 " nop \n"
290 "b 2f \n"
291 " nop \n"
292
293 "2: \n"
294 "lbu $t0, -2(%[src_uv]) \n"
295 "lbu $t1, -1(%[src_uv]) \n"
296 "addiu %[src_uv], %[src_uv], -2 \n"
297 "addiu %[y], %[y], -1 \n"
298 "sb $t0, 0(%[dst_u]) \n"
299 "sb $t1, 0(%[dst_v]) \n"
300 "addiu %[dst_u], %[dst_u], 1 \n"
301 "bgtz %[y], 2b \n"
302 " addiu %[dst_v], %[dst_v], 1 \n"
303
304 "3: \n"
305 ".set pop \n"
306 : [src_uv] "+r" (src_uv),
307 [dst_u] "+r" (dst_u),
308 [dst_v] "+r" (dst_v),
309 [x] "=&r" (x),
310 [y] "+r" (y)
311 : [width] "r" (width)
312 : "t0", "t1", "t2", "t3", "t4",
313 "t5", "t7", "t8", "t9"
314 );
315}
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000316
// Shared inner step of the I422 -> RGB row converters: converts 4 Y samples
// plus 2 U and 2 V samples into 4 pixels worth of 8-bit B, G and R values,
// left in halfword lanes as
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
// Lower-case lanes come from Y bytes 0 and 1 of the loaded word (paired with
// the first U/V sample), upper-case lanes from Y bytes 2 and 3 (paired with
// the second U/V sample).
//
// Expects the including asm block to have set up (replicated in both
// halfwords): $s0 = Y coefficient, $s1 = U-to-G coefficient, $s2 = V-to-G
// coefficient, $s3 = V-to-R coefficient, $s4 = Y bias, $s5 = 128. The U-to-B
// coefficient is not a register: it is formed as (U << 7) - U, i.e. 127.
// Advances %[u_buf] and %[v_buf] by 2; %[y_buf] is left for the caller to
// advance. Uses $t0-$t6, $t8 and $t9 as scratch.
//
// Only /* */ comments are used inside the macro body, because a // comment
// would swallow the backslash-continued lines that follow it.
// NOTE(review): mul.ph may leave HI/LO unpredictable on some cores -- confirm
// whether the users of this macro need "hi"/"lo" clobbers.
#define I422ToTransientMipsRGB                                                 \
  /* Load 4 Y, 2 U, 2 V and widen every byte to a 16-bit lane. */              \
  "lw              $t0, 0(%[y_buf])       \n"                                  \
  "lhu             $t1, 0(%[u_buf])       \n"                                  \
  "lhu             $t2, 0(%[v_buf])       \n"                                  \
  "preceu.ph.qbr   $t1, $t1               \n"                                  \
  "preceu.ph.qbr   $t2, $t2               \n"                                  \
  "preceu.ph.qbra  $t3, $t0               \n"                                  \
  "preceu.ph.qbla  $t0, $t0               \n"                                  \
  /* Remove the biases: U/V - $s5, Y - $s4. */                                 \
  "subu.ph         $t1, $t1, $s5          \n"                                  \
  "subu.ph         $t2, $t2, $s5          \n"                                  \
  "subu.ph         $t3, $t3, $s4          \n"                                  \
  "subu.ph         $t0, $t0, $s4          \n"                                  \
  /* Scale luma by $s0. */                                                     \
  "mul.ph          $t3, $t3, $s0          \n"                                  \
  "mul.ph          $t0, $t0, $s0          \n"                                  \
  /* Blue: U * 127 (as (U << 7) - U) + luma, then >> 6. */                     \
  "shll.ph         $t4, $t1, 0x7          \n"                                  \
  "subu.ph         $t4, $t4, $t1          \n"                                  \
  "mul.ph          $t6, $t1, $s1          \n"                                  \
  "mul.ph          $t1, $t2, $s2          \n"                                  \
  "addq_s.ph       $t5, $t4, $t3          \n"                                  \
  "addq_s.ph       $t4, $t4, $t0          \n"                                  \
  "shra.ph         $t5, $t5, 6            \n"                                  \
  "shra.ph         $t4, $t4, 6            \n"                                  \
  "addiu           %[u_buf], 2            \n"                                  \
  "addiu           %[v_buf], 2            \n"                                  \
  /* Green: U * $s1 + V * $s2 + luma, then >> 6. */                            \
  "addu.ph         $t6, $t6, $t1          \n"                                  \
  "mul.ph          $t1, $t2, $s3          \n"                                  \
  "addu.ph         $t9, $t6, $t3          \n"                                  \
  "addu.ph         $t8, $t6, $t0          \n"                                  \
  "shra.ph         $t9, $t9, 6            \n"                                  \
  "shra.ph         $t8, $t8, 6            \n"                                  \
  /* Red: V * $s3 + luma, then >> 6. */                                        \
  "addu.ph         $t2, $t1, $t3          \n"                                  \
  "addu.ph         $t1, $t1, $t0          \n"                                  \
  "shra.ph         $t2, $t2, 6            \n"                                  \
  "shra.ph         $t1, $t1, 6            \n"                                  \
  /* Clamp every lane to 0..255: subtract $s5, saturating shift left by 8, */  \
  /* arithmetic shift right by 8, add $s5 back.                            */  \
  "subu.ph         $t5, $t5, $s5          \n"                                  \
  "subu.ph         $t4, $t4, $s5          \n"                                  \
  "subu.ph         $t9, $t9, $s5          \n"                                  \
  "subu.ph         $t8, $t8, $s5          \n"                                  \
  "subu.ph         $t2, $t2, $s5          \n"                                  \
  "subu.ph         $t1, $t1, $s5          \n"                                  \
  "shll_s.ph       $t5, $t5, 8            \n"                                  \
  "shll_s.ph       $t4, $t4, 8            \n"                                  \
  "shll_s.ph       $t9, $t9, 8            \n"                                  \
  "shll_s.ph       $t8, $t8, 8            \n"                                  \
  "shll_s.ph       $t2, $t2, 8            \n"                                  \
  "shll_s.ph       $t1, $t1, 8            \n"                                  \
  "shra.ph         $t5, $t5, 8            \n"                                  \
  "shra.ph         $t4, $t4, 8            \n"                                  \
  "shra.ph         $t9, $t9, 8            \n"                                  \
  "shra.ph         $t8, $t8, 8            \n"                                  \
  "shra.ph         $t2, $t2, 8            \n"                                  \
  "shra.ph         $t1, $t1, 8            \n"                                  \
  "addu.ph         $t5, $t5, $s5          \n"                                  \
  "addu.ph         $t4, $t4, $s5          \n"                                  \
  "addu.ph         $t9, $t9, $s5          \n"                                  \
  "addu.ph         $t8, $t8, $s5          \n"                                  \
  "addu.ph         $t2, $t2, $s5          \n"                                  \
  "addu.ph         $t1, $t1, $s5          \n"
382
383void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
384 const uint8* u_buf,
385 const uint8* v_buf,
386 uint8* rgb_buf,
387 int width) {
388 __asm__ __volatile__ (
389 ".set push \n"
390 ".set noreorder \n"
391 "beqz %[width], 2f \n"
392 " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
393 "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
394 "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
395 "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
396 "repl.ph $s4, 16 \n" // |0|16|0|16|
397 "repl.ph $s5, 128 \n" // |128|128| // clipping
398 "lui $s6, 0xff00 \n"
399 "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
400 "1: \n"
401 I422ToTransientMipsRGB
402// Arranging into argb format
403 "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
404 "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
405 "addiu %[width], -4 \n"
406 "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
407 "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
408 "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
409
410 "addiu %[y_buf], 4 \n"
411 "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
412 "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
413 "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
414 "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
415 "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
416 "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
417 "sll $t9, $t9, 16 \n"
418 "sll $t8, $t8, 16 \n"
419 "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
420 "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
421// Store results.
422 "sw $t2, 0(%[rgb_buf]) \n"
423 "sw $t0, 4(%[rgb_buf]) \n"
424 "sw $t1, 8(%[rgb_buf]) \n"
425 "sw $t3, 12(%[rgb_buf]) \n"
426 "bnez %[width], 1b \n"
427 " addiu %[rgb_buf], 16 \n"
428 "2: \n"
429 ".set pop \n"
430 :[y_buf] "+r" (y_buf),
431 [u_buf] "+r" (u_buf),
432 [v_buf] "+r" (v_buf),
433 [width] "+r" (width),
434 [rgb_buf] "+r" (rgb_buf)
435 :
436 : "t0", "t1", "t2", "t3", "t4", "t5",
437 "t6", "t7", "t8", "t9",
438 "s0", "s1", "s2", "s3",
439 "s4", "s5", "s6"
440 );
441}
442
443void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
444 const uint8* u_buf,
445 const uint8* v_buf,
446 uint8* rgb_buf,
447 int width) {
448 __asm__ __volatile__ (
449 ".set push \n\t"
450 ".set noreorder \n\t"
451 "beqz %[width], 2f \n\t"
452 " repl.ph $s0, 74 \n\t" // |YG|YG| = |74|74|
453 "repl.ph $s1, -25 \n\t" // |UG|UG| = |-25|-25|
454 "repl.ph $s2, -52 \n\t" // |VG|VG| = |-52|-52|
455 "repl.ph $s3, 102 \n\t" // |VR|VR| = |102|102|
456 "repl.ph $s4, 16 \n\t" // |0|16|0|16|
457 "repl.ph $s5, 128 \n\t" // |128|128|
458 "lui $s6, 0xff00 \n\t"
459 "ori $s6, 0xff00 \n\t" // |ff|00|ff|00|
460 "1: \n"
461 I422ToTransientMipsRGB
462// Arranging into abgr format
463 "precr.qb.ph $t0, $t8, $t1 \n\t" // |G1|g1|R1|r1|
464 "precr.qb.ph $t3, $t9, $t2 \n\t" // |G0|g0|R0|r0|
465 "precrq.qb.ph $t8, $t0, $t3 \n\t" // |G1|R1|G0|R0|
466 "precr.qb.ph $t9, $t0, $t3 \n\t" // |g1|r1|g0|r0|
467
468 "precr.qb.ph $t2, $t4, $t5 \n\t" // |B1|b1|B0|b0|
469 "addiu %[width], -4 \n\t"
470 "addiu %[y_buf], 4 \n\t"
471 "preceu.ph.qbla $t1, $t2 \n\t" // |0 |B1|0 |B0|
472 "preceu.ph.qbra $t2, $t2 \n\t" // |0 |b1|0 |b0|
473 "or $t1, $t1, $s6 \n\t" // |ff|B1|ff|B0|
474 "or $t2, $t2, $s6 \n\t" // |ff|b1|ff|b0|
475 "precrq.ph.w $t0, $t2, $t9 \n\t" // |ff|b1|g1|r1|
476 "precrq.ph.w $t3, $t1, $t8 \n\t" // |ff|B1|G1|R1|
477 "sll $t9, $t9, 16 \n\t"
478 "sll $t8, $t8, 16 \n\t"
479 "packrl.ph $t2, $t2, $t9 \n\t" // |ff|b0|g0|r0|
480 "packrl.ph $t1, $t1, $t8 \n\t" // |ff|B0|G0|R0|
481// Store results.
482 "sw $t2, 0(%[rgb_buf]) \n\t"
483 "sw $t0, 4(%[rgb_buf]) \n\t"
484 "sw $t1, 8(%[rgb_buf]) \n\t"
485 "sw $t3, 12(%[rgb_buf]) \n\t"
486 "bnez %[width], 1b \n\t"
487 " addiu %[rgb_buf], 16 \n\t"
488 "2: \n\t"
489 ".set pop \n\t"
490 :[y_buf] "+r" (y_buf),
491 [u_buf] "+r" (u_buf),
492 [v_buf] "+r" (v_buf),
493 [width] "+r" (width),
494 [rgb_buf] "+r" (rgb_buf)
495 :
496 : "t0", "t1", "t2", "t3", "t4", "t5",
497 "t6", "t7", "t8", "t9",
498 "s0", "s1", "s2", "s3",
499 "s4", "s5", "s6"
500 );
501}
502
503void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
504 const uint8* u_buf,
505 const uint8* v_buf,
506 uint8* rgb_buf,
507 int width) {
508 __asm__ __volatile__ (
509 ".set push \n"
510 ".set noreorder \n"
511 "beqz %[width], 2f \n"
512 " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
513 "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
514 "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
515 "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
516 "repl.ph $s4, 16 \n" // |0|16|0|16|
517 "repl.ph $s5, 128 \n" // |128|128|
518 "lui $s6, 0xff \n"
519 "ori $s6, 0xff \n" // |00|ff|00|ff|
520 "1: \n"
521 I422ToTransientMipsRGB
522 // Arranging into bgra format
523 "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
524 "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
525 "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
526 "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
527
528 "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
529 "addiu %[width], -4 \n"
530 "addiu %[y_buf], 4 \n"
531 "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
532 "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
533 "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
534 "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
535 "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
536 "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
537 "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
538 "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
539 "sll $t1, $t1, 16 \n"
540 "sll $t2, $t2, 16 \n"
541 "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
542 "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
543// Store results.
544 "sw $t2, 0(%[rgb_buf]) \n"
545 "sw $t0, 4(%[rgb_buf]) \n"
546 "sw $t1, 8(%[rgb_buf]) \n"
547 "sw $t3, 12(%[rgb_buf]) \n"
548 "bnez %[width], 1b \n"
549 " addiu %[rgb_buf], 16 \n"
550 "2: \n"
551 ".set pop \n"
552 :[y_buf] "+r" (y_buf),
553 [u_buf] "+r" (u_buf),
554 [v_buf] "+r" (v_buf),
555 [width] "+r" (width),
556 [rgb_buf] "+r" (rgb_buf)
557 :
558 : "t0", "t1", "t2", "t3", "t4", "t5",
559 "t6", "t7", "t8", "t9",
560 "s0", "s1", "s2", "s3",
561 "s4", "s5", "s6"
562 );
563}
fbarchard@google.com834b7482012-12-11 17:59:26 +0000564#endif // __mips_dsp_rev >= 2
fbarchard@google.comca410052012-10-14 06:01:19 +0000565
566#ifdef __cplusplus
567} // extern "C"
568} // namespace libyuv
569#endif