blob: 6029f55426032f9a2aa0eb7d4b562ceae785ba6e [file] [log] [blame]
/*
 *  Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "libyuv/row.h"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000012
fbarchard@google.comca410052012-10-14 06:01:19 +000013#ifdef __cplusplus
14namespace libyuv {
15extern "C" {
16#endif
17
18#if !defined(YUV_DISABLE_ASM) && defined(__mips__)
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +000019#if defined HAS_COPYROW_MIPS
// memcpy_MIPS is only declared here; its definition is not visible in this
// file. Note that it takes its arguments in (dst, src, count) order.
extern "C" void memcpy_MIPS(uint8* dst, const uint8* src, int count);
// Row-copy entry point that simply forwards to memcpy_MIPS.
// CopyRow_MIPS receives (src, dst) while memcpy_MIPS expects (dst, src), so
// the two pointers are deliberately swapped in the call. |count| is passed
// through unchanged (presumably a byte count -- confirm against memcpy_MIPS).
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
  memcpy_MIPS(dst, src, count);
}
24#endif
25
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +000026#ifdef HAS_SPLITUVROW_MIPS_DSPR2
27void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
28 int width) {
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000029 __asm__ __volatile__ (
30 ".set push \n"
31 ".set noreorder \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000032 "srl $t4, %[width], 4 \n" // multiplies of 16
33 "blez $t4, 2f \n"
34 " andi %[width], %[width], 0xf \n" // residual
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000035
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000036 "1: \n"
37 "addiu $t4, $t4, -1 \n"
38 "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
39 "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
40 "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
41 "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
42 "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
43 "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
44 "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
45 "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
46 "addiu %[src_uv], %[src_uv], 32 \n"
47 "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
48 "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
49 "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
50 "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
51 "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
52 "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
53 "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
54 "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
55 "sw $t9, 0(%[dst_v]) \n"
56 "sw $t0, 0(%[dst_u]) \n"
57 "sw $t1, 4(%[dst_v]) \n"
58 "sw $t2, 4(%[dst_u]) \n"
59 "sw $t3, 8(%[dst_v]) \n"
60 "sw $t5, 8(%[dst_u]) \n"
61 "sw $t6, 12(%[dst_v]) \n"
62 "sw $t7, 12(%[dst_u]) \n"
63 "addiu %[dst_v], %[dst_v], 16 \n"
64 "bgtz $t4, 1b \n"
65 " addiu %[dst_u], %[dst_u], 16 \n"
66
67 "beqz %[width], 3f \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000068 " nop \n"
69
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000070 "2: \n"
71 "lbu $t0, 0(%[src_uv]) \n"
72 "lbu $t1, 1(%[src_uv]) \n"
73 "addiu %[src_uv], %[src_uv], 2 \n"
74 "addiu %[width], %[width], -1 \n"
75 "sb $t0, 0(%[dst_u]) \n"
76 "sb $t1, 0(%[dst_v]) \n"
77 "addiu %[dst_u], %[dst_u], 1 \n"
78 "bgtz %[width], 2b \n"
79 " addiu %[dst_v], %[dst_v], 1 \n"
80
81 "3: \n"
82 ".set pop \n"
83 : [src_uv] "+r" (src_uv),
84 [width] "+r" (width),
85 [dst_u] "+r" (dst_u),
86 [dst_v] "+r" (dst_v)
87 :
88 : "t0", "t1", "t2", "t3",
89 "t4", "t5", "t6", "t7", "t8", "t9"
90 );
91}
92
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +000093void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
94 uint8* dst_v, int width) {
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000095 __asm__ __volatile__ (
96 ".set push \n"
97 ".set noreorder \n"
98 "srl $t4, %[width], 4 \n" // multiplies of 16
99 "blez $t4, 2f \n"
100 " andi %[width], %[width], 0xf \n" // residual
101
102 "1: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000103 "addiu $t4, $t4, -1 \n"
104 "lwr $t0, 0(%[src_uv]) \n"
105 "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
106 "lwr $t1, 4(%[src_uv]) \n"
107 "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
108 "lwr $t2, 8(%[src_uv]) \n"
109 "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
110 "lwr $t3, 12(%[src_uv]) \n"
111 "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
112 "lwr $t5, 16(%[src_uv]) \n"
113 "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
114 "lwr $t6, 20(%[src_uv]) \n"
115 "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
116 "lwr $t7, 24(%[src_uv]) \n"
117 "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
118 "lwr $t8, 28(%[src_uv]) \n"
119 "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000120 "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
121 "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
122 "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
123 "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
124 "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
125 "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
126 "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
127 "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
128 "addiu %[src_uv], %[src_uv], 32 \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000129 "swr $t9, 0(%[dst_v]) \n"
130 "swl $t9, 3(%[dst_v]) \n"
131 "swr $t0, 0(%[dst_u]) \n"
132 "swl $t0, 3(%[dst_u]) \n"
133 "swr $t1, 4(%[dst_v]) \n"
134 "swl $t1, 7(%[dst_v]) \n"
135 "swr $t2, 4(%[dst_u]) \n"
136 "swl $t2, 7(%[dst_u]) \n"
137 "swr $t3, 8(%[dst_v]) \n"
138 "swl $t3, 11(%[dst_v]) \n"
139 "swr $t5, 8(%[dst_u]) \n"
140 "swl $t5, 11(%[dst_u]) \n"
141 "swr $t6, 12(%[dst_v]) \n"
142 "swl $t6, 15(%[dst_v]) \n"
143 "swr $t7, 12(%[dst_u]) \n"
144 "swl $t7, 15(%[dst_u]) \n"
145 "addiu %[dst_u], %[dst_u], 16 \n"
146 "bgtz $t4, 1b \n"
147 " addiu %[dst_v], %[dst_v], 16 \n"
148
149 "beqz %[width], 3f \n"
150 " nop \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000151
fbarchard@google.comdb694ed2012-10-17 21:54:04 +0000152 "2: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000153 "lbu $t0, 0(%[src_uv]) \n"
154 "lbu $t1, 1(%[src_uv]) \n"
155 "addiu %[src_uv], %[src_uv], 2 \n"
156 "addiu %[width], %[width], -1 \n"
157 "sb $t0, 0(%[dst_u]) \n"
158 "sb $t1, 0(%[dst_v]) \n"
159 "addiu %[dst_u], %[dst_u], 1 \n"
160 "bgtz %[width], 2b \n"
161 " addiu %[dst_v], %[dst_v], 1 \n"
162
fbarchard@google.comdb694ed2012-10-17 21:54:04 +0000163 "3: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000164 ".set pop \n"
165 : [src_uv] "+r" (src_uv),
166 [width] "+r" (width),
167 [dst_u] "+r" (dst_u),
168 [dst_v] "+r" (dst_v)
169 :
170 : "t0", "t1", "t2", "t3",
171 "t4", "t5", "t6", "t7", "t8", "t9"
fbarchard@google.comca410052012-10-14 06:01:19 +0000172 );
173}
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +0000174#endif // HAS_SPLITUVROW_MIPS_DSPR2
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000175
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000176#ifdef HAS_MIRRORROW_MIPS_DSPR2
177void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
178 __asm__ __volatile__ (
179 ".set push \n"
180 ".set noreorder \n"
181
182 "srl $t4, %[width], 4 \n" // multiplies of 16
183 "andi $t5, %[width], 0xf \n"
184 "blez $t4, 2f \n"
185 " addu %[src], %[src], %[width] \n" // src += width
186
187 "1: \n"
188 "lw $t0, -16(%[src]) \n" // |3|2|1|0|
189 "lw $t1, -12(%[src]) \n" // |7|6|5|4|
190 "lw $t2, -8(%[src]) \n" // |11|10|9|8|
191 "lw $t3, -4(%[src]) \n" // |15|14|13|12|
192 "wsbh $t0, $t0 \n" // |2|3|0|1|
193 "wsbh $t1, $t1 \n" // |6|7|4|5|
194 "wsbh $t2, $t2 \n" // |10|11|8|9|
195 "wsbh $t3, $t3 \n" // |14|15|12|13|
196 "rotr $t0, $t0, 16 \n" // |0|1|2|3|
197 "rotr $t1, $t1, 16 \n" // |4|5|6|7|
198 "rotr $t2, $t2, 16 \n" // |8|9|10|11|
199 "rotr $t3, $t3, 16 \n" // |12|13|14|15|
200 "addiu %[src], %[src], -16 \n"
201 "addiu $t4, $t4, -1 \n"
202 "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
203 "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
204 "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
205 "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
206 "bgtz $t4, 1b \n"
207 " addiu %[dst], %[dst], 16 \n"
208 "beqz $t5, 3f \n"
209 " nop \n"
210
211 "2: \n"
212 "lbu $t0, -1(%[src]) \n"
213 "addiu $t5, $t5, -1 \n"
214 "addiu %[src], %[src], -1 \n"
215 "sb $t0, 0(%[dst]) \n"
216 "bgez $t5, 2b \n"
217 " addiu %[dst], %[dst], 1 \n"
218
219 "3: \n"
220 ".set pop \n"
221 : [src] "+r" (src), [dst] "+r" (dst)
222 : [width] "r" (width)
223 : "t0", "t1", "t2", "t3", "t4", "t5"
224 );
225}
226#endif // HAS_MIRRORROW_MIPS_DSPR2
227
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +0000228#ifdef HAS_MIRRORUVROW_MIPS_DSPR2
fbarchard@google.combdf7cb52012-11-05 23:40:11 +0000229void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000230 int width) {
231 int x = 0;
232 int y = 0;
233 __asm__ __volatile__ (
234 ".set push \n"
235 ".set noreorder \n"
236
237 "addu $t4, %[width], %[width] \n"
238 "srl %[x], %[width], 4 \n"
239 "andi %[y], %[width], 0xf \n"
240 "blez %[x], 2f \n"
241 " addu %[src_uv], %[src_uv], $t4 \n"
242
243 "1: \n"
244 "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
245 "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
246 "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
247 "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
248 "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
249 "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
250 "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
251 "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
252
253 "rotr $t0, $t0, 16 \n" // |1|0|3|2|
254 "rotr $t1, $t1, 16 \n" // |5|4|7|6|
255 "rotr $t2, $t2, 16 \n" // |9|8|11|10|
256 "rotr $t3, $t3, 16 \n" // |13|12|15|14|
257 "rotr $t4, $t4, 16 \n" // |17|16|19|18|
258 "rotr $t6, $t6, 16 \n" // |21|20|23|22|
259 "rotr $t7, $t7, 16 \n" // |25|24|27|26|
260 "rotr $t8, $t8, 16 \n" // |29|28|31|30|
261 "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
262 "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
263 "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
264 "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
265 "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
266 "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
267 "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
268 "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
269 "addiu %[src_uv], %[src_uv], -32 \n"
270 "addiu %[x], %[x], -1 \n"
271 "swr $t4, 0(%[dst_u]) \n"
272 "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
273 "swr $t6, 0(%[dst_v]) \n"
274 "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
275 "swr $t2, 4(%[dst_u]) \n"
276 "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
277 "swr $t3, 4(%[dst_v]) \n"
278 "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
279 "swr $t0, 8(%[dst_u]) \n"
280 "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
281 "swr $t1, 8(%[dst_v]) \n"
282 "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
283 "swr $t9, 12(%[dst_u]) \n"
284 "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
285 "swr $t5, 12(%[dst_v]) \n"
286 "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
287 "addiu %[dst_v], %[dst_v], 16 \n"
288 "bgtz %[x], 1b \n"
289 " addiu %[dst_u], %[dst_u], 16 \n"
290 "beqz %[y], 3f \n"
291 " nop \n"
292 "b 2f \n"
293 " nop \n"
294
295 "2: \n"
296 "lbu $t0, -2(%[src_uv]) \n"
297 "lbu $t1, -1(%[src_uv]) \n"
298 "addiu %[src_uv], %[src_uv], -2 \n"
299 "addiu %[y], %[y], -1 \n"
300 "sb $t0, 0(%[dst_u]) \n"
301 "sb $t1, 0(%[dst_v]) \n"
302 "addiu %[dst_u], %[dst_u], 1 \n"
303 "bgtz %[y], 2b \n"
304 " addiu %[dst_v], %[dst_v], 1 \n"
305
306 "3: \n"
307 ".set pop \n"
308 : [src_uv] "+r" (src_uv),
309 [dst_u] "+r" (dst_u),
310 [dst_v] "+r" (dst_v),
311 [x] "=&r" (x),
312 [y] "+r" (y)
313 : [width] "r" (width)
314 : "t0", "t1", "t2", "t3", "t4",
315 "t5", "t7", "t8", "t9"
316 );
317}
fbarchard@google.comf08ac6b2012-11-15 00:21:14 +0000318#endif // HAS_MIRRORUVROW_MIPS_DSPR2
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000319
// Assembly fragment shared by the I422To*Row_MIPS_DSPR2 functions.
// Convert (4 Y and 2 VU) I422 and arrange RGB values into
// t5 = | 0 | B0 | 0 | b0 |
// t4 = | 0 | B1 | 0 | b1 |
// t9 = | 0 | G0 | 0 | g0 |
// t8 = | 0 | G1 | 0 | g1 |
// t2 = | 0 | R0 | 0 | r0 |
// t1 = | 0 | R1 | 0 | r1 |
// i.e. every result is a pair of 16-bit lanes, each holding one channel value
// clamped to 0..255.
//
// Inputs: operands %[y_buf], %[u_buf], %[v_buf] and registers $s0..$s5, which
// the enclosing asm must preload with replicated halfword constants. The
// users in this file load $s0 = 74, $s1 = -25, $s2 = -52, $s3 = 102,
// $s4 = 16 and $s5 = 128.
// Side effects: advances %[u_buf] and %[v_buf] by 2; %[y_buf] is NOT advanced
// here. Writes $t0-$t6, $t8 and $t9.
// NOTE(review): mul.ph may leave HI/LO unpredictable on some cores -- confirm
// against the DSP ASE manual and add "hi"/"lo" clobbers in the users if so.
#define I422ToTransientMipsRGB \
  /* Load 4 Y, 2 U and 2 V bytes and widen them to 16-bit lanes. */ \
  "lw $t0, 0(%[y_buf]) \n" \
  "lhu $t1, 0(%[u_buf]) \n" \
  "lhu $t2, 0(%[v_buf]) \n" \
  "preceu.ph.qbr $t1, $t1 \n" \
  "preceu.ph.qbr $t2, $t2 \n" \
  "preceu.ph.qbra $t3, $t0 \n" \
  "preceu.ph.qbla $t0, $t0 \n" \
  /* Remove the biases: $s5 from U and V, $s4 from Y. */ \
  "subu.ph $t1, $t1, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t3, $t3, $s4 \n" \
  "subu.ph $t0, $t0, $s4 \n" \
  /* Scale Y by $s0. */ \
  "mul.ph $t3, $t3, $s0 \n" \
  "mul.ph $t0, $t0, $s0 \n" \
  /* Blue chroma term: (U << 7) - U = U * 127. */ \
  "shll.ph $t4, $t1, 0x7 \n" \
  "subu.ph $t4, $t4, $t1 \n" \
  /* Green chroma terms: U * $s1 and V * $s2 (summed further down). */ \
  "mul.ph $t6, $t1, $s1 \n" \
  "mul.ph $t1, $t2, $s2 \n" \
  /* Blue = (U term + Y term) >> 6, with a saturating add. */ \
  "addq_s.ph $t5, $t4, $t3 \n" \
  "addq_s.ph $t4, $t4, $t0 \n" \
  "shra.ph $t5, $t5, 6 \n" \
  "shra.ph $t4, $t4, 6 \n" \
  "addiu %[u_buf], 2 \n" \
  "addiu %[v_buf], 2 \n" \
  /* Green = (U * $s1 + V * $s2 + Y term) >> 6. $t1 is then reused for the */ \
  /* red chroma term V * $s3.                                               */ \
  "addu.ph $t6, $t6, $t1 \n" \
  "mul.ph $t1, $t2, $s3 \n" \
  "addu.ph $t9, $t6, $t3 \n" \
  "addu.ph $t8, $t6, $t0 \n" \
  "shra.ph $t9, $t9, 6 \n" \
  "shra.ph $t8, $t8, 6 \n" \
  /* Red = (V * $s3 + Y term) >> 6. */ \
  "addu.ph $t2, $t1, $t3 \n" \
  "addu.ph $t1, $t1, $t0 \n" \
  "shra.ph $t2, $t2, 6 \n" \
  "shra.ph $t1, $t1, 6 \n" \
  /* Clamp every lane to 0..255: subtract 128 ($s5), saturate to 8 signed */ \
  /* bits via a saturating shift left by 8 and an arithmetic shift back,  */ \
  /* then add 128 again.                                                  */ \
  "subu.ph $t5, $t5, $s5 \n" \
  "subu.ph $t4, $t4, $s5 \n" \
  "subu.ph $t9, $t9, $s5 \n" \
  "subu.ph $t8, $t8, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t1, $t1, $s5 \n" \
  "shll_s.ph $t5, $t5, 8 \n" \
  "shll_s.ph $t4, $t4, 8 \n" \
  "shll_s.ph $t9, $t9, 8 \n" \
  "shll_s.ph $t8, $t8, 8 \n" \
  "shll_s.ph $t2, $t2, 8 \n" \
  "shll_s.ph $t1, $t1, 8 \n" \
  "shra.ph $t5, $t5, 8 \n" \
  "shra.ph $t4, $t4, 8 \n" \
  "shra.ph $t9, $t9, 8 \n" \
  "shra.ph $t8, $t8, 8 \n" \
  "shra.ph $t2, $t2, 8 \n" \
  "shra.ph $t1, $t1, 8 \n" \
  "addu.ph $t5, $t5, $s5 \n" \
  "addu.ph $t4, $t4, $s5 \n" \
  "addu.ph $t9, $t9, $s5 \n" \
  "addu.ph $t8, $t8, $s5 \n" \
  "addu.ph $t2, $t2, $s5 \n" \
  "addu.ph $t1, $t1, $s5 \n"
385
386void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
387 const uint8* u_buf,
388 const uint8* v_buf,
389 uint8* rgb_buf,
390 int width) {
391 __asm__ __volatile__ (
392 ".set push \n"
393 ".set noreorder \n"
394 "beqz %[width], 2f \n"
395 " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
396 "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
397 "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
398 "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
399 "repl.ph $s4, 16 \n" // |0|16|0|16|
400 "repl.ph $s5, 128 \n" // |128|128| // clipping
401 "lui $s6, 0xff00 \n"
402 "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
403 "1: \n"
404 I422ToTransientMipsRGB
405// Arranging into argb format
406 "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
407 "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
408 "addiu %[width], -4 \n"
409 "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
410 "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
411 "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
412
413 "addiu %[y_buf], 4 \n"
414 "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
415 "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
416 "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
417 "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
418 "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
419 "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
420 "sll $t9, $t9, 16 \n"
421 "sll $t8, $t8, 16 \n"
422 "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
423 "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
424// Store results.
425 "sw $t2, 0(%[rgb_buf]) \n"
426 "sw $t0, 4(%[rgb_buf]) \n"
427 "sw $t1, 8(%[rgb_buf]) \n"
428 "sw $t3, 12(%[rgb_buf]) \n"
429 "bnez %[width], 1b \n"
430 " addiu %[rgb_buf], 16 \n"
431 "2: \n"
432 ".set pop \n"
433 :[y_buf] "+r" (y_buf),
434 [u_buf] "+r" (u_buf),
435 [v_buf] "+r" (v_buf),
436 [width] "+r" (width),
437 [rgb_buf] "+r" (rgb_buf)
438 :
439 : "t0", "t1", "t2", "t3", "t4", "t5",
440 "t6", "t7", "t8", "t9",
441 "s0", "s1", "s2", "s3",
442 "s4", "s5", "s6"
443 );
444}
445
446void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
447 const uint8* u_buf,
448 const uint8* v_buf,
449 uint8* rgb_buf,
450 int width) {
451 __asm__ __volatile__ (
452 ".set push \n\t"
453 ".set noreorder \n\t"
454 "beqz %[width], 2f \n\t"
455 " repl.ph $s0, 74 \n\t" // |YG|YG| = |74|74|
456 "repl.ph $s1, -25 \n\t" // |UG|UG| = |-25|-25|
457 "repl.ph $s2, -52 \n\t" // |VG|VG| = |-52|-52|
458 "repl.ph $s3, 102 \n\t" // |VR|VR| = |102|102|
459 "repl.ph $s4, 16 \n\t" // |0|16|0|16|
460 "repl.ph $s5, 128 \n\t" // |128|128|
461 "lui $s6, 0xff00 \n\t"
462 "ori $s6, 0xff00 \n\t" // |ff|00|ff|00|
463 "1: \n"
464 I422ToTransientMipsRGB
465// Arranging into abgr format
466 "precr.qb.ph $t0, $t8, $t1 \n\t" // |G1|g1|R1|r1|
467 "precr.qb.ph $t3, $t9, $t2 \n\t" // |G0|g0|R0|r0|
468 "precrq.qb.ph $t8, $t0, $t3 \n\t" // |G1|R1|G0|R0|
469 "precr.qb.ph $t9, $t0, $t3 \n\t" // |g1|r1|g0|r0|
470
471 "precr.qb.ph $t2, $t4, $t5 \n\t" // |B1|b1|B0|b0|
472 "addiu %[width], -4 \n\t"
473 "addiu %[y_buf], 4 \n\t"
474 "preceu.ph.qbla $t1, $t2 \n\t" // |0 |B1|0 |B0|
475 "preceu.ph.qbra $t2, $t2 \n\t" // |0 |b1|0 |b0|
476 "or $t1, $t1, $s6 \n\t" // |ff|B1|ff|B0|
477 "or $t2, $t2, $s6 \n\t" // |ff|b1|ff|b0|
478 "precrq.ph.w $t0, $t2, $t9 \n\t" // |ff|b1|g1|r1|
479 "precrq.ph.w $t3, $t1, $t8 \n\t" // |ff|B1|G1|R1|
480 "sll $t9, $t9, 16 \n\t"
481 "sll $t8, $t8, 16 \n\t"
482 "packrl.ph $t2, $t2, $t9 \n\t" // |ff|b0|g0|r0|
483 "packrl.ph $t1, $t1, $t8 \n\t" // |ff|B0|G0|R0|
484// Store results.
485 "sw $t2, 0(%[rgb_buf]) \n\t"
486 "sw $t0, 4(%[rgb_buf]) \n\t"
487 "sw $t1, 8(%[rgb_buf]) \n\t"
488 "sw $t3, 12(%[rgb_buf]) \n\t"
489 "bnez %[width], 1b \n\t"
490 " addiu %[rgb_buf], 16 \n\t"
491 "2: \n\t"
492 ".set pop \n\t"
493 :[y_buf] "+r" (y_buf),
494 [u_buf] "+r" (u_buf),
495 [v_buf] "+r" (v_buf),
496 [width] "+r" (width),
497 [rgb_buf] "+r" (rgb_buf)
498 :
499 : "t0", "t1", "t2", "t3", "t4", "t5",
500 "t6", "t7", "t8", "t9",
501 "s0", "s1", "s2", "s3",
502 "s4", "s5", "s6"
503 );
504}
505
506void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
507 const uint8* u_buf,
508 const uint8* v_buf,
509 uint8* rgb_buf,
510 int width) {
511 __asm__ __volatile__ (
512 ".set push \n"
513 ".set noreorder \n"
514 "beqz %[width], 2f \n"
515 " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
516 "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
517 "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
518 "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
519 "repl.ph $s4, 16 \n" // |0|16|0|16|
520 "repl.ph $s5, 128 \n" // |128|128|
521 "lui $s6, 0xff \n"
522 "ori $s6, 0xff \n" // |00|ff|00|ff|
523 "1: \n"
524 I422ToTransientMipsRGB
525 // Arranging into bgra format
526 "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
527 "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
528 "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
529 "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
530
531 "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
532 "addiu %[width], -4 \n"
533 "addiu %[y_buf], 4 \n"
534 "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
535 "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
536 "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
537 "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
538 "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
539 "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
540 "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
541 "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
542 "sll $t1, $t1, 16 \n"
543 "sll $t2, $t2, 16 \n"
544 "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
545 "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
546// Store results.
547 "sw $t2, 0(%[rgb_buf]) \n"
548 "sw $t0, 4(%[rgb_buf]) \n"
549 "sw $t1, 8(%[rgb_buf]) \n"
550 "sw $t3, 12(%[rgb_buf]) \n"
551 "bnez %[width], 1b \n"
552 " addiu %[rgb_buf], 16 \n"
553 "2: \n"
554 ".set pop \n"
555 :[y_buf] "+r" (y_buf),
556 [u_buf] "+r" (u_buf),
557 [v_buf] "+r" (v_buf),
558 [width] "+r" (width),
559 [rgb_buf] "+r" (rgb_buf)
560 :
561 : "t0", "t1", "t2", "t3", "t4", "t5",
562 "t6", "t7", "t8", "t9",
563 "s0", "s1", "s2", "s3",
564 "s4", "s5", "s6"
565 );
566}
567
fbarchard@google.comca410052012-10-14 06:01:19 +0000568#endif // __mips__
569
570#ifdef __cplusplus
571} // extern "C"
572} // namespace libyuv
573#endif