blob: 48759e09d83553b3acd28a94a266977b107de8cd [file] [log] [blame]
/*
 * Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "libyuv/row.h"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000012
fbarchard@google.comca410052012-10-14 06:01:19 +000013#ifdef __cplusplus
14namespace libyuv {
15extern "C" {
16#endif
17
18#if !defined(YUV_DISABLE_ASM) && defined(__mips__)
#ifdef HAS_COPYROW_MIPS
// Prototype of the copy routine that does the real work; it is defined
// outside this file. Note its (dst, src) argument order.
extern "C" void memcpy_MIPS(uint8* dst, const uint8* src, int count);

// Row-copy entry point. Takes (src, dst) and forwards to memcpy_MIPS, which
// expects (dst, src); |count| is passed through unchanged (presumably a byte
// count -- confirm against memcpy_MIPS).
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
  memcpy_MIPS(dst, src, count);
}
#endif  // HAS_COPYROW_MIPS
25
fbarchard@google.comca410052012-10-14 06:01:19 +000026#ifdef HAS_SPLITUV_MIPS_DSPR2
fbarchard@google.comca410052012-10-14 06:01:19 +000027void SplitUV_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
28 int width) {
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000029 __asm__ __volatile__ (
30 ".set push \n"
31 ".set noreorder \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000032 "srl $t4, %[width], 4 \n" // multiplies of 16
33 "blez $t4, 2f \n"
34 " andi %[width], %[width], 0xf \n" // residual
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000035
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000036 "1: \n"
37 "addiu $t4, $t4, -1 \n"
38 "lw $t0, 0(%[src_uv]) \n" // V1 | U1 | V0 | U0
39 "lw $t1, 4(%[src_uv]) \n" // V3 | U3 | V2 | U2
40 "lw $t2, 8(%[src_uv]) \n" // V5 | U5 | V4 | U4
41 "lw $t3, 12(%[src_uv]) \n" // V7 | U7 | V6 | U6
42 "lw $t5, 16(%[src_uv]) \n" // V9 | U9 | V8 | U8
43 "lw $t6, 20(%[src_uv]) \n" // V11 | U11 | V10 | U10
44 "lw $t7, 24(%[src_uv]) \n" // V13 | U13 | V12 | U12
45 "lw $t8, 28(%[src_uv]) \n" // V15 | U15 | V14 | U14
46 "addiu %[src_uv], %[src_uv], 32 \n"
47 "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
48 "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
49 "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
50 "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
51 "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
52 "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
53 "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
54 "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
55 "sw $t9, 0(%[dst_v]) \n"
56 "sw $t0, 0(%[dst_u]) \n"
57 "sw $t1, 4(%[dst_v]) \n"
58 "sw $t2, 4(%[dst_u]) \n"
59 "sw $t3, 8(%[dst_v]) \n"
60 "sw $t5, 8(%[dst_u]) \n"
61 "sw $t6, 12(%[dst_v]) \n"
62 "sw $t7, 12(%[dst_u]) \n"
63 "addiu %[dst_v], %[dst_v], 16 \n"
64 "bgtz $t4, 1b \n"
65 " addiu %[dst_u], %[dst_u], 16 \n"
66
67 "beqz %[width], 3f \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +000068 " nop \n"
69
fbarchard@google.comdb694ed2012-10-17 21:54:04 +000070 "2: \n"
71 "lbu $t0, 0(%[src_uv]) \n"
72 "lbu $t1, 1(%[src_uv]) \n"
73 "addiu %[src_uv], %[src_uv], 2 \n"
74 "addiu %[width], %[width], -1 \n"
75 "sb $t0, 0(%[dst_u]) \n"
76 "sb $t1, 0(%[dst_v]) \n"
77 "addiu %[dst_u], %[dst_u], 1 \n"
78 "bgtz %[width], 2b \n"
79 " addiu %[dst_v], %[dst_v], 1 \n"
80
81 "3: \n"
82 ".set pop \n"
83 : [src_uv] "+r" (src_uv),
84 [width] "+r" (width),
85 [dst_u] "+r" (dst_u),
86 [dst_v] "+r" (dst_v)
87 :
88 : "t0", "t1", "t2", "t3",
89 "t4", "t5", "t6", "t7", "t8", "t9"
90 );
91}
92
93void SplitUV_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
94 uint8* dst_v, int width) {
95 __asm__ __volatile__ (
96 ".set push \n"
97 ".set noreorder \n"
98 "srl $t4, %[width], 4 \n" // multiplies of 16
99 "blez $t4, 2f \n"
100 " andi %[width], %[width], 0xf \n" // residual
101
102 "1: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000103 "addiu $t4, $t4, -1 \n"
104 "lwr $t0, 0(%[src_uv]) \n"
105 "lwl $t0, 3(%[src_uv]) \n" // V1 | U1 | V0 | U0
106 "lwr $t1, 4(%[src_uv]) \n"
107 "lwl $t1, 7(%[src_uv]) \n" // V3 | U3 | V2 | U2
108 "lwr $t2, 8(%[src_uv]) \n"
109 "lwl $t2, 11(%[src_uv]) \n" // V5 | U5 | V4 | U4
110 "lwr $t3, 12(%[src_uv]) \n"
111 "lwl $t3, 15(%[src_uv]) \n" // V7 | U7 | V6 | U6
112 "lwr $t5, 16(%[src_uv]) \n"
113 "lwl $t5, 19(%[src_uv]) \n" // V9 | U9 | V8 | U8
114 "lwr $t6, 20(%[src_uv]) \n"
115 "lwl $t6, 23(%[src_uv]) \n" // V11 | U11 | V10 | U10
116 "lwr $t7, 24(%[src_uv]) \n"
117 "lwl $t7, 27(%[src_uv]) \n" // V13 | U13 | V12 | U12
118 "lwr $t8, 28(%[src_uv]) \n"
119 "lwl $t8, 31(%[src_uv]) \n" // V15 | U15 | V14 | U14
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000120 "precrq.qb.ph $t9, $t1, $t0 \n" // V3 | V2 | V1 | V0
121 "precr.qb.ph $t0, $t1, $t0 \n" // U3 | U2 | U1 | U0
122 "precrq.qb.ph $t1, $t3, $t2 \n" // V7 | V6 | V5 | V4
123 "precr.qb.ph $t2, $t3, $t2 \n" // U7 | U6 | U5 | U4
124 "precrq.qb.ph $t3, $t6, $t5 \n" // V11 | V10 | V9 | V8
125 "precr.qb.ph $t5, $t6, $t5 \n" // U11 | U10 | U9 | U8
126 "precrq.qb.ph $t6, $t8, $t7 \n" // V15 | V14 | V13 | V12
127 "precr.qb.ph $t7, $t8, $t7 \n" // U15 | U14 | U13 | U12
128 "addiu %[src_uv], %[src_uv], 32 \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000129 "swr $t9, 0(%[dst_v]) \n"
130 "swl $t9, 3(%[dst_v]) \n"
131 "swr $t0, 0(%[dst_u]) \n"
132 "swl $t0, 3(%[dst_u]) \n"
133 "swr $t1, 4(%[dst_v]) \n"
134 "swl $t1, 7(%[dst_v]) \n"
135 "swr $t2, 4(%[dst_u]) \n"
136 "swl $t2, 7(%[dst_u]) \n"
137 "swr $t3, 8(%[dst_v]) \n"
138 "swl $t3, 11(%[dst_v]) \n"
139 "swr $t5, 8(%[dst_u]) \n"
140 "swl $t5, 11(%[dst_u]) \n"
141 "swr $t6, 12(%[dst_v]) \n"
142 "swl $t6, 15(%[dst_v]) \n"
143 "swr $t7, 12(%[dst_u]) \n"
144 "swl $t7, 15(%[dst_u]) \n"
145 "addiu %[dst_u], %[dst_u], 16 \n"
146 "bgtz $t4, 1b \n"
147 " addiu %[dst_v], %[dst_v], 16 \n"
148
149 "beqz %[width], 3f \n"
150 " nop \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000151
fbarchard@google.comdb694ed2012-10-17 21:54:04 +0000152 "2: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000153 "lbu $t0, 0(%[src_uv]) \n"
154 "lbu $t1, 1(%[src_uv]) \n"
155 "addiu %[src_uv], %[src_uv], 2 \n"
156 "addiu %[width], %[width], -1 \n"
157 "sb $t0, 0(%[dst_u]) \n"
158 "sb $t1, 0(%[dst_v]) \n"
159 "addiu %[dst_u], %[dst_u], 1 \n"
160 "bgtz %[width], 2b \n"
161 " addiu %[dst_v], %[dst_v], 1 \n"
162
fbarchard@google.comdb694ed2012-10-17 21:54:04 +0000163 "3: \n"
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000164 ".set pop \n"
165 : [src_uv] "+r" (src_uv),
166 [width] "+r" (width),
167 [dst_u] "+r" (dst_u),
168 [dst_v] "+r" (dst_v)
169 :
170 : "t0", "t1", "t2", "t3",
171 "t4", "t5", "t6", "t7", "t8", "t9"
fbarchard@google.comca410052012-10-14 06:01:19 +0000172 );
173}
174#endif // HAS_SPLITUV_MIPS_DSPR2
fbarchard@google.combb6bddc2012-10-14 06:41:17 +0000175
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000176#ifdef HAS_MIRRORROW_MIPS_DSPR2
177void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width) {
178 __asm__ __volatile__ (
179 ".set push \n"
180 ".set noreorder \n"
181
182 "srl $t4, %[width], 4 \n" // multiplies of 16
183 "andi $t5, %[width], 0xf \n"
184 "blez $t4, 2f \n"
185 " addu %[src], %[src], %[width] \n" // src += width
186
187 "1: \n"
188 "lw $t0, -16(%[src]) \n" // |3|2|1|0|
189 "lw $t1, -12(%[src]) \n" // |7|6|5|4|
190 "lw $t2, -8(%[src]) \n" // |11|10|9|8|
191 "lw $t3, -4(%[src]) \n" // |15|14|13|12|
192 "wsbh $t0, $t0 \n" // |2|3|0|1|
193 "wsbh $t1, $t1 \n" // |6|7|4|5|
194 "wsbh $t2, $t2 \n" // |10|11|8|9|
195 "wsbh $t3, $t3 \n" // |14|15|12|13|
196 "rotr $t0, $t0, 16 \n" // |0|1|2|3|
197 "rotr $t1, $t1, 16 \n" // |4|5|6|7|
198 "rotr $t2, $t2, 16 \n" // |8|9|10|11|
199 "rotr $t3, $t3, 16 \n" // |12|13|14|15|
200 "addiu %[src], %[src], -16 \n"
201 "addiu $t4, $t4, -1 \n"
202 "sw $t3, 0(%[dst]) \n" // |15|14|13|12|
203 "sw $t2, 4(%[dst]) \n" // |11|10|9|8|
204 "sw $t1, 8(%[dst]) \n" // |7|6|5|4|
205 "sw $t0, 12(%[dst]) \n" // |3|2|1|0|
206 "bgtz $t4, 1b \n"
207 " addiu %[dst], %[dst], 16 \n"
208 "beqz $t5, 3f \n"
209 " nop \n"
210
211 "2: \n"
212 "lbu $t0, -1(%[src]) \n"
213 "addiu $t5, $t5, -1 \n"
214 "addiu %[src], %[src], -1 \n"
215 "sb $t0, 0(%[dst]) \n"
216 "bgez $t5, 2b \n"
217 " addiu %[dst], %[dst], 1 \n"
218
219 "3: \n"
220 ".set pop \n"
221 : [src] "+r" (src), [dst] "+r" (dst)
222 : [width] "r" (width)
223 : "t0", "t1", "t2", "t3", "t4", "t5"
224 );
225}
226#endif // HAS_MIRRORROW_MIPS_DSPR2
227
fbarchard@google.combdf7cb52012-11-05 23:40:11 +0000228#ifdef HAS_MirrorUVRow_MIPS_DSPR2
229void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000230 int width) {
231 int x = 0;
232 int y = 0;
233 __asm__ __volatile__ (
234 ".set push \n"
235 ".set noreorder \n"
236
237 "addu $t4, %[width], %[width] \n"
238 "srl %[x], %[width], 4 \n"
239 "andi %[y], %[width], 0xf \n"
240 "blez %[x], 2f \n"
241 " addu %[src_uv], %[src_uv], $t4 \n"
242
243 "1: \n"
244 "lw $t0, -32(%[src_uv]) \n" // |3|2|1|0|
245 "lw $t1, -28(%[src_uv]) \n" // |7|6|5|4|
246 "lw $t2, -24(%[src_uv]) \n" // |11|10|9|8|
247 "lw $t3, -20(%[src_uv]) \n" // |15|14|13|12|
248 "lw $t4, -16(%[src_uv]) \n" // |19|18|17|16|
249 "lw $t6, -12(%[src_uv]) \n" // |23|22|21|20|
250 "lw $t7, -8(%[src_uv]) \n" // |27|26|25|24|
251 "lw $t8, -4(%[src_uv]) \n" // |31|30|29|28|
252
253 "rotr $t0, $t0, 16 \n" // |1|0|3|2|
254 "rotr $t1, $t1, 16 \n" // |5|4|7|6|
255 "rotr $t2, $t2, 16 \n" // |9|8|11|10|
256 "rotr $t3, $t3, 16 \n" // |13|12|15|14|
257 "rotr $t4, $t4, 16 \n" // |17|16|19|18|
258 "rotr $t6, $t6, 16 \n" // |21|20|23|22|
259 "rotr $t7, $t7, 16 \n" // |25|24|27|26|
260 "rotr $t8, $t8, 16 \n" // |29|28|31|30|
261 "precr.qb.ph $t9, $t0, $t1 \n" // |0|2|4|6|
262 "precrq.qb.ph $t5, $t0, $t1 \n" // |1|3|5|7|
263 "precr.qb.ph $t0, $t2, $t3 \n" // |8|10|12|14|
264 "precrq.qb.ph $t1, $t2, $t3 \n" // |9|11|13|15|
265 "precr.qb.ph $t2, $t4, $t6 \n" // |16|18|20|22|
266 "precrq.qb.ph $t3, $t4, $t6 \n" // |17|19|21|23|
267 "precr.qb.ph $t4, $t7, $t8 \n" // |24|26|28|30|
268 "precrq.qb.ph $t6, $t7, $t8 \n" // |25|27|29|31|
269 "addiu %[src_uv], %[src_uv], -32 \n"
270 "addiu %[x], %[x], -1 \n"
271 "swr $t4, 0(%[dst_u]) \n"
272 "swl $t4, 3(%[dst_u]) \n" // |30|28|26|24|
273 "swr $t6, 0(%[dst_v]) \n"
274 "swl $t6, 3(%[dst_v]) \n" // |31|29|27|25|
275 "swr $t2, 4(%[dst_u]) \n"
276 "swl $t2, 7(%[dst_u]) \n" // |22|20|18|16|
277 "swr $t3, 4(%[dst_v]) \n"
278 "swl $t3, 7(%[dst_v]) \n" // |23|21|19|17|
279 "swr $t0, 8(%[dst_u]) \n"
280 "swl $t0, 11(%[dst_u]) \n" // |14|12|10|8|
281 "swr $t1, 8(%[dst_v]) \n"
282 "swl $t1, 11(%[dst_v]) \n" // |15|13|11|9|
283 "swr $t9, 12(%[dst_u]) \n"
284 "swl $t9, 15(%[dst_u]) \n" // |6|4|2|0|
285 "swr $t5, 12(%[dst_v]) \n"
286 "swl $t5, 15(%[dst_v]) \n" // |7|5|3|1|
287 "addiu %[dst_v], %[dst_v], 16 \n"
288 "bgtz %[x], 1b \n"
289 " addiu %[dst_u], %[dst_u], 16 \n"
290 "beqz %[y], 3f \n"
291 " nop \n"
292 "b 2f \n"
293 " nop \n"
294
295 "2: \n"
296 "lbu $t0, -2(%[src_uv]) \n"
297 "lbu $t1, -1(%[src_uv]) \n"
298 "addiu %[src_uv], %[src_uv], -2 \n"
299 "addiu %[y], %[y], -1 \n"
300 "sb $t0, 0(%[dst_u]) \n"
301 "sb $t1, 0(%[dst_v]) \n"
302 "addiu %[dst_u], %[dst_u], 1 \n"
303 "bgtz %[y], 2b \n"
304 " addiu %[dst_v], %[dst_v], 1 \n"
305
306 "3: \n"
307 ".set pop \n"
308 : [src_uv] "+r" (src_uv),
309 [dst_u] "+r" (dst_u),
310 [dst_v] "+r" (dst_v),
311 [x] "=&r" (x),
312 [y] "+r" (y)
313 : [width] "r" (width)
314 : "t0", "t1", "t2", "t3", "t4",
315 "t5", "t7", "t8", "t9"
316 );
317}
fbarchard@google.combdf7cb52012-11-05 23:40:11 +0000318#endif // HAS_MirrorUVRow_MIPS_DSPR2
fbarchard@google.com6c1b2d32012-10-26 22:49:18 +0000319
320
321
// Shared core of the I422To*Row_MIPS_DSPR2 converters: an inline-asm fragment
// that reads 4 Y bytes, 2 U bytes and 2 V bytes and leaves the resulting
// colour channels, one 8-bit value per halfword, in
//   t5 = | 0 | B0 | 0 | b0 |
//   t4 = | 0 | B1 | 0 | b1 |
//   t9 = | 0 | G0 | 0 | g0 |
//   t8 = | 0 | G1 | 0 | g1 |
//   t2 = | 0 | R0 | 0 | r0 |
//   t1 = | 0 | R1 | 0 | r1 |
// The "0" registers come from the Y bytes picked by preceu.ph.qbra and the
// "1" registers from those picked by preceu.ph.qbla.
//
// Contract with the expanding asm statement:
//  * operands named y_buf, u_buf and v_buf must exist; u_buf and v_buf are
//    advanced by 2 here, y_buf is NOT advanced;
//  * $s0..$s5 must already hold the packed halfword constants (the callers
//    in this file load 74, -25, -52, 102, 16 and 128 respectively);
//  * $t0..$t6, $t8 and $t9 are overwritten.
// Only /* */ comments are used inside the macro because a // comment would
// swallow the line-continuation backslash.
#define I422ToTransientMipsRGB \
  /* Load 4 Y bytes and 2 bytes each of U and V. */ \
  "lw $t0, 0(%[y_buf]) \n" \
  "lhu $t1, 0(%[u_buf]) \n" \
  "lhu $t2, 0(%[v_buf]) \n" \
  /* Widen bytes to halfwords: U, V, then the two alternating Y pairs. */ \
  "preceu.ph.qbr $t1, $t1 \n" \
  "preceu.ph.qbr $t2, $t2 \n" \
  "preceu.ph.qbra $t3, $t0 \n" \
  "preceu.ph.qbla $t0, $t0 \n" \
  /* Remove the offsets: U and V minus $s5, Y minus $s4. */ \
  "subu.ph $t1, $t1, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t3, $t3, $s4 \n" \
  "subu.ph $t0, $t0, $s4 \n" \
  /* Scale Y by $s0. */ \
  "mul.ph $t3, $t3, $s0 \n" \
  "mul.ph $t0, $t0, $s0 \n" \
  /* Blue: U * 127, formed as (U << 7) - U, plus Y (saturating add). */ \
  "shll.ph $t4, $t1, 0x7 \n" \
  "subu.ph $t4, $t4, $t1 \n" \
  "mul.ph $t6, $t1, $s1 \n" \
  "mul.ph $t1, $t2, $s2 \n" \
  "addq_s.ph $t5, $t4, $t3 \n" \
  "addq_s.ph $t4, $t4, $t0 \n" \
  "shra.ph $t5, $t5, 6 \n" \
  "shra.ph $t4, $t4, 6 \n" \
  /* Step the chroma pointers past the two samples just consumed. */ \
  "addiu %[u_buf], 2 \n" \
  "addiu %[v_buf], 2 \n" \
  /* Green: U * $s1 + V * $s2 plus Y.  Red: V * $s3 plus Y. */ \
  "addu.ph $t6, $t6, $t1 \n" \
  "mul.ph $t1, $t2, $s3 \n" \
  "addu.ph $t9, $t6, $t3 \n" \
  "addu.ph $t8, $t6, $t0 \n" \
  "shra.ph $t9, $t9, 6 \n" \
  "shra.ph $t8, $t8, 6 \n" \
  "addu.ph $t2, $t1, $t3 \n" \
  "addu.ph $t1, $t1, $t0 \n" \
  "shra.ph $t2, $t2, 6 \n" \
  "shra.ph $t1, $t1, 6 \n" \
  /* Clamp each halfword: subtract $s5, ... */ \
  "subu.ph $t5, $t5, $s5 \n" \
  "subu.ph $t4, $t4, $s5 \n" \
  "subu.ph $t9, $t9, $s5 \n" \
  "subu.ph $t8, $t8, $s5 \n" \
  "subu.ph $t2, $t2, $s5 \n" \
  "subu.ph $t1, $t1, $s5 \n" \
  /* ... saturating shift left by 8 ... */ \
  "shll_s.ph $t5, $t5, 8 \n" \
  "shll_s.ph $t4, $t4, 8 \n" \
  "shll_s.ph $t9, $t9, 8 \n" \
  "shll_s.ph $t8, $t8, 8 \n" \
  "shll_s.ph $t2, $t2, 8 \n" \
  "shll_s.ph $t1, $t1, 8 \n" \
  /* ... arithmetic shift back down by 8 ... */ \
  "shra.ph $t5, $t5, 8 \n" \
  "shra.ph $t4, $t4, 8 \n" \
  "shra.ph $t9, $t9, 8 \n" \
  "shra.ph $t8, $t8, 8 \n" \
  "shra.ph $t2, $t2, 8 \n" \
  "shra.ph $t1, $t1, 8 \n" \
  /* ... and add $s5 back. */ \
  "addu.ph $t5, $t5, $s5 \n" \
  "addu.ph $t4, $t4, $s5 \n" \
  "addu.ph $t9, $t9, $s5 \n" \
  "addu.ph $t8, $t8, $s5 \n" \
  "addu.ph $t2, $t2, $s5 \n" \
  "addu.ph $t1, $t1, $s5 \n"
387
388void I422ToARGBRow_MIPS_DSPR2(const uint8* y_buf,
389 const uint8* u_buf,
390 const uint8* v_buf,
391 uint8* rgb_buf,
392 int width) {
393 __asm__ __volatile__ (
394 ".set push \n"
395 ".set noreorder \n"
396 "beqz %[width], 2f \n"
397 " repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
398 "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
399 "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
400 "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
401 "repl.ph $s4, 16 \n" // |0|16|0|16|
402 "repl.ph $s5, 128 \n" // |128|128| // clipping
403 "lui $s6, 0xff00 \n"
404 "ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
405 "1: \n"
406 I422ToTransientMipsRGB
407// Arranging into argb format
408 "precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
409 "precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
410 "addiu %[width], -4 \n"
411 "precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
412 "precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
413 "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
414
415 "addiu %[y_buf], 4 \n"
416 "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
417 "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
418 "or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
419 "or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
420 "precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
421 "precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
422 "sll $t9, $t9, 16 \n"
423 "sll $t8, $t8, 16 \n"
424 "packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
425 "packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
426// Store results.
427 "sw $t2, 0(%[rgb_buf]) \n"
428 "sw $t0, 4(%[rgb_buf]) \n"
429 "sw $t1, 8(%[rgb_buf]) \n"
430 "sw $t3, 12(%[rgb_buf]) \n"
431 "bnez %[width], 1b \n"
432 " addiu %[rgb_buf], 16 \n"
433 "2: \n"
434 ".set pop \n"
435 :[y_buf] "+r" (y_buf),
436 [u_buf] "+r" (u_buf),
437 [v_buf] "+r" (v_buf),
438 [width] "+r" (width),
439 [rgb_buf] "+r" (rgb_buf)
440 :
441 : "t0", "t1", "t2", "t3", "t4", "t5",
442 "t6", "t7", "t8", "t9",
443 "s0", "s1", "s2", "s3",
444 "s4", "s5", "s6"
445 );
446}
447
448void I422ToABGRRow_MIPS_DSPR2(const uint8* y_buf,
449 const uint8* u_buf,
450 const uint8* v_buf,
451 uint8* rgb_buf,
452 int width) {
453 __asm__ __volatile__ (
454 ".set push \n\t"
455 ".set noreorder \n\t"
456 "beqz %[width], 2f \n\t"
457 " repl.ph $s0, 74 \n\t" // |YG|YG| = |74|74|
458 "repl.ph $s1, -25 \n\t" // |UG|UG| = |-25|-25|
459 "repl.ph $s2, -52 \n\t" // |VG|VG| = |-52|-52|
460 "repl.ph $s3, 102 \n\t" // |VR|VR| = |102|102|
461 "repl.ph $s4, 16 \n\t" // |0|16|0|16|
462 "repl.ph $s5, 128 \n\t" // |128|128|
463 "lui $s6, 0xff00 \n\t"
464 "ori $s6, 0xff00 \n\t" // |ff|00|ff|00|
465 "1: \n"
466 I422ToTransientMipsRGB
467// Arranging into abgr format
468 "precr.qb.ph $t0, $t8, $t1 \n\t" // |G1|g1|R1|r1|
469 "precr.qb.ph $t3, $t9, $t2 \n\t" // |G0|g0|R0|r0|
470 "precrq.qb.ph $t8, $t0, $t3 \n\t" // |G1|R1|G0|R0|
471 "precr.qb.ph $t9, $t0, $t3 \n\t" // |g1|r1|g0|r0|
472
473 "precr.qb.ph $t2, $t4, $t5 \n\t" // |B1|b1|B0|b0|
474 "addiu %[width], -4 \n\t"
475 "addiu %[y_buf], 4 \n\t"
476 "preceu.ph.qbla $t1, $t2 \n\t" // |0 |B1|0 |B0|
477 "preceu.ph.qbra $t2, $t2 \n\t" // |0 |b1|0 |b0|
478 "or $t1, $t1, $s6 \n\t" // |ff|B1|ff|B0|
479 "or $t2, $t2, $s6 \n\t" // |ff|b1|ff|b0|
480 "precrq.ph.w $t0, $t2, $t9 \n\t" // |ff|b1|g1|r1|
481 "precrq.ph.w $t3, $t1, $t8 \n\t" // |ff|B1|G1|R1|
482 "sll $t9, $t9, 16 \n\t"
483 "sll $t8, $t8, 16 \n\t"
484 "packrl.ph $t2, $t2, $t9 \n\t" // |ff|b0|g0|r0|
485 "packrl.ph $t1, $t1, $t8 \n\t" // |ff|B0|G0|R0|
486// Store results.
487 "sw $t2, 0(%[rgb_buf]) \n\t"
488 "sw $t0, 4(%[rgb_buf]) \n\t"
489 "sw $t1, 8(%[rgb_buf]) \n\t"
490 "sw $t3, 12(%[rgb_buf]) \n\t"
491 "bnez %[width], 1b \n\t"
492 " addiu %[rgb_buf], 16 \n\t"
493 "2: \n\t"
494 ".set pop \n\t"
495 :[y_buf] "+r" (y_buf),
496 [u_buf] "+r" (u_buf),
497 [v_buf] "+r" (v_buf),
498 [width] "+r" (width),
499 [rgb_buf] "+r" (rgb_buf)
500 :
501 : "t0", "t1", "t2", "t3", "t4", "t5",
502 "t6", "t7", "t8", "t9",
503 "s0", "s1", "s2", "s3",
504 "s4", "s5", "s6"
505 );
506}
507
508void I422ToBGRARow_MIPS_DSPR2(const uint8* y_buf,
509 const uint8* u_buf,
510 const uint8* v_buf,
511 uint8* rgb_buf,
512 int width) {
513 __asm__ __volatile__ (
514 ".set push \n"
515 ".set noreorder \n"
516 "beqz %[width], 2f \n"
517 " repl.ph $s0, 74 \n" // |YG|YG| = |74 |74 |
518 "repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
519 "repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
520 "repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
521 "repl.ph $s4, 16 \n" // |0|16|0|16|
522 "repl.ph $s5, 128 \n" // |128|128|
523 "lui $s6, 0xff \n"
524 "ori $s6, 0xff \n" // |00|ff|00|ff|
525 "1: \n"
526 I422ToTransientMipsRGB
527 // Arranging into bgra format
528 "precr.qb.ph $t4, $t4, $t8 \n" // |B1|b1|G1|g1|
529 "precr.qb.ph $t5, $t5, $t9 \n" // |B0|b0|G0|g0|
530 "precrq.qb.ph $t8, $t4, $t5 \n" // |B1|G1|B0|G0|
531 "precr.qb.ph $t9, $t4, $t5 \n" // |b1|g1|b0|g0|
532
533 "precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
534 "addiu %[width], -4 \n"
535 "addiu %[y_buf], 4 \n"
536 "preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
537 "preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
538 "sll $t1, $t1, 8 \n" // |R1|0 |R0|0 |
539 "sll $t2, $t2, 8 \n" // |r1|0 |r0|0 |
540 "or $t1, $t1, $s6 \n" // |R1|ff|R0|ff|
541 "or $t2, $t2, $s6 \n" // |r1|ff|r0|ff|
542 "precrq.ph.w $t0, $t9, $t2 \n" // |b1|g1|r1|ff|
543 "precrq.ph.w $t3, $t8, $t1 \n" // |B1|G1|R1|ff|
544 "sll $t1, $t1, 16 \n"
545 "sll $t2, $t2, 16 \n"
546 "packrl.ph $t2, $t9, $t2 \n" // |b0|g0|r0|ff|
547 "packrl.ph $t1, $t8, $t1 \n" // |B0|G0|R0|ff|
548// Store results.
549 "sw $t2, 0(%[rgb_buf]) \n"
550 "sw $t0, 4(%[rgb_buf]) \n"
551 "sw $t1, 8(%[rgb_buf]) \n"
552 "sw $t3, 12(%[rgb_buf]) \n"
553 "bnez %[width], 1b \n"
554 " addiu %[rgb_buf], 16 \n"
555 "2: \n"
556 ".set pop \n"
557 :[y_buf] "+r" (y_buf),
558 [u_buf] "+r" (u_buf),
559 [v_buf] "+r" (v_buf),
560 [width] "+r" (width),
561 [rgb_buf] "+r" (rgb_buf)
562 :
563 : "t0", "t1", "t2", "t3", "t4", "t5",
564 "t6", "t7", "t8", "t9",
565 "s0", "s1", "s2", "s3",
566 "s4", "s5", "s6"
567 );
568}
569
fbarchard@google.comca410052012-10-14 06:01:19 +0000570#endif // __mips__
571
572#ifdef __cplusplus
573} // extern "C"
574} // namespace libyuv
575#endif