/*
 *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "row.h"
12
13extern "C" {
14
fbarchard@google.com585a1262011-10-28 23:51:08 +000015#ifdef HAS_ARGBTOYROW_SSSE3
16
17// Constant multiplication table for converting ARGB to I400.
fbarchard@google.comb6149762011-11-07 21:58:52 +000018static const vec8 kARGBToY = {
19 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0
fbarchard@google.com585a1262011-10-28 23:51:08 +000020};
21
fbarchard@google.comb6149762011-11-07 21:58:52 +000022static const uvec8 kAddY16 = {
23 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
24 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u,
fbarchard@google.com585a1262011-10-28 23:51:08 +000025};
26
fbarchard@google.comb6149762011-11-07 21:58:52 +000027#ifdef HAS_ARGBTOUVROW_SSSE3
28static const vec8 kARGBToU = {
29 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0
30};
31
32static const uvec8 kARGBToV = {
33 -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0,
34};
35static const uvec8 kAddUV128 = {
36 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
37 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u
38};
39#endif
40
fbarchard@google.com9394ed92011-10-31 21:36:47 +000041// Shuffle table for converting BG24 to ARGB.
fbarchard@google.comb6149762011-11-07 21:58:52 +000042static const uvec8 kShuffleMaskBG24ToARGB = {
fbarchard@google.com9394ed92011-10-31 21:36:47 +000043 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u
44};
45
46// Shuffle table for converting RAW to ARGB.
fbarchard@google.comb6149762011-11-07 21:58:52 +000047static const uvec8 kShuffleMaskRAWToARGB = {
fbarchard@google.com9394ed92011-10-31 21:36:47 +000048 2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u
49};
50
fbarchard@google.comb6149762011-11-07 21:58:52 +000051// Shuffle table for converting ABGR to ARGB.
52static const uvec8 kShuffleMaskABGRToARGB = {
53 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
54};
55
56// Shuffle table for converting BGRA to ARGB.
57static const uvec8 kShuffleMaskBGRAToARGB = {
58 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
59};
60
61void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
fbarchard@google.com585a1262011-10-28 23:51:08 +000062 asm volatile(
fbarchard@google.comb6149762011-11-07 21:58:52 +000063 "pcmpeqb %%xmm5,%%xmm5\n"
64 "pslld $0x18,%%xmm5\n"
fbarchard@google.com585a1262011-10-28 23:51:08 +000065"1:"
fbarchard@google.comb6149762011-11-07 21:58:52 +000066 "movq (%0),%%xmm0\n"
67 "lea 0x8(%0),%0\n"
68 "punpcklbw %%xmm0,%%xmm0\n"
69 "movdqa %%xmm0,%%xmm1\n"
70 "punpcklwd %%xmm0,%%xmm0\n"
71 "punpckhwd %%xmm1,%%xmm1\n"
72 "por %%xmm5,%%xmm0\n"
73 "por %%xmm5,%%xmm1\n"
74 "movdqa %%xmm0,(%1)\n"
75 "movdqa %%xmm1,0x10(%1)\n"
76 "lea 0x20(%1),%1\n"
fbarchard@google.com585a1262011-10-28 23:51:08 +000077 "sub $0x8,%2\n"
78 "ja 1b\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +000079 : "+r"(src_y), // %0
80 "+r"(dst_argb), // %1
81 "+r"(pix) // %2
82 :
83 : "memory", "cc"
84#if defined(__SSE2__)
85 , "xmm0", "xmm1", "xmm5"
86#endif
fbarchard@google.com585a1262011-10-28 23:51:08 +000087);
88}
fbarchard@google.comb6149762011-11-07 21:58:52 +000089
90void ABGRToARGBRow_SSSE3(const uint8* src_abgr, uint8* dst_argb, int pix) {
91 asm volatile(
92 "movdqa %3,%%xmm5\n"
93"1:"
94 "movdqa (%0),%%xmm0\n"
95 "lea 0x10(%0),%0\n"
96 "pshufb %%xmm5,%%xmm0\n"
97 "movdqa %%xmm0,(%1)\n"
98 "lea 0x10(%1),%1\n"
99 "sub $0x4,%2\n"
100 "ja 1b\n"
101 : "+r"(src_abgr), // %0
102 "+r"(dst_argb), // %1
103 "+r"(pix) // %2
104 : "m"(kShuffleMaskABGRToARGB) // %3
105 : "memory", "cc"
106#if defined(__SSE2__)
107 , "xmm0", "xmm5"
fbarchard@google.com585a1262011-10-28 23:51:08 +0000108#endif
109
fbarchard@google.comb6149762011-11-07 21:58:52 +0000110);
111}
112
113void BGRAToARGBRow_SSSE3(const uint8* src_bgra, uint8* dst_argb, int pix) {
114 asm volatile(
115 "movdqa %3,%%xmm5\n"
116"1:"
117 "movdqa (%0),%%xmm0\n"
118 "lea 0x10(%0),%0\n"
119 "pshufb %%xmm5,%%xmm0\n"
120 "movdqa %%xmm0,(%1)\n"
121 "lea 0x10(%1),%1\n"
122 "sub $0x4,%2\n"
123 "ja 1b\n"
124 : "+r"(src_bgra), // %0
125 "+r"(dst_argb), // %1
126 "+r"(pix) // %2
127 : "m"(kShuffleMaskBGRAToARGB) // %3
128 : "memory", "cc"
129#if defined(__SSE2__)
130 , "xmm0", "xmm5"
131#endif
132);
133}
134
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000135void BG24ToARGBRow_SSSE3(const uint8* src_bg24, uint8* dst_argb, int pix) {
136 asm volatile(
fbarchard@google.comb6149762011-11-07 21:58:52 +0000137 "pcmpeqb %%xmm5,%%xmm5\n" // generate mask 0xff000000
138 "pslld $0x18,%%xmm5\n"
139 "movdqa %3,%%xmm4\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000140"1:"
141 "movdqa (%0),%%xmm0\n"
142 "movdqa 0x10(%0),%%xmm1\n"
143 "movdqa 0x20(%0),%%xmm3\n"
144 "lea 0x30(%0),%0\n"
145 "movdqa %%xmm3,%%xmm2\n"
146 "palignr $0x8,%%xmm1,%%xmm2\n" // xmm2 = { xmm3[0:3] xmm1[8:15] }
fbarchard@google.comb6149762011-11-07 21:58:52 +0000147 "pshufb %%xmm4,%%xmm2\n"
148 "por %%xmm5,%%xmm2\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000149 "palignr $0xc,%%xmm0,%%xmm1\n" // xmm1 = { xmm3[0:7] xmm0[12:15] }
fbarchard@google.comb6149762011-11-07 21:58:52 +0000150 "pshufb %%xmm4,%%xmm0\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000151 "movdqa %%xmm2,0x20(%1)\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000152 "por %%xmm5,%%xmm0\n"
153 "pshufb %%xmm4,%%xmm1\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000154 "movdqa %%xmm0,(%1)\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000155 "por %%xmm5,%%xmm1\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000156 "palignr $0x4,%%xmm3,%%xmm3\n" // xmm3 = { xmm3[4:15] }
fbarchard@google.comb6149762011-11-07 21:58:52 +0000157 "pshufb %%xmm4,%%xmm3\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000158 "movdqa %%xmm1,0x10(%1)\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000159 "por %%xmm5,%%xmm3\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000160 "movdqa %%xmm3,0x30(%1)\n"
161 "lea 0x40(%1),%1\n"
162 "sub $0x10,%2\n"
163 "ja 1b\n"
164 : "+r"(src_bg24), // %0
165 "+r"(dst_argb), // %1
166 "+r"(pix) // %2
fbarchard@google.comb6149762011-11-07 21:58:52 +0000167 : "m"(kShuffleMaskBG24ToARGB) // %3
168 : "memory", "cc"
169#if defined(__SSE2__)
170 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
171#endif
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000172);
fbarchard@google.com585a1262011-10-28 23:51:08 +0000173}
174
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000175void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix) {
176 asm volatile(
fbarchard@google.comb6149762011-11-07 21:58:52 +0000177 "pcmpeqb %%xmm5,%%xmm5\n" // generate mask 0xff000000
178 "pslld $0x18,%%xmm5\n"
179 "movdqa %3,%%xmm4\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000180"1:"
181 "movdqa (%0),%%xmm0\n"
182 "movdqa 0x10(%0),%%xmm1\n"
183 "movdqa 0x20(%0),%%xmm3\n"
184 "lea 0x30(%0),%0\n"
185 "movdqa %%xmm3,%%xmm2\n"
186 "palignr $0x8,%%xmm1,%%xmm2\n" // xmm2 = { xmm3[0:3] xmm1[8:15] }
fbarchard@google.comb6149762011-11-07 21:58:52 +0000187 "pshufb %%xmm4,%%xmm2\n"
188 "por %%xmm5,%%xmm2\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000189 "palignr $0xc,%%xmm0,%%xmm1\n" // xmm1 = { xmm3[0:7] xmm0[12:15] }
fbarchard@google.comb6149762011-11-07 21:58:52 +0000190 "pshufb %%xmm4,%%xmm0\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000191 "movdqa %%xmm2,0x20(%1)\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000192 "por %%xmm5,%%xmm0\n"
193 "pshufb %%xmm4,%%xmm1\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000194 "movdqa %%xmm0,(%1)\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000195 "por %%xmm5,%%xmm1\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000196 "palignr $0x4,%%xmm3,%%xmm3\n" // xmm3 = { xmm3[4:15] }
fbarchard@google.comb6149762011-11-07 21:58:52 +0000197 "pshufb %%xmm4,%%xmm3\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000198 "movdqa %%xmm1,0x10(%1)\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000199 "por %%xmm5,%%xmm3\n"
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000200 "movdqa %%xmm3,0x30(%1)\n"
201 "lea 0x40(%1),%1\n"
202 "sub $0x10,%2\n"
203 "ja 1b\n"
204 : "+r"(src_raw), // %0
205 "+r"(dst_argb), // %1
206 "+r"(pix) // %2
fbarchard@google.comb6149762011-11-07 21:58:52 +0000207 : "m"(kShuffleMaskRAWToARGB) // %3
208 : "memory", "cc"
209#if defined(__SSE2__)
210 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
211#endif
212);
213}
214
215void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
216 asm volatile(
217 "movdqa %4,%%xmm5\n"
218 "movdqa %3,%%xmm4\n"
219"1:"
220 "movdqa (%0),%%xmm0\n"
221 "movdqa 0x10(%0),%%xmm1\n"
222 "movdqa 0x20(%0),%%xmm2\n"
223 "movdqa 0x30(%0),%%xmm3\n"
224 "pmaddubsw %%xmm4,%%xmm0\n"
225 "pmaddubsw %%xmm4,%%xmm1\n"
226 "pmaddubsw %%xmm4,%%xmm2\n"
227 "pmaddubsw %%xmm4,%%xmm3\n"
228 "lea 0x40(%0),%0\n"
229 "phaddw %%xmm1,%%xmm0\n"
230 "phaddw %%xmm3,%%xmm2\n"
231 "psrlw $0x7,%%xmm0\n"
232 "psrlw $0x7,%%xmm2\n"
233 "packuswb %%xmm2,%%xmm0\n"
234 "paddb %%xmm5,%%xmm0\n"
235 "movdqa %%xmm0,(%1)\n"
236 "lea 0x10(%1),%1\n"
237 "sub $0x10,%2\n"
238 "ja 1b\n"
239 : "+r"(src_argb), // %0
240 "+r"(dst_y), // %1
241 "+r"(pix) // %2
242 : "m"(kARGBToY), // %3
243 "m"(kAddY16) // %4
244 : "memory", "cc"
245#if defined(__SSE2__)
246 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
247#endif
248
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000249);
fbarchard@google.com585a1262011-10-28 23:51:08 +0000250}
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000251#endif
fbarchard@google.com585a1262011-10-28 23:51:08 +0000252
fbarchard@google.comb6149762011-11-07 21:58:52 +0000253#ifdef HAS_ARGBTOUVROW_SSSE3
254void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
255 uint8* dst_u, uint8* dst_v, int width) {
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000256 asm volatile(
257 "movdqa %0,%%xmm4\n"
258 "movdqa %1,%%xmm3\n"
259 "movdqa %2,%%xmm5\n"
260 :
261 : "m"(kARGBToU), // %0
262 "m"(kARGBToV), // %1
263 "m"(kAddUV128) // %2
264 :
265#if defined(__SSE2__)
266 "xmm3", "xmm4", "xmm5"
267#endif
268 );
269 asm volatile(
fbarchard@google.comb6149762011-11-07 21:58:52 +0000270 "sub %1,%2\n"
271"1:"
272 "movdqa (%0),%%xmm0\n"
273 "movdqa 0x10(%0),%%xmm1\n"
274 "movdqa 0x20(%0),%%xmm2\n"
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000275 "movdqa 0x30(%0),%%xmm6\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000276 "pavgb (%0,%4,1),%%xmm0\n"
277 "pavgb 0x10(%0,%4,1),%%xmm1\n"
278 "pavgb 0x20(%0,%4,1),%%xmm2\n"
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000279 "pavgb 0x30(%0,%4,1),%%xmm6\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000280 "lea 0x40(%0),%0\n"
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000281 "movdqa %%xmm0,%%xmm7\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000282 "shufps $0x88,%%xmm1,%%xmm0\n"
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000283 "shufps $0xdd,%%xmm1,%%xmm7\n"
284 "pavgb %%xmm7,%%xmm0\n"
285 "movdqa %%xmm2,%%xmm7\n"
286 "shufps $0x88,%%xmm6,%%xmm2\n"
287 "shufps $0xdd,%%xmm6,%%xmm7\n"
288 "pavgb %%xmm7,%%xmm2\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000289 "movdqa %%xmm0,%%xmm1\n"
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000290 "movdqa %%xmm2,%%xmm6\n"
291 "pmaddubsw %%xmm4,%%xmm0\n"
292 "pmaddubsw %%xmm4,%%xmm2\n"
293 "pmaddubsw %%xmm3,%%xmm1\n"
294 "pmaddubsw %%xmm3,%%xmm6\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000295 "phaddw %%xmm2,%%xmm0\n"
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000296 "phaddw %%xmm6,%%xmm1\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000297 "psraw $0x8,%%xmm0\n"
298 "psraw $0x8,%%xmm1\n"
299 "packsswb %%xmm1,%%xmm0\n"
300 "paddb %%xmm5,%%xmm0\n"
301 "movlps %%xmm0,(%1)\n"
302 "movhps %%xmm0,(%1,%2,1)\n"
303 "lea 0x8(%1),%1\n"
304 "sub $0x10,%3\n"
305 "ja 1b\n"
306 : "+r"(src_argb0), // %0
307 "+r"(dst_u), // %1
308 "+r"(dst_v), // %2
309 "+rm"(width) // %3
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000310 : "r"(static_cast<intptr_t>(src_stride_argb))
fbarchard@google.comb6149762011-11-07 21:58:52 +0000311 : "memory", "cc"
312#if defined(__SSE2__)
fbarchard@google.comd93d4482011-11-10 18:26:20 +0000313 , "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000314#endif
315);
316}
317#endif
318
319// The following code requires 6 registers and prefers 7 registers.
320// 7 registers requires -fpic to be off, and -fomit-frame-pointer
321#ifdef HAS_FASTCONVERTYUVTOARGBROW_SSE2
// Names of the scratch general-purpose registers used by the loops below.
#if defined(__x86_64__)
#define REG_a "rax"
#define REG_d "rdx"
#else
#define REG_a "eax"
#define REG_d "edx"
#endif

// Function attribute that frees the frame pointer where it is not already
// free; expands to nothing on Apple and x86-64 builds.
#if defined(__APPLE__) || defined(__x86_64__)
#define OMITFP
#else
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif

#if defined(__APPLE__)
// REG6 version uses 1 less register but is slower
#define REG6
#endif

// Loop bodies shared by the FastConvertYUVTo*Row_SSE2 functions.
// Operand numbering expected from the enclosing asm statement:
//   %0 = y_buf, %1 = u_buf, %2 = v_buf, %3 = rgb_buf, %4 = width,
//   %5 = base of a coefficient table that is indexed with 8-byte entries at
//        byte offset 0 by Y, 2048 by the %1 sample and 4096 by the %2 sample.
// CLOBBER lists the scratch general-purpose registers the body uses.
// String literals and macro names are separated by spaces so that the
// text is not parsed as a literal suffix by C++11 compilers.
// "subl" is used for %4 because that operand may live in memory ("+rm").
// YUVTORGB4 is only provided by the non-REG6 variant.
#ifdef REG6
// 6 register version only has REG_a for temporary
#define CLOBBER "%" REG_a
#define YUVTORGB \
 "1:" \
  "movzb  (%1),%%" REG_a "\n" \
  "lea    1(%1),%1\n" \
  "movq   2048(%5,%%" REG_a ",8),%%xmm0\n" \
  "movzb  (%2),%%" REG_a "\n" \
  "lea    1(%2),%2\n" \
  "movq   4096(%5,%%" REG_a ",8),%%xmm1\n" \
  "paddsw %%xmm1,%%xmm0\n" \
  "movzb  (%0),%%" REG_a "\n" \
  "movq   0(%5,%%" REG_a ",8),%%xmm2\n" \
  "movzb  0x1(%0),%%" REG_a "\n" \
  "movq   0(%5,%%" REG_a ",8),%%xmm3\n" \
  "lea    2(%0),%0\n" \
  "paddsw %%xmm0,%%xmm2\n" \
  "paddsw %%xmm0,%%xmm3\n" \
  "shufps $0x44,%%xmm3,%%xmm2\n" \
  "psraw  $0x6,%%xmm2\n" \
  "packuswb %%xmm2,%%xmm2\n" \
  "movq   %%xmm2,0x0(%3)\n" \
  "lea    8(%3),%3\n" \
  "subl   $0x2,%4\n" \
  "ja     1b\n"
#else
#define CLOBBER "%" REG_a, "%" REG_d
// This version produces 2 pixels
#define YUVTORGB \
"1:" \
  "movzb      (%1),%%" REG_a "\n" \
  "lea        1(%1),%1\n" \
  "movzb      (%2),%%" REG_d "\n" \
  "lea        1(%2),%2\n" \
  "movq       2048(%5,%%" REG_a ",8),%%xmm0\n" \
  "movzb      0(%0),%%" REG_a "\n" \
  "movq       4096(%5,%%" REG_d ",8),%%xmm1\n" \
  "paddsw     %%xmm1,%%xmm0\n" \
  "movzb      1(%0),%%" REG_d "\n" \
  "punpcklqdq %%xmm0,%%xmm0\n" \
  "lea        2(%0),%0\n" \
  "movq       0(%5,%%" REG_a ",8),%%xmm1\n" \
  "movhps     0(%5,%%" REG_d ",8),%%xmm1\n" \
  "paddsw     %%xmm0,%%xmm1\n" \
  "psraw      $6,%%xmm1\n" \
  "packuswb   %%xmm1,%%xmm1\n" \
  "movq       %%xmm1,0(%3)\n" \
  "lea        8(%3),%3\n" \
  "subl       $0x2,%4\n" \
  "ja         1b\n"
// This version produces 4 pixels
#define YUVTORGB4 \
"1:" \
  "movzb      0(%1),%%" REG_a "\n" \
  "movzb      0(%2),%%" REG_d "\n" \
  "movq       2048(%5,%%" REG_a ",8),%%xmm0\n" \
  "movzb      0(%0),%%" REG_a "\n" \
  "movq       4096(%5,%%" REG_d ",8),%%xmm1\n" \
  "paddsw     %%xmm1,%%xmm0\n" \
  "movzb      1(%0),%%" REG_d "\n" \
  "punpcklqdq %%xmm0,%%xmm0\n" \
  "movq       0(%5,%%" REG_a ",8),%%xmm2\n" \
  "movhps     0(%5,%%" REG_d ",8),%%xmm2\n" \
  "paddsw     %%xmm0,%%xmm2\n" \
  "psraw      $6,%%xmm2\n" \
  "movzb      1(%1),%%" REG_a "\n" \
  "movzb      1(%2),%%" REG_d "\n" \
  "movq       2048(%5,%%" REG_a ",8),%%xmm0\n" \
  "movzb      2(%0),%%" REG_a "\n" \
  "movq       4096(%5,%%" REG_d ",8),%%xmm1\n" \
  "paddsw     %%xmm1,%%xmm0\n" \
  "movzb      3(%0),%%" REG_d "\n" \
  "punpcklqdq %%xmm0,%%xmm0\n" \
  "movq       0(%5,%%" REG_a ",8),%%xmm3\n" \
  "movhps     0(%5,%%" REG_d ",8),%%xmm3\n" \
  "paddsw     %%xmm0,%%xmm3\n" \
  "psraw      $6,%%xmm3\n" \
  "lea        2(%1),%1\n" \
  "lea        2(%2),%2\n" \
  "lea        4(%0),%0\n" \
  "packuswb   %%xmm3,%%xmm2\n" \
  "movdqa     %%xmm2,0(%3)\n" \
  "lea        16(%3),%3\n" \
  "subl       $0x4,%4\n" \
  "ja         1b\n"
#endif
427
428// 6 or 7 registers
429void OMITFP FastConvertYUVToARGBRow_SSE2(const uint8* y_buf, // rdi
430 const uint8* u_buf, // rsi
431 const uint8* v_buf, // rdx
432 uint8* rgb_buf, // rcx
433 int width) { // r8
434 asm volatile(
435 YUVTORGB
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000436 : "+r"(y_buf), // %0
437 "+r"(u_buf), // %1
438 "+r"(v_buf), // %2
439 "+r"(rgb_buf), // %3
fbarchard@google.comb6149762011-11-07 21:58:52 +0000440 "+rm"(width) // %4
441 : "r" (kCoefficientsRgbY) // %5
442 : "memory", "cc", CLOBBER
443#if defined(__SSE2__)
444 , "xmm0", "xmm1", "xmm2", "xmm3"
445#endif
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000446);
447}
448
fbarchard@google.comb6149762011-11-07 21:58:52 +0000449// 6 or 7 registers
450void OMITFP FastConvertYUVToARGBRow4_SSE2(const uint8* y_buf, // rdi
451 const uint8* u_buf, // rsi
452 const uint8* v_buf, // rdx
453 uint8* rgb_buf, // rcx
454 int width) { // r8
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000455 asm volatile(
fbarchard@google.comb6149762011-11-07 21:58:52 +0000456 YUVTORGB4
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000457 : "+r"(y_buf), // %0
458 "+r"(u_buf), // %1
459 "+r"(v_buf), // %2
460 "+r"(rgb_buf), // %3
fbarchard@google.comb6149762011-11-07 21:58:52 +0000461 "+rm"(width) // %4
462 : "r" (kCoefficientsRgbY) // %5
463 : "memory", "cc", CLOBBER
464#if defined(__SSE2__)
465 , "xmm0", "xmm1", "xmm2", "xmm3"
466#endif
mikhal@webrtc.org43575c82011-10-12 18:49:21 +0000467);
468}
469
fbarchard@google.comb6149762011-11-07 21:58:52 +0000470void OMITFP FastConvertYUVToBGRARow_SSE2(const uint8* y_buf, // rdi
471 const uint8* u_buf, // rsi
472 const uint8* v_buf, // rdx
473 uint8* rgb_buf, // rcx
474 int width) { // r8
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000475 asm volatile(
fbarchard@google.comb6149762011-11-07 21:58:52 +0000476 YUVTORGB
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000477 : "+r"(y_buf), // %0
478 "+r"(u_buf), // %1
479 "+r"(v_buf), // %2
480 "+r"(rgb_buf), // %3
fbarchard@google.comb6149762011-11-07 21:58:52 +0000481 "+rm"(width) // %4
482 : "r" (kCoefficientsBgraY) // %5
483 : "memory", "cc", CLOBBER
484#if defined(__SSE2__)
485 , "xmm0", "xmm1", "xmm2", "xmm3"
486#endif
mikhal@webrtc.org43575c82011-10-12 18:49:21 +0000487);
488}
489
fbarchard@google.comb6149762011-11-07 21:58:52 +0000490void OMITFP FastConvertYUVToABGRRow_SSE2(const uint8* y_buf, // rdi
491 const uint8* u_buf, // rsi
492 const uint8* v_buf, // rdx
493 uint8* rgb_buf, // rcx
494 int width) { // r8
495 asm volatile(
496 YUVTORGB
497 : "+r"(y_buf), // %0
498 "+r"(u_buf), // %1
499 "+r"(v_buf), // %2
500 "+r"(rgb_buf), // %3
501 "+rm"(width) // %4
502 : "r" (kCoefficientsAbgrY) // %5
503 : "memory", "cc", CLOBBER
504#if defined(__SSE2__)
505 , "xmm0", "xmm1", "xmm2", "xmm3"
506#endif
507);
508}
509
510// 6 registers
511void OMITFP FastConvertYUV444ToARGBRow_SSE2(const uint8* y_buf, // rdi
512 const uint8* u_buf, // rsi
513 const uint8* v_buf, // rdx
514 uint8* rgb_buf, // rcx
515 int width) { // r8
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000516 asm volatile(
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000517"1:"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000518 "movzb (%1),%%"REG_a"\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000519 "lea 1(%1),%1\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000520 "movq 2048(%5,%%"REG_a",8),%%xmm0\n"
521 "movzb (%2),%%"REG_a"\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000522 "lea 1(%2),%2\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000523 "movq 4096(%5,%%"REG_a",8),%%xmm1\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000524 "paddsw %%xmm1,%%xmm0\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000525 "movzb (%0),%%"REG_a"\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000526 "lea 1(%0),%0\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000527 "movq 0(%5,%%"REG_a",8),%%xmm2\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000528 "paddsw %%xmm0,%%xmm2\n"
529 "shufps $0x44,%%xmm2,%%xmm2\n"
530 "psraw $0x6,%%xmm2\n"
531 "packuswb %%xmm2,%%xmm2\n"
532 "movd %%xmm2,0x0(%3)\n"
533 "lea 4(%3),%3\n"
534 "sub $0x1,%4\n"
535 "ja 1b\n"
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000536 : "+r"(y_buf), // %0
537 "+r"(u_buf), // %1
538 "+r"(v_buf), // %2
539 "+r"(rgb_buf), // %3
fbarchard@google.comb6149762011-11-07 21:58:52 +0000540 "+rm"(width) // %4
541 : "r" (kCoefficientsRgbY) // %5
542 : "memory", "cc", "%"REG_a
543#if defined(__SSE2__)
544 , "xmm0", "xmm1", "xmm2"
545#endif
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000546);
547}
548
fbarchard@google.comb6149762011-11-07 21:58:52 +0000549// 5 registers
550void FastConvertYToARGBRow_SSE2(const uint8* y_buf, // rdi
551 uint8* rgb_buf, // rcx
552 int width) { // r8
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000553 asm volatile(
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000554"1:"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000555 "movzb (%0),%%"REG_a"\n"
556 "movzb 0x1(%0),%%"REG_d"\n"
557 "movq (%3,%%"REG_a",8),%%xmm2\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000558 "lea 2(%0),%0\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000559 "movhps (%3,%%"REG_d",8),%%xmm2\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000560 "psraw $0x6,%%xmm2\n"
561 "packuswb %%xmm2,%%xmm2\n"
562 "movq %%xmm2,0x0(%1)\n"
563 "lea 8(%1),%1\n"
564 "sub $0x2,%2\n"
565 "ja 1b\n"
fbarchard@google.com3faa0f12011-10-20 06:04:16 +0000566 : "+r"(y_buf), // %0
567 "+r"(rgb_buf), // %1
fbarchard@google.comb6149762011-11-07 21:58:52 +0000568 "+rm"(width) // %2
569 : "r" (kCoefficientsRgbY) // %3
570 : "memory", "cc", "%"REG_a, "%"REG_d
571#if defined(__SSE2__)
572 , "xmm0", "xmm1", "xmm2"
573#endif
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000574);
575}
576
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000577#endif
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000578
fbarchard@google.comb6149762011-11-07 21:58:52 +0000579#ifdef HAS_FASTCONVERTYUVTOARGBROW_MMX
580// 32 bit mmx gcc version
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000581
// Prefix applied to C symbol names referenced from the file-scope asm
// below. Uses the same condition as the function labels in those asm
// blocks, so that labels and table references are decorated consistently.
#if defined(OSX) || defined(IOS)
#define UNDERSCORE "_"
#else
#define UNDERSCORE ""
#endif
mikhal@webrtc.org43575c82011-10-12 18:49:21 +0000587
fbarchard@google.comb6149762011-11-07 21:58:52 +0000588void FastConvertYUVToARGBRow_MMX(const uint8* y_buf,
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000589 const uint8* u_buf,
590 const uint8* v_buf,
591 uint8* rgb_buf,
592 int width);
593 asm(
594 ".text\n"
595#if defined(OSX) || defined(IOS)
fbarchard@google.comb6149762011-11-07 21:58:52 +0000596 ".globl _FastConvertYUVToARGBRow_MMX\n"
597"_FastConvertYUVToARGBRow_MMX:\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000598#else
fbarchard@google.comb6149762011-11-07 21:58:52 +0000599 ".global FastConvertYUVToARGBRow_MMX\n"
600"FastConvertYUVToARGBRow_MMX:\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000601#endif
602 "pusha\n"
603 "mov 0x24(%esp),%edx\n"
604 "mov 0x28(%esp),%edi\n"
605 "mov 0x2c(%esp),%esi\n"
606 "mov 0x30(%esp),%ebp\n"
607 "mov 0x34(%esp),%ecx\n"
608
609"1:"
610 "movzbl (%edi),%eax\n"
611 "lea 1(%edi),%edi\n"
612 "movzbl (%esi),%ebx\n"
613 "lea 1(%esi),%esi\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000614 "movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000615 "movzbl (%edx),%eax\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000616 "paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
617 "movzbl 0x1(%edx),%ebx\n"
618 "movq " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm1\n"
619 "lea 2(%edx),%edx\n"
620 "movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm2\n"
621 "paddsw %mm0,%mm1\n"
622 "paddsw %mm0,%mm2\n"
623 "psraw $0x6,%mm1\n"
624 "psraw $0x6,%mm2\n"
625 "packuswb %mm2,%mm1\n"
626 "movq %mm1,0x0(%ebp)\n"
627 "lea 8(%ebp),%ebp\n"
628 "sub $0x2,%ecx\n"
629 "ja 1b\n"
630 "popa\n"
631 "ret\n"
632);
633
634void FastConvertYUVToBGRARow_MMX(const uint8* y_buf,
635 const uint8* u_buf,
636 const uint8* v_buf,
637 uint8* rgb_buf,
638 int width);
639 asm(
640 ".text\n"
641#if defined(OSX) || defined(IOS)
642 ".globl _FastConvertYUVToBGRARow_MMX\n"
643"_FastConvertYUVToBGRARow_MMX:\n"
644#else
645 ".global FastConvertYUVToBGRARow_MMX\n"
646"FastConvertYUVToBGRARow_MMX:\n"
647#endif
648 "pusha\n"
649 "mov 0x24(%esp),%edx\n"
650 "mov 0x28(%esp),%edi\n"
651 "mov 0x2c(%esp),%esi\n"
652 "mov 0x30(%esp),%ebp\n"
653 "mov 0x34(%esp),%ecx\n"
654
655"1:"
656 "movzbl (%edi),%eax\n"
657 "lea 1(%edi),%edi\n"
658 "movzbl (%esi),%ebx\n"
659 "lea 1(%esi),%esi\n"
660 "movq " UNDERSCORE "kCoefficientsBgraY+2048(,%eax,8),%mm0\n"
661 "movzbl (%edx),%eax\n"
662 "paddsw " UNDERSCORE "kCoefficientsBgraY+4096(,%ebx,8),%mm0\n"
663 "movzbl 0x1(%edx),%ebx\n"
664 "movq " UNDERSCORE "kCoefficientsBgraY(,%eax,8),%mm1\n"
665 "lea 2(%edx),%edx\n"
666 "movq " UNDERSCORE "kCoefficientsBgraY(,%ebx,8),%mm2\n"
667 "paddsw %mm0,%mm1\n"
668 "paddsw %mm0,%mm2\n"
669 "psraw $0x6,%mm1\n"
670 "psraw $0x6,%mm2\n"
671 "packuswb %mm2,%mm1\n"
672 "movq %mm1,0x0(%ebp)\n"
673 "lea 8(%ebp),%ebp\n"
674 "sub $0x2,%ecx\n"
675 "ja 1b\n"
676 "popa\n"
677 "ret\n"
678);
679
680void FastConvertYUVToABGRRow_MMX(const uint8* y_buf,
681 const uint8* u_buf,
682 const uint8* v_buf,
683 uint8* rgb_buf,
684 int width);
685 asm(
686 ".text\n"
687#if defined(OSX) || defined(IOS)
688 ".globl _FastConvertYUVToABGRRow_MMX\n"
689"_FastConvertYUVToABGRRow_MMX:\n"
690#else
691 ".global FastConvertYUVToABGRRow_MMX\n"
692"FastConvertYUVToABGRRow_MMX:\n"
693#endif
694 "pusha\n"
695 "mov 0x24(%esp),%edx\n"
696 "mov 0x28(%esp),%edi\n"
697 "mov 0x2c(%esp),%esi\n"
698 "mov 0x30(%esp),%ebp\n"
699 "mov 0x34(%esp),%ecx\n"
700
701"1:"
702 "movzbl (%edi),%eax\n"
703 "lea 1(%edi),%edi\n"
704 "movzbl (%esi),%ebx\n"
705 "lea 1(%esi),%esi\n"
706 "movq " UNDERSCORE "kCoefficientsAbgrY+2048(,%eax,8),%mm0\n"
707 "movzbl (%edx),%eax\n"
708 "paddsw " UNDERSCORE "kCoefficientsAbgrY+4096(,%ebx,8),%mm0\n"
709 "movzbl 0x1(%edx),%ebx\n"
710 "movq " UNDERSCORE "kCoefficientsAbgrY(,%eax,8),%mm1\n"
711 "lea 2(%edx),%edx\n"
712 "movq " UNDERSCORE "kCoefficientsAbgrY(,%ebx,8),%mm2\n"
713 "paddsw %mm0,%mm1\n"
714 "paddsw %mm0,%mm2\n"
715 "psraw $0x6,%mm1\n"
716 "psraw $0x6,%mm2\n"
717 "packuswb %mm2,%mm1\n"
718 "movq %mm1,0x0(%ebp)\n"
719 "lea 8(%ebp),%ebp\n"
720 "sub $0x2,%ecx\n"
721 "ja 1b\n"
722 "popa\n"
723 "ret\n"
724);
725
726void FastConvertYUV444ToARGBRow_MMX(const uint8* y_buf,
727 const uint8* u_buf,
728 const uint8* v_buf,
729 uint8* rgb_buf,
730 int width);
731 asm(
732 ".text\n"
733#if defined(OSX) || defined(IOS)
734 ".globl _FastConvertYUV444ToARGBRow_MMX\n"
735"_FastConvertYUV444ToARGBRow_MMX:\n"
736#else
737 ".global FastConvertYUV444ToARGBRow_MMX\n"
738"FastConvertYUV444ToARGBRow_MMX:\n"
739#endif
740 "pusha\n"
741 "mov 0x24(%esp),%edx\n"
742 "mov 0x28(%esp),%edi\n"
743 "mov 0x2c(%esp),%esi\n"
744 "mov 0x30(%esp),%ebp\n"
745 "mov 0x34(%esp),%ecx\n"
746
747"1:"
748 "movzbl (%edi),%eax\n"
749 "lea 1(%edi),%edi\n"
750 "movzbl (%esi),%ebx\n"
751 "lea 1(%esi),%esi\n"
752 "movq " UNDERSCORE "kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
753 "movzbl (%edx),%eax\n"
754 "paddsw " UNDERSCORE "kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000755 "lea 1(%edx),%edx\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000756 "paddsw " UNDERSCORE "kCoefficientsRgbY(,%eax,8),%mm0\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000757 "psraw $0x6,%mm0\n"
758 "packuswb %mm0,%mm0\n"
759 "movd %mm0,0x0(%ebp)\n"
760 "lea 4(%ebp),%ebp\n"
761 "sub $0x1,%ecx\n"
762 "ja 1b\n"
763 "popa\n"
764 "ret\n"
765);
766
fbarchard@google.comb6149762011-11-07 21:58:52 +0000767void FastConvertYToARGBRow_MMX(const uint8* y_buf,
768 uint8* rgb_buf,
769 int width);
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000770 asm(
771 ".text\n"
772#if defined(OSX) || defined(IOS)
fbarchard@google.comb6149762011-11-07 21:58:52 +0000773 ".globl _FastConvertYToARGBRow_MMX\n"
774"_FastConvertYToARGBRow_MMX:\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000775#else
fbarchard@google.comb6149762011-11-07 21:58:52 +0000776 ".global FastConvertYToARGBRow_MMX\n"
777"FastConvertYToARGBRow_MMX:\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000778#endif
779 "push %ebx\n"
780 "mov 0x8(%esp),%eax\n"
781 "mov 0xc(%esp),%edx\n"
782 "mov 0x10(%esp),%ecx\n"
783
784"1:"
785 "movzbl (%eax),%ebx\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000786 "movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm0\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000787 "psraw $0x6,%mm0\n"
788 "movzbl 0x1(%eax),%ebx\n"
fbarchard@google.comb6149762011-11-07 21:58:52 +0000789 "movq " UNDERSCORE "kCoefficientsRgbY(,%ebx,8),%mm1\n"
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000790 "psraw $0x6,%mm1\n"
791 "packuswb %mm1,%mm0\n"
792 "lea 0x2(%eax),%eax\n"
793 "movq %mm0,(%edx)\n"
794 "lea 0x8(%edx),%edx\n"
795 "sub $0x2,%ecx\n"
796 "ja 1b\n"
797 "pop %ebx\n"
798 "ret\n"
799);
800
fbarchard@google.comb6149762011-11-07 21:58:52 +0000801#endif
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000802
frkoenig@google.come5185422011-11-07 23:07:57 +0000803#ifdef HAS_ARGBTOYROW_SSSE3
fbarchard@google.comb6149762011-11-07 21:58:52 +0000804void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
805 SIMD_ALIGNED(uint8 row[kMaxStride]);
806 ABGRToARGBRow_SSSE3(src_argb, row, pix);
807 ARGBToYRow_SSSE3(row, dst_y, pix);
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000808}
809
fbarchard@google.comb6149762011-11-07 21:58:52 +0000810void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) {
811 SIMD_ALIGNED(uint8 row[kMaxStride]);
812 BGRAToARGBRow_SSSE3(src_argb, row, pix);
813 ARGBToYRow_SSSE3(row, dst_y, pix);
mikhal@webrtc.org43575c82011-10-12 18:49:21 +0000814}
frkoenig@google.come5185422011-11-07 23:07:57 +0000815#endif
mikhal@webrtc.org43575c82011-10-12 18:49:21 +0000816
fbarchard@google.comb6149762011-11-07 21:58:52 +0000817#ifdef HAS_ARGBTOUVROW_SSSE3
818void ABGRToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
819 uint8* dst_u, uint8* dst_v, int pix) {
820 SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
821 ABGRToARGBRow_SSSE3(src_argb, row, pix);
822 ABGRToARGBRow_SSSE3(src_argb + src_stride_argb, row + kMaxStride, pix);
823 ARGBToUVRow_SSSE3(row, kMaxStride, dst_u, dst_v, pix);
mikhal@webrtc.org43575c82011-10-12 18:49:21 +0000824}
825
fbarchard@google.comb6149762011-11-07 21:58:52 +0000826void BGRAToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
827 uint8* dst_u, uint8* dst_v, int pix) {
828 SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
829 BGRAToARGBRow_SSSE3(src_argb, row, pix);
830 BGRAToARGBRow_SSSE3(src_argb + src_stride_argb, row + kMaxStride, pix);
831 ARGBToUVRow_SSSE3(row, kMaxStride, dst_u, dst_v, pix);
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000832}
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000833#endif
fbarchard@google.com9394ed92011-10-31 21:36:47 +0000834
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000835} // extern "C"