blob: 44c89dabdc337c8786c8a9d2296a36bb4a4133c1 [file] [log] [blame]
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +00001/*
2 * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "row.h"
12
13extern "C" {
14
15#if defined(__x86_64__)
16
17// 64 bit linux gcc version
18
19void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
20 const uint8* u_buf, // rsi
21 const uint8* v_buf, // rdx
22 uint8* rgb_buf, // rcx
23 int width) { // r8
24 asm(
25"1:"
26 "movzb (%1),%%r10\n"
27 "lea 1(%1),%1\n"
28 "movzb (%2),%%r11\n"
29 "lea 1(%2),%2\n"
30 "movq 2048(%5,%%r10,8),%%xmm0\n"
31 "movzb (%0),%%r10\n"
32 "movq 4096(%5,%%r11,8),%%xmm1\n"
33 "movzb 0x1(%0),%%r11\n"
34 "paddsw %%xmm1,%%xmm0\n"
35 "movq (%5,%%r10,8),%%xmm2\n"
36 "lea 2(%0),%0\n"
37 "movq (%5,%%r11,8),%%xmm3\n"
38 "paddsw %%xmm0,%%xmm2\n"
39 "paddsw %%xmm0,%%xmm3\n"
40 "shufps $0x44,%%xmm3,%%xmm2\n"
41 "psraw $0x6,%%xmm2\n"
42 "packuswb %%xmm2,%%xmm2\n"
43 "movq %%xmm2,0x0(%3)\n"
44 "lea 8(%3),%3\n"
45 "sub $0x2,%4\n"
46 "ja 1b\n"
47 :
48 : "r"(y_buf), // %0
49 "r"(u_buf), // %1
50 "r"(v_buf), // %2
51 "r"(rgb_buf), // %3
52 "r"(width), // %4
53 "r" (_kCoefficientsRgbY) // %5
54 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
55);
56}
57
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +000058void FastConvertYUV444ToRGB32Row(const uint8* y_buf, // rdi
59 const uint8* u_buf, // rsi
60 const uint8* v_buf, // rdx
61 uint8* rgb_buf, // rcx
62 int width) { // r8
63 asm(
64"1:"
65 "movzb (%1),%%r10\n"
66 "lea 1(%1),%1\n"
67 "movzb (%2),%%r11\n"
68 "lea 1(%2),%2\n"
69 "movq 2048(%5,%%r10,8),%%xmm0\n"
70 "movzb (%0),%%r10\n"
71 "movq 4096(%5,%%r11,8),%%xmm1\n"
72 "paddsw %%xmm1,%%xmm0\n"
73 "movq (%5,%%r10,8),%%xmm2\n"
74 "lea 1(%0),%0\n"
75 "paddsw %%xmm0,%%xmm2\n"
76 "shufps $0x44,%%xmm2,%%xmm2\n"
77 "psraw $0x6,%%xmm2\n"
78 "packuswb %%xmm2,%%xmm2\n"
79 "movd %%xmm2,0x0(%3)\n"
80 "lea 4(%3),%3\n"
81 "sub $0x1,%4\n"
82 "ja 1b\n"
83 :
84 : "r"(y_buf), // %0
85 "r"(u_buf), // %1
86 "r"(v_buf), // %2
87 "r"(rgb_buf), // %3
88 "r"(width), // %4
89 "r" (_kCoefficientsRgbY) // %5
90 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2"
91);
92}
93
94void FastConvertYToRGB32Row(const uint8* y_buf, // rdi
95 uint8* rgb_buf, // rcx
96 int width) { // r8
97 asm(
98"1:"
99 "movzb (%0),%%r10\n"
100 "movzb 0x1(%0),%%r11\n"
101 "movq (%3,%%r10,8),%%xmm2\n"
102 "lea 2(%0),%0\n"
103 "movq (%3,%%r11,8),%%xmm3\n"
104 "shufps $0x44,%%xmm3,%%xmm2\n"
105 "psraw $0x6,%%xmm2\n"
106 "packuswb %%xmm2,%%xmm2\n"
107 "movq %%xmm2,0x0(%1)\n"
108 "lea 8(%1),%1\n"
109 "sub $0x2,%2\n"
110 "ja 1b\n"
111 :
112 : "r"(y_buf), // %0
113 "r"(rgb_buf), // %1
114 "r"(width), // %2
115 "r" (_kCoefficientsRgbY) // %3
116 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
117);
118}
119
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000120#elif defined(__i386__)
121// 32 bit gcc version
122
123void FastConvertYUVToRGB32Row(const uint8* y_buf,
124 const uint8* u_buf,
125 const uint8* v_buf,
126 uint8* rgb_buf,
127 int width);
128 asm(
129 ".text\n"
130#if defined(OSX) || defined(IOS)
131 ".globl _FastConvertYUVToRGB32Row\n"
132"_FastConvertYUVToRGB32Row:\n"
133#else
134 ".global FastConvertYUVToRGB32Row\n"
135"FastConvertYUVToRGB32Row:\n"
136#endif
137 "pusha\n"
138 "mov 0x24(%esp),%edx\n"
139 "mov 0x28(%esp),%edi\n"
140 "mov 0x2c(%esp),%esi\n"
141 "mov 0x30(%esp),%ebp\n"
142 "mov 0x34(%esp),%ecx\n"
143
144"1:"
145 "movzbl (%edi),%eax\n"
146 "lea 1(%edi),%edi\n"
147 "movzbl (%esi),%ebx\n"
148 "lea 1(%esi),%esi\n"
149 "movq _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
150 "movzbl (%edx),%eax\n"
151 "paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
152 "movzbl 0x1(%edx),%ebx\n"
153 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n"
154 "lea 2(%edx),%edx\n"
155 "movq _kCoefficientsRgbY(,%ebx,8),%mm2\n"
156 "paddsw %mm0,%mm1\n"
157 "paddsw %mm0,%mm2\n"
158 "psraw $0x6,%mm1\n"
159 "psraw $0x6,%mm2\n"
160 "packuswb %mm2,%mm1\n"
161 "movntq %mm1,0x0(%ebp)\n"
162 "lea 8(%ebp),%ebp\n"
163 "sub $0x2,%ecx\n"
164 "ja 1b\n"
165 "popa\n"
166 "ret\n"
167);
168
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000169void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
170 const uint8* u_buf,
171 const uint8* v_buf,
172 uint8* rgb_buf,
173 int width);
174 asm(
175 ".text\n"
176#if defined(OSX) || defined(IOS)
177 ".globl _FastConvertYUV444ToRGB32Row\n"
178"_FastConvertYUV444ToRGB32Row:\n"
179#else
180 ".global FastConvertYUV444ToRGB32Row\n"
181"FastConvertYUV444ToRGB32Row:\n"
182#endif
183 "pusha\n"
184 "mov 0x24(%esp),%edx\n"
185 "mov 0x28(%esp),%edi\n"
186 "mov 0x2c(%esp),%esi\n"
187 "mov 0x30(%esp),%ebp\n"
188 "mov 0x34(%esp),%ecx\n"
189
190"1:"
191 "movzbl (%edi),%eax\n"
192 "lea 1(%edi),%edi\n"
193 "movzbl (%esi),%ebx\n"
194 "lea 1(%esi),%esi\n"
195 "movq _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
196 "movzbl (%edx),%eax\n"
197 "paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
198 "lea 1(%edx),%edx\n"
199 "paddsw _kCoefficientsRgbY(,%eax,8),%mm0\n"
200 "psraw $0x6,%mm0\n"
201 "packuswb %mm0,%mm0\n"
202 "movd %mm0,0x0(%ebp)\n"
203 "lea 4(%ebp),%ebp\n"
204 "sub $0x1,%ecx\n"
205 "ja 1b\n"
206 "popa\n"
207 "ret\n"
208);
209
210void FastConvertYToRGB32Row(const uint8* y_buf,
211 uint8* rgb_buf,
212 int width);
213 asm(
214 ".text\n"
215#if defined(OSX) || defined(IOS)
216 ".globl _FastConvertYToRGB32Row\n"
217"_FastConvertYToRGB32Row:\n"
218#else
219 ".global FastConvertYToRGB32Row\n"
220"FastConvertYToRGB32Row:\n"
221#endif
222 "push %ebx\n"
223 "mov 0x8(%esp),%eax\n"
224 "mov 0xc(%esp),%edx\n"
225 "mov 0x10(%esp),%ecx\n"
226
227"1:"
228 "movzbl (%eax),%ebx\n"
229 "movq _kCoefficientsRgbY(,%ebx,8),%mm0\n"
230 "psraw $0x6,%mm0\n"
231 "movzbl 0x1(%eax),%ebx\n"
232 "movq _kCoefficientsRgbY(,%ebx,8),%mm1\n"
233 "psraw $0x6,%mm1\n"
234 "packuswb %mm1,%mm0\n"
235 "lea 0x2(%eax),%eax\n"
236 "movq %mm0,(%edx)\n"
237 "lea 0x8(%edx),%edx\n"
238 "sub $0x2,%ecx\n"
239 "ja 1b\n"
240 "pop %ebx\n"
241 "ret\n"
242);
243
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000244#else
245// C reference code that mimic the YUV assembly.
246#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
247#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
248 (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
249
250static inline void YuvPixel(uint8 y,
251 uint8 u,
252 uint8 v,
253 uint8* rgb_buf) {
254
255 int b = _kCoefficientsRgbY[256+u][0];
256 int g = _kCoefficientsRgbY[256+u][1];
257 int r = _kCoefficientsRgbY[256+u][2];
258 int a = _kCoefficientsRgbY[256+u][3];
259
260 b = paddsw(b, _kCoefficientsRgbY[512+v][0]);
261 g = paddsw(g, _kCoefficientsRgbY[512+v][1]);
262 r = paddsw(r, _kCoefficientsRgbY[512+v][2]);
263 a = paddsw(a, _kCoefficientsRgbY[512+v][3]);
264
265 b = paddsw(b, _kCoefficientsRgbY[y][0]);
266 g = paddsw(g, _kCoefficientsRgbY[y][1]);
267 r = paddsw(r, _kCoefficientsRgbY[y][2]);
268 a = paddsw(a, _kCoefficientsRgbY[y][3]);
269
270 b >>= 6;
271 g >>= 6;
272 r >>= 6;
273 a >>= 6;
274
275 *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
276 (packuswb(g) << 8) |
277 (packuswb(r) << 16) |
278 (packuswb(a) << 24);
279}
280
281void FastConvertYUVToRGB32Row(const uint8* y_buf,
282 const uint8* u_buf,
283 const uint8* v_buf,
284 uint8* rgb_buf,
285 int width) {
286 for (int x = 0; x < width; x += 2) {
287 uint8 u = u_buf[x >> 1];
288 uint8 v = v_buf[x >> 1];
289 uint8 y0 = y_buf[x];
290 YuvPixel(y0, u, v, rgb_buf);
291 if ((x + 1) < width) {
292 uint8 y1 = y_buf[x + 1];
293 YuvPixel(y1, u, v, rgb_buf + 4);
294 }
295 rgb_buf += 8; // Advance 2 pixels.
296 }
297}
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000298
mikhal@webrtc.org120d5e72011-10-07 17:57:17 +0000299void FastConvertYUV444ToRGB32Row(const uint8* y_buf,
300 const uint8* u_buf,
301 const uint8* v_buf,
302 uint8* rgb_buf,
303 int width) {
304 for (int x = 0; x < width; ++x) {
305 uint8 u = u_buf[x];
306 uint8 v = v_buf[x];
307 uint8 y = y_buf[x];
308 YuvPixel(y, u, v, rgb_buf);
309 rgb_buf += 4; // Advance 1 pixel.
310 }
311}
312
313void FastConvertYToRGB32Row(const uint8* y_buf,
314 uint8* rgb_buf,
315 int width) {
316 for (int x = 0; x < width; ++x) {
317 uint8 y = y_buf[x];
318 YuvPixel(y, 128, 128, rgb_buf);
319 rgb_buf += 4; // Advance 1 pixel.
320 }
321}
322
323#endif
mikhal@webrtc.orgaed1cc92011-09-28 00:06:25 +0000324} // extern "C"