/*
 *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "row.h"
12
13extern "C" {
14
15#if defined(__x86_64__)
16
17// 64 bit linux gcc version
18
19void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
20 const uint8* u_buf, // rsi
21 const uint8* v_buf, // rdx
22 uint8* rgb_buf, // rcx
23 int width) { // r8
24 asm(
25"1:"
26 "movzb (%1),%%r10\n"
27 "lea 1(%1),%1\n"
28 "movzb (%2),%%r11\n"
29 "lea 1(%2),%2\n"
30 "movq 2048(%5,%%r10,8),%%xmm0\n"
31 "movzb (%0),%%r10\n"
32 "movq 4096(%5,%%r11,8),%%xmm1\n"
33 "movzb 0x1(%0),%%r11\n"
34 "paddsw %%xmm1,%%xmm0\n"
35 "movq (%5,%%r10,8),%%xmm2\n"
36 "lea 2(%0),%0\n"
37 "movq (%5,%%r11,8),%%xmm3\n"
38 "paddsw %%xmm0,%%xmm2\n"
39 "paddsw %%xmm0,%%xmm3\n"
40 "shufps $0x44,%%xmm3,%%xmm2\n"
41 "psraw $0x6,%%xmm2\n"
42 "packuswb %%xmm2,%%xmm2\n"
43 "movq %%xmm2,0x0(%3)\n"
44 "lea 8(%3),%3\n"
45 "sub $0x2,%4\n"
46 "ja 1b\n"
47 :
48 : "r"(y_buf), // %0
49 "r"(u_buf), // %1
50 "r"(v_buf), // %2
51 "r"(rgb_buf), // %3
52 "r"(width), // %4
53 "r" (_kCoefficientsRgbY) // %5
54 : "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
55);
56}
57
58#elif defined(__i386__)
59// 32 bit gcc version
60
61void FastConvertYUVToRGB32Row(const uint8* y_buf,
62 const uint8* u_buf,
63 const uint8* v_buf,
64 uint8* rgb_buf,
65 int width);
66 asm(
67 ".text\n"
68#if defined(OSX) || defined(IOS)
69 ".globl _FastConvertYUVToRGB32Row\n"
70"_FastConvertYUVToRGB32Row:\n"
71#else
72 ".global FastConvertYUVToRGB32Row\n"
73"FastConvertYUVToRGB32Row:\n"
74#endif
75 "pusha\n"
76 "mov 0x24(%esp),%edx\n"
77 "mov 0x28(%esp),%edi\n"
78 "mov 0x2c(%esp),%esi\n"
79 "mov 0x30(%esp),%ebp\n"
80 "mov 0x34(%esp),%ecx\n"
81
82"1:"
83 "movzbl (%edi),%eax\n"
84 "lea 1(%edi),%edi\n"
85 "movzbl (%esi),%ebx\n"
86 "lea 1(%esi),%esi\n"
87 "movq _kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
88 "movzbl (%edx),%eax\n"
89 "paddsw _kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
90 "movzbl 0x1(%edx),%ebx\n"
91 "movq _kCoefficientsRgbY(,%eax,8),%mm1\n"
92 "lea 2(%edx),%edx\n"
93 "movq _kCoefficientsRgbY(,%ebx,8),%mm2\n"
94 "paddsw %mm0,%mm1\n"
95 "paddsw %mm0,%mm2\n"
96 "psraw $0x6,%mm1\n"
97 "psraw $0x6,%mm2\n"
98 "packuswb %mm2,%mm1\n"
99 "movntq %mm1,0x0(%ebp)\n"
100 "lea 8(%ebp),%ebp\n"
101 "sub $0x2,%ecx\n"
102 "ja 1b\n"
103 "popa\n"
104 "ret\n"
105);
106
107#else
// C reference code that mimics the YUV assembly.
// Scalar stand-ins for the SIMD instructions of the same names.  They are
// inline functions rather than function-like macros so that every argument
// is evaluated exactly once and is type-checked.

// Clamps x to the unsigned byte range [0, 255].
static inline int packuswb(int x) {
  if (x < 0) {
    return 0;
  }
  if (x > 255) {
    return 255;
  }
  return x;
}

// Returns x + y clamped to the signed 16-bit range [-32768, 32767].
// Both operands are expected to already lie in that range, so the
// intermediate int sum cannot overflow.
static inline int paddsw(int x, int y) {
  const int sum = x + y;
  if (sum < -32768) {
    return -32768;
  }
  if (sum > 32767) {
    return 32767;
  }
  return sum;
}
112
113static inline void YuvPixel(uint8 y,
114 uint8 u,
115 uint8 v,
116 uint8* rgb_buf) {
117
118 int b = _kCoefficientsRgbY[256+u][0];
119 int g = _kCoefficientsRgbY[256+u][1];
120 int r = _kCoefficientsRgbY[256+u][2];
121 int a = _kCoefficientsRgbY[256+u][3];
122
123 b = paddsw(b, _kCoefficientsRgbY[512+v][0]);
124 g = paddsw(g, _kCoefficientsRgbY[512+v][1]);
125 r = paddsw(r, _kCoefficientsRgbY[512+v][2]);
126 a = paddsw(a, _kCoefficientsRgbY[512+v][3]);
127
128 b = paddsw(b, _kCoefficientsRgbY[y][0]);
129 g = paddsw(g, _kCoefficientsRgbY[y][1]);
130 r = paddsw(r, _kCoefficientsRgbY[y][2]);
131 a = paddsw(a, _kCoefficientsRgbY[y][3]);
132
133 b >>= 6;
134 g >>= 6;
135 r >>= 6;
136 a >>= 6;
137
138 *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
139 (packuswb(g) << 8) |
140 (packuswb(r) << 16) |
141 (packuswb(a) << 24);
142}
143
144void FastConvertYUVToRGB32Row(const uint8* y_buf,
145 const uint8* u_buf,
146 const uint8* v_buf,
147 uint8* rgb_buf,
148 int width) {
149 for (int x = 0; x < width; x += 2) {
150 uint8 u = u_buf[x >> 1];
151 uint8 v = v_buf[x >> 1];
152 uint8 y0 = y_buf[x];
153 YuvPixel(y0, u, v, rgb_buf);
154 if ((x + 1) < width) {
155 uint8 y1 = y_buf[x + 1];
156 YuvPixel(y1, u, v, rgb_buf + 4);
157 }
158 rgb_buf += 8; // Advance 2 pixels.
159 }
160}
161#endif
162
163} // extern "C"