ARGBGray function for convertion a rectangle of ARGB to gray scale
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/582007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@269 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 9d9dbc5..340dd65 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -108,6 +108,11 @@
   2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
 };
 
+// Constant for ARGB color to gray scale.  0.11 * B + 0.59 * G + 0.30 * R
+CONST vec8 kARGBToGray = {
+  14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
+};
+
 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
   asm volatile (
     "pcmpeqb   %%xmm5,%%xmm5                   \n"
@@ -2527,6 +2532,53 @@
 }
 #endif  // HAS_ARGBUNATTENUATE_SSE2
 
+#ifdef HAS_ARGBGRAYROW_SSSE3
+// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
+void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
+  asm volatile (
+    "movdqa    %2,%%xmm4                       \n"
+    "pcmpeqb   %%xmm5,%%xmm5                   \n"
+    "pslld     $0x18,%%xmm5                    \n"
+    "pcmpeqb   %%xmm3,%%xmm3                   \n"
+    "psrld     $0x8,%%xmm3                     \n"
+
+  // 8 pixel loop                              \n"
+    ".p2align  4                               \n"
+  "1:                                          \n"
+    "movdqa    (%0),%%xmm0                     \n"
+    "movdqa    0x10(%0),%%xmm1                 \n"
+    "pmaddubsw %%xmm4,%%xmm0                   \n"
+    "pmaddubsw %%xmm4,%%xmm1                   \n"
+    "movdqa    (%0),%%xmm6                     \n"
+    "movdqa    0x10(%0),%%xmm7                 \n"
+    "pand      %%xmm5,%%xmm6                   \n"
+    "pand      %%xmm5,%%xmm7                   \n"
+    "phaddw    %%xmm1,%%xmm0                   \n"
+    "psrlw     $0x7,%%xmm0                     \n"
+    "packuswb  %%xmm0,%%xmm0                   \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklwd %%xmm0,%%xmm0                   \n"
+    "punpckhwd %%xmm1,%%xmm1                   \n"
+    "pand      %%xmm3,%%xmm0                   \n"
+    "pand      %%xmm3,%%xmm1                   \n"
+    "por       %%xmm6,%%xmm0                   \n"
+    "por       %%xmm7,%%xmm1                   \n"
+    "sub       $0x8,%1                         \n"
+    "movdqa    %%xmm0,(%0)                     \n"
+    "movdqa    %%xmm1,0x10(%0)                 \n"
+    "lea       0x20(%0),%0                     \n"
+    "jg        1b                              \n"
+  : "+r"(dst_argb),  // %0
+    "+r"(width)      // %1
+  : "m"(kARGBToGray)  // %2
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
+  );
+}
+#endif  // HAS_ARGBGRAYROW_SSSE3
 #endif  // defined(__x86_64__) || defined(__i386__)
 
 #ifdef __cplusplus