Sobel: use the G channel for consistency on all CPUs, better performance, and the full range of 0 to 255.
BUG=201
TESTED=out\release\libyuv_unittest --gtest_filter=*Sobel*
Review URL: https://webrtc-codereview.appspot.com/1225004

git-svn-id: http://libyuv.googlecode.com/svn/trunk@614 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 4e62b0c..34b24c5 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -4595,11 +4595,14 @@
     ".p2align  4                               \n"
   "1:                                          \n"
     "movdqa    (%0),%%xmm0                     \n"
-    "lea       0x10(%0),%0                     \n"
+    "movdqa    0x10(%0),%%xmm1                 \n"
+    "lea       0x20(%0),%0                     \n"
     "pshufb    %%xmm5,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movd      %%xmm0,(%1)                     \n"
-    "lea       0x4(%1),%1                      \n"
+    "pshufb    %%xmm5,%%xmm1                   \n"
+    "punpckldq %%xmm1,%%xmm0                   \n"  // interleave low dwords
+    "sub       $0x8,%2                         \n"
+    "movq      %%xmm0,(%1)                     \n"
+    "lea       0x8(%1),%1                      \n"
     "jg        1b                              \n"
   : "+r"(src_argb),  // %0
     "+r"(dst_bayer), // %1
@@ -4607,7 +4610,7 @@
   : "g"(selector)    // %3
   : "memory", "cc"
 #if defined(__SSE2__)
-    , "xmm0", "xmm5"
+    , "xmm0", "xmm1", "xmm5"
 #endif
   );
 }