I420Mirror rewrite, I420Copy movdqa, I444To420, I420To422
Review URL: http://webrtc-codereview.appspot.com/267025

git-svn-id: http://libyuv.googlecode.com/svn/trunk@89 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 89b8c64..005efbb 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -634,4 +634,36 @@
 }
 #endif
 
+#ifdef HAS_REVERSE_ROW_SSSE3
+
+// pshufb mask reversing 16 bytes: result byte i comes from source byte 15 - i.
+static const uvec8 kShuffleReverse = {
+  15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u
+};
+
+void ReverseRow_SSSE3(const uint8* src, uint8* dst, int width) {  // Writes the src row to dst in reverse byte order, 16 bytes per pass.
+  intptr_t temp_width = static_cast<intptr_t>(width);  // Pointer-sized copy so it can serve as the index register in the first lea.
+  asm volatile (
+  "movdqa     %3,%%xmm5                        \n"  // xmm5 = kShuffleReverse (movdqa needs a 16-byte aligned operand)
+  "lea        -0x10(%0,%2,1),%0                \n"  // src = src + width - 16: start at the last 16-byte chunk
+"1:                                            \n"
+  "movdqa     (%0),%%xmm0                      \n"  // aligned load of 16 source bytes; address must be 16-byte aligned
+  "lea        -0x10(%0),%0                     \n"  // src -= 16: walk the source backwards (lea leaves flags alone)
+  "pshufb     %%xmm5,%%xmm0                    \n"  // reverse the byte order inside the chunk
+  "movdqa     %%xmm0,(%1)                      \n"  // aligned store to dst; address must be 16-byte aligned
+  "lea        0x10(%1),%1                      \n"  // dst += 16: walk the destination forwards
+  "sub        $0x10,%2                         \n"  // 16 fewer bytes remaining; sets the flags tested by ja
+  "ja         1b                               \n"  // loop while the count is still above zero; tested after the body, so one chunk is always processed
+  : "+r"(src),  // %0
+    "+r"(dst),  // %1
+    "+r"(temp_width)  // %2
+  : "m"(kShuffleReverse) // %3
+  : "memory", "cc"
+#if defined(__SSE2__)  // xmm clobbers are only listed when the compiler targets SSE2
+    , "xmm0", "xmm5"
+#endif
+  );
+}
+#endif
+
 }  // extern "C"