MergeUV backport to SSE2
BUG=135
TESTED=unittest I420ToNV12
Review URL: https://webrtc-codereview.appspot.com/943006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@447 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 267cd4b..4c11d4f 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -2547,6 +2547,37 @@
 }
 #endif  // HAS_SPLITUV_SSE2
 
+#ifdef HAS_MERGEUV_SSE2
+void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,  // Interleaves src_u and src_v bytes into dst_uv as U0 V0 U1 V1 ...
+                  int width) {  // width: bytes taken from each source; consumed 16 per loop pass.
+  asm volatile (
+    "sub       %0,%1                             \n"  // %1 = src_v - src_u, so (%0,%1,1) addresses the V row.
+    ".p2align   4                                \n"  // Align the loop entry to a 16-byte boundary.
+  "1:                                            \n"  // Loop top; the body runs once before width is ever tested.
+    "movdqa    (%0),%%xmm0                       \n"  // xmm0 = 16 U bytes (movdqa needs a 16-byte aligned address).
+    "movdqa    (%0,%1,1),%%xmm1                  \n"  // xmm1 = 16 V bytes, reached through the U pointer plus offset.
+    "lea       0x10(%0),%0                       \n"  // Advance U by 16; V follows implicitly via the offset in %1.
+    "movdqa    %%xmm0,%%xmm2                     \n"  // Keep a copy of U; the next unpack overwrites xmm0.
+    "punpcklbw %%xmm1,%%xmm0                     \n"  // xmm0 = U0 V0 ... U7 V7 (low halves interleaved).
+    "punpckhbw %%xmm1,%%xmm2                     \n"  // xmm2 = U8 V8 ... U15 V15 (high halves interleaved).
+    "movdqa    %%xmm0,(%2)                       \n"  // Store the first 16 interleaved bytes (aligned store).
+    "movdqa    %%xmm2,0x10(%2)                   \n"  // Store the second 16 interleaved bytes.
+    "lea       0x20(%2),%2                       \n"  // Advance the destination by 32 bytes.
+    "sub       $0x10,%3                          \n"  // width -= 16; sets the flags tested by jg.
+    "jg        1b                                \n"  // Repeat while the remaining width is > 0.
+  : "+r"(src_u),     // %0: U source pointer, advanced 16 bytes per pass
+    "+r"(src_v),     // %1: V source pointer; holds src_v - src_u after the first instruction
+    "+r"(dst_uv),    // %2: destination pointer, advanced 32 bytes per pass
+    "+r"(width)      // %3: remaining count, reduced by 16 per pass
+  :  // No input-only operands; everything is read-write above.
+  : "memory", "cc"  // The asm stores to memory and modifies the flags.
+#if defined(__SSE2__)  // xmm registers are declared as clobbers only when __SSE2__ is defined.
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_MERGEUV_SSE2
+
 #ifdef HAS_COPYROW_SSE2
 void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
   asm volatile (