Shuffle functions so that convert.h converts all formats to I420 and convert_from.h converts from I420 to all formats
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/395006

git-svn-id: http://libyuv.googlecode.com/svn/trunk@174 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index e0148b7..6fb3f3c 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -17,6 +17,9 @@
 extern "C" {
 #endif
 
+// This module is for GCC x86 and x64
+#if (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+
 #ifdef __APPLE__
 #define CONST
 #else
@@ -816,7 +819,7 @@
     "lea       -0x10(%0),%0                    \n"
   "1:                                          \n"
     "movdqu    (%0,%2),%%xmm0                  \n"
-    "movdqu    %%xmm0,%%xmm1                   \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
     "psllw     $0x8,%%xmm0                     \n"
     "psrlw     $0x8,%%xmm1                     \n"
     "por       %%xmm1,%%xmm0                   \n"
@@ -839,6 +842,43 @@
 }
 #endif
 
+#ifdef HAS_SPLITUV_SSE2
+void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {
+  asm volatile (  // De-interleave src_uv: even bytes go to dst_u, odd bytes to dst_v.
+    "pcmpeqb    %%xmm5,%%xmm5                    \n"  // xmm5 = all bits set
+    "psrlw      $0x8,%%xmm5                      \n"  // xmm5 = 0x00ff in every 16-bit word
+    "sub        %1,%2                            \n"  // %2 = dst_v - dst_u, used as an offset below
+
+  "1:                                            \n"  // loop body always runs at least once
+    "movdqa     (%0),%%xmm0                      \n"  // load 16 source bytes (movdqa needs 16-byte alignment)
+    "movdqa     0x10(%0),%%xmm1                  \n"  // load the next 16 source bytes
+    "lea        0x20(%0),%0                      \n"  // src_uv += 32
+    "movdqa     %%xmm0,%%xmm2                    \n"  // keep copies for the odd-byte path
+    "movdqa     %%xmm1,%%xmm3                    \n"
+    "pand       %%xmm5,%%xmm0                    \n"  // keep the low (even) byte of each word
+    "pand       %%xmm5,%%xmm1                    \n"
+    "packuswb   %%xmm1,%%xmm0                    \n"  // xmm0 = 16 even bytes
+    "psrlw      $0x8,%%xmm2                      \n"  // move the high (odd) byte of each word down
+    "psrlw      $0x8,%%xmm3                      \n"
+    "packuswb   %%xmm3,%%xmm2                    \n"  // xmm2 = 16 odd bytes
+    "movdqa     %%xmm0,(%1)                      \n"  // store even bytes to dst_u
+    "movdqa     %%xmm2,(%1,%2)                   \n"  // store odd bytes to dst_v (dst_u + offset)
+    "lea        0x10(%1),%1                      \n"  // dst_u += 16; dst_v follows through the offset
+    "sub        $0x10,%3                         \n"  // pix -= 16
+    "ja         1b                               \n"  // repeat unless the subtraction hit zero or borrowed
+    : "+r"(src_uv),     // %0: read pointer, advanced 32 bytes per iteration
+      "+r"(dst_u),      // %1: write pointer, advanced 16 bytes per iteration
+      "+r"(dst_v),      // %2: overwritten with the dst_v - dst_u offset
+      "+r"(pix)         // %3: remaining count, reduced by 16 per iteration
+    :
+    : "memory", "cc"
+#if defined(__SSE2__)
+      , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
+#endif
+
 #ifdef HAS_YUY2TOYROW_SSE2
 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
   asm volatile (
@@ -1099,9 +1139,10 @@
 #endif
   );
 }
-
 #endif  // HAS_YUY2TOYROW_SSE2
 
+#endif  // defined(__x86_64__) || defined(__i386__)
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv