SplitUV_Any variations for all CPUs.
BUG=126
TEST=convert tests NV12 with alignments
Review URL: https://webrtc-codereview.appspot.com/896007

git-svn-id: http://libyuv.googlecode.com/svn/trunk@426 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 159a790..267cd4b 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -2480,7 +2480,7 @@
     "pcmpeqb    %%xmm5,%%xmm5                    \n"
     "psrlw      $0x8,%%xmm5                      \n"
     "sub        %1,%2                            \n"
-    ".p2align  4                               \n"
+    ".p2align   4                                \n"
   "1:                                            \n"
     "movdqa     (%0),%%xmm0                      \n"
     "movdqa     0x10(%0),%%xmm1                  \n"
@@ -2509,6 +2509,42 @@
 #endif
   );
 }
+
+void SplitUV_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
+                            int pix) {  // Unaligned UV de-interleave.
+  asm volatile (  // Each pass: 32 src bytes -> 16 to dst_u + 16 to dst_v.
+    "pcmpeqb    %%xmm5,%%xmm5                    \n"  // xmm5 = all ones
+    "psrlw      $0x8,%%xmm5                      \n"  // xmm5 = 0x00ff words
+    "sub        %1,%2                            \n"  // %2 = dst_v - dst_u
+    ".p2align   4                                \n"  // 16-byte align loop
+  "1:                                            \n"
+    "movdqu     (%0),%%xmm0                      \n"  // unaligned load 0-15
+    "movdqu     0x10(%0),%%xmm1                  \n"  // unaligned load 16-31
+    "lea        0x20(%0),%0                      \n"  // src_uv += 32
+    "movdqa     %%xmm0,%%xmm2                    \n"  // copies for odd bytes
+    "movdqa     %%xmm1,%%xmm3                    \n"
+    "pand       %%xmm5,%%xmm0                    \n"  // keep even bytes
+    "pand       %%xmm5,%%xmm1                    \n"
+    "packuswb   %%xmm1,%%xmm0                    \n"  // 16 even bytes (U)
+    "psrlw      $0x8,%%xmm2                      \n"  // keep odd bytes
+    "psrlw      $0x8,%%xmm3                      \n"
+    "packuswb   %%xmm3,%%xmm2                    \n"  // 16 odd bytes (V)
+    "movdqu     %%xmm0,(%1)                      \n"  // store at dst_u
+    "movdqu     %%xmm2,(%1,%2)                   \n"  // store at dst_v
+    "lea        0x10(%1),%1                      \n"  // dst_u += 16
+    "sub        $0x10,%3                         \n"  // pix -= 16
+    "jg         1b                               \n"  // loop while pix > 0
+  : "+r"(src_uv),     // %0
+    "+r"(dst_u),      // %1
+    "+r"(dst_v),      // %2  (becomes offset from dst_u after the sub above)
+    "+r"(pix)         // %3  (tested after the body, so one pass always runs)
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );
+}
 #endif  // HAS_SPLITUV_SSE2
 
 #ifdef HAS_COPYROW_SSE2