MergeUV backport to SSE2
BUG=135
TESTED=unitest I420ToNV12
Review URL: https://webrtc-codereview.appspot.com/943006
git-svn-id: http://libyuv.googlecode.com/svn/trunk@447 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 267cd4b..4c11d4f 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -2547,6 +2547,37 @@
}
#endif // HAS_SPLITUV_SSE2
+#ifdef HAS_MERGEUV_SSE2
+void MergeUV_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
+ int width) {
+ asm volatile (
+ "sub %0,%1 \n"
+ ".p2align 4 \n"
+ "1: \n"
+ "movdqa (%0),%%xmm0 \n"
+ "movdqa (%0,%1,1),%%xmm1 \n"
+ "lea 0x10(%0),%0 \n"
+ "movdqa %%xmm0,%%xmm2 \n"
+ "punpcklbw %%xmm1,%%xmm0 \n"
+ "punpckhbw %%xmm1,%%xmm2 \n"
+ "movdqa %%xmm0,(%2) \n"
+ "movdqa %%xmm2,0x10(%2) \n"
+ "lea 0x20(%2),%2 \n"
+ "sub $0x10,%3 \n"
+ "jg 1b \n"
+ : "+r"(src_u), // %0
+ "+r"(src_v), // %1
+ "+r"(dst_uv), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
+#if defined(__SSE2__)
+ , "xmm0", "xmm1", "xmm2"
+#endif
+ );
+}
+#endif // HAS_MERGEUV_SSE2
+
#ifdef HAS_COPYROW_SSE2
void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
asm volatile (