Shuffle functions so that convert.h holds conversions from all formats to I420 and convert_from.h holds conversions from I420 to all formats
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/395006
git-svn-id: http://libyuv.googlecode.com/svn/trunk@174 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index e0148b7..6fb3f3c 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -17,6 +17,9 @@
extern "C" {
#endif
+// This module is for GCC x86 and x64
+#if (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+
#ifdef __APPLE__
#define CONST
#else
@@ -816,7 +819,7 @@
"lea -0x10(%0),%0 \n"
"1: \n"
"movdqu (%0,%2),%%xmm0 \n"
- "movdqu %%xmm0,%%xmm1 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
"psllw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"por %%xmm1,%%xmm0 \n"
@@ -839,6 +842,43 @@
}
#endif
+#ifdef HAS_SPLITUV_SSE2  // SSE2 routine that de-interleaves a byte-pair stream into two planes.
+void SplitUV_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) {  // even bytes of src_uv -> dst_u, odd bytes -> dst_v
+ asm volatile (
+ "pcmpeqb %%xmm5,%%xmm5 \n"  // xmm5 = all ones (register compared with itself)
+ "psrlw $0x8,%%xmm5 \n"  // xmm5 = 0x00ff in every 16-bit word: mask for the low byte
+ "sub %1,%2 \n"  // %2 becomes the offset dst_v - dst_u, so only %1 needs advancing
+
+ "1: \n"  // each iteration consumes 32 source bytes and emits 16 bytes per destination
+ "movdqa (%0),%%xmm0 \n"  // aligned 16-byte load: movdqa requires a 16-byte aligned address
+ "movdqa 0x10(%0),%%xmm1 \n"  // second 16 source bytes
+ "lea 0x20(%0),%0 \n"  // advance src_uv by 32 bytes
+ "movdqa %%xmm0,%%xmm2 \n"  // keep copies for extracting the odd bytes
+ "movdqa %%xmm1,%%xmm3 \n"
+ "pand %%xmm5,%%xmm0 \n"  // keep the low (even-indexed) byte of each word
+ "pand %%xmm5,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm0 \n"  // pack 16 words into 16 bytes: the even source bytes
+ "psrlw $0x8,%%xmm2 \n"  // move the high (odd-indexed) byte of each word into the low byte
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"  // pack 16 words into 16 bytes: the odd source bytes
+ "movdqa %%xmm0,(%1) \n"  // aligned store of even bytes to dst_u
+ "movdqa %%xmm2,(%1,%2) \n"  // aligned store of odd bytes at dst_u + offset, i.e. the dst_v position
+ "lea 0x10(%1),%1 \n"  // advance dst_u by 16 bytes; the dst_v position follows via the offset
+ "sub $0x10,%3 \n"  // 16 fewer output bytes per plane remaining
+ "ja 1b \n"  // loop while the count is still above zero (no borrow and non-zero)
+ : "+r"(src_uv), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(pix) // %3
+ :
+ : "memory", "cc"
+#if defined(__SSE2__)
+ , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"  // vector registers written by the asm above
+#endif
+ );
+}
+#endif
+
#ifdef HAS_YUY2TOYROW_SSE2
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile (
@@ -1099,9 +1139,10 @@
#endif
);
}
-
#endif // HAS_YUY2TOYROW_SSE2
+#endif // (defined(__x86_64__) || defined(__i386__)) && !defined(YUV_DISABLE_ASM)
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv