CopyRow instead of memcpy for copying planes
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/396008
git-svn-id: http://libyuv.googlecode.com/svn/trunk@175 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 6fb3f3c..9a1770e 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -848,7 +848,6 @@
"pcmpeqb %%xmm5,%%xmm5 \n"
"psrlw $0x8,%%xmm5 \n"
"sub %1,%2 \n"
-
"1: \n"
"movdqa (%0),%%xmm0 \n"
"movdqa 0x10(%0),%%xmm1 \n"
@@ -879,6 +878,45 @@
}
#endif
+#ifdef HAS_COPYROW_SSE2
+void CopyRow_SSE2(const uint8* src, uint8* dst, int count) {
+ asm volatile (
+ "sub %0,%1 \n"
+ "1: \n"
+ "movdqa (%0),%%xmm0 \n"
+ "movdqa 0x10(%0),%%xmm1 \n"
+ "movdqa %%xmm0,(%0,%1) \n"
+ "movdqa %%xmm1,0x10(%0,%1) \n"
+ "lea 0x20(%0),%0 \n"
+ "sub $0x20,%2 \n"
+ "ja 1b \n"
+ : "+r"(src), // %0
+ "+r"(dst), // %1
+ "+r"(count) // %2
+ :
+ : "memory", "cc"
+#if defined(__SSE2__)
+ , "xmm0", "xmm1"
+#endif
+ );
+}
+#endif // HAS_COPYROW_SSE2
+
+#ifdef HAS_COPYROW_X86
+void CopyRow_X86(const uint8* src, uint8* dst, int width) {
+ size_t width_tmp = static_cast<size_t>(width);
+ asm volatile (
+ "shr $0x2,%2 \n"
+ "rep movsl \n"
+ : "+S"(src), // %0
+ "+D"(dst), // %1
+ "+c"(width_tmp) // %2
+ :
+ : "memory", "cc"
+ );
+}
+#endif
+
#ifdef HAS_YUY2TOYROW_SSE2
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile (