Move Neon source to its own files.
BUG=none
TEST=none
Review URL: https://webrtc-codereview.appspot.com/860009
git-svn-id: http://libyuv.googlecode.com/svn/trunk@396 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 65416c0..33149da 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -741,9 +741,9 @@
}
// TODO(fbarchard): pass xmm constants to single block of assembly.
-// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
-// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
-// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
+// fpic on GCC 4.2 for OSX runs out of GPR registers. "m" effectively takes
+// 3 registers - ebx, ebp and eax. "m" can be passed with 3 normal registers,
+// or 4 if stack frame is disabled. Doing 2 assembly blocks is a work around
// and considered unsafe.
void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) {
@@ -2143,6 +2143,34 @@
}
#endif // HAS_COPYROW_X86
+#ifdef HAS_SETROW_X86
+void SetRow8_X86(uint8* dst, uint32 v32, int width) {
+ size_t width_tmp = static_cast<size_t>(width);
+ asm volatile (
+ "shr $0x2,%1 \n"
+ "rep stosl \n"
+ : "+D"(dst), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
+}
+
+void SetRows32_X86(uint8* dst, uint32 v32, int width,
+ int dst_stride, int height) {
+ for (int y = 0; y < height; ++y) {
+ size_t width_tmp = static_cast<size_t>(width);
+ uint32* d = reinterpret_cast<uint32*>(dst);
+ asm volatile (
+ "rep stosl \n"
+ : "+D"(d), // %0
+ "+c"(width_tmp) // %1
+ : "a"(v32) // %2
+ : "memory", "cc");
+ dst += dst_stride;
+ }
+}
+#endif // HAS_SETROW_X86
+
#ifdef HAS_YUY2TOYROW_SSE2
void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix) {
asm volatile (
@@ -2998,7 +3026,7 @@
#endif // HAS_ARGBUNATTENUATEROW_SSE2
#ifdef HAS_ARGBGRAYROW_SSSE3
-// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
+// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
CONST vec8 kARGBToGray = {
14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
};
@@ -3455,7 +3483,7 @@
// TODO(fbarchard): Investigate why 4 pixels is slower than 2 on Core2.
// Copy ARGB pixels from source image with slope to a row of destination.
// Caveat - in 64 bit, movd is used with 64 bit gpr due to Mac gcc producing
-// an error if movq is used. movd %%xmm0,%1
+// an error if movq is used. movd %%xmm0,%1
LIBYUV_API
void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,