Add 2 ARGB Images together and store to destination
BUG=175
TEST=Add unittest
Review URL: https://webrtc-codereview.appspot.com/1049004
git-svn-id: http://libyuv.googlecode.com/svn/trunk@543 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index c710241..04be0be 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -3948,10 +3948,10 @@
"movdqa %%xmm0,(%0,%1,1) \n"
"lea 0x10(%0),%0 \n"
"jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "r"(value) // %3
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "r"(value) // %3
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2"
@@ -3961,7 +3961,7 @@
#endif // HAS_ARGBSHADEROW_SSE2
#ifdef HAS_ARGBMULTIPLYROW_SSE2
-// Multiple 2 rows of ARGB pixels together, 4 pixels at a time.
+// Multiply 2 rows of ARGB pixels together, 4 pixels at a time.
// Aligned to 16 bytes.
void ARGBMultiplyRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
uint8* dst_argb, int width) {
@@ -3988,8 +3988,8 @@
"movdqa %%xmm0,(%0,%2,1) \n"
"lea 0x10(%0),%0 \n"
"jg 1b \n"
- : "+r"(src_argb0), // %0
- "+r"(src_argb1), // %1
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
"+r"(dst_argb), // %2
"+r"(width) // %3
:
@@ -4001,6 +4001,39 @@
}
#endif // HAS_ARGBMULTIPLYROW_SSE2
+#ifdef HAS_ARGBADDROW_SSE2
+// Add 2 rows of ARGB pixels together (saturating bytes), 4 pixels at a time.
+// Aligned to 16 bytes: every load and store below uses movdqa.
+void ARGBAddRow_SSE2(const uint8* src_argb0, const uint8* src_argb1,
+ uint8* dst_argb, int width) {
+ asm volatile (
+ "pxor %%xmm5,%%xmm5 \n"  // NOTE(review): xmm5 is zeroed but never read below.
+ "sub %0,%1 \n"  // src_argb1 becomes an offset from src_argb0.
+ "sub %0,%2 \n"  // dst_argb becomes an offset from src_argb0.
+
+ // 4 pixel loop: body runs at least once, repeats while width stays above 0.
+ ".p2align 4 \n"
+ "1: \n"
+ "movdqa (%0),%%xmm0 \n"  // 16 bytes (4 pixels) from src_argb0.
+ "movdqa (%0,%1),%%xmm1 \n"  // Matching 16 bytes from src_argb1.
+ "paddusb %%xmm1,%%xmm0 \n"  // Per-byte unsigned add, saturating.
+ "sub $0x4,%3 \n"  // 4 pixels consumed; sets the flags tested by jg.
+ "movdqa %%xmm0,(%0,%2,1) \n"  // Store to dst_argb; does not touch flags.
+ "lea 0x10(%0),%0 \n"  // Advance the shared base by 16 bytes; flags kept.
+ "jg 1b \n"
+ : "+r"(src_argb0), // %0
+ "+r"(src_argb1), // %1
+ "+r"(dst_argb), // %2
+ "+r"(width) // %3
+ :
+ : "memory", "cc"
+#if defined(__SSE2__)
+ , "xmm0", "xmm1", "xmm5"
+#endif
+ );
+}
+#endif // HAS_ARGBADDROW_SSE2
+
#ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
// Creates a table of cumulative sums where each value is a sum of all values
// above and to the left of the value, inclusive of the value.