ARGBSepia planarfunction for converting a region of ARGB image to Sepia tone. ARGBGray optimized weaving of alpha value. 551 ms from 568 ms.
BUG=none
TEST=libyuv_unittest --gtest_filter=*ARGBSepia*
Review URL: https://webrtc-codereview.appspot.com/573008
git-svn-id: http://libyuv.googlecode.com/svn/trunk@270 16f28f9a-4ce2-e073-06de-1de4eb20be90
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 340dd65..1e61e1f 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc
@@ -108,11 +108,6 @@
2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u
};
-// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
-CONST vec8 kARGBToGray = {
- 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
-};
-
void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
asm volatile (
"pcmpeqb %%xmm5,%%xmm5 \n"
@@ -2533,15 +2528,15 @@
#endif // HAS_ARGBUNATTENUATE_SSE2
#ifdef HAS_ARGBGRAYROW_SSSE3
+// Constant for ARGB color to gray scale. 0.11 * B + 0.59 * G + 0.30 * R
+CONST vec8 kARGBToGray = {
+ 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0, 14, 76, 38, 0
+};
+
// Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
void ARGBGrayRow_SSSE3(uint8* dst_argb, int width) {
asm volatile (
"movdqa %2,%%xmm4 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
- "pcmpeqb %%xmm3,%%xmm3 \n"
- "psrld $0x8,%%xmm3 \n"
-
// 8 pixel loop \n"
".p2align 4 \n"
"1: \n"
@@ -2549,36 +2544,113 @@
"movdqa 0x10(%0),%%xmm1 \n"
"pmaddubsw %%xmm4,%%xmm0 \n"
"pmaddubsw %%xmm4,%%xmm1 \n"
- "movdqa (%0),%%xmm6 \n"
- "movdqa 0x10(%0),%%xmm7 \n"
- "pand %%xmm5,%%xmm6 \n"
- "pand %%xmm5,%%xmm7 \n"
"phaddw %%xmm1,%%xmm0 \n"
"psrlw $0x7,%%xmm0 \n"
"packuswb %%xmm0,%%xmm0 \n"
+ "movdqa (%0),%%xmm2 \n"
+ "movdqa 0x10(%0),%%xmm3 \n"
+ "psrld $0x18,%%xmm2 \n"
+ "psrld $0x18,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ "packuswb %%xmm2,%%xmm2 \n"
+ "movdqa %%xmm0,%%xmm3 \n"
"punpcklbw %%xmm0,%%xmm0 \n"
+ "punpcklbw %%xmm2,%%xmm3 \n"
"movdqa %%xmm0,%%xmm1 \n"
- "punpcklwd %%xmm0,%%xmm0 \n"
- "punpckhwd %%xmm1,%%xmm1 \n"
- "pand %%xmm3,%%xmm0 \n"
- "pand %%xmm3,%%xmm1 \n"
- "por %%xmm6,%%xmm0 \n"
- "por %%xmm7,%%xmm1 \n"
+ "punpcklwd %%xmm3,%%xmm0 \n"
+ "punpckhwd %%xmm3,%%xmm1 \n"
"sub $0x8,%1 \n"
"movdqa %%xmm0,(%0) \n"
"movdqa %%xmm1,0x10(%0) \n"
"lea 0x20(%0),%0 \n"
"jg 1b \n"
- : "+r"(dst_argb), // %0
- "+r"(width) // %1
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
: "m"(kARGBToGray) // %2
: "memory", "cc"
#if defined(__SSE2__)
- , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+ , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
#endif
);
}
#endif // HAS_ARGBGRAYROW_SSSE3
+
+#ifdef HAS_ARGBSEPIAROW_SSSE3
+// b = (r * 35 + g * 68 + b * 17) >> 7
+// g = (r * 45 + g * 88 + b * 22) >> 7
+// r = (r * 50 + g * 98 + b * 24) >> 7
+// Constant for ARGB color to sepia tone
+CONST vec8 kARGBToSepiaB = {
+ 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0
+};
+
+CONST vec8 kARGBToSepiaG = {
+ 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0
+};
+
+CONST vec8 kARGBToSepiaR = {
+ 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0
+};
+
+// Convert 8 ARGB pixels (64 bytes) to 8 Sepia ARGB pixels
+void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width) {
+ asm volatile (
+ "movdqa %2,%%xmm2 \n"
+ "movdqa %3,%%xmm3 \n"
+ "movdqa %4,%%xmm4 \n"
+ // 8 pixel loop \n"
+ ".p2align 4 \n"
+ "1: \n"
+ "movdqa (%0),%%xmm0 \n"
+ "movdqa 0x10(%0),%%xmm6 \n"
+ "pmaddubsw %%xmm2,%%xmm0 \n"
+ "pmaddubsw %%xmm2,%%xmm6 \n"
+ "phaddw %%xmm6,%%xmm0 \n"
+ "psrlw $0x7,%%xmm0 \n"
+ "packuswb %%xmm0,%%xmm0 \n"
+ "movdqa (%0),%%xmm5 \n"
+ "movdqa 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm3,%%xmm5 \n"
+ "pmaddubsw %%xmm3,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm5 \n"
+ "psrlw $0x7,%%xmm5 \n"
+ "packuswb %%xmm5,%%xmm5 \n"
+ "punpcklbw %%xmm5,%%xmm0 \n"
+ "movdqa (%0),%%xmm5 \n"
+ "movdqa 0x10(%0),%%xmm1 \n"
+ "pmaddubsw %%xmm4,%%xmm5 \n"
+ "pmaddubsw %%xmm4,%%xmm1 \n"
+ "phaddw %%xmm1,%%xmm5 \n"
+ "psrlw $0x7,%%xmm5 \n"
+ "packuswb %%xmm5,%%xmm5 \n"
+ "movdqa (%0),%%xmm6 \n"
+ "movdqa 0x10(%0),%%xmm1 \n"
+ "psrld $0x18,%%xmm6 \n"
+ "psrld $0x18,%%xmm1 \n"
+ "packuswb %%xmm1,%%xmm6 \n"
+ "packuswb %%xmm6,%%xmm6 \n"
+ "punpcklbw %%xmm6,%%xmm5 \n"
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklwd %%xmm5,%%xmm0 \n"
+ "punpckhwd %%xmm5,%%xmm1 \n"
+ "sub $0x8,%1 \n"
+ "movdqa %%xmm0,(%0) \n"
+ "movdqa %%xmm1,0x10(%0) \n"
+ "lea 0x20(%0),%0 \n"
+ "jg 1b \n"
+ : "+r"(dst_argb), // %0
+ "+r"(width) // %1
+ : "m"(kARGBToSepiaB), // %2
+ "m"(kARGBToSepiaG), // %3
+ "m"(kARGBToSepiaR) // %4
+ : "memory", "cc"
+#if defined(__SSE2__)
+ , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
+#endif
+ );
+}
+#endif // HAS_ARGBSEPIAROW_SSSE3
+
#endif // defined(__x86_64__) || defined(__i386__)
#ifdef __cplusplus