Shade ported to Neon BUG=167 TESTED=planar test Shade* Review URL: https://webrtc-codereview.appspot.com/969014 git-svn-id: http://libyuv.googlecode.com/svn/trunk@509 16f28f9a-4ce2-e073-06de-1de4eb20be90

commit: b94b139e86635d40ed0d054bb66e30e6086ae7a3 [log] [tgz]
author: fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90> Mon Dec 03 20:36:40 2012 +0000
committer: fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90> Mon Dec 03 20:36:40 2012 +0000
tree: ec17fc654429be7acc9adfabe56132ceca412b3a
parent: ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9 [diff] [blame]
diff --git a/source/row_posix.cc b/source/row_posix.cc
index 920a8c4..e9d7627 100644
--- a/source/row_posix.cc
+++ b/source/row_posix.cc

@@ -3921,6 +3921,45 @@
 }
 #endif  // HAS_ARGBQUANTIZEROW_SSE2
 
+#ifdef HAS_ARGBSHADEROW_SSE2
+// Shade 4 pixels at a time by specified value.
+// Aligned to 16 bytes.
+void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
+                       uint32 value) {
+  asm volatile (
+    "movd      %3,%%xmm2                       \n"
+    "sub       %0,%1                           \n"
+    "punpcklbw %%xmm2,%%xmm2                   \n"
+    "punpcklqdq %%xmm2,%%xmm2                  \n"
+
+    // 4 pixel loop.
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    "movdqa    (%0),%%xmm0                     \n"
+    "movdqa    %%xmm0,%%xmm1                   \n"
+    "punpcklbw %%xmm0,%%xmm0                   \n"
+    "punpckhbw %%xmm1,%%xmm1                   \n"
+    "pmulhuw   %%xmm2,%%xmm0                   \n"
+    "pmulhuw   %%xmm2,%%xmm1                   \n"
+    "psrlw     $0x8,%%xmm0                     \n"
+    "psrlw     $0x8,%%xmm1                     \n"
+    "packuswb  %%xmm1,%%xmm0                   \n"
+    "sub       $0x4,%2                         \n"
+    "movdqa    %%xmm0,(%0,%1,1)                \n"
+    "lea       0x10(%0),%0                     \n"
+    "jg        1b                              \n"
+  : "+r"(src_argb),       // %0
+    "+r"(dst_argb),       // %1
+    "+r"(width)           // %2
+  : "r"(value)            // %3
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2"
+#endif
+  );
+}
+#endif  // HAS_ARGBSHADEROW_SSE2
+
 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2
 // Creates a table of cumulative sums where each value is a sum of all values
 // above and to the left of the value, inclusive of the value.
@@ -4091,44 +4130,6 @@
   );
 }
 #endif  // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
-#ifdef HAS_ARGBSHADEROW_SSE2
-// Shade 4 pixels at a time by specified value.
-// Aligned to 16 bytes.
-void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
-                       uint32 value) {
-  asm volatile (
-    "movd      %3,%%xmm2                       \n"
-    "sub       %0,%1                           \n"
-    "punpcklbw %%xmm2,%%xmm2                   \n"
-    "punpcklqdq %%xmm2,%%xmm2                  \n"
-
-    // 4 pixel loop.
-    ".p2align  2                               \n"
-  "1:                                          \n"
-    "movdqa    (%0),%%xmm0                     \n"
-    "movdqa    %%xmm0,%%xmm1                   \n"
-    "punpcklbw %%xmm0,%%xmm0                   \n"
-    "punpckhbw %%xmm1,%%xmm1                   \n"
-    "pmulhuw   %%xmm2,%%xmm0                   \n"
-    "pmulhuw   %%xmm2,%%xmm1                   \n"
-    "psrlw     $0x8,%%xmm0                     \n"
-    "psrlw     $0x8,%%xmm1                     \n"
-    "packuswb  %%xmm1,%%xmm0                   \n"
-    "sub       $0x4,%2                         \n"
-    "movdqa    %%xmm0,(%0,%1,1)                \n"
-    "lea       0x10(%0),%0                     \n"
-    "jg        1b                              \n"
-  : "+r"(src_argb),       // %0
-    "+r"(dst_argb),       // %1
-    "+r"(width)           // %2
-  : "r"(value)            // %3
-  : "memory", "cc"
-#if defined(__SSE2__)
-    , "xmm0", "xmm1", "xmm2"
-#endif
-  );
-}
-#endif  // HAS_ARGBSHADEROW_SSE2
 
 #ifdef HAS_ARGBAFFINEROW_SSE2
 // TODO(fbarchard): Find 64 bit way to avoid masking.
commit	b94b139e86635d40ed0d054bb66e30e6086ae7a3	[log] [tgz]
author	fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90>	Mon Dec 03 20:36:40 2012 +0000
committer	fbarchard@google.com <fbarchard@google.com@16f28f9a-4ce2-e073-06de-1de4eb20be90>	Mon Dec 03 20:36:40 2012 +0000
tree	ec17fc654429be7acc9adfabe56132ceca412b3a
parent	ef60ab0db435c8e0bd1f63dc9aedb5ad880424a9 [diff] [blame]