Always inline Filter_32_*_neon functions

These functions are performance critical and are already marked
'inline'. However, Chrome for Android does not inline them because it
is compiled with -Os and a small -finline-limit.

Forcing them inline avoids one function call in the filtering functions.

This change does not increase the library size.

BUG=chromium:363073
R=mtklein@google.com

Author: kkinnunen@nvidia.com

Review URL: https://codereview.chromium.org/280403005

git-svn-id: http://skia.googlecode.com/svn/trunk@14709 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/opts/SkBitmapProcState_filter_neon.h b/src/opts/SkBitmapProcState_filter_neon.h
index e56b683..0887145 100644
--- a/src/opts/SkBitmapProcState_filter_neon.h
+++ b/src/opts/SkBitmapProcState_filter_neon.h
@@ -17,12 +17,15 @@
  * exact results for the color components, but if the 4 incoming colors are
  * all opaque, then the output color must also be opaque. Subsequent parts of
  * the drawing pipeline may rely on this (e.g. which blitrow proc to use).
+ *
  */
-
-static inline void Filter_32_opaque_neon(unsigned x, unsigned y,
-                                         SkPMColor a00, SkPMColor a01,
-                                         SkPMColor a10, SkPMColor a11,
-                                         SkPMColor *dst) {
+// Chrome on Android uses -Os so we need to force these inline. Otherwise
+// calling the function in the inner loops will cause significant overhead on
+// some platforms.
+static SK_ALWAYS_INLINE void Filter_32_opaque_neon(unsigned x, unsigned y,
+                                                   SkPMColor a00, SkPMColor a01,
+                                                   SkPMColor a10, SkPMColor a11,
+                                                   SkPMColor *dst) {
     uint8x8_t vy, vconst16_8, v16_y, vres;
     uint16x4_t vx, vconst16_16, v16_x, tmp;
     uint32x2_t va0, va1;
@@ -53,10 +56,11 @@
     vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);         // store result
 }
 
-static inline void Filter_32_alpha_neon(unsigned x, unsigned y,
-                                        SkPMColor a00, SkPMColor a01,
-                                        SkPMColor a10, SkPMColor a11,
-                                        SkPMColor *dst, uint16_t scale) {
+static SK_ALWAYS_INLINE void Filter_32_alpha_neon(unsigned x, unsigned y,
+                                                  SkPMColor a00, SkPMColor a01,
+                                                  SkPMColor a10, SkPMColor a11,
+                                                  SkPMColor *dst,
+                                                  uint16_t scale) {
     uint8x8_t vy, vconst16_8, v16_y, vres;
     uint16x4_t vx, vconst16_16, v16_x, tmp, vscale;
     uint32x2_t va0, va1;