split up SkColor_opts_neon.h

Lots of dead code deleted, and the rest moved where it's used.

Change-Id: I9547768592d0dc6c9e9b995c32e21a6e74be7fd1
Reviewed-on: https://skia-review.googlesource.com/c/174313
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp
index b8095d4..e607a3d 100644
--- a/src/core/SkBlitRow_D32.cpp
+++ b/src/core/SkBlitRow_D32.cpp
@@ -80,8 +80,6 @@
     }
 
 #elif defined(SK_ARM_HAS_NEON)
-
-    #include "SkColor_opts_neon.h"
     #include <arm_neon.h>
 
     static void blit_row_s32_blend(SkPMColor* dst, const SkPMColor* src, int count, U8CPU alpha) {
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index dac1cfe..abd63b7 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp
@@ -435,7 +435,12 @@
     }
 
 #elif defined(SK_ARM_HAS_NEON)
-    #include "SkColor_opts_neon.h"
+    #include <arm_neon.h>
+
+    #define NEON_A (SK_A32_SHIFT / 8)  // alpha: 32-bit shift / 8, presumably its byte index
+    #define NEON_R (SK_R32_SHIFT / 8)  // red:   same derivation
+    #define NEON_G (SK_G32_SHIFT / 8)  // green: same derivation
+    #define NEON_B (SK_B32_SHIFT / 8)  // blue:  same derivation
 
     static inline uint8x8_t blend_32_neon(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
         int16x8_t src_wide, dst_wide;
diff --git a/src/opts/SkBlitMask_opts.h b/src/opts/SkBlitMask_opts.h
index 644bae4..0521ae5 100644
--- a/src/opts/SkBlitMask_opts.h
+++ b/src/opts/SkBlitMask_opts.h
@@ -15,7 +15,31 @@
 #if defined(SK_ARM_HAS_NEON)
     // The Sk4px versions below will work fine with NEON, but we have had many indications
     // that it doesn't perform as well as this NEON-specific code.  TODO(mtklein): why?
-    #include "SkColor_opts_neon.h"
+
+    #define NEON_A (SK_A32_SHIFT / 8)  // alpha: 32-bit shift / 8, presumably its byte index
+    #define NEON_R (SK_R32_SHIFT / 8)  // red:   same derivation
+    #define NEON_G (SK_G32_SHIFT / 8)  // green: same derivation
+    #define NEON_B (SK_B32_SHIFT / 8)  // blue:  same derivation
+
+    static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
+        return vaddw_u8(vdupq_n_u16(1), alpha);  // per lane: widen alpha to 16 bits, add 1
+    }
+
+    static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
+        return vshrn_n_u16(vmovl_u8(color) * scale, 8);  // per lane: 16-bit color*scale, >> 8
+    }
+
+    static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
+        uint8x8x4_t ret;  // each val[] vector of color, scaled by the same per-lane factors
+
+        ret.val[0] = SkAlphaMul_neon8(color.val[0], scale);  // all four val[] entries are
+        ret.val[1] = SkAlphaMul_neon8(color.val[1], scale);  // treated identically, so plain
+        ret.val[2] = SkAlphaMul_neon8(color.val[2], scale);  // indices 0..3 work regardless
+        ret.val[3] = SkAlphaMul_neon8(color.val[3], scale);  // of what each entry holds
+
+        return ret;
+    }
+
 
     template <bool isColor>
     static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
diff --git a/src/opts/SkColor_opts_neon.h b/src/opts/SkColor_opts_neon.h
deleted file mode 100644
index 27520af..0000000
--- a/src/opts/SkColor_opts_neon.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright 2015 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkColor_opts_neon_DEFINED
-#define SkColor_opts_neon_DEFINED
-
-#include "SkTypes.h"
-#include "SkColorData.h"
-
-#include <arm_neon.h>
-
-#define NEON_A (SK_A32_SHIFT / 8)
-#define NEON_R (SK_R32_SHIFT / 8)
-#define NEON_G (SK_G32_SHIFT / 8)
-#define NEON_B (SK_B32_SHIFT / 8)
-
-static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) {
-    return vaddw_u8(vdupq_n_u16(1), alpha);
-}
-
-static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) {
-    return vshrn_n_u16(vmovl_u8(color) * scale, 8);
-}
-
-static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) {
-    uint8x8x4_t ret;
-
-    ret.val[NEON_A] = SkAlphaMul_neon8(color.val[NEON_A], scale);
-    ret.val[NEON_R] = SkAlphaMul_neon8(color.val[NEON_R], scale);
-    ret.val[NEON_G] = SkAlphaMul_neon8(color.val[NEON_G], scale);
-    ret.val[NEON_B] = SkAlphaMul_neon8(color.val[NEON_B], scale);
-
-    return ret;
-}
-
-/* This function expands 8 pixels from RGB565 (R, G, B from high to low) to
- * SkPMColor (all possible configurations supported) in the exact same way as
- * SkPixel16ToPixel32.
- */
-static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) {
-
-    uint8x8x4_t ret;
-    uint8x8_t vr, vg, vb;
-
-    vr = vmovn_u16(vshrq_n_u16(vsrc, SK_R16_SHIFT));
-    vg = vmovn_u16(vshrq_n_u16(vshlq_n_u16(vsrc, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS));
-    vb = vmovn_u16(vsrc & vdupq_n_u16(SK_B16_MASK));
-
-    ret.val[NEON_A] = vdup_n_u8(0xFF);
-    ret.val[NEON_R] = vshl_n_u8(vr, 8 - SK_R16_BITS) | vshr_n_u8(vr, 2 * SK_R16_BITS - 8);
-    ret.val[NEON_G] = vshl_n_u8(vg, 8 - SK_G16_BITS) | vshr_n_u8(vg, 2 * SK_G16_BITS - 8);
-    ret.val[NEON_B] = vshl_n_u8(vb, 8 - SK_B16_BITS) | vshr_n_u8(vb, 2 * SK_B16_BITS - 8);
-
-    return ret;
-}
-
-/* This function packs 8 pixels from SkPMColor (all possible configurations
- * supported) to RGB565 (R, G, B from high to low) in the exact same way as
- * SkPixel32ToPixel16.
- */
-static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) {
-
-    uint16x8_t ret;
-
-    ret = vshll_n_u8(vsrc.val[NEON_R], 8);
-    ret = vsriq_n_u16(ret, vshll_n_u8(vsrc.val[NEON_G], 8), SK_R16_BITS);
-    ret = vsriq_n_u16(ret, vshll_n_u8(vsrc.val[NEON_B], 8), SK_R16_BITS + SK_G16_BITS);
-
-    return ret;
-}
-
-static inline SkPMColor SkFourByteInterp256_neon(SkPMColor src, SkPMColor dst,
-                                                 unsigned srcScale) {
-    SkASSERT(srcScale <= 256);
-    int16x8_t vscale = vdupq_n_s16(srcScale);
-    int16x8_t vsrc_wide, vdst_wide, vdiff;
-    uint8x8_t res;
-
-    vsrc_wide = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(src))));
-    vdst_wide = vreinterpretq_s16_u16(vmovl_u8(vreinterpret_u8_u32(vdup_n_u32(dst))));
-
-    vdiff = vsrc_wide - vdst_wide;
-    vdiff *= vscale;
-
-    vdiff = vshrq_n_s16(vdiff, 8);
-
-    vdst_wide += vdiff;
-
-    res = vmovn_u16(vreinterpretq_u16_s16(vdst_wide));
-
-    return vget_lane_u32(vreinterpret_u32_u8(res), 0);
-}
-
-static inline SkPMColor SkFourByteInterp_neon(SkPMColor src, SkPMColor dst,
-                                              U8CPU srcWeight) {
-    SkASSERT(srcWeight <= 255);
-    unsigned scale = SkAlpha255To256(srcWeight);
-    return SkFourByteInterp256_neon(src, dst, scale);
-}
-
-#endif /* #ifndef SkColor_opts_neon_DEFINED */