send SkPM4f::toPMColor() through Sk4f_toL32()

This doesn't fix that 8888 gradient if we stop clamping,
but it seems like a good idea to land first anyway.

Change-Id: Ie0feda67da5996223db2fe4458f99d57cf13db71
Reviewed-on: https://skia-review.googlesource.com/155782
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkPM4f.h b/src/core/SkPM4f.h
index e1cb1cd..f949857 100644
--- a/src/core/SkPM4f.h
+++ b/src/core/SkPM4f.h
@@ -23,6 +23,27 @@
 #endif
 }
 
+static inline Sk4f Sk4f_fromL32(uint32_t px) {
+    return SkNx_cast<float>(Sk4b::Load(&px)) * (1/255.0f);
+}
+
+static inline uint32_t Sk4f_toL32(const Sk4f& px) {
+    Sk4f v = px;
+
+#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+    // SkNx_cast<uint8_t, int32_t>() pins, and we don't anticipate giant floats
+#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON)
+    // SkNx_cast<uint8_t, int32_t>() pins, and so does Sk4f_round().
+#else
+    // No guarantee of a pin.
+    v = Sk4f::Max(0, Sk4f::Min(v, 1));
+#endif
+
+    uint32_t l32;
+    SkNx_cast<uint8_t>(Sk4f_round(v * 255.0f)).store(&l32);
+    return l32;
+}
+
 /*
  *  The float values are 0...1 premultiplied in RGBA order (regardless of SkPMColor order)
  */
@@ -60,10 +81,7 @@
     Sk4f to4f_pmorder() const { return swizzle_rb_if_bgra(this->to4f()); }
 
     SkPMColor toPMColor() const {
-        Sk4f value = swizzle_rb_if_bgra(this->to4f());
-        SkPMColor result;
-        SkNx_cast<uint8_t>(value * Sk4f(255) + Sk4f(0.5f)).store(&result);
-        return result;
+        return Sk4f_toL32(swizzle_rb_if_bgra(this->to4f()));
     }
 
     void toF16(uint16_t[4]) const;
diff --git a/src/core/SkPM4fPriv.h b/src/core/SkPM4fPriv.h
index 6c98cbc..06007b7 100644
--- a/src/core/SkPM4fPriv.h
+++ b/src/core/SkPM4fPriv.h
@@ -8,14 +8,9 @@
 #ifndef SkPM4fPriv_DEFINED
 #define SkPM4fPriv_DEFINED
 
-#include "SkColorData.h"
-#include "SkColorSpace.h"
 #include "SkColorSpacePriv.h"
 #include "SkColorSpaceXformSteps.h"
-#include "SkArenaAlloc.h"
 #include "SkPM4f.h"
-#include "SkRasterPipeline.h"
-#include "../jumper/SkJumper.h"
 
 // This file is mostly helper routines for doing color space management.
 // It probably wants a new name, and they likely don't need to be inline.
@@ -27,27 +22,6 @@
 // We'll start with the new as-encoded routines first,
 // and shove all the old broken routines towards the bottom.
 
-static inline Sk4f Sk4f_fromL32(uint32_t px) {
-    return SkNx_cast<float>(Sk4b::Load(&px)) * (1/255.0f);
-}
-
-static inline uint32_t Sk4f_toL32(const Sk4f& px) {
-    Sk4f v = px;
-
-#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
-    // SkNx_cast<uint8_t, int32_t>() pins, and we don't anticipate giant floats
-#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON)
-    // SkNx_cast<uint8_t, int32_t>() pins, and so does Sk4f_round().
-#else
-    // No guarantee of a pin.
-    v = Sk4f::Max(0, Sk4f::Min(v, 1));
-#endif
-
-    uint32_t l32;
-    SkNx_cast<uint8_t>(Sk4f_round(v * 255.0f)).store(&l32);
-    return l32;
-}
-
 static inline SkPM4f premul_in_dst_colorspace(SkColor4f color4f,
                                               SkColorSpace* srcCS, SkColorSpace* dstCS) {
     // TODO: In the very common case of srcCS being sRGB,
diff --git a/src/gpu/gradients/GrGradientBitmapCache.cpp b/src/gpu/gradients/GrGradientBitmapCache.cpp
index 459c13a..dd4e842 100644
--- a/src/gpu/gradients/GrGradientBitmapCache.cpp
+++ b/src/gpu/gradients/GrGradientBitmapCache.cpp
@@ -12,6 +12,7 @@
 #include "SkFloatBits.h"
 #include "SkHalf.h"
 #include "SkPM4fPriv.h"
+#include "SkTemplates.h"
 
 #include <functional>
 
diff --git a/src/utils/SkPatchUtils.cpp b/src/utils/SkPatchUtils.cpp
index 83495bc..ca92415 100644
--- a/src/utils/SkPatchUtils.cpp
+++ b/src/utils/SkPatchUtils.cpp
@@ -7,6 +7,7 @@
 
 #include "SkPatchUtils.h"
 
+#include "SkArenaAlloc.h"
 #include "SkColorData.h"
 #include "SkColorSpacePriv.h"
 #include "SkGeometry.h"