4f linear gradient shader blitters

Add F16 specializations to support writing to half-float dests.

Add color profile template arg across the board to support writing
to sRGB dests.

R=reed@google.com,mtklein@google.com,herb@google.com
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1808963005

Review URL: https://codereview.chromium.org/1808963005
diff --git a/src/effects/gradients/Sk4fLinearGradient.cpp b/src/effects/gradients/Sk4fLinearGradient.cpp
index 9bc84c4..82b2fcc 100644
--- a/src/effects/gradients/Sk4fLinearGradient.cpp
+++ b/src/effects/gradients/Sk4fLinearGradient.cpp
@@ -5,16 +5,18 @@
  * found in the LICENSE file.
  */
 
-#include "Sk4fGradientPriv.h"
 #include "Sk4fLinearGradient.h"
+#include "SkUtils.h"
+#include "SkXfermode.h"
 
 namespace {
 
-template<typename DstType, bool do_premul>
+template<typename DstType, SkColorProfileType, ApplyPremul>
 void fill(const Sk4f& c, DstType* dst, int n);
 
 template<>
-void fill<SkPM4f, false>(const Sk4f& c, SkPM4f* dst, int n) {
+void fill<SkPM4f, kLinear_SkColorProfileType, ApplyPremul::False>
+         (const Sk4f& c, SkPM4f* dst, int n) {
     while (n > 0) {
         c.store(dst++);
         n--;
@@ -22,21 +24,49 @@
 }
 
 template<>
-void fill<SkPM4f, true>(const Sk4f& c, SkPM4f* dst, int n) {
-    fill<SkPM4f, false>(premul_4f(c), dst, n);
+void fill<SkPM4f, kLinear_SkColorProfileType, ApplyPremul::True>
+         (const Sk4f& c, SkPM4f* dst, int n) {
+    fill<SkPM4f, kLinear_SkColorProfileType, ApplyPremul::False>(premul_4f(c), dst, n);
 }
 
 template<>
-void fill<SkPMColor, false>(const Sk4f& c, SkPMColor* dst, int n) {
-    sk_memset32(dst, trunc_from_255<false>(c), n);
+void fill<SkPMColor, kLinear_SkColorProfileType, ApplyPremul::False>
+         (const Sk4f& c, SkPMColor* dst, int n) {
+    sk_memset32(dst, trunc_from_4f_255<ApplyPremul::False>(c), n);
 }
 
 template<>
-void fill<SkPMColor, true>(const Sk4f& c, SkPMColor* dst, int n) {
-    sk_memset32(dst, trunc_from_255<true>(c), n);
+void fill<SkPMColor, kLinear_SkColorProfileType, ApplyPremul::True>
+         (const Sk4f& c, SkPMColor* dst, int n) {
+    sk_memset32(dst, trunc_from_4f_255<ApplyPremul::True>(c), n);
 }
 
-template<typename DstType, bool do_premul>
+template<>
+void fill<SkPMColor, kSRGB_SkColorProfileType, ApplyPremul::False>
+         (const Sk4f& c, SkPMColor* dst, int n) {
+    // FIXME: this assumes opaque colors.  Handle unpremultiplication.
+    sk_memset32(dst, Sk4f_toS32(c), n);
+}
+
+template<>
+void fill<SkPMColor, kSRGB_SkColorProfileType, ApplyPremul::True>
+         (const Sk4f& c, SkPMColor* dst, int n) {
+    sk_memset32(dst, Sk4f_toS32(premul_4f(c)), n);
+}
+
+template<>
+void fill<uint64_t, kLinear_SkColorProfileType, ApplyPremul::False>
+         (const Sk4f& c, uint64_t* dst, int n) {
+    sk_memset64(dst, SkFloatToHalf_01(c), n);
+}
+
+template<>
+void fill<uint64_t, kLinear_SkColorProfileType, ApplyPremul::True>
+         (const Sk4f& c, uint64_t* dst, int n) {
+    sk_memset64(dst, SkFloatToHalf_01(premul_4f(c)), n);
+}
+
+template<typename DstType, SkColorProfileType profile, ApplyPremul premul>
 void ramp(const Sk4f& c, const Sk4f& dc, DstType* dst, int n) {
     SkASSERT(n > 0);
 
@@ -49,7 +79,7 @@
     Sk4f c3 = c1 + dc2;
 
     while (n >= 4) {
-        store4x<DstType, do_premul>(c0, c1, c2, c3, dst);
+        store4x<DstType, profile, premul>(c0, c1, c2, c3, dst);
         dst += 4;
 
         c0 = c0 + dc4;
@@ -59,12 +89,12 @@
         n -= 4;
     }
     if (n & 2) {
-        store<DstType, do_premul>(c0, dst++);
-        store<DstType, do_premul>(c1, dst++);
+        store<DstType, profile, premul>(c0, dst++);
+        store<DstType, profile, premul>(c1, dst++);
         c0 = c0 + dc2;
     }
     if (n & 1) {
-        store<DstType, do_premul>(c0, dst);
+        store<DstType, profile, premul>(c0, dst);
     }
 }
 
@@ -163,9 +193,13 @@
     // TODO: plumb dithering
     SkASSERT(count > 0);
     if (fColorsArePremul) {
-        this->shadePremulSpan<SkPMColor, false>(x, y, dst, count);
+        this->shadePremulSpan<SkPMColor,
+                              kLinear_SkColorProfileType,
+                              ApplyPremul::False>(x, y, dst, count);
     } else {
-        this->shadePremulSpan<SkPMColor, true>(x, y, dst, count);
+        this->shadePremulSpan<SkPMColor,
+                              kLinear_SkColorProfileType,
+                              ApplyPremul::True>(x, y, dst, count);
     }
 }
 
@@ -179,13 +213,17 @@
     // TONOTDO: plumb dithering
     SkASSERT(count > 0);
     if (fColorsArePremul) {
-        this->shadePremulSpan<SkPM4f, false>(x, y, dst, count);
+        this->shadePremulSpan<SkPM4f,
+                              kLinear_SkColorProfileType,
+                              ApplyPremul::False>(x, y, dst, count);
     } else {
-        this->shadePremulSpan<SkPM4f, true>(x, y, dst, count);
+        this->shadePremulSpan<SkPM4f,
+                              kLinear_SkColorProfileType,
+                              ApplyPremul::True>(x, y, dst, count);
     }
 }
 
-template<typename DstType, bool do_premul>
+template<typename DstType, SkColorProfileType profile, ApplyPremul premul>
 void SkLinearGradient::
 LinearGradient4fContext::shadePremulSpan(int x, int y,
                                          DstType dst[],
@@ -195,23 +233,27 @@
     switch (shader.fTileMode) {
     case kClamp_TileMode:
         this->shadeSpanInternal<DstType,
-                                do_premul,
+                                profile,
+                                premul,
                                 kClamp_TileMode>(x, y, dst, count);
         break;
     case kRepeat_TileMode:
         this->shadeSpanInternal<DstType,
-                                do_premul,
+                                profile,
+                                premul,
                                 kRepeat_TileMode>(x, y, dst, count);
         break;
     case kMirror_TileMode:
         this->shadeSpanInternal<DstType,
-                                do_premul,
+                                profile,
+                                premul,
                                 kMirror_TileMode>(x, y, dst, count);
         break;
     }
 }
 
-template<typename DstType, bool do_premul, SkShader::TileMode tileMode>
+template<typename DstType, SkColorProfileType profile, ApplyPremul premul,
+         SkShader::TileMode tileMode>
 void SkLinearGradient::
 LinearGradient4fContext::shadeSpanInternal(int x, int y,
                                            DstType dst[],
@@ -223,12 +265,12 @@
                   &pt);
     const SkScalar fx = pinFx<tileMode>(pt.x());
     const SkScalar dx = fDstToPos.getScaleX();
-    LinearIntervalProcessor<DstType, tileMode> proc(fIntervals.begin(),
-                                                    fIntervals.end() - 1,
-                                                    this->findInterval(fx),
-                                                    fx,
-                                                    dx,
-                                                    SkScalarNearlyZero(dx * count));
+    LinearIntervalProcessor<DstType, profile, tileMode> proc(fIntervals.begin(),
+                                                             fIntervals.end() - 1,
+                                                             this->findInterval(fx),
+                                                             fx,
+                                                             dx,
+                                                             SkScalarNearlyZero(dx * count));
     while (count > 0) {
         // What we really want here is SkTPin(advance, 1, count)
         // but that's a significant perf hit for >> stops; investigate.
@@ -243,12 +285,12 @@
             || (n == count && proc.currentRampIsZero()));
 
         if (proc.currentRampIsZero()) {
-            fill<DstType, do_premul>(proc.currentColor(),
-                                     dst, n);
+            fill<DstType, profile, premul>(proc.currentColor(),
+                                           dst, n);
         } else {
-            ramp<DstType, do_premul>(proc.currentColor(),
-                                     proc.currentColorGrad(),
-                                     dst, n);
+            ramp<DstType, profile, premul>(proc.currentColor(),
+                                           proc.currentColorGrad(),
+                                           dst, n);
         }
 
         proc.advance(SkIntToScalar(n));
@@ -257,7 +299,7 @@
     }
 }
 
-template<typename DstType, SkShader::TileMode tileMode>
+template<typename DstType, SkColorProfileType profile, SkShader::TileMode tileMode>
 class SkLinearGradient::
 LinearGradient4fContext::LinearIntervalProcessor {
 public:
@@ -267,8 +309,7 @@
                             SkScalar fx,
                             SkScalar dx,
                             bool is_vertical)
-        : fDstComponentScale(dst_component_scale<DstType>())
-        , fAdvX((i->fP1 - fx) / dx)
+        : fAdvX((i->fP1 - fx) / dx)
         , fFirstInterval(firstInterval)
         , fLastInterval(lastInterval)
         , fInterval(i)
@@ -308,8 +349,8 @@
         fDc   = dst_swizzle<DstType>(fInterval->fDc);
         fCc   = dst_swizzle<DstType>(fInterval->fC0);
         fCc   = fCc + fDc * Sk4f(t);
-        fCc   = fCc * fDstComponentScale;
-        fDcDx = fDc * fDstComponentScale * Sk4f(fDx);
+        fCc   = scale_for_dest<DstType, profile>(fCc);
+        fDcDx = scale_for_dest<DstType, profile>(fDc * Sk4f(fDx));
         fZeroRamp = fIsVertical || fInterval->isZeroRamp();
     }
 
@@ -342,8 +383,6 @@
         return advX;
     }
 
-    const Sk4f      fDstComponentScale; // cached dst scale (PMC: 255, PM4f: 1)
-
     // Current interval properties.
     Sk4f            fDc;        // local color gradient (dc/dt)
     Sk4f            fDcDx;      // dst color gradient (dc/dx)
@@ -402,3 +441,68 @@
         }
     }
 }
+
+SkShader::Context::BlitProc SkLinearGradient::
+LinearGradient4fContext::onChooseBlitProc(const SkImageInfo& info, BlitState* state) {
+    SkXfermode::Mode mode;
+    if (!SkXfermode::AsMode(state->fXfer, &mode)) {
+        return nullptr;
+    }
+
+    const SkGradientShaderBase& shader = static_cast<const SkGradientShaderBase&>(fShader);
+    if (mode != SkXfermode::kSrc_Mode &&
+        !(mode == SkXfermode::kSrcOver_Mode && shader.colorsAreOpaque())) {
+        return nullptr;
+    }
+
+    switch (info.colorType()) {
+        case kN32_SkColorType:
+            return D32_BlitProc;
+        case kRGBA_F16_SkColorType:
+            return D64_BlitProc;
+        default:
+            return nullptr;
+    }
+}
+
+void SkLinearGradient::
+LinearGradient4fContext::D32_BlitProc(BlitState* state, int x, int y, const SkPixmap& dst,
+                                      int count, const SkAlpha aa[]) {
+    // FIXME: ignoring coverage for now
+    const LinearGradient4fContext* ctx =
+        static_cast<const LinearGradient4fContext*>(state->fCtx);
+
+    if (dst.info().isLinear()) {
+        if (ctx->fColorsArePremul) {
+            ctx->shadePremulSpan<SkPMColor, kLinear_SkColorProfileType, ApplyPremul::False>(
+                x, y, dst.writable_addr32(x, y), count);
+        } else {
+            ctx->shadePremulSpan<SkPMColor, kLinear_SkColorProfileType, ApplyPremul::True>(
+                x, y, dst.writable_addr32(x, y), count);
+        }
+    } else {
+        if (ctx->fColorsArePremul) {
+            ctx->shadePremulSpan<SkPMColor, kSRGB_SkColorProfileType, ApplyPremul::False>(
+                x, y, dst.writable_addr32(x, y), count);
+        } else {
+            ctx->shadePremulSpan<SkPMColor, kSRGB_SkColorProfileType, ApplyPremul::True>(
+                x, y, dst.writable_addr32(x, y), count);
+        }
+    }
+}
+
+void SkLinearGradient::
+LinearGradient4fContext::D64_BlitProc(BlitState* state, int x, int y, const SkPixmap& dst,
+                                      int count, const SkAlpha aa[]) {
+    // FIXME: ignoring coverage for now
+    const LinearGradient4fContext* ctx =
+        static_cast<const LinearGradient4fContext*>(state->fCtx);
+
+    if (ctx->fColorsArePremul) {
+        ctx->shadePremulSpan<uint64_t, kLinear_SkColorProfileType, ApplyPremul::False>(
+            x, y, dst.writable_addr64(x, y), count);
+    } else {
+        ctx->shadePremulSpan<uint64_t, kLinear_SkColorProfileType, ApplyPremul::True>(
+            x, y, dst.writable_addr64(x, y), count);
+    }
+}