Generic 4f gradient T sampler fallback

Add a generic T sampler fallback impl which uses the T series produced by
subclass mapTs() overrides.  The fallback path uses the same interval
structures as the current optimized linear4f impl, but they are always
sorted in stop order (never inverted to match dx/increasing x order).

Enable the new mechanism for 4f linear w/ perspective.

Other boring changes:

  * relocate the interval builder (back) to the base class
  * add a private header for shared templates

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1783823002

Review URL: https://codereview.chromium.org/1783823002
diff --git a/src/effects/gradients/Sk4fLinearGradient.cpp b/src/effects/gradients/Sk4fLinearGradient.cpp
index 4a20a28..9bc84c4 100644
--- a/src/effects/gradients/Sk4fLinearGradient.cpp
+++ b/src/effects/gradients/Sk4fLinearGradient.cpp
@@ -5,27 +5,11 @@
  * found in the LICENSE file.
  */
 
+#include "Sk4fGradientPriv.h"
 #include "Sk4fLinearGradient.h"
 
 namespace {
 
-Sk4f premul_4f(const Sk4f& c) {
-    const float alpha = c[SkPM4f::A];
-    // FIXME: portable swizzle?
-    return c * Sk4f(alpha, alpha, alpha, 1);
-}
-
-template <bool do_premul>
-SkPMColor trunc_from_255(const Sk4f& c) {
-    SkPMColor pmc;
-    SkNx_cast<uint8_t>(c).store(&pmc);
-    if (do_premul) {
-        pmc = SkPreMultiplyARGB(SkGetPackedA32(pmc), SkGetPackedR32(pmc),
-                                SkGetPackedG32(pmc), SkGetPackedB32(pmc));
-    }
-    return pmc;
-}
-
 template<typename DstType, bool do_premul>
 void fill(const Sk4f& c, DstType* dst, int n);
 
@@ -53,50 +37,6 @@
 }
 
 template<typename DstType, bool do_premul>
-void store(const Sk4f& color, DstType* dst);
-
-template<>
-void store<SkPM4f, false>(const Sk4f& c, SkPM4f* dst) {
-    c.store(dst);
-}
-
-template<>
-void store<SkPM4f, true>(const Sk4f& c, SkPM4f* dst) {
-    store<SkPM4f, false>(premul_4f(c), dst);
-}
-
-template<>
-void store<SkPMColor, false>(const Sk4f& c, SkPMColor* dst) {
-    *dst = trunc_from_255<false>(c);
-}
-
-template<>
-void store<SkPMColor, true>(const Sk4f& c, SkPMColor* dst) {
-    *dst = trunc_from_255<true>(c);
-}
-
-template<typename DstType, bool do_premul>
-void store4x(const Sk4f& c0,
-             const Sk4f& c1,
-             const Sk4f& c2,
-             const Sk4f& c3,
-             DstType* dst) {
-    store<DstType, do_premul>(c0, dst++);
-    store<DstType, do_premul>(c1, dst++);
-    store<DstType, do_premul>(c2, dst++);
-    store<DstType, do_premul>(c3, dst++);
-}
-
-template<>
-void store4x<SkPMColor, false>(const Sk4f& c0,
-                               const Sk4f& c1,
-                               const Sk4f& c2,
-                               const Sk4f& c3,
-                               SkPMColor* dst) {
-    Sk4f_ToBytes((uint8_t*)dst, c0, c1, c2, c3);
-}
-
-template<typename DstType, bool do_premul>
 void ramp(const Sk4f& c, const Sk4f& dc, DstType* dst, int n) {
     SkASSERT(n > 0);
 
@@ -148,38 +88,6 @@
     return f < 0 ? f + 2 : f;
 }
 
-template<typename DstType>
-float dst_component_scale();
-
-template<>
-float dst_component_scale<SkPM4f>() {
-    return 1;
-}
-
-template<>
-float dst_component_scale<SkPMColor>() {
-    return 255;
-}
-
-template<typename DstType>
-Sk4f dst_swizzle(const SkPM4f&);
-
-template<>
-Sk4f dst_swizzle<SkPM4f>(const SkPM4f& c) {
-    return c.to4f();
-}
-
-template<>
-Sk4f dst_swizzle<SkPMColor>(const SkPM4f& c) {
-    return c.to4f_pmorder();
-}
-
-SkPMColor pack_color(SkColor c, bool premul) {
-    return premul
-        ? SkPreMultiplyColor(c)
-        : SkPackARGB32NoCheck(SkColorGetA(c), SkColorGetR(c), SkColorGetG(c), SkColorGetB(c));
-}
-
 // true when x is in [k1,k2)
 bool in_range(SkScalar x, SkScalar k1, SkScalar k2) {
     SkASSERT(k1 != k2);
@@ -188,208 +96,21 @@
         : (x >= k2 && x < k1);
 }
 
-class IntervalBuilder {
-public:
-    IntervalBuilder(const SkColor* colors, const SkScalar* pos, int count, bool reverse)
-        : fColors(colors)
-        , fPos(pos)
-        , fCount(count)
-        , fFirstPos(reverse ? SK_Scalar1 : 0)
-        , fBegin(reverse ? count - 1 : 0)
-        , fAdvance(reverse ? -1 : 1) {
-        SkASSERT(colors);
-        SkASSERT(count > 1);
-    }
-
-    template<typename F>
-    void build(F func) const {
-        if (!fPos) {
-            this->buildImplicitPos(func);
-            return;
-        }
-
-        const int end = fBegin + fAdvance * (fCount - 1);
-        const SkScalar lastPos = 1 - fFirstPos;
-        int prev = fBegin;
-        SkScalar prevPos = fFirstPos;
-
-        do {
-            const int curr = prev + fAdvance;
-            SkASSERT(curr >= 0 && curr < fCount);
-
-            // TODO: this sanitization should be done in SkGradientShaderBase
-            const SkScalar currPos = (fAdvance > 0)
-                ? SkTPin(fPos[curr], prevPos, lastPos)
-                : SkTPin(fPos[curr], lastPos, prevPos);
-
-            if (currPos != prevPos) {
-                SkASSERT((currPos - prevPos > 0) == (fAdvance > 0));
-                func(fColors[prev], fColors[curr], prevPos, currPos);
-            }
-
-            prev = curr;
-            prevPos = currPos;
-        } while (prev != end);
-    }
-
-private:
-    template<typename F>
-    void buildImplicitPos(F func) const {
-        // When clients don't provide explicit color stop positions (fPos == nullptr),
-        // the color stops are distributed evenly across the unit interval
-        // (implicit positioning).
-        const SkScalar dt = fAdvance * SK_Scalar1 / (fCount - 1);
-        const int end = fBegin + fAdvance * (fCount - 2);
-        int prev = fBegin;
-        SkScalar prevPos = fFirstPos;
-
-        while (prev != end) {
-            const int curr = prev + fAdvance;
-            SkASSERT(curr >= 0 && curr < fCount);
-
-            const SkScalar currPos = prevPos + dt;
-            func(fColors[prev], fColors[curr], prevPos, currPos);
-            prev = curr;
-            prevPos = currPos;
-        }
-
-        // emit the last interval with a pinned end position, to avoid precision issues
-        func(fColors[prev], fColors[prev + fAdvance], prevPos, 1 - fFirstPos);
-    }
-
-    const SkColor*  fColors;
-    const SkScalar* fPos;
-    const int       fCount;
-    const SkScalar  fFirstPos;
-    const int       fBegin;
-    const int       fAdvance;
-};
-
 } // anonymous namespace
 
 SkLinearGradient::
 LinearGradient4fContext::LinearGradient4fContext(const SkLinearGradient& shader,
                                                  const ContextRec& rec)
     : INHERITED(shader, rec) {
-    // The main job here is to build a specialized interval list: a different
-    // representation of the color stops data, optimized for efficient scan line
-    // access during shading.
-    //
-    //   [{P0,C0} , {P1,C1}) [{P1,C2} , {P2,c3}) ... [{Pn,C2n} , {Pn+1,C2n+1})
-    //
-    // The list is sorted in increasing dst order, i.e. X(Pk) < X(Pk+1).  This
-    // allows us to always traverse left->right when iterating over a scan line.
-    // It also means that the interval order matches the color stops when dx >= 0,
-    // and is the inverse (pos, colors, order are flipped) when dx < 0.
-    //
-    // Note: the current representation duplicates pos data; we could refactor to
-    //       avoid this if interval storage size becomes a concern.
-    //
-    // Aside from reordering, we also perform two more pre-processing steps at
-    // this stage:
-    //
-    //   1) scale the color components depending on paint alpha and the requested
-    //      interpolation space (note: the interval color storage is SkPM4f, but
-    //      that doesn't necessarily mean the colors are premultiplied; that
-    //      property is tracked in fColorsArePremul)
-    //
-    //   2) inject synthetic intervals to support tiling.
-    //
-    //      * for kRepeat, no extra intervals are needed - the iterator just
-    //        wraps around at the end:
-    //
-    //          ->[P0,P1)->..[Pn-1,Pn)->
-    //
-    //      * for kClamp, we add two "infinite" intervals before/after:
-    //
-    //          [-/+inf , P0)->[P0 , P1)->..[Pn-1 , Pn)->[Pn , +/-inf)
-    //
-    //        (the iterator should never run off the end in this mode)
-    //
-    //      * for kMirror, we extend the range to [0..2] and add a flipped
-    //        interval series - then the iterator operates just as in the
-    //        kRepeat case:
-    //
-    //          ->[P0,P1)->..[Pn-1,Pn)->[2 - Pn,2 - Pn-1)->..[2 - P1,2 - P0)->
-    //
-    // TODO: investigate collapsing intervals << 1px.
 
-    SkASSERT(shader.fColorCount > 1);
-    SkASSERT(shader.fOrigColors);
-
-    const float paintAlpha = rec.fPaint->getAlpha() * (1.0f / 255);
-    const Sk4f componentScale = fColorsArePremul
-        ? Sk4f(paintAlpha)
-        : Sk4f(1.0f, 1.0f, 1.0f, paintAlpha);
-    const bool dx_is_pos = fDstToPos.getScaleX() >= 0;
-    const int first_index = dx_is_pos ? 0 : shader.fColorCount - 1;
-    const int last_index = shader.fColorCount - 1 - first_index;
-    const SkScalar first_pos = dx_is_pos ? 0 : SK_Scalar1;
-    const SkScalar last_pos = 1 - first_pos;
-
-    if (shader.fTileMode == SkShader::kClamp_TileMode) {
-        // synthetic edge interval: -/+inf .. P0
-        const SkPMColor clamp_color = pack_color(shader.fOrigColors[first_index],
-                                                 fColorsArePremul);
-        const SkScalar clamp_pos = dx_is_pos ? SK_ScalarMin : SK_ScalarMax;
-        fIntervals.emplace_back(clamp_color, clamp_pos,
-                                clamp_color, first_pos,
-                                componentScale);
-    } else if (shader.fTileMode == SkShader::kMirror_TileMode && !dx_is_pos) {
-        // synthetic mirror intervals injected before main intervals: (2 .. 1]
-        addMirrorIntervals(shader, componentScale, dx_is_pos);
-    }
-
-    const IntervalBuilder builder(shader.fOrigColors,
-                                  shader.fOrigPos,
-                                  shader.fColorCount,
-                                  !dx_is_pos);
-    builder.build([this, &componentScale] (SkColor c0, SkColor c1, SkScalar p0, SkScalar p1) {
-        SkASSERT(fIntervals.empty() || fIntervals.back().fP1 == p0);
-
-        fIntervals.emplace_back(pack_color(c0, fColorsArePremul),
-                                p0,
-                                pack_color(c1, fColorsArePremul),
-                                p1,
-                                componentScale);
-    });
-
-    if (shader.fTileMode == SkShader::kClamp_TileMode) {
-        // synthetic edge interval: Pn .. +/-inf
-        const SkPMColor clamp_color =
-            pack_color(shader.fOrigColors[last_index], fColorsArePremul);
-        const SkScalar clamp_pos = dx_is_pos ? SK_ScalarMax : SK_ScalarMin;
-        fIntervals.emplace_back(clamp_color, last_pos,
-                                clamp_color, clamp_pos,
-                                componentScale);
-    } else if (shader.fTileMode == SkShader::kMirror_TileMode && dx_is_pos) {
-        // synthetic mirror intervals injected after main intervals: [1 .. 2)
-        addMirrorIntervals(shader, componentScale, dx_is_pos);
-    }
+    // Our fast path expects interval points to be monotonically increasing in x.
+    const bool reverseIntervals = this->isFast() && fDstToPos.getScaleX() < 0;
+    this->buildIntervals(shader, rec, reverseIntervals);
 
     SkASSERT(fIntervals.count() > 0);
     fCachedInterval = fIntervals.begin();
 }
 
-void SkLinearGradient::
-LinearGradient4fContext::addMirrorIntervals(const SkLinearGradient& shader,
-                                            const Sk4f& componentScale, bool dx_is_pos) {
-    // Iterates in reverse order (vs main interval builder) and adds intervals reflected in 2.
-    const IntervalBuilder builder(shader.fOrigColors,
-                                  shader.fOrigPos,
-                                  shader.fColorCount,
-                                  dx_is_pos);
-    builder.build([this, &componentScale] (SkColor c0, SkColor c1, SkScalar p0, SkScalar p1) {
-        SkASSERT(fIntervals.empty() || fIntervals.back().fP1 == 2 - p0);
-
-        fIntervals.emplace_back(pack_color(c0, fColorsArePremul),
-                                2 - p0,
-                                pack_color(c1, fColorsArePremul),
-                                2 - p1,
-                                componentScale);
-    });
-}
-
 const SkGradientShaderBase::GradientShaderBase4fContext::Interval*
 SkLinearGradient::LinearGradient4fContext::findInterval(SkScalar fx) const {
     SkASSERT(in_range(fx, fIntervals.front().fP0, fIntervals.back().fP1));
@@ -434,6 +155,11 @@
 
 void SkLinearGradient::
 LinearGradient4fContext::shadeSpan(int x, int y, SkPMColor dst[], int count) {
+    if (!this->isFast()) {
+        this->INHERITED::shadeSpan(x, y, dst, count);
+        return;
+    }
+
     // TODO: plumb dithering
     SkASSERT(count > 0);
     if (fColorsArePremul) {
@@ -445,6 +171,11 @@
 
 void SkLinearGradient::
 LinearGradient4fContext::shadeSpan4f(int x, int y, SkPM4f dst[], int count) {
+    if (!this->isFast()) {
+        this->INHERITED::shadeSpan4f(x, y, dst, count);
+        return;
+    }
+
     // TODO: plumb dithering
     SkASSERT(count > 0);
     if (fColorsArePremul) {
@@ -545,7 +276,7 @@
         , fIsVertical(is_vertical)
     {
         SkASSERT(firstInterval <= lastInterval);
-        SkASSERT(i->contains(fx));
+        SkASSERT(in_range(fx, i->fP0, i->fP1));
         this->compute_interval_props(fx - i->fP0);
     }
 
@@ -626,3 +357,48 @@
     const SkScalar  fDx;        // 'dx' for consistency with other impls; actually dt/dx
     const bool      fIsVertical;
 };
+
+void SkLinearGradient::
+LinearGradient4fContext::mapTs(int x, int y, SkScalar ts[], int count) const {
+    SkASSERT(count > 0);
+    SkASSERT(fDstToPosClass != kLinear_MatrixClass);
+
+    SkScalar sx = x + SK_ScalarHalf;
+    const SkScalar sy = y + SK_ScalarHalf;
+    SkPoint pt;
+
+    if (fDstToPosClass != kPerspective_MatrixClass) {
+        // kLinear_MatrixClass, kFixedStepInX_MatrixClass => fixed dt per scanline
+        const SkScalar dtdx = fDstToPos.fixedStepInX(sy).x();
+        fDstToPosProc(fDstToPos, sx, sy, &pt);
+
+        const Sk4f dtdx4 = Sk4f(4 * dtdx);
+        Sk4f t4 = Sk4f(pt.x() + 0 * dtdx,
+                       pt.x() + 1 * dtdx,
+                       pt.x() + 2 * dtdx,
+                       pt.x() + 3 * dtdx);
+
+        while (count >= 4) {
+            t4.store(ts);
+            t4 = t4 + dtdx4;
+            ts += 4;
+            count -= 4;
+        }
+
+        if (count & 2) {
+            *ts++ = t4[0];
+            *ts++ = t4[1];
+            t4 = SkNx_shuffle<2, 0, 1, 3>(t4);
+        }
+
+        if (count & 1) {
+            *ts++ = t4[0];
+        }
+    } else {
+        for (int i = 0; i < count; ++i) {
+            fDstToPosProc(fDstToPos, sx, sy, &pt);
+            ts[i] = pt.x();
+            sx += SK_Scalar1;
+        }
+    }
+}