Add clone to Stage. Rename place to blend and PolymorphicUnion to Stage. Cleanup.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1877483002

Review URL: https://codereview.chromium.org/1877483002
diff --git a/gm/SkLinearBitmapPipelineGM.cpp b/gm/SkLinearBitmapPipelineGM.cpp
index f5e01ca..1bba1c5 100644
--- a/gm/SkLinearBitmapPipelineGM.cpp
+++ b/gm/SkLinearBitmapPipelineGM.cpp
@@ -59,7 +59,7 @@
 
     sk_sp<SkImage> image(SkImage::MakeRasterCopy(SkPixmap(info, pmsrc.addr32(), pmsrc.rowBytes())));
     SkPaint paint;
-    int32_t storage[300];
+    int32_t storage[400];
 
     sk_sp<SkShader> shader = image->makeShader(SkShader::kRepeat_TileMode,
                                                SkShader::kRepeat_TileMode);
diff --git a/src/core/SkBitmapProcShader.h b/src/core/SkBitmapProcShader.h
index 185a95d..e2d3900 100644
--- a/src/core/SkBitmapProcShader.h
+++ b/src/core/SkBitmapProcShader.h
@@ -55,7 +55,7 @@
 // an Sk3DBlitter in SkDraw.cpp
 // Note that some contexts may contain other contexts (e.g. for compose shaders), but we've not
 // yet found a situation where the size below isn't big enough.
-typedef SkSmallAllocator<3, 2100> SkTBlitterAllocator;
+typedef SkSmallAllocator<3, 2400> SkTBlitterAllocator;
 
 // If alloc is non-nullptr, it will be used to allocate the returned SkShader, and MUST outlive
 // the SkShader.
diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp
index dc2ee51..539547a 100644
--- a/src/core/SkLinearBitmapPipeline.cpp
+++ b/src/core/SkLinearBitmapPipeline.cpp
@@ -65,21 +65,66 @@
 class SkLinearBitmapPipeline::DestinationInterface {
 public:
     virtual ~DestinationInterface() { }
-    virtual void setDestination(void* dst, int count) = 0;
-};
-
-class SkLinearBitmapPipeline::PixelPlacerInterface
-    : public SkLinearBitmapPipeline::DestinationInterface {
-public:
-    virtual ~PixelPlacerInterface() { }
     // Count is normally not needed, but in these early stages of development it is useful to
     // check bounds.
     // TODO(herb): 4/6/2016 - remove count when code is stable.
     virtual void setDestination(void* dst, int count) = 0;
-    virtual void VECTORCALL placePixel(Sk4f pixel0) = 0;
-    virtual void VECTORCALL place4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) = 0;
 };
 
+class SkLinearBitmapPipeline::BlendProcessorInterface
+    : public SkLinearBitmapPipeline::DestinationInterface {
+public:
+    virtual void VECTORCALL blendPixel(Sk4f pixel0) = 0;
+    virtual void VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// SkLinearBitmapPipeline::Stage
+template<typename Base, size_t kSize, typename Next>
+SkLinearBitmapPipeline::Stage<Base, kSize, Next>::~Stage() {
+    if (fIsInitialized) {
+        this->get()->~Base();
+    }
+}
+
+template<typename Base, size_t kSize, typename Next>
+template<typename Variant, typename... Args>
+void SkLinearBitmapPipeline::Stage<Base, kSize, Next>::initStage(Next* next, Args&& ... args) {
+    SkASSERTF(sizeof(Variant) <= sizeof(fSpace),
+              "Size Variant: %d, Space: %d", sizeof(Variant), sizeof(fSpace));
+
+    new (&fSpace) Variant(next, std::forward<Args>(args)...);
+    fStageCloner = [this](Next* nextClone, void* addr) {
+        new (addr) Variant(nextClone, (const Variant&)*this->get());
+    };
+    fIsInitialized = true;
+};
+
+template<typename Base, size_t kSize, typename Next>
+template<typename Variant, typename... Args>
+void SkLinearBitmapPipeline::Stage<Base, kSize, Next>::initSink(Args&& ... args) {
+    SkASSERTF(sizeof(Variant) <= sizeof(fSpace),
+              "Size Variant: %d, Space: %d", sizeof(Variant), sizeof(fSpace));
+    new (&fSpace) Variant(std::forward<Args>(args)...);
+    fIsInitialized = true;
+};
+
+template<typename Base, size_t kSize, typename Next>
+template <typename To, typename From>
+To* SkLinearBitmapPipeline::Stage<Base, kSize, Next>::getInterface() {
+    From* down = static_cast<From*>(this->get());
+    return static_cast<To*>(down);
+}
+
+template<typename Base, size_t kSize, typename Next>
+Base* SkLinearBitmapPipeline::Stage<Base, kSize, Next>::cloneStageTo(
+    Next* next, Stage* cloneToStage) const
+{
+    if (!fIsInitialized) return nullptr;
+    fStageCloner(next, &cloneToStage->fSpace);
+    return cloneToStage->get();
+}
+
 namespace  {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -101,6 +146,10 @@
         : fNext{next}
         , fStrategy{std::forward<Args>(args)...}{ }
 
+    MatrixStage(Next* next, const MatrixStage& stage)
+        : fNext{next}
+        , fStrategy{stage.fStrategy} { }
+
     void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
         fStrategy.processPoints(&xs, &ys);
         fNext->pointListFew(n, xs, ys);
@@ -142,7 +191,7 @@
     const SkMatrix& inverse,
     SkLinearBitmapPipeline::MatrixStage* matrixProc) {
     if (inverse.hasPerspective()) {
-        matrixProc->Initialize<PerspectiveMatrix<>>(
+        matrixProc->initStage<PerspectiveMatrix<>>(
             next,
             SkVector{inverse.getTranslateX(), inverse.getTranslateY()},
             SkVector{inverse.getScaleX(), inverse.getScaleY()},
@@ -150,18 +199,18 @@
             SkVector{inverse.getPerspX(), inverse.getPerspY()},
             inverse.get(SkMatrix::kMPersp2));
     } else if (inverse.getSkewX() != 0.0f || inverse.getSkewY() != 0.0f) {
-        matrixProc->Initialize<AffineMatrix<>>(
+        matrixProc->initStage<AffineMatrix<>>(
             next,
             SkVector{inverse.getTranslateX(), inverse.getTranslateY()},
             SkVector{inverse.getScaleX(), inverse.getScaleY()},
             SkVector{inverse.getSkewX(), inverse.getSkewY()});
     } else if (inverse.getScaleX() != 1.0f || inverse.getScaleY() != 1.0f) {
-        matrixProc->Initialize<ScaleMatrix<>>(
+        matrixProc->initStage<ScaleMatrix<>>(
             next,
             SkVector{inverse.getTranslateX(), inverse.getTranslateY()},
             SkVector{inverse.getScaleX(), inverse.getScaleY()});
     } else if (inverse.getTranslateX() != 0.0f || inverse.getTranslateY() != 0.0f) {
-        matrixProc->Initialize<TranslateMatrix<>>(
+        matrixProc->initStage<TranslateMatrix<>>(
             next,
             SkVector{inverse.getTranslateX(), inverse.getTranslateY()});
     } else {
@@ -182,6 +231,11 @@
         , fXStrategy{dimensions.width()}
         , fYStrategy{dimensions.height()}{ }
 
+    NearestTileStage(Next* next, const NearestTileStage& stage)
+        : fNext{next}
+        , fXStrategy{stage.fXStrategy}
+        , fYStrategy{stage.fYStrategy} { }
+
     void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
         fXStrategy.tileXPoints(&xs);
         fYStrategy.tileYPoints(&ys);
@@ -218,11 +272,18 @@
 public:
     template <typename... Args>
     BilerpTileStage(Next* next, SkISize dimensions)
-        : fXMax(dimensions.width())
+        : fNext{next}
+        , fXMax(dimensions.width())
         , fYMax(dimensions.height())
-        , fNext{next}
         , fXStrategy{dimensions.width()}
-        , fYStrategy{dimensions.height()}{ }
+        , fYStrategy{dimensions.height()} { }
+
+    BilerpTileStage(Next* next, const BilerpTileStage& stage)
+        : fNext{next}
+        , fXMax{stage.fXMax}
+        , fYMax{stage.fYMax}
+        , fXStrategy{stage.fXStrategy}
+        , fYStrategy{stage.fYStrategy} { }
 
     void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
         fXStrategy.tileXPoints(&xs);
@@ -339,9 +400,9 @@
         }
     }
 
+    Next* const fNext;
     SkScalar fXMax;
     SkScalar fYMax;
-    Next* const fNext;
     XStrategy fXStrategy;
     YStrategy fYStrategy;
 };
@@ -351,9 +412,9 @@
     SkFilterQuality filterQuality, SkISize dimensions,
     Next* next, SkLinearBitmapPipeline::TileStage* tileStage) {
     if (filterQuality == kNone_SkFilterQuality) {
-        tileStage->Initialize<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
+        tileStage->initStage<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
     } else {
-        tileStage->Initialize<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
+        tileStage->initStage<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
     }
 }
 template <typename XStrategy>
@@ -413,6 +474,9 @@
     NearestNeighborSampler(Next* next, Args&&... args)
     : fSampler{next, std::forward<Args>(args)...} { }
 
+    NearestNeighborSampler(Next* next, const NearestNeighborSampler& sampler)
+        : fSampler{next, sampler.fSampler} { }
+
     void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
         fSampler.nearestListFew(n, xs, ys);
     }
@@ -451,6 +515,9 @@
     BilerpSampler(Next* next, Args&&... args)
         : fSampler{next, std::forward<Args>(args)...} { }
 
+    BilerpSampler(Next* next, const BilerpSampler& sampler)
+    : fSampler{next, sampler.fSampler} { }
+
     void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
         fSampler.bilerpListFew(n, xs, ys);
     }
@@ -554,34 +621,34 @@
     uint32_t*             fEnd;
 };
 
-using Placer = SkLinearBitmapPipeline::PixelPlacerInterface;
+using Blender = SkLinearBitmapPipeline::BlendProcessorInterface;
 
 template<template <typename, typename> class Sampler>
 static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_base(
-    Placer* next,
+    Blender* next,
     const SkPixmap& srcPixmap,
     SkLinearBitmapPipeline::SampleStage* sampleStage) {
     const SkImageInfo& imageInfo = srcPixmap.info();
     switch (imageInfo.colorType()) {
         case kRGBA_8888_SkColorType:
             if (imageInfo.profileType() == kSRGB_SkColorProfileType) {
-                sampleStage->Initialize<Sampler<Pixel8888SRGB, Placer>>(next, srcPixmap);
+                sampleStage->initStage<Sampler<Pixel8888SRGB, Blender>>(next, srcPixmap);
             } else {
-                sampleStage->Initialize<Sampler<Pixel8888LRGB, Placer>>(next, srcPixmap);
+                sampleStage->initStage<Sampler<Pixel8888LRGB, Blender>>(next, srcPixmap);
             }
             break;
         case kBGRA_8888_SkColorType:
             if (imageInfo.profileType() == kSRGB_SkColorProfileType) {
-                sampleStage->Initialize<Sampler<Pixel8888SBGR, Placer>>(next, srcPixmap);
+                sampleStage->initStage<Sampler<Pixel8888SBGR, Blender>>(next, srcPixmap);
             } else {
-                sampleStage->Initialize<Sampler<Pixel8888LBGR, Placer>>(next, srcPixmap);
+                sampleStage->initStage<Sampler<Pixel8888LBGR, Blender>>(next, srcPixmap);
             }
             break;
         case kIndex_8_SkColorType:
             if (imageInfo.profileType() == kSRGB_SkColorProfileType) {
-                sampleStage->Initialize<Sampler<PixelIndex8SRGB, Placer>>(next, srcPixmap);
+                sampleStage->initStage<Sampler<PixelIndex8SRGB, Blender>>(next, srcPixmap);
             } else {
-                sampleStage->Initialize<Sampler<PixelIndex8LRGB, Placer>>(next, srcPixmap);
+                sampleStage->initStage<Sampler<PixelIndex8LRGB, Blender>>(next, srcPixmap);
             }
             break;
         default:
@@ -592,10 +659,11 @@
 }
 
 SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
-    Placer* next,
+    Blender* next,
     SkFilterQuality filterQuality,
     const SkPixmap& srcPixmap,
-    SkLinearBitmapPipeline::SampleStage* sampleStage) {
+    SkLinearBitmapPipeline::SampleStage* sampleStage)
+{
     if (filterQuality == kNone_SkFilterQuality) {
         return choose_pixel_sampler_base<NearestNeighborSampler>(next, srcPixmap, sampleStage);
     } else {
@@ -604,25 +672,25 @@
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
-// Pixel Placement Stage
+// Pixel Blender Stage
 template <SkAlphaType alphaType>
-class PlaceFPPixel final : public SkLinearBitmapPipeline::PixelPlacerInterface {
+class SrcFPPixel final : public SkLinearBitmapPipeline::BlendProcessorInterface {
 public:
-    PlaceFPPixel(float postAlpha) : fPostAlpha{postAlpha} { }
-
-    void VECTORCALL placePixel(Sk4f pixel) override {
+    SrcFPPixel(float postAlpha) : fPostAlpha{postAlpha} { }
+    SrcFPPixel(const SrcFPPixel& Blender) : fPostAlpha(Blender.fPostAlpha) {}
+    void VECTORCALL blendPixel(Sk4f pixel) override {
         SkASSERT(fDst + 1 <= fEnd );
-        PlacePixel(fDst, pixel, 0);
+        SrcPixel(fDst, pixel, 0);
         fDst += 1;
     }
 
-    void VECTORCALL place4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override {
+    void VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override {
         SkASSERT(fDst + 4 <= fEnd);
         SkPM4f* dst = fDst;
-        PlacePixel(dst, p0, 0);
-        PlacePixel(dst, p1, 1);
-        PlacePixel(dst, p2, 2);
-        PlacePixel(dst, p3, 3);
+        SrcPixel(dst, p0, 0);
+        SrcPixel(dst, p1, 1);
+        SrcPixel(dst, p2, 2);
+        SrcPixel(dst, p3, 3);
         fDst += 4;
     }
 
@@ -632,7 +700,7 @@
     }
 
 private:
-    void VECTORCALL PlacePixel(SkPM4f* dst, Sk4f pixel, int index) {
+    void VECTORCALL SrcPixel(SkPM4f* dst, Sk4f pixel, int index) {
         Sk4f newPixel = pixel;
         if (alphaType == kUnpremul_SkAlphaType) {
             newPixel = Premultiply(pixel);
@@ -650,21 +718,22 @@
     Sk4f fPostAlpha;
 };
 
-static SkLinearBitmapPipeline::PixelPlacerInterface* choose_pixel_placer(
+static SkLinearBitmapPipeline::BlendProcessorInterface* choose_blender(
     SkAlphaType alphaType,
     float postAlpha,
-    SkLinearBitmapPipeline::PixelStage* placerStage) {
+    SkLinearBitmapPipeline::BlenderStage* blenderStage) {
     if (alphaType == kUnpremul_SkAlphaType) {
-        placerStage->Initialize<PlaceFPPixel<kUnpremul_SkAlphaType>>(postAlpha);
+        blenderStage->initSink<SrcFPPixel<kUnpremul_SkAlphaType>>(postAlpha);
     } else {
         // kOpaque_SkAlphaType is treated the same as kPremul_SkAlphaType
-        placerStage->Initialize<PlaceFPPixel<kPremul_SkAlphaType>>(postAlpha);
+        blenderStage->initSink<SrcFPPixel<kPremul_SkAlphaType>>(postAlpha);
     }
-    return placerStage->get();
+    return blenderStage->get();
 }
 }  // namespace
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
+// SkLinearBitmapPipeline
 SkLinearBitmapPipeline::~SkLinearBitmapPipeline() {}
 
 SkLinearBitmapPipeline::SkLinearBitmapPipeline(
@@ -699,14 +768,12 @@
 
     // As the stages are built, the chooser function may skip a stage. For example, with the
     // identity matrix, the matrix stage is skipped, and the tilerStage is the first stage.
-    auto placementStage = choose_pixel_placer(alphaType, postAlpha, &fPixelStage);
-    auto samplerStage   = choose_pixel_sampler(placementStage,
-                                               filterQuality, srcPixmap, &fSampleStage);
-    auto tilerStage     = choose_tiler(samplerStage,
-                                       dimensions, xTile, yTile, filterQuality, dx, &fTiler);
-    fFirstStage         = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage);
-    fLastStage          = placementStage;
-
+    auto blenderStage = choose_blender(alphaType, postAlpha, &fBlenderStage);
+    auto samplerStage = choose_pixel_sampler(blenderStage, filterQuality, srcPixmap, &fSampleStage);
+    auto tilerStage   = choose_tiler(samplerStage, dimensions, xTile, yTile,
+                                     filterQuality, dx, &fTileStage);
+    fFirstStage       = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage);
+    fLastStage        = blenderStage;
 }
 
 void SkLinearBitmapPipeline::shadeSpan4f(int x, int y, SkPM4f* dst, int count) {
diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h
index 32e4641..548302e 100644
--- a/src/core/SkLinearBitmapPipeline.h
+++ b/src/core/SkLinearBitmapPipeline.h
@@ -8,11 +8,9 @@
 #ifndef SkLinearBitmapPipeline_DEFINED
 #define SkLinearBitmapPipeline_DEFINED
 
-
 #include "SkColor.h"
 #include "SkImageInfo.h"
 #include "SkMatrix.h"
-#include "SkNx.h"
 #include "SkShader.h"
 
 class SkLinearBitmapPipeline {
@@ -27,31 +25,33 @@
 
     void shadeSpan4f(int x, int y, SkPM4f* dst, int count);
 
-    template<typename Base, size_t kSize>
-    class PolymorphicUnion {
+    template<typename Base, size_t kSize, typename Next = void>
+    class Stage {
     public:
-        PolymorphicUnion() : fIsInitialized{false} {}
-
-        ~PolymorphicUnion() {
-            if (fIsInitialized) {
-                this->get()->~Base();
-            }
-        }
+        Stage() : fIsInitialized{false} {}
+        ~Stage();
 
         template<typename Variant, typename... Args>
-        void Initialize(Args&&... args) {
-            SkASSERTF(sizeof(Variant) <= sizeof(fSpace),
-                      "Size Variant: %d, Space: %d", sizeof(Variant), sizeof(fSpace));
+        void initStage(Next* next, Args&& ... args);
 
-            new(&fSpace) Variant(std::forward<Args>(args)...);
-            fIsInitialized = true;
-        };
+        template<typename Variant, typename... Args>
+        void initSink(Args&& ... args);
+
+        template <typename To, typename From>
+        To* getInterface();
+
+        // Copy this stage into `cloneToStage`, wiring the copy to `next` as its next stage
+        // (which need not be this stage's own next). Returns the clone, or nullptr if this
+        // stage was never initialized. Note: There is no cloneSinkTo method because the code
+        // usually places the top part of the pipeline on a new sampler.
+        Base* cloneStageTo(Next* next, Stage* cloneToStage) const;
 
         Base* get() const { return reinterpret_cast<Base*>(&fSpace); }
         Base* operator->() const { return this->get(); }
         Base& operator*() const { return *(this->get()); }
 
     private:
+        std::function<void (Next*, void*)> fStageCloner;
         struct SK_STRUCT_ALIGN(16) Space {
             char space[kSize];
         };
@@ -61,22 +61,22 @@
 
     class PointProcessorInterface;
     class SampleProcessorInterface;
-    class PixelPlacerInterface;
+    class BlendProcessorInterface;
     class DestinationInterface;
 
-    // These values were generated by the assert above in PolymorphicUnion.
-    using MatrixStage = PolymorphicUnion<PointProcessorInterface, 160>;
-    using TileStage   = PolymorphicUnion<PointProcessorInterface, 160>;
-    using SampleStage = PolymorphicUnion<SampleProcessorInterface,100>;
-    using PixelStage  = PolymorphicUnion<PixelPlacerInterface,     80>;
+    // These values were generated by the asserts in Stage::init{Sink|Stage} (see the .cpp).
+    using MatrixStage  = Stage<PointProcessorInterface, 160, PointProcessorInterface>;
+    using TileStage    = Stage<PointProcessorInterface, 160, SampleProcessorInterface>;
+    using SampleStage  = Stage<SampleProcessorInterface, 100, BlendProcessorInterface>;
+    using BlenderStage = Stage<BlendProcessorInterface, 80>;
 
 private:
     PointProcessorInterface* fFirstStage;
-    MatrixStage fMatrixStage;
-    TileStage   fTiler;
-    SampleStage fSampleStage;
-    PixelStage  fPixelStage;
-    DestinationInterface* fLastStage;
+    MatrixStage              fMatrixStage;
+    TileStage                fTileStage;
+    SampleStage              fSampleStage;
+    BlenderStage             fBlenderStage;
+    DestinationInterface*    fLastStage;
 };
 
 #endif  // SkLinearBitmapPipeline_DEFINED
diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h
index 9393224..7157ffc 100644
--- a/src/core/SkLinearBitmapPipeline_sample.h
+++ b/src/core/SkLinearBitmapPipeline_sample.h
@@ -52,22 +52,26 @@
 class GeneralSampler {
 public:
     template<typename... Args>
-    GeneralSampler(SkLinearBitmapPipeline::PixelPlacerInterface* next, Args&& ... args)
+    GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
         : fNext{next}, fStrategy{std::forward<Args>(args)...} { }
 
+    GeneralSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
+                   const GeneralSampler& sampler)
+        : fNext{next}, fStrategy{sampler.fStrategy} { }
+
     void VECTORCALL nearestListFew(int n, Sk4s xs, Sk4s ys) {
         SkASSERT(0 < n && n < 4);
         Sk4f px0, px1, px2;
         fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2);
-        if (n >= 1) fNext->placePixel(px0);
-        if (n >= 2) fNext->placePixel(px1);
-        if (n >= 3) fNext->placePixel(px2);
+        if (n >= 1) fNext->blendPixel(px0);
+        if (n >= 2) fNext->blendPixel(px1);
+        if (n >= 3) fNext->blendPixel(px2);
     }
 
     void VECTORCALL nearestList4(Sk4s xs, Sk4s ys) {
         Sk4f px0, px1, px2, px3;
         fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3);
-        fNext->place4Pixels(px0, px1, px2, px3);
+        fNext->blend4Pixels(px0, px1, px2, px3);
     }
 
     void nearestSpan(Span span) {
@@ -102,16 +106,16 @@
             return this->bilerNonEdgePixel(xs[index], ys[index]);
         };
 
-        if (n >= 1) fNext->placePixel(bilerpPixel(0));
-        if (n >= 2) fNext->placePixel(bilerpPixel(1));
-        if (n >= 3) fNext->placePixel(bilerpPixel(2));
+        if (n >= 1) fNext->blendPixel(bilerpPixel(0));
+        if (n >= 2) fNext->blendPixel(bilerpPixel(1));
+        if (n >= 3) fNext->blendPixel(bilerpPixel(2));
     }
 
     void VECTORCALL bilerpList4(Sk4s xs, Sk4s ys) {
         auto bilerpPixel = [&](int index) {
             return this->bilerNonEdgePixel(xs[index], ys[index]);
         };
-        fNext->place4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
+        fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
     }
 
     void VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) {
@@ -120,7 +124,7 @@
         Sk4f ys = Sk4f{sampleYs[0]};
         fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
         Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
-        fNext->placePixel(pixel);
+        fNext->blendPixel(pixel);
     }
 
     void bilerpSpan(Span span) {
@@ -191,11 +195,11 @@
             Sk4f px1 = getNextPixel();
             Sk4f px2 = getNextPixel();
             Sk4f px3 = getNextPixel();
-            next->place4Pixels(px0, px1, px2, px3);
+            next->blend4Pixels(px0, px1, px2, px3);
             count -= 4;
         }
         while (count > 0) {
-            next->placePixel(getNextPixel());
+            next->blendPixel(getNextPixel());
             count -= 1;
         }
     }
@@ -214,13 +218,13 @@
             while (count >= 4) {
                 Sk4f px0, px1, px2, px3;
                 fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3);
-                next->place4Pixels(px0, px1, px2, px3);
+                next->blend4Pixels(px0, px1, px2, px3);
                 ix += 4;
                 count -= 4;
             }
 
             while (count > 0) {
-                next->placePixel(fStrategy.getPixelAt(row, ix));
+                next->blendPixel(fStrategy.getPixelAt(row, ix));
                 ix += 1;
                 count -= 1;
             }
@@ -228,13 +232,13 @@
             while (count >= 4) {
                 Sk4f px0, px1, px2, px3;
                 fStrategy.get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0);
-                next->place4Pixels(px0, px1, px2, px3);
+                next->blend4Pixels(px0, px1, px2, px3);
                 ix -= 4;
                 count -= 4;
             }
 
             while (count > 0) {
-                next->placePixel(fStrategy.getPixelAt(row, ix));
+                next->blendPixel(fStrategy.getPixelAt(row, ix));
                 ix -= 1;
                 count -= 1;
             }
@@ -272,11 +276,11 @@
         Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1;
         int count = span.count();
         while (count >= 4) {
-            fNext->place4Pixels(filterPixel, filterPixel, filterPixel, filterPixel);
+            fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel);
             count -= 4;
         }
         while (count > 0) {
-            fNext->placePixel(filterPixel);
+            fNext->blendPixel(filterPixel);
             count -= 1;
         }
     }
@@ -341,12 +345,12 @@
             Sk4f fpixel2 = getNextPixel();
             Sk4f fpixel3 = getNextPixel();
 
-            fNext->place4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
+            fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
             count -= 4;
         }
 
         while (count > 0) {
-            fNext->placePixel(getNextPixel());
+            fNext->blendPixel(getNextPixel());
 
             count -= 1;
         }
@@ -416,11 +420,7 @@
                 Sk4f pxS3 = px30 + px31;
                 Sk4f px3 = lerp(pxS2, pxS3);
                 pxB = pxS3;
-                fNext->place4Pixels(
-                    px0,
-                    px1,
-                    px2,
-                    px3);
+                fNext->blend4Pixels(px0, px1, px2, px3);
                 ix0 += 4;
                 count -= 4;
             }
@@ -428,7 +428,7 @@
                 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0);
                 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0);
 
-                fNext->placePixel(lerp(pixelY0, pixelY1));
+                fNext->blendPixel(lerp(pixelY0, pixelY1));
                 ix0 += 1;
                 count -= 1;
             }
@@ -448,11 +448,7 @@
                 Sk4f pxS0 = px00 + px01;
                 Sk4f px3 = lerp(pxS0, pxS1);
                 pxB = pxS0;
-                fNext->place4Pixels(
-                    px0,
-                    px1,
-                    px2,
-                    px3);
+                fNext->blend4Pixels(px0, px1, px2, px3);
                 ix0 -= 4;
                 count -= 4;
             }
@@ -460,7 +456,7 @@
                 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix0);
                 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix0);
 
-                fNext->placePixel(lerp(pixelY0, pixelY1));
+                fNext->blendPixel(lerp(pixelY0, pixelY1));
                 ix0 -= 1;
                 count -= 1;
             }
@@ -488,7 +484,7 @@
                 fStrategy.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30);
                 Sk4f px01, px11, px21, px31;
                 fStrategy.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31);
-                fNext->place4Pixels(
+                fNext->blend4Pixels(
                     lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
                 ix += 4;
                 count -= 4;
@@ -497,7 +493,7 @@
                 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix);
                 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix);
 
-                fNext->placePixel(lerp(&pixelY0, &pixelY1));
+                fNext->blendPixel(lerp(&pixelY0, &pixelY1));
                 ix += 1;
                 count -= 1;
             }
@@ -508,7 +504,7 @@
                 fStrategy.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00);
                 Sk4f px01, px11, px21, px31;
                 fStrategy.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01);
-                fNext->place4Pixels(
+                fNext->blend4Pixels(
                     lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
                 ix -= 4;
                 count -= 4;
@@ -517,7 +513,7 @@
                 Sk4f pixelY0 = fStrategy.getPixelAt(rowY0, ix);
                 Sk4f pixelY1 = fStrategy.getPixelAt(rowY1, ix);
 
-                fNext->placePixel(lerp(&pixelY0, &pixelY1));
+                fNext->blendPixel(lerp(&pixelY0, &pixelY1));
                 ix -= 1;
                 count -= 1;
             }
@@ -657,6 +653,15 @@
         }
     }
 
+    PixelIndex8(const PixelIndex8& strategy)
+        : fSrc{strategy.fSrc}, fWidth{strategy.fWidth} {
+        fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
+        // TODO: figure out the count.
+        for (int i = 0; i < 256; i++) {
+            fColorTable[i] = strategy.fColorTable[i];
+        }
+    }
+
     void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) {
         Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
         Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
diff --git a/tests/SkColor4fTest.cpp b/tests/SkColor4fTest.cpp
index 239004a..67e8d37 100644
--- a/tests/SkColor4fTest.cpp
+++ b/tests/SkColor4fTest.cpp
@@ -153,7 +153,7 @@
 
     SkPaint paint;
     for (const auto& rec : recs) {
-        uint32_t storage[300];
+        uint32_t storage[400];
         paint.setShader(rec.fFact());
         // Encourage 4f context selection. At some point we may need
         // to instantiate two separate contexts for optimal 4b/4f selection.