remove src/jumper

The distinction between SkJumper and SkRasterPipeline used
to be important, but it no longer is.  This CL moves everything
under src/jumper into the appropriate SkRasterPipeline files.
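
For illustration, a minimal sketch of what a call site looks like after
this CL. It is not a new API; every name below already appears in the
renamed SkRasterPipeline headers, and src/dst/width stand in for a
caller's own pixel buffer and row width:

    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
                               dst_ctx = { (void*)dst, 0 };

    SkRasterPipeline_<256> p;
    p.append(SkRasterPipeline::load_8888,  &src_ctx);
    p.append(SkRasterPipeline::store_8888, &dst_ctx);
    p.run(0,0, width,1);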

Change-Id: I1181fffafccb3dc4c4eb5f33b442c719ee370462
Reviewed-on: https://skia-review.googlesource.com/c/164627
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/codec/SkWebpCodec.cpp b/src/codec/SkWebpCodec.cpp
index 8ebbf43..c695e0a 100644
--- a/src/codec/SkWebpCodec.cpp
+++ b/src/codec/SkWebpCodec.cpp
@@ -7,7 +7,6 @@
 
 #include "SkWebpCodec.h"
 
-#include "../jumper/SkJumper.h"
 #include "SkBitmap.h"
 #include "SkCanvas.h"
 #include "SkCodecAnimation.h"
@@ -339,8 +338,8 @@
                        SkAlphaType dstAt,
                        bool srcHasAlpha,
                        int width) {
-    SkJumper_MemoryCtx dst_ctx = { (void*)dst, 0 },
-                       src_ctx = { (void*)src, 0 };
+    SkRasterPipeline_MemoryCtx dst_ctx = { (void*)dst, 0 },
+                               src_ctx = { (void*)src, 0 };
 
     SkRasterPipeline_<256> p;
 
diff --git a/src/core/SkBlendMode.cpp b/src/core/SkBlendMode.cpp
index d7f9ded..4610ae0 100644
--- a/src/core/SkBlendMode.cpp
+++ b/src/core/SkBlendMode.cpp
@@ -8,7 +8,6 @@
 #include "SkBlendModePriv.h"
 #include "SkCoverageModePriv.h"
 #include "SkRasterPipeline.h"
-#include "../jumper/SkJumper.h"
 
 bool SkBlendMode_ShouldPreScaleCoverage(SkBlendMode mode, bool rgb_coverage) {
     // The most important things we do here are:
@@ -139,9 +138,9 @@
     SkPMColor4f            src_storage = src,
                            dst_storage = dst,
                            res_storage;
-    SkJumper_MemoryCtx src_ctx = { &src_storage, 0 },
-                       dst_ctx = { &dst_storage, 0 },
-                       res_ctx = { &res_storage, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { &src_storage, 0 },
+                               dst_ctx = { &dst_storage, 0 },
+                               res_ctx = { &res_storage, 0 };
 
     p.append(SkRasterPipeline::load_f32, &dst_ctx);
     p.append(SkRasterPipeline::move_src_dst);
diff --git a/src/core/SkBlitter_Sprite.cpp b/src/core/SkBlitter_Sprite.cpp
index fe8bcaa..ca31778 100644
--- a/src/core/SkBlitter_Sprite.cpp
+++ b/src/core/SkBlitter_Sprite.cpp
@@ -13,7 +13,6 @@
 #include "SkOpts.h"
 #include "SkRasterPipeline.h"
 #include "SkSpriteBlitter.h"
-#include "../jumper/SkJumper.h"
 
 SkSpriteBlitter::SkSpriteBlitter(const SkPixmap& source)
     : fSource(source) {}
@@ -156,10 +155,10 @@
     }
 
 private:
-    SkArenaAlloc*      fAlloc;
-    SkBlitter*         fBlitter;
-    SkJumper_MemoryCtx fSrcPtr;
-    SkColor4f          fPaintColor;
+    SkArenaAlloc*              fAlloc;
+    SkBlitter*                 fBlitter;
+    SkRasterPipeline_MemoryCtx fSrcPtr;
+    SkColor4f                  fPaintColor;
 
     typedef SkSpriteBlitter INHERITED;
 };
diff --git a/src/core/SkColorFilter.cpp b/src/core/SkColorFilter.cpp
index aa579a5..5bb46d7 100644
--- a/src/core/SkColorFilter.cpp
+++ b/src/core/SkColorFilter.cpp
@@ -17,7 +17,6 @@
 #include "SkTDArray.h"
 #include "SkUnPreMultiply.h"
 #include "SkWriteBuffer.h"
-#include "../jumper/SkJumper.h"
 
 #if SK_SUPPORT_GPU
 #include "GrFragmentProcessor.h"
@@ -72,7 +71,7 @@
 
     pipeline.append_constant_color(&alloc, src.vec());
     this->onAppendStages(&pipeline, colorSpace, &alloc, c.fA == 1);
-    SkJumper_MemoryCtx dstPtr = { &dst, 0 };
+    SkRasterPipeline_MemoryCtx dstPtr = { &dst, 0 };
     pipeline.append(SkRasterPipeline::store_f32, &dstPtr);
     pipeline.run(0,0, 1,1);
 
diff --git a/src/core/SkColorSpaceXformer.h b/src/core/SkColorSpaceXformer.h
index f4b3faf..7d277d7 100644
--- a/src/core/SkColorSpaceXformer.h
+++ b/src/core/SkColorSpaceXformer.h
@@ -14,7 +14,6 @@
 #include "SkRasterPipeline.h"
 #include "SkRefCnt.h"
 #include "SkTHash.h"
-#include "../jumper/SkJumper.h"
 
 class SkBitmap;
 class SkColorFilter;
@@ -60,8 +59,8 @@
     SkSTArenaAlloc<256>                                 fAlloc;
     std::function<void(size_t, size_t, size_t, size_t)> fFromSRGB;
     SkColorSpaceXformSteps                              fFromSRGBSteps;
-    SkJumper_MemoryCtx                                  fFromSRGBSrc{nullptr,0};
-    SkJumper_MemoryCtx                                  fFromSRGBDst{nullptr,0};
+    SkRasterPipeline_MemoryCtx                          fFromSRGBSrc{nullptr,0};
+    SkRasterPipeline_MemoryCtx                          fFromSRGBDst{nullptr,0};
 
     size_t fReentryCount; // tracks the number of nested apply() calls for cache purging.
 
diff --git a/src/core/SkConvertPixels.cpp b/src/core/SkConvertPixels.cpp
index f0de61c..561dc45 100644
--- a/src/core/SkConvertPixels.cpp
+++ b/src/core/SkConvertPixels.cpp
@@ -15,7 +15,6 @@
 #include "SkRasterPipeline.h"
 #include "SkUnPreMultiply.h"
 #include "SkUnPreMultiplyPriv.h"
-#include "../jumper/SkJumper.h"
 
 static bool rect_memcpy(const SkImageInfo& dstInfo,       void* dstPixels, size_t dstRB,
                         const SkImageInfo& srcInfo, const void* srcPixels, size_t srcRB,
@@ -170,8 +169,8 @@
                                   const SkImageInfo& srcInfo, const void* srcRow, size_t srcRB,
                                   const SkColorSpaceXformSteps& steps) {
 
-    SkJumper_MemoryCtx src = { (void*)srcRow, (int)(srcRB / srcInfo.bytesPerPixel()) },
-                       dst = { (void*)dstRow, (int)(dstRB / dstInfo.bytesPerPixel()) };
+    SkRasterPipeline_MemoryCtx src = { (void*)srcRow, (int)(srcRB / srcInfo.bytesPerPixel()) },
+                               dst = { (void*)dstRow, (int)(dstRB / dstInfo.bytesPerPixel()) };
 
     SkRasterPipeline_<256> pipeline;
     pipeline.append_load(srcInfo.colorType(), &src);
diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp
index d65f7cd..3135012 100644
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@@ -6,7 +6,7 @@
  */
 
 #include "SkRasterPipeline.h"
-#include "../jumper/SkJumper.h"
+#include "SkOpts.h"
 #include <algorithm>
 
 SkRasterPipeline::SkRasterPipeline(SkArenaAlloc* alloc) : fAlloc(alloc) {
@@ -119,7 +119,7 @@
         this->append(white_color);
         INC_WHITE;
     } else {
-        auto ctx = alloc->make<SkJumper_UniformColorCtx>();
+        auto ctx = alloc->make<SkRasterPipeline_UniformColorCtx>();
         Sk4f color = Sk4f::Load(rgba);
         color.store(&ctx->r);
 
@@ -193,7 +193,7 @@
     }
 }
 
-void SkRasterPipeline::append_load(SkColorType ct, const SkJumper_MemoryCtx* ctx) {
+void SkRasterPipeline::append_load(SkColorType ct, const SkRasterPipeline_MemoryCtx* ctx) {
     switch (ct) {
         case kUnknown_SkColorType: SkASSERT(false); break;
 
@@ -223,7 +223,7 @@
     }
 }
 
-void SkRasterPipeline::append_load_dst(SkColorType ct, const SkJumper_MemoryCtx* ctx) {
+void SkRasterPipeline::append_load_dst(SkColorType ct, const SkRasterPipeline_MemoryCtx* ctx) {
     switch (ct) {
         case kUnknown_SkColorType: SkASSERT(false); break;
 
@@ -253,7 +253,7 @@
     }
 }
 
-void SkRasterPipeline::append_store(SkColorType ct, const SkJumper_MemoryCtx* ctx) {
+void SkRasterPipeline::append_store(SkColorType ct, const SkRasterPipeline_MemoryCtx* ctx) {
     switch (ct) {
         case kUnknown_SkColorType: SkASSERT(false); break;
 
@@ -291,3 +291,66 @@
         this->unchecked_append(SkRasterPipeline::clamp_gamut, nullptr);
     }
 }
+
+SkRasterPipeline::StartPipelineFn SkRasterPipeline::build_pipeline(void** ip) const {
+#ifndef SK_JUMPER_DISABLE_8BIT
+    // We'll try to build a lowp pipeline, but if that fails, fall back to a highp float pipeline.
+    void** reset_point = ip;
+
+    // Stages are stored backwards in fStages, so we reverse here, back to front.
+    *--ip = (void*)SkOpts::just_return_lowp;
+    for (const StageList* st = fStages; st; st = st->prev) {
+        SkOpts::StageFn fn;
+        if (!st->rawFunction && (fn = SkOpts::stages_lowp[st->stage])) {
+            if (st->ctx) {
+                *--ip = st->ctx;
+            }
+            *--ip = (void*)fn;
+        } else {
+            ip = reset_point;
+            break;
+        }
+    }
+    if (ip != reset_point) {
+        return SkOpts::start_pipeline_lowp;
+    }
+#endif
+
+    *--ip = (void*)SkOpts::just_return_highp;
+    for (const StageList* st = fStages; st; st = st->prev) {
+        if (st->ctx) {
+            *--ip = st->ctx;
+        }
+        if (st->rawFunction) {
+            *--ip = (void*)st->stage;
+        } else {
+            *--ip = (void*)SkOpts::stages_highp[st->stage];
+        }
+    }
+    return SkOpts::start_pipeline_highp;
+}
+
+void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
+    if (this->empty()) {
+        return;
+    }
+
+    // Best to not use fAlloc here... we can't bound how often run() will be called.
+    SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
+
+    auto start_pipeline = this->build_pipeline(program.get() + fSlotsNeeded);
+    start_pipeline(x,y,x+w,y+h, program.get());
+}
+
+std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
+    if (this->empty()) {
+        return [](size_t, size_t, size_t, size_t) {};
+    }
+
+    void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
+
+    auto start_pipeline = this->build_pipeline(program + fSlotsNeeded);
+    return [=](size_t x, size_t y, size_t w, size_t h) {
+        start_pipeline(x,y,x+w,y+h, program);
+    };
+}
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 809f404..4929b58 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -16,7 +16,6 @@
 #include "SkTypes.h"
 #include <functional>
 #include <vector>
-#include "../jumper/SkJumper.h"
 
 /**
  * SkRasterPipeline provides a cheap way to chain together a pixel processing pipeline.
@@ -31,8 +30,6 @@
 * arbitrary context pointer.  The stage function arguments and calling convention are
  * designed to maximize the amount of data we can pass along the pipeline cheaply, and
  * vary depending on CPU feature detection.
- *
- * If you'd like to see how this works internally, you want to start digging around src/jumper.
  */
 
 #define SK_RASTER_PIPELINE_STAGES(M)                               \
@@ -97,6 +94,85 @@
     M(rgb_to_hsl) M(hsl_to_rgb)                                    \
     M(gauss_a_to_rgba)
 
+// The largest number of pixels we handle at a time.
+static const int SkRasterPipeline_kMaxStride = 16;
+
+// Structs representing the arguments to some common stages.
+
+struct SkRasterPipeline_MemoryCtx {
+    void* pixels;
+    int   stride;
+};
+
+struct SkRasterPipeline_GatherCtx {
+    const void* pixels;
+    int         stride;
+    float       width;
+    float       height;
+};
+
+// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
+struct SkRasterPipeline_SamplerCtx {
+    float      x[SkRasterPipeline_kMaxStride];
+    float      y[SkRasterPipeline_kMaxStride];
+    float     fx[SkRasterPipeline_kMaxStride];
+    float     fy[SkRasterPipeline_kMaxStride];
+    float scalex[SkRasterPipeline_kMaxStride];
+    float scaley[SkRasterPipeline_kMaxStride];
+};
+
+struct SkRasterPipeline_TileCtx {
+    float scale;
+    float invScale; // cache of 1/scale
+};
+
+struct SkRasterPipeline_DecalTileCtx {
+    uint32_t mask[SkRasterPipeline_kMaxStride];
+    float    limit_x;
+    float    limit_y;
+};
+
+struct SkRasterPipeline_CallbackCtx {
+    void (*fn)(SkRasterPipeline_CallbackCtx* self, int active_pixels/*<= SkRasterPipeline_kMaxStride*/);
+
+    // When called, fn() will have our active pixels available in rgba.
+    // When fn() returns, the pipeline will read back those active pixels from read_from.
+    float rgba[4*SkRasterPipeline_kMaxStride];
+    float* read_from = rgba;
+};
+
+// This should line up with the memory layout of SkColorSpaceTransferFn.
+struct SkRasterPipeline_ParametricTransferFunction {
+    float G, A,B,C,D,E,F;
+};
+
+struct SkRasterPipeline_GradientCtx {
+    size_t stopCount;
+    float* fs[4];
+    float* bs[4];
+    float* ts;
+    bool interpolatedInPremul;
+};
+
+struct SkRasterPipeline_EvenlySpaced2StopGradientCtx {
+    float f[4];
+    float b[4];
+    bool interpolatedInPremul;
+};
+
+struct SkRasterPipeline_2PtConicalCtx {
+    uint32_t fMask[SkRasterPipeline_kMaxStride];
+    float    fP0,
+             fP1;
+};
+
+struct SkRasterPipeline_UniformColorCtx {
+    float r,g,b,a;
+    uint16_t rgba[4];  // [0,255] in a 16-bit lane.
+};
+
+
+
 class SkRasterPipeline {
 public:
     explicit SkRasterPipeline(SkArenaAlloc*);
@@ -150,9 +226,9 @@
         this->append_set_rgb(alloc, color.vec());
     }
 
-    void append_load    (SkColorType, const SkJumper_MemoryCtx*);
-    void append_load_dst(SkColorType, const SkJumper_MemoryCtx*);
-    void append_store   (SkColorType, const SkJumper_MemoryCtx*);
+    void append_load    (SkColorType, const SkRasterPipeline_MemoryCtx*);
+    void append_load_dst(SkColorType, const SkRasterPipeline_MemoryCtx*);
+    void append_store   (SkColorType, const SkRasterPipeline_MemoryCtx*);
 
     void append_gamut_clamp_if_normalized(const SkImageInfo&);
 
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index e3496a6..97cc27a 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -5,7 +5,6 @@
  * found in the LICENSE file.
  */
 
-#include "../jumper/SkJumper.h"
 #include "SkArenaAlloc.h"
 #include "SkBlendModePriv.h"
 #include "SkBlitter.h"
@@ -62,9 +61,10 @@
     SkShaderBase::Context* fBurstCtx;
     SkRasterPipeline       fColorPipeline;
 
-    SkJumper_MemoryCtx fShaderOutput = {nullptr,0},  // Possibly updated each call to burst_shade().
-                       fDstPtr       = {nullptr,0},  // Always points to the top-left of fDst.
-                       fMaskPtr      = {nullptr,0};  // Updated each call to blitMask().
+    SkRasterPipeline_MemoryCtx
+        fShaderOutput = {nullptr,0},  // Possibly updated each call to burst_shade().
+        fDstPtr       = {nullptr,0},  // Always points to the top-left of fDst.
+        fMaskPtr      = {nullptr,0};  // Updated each call to blitMask().
 
     // We may be able to specialize blitH() or blitRect() into a memset.
     bool     fCanMemsetInBlitRect = false;
@@ -203,7 +203,7 @@
     // A pipeline that's still constant here can collapse back into a constant color.
     if (is_constant) {
         SkColor4f constantColor;
-        SkJumper_MemoryCtx constantColorPtr = { &constantColor, 0 };
+        SkRasterPipeline_MemoryCtx constantColorPtr = { &constantColor, 0 };
         colorPipeline->append_gamut_clamp_if_normalized(dst.info());
         colorPipeline->append(SkRasterPipeline::store_f32, &constantColorPtr);
         colorPipeline->run(0,0,1,1);
@@ -227,14 +227,14 @@
         SkRasterPipeline_<256> p;
         p.extend(*colorPipeline);
         p.append_gamut_clamp_if_normalized(dst.info());
-        blitter->fDstPtr = SkJumper_MemoryCtx{&blitter->fMemsetColor, 0};
+        blitter->fDstPtr = SkRasterPipeline_MemoryCtx{&blitter->fMemsetColor, 0};
         blitter->append_store(&p);
         p.run(0,0,1,1);
 
         blitter->fCanMemsetInBlitRect = true;
     }
 
-    blitter->fDstPtr = SkJumper_MemoryCtx{
+    blitter->fDstPtr = SkRasterPipeline_MemoryCtx{
         blitter->fDst.writable_addr(),
         blitter->fDst.rowBytesAsPixels(),
     };
@@ -267,7 +267,7 @@
     }
     fBurstCtx->shadeSpan4f(x,y, fShaderBuffer.data(), w);
     // We'll be reading from fShaderOutput.pixels + x, so back up by x.
-    fShaderOutput = SkJumper_MemoryCtx{ fShaderBuffer.data() - x, 0 };
+    fShaderOutput = SkRasterPipeline_MemoryCtx{ fShaderBuffer.data() - x, 0 };
 }
 
 void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp
index b24f237..81f90af 100644
--- a/src/core/SkXfermode.cpp
+++ b/src/core/SkXfermode.cpp
@@ -15,7 +15,6 @@
 #include "SkString.h"
 #include "SkWriteBuffer.h"
 #include "SkXfermodePriv.h"
-#include "../jumper/SkJumper.h"
 
 #if SK_SUPPORT_GPU
 #include "GrFragmentProcessor.h"
@@ -36,9 +35,9 @@
 
         SkRasterPipeline_<256> p;
 
-        SkJumper_MemoryCtx dst_ctx = { (void*)dst, 0 },
-                           src_ctx = { (void*)src, 0 },
-                            aa_ctx = { (void*)aa,  0 };
+        SkRasterPipeline_MemoryCtx dst_ctx = { (void*)dst, 0 },
+                                   src_ctx = { (void*)src, 0 },
+                                    aa_ctx = { (void*)aa,  0 };
 
         p.append_load    (kN32_SkColorType, &src_ctx);
         p.append_load_dst(kN32_SkColorType, &dst_ctx);
diff --git a/src/effects/SkHighContrastFilter.cpp b/src/effects/SkHighContrastFilter.cpp
index ed5ba19..2e057fa 100644
--- a/src/effects/SkHighContrastFilter.cpp
+++ b/src/effects/SkHighContrastFilter.cpp
@@ -12,7 +12,6 @@
 #include "SkReadBuffer.h"
 #include "SkString.h"
 #include "SkWriteBuffer.h"
-#include "../jumper/SkJumper.h"
 
 #if SK_SUPPORT_GPU
 #include "GrColorSpaceInfo.h"
@@ -69,7 +68,7 @@
     if (!dstCS) {
         // In legacy draws this effect approximately linearizes by squaring.
         // When non-legacy, we're already (better) linearized.
-        auto square = alloc->make<SkJumper_ParametricTransferFunction>();
+        auto square = alloc->make<SkRasterPipeline_ParametricTransferFunction>();
         square->G = 2.0f; square->A = 1.0f;
         square->B = square->C = square->D = square->E = square->F = 0;
 
@@ -116,7 +115,7 @@
 
     if (!dstCS) {
         // See the previous if(!dstCS) { ... }
-        auto sqrt = alloc->make<SkJumper_ParametricTransferFunction>();
+        auto sqrt = alloc->make<SkRasterPipeline_ParametricTransferFunction>();
         sqrt->G = 0.5f; sqrt->A = 1.0f;
         sqrt->B = sqrt->C = sqrt->D = sqrt->E = sqrt->F = 0;
 
diff --git a/src/effects/SkOverdrawColorFilter.cpp b/src/effects/SkOverdrawColorFilter.cpp
index 230f274..2eec42e 100644
--- a/src/effects/SkOverdrawColorFilter.cpp
+++ b/src/effects/SkOverdrawColorFilter.cpp
@@ -10,7 +10,6 @@
 #include "SkPM4f.h"
 #include "SkRasterPipeline.h"
 #include "SkReadBuffer.h"
-#include "../jumper/SkJumper.h"
 
 #if SK_SUPPORT_GPU
 #include "effects/GrSkSLFP.h"
@@ -46,13 +45,13 @@
                                            SkColorSpace* dstCS,
                                            SkArenaAlloc* alloc,
                                            bool shader_is_opaque) const {
-    struct Ctx : public SkJumper_CallbackCtx {
+    struct Ctx : public SkRasterPipeline_CallbackCtx {
         const SkPMColor* colors;
     };
     // TODO: do we care about transforming to dstCS?
     auto ctx = alloc->make<Ctx>();
     ctx->colors = fColors;
-    ctx->fn = [](SkJumper_CallbackCtx* arg, int active_pixels) {
+    ctx->fn = [](SkRasterPipeline_CallbackCtx* arg, int active_pixels) {
         auto ctx = (Ctx*)arg;
         auto pixels = (SkPMColor4f*)ctx->rgba;
         for (int i = 0; i < active_pixels; i++) {
diff --git a/src/images/SkImageEncoderFns.h b/src/images/SkImageEncoderFns.h
index 6cde0cb..df5957b 100644
--- a/src/images/SkImageEncoderFns.h
+++ b/src/images/SkImageEncoderFns.h
@@ -21,7 +21,6 @@
 #include "SkRasterPipeline.h"
 #include "SkUnPreMultiply.h"
 #include "SkUnPreMultiplyPriv.h"
-#include "../jumper/SkJumper.h"
 
 /**
  * Function template for transforming scanlines.
@@ -255,8 +254,8 @@
 
 static inline void transform_scanline_1010102(char* dst, const char* src,
                                               int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_1010102, &src_ctx);
     p.append(SkRasterPipeline::store_u16_be, &dst_ctx);
@@ -265,8 +264,8 @@
 
 static inline void transform_scanline_1010102_premul(char* dst, const char* src,
                                                      int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_1010102, &src_ctx);
     p.append(SkRasterPipeline::unpremul);
@@ -279,8 +278,8 @@
  */
 static inline void transform_scanline_F16(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
                                           int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f16, &src_ctx);
     p.append(SkRasterPipeline::clamp_0);  // F16 values may be out of [0,1] range, so clamp.
@@ -294,8 +293,8 @@
  */
 static inline void transform_scanline_F16_premul(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
                                                  int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f16, &src_ctx);
     p.append(SkRasterPipeline::unpremul);
@@ -311,8 +310,8 @@
 static inline void transform_scanline_F16_to_8888(char* SK_RESTRICT dst,
                                                   const char* SK_RESTRICT src, int width, int,
                                                   const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f16, &src_ctx);
     p.append(SkRasterPipeline::clamp_0);  // F16 values may be out of [0,1] range, so clamp.
@@ -327,8 +326,8 @@
 static inline void transform_scanline_F16_premul_to_8888(char* SK_RESTRICT dst,
                                                          const char* SK_RESTRICT src, int width,
                                                          int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f16, &src_ctx);
     p.append(SkRasterPipeline::unpremul);
@@ -343,8 +342,8 @@
  */
 static inline void transform_scanline_F16_to_premul_8888(char* SK_RESTRICT dst,
         const char* SK_RESTRICT src, int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f16, &src_ctx);
     p.append(SkRasterPipeline::clamp_0);  // F16 values may be out of [0,1] range, so clamp.
@@ -359,8 +358,8 @@
  */
 static inline void transform_scanline_F32(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
                                           int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f32, &src_ctx);
     p.append(SkRasterPipeline::clamp_0);  // F32 values may be out of [0,1] range, so clamp.
@@ -374,8 +373,8 @@
  */
 static inline void transform_scanline_F32_premul(char* SK_RESTRICT dst, const char* SK_RESTRICT src,
                                                  int width, int, const SkPMColor*) {
-    SkJumper_MemoryCtx src_ctx = { (void*)src, 0 },
-                       dst_ctx = { (void*)dst, 0 };
+    SkRasterPipeline_MemoryCtx src_ctx = { (void*)src, 0 },
+                               dst_ctx = { (void*)dst, 0 };
     SkRasterPipeline_<256> p;
     p.append(SkRasterPipeline::load_f32, &src_ctx);
     p.append(SkRasterPipeline::unpremul);
diff --git a/src/jumper/SkJumper.cpp b/src/jumper/SkJumper.cpp
deleted file mode 100644
index 485c6e0..0000000
--- a/src/jumper/SkJumper.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#include "SkJumper.h"
-#include "SkOpts.h"
-#include "SkRasterPipeline.h"
-#include "SkTemplates.h"
-
-SkRasterPipeline::StartPipelineFn SkRasterPipeline::build_pipeline(void** ip) const {
-#ifndef SK_JUMPER_DISABLE_8BIT
-    // We'll try to build a lowp pipeline, but if that fails fallback to a highp float pipeline.
-    void** reset_point = ip;
-
-    // Stages are stored backwards in fStages, so we reverse here, back to front.
-    *--ip = (void*)SkOpts::just_return_lowp;
-    for (const StageList* st = fStages; st; st = st->prev) {
-        SkOpts::StageFn fn;
-        if (!st->rawFunction && (fn = SkOpts::stages_lowp[st->stage])) {
-            if (st->ctx) {
-                *--ip = st->ctx;
-            }
-            *--ip = (void*)fn;
-        } else {
-            ip = reset_point;
-            break;
-        }
-    }
-    if (ip != reset_point) {
-        return SkOpts::start_pipeline_lowp;
-    }
-#endif
-
-    *--ip = (void*)SkOpts::just_return_highp;
-    for (const StageList* st = fStages; st; st = st->prev) {
-        if (st->ctx) {
-            *--ip = st->ctx;
-        }
-        if (st->rawFunction) {
-            *--ip = (void*)st->stage;
-        } else {
-            *--ip = (void*)SkOpts::stages_highp[st->stage];
-        }
-    }
-    return SkOpts::start_pipeline_highp;
-}
-
-void SkRasterPipeline::run(size_t x, size_t y, size_t w, size_t h) const {
-    if (this->empty()) {
-        return;
-    }
-
-    // Best to not use fAlloc here... we can't bound how often run() will be called.
-    SkAutoSTMalloc<64, void*> program(fSlotsNeeded);
-
-    auto start_pipeline = this->build_pipeline(program.get() + fSlotsNeeded);
-    start_pipeline(x,y,x+w,y+h, program.get());
-}
-
-std::function<void(size_t, size_t, size_t, size_t)> SkRasterPipeline::compile() const {
-    if (this->empty()) {
-        return [](size_t, size_t, size_t, size_t) {};
-    }
-
-    void** program = fAlloc->makeArray<void*>(fSlotsNeeded);
-
-    auto start_pipeline = this->build_pipeline(program + fSlotsNeeded);
-    return [=](size_t x, size_t y, size_t w, size_t h) {
-        start_pipeline(x,y,x+w,y+h, program);
-    };
-}
diff --git a/src/jumper/SkJumper.h b/src/jumper/SkJumper.h
deleted file mode 100644
index d2a9058..0000000
--- a/src/jumper/SkJumper.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkJumper_DEFINED
-#define SkJumper_DEFINED
-
-#include <stddef.h>
-#include <stdint.h>
-
-// This file contains definitions shared by SkJumper.cpp/SkJumper_stages.cpp
-// and the rest of Skia.  It is important to keep the interface to SkJumper
-// limited and simple to avoid serious ODR violation pitfalls, especially when
-// using Microsoft's <math.h> and similar headers with inline-but-not-static
-// function definitions.
-
-static const int SkJumper_kMaxStride = 16;
-
-struct SkJumper_MemoryCtx {
-    void* pixels;
-    int   stride;
-};
-
-struct SkJumper_GatherCtx {
-    const void* pixels;
-    int         stride;
-    float       width;
-    float       height;
-};
-
-// State shared by save_xy, accumulate, and bilinear_* / bicubic_*.
-struct SkJumper_SamplerCtx {
-    float      x[SkJumper_kMaxStride];
-    float      y[SkJumper_kMaxStride];
-    float     fx[SkJumper_kMaxStride];
-    float     fy[SkJumper_kMaxStride];
-    float scalex[SkJumper_kMaxStride];
-    float scaley[SkJumper_kMaxStride];
-};
-
-struct SkJumper_TileCtx {
-    float scale;
-    float invScale; // cache of 1/scale
-};
-
-struct SkJumper_DecalTileCtx {
-    uint32_t mask[SkJumper_kMaxStride];
-    float    limit_x;
-    float    limit_y;
-};
-
-struct SkJumper_CallbackCtx {
-    void (*fn)(SkJumper_CallbackCtx* self, int active_pixels/*<= SkJumper_kMaxStride*/);
-
-    // When called, fn() will have our active pixels available in rgba.
-    // When fn() returns, the pipeline will read back those active pixels from read_from.
-    float rgba[4*SkJumper_kMaxStride];
-    float* read_from = rgba;
-};
-
-// This should line up with the memory layout of SkColorSpaceTransferFn.
-struct SkJumper_ParametricTransferFunction {
-    float G, A,B,C,D,E,F;
-};
-
-struct SkJumper_GradientCtx {
-    size_t stopCount;
-    float* fs[4];
-    float* bs[4];
-    float* ts;
-    bool interpolatedInPremul;
-};
-
-struct SkJumper_EvenlySpaced2StopGradientCtx {
-    float f[4];
-    float b[4];
-    bool interpolatedInPremul;
-};
-
-struct SkJumper_2PtConicalCtx {
-    uint32_t fMask[SkJumper_kMaxStride];
-    float    fP0,
-             fP1;
-};
-
-struct SkJumper_UniformColorCtx {
-    float r,g,b,a;
-    uint16_t rgba[4];  // [0,255] in a 16-bit lane.
-};
-
-#endif//SkJumper_DEFINED
diff --git a/src/jumper/SkJumper_misc.h b/src/jumper/SkJumper_misc.h
deleted file mode 100644
index 4f35124..0000000
--- a/src/jumper/SkJumper_misc.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright 2017 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkJumper_misc_DEFINED
-#define SkJumper_misc_DEFINED
-
-#include <string.h>  // for memcpy()
-
-// Miscellany used by SkJumper_stages.cpp and SkJumper_stages_lowp.cpp.
-
-// Every function in this file should be marked static and inline using SI.
-#if defined(__clang__)
-    #define SI __attribute__((always_inline)) static inline
-#else
-    #define SI static inline
-#endif
-
-
-template <typename T, typename P>
-SI T unaligned_load(const P* p) {  // const void* would work too, but const P* helps ARMv7 codegen.
-    T v;
-    memcpy(&v, p, sizeof(v));
-    return v;
-}
-
-template <typename T, typename P>
-SI void unaligned_store(P* p, T v) {
-    memcpy(p, &v, sizeof(v));
-}
-
-template <typename Dst, typename Src>
-SI Dst bit_cast(const Src& src) {
-    static_assert(sizeof(Dst) == sizeof(Src), "");
-    return unaligned_load<Dst>(&src);
-}
-
-template <typename Dst, typename Src>
-SI Dst widen_cast(const Src& src) {
-    static_assert(sizeof(Dst) > sizeof(Src), "");
-    Dst dst;
-    memcpy(&dst, &src, sizeof(Src));
-    return dst;
-}
-
-// Our program is an array of void*, either
-//   - 1 void* per stage with no context pointer, the next stage;
-//   - 2 void* per stage with a context pointer, first the context pointer, then the next stage.
-
-// load_and_inc() steps the program forward by 1 void*, returning that pointer.
-SI void* load_and_inc(void**& program) {
-#if defined(__GNUC__) && defined(__x86_64__)
-    // If program is in %rsi (we try to make this likely) then this is a single instruction.
-    void* rax;
-    asm("lodsq" : "=a"(rax), "+S"(program));  // Write-only %rax, read-write %rsi.
-    return rax;
-#else
-    // On ARM *program++ compiles into pretty ideal code without any handholding.
-    return *program++;
-#endif
-}
-
-// Lazily resolved on first cast.  Does nothing if cast to Ctx::None.
-struct Ctx {
-    struct None {};
-
-    void*   ptr;
-    void**& program;
-
-    explicit Ctx(void**& p) : ptr(nullptr), program(p) {}
-
-    template <typename T>
-    operator T*() {
-        if (!ptr) { ptr = load_and_inc(program); }
-        return (T*)ptr;
-    }
-    operator None() { return None{}; }
-};
-
-#endif//SkJumper_misc_DEFINED
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 1df0ebe..6c12b3d 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -9,8 +9,75 @@
 #define SkRasterPipeline_opts_DEFINED
 
 #include "SkTypes.h"
-#include "../jumper/SkJumper.h"
-#include "../jumper/SkJumper_misc.h"
+
+// Every function in this file should be marked static and inline using SI.
+#if defined(__clang__)
+    #define SI __attribute__((always_inline)) static inline
+#else
+    #define SI static inline
+#endif
+
+
+template <typename T, typename P>
+SI T unaligned_load(const P* p) {  // const void* would work too, but const P* helps ARMv7 codegen.
+    T v;
+    memcpy(&v, p, sizeof(v));
+    return v;
+}
+
+template <typename T, typename P>
+SI void unaligned_store(P* p, T v) {
+    memcpy(p, &v, sizeof(v));
+}
+
+template <typename Dst, typename Src>
+SI Dst bit_cast(const Src& src) {
+    static_assert(sizeof(Dst) == sizeof(Src), "");
+    return unaligned_load<Dst>(&src);
+}
+
+template <typename Dst, typename Src>
+SI Dst widen_cast(const Src& src) {
+    static_assert(sizeof(Dst) > sizeof(Src), "");
+    Dst dst;
+    memcpy(&dst, &src, sizeof(Src));
+    return dst;
+}
+
+// Our program is an array of void*, either
+//   - 1 void* per stage with no context pointer, the next stage;
+//   - 2 void* per stage with a context pointer, first the context pointer, then the next stage.
+
+// load_and_inc() steps the program forward by 1 void*, returning that pointer.
+SI void* load_and_inc(void**& program) {
+#if defined(__GNUC__) && defined(__x86_64__)
+    // If program is in %rsi (we try to make this likely) then this is a single instruction.
+    void* rax;
+    asm("lodsq" : "=a"(rax), "+S"(program));  // Write-only %rax, read-write %rsi.
+    return rax;
+#else
+    // On ARM *program++ compiles into pretty ideal code without any handholding.
+    return *program++;
+#endif
+}
+
+// Lazily resolved on first cast.  Does nothing if cast to Ctx::None.
+struct Ctx {
+    struct None {};
+
+    void*   ptr;
+    void**& program;
+
+    explicit Ctx(void**& p) : ptr(nullptr), program(p) {}
+
+    template <typename T>
+    operator T*() {
+        if (!ptr) { ptr = load_and_inc(program); }
+        return (T*)ptr;
+    }
+    operator None() { return None{}; }
+};
+
 
 #if !defined(__clang__)
     #define JUMPER_IS_SCALAR
@@ -886,7 +953,7 @@
 
 // Used by load_ and store_ stages to get to the right (dx,dy) starting point of contiguous memory.
 template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, size_t dx, size_t dy) {
+SI T* ptr_at_xy(const SkRasterPipeline_MemoryCtx* ctx, size_t dx, size_t dy) {
     return (T*)ctx->pixels + dy*ctx->stride + dx;
 }
 
@@ -898,7 +965,7 @@
 
 // Used by gather_ stages to calculate the base pointer and a vector of indices to load.
 template <typename T>
-SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
+SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) {
     x = clamp(x, ctx->width);
     y = clamp(y, ctx->height);
 
@@ -971,13 +1038,13 @@
 }
 
 // load 4 floats from memory, and splat them into r,g,b,a
-STAGE(uniform_color, const SkJumper_UniformColorCtx* c) {
+STAGE(uniform_color, const SkRasterPipeline_UniformColorCtx* c) {
     r = c->r;
     g = c->g;
     b = c->b;
     a = c->a;
 }
-STAGE(unbounded_uniform_color, const SkJumper_UniformColorCtx* c) {
+STAGE(unbounded_uniform_color, const SkRasterPipeline_UniformColorCtx* c) {
     r = c->r;
     g = c->g;
     b = c->b;
@@ -1198,7 +1265,7 @@
     a = a + da - a*da;
 }
 
-STAGE(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
+STAGE(srcover_rgba_8888, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
 
     U32 dst = load<U32>(ptr, tail);
@@ -1369,7 +1436,7 @@
     b = b * *c;
     a = a * *c;
 }
-STAGE(scale_u8, const SkJumper_MemoryCtx* ctx) {
+STAGE(scale_u8, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
 
     auto scales = load<U8>(ptr, tail);
@@ -1380,7 +1447,7 @@
     b = b * c;
     a = a * c;
 }
-STAGE(scale_565, const SkJumper_MemoryCtx* ctx) {
+STAGE(scale_565, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
 
     F cr,cg,cb;
@@ -1404,7 +1471,7 @@
     b = lerp(db, b, *c);
     a = lerp(da, a, *c);
 }
-STAGE(lerp_u8, const SkJumper_MemoryCtx* ctx) {
+STAGE(lerp_u8, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
 
     auto scales = load<U8>(ptr, tail);
@@ -1415,7 +1482,7 @@
     b = lerp(db, b, c);
     a = lerp(da, a, c);
 }
-STAGE(lerp_565, const SkJumper_MemoryCtx* ctx) {
+STAGE(lerp_565, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
 
     F cr,cg,cb;
@@ -1429,7 +1496,7 @@
     a = lerp(da, a, ca);
 }
 
-STAGE(byte_tables, const void* ctx) {  // TODO: rename Tables SkJumper_ByteTablesCtx
+STAGE(byte_tables, const void* ctx) {  // TODO: rename Tables to SkRasterPipeline_ByteTablesCtx
     struct Tables { const uint8_t *r, *g, *b, *a; };
     auto tables = (const Tables*)ctx;
 
@@ -1449,7 +1516,7 @@
     return bit_cast<F>(sign | bit_cast<U32>(x));
 }
 
-STAGE(parametric, const SkJumper_ParametricTransferFunction* ctx) {
+STAGE(parametric, const SkRasterPipeline_ParametricTransferFunction* ctx) {
     auto fn = [&](F v) {
         U32 sign;
         v = strip_sign(v, &sign);
@@ -1516,50 +1583,50 @@
     b = fn(b);
 }
 
-STAGE(load_a8, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_a8, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
 
     r = g = b = 0.0f;
     a = from_byte(load<U8>(ptr, tail));
 }
-STAGE(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_a8_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint8_t>(ctx, dx,dy);
 
     dr = dg = db = 0.0f;
     da = from_byte(load<U8>(ptr, tail));
 }
-STAGE(gather_a8, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_a8, const SkRasterPipeline_GatherCtx* ctx) {
     const uint8_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     r = g = b = 0.0f;
     a = from_byte(gather(ptr, ix));
 }
-STAGE(store_a8, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_a8, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint8_t>(ctx, dx,dy);
 
     U8 packed = pack(pack(to_unorm(a, 255)));
     store(ptr, packed, tail);
 }
 
-STAGE(load_565, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_565, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
 
     from_565(load<U16>(ptr, tail), &r,&g,&b);
     a = 1.0f;
 }
-STAGE(load_565_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_565_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
 
     from_565(load<U16>(ptr, tail), &dr,&dg,&db);
     da = 1.0f;
 }
-STAGE(gather_565, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_565, const SkRasterPipeline_GatherCtx* ctx) {
     const uint16_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     from_565(gather(ptr, ix), &r,&g,&b);
     a = 1.0f;
 }
-STAGE(store_565, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_565, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
 
     U16 px = pack( to_unorm(r, 31) << 11
@@ -1568,20 +1635,20 @@
     store(ptr, px, tail);
 }
 
-STAGE(load_4444, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_4444, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
     from_4444(load<U16>(ptr, tail), &r,&g,&b,&a);
 }
-STAGE(load_4444_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_4444_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint16_t>(ctx, dx,dy);
     from_4444(load<U16>(ptr, tail), &dr,&dg,&db,&da);
 }
-STAGE(gather_4444, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_4444, const SkRasterPipeline_GatherCtx* ctx) {
     const uint16_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     from_4444(gather(ptr, ix), &r,&g,&b,&a);
 }
-STAGE(store_4444, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_4444, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, dx,dy);
     U16 px = pack( to_unorm(r, 15) << 12
                  | to_unorm(g, 15) <<  8
@@ -1590,20 +1657,20 @@
     store(ptr, px, tail);
 }
 
-STAGE(load_8888, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_8888, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_8888(load<U32>(ptr, tail), &r,&g,&b,&a);
 }
-STAGE(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_8888_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_8888(load<U32>(ptr, tail), &dr,&dg,&db,&da);
 }
-STAGE(gather_8888, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_8888, const SkRasterPipeline_GatherCtx* ctx) {
     const uint32_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     from_8888(gather(ptr, ix), &r,&g,&b,&a);
 }
-STAGE(store_8888, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_8888, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
 
     U32 px = to_unorm(r, 255)
@@ -1613,20 +1680,20 @@
     store(ptr, px, tail);
 }
 
-STAGE(load_1010102, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_1010102, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_1010102(load<U32>(ptr, tail), &r,&g,&b,&a);
 }
-STAGE(load_1010102_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_1010102_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint32_t>(ctx, dx,dy);
     from_1010102(load<U32>(ptr, tail), &dr,&dg,&db,&da);
 }
-STAGE(gather_1010102, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_1010102, const SkRasterPipeline_GatherCtx* ctx) {
     const uint32_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     from_1010102(gather(ptr, ix), &r,&g,&b,&a);
 }
-STAGE(store_1010102, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_1010102, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
 
     U32 px = to_unorm(r, 1023)
@@ -1636,7 +1703,7 @@
     store(ptr, px, tail);
 }
 
-STAGE(load_f16, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_f16, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);
 
     U16 R,G,B,A;
@@ -1646,7 +1713,7 @@
     b = from_half(B);
     a = from_half(A);
 }
-STAGE(load_f16_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_f16_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const uint64_t>(ctx, dx,dy);
 
     U16 R,G,B,A;
@@ -1656,7 +1723,7 @@
     db = from_half(B);
     da = from_half(A);
 }
-STAGE(gather_f16, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_f16, const SkRasterPipeline_GatherCtx* ctx) {
     const uint64_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     auto px = gather(ptr, ix);
@@ -1668,7 +1735,7 @@
     b = from_half(B);
     a = from_half(A);
 }
-STAGE(store_f16, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_f16, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint64_t>(ctx, dx,dy);
     store4((uint16_t*)ptr,tail, to_half(r)
                               , to_half(g)
@@ -1676,7 +1743,7 @@
                               , to_half(a));
 }
 
-STAGE(store_u16_be, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_u16_be, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint16_t>(ctx, 4*dx,dy);
 
     U16 R = bswap(pack(to_unorm(r, 65535))),
@@ -1687,15 +1754,15 @@
     store4(ptr,tail, R,G,B,A);
 }
 
-STAGE(load_f32, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_f32, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const float>(ctx, 4*dx,4*dy);
     load4(ptr,tail, &r,&g,&b,&a);
 }
-STAGE(load_f32_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE(load_f32_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<const float>(ctx, 4*dx,4*dy);
     load4(ptr,tail, &dr,&dg,&db,&da);
 }
-STAGE(gather_f32, const SkJumper_GatherCtx* ctx) {
+STAGE(gather_f32, const SkRasterPipeline_GatherCtx* ctx) {
     const float* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, r,g);
     r = gather(ptr, 4*ix + 0);
@@ -1703,15 +1770,15 @@
     b = gather(ptr, 4*ix + 2);
     a = gather(ptr, 4*ix + 3);
 }
-STAGE(store_f32, const SkJumper_MemoryCtx* ctx) {
+STAGE(store_f32, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<float>(ctx, 4*dx,4*dy);
     store4(ptr,tail, r,g,b,a);
 }
 
-SI F exclusive_repeat(F v, const SkJumper_TileCtx* ctx) {
+SI F exclusive_repeat(F v, const SkRasterPipeline_TileCtx* ctx) {
     return v - floor_(v*ctx->invScale)*ctx->scale;
 }
-SI F exclusive_mirror(F v, const SkJumper_TileCtx* ctx) {
+SI F exclusive_mirror(F v, const SkRasterPipeline_TileCtx* ctx) {
     auto limit = ctx->scale;
     auto invLimit = ctx->invScale;
     return abs_( (v-limit) - (limit+limit)*floor_((v-limit)*(invLimit*0.5f)) - limit );
@@ -1719,10 +1786,10 @@
 // Tile x or y to [0,limit) == [0,limit - 1 ulp] (think, sampling from images).
 // The gather stages will hard clamp the output of these stages to [0,limit)...
 // we just need to do the basic repeat or mirroring.
-STAGE(repeat_x, const SkJumper_TileCtx* ctx) { r = exclusive_repeat(r, ctx); }
-STAGE(repeat_y, const SkJumper_TileCtx* ctx) { g = exclusive_repeat(g, ctx); }
-STAGE(mirror_x, const SkJumper_TileCtx* ctx) { r = exclusive_mirror(r, ctx); }
-STAGE(mirror_y, const SkJumper_TileCtx* ctx) { g = exclusive_mirror(g, ctx); }
+STAGE(repeat_x, const SkRasterPipeline_TileCtx* ctx) { r = exclusive_repeat(r, ctx); }
+STAGE(repeat_y, const SkRasterPipeline_TileCtx* ctx) { g = exclusive_repeat(g, ctx); }
+STAGE(mirror_x, const SkRasterPipeline_TileCtx* ctx) { r = exclusive_mirror(r, ctx); }
+STAGE(mirror_y, const SkRasterPipeline_TileCtx* ctx) { g = exclusive_mirror(g, ctx); }
 
 // Clamp x to [0,1], both sides inclusive (think, gradients).
 // Even repeat and mirror funnel through a clamp to handle bad inputs like +Inf, NaN.
@@ -1738,21 +1805,21 @@
 // After the gather stage, the r,g,b,a values are AND'd with this mask, setting them to 0
 // if either of the coordinates were out of bounds.
 
-STAGE(decal_x, SkJumper_DecalTileCtx* ctx) {
+STAGE(decal_x, SkRasterPipeline_DecalTileCtx* ctx) {
     auto w = ctx->limit_x;
     unaligned_store(ctx->mask, cond_to_mask((0 <= r) & (r < w)));
 }
-STAGE(decal_y, SkJumper_DecalTileCtx* ctx) {
+STAGE(decal_y, SkRasterPipeline_DecalTileCtx* ctx) {
     auto h = ctx->limit_y;
     unaligned_store(ctx->mask, cond_to_mask((0 <= g) & (g < h)));
 }
-STAGE(decal_x_and_y, SkJumper_DecalTileCtx* ctx) {
+STAGE(decal_x_and_y, SkRasterPipeline_DecalTileCtx* ctx) {
     auto w = ctx->limit_x;
     auto h = ctx->limit_y;
     unaligned_store(ctx->mask,
                     cond_to_mask((0 <= r) & (r < w) & (0 <= g) & (g < h)));
 }
-STAGE(check_decal_mask, SkJumper_DecalTileCtx* ctx) {
+STAGE(check_decal_mask, SkRasterPipeline_DecalTileCtx* ctx) {
     auto mask = unaligned_load<U32>(ctx->mask);
     r = bit_cast<F>( bit_cast<U32>(r) & mask );
     g = bit_cast<F>( bit_cast<U32>(g) & mask );
@@ -1831,7 +1898,7 @@
     g = G * rcp(Z);
 }
 
-SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
+SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
                         F* r, F* g, F* b, F* a) {
     F fr, br, fg, bg, fb, bb, fa, ba;
 #if defined(JUMPER_IS_HSW) || defined(JUMPER_IS_AVX512)
@@ -1863,13 +1930,13 @@
     *a = mad(t, fa, ba);
 }
 
-STAGE(evenly_spaced_gradient, const SkJumper_GradientCtx* c) {
+STAGE(evenly_spaced_gradient, const SkRasterPipeline_GradientCtx* c) {
     auto t = r;
     auto idx = trunc_(t * (c->stopCount-1));
     gradient_lookup(c, idx, t, &r, &g, &b, &a);
 }
 
-STAGE(gradient, const SkJumper_GradientCtx* c) {
+STAGE(gradient, const SkRasterPipeline_GradientCtx* c) {
     auto t = r;
     U32 idx = 0;
 
@@ -1882,7 +1949,7 @@
 }
 
 STAGE(evenly_spaced_2_stop_gradient, const void* ctx) {
-    // TODO: Rename Ctx SkJumper_EvenlySpaced2StopGradientCtx.
+    // TODO: Rename Ctx to SkRasterPipeline_EvenlySpaced2StopGradientCtx.
     struct Ctx { float f[4], b[4]; };
     auto c = (const Ctx*)ctx;
 
@@ -1929,7 +1996,7 @@
 
 STAGE(negate_x, Ctx::None) { r = -r; }
 
-STAGE(xy_to_2pt_conical_strip, const SkJumper_2PtConicalCtx* ctx) {
+STAGE(xy_to_2pt_conical_strip, const SkRasterPipeline_2PtConicalCtx* ctx) {
     F x = r, y = g, &t = r;
     t = x + sqrt_(ctx->fP0 - y*y); // ctx->fP0 = r0 * r0
 }
@@ -1939,22 +2006,22 @@
     t = x + y*y / x; // (x^2 + y^2) / x
 }
 
-STAGE(xy_to_2pt_conical_well_behaved, const SkJumper_2PtConicalCtx* ctx) {
+STAGE(xy_to_2pt_conical_well_behaved, const SkRasterPipeline_2PtConicalCtx* ctx) {
     F x = r, y = g, &t = r;
     t = sqrt_(x*x + y*y) - x * ctx->fP0; // ctx->fP0 = 1/r1
 }
 
-STAGE(xy_to_2pt_conical_greater, const SkJumper_2PtConicalCtx* ctx) {
+STAGE(xy_to_2pt_conical_greater, const SkRasterPipeline_2PtConicalCtx* ctx) {
     F x = r, y = g, &t = r;
     t = sqrt_(x*x - y*y) - x * ctx->fP0; // ctx->fP0 = 1/r1
 }
 
-STAGE(xy_to_2pt_conical_smaller, const SkJumper_2PtConicalCtx* ctx) {
+STAGE(xy_to_2pt_conical_smaller, const SkRasterPipeline_2PtConicalCtx* ctx) {
     F x = r, y = g, &t = r;
     t = -sqrt_(x*x - y*y) - x * ctx->fP0; // ctx->fP0 = 1/r1
 }
 
-STAGE(alter_2pt_conical_compensate_focal, const SkJumper_2PtConicalCtx* ctx) {
+STAGE(alter_2pt_conical_compensate_focal, const SkRasterPipeline_2PtConicalCtx* ctx) {
     F& t = r;
     t = t + ctx->fP1; // ctx->fP1 = f
 }
@@ -1964,14 +2031,14 @@
     t = 1 - t;
 }
 
-STAGE(mask_2pt_conical_nan, SkJumper_2PtConicalCtx* c) {
+STAGE(mask_2pt_conical_nan, SkRasterPipeline_2PtConicalCtx* c) {
     F& t = r;
     auto is_degenerate = (t != t); // NaN
     t = if_then_else(is_degenerate, F(0), t);
     unaligned_store(&c->fMask, cond_to_mask(!is_degenerate));
 }
 
-STAGE(mask_2pt_conical_degenerates, SkJumper_2PtConicalCtx* c) {
+STAGE(mask_2pt_conical_degenerates, SkRasterPipeline_2PtConicalCtx* c) {
     F& t = r;
     auto is_degenerate = (t <= 0) | (t != t);
     t = if_then_else(is_degenerate, F(0), t);
@@ -1986,7 +2053,7 @@
     a = bit_cast<F>(bit_cast<U32>(a) & mask);
 }
 
-STAGE(save_xy, SkJumper_SamplerCtx* c) {
+STAGE(save_xy, SkRasterPipeline_SamplerCtx* c) {
     // Whether bilinear or bicubic, all sample points are at the same fractional offset (fx,fy).
     // They're either the 4 corners of a logical 1x1 pixel or the 16 corners of a 3x3 grid
     // surrounding (x,y) at (0.5,0.5) off-center.
@@ -2000,7 +2067,7 @@
     unaligned_store(c->fy, fy);
 }
 
-STAGE(accumulate, const SkJumper_SamplerCtx* c) {
+STAGE(accumulate, const SkRasterPipeline_SamplerCtx* c) {
     // Bilinear and bicubic filters are both separable, so we produce independent contributions
     // from x and y, multiplying them together here to get each pixel's total scale factor.
     auto scale = unaligned_load<F>(c->scalex)
@@ -2017,7 +2084,7 @@
 // The y-axis is symmetric.
 
 template <int kScale>
-SI void bilinear_x(SkJumper_SamplerCtx* ctx, F* x) {
+SI void bilinear_x(SkRasterPipeline_SamplerCtx* ctx, F* x) {
     *x = unaligned_load<F>(ctx->x) + (kScale * 0.5f);
     F fx = unaligned_load<F>(ctx->fx);
 
@@ -2027,7 +2094,7 @@
     unaligned_store(ctx->scalex, scalex);
 }
 template <int kScale>
-SI void bilinear_y(SkJumper_SamplerCtx* ctx, F* y) {
+SI void bilinear_y(SkRasterPipeline_SamplerCtx* ctx, F* y) {
     *y = unaligned_load<F>(ctx->y) + (kScale * 0.5f);
     F fy = unaligned_load<F>(ctx->fy);
 
@@ -2037,10 +2104,10 @@
     unaligned_store(ctx->scaley, scaley);
 }
 
-STAGE(bilinear_nx, SkJumper_SamplerCtx* ctx) { bilinear_x<-1>(ctx, &r); }
-STAGE(bilinear_px, SkJumper_SamplerCtx* ctx) { bilinear_x<+1>(ctx, &r); }
-STAGE(bilinear_ny, SkJumper_SamplerCtx* ctx) { bilinear_y<-1>(ctx, &g); }
-STAGE(bilinear_py, SkJumper_SamplerCtx* ctx) { bilinear_y<+1>(ctx, &g); }
+STAGE(bilinear_nx, SkRasterPipeline_SamplerCtx* ctx) { bilinear_x<-1>(ctx, &r); }
+STAGE(bilinear_px, SkRasterPipeline_SamplerCtx* ctx) { bilinear_x<+1>(ctx, &r); }
+STAGE(bilinear_ny, SkRasterPipeline_SamplerCtx* ctx) { bilinear_y<-1>(ctx, &g); }
+STAGE(bilinear_py, SkRasterPipeline_SamplerCtx* ctx) { bilinear_y<+1>(ctx, &g); }
 
 
 // In bicubic interpolation, the 16 pixels and +/- 0.5 and +/- 1.5 offsets from the sample
@@ -2059,7 +2126,7 @@
 }
 
 template <int kScale>
-SI void bicubic_x(SkJumper_SamplerCtx* ctx, F* x) {
+SI void bicubic_x(SkRasterPipeline_SamplerCtx* ctx, F* x) {
     *x = unaligned_load<F>(ctx->x) + (kScale * 0.5f);
     F fx = unaligned_load<F>(ctx->fx);
 
@@ -2071,7 +2138,7 @@
     unaligned_store(ctx->scalex, scalex);
 }
 template <int kScale>
-SI void bicubic_y(SkJumper_SamplerCtx* ctx, F* y) {
+SI void bicubic_y(SkRasterPipeline_SamplerCtx* ctx, F* y) {
     *y = unaligned_load<F>(ctx->y) + (kScale * 0.5f);
     F fy = unaligned_load<F>(ctx->fy);
 
@@ -2083,17 +2150,17 @@
     unaligned_store(ctx->scaley, scaley);
 }
 
-STAGE(bicubic_n3x, SkJumper_SamplerCtx* ctx) { bicubic_x<-3>(ctx, &r); }
-STAGE(bicubic_n1x, SkJumper_SamplerCtx* ctx) { bicubic_x<-1>(ctx, &r); }
-STAGE(bicubic_p1x, SkJumper_SamplerCtx* ctx) { bicubic_x<+1>(ctx, &r); }
-STAGE(bicubic_p3x, SkJumper_SamplerCtx* ctx) { bicubic_x<+3>(ctx, &r); }
+STAGE(bicubic_n3x, SkRasterPipeline_SamplerCtx* ctx) { bicubic_x<-3>(ctx, &r); }
+STAGE(bicubic_n1x, SkRasterPipeline_SamplerCtx* ctx) { bicubic_x<-1>(ctx, &r); }
+STAGE(bicubic_p1x, SkRasterPipeline_SamplerCtx* ctx) { bicubic_x<+1>(ctx, &r); }
+STAGE(bicubic_p3x, SkRasterPipeline_SamplerCtx* ctx) { bicubic_x<+3>(ctx, &r); }
 
-STAGE(bicubic_n3y, SkJumper_SamplerCtx* ctx) { bicubic_y<-3>(ctx, &g); }
-STAGE(bicubic_n1y, SkJumper_SamplerCtx* ctx) { bicubic_y<-1>(ctx, &g); }
-STAGE(bicubic_p1y, SkJumper_SamplerCtx* ctx) { bicubic_y<+1>(ctx, &g); }
-STAGE(bicubic_p3y, SkJumper_SamplerCtx* ctx) { bicubic_y<+3>(ctx, &g); }
+STAGE(bicubic_n3y, SkRasterPipeline_SamplerCtx* ctx) { bicubic_y<-3>(ctx, &g); }
+STAGE(bicubic_n1y, SkRasterPipeline_SamplerCtx* ctx) { bicubic_y<-1>(ctx, &g); }
+STAGE(bicubic_p1y, SkRasterPipeline_SamplerCtx* ctx) { bicubic_y<+1>(ctx, &g); }
+STAGE(bicubic_p3y, SkRasterPipeline_SamplerCtx* ctx) { bicubic_y<+3>(ctx, &g); }
 
-STAGE(callback, SkJumper_CallbackCtx* c) {
+STAGE(callback, SkRasterPipeline_CallbackCtx* c) {
     store4(c->rgba,0, r,g,b,a);
     c->fn(c, tail ? tail : N);
     load4(c->read_from,0, &r,&g,&b,&a);
@@ -2116,7 +2183,7 @@
 }
 
 // A specialized fused image shader for clamp-x, clamp-y, non-sRGB sampling.
-STAGE(bilerp_clamp_8888, const SkJumper_GatherCtx* ctx) {
+STAGE(bilerp_clamp_8888, const SkRasterPipeline_GatherCtx* ctx) {
     // (cx,cy) are the center of our sample.
     F cx = r,
       cy = g;
@@ -2503,7 +2570,7 @@
     y = Y * rcp(Z);
 }
 
-STAGE_PP(uniform_color, const SkJumper_UniformColorCtx* c) {
+STAGE_PP(uniform_color, const SkRasterPipeline_UniformColorCtx* c) {
     r = c->rgba[0];
     g = c->rgba[1];
     b = c->rgba[2];
@@ -2634,12 +2701,12 @@
 // ~~~~~~ Helpers for interacting with memory ~~~~~~ //
 
 template <typename T>
-SI T* ptr_at_xy(const SkJumper_MemoryCtx* ctx, size_t dx, size_t dy) {
+SI T* ptr_at_xy(const SkRasterPipeline_MemoryCtx* ctx, size_t dx, size_t dy) {
     return (T*)ctx->pixels + dy*ctx->stride + dx;
 }
 
 template <typename T>
-SI U32 ix_and_ptr(T** ptr, const SkJumper_GatherCtx* ctx, F x, F y) {
+SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) {
     auto clamp = [](F v, F limit) {
         limit = bit_cast<F>( bit_cast<U32>(limit) - 1 );  // Exclusive -> inclusive.
         return min(max(0, v), limit);
@@ -2807,16 +2874,16 @@
 #endif
 }
 
-STAGE_PP(load_8888, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_8888, const SkRasterPipeline_MemoryCtx* ctx) {
     load_8888_(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &r,&g,&b,&a);
 }
-STAGE_PP(load_8888_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_8888_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     load_8888_(ptr_at_xy<const uint32_t>(ctx, dx,dy), tail, &dr,&dg,&db,&da);
 }
-STAGE_PP(store_8888, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(store_8888, const SkRasterPipeline_MemoryCtx* ctx) {
     store_8888_(ptr_at_xy<uint32_t>(ctx, dx,dy), tail, r,g,b,a);
 }
-STAGE_GP(gather_8888, const SkJumper_GatherCtx* ctx) {
+STAGE_GP(gather_8888, const SkRasterPipeline_GatherCtx* ctx) {
     const uint32_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, x,y);
     from_8888(gather<U32>(ptr, ix), &r, &g, &b, &a);
@@ -2849,18 +2916,18 @@
                    | B <<  0);
 }
 
-STAGE_PP(load_565, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_565, const SkRasterPipeline_MemoryCtx* ctx) {
     load_565_(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &r,&g,&b);
     a = 255;
 }
-STAGE_PP(load_565_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_565_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     load_565_(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &dr,&dg,&db);
     da = 255;
 }
-STAGE_PP(store_565, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(store_565, const SkRasterPipeline_MemoryCtx* ctx) {
     store_565_(ptr_at_xy<uint16_t>(ctx, dx,dy), tail, r,g,b);
 }
-STAGE_GP(gather_565, const SkJumper_GatherCtx* ctx) {
+STAGE_GP(gather_565, const SkRasterPipeline_GatherCtx* ctx) {
     const uint16_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, x,y);
     from_565(gather<U16>(ptr, ix), &r, &g, &b);
@@ -2896,16 +2963,16 @@
                    | A <<  0);
 }
 
-STAGE_PP(load_4444, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_4444, const SkRasterPipeline_MemoryCtx* ctx) {
     load_4444_(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &r,&g,&b,&a);
 }
-STAGE_PP(load_4444_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_4444_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     load_4444_(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &dr,&dg,&db,&da);
 }
-STAGE_PP(store_4444, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(store_4444, const SkRasterPipeline_MemoryCtx* ctx) {
     store_4444_(ptr_at_xy<uint16_t>(ctx, dx,dy), tail, r,g,b,a);
 }
-STAGE_GP(gather_4444, const SkJumper_GatherCtx* ctx) {
+STAGE_GP(gather_4444, const SkRasterPipeline_GatherCtx* ctx) {
     const uint16_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, x,y);
     from_4444(gather<U16>(ptr, ix), &r,&g,&b,&a);
@@ -2920,18 +2987,18 @@
     store(ptr, tail, cast<U8>(v));
 }
 
-STAGE_PP(load_a8, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_a8, const SkRasterPipeline_MemoryCtx* ctx) {
     r = g = b = 0;
     a = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
 }
-STAGE_PP(load_a8_dst, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(load_a8_dst, const SkRasterPipeline_MemoryCtx* ctx) {
     dr = dg = db = 0;
     da = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
 }
-STAGE_PP(store_a8, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(store_a8, const SkRasterPipeline_MemoryCtx* ctx) {
     store_8(ptr_at_xy<uint8_t>(ctx, dx,dy), tail, a);
 }
-STAGE_GP(gather_a8, const SkJumper_GatherCtx* ctx) {
+STAGE_GP(gather_a8, const SkRasterPipeline_GatherCtx* ctx) {
     const uint8_t* ptr;
     U32 ix = ix_and_ptr(&ptr, ctx, x,y);
     r = g = b = 0;
@@ -2968,14 +3035,14 @@
     a = lerp(da, a, c);
 }
 
-STAGE_PP(scale_u8, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(scale_u8, const SkRasterPipeline_MemoryCtx* ctx) {
     U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
     r = div255( r * c );
     g = div255( g * c );
     b = div255( b * c );
     a = div255( a * c );
 }
-STAGE_PP(lerp_u8, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(lerp_u8, const SkRasterPipeline_MemoryCtx* ctx) {
     U16 c = load_8(ptr_at_xy<const uint8_t>(ctx, dx,dy), tail);
     r = lerp(dr, r, c);
     g = lerp(dg, g, c);
@@ -2988,7 +3055,7 @@
     return if_then_else(a < da, min(cr,cg,cb)
                               , max(cr,cg,cb));
 }
-STAGE_PP(scale_565, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(scale_565, const SkRasterPipeline_MemoryCtx* ctx) {
     U16 cr,cg,cb;
     load_565_(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &cr,&cg,&cb);
     U16 ca = alpha_coverage_from_rgb_coverage(a,da, cr,cg,cb);
@@ -2998,7 +3065,7 @@
     b = div255( b * cb );
     a = div255( a * ca );
 }
-STAGE_PP(lerp_565, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(lerp_565, const SkRasterPipeline_MemoryCtx* ctx) {
     U16 cr,cg,cb;
     load_565_(ptr_at_xy<const uint16_t>(ctx, dx,dy), tail, &cr,&cg,&cb);
     U16 ca = alpha_coverage_from_rgb_coverage(a,da, cr,cg,cb);
@@ -3024,20 +3091,20 @@
 
 SI I16 cond_to_mask_16(I32 cond) { return cast<I16>(cond); }
 
-STAGE_GG(decal_x, SkJumper_DecalTileCtx* ctx) {
+STAGE_GG(decal_x, SkRasterPipeline_DecalTileCtx* ctx) {
     auto w = ctx->limit_x;
     unaligned_store(ctx->mask, cond_to_mask_16((0 <= x) & (x < w)));
 }
-STAGE_GG(decal_y, SkJumper_DecalTileCtx* ctx) {
+STAGE_GG(decal_y, SkRasterPipeline_DecalTileCtx* ctx) {
     auto h = ctx->limit_y;
     unaligned_store(ctx->mask, cond_to_mask_16((0 <= y) & (y < h)));
 }
-STAGE_GG(decal_x_and_y, SkJumper_DecalTileCtx* ctx) {
+STAGE_GG(decal_x_and_y, SkRasterPipeline_DecalTileCtx* ctx) {
     auto w = ctx->limit_x;
     auto h = ctx->limit_y;
     unaligned_store(ctx->mask, cond_to_mask_16((0 <= x) & (x < w) & (0 <= y) & (y < h)));
 }
-STAGE_PP(check_decal_mask, SkJumper_DecalTileCtx* ctx) {
+STAGE_PP(check_decal_mask, SkRasterPipeline_DecalTileCtx* ctx) {
     auto mask = unaligned_load<U16>(ctx->mask);
     r = r & mask;
     g = g & mask;
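
The decal stages compute an in-bounds mask while the coordinates are still on hand, and check_decal_mask later ANDs that mask into every channel so out-of-bounds samples come out as transparent black. In scalar form the idea is just (hypothetical names):

    #include <cstdint>
    #include <cstdio>

    // All-ones when (x, y) lies inside [0, limit_x) x [0, limit_y), all-zeros otherwise.
    static uint16_t decal_mask(float x, float limit_x, float y, float limit_y) {
        bool inside = (0 <= x) && (x < limit_x) && (0 <= y) && (y < limit_y);
        return inside ? 0xFFFF : 0x0000;
    }

    static uint16_t check_decal(uint16_t channel, uint16_t mask) {
        return channel & mask;   // zero the channel for out-of-bounds samples
    }

    int main() {
        uint16_t in  = decal_mask(3.5f, 8.0f, 2.0f, 8.0f);
        uint16_t out = decal_mask(9.0f, 8.0f, 2.0f, 8.0f);
        std::printf("%u %u\n", check_decal(200, in), check_decal(200, out));  // 200 0
    }
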
@@ -3057,7 +3124,7 @@
     *a = round(A);  // we assume alpha is already in [0,1].
 }
 
-SI void gradient_lookup(const SkJumper_GradientCtx* c, U32 idx, F t,
+SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
                         U16* r, U16* g, U16* b, U16* a) {
 
     F fr, fg, fb, fa, br, bg, bb, ba;
@@ -3102,7 +3169,7 @@
                    r,g,b,a);
 }
 
-STAGE_GP(gradient, const SkJumper_GradientCtx* c) {
+STAGE_GP(gradient, const SkRasterPipeline_GradientCtx* c) {
     auto t = x;
     U32 idx = 0;
 
@@ -3114,13 +3181,13 @@
     gradient_lookup(c, idx, t, &r, &g, &b, &a);
 }
 
-STAGE_GP(evenly_spaced_gradient, const SkJumper_GradientCtx* c) {
+STAGE_GP(evenly_spaced_gradient, const SkRasterPipeline_GradientCtx* c) {
     auto t = x;
     auto idx = trunc_(t * (c->stopCount-1));
     gradient_lookup(c, idx, t, &r, &g, &b, &a);
 }
 
-STAGE_GP(evenly_spaced_2_stop_gradient, const SkJumper_EvenlySpaced2StopGradientCtx* c) {
+STAGE_GP(evenly_spaced_2_stop_gradient, const SkRasterPipeline_EvenlySpaced2StopGradientCtx* c) {
     auto t = x;
     round_F_to_U16(mad(t, c->f[0], c->b[0]),
                    mad(t, c->f[1], c->b[1]),
@@ -3159,7 +3226,7 @@
 
 // ~~~~~~ Compound stages ~~~~~~ //
 
-STAGE_PP(srcover_rgba_8888, const SkJumper_MemoryCtx* ctx) {
+STAGE_PP(srcover_rgba_8888, const SkRasterPipeline_MemoryCtx* ctx) {
     auto ptr = ptr_at_xy<uint32_t>(ctx, dx,dy);
 
     load_8888_(ptr, tail, &dr,&dg,&db,&da);
diff --git a/src/shaders/SkComposeShader.cpp b/src/shaders/SkComposeShader.cpp
index 3e3d06d..2dee742 100644
--- a/src/shaders/SkComposeShader.cpp
+++ b/src/shaders/SkComposeShader.cpp
@@ -15,7 +15,6 @@
 #include "SkReadBuffer.h"
 #include "SkWriteBuffer.h"
 #include "SkString.h"
-#include "../jumper/SkJumper.h"
 
 sk_sp<SkShader> SkShader::MakeCompose(sk_sp<SkShader> dst, sk_sp<SkShader> src, SkBlendMode mode,
                                       float lerpT) {
@@ -81,7 +80,7 @@
 
 bool SkComposeShader::onAppendStages(const StageRec& rec) const {
     struct Storage {
-        float   fRGBA[4 * SkJumper_kMaxStride];
+        float   fRGBA[4 * SkRasterPipeline_kMaxStride];
         float   fAlpha;
     };
     auto storage = rec.fAlloc->make<Storage>();
diff --git a/src/shaders/SkImageShader.cpp b/src/shaders/SkImageShader.cpp
index 8313806..4493fd9 100644
--- a/src/shaders/SkImageShader.cpp
+++ b/src/shaders/SkImageShader.cpp
@@ -16,7 +16,6 @@
 #include "SkImageShader.h"
 #include "SkReadBuffer.h"
 #include "SkWriteBuffer.h"
-#include "../jumper/SkJumper.h"
 
 /**
  *  We are faster in clamp, so always use that tiling when we can.
@@ -286,23 +285,23 @@
     p->append(SkRasterPipeline::seed_shader);
     p->append_matrix(alloc, matrix);
 
-    auto gather = alloc->make<SkJumper_GatherCtx>();
+    auto gather = alloc->make<SkRasterPipeline_GatherCtx>();
     gather->pixels = pm.addr();
     gather->stride = pm.rowBytesAsPixels();
     gather->width  = pm.width();
     gather->height = pm.height();
 
-    auto limit_x = alloc->make<SkJumper_TileCtx>(),
-         limit_y = alloc->make<SkJumper_TileCtx>();
+    auto limit_x = alloc->make<SkRasterPipeline_TileCtx>(),
+         limit_y = alloc->make<SkRasterPipeline_TileCtx>();
     limit_x->scale = pm.width();
     limit_x->invScale = 1.0f / pm.width();
     limit_y->scale = pm.height();
     limit_y->invScale = 1.0f / pm.height();
 
-    SkJumper_DecalTileCtx* decal_ctx = nullptr;
+    SkRasterPipeline_DecalTileCtx* decal_ctx = nullptr;
     bool decal_x_and_y = fTileModeX == kDecal_TileMode && fTileModeY == kDecal_TileMode;
     if (fTileModeX == kDecal_TileMode || fTileModeY == kDecal_TileMode) {
-        decal_ctx = alloc->make<SkJumper_DecalTileCtx>();
+        decal_ctx = alloc->make<SkRasterPipeline_DecalTileCtx>();
         decal_ctx->limit_x = limit_x->scale;
         decal_ctx->limit_y = limit_y->scale;
     }
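
The tile contexts carry both the dimension and its reciprocal so tiling stages can avoid a per-pixel divide; repeat tiling, for instance, can be phrased as subtracting whole periods using the precomputed invScale. A hedged scalar sketch of that idea (not necessarily the exact stage formula):

    #include <cmath>
    #include <cstdio>

    struct TileCtx { float scale, invScale; };   // e.g. image width and 1/width

    // Wrap x into [0, scale) without a per-pixel division.
    static float repeat(float x, const TileCtx& ctx) {
        return x - std::floor(x * ctx.invScale) * ctx.scale;
    }

    int main() {
        TileCtx tile = { 64.0f, 1.0f / 64.0f };
        std::printf("%g %g %g\n", repeat(10, tile), repeat(70, tile), repeat(-6, tile));  // 10 6 58
    }
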
@@ -404,9 +403,9 @@
         return append_misc();
     }
 
-    SkJumper_SamplerCtx* sampler = nullptr;
+    SkRasterPipeline_SamplerCtx* sampler = nullptr;
     if (quality != kNone_SkFilterQuality) {
-        sampler = alloc->make<SkJumper_SamplerCtx>();
+        sampler = alloc->make<SkRasterPipeline_SamplerCtx>();
     }
 
     auto sample = [&](SkRasterPipeline::StockStage setup_x,
diff --git a/src/shaders/SkShader.cpp b/src/shaders/SkShader.cpp
index 21626f3..f1875ed 100644
--- a/src/shaders/SkShader.cpp
+++ b/src/shaders/SkShader.cpp
@@ -21,7 +21,6 @@
 #include "SkShaderBase.h"
 #include "SkTLazy.h"
 #include "SkWriteBuffer.h"
-#include "../jumper/SkJumper.h"
 
 #if SK_SUPPORT_GPU
 #include "GrFragmentProcessor.h"
@@ -221,7 +220,7 @@
 
     ContextRec cr(*opaquePaint, rec.fCTM, rec.fLocalM, rec.fDstColorType, rec.fDstCS);
 
-    struct CallbackCtx : SkJumper_CallbackCtx {
+    struct CallbackCtx : SkRasterPipeline_CallbackCtx {
         sk_sp<SkShader> shader;
         Context*        ctx;
     };
@@ -229,7 +228,7 @@
     cb->shader = rec.fDstCS ? SkColorSpaceXformer::Make(sk_ref_sp(rec.fDstCS))->apply(this)
                             : sk_ref_sp((SkShader*)this);
     cb->ctx = as_SB(cb->shader)->makeContext(cr, rec.fAlloc);
-    cb->fn  = [](SkJumper_CallbackCtx* self, int active_pixels) {
+    cb->fn  = [](SkRasterPipeline_CallbackCtx* self, int active_pixels) {
         auto c = (CallbackCtx*)self;
         int x = (int)c->rgba[0],
         y = (int)c->rgba[1];
diff --git a/src/shaders/gradients/SkGradientShader.cpp b/src/shaders/gradients/SkGradientShader.cpp
index 4debbf6..52a9b56 100644
--- a/src/shaders/gradients/SkGradientShader.cpp
+++ b/src/shaders/gradients/SkGradientShader.cpp
@@ -20,7 +20,6 @@
 #include "SkSweepGradient.h"
 #include "SkTwoPointConicalGradient.h"
 #include "SkWriteBuffer.h"
-#include "../../jumper/SkJumper.h"
 
 enum GradientSerializationFlags {
     // Bits 29:31 used for various boolean flags
@@ -216,7 +215,7 @@
     desc.flatten(buffer);
 }
 
-static void add_stop_color(SkJumper_GradientCtx* ctx, size_t stop, SkPMColor4f Fs, SkPMColor4f Bs) {
+static void add_stop_color(SkRasterPipeline_GradientCtx* ctx, size_t stop, SkPMColor4f Fs, SkPMColor4f Bs) {
     (ctx->fs[0])[stop] = Fs.fR;
     (ctx->fs[1])[stop] = Fs.fG;
     (ctx->fs[2])[stop] = Fs.fB;
@@ -227,14 +226,14 @@
     (ctx->bs[3])[stop] = Bs.fA;
 }
 
-static void add_const_color(SkJumper_GradientCtx* ctx, size_t stop, SkPMColor4f color) {
+static void add_const_color(SkRasterPipeline_GradientCtx* ctx, size_t stop, SkPMColor4f color) {
     add_stop_color(ctx, stop, { 0, 0, 0, 0 }, color);
 }
 
 // Calculate a factor F and a bias B so that color = F*t + B when t is in range of
 // the stop. Assume that the distance between stops is 1/gapCount.
 static void init_stop_evenly(
-    SkJumper_GradientCtx* ctx, float gapCount, size_t stop, SkPMColor4f c_l, SkPMColor4f c_r) {
+    SkRasterPipeline_GradientCtx* ctx, float gapCount, size_t stop, SkPMColor4f c_l, SkPMColor4f c_r) {
     // Clankium's GCC 4.9 targeting ARMv7 is barfing when we use Sk4f math here, so go scalar...
     SkPMColor4f Fs = {
         (c_r.fR - c_l.fR) * gapCount,
@@ -254,7 +253,7 @@
 // For each stop we calculate a bias B and a scale factor F, such that
 // for any t between stops n and n+1, the color we want is B[n] + F[n]*t.
 static void init_stop_pos(
-    SkJumper_GradientCtx* ctx, size_t stop, float t_l, float t_r, SkPMColor4f c_l, SkPMColor4f c_r) {
+    SkRasterPipeline_GradientCtx* ctx, size_t stop, float t_l, float t_r, SkPMColor4f c_l, SkPMColor4f c_r) {
     // See note about Clankium's old compiler in init_stop_evenly().
     SkPMColor4f Fs = {
         (c_r.fR - c_l.fR) / (t_r - t_l),
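
Both init_stop helpers solve the same small linear system per channel: choose F and B so that F*t_l + B = c_l and F*t_r + B = c_r, giving F = (c_r - c_l)/(t_r - t_l) and B = c_l - F*t_l (with t_r - t_l = 1/gapCount in the evenly spaced case). A one-channel check of that derivation:

    #include <cassert>
    #include <cmath>

    // Solve color(t) = F*t + B so that color(t_l) == c_l and color(t_r) == c_r.
    static void init_stop(float t_l, float t_r, float c_l, float c_r, float* F, float* B) {
        *F = (c_r - c_l) / (t_r - t_l);
        *B = c_l - *F * t_l;
    }

    int main() {
        float F, B;
        init_stop(0.25f, 0.75f, 0.2f, 0.8f, &F, &B);
        assert(std::fabs(F * 0.25f + B - 0.2f) < 1e-6f);   // reproduces the left stop
        assert(std::fabs(F * 0.75f + B - 0.8f) < 1e-6f);   // reproduces the right stop
        assert(std::fabs(F * 0.50f + B - 0.5f) < 1e-6f);   // midpoint interpolates
    }
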
@@ -275,7 +274,7 @@
 bool SkGradientShaderBase::onAppendStages(const StageRec& rec) const {
     SkRasterPipeline* p = rec.fPipeline;
     SkArenaAlloc* alloc = rec.fAlloc;
-    SkJumper_DecalTileCtx* decal_ctx = nullptr;
+    SkRasterPipeline_DecalTileCtx* decal_ctx = nullptr;
 
     SkMatrix matrix;
     if (!this->computeTotalInverse(rec.fCTM, rec.fLocalM, &matrix)) {
@@ -293,7 +292,7 @@
         case kMirror_TileMode: p->append(SkRasterPipeline::mirror_x_1); break;
         case kRepeat_TileMode: p->append(SkRasterPipeline::repeat_x_1); break;
         case kDecal_TileMode:
-            decal_ctx = alloc->make<SkJumper_DecalTileCtx>();
+            decal_ctx = alloc->make<SkRasterPipeline_DecalTileCtx>();
             decal_ctx->limit_x = SkBits2Float(SkFloat2Bits(1.0f) + 1);
             // reuse mask + limit_x stage, or create a custom decal_1 that just stores the mask
             p->append(SkRasterPipeline::decal_x, decal_ctx);
@@ -326,14 +325,14 @@
                           c_r = prepareColor(1);
 
         // See F and B below.
-        auto ctx = alloc->make<SkJumper_EvenlySpaced2StopGradientCtx>();
+        auto ctx = alloc->make<SkRasterPipeline_EvenlySpaced2StopGradientCtx>();
         (Sk4f::Load(c_r.vec()) - Sk4f::Load(c_l.vec())).store(ctx->f);
         (                        Sk4f::Load(c_l.vec())).store(ctx->b);
         ctx->interpolatedInPremul = premulGrad;
 
         p->append(SkRasterPipeline::evenly_spaced_2_stop_gradient, ctx);
     } else {
-        auto* ctx = alloc->make<SkJumper_GradientCtx>();
+        auto* ctx = alloc->make<SkRasterPipeline_GradientCtx>();
         ctx->interpolatedInPremul = premulGrad;
 
         // Note: In order to handle clamps in search, the search assumes a stop conceptually placed
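
The two-stop fast path is the t_l = 0, t_r = 1 case of the same scheme: f = c_r - c_l and b = c_l, so f*t + b evaluates to c_l at t = 0 and to c_r at t = 1. For example, with c_l = 0.2 and c_r = 0.8 in one channel, f = 0.6 and b = 0.2, and the stage produces 0.6*0.5 + 0.2 = 0.5 at the midpoint.
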
diff --git a/src/shaders/gradients/SkTwoPointConicalGradient.cpp b/src/shaders/gradients/SkTwoPointConicalGradient.cpp
index b605af3..49d0631 100644
--- a/src/shaders/gradients/SkTwoPointConicalGradient.cpp
+++ b/src/shaders/gradients/SkTwoPointConicalGradient.cpp
@@ -10,7 +10,6 @@
 #include "SkReadBuffer.h"
 #include "SkTwoPointConicalGradient.h"
 #include "SkWriteBuffer.h"
-#include "../../jumper/SkJumper.h"
 
 #include <utility>
 
@@ -200,7 +199,7 @@
     }
 
     if (fType == Type::kStrip) {
-        auto* ctx = alloc->make<SkJumper_2PtConicalCtx>();
+        auto* ctx = alloc->make<SkRasterPipeline_2PtConicalCtx>();
         SkScalar scaledR0 = fRadius1 / this->getCenterX1();
         ctx->fP0 = scaledR0 * scaledR0;
         p->append(SkRasterPipeline::xy_to_2pt_conical_strip, ctx);
@@ -209,7 +208,7 @@
         return;
     }
 
-    auto* ctx = alloc->make<SkJumper_2PtConicalCtx>();
+    auto* ctx = alloc->make<SkRasterPipeline_2PtConicalCtx>();
     ctx->fP0 = 1/fFocalData.fR1;
     ctx->fP1 = fFocalData.fFocalX;
 
diff --git a/src/sksl/SkSLInterpreter.cpp b/src/sksl/SkSLInterpreter.cpp
index 5879274..c39c7b2 100644
--- a/src/sksl/SkSLInterpreter.cpp
+++ b/src/sksl/SkSLInterpreter.cpp
@@ -24,7 +24,6 @@
 #include "ir/SkSLVarDeclarationsStatement.h"
 #include "ir/SkSLVariableReference.h"
 #include "SkRasterPipeline.h"
-#include "../jumper/SkJumper.h"
 
 namespace SkSL {
 
@@ -213,12 +212,12 @@
     ABORT("unsupported lvalue");
 }
 
-struct CallbackCtx : public SkJumper_CallbackCtx {
+struct CallbackCtx : public SkRasterPipeline_CallbackCtx {
     Interpreter* fInterpreter;
     const FunctionDefinition* fFunction;
 };
 
-static void do_callback(SkJumper_CallbackCtx* raw, int activePixels) {
+static void do_callback(SkRasterPipeline_CallbackCtx* raw, int activePixels) {
     CallbackCtx& ctx = (CallbackCtx&) *raw;
     for (int i = 0; i < activePixels; ++i) {
         ctx.fInterpreter->push(Interpreter::Value(ctx.rgba[i * 4 + 0]));
diff --git a/src/sksl/SkSLJIT.cpp b/src/sksl/SkSLJIT.cpp
index 4120c4a..e601387 100644
--- a/src/sksl/SkSLJIT.cpp
+++ b/src/sksl/SkSLJIT.cpp
@@ -13,7 +13,6 @@
 
 #include "SkCpu.h"
 #include "SkRasterPipeline.h"
-#include "../jumper/SkJumper.h"
 #include "ir/SkSLAppendStage.h"
 #include "ir/SkSLExpressionStatement.h"
 #include "ir/SkSLFunctionCall.h"