Move shader register setup to SkRasterPipelineBlitter.

We've been seeding the initial values of our registers to x+0.5,y+0.5,
1,0, 0,0,0,0 (useful values for shaders to start with) in all pipelines.
This CL changes that: a new seed_shader stage now does the seeding, and
it is appended only when blitting, and only when we have a shader.

The nicest part of this change is that SkRasterPipeline itself no longer
needs to have a concept of y, or what x means.  It just marches x
through [x,x+n), and the blitter handles y and layers the meaning of
"dst x coordinate" onto x.

This ought to make SkSplicer a little easier to work with too.

dm --src gm --config f16 srgb 565 draws everything the same as before.

Change-Id: I69d8c1cc14a06e5dfdd6a7493364f43a18f8dec5
Reviewed-on: https://skia-review.googlesource.com/7353
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index 9afc029..7447f4d 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -63,11 +63,11 @@
         if (kCompiled) {
             auto compiled = p.compile();
             while (loops --> 0) {
-                compiled(0,0, N);
+                compiled(0,N);
             }
         } else {
             while (loops --> 0) {
-                p.run(0,0, N);
+                p.run(0,N);
             }
         }
     }
@@ -100,11 +100,11 @@
         if (kCompiled) {
             auto compiled = p.compile();
             while (loops --> 0) {
-                compiled(0,0, N);
+                compiled(0,N);
             }
         } else {
             while (loops --> 0) {
-                p.run(0,0, N);
+                p.run(0,N);
             }
         }
     }
diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp
index dccb92e..a66d52e 100644
--- a/src/core/SkColorSpaceXform.cpp
+++ b/src/core/SkColorSpaceXform.cpp
@@ -1317,7 +1317,7 @@
             return false;
     }
 
-    pipeline.run(0, 0, len);
+    pipeline.run(0, len);
     return true;
 }
 
diff --git a/src/core/SkColorSpaceXform_A2B.cpp b/src/core/SkColorSpaceXform_A2B.cpp
index 27c9faa..39c352c 100644
--- a/src/core/SkColorSpaceXform_A2B.cpp
+++ b/src/core/SkColorSpaceXform_A2B.cpp
@@ -68,7 +68,7 @@
         default:
             return false;
     }
-    pipeline.run(0,0, count);
+    pipeline.run(0,count);
 
     return true;
 }
diff --git a/src/core/SkConfig8888.cpp b/src/core/SkConfig8888.cpp
index d75ea27..cdecf05 100644
--- a/src/core/SkConfig8888.cpp
+++ b/src/core/SkConfig8888.cpp
@@ -103,7 +103,7 @@
     auto p = pipeline.compile();
 
     for (int y = 0; y < srcInfo.height(); ++y) {
-        p(0,0, srcInfo.width());
+        p(0,srcInfo.width());
         // The pipeline has pointers to srcRow and dstRow, so we just need to update them in the
         // loop to move between rows of src/dst.
         srcRow = (const char*)srcRow + srcRB;
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 2c6a118..6f82792 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -66,8 +66,8 @@
         return hash_fn(data, bytes, seed);
     }
 
-    extern void (*run_pipeline)(size_t, size_t, size_t, const SkRasterPipeline::Stage*, int);
-    extern std::function<void(size_t, size_t, size_t)>
+    extern void (*run_pipeline)(size_t, size_t, const SkRasterPipeline::Stage*, int);
+    extern std::function<void(size_t, size_t)>
     (*compile_pipeline)(const SkRasterPipeline::Stage*, int);
 
     extern void (*convolve_vertically)(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
diff --git a/src/core/SkPM4fPriv.h b/src/core/SkPM4fPriv.h
index a08f158..5a60409 100644
--- a/src/core/SkPM4fPriv.h
+++ b/src/core/SkPM4fPriv.h
@@ -147,7 +147,7 @@
         append_gamut_transform(&p, scratch_matrix_3x4, src, dst);
         p.append(SkRasterPipeline::store_f32, &color4f_ptr);
 
-        p.run(0,0,1);
+        p.run(0,1);
     }
     return color4f;
 }
diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp
index 7c3536c..884a7cb 100644
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@@ -20,13 +20,13 @@
                    src.fStages.begin(), src.fStages.end());
 }
 
-void SkRasterPipeline::run(size_t x, size_t y, size_t n) const {
+void SkRasterPipeline::run(size_t x, size_t n) const {
     if (!fStages.empty()) {
-        SkOpts::run_pipeline(x,y,n, fStages.data(), SkToInt(fStages.size()));
+        SkOpts::run_pipeline(x,n, fStages.data(), SkToInt(fStages.size()));
     }
 }
 
-std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {
+std::function<void(size_t, size_t)> SkRasterPipeline::compile() const {
 #ifdef SK_RASTER_PIPELINE_HAS_JIT
     if (auto fn = this->jit()) {
         return fn;
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 44aa379..5143d52 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -64,7 +64,7 @@
     M(set_rgb) M(swap_rb)                                        \
     M(from_srgb) M(to_srgb)                                      \
     M(from_2dot2) M(to_2dot2)                                    \
-    M(constant_color) M(store_f32)                               \
+    M(constant_color) M(seed_shader) M(store_f32)                \
     M(load_a8)   M(store_a8)                                     \
     M(load_565)  M(store_565)                                    \
     M(load_f16)  M(store_f16)                                    \
@@ -112,11 +112,11 @@
     // Append all stages to this pipeline.
     void extend(const SkRasterPipeline&);
 
-    // Runs the pipeline walking x through [x,x+n), holding y constant.
-    void run(size_t x, size_t y, size_t n) const;
+    // Runs the pipeline walking x through [x,x+n).
+    void run(size_t x, size_t n) const;
 
     // If you're going to run() the pipeline more than once, it's best to compile it.
-    std::function<void(size_t x, size_t y, size_t n)> compile() const;
+    std::function<void(size_t x, size_t n)> compile() const;
 
     void dump() const;
 
@@ -130,7 +130,7 @@
     void append_from_srgb(SkAlphaType);
 
 private:
-    std::function<void(size_t, size_t, size_t)> jit() const;
+    std::function<void(size_t, size_t)> jit() const;
 
     std::vector<Stage> fStages;
 };
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 7f91cbf..bb89f76 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -49,16 +49,17 @@
     SkRasterPipeline fShader;
 
     // These functions are compiled lazily when first used.
-    std::function<void(size_t, size_t, size_t)> fBlitH         = nullptr,
-                                                fBlitAntiH     = nullptr,
-                                                fBlitMaskA8    = nullptr,
-                                                fBlitMaskLCD16 = nullptr;
+    std::function<void(size_t, size_t)> fBlitH         = nullptr,
+                                        fBlitAntiH     = nullptr,
+                                        fBlitMaskA8    = nullptr,
+                                        fBlitMaskLCD16 = nullptr;
 
     // These values are pointed to by the compiled blit functions
     // above, which allows us to adjust them from call to call.
     void*       fDstPtr          = nullptr;
     const void* fMaskPtr         = nullptr;
     float       fCurrentCoverage = 0.0f;
+    int         fCurrentY        = 0;
 
     // Scratch space for shaders and color filters to use.
     char            fScratch[64];
@@ -113,6 +114,7 @@
     bool is_opaque   = paintColor->a() == 1.0f,
          is_constant = true;
     if (shader) {
+        pipeline->append(SkRasterPipeline::seed_shader, &blitter->fCurrentY);
         if (!shader->appendStages(pipeline, dst.colorSpace(), &blitter->fArena,
                                   ctm, paint)) {
             return earlyOut();
@@ -138,7 +140,7 @@
 
     if (is_constant) {
         pipeline->append(SkRasterPipeline::store_f32, &paintColor);
-        pipeline->run(0,0, 1);
+        pipeline->run(0,1);
 
         *pipeline = SkRasterPipeline();
         pipeline->append(SkRasterPipeline::constant_color, paintColor);
@@ -156,21 +158,21 @@
         p.extend(*pipeline);
         blitter->fDstPtr = &color;
         blitter->append_store(&p);
-        p.run(0,0, 1);
+        p.run(0,1);
 
         switch (dst.shiftPerPixel()) {
             case 1:
-                blitter->fBlitH = [blitter,color](size_t x, size_t, size_t n) {
+                blitter->fBlitH = [blitter,color](size_t x, size_t n) {
                     sk_memset16((uint16_t*)blitter->fDstPtr + x, color, n);
                 };
                 break;
             case 2:
-                blitter->fBlitH = [blitter,color](size_t x, size_t, size_t n) {
+                blitter->fBlitH = [blitter,color](size_t x, size_t n) {
                     sk_memset32((uint32_t*)blitter->fDstPtr + x, color, n);
                 };
                 break;
             case 3:
-                blitter->fBlitH = [blitter,color](size_t x, size_t, size_t n) {
+                blitter->fBlitH = [blitter,color](size_t x, size_t n) {
                     sk_memset64((uint64_t*)blitter->fDstPtr + x, color, n);
                 };
                 break;
@@ -244,7 +246,8 @@
         fBlitH = p.compile();
     }
     fDstPtr = fDst.writable_addr(0,y);
-    fBlitH(x,y, w);
+    fCurrentY = y;
+    fBlitH(x,w);
 }
 
 void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
@@ -266,13 +269,14 @@
     }
 
     fDstPtr = fDst.writable_addr(0,y);
+    fCurrentY = y;
     for (int16_t run = *runs; run > 0; run = *runs) {
         switch (*aa) {
             case 0x00:                       break;
             case 0xff: this->blitH(x,y,run); break;
             default:
                 fCurrentCoverage = *aa * (1/255.0f);
-                fBlitAntiH(x,y, run);
+                fBlitAntiH(x,run);
         }
         x    += run;
         runs += run;
@@ -317,15 +321,16 @@
     int x = clip.left();
     for (int y = clip.top(); y < clip.bottom(); y++) {
         fDstPtr = fDst.writable_addr(0,y);
+        fCurrentY = y;
 
         switch (mask.fFormat) {
             case SkMask::kA8_Format:
                 fMaskPtr = mask.getAddr8(x,y)-x;
-                fBlitMaskA8(x,y, clip.width());
+                fBlitMaskA8(x,clip.width());
                 break;
             case SkMask::kLCD16_Format:
                 fMaskPtr = mask.getAddrLCD16(x,y)-x;
-                fBlitMaskLCD16(x,y, clip.width());
+                fBlitMaskLCD16(x,clip.width());
                 break;
             default:
                 // TODO
diff --git a/src/images/transform_scanline.h b/src/images/transform_scanline.h
index e26dde3..d492036 100644
--- a/src/images/transform_scanline.h
+++ b/src/images/transform_scanline.h
@@ -190,7 +190,7 @@
     p.append(SkRasterPipeline::unpremul);
     p.append(SkRasterPipeline::to_srgb);
     p.append(SkRasterPipeline::store_8888, &dst);
-    p.run(0, 0, width);
+    p.run(0, width);
 }
 
 /**
@@ -262,7 +262,7 @@
     p.append(SkRasterPipeline::load_f16, (const void**) &src);
     p.append(SkRasterPipeline::to_srgb);
     p.append(SkRasterPipeline::store_u16_be, (void**) &dst);
-    p.run(0, 0, width);
+    p.run(0, width);
 }
 
 /**
@@ -275,5 +275,5 @@
     p.append(SkRasterPipeline::unpremul);
     p.append(SkRasterPipeline::to_srgb);
     p.append(SkRasterPipeline::store_u16_be, (void**) &dst);
-    p.run(0, 0, width);
+    p.run(0, width);
 }
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index f4648ba..1b27fc2 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -440,6 +440,18 @@
     a = ctx->a();
 }
 
+// Seed all 8 registers with shader-friendly starting values; ctx points at the current y.
+STAGE_CTX(seed_shader, const int*) {
+    int y = *ctx;
+
+    static const float dx[] = { 0,1,2,3,4,5,6,7 };
+    r = x + 0.5f + SkNf::Load(dx);  // dst pixel center x coordinates
+    g = y + 0.5f;                   // dst pixel center y coordinate(s)
+    b = 1.0f;
+    a = 0.0f;
+    dr = dg = db = da = 0.0f;
+}
+
 // s' = sc for a scalar c.
 STAGE_CTX(scale_1_float, const float*) {
     SkNf c = *ctx;
@@ -1095,22 +1107,17 @@
         *program++ = (void*)just_return;
     }
 
-    static void run_program(void** program, size_t x, size_t y, size_t n) {
-        float dx[] = { 0,1,2,3,4,5,6,7 };
-        SkNf X = SkNf(x) + SkNf::Load(dx) + 0.5f,
-             Y = SkNf(y) + 0.5f,
-             _0 = SkNf(0),
-             _1 = SkNf(1);
+    static void run_program(void** program, size_t x, size_t n) {
+        SkNf u;  // fastest to start uninitialized.
 
         auto start = (Fn)load_and_increment(&program);
         while (n >= N) {
-            start(x*N, program, X,Y,_1,_0, _0,_0,_0,_0);
-            X += (float)N;
+            start(x*N, program, u,u,u,u, u,u,u,u);
             x += N;
             n -= N;
         }
         if (n) {
-            start(x*N+n, program, X,Y,_1,_0, _0,_0,_0,_0);
+            start(x*N+n, program, u,u,u,u, u,u,u,u);
         }
     }
 
@@ -1137,8 +1144,8 @@
             memcpy(fProgram, o.fProgram, slots * sizeof(void*));
         }
 
-        void operator()(size_t x, size_t y, size_t n) {
-            run_program(fProgram, x, y, n);
+        void operator()(size_t x, size_t n) {
+            run_program(fProgram, x, n);
         }
 
         void** fProgram;
@@ -1147,21 +1154,21 @@
 
 namespace SK_OPTS_NS {
 
-    SI std::function<void(size_t, size_t, size_t)>
+    SI std::function<void(size_t, size_t)>
     compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
         return Compiled{stages,nstages};
     }
 
-    SI void run_pipeline(size_t x, size_t y, size_t n,
+    SI void run_pipeline(size_t x, size_t n,
                          const SkRasterPipeline::Stage* stages, int nstages) {
         static const int kStackMax = 256;
         // Worst case is nstages stages with nstages context pointers, and just_return.
         if (2*nstages+1 <= kStackMax) {
             void* program[kStackMax];
             build_program(program, stages, nstages);
-            run_program(program, x,y,n);
+            run_program(program, x,n);
         } else {
-            Compiled{stages,nstages}(x,y,n);
+            Compiled{stages,nstages}(x,n);
         }
     }
 
diff --git a/src/splicer/SkSplicer.cpp b/src/splicer/SkSplicer.cpp
index 81bb61c..0330aff 100644
--- a/src/splicer/SkSplicer.cpp
+++ b/src/splicer/SkSplicer.cpp
@@ -383,12 +383,11 @@
         }
 
         // Here's where we call fSpliced if we created it, fBackup if not.
-        void operator()(size_t x, size_t y, size_t n) const {
+        void operator()(size_t x, size_t n) const {
             size_t stride = fLowp ? kStride*2
                                   : kStride;
             size_t body = n/stride*stride;     // Largest multiple of stride (2, 4, 8, or 16) <= n.
             if (fSpliced && body) {            // Can we run fSpliced for at least one stride?
-                // TODO: At some point we will want to pass in y...
                 using Fn = void(size_t x, size_t limit, void* ctx, const void* k);
                 auto k = fLowp ? (const void*)&kConstants_lowp
                                : (const void*)&kConstants;
@@ -398,17 +397,17 @@
                 x += body;
                 n -= body;
             }
-            fBackup(x,y,n);
+            fBackup(x,n);
         }
 
-        std::function<void(size_t, size_t, size_t)> fBackup;
-        size_t                                      fSplicedLen;
-        void*                                       fSpliced;
-        bool                                        fLowp;
+        std::function<void(size_t, size_t)> fBackup;
+        size_t                              fSplicedLen;
+        void*                               fSpliced;
+        bool                                fLowp;
     };
 
 }
 
-std::function<void(size_t, size_t, size_t)> SkRasterPipeline::jit() const {
+std::function<void(size_t, size_t)> SkRasterPipeline::jit() const {
     return Spliced(fStages.data(), SkToInt(fStages.size()));
 }
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index a3314b8..4e992cb 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -27,7 +27,7 @@
     p.append(SkRasterPipeline::swap);
     p.append(SkRasterPipeline::srcover);
     p.append(SkRasterPipeline::store_f16, &store_ctx);
-    p.run(0,0, 1);
+    p.run(0,1);
 
     // We should see half-intensity magenta.
     REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
@@ -39,7 +39,7 @@
     result = 0;
 
     auto fn = p.compile();
-    fn(0,0, 1);
+    fn(0,1);
     REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
     REPORTER_ASSERT(r, ((result >> 16) & 0xffff) == 0x0000);
     REPORTER_ASSERT(r, ((result >> 32) & 0xffff) == 0x3800);
@@ -49,7 +49,7 @@
 DEF_TEST(SkRasterPipeline_empty, r) {
     // No asserts... just a test that this is safe to run.
     SkRasterPipeline p;
-    p.run(0,0, 20);
+    p.run(0,20);
 }
 
 DEF_TEST(SkRasterPipeline_nonsense, r) {
@@ -57,7 +57,7 @@
     // srcover() calls st->next(); this makes sure we've always got something there to call.
     SkRasterPipeline p;
     p.append(SkRasterPipeline::srcover);
-    p.run(0,0, 20);
+    p.run(0,20);
 }
 
 DEF_TEST(SkRasterPipeline_JIT, r) {
@@ -80,7 +80,7 @@
     p.append(SkRasterPipeline:: load_8888, &src);
     p.append(SkRasterPipeline::store_8888, &dst);
     auto fn = p.compile();
-    fn(15, 0, 20);
+    fn(15, 20);
 
     for (int i = 0; i < 36; i++) {
         if (i < 15 || i == 35) {