Remove the instep parameter.

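This patch removes the instep parameter from the expanded-kernel function
signatures, from the calls to expanded kernels, and from the CPU reference
implementation intrinsics. The parameter was already unused: every caller
passed 0 for it, and kernels get their input strides from
RsExpandKernelParams (inEStrides) instead.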

Change-Id: I059db548a57702c576963f6b17a002b2ee393cdb
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index db3cc7f..ee4d1e6 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -49,8 +49,7 @@
 
 typedef void (*outer_foreach_t)(
     const android::renderscript::RsExpandKernelParams *,
-    uint32_t x1, uint32_t x2,
-    uint32_t instep, uint32_t outstep);
+    uint32_t x1, uint32_t x2, uint32_t outstep);
 
 
 static pthread_key_t gThreadTLSKey = 0;
@@ -414,8 +413,7 @@
                       (strides.eStride * mtls->xStart);
                 }
 
-                // Kernels now get their input strides from kparams.
-                fn(&kparams, mtls->xStart, mtls->xEnd, 0,
+                fn(&kparams, mtls->xStart, mtls->xEnd,
                    mtls->fep.outStride.eStride);
             }
         }
@@ -448,8 +446,7 @@
                   mtls->fep.inPtrs[inIndex] + (strides.eStride * xStart);
             }
 
-            // Kernels now get their input strides from kparams.
-            fn(&kparams, xStart, xEnd, 0, mtls->fep.outStride.eStride);
+            fn(&kparams, xStart, xEnd, mtls->fep.outStride.eStride);
         }
     });
 }
@@ -554,12 +551,7 @@
                           (strides.eStride * mtls->xStart);
                     }
 
-                    /*
-                     * The fourth argument is zero here because multi-input
-                     * kernels get their stride information from a member of p
-                     * that points to an array.
-                     */
-                    fn(&kparams, mtls->xStart, mtls->xEnd, 0,
+                    fn(&kparams, mtls->xStart, mtls->xEnd,
                        mtls->fep.outStride.eStride);
                 }
             }
diff --git a/cpu_ref/rsCpuIntrinsic3DLUT.cpp b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
index a19d885..ce7c9c6 100644
--- a/cpu_ref/rsCpuIntrinsic3DLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsic3DLUT.cpp
@@ -40,7 +40,7 @@
 
     static void kernel(const RsExpandKernelParams *p,
                        uint32_t xstart, uint32_t xend,
-                       uint32_t instep, uint32_t outstep);
+                       uint32_t outstep);
 };
 
 }
@@ -59,8 +59,8 @@
 
 
 void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelParams *p,
-                                      uint32_t xstart, uint32_t xend,
-                                      uint32_t instep, uint32_t outstep) {
+                                        uint32_t xstart, uint32_t xend,
+                                        uint32_t outstep) {
     RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)p->usr;
 
     uchar4 *out = (uchar4 *)p->out + xstart;
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index 0378e07..2beec3d 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -33,9 +33,8 @@
     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
 
 protected:
-    static void kernel(const RsExpandKernelParams *p,
-                          uint32_t xstart, uint32_t xend,
-                          uint32_t instep, uint32_t outstep);
+    static void kernel(const RsExpandKernelParams *p, uint32_t xstart,
+                       uint32_t xend, uint32_t outstep);
 };
 
 }
@@ -112,7 +111,7 @@
 
 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelParams *p,
                                         uint32_t xstart, uint32_t xend,
-                                        uint32_t instep, uint32_t outstep) {
+                                        uint32_t outstep) {
     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
 
-    // instep/outstep can be ignored--sizeof(uchar4) known at compile time
+    // Input stride and outstep can be ignored--sizeof(uchar4) known at compile time
diff --git a/cpu_ref/rsCpuIntrinsicBlur.cpp b/cpu_ref/rsCpuIntrinsicBlur.cpp
index 22e1176..7f888e9 100644
--- a/cpu_ref/rsCpuIntrinsicBlur.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlur.cpp
@@ -46,10 +46,10 @@
 
     static void kernelU4(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU1(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     void ComputeGaussianWeights();
 };
 
@@ -276,7 +276,7 @@
 
 void RsdCpuScriptIntrinsicBlur::kernelU4(const RsExpandKernelParams *p,
                                          uint32_t xstart, uint32_t xend,
-                                         uint32_t instep, uint32_t outstep) {
+                                         uint32_t outstep) {
 
     float4 stackbuf[2048];
     float4 *buf = &stackbuf[0];
@@ -347,7 +347,7 @@
 
 void RsdCpuScriptIntrinsicBlur::kernelU1(const RsExpandKernelParams *p,
                                          uint32_t xstart, uint32_t xend,
-                                         uint32_t instep, uint32_t outstep) {
+                                         uint32_t outstep) {
     float buf[4 * 2048];
     RsdCpuScriptIntrinsicBlur *cp = (RsdCpuScriptIntrinsicBlur *)p->usr;
     if (!cp->mAlloc.get()) {
@@ -464,5 +464,3 @@
 
     return new RsdCpuScriptIntrinsicBlur(ctx, s, e);
 }
-
-
diff --git a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
index 4e90ad7..f072e52 100644
--- a/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
+++ b/cpu_ref/rsCpuIntrinsicColorMatrix.cpp
@@ -189,7 +189,7 @@
 
     static void kernel(const RsExpandKernelParams *p,
                        uint32_t xstart, uint32_t xend,
-                       uint32_t instep, uint32_t outstep);
+                       uint32_t outstep);
     void updateCoeffCache(float fpMul, float addMul);
 
     Key_t mLastKey;
@@ -880,11 +880,10 @@
 
 void RsdCpuScriptIntrinsicColorMatrix::kernel(const RsExpandKernelParams *p,
                                               uint32_t xstart, uint32_t xend,
-                                              uint32_t instep, uint32_t outstep) {
+                                              uint32_t outstep) {
     RsdCpuScriptIntrinsicColorMatrix *cp = (RsdCpuScriptIntrinsicColorMatrix *)p->usr;
 
-    // Update the instep due to change in parameter passing.
-    instep = p->inEStrides[0];
+    uint32_t instep = p->inEStrides[0];
 
     uchar *out = (uchar *)p->out    + outstep * xstart;
     uchar *in  = (uchar *)p->ins[0] + instep  * xstart;
diff --git a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
index d5af88c..f9b70cc 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve3x3.cpp
@@ -44,22 +44,22 @@
 
     static void kernelU1(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU2(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU4(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelF1(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelF2(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelF4(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
 };
 
 }
@@ -185,7 +185,7 @@
 
 void RsdCpuScriptIntrinsicConvolve3x3::kernelU4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -232,7 +232,7 @@
 
 void RsdCpuScriptIntrinsicConvolve3x3::kernelU2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -277,7 +277,7 @@
 
 void RsdCpuScriptIntrinsicConvolve3x3::kernelU1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -322,7 +322,7 @@
 
 void RsdCpuScriptIntrinsicConvolve3x3::kernelF4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -367,7 +367,7 @@
 
 void RsdCpuScriptIntrinsicConvolve3x3::kernelF2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -411,7 +411,7 @@
 }
 void RsdCpuScriptIntrinsicConvolve3x3::kernelF1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve3x3 *cp = (RsdCpuScriptIntrinsicConvolve3x3 *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -507,5 +507,3 @@
 
     return new RsdCpuScriptIntrinsicConvolve3x3(ctx, s, e);
 }
-
-
diff --git a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
index 8421175..1a546db 100644
--- a/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
+++ b/cpu_ref/rsCpuIntrinsicConvolve5x5.cpp
@@ -44,22 +44,22 @@
 
     static void kernelU1(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU2(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU4(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelF1(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelF2(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelF4(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
 
 
 };
@@ -348,7 +348,7 @@
 
 void RsdCpuScriptIntrinsicConvolve5x5::kernelU4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("Convolve5x5 executed without input, skipping");
@@ -408,7 +408,7 @@
 
 void RsdCpuScriptIntrinsicConvolve5x5::kernelU2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("Convolve5x5 executed without input, skipping");
@@ -457,7 +457,7 @@
 
 void RsdCpuScriptIntrinsicConvolve5x5::kernelU1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("Convolve5x5 executed without input, skipping");
@@ -506,7 +506,7 @@
 
 void RsdCpuScriptIntrinsicConvolve5x5::kernelF4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("Convolve5x5 executed without input, skipping");
@@ -555,7 +555,7 @@
 
 void RsdCpuScriptIntrinsicConvolve5x5::kernelF2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("Convolve5x5 executed without input, skipping");
@@ -604,7 +604,7 @@
 
 void RsdCpuScriptIntrinsicConvolve5x5::kernelF1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicConvolve5x5 *cp = (RsdCpuScriptIntrinsicConvolve5x5 *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("Convolve5x5 executed without input, skipping");
@@ -705,6 +705,3 @@
 
     return new RsdCpuScriptIntrinsicConvolve5x5(ctx, s, e);
 }
-
-
-
diff --git a/cpu_ref/rsCpuIntrinsicHistogram.cpp b/cpu_ref/rsCpuIntrinsicHistogram.cpp
index b5dbfa8..d3dce6d 100644
--- a/cpu_ref/rsCpuIntrinsicHistogram.cpp
+++ b/cpu_ref/rsCpuIntrinsicHistogram.cpp
@@ -50,30 +50,30 @@
     ObjectBaseRef<Allocation> mAllocOut;
 
     static void kernelP1U4(const RsExpandKernelParams *p,
-                          uint32_t xstart, uint32_t xend,
-                          uint32_t instep, uint32_t outstep);
+                           uint32_t xstart, uint32_t xend,
+                           uint32_t outstep);
     static void kernelP1U3(const RsExpandKernelParams *p,
-                          uint32_t xstart, uint32_t xend,
-                          uint32_t instep, uint32_t outstep);
+                           uint32_t xstart, uint32_t xend,
+                           uint32_t outstep);
     static void kernelP1U2(const RsExpandKernelParams *p,
-                          uint32_t xstart, uint32_t xend,
-                          uint32_t instep, uint32_t outstep);
+                           uint32_t xstart, uint32_t xend,
+                           uint32_t outstep);
     static void kernelP1U1(const RsExpandKernelParams *p,
-                          uint32_t xstart, uint32_t xend,
-                          uint32_t instep, uint32_t outstep);
+                           uint32_t xstart, uint32_t xend,
+                           uint32_t outstep);
 
     static void kernelP1L4(const RsExpandKernelParams *p,
                            uint32_t xstart, uint32_t xend,
-                           uint32_t instep, uint32_t outstep);
+                           uint32_t outstep);
     static void kernelP1L3(const RsExpandKernelParams *p,
                            uint32_t xstart, uint32_t xend,
-                           uint32_t instep, uint32_t outstep);
+                           uint32_t outstep);
     static void kernelP1L2(const RsExpandKernelParams *p,
                            uint32_t xstart, uint32_t xend,
-                           uint32_t instep, uint32_t outstep);
+                           uint32_t outstep);
     static void kernelP1L1(const RsExpandKernelParams *p,
                            uint32_t xstart, uint32_t xend,
-                           uint32_t instep, uint32_t outstep);
+                           uint32_t outstep);
 
 };
 
@@ -168,7 +168,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -185,7 +185,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -201,7 +201,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -216,7 +216,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -234,7 +234,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -251,7 +251,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -267,7 +267,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
@@ -282,7 +282,7 @@
 
 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
 
     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
     uchar *in = (uchar *)p->ins[0];
diff --git a/cpu_ref/rsCpuIntrinsicLUT.cpp b/cpu_ref/rsCpuIntrinsicLUT.cpp
index 9d3b400..b08a0e5 100644
--- a/cpu_ref/rsCpuIntrinsicLUT.cpp
+++ b/cpu_ref/rsCpuIntrinsicLUT.cpp
@@ -40,7 +40,7 @@
 
     static void kernel(const RsExpandKernelParams *p,
                        uint32_t xstart, uint32_t xend,
-                       uint32_t instep, uint32_t outstep);
+                       uint32_t outstep);
 };
 
 }
@@ -55,7 +55,7 @@
 
 void RsdCpuScriptIntrinsicLUT::kernel(const RsExpandKernelParams *p,
                                       uint32_t xstart, uint32_t xend,
-                                      uint32_t instep, uint32_t outstep) {
+                                      uint32_t outstep) {
     RsdCpuScriptIntrinsicLUT *cp = (RsdCpuScriptIntrinsicLUT *)p->usr;
 
     uchar *out = (uchar *)p->out;
diff --git a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
index 45f85e5..05ccfd6 100644
--- a/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
+++ b/cpu_ref/rsCpuIntrinsicLoopFilter.cpp
@@ -176,7 +176,7 @@
     void doLoopFilter();
     static void kernel(const RsExpandKernelParams *p,
                        uint32_t xstart, uint32_t xend,
-                       uint32_t instep, uint32_t outstep);
+                       uint32_t outstep);
 };
 
 }
@@ -184,7 +184,7 @@
 
 void RsdCpuScriptIntrinsicLoopFilter::kernel(const RsExpandKernelParams *p,
                                              uint32_t xstart, uint32_t xend,
-                                             uint32_t instep, uint32_t outstep) {
+                                             uint32_t outstep) {
     RsdCpuScriptIntrinsicLoopFilter *cp = (RsdCpuScriptIntrinsicLoopFilter*)p->usr;
     memset((void*)&cp->mPrch.chart, 0, sizeof(cp->mPrch.chart));
     cp->mPrch.chart[0] = 0x0fffffff;
@@ -1232,4 +1232,3 @@
         rsAssert(rv == 0);
     }
 }
-
diff --git a/cpu_ref/rsCpuIntrinsicResize.cpp b/cpu_ref/rsCpuIntrinsicResize.cpp
index 3a307d6..826661e 100644
--- a/cpu_ref/rsCpuIntrinsicResize.cpp
+++ b/cpu_ref/rsCpuIntrinsicResize.cpp
@@ -48,13 +48,13 @@
 
     static void kernelU1(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU2(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
     static void kernelU4(const RsExpandKernelParams *p,
                          uint32_t xstart, uint32_t xend,
-                         uint32_t instep, uint32_t outstep);
+                         uint32_t outstep);
 };
 
 }
@@ -181,7 +181,7 @@
 
 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -221,7 +221,7 @@
 
 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
 
     if (!cp->mAlloc.get()) {
@@ -261,7 +261,7 @@
 
 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelParams *p,
                                                 uint32_t xstart, uint32_t xend,
-                                                uint32_t instep, uint32_t outstep) {
+                                                uint32_t outstep) {
     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
 
     if (!cp->mAlloc.get()) {
diff --git a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
index 342d0ea..390ca3c 100644
--- a/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
+++ b/cpu_ref/rsCpuIntrinsicYuvToRGB.cpp
@@ -48,7 +48,7 @@
 
     static void kernel(const RsExpandKernelParams *p,
                        uint32_t xstart, uint32_t xend,
-                       uint32_t instep, uint32_t outstep);
+                       uint32_t outstep);
 };
 
 }
@@ -103,7 +103,7 @@
 
 void RsdCpuScriptIntrinsicYuvToRGB::kernel(const RsExpandKernelParams *p,
                                            uint32_t xstart, uint32_t xend,
-                                           uint32_t instep, uint32_t outstep) {
+                                           uint32_t outstep) {
     RsdCpuScriptIntrinsicYuvToRGB *cp = (RsdCpuScriptIntrinsicYuvToRGB *)p->usr;
     if (!cp->alloc.get()) {
         ALOGE("YuvToRGB executed without input, skipping");
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index f0843cc..c5fc183 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -49,7 +49,7 @@
     typedef void (*outer_foreach_t)(
         const RsExpandKernelParams *,
         uint32_t x1, uint32_t x2,
-        uint32_t instep, uint32_t outstep);
+        uint32_t outstep);
 #ifdef RS_COMPATIBILITY_LIB
     typedef void (* InvokeFunc_t)(void);
     typedef void (* ForEachFunc_t)(void);
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 20ee09d..4eb9e9d 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -46,11 +46,11 @@
 
 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelParams *kparams,
                                       uint32_t xstart, uint32_t xend,
-                                      uint32_t instep, uint32_t outstep);
+                                      uint32_t outstep);
 
 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelParams *kparams,
                                          uint32_t xstart, uint32_t xend,
-                                         uint32_t instep, uint32_t outstep) {
+                                         uint32_t outstep) {
 
 
     const ScriptList *sl           = (const ScriptList *)kparams->usr;
@@ -114,11 +114,7 @@
         }
 
         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
-        /*
-         * The fourth argument is zero here because kernels get their stride
-         * information from a member of p that points to an array.
-         */
-        func(kparams, xstart, xend, 0, ostep);
+        func(kparams, xstart, xend, ostep);
     }
     //ALOGE("script group root");
 
diff --git a/cpu_ref/rsCpuScriptGroup.h b/cpu_ref/rsCpuScriptGroup.h
index 71f2dd8..1a4af05 100644
--- a/cpu_ref/rsCpuScriptGroup.h
+++ b/cpu_ref/rsCpuScriptGroup.h
@@ -35,7 +35,7 @@
 
     static void scriptGroupRoot(const RsExpandKernelParams *p,
                                 uint32_t xstart, uint32_t xend,
-                                uint32_t instep, uint32_t outstep);
+                                uint32_t outstep);
 
 protected:
     struct ScriptList {