Collapse code paths for single- and multi-input kernels.

This patch simplifies the RenderScript driver and CPU reference implementation
by removing the distinction between single- and multi-input kernels in many
places.  The distinction is maintained in some places due to the need to
maintain backwards compatibility.  This permits the deletion of some functions
and struct members that are no longer needed.  Several related functions were
also cleaned up.

Change-Id: Id70a223ea5e3aa2b0b935b2b7f9af933339ae8a4
diff --git a/cpu_ref/rsCpuScriptGroup.cpp b/cpu_ref/rsCpuScriptGroup.cpp
index 0878552..20ee09d 100644
--- a/cpu_ref/rsCpuScriptGroup.cpp
+++ b/cpu_ref/rsCpuScriptGroup.cpp
@@ -53,38 +53,45 @@
                                          uint32_t instep, uint32_t outstep) {
 
 
-    const ScriptList *sl            = (const ScriptList *)kparams->usr;
+    const ScriptList *sl           = (const ScriptList *)kparams->usr;
     RsExpandKernelParams *mkparams = (RsExpandKernelParams *)kparams;
 
+    const void **oldIns  = mkparams->ins;
+    uint32_t *oldStrides = mkparams->inEStrides;
+
+    void *localIns[1];
+    uint32_t localStride[1];
+
+    mkparams->ins        = (const void**)localIns;
+    mkparams->inEStrides = localStride;
+
     for (size_t ct = 0; ct < sl->count; ct++) {
         ScriptGroupRootFunc_t func;
         func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
         mkparams->usr = sl->usrPtrs[ct];
 
-        mkparams->in  = NULL;
-        mkparams->out = NULL;
-
-        uint32_t istep = 0;
-        uint32_t ostep = 0;
-
         if (sl->ins[ct]) {
-            mkparams->in =
-              (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
+            localIns[0] = sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
 
-            istep = sl->ins[ct]->mHal.state.elementSizeBytes;
+            localStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
 
             if (sl->inExts[ct]) {
-                mkparams->in =
-                  (const uint8_t *)mkparams->in +
-                  sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y;
+                localIns[0] = (void*)
+                  ((const uint8_t *)localIns[0] +
+                   sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->y);
 
             } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kparams->lid) {
-                mkparams->in =
-                  (const uint8_t *)mkparams->in +
-                  sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid;
+                localIns[0] = (void*)
+                  ((const uint8_t *)localIns[0] +
+                   sl->ins[ct]->mHal.drvState.lod[0].stride * kparams->lid);
             }
+
+        } else {
+            localIns[0]    = NULL;
+            localStride[0] = 0;
         }
 
+        uint32_t ostep;
         if (sl->outs[ct]) {
             mkparams->out =
               (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
@@ -101,14 +108,23 @@
                   (uint8_t *)mkparams->out +
                   sl->outs[ct]->mHal.drvState.lod[0].stride * kparams->lid;
             }
+        } else {
+            mkparams->out = NULL;
+            ostep         = 0;
         }
 
         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
-        func(kparams, xstart, xend, istep, ostep);
+        /*
+         * The fourth argument is zero here because kernels read their input
+         * strides from kparams->inEStrides, which points to an array.
+         */
+        func(kparams, xstart, xend, 0, ostep);
     }
     //ALOGE("script group root");
 
-    mkparams->usr = sl;
+    mkparams->ins        = oldIns;
+    mkparams->inEStrides = oldStrides;
+    mkparams->usr        = sl;
 }
 
 
@@ -195,17 +211,33 @@
 
     MTLaunchStruct mtls;
 
-    if(fieldDep) {
+    if (fieldDep) {
         for (size_t ct=0; ct < ins.size(); ct++) {
             Script *s = kernels[ct]->mScript;
             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
             uint32_t slot = kernels[ct]->mSlot;
 
-            si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+            uint32_t inLen;
+            const Allocation **ains;
+
+            if (ins[ct] == NULL) {
+                inLen = 0;
+                ains  = NULL;
+
+            } else {
+                inLen = 1;
+                ains  = const_cast<const Allocation**>(&ins[ct]);
+            }
+
+            si->forEachMtlsSetup(ains, inLen, outs[ct], NULL, 0, NULL, &mtls);
+
             si->forEachKernelSetup(slot, &mtls);
-            si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
-            mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
-            si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
+            si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
+                          mtls.fep.usrLen, NULL);
+
+            mCtx->launchThreads(ains, inLen, outs[ct], NULL, &mtls);
+
+            si->postLaunch(slot, ains, inLen, outs[ct], NULL, 0, NULL);
         }
     } else {
         ScriptList sl;
@@ -214,6 +246,18 @@
         sl.kernels = kernels.array();
         sl.count = kernels.size();
 
+        uint32_t inLen;
+        const Allocation **ains;
+
+        if (ins[0] == NULL) {
+            inLen = 0;
+            ains  = NULL;
+
+        } else {
+            inLen = 1;
+            ains  = const_cast<const Allocation**>(&ins[0]);
+        }
+
         Vector<const void *> usrPtrs;
         Vector<const void *> fnPtrs;
         Vector<uint32_t> sigs;
@@ -225,7 +269,8 @@
             fnPtrs.add((void *)mtls.kernel);
             usrPtrs.add(mtls.fep.usr);
             sigs.add(mtls.fep.usrLen);
-            si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
+            si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
+                          mtls.fep.usr, mtls.fep.usrLen, NULL);
         }
         sl.sigs = sigs.array();
         sl.usrPtrs = usrPtrs.array();
@@ -235,16 +280,20 @@
 
         Script *s = kernels[0]->mScript;
         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
-        si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
+
+        si->forEachMtlsSetup(ains, inLen, outs[0], NULL, 0, NULL, &mtls);
+
         mtls.script = NULL;
         mtls.kernel = (void (*)())&scriptGroupRoot;
         mtls.fep.usr = &sl;
-        mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
+
+        mCtx->launchThreads(ains, inLen, outs[0], NULL, &mtls);
 
         for (size_t ct=0; ct < kernels.size(); ct++) {
             Script *s = kernels[ct]->mScript;
             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
-            si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
+            si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], NULL, 0,
+                           NULL);
         }
     }
 }