Merge script groups.

Change-Id: Id5baf5e7c59a004127250fced91b6b33b1fe053b
diff --git a/driver/rsdScriptGroup.cpp b/driver/rsdScriptGroup.cpp
index b19678d..f4f0f1c 100644
--- a/driver/rsdScriptGroup.cpp
+++ b/driver/rsdScriptGroup.cpp
@@ -25,6 +25,7 @@
 #include "rsScriptGroup.h"
 #include "rsdScriptGroup.h"
 #include "rsdBcc.h"
+#include "rsdAllocation.h"
 
 using namespace android;
 using namespace android::renderscript;
@@ -47,12 +48,88 @@
                              android::renderscript::Allocation *) {
 }
 
+struct ScriptList {  // Parallel per-kernel arrays passed to ScriptGroupRoot through p->usr; index i describes kernel i.
+    size_t count;  // number of entries in each array; loop bound in ScriptGroupRoot
+    Allocation *const* ins;  // input allocation per kernel, NULL when the kernel has none
+    bool const* inExts;  // true when the input was taken from the script group's own inputs (sg->mInputs)
+    Allocation *const* outs;  // output allocation per kernel, NULL when the kernel has none
+    bool const* outExts;  // true when the output was taken from the script group's own outputs (sg->mOutputs)
+    const void *const* usrPtrs;  // value stored into p->usr before each kernel call (intrinsic data, or NULL)
+    size_t const *usrSizes;  // NOTE(review): neither assigned nor read anywhere in the visible code
+    uint32_t const *sigs;  // per-kernel foreach signature (0 for intrinsics); not read by ScriptGroupRoot
+    const void *const* fnPtrs;  // kernel entry points; ScriptGroupRoot casts them to ScriptGroupRootFunc_t
+
+    const ScriptKernelID *const* kernels;  // kernel IDs the other arrays were built from; not read by ScriptGroupRoot
+};
+
+typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,  // call signature applied to ScriptList::fnPtrs
+                                      uint32_t xstart, uint32_t xend,
+                                      uint32_t instep, uint32_t outstep);
+
+static void ScriptGroupRoot(const RsForEachStubParamStruct *p,
+                            uint32_t xstart, uint32_t xend,
+                            uint32_t instep, uint32_t outstep) {
+    // Calls each kernel of the ScriptList found in p->usr in order, passing the same xstart/xend/instep/outstep.
+    const ScriptList *sl = (const ScriptList *)p->usr;
+    RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;  // non-const alias: p is rewritten in place per kernel
+    const void *oldUsr = p->usr;  // saved so it can be put back after the loop
+
+    for(size_t ct=0; ct < sl->count; ct++) {
+        ScriptGroupRootFunc_t func;
+        func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
+        mp->usr = sl->usrPtrs[ct];
+        // Reset the data pointers so nothing set for the previous kernel carries over.
+        mp->ptrIn = NULL;
+        mp->in = NULL;
+        mp->ptrOut = NULL;
+        mp->out = NULL;
+        // Input side: only filled in when this kernel has an input allocation.
+        if (sl->ins[ct]) {
+            DrvAllocation *drv = (DrvAllocation *)sl->ins[ct]->mHal.drv;
+            mp->ptrIn = (const uint8_t *)drv->lod[0].mallocPtr;
+            mp->in = mp->ptrIn;  // stays at the base pointer if no offset is applied below
+            if (sl->inExts[ct]) {
+                mp->in = mp->ptrIn + drv->lod[0].stride * p->y;  // group-level input: offset by p->y strides
+            } else {
+                if (drv->lod[0].dimY > p->lid) {  // offset by p->lid strides only while lid is below dimY
+                    mp->in = mp->ptrIn + drv->lod[0].stride * p->lid;
+                }
+            }
+        }
+        // Output side: same selection logic as the input side, using outs/outExts.
+        if (sl->outs[ct]) {
+            DrvAllocation *drv = (DrvAllocation *)sl->outs[ct]->mHal.drv;
+            mp->ptrOut = (uint8_t *)drv->lod[0].mallocPtr;
+            mp->out = mp->ptrOut;  // stays at the base pointer if no offset is applied below
+            if (sl->outExts[ct]) {
+                mp->out = mp->ptrOut + drv->lod[0].stride * p->y;  // group-level output: offset by p->y strides
+            } else {
+                if (drv->lod[0].dimY > p->lid) {  // offset by p->lid strides only while lid is below dimY
+                    mp->out = mp->ptrOut + drv->lod[0].stride * p->lid;
+                }
+            }
+        }
+        // NOTE(review): instep/outstep are forwarded unchanged to every kernel — confirm that is valid for mixed allocations.
+        //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
+        func(p, xstart, xend, instep, outstep);
+    }
+    //ALOGE("script group root");
+
+    // Put back the ScriptList pointer that was in p->usr on entry; the in/out pointers are left as last set.
+
+    mp->usr = oldUsr;
+}
+
+
 void rsdScriptGroupExecute(const android::renderscript::Context *rsc,
                            const android::renderscript::ScriptGroup *sg) {
 
     Vector<Allocation *> ins;
+    Vector<bool> inExts;
     Vector<Allocation *> outs;
+    Vector<bool> outExts;
     Vector<const ScriptKernelID *> kernels;
+    bool fieldDep = false;
 
     for (size_t ct=0; ct < sg->mNodes.size(); ct++) {
         ScriptGroup::Node *n = sg->mNodes[ct];
@@ -71,6 +148,8 @@
             const ScriptKernelID *k = n->mKernels[ct2];
             Allocation *ain = NULL;
             Allocation *aout = NULL;
+            bool inExt = false;
+            bool outExt = false;
 
             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
@@ -81,6 +160,7 @@
             for (size_t ct3=0; ct3 < sg->mInputs.size(); ct3++) {
                 if (sg->mInputs[ct3]->mKernel == k) {
                     ain = sg->mInputs[ct3]->mAlloc.get();
+                    inExt = true;
                     //ALOGE(" io in %p", ain);
                 }
             }
@@ -88,12 +168,16 @@
             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
                 if (n->mOutputs[ct3]->mSource.get() == k) {
                     aout = n->mOutputs[ct3]->mAlloc.get();
+                    if(n->mOutputs[ct3]->mDstField.get() != NULL) {
+                        fieldDep = true;
+                    }
                     //ALOGE(" link out %p", aout);
                 }
             }
             for (size_t ct3=0; ct3 < sg->mOutputs.size(); ct3++) {
                 if (sg->mOutputs[ct3]->mKernel == k) {
                     aout = sg->mOutputs[ct3]->mAlloc.get();
+                    outExt = true;
                     //ALOGE(" io out %p", aout);
                 }
             }
@@ -101,7 +185,9 @@
             if ((k->mHasKernelOutput == (aout != NULL)) &&
                 (k->mHasKernelInput == (ain != NULL))) {
                 ins.add(ain);
+                inExts.add(inExt);
                 outs.add(aout);
+                outExts.add(outExt);
                 kernels.add(k);
             }
         }
@@ -110,33 +196,65 @@
 
     RsdHal * dc = (RsdHal *)rsc->mHal.drv;
     MTLaunchStruct mtls;
-    for (size_t ct=0; ct < ins.size(); ct++) {
 
-        Script *s = kernels[ct]->mScript;
-        DrvScript *drv = (DrvScript *)s->mHal.drv;
-        uint32_t slot = kernels[ct]->mSlot;
+    if(fieldDep) {
+        for (size_t ct=0; ct < ins.size(); ct++) {
+            Script *s = kernels[ct]->mScript;
+            DrvScript *drv = (DrvScript *)s->mHal.drv;
+            uint32_t slot = kernels[ct]->mSlot;
 
-        rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
-        mtls.script = s;
-        mtls.fep.slot = slot;
+            rsdScriptInvokeForEachMtlsSetup(rsc, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+            mtls.script = s;
 
-        if (drv->mIntrinsicID) {
-            mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
-            mtls.fep.usr = drv->mIntrinsicData;
-        } else {
-            mtls.kernel = reinterpret_cast<ForEachFunc_t>(
-                              drv->mExecutable->getExportForeachFuncAddrs()[slot]);
-            rsAssert(mtls.kernel != NULL);
-            mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+            if (drv->mIntrinsicID) {
+                mtls.kernel = (void (*)())drv->mIntrinsicFuncs.root;
+                mtls.fep.usr = drv->mIntrinsicData;
+            } else {
+                mtls.kernel = reinterpret_cast<ForEachFunc_t>(
+                                  drv->mExecutable->getExportForeachFuncAddrs()[slot]);
+                rsAssert(mtls.kernel != NULL);
+                mtls.sig = drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+            }
+
+            rsdScriptLaunchThreads(rsc, s->mHal.info.isThreadable, ins[ct], outs[ct],
+                                   NULL, 0, NULL, &mtls);
         }
+    } else {
+        ScriptList sl;
+        sl.ins = ins.array();
+        sl.outs = outs.array();
+        sl.kernels = kernels.array();
+        sl.count = kernels.size();
 
-//        typedef void (*outer_foreach_t)(
-  //          const android::renderscript::RsForEachStubParamStruct *,
-    //        uint32_t x1, uint32_t x2,
-      //      uint32_t instep, uint32_t outstep);
-        //outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
+        Vector<const void *> usrPtrs;
+        Vector<const void *> fnPtrs;
+        Vector<uint32_t> sigs;
+        for (size_t ct=0; ct < kernels.size(); ct++) {
+            Script *s = kernels[ct]->mScript;
+            DrvScript *drv = (DrvScript *)s->mHal.drv;
 
-        rsdScriptLaunchThreads(rsc, s, slot, ins[ct], outs[ct], NULL, 0, NULL, &mtls);
+            if (drv->mIntrinsicID) {
+                fnPtrs.add((void *)drv->mIntrinsicFuncs.root);
+                usrPtrs.add(drv->mIntrinsicData);
+                sigs.add(0);
+            } else {
+                int slot = kernels[ct]->mSlot;
+                fnPtrs.add((void *)drv->mExecutable->getExportForeachFuncAddrs()[slot]);
+                usrPtrs.add(NULL);
+                sigs.add(drv->mExecutable->getInfo().getExportForeachFuncs()[slot].second);
+            }
+        }
+        sl.sigs = sigs.array();
+        sl.usrPtrs = usrPtrs.array();
+        sl.fnPtrs = fnPtrs.array();
+        sl.inExts = inExts.array();
+        sl.outExts = outExts.array();
+
+        rsdScriptInvokeForEachMtlsSetup(rsc, ins[0], outs[0], NULL, 0, NULL, &mtls);
+        mtls.script = NULL;
+        mtls.kernel = (void (*)())&ScriptGroupRoot;
+        mtls.fep.usr = &sl;
+        rsdScriptLaunchThreads(rsc, true, ins[0], outs[0], NULL, 0, NULL, &mtls);
     }
 
 }