Issue proper parameters to root() calls for RS.

BUG=5076887

Change-Id: Ic0ce9d45ee20a99add4c780233c0804d05a55e96
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 176dd18..44ea79c 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -45,6 +45,7 @@
     InvokeFunc_t *mInvokeFunctions;
     void ** mFieldAddress;
     bool * mFieldIsObject;
+    const uint32_t *mExportForEachSignatureList;
 
     const uint8_t * mScriptText;
     uint32_t mScriptTextLength;
@@ -74,6 +75,7 @@
     size_t exportFuncCount = 0;
     size_t exportVarCount = 0;
     size_t objectSlotCount = 0;
+    size_t exportForEachSignatureCount = 0;
 
     DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
     if (drv == NULL) {
@@ -153,6 +155,10 @@
         }
     }
 
+    exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
+    rsAssert(exportForEachSignatureCount <= 1);
+    drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
+
     // Copy info over to runtime
     script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
     script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
@@ -179,6 +185,7 @@
 typedef struct {
     Context *rsc;
     Script *script;
+    uint32_t sig;
     const Allocation * ain;
     Allocation * aout;
     const void * usr;
@@ -206,7 +213,7 @@
     uint32_t dimZ;
     uint32_t dimArray;
 } MTLaunchStruct;
-typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
+typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
 
 static void wc_xy(void *usr, uint32_t idx) {
     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
@@ -214,6 +221,8 @@
     memset(&p, 0, sizeof(p));
     p.usr = mtls->usr;
     p.usr_len = mtls->usrLen;
+    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
+    uint32_t sig = mtls->sig;
 
     while (1) {
         uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
@@ -234,7 +243,7 @@
             for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) {
                 p.in = xPtrIn;
                 p.out = xPtrOut;
-                ((rs_t)mtls->script->mHal.info.root) (p.in, p.out, p.usr, p.x, p.y, 0, 0);
+                dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
                 xPtrIn += mtls->eStrideIn;
                 xPtrOut += mtls->eStrideOut;
             }
@@ -248,6 +257,8 @@
     memset(&p, 0, sizeof(p));
     p.usr = mtls->usr;
     p.usr_len = mtls->usrLen;
+    RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
+    uint32_t sig = mtls->sig;
 
     while (1) {
         uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
@@ -265,7 +276,7 @@
         for (p.x = xStart; p.x < xEnd; p.x++) {
             p.in = xPtrIn;
             p.out = xPtrOut;
-            ((rs_t)mtls->script->mHal.info.root) (p.in, p.out, p.usr, p.x, 0, 0, 0);
+            dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
             xPtrIn += mtls->eStrideIn;
             xPtrOut += mtls->eStrideOut;
         }
@@ -286,6 +297,10 @@
     MTLaunchStruct mtls;
     memset(&mtls, 0, sizeof(mtls));
 
+    DrvScript *drv = (DrvScript *)s->mHal.drv;
+    // We only support slot 0 (root) at this point in time.
+    rsAssert(slot == 0);
+    mtls.sig = drv->mExportForEachSignatureList[slot];
     if (ain) {
         mtls.dimX = ain->getType()->getDimX();
         mtls.dimY = ain->getType()->getDimY();
@@ -369,6 +384,7 @@
         memset(&p, 0, sizeof(p));
         p.usr = mtls.usr;
         p.usr_len = mtls.usrLen;
+        uint32_t sig = mtls.sig;
 
         //LOGE("launch 3");
         for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
@@ -383,7 +399,7 @@
                     for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) {
                         p.in = xPtrIn;
                         p.out = xPtrOut;
-                        ((rs_t)s->mHal.info.root) (p.in, p.out, p.usr, p.x, p.y, p.z, p.ar[0]);
+                        dc->mForEachLaunch[sig](&s->mHal.info.root, &p);
                         xPtrIn += mtls.eStrideIn;
                         xPtrOut += mtls.eStrideOut;
                     }
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index 38f6895..171d045 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -43,6 +43,7 @@
 
 static void Shutdown(Context *rsc);
 static void SetPriority(const Context *rsc, int32_t priority);
+static void initForEach(outer_foreach_t* forEachLaunch);
 
 static RsdHalFunctions FunctionTable = {
     rsdGLInit,
@@ -206,6 +207,8 @@
     rsdgThreadTLSKeyCount++;
     pthread_mutex_unlock(&rsdgInitMutex);
 
+    initForEach(dc->mForEachLaunch);
+
     dc->mTlsStruct.mContext = rsc;
     dc->mTlsStruct.mScript = NULL;
     int status = pthread_setspecific(rsdgThreadTLSKey, &dc->mTlsStruct);
@@ -287,4 +290,112 @@
 
 }
 
+static void rsdForEach17(const void *vRoot,  // calls *vRoot as root(in, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->y);
+}
+
+static void rsdForEach18(const void *vRoot,  // calls *vRoot as root(out, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(void *, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->out, p->y);
+}
+
+static void rsdForEach19(const void *vRoot,  // calls *vRoot as root(in, out, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, void *, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->out, p->y);
+}
+
+static void rsdForEach21(const void *vRoot,  // calls *vRoot as root(in, usr, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, const void *, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->usr, p->y);
+}
+
+static void rsdForEach22(const void *vRoot,  // calls *vRoot as root(out, usr, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(void *, const void *, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->out, p->usr, p->y);
+}
+
+static void rsdForEach23(const void *vRoot,  // calls *vRoot as root(in, out, usr, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, void *, const void *, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->out, p->usr, p->y);
+}
+
+static void rsdForEach25(const void *vRoot,  // calls *vRoot as root(in, x, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, uint32_t, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->x, p->y);
+}
+
+static void rsdForEach26(const void *vRoot,  // calls *vRoot as root(out, x, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(void *, uint32_t, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->out, p->x, p->y);
+}
+
+static void rsdForEach27(const void *vRoot,  // calls *vRoot as root(in, out, x, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, void *, uint32_t, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->out, p->x, p->y);
+}
+
+static void rsdForEach29(const void *vRoot,  // calls *vRoot as root(in, usr, x, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, const void *, uint32_t, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->usr, p->x, p->y);
+}
+
+static void rsdForEach30(const void *vRoot,  // calls *vRoot as root(out, usr, x, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(void *, const void *, uint32_t, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->out, p->usr, p->x, p->y);
+}
+
+static void rsdForEach31(const void *vRoot,  // calls *vRoot as root(in, out, usr, x, y)
+        const android::renderscript::RsForEachStubParamStruct *p) {
+    typedef void (*root_fn_t)(const void *, void *, const void *, uint32_t, uint32_t);
+    (*static_cast<const root_fn_t *>(vRoot))(p->in, p->out, p->usr, p->x, p->y);
+}
+
+
+static void initForEach(outer_foreach_t* forEachLaunch) {
+    // Fills the 32-entry launch table, indexed by forEach signature. Signature bits, low to
+    // high: in, out, usr, x, y. A slot's stub may pass trailing arguments beyond those its
+    // signature declares (e.g. "in" alone is launched through rsdForEach31).
+    enum { kIn = 0x01, kOut = 0x02, kUsr = 0x04, kX = 0x08, kY = 0x10 };
+    rsAssert(forEachLaunch);
+    // Signatures with neither in nor out (every multiple of 4) get no stub.
+    for (uint32_t sig = 0; sig < 32; sig += 4) {
+        forEachLaunch[sig] = NULL;
+    }
+    forEachLaunch[kIn]                          = rsdForEach31;
+    forEachLaunch[kOut]                         = rsdForEach30;
+    forEachLaunch[kIn | kOut]                   = rsdForEach31;
+    forEachLaunch[kIn | kUsr]                   = rsdForEach29;
+    forEachLaunch[kOut | kUsr]                  = rsdForEach30;
+    forEachLaunch[kIn | kOut | kUsr]            = rsdForEach31;
+    forEachLaunch[kIn | kX]                     = rsdForEach25;
+    forEachLaunch[kOut | kX]                    = rsdForEach26;
+    forEachLaunch[kIn | kOut | kX]              = rsdForEach27;
+    forEachLaunch[kIn | kUsr | kX]              = rsdForEach29;
+    forEachLaunch[kOut | kUsr | kX]             = rsdForEach30;
+    forEachLaunch[kIn | kOut | kUsr | kX]       = rsdForEach31;
+    forEachLaunch[kIn | kY]                     = rsdForEach17;
+    forEachLaunch[kOut | kY]                    = rsdForEach18;
+    forEachLaunch[kIn | kOut | kY]              = rsdForEach19;
+    forEachLaunch[kIn | kUsr | kY]              = rsdForEach21;
+    forEachLaunch[kOut | kUsr | kY]             = rsdForEach22;
+    forEachLaunch[kIn | kOut | kUsr | kY]       = rsdForEach23;
+    forEachLaunch[kIn | kX | kY]                = rsdForEach25;
+    forEachLaunch[kOut | kX | kY]               = rsdForEach26;
+    forEachLaunch[kIn | kOut | kX | kY]         = rsdForEach27;
+    forEachLaunch[kIn | kUsr | kX | kY]         = rsdForEach29;
+    forEachLaunch[kOut | kUsr | kX | kY]        = rsdForEach30;
+    forEachLaunch[kIn | kOut | kUsr | kX | kY]  = rsdForEach31;
+}
 
diff --git a/driver/rsdCore.h b/driver/rsdCore.h
index f393b60..159b72a 100644
--- a/driver/rsdCore.h
+++ b/driver/rsdCore.h
@@ -27,6 +27,9 @@
 typedef void (* InvokeFunc_t)(void);
 typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
 
+typedef void (*outer_foreach_t)(const void *vRoot,  // vRoot: address of the root() pointer
+    const android::renderscript::RsForEachStubParamStruct *p);
+
 typedef struct RsdSymbolTableRec {
     const char * mName;
     void * mPtr;
@@ -57,6 +60,8 @@
     Workers mWorkers;
     bool mExit;
 
+    outer_foreach_t mForEachLaunch[32];
+
     ScriptTLSStruct mTlsStruct;
 
     RsdGL gl;