Start implementing control side forEach.

Change-Id: I2d77d908cbb64b26071d9d5a3421f0b195342d2c
diff --git a/rs.spec b/rs.spec
index 998296b..6310cf6 100644
--- a/rs.spec
+++ b/rs.spec
@@ -188,7 +188,6 @@
 
 
 Allocation1DData {
-	handcodeApi
 	param RsAllocation va
 	param uint32_t xoff
 	param uint32_t lod
@@ -197,7 +196,6 @@
 	}
 
 Allocation1DElementData {
-	handcodeApi
 	param RsAllocation va
 	param uint32_t x
 	param uint32_t lod
@@ -290,12 +288,19 @@
 	}
 
 ScriptInvokeV {
-	handcodeApi
 	param RsScript s
 	param uint32_t slot
 	param const void * data
 	}
 
+ScriptForEach {
+	param RsScript s
+	param uint32_t slot
+	param RsAllocation ain
+	param RsAllocation aout
+	param const void * usr
+	}
+
 ScriptSetVarI {
 	param RsScript s
 	param uint32_t slot
@@ -327,7 +332,6 @@
 	}
 
 ScriptSetVarV {
-	handcodeApi
 	param RsScript s
 	param uint32_t slot
 	param const void * data
diff --git a/rsHandcode.h b/rsHandcode.h
index da51d95..e6b722c 100644
--- a/rsHandcode.h
+++ b/rsHandcode.h
@@ -7,90 +7,3 @@
     io->mToCore.commitSync(RS_CMD_ID_ContextFinish, size);
 }
 
-static inline void rsHCAPI_ScriptInvokeV (RsContext rsc, RsScript va, uint32_t slot, const void * data, size_t sizeBytes) {
-    ThreadIO *io = &((Context *)rsc)->mIO;
-    uint32_t size = sizeof(RS_CMD_ScriptInvokeV);
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        size += (sizeBytes + 3) & ~3;
-    }
-    RS_CMD_ScriptInvokeV *cmd = static_cast<RS_CMD_ScriptInvokeV *>(io->mToCore.reserve(size));
-    cmd->s = va;
-    cmd->slot = slot;
-    cmd->data_length = sizeBytes;
-    cmd->data = data;
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        cmd->data = (void *)(cmd+1);
-        memcpy(cmd+1, data, sizeBytes);
-        io->mToCore.commit(RS_CMD_ID_ScriptInvokeV, size);
-    } else {
-        io->mToCore.commitSync(RS_CMD_ID_ScriptInvokeV, size);
-    }
-}
-
-
-static inline void rsHCAPI_ScriptSetVarV (RsContext rsc, RsScript va, uint32_t slot, const void * data, size_t sizeBytes) {
-    ThreadIO *io = &((Context *)rsc)->mIO;
-    uint32_t size = sizeof(RS_CMD_ScriptSetVarV);
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        size += (sizeBytes + 3) & ~3;
-    }
-    RS_CMD_ScriptSetVarV *cmd = static_cast<RS_CMD_ScriptSetVarV *>(io->mToCore.reserve(size));
-    cmd->s = va;
-    cmd->slot = slot;
-    cmd->data_length = sizeBytes;
-    cmd->data = data;
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        cmd->data = (void *)(cmd+1);
-        memcpy(cmd+1, data, sizeBytes);
-        io->mToCore.commit(RS_CMD_ID_ScriptSetVarV, size);
-    } else {
-        io->mToCore.commitSync(RS_CMD_ID_ScriptSetVarV, size);
-    }
-}
-
-static inline void rsHCAPI_Allocation1DData (RsContext rsc, RsAllocation va, uint32_t xoff, uint32_t lod,
-                                             uint32_t count, const void * data, size_t sizeBytes) {
-    ThreadIO *io = &((Context *)rsc)->mIO;
-    uint32_t size = sizeof(RS_CMD_Allocation1DData);
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        size += (sizeBytes + 3) & ~3;
-    }
-    RS_CMD_Allocation1DData *cmd = static_cast<RS_CMD_Allocation1DData *>(io->mToCore.reserve(size));
-    cmd->va = va;
-    cmd->xoff = xoff;
-    cmd->lod = lod;
-    cmd->count = count;
-    cmd->data = data;
-    cmd->data_length = sizeBytes;
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        cmd->data = (void *)(cmd+1);
-        memcpy(cmd+1, data, sizeBytes);
-        io->mToCore.commit(RS_CMD_ID_Allocation1DData, size);
-    } else {
-        io->mToCore.commitSync(RS_CMD_ID_Allocation1DData, size);
-    }
-}
-
-static inline void rsHCAPI_Allocation1DElementData (RsContext rsc, RsAllocation va, uint32_t x, uint32_t lod,
-                                                    const void * data, size_t sizeBytes, uint32_t comp_offset) {
-    ThreadIO *io = &((Context *)rsc)->mIO;
-    uint32_t size = sizeof(RS_CMD_Allocation1DElementData);
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        size += (sizeBytes + 3) & ~3;
-    }
-    RS_CMD_Allocation1DElementData *cmd = static_cast<RS_CMD_Allocation1DElementData *>(io->mToCore.reserve(size));
-    cmd->va = va;
-    cmd->x = x;
-    cmd->lod = lod;
-    cmd->data = data;
-    cmd->comp_offset = comp_offset;
-    cmd->data_length = sizeBytes;
-    if (sizeBytes < DATA_SYNC_SIZE) {
-        cmd->data = (void *)(cmd+1);
-        memcpy(cmd+1, data, sizeBytes);
-        io->mToCore.commit(RS_CMD_ID_Allocation1DElementData, size);
-    } else {
-        io->mToCore.commitSync(RS_CMD_ID_Allocation1DElementData, size);
-    }
-}
-
diff --git a/rsScript.cpp b/rsScript.cpp
index b84014f..7641cab 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -87,6 +87,16 @@
     s->mEnviroment.mTimeZone = timeZone;
 }
 
+// Implementation of the ScriptForEach API entry: casts the opaque handles and calls Script::runForEach.
+void rsi_ScriptForEach(Context *rsc, RsScript vs, uint32_t slot,
+                       RsAllocation vain, RsAllocation vaout,
+                       const void *params, uint32_t paramLen) {
+    const Allocation *input = static_cast<const Allocation *>(vain);
+    Allocation *output = static_cast<Allocation *>(vaout);
+    // NOTE(review): slot is not forwarded to runForEach -- confirm that is intended.
+    static_cast<Script *>(vs)->runForEach(rsc, input, output, params, paramLen);
+}
+
 void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot) {
     Script *s = static_cast<Script *>(vs);
     s->Invoke(rsc, slot, NULL, 0);
diff --git a/rsThreadIO.cpp b/rsThreadIO.cpp
index 6cf07de..6e959a7 100644
--- a/rsThreadIO.cpp
+++ b/rsThreadIO.cpp
@@ -58,7 +58,7 @@
             LOGE("playCoreCommands error con %p, cmd %i", con, cmdID);
             mToCore.printDebugData();
         }
-        gPlaybackFuncs[cmdID](con, data);
+        gPlaybackFuncs[cmdID](con, data, cmdSize << 2);
         mToCore.next();
     }
     return ret;
diff --git a/rsg_generator.c b/rsg_generator.c
index 14b380a..0059f19 100644
--- a/rsg_generator.c
+++ b/rsg_generator.c
@@ -53,6 +53,10 @@
             fprintf(f, "*");
         }
     }
+}
+
+void printVarTypeAndName(FILE *f, const VarType *vt) {
+    printVarType(f, vt);
 
     if (vt->name[0]) {
         fprintf(f, " %s", vt->name);
@@ -65,7 +69,7 @@
         if (ct || assumePrevious) {
             fprintf(f, ", ");
         }
-        printVarType(f, &api->params[ct]);
+        printVarTypeAndName(f, &api->params[ct]);
     }
 }
 
@@ -86,7 +90,7 @@
 
         for (ct2=0; ct2 < api->paramCount; ct2++) {
             fprintf(f, "    ");
-            printVarType(f, &api->params[ct2]);
+            printVarTypeAndName(f, &api->params[ct2]);
             fprintf(f, ";\n");
         }
         fprintf(f, "};\n\n");
@@ -94,7 +98,7 @@
 }
 
 void printFuncDecl(FILE *f, const ApiEntry *api, const char *prefix, int addContext) {
-    printVarType(f, &api->ret);
+    printVarTypeAndName(f, &api->ret);
     fprintf(f, " %s%s (", prefix, api->name);
     if (!api->nocontext) {
         if (addContext) {
@@ -127,6 +131,32 @@
     }
 }
 
+// Returns 1 when |api| has at least one pointer param and every pointer param
+// is a single-level const pointer, i.e. its data may be copied inline behind
+// the command struct. Returns 0 for sync calls, calls with a return value, or
+// when any pointer param is non-const or has more than one level.
+static int hasInlineDataPointers(const ApiEntry * api) {
+    int found = 0;
+    int idx;
+    if (api->sync || api->ret.typeName[0]) {
+        return 0;
+    }
+    for (idx = 0; idx < api->paramCount; idx++) {
+        const VarType *param = &api->params[idx];
+        if (!param->ptrLevel) {
+            continue;
+        }
+        // Non-const pointers cannot be inlined, and deeper indirection is
+        // not handled yet.
+        if (!param->isConst || param->ptrLevel > 1) {
+            return 0;
+        }
+        // Single-level const pointer: its payload can be inlined.
+        found = 1;
+    }
+    return found;
+}
+
 void printApiCpp(FILE *f) {
     int ct;
     int ct2;
@@ -161,28 +191,62 @@
             fprintf(f, ");\n");
         } else {
             fprintf(f, "    ThreadIO *io = &((Context *)rsc)->mIO;\n");
+            fprintf(f, "    uint32_t size = sizeof(RS_CMD_%s);\n", api->name);
+            if (hasInlineDataPointers(api)) {
+                fprintf(f, "    uint32_t dataSize = 0;\n");
+                for (ct2=0; ct2 < api->paramCount; ct2++) {
+                    const VarType *vt = &api->params[ct2];
+                    if (vt->isConst && vt->ptrLevel) {
+                        fprintf(f, "    dataSize += %s_length;\n", vt->name);
+                    }
+                }
+            }
+            // NOTE: reserve() below must also cover payload bytes that are copied inline after the struct.
             //fprintf(f, "    LOGE(\"add command %s\\n\");\n", api->name);
-            fprintf(f, "    RS_CMD_%s *cmd = static_cast<RS_CMD_%s *>(io->mToCore.reserve(sizeof(RS_CMD_%s)));\n", api->name, api->name, api->name);
+            fprintf(f, "    RS_CMD_%s *cmd = static_cast<RS_CMD_%s *>(io->mToCore.reserve(%s));\n", api->name, api->name, hasInlineDataPointers(api) ? "(dataSize < 1024) ? (size + dataSize) : size" : "size");
-            fprintf(f, "    uint32_t size = sizeof(RS_CMD_%s);\n", api->name);
+            if (hasInlineDataPointers(api)) {
+                fprintf(f, "    uint8_t *payload = (uint8_t *)&cmd[1];\n");
+            }
 
             for (ct2=0; ct2 < api->paramCount; ct2++) {
                 const VarType *vt = &api->params[ct2];
                 needFlush += vt->ptrLevel;
-                fprintf(f, "    cmd->%s = %s;\n", vt->name, vt->name);
+                if (vt->ptrLevel && hasInlineDataPointers(api)) {
+                    fprintf(f, "    if (dataSize < 1024) {\n");
+                    fprintf(f, "        memcpy(payload, %s, %s_length);\n", vt->name, vt->name);
+                    fprintf(f, "        cmd->%s = (", vt->name);
+                    printVarType(f, vt);
+                    fprintf(f, ")payload;\n");
+                    fprintf(f, "        payload += %s_length;\n", vt->name);
+                    fprintf(f, "    } else {\n");
+                    fprintf(f, "        cmd->%s = %s;\n", vt->name, vt->name);
+                    fprintf(f, "    }\n");
+
+                } else {
+                    fprintf(f, "    cmd->%s = %s;\n", vt->name, vt->name);
+                }
             }
             if (api->ret.typeName[0]) {
                 needFlush = 1;
             }
 
-            fprintf(f, "    io->mToCore.commit");
-            if (needFlush) {
-                fprintf(f, "Sync");
+            if (hasInlineDataPointers(api)) {
+                fprintf(f, "    if (dataSize < 1024) {\n");
+                fprintf(f, "        io->mToCore.commit(RS_CMD_ID_%s, size + dataSize);\n", api->name);
+                fprintf(f, "    } else {\n");
+                fprintf(f, "        io->mToCore.commitSync(RS_CMD_ID_%s, size);\n", api->name);
+                fprintf(f, "    }\n");
+            } else {
+                fprintf(f, "    io->mToCore.commit");
+                if (needFlush) {
+                    fprintf(f, "Sync");
+                }
+                fprintf(f, "(RS_CMD_ID_%s, size);\n", api->name);
             }
-            fprintf(f, "(RS_CMD_ID_%s, size);\n", api->name);
 
             if (api->ret.typeName[0]) {
                 fprintf(f, "    return reinterpret_cast<");
-                printVarType(f, &api->ret);
+                printVarTypeAndName(f, &api->ret);
                 fprintf(f, ">(io->mToCoreRet);\n");
             }
         }
@@ -212,11 +276,12 @@
             continue;
         }
 
-        fprintf(f, "void rsp_%s(Context *con, const void *vp)\n", api->name);
+        fprintf(f, "void rsp_%s(Context *con, const void *vp, size_t cmdSizeBytes)\n", api->name);
         fprintf(f, "{\n");
 
         //fprintf(f, "    LOGE(\"play command %s\\n\");\n", api->name);
         fprintf(f, "    const RS_CMD_%s *cmd = static_cast<const RS_CMD_%s *>(vp);\n", api->name, api->name);
+
         fprintf(f, "    ");
         if (api->ret.typeName[0]) {
             fprintf(f, "con->mIO.mToCoreRet = (intptr_t)");
@@ -246,6 +311,8 @@
     fprintf(f, "};\n");
 }
 
+int yylex(void);  // NOTE(review): assumes the scanner keeps lex's default int-returning yylex -- confirm against spec.l
+
 int main(int argc, char **argv) {
     if (argc != 3) {
         fprintf(stderr, "usage: %s commandFile outFile\n", argv[0]);
@@ -280,7 +347,7 @@
             printStructures(f);
             printFuncDecls(f, "rsi_", 1);
             printPlaybackFuncs(f, "rsp_");
-            fprintf(f, "\n\ntypedef void (*RsPlaybackFunc)(Context *, const void *);\n");
+            fprintf(f, "\n\ntypedef void (*RsPlaybackFunc)(Context *, const void *, size_t sizeBytes);\n");
             fprintf(f, "extern RsPlaybackFunc gPlaybackFuncs[%i];\n", apiCount + 1);
 
             fprintf(f, "}\n");