Support ForEachExpand on non-root functions.

BUG=6000538

Change-Id: I36e78ced0715b060af0938f1480df240cf6ba707
diff --git a/bcinfo/MetadataExtractor.cpp b/bcinfo/MetadataExtractor.cpp
index 477b7f2..4dfa6a0 100644
--- a/bcinfo/MetadataExtractor.cpp
+++ b/bcinfo/MetadataExtractor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011, The Android Open Source Project
+ * Copyright 2011-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,6 +41,11 @@
 // synced with slang_rs_metadata.h)
 static const llvm::StringRef ExportFuncMetadataName = "#rs_export_func";
 
+// Name of metadata node where exported ForEach name information resides
+// (should be synced with slang_rs_metadata.h)
+static const llvm::StringRef ExportForEachNameMetadataName =
+    "#rs_export_foreach_name";
+
 // Name of metadata node where exported ForEach signature information resides
 // (should be synced with slang_rs_metadata.h)
 static const llvm::StringRef ExportForEachMetadataName = "#rs_export_foreach";
@@ -53,25 +58,33 @@
 MetadataExtractor::MetadataExtractor(const char *bitcode, size_t bitcodeSize)
     : mBitcode(bitcode), mBitcodeSize(bitcodeSize), mExportVarCount(0),
       mExportFuncCount(0), mExportForEachSignatureCount(0),
-      mExportForEachSignatureList(NULL), mPragmaCount(0), mPragmaKeyList(NULL),
-      mPragmaValueList(NULL), mObjectSlotCount(0), mObjectSlotList(NULL) {
+      mExportForEachNameList(NULL), mExportForEachSignatureList(NULL),
+      mPragmaCount(0), mPragmaKeyList(NULL), mPragmaValueList(NULL),
+      mObjectSlotCount(0), mObjectSlotList(NULL) {
 }
 
 
 MetadataExtractor::~MetadataExtractor() {
+  if (mExportForEachNameList) {
+    for (size_t i = 0; i < mExportForEachSignatureCount; i++) {
+        delete [] mExportForEachNameList[i];
+        mExportForEachNameList[i] = NULL;
+    }
+  }
+  delete [] mExportForEachNameList;
+  mExportForEachNameList = NULL;
+
   delete [] mExportForEachSignatureList;
   mExportForEachSignatureList = NULL;
 
-  if (mPragmaCount > 0) {
-    for (size_t i = 0; i < mPragmaCount; i++) {
-      if (mPragmaKeyList) {
-        delete [] mPragmaKeyList[i];
-        mPragmaKeyList[i] = NULL;
-      }
-      if (mPragmaValueList) {
-        delete [] mPragmaValueList[i];
-        mPragmaValueList[i] = NULL;
-      }
+  for (size_t i = 0; i < mPragmaCount; i++) {
+    if (mPragmaKeyList) {
+      delete [] mPragmaKeyList[i];
+      mPragmaKeyList[i] = NULL;
+    }
+    if (mPragmaValueList) {
+      delete [] mPragmaValueList[i];
+      mPragmaValueList[i] = NULL;
     }
   }
   delete [] mPragmaKeyList;
@@ -168,19 +181,25 @@
 
 
 bool MetadataExtractor::populateForEachMetadata(
-    const llvm::NamedMDNode *ExportForEachMetadata) {
-  if (!ExportForEachMetadata) {
+    const llvm::NamedMDNode *Names,
+    const llvm::NamedMDNode *Signatures) {
+  if (!Names || !Signatures) {
     // Handle legacy case for pre-ICS bitcode that doesn't contain a metadata
     // section for ForEach. We generate a full signature for a "root" function
     // which means that we need to set the bottom 5 bits in the mask.
     mExportForEachSignatureCount = 1;
+    char **TmpNameList = new char*[mExportForEachSignatureCount];
+    TmpNameList[0] = new char[5];
+    strncpy(TmpNameList[0], "root", 5);
+
     uint32_t *TmpSigList = new uint32_t[mExportForEachSignatureCount];
     TmpSigList[0] = 0x1f;
+    mExportForEachNameList = (const char**)TmpNameList;
     mExportForEachSignatureList = TmpSigList;
     return true;
   }
 
-  mExportForEachSignatureCount = ExportForEachMetadata->getNumOperands();
+  mExportForEachSignatureCount = Signatures->getNumOperands();
   if (!mExportForEachSignatureCount) {
     return true;
   }
@@ -188,7 +207,7 @@
   uint32_t *TmpSigList = new uint32_t[mExportForEachSignatureCount];
 
   for (size_t i = 0; i < mExportForEachSignatureCount; i++) {
-    llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(i);
+    llvm::MDNode *SigNode = Signatures->getOperand(i);
     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
       llvm::Value *SigVal = SigNode->getOperand(0);
       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
@@ -206,6 +225,16 @@
 
   mExportForEachSignatureList = TmpSigList;
 
+  // Zero-filled: slots with no usable entry in Names stay NULL for delete [].
+  mExportForEachNameList = new const char*[mExportForEachSignatureCount]();
+  for (size_t i = 0, e = Names->getNumOperands();
+       i < mExportForEachSignatureCount && i < e; i++) {
+    llvm::MDNode *Name = Names->getOperand(i);
+    if (Name != NULL && Name->getNumOperands() == 1) {
+      mExportForEachNameList[i] = createStringFromValue(Name->getOperand(0));
+    }
+  }
+
   return true;
 }
 
@@ -235,6 +264,8 @@
       module->getNamedMetadata(ExportVarMetadataName);
   const llvm::NamedMDNode *ExportFuncMetadata =
       module->getNamedMetadata(ExportFuncMetadataName);
+  const llvm::NamedMDNode *ExportForEachNameMetadata =
+      module->getNamedMetadata(ExportForEachNameMetadataName);
   const llvm::NamedMDNode *ExportForEachMetadata =
       module->getNamedMetadata(ExportForEachMetadataName);
   const llvm::NamedMDNode *PragmaMetadata =
@@ -250,7 +281,8 @@
     mExportFuncCount = ExportFuncMetadata->getNumOperands();
   }
 
-  if (!populateForEachMetadata(ExportForEachMetadata)) {
+  if (!populateForEachMetadata(ExportForEachNameMetadata,
+                               ExportForEachMetadata)) {
     ALOGE("Could not populate ForEach signature metadata");
     return false;
   }
diff --git a/bcinfo/tools/main.cpp b/bcinfo/tools/main.cpp
index f74c178..19d1ad3 100644
--- a/bcinfo/tools/main.cpp
+++ b/bcinfo/tools/main.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011, The Android Open Source Project
+ * Copyright 2011-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -86,9 +86,11 @@
 
   printf("exportForEachSignatureCount: %u\n",
          ME->getExportForEachSignatureCount());
+  const char **nameList = ME->getExportForEachNameList();
   const uint32_t *sigList = ME->getExportForEachSignatureList();
   for (size_t i = 0; i < ME->getExportForEachSignatureCount(); i++) {
-    printf("exportForEachSignatureList[%u]: %u\n", i, sigList[i]);
+    printf("exportForEachSignatureList[%zu]: %s - %u\n", i,
+           nameList[i] ? nameList[i] : "(null)", sigList[i]);
   }
 
   printf("pragmaCount: %u\n", ME->getPragmaCount());
diff --git a/include/bcc/bcc.h b/include/bcc/bcc.h
index d4b6681..67a27d9 100644
--- a/include/bcc/bcc.h
+++ b/include/bcc/bcc.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 The Android Open Source Project
+ * Copyright (C) 2010-2012 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -137,6 +137,10 @@
                           size_t funcListSize,
                           void **funcList);
 
+void bccGetExportForEachList(BCCScriptRef script,
+                             size_t forEachListSize,
+                             void **forEachList);
+
 char const *bccGetBuildTime();
 
 char const *bccGetBuildRev();
diff --git a/include/bcc/bcc_cache.h b/include/bcc/bcc_cache.h
index 6d1941f..6809cce 100644
--- a/include/bcc/bcc_cache.h
+++ b/include/bcc/bcc_cache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -77,6 +77,10 @@
   /* dirty hack for libRS */
   /* TODO: This should be removed in the future */
   uint32_t libRS_threadable;
+
+  /* export foreach list section */
+  off_t export_foreach_list_offset;
+  size_t export_foreach_list_size;
 };
 
 struct OBCC_String {
@@ -119,6 +123,11 @@
   void *cached_addr_list[];
 };
 
+struct OBCC_ExportForEachList {
+  size_t count;
+  void *cached_addr_list[];
+};
+
 struct OBCC_Pragma {
   size_t key_strp_index;
   size_t value_strp_index;
diff --git a/include/bcc/bcc_mccache.h b/include/bcc/bcc_mccache.h
index 2988afc..292165d 100644
--- a/include/bcc/bcc_mccache.h
+++ b/include/bcc/bcc_mccache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -75,6 +75,10 @@
   /* dirty hack for libRS */
   /* TODO: This should be removed in the future */
   uint32_t libRS_threadable;
+
+  /* export foreach list section */
+  off_t export_foreach_name_list_offset;
+  size_t export_foreach_name_list_size;
 };
 
 
diff --git a/include/bcinfo/MetadataExtractor.h b/include/bcinfo/MetadataExtractor.h
index e904238..f15693c 100644
--- a/include/bcinfo/MetadataExtractor.h
+++ b/include/bcinfo/MetadataExtractor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2011, The Android Open Source Project
+ * Copyright 2011-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -34,6 +34,7 @@
   size_t mExportVarCount;
   size_t mExportFuncCount;
   size_t mExportForEachSignatureCount;
+  const char **mExportForEachNameList;
   const uint32_t *mExportForEachSignatureList;
 
   size_t mPragmaCount;
@@ -44,7 +45,8 @@
   const uint32_t *mObjectSlotList;
 
   // Helper functions for extraction
-  bool populateForEachMetadata(const llvm::NamedMDNode *ExportForEachMetadata);
+  bool populateForEachMetadata(const llvm::NamedMDNode *Names,
+                               const llvm::NamedMDNode *Signatures);
   bool populateObjectSlotMetadata(const llvm::NamedMDNode *ObjectSlotMetadata);
   void populatePragmaMetadata(const llvm::NamedMDNode *PragmaMetadata);
 
@@ -95,6 +97,13 @@
   }
 
   /**
+   * \return array of ForEach function names.
+   */
+  const char **getExportForEachNameList() const {
+    return mExportForEachNameList;
+  }
+
+  /**
    * \return number of pragmas contained in pragmaKeyList and pragmaValueList.
    */
   size_t getPragmaCount() const {
diff --git a/lib/ExecutionEngine/Compiler.cpp b/lib/ExecutionEngine/Compiler.cpp
index ed2130b..cee5daa 100644
--- a/lib/ExecutionEngine/Compiler.cpp
+++ b/lib/ExecutionEngine/Compiler.cpp
@@ -126,6 +126,16 @@
 // synced with slang_rs_metadata.h)
 const llvm::StringRef Compiler::ExportFuncMetadataName = "#rs_export_func";
 
+// Name of metadata node where exported ForEach name information resides
+// (should be synced with slang_rs_metadata.h)
+const llvm::StringRef Compiler::ExportForEachNameMetadataName =
+    "#rs_export_foreach_name";
+
+// Name of metadata node where exported ForEach signature information resides
+// (should be synced with slang_rs_metadata.h)
+const llvm::StringRef Compiler::ExportForEachMetadataName =
+    "#rs_export_foreach";
+
 // Name of metadata node where RS object slot info resides (should be
 // synced with slang_rs_metadata.h)
 const llvm::StringRef Compiler::ObjectSlotMetadataName = "#rs_object_slots";
@@ -150,7 +160,7 @@
   {
     std::string Err;
     llvm::Target const *Target = llvm::TargetRegistry::lookupTarget(Triple, Err);
-    if (Target == NULL) {
+    if (Target != NULL) {
       ArchType = llvm::Triple::getArchTypeForLLVMName(Target->getName());
     } else {
       ArchType = llvm::Triple::UnknownArch;
@@ -306,8 +316,14 @@
   llvm::NamedMDNode const *PragmaMetadata;
   llvm::NamedMDNode const *ExportVarMetadata;
   llvm::NamedMDNode const *ExportFuncMetadata;
+  llvm::NamedMDNode const *ExportForEachNameMetadata;
+  llvm::NamedMDNode const *ExportForEachMetadata;
   llvm::NamedMDNode const *ObjectSlotMetadata;
 
+  std::vector<std::string> ForEachNameList;
+  std::vector<std::string> ForEachExpandList;
+  std::vector<uint32_t> forEachSigList;
+
   if (mModule == NULL)  // No module was loaded
     return 0;
 
@@ -345,14 +361,56 @@
   // Load named metadata
   ExportVarMetadata = mModule->getNamedMetadata(ExportVarMetadataName);
   ExportFuncMetadata = mModule->getNamedMetadata(ExportFuncMetadataName);
+  ExportForEachNameMetadata =
+      mModule->getNamedMetadata(ExportForEachNameMetadataName);
+  ExportForEachMetadata =
+      mModule->getNamedMetadata(ExportForEachMetadataName);
   PragmaMetadata = mModule->getNamedMetadata(PragmaMetadataName);
   ObjectSlotMetadata = mModule->getNamedMetadata(ObjectSlotMetadataName);
 
-  runInternalPasses();
+  if (ExportForEachNameMetadata) {
+    for (int i = 0, e = ExportForEachNameMetadata->getNumOperands();
+         i != e;
+         i++) {
+      llvm::MDNode *ExportForEach = ExportForEachNameMetadata->getOperand(i);
+      if (ExportForEach != NULL && ExportForEach->getNumOperands() > 0) {
+        llvm::Value *ExportForEachNameMDS = ExportForEach->getOperand(0);
+        if (ExportForEachNameMDS->getValueID() == llvm::Value::MDStringVal) {
+          llvm::StringRef ExportForEachName =
+            static_cast<llvm::MDString*>(ExportForEachNameMDS)->getString();
+          ForEachNameList.push_back(ExportForEachName.str());
+          std::string ExpandName = ExportForEachName.str() + ".expand";
+          ForEachExpandList.push_back(ExpandName);
+        }
+      }
+    }
+  }
+
+  if (ExportForEachMetadata) {
+    for (int i = 0, e = ExportForEachMetadata->getNumOperands(); i != e; i++) {
+      llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(i);
+      if (SigNode != NULL && SigNode->getNumOperands() == 1) {
+        llvm::Value *SigVal = SigNode->getOperand(0);
+        if (SigVal->getValueID() == llvm::Value::MDStringVal) {
+          llvm::StringRef SigString =
+              static_cast<llvm::MDString*>(SigVal)->getString();
+          uint32_t Signature = 0;
+          if (SigString.getAsInteger(10, Signature)) {
+            ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
+            goto on_bcc_compile_error;
+          }
+          forEachSigList.push_back(Signature);
+        }
+      }
+    }
+  }
+
+  runInternalPasses(ForEachNameList, forEachSigList);
 
   // Perform link-time optimization if we have multiple modules
   if (mHasLinked) {
-    runLTO(new llvm::TargetData(*TD), ExportVarMetadata, ExportFuncMetadata);
+    runLTO(new llvm::TargetData(*TD), ExportVarMetadata, ExportFuncMetadata,
+           ForEachExpandList);
   }
 
   // Perform code generation
@@ -435,6 +493,29 @@
     }
   }
 
+  if (ExportForEachNameMetadata) {
+    ScriptCompiled::ExportForEachList &forEachList = mpResult->mExportForEach;
+    std::vector<std::string> &ForEachNameList = mpResult->mExportForEachName;
+
+    for (int i = 0, e = ExportForEachNameMetadata->getNumOperands();
+         i != e;
+         i++) {
+      llvm::MDNode *ExportForEach = ExportForEachNameMetadata->getOperand(i);
+      if (ExportForEach != NULL && ExportForEach->getNumOperands() > 0) {
+        llvm::Value *ExportForEachNameMDS = ExportForEach->getOperand(0);
+        if (ExportForEachNameMDS->getValueID() == llvm::Value::MDStringVal) {
+          llvm::StringRef ExportForEachName =
+            static_cast<llvm::MDString*>(ExportForEachNameMDS)->getString();
+          std::string Name = ExportForEachName.str() + ".expand";
+
+          forEachList.push_back(
+              rsloaderGetSymbolAddress(mRSExecutable, Name.c_str()));
+          ForEachNameList.push_back(Name);
+        }
+      }
+    }
+  }
+
 #if DEBUG_MCJIT_DISASSEMBLER
   {
     // Get MC codegen emitted function name list
@@ -685,11 +766,12 @@
 }
 #endif // USE_MCJIT
 
-int Compiler::runInternalPasses() {
+int Compiler::runInternalPasses(std::vector<std::string>& Names,
+                                std::vector<uint32_t>& Signatures) {
   llvm::PassManager BCCPasses;
 
   // Expand ForEach on CPU path to reduce launch overhead.
-  BCCPasses.add(createForEachExpandPass());
+  BCCPasses.add(createForEachExpandPass(Names, Signatures));
 
   BCCPasses.run(*mModule);
 
@@ -698,7 +780,8 @@
 
 int Compiler::runLTO(llvm::TargetData *TD,
                      llvm::NamedMDNode const *ExportVarMetadata,
-                     llvm::NamedMDNode const *ExportFuncMetadata) {
+                     llvm::NamedMDNode const *ExportFuncMetadata,
+                     std::vector<std::string>& ForEachExpandList) {
   llvm::PassManager LTOPasses;
 
   // Add TargetData to LTO passes
@@ -737,12 +820,15 @@
     }
   }
 
+  for (int i = 0, e = ForEachExpandList.size(); i != e; i++) {
+    ExportSymbols.push_back(ForEachExpandList[i].c_str());
+  }
+
   // TODO(logan): Remove this after we have finished the
   // bccMarkExternalSymbol API.
 
   // root(), init(), and .rs.dtor() are born to be exported
   ExportSymbols.push_back("root");
-  ExportSymbols.push_back("root.expand");
   ExportSymbols.push_back("init");
   ExportSymbols.push_back(".rs.dtor");
 
diff --git a/lib/ExecutionEngine/Compiler.h b/lib/ExecutionEngine/Compiler.h
index b99588a..8cc3d2e 100644
--- a/lib/ExecutionEngine/Compiler.h
+++ b/lib/ExecutionEngine/Compiler.h
@@ -80,6 +80,8 @@
     static const llvm::StringRef PragmaMetadataName;
     static const llvm::StringRef ExportVarMetadataName;
     static const llvm::StringRef ExportFuncMetadataName;
+    static const llvm::StringRef ExportForEachNameMetadataName;
+    static const llvm::StringRef ExportForEachMetadataName;
     static const llvm::StringRef ObjectSlotMetadataName;
 
     friend class CodeEmitter;
@@ -176,11 +178,13 @@
 #if USE_MCJIT
     static void *resolveSymbolAdapter(void *context, char const *name);
 #endif
-    int runInternalPasses();
+    int runInternalPasses(std::vector<std::string>& Names,
+                          std::vector<uint32_t>& Signatures);
 
     int runLTO(llvm::TargetData *TD,
                llvm::NamedMDNode const *ExportVarMetadata,
-               llvm::NamedMDNode const *ExportFuncMetadata);
+               llvm::NamedMDNode const *ExportFuncMetadata,
+               std::vector<std::string>& ForEachExpandList);
 
     bool hasError() const {
       return !mError.empty();
diff --git a/lib/ExecutionEngine/MCCacheReader.cpp b/lib/ExecutionEngine/MCCacheReader.cpp
index 4e84a73..57499b6 100644
--- a/lib/ExecutionEngine/MCCacheReader.cpp
+++ b/lib/ExecutionEngine/MCCacheReader.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -60,6 +60,7 @@
              && readObjFile()
              && readVarNameList()
              && readFuncNameList()
+             && readForEachNameList()
              //&& relocate()
              ;
 
@@ -383,6 +384,30 @@
   return true;
 }
 
+// Loads the export_foreach_name_list section and resolves every listed name
+// to an address through rsloaderGetSymbolAddress.
+bool MCCacheReader::readForEachNameList() {
+  CACHE_READER_READ_SECTION(OBCC_String_Ptr, mpForEachNameList, export_foreach_name_list);
+  vector<char const *> const &strPool = mpResult->mStringPool;
+  size_t const count = export_foreach_name_list_raw->count;
+  OBCC_ExportForEachList *addrs = (OBCC_ExportForEachList*)
+      malloc(sizeof(size_t) + sizeof(void*) * count);
+  mpResult->mpExportForEach = addrs;
+  if (!addrs) {
+    ALOGE("Unable to allocate for mpExportForEach\n");
+    return false;
+  }
+  addrs->count = count;
+  for (size_t i = 0; i < count; ++i) {
+    char const *name = strPool[export_foreach_name_list_raw->strp_indexs[i]];
+    addrs->cached_addr_list[i] = rsloaderGetSymbolAddress(mpResult->mRSExecutable, name);
+#if DEBUG_MCJIT_REFLECT
+    ALOGE("Get foreach function address: %s -> %p", name, addrs->cached_addr_list[i]);
+#endif
+  }
+  return true;
+}
+
 bool MCCacheReader::readPragmaList() {
   CACHE_READER_READ_SECTION(OBCC_PragmaList, mpPragmaList, pragma_list);
 
diff --git a/lib/ExecutionEngine/MCCacheReader.h b/lib/ExecutionEngine/MCCacheReader.h
index 7fcbe41..1e59111 100644
--- a/lib/ExecutionEngine/MCCacheReader.h
+++ b/lib/ExecutionEngine/MCCacheReader.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,6 +46,7 @@
 
     OBCC_String_Ptr *mpVarNameList;
     OBCC_String_Ptr *mpFuncNameList;
+    OBCC_String_Ptr *mpForEachNameList;
 
     llvm::OwningPtr<ScriptCached> mpResult;
 
@@ -61,7 +62,7 @@
     MCCacheReader()
       : mObjFile(NULL), mInfoFile(NULL), mInfoFileSize(0), mpHeader(NULL),
         mpCachedDependTable(NULL), mpPragmaList(NULL),
-        mpVarNameList(NULL), mpFuncNameList(NULL),
+        mpVarNameList(NULL), mpFuncNameList(NULL), mpForEachNameList(NULL),
         mIsContextSlotNotAvail(false) {
     }
 
@@ -97,6 +98,7 @@
 
     bool readVarNameList();
     bool readFuncNameList();
+    bool readForEachNameList();
 
     bool checkFileSize();
     bool checkHeader();
diff --git a/lib/ExecutionEngine/MCCacheWriter.cpp b/lib/ExecutionEngine/MCCacheWriter.cpp
index 38fcd3e..cca724e 100644
--- a/lib/ExecutionEngine/MCCacheWriter.cpp
+++ b/lib/ExecutionEngine/MCCacheWriter.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,6 +63,7 @@
              && preparePragmaList()
              && prepareExportVarNameList()
              && prepareExportFuncNameList()
+             && prepareExportForEachNameList()
              && prepareStringPool()
              && prepareObjectSlotList()
              && calcSectionOffset()
@@ -262,6 +263,30 @@
 }
 
 
+// Builds the export ForEach name list section: a count followed by one
+// addString() result per name obtained from the owning script.
+bool MCCacheWriter::prepareExportForEachNameList() {
+  size_t const nameCount = mpOwner->getExportForEachCount();
+  size_t const sectionSize =
+      sizeof(OBCC_String_Ptr) + sizeof(size_t) * nameCount;
+  OBCC_String_Ptr *section = (OBCC_String_Ptr*)malloc(sectionSize);
+  if (!section) {
+    ALOGE("Unable to allocate for export forEach name list\n");
+    return false;
+  }
+  mpExportForEachNameListSection = section;
+  mpHeaderSection->export_foreach_name_list_size = sectionSize;
+  section->count = nameCount;
+
+  mpOwner->getExportForEachNameList(forEachNameList);
+  for (size_t i = 0; i < nameCount; ++i) {
+    std::string const &name = forEachNameList[i];
+    section->strp_indexs[i] = addString(name.c_str(), name.length());
+  }
+  return true;
+}
+
+
 bool MCCacheWriter::prepareObjectSlotList() {
   size_t objectSlotCount = mpOwner->getObjectSlotCount();
 
@@ -308,6 +333,7 @@
   OFFSET_INCREASE(object_slot_list);
   OFFSET_INCREASE(export_var_name_list);
   OFFSET_INCREASE(export_func_name_list);
+  OFFSET_INCREASE(export_foreach_name_list);
 
 #undef OFFSET_INCREASE
 
@@ -345,6 +371,7 @@
 
   WRITE_SECTION_SIMPLE(export_var_name_list, mpExportVarNameListSection);
   WRITE_SECTION_SIMPLE(export_func_name_list, mpExportFuncNameListSection);
+  WRITE_SECTION_SIMPLE(export_foreach_name_list, mpExportForEachNameListSection);
 
 #undef WRITE_SECTION_SIMPLE
 #undef WRITE_SECTION
diff --git a/lib/ExecutionEngine/MCCacheWriter.h b/lib/ExecutionEngine/MCCacheWriter.h
index bd395d5..58f6ef5 100644
--- a/lib/ExecutionEngine/MCCacheWriter.h
+++ b/lib/ExecutionEngine/MCCacheWriter.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -48,9 +48,11 @@
 
     OBCC_String_Ptr *mpExportVarNameListSection;
     OBCC_String_Ptr *mpExportFuncNameListSection;
+    OBCC_String_Ptr *mpExportForEachNameListSection;
 
     std::vector<std::string> varNameList;
     std::vector<std::string> funcNameList;
+    std::vector<std::string> forEachNameList;
 
   public:
     MCCacheWriter()
@@ -81,6 +83,7 @@
 
     bool prepareExportVarNameList();
     bool prepareExportFuncNameList();
+    bool prepareExportForEachNameList();
 
     bool writeAll();
 
diff --git a/lib/ExecutionEngine/Script.cpp b/lib/ExecutionEngine/Script.cpp
index adccedc..ddd9933 100644
--- a/lib/ExecutionEngine/Script.cpp
+++ b/lib/ExecutionEngine/Script.cpp
@@ -1,5 +1,5 @@
 /*
- * copyright 2010, the android open source project
+ * copyright 2010-2012, the android open source project
  *
  * licensed under the apache license, version 2.0 (the "license");
  * you may not use this file except in compliance with the license.
@@ -530,6 +530,25 @@
 }
 
 
+// Reports how many ForEach functions the script exports.
+// The count is taken from the compiled script or, when USE_CACHE is enabled,
+// from the cached script.
+size_t Script::getExportForEachCount() const {
+  if (mStatus == ScriptStatus::Compiled) {
+    return mCompiled->getExportForEachCount();
+  }
+
+#if USE_CACHE
+  if (mStatus == ScriptStatus::Cached) {
+    return mCached->getExportForEachCount();
+  }
+#endif
+
+  // Every other status yields zero.
+  return 0;
+}
+
+
 size_t Script::getPragmaCount() const {
   switch (mStatus) {
     case ScriptStatus::Compiled: {
@@ -652,6 +671,37 @@
   }
 }
 
+// Delegates to the cached (USE_CACHE only) or compiled script; for any other
+// status the error code is set to BCC_INVALID_OPERATION.
+void Script::getExportForEachList(size_t funcListSize, void **funcList) {
+  switch (mStatus) {
+#if USE_CACHE
+    case ScriptStatus::Cached:
+      mCached->getExportForEachList(funcListSize, funcList);
+      break;
+#endif
+
+    case ScriptStatus::Compiled:
+      mCompiled->getExportForEachList(funcListSize, funcList);
+      break;
+
+    default:
+      mErrorCode = BCC_INVALID_OPERATION;
+      break;
+  }
+}
+
+// Asks the compiled script to fill forEachList with its ForEach names.
+// In any status other than Compiled the list is left untouched and the
+// error code is set to BCC_INVALID_OPERATION.
+void Script::getExportForEachNameList(std::vector<std::string> &forEachList) {
+  if (mStatus != ScriptStatus::Compiled) {
+    mErrorCode = BCC_INVALID_OPERATION;
+    return;
+  }
+
+  mCompiled->getExportForEachNameList(forEachList);
+}
 
 void Script::getPragmaList(size_t pragmaListSize,
                            char const **keyList,
diff --git a/lib/ExecutionEngine/Script.h b/lib/ExecutionEngine/Script.h
index e9f7b09..27931e6 100644
--- a/lib/ExecutionEngine/Script.h
+++ b/lib/ExecutionEngine/Script.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -200,6 +200,8 @@
 
     size_t getExportFuncCount() const;
 
+    size_t getExportForEachCount() const;
+
     size_t getPragmaCount() const;
 
     size_t getFuncCount() const;
@@ -210,10 +212,14 @@
 
     void getExportFuncList(size_t size, void **list);
 
+    void getExportForEachList(size_t size, void **list);
+
     void getExportVarNameList(std::vector<std::string> &list);
 
     void getExportFuncNameList(std::vector<std::string> &list);
 
+    void getExportForEachNameList(std::vector<std::string> &list);
+
     void getPragmaList(size_t size,
                        char const **keyList,
                        char const **valueList);
diff --git a/lib/ExecutionEngine/ScriptCached.cpp b/lib/ExecutionEngine/ScriptCached.cpp
index a7d21f7..30fc3fd 100644
--- a/lib/ExecutionEngine/ScriptCached.cpp
+++ b/lib/ExecutionEngine/ScriptCached.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@
   if (mpStringPoolRaw) { free(mpStringPoolRaw); }
   if (mpExportVars) { free(mpExportVars); }
   if (mpExportFuncs) { free(mpExportFuncs); }
+  if (mpExportForEach) { free(mpExportForEach); }
   if (mpObjectSlotList) { free(mpObjectSlotList); }
 }
 
@@ -72,6 +73,21 @@
 }
 
 
+// Copies at most forEachListSize cached ForEach addresses into forEachList.
+// Nothing happens when forEachList is NULL.
+void ScriptCached::getExportForEachList(size_t forEachListSize,
+                                        void **forEachList) {
+  if (!forEachList) {
+    return;
+  }
+  size_t const available = getExportForEachCount();
+  size_t const toCopy =
+      (available > forEachListSize) ? forEachListSize : available;
+  memcpy(forEachList, mpExportForEach->cached_addr_list,
+         sizeof(void *) * toCopy);
+}
+
+
 void ScriptCached::getPragmaList(size_t pragmaListSize,
                                  char const **keyList,
                                  char const **valueList) {
diff --git a/lib/ExecutionEngine/ScriptCached.h b/lib/ExecutionEngine/ScriptCached.h
index a627e5c..67cf635 100644
--- a/lib/ExecutionEngine/ScriptCached.h
+++ b/lib/ExecutionEngine/ScriptCached.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,6 +61,7 @@
 
     OBCC_ExportVarList *mpExportVars;
     OBCC_ExportFuncList *mpExportFuncs;
+    OBCC_ExportForEachList *mpExportForEach;
     PragmaList mPragmas;
     OBCC_ObjectSlotList *mpObjectSlotList;
 
@@ -84,6 +85,7 @@
       : mpOwner(owner),
         mpExportVars(NULL),
         mpExportFuncs(NULL),
+        mpExportForEach(NULL),
         mpObjectSlotList(NULL),
 #if USE_OLD_JIT
         mContext(NULL),
@@ -105,6 +107,10 @@
       return mpExportFuncs->count;
     }
 
+    size_t getExportForEachCount() const {
+      return mpExportForEach->count;
+    }
+
     size_t getPragmaCount() const {
       return mPragmas.size();
     }
@@ -121,6 +127,8 @@
 
     void getExportFuncList(size_t funcListSize, void **funcList);
 
+    void getExportForEachList(size_t forEachListSize, void **forEachList);
+
     void getPragmaList(size_t pragmaListSize,
                        char const **keyList,
                        char const **valueList);
diff --git a/lib/ExecutionEngine/ScriptCompiled.cpp b/lib/ExecutionEngine/ScriptCompiled.cpp
index bb3c79a..47f2bb4 100644
--- a/lib/ExecutionEngine/ScriptCompiled.cpp
+++ b/lib/ExecutionEngine/ScriptCompiled.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -67,6 +67,11 @@
 }
 
 
+void ScriptCompiled::getExportForEachNameList(std::vector<std::string> &forEachList) {
+  forEachList = mExportForEachName;
+}
+
+
 void ScriptCompiled::getExportFuncList(size_t funcListSize, void **funcList) {
   if (funcList) {
     size_t funcCount = getExportFuncCount();
@@ -84,6 +89,24 @@
 }
 
 
+// Writes up to forEachListSize exported ForEach addresses into forEachList,
+// in list order. Nothing happens when forEachList is NULL.
+void ScriptCompiled::getExportForEachList(size_t forEachListSize,
+                                          void **forEachList) {
+  if (!forEachList) {
+    return;
+  }
+  size_t remaining = getExportForEachCount();
+  if (forEachListSize < remaining) {
+    remaining = forEachListSize;
+  }
+  ExportForEachList::const_iterator it = mExportForEach.begin();
+  for (; remaining > 0 && it != mExportForEach.end(); ++it, --remaining) {
+    *forEachList++ = *it;
+  }
+}
+
+
 void ScriptCompiled::getPragmaList(size_t pragmaListSize,
                                    char const **keyList,
                                    char const **valueList) {
diff --git a/lib/ExecutionEngine/ScriptCompiled.h b/lib/ExecutionEngine/ScriptCompiled.h
index 5da8851..4498f1a 100644
--- a/lib/ExecutionEngine/ScriptCompiled.h
+++ b/lib/ExecutionEngine/ScriptCompiled.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,6 +43,7 @@
     typedef std::list<std::pair<std::string, std::string> > PragmaList;
     typedef std::list<void*> ExportVarList;
     typedef std::list<void*> ExportFuncList;
+    typedef std::list<void*> ExportForEachList;
     typedef std::map<std::string, FuncInfo *> FuncInfoMap;
     typedef std::list<uint32_t> ObjectSlotList;
 
@@ -55,8 +56,10 @@
 
     std::vector<std::string> mExportVarsName;
     std::vector<std::string> mExportFuncsName;
+    std::vector<std::string> mExportForEachName;
 
     ExportFuncList mExportFuncs;
+    ExportForEachList mExportForEach;
     PragmaList mPragmas;
     ObjectSlotList mObjectSlots;
 
@@ -104,6 +107,10 @@
       return mExportFuncs.size();
     }
 
+    size_t getExportForEachCount() const {
+      return mExportForEach.size();  // Number of entries held in mExportForEach.
+    }
+
     size_t getPragmaCount() const {
       return mPragmas.size();
     }
@@ -120,10 +127,14 @@
 
     void getExportFuncList(size_t funcListSize, void **funcList);
 
+    void getExportForEachList(size_t forEachListSize, void **forEachList);
+
     void getExportVarNameList(std::vector<std::string> &varList);
 
     void getExportFuncNameList(std::vector<std::string> &funcList);
 
+    void getExportForEachNameList(std::vector<std::string> &forEachList);
+
     void getPragmaList(size_t pragmaListSize,
                        char const **keyList,
                        char const **valueList);
diff --git a/lib/ExecutionEngine/bcc.cpp b/lib/ExecutionEngine/bcc.cpp
index 6f858fa..6a20105 100644
--- a/lib/ExecutionEngine/bcc.cpp
+++ b/lib/ExecutionEngine/bcc.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2010, The Android Open Source Project
+ * Copyright 2010-2012, The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -243,3 +243,27 @@
 #endif
   }
 }
+
+
+extern "C" void bccGetExportForEachList(BCCScriptRef script,
+                                        size_t forEachListSize,
+                                        void **forEachList) {
+  BCC_FUNC_LOGGER();
+
+  // A null destination array makes this call a no-op.
+  if (!forEachList) {
+    return;
+  }
+  unwrap(script)->getExportForEachList(forEachListSize, forEachList);
+
+#if DEBUG_BCC_REFLECT
+  // Dump the total count, then every slot that fits in the caller's array.
+  const size_t total = unwrap(script)->getExportForEachCount();
+  ALOGD("ExportForEachCount = %lu\n", (unsigned long)total);
+  const size_t shown = (total > forEachListSize) ? forEachListSize : total;
+  for (size_t idx = 0; idx < shown; ++idx) {
+    ALOGD("ExportForEachList[%lu] = %p\n", (unsigned long)idx,
+          forEachList[idx]);
+  }
+#endif
+}
diff --git a/lib/Transforms/BCCTransforms.h b/lib/Transforms/BCCTransforms.h
index fcca235..1ac8174 100644
--- a/lib/Transforms/BCCTransforms.h
+++ b/lib/Transforms/BCCTransforms.h
@@ -18,6 +18,7 @@
 
 namespace bcc {
 
-llvm::ModulePass *createForEachExpandPass();
+llvm::ModulePass *createForEachExpandPass(std::vector<std::string>& Names,
+                                          std::vector<uint32_t>& Signatures);
 
 }  // namespace bcc
diff --git a/lib/Transforms/ForEachExpand.cpp b/lib/Transforms/ForEachExpand.cpp
index 52bc25c..53d6bdc 100644
--- a/lib/Transforms/ForEachExpand.cpp
+++ b/lib/Transforms/ForEachExpand.cpp
@@ -32,9 +32,9 @@
    * called via rsForEach() or "foreach_<NAME>". We create an inner loop for
    * the ForEach-able function to be invoked over the appropriate data cells
    * of the input/output allocations (adjusting other relevant parameters as
-   * we go). We currently only support doing this for compute "root" functions.
+   * we go). We support doing this for any ForEach-able compute kernel.
    * The new function name is the original function name followed by
-   * ".expanded". Note that we still generate code for the original function.
+   * ".expand". Note that we still generate code for the original function.
    */
   class ForEachExpandPass : public llvm::ModulePass {
   private:
@@ -43,7 +43,10 @@
   llvm::Module *M;
   llvm::LLVMContext *C;
 
-  uint32_t getSignature(llvm::Function *F) {
+  std::vector<std::string>& mNames;
+  std::vector<uint32_t>& mSignatures;
+
+  uint32_t getRootSignature(llvm::Function *F) {
     const llvm::NamedMDNode *ExportForEachMetadata =
         M->getNamedMetadata("#rs_export_foreach");
 
@@ -62,10 +65,10 @@
       return (1 << RootArgTys.size()) - 1;
     }
 
-    // We only handle the case for root() functions today, so this is
-    // hard-coded to look at only the first such function.
     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
 
+    // We only handle the case for legacy root() functions here, so this is
+    // hard-coded to look at only the first such function.
     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
       llvm::Value *SigVal = SigNode->getOperand(0);
@@ -105,22 +108,26 @@
   }
 
   public:
-  ForEachExpandPass()
-      : ModulePass(ID), M(NULL), C(NULL) {
+  ForEachExpandPass(std::vector<std::string>& Names,
+                    std::vector<uint32_t>& Signatures)
+      : ModulePass(ID), M(NULL), C(NULL), mNames(Names),
+        mSignatures(Signatures) {  // Both vectors are bound by reference, not copied.
   }
 
   /* Performs the actual optimization on a selected function. On success, the
    * Module will contain a new function of the name "<NAME>.expand" that
    * invokes <NAME>() in a loop with the appropriate parameters.
    */
-  bool ExpandFunction(llvm::Function *F) {
-    ALOGV("Expanding a ForEach-able Function");
+  bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
+    ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
 
-    uint32_t Signature = getSignature(F);
     if (!Signature) {
-      // We couldn't determine how to expand this function based on its
-      // function signature.
-      return false;
+      Signature = getRootSignature(F);
+      if (!Signature) {
+        // We couldn't determine how to expand this function based on its
+        // function signature.
+        return false;
+      }
     }
 
     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
@@ -210,7 +217,7 @@
     llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
     Builder.CreateStore(Arg_x1, AX);
 
-    // Collect and construct the arguments for root().
+    // Collect and construct the arguments for the kernel function.
     // Note that we load any loop-invariant arguments before entering the Loop.
     llvm::Function::arg_iterator Args = F->arg_begin();
 
@@ -265,7 +272,7 @@
     // Loop:
     Builder.SetInsertPoint(Loop);
 
-    // Populate the actual call to root().
+    // Populate the actual call to the kernel function.
     llvm::SmallVector<llvm::Value*, 8> RootArgs;
 
     llvm::Value *In = NULL;
@@ -328,15 +335,19 @@
   }
 
   virtual bool runOnModule(llvm::Module &M) {
+    bool Changed = false;  // True if any ExpandFunction() call reports a change.
     this->M = &M;
     C = &M.getContext();
 
-    llvm::Function *root = M.getFunction("root");
-    if (root && root->getReturnType()->isVoidTy()) {
-      return ExpandFunction(root);
+    bccAssert(mNames.size() == mSignatures.size());  // Entries are paired by index.
+    for (size_t i = 0, e = mNames.size(); i != e; ++i) {
+      llvm::Function *kernel = M.getFunction(mNames[i]);
+      if (kernel && kernel->getReturnType()->isVoidTy()) {  // Skip missing/non-void.
+        Changed |= ExpandFunction(kernel, mSignatures[i]);
+      }
     }
 
-    return false;
+    return Changed;
   }
 
   virtual const char *getPassName() const {
@@ -350,8 +361,9 @@
 
 namespace bcc {
 
-  llvm::ModulePass *createForEachExpandPass() {
-    return new ForEachExpandPass();
+  llvm::ModulePass *createForEachExpandPass(std::vector<std::string>& Names,
+                                            std::vector<uint32_t>& Signatures) {
+    return new ForEachExpandPass(Names, Signatures);  // Heap-allocated; not freed here.
   }
 
 }  // namespace bcc