diff --git a/bcinfo/MetadataExtractor.cpp b/bcinfo/MetadataExtractor.cpp
index acc6915..4d868c8 100644
--- a/bcinfo/MetadataExtractor.cpp
+++ b/bcinfo/MetadataExtractor.cpp
@@ -60,6 +60,7 @@
 // synced with slang_rs_metadata.h)
 static const llvm::StringRef ObjectSlotMetadataName = "#rs_object_slots";
 
+static const llvm::StringRef ThreadableMetadataName = "#rs_is_threadable";
 
 MetadataExtractor::MetadataExtractor(const char *bitcode, size_t bitcodeSize)
     : mModule(nullptr), mBitcode(bitcode), mBitcodeSize(bitcodeSize),
@@ -68,7 +69,8 @@
       mExportForEachNameList(nullptr), mExportForEachSignatureList(nullptr),
       mExportForEachInputCountList(nullptr), mPragmaCount(0),
       mPragmaKeyList(nullptr), mPragmaValueList(nullptr), mObjectSlotCount(0),
-      mObjectSlotList(nullptr), mRSFloatPrecision(RS_FP_Full) {
+      mObjectSlotList(nullptr), mRSFloatPrecision(RS_FP_Full),
+      mIsThreadable(true) {
   BitcodeWrapper wrapper(bitcode, bitcodeSize);
   mCompilerVersion = wrapper.getCompilerVersion();
   mOptimizationLevel = wrapper.getOptimizationLevel();
@@ -82,7 +84,8 @@
       mExportForEachNameList(nullptr), mExportForEachSignatureList(nullptr),
       mExportForEachInputCountList(nullptr), mPragmaCount(0),
       mPragmaKeyList(nullptr), mPragmaValueList(nullptr), mObjectSlotCount(0),
-      mObjectSlotList(nullptr), mRSFloatPrecision(RS_FP_Full) {
+      mObjectSlotList(nullptr), mRSFloatPrecision(RS_FP_Full),
+      mIsThreadable(true) {
   mCompilerVersion = RS_VERSION;  // Default to the actual current version.
   mOptimizationLevel = 3;
 }
@@ -425,6 +428,36 @@
 }
 
 
+// Reads the module-level threadable flag from |ThreadableMetadata| into
+// mIsThreadable.  Scripts are threadable by default; the flag is cleared only
+// when the first operand of the first node is the string "no".  A missing,
+// empty, or malformed metadata entry keeps the default.
+void MetadataExtractor::readThreadableFlag(
+    const llvm::NamedMDNode *ThreadableMetadata) {
+  mIsThreadable = true;
+
+  // getOperand(0) is only valid when at least one operand exists, so reject
+  // an absent or empty named node before indexing into it.
+  if (ThreadableMetadata == nullptr ||
+      ThreadableMetadata->getNumOperands() == 0)
+    return;
+
+  llvm::MDNode *mdNode = ThreadableMetadata->getOperand(0);
+  if (mdNode == nullptr || mdNode->getNumOperands() == 0)
+    return;
+
+  llvm::Value *mdValue = mdNode->getOperand(0);
+  if (mdValue == nullptr)
+    return;
+
+  // NOTE(review): ownership of the returned string is not visible from this
+  // hunk; if createStringFromValue() allocates, it must be released here.
+  const char *value = createStringFromValue(mdValue);
+  if (value != nullptr && strcmp(value, "no") == 0)
+    mIsThreadable = false;
+}
+
+
 bool MetadataExtractor::extract() {
   if (!(mBitcode && mBitcodeSize) && !mModule) {
     ALOGE("Invalid/empty bitcode/module");
@@ -463,6 +496,8 @@
       mModule->getNamedMetadata(PragmaMetadataName);
   const llvm::NamedMDNode *ObjectSlotMetadata =
       mModule->getNamedMetadata(ObjectSlotMetadataName);
+  const llvm::NamedMDNode *ThreadableMetadata =
+      mModule->getNamedMetadata(ThreadableMetadataName);
 
 
   if (!populateVarNameMetadata(ExportVarMetadata)) {
@@ -488,6 +523,8 @@
     return false;
   }
 
+  readThreadableFlag(ThreadableMetadata);
+
   return true;
 }
 
diff --git a/include/bcc/Renderscript/RSTransforms.h b/include/bcc/Renderscript/RSTransforms.h
index 9fe418b..647ac4d 100644
--- a/include/bcc/Renderscript/RSTransforms.h
+++ b/include/bcc/Renderscript/RSTransforms.h
@@ -32,9 +32,9 @@
 
 llvm::ModulePass * createRSEmbedInfoPass();
 
+llvm::ModulePass * createRSScreenFunctionsPass();
 
-llvm::ModulePass *
-createRSScreenFunctionsPass();
+llvm::ModulePass * createRSIsThreadablePass();
 
 } // end namespace bcc
 
diff --git a/include/bcinfo/MetadataExtractor.h b/include/bcinfo/MetadataExtractor.h
index 6d53a29..3648889 100644
--- a/include/bcinfo/MetadataExtractor.h
+++ b/include/bcinfo/MetadataExtractor.h
@@ -61,6 +61,9 @@
 
   enum RSFloatPrecision mRSFloatPrecision;
 
+  // Flag to mark that script is threadable.  True by default.
+  bool mIsThreadable;
+
   // Helper functions for extraction
   bool populateVarNameMetadata(const llvm::NamedMDNode *VarNameMetadata);
   bool populateFuncNameMetadata(const llvm::NamedMDNode *FuncNameMetadata);
@@ -68,6 +71,7 @@
                                const llvm::NamedMDNode *Signatures);
   bool populateObjectSlotMetadata(const llvm::NamedMDNode *ObjectSlotMetadata);
   void populatePragmaMetadata(const llvm::NamedMDNode *PragmaMetadata);
+  void readThreadableFlag(const llvm::NamedMDNode *ThreadableMetadata);
 
   uint32_t calculateNumInputs(const llvm::Function *Function,
                               uint32_t Signature);
@@ -269,6 +273,11 @@
   static bool hasForEachSignatureKernel(uint32_t sig) {
     return sig & 0x20;
   }
+
+  // Returns the threadable flag read from the metadata (true by default).
+  bool isThreadable() const {
+    return mIsThreadable;
+  }
 };
 
 }  // namespace bcinfo
diff --git a/lib/Core/Compiler.cpp b/lib/Core/Compiler.cpp
index c88ecc3..a2ebab5 100644
--- a/lib/Core/Compiler.cpp
+++ b/lib/Core/Compiler.cpp
@@ -197,6 +197,14 @@
   // This has to come after LTO, since we don't want to examine functions that
   // are never actually called.
   passes.add(createRSScreenFunctionsPass());
+  passes.add(createRSIsThreadablePass());
+
+  // RSEmbedInfoPass needs to come after we have scanned for non-threadable
+  // functions.
+  // Script passed to RSCompiler must be a RSScript.
+  RSScript &script = static_cast<RSScript &>(pScript);
+  if (script.getEmbedInfo())
+    passes.add(createRSEmbedInfoPass());
 
   // Add passes to the pass manager to emit machine code through MC layer.
   if (mTarget->addPassesToEmitMC(passes, mc_context, pResult,
@@ -356,14 +364,9 @@
 }
 
 bool Compiler::addExpandForEachPass(Script &pScript, llvm::PassManager &pPM) {
-  // Script passed to RSCompiler must be a RSScript.
-  RSScript &script = static_cast<RSScript &>(pScript);
-
   // Expand ForEach on CPU path to reduce launch overhead.
   bool pEnableStepOpt = true;
   pPM.add(createRSForEachExpandPass(pEnableStepOpt));
-  if (script.getEmbedInfo())
-    pPM.add(createRSEmbedInfoPass());
 
   return true;
 }
diff --git a/lib/Renderscript/Android.mk b/lib/Renderscript/Android.mk
index 3280909..251b32c 100644
--- a/lib/Renderscript/Android.mk
+++ b/lib/Renderscript/Android.mk
@@ -32,6 +32,7 @@
   RSMetadata.cpp \
   RSScript.cpp \
   RSInvokeHelperPass.cpp \
+  RSIsThreadablePass.cpp \
   RSScreenFunctionsPass.cpp \
   RSStubsWhiteList.cpp \
   RSScriptGroupFusion.cpp
diff --git a/lib/Renderscript/RSEmbedInfo.cpp b/lib/Renderscript/RSEmbedInfo.cpp
index 0d7f360..0ae97a2 100644
--- a/lib/Renderscript/RSEmbedInfo.cpp
+++ b/lib/Renderscript/RSEmbedInfo.cpp
@@ -83,6 +83,7 @@
     const uint32_t *objectSlotList = me.getObjectSlotList();
     const char **pragmaKeyList = me.getPragmaKeyList();
     const char **pragmaValueList = me.getPragmaValueList();
+    bool isThreadable = me.isThreadable();
     size_t i;
 
     // We use a simple text format here that the compatibility library can
@@ -118,6 +119,7 @@
       s << pragmaKeyList[i] << " - "
         << pragmaValueList[i] << "\n";
     }
+    s << "isThreadable: " << ((isThreadable) ? "yes" : "no") << "\n";
 
     s.flush();
     return str;
diff --git a/lib/Renderscript/RSIsThreadablePass.cpp b/lib/Renderscript/RSIsThreadablePass.cpp
new file mode 100644
index 0000000..33de8fc
--- /dev/null
+++ b/lib/Renderscript/RSIsThreadablePass.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bcc/Renderscript/RSTransforms.h"
+#include "bcc/Support/Log.h"
+
+#include <cstdlib>
+
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Function.h>
+#include <llvm/Pass.h>
+
+namespace { // anonymous namespace
+
+// A Module pass that screens all the functions in the module and checks
+// whether any non-threadable function is callable.  The verdict is recorded
+// as "yes" or "no" in the named metadata '#rs_is_threadable'.
+
+class RSIsThreadablePass : public llvm::ModulePass {
+private:
+  static char ID;
+
+  // Names, as they appear in the module, of the functions whose presence
+  // makes a script non-threadable.  The constructor sorts this list so that
+  // isPresent() can binary-search it.
+  std::vector<std::string> nonThreadableFns = {
+    "_Z22rsgBindProgramFragment19rs_program_fragment",
+    "_Z19rsgBindProgramStore16rs_program_store",
+    "_Z20rsgBindProgramVertex17rs_program_vertex",
+    "_Z20rsgBindProgramRaster17rs_program_raster",
+    "_Z14rsgBindSampler19rs_program_fragmentj10rs_sampler",
+    "_Z14rsgBindTexture19rs_program_fragmentj13rs_allocation",
+    "_Z15rsgBindConstant19rs_program_fragmentj13rs_allocation",
+    "_Z15rsgBindConstant17rs_program_vertexj13rs_allocation",
+    "_Z36rsgProgramVertexLoadProjectionMatrixPK12rs_matrix4x4",
+    "_Z31rsgProgramVertexLoadModelMatrixPK12rs_matrix4x4",
+    "_Z33rsgProgramVertexLoadTextureMatrixPK12rs_matrix4x4",
+    "_Z35rsgProgramVertexGetProjectionMatrixP12rs_matrix4x4",
+    "_Z31rsgProgramFragmentConstantColor19rs_program_fragmentffff",
+    "_Z11rsgGetWidthv",
+    "_Z12rsgGetHeightv",
+    "_Z11rsgDrawRectfffff",
+    "_Z11rsgDrawQuadffffffffffff",
+    "_Z20rsgDrawQuadTexCoordsffffffffffffffffffff",
+    "_Z24rsgDrawSpriteScreenspacefffff",
+    "_Z11rsgDrawMesh7rs_mesh",
+    "_Z11rsgDrawMesh7rs_meshj",
+    "_Z11rsgDrawMesh7rs_meshjjj",
+    "_Z25rsgMeshComputeBoundingBox7rs_meshPfS0_S0_S0_S0_S0_",
+    "_Z11rsgDrawPath7rs_path",
+    "_Z13rsgClearColorffff",
+    "_Z13rsgClearDepthf",
+    "_Z11rsgDrawTextPKcii",
+    "_Z11rsgDrawText13rs_allocationii",
+    "_Z14rsgMeasureTextPKcPiS1_S1_S1_",
+    "_Z14rsgMeasureText13rs_allocationPiS0_S0_S0_",
+    "_Z11rsgBindFont7rs_font",
+    "_Z12rsgFontColorffff",
+    "_Z18rsgBindColorTarget13rs_allocationj",
+    "_Z18rsgBindDepthTarget13rs_allocation",
+    "_Z19rsgClearColorTargetj",
+    "_Z19rsgClearDepthTargetv",
+    "_Z24rsgClearAllRenderTargetsv",
+    "_Z7rsGetDtv",
+    "_Z5colorffff",
+    "_Z9rsgFinishv",
+  };
+
+  // Returns true if |name| occurs in |list|, which must already be sorted.
+  bool isPresent(const std::vector<std::string> &list,
+                 const std::string &name) const {
+    return std::binary_search(list.begin(), list.end(), name);
+  }
+
+public:
+  // Sorts nonThreadableFns once so that later lookups can binary-search it.
+  RSIsThreadablePass() : ModulePass(ID) {
+    std::sort(nonThreadableFns.begin(), nonThreadableFns.end());
+  }
+
+  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+
+  // Scans every function in |M|.  If any of them is named in
+  // nonThreadableFns, "no" is appended to the '#rs_is_threadable' named
+  // metadata; otherwise "yes" is appended.
+  bool runOnModule(llvm::Module &M) override {
+    bool threadable = true;
+    for (auto &F : M.getFunctionList()) {
+      if (isPresent(nonThreadableFns, F.getName().str())) {
+        threadable = false;
+        break;
+      }
+    }
+
+    llvm::LLVMContext &context = M.getContext();
+    llvm::MDString *val =
+      llvm::MDString::get(context, (threadable) ? "yes" : "no");
+    llvm::NamedMDNode *node =
+        M.getOrInsertNamedMetadata("#rs_is_threadable");
+    node->addOperand(llvm::MDNode::get(context, val));
+
+    // Named metadata was just added, so report the module as modified.
+    return true;
+  }
+};
+
+}
+
+char RSIsThreadablePass::ID = 0;
+
+namespace bcc {
+
+// Builds the module pass that records whether a script is threadable in the
+// '#rs_is_threadable' named metadata.  Returns a newly allocated pass.
+llvm::ModulePass *createRSIsThreadablePass() {
+  return new RSIsThreadablePass();
+}
+} // end namespace bcc
