ForEach: Set internal linkage for kernel functions

The expanded function is the externally visible interface. Setting internal
linkage on the unexpanded kernel function influences the cost function of the
inliner. Specifically, the inliner knows that no external users will use the
unexpanded kernel function, which means it will inline it and remove the
original function.

This gives a 22% speedup on the colorcube 3D, 19% on convolve3x3
as well as minor speedups on fisheye.

Change-Id: I9770c92d24dfc7978145e694d1a35829c5432264
diff --git a/lib/Renderscript/RSForEachExpand.cpp b/lib/Renderscript/RSForEachExpand.cpp
index 59c3725..93b5672 100644
--- a/lib/Renderscript/RSForEachExpand.cpp
+++ b/lib/Renderscript/RSForEachExpand.cpp
@@ -27,6 +27,7 @@
 #include <llvm/Pass.h>
 #include <llvm/Support/raw_ostream.h>
 #include <llvm/IR/DataLayout.h>
+#include <llvm/IR/Function.h>
 #include <llvm/IR/Type.h>
 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
 
@@ -606,11 +607,15 @@
       const char *name = func_iter->first;
       uint32_t signature = func_iter->second;
       llvm::Function *kernel = M.getFunction(name);
-      if (kernel && isKernel(signature)) {
-        Changed |= ExpandKernel(kernel, signature);
-      }
-      else if (kernel && kernel->getReturnType()->isVoidTy()) {
-        Changed |= ExpandFunction(kernel, signature);
+      if (kernel) {
+        if (isKernel(signature))
+          Changed |= ExpandKernel(kernel, signature);
+        else if (kernel->getReturnType()->isVoidTy())
+          Changed |= ExpandFunction(kernel, signature);
+        else
+          llvm_unreachable("Unknown kernel type");
+
+        kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
       }
     }