Skip linkloader, use shared object files

Bug: 18322681

- In rsCpuScript, if property rs.skip.linkloader is set, look for a .so
  file in the cache directory and load it.  If it is not available, use
  bcc to generate a relocatable object file and link it into a .so using
  ld.mc.  Use the embedded symbols in .rs.info and follow steps similar
  to the compatibility library to invoke script functions or access
  script variables.
- Add rs* symbols like rsGetAllocation to libRSCpuRef (a la
  libRSSupport).  Make the necessary changes to argument types to get
  the mangled names correct.
- Make 64-bit version of rsSetObject take two pointers instead of a
  pointer and a large object.  rsIsObject takes a pointer instead of a
  large object.  Otherwise, we get failures in x86_64 due to calling
  convention mismatch.  To match the function names in the shared object
  path, define these functions as 'extern "C"' with their mangled names.
- Add stubbed Math functions from rsCpuRuntimeMath and
  rsCpuRuntimeMathFuncs into libRSCpuRef.so.
- Coalesce separate #ifdef paths in libRSCpuRef.  Function parameters
  for runtime callbacks and bcc plugin are needed in the
  non-compatibility path, but take default NULL arguments.  This patch
  introduces these parameters into the compatibility path as well, and
  passes default NULL arguments.

Change-Id: I8a853350e39d30b4d852c30e4b5da5a75a2f2820
diff --git a/cpu_ref/Android.mk b/cpu_ref/Android.mk
index e267fe9..6599932 100644
--- a/cpu_ref/Android.mk
+++ b/cpu_ref/Android.mk
@@ -22,6 +22,7 @@
 	rsCpuCore.cpp \
 	rsCpuScript.cpp \
 	rsCpuRuntimeMath.cpp \
+	rsCpuRuntimeMathFuncs.cpp \
 	rsCpuRuntimeStubs.cpp \
 	rsCpuScriptGroup.cpp \
 	rsCpuScriptGroup2.cpp \
@@ -79,7 +80,7 @@
     rsCpuIntrinsics_x86.cpp
 endif
 
-LOCAL_SHARED_LIBRARIES += libRS libcutils libutils liblog libsync libc++
+LOCAL_SHARED_LIBRARIES += libRS libcutils libutils liblog libsync libc++ libdl
 
 # these are not supported in 64-bit yet
 LOCAL_SHARED_LIBRARIES += libbcc libbcinfo
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 84c2416..738eb84 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -64,11 +64,9 @@
 
 RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major,
         uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
-#ifndef RS_COMPATIBILITY_LIB
         , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback,
         RSSelectRTCallback pSelectRTCallback,
         const char *pBccPluginName
-#endif
         ) {
 
     RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc);
@@ -80,13 +78,11 @@
         return nullptr;
     }
 
-#ifndef RS_COMPATIBILITY_LIB
     cpu->setLinkRuntimeCallback(pLinkRuntimeCallback);
     cpu->setSelectRTCallback(pSelectRTCallback);
     if (pBccPluginName) {
         cpu->setBccPluginName(pBccPluginName);
     }
-#endif
 
     return cpu;
 }
@@ -116,11 +112,9 @@
     memset(&mWorkers, 0, sizeof(mWorkers));
     memset(&mTlsStruct, 0, sizeof(mTlsStruct));
     mExit = false;
-#ifndef RS_COMPATIBILITY_LIB
     mLinkRuntimeCallback = nullptr;
     mSelectRTCallback = nullptr;
     mSetupCompilerCallback = nullptr;
-#endif
 }
 
 
@@ -587,9 +581,7 @@
 
     RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s);
     if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags
-#ifndef RS_COMPATIBILITY_LIB
         , getBccPluginName()
-#endif
         )) {
         delete i;
         return nullptr;
diff --git a/cpu_ref/rsCpuCore.h b/cpu_ref/rsCpuCore.h
index e069658..8060826 100644
--- a/cpu_ref/rsCpuCore.h
+++ b/cpu_ref/rsCpuCore.h
@@ -190,7 +190,6 @@
         return mScriptLookupFn(mRSC, s);
     }
 
-#ifndef RS_COMPATIBILITY_LIB
     void setLinkRuntimeCallback(
             bcc::RSLinkRuntimeCallback pLinkRuntimeCallback) {
         mLinkRuntimeCallback = pLinkRuntimeCallback;
@@ -220,7 +219,6 @@
     virtual const char *getBccPluginName() const {
         return mBccPluginName.c_str();
     }
-#endif
     virtual bool getInForEach() { return mInForEach; }
 
 protected:
@@ -248,12 +246,10 @@
 
     ScriptTLSStruct mTlsStruct;
 
-#ifndef RS_COMPATIBILITY_LIB
     bcc::RSLinkRuntimeCallback mLinkRuntimeCallback;
     RSSelectRTCallback mSelectRTCallback;
     RSSetupCompilerCallback mSetupCompilerCallback;
     std::string mBccPluginName;
-#endif
 };
 
 
diff --git a/cpu_ref/rsCpuRuntimeMath.cpp b/cpu_ref/rsCpuRuntimeMath.cpp
index 7dac921..55887fa 100644
--- a/cpu_ref/rsCpuRuntimeMath.cpp
+++ b/cpu_ref/rsCpuRuntimeMath.cpp
@@ -112,7 +112,6 @@
     return fmin(v - i, 0x1.fffffep-1f);
 }
 
-#ifdef RS_COMPATIBILITY_LIB
 EXPORT_F32_FN_F32(acosf)
 EXPORT_F32_FN_F32(acoshf)
 EXPORT_F32_FN_F32(asinf)
@@ -213,7 +212,6 @@
 void __attribute__((overloadable)) rsMatrixTranspose(rs_matrix2x2 *m) {
     SC_MatrixTranspose_2x2((Matrix2x2 *) m);
 }
-#endif
 
 //////////////////////////////////////////////////////////////////////////////
 // Class implementation
diff --git a/cpu_ref/rsCpuRuntimeMathFuncs.cpp b/cpu_ref/rsCpuRuntimeMathFuncs.cpp
index 6eb7063..0a16935 100644
--- a/cpu_ref/rsCpuRuntimeMathFuncs.cpp
+++ b/cpu_ref/rsCpuRuntimeMathFuncs.cpp
@@ -16,7 +16,6 @@
 
 // exports unavailable mathlib functions to compat lib
 
-#ifdef RS_COMPATIBILITY_LIB
 
 typedef unsigned int uint32_t;
 typedef int int32_t;
@@ -92,6 +91,7 @@
   return SC_randf2(min, max);
 }
 
+#ifdef RS_COMPATIBILITY_LIB
 
 // !!! DANGER !!!
 // These functions are potentially missing on older Android versions.
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index f38c898..462c161 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -18,16 +18,9 @@
 #include "rsCpuScript.h"
 
 #ifdef RS_COMPATIBILITY_LIB
-    #include <set>
-    #include <string>
-    #include <dlfcn.h>
     #include <stdio.h>
-    #include <stdlib.h>
-    #include <string.h>
     #include <sys/stat.h>
     #include <unistd.h>
-    #include <fstream>
-    #include <iostream>
 #else
     #include <bcc/BCCContext.h>
     #include <bcc/Config/Config.h>
@@ -45,8 +38,21 @@
     #include <vector>
 #endif
 
+#include <set>
+#include <string>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fstream>
+#include <iostream>
+
+#ifdef __LP64__
+#define SYSLIBPATH "/system/lib64"
+#else
+#define SYSLIBPATH "/system/lib"
+#endif
+
 namespace {
-#ifdef RS_COMPATIBILITY_LIB
 
 // Create a len length string containing random characters from [A-Za-z0-9].
 static std::string getRandomString(size_t len) {
@@ -106,6 +112,8 @@
     return 0;
 }
 
+#define RS_CACHE_DIR "com.android.renderscript.cache"
+
 // Attempt to load the shared library from origName, but then fall back to
 // creating a copy of the shared library if necessary (to ensure instancing).
 // This function returns the dlopen()-ed handle if successful.
@@ -136,7 +144,12 @@
     }
 
     std::string newName(cacheDir);
-    newName.append("/com.android.renderscript.cache/");
+
+    // Append RS_CACHE_DIR only if it is not found in cacheDir
+    // In driver mode, RS_CACHE_DIR is already appended to cacheDir.
+    if (newName.find(RS_CACHE_DIR) == std::string::npos) {
+        newName.append("/" RS_CACHE_DIR "/");
+    }
 
     if (!ensureCacheDirExists(newName.c_str())) {
         ALOGE("Could not verify or create cache dir: %s", cacheDir);
@@ -167,15 +180,12 @@
     return loaded;
 }
 
-// Load the shared library referred to by cacheDir and resName. If we have
-// already loaded this library, we instead create a new copy (in the
-// cache dir) and then load that. We then immediately destroy the copy.
-// This is required behavior to implement script instancing for the support
-// library, since shared objects are loaded and de-duped by name only.
-static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
-    void *loaded = nullptr;
+static std::string findSharedObjectName(const char *cacheDir,
+                                        const char *resName) {
+
 #ifndef RS_SERVER
     std::string scriptSOName(cacheDir);
+#ifdef RS_COMPATIBILITY_LIB
     size_t cutPos = scriptSOName.rfind("cache");
     if (cutPos != std::string::npos) {
         scriptSOName.erase(cutPos);
@@ -184,11 +194,28 @@
     }
     scriptSOName.append("/lib/librs.");
 #else
+    scriptSOName.append("/librs.");
+#endif
+
+#else
     std::string scriptSOName("lib");
 #endif
     scriptSOName.append(resName);
     scriptSOName.append(".so");
 
+    return scriptSOName;
+}
+
+// Load the shared library referred to by cacheDir and resName. If we have
+// already loaded this library, we instead create a new copy (in the
+// cache dir) and then load that. We then immediately destroy the copy.
+// This is required behavior to implement script instancing for the support
+// library, since shared objects are loaded and de-duped by name only.
+static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
+    void *loaded = nullptr;
+
+    std::string scriptSOName = findSharedObjectName(cacheDir, resName);
+
     // We should check if we can load the library from the standard app
     // location for shared libraries first.
     loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
@@ -216,7 +243,29 @@
     return loaded;
 }
 
-#else  // RS_COMPATIBILITY_LIB is not defined
+#ifndef RS_COMPATIBILITY_LIB
+
+static bool is_skip_linkloader() {
+    char buf[PROPERTY_VALUE_MAX];
+    static bool initialized = false;
+    static bool prop = false;
+
+    if (initialized) {
+        return prop;
+    }
+
+    property_get("rs.skip.linkloader", buf, "");
+    prop = (buf[0] != '\0');
+    initialized = true;
+
+    if (prop) {
+        ALOGV("Skipping linkloader");
+    }
+    else {
+        ALOGV("Default path: using linkloader");
+    }
+    return prop;
+}
 
 static bool is_force_recompile() {
 #ifdef RS_SERVER
@@ -277,6 +326,11 @@
         }
     }
 
+    if (is_skip_linkloader()) {
+        args->push_back("-fPIC");
+        args->push_back("-embedRSInfo");
+    }
+
     args->push_back(bcFileName.c_str());
     args->push_back(nullptr);
 }
@@ -335,13 +389,70 @@
     }
 }
 
+const static char *LD_EXE_PATH = "/system/bin/ld.mc";
+
+static bool createSharedLib(const char *cacheDir, const char *resName) {
+    std::string sharedLibName = findSharedObjectName(cacheDir, resName);
+    std::string objFileName = cacheDir;
+    objFileName.append("/");
+    objFileName.append(resName);
+    objFileName.append(".o");
+
+    const char *compiler_rt = SYSLIBPATH"/libcompiler_rt.so";
+    std::vector<const char *> args = {
+        LD_EXE_PATH,
+        "-shared",
+        "-nostdlib",
+        compiler_rt,
+        "-mtriple", DEFAULT_TARGET_TRIPLE_STRING,
+        "-L", SYSLIBPATH,
+        "-lRSDriver", "-lm", "-lc",
+        objFileName.c_str(),
+        "-o", sharedLibName.c_str(),
+        nullptr
+    };
+
+    std::string cmdLineStr = bcc::getCommandLine(args.size()-1, args.data());
+
+    pid_t pid = fork();
+
+    switch (pid) {
+    case -1: {  // Error occurred (we attempt no recovery)
+        ALOGE("Couldn't fork for linker (%s) execution", LD_EXE_PATH);
+        return false;
+    }
+    case 0: {  // Child process
+        ALOGV("Invoking ld.mc with args '%s'", cmdLineStr.c_str());
+        execv(LD_EXE_PATH, (char* const*) args.data());
+
+        ALOGE("execv() failed: %s", strerror(errno));
+        abort();
+        return false;
+    }
+    default: {  // Parent process (actual driver)
+        // Wait on child process to finish linking the shared library.
+        int status = 0;
+        pid_t w = waitpid(pid, &status, 0);
+        if (w == -1) {
+            ALOGE("Could not wait for linker (%s)", LD_EXE_PATH);
+            return false;
+        }
+
+        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+            return true;
+        }
+
+        ALOGE("Linker (%s) terminated unexpectedly", LD_EXE_PATH);
+        return false;
+    }
+    }
+}
 #endif  // !defined(RS_COMPATIBILITY_LIB)
 }  // namespace
 
 namespace android {
 namespace renderscript {
 
-#ifdef RS_COMPATIBILITY_LIB
 #define MAXLINE 500
 #define MAKE_STR_HELPER(S) #S
 #define MAKE_STR(S) MAKE_STR_HELPER(S)
@@ -374,20 +485,20 @@
 
     return s;
 }
-#endif
 
 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
     mCtx = ctx;
     mScript = s;
 
-#ifdef RS_COMPATIBILITY_LIB
     mScriptSO = nullptr;
+
     mInvokeFunctions = nullptr;
     mForEachFunctions = nullptr;
     mFieldAddress = nullptr;
     mFieldIsObject = nullptr;
     mForEachSignatures = nullptr;
-#else
+
+#ifndef RS_COMPATIBILITY_LIB
     mCompilerDriver = nullptr;
     mExecutable = nullptr;
 #endif
@@ -404,6 +515,242 @@
     mIsThreadable = true;
 }
 
+bool RsdCpuScriptImpl::storeRSInfoFromSO() {
+    char line[MAXLINE];
+    size_t varCount = 0;
+    size_t funcCount = 0;
+    size_t forEachCount = 0;
+    size_t objectSlotCount = 0;
+
+    mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
+    if (mRoot) {
+        //ALOGE("Found root(): %p", mRoot);
+    }
+    mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
+    if (mRootExpand) {
+        //ALOGE("Found root.expand(): %p", mRootExpand);
+    }
+    mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
+    if (mInit) {
+        //ALOGE("Found init(): %p", mInit);
+    }
+    mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
+    if (mFreeChildren) {
+        //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
+    }
+
+    const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
+    if (rsInfo) {
+        //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
+    }
+
+    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+        goto error;
+    }
+    if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
+        ALOGE("Invalid export var count!: %s", line);
+        goto error;
+    }
+
+    mExportedVariableCount = varCount;
+    //ALOGE("varCount: %zu", varCount);
+    if (varCount > 0) {
+        // Start by creating/zeroing this member, since we don't want to
+        // accidentally clean up invalid pointers later (if we error out).
+        mFieldIsObject = new bool[varCount];
+        if (mFieldIsObject == nullptr) {
+            goto error;
+        }
+        memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
+        mFieldAddress = new void*[varCount];
+        if (mFieldAddress == nullptr) {
+            goto error;
+        }
+        for (size_t i = 0; i < varCount; ++i) {
+            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+                goto error;
+            }
+            char *c = strrchr(line, '\n');
+            if (c) {
+                *c = '\0';
+            }
+            mFieldAddress[i] = dlsym(mScriptSO, line);
+            if (mFieldAddress[i] == nullptr) {
+                ALOGE("Failed to find variable address for %s: %s",
+                      line, dlerror());
+                // Not a critical error if we don't find a global variable.
+            }
+            else {
+                //ALOGE("Found variable %s at %p", line,
+                //mFieldAddress[i]);
+            }
+        }
+    }
+
+    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+        goto error;
+    }
+    if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
+        ALOGE("Invalid export func count!: %s", line);
+        goto error;
+    }
+
+    mExportedFunctionCount = funcCount;
+    //ALOGE("funcCount: %zu", funcCount);
+
+    if (funcCount > 0) {
+        mInvokeFunctions = new InvokeFunc_t[funcCount];
+        if (mInvokeFunctions == nullptr) {
+            goto error;
+        }
+        for (size_t i = 0; i < funcCount; ++i) {
+            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+                goto error;
+            }
+            char *c = strrchr(line, '\n');
+            if (c) {
+                *c = '\0';
+            }
+
+            mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
+            if (mInvokeFunctions[i] == nullptr) {
+                ALOGE("Failed to get function address for %s(): %s",
+                      line, dlerror());
+                goto error;
+            }
+            else {
+                //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
+            }
+        }
+    }
+
+    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+        goto error;
+    }
+    if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
+        ALOGE("Invalid export forEach count!: %s", line);
+        goto error;
+    }
+
+    if (forEachCount > 0) {
+
+        mForEachSignatures = new uint32_t[forEachCount];
+        if (mForEachSignatures == nullptr) {
+            goto error;
+        }
+        mForEachFunctions = new ForEachFunc_t[forEachCount];
+        if (mForEachFunctions == nullptr) {
+            goto error;
+        }
+        for (size_t i = 0; i < forEachCount; ++i) {
+            unsigned int tmpSig = 0;
+            char tmpName[MAXLINE];
+
+            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+                goto error;
+            }
+            if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
+                       &tmpSig, tmpName) != 2) {
+                ALOGE("Invalid export forEach!: %s", line);
+                goto error;
+            }
+
+            // Lookup the expanded ForEach kernel.
+            strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
+            mForEachSignatures[i] = tmpSig;
+            mForEachFunctions[i] =
+                    (ForEachFunc_t) dlsym(mScriptSO, tmpName);
+            if (i != 0 && mForEachFunctions[i] == nullptr) {
+                // Ignore missing root.expand functions.
+                // root() is always specified at location 0.
+                ALOGE("Failed to find forEach function address for %s: %s",
+                      tmpName, dlerror());
+                goto error;
+            }
+            else {
+                //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
+            }
+        }
+    }
+
+    if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+        goto error;
+    }
+    if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
+        ALOGE("Invalid object slot count!: %s", line);
+        goto error;
+    }
+
+    if (objectSlotCount > 0) {
+        rsAssert(varCount > 0);
+        for (size_t i = 0; i < objectSlotCount; ++i) {
+            uint32_t varNum = 0;
+            if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
+                goto error;
+            }
+            if (sscanf(line, "%u", &varNum) != 1) {
+                ALOGE("Invalid object slot!: %s", line);
+                goto error;
+            }
+
+            if (varNum < varCount) {
+                mFieldIsObject[varNum] = true;
+            }
+        }
+    }
+
+    if (varCount > 0) {
+        mBoundAllocs = new Allocation *[varCount];
+        memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
+    }
+
+    if (mScriptSO == (void*)1) {
+        //rsdLookupRuntimeStub(script, "acos");
+    }
+
+    return true;
+
+error:
+    delete[] mInvokeFunctions;
+    delete[] mForEachFunctions;
+    delete[] mFieldAddress;
+    delete[] mFieldIsObject;
+    delete[] mForEachSignatures;
+    delete[] mBoundAllocs;
+
+    return false;
+}
+
+#ifndef RS_COMPATIBILITY_LIB
+bool RsdCpuScriptImpl::storeRSInfoFromObj(bcinfo::MetadataExtractor &bitcodeMetadata) {
+
+    mExecutable->setThreadable(mIsThreadable);
+    if (!mExecutable->syncInfo()) {
+        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
+    }
+
+    mRoot = reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root"));
+    mRootExpand =
+        reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root.expand"));
+    mInit = reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress("init"));
+    mFreeChildren =
+        reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress(".rs.dtor"));
+
+
+    if (bitcodeMetadata.getExportVarCount()) {
+        mBoundAllocs = new Allocation *[bitcodeMetadata.getExportVarCount()];
+        memset(mBoundAllocs, 0, sizeof(void *) * bitcodeMetadata.getExportVarCount());
+    }
+
+    for (size_t i = 0; i < bitcodeMetadata.getExportForEachSignatureCount(); i++) {
+        char* name = new char[strlen(bitcodeMetadata.getExportForEachNameList()[i]) + 1];
+        mExportedForEachFuncList.push_back(
+                    std::make_pair(name, bitcodeMetadata.getExportForEachSignatureList()[i]));
+    }
+
+    return true;
+}
+#endif
 
 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
                             uint8_t const *bitcode, size_t bitcodeSize,
@@ -464,7 +811,12 @@
     std::string compileCommandLine =
                 bcc::getCommandLine(compileArguments.size() - 1, compileArguments.data());
 
-    if (!is_force_recompile()) {
+    if (is_skip_linkloader()) {
+        if (!is_force_recompile()) {
+            mScriptSO = loadSharedLibrary(cacheDir, resName);
+        }
+    }
+    else if (!is_force_recompile()) {
         // Load the compiled script that's in the cache, if any.
         mExecutable = bcc::RSCompilerDriver::loadScript(cacheDir, resName, (const char*)bitcode,
                                                         bitcodeSize, compileCommandLine.c_str(),
@@ -473,7 +825,31 @@
 
     // If we can't, it's either not there or out of date.  We compile the bit code and try loading
     // again.
-    if (mExecutable == nullptr) {
+    if (is_skip_linkloader()) {
+        if (mScriptSO == nullptr) {
+            if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
+                                compileArguments.data(), compileCommandLine))
+            {
+                ALOGE("bcc: FAILS to compile '%s'", resName);
+                mCtx->unlockMutex();
+                return false;
+            }
+
+            if (!createSharedLib(cacheDir, resName)) {
+                ALOGE("Linker: Failed to link object file '%s'", resName);
+                mCtx->unlockMutex();
+                return false;
+            }
+
+            mScriptSO = loadSharedLibrary(cacheDir, resName);
+            if (mScriptSO == nullptr) {
+                ALOGE("Unable to load '%s'", resName);
+                mCtx->unlockMutex();
+                return false;
+            }
+        }
+    }
+    else if (mExecutable == nullptr) {
         if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, compileArguments.data(),
                             compileCommandLine)) {
             ALOGE("bcc: FAILS to compile '%s'", resName);
@@ -490,257 +866,46 @@
         }
     }
 
-    mExecutable->setThreadable(mIsThreadable);
-    if (!mExecutable->syncInfo()) {
-        ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
+    // if using the shared object path, read RS symbol information
+    // from the .so.  Otherwise, read from the object files
+    if (!is_skip_linkloader()) {
+        storeRSInfoFromObj(bitcodeMetadata);
     }
+    else {
+        if ( !mScriptSO) {
+            goto error;
+        }
 
-    mRoot = reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root"));
-    mRootExpand =
-        reinterpret_cast<int (*)()>(mExecutable->getSymbolAddress("root.expand"));
-    mInit = reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress("init"));
-    mFreeChildren =
-        reinterpret_cast<void (*)()>(mExecutable->getSymbolAddress(".rs.dtor"));
-
-
-    if (bitcodeMetadata.getExportVarCount()) {
-        mBoundAllocs = new Allocation *[bitcodeMetadata.getExportVarCount()];
-        memset(mBoundAllocs, 0, sizeof(void *) * bitcodeMetadata.getExportVarCount());
+        if ( !storeRSInfoFromSO()) {
+          goto error;
+        }
     }
-
-    for (size_t i = 0; i < bitcodeMetadata.getExportForEachSignatureCount(); i++) {
-        char* name = new char[strlen(bitcodeMetadata.getExportForEachNameList()[i]) + 1];
-        mExportedForEachFuncList.push_back(
-                    std::make_pair(name, bitcodeMetadata.getExportForEachSignatureList()[i]));
-    }
-
 #else  // RS_COMPATIBILITY_LIB is defined
 
     mScriptSO = loadSharedLibrary(cacheDir, resName);
 
-    if (mScriptSO) {
-        char line[MAXLINE];
-        mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
-        if (mRoot) {
-            //ALOGE("Found root(): %p", mRoot);
-        }
-        mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
-        if (mRootExpand) {
-            //ALOGE("Found root.expand(): %p", mRootExpand);
-        }
-        mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
-        if (mInit) {
-            //ALOGE("Found init(): %p", mInit);
-        }
-        mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
-        if (mFreeChildren) {
-            //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
-        }
+    if (!mScriptSO) {
+        goto error;
+    }
 
-        const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
-        if (rsInfo) {
-            //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
-        }
-
-        size_t varCount = 0;
-        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-            goto error;
-        }
-        if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
-            ALOGE("Invalid export var count!: %s", line);
-            goto error;
-        }
-
-        mExportedVariableCount = varCount;
-        //ALOGE("varCount: %zu", varCount);
-        if (varCount > 0) {
-            // Start by creating/zeroing this member, since we don't want to
-            // accidentally clean up invalid pointers later (if we error out).
-            mFieldIsObject = new bool[varCount];
-            if (mFieldIsObject == nullptr) {
-                goto error;
-            }
-            memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
-            mFieldAddress = new void*[varCount];
-            if (mFieldAddress == nullptr) {
-                goto error;
-            }
-            for (size_t i = 0; i < varCount; ++i) {
-                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-                    goto error;
-                }
-                char *c = strrchr(line, '\n');
-                if (c) {
-                    *c = '\0';
-                }
-                mFieldAddress[i] = dlsym(mScriptSO, line);
-                if (mFieldAddress[i] == nullptr) {
-                    ALOGE("Failed to find variable address for %s: %s",
-                          line, dlerror());
-                    // Not a critical error if we don't find a global variable.
-                }
-                else {
-                    //ALOGE("Found variable %s at %p", line,
-                    //mFieldAddress[i]);
-                }
-            }
-        }
-
-        size_t funcCount = 0;
-        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-            goto error;
-        }
-        if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
-            ALOGE("Invalid export func count!: %s", line);
-            goto error;
-        }
-
-        mExportedFunctionCount = funcCount;
-        //ALOGE("funcCount: %zu", funcCount);
-
-        if (funcCount > 0) {
-            mInvokeFunctions = new InvokeFunc_t[funcCount];
-            if (mInvokeFunctions == nullptr) {
-                goto error;
-            }
-            for (size_t i = 0; i < funcCount; ++i) {
-                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-                    goto error;
-                }
-                char *c = strrchr(line, '\n');
-                if (c) {
-                    *c = '\0';
-                }
-
-                mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
-                if (mInvokeFunctions[i] == nullptr) {
-                    ALOGE("Failed to get function address for %s(): %s",
-                          line, dlerror());
-                    goto error;
-                }
-                else {
-                    //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
-                }
-            }
-        }
-
-        size_t forEachCount = 0;
-        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-            goto error;
-        }
-        if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
-            ALOGE("Invalid export forEach count!: %s", line);
-            goto error;
-        }
-
-        if (forEachCount > 0) {
-
-            mForEachSignatures = new uint32_t[forEachCount];
-            if (mForEachSignatures == nullptr) {
-                goto error;
-            }
-            mForEachFunctions = new ForEachFunc_t[forEachCount];
-            if (mForEachFunctions == nullptr) {
-                goto error;
-            }
-            for (size_t i = 0; i < forEachCount; ++i) {
-                unsigned int tmpSig = 0;
-                char tmpName[MAXLINE];
-
-                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-                    goto error;
-                }
-                if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
-                           &tmpSig, tmpName) != 2) {
-                    ALOGE("Invalid export forEach!: %s", line);
-                    goto error;
-                }
-
-                // Lookup the expanded ForEach kernel.
-                strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
-                mForEachSignatures[i] = tmpSig;
-                mForEachFunctions[i] =
-                        (ForEachFunc_t) dlsym(mScriptSO, tmpName);
-                if (i != 0 && mForEachFunctions[i] == nullptr) {
-                    // Ignore missing root.expand functions.
-                    // root() is always specified at location 0.
-                    ALOGE("Failed to find forEach function address for %s: %s",
-                          tmpName, dlerror());
-                    goto error;
-                }
-                else {
-                    //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
-                }
-            }
-        }
-
-        size_t objectSlotCount = 0;
-        if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-            goto error;
-        }
-        if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
-            ALOGE("Invalid object slot count!: %s", line);
-            goto error;
-        }
-
-        if (objectSlotCount > 0) {
-            rsAssert(varCount > 0);
-            for (size_t i = 0; i < objectSlotCount; ++i) {
-                uint32_t varNum = 0;
-                if (strgets(line, MAXLINE, &rsInfo) == nullptr) {
-                    goto error;
-                }
-                if (sscanf(line, "%u", &varNum) != 1) {
-                    ALOGE("Invalid object slot!: %s", line);
-                    goto error;
-                }
-
-                if (varNum < varCount) {
-                    mFieldIsObject[varNum] = true;
-                }
-            }
-        }
-
-        if (varCount > 0) {
-            mBoundAllocs = new Allocation *[varCount];
-            memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
-        }
-
-        if (mScriptSO == (void*)1) {
-            //rsdLookupRuntimeStub(script, "acos");
-        }
-    } else {
+    if (!storeRSInfoFromSO()) {
         goto error;
     }
 #endif
     mCtx->unlockMutex();
     return true;
 
-#ifdef RS_COMPATIBILITY_LIB
 error:
 
     mCtx->unlockMutex();
-    delete[] mInvokeFunctions;
-    delete[] mForEachFunctions;
-    delete[] mFieldAddress;
-    delete[] mFieldIsObject;
-    delete[] mForEachSignatures;
-    delete[] mBoundAllocs;
     if (mScriptSO) {
         dlclose(mScriptSO);
     }
     return false;
-#endif
 }
 
 #ifndef RS_COMPATIBILITY_LIB
 
-#ifdef __LP64__
-#define SYSLIBPATH "/system/lib64"
-#else
-#define SYSLIBPATH "/system/lib"
-#endif
-
 const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
                                           size_t bitcodeSize) {
     const char* defaultLib = SYSLIBPATH"/libclcore.bc";
@@ -781,19 +946,36 @@
 void RsdCpuScriptImpl::populateScript(Script *script) {
 #ifndef RS_COMPATIBILITY_LIB
     // Copy info over to runtime
-    script->mHal.info.exportedFunctionCount = mExecutable->getExportFuncAddrs().size();
-    script->mHal.info.exportedVariableCount = mExecutable->getExportVarAddrs().size();
-    script->mHal.info.exportedForeachFuncList = &mExportedForEachFuncList[0];
-    script->mHal.info.exportedPragmaCount = mExecutable->getPragmaKeys().size();
-    script->mHal.info.exportedPragmaKeyList =
-        const_cast<const char**>(&mExecutable->getPragmaKeys().front());
-    script->mHal.info.exportedPragmaValueList =
-        const_cast<const char**>(&mExecutable->getPragmaValues().front());
+    if (!is_skip_linkloader()) {
+        script->mHal.info.exportedFunctionCount = mExecutable->getExportFuncAddrs().size();
+        script->mHal.info.exportedVariableCount = mExecutable->getExportVarAddrs().size();
+        script->mHal.info.exportedForeachFuncList = &mExportedForEachFuncList[0];
+        script->mHal.info.exportedPragmaCount = mExecutable->getPragmaKeys().size();
+        script->mHal.info.exportedPragmaKeyList =
+            const_cast<const char**>(&mExecutable->getPragmaKeys().front());
+        script->mHal.info.exportedPragmaValueList =
+            const_cast<const char**>(&mExecutable->getPragmaValues().front());
 
-    if (mRootExpand) {
-        script->mHal.info.root = mRootExpand;
-    } else {
-        script->mHal.info.root = mRoot;
+        if (mRootExpand) {
+            script->mHal.info.root = mRootExpand;
+        } else {
+            script->mHal.info.root = mRoot;
+        }
+    }
+    else {
+        // Copy info over to runtime
+        script->mHal.info.exportedFunctionCount = mExportedFunctionCount;
+        script->mHal.info.exportedVariableCount = mExportedVariableCount;
+        script->mHal.info.exportedPragmaCount = 0;
+        script->mHal.info.exportedPragmaKeyList = 0;
+        script->mHal.info.exportedPragmaValueList = 0;
+
+        // Bug, need to stash in metadata
+        if (mRootExpand) {
+            script->mHal.info.root = mRootExpand;
+        } else {
+            script->mHal.info.root = mRoot;
+        }
     }
 #else
     // Copy info over to runtime
@@ -997,11 +1179,18 @@
     mtls->script = this;
     mtls->fep.slot = slot;
 #ifndef RS_COMPATIBILITY_LIB
-    rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
-    mtls->kernel = reinterpret_cast<ForEachFunc_t>(
-                      mExecutable->getExportForeachFuncAddrs()[slot]);
-    rsAssert(mtls->kernel != nullptr);
-    mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+    if (!is_skip_linkloader()) {
+        rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
+        mtls->kernel = reinterpret_cast<ForEachFunc_t>(
+                          mExecutable->getExportForeachFuncAddrs()[slot]);
+        rsAssert(mtls->kernel != nullptr);
+        mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+    }
+    else {
+        mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
+        rsAssert(mtls->kernel != nullptr);
+        mtls->sig = mForEachSignatures[slot];
+    }
 #else
     mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
     rsAssert(mtls->kernel != nullptr);
@@ -1030,7 +1219,7 @@
 
 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
                                       size_t paramLength) {
-    //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
+    //ALOGE("invoke %i %p %zu", slot, params, paramLength);
     void * ap = nullptr;
 
 #if defined(__x86_64__)
@@ -1048,15 +1237,25 @@
 #endif
 
     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
-    reinterpret_cast<void (*)(const void *, uint32_t)>(
 #ifndef RS_COMPATIBILITY_LIB
-        mExecutable->getExportFuncAddrs()[slot])(ap ? (const void *)ap : params, paramLength);
+    if (! is_skip_linkloader()) {
+        reinterpret_cast<void (*)(const void *, uint32_t)>(
+            mExecutable->getExportFuncAddrs()[slot])(
+                ap? (const void *) ap : params, paramLength);
+    }
+    else {
+        reinterpret_cast<void (*)(const void *, uint32_t)>(
+            mInvokeFunctions[slot])(ap? (const void *) ap: params, paramLength);
+    }
 #else
-        mInvokeFunctions[slot])(ap ? (const void *)ap : params, paramLength);
+    reinterpret_cast<void (*)(const void *, uint32_t)>(
+        mInvokeFunctions[slot])(ap? (const void *) ap: params, paramLength);
 #endif
 
 #if defined(__x86_64__)
+    // ap, when non-null, is a heap buffer that was passed in place of params;
+    // release it now that the invoked function has returned.
     if (ap) free(ap);
 #endif
 
     mCtx->setTLS(oldTLS);
@@ -1064,7 +1257,7 @@
 
 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
     //rsAssert(!script->mFieldIsObject[slot]);
-    //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
+    //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
 
     //if (mIntrinsicID) {
         //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
@@ -1072,8 +1265,14 @@
     //}
 
 #ifndef RS_COMPATIBILITY_LIB
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-                          mExecutable->getExportVarAddrs()[slot]);
+    int32_t *destPtr = nullptr;
+    if (!is_skip_linkloader()) {
+        destPtr = reinterpret_cast<int32_t *>(
+                               mExecutable->getExportVarAddrs()[slot]);
+    }
+    else {
+        destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+    }
 #else
     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
 #endif
@@ -1087,11 +1286,17 @@
 
 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
     //rsAssert(!script->mFieldIsObject[slot]);
-    //ALOGE("getGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
+    //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
 
 #ifndef RS_COMPATIBILITY_LIB
-    int32_t *srcPtr = reinterpret_cast<int32_t *>(
-                          mExecutable->getExportVarAddrs()[slot]);
+    int32_t *srcPtr = nullptr;
+    if (!is_skip_linkloader()) {
+        srcPtr = reinterpret_cast<int32_t *>(
+                              mExecutable->getExportVarAddrs()[slot]);
+    }
+    else {
+        srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+    }
 #else
     int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
 #endif
@@ -1108,8 +1313,14 @@
                                                 const uint32_t *dims, size_t dimLength) {
 
 #ifndef RS_COMPATIBILITY_LIB
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-        mExecutable->getExportVarAddrs()[slot]);
+    int32_t *destPtr = nullptr;
+    if (!is_skip_linkloader()) {
+        destPtr = reinterpret_cast<int32_t *>(
+                               mExecutable->getExportVarAddrs()[slot]);
+    }
+    else {
+        destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+    }
 #else
     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
 #endif
@@ -1147,11 +1358,17 @@
 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
 
     //rsAssert(!script->mFieldIsObject[slot]);
-    //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
+    //ALOGE("setGlobalBind %i %p", slot, data);
 
 #ifndef RS_COMPATIBILITY_LIB
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-                          mExecutable->getExportVarAddrs()[slot]);
+    int32_t *destPtr = nullptr;
+    if (!is_skip_linkloader()) {
+        destPtr = reinterpret_cast<int32_t *>(
+                               mExecutable->getExportVarAddrs()[slot]);
+    }
+    else {
+        destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+    }
 #else
     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
 #endif
@@ -1162,7 +1379,7 @@
 
     void *ptr = nullptr;
     mBoundAllocs[slot] = data;
-    if(data) {
+    if (data) {
         ptr = data->mHal.drvState.lod[0].mallocPtr;
     }
     memcpy(destPtr, &ptr, sizeof(void *));
@@ -1171,11 +1388,17 @@
 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
 
     //rsAssert(script->mFieldIsObject[slot]);
-    //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
+    //ALOGE("setGlobalObj %i %p", slot, data);
 
 #ifndef RS_COMPATIBILITY_LIB
-    int32_t *destPtr = reinterpret_cast<int32_t *>(
-                          mExecutable->getExportVarAddrs()[slot]);
+    int32_t *destPtr = nullptr;
+    if (!is_skip_linkloader()) {
+        destPtr = reinterpret_cast<int32_t *>(
+                               mExecutable->getExportVarAddrs()[slot]);
+    }
+    else {
+        destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+    }
 #else
     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
 #endif
@@ -1230,6 +1453,27 @@
     for (size_t i = 0; i < mExportedForEachFuncList.size(); i++) {
         delete[] mExportedForEachFuncList[i].first;
     }
+
+    if (mFieldIsObject) {
+        for (size_t i = 0; i < mExportedVariableCount; ++i) {
+            if (mFieldIsObject[i]) {
+                if (mFieldAddress[i] != nullptr) {
+                    rs_object_base *obj_addr =
+                        reinterpret_cast<rs_object_base *>(mFieldAddress[i]);
+                    rsrClearObject(mCtx->getContext(), obj_addr);
+                }
+            }
+        }
+    }
+
+    if (is_skip_linkloader()) {
+        if (mInvokeFunctions) delete[] mInvokeFunctions;
+        if (mForEachFunctions) delete[] mForEachFunctions;
+        if (mFieldAddress) delete[] mFieldAddress;
+        if (mFieldIsObject) delete[] mFieldIsObject;
+        if (mForEachSignatures) delete[] mForEachSignatures;
+    }
+
 #else
     if (mFieldIsObject) {
         for (size_t i = 0; i < mExportedVariableCount; ++i) {
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index a8a808b..324ee14 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -50,10 +50,11 @@
         const RsExpandKernelParams *,
         uint32_t x1, uint32_t x2,
         uint32_t outstep);
-#ifdef RS_COMPATIBILITY_LIB
+
     typedef void (* InvokeFunc_t)(void);
     typedef void (* ForEachFunc_t)(void);
     typedef int (* RootFunc_t)(void);
+#ifdef RS_COMPATIBILITY_LIB
     typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
 #endif
 
@@ -107,23 +108,34 @@
     static void * lookupRuntimeStub(void* pContext, char const* name);
 
     virtual Allocation * getAllocationForPointer(const void *ptr) const;
+    bool storeRSInfoFromSO();
 
 #ifndef RS_COMPATIBILITY_LIB
+    bool storeRSInfoFromObj(bcinfo::MetadataExtractor &bitcodeMetadata);
     virtual  void * getRSExecutable() { return mExecutable; }
 #endif
 
 protected:
     RsdCpuReferenceImpl *mCtx;
     const Script *mScript;
+    void *mScriptSO;
 
 #ifndef RS_COMPATIBILITY_LIB
     // Returns the path to the core library we'll use.
     const char* findCoreLib(const bcinfo::MetadataExtractor& bitCodeMetaData, const char* bitcode,
                             size_t bitcodeSize);
-    int (*mRoot)();
-    int (*mRootExpand)();
-    void (*mInit)();
-    void (*mFreeChildren)();
+    RootFunc_t mRoot;
+    RootFunc_t mRootExpand;
+    InvokeFunc_t mInit;
+    InvokeFunc_t mFreeChildren;
+
+    InvokeFunc_t *mInvokeFunctions;
+    ForEachFunc_t *mForEachFunctions;
+    void **mFieldAddress;
+    bool *mFieldIsObject;
+    uint32_t *mForEachSignatures;
+    size_t mExportedVariableCount;
+    size_t mExportedFunctionCount;
 
     std::vector<std::pair<const char *, uint32_t> > mExportedForEachFuncList;
 
@@ -133,7 +145,6 @@
     bcc::SymbolResolverProxy mResolver;
     bcc::RSExecutable *mExecutable;
 #else
-    void *mScriptSO;
     RootFunc_t mRoot;
     RootFunc_t mRootExpand;
     InvokeFunc_t mInit;
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index d886cef..d00425c 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -19,7 +19,6 @@
 
 #include "rsAllocation.h"
 
-#ifndef RS_COMPATIBILITY_LIB
 namespace llvm {
 
 class Module;
@@ -38,7 +37,6 @@
 typedef const char* (*RSSelectRTCallback) (const char*, size_t);
 
 typedef void (*RSSetupCompilerCallback) (bcc::RSCompilerDriver *);
-#endif
 
 namespace android {
 namespace renderscript {
@@ -123,11 +121,9 @@
 
     static RsdCpuReference * create(Context *c, uint32_t version_major,
                                     uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn
-#ifndef RS_COMPATIBILITY_LIB
                                     , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback = nullptr,
                                     RSSelectRTCallback pSelectRTCallback = nullptr,
                                     const char *pBccPluginName = nullptr
-#endif
                                     );
     virtual ~RsdCpuReference();
     virtual void setPriority(int32_t priority) = 0;
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index 7568d12..b3ce2d4 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -72,7 +72,6 @@
 typedef uint64_t ulong;
 #endif
 
-#ifdef RS_COMPATIBILITY_LIB
 #ifndef __LP64__
 #define OPAQUETYPE(t) \
     typedef struct { const int* const p; } __attribute__((packed, aligned(4))) t;
@@ -104,7 +103,6 @@
     int tm_yday;    ///< day of the year
     int tm_isdst;   ///< daylight savings time
 } rs_tm;
-#endif
 
 //////////////////////////////////////////////////////////////////////////////
 // Allocation
@@ -123,22 +121,22 @@
 
 #ifndef RS_COMPATIBILITY_LIB
 
-static void SC_AllocationCopy1DRange(android::renderscript::rs_allocation dstAlloc,
+static void SC_AllocationCopy1DRange(::rs_allocation dstAlloc,
                                      uint32_t dstOff,
                                      uint32_t dstMip,
                                      uint32_t count,
-                                     android::renderscript::rs_allocation srcAlloc,
+                                     ::rs_allocation srcAlloc,
                                      uint32_t srcOff, uint32_t srcMip) {
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrAllocationCopy1DRange(rsc, (Allocation*)dstAlloc.p, dstOff, dstMip, count,
                              (Allocation*)srcAlloc.p, srcOff, srcMip);
 }
 
-static void SC_AllocationCopy2DRange(android::renderscript::rs_allocation dstAlloc,
+static void SC_AllocationCopy2DRange(::rs_allocation dstAlloc,
                                      uint32_t dstXoff, uint32_t dstYoff,
                                      uint32_t dstMip, uint32_t dstFace,
                                      uint32_t width, uint32_t height,
-                                     android::renderscript::rs_allocation srcAlloc,
+                                     ::rs_allocation srcAlloc,
                                      uint32_t srcXoff, uint32_t srcYoff,
                                      uint32_t srcMip, uint32_t srcFace) {
     Context *rsc = RsdCpuReference::getTlsContext();
@@ -149,13 +147,13 @@
                              srcXoff, srcYoff, srcMip, srcFace);
 }
 
-static void SC_AllocationIoSend(android::renderscript::rs_allocation alloc) {
+static void SC_AllocationIoSend(::rs_allocation alloc) {
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrAllocationIoSend(rsc, (Allocation*)alloc.p);
 }
 
 
-static void SC_AllocationIoReceive(android::renderscript::rs_allocation alloc) {
+static void SC_AllocationIoReceive(::rs_allocation alloc) {
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrAllocationIoReceive(rsc, (Allocation*)alloc.p);
 }
@@ -188,15 +186,15 @@
                              srcXoff, srcYoff, srcMip, srcFace);
 }
 
-static void SC_AllocationIoSend(Allocation* alloc) {
+static void SC_AllocationIoSend(::rs_allocation alloc) {
     Context *rsc = RsdCpuReference::getTlsContext();
-    rsrAllocationIoSend(rsc, alloc);
+    rsrAllocationIoSend(rsc, (Allocation *) alloc.p);
 }
 
 
-static void SC_AllocationIoReceive(Allocation* alloc) {
+static void SC_AllocationIoReceive(::rs_allocation alloc) {
     Context *rsc = RsdCpuReference::getTlsContext();
-    rsrAllocationIoReceive(rsc, alloc);
+    rsrAllocationIoReceive(rsc, (Allocation *) alloc.p);
 }
 
 #endif
@@ -481,7 +479,7 @@
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrClearObject(rsc, dst);
 }
-#ifndef RS_COMPATIBILITY_LIB
+
 static void SC_SetObject(rs_object_base *dst, rs_object_base  src) {
     //    ALOGE("SC_SetObject: dst = %p, src = %p", dst, src.p);
     //    ALOGE("SC_SetObject: dst[0] = %p", dst[0]);
@@ -494,20 +492,6 @@
     return rsrIsObject(rsc, o);
 }
 
-#else
-static void SC_SetObject(rs_object_base *dst, ObjectBase*  src) {
-    //    ALOGE("SC_SetObject: dst = %p, src = %p", dst, src.p);
-    //    ALOGE("SC_SetObject: dst[0] = %p", dst[0]);
-    Context *rsc = RsdCpuReference::getTlsContext();
-    rsrSetObject(rsc, dst, src);
-}
-
-static bool SC_IsObject(ObjectBase* o) {
-    Context *rsc = RsdCpuReference::getTlsContext();
-    return rsrIsObject(rsc, o);
-}
-#endif
-
 #ifdef __LP64__
 static void SC_SetObject_ByRef(rs_object_base *dst, rs_object_base *src) {
     //    ALOGE("SC_SetObject2: dst = %p, src = %p", dst, src->p);
@@ -559,15 +543,11 @@
 #endif
 #endif
 
-#ifndef RS_COMPATIBILITY_LIB
+
 #ifndef __LP64__
-static void SC_ForEach_SAA(android::renderscript::rs_script target,
-                            android::renderscript::rs_allocation in,
-                            android::renderscript::rs_allocation out) {
-    Context *rsc = RsdCpuReference::getTlsContext();
-    rsrForEach(rsc, (Script*)target.p, (Allocation*)in.p, (Allocation*)out.p,
-               nullptr, 0, nullptr);
-}
+static void SC_ForEach_SAA(::rs_script target,
+                           ::rs_allocation in,
+                           ::rs_allocation out);
 #else
 static void SC_ForEach_SAA(android::renderscript::rs_script *target,
                             android::renderscript::rs_allocation *in,
@@ -578,18 +558,18 @@
 #endif
 
 #ifndef __LP64__
-static void SC_ForEach_SAAU(android::renderscript::rs_script target,
-                            android::renderscript::rs_allocation in,
-                            android::renderscript::rs_allocation out,
+static void SC_ForEach_SAAU(::rs_script target,
+                            ::rs_allocation in,
+                            ::rs_allocation out,
                             const void *usr) {
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrForEach(rsc, (Script*)target.p, (Allocation*)in.p, (Allocation*)out.p,
                usr, 0, nullptr);
 }
 #else
-static void SC_ForEach_SAAU(android::renderscript::rs_script *target,
-                            android::renderscript::rs_allocation *in,
-                            android::renderscript::rs_allocation *out,
+static void SC_ForEach_SAAU(::rs_script *target,
+                            ::rs_allocation *in,
+                            ::rs_allocation *out,
                             const void *usr) {
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrForEach(rsc, (Script*)target->p, (Allocation*)in->p, (Allocation*)out->p, usr, 0, NULL);
@@ -597,14 +577,11 @@
 #endif
 
 #ifndef __LP64__
-static void SC_ForEach_SAAUS(android::renderscript::rs_script target,
-                             android::renderscript::rs_allocation in,
-                             android::renderscript::rs_allocation out,
+static void SC_ForEach_SAAUS(::rs_script target,
+                             ::rs_allocation in,
+                             ::rs_allocation out,
                              const void *usr,
-                             const RsScriptCall *call) {
-    Context *rsc = RsdCpuReference::getTlsContext();
-    rsrForEach(rsc, (Script*)target.p, (Allocation*)in.p, (Allocation*)out.p, usr, 0, call);
-}
+                             const RsScriptCall *call);
 #else
 static void SC_ForEach_SAAUS(android::renderscript::rs_script *target,
                              android::renderscript::rs_allocation *in,
@@ -618,25 +595,18 @@
 
 // These functions are only supported in 32-bit.
 #ifndef __LP64__
-static void SC_ForEach_SAAUL(android::renderscript::rs_script target,
-                             android::renderscript::rs_allocation in,
-                             android::renderscript::rs_allocation out,
+static void SC_ForEach_SAAUL(::rs_script target,
+                             ::rs_allocation in,
+                             ::rs_allocation out,
                              const void *usr,
-                             uint32_t usrLen) {
-    Context *rsc = RsdCpuReference::getTlsContext();
-    rsrForEach(rsc, (Script*)target.p, (Allocation*)in.p, (Allocation*)out.p,
-               usr, usrLen, nullptr);
-}
-static void SC_ForEach_SAAULS(android::renderscript::rs_script target,
-                              android::renderscript::rs_allocation in,
-                              android::renderscript::rs_allocation out,
+                             uint32_t usrLen);
+
+static void SC_ForEach_SAAULS(::rs_script target,
+                              ::rs_allocation in,
+                              ::rs_allocation out,
                               const void *usr,
                               uint32_t usrLen,
-                              const RsScriptCall *call) {
-    Context *rsc = RsdCpuReference::getTlsContext();
-    rsrForEach(rsc, (Script*)target.p, (Allocation*)in.p, (Allocation*)out.p, usr, usrLen, call);
-}
-#endif
+                              const RsScriptCall *call);
 #endif
 
 
@@ -650,7 +620,8 @@
     return rsrGetDt(rsc, sc);
 }
 
-#ifndef RS_COMPATIBILITY_LIB
+// #if !defined(RS_COMPATIBILITY_LIB) && defined(__LP64__)
+#ifdef __LP64__
 time_t SC_Time(time_t *timer) {
     Context *rsc = RsdCpuReference::getTlsContext();
     return rsrTime(rsc, timer);
@@ -850,6 +821,45 @@
     }
 }
 
+// Convert a global-namespace ::rs_allocation handle into the
+// android::renderscript::rs_allocation form by copying its pointer field.
+static inline
+android::renderscript::rs_allocation castToARSAlloc(::rs_allocation a) {
+    android::renderscript::rs_allocation cast;
+    cast.p = (const Allocation *) a.p;
+    return cast;
+}
+
+// Non-static rsGetElementAt*/rsSetElementAt* wrappers.  Each one converts
+// its allocation handle with castToARSAlloc() and forwards to the SC_*
+// routine of the same dimensionality.
+const void *rsGetElementAt1D(::rs_allocation a, uint32_t x) {
+    return SC_GetElementAt1D(castToARSAlloc(a), x);
+}
+
+const void *rsGetElementAt2D(::rs_allocation a, uint32_t x, uint32_t y) {
+    return SC_GetElementAt2D(castToARSAlloc(a), x, y);
+}
+
+const void *rsGetElementAt3D(::rs_allocation a, uint32_t x, uint32_t y, uint32_t z) {
+    return SC_GetElementAt3D(castToARSAlloc(a), x, y, z);
+}
+
+void rsSetElementAt1D(::rs_allocation a, const void *ptr, uint32_t x) {
+    SC_SetElementAt1D(castToARSAlloc(a), ptr, x);
+}
+
+void rsSetElementAt2D(::rs_allocation a, const void *ptr, uint32_t x, uint32_t y) {
+    SC_SetElementAt2D(castToARSAlloc(a), ptr, x, y);
+}
+
+// Three-coordinate setter: named 3D so it pairs with rsGetElementAt3D
+// instead of becoming an extra overload of rsSetElementAt1D.
+void rsSetElementAt3D(::rs_allocation a, const void *ptr, uint32_t x, uint32_t y, uint32_t z) {
+    SC_SetElementAt3D(castToARSAlloc(a), ptr, x, y, z);
+}
+
+
 #define ELEMENT_AT(T, DT, VS)                                               \
     static void SC_SetElementAt1_##T(android::renderscript::rs_allocation a, const T *val, uint32_t x) { \
         void *r = ElementAt1D((Allocation*)a.p, DT, VS, x);             \
@@ -880,7 +883,16 @@
         void *r = ElementAt3D((Allocation*)a.p, DT, VS, x, y, z);        \
         if (r != nullptr) *val = ((T *)r)[0];                            \
         else ALOGE("Error from %s", __PRETTY_FUNCTION__);                \
-    }
+    } \
+    void rsSetElementAt_##T(::rs_allocation a, const T *val, uint32_t x) { \
+        SC_SetElementAt1_##T(castToARSAlloc(a), val, x); \
+    } \
+    void rsSetElementAt2_##T(::rs_allocation a, const T *val, uint32_t x, uint32_t y) { \
+        SC_SetElementAt2_##T(castToARSAlloc(a), val, x, y); \
+    } \
+    void rsSetElementAt_##T(::rs_allocation a, const T *val, uint32_t x, uint32_t y, uint32_t z) { \
+        SC_SetElementAt3_##T(castToARSAlloc(a), val, x, y, z); \
+    } \
 
 ELEMENT_AT(char, RS_TYPE_SIGNED_8, 1)
 ELEMENT_AT(char2, RS_TYPE_SIGNED_8, 2)
@@ -1417,13 +1429,12 @@
     { nullptr, nullptr, false }
 };
 
-#ifdef RS_COMPATIBILITY_LIB
-
 //////////////////////////////////////////////////////////////////////////////
 // Compatibility Library entry points
 //////////////////////////////////////////////////////////////////////////////
 
-#define IS_CLEAR_SET_OBJ(t) \
+#ifndef __LP64__
+#define IS_CLEAR_SET_OBJ(t, u, v) \
     bool rsIsObject(t src) { \
         return src.p != nullptr; \
     } \
@@ -1431,14 +1442,32 @@
         return SC_ClearObject(reinterpret_cast<rs_object_base *>(dst)); \
     } \
     void __attribute__((overloadable)) rsSetObject(t *dst, t src) { \
-        return SC_SetObject(reinterpret_cast<rs_object_base *>(dst), (ObjectBase*)src.p); \
+        android::renderscript::rs_object_base cast; \
+        cast.p = (ObjectBase *) src.p; \
+        return SC_SetObject(reinterpret_cast<rs_object_base *>(dst), cast);\
     }
+#else
+#define IS_CLEAR_SET_OBJ(t, u, v) \
+    extern "C" { bool u(t* src) { \
+        return src->p != nullptr; \
+    } }\
+    void __attribute__((overloadable)) rsClearObject(t *dst) { \
+        return SC_ClearObject(reinterpret_cast<rs_object_base *>(dst)); \
+    } \
+    extern "C" {\
+      void v (t *dst, t *src) { \
+        return SC_SetObject_ByRef(reinterpret_cast<rs_object_base *>(dst),\
+                                  reinterpret_cast<rs_object_base *>(src));\
+    } }
+#endif
 
-IS_CLEAR_SET_OBJ(::rs_element)
-IS_CLEAR_SET_OBJ(::rs_type)
-IS_CLEAR_SET_OBJ(::rs_allocation)
-IS_CLEAR_SET_OBJ(::rs_sampler)
-IS_CLEAR_SET_OBJ(::rs_script)
+IS_CLEAR_SET_OBJ(::rs_element, _Z10rsIsObject10rs_element, _Z11rsSetObjectP10rs_elementS_)
+IS_CLEAR_SET_OBJ(::rs_type, _Z10rsIsObject7rs_type, _Z11rsSetObjectP7rs_typeS_)
+IS_CLEAR_SET_OBJ(::rs_allocation, _Z10rsIsObject13rs_allocation, _Z11rsSetObjectP13rs_allocationS_)
+IS_CLEAR_SET_OBJ(::rs_sampler, _Z10rsIsObject10rs_sampler, _Z11rsSetObjectP10rs_samplerS_)
+IS_CLEAR_SET_OBJ(::rs_script, _Z10rsIsObject9rs_script, _Z11rsSetObjectP9rs_scriptS_)
+
+
 #undef IS_CLEAR_SET_OBJ
 
 static void SC_ForEach_SAA(::rs_script target,
@@ -1480,6 +1509,7 @@
                usr, usrLen, call);
 }
 
+#ifdef RS_COMPATIBILITY_LIB
 static const Allocation * SC_GetAllocation(const void *ptr) {
     Context *rsc = RsdCpuReference::getTlsContext();
     const Script *sc = RsdCpuReference::getTlsScript();
@@ -1490,12 +1520,25 @@
     return SC_GetAllocation(ptr);
 }
 
+#else
+const android::renderscript::rs_allocation rsGetAllocation(const void *ptr) {
+#ifdef __i386__
+    android::renderscript::rs_allocation obj;
+    obj.p = (Allocation *) SC_GetAllocation(ptr);
+    return obj;
+#else
+    return SC_GetAllocation(ptr);
+#endif
+}
+#endif
+
+
 void __attribute__((overloadable)) rsAllocationIoSend(::rs_allocation a) {
-    SC_AllocationIoSend((Allocation *)a.p);
+    SC_AllocationIoSend(a);
 }
 
 void __attribute__((overloadable)) rsAllocationIoReceive(::rs_allocation a) {
-    SC_AllocationIoReceive((Allocation *)a.p);
+    SC_AllocationIoReceive(a);
 }
 
 
@@ -1532,6 +1575,17 @@
 
 void __attribute__((overloadable)) rsForEach(::rs_script script,
                                              ::rs_allocation in,
+                                             ::rs_allocation out,
+                                             const void *usr) {
+#ifdef __LP64__
+    return SC_ForEach_SAAU(&script, &in, &out, usr);
+#else
+    return SC_ForEach_SAAU(script, in, out, usr);
+#endif
+}
+
+void __attribute__((overloadable)) rsForEach(::rs_script script,
+                                             ::rs_allocation in,
                                              ::rs_allocation out) {
     return SC_ForEach_SAA(script, in, out);
 }
@@ -1553,12 +1607,19 @@
     return SC_ForEach_SAAULS(script, in, out, usr, usrLen, (RsScriptCall*)call);
 }
 
+// #if defined(RS_COMPATIBILITY_LIB) || !defined(__LP64__)
+#ifndef __LP64__
 int rsTime(int *timer) {
     return SC_Time(timer);
 }
+#else
+time_t rsTime(time_t * timer) {
+    return SC_Time(timer);
+}
+#endif // __LP64__
 
-rs_tm* rsLocaltime(rs_tm* local, const int *timer) {
-    return (rs_tm*)(SC_LocalTime((tm*)local, (long*)timer));
+rs_tm* rsLocaltime(rs_tm* local, const time_t *timer) {
+    return (rs_tm*)(SC_LocalTime((tm*)local, (time_t *)timer));
 }
 
 int64_t rsUptimeMillis() {
@@ -1701,9 +1762,6 @@
 static void SC_debugUI4(const char *s, uint4 i) {
     ALOGD("%s {%u, %u, %u, %u}  0x%x 0x%x 0x%x 0x%x", s, i.x, i.y, i.z, i.w, i.x, i.y, i.z, i.w);
 }
-static void SC_debugLL64(const char *s, long long ll) {
-    ALOGD("%s %lld  0x%llx", s, ll, ll);
-}
 
 template <typename T>
 static inline long long LL(const T &x) {
@@ -1715,6 +1773,10 @@
     return static_cast<unsigned long long>(x);
 }
 
+static void SC_debugLL64(const char *s, long long ll) {
+    ALOGD("%s %lld  0x%llx", s, LL(ll), LL(ll));
+}
+
 static void SC_debugL2(const char *s, long2 ll) {
     ALOGD("%s {%lld, %lld}  0x%llx 0x%llx", s, LL(ll.x), LL(ll.y), LL(ll.x), LL(ll.y));
 }
@@ -1989,7 +2051,6 @@
 void rsDebug(const char *s, const void *p) {
     SC_debugP(s, p);
 }
-#endif // RS_COMPATIBILITY_LIB
 
 extern const RsdCpuReference::CpuSymbol * rsdLookupRuntimeStub(Context * pContext, char const* name) {
     ScriptC *s = (ScriptC *)pContext;