Reduce diffs with compat lib.

Change-Id: Ibfa2c7c7da56d158f424ac343e364feacd0a21da
diff --git a/cpu_ref/rsCpuRuntimeMath.cpp b/cpu_ref/rsCpuRuntimeMath.cpp
index b72565b..f66677b 100644
--- a/cpu_ref/rsCpuRuntimeMath.cpp
+++ b/cpu_ref/rsCpuRuntimeMath.cpp
@@ -80,13 +80,6 @@
     m->transpose();
 }
 
-static float SC_randf(float max) {
-    float r = (float)rand();
-    r *= max;
-    r /= RAND_MAX;
-    return r;
-}
-
 static float SC_randf2(float min, float max) {
     float r = (float)rand();
     r /= RAND_MAX;
@@ -94,121 +87,12 @@
     return r;
 }
 
-static int SC_randi(int max) {
-    return (int)SC_randf(max);
-}
-
-static int SC_randi2(int min, int max) {
-    return (int)SC_randf2(min, max);
-}
-
 static float SC_frac(float v) {
     int i = (int)floor(v);
     return fmin(v - i, 0x1.fffffep-1f);
 }
 
 
-static int32_t SC_AtomicCas(volatile int32_t *ptr, int32_t expectedValue, int32_t newValue) {
-    int32_t prev;
-
-    do {
-        int32_t ret = android_atomic_release_cas(expectedValue, newValue, ptr);
-        if (!ret) {
-            // The android cas return 0 if it wrote the value.  This means the
-            // previous value was the expected value and we can return.
-            return expectedValue;
-        }
-        // We didn't write the value and need to load the "previous" value.
-        prev = *ptr;
-
-        // A race condition exists where the expected value could appear after our cas failed
-        // above.  In this case loop until we have a legit previous value or the
-        // write passes.
-        } while (prev == expectedValue);
-    return prev;
-}
-
-
-static int32_t SC_AtomicInc(volatile int32_t *ptr) {
-    return android_atomic_inc(ptr);
-}
-
-static int32_t SC_AtomicDec(volatile int32_t *ptr) {
-    return android_atomic_dec(ptr);
-}
-
-static int32_t SC_AtomicAdd(volatile int32_t *ptr, int32_t value) {
-    return android_atomic_add(value, ptr);
-}
-
-static int32_t SC_AtomicSub(volatile int32_t *ptr, int32_t value) {
-    int32_t prev, status;
-    do {
-        prev = *ptr;
-        status = android_atomic_release_cas(prev, prev - value, ptr);
-    } while (CC_UNLIKELY(status != 0));
-    return prev;
-}
-
-static int32_t SC_AtomicAnd(volatile int32_t *ptr, int32_t value) {
-    return android_atomic_and(value, ptr);
-}
-
-static int32_t SC_AtomicOr(volatile int32_t *ptr, int32_t value) {
-    return android_atomic_or(value, ptr);
-}
-
-static int32_t SC_AtomicXor(volatile int32_t *ptr, int32_t value) {
-    int32_t prev, status;
-    do {
-        prev = *ptr;
-        status = android_atomic_release_cas(prev, prev ^ value, ptr);
-    } while (CC_UNLIKELY(status != 0));
-    return prev;
-}
-
-static uint32_t SC_AtomicUMin(volatile uint32_t *ptr, uint32_t value) {
-    uint32_t prev, status;
-    do {
-        prev = *ptr;
-        uint32_t n = rsMin(value, prev);
-        status = android_atomic_release_cas((int32_t) prev, (int32_t)n, (volatile int32_t*) ptr);
-    } while (CC_UNLIKELY(status != 0));
-    return prev;
-}
-
-static int32_t SC_AtomicMin(volatile int32_t *ptr, int32_t value) {
-    int32_t prev, status;
-    do {
-        prev = *ptr;
-        int32_t n = rsMin(value, prev);
-        status = android_atomic_release_cas(prev, n, ptr);
-    } while (CC_UNLIKELY(status != 0));
-    return prev;
-}
-
-static uint32_t SC_AtomicUMax(volatile uint32_t *ptr, uint32_t value) {
-    uint32_t prev, status;
-    do {
-        prev = *ptr;
-        uint32_t n = rsMax(value, prev);
-        status = android_atomic_release_cas((int32_t) prev, (int32_t) n, (volatile int32_t*) ptr);
-    } while (CC_UNLIKELY(status != 0));
-    return prev;
-}
-
-static int32_t SC_AtomicMax(volatile int32_t *ptr, int32_t value) {
-    int32_t prev, status;
-    do {
-        prev = *ptr;
-        int32_t n = rsMax(value, prev);
-        status = android_atomic_release_cas(prev, n, ptr);
-    } while (CC_UNLIKELY(status != 0));
-    return prev;
-}
-
-
-
 //////////////////////////////////////////////////////////////////////////////
 // Class implementation
 //////////////////////////////////////////////////////////////////////////////
@@ -301,34 +185,9 @@
     { "_Z17rsMatrixTransposeP12rs_matrix2x2", (void *)&SC_MatrixTranspose_2x2, true },
 
     // RS Math
-    { "_Z6rsRandi", (void *)&SC_randi, true },
-    { "_Z6rsRandii", (void *)&SC_randi2, true },
-    { "_Z6rsRandf", (void *)&SC_randf, true },
     { "_Z6rsRandff", (void *)&SC_randf2, true },
     { "_Z6rsFracf", (void *)&SC_frac, true },
 
-    // Atomics
-    { "_Z11rsAtomicIncPVi", (void *)&SC_AtomicInc, true },
-    { "_Z11rsAtomicIncPVj", (void *)&SC_AtomicInc, true },
-    { "_Z11rsAtomicDecPVi", (void *)&SC_AtomicDec, true },
-    { "_Z11rsAtomicDecPVj", (void *)&SC_AtomicDec, true },
-    { "_Z11rsAtomicAddPVii", (void *)&SC_AtomicAdd, true },
-    { "_Z11rsAtomicAddPVjj", (void *)&SC_AtomicAdd, true },
-    { "_Z11rsAtomicSubPVii", (void *)&SC_AtomicSub, true },
-    { "_Z11rsAtomicSubPVjj", (void *)&SC_AtomicSub, true },
-    { "_Z11rsAtomicAndPVii", (void *)&SC_AtomicAnd, true },
-    { "_Z11rsAtomicAndPVjj", (void *)&SC_AtomicAnd, true },
-    { "_Z10rsAtomicOrPVii", (void *)&SC_AtomicOr, true },
-    { "_Z10rsAtomicOrPVjj", (void *)&SC_AtomicOr, true },
-    { "_Z11rsAtomicXorPVii", (void *)&SC_AtomicXor, true },
-    { "_Z11rsAtomicXorPVjj", (void *)&SC_AtomicXor, true },
-    { "_Z11rsAtomicMinPVii", (void *)&SC_AtomicMin, true },
-    { "_Z11rsAtomicMinPVjj", (void *)&SC_AtomicUMin, true },
-    { "_Z11rsAtomicMaxPVii", (void *)&SC_AtomicMax, true },
-    { "_Z11rsAtomicMaxPVjj", (void *)&SC_AtomicUMax, true },
-    { "_Z11rsAtomicCasPViii", (void *)&SC_AtomicCas, true },
-    { "_Z11rsAtomicCasPVjjj", (void *)&SC_AtomicCas, true },
-
     { NULL, NULL, false }
 };
 
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index 356139f..2ae4d83 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -28,29 +28,82 @@
 #include "utils/Timers.h"
 #include "utils/StopWatch.h"
 
-
-#include <bcc/BCCContext.h>
-#include <bcc/Renderscript/RSCompilerDriver.h>
-#include <bcc/Renderscript/RSExecutable.h>
-#include <bcc/Renderscript/RSInfo.h>
+#ifdef RS_COMPATIBILITY_LIB
+    #include <dlfcn.h>
+    #include <stdio.h>
+    #include <string.h>
+#else
+    #include <bcc/BCCContext.h>
+    #include <bcc/Renderscript/RSCompilerDriver.h>
+    #include <bcc/Renderscript/RSExecutable.h>
+    #include <bcc/Renderscript/RSInfo.h>
+#endif
 
 namespace android {
 namespace renderscript {
 
 
+#ifdef RS_COMPATIBILITY_LIB
+#define MAXLINE 500  // size of the buffers that hold one line of .rs.info text
+#define MAKE_STR_HELPER(S) #S  // stringifies S exactly as written
+#define MAKE_STR(S) MAKE_STR_HELPER(S)  // expands S first, then stringifies it
+#define EXPORT_VAR_STR "exportVarCount: "  // prefix of the exported-variable count line
+#define EXPORT_VAR_STR_LEN strlen(EXPORT_VAR_STR)  // NOTE(review): no use visible here
+#define EXPORT_FUNC_STR "exportFuncCount: "  // prefix of the exported-function count line
+#define EXPORT_FUNC_STR_LEN strlen(EXPORT_FUNC_STR)  // NOTE(review): no use visible here
+#define EXPORT_FOREACH_STR "exportForEachCount: "  // prefix of the forEach count line
+#define EXPORT_FOREACH_STR_LEN strlen(EXPORT_FOREACH_STR)  // NOTE(review): no use visible here
+#define OBJECT_SLOT_STR "objectSlotCount: "  // prefix of the object-slot count line
+#define OBJECT_SLOT_STR_LEN strlen(OBJECT_SLOT_STR)  // NOTE(review): no use visible here
+
+// Copy one line from *ppstr into s (like fgets): at most size - 1 chars, up to
+// and including a '\n'; s is always NUL-terminated. *ppstr is advanced past the
+// copied chars but never past the terminating '\0', so the following call sees
+// the terminator and returns NULL instead of reading beyond the string.
+// Returns s on success; NULL when s, ppstr or *ppstr is NULL, size < 1, or the
+// input is exhausted.
+static char* strgets(char *s, int size, const char **ppstr) {
+    if (!s || !ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
+        return NULL;
+    }
+
+    int i = 0;
+    while (i < (size - 1) && **ppstr != '\0') {
+        const char c = **ppstr;
+        s[i++] = c;
+        (*ppstr)++;
+        if (c == '\n') {
+            break;
+        }
+    }
+    // Terminate after the newline, at end of input, or when size is exhausted.
+    s[i] = '\0';
+    return s;
+}
+#endif
 
 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
     mCtx = ctx;
     mScript = s;
 
+#ifdef RS_COMPATIBILITY_LIB
+    mScriptSO = NULL;
+    mInvokeFunctions = NULL;
+    mForEachFunctions = NULL;
+    mFieldAddress = NULL;
+    mFieldIsObject = NULL;
+    mForEachSignatures = NULL;
+#else
+    mCompilerContext = NULL;
+    mCompilerDriver = NULL;
+    mExecutable = NULL;
+#endif
+
     mRoot = NULL;
     mRootExpand = NULL;
     mInit = NULL;
     mFreeChildren = NULL;
 
-    mCompilerContext = NULL;
-    mCompilerDriver = NULL;
-    mExecutable = NULL;
 
     mBoundAllocs = NULL;
     mIntrinsicData = NULL;
@@ -66,6 +119,7 @@
 
     mCtx->lockMutex();
 
+#ifndef RS_COMPATIBILITY_LIB
     bcc::RSExecutable *exec;
     const bcc::RSInfo *info;
 
@@ -121,11 +175,256 @@
         memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size());
     }
 
+#else
+
+    String8 scriptSOName(cacheDir);
+    scriptSOName = scriptSOName.getPathDir();
+    scriptSOName.appendPath("lib");
+    scriptSOName.append("/librs.");
+    scriptSOName.append(resName);
+    scriptSOName.append(".so");
+
+    //script->mHal.drv = drv;
+
+    //ALOGV("Opening up shared object: %s", scriptSOName.string());
+    mScriptSO = dlopen(scriptSOName.string(), RTLD_NOW | RTLD_LOCAL);
+    if (mScriptSO == NULL) {
+        ALOGE("Unable to open shared library (%s): %s",
+              scriptSOName.string(), dlerror());
+
+        // One final attempt to find the library in "/system/lib".
+        // We do this to allow bundled applications to use the compatibility
+        // library fallback path. Those applications don't have a private
+        // library path, so they need to install to the system directly.
+        String8 scriptSONameSystem("/system/lib/librs.");
+        scriptSONameSystem.append(resName);
+        scriptSONameSystem.append(".so");
+        mScriptSO = dlopen(scriptSONameSystem.string(), RTLD_NOW | RTLD_LOCAL);
+        if (mScriptSO == NULL) {
+            ALOGE("Unable to open system shared library (%s): %s",
+                  scriptSONameSystem.string(), dlerror());
+            goto error;
+        }
+    }
+
+    if (mScriptSO) {
+        char line[MAXLINE];
+        mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
+        if (mRoot) {
+            //ALOGE("Found root(): %p", mRoot);
+        }
+        mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
+        if (mRootExpand) {
+            //ALOGE("Found root.expand(): %p", mRootExpand);
+        }
+        mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
+        if (mInit) {
+            //ALOGE("Found init(): %p", mInit);
+        }
+        mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
+        if (mFreeChildren) {
+            //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
+        }
+
+        const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
+        if (rsInfo) {
+            //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
+        }
+
+        size_t varCount = 0;
+        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+            goto error;
+        }
+        if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
+            ALOGE("Invalid export var count!: %s", line);
+            goto error;
+        }
+
+        mExportedVariableCount = varCount;
+        //ALOGE("varCount: %zu", varCount);
+        if (varCount > 0) {
+            // Start by creating/zeroing this member, since we don't want to
+            // accidentally clean up invalid pointers later (if we error out).
+            mFieldIsObject = new bool[varCount];
+            if (mFieldIsObject == NULL) {
+                goto error;
+            }
+            memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
+            mFieldAddress = new void*[varCount];
+            if (mFieldAddress == NULL) {
+                goto error;
+            }
+            for (size_t i = 0; i < varCount; ++i) {
+                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+                    goto error;
+                }
+                char *c = strrchr(line, '\n');
+                if (c) {
+                    *c = '\0';
+                }
+                mFieldAddress[i] = dlsym(mScriptSO, line);
+                if (mFieldAddress[i] == NULL) {
+                    ALOGE("Failed to find variable address for %s: %s",
+                          line, dlerror());
+                    // Not a critical error if we don't find a global variable.
+                }
+                else {
+                    //ALOGE("Found variable %s at %p", line,
+                    //mFieldAddress[i]);
+                }
+            }
+        }
+
+        size_t funcCount = 0;
+        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+            goto error;
+        }
+        if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
+            ALOGE("Invalid export func count!: %s", line);
+            goto error;
+        }
+
+        mExportedFunctionCount = funcCount;
+        //ALOGE("funcCount: %zu", funcCount);
+
+        if (funcCount > 0) {
+            mInvokeFunctions = new InvokeFunc_t[funcCount];
+            if (mInvokeFunctions == NULL) {
+                goto error;
+            }
+            for (size_t i = 0; i < funcCount; ++i) {
+                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+                    goto error;
+                }
+                char *c = strrchr(line, '\n');
+                if (c) {
+                    *c = '\0';
+                }
+
+                mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
+                if (mInvokeFunctions[i] == NULL) {
+                    ALOGE("Failed to get function address for %s(): %s",
+                          line, dlerror());
+                    goto error;
+                }
+                else {
+                    //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
+                }
+            }
+        }
+
+        size_t forEachCount = 0;
+        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+            goto error;
+        }
+        if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
+            ALOGE("Invalid export forEach count!: %s", line);
+            goto error;
+        }
+
+        if (forEachCount > 0) {
+
+            mForEachSignatures = new uint32_t[forEachCount];
+            if (mForEachSignatures == NULL) {
+                goto error;
+            }
+            mForEachFunctions = new ForEachFunc_t[forEachCount];
+            if (mForEachFunctions == NULL) {
+                goto error;
+            }
+            for (size_t i = 0; i < forEachCount; ++i) {
+                unsigned int tmpSig = 0;
+                char tmpName[MAXLINE];
+
+                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+                    goto error;
+                }
+                if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
+                           &tmpSig, tmpName) != 2) {
+                    ALOGE("Invalid export forEach!: %s", line);
+                    goto error;
+                }
+
+                // Lookup the expanded ForEach kernel.
+                strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
+                mForEachSignatures[i] = tmpSig;
+                mForEachFunctions[i] =
+                        (ForEachFunc_t) dlsym(mScriptSO, tmpName);
+                if (mForEachFunctions[i] == NULL) {
+                    ALOGE("Failed to find forEach function address for %s: %s",
+                          tmpName, dlerror());
+                    // Ignore missing root.expand functions.
+                    // root() is always specified at location 0.
+                    if (i != 0) {
+                        goto error;
+                    }
+                }
+                else {
+                    //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
+                }
+            }
+        }
+
+        size_t objectSlotCount = 0;
+        if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+            goto error;
+        }
+        if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
+            ALOGE("Invalid object slot count!: %s", line);
+            goto error;
+        }
+
+        if (objectSlotCount > 0) {
+            rsAssert(varCount > 0);
+            for (size_t i = 0; i < objectSlotCount; ++i) {
+                uint32_t varNum = 0;
+                if (strgets(line, MAXLINE, &rsInfo) == NULL) {
+                    goto error;
+                }
+                if (sscanf(line, "%u", &varNum) != 1) {
+                    ALOGE("Invalid object slot!: %s", line);
+                    goto error;
+                }
+
+                if (varNum < varCount) {
+                    mFieldIsObject[varNum] = true;
+                }
+            }
+        }
+
+        if (varCount > 0) {
+            mBoundAllocs = new Allocation *[varCount];
+            memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
+        }
+
+        if (mScriptSO == (void*)1) {
+            //rsdLookupRuntimeStub(script, "acos");
+        }
+    }
+#endif
+
     mCtx->unlockMutex();
     return true;
+
+#ifdef RS_COMPATIBILITY_LIB
+error:
+    // Reset every member after releasing it: ~RsdCpuScriptImpl() frees the same
+    // members, so stale pointers would be freed/closed twice after a failure.
+    mCtx->unlockMutex();
+    delete[] mInvokeFunctions;    mInvokeFunctions = NULL;
+    delete[] mForEachFunctions;   mForEachFunctions = NULL;
+    delete[] mFieldAddress;       mFieldAddress = NULL;
+    delete[] mFieldIsObject;      mFieldIsObject = NULL;
+    delete[] mForEachSignatures;  mForEachSignatures = NULL;
+    delete[] mBoundAllocs;        mBoundAllocs = NULL;
+    if (mScriptSO) dlclose(mScriptSO);
+    mScriptSO = NULL;
+    return false;
+#endif
 }
 
 void RsdCpuScriptImpl::populateScript(Script *script) {
+#ifndef RS_COMPATIBILITY_LIB
     const bcc::RSInfo *info = &mExecutable->getInfo();
 
     // Copy info over to runtime
@@ -142,6 +441,21 @@
     } else {
         script->mHal.info.root = mRoot;
     }
+#else
+    // Copy info over to runtime
+    script->mHal.info.exportedFunctionCount = mExportedFunctionCount;
+    script->mHal.info.exportedVariableCount = mExportedVariableCount;
+    script->mHal.info.exportedPragmaCount = 0;
+    script->mHal.info.exportedPragmaKeyList = 0;
+    script->mHal.info.exportedPragmaValueList = 0;
+
+    // Bug, need to stash in metadata
+    if (mRootExpand) {
+        script->mHal.info.root = mRootExpand;
+    } else {
+        script->mHal.info.root = mRoot;
+    }
+#endif
 }
 
 
@@ -264,15 +578,19 @@
 }
 
 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
-
     mtls->script = this;
     mtls->fep.slot = slot;
-
+#ifndef RS_COMPATIBILITY_LIB
     rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
     mtls->kernel = reinterpret_cast<ForEachFunc_t>(
                       mExecutable->getExportForeachFuncAddrs()[slot]);
     rsAssert(mtls->kernel != NULL);
     mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
+#else
+    mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
+    rsAssert(mtls->kernel != NULL);
+    mtls->sig = mForEachSignatures[slot];
+#endif
 }
 
 int RsdCpuScriptImpl::invokeRoot() {
@@ -300,7 +618,11 @@
 
     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
     reinterpret_cast<void (*)(const void *, uint32_t)>(
+#ifndef RS_COMPATIBILITY_LIB
         mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
+#else
+        mInvokeFunctions[slot])(params, paramLength);
+#endif
     mCtx->setTLS(oldTLS);
 }
 
@@ -313,8 +635,12 @@
         //return;
     //}
 
+#ifndef RS_COMPATIBILITY_LIB
     int32_t *destPtr = reinterpret_cast<int32_t *>(
                           mExecutable->getExportVarAddrs()[slot]);
+#else
+    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+#endif
     if (!destPtr) {
         //ALOGV("Calling setVar on slot = %i which is null", slot);
         return;
@@ -327,8 +653,12 @@
                                                 const Element *elem,
                                                 const size_t *dims, size_t dimLength) {
 
+#ifndef RS_COMPATIBILITY_LIB
     int32_t *destPtr = reinterpret_cast<int32_t *>(
         mExecutable->getExportVarAddrs()[slot]);
+#else
+    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+#endif
     if (!destPtr) {
         //ALOGV("Calling setVar on slot = %i which is null", slot);
         return;
@@ -365,8 +695,12 @@
     //rsAssert(!script->mFieldIsObject[slot]);
     //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
 
+#ifndef RS_COMPATIBILITY_LIB
     int32_t *destPtr = reinterpret_cast<int32_t *>(
                           mExecutable->getExportVarAddrs()[slot]);
+#else
+    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+#endif
     if (!destPtr) {
         //ALOGV("Calling setVar on slot = %i which is null", slot);
         return;
@@ -390,8 +724,12 @@
         //return;
     //}
 
+#ifndef RS_COMPATIBILITY_LIB
     int32_t *destPtr = reinterpret_cast<int32_t *>(
                           mExecutable->getExportVarAddrs()[slot]);
+#else
+    int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
+#endif
     if (!destPtr) {
         //ALOGV("Calling setVar on slot = %i which is null", slot);
         return;
@@ -401,7 +739,7 @@
 }
 
 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
-
+#ifndef RS_COMPATIBILITY_LIB
     if (mExecutable) {
         Vector<void *>::const_iterator var_addr_iter =
             mExecutable->getExportVarAddrs().begin();
@@ -441,6 +779,29 @@
     if (mBoundAllocs) {
         delete[] mBoundAllocs;
     }
+#else
+    if (mFieldIsObject) {
+        for (size_t i = 0; i < mExportedVariableCount; ++i) {
+            if (mFieldIsObject[i]) {
+                if (mFieldAddress[i] != NULL) {
+                    ObjectBase **obj_addr =
+                        reinterpret_cast<ObjectBase **>(mFieldAddress[i]);
+                    rsrClearObject(mCtx->getContext(), obj_addr);
+                }
+            }
+        }
+    }
+
+    if (mInvokeFunctions) delete[] mInvokeFunctions;
+    if (mForEachFunctions) delete[] mForEachFunctions;
+    if (mFieldAddress) delete[] mFieldAddress;
+    if (mFieldIsObject) delete[] mFieldIsObject;
+    if (mForEachSignatures) delete[] mForEachSignatures;
+    if (mBoundAllocs) delete[] mBoundAllocs;
+    if (mScriptSO) {
+        dlclose(mScriptSO);
+    }
+#endif
 }
 
 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
diff --git a/cpu_ref/rsCpuScript.h b/cpu_ref/rsCpuScript.h
index 6097c83..b9973c9 100644
--- a/cpu_ref/rsCpuScript.h
+++ b/cpu_ref/rsCpuScript.h
@@ -39,6 +39,12 @@
         const RsForEachStubParamStruct *,
         uint32_t x1, uint32_t x2,
         uint32_t instep, uint32_t outstep);
+#ifdef RS_COMPATIBILITY_LIB
+    typedef void (* InvokeFunc_t)(void);  // type of mInit, mFreeChildren, mInvokeFunctions[]
+    typedef void (* ForEachFunc_t)(void);  // element type of mForEachFunctions[]
+    typedef int (* RootFunc_t)(void);  // type of mRoot and mRootExpand
+    typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);  // NOTE(review): no use visible here
+#endif
 
     bool init(char const *resName, char const *cacheDir,
               uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags);
@@ -86,6 +92,7 @@
     RsdCpuReferenceImpl *mCtx;
     const Script *mScript;
 
+#ifndef RS_COMPATIBILITY_LIB
     int (*mRoot)();
     int (*mRootExpand)();
     void (*mInit)();
@@ -94,6 +101,25 @@
     bcc::BCCContext *mCompilerContext;
     bcc::RSCompilerDriver *mCompilerDriver;
     bcc::RSExecutable *mExecutable;
+#else
+    void *mScriptSO;
+    RootFunc_t mRoot;
+    RootFunc_t mRootExpand;
+    InvokeFunc_t mInit;
+    InvokeFunc_t mFreeChildren;
+    InvokeFunc_t *mInvokeFunctions;
+    ForEachFunc_t *mForEachFunctions;
+
+    void **mFieldAddress;
+    bool *mFieldIsObject;
+    uint32_t *mForEachSignatures;
+
+    // for populate script
+    //int mVersionMajor;
+    //int mVersionMinor;
+    size_t mExportedVariableCount;
+    size_t mExportedFunctionCount;
+#endif
 
     Allocation **mBoundAllocs;
     void * mIntrinsicData;
diff --git a/cpu_ref/rsd_cpu.h b/cpu_ref/rsd_cpu.h
index 1eee7de..13a9656 100644
--- a/cpu_ref/rsd_cpu.h
+++ b/cpu_ref/rsd_cpu.h
@@ -19,6 +19,7 @@
 
 #include "rsAllocation.h"
 
+#ifndef RS_COMPATIBILITY_LIB
 namespace llvm {
 
 class Module;
@@ -31,6 +32,7 @@
 typedef llvm::Module* (*RSLinkRuntimeCallback) (bcc::RSScript *, llvm::Module *, llvm::Module *);
 
 }  // end namespace bcc;
+#endif
 
 namespace android {
 namespace renderscript {
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index 7b41a47..367a50b 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -19,16 +19,18 @@
 #include "rsdCore.h"
 #include "rsdAllocation.h"
 #include "rsdBcc.h"
-#include "rsdGL.h"
-#include "rsdPath.h"
-#include "rsdProgramStore.h"
-#include "rsdProgramRaster.h"
-#include "rsdProgramVertex.h"
-#include "rsdProgramFragment.h"
-#include "rsdMesh.h"
+#ifndef RS_COMPATIBILITY_LIB
+    #include "rsdGL.h"
+    #include "rsdPath.h"
+    #include "rsdProgramStore.h"
+    #include "rsdProgramRaster.h"
+    #include "rsdProgramVertex.h"
+    #include "rsdProgramFragment.h"
+    #include "rsdMesh.h"
+    #include "rsdFrameBuffer.h"
+#endif
 #include "rsdSampler.h"
 #include "rsdScriptGroup.h"
-#include "rsdFrameBuffer.h"
 
 #include <malloc.h>
 #include "rsContext.h"
@@ -46,11 +48,18 @@
 static void Shutdown(Context *rsc);
 static void SetPriority(const Context *rsc, int32_t priority);
 
+#ifndef RS_COMPATIBILITY_LIB
+    #define NATIVE_FUNC(a) a  // full build: the table slot keeps the named function
+#else
+    #define NATIVE_FUNC(a) NULL  // compatibility library: the table slot is NULL
+#endif
+
+
 static RsdHalFunctions FunctionTable = {
-    rsdGLInit,
-    rsdGLShutdown,
-    rsdGLSetSurface,
-    rsdGLSwap,
+    NATIVE_FUNC(rsdGLInit),
+    NATIVE_FUNC(rsdGLShutdown),
+    NATIVE_FUNC(rsdGLSetSurface),
+    NATIVE_FUNC(rsdGLSwap),
 
     Shutdown,
     NULL,
@@ -78,10 +87,10 @@
         rsdAllocationResize,
         rsdAllocationSyncAll,
         rsdAllocationMarkDirty,
-        rsdAllocationGetSurface,
-        rsdAllocationSetSurface,
-        rsdAllocationIoSend,
-        rsdAllocationIoReceive,
+        NATIVE_FUNC(rsdAllocationGetSurface),
+        NATIVE_FUNC(rsdAllocationSetSurface),
+        NATIVE_FUNC(rsdAllocationIoSend),
+        NATIVE_FUNC(rsdAllocationIoReceive),
         rsdAllocationData1D,
         rsdAllocationData2D,
         rsdAllocationData3D,
@@ -100,40 +109,40 @@
 
 
     {
-        rsdProgramStoreInit,
-        rsdProgramStoreSetActive,
-        rsdProgramStoreDestroy
+        NATIVE_FUNC(rsdProgramStoreInit),
+        NATIVE_FUNC(rsdProgramStoreSetActive),
+        NATIVE_FUNC(rsdProgramStoreDestroy)
     },
 
     {
-        rsdProgramRasterInit,
-        rsdProgramRasterSetActive,
-        rsdProgramRasterDestroy
+        NATIVE_FUNC(rsdProgramRasterInit),
+        NATIVE_FUNC(rsdProgramRasterSetActive),
+        NATIVE_FUNC(rsdProgramRasterDestroy)
     },
 
     {
-        rsdProgramVertexInit,
-        rsdProgramVertexSetActive,
-        rsdProgramVertexDestroy
+        NATIVE_FUNC(rsdProgramVertexInit),
+        NATIVE_FUNC(rsdProgramVertexSetActive),
+        NATIVE_FUNC(rsdProgramVertexDestroy)
     },
 
     {
-        rsdProgramFragmentInit,
-        rsdProgramFragmentSetActive,
-        rsdProgramFragmentDestroy
+        NATIVE_FUNC(rsdProgramFragmentInit),
+        NATIVE_FUNC(rsdProgramFragmentSetActive),
+        NATIVE_FUNC(rsdProgramFragmentDestroy)
     },
 
     {
-        rsdMeshInit,
-        rsdMeshDraw,
-        rsdMeshDestroy
+        NATIVE_FUNC(rsdMeshInit),
+        NATIVE_FUNC(rsdMeshDraw),
+        NATIVE_FUNC(rsdMeshDestroy)
     },
 
     {
-        rsdPathInitStatic,
-        rsdPathInitDynamic,
-        rsdPathDraw,
-        rsdPathDestroy
+        NATIVE_FUNC(rsdPathInitStatic),
+        NATIVE_FUNC(rsdPathInitDynamic),
+        NATIVE_FUNC(rsdPathDraw),
+        NATIVE_FUNC(rsdPathDestroy)
     },
 
     {
@@ -142,9 +151,9 @@
     },
 
     {
-        rsdFrameBufferInit,
-        rsdFrameBufferSetActive,
-        rsdFrameBufferDestroy
+        NATIVE_FUNC(rsdFrameBufferInit),
+        NATIVE_FUNC(rsdFrameBufferSetActive),
+        NATIVE_FUNC(rsdFrameBufferDestroy)
     },
 
     {
@@ -193,9 +202,11 @@
 
     dc->mCpuRef->setPriority(priority);
 
+#ifndef RS_COMPATIBILITY_LIB
     if (dc->mHasGraphics) {
         rsdGLSetPriority(rsc, priority);
     }
+#endif
 }
 
 void Shutdown(Context *rsc) {
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
index 7f86da6..92af7ce 100644
--- a/driver/rsdRuntimeStubs.cpp
+++ b/driver/rsdRuntimeStubs.cpp
@@ -112,6 +112,7 @@
                              srcXoff, srcYoff, srcMip, srcFace);
 }
 
+#ifndef RS_COMPATIBILITY_LIB
 static void SC_AllocationIoSend(Allocation *alloc) {
     Context *rsc = RsdCpuReference::getTlsContext();
     rsdAllocationIoSend(rsc, alloc);
@@ -392,7 +393,7 @@
     Context *rsc = RsdCpuReference::getTlsContext();
     rsrFontColor(rsc, r, g, b, a);
 }
-
+#endif
 
 
 //////////////////////////////////////////////////////////////////////////////
diff --git a/rsContext.cpp b/rsContext.cpp
index c227847..8d972a2 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -262,6 +262,8 @@
     return true;
 }
 
+extern "C" bool rsdHalInit(RsContext c, uint32_t version_major, uint32_t version_minor);
+
 void * Context::threadProc(void *vrsc) {
     Context *rsc = static_cast<Context *>(vrsc);
 #ifndef ANDROID_RS_SERIALIZE
@@ -290,6 +292,7 @@
     bool loadDefault = true;
 
     // Provide a mechanism for dropping in a different RS driver.
+#ifndef RS_COMPATIBILITY_LIB
 #ifdef OVERRIDE_RS_DRIVER
 #define XSTR(S) #S
 #define STR(S) XSTR(S)
@@ -319,6 +322,12 @@
             return NULL;
         }
     }
+#else // RS_COMPATIBILITY_LIB
+    if (rsdHalInit(rsc, 0, 0) != true) {
+        return NULL;
+    }
+#endif
+
 
     rsc->mHal.funcs.setPriority(rsc, rsc->mThreadPriority);
 
diff --git a/rsObjectBase.cpp b/rsObjectBase.cpp
index 6a64582..162f162 100644
--- a/rsObjectBase.cpp
+++ b/rsObjectBase.cpp
@@ -111,7 +111,7 @@
 bool ObjectBase::decUserRef() const {
     rsAssert(mUserRefCount > 0);
 #if RS_OBJECT_DEBUG
-    ALOGV("ObjectBase %p decU ref %i, %i", this, mUserRefCount, mSysRefCount);
+    //ALOGV("ObjectBase %p decU ref %i, %i", this, mUserRefCount, mSysRefCount);
     if (mUserRefCount <= 0) {
         mStack.dump();
     }
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index e8c9d1d..a8591f5 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -92,7 +92,12 @@
     // have to apply locking for proper behavior in RenderScript.
     pthread_mutex_lock(&rsc->gLibMutex);
     tm *tmp = localtime(timer);
+#ifndef RS_COMPATIBILITY_LIB
     memcpy(local, tmp, sizeof(*tmp));
+#else
+    // WORKAROUND to struct rs_tm != struct tm
+    memcpy(local, tmp, sizeof(int)*9);
+#endif
     pthread_mutex_unlock(&rsc->gLibMutex);
     return local;
 }