Create runtime stubs for compute driver.

Change-Id: I8e0250a642844a2ad3ff6efc38e385445b7da032
diff --git a/Android.mk b/Android.mk
index ea7f477..a99a599 100644
--- a/Android.mk
+++ b/Android.mk
@@ -95,7 +95,9 @@
 	rsFont.cpp \
 	rsLocklessFifo.cpp \
 	rsObjectBase.cpp \
-	rsMatrix.cpp \
+	rsMatrix2x2.cpp \
+	rsMatrix3x3.cpp \
+	rsMatrix4x4.cpp \
 	rsMesh.cpp \
 	rsMutex.cpp \
 	rsProgram.cpp \
@@ -107,7 +109,6 @@
 	rsScript.cpp \
 	rsScriptC.cpp \
 	rsScriptC_Lib.cpp \
-	rsScriptC_LibCL.cpp \
 	rsScriptC_LibGL.cpp \
 	rsShaderCache.cpp \
 	rsSignal.cpp \
@@ -119,7 +120,9 @@
 	driver/rsdCore.cpp \
 	driver/rsdGL.cpp \
 	driver/rsdProgramRaster.cpp \
-	driver/rsdProgramStore.cpp
+	driver/rsdProgramStore.cpp \
+	driver/rsdRuntimeMath.cpp \
+	driver/rsdRuntimeStubs.cpp
 
 
 LOCAL_SHARED_LIBRARIES += libz libcutils libutils libEGL libGLESv1_CM libGLESv2 libui libbcc
diff --git a/RenderScriptDefines.h b/RenderScriptDefines.h
index bb275b5..308437d 100644
--- a/RenderScriptDefines.h
+++ b/RenderScriptDefines.h
@@ -52,6 +52,18 @@
 
 typedef void (* RsBitmapCallback_t)(void *);
 
+typedef struct {  // Plain structs holding matrix storage as flat float arrays.
+    float m[16];  // 16 floats: storage for a 4x4 matrix
+} rs_matrix4x4;
+
+typedef struct {
+    float m[9];  // 9 floats: storage for a 3x3 matrix
+} rs_matrix3x3;
+
+typedef struct {
+    float m[4];  // 4 floats: storage for a 2x2 matrix
+} rs_matrix2x2;
+
 enum RsDeviceParam {
     RS_DEVICE_PARAM_FORCE_SOFTWARE_GL,
     RS_DEVICE_PARAM_COUNT
diff --git a/driver/rsdBcc.cpp b/driver/rsdBcc.cpp
index 20aef49..2c6840b 100644
--- a/driver/rsdBcc.cpp
+++ b/driver/rsdBcc.cpp
@@ -17,6 +17,7 @@
 
 #include "rsdCore.h"
 #include "rsdBcc.h"
+#include "rsdRuntime.h"
 
 #include "rsContext.h"
 #include "rsScriptC.h"
@@ -129,8 +130,7 @@
                      char const *cacheDir,
                      uint8_t const *bitcode,
                      size_t bitcodeSize,
-                     uint32_t flags,
-                     RsHalSymbolLookupFunc lookupFunc) {
+                     uint32_t flags) {
     //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
 
     char *cachePath = NULL;
@@ -149,7 +149,7 @@
 
     //LOGE("mBccScript %p", script->mBccScript);
 
-    if (bccRegisterSymbolCallback(drv->mBccScript, lookupFunc, script) != 0) {
+    if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
         LOGE("bcc: FAILS to register symbol callback");
         goto error;
     }
@@ -334,7 +334,7 @@
                             uint32_t usrLen,
                             const RsScriptCall *sc) {
 
-    RsHal * dc = (RsHal *)rsc->mHal.drv;
+    RsdHal * dc = (RsdHal *)rsc->mHal.drv;
 
     MTLaunchStruct mtls;
     memset(&mtls, 0, sizeof(mtls));
@@ -513,7 +513,7 @@
         return;
     }
 
-    rsiSetObject((ObjectBase **)destPtr, data);
+    rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
 }
 
 void rsdScriptDestroy(const Context *dc, Script *script) {
@@ -525,7 +525,7 @@
                 // The field address can be NULL if the script-side has
                 // optimized the corresponding global variable away.
                 if (drv->mFieldAddress[ct]) {
-                    rsiClearObject((ObjectBase **)drv->mFieldAddress[ct]);
+                    rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
                 }
             }
         }
diff --git a/driver/rsdBcc.h b/driver/rsdBcc.h
index ae7a7af..62b50f4 100644
--- a/driver/rsdBcc.h
+++ b/driver/rsdBcc.h
@@ -18,12 +18,12 @@
 #define RSD_BCC_H
 
 #include <rs_hal.h>
+#include <rsRuntime.h>
 
 
 bool rsdScriptInit(const android::renderscript::Context *, android::renderscript::ScriptC *,
                    char const *resName, char const *cacheDir,
-                   uint8_t const *bitcode, size_t bitcodeSize,
-                   uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc);
+                   uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags);
 void rsdScriptInvokeFunction(const android::renderscript::Context *dc,
                              android::renderscript::Script *script,
                              uint32_t slot,
diff --git a/driver/rsdCore.cpp b/driver/rsdCore.cpp
index 75f4d6b..c01e5ab 100644
--- a/driver/rsdCore.cpp
+++ b/driver/rsdCore.cpp
@@ -77,7 +77,7 @@
 
 static void * HelperThreadProc(void *vrsc) {
     Context *rsc = static_cast<Context *>(vrsc);
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
 
 
     uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount);
@@ -116,7 +116,7 @@
 }
 
 void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
 
     dc->mWorkers.mLaunchData = data;
     dc->mWorkers.mLaunchCallback = cbk;
@@ -132,7 +132,7 @@
 bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) {
     rsc->mHal.funcs = FunctionTable;
 
-    RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal));
+    RsdHal *dc = (RsdHal *)calloc(1, sizeof(RsdHal));
     if (!dc) {
         LOGE("Calloc for driver hal failed.");
         return false;
@@ -181,14 +181,14 @@
 
 
 void SetPriority(const Context *rsc, int32_t priority) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
     for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) {
         setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority);
     }
 }
 
 void Shutdown(Context *rsc) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
 
     dc->mExit = true;
     dc->mWorkers.mLaunchData = NULL;
diff --git a/driver/rsdCore.h b/driver/rsdCore.h
index e37698b..c8df575 100644
--- a/driver/rsdCore.h
+++ b/driver/rsdCore.h
@@ -28,7 +28,13 @@
 typedef void (* InvokeFunc_t)(void);
 typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
 
-typedef struct RsHalRec {
+typedef struct RsdSymbolTableRec {  // One row of a driver runtime symbol table (see gSyms in rsdRuntimeMath.cpp).
+    const char * mName;  // symbol name matched with strcmp() during lookup; gSyms stores mangled names here
+    void * mPtr;  // address of the implementation; a NULL mPtr marks the end-of-table row
+    bool threadable;  // NOTE(review): presumably "safe to call from worker threads" -- confirm with the consumer
+} RsdSymbolTable;
+
+typedef struct RsdHalRec {
     uint32_t version_major;
     uint32_t version_minor;
 
@@ -48,7 +54,7 @@
     bool mExit;
 
     RsdGL gl;
-} RsHal;
+} RsdHal;
 
 
 
diff --git a/driver/rsdGL.cpp b/driver/rsdGL.cpp
index 86dfa0f..6238edd 100644
--- a/driver/rsdGL.cpp
+++ b/driver/rsdGL.cpp
@@ -107,7 +107,7 @@
     }
 }
 
-static void DumpDebug(RsHal *dc) {
+static void DumpDebug(RsdHal *dc) {
     LOGE(" EGL ver %i %i", dc->gl.egl.majorVersion, dc->gl.egl.minorVersion);
     LOGE(" EGL context %p  surface %p,  Display=%p", dc->gl.egl.context, dc->gl.egl.surface,
          dc->gl.egl.display);
@@ -126,7 +126,7 @@
 }
 
 void rsdGLShutdown(const Context *rsc) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
 
     LOGV("%p, deinitEGL", rsc);
 
@@ -147,7 +147,7 @@
 }
 
 bool rsdGLInit(const Context *rsc) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
 
     dc->gl.egl.numConfigs = -1;
     EGLint configAttribs[128];
@@ -289,7 +289,7 @@
 
 
 bool rsdGLSetSurface(const Context *rsc, uint32_t w, uint32_t h, ANativeWindow *sur) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
 
     EGLBoolean ret;
     // WAR: Some drivers fail to handle 0 size surfaces correcntly.
@@ -327,7 +327,7 @@
 }
 
 void rsdGLSwap(const android::renderscript::Context *rsc) {
-    RsHal *dc = (RsHal *)rsc->mHal.drv;
+    RsdHal *dc = (RsdHal *)rsc->mHal.drv;
     eglSwapBuffers(dc->gl.egl.display, dc->gl.egl.surface);
 }
 
diff --git a/driver/rsdRuntime.h b/driver/rsdRuntime.h
new file mode 100644
index 0000000..840eced
--- /dev/null
+++ b/driver/rsdRuntime.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RSD_RUNTIME_STUBS_H
+#define RSD_RUNTIME_STUBS_H
+
+#include <rs_hal.h>
+#include <bcc/bcc.h>
+
+#include "rsMutex.h"
+
+const RsdSymbolTable * rsdLookupSymbolMath(const char *sym);  // row of the math symbol table named sym, or NULL; NOTE(review): RsdSymbolTable is declared in rsdCore.h, which is not included above
+
+void* rsdLookupRuntimeStub(void* pContext, char const* name);  // handed to bccRegisterSymbolCallback() in rsdBcc.cpp, which passes the script object as pContext
+
+#endif
diff --git a/driver/rsdRuntimeMath.cpp b/driver/rsdRuntimeMath.cpp
new file mode 100644
index 0000000..bd1fd0e
--- /dev/null
+++ b/driver/rsdRuntimeMath.cpp
@@ -0,0 +1,440 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptC.h"
+#include "rsMatrix4x4.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix2x2.h"
+
+#include "rsdCore.h"
+#include "rsdRuntime.h"
+
+
+using namespace android;
+using namespace android::renderscript;
+
+
+static float SC_exp10(float v) {  // 10 raised to the power v; bound to "_Z5exp10f" in gSyms
+    return pow(10.f, v);
+}
+
+static float SC_fract(float v, float *iptr) {  // fract(float, float *): "_Z5fractfPf" hands over a float*, so a float must be stored
+    const float whole = floorf(v);  // stay in float: (int)floor(v) is undefined once |v| exceeds the int range
+    iptr[0] = whole;  // out-parameter receives floor(v)
+    return fminf(v - whole, 0x1.fffffep-1f);  // cap at the largest float below 1.0f
+}
+
+static float SC_log2(float v) {  // base-2 logarithm via change of base; bound to "_Z4log2f" in gSyms
+    return log10(v) / log10(2.f);
+}
+
+static float SC_mad(float v1, float v2, float v3) {  // multiply-add v1 * v2 + v3; bound to "_Z3madfff" in gSyms
+    return v1 * v2 + v3;
+}
+
+#if 0  // compiled out: neither SC_pown nor SC_powr has a row in gSyms
+static float SC_pown(float v, int p) {  // v raised to the integer power p
+    return powf(v, (float)p);
+}
+
+static float SC_powr(float v, float p) {  // v raised to the power p
+    return powf(v, p);
+}
+#endif
+
+float SC_rootn(float v, int r) {  // r-th root of v computed as pow(v, 1/r); bound to "_Z5rootnfi". NOTE(review): not static, unlike the stubs above
+    return pow(v, 1.f / r);
+}
+
+float SC_rsqrt(float v) {  // reciprocal square root 1/sqrt(v); bound to "_Z5rsqrtf"
+    return 1.f / sqrtf(v);
+}
+
+float SC_sincos(float v, float *cosptr) {  // returns sin(v) and writes cos(v) through cosptr; bound to "_Z6sincosfPf"
+    *cosptr = cosf(v);
+    return sinf(v);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Integer
+//////////////////////////////////////////////////////////////////////////////
+
+
+static uint32_t SC_abs_i32(int32_t v) {return v < 0 ? 0u - (uint32_t)v : (uint32_t)v;}  // negate as unsigned: abs(INT32_MIN) is undefined
+static uint16_t SC_abs_i16(int16_t v) {return (uint16_t)abs(v);}  // v promotes to int, so -32768 yields 32768
+static uint8_t SC_abs_i8(int8_t v) {return (uint8_t)abs(v);}  // v promotes to int, so -128 yields 128
+
+static uint32_t SC_clz_u32(uint32_t v) {return v ? __builtin_clz(v) : 32;}  // __builtin_clz(0) is undefined; report the full width instead
+static uint16_t SC_clz_u16(uint16_t v) {return v ? (uint16_t)(__builtin_clz(v) - 16) : 16;}  // builtin counts on 32 bits: drop the 16 promoted bits
+static uint8_t SC_clz_u8(uint8_t v) {return v ? (uint8_t)(__builtin_clz(v) - 24) : 8;}  // builtin counts on 32 bits: drop the 24 promoted bits
+static int32_t SC_clz_i32(int32_t v) {return (int32_t)SC_clz_u32((uint32_t)v);}  // signed variants count on the same-width bit pattern
+static int16_t SC_clz_i16(int16_t v) {return (int16_t)SC_clz_u16((uint16_t)v);}
+static int8_t SC_clz_i8(int8_t v) {return (int8_t)SC_clz_u8((uint8_t)v);}
+
+static uint32_t SC_max_u32(uint32_t v, uint32_t v2) {return rsMax(v, v2);}  // max family: one stub per integer width, each forwarding to rsMax(); this one is "_Z3maxjj"
+static uint16_t SC_max_u16(uint16_t v, uint16_t v2) {return rsMax(v, v2);}  // "_Z3maxtt"
+static uint8_t SC_max_u8(uint8_t v, uint8_t v2) {return rsMax(v, v2);}  // "_Z3maxhh"
+static int32_t SC_max_i32(int32_t v, int32_t v2) {return rsMax(v, v2);}  // "_Z3maxii"
+static int16_t SC_max_i16(int16_t v, int16_t v2) {return rsMax(v, v2);}  // "_Z3maxss"
+static int8_t SC_max_i8(int8_t v, int8_t v2) {return rsMax(v, v2);}  // "_Z3maxcc"
+
+static uint32_t SC_min_u32(uint32_t v, uint32_t v2) {return rsMin(v, v2);}  // min family: one stub per integer width, each forwarding to rsMin(); this one is "_Z3minjj"
+static uint16_t SC_min_u16(uint16_t v, uint16_t v2) {return rsMin(v, v2);}  // "_Z3mintt"
+static uint8_t SC_min_u8(uint8_t v, uint8_t v2) {return rsMin(v, v2);}  // "_Z3minhh"
+static int32_t SC_min_i32(int32_t v, int32_t v2) {return rsMin(v, v2);}  // "_Z3minii"
+static int16_t SC_min_i16(int16_t v, int16_t v2) {return rsMin(v, v2);}  // "_Z3minss"
+static int8_t SC_min_i8(int8_t v, int8_t v2) {return rsMin(v, v2);}  // "_Z3mincc"
+
+//////////////////////////////////////////////////////////////////////////////
+// Float util
+//////////////////////////////////////////////////////////////////////////////
+
+static float SC_clamp_f32(float amount, float low, float high) {  // clamp(float, float, float): "_Z5clampfff"
+    return amount < low ? low : (amount > high ? high : amount);  // a NaN amount fails both comparisons and is returned as is
+}
+
+static float SC_degrees(float radians) {  // radians to degrees: "_Z7degreesf"
+    return radians * (180.f / M_PI);
+}
+
+static float SC_max_f32(float v, float v2) {  // float overload of max, forwarding to rsMax(): "_Z3maxff"
+    return rsMax(v, v2);
+}
+
+static float SC_min_f32(float v, float v2) {  // float overload of min, forwarding to rsMin(): "_Z3minff"
+    return rsMin(v, v2);
+}
+
+static float SC_mix_f32(float start, float stop, float amount) {  // linear interpolation; amount 0 gives start, 1 gives stop: "_Z3mixfff"
+    //LOGE("lerpf %f  %f  %f", start, stop, amount);
+    return start + (stop - start) * amount;
+}
+
+static float SC_radians(float degrees) {  // degrees to radians: "_Z7radiansf"
+    return degrees * (M_PI / 180.f);
+}
+
+static float SC_step_f32(float edge, float v) {
+    // 0 strictly below the edge, 1 otherwise (also when v < edge is false because of NaN).
+    return (v < edge) ? 0.f : 1.f;
+}
+
+static float SC_sign_f32(float value) {
+    // +1 / -1 for positive / negative input; anything else (zeros, NaN)
+    // fails both comparisons and is handed back untouched.
+    return (value > 0) ? 1.f : ((value < 0) ? -1.f : value);
+}
+
+static void SC_MatrixLoadIdentity_4x4(Matrix4x4 *m) {  // script side: rsMatrixLoadIdentity(rs_matrix4x4 *)
+    m->loadIdentity();  // NOTE(review): scripts pass rs_matrix4x4 *; assumes Matrix4x4 has the same layout -- confirm
+}
+static void SC_MatrixLoadIdentity_3x3(Matrix3x3 *m) {  // script side: rsMatrixLoadIdentity(rs_matrix3x3 *)
+    m->loadIdentity();
+}
+static void SC_MatrixLoadIdentity_2x2(Matrix2x2 *m) {  // script side: rsMatrixLoadIdentity(rs_matrix2x2 *)
+    m->loadIdentity();
+}
+
+static void SC_MatrixLoad_4x4_f(Matrix4x4 *m, const float *f) {  // script side: rsMatrixLoad(rs_matrix4x4 *, const float *)
+    m->load(f);
+}
+static void SC_MatrixLoad_3x3_f(Matrix3x3 *m, const float *f) {  // script side: rsMatrixLoad(rs_matrix3x3 *, const float *)
+    m->load(f);
+}
+static void SC_MatrixLoad_2x2_f(Matrix2x2 *m, const float *f) {  // script side: rsMatrixLoad(rs_matrix2x2 *, const float *)
+    m->load(f);
+}
+
+static void SC_MatrixLoad_4x4_4x4(Matrix4x4 *m, const Matrix4x4 *s) {  // script side: rsMatrixLoad(rs_matrix4x4 *, const rs_matrix4x4 *)
+    m->load(s);
+}
+static void SC_MatrixLoad_4x4_3x3(Matrix4x4 *m, const Matrix3x3 *s) {  // script side: rsMatrixLoad(rs_matrix4x4 *, const rs_matrix3x3 *)
+    m->load(s);
+}
+static void SC_MatrixLoad_4x4_2x2(Matrix4x4 *m, const Matrix2x2 *s) {  // script side: rsMatrixLoad(rs_matrix4x4 *, const rs_matrix2x2 *)
+    m->load(s);
+}
+static void SC_MatrixLoad_3x3_3x3(Matrix3x3 *m, const Matrix3x3 *s) {  // script side: rsMatrixLoad(rs_matrix3x3 *, const rs_matrix3x3 *)
+    m->load(s);
+}
+static void SC_MatrixLoad_2x2_2x2(Matrix2x2 *m, const Matrix2x2 *s) {  // script side: rsMatrixLoad(rs_matrix2x2 *, const rs_matrix2x2 *)
+    m->load(s);
+}
+
+static void SC_MatrixLoadRotate(Matrix4x4 *m, float rot, float x, float y, float z) {  // script side: rsMatrixLoadRotate(rs_matrix4x4 *, float x4)
+    m->loadRotate(rot, x, y, z);
+}
+static void SC_MatrixLoadScale(Matrix4x4 *m, float x, float y, float z) {  // script side: rsMatrixLoadScale(rs_matrix4x4 *, float x3)
+    m->loadScale(x, y, z);
+}
+static void SC_MatrixLoadTranslate(Matrix4x4 *m, float x, float y, float z) {  // script side: rsMatrixLoadTranslate(rs_matrix4x4 *, float x3)
+    m->loadTranslate(x, y, z);
+}
+static void SC_MatrixRotate(Matrix4x4 *m, float rot, float x, float y, float z) {  // script side: rsMatrixRotate(rs_matrix4x4 *, float x4)
+    m->rotate(rot, x, y, z);
+}
+static void SC_MatrixScale(Matrix4x4 *m, float x, float y, float z) {  // script side: rsMatrixScale(rs_matrix4x4 *, float x3)
+    m->scale(x, y, z);
+}
+static void SC_MatrixTranslate(Matrix4x4 *m, float x, float y, float z) {  // script side: rsMatrixTranslate(rs_matrix4x4 *, float x3)
+    m->translate(x, y, z);
+}
+
+static void SC_MatrixLoadMultiply_4x4_4x4_4x4(Matrix4x4 *m, const Matrix4x4 *lhs, const Matrix4x4 *rhs) {
+    m->loadMultiply(lhs, rhs);  // script side: rsMatrixLoadMultiply() with three rs_matrix4x4 pointers
+}
+static void SC_MatrixLoadMultiply_3x3_3x3_3x3(Matrix3x3 *m, const Matrix3x3 *lhs, const Matrix3x3 *rhs) {
+    m->loadMultiply(lhs, rhs);  // script side: rsMatrixLoadMultiply() with three rs_matrix3x3 pointers
+}
+static void SC_MatrixLoadMultiply_2x2_2x2_2x2(Matrix2x2 *m, const Matrix2x2 *lhs, const Matrix2x2 *rhs) {
+    m->loadMultiply(lhs, rhs);  // script side: rsMatrixLoadMultiply() with three rs_matrix2x2 pointers
+}
+
+static void SC_MatrixMultiply_4x4_4x4(Matrix4x4 *m, const Matrix4x4 *rhs) {  // script side: rsMatrixMultiply(rs_matrix4x4 *, const rs_matrix4x4 *)
+    m->multiply(rhs);
+}
+static void SC_MatrixMultiply_3x3_3x3(Matrix3x3 *m, const Matrix3x3 *rhs) {  // script side: rsMatrixMultiply(rs_matrix3x3 *, const rs_matrix3x3 *)
+    m->multiply(rhs);
+}
+static void SC_MatrixMultiply_2x2_2x2(Matrix2x2 *m, const Matrix2x2 *rhs) {  // script side: rsMatrixMultiply(rs_matrix2x2 *, const rs_matrix2x2 *)
+    m->multiply(rhs);
+}
+
+static void SC_MatrixLoadOrtho(Matrix4x4 *m, float l, float r, float b, float t, float n, float f) {
+    m->loadOrtho(l, r, b, t, n, f);  // script side: rsMatrixLoadOrtho(rs_matrix4x4 *, float x6)
+}
+static void SC_MatrixLoadFrustum(Matrix4x4 *m, float l, float r, float b, float t, float n, float f) {
+    m->loadFrustum(l, r, b, t, n, f);  // script side: rsMatrixLoadFrustum(rs_matrix4x4 *, float x6)
+}
+static void SC_MatrixLoadPerspective(Matrix4x4 *m, float fovy, float aspect, float near, float far) {
+    m->loadPerspective(fovy, aspect, near, far);  // script side: rsMatrixLoadPerspective(rs_matrix4x4 *, float x4)
+}
+
+static bool SC_MatrixInverse_4x4(Matrix4x4 *m) {  // script side: rsMatrixInverse(rs_matrix4x4 *); passes on the bool from Matrix4x4::inverse()
+    return m->inverse();
+}
+static bool SC_MatrixInverseTranspose_4x4(Matrix4x4 *m) {  // script side: rsMatrixInverseTranspose(rs_matrix4x4 *); passes on the bool result
+    return m->inverseTranspose();
+}
+static void SC_MatrixTranspose_4x4(Matrix4x4 *m) {  // forwards to Matrix4x4::transpose()
+    m->transpose();
+}
+static void SC_MatrixTranspose_3x3(Matrix3x3 *m) {  // forwards to Matrix3x3::transpose()
+    m->transpose();
+}
+static void SC_MatrixTranspose_2x2(Matrix2x2 *m) {  // forwards to Matrix2x2::transpose()
+    m->transpose();
+}
+
+static float SC_randf(float max) {
+    // rand() is scaled by max first and only then normalised by RAND_MAX.
+    const float scaled = (float)rand() * max;
+    return scaled / RAND_MAX;
+}
+
+static float SC_randf2(float min, float max) {  // pseudo-random float in [min, max]; bound to "_Z6rsRandff"
+    float r = (float)rand() / RAND_MAX;  // normalise to [0, 1] first ...
+    r = r * (max - min) + min;  // ... so the min offset is not divided by RAND_MAX as well
+    return r;
+}
+
+static int SC_randi(int max) {  // rsRand(int): the SC_randf() result with the fraction dropped by the int cast
+    return (int)SC_randf(max);
+}
+
+static int SC_randi2(int min, int max) {  // rsRand(int, int): the SC_randf2() result with the fraction dropped by the int cast
+    return (int)SC_randf2(min, max);
+}
+
+static float SC_frac(float v) {  // fractional part v - floor(v); bound to "_Z6rsFracf"
+    const float whole = floorf(v);  // stay in float: (int)floor(v) is undefined once |v| exceeds the int range
+    return fminf(v - whole, 0x1.fffffep-1f);  // cap at the largest float below 1.0f
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Class implementation
+//////////////////////////////////////////////////////////////////////////////
+
+// llvm name mangling ref
+//  <builtin-type> ::= v  # void
+//                 ::= b  # bool
+//                 ::= c  # char
+//                 ::= a  # signed char
+//                 ::= h  # unsigned char
+//                 ::= s  # short
+//                 ::= t  # unsigned short
+//                 ::= i  # int
+//                 ::= j  # unsigned int
+//                 ::= l  # long
+//                 ::= m  # unsigned long
+//                 ::= x  # long long, __int64
+//                 ::= y  # unsigned long long, unsigned __int64
+//                 ::= f  # float
+//                 ::= d  # double
+
+static RsdSymbolTable gSyms[] = {  // rows are { mangled script-side name, host implementation, threadable }; the all-NULL row ends the table
+    { "_Z4acosf", (void *)&acosf, true },
+    { "_Z5acoshf", (void *)&acoshf, true },
+    { "_Z4asinf", (void *)&asinf, true },
+    { "_Z5asinhf", (void *)&asinhf, true },
+    { "_Z4atanf", (void *)&atanf, true },
+    { "_Z5atan2ff", (void *)&atan2f, true },
+    { "_Z5atanhf", (void *)&atanhf, true },
+    { "_Z4cbrtf", (void *)&cbrtf, true },
+    { "_Z4ceilf", (void *)&ceilf, true },
+    { "_Z8copysignff", (void *)&copysignf, true },
+    { "_Z3cosf", (void *)&cosf, true },
+    { "_Z4coshf", (void *)&coshf, true },
+    { "_Z4erfcf", (void *)&erfcf, true },
+    { "_Z3erff", (void *)&erff, true },
+    { "_Z3expf", (void *)&expf, true },
+    { "_Z4exp2f", (void *)&exp2f, true },
+    { "_Z5exp10f", (void *)&SC_exp10, true },
+    { "_Z5expm1f", (void *)&expm1f, true },
+    { "_Z4fabsf", (void *)&fabsf, true },
+    { "_Z4fdimff", (void *)&fdimf, true },
+    { "_Z5floorf", (void *)&floorf, true },
+    { "_Z3fmafff", (void *)&fmaf, true },
+    { "_Z4fmaxff", (void *)&fmaxf, true },
+    { "_Z4fminff", (void *)&fminf, true },  // float fmin(float, float)
+    { "_Z4fmodff", (void *)&fmodf, true },
+    { "_Z5fractfPf", (void *)&SC_fract, true },
+    { "_Z5frexpfPi", (void *)&frexpf, true },
+    { "_Z5hypotff", (void *)&hypotf, true },
+    { "_Z5ilogbf", (void *)&ilogbf, true },
+    { "_Z5ldexpfi", (void *)&ldexpf, true },
+    { "_Z6lgammaf", (void *)&lgammaf, true },
+    { "_Z6lgammafPi", (void *)&lgammaf_r, true },
+    { "_Z3logf", (void *)&logf, true },
+    { "_Z4log2f", (void *)&SC_log2, true },
+    { "_Z5log10f", (void *)&log10f, true },
+    { "_Z5log1pf", (void *)&log1pf, true },
+    { "_Z4logbf", (void *)&logbf, true },
+    { "_Z3madfff", (void *)&SC_mad, true },
+    { "_Z4modffPf", (void *)&modff, true },
+    //{ "_Z3nanj", (void *)&SC_nan, true },
+    { "_Z9nextafterff", (void *)&nextafterf, true },
+    { "_Z3powff", (void *)&powf, true },
+    { "_Z9remainderff", (void *)&remainderf, true },
+    { "_Z6remquoffPi", (void *)&remquof, true },
+    { "_Z4rintf", (void *)&rintf, true },
+    { "_Z5rootnfi", (void *)&SC_rootn, true },
+    { "_Z5roundf", (void *)&roundf, true },
+    { "_Z5rsqrtf", (void *)&SC_rsqrt, true },
+    { "_Z3sinf", (void *)&sinf, true },
+    { "_Z6sincosfPf", (void *)&SC_sincos, true },
+    { "_Z4sinhf", (void *)&sinhf, true },
+    { "_Z4sqrtf", (void *)&sqrtf, true },
+    { "_Z3tanf", (void *)&tanf, true },
+    { "_Z4tanhf", (void *)&tanhf, true },
+    { "_Z6tgammaf", (void *)&tgammaf, true },
+    { "_Z5truncf", (void *)&truncf, true },
+
+    { "_Z3absi", (void *)&SC_abs_i32, true },
+    { "_Z3abss", (void *)&SC_abs_i16, true },
+    { "_Z3absc", (void *)&SC_abs_i8, true },
+    { "_Z3clzj", (void *)&SC_clz_u32, true },
+    { "_Z3clzt", (void *)&SC_clz_u16, true },
+    { "_Z3clzh", (void *)&SC_clz_u8, true },
+    { "_Z3clzi", (void *)&SC_clz_i32, true },
+    { "_Z3clzs", (void *)&SC_clz_i16, true },
+    { "_Z3clzc", (void *)&SC_clz_i8, true },
+    { "_Z3maxjj", (void *)&SC_max_u32, true },
+    { "_Z3maxtt", (void *)&SC_max_u16, true },
+    { "_Z3maxhh", (void *)&SC_max_u8, true },
+    { "_Z3maxii", (void *)&SC_max_i32, true },
+    { "_Z3maxss", (void *)&SC_max_i16, true },
+    { "_Z3maxcc", (void *)&SC_max_i8, true },
+    { "_Z3minjj", (void *)&SC_min_u32, true },
+    { "_Z3mintt", (void *)&SC_min_u16, true },
+    { "_Z3minhh", (void *)&SC_min_u8, true },
+    { "_Z3minii", (void *)&SC_min_i32, true },
+    { "_Z3minss", (void *)&SC_min_i16, true },
+    { "_Z3mincc", (void *)&SC_min_i8, true },
+
+    { "_Z5clampfff", (void *)&SC_clamp_f32, true },
+    { "_Z7degreesf", (void *)&SC_degrees, true },
+    { "_Z3maxff", (void *)&SC_max_f32, true },
+    { "_Z3minff", (void *)&SC_min_f32, true },
+    { "_Z3mixfff", (void *)&SC_mix_f32, true },
+    { "_Z7radiansf", (void *)&SC_radians, true },
+    { "_Z4stepff", (void *)&SC_step_f32, true },
+    //{ "smoothstep", (void *)&, true },
+    { "_Z4signf", (void *)&SC_sign_f32, true },
+
+    // matrix
+    { "_Z20rsMatrixLoadIdentityP12rs_matrix4x4", (void *)&SC_MatrixLoadIdentity_4x4, true },
+    { "_Z20rsMatrixLoadIdentityP12rs_matrix3x3", (void *)&SC_MatrixLoadIdentity_3x3, true },
+    { "_Z20rsMatrixLoadIdentityP12rs_matrix2x2", (void *)&SC_MatrixLoadIdentity_2x2, true },
+
+    { "_Z12rsMatrixLoadP12rs_matrix4x4PKf", (void *)&SC_MatrixLoad_4x4_f, true },
+    { "_Z12rsMatrixLoadP12rs_matrix3x3PKf", (void *)&SC_MatrixLoad_3x3_f, true },
+    { "_Z12rsMatrixLoadP12rs_matrix2x2PKf", (void *)&SC_MatrixLoad_2x2_f, true },
+
+    { "_Z12rsMatrixLoadP12rs_matrix4x4PKS_", (void *)&SC_MatrixLoad_4x4_4x4, true },
+    { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix3x3", (void *)&SC_MatrixLoad_4x4_3x3, true },
+    { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix2x2", (void *)&SC_MatrixLoad_4x4_2x2, true },
+    { "_Z12rsMatrixLoadP12rs_matrix3x3PKS_", (void *)&SC_MatrixLoad_3x3_3x3, true },
+    { "_Z12rsMatrixLoadP12rs_matrix2x2PKS_", (void *)&SC_MatrixLoad_2x2_2x2, true },
+
+    { "_Z18rsMatrixLoadRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadRotate, true },
+    { "_Z17rsMatrixLoadScaleP12rs_matrix4x4fff", (void *)&SC_MatrixLoadScale, true },
+    { "_Z21rsMatrixLoadTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixLoadTranslate, true },
+    { "_Z14rsMatrixRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixRotate, true },
+    { "_Z13rsMatrixScaleP12rs_matrix4x4fff", (void *)&SC_MatrixScale, true },
+    { "_Z17rsMatrixTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixTranslate, true },
+
+    { "_Z20rsMatrixLoadMultiplyP12rs_matrix4x4PKS_S2_", (void *)&SC_MatrixLoadMultiply_4x4_4x4_4x4, true },
+    { "_Z16rsMatrixMultiplyP12rs_matrix4x4PKS_", (void *)&SC_MatrixMultiply_4x4_4x4, true },
+    { "_Z20rsMatrixLoadMultiplyP12rs_matrix3x3PKS_S2_", (void *)&SC_MatrixLoadMultiply_3x3_3x3_3x3, true },
+    { "_Z16rsMatrixMultiplyP12rs_matrix3x3PKS_", (void *)&SC_MatrixMultiply_3x3_3x3, true },
+    { "_Z20rsMatrixLoadMultiplyP12rs_matrix2x2PKS_S2_", (void *)&SC_MatrixLoadMultiply_2x2_2x2_2x2, true },
+    { "_Z16rsMatrixMultiplyP12rs_matrix2x2PKS_", (void *)&SC_MatrixMultiply_2x2_2x2, true },
+
+    { "_Z17rsMatrixLoadOrthoP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadOrtho, true },
+    { "_Z19rsMatrixLoadFrustumP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadFrustum, true },
+    { "_Z23rsMatrixLoadPerspectiveP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadPerspective, true },
+
+    { "_Z15rsMatrixInverseP12rs_matrix4x4", (void *)&SC_MatrixInverse_4x4, true },
+    { "_Z24rsMatrixInverseTransposeP12rs_matrix4x4", (void *)&SC_MatrixInverseTranspose_4x4, true },
+    { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_4x4, true },
+    { "_Z17rsMatrixTransposeP12rs_matrix3x3", (void *)&SC_MatrixTranspose_3x3, true },  // each overload needs its own mangled name to be reachable
+    { "_Z17rsMatrixTransposeP12rs_matrix2x2", (void *)&SC_MatrixTranspose_2x2, true },
+
+    // RS Math
+    { "_Z6rsRandi", (void *)&SC_randi, true },
+    { "_Z6rsRandii", (void *)&SC_randi2, true },
+    { "_Z6rsRandf", (void *)&SC_randf, true },
+    { "_Z6rsRandff", (void *)&SC_randf2, true },
+    { "_Z6rsFracf", (void *)&SC_frac, true },
+
+    { NULL, NULL, false }
+};
+
+const RsdSymbolTable * rsdLookupSymbolMath(const char *sym) {
+    // Walk gSyms from the top; the row whose mPtr is NULL terminates the table.
+    // Returns the row whose mName equals sym exactly, or NULL when none does.
+    for (const RsdSymbolTable *entry = gSyms; entry->mPtr != NULL; entry++) {
+        if (strcmp(entry->mName, sym) == 0) {
+            return entry;
+        }
+    }
+
+    return NULL;
+}
+
diff --git a/driver/rsdRuntimeStubs.cpp b/driver/rsdRuntimeStubs.cpp
new file mode 100644
index 0000000..c16dd31
--- /dev/null
+++ b/driver/rsdRuntimeStubs.cpp
@@ -0,0 +1,693 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptC.h"
+#include "rsMatrix4x4.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix2x2.h"
+#include "rsRuntime.h"
+
+#include "utils/Timers.h"
+#include "rsdCore.h"
+
+#include "rsdRuntime.h"
+
+#include <time.h>
+
+using namespace android;
+using namespace android::renderscript;
+
+#define GET_TLS()  ScriptTLSStruct * tls = \
+    (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
+    Context * rsc = tls->mContext; \
+    ScriptC * sc = (ScriptC *) tls->mScript  /* declares tls, rsc and sc in the caller's scope from the thread-specific ScriptTLSStruct; tls is dereferenced without a NULL check */
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Allocation
+//////////////////////////////////////////////////////////////////////////////
+
+static uint32_t SC_allocGetDimX(Allocation *a) {  // X dimension recorded in the allocation's HAL state
+    //LOGE("SC_allocGetDimX %p", a);  // leftover debug trace: logged at error level on every script call
+    return a->mHal.state.dimensionX;
+}
+
+static uint32_t SC_allocGetDimY(Allocation *a) {  // Y dimension recorded in the allocation's HAL state
+    return a->mHal.state.dimensionY;
+}
+
+static uint32_t SC_allocGetDimZ(Allocation *a) {  // Z dimension recorded in the allocation's HAL state
+    return a->mHal.state.dimensionZ;
+}
+
+static uint32_t SC_allocGetDimLOD(Allocation *a) {  // NOTE(review): returns state.hasMipmaps, which looks like a flag rather than a level count -- confirm
+    return a->mHal.state.hasMipmaps;
+}
+
+static uint32_t SC_allocGetDimFaces(Allocation *a) {  // NOTE(review): returns state.hasFaces, which looks like a flag rather than a face count -- confirm
+    return a->mHal.state.hasFaces;
+}
+
+static const void * SC_getElementAtX(Allocation *a, uint32_t x) {
+    const uint8_t *base = (const uint8_t *)a->getPtr();  // byte view of the allocation's storage
+    return base + a->mHal.state.elementSizeBytes * x;  // no bounds check on x
+}
+
+static const void * SC_getElementAtXY(Allocation *a, uint32_t x, uint32_t y) {
+    const uint8_t *base = (const uint8_t *)a->getPtr();  // byte view of the allocation's storage
+    return base + a->mHal.state.elementSizeBytes * (x + y * a->mHal.state.dimensionX);  // rows are dimensionX elements apart
+}
+
+static const void * SC_getElementAtXYZ(Allocation *a, uint32_t x, uint32_t y, uint32_t z) {
+    const uint8_t *base = (const uint8_t *)a->getPtr();  // byte view of the allocation's storage
+    return base + a->mHal.state.elementSizeBytes * (x + y * a->mHal.state.dimensionX +
+                  z * a->mHal.state.dimensionX * a->mHal.state.dimensionY);  // slices are dimensionX * dimensionY elements apart
+}
+
+static void SC_AllocationSyncAll2(Allocation *a, RsAllocationUsageType source) {  // script stub: forwards to rsrAllocationSyncAll() with the caller's source
+    GET_TLS();  // defines rsc and sc from the calling thread's ScriptTLSStruct
+    rsrAllocationSyncAll(rsc, sc, a, source);
+}
+
+static void SC_AllocationSyncAll(Allocation *a) {  // same call with the source fixed to RS_ALLOCATION_USAGE_SCRIPT
+    GET_TLS();
+    rsrAllocationSyncAll(rsc, sc, a, RS_ALLOCATION_USAGE_SCRIPT);
+}
+
+const Allocation * SC_getAllocation(const void *ptr) {  // forwards to rsrGetAllocation(); NOTE(review): not static, unlike the stubs above
+    GET_TLS();
+    return rsrGetAllocation(rsc, sc, ptr);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Context
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_BindTexture(ProgramFragment *pf, uint32_t slot, Allocation *a) {  // script stub: forwards to rsrBindTexture()
+    GET_TLS();  // defines rsc and sc from the calling thread's ScriptTLSStruct
+    rsrBindTexture(rsc, sc, pf, slot, a);
+}
+
+static void SC_BindSampler(ProgramFragment *pf, uint32_t slot, Sampler *s) {  // script stub: forwards to rsrBindSampler()
+    GET_TLS();
+    rsrBindSampler(rsc, sc, pf, slot, s);
+}
+
+static void SC_BindProgramStore(ProgramStore *ps) {  // script stub: forwards to rsrBindProgramStore()
+    GET_TLS();
+    rsrBindProgramStore(rsc, sc, ps);
+}
+
+static void SC_BindProgramFragment(ProgramFragment *pf) {  // script stub: forwards to rsrBindProgramFragment()
+    GET_TLS();
+    rsrBindProgramFragment(rsc, sc, pf);
+}
+
+static void SC_BindProgramVertex(ProgramVertex *pv) {  // script stub: forwards to rsrBindProgramVertex()
+    GET_TLS();
+    rsrBindProgramVertex(rsc, sc, pv);
+}
+
+static void SC_BindProgramRaster(ProgramRaster *pr) {  // script stub: forwards to rsrBindProgramRaster()
+    GET_TLS();
+    rsrBindProgramRaster(rsc, sc, pr);
+}
+
+static void SC_BindFrameBufferObjectColorTarget(Allocation *a, uint32_t slot) {  // script stub: forwards to rsrBindFrameBufferObjectColorTarget()
+    GET_TLS();
+    rsrBindFrameBufferObjectColorTarget(rsc, sc, a, slot);
+}
+
+static void SC_BindFrameBufferObjectDepthTarget(Allocation *a) {  // script stub: forwards to rsrBindFrameBufferObjectDepthTarget()
+    GET_TLS();
+    rsrBindFrameBufferObjectDepthTarget(rsc, sc, a);
+}
+
+static void SC_ClearFrameBufferObjectColorTarget(uint32_t slot) {  // script stub: forwards to rsrClearFrameBufferObjectColorTarget()
+    GET_TLS();
+    rsrClearFrameBufferObjectColorTarget(rsc, sc, slot);
+}
+
+static void SC_ClearFrameBufferObjectDepthTarget() {  // no script-visible arguments: context and script come from TLS like every other stub
+    GET_TLS();  // defines rsc and sc from the calling thread's ScriptTLSStruct
+    rsrClearFrameBufferObjectDepthTarget(rsc, sc);
+}
+
+static void SC_ClearFrameBufferObjectTargets() {  // no script-visible arguments: context and script come from TLS like every other stub
+    GET_TLS();
+    rsrClearFrameBufferObjectTargets(rsc, sc);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// VP
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_VpLoadProjectionMatrix(const rsc_Matrix *m) {  // script stub: forwards to rsrVpLoadProjectionMatrix()
+    GET_TLS();  // defines rsc and sc from the calling thread's ScriptTLSStruct
+    rsrVpLoadProjectionMatrix(rsc, sc, m);
+}
+
+static void SC_VpLoadModelMatrix(const rsc_Matrix *m) {  // script stub: forwards to rsrVpLoadModelMatrix()
+    GET_TLS();
+    rsrVpLoadModelMatrix(rsc, sc, m);
+}
+
+static void SC_VpLoadTextureMatrix(const rsc_Matrix *m) {  // script stub: forwards to rsrVpLoadTextureMatrix()
+    GET_TLS();
+    rsrVpLoadTextureMatrix(rsc, sc, m);
+}
+
+static void SC_PfConstantColor(ProgramFragment *pf, float r, float g, float b, float a) {  // script stub: forwards to rsrPfConstantColor()
+    GET_TLS();
+    rsrPfConstantColor(rsc, sc, pf, r, g, b, a);
+}
+
+static void SC_VpGetProjectionMatrix(rsc_Matrix *m) {  // script stub: forwards to rsrVpGetProjectionMatrix(); m is the only non-const matrix argument in this group
+    GET_TLS();
+    rsrVpGetProjectionMatrix(rsc, sc, m);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Drawing
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_DrawQuadTexCoords(float x1, float y1, float z1, float u1, float v1,
+                                 float x2, float y2, float z2, float u2, float v2,
+                                 float x3, float y3, float z3, float u3, float v3,
+                                 float x4, float y4, float z4, float u4, float v4) {
+    GET_TLS();
+    rsrDrawQuadTexCoords(rsc, sc,
+                         x1, y1, z1, u1, v1,
+                         x2, y2, z2, u2, v2,
+                         x3, y3, z3, u3, v3,
+                         x4, y4, z4, u4, v4);
+}
+
+static void SC_DrawQuad(float x1, float y1, float z1,
+                        float x2, float y2, float z2,
+                        float x3, float y3, float z3,
+                        float x4, float y4, float z4) {
+    GET_TLS();
+    rsrDrawQuad(rsc, sc, x1, y1, z1, x2, y2, z2, x3, y3, z3, x4, y4, z4);
+}
+
+static void SC_DrawSpriteScreenspace(float x, float y, float z, float w, float h) {
+    GET_TLS();
+    rsrDrawSpriteScreenspace(rsc, sc, x, y, z, w, h);
+}
+
+static void SC_DrawRect(float x1, float y1, float x2, float y2, float z) {
+    GET_TLS();  // gSyms target for rsgDrawRect(5 floats); threadable = false
+    rsrDrawRect(rsc, sc, x1, y1, x2, y2, z);
+}
+
+static void SC_DrawMesh(Mesh *m) {
+    GET_TLS();  // gSyms target for rsgDrawMesh(rs_mesh); threadable = false
+    rsrDrawMesh(rsc, sc, m);
+}
+
+static void SC_DrawMeshPrimitive(Mesh *m, uint32_t primIndex) {
+    GET_TLS();  // gSyms target for the rsgDrawMesh(rs_mesh, uint) overload; threadable = false
+    rsrDrawMeshPrimitive(rsc, sc, m, primIndex);
+}
+
+static void SC_DrawMeshPrimitiveRange(Mesh *m, uint32_t primIndex, uint32_t start, uint32_t len) {
+    GET_TLS();  // gSyms target for the rsgDrawMesh(rs_mesh, uint, uint, uint) overload; threadable = false
+    rsrDrawMeshPrimitiveRange(rsc, sc, m, primIndex, start, len);
+}
+
+static void SC_MeshComputeBoundingBox(Mesh *m,
+                               float *minX, float *minY, float *minZ,
+                               float *maxX, float *maxY, float *maxZ) {
+    GET_TLS();  // gSyms target for rsgMeshComputeBoundingBox(rs_mesh, 6 float *); threadable = false
+    rsrMeshComputeBoundingBox(rsc, sc, m, minX, minY, minZ, maxX, maxY, maxZ);
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Color, clears, surface size and text
+//////////////////////////////////////////////////////////////////////////////
+
+
+static void SC_Color(float r, float g, float b, float a) {
+    GET_TLS();  // gSyms target for color(4 floats), i.e. "_Z5colorffff"; threadable = false
+    rsrColor(rsc, sc, r, g, b, a);
+}
+
+static void SC_Finish() {
+    GET_TLS();  // gSyms target for rsgFinish(); threadable = false
+    rsrFinish(rsc, sc);
+}
+
+static void SC_ClearColor(float r, float g, float b, float a) {
+    GET_TLS();  // gSyms target for rsgClearColor(4 floats); threadable = false
+    rsrClearColor(rsc, sc, r, g, b, a);
+}
+
+static void SC_ClearDepth(float v) {
+    GET_TLS();  // gSyms target for rsgClearDepth(float); threadable = false
+    rsrClearDepth(rsc, sc, v);
+}
+
+static uint32_t SC_GetWidth() {
+    GET_TLS();  // gSyms target for rsgGetWidth(); threadable = false
+    return rsrGetWidth(rsc, sc);
+}
+
+static uint32_t SC_GetHeight() {
+    GET_TLS();  // gSyms target for rsgGetHeight(); threadable = false
+    return rsrGetHeight(rsc, sc);
+}
+
+static void SC_DrawTextAlloc(Allocation *a, int x, int y) {
+    GET_TLS();  // gSyms target for the rsgDrawText(rs_allocation, int, int) overload; threadable = false
+    rsrDrawTextAlloc(rsc, sc, a, x, y);
+}
+
+static void SC_DrawText(const char *text, int x, int y) {
+    GET_TLS();  // gSyms target for the rsgDrawText(const char *, int, int) overload; threadable = false
+    rsrDrawText(rsc, sc, text, x, y);
+}
+
+static void SC_MeasureTextAlloc(Allocation *a,
+                         int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
+    GET_TLS();  // gSyms target for the rsgMeasureText(rs_allocation, 4 int *) overload; threadable = false
+    rsrMeasureTextAlloc(rsc, sc, a, left, right, top, bottom);
+}
+
+static void SC_MeasureText(const char *text,
+                    int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
+    GET_TLS();  // gSyms target for the rsgMeasureText(const char *, 4 int *) overload; threadable = false
+    rsrMeasureText(rsc, sc, text, left, right, top, bottom);
+}
+
+static void SC_BindFont(Font *f) {
+    GET_TLS();  // gSyms target for rsgBindFont(rs_font); threadable = false
+    rsrBindFont(rsc, sc, f);
+}
+
+static void SC_FontColor(float r, float g, float b, float a) {
+    GET_TLS();  // gSyms target for rsgFontColor(4 floats); threadable = false
+    rsrFontColor(rsc, sc, r, g, b, a);
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Object handles, allocation lookup and ForEach
+
+static void SC_SetObject(ObjectBase **dst, ObjectBase * src) {
+    GET_TLS();  // one stub backs every rsSetObject(rs_* *, rs_*) overload listed in gSyms; threadable = true
+    rsrSetObject(rsc, sc, dst, src);
+}
+
+static void SC_ClearObject(ObjectBase **dst) {
+    GET_TLS();  // one stub backs every rsClearObject(rs_* *) overload listed in gSyms; threadable = true
+    rsrClearObject(rsc, sc, dst);
+}
+
+static bool SC_IsObject(const ObjectBase *src) {
+    GET_TLS();  // one stub backs every rsIsObject(rs_*) overload listed in gSyms; threadable = true
+    return rsrIsObject(rsc, sc, src);
+}
+
+
+
+
+static const Allocation * SC_GetAllocation(const void *ptr) {
+    GET_TLS();  // bound to rsGetAllocation(const void *) by the second of two gSyms entries for that name
+    return rsrGetAllocation(rsc, sc, ptr);
+}
+
+static void SC_ForEach(Script *target,
+                Allocation *in,
+                Allocation *out,
+                const void *usr,
+                const RsScriptCall *call) {
+    GET_TLS();  // gSyms target for the 4-argument rsForEach overload; threadable = false
+    rsrForEach(rsc, sc, target, in, out, usr, 0, NULL);  // `call` is ignored: NULL is forwarded instead
+}
+
+static void SC_ForEach2(Script *target,
+                 Allocation *in,
+                 Allocation *out,
+                 const void *usr,
+                 const RsScriptCall *call) {
+    GET_TLS();  // gSyms target for the 5-argument rsForEach overload; threadable = false
+    rsrForEach(rsc, sc, target, in, out, usr, 0, call);  // NOTE(review): gSyms mangles the 5th argument as 'j' (unsigned int), not a pointer - confirm
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Time routines
+//////////////////////////////////////////////////////////////////////////////
+
+static float SC_GetDt() {
+    GET_TLS();  // gSyms target for rsGetDt(); the only time routine registered with threadable = false
+    return rsrGetDt(rsc, sc);
+}
+
+time_t SC_Time(time_t *timer) {
+    GET_TLS();  // gSyms target for rsTime(int *), i.e. "_Z6rsTimePi"; threadable = true
+    return rsrTime(rsc, sc, timer);
+}
+
+tm* SC_LocalTime(tm *local, time_t *timer) {
+    GET_TLS();  // gSyms target for rsLocaltime(rs_tm *, const int *); threadable = true
+    return rsrLocalTime(rsc, sc, local, timer);
+}
+
+int64_t SC_UptimeMillis() {
+    GET_TLS();  // gSyms target for rsUptimeMillis(); threadable = true
+    return rsrUptimeMillis(rsc, sc);
+}
+
+int64_t SC_UptimeNanos() {
+    GET_TLS();  // gSyms target for rsUptimeNanos(); threadable = true
+    return rsrUptimeNanos(rsc, sc);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Message routines
+//////////////////////////////////////////////////////////////////////////////
+
+static uint32_t SC_ToClient2(int cmdID, void *data, int len) {
+    GET_TLS();  // gSyms target for rsSendToClient(int, const void *, uint); threadable = false
+    return rsrToClient(rsc, sc, cmdID, data, len);
+}
+
+static uint32_t SC_ToClient(int cmdID) {
+    GET_TLS();  // gSyms target for rsSendToClient(int); threadable = false
+    return rsrToClient(rsc, sc, cmdID, NULL, 0);  // command only: no payload pointer, zero length
+}
+
+static uint32_t SC_ToClientBlocking2(int cmdID, void *data, int len) {
+    GET_TLS();  // gSyms target for rsSendToClientBlocking(int, const void *, uint); threadable = false
+    return rsrToClientBlocking(rsc, sc, cmdID, data, len);
+}
+
+static uint32_t SC_ToClientBlocking(int cmdID) {
+    GET_TLS();  // gSyms target for rsSendToClientBlocking(int); threadable = false
+    return rsrToClientBlocking(rsc, sc, cmdID, NULL, 0);  // command only: no payload pointer, zero length
+}
+
+int SC_divsi3(int a, int b) {  // registered in gSyms as "__divsi3"; threadable = true
+    return a / b;  // plain C++ division: b == 0 is not guarded here
+}
+
+int SC_modsi3(int a, int b) {  // registered in gSyms as "__modsi3"; threadable = true
+    return a % b;  // plain C++ remainder: b == 0 is not guarded here
+}
+
+unsigned int SC_udivsi3(unsigned int a, unsigned int b) {  // registered in gSyms as "__udivsi3"; threadable = true
+    return a / b;  // plain C++ division: b == 0 is not guarded here
+}
+
+unsigned int SC_umodsi3(unsigned int a, unsigned int b) {  // registered in gSyms as "__umodsi3"; threadable = true
+    return a % b;  // plain C++ remainder: b == 0 is not guarded here
+}
+
+static void SC_debugF(const char *s, float f) {  // rsDebug(const char *, float): logs the value and its raw bits
+    LOGD("%s %f, 0x%08x", s, f, *((int *) (&f)));  // NOTE(review): reads the float through an int* (aliasing-unsafe); a memcpy into an int would avoid that
+}
+static void SC_debugFv2(const char *s, float f1, float f2) {  // rsDebug(const char *, float, float)
+    LOGD("%s {%f, %f}", s, f1, f2);
+}
+static void SC_debugFv3(const char *s, float f1, float f2, float f3) {  // rsDebug(const char *, 3 floats)
+    LOGD("%s {%f, %f, %f}", s, f1, f2, f3);
+}
+static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {  // rsDebug(const char *, 4 floats)
+    LOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
+}
+static void SC_debugD(const char *s, double d) {  // rsDebug(const char *, double): logs the value and its raw bits
+    LOGD("%s %f, 0x%08llx", s, d, *((long long *) (&d)));  // NOTE(review): reads the double through a long long* (aliasing-unsafe); memcpy would avoid that
+}
+static void SC_debugFM4v4(const char *s, const float *f) {  // rsDebug(const char *, const rs_matrix4x4 *)
+    LOGD("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]);  // log line k shows f[k], f[k+4], f[k+8], f[k+12]
+    LOGD("%s  %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]);
+    LOGD("%s  %f, %f, %f, %f", s, f[2], f[6], f[10], f[14]);
+    LOGD("%s  %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]);
+}
+static void SC_debugFM3v3(const char *s, const float *f) {  // rsDebug(const char *, const rs_matrix3x3 *)
+    LOGD("%s {%f, %f, %f", s, f[0], f[3], f[6]);  // log line k shows f[k], f[k+3], f[k+6]
+    LOGD("%s  %f, %f, %f", s, f[1], f[4], f[7]);
+    LOGD("%s  %f, %f, %f}",s, f[2], f[5], f[8]);
+}
+static void SC_debugFM2v2(const char *s, const float *f) {  // rsDebug(const char *, const rs_matrix2x2 *)
+    LOGD("%s {%f, %f", s, f[0], f[2]);  // log line k shows f[k], f[k+2]
+    LOGD("%s  %f, %f}",s, f[1], f[3]);
+}
+
+static void SC_debugI32(const char *s, int32_t i) {  // rsDebug(const char *, int): decimal and hex
+    LOGD("%s %i  0x%x", s, i, i);
+}
+static void SC_debugU32(const char *s, uint32_t i) {  // rsDebug(const char *, unsigned int): decimal and hex
+    LOGD("%s %u  0x%x", s, i, i);
+}
+static void SC_debugLL64(const char *s, long long ll) {  // backs both the 'l' and 'x' rsDebug overloads in gSyms
+    LOGD("%s %lld  0x%llx", s, ll, ll);
+}
+static void SC_debugULL64(const char *s, unsigned long long ll) {  // backs both the 'm' and 'y' rsDebug overloads in gSyms
+    LOGD("%s %llu  0x%llx", s, ll, ll);
+}
+
+static void SC_debugP(const char *s, const void *p) {  // rsDebug(const char *, const void *)
+    LOGD("%s %p", s, p);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Stub implementation
+//////////////////////////////////////////////////////////////////////////////
+
+// llvm name mangling ref
+//  <builtin-type> ::= v  # void
+//                 ::= b  # bool
+//                 ::= c  # char
+//                 ::= a  # signed char
+//                 ::= h  # unsigned char
+//                 ::= s  # short
+//                 ::= t  # unsigned short
+//                 ::= i  # int
+//                 ::= j  # unsigned int
+//                 ::= l  # long
+//                 ::= m  # unsigned long
+//                 ::= x  # long long, __int64
+//                 ::= y  # unsigned long long, __int64
+//                 ::= f  # float
+//                 ::= d  # double
+
+static RsdSymbolTable gSyms[] = {  // each row: { mangled script symbol, host stub, threadable }
+    { "__divsi3", (void *)&SC_divsi3, true },
+    { "__modsi3", (void *)&SC_modsi3, true },
+    { "__udivsi3", (void *)&SC_udivsi3, true },
+    { "__umodsi3", (void *)&SC_umodsi3, true },
+    { "memset", (void *)&memset, true },
+    { "memcpy", (void *)&memcpy, true },
+
+    // Refcounting
+    { "_Z11rsSetObjectP10rs_elementS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP10rs_element", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject10rs_element", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP7rs_typeS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP7rs_type", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject7rs_type", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP13rs_allocationS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP13rs_allocation", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject13rs_allocation", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP10rs_samplerS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP10rs_sampler", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject10rs_sampler", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP9rs_scriptS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP9rs_script", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject9rs_script", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP7rs_meshS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP7rs_mesh", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject7rs_mesh", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP19rs_program_fragmentS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP19rs_program_fragment", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject19rs_program_fragment", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP17rs_program_vertexS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP17rs_program_vertex", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject17rs_program_vertex", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP17rs_program_rasterS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP17rs_program_raster", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject17rs_program_raster", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP16rs_program_storeS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP16rs_program_store", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject16rs_program_store", (void *)&SC_IsObject, true },
+
+    { "_Z11rsSetObjectP7rs_fontS_", (void *)&SC_SetObject, true },
+    { "_Z13rsClearObjectP7rs_font", (void *)&SC_ClearObject, true },
+    { "_Z10rsIsObject7rs_font", (void *)&SC_IsObject, true },
+
+    // Allocation ops
+    { "_Z19rsAllocationGetDimX13rs_allocation", (void *)&SC_allocGetDimX, true },
+    { "_Z19rsAllocationGetDimY13rs_allocation", (void *)&SC_allocGetDimY, true },
+    { "_Z19rsAllocationGetDimZ13rs_allocation", (void *)&SC_allocGetDimZ, true },
+    { "_Z21rsAllocationGetDimLOD13rs_allocation", (void *)&SC_allocGetDimLOD, true },
+    { "_Z23rsAllocationGetDimFaces13rs_allocation", (void *)&SC_allocGetDimFaces, true },
+
+    { "_Z14rsGetElementAt13rs_allocationj", (void *)&SC_getElementAtX, true },
+    { "_Z14rsGetElementAt13rs_allocationjj", (void *)&SC_getElementAtXY, true },
+    { "_Z14rsGetElementAt13rs_allocationjjj", (void *)&SC_getElementAtXYZ, true },
+
+    { "_Z15rsGetAllocationPKv", (void *)&SC_getAllocation, true },  // NOTE(review): same name is listed again below; the lookup scans the whole table, so that later entry is the one returned
+
+    { "_Z21rsAllocationMarkDirty13rs_allocation", (void *)&SC_AllocationSyncAll, true },
+    { "_Z20rsgAllocationSyncAll13rs_allocation", (void *)&SC_AllocationSyncAll, false },
+    { "_Z20rsgAllocationSyncAll13rs_allocationj", (void *)&SC_AllocationSyncAll2, false },
+    { "_Z15rsGetAllocationPKv", (void *)&SC_GetAllocation, true },
+
+
+    // Messaging
+
+    { "_Z14rsSendToClienti", (void *)&SC_ToClient, false },
+    { "_Z14rsSendToClientiPKvj", (void *)&SC_ToClient2, false },
+    { "_Z22rsSendToClientBlockingi", (void *)&SC_ToClientBlocking, false },
+    { "_Z22rsSendToClientBlockingiPKvj", (void *)&SC_ToClientBlocking2, false },
+
+    { "_Z22rsgBindProgramFragment19rs_program_fragment", (void *)&SC_BindProgramFragment, false },
+    { "_Z19rsgBindProgramStore16rs_program_store", (void *)&SC_BindProgramStore, false },
+    { "_Z20rsgBindProgramVertex17rs_program_vertex", (void *)&SC_BindProgramVertex, false },
+    { "_Z20rsgBindProgramRaster17rs_program_raster", (void *)&SC_BindProgramRaster, false },
+    { "_Z14rsgBindSampler19rs_program_fragmentj10rs_sampler", (void *)&SC_BindSampler, false },
+    { "_Z14rsgBindTexture19rs_program_fragmentj13rs_allocation", (void *)&SC_BindTexture, false },
+
+    { "_Z36rsgProgramVertexLoadProjectionMatrixPK12rs_matrix4x4", (void *)&SC_VpLoadProjectionMatrix, false },
+    { "_Z31rsgProgramVertexLoadModelMatrixPK12rs_matrix4x4", (void *)&SC_VpLoadModelMatrix, false },
+    { "_Z33rsgProgramVertexLoadTextureMatrixPK12rs_matrix4x4", (void *)&SC_VpLoadTextureMatrix, false },
+
+    { "_Z35rsgProgramVertexGetProjectionMatrixP12rs_matrix4x4", (void *)&SC_VpGetProjectionMatrix, false },
+
+    { "_Z31rsgProgramFragmentConstantColor19rs_program_fragmentffff", (void *)&SC_PfConstantColor, false },
+
+    { "_Z11rsgGetWidthv", (void *)&SC_GetWidth, false },
+    { "_Z12rsgGetHeightv", (void *)&SC_GetHeight, false },
+
+
+    { "_Z11rsgDrawRectfffff", (void *)&SC_DrawRect, false },
+    { "_Z11rsgDrawQuadffffffffffff", (void *)&SC_DrawQuad, false },
+    { "_Z20rsgDrawQuadTexCoordsffffffffffffffffffff", (void *)&SC_DrawQuadTexCoords, false },
+    { "_Z24rsgDrawSpriteScreenspacefffff", (void *)&SC_DrawSpriteScreenspace, false },
+
+    { "_Z11rsgDrawMesh7rs_mesh", (void *)&SC_DrawMesh, false },
+    { "_Z11rsgDrawMesh7rs_meshj", (void *)&SC_DrawMeshPrimitive, false },
+    { "_Z11rsgDrawMesh7rs_meshjjj", (void *)&SC_DrawMeshPrimitiveRange, false },
+    { "_Z25rsgMeshComputeBoundingBox7rs_meshPfS0_S0_S0_S0_S0_", (void *)&SC_MeshComputeBoundingBox, false },
+
+    { "_Z13rsgClearColorffff", (void *)&SC_ClearColor, false },
+    { "_Z13rsgClearDepthf", (void *)&SC_ClearDepth, false },
+
+    { "_Z11rsgDrawTextPKcii", (void *)&SC_DrawText, false },
+    { "_Z11rsgDrawText13rs_allocationii", (void *)&SC_DrawTextAlloc, false },
+    { "_Z14rsgMeasureTextPKcPiS1_S1_S1_", (void *)&SC_MeasureText, false },
+    { "_Z14rsgMeasureText13rs_allocationPiS0_S0_S0_", (void *)&SC_MeasureTextAlloc, false },
+
+    { "_Z11rsgBindFont7rs_font", (void *)&SC_BindFont, false },
+    { "_Z12rsgFontColorffff", (void *)&SC_FontColor, false },
+
+    { "_Z18rsgBindColorTarget13rs_allocationj", (void *)&SC_BindFrameBufferObjectColorTarget, false },
+    { "_Z18rsgBindDepthTarget13rs_allocation", (void *)&SC_BindFrameBufferObjectDepthTarget, false },
+    { "_Z19rsgClearColorTargetj", (void *)&SC_ClearFrameBufferObjectColorTarget, false },
+    { "_Z19rsgClearDepthTargetv", (void *)&SC_ClearFrameBufferObjectDepthTarget, false },
+    { "_Z24rsgClearAllRenderTargetsv", (void *)&SC_ClearFrameBufferObjectTargets, false },
+
+    { "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach, false },
+    { "_Z9rsForEach9rs_script13rs_allocationS0_PKvj", (void *)&SC_ForEach2, false },
+
+    // time
+    { "_Z6rsTimePi", (void *)&SC_Time, true },
+    { "_Z11rsLocaltimeP5rs_tmPKi", (void *)&SC_LocalTime, true },
+    { "_Z14rsUptimeMillisv", (void*)&SC_UptimeMillis, true },
+    { "_Z13rsUptimeNanosv", (void*)&SC_UptimeNanos, true },
+    { "_Z7rsGetDtv", (void*)&SC_GetDt, false },
+
+    // misc
+    { "_Z5colorffff", (void *)&SC_Color, false },
+    { "_Z9rsgFinishv", (void *)&SC_Finish, false },
+
+    // Debug
+    { "_Z7rsDebugPKcf", (void *)&SC_debugF, true },
+    { "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true },
+    { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true },
+    { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true },
+    { "_Z7rsDebugPKcd", (void *)&SC_debugD, true },
+    { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true },
+    { "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3, true },
+    { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true },
+    { "_Z7rsDebugPKci", (void *)&SC_debugI32, true },
+    { "_Z7rsDebugPKcj", (void *)&SC_debugU32, true },
+    // Both "long" and "unsigned long" need to be redirected to their
+    // 64-bit counterparts, since we have hacked Slang to use 64-bit
+    // for "long" on Arm (to be similar to Java).
+    { "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true },
+    { "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true },
+    { "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true },
+    { "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true },
+    { "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true },
+
+    { NULL, NULL, false }  // terminator: the lookup loop stops at the first NULL mPtr
+};
+
+
+void* rsdLookupRuntimeStub(void* pContext, char const* name) {
+    ScriptC *script = (ScriptC *)pContext;
+    // Two pseudo-symbols read or reset the script's threadable flag directly.
+    if (strcmp(name, "__isThreadable") == 0) {
+        return (void*) script->mHal.info.isThreadable;
+    }
+    if (strcmp(name, "__clearThreadable") == 0) {
+        script->mHal.info.isThreadable = false;
+        return NULL;
+    }
+    // rsdLookupSymbolMath() is asked first; gSyms is scanned only when it finds nothing.
+    const RsdSymbolTable *match = rsdLookupSymbolMath(name);
+    if (match == NULL) {
+        // Full scan, no early exit: for a name listed twice the later entry wins.
+        for (const RsdSymbolTable *entry = gSyms; entry->mPtr != NULL; ++entry) {
+            if (strcmp(entry->mName, name) == 0) {
+                match = entry;
+            }
+        }
+    }
+    if (match == NULL) {
+        LOGE("ScriptC sym lookup failed for %s", name);
+        return NULL;
+    }
+    // Resolving any symbol whose threadable field is false clears the script's flag.
+    script->mHal.info.isThreadable &= match->threadable;
+    return match->mPtr;
+}
+
+
diff --git a/rsContext.h b/rsContext.h
index 4dd186c..0a0f3e0 100644
--- a/rsContext.h
+++ b/rsContext.h
@@ -27,7 +27,7 @@
 #ifndef ANDROID_RS_SERIALIZE
 #include "rsMutex.h"
 #include "rsThreadIO.h"
-#include "rsMatrix.h"
+#include "rsMatrix4x4.h"
 #include "rsDevice.h"
 #include "rsScriptC.h"
 #include "rsAdapter.h"
diff --git a/rsMatrix.cpp b/rsMatrix.cpp
deleted file mode 100644
index ca41886..0000000
--- a/rsMatrix.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rsMatrix.h"
-
-#include "stdlib.h"
-#include "string.h"
-#include "math.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-void Matrix::loadIdentity() {
-    set(0, 0, 1);
-    set(1, 0, 0);
-    set(2, 0, 0);
-    set(3, 0, 0);
-
-    set(0, 1, 0);
-    set(1, 1, 1);
-    set(2, 1, 0);
-    set(3, 1, 0);
-
-    set(0, 2, 0);
-    set(1, 2, 0);
-    set(2, 2, 1);
-    set(3, 2, 0);
-
-    set(0, 3, 0);
-    set(1, 3, 0);
-    set(2, 3, 0);
-    set(3, 3, 1);
-}
-
-void Matrix::load(const float *v) {
-    memcpy(m, v, sizeof(m));
-}
-
-void Matrix::load(const Matrix *v) {
-    memcpy(m, v->m, sizeof(m));
-}
-
-void Matrix::loadRotate(float rot, float x, float y, float z) {
-    float c, s;
-    m[3] = 0;
-    m[7] = 0;
-    m[11]= 0;
-    m[12]= 0;
-    m[13]= 0;
-    m[14]= 0;
-    m[15]= 1;
-    rot *= float(M_PI / 180.0f);
-    c = cosf(rot);
-    s = sinf(rot);
-
-    const float len = sqrtf(x*x + y*y + z*z);
-    if (len != 1) {
-        const float recipLen = 1.f / len;
-        x *= recipLen;
-        y *= recipLen;
-        z *= recipLen;
-    }
-    const float nc = 1.0f - c;
-    const float xy = x * y;
-    const float yz = y * z;
-    const float zx = z * x;
-    const float xs = x * s;
-    const float ys = y * s;
-    const float zs = z * s;
-    m[ 0] = x*x*nc +  c;
-    m[ 4] =  xy*nc - zs;
-    m[ 8] =  zx*nc + ys;
-    m[ 1] =  xy*nc + zs;
-    m[ 5] = y*y*nc +  c;
-    m[ 9] =  yz*nc - xs;
-    m[ 2] =  zx*nc - ys;
-    m[ 6] =  yz*nc + xs;
-    m[10] = z*z*nc +  c;
-}
-
-void Matrix::loadScale(float x, float y, float z) {
-    loadIdentity();
-    m[0] = x;
-    m[5] = y;
-    m[10] = z;
-}
-
-void Matrix::loadTranslate(float x, float y, float z) {
-    loadIdentity();
-    m[12] = x;
-    m[13] = y;
-    m[14] = z;
-}
-
-void Matrix::loadMultiply(const Matrix *lhs, const Matrix *rhs) {
-    for (int i=0 ; i<4 ; i++) {
-        float ri0 = 0;
-        float ri1 = 0;
-        float ri2 = 0;
-        float ri3 = 0;
-        for (int j=0 ; j<4 ; j++) {
-            const float rhs_ij = rhs->get(i,j);
-            ri0 += lhs->get(j,0) * rhs_ij;
-            ri1 += lhs->get(j,1) * rhs_ij;
-            ri2 += lhs->get(j,2) * rhs_ij;
-            ri3 += lhs->get(j,3) * rhs_ij;
-        }
-        set(i,0, ri0);
-        set(i,1, ri1);
-        set(i,2, ri2);
-        set(i,3, ri3);
-    }
-}
-
-void Matrix::loadOrtho(float l, float r, float b, float t, float n, float f) {
-    loadIdentity();
-    m[0] = 2 / (r - l);
-    m[5] = 2 / (t - b);
-    m[10]= -2 / (f - n);
-    m[12]= -(r + l) / (r - l);
-    m[13]= -(t + b) / (t - b);
-    m[14]= -(f + n) / (f - n);
-}
-
-void Matrix::loadFrustum(float l, float r, float b, float t, float n, float f) {
-    loadIdentity();
-    m[0] = 2 * n / (r - l);
-    m[5] = 2 * n / (t - b);
-    m[8] = (r + l) / (r - l);
-    m[9] = (t + b) / (t - b);
-    m[10]= -(f + n) / (f - n);
-    m[11]= -1;
-    m[14]= -2*f*n / (f - n);
-    m[15]= 0;
-}
-
-void Matrix::vectorMultiply(float *out, const float *in) const {
-    out[0] = (m[0] * in[0]) + (m[4] * in[1]) + (m[8] * in[2]) + m[12];
-    out[1] = (m[1] * in[0]) + (m[5] * in[1]) + (m[9] * in[2]) + m[13];
-    out[2] = (m[2] * in[0]) + (m[6] * in[1]) + (m[10] * in[2]) + m[14];
-    out[3] = (m[3] * in[0]) + (m[7] * in[1]) + (m[11] * in[2]) + m[15];
-}
diff --git a/rsMatrix2x2.cpp b/rsMatrix2x2.cpp
new file mode 100644
index 0000000..622113c
--- /dev/null
+++ b/rsMatrix2x2.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsMatrix2x2.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix4x4.h"
+
+#include "stdlib.h"
+#include "string.h"
+#include "math.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+
+void Matrix2x2::loadIdentity() {
+    // Ones at m[0] and m[3] (the get(i, i) slots), zeros in between.
+    static const float kIdentity[4] = { 1.f, 0.f,
+                                        0.f, 1.f };
+    memcpy(m, kIdentity, sizeof(m));
+}
+
+void Matrix2x2::load(const float *v) {  // overwrite every entry from a raw float array
+    memcpy(m, v, sizeof(m));  // reads sizeof(m) bytes, so v must supply at least as many floats as m holds
+}
+
+void Matrix2x2::load(const rs_matrix2x2 *v) {
+    load(v->m);  // raw-float overload performs the same sizeof(m) copy
+}
+
+void Matrix2x2::loadMultiply(const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) {
+    const Matrix2x2 *left = (const Matrix2x2 *)lhs;
+    const Matrix2x2 *right = (const Matrix2x2 *)rhs;
+    for (int row = 0; row < 2; ++row) {
+        float acc[2] = { 0, 0 };  // both sums finish before this row is stored
+        for (int k = 0; k < 2; ++k) {
+            acc[0] += left->get(k, 0) * right->get(row, k);
+            acc[1] += left->get(k, 1) * right->get(row, k);
+        }
+        set(row, 0, acc[0]);
+        set(row, 1, acc[1]);
+    }
+}
+
+void Matrix2x2::transpose() {
+    const float offDiagonal = m[2];  // m[0] and m[3] are left untouched
+    m[2] = m[1];
+    m[1] = offDiagonal;
+}
+
diff --git a/rsMatrix2x2.h b/rsMatrix2x2.h
new file mode 100644
index 0000000..4dcb84a
--- /dev/null
+++ b/rsMatrix2x2.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_MATRIX_2x2_H
+#define ANDROID_RS_MATRIX_2x2_H
+
+#include "rsType.h"
+
+
+// ---------------------------------------------------------------------------
+namespace android {
+namespace renderscript {
+
+struct Matrix2x2 : public rs_matrix2x2 {  // adds methods on top of the inherited float array m
+    inline float get(uint32_t row, uint32_t col) const {  // reads m[row*2 + col]; no range check
+        return m[row*2 + col];
+    }
+    // Writes v to m[row*2 + col]; indices are not range-checked.
+    inline void set(uint32_t row, uint32_t col, float v) {
+        m[row*2 + col] = v;
+    }
+    // Loaders: identity, or a sizeof(m) copy from raw floats / another matrix.
+    void loadIdentity();
+    void load(const float *);
+    void load(const rs_matrix2x2 *);
+    // For each i and c: set(i, c, sum over j of lhs->get(j, c) * rhs->get(i, j)).
+    void loadMultiply(const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs);
+    // Swaps m[1] and m[2] in place.
+    void transpose();
+    // In place: builds loadMultiply(this, rhs) in a temporary, then copies it back.
+    void multiply(const rs_matrix2x2 *rhs) {
+        Matrix2x2 tmp;
+        tmp.loadMultiply(this, rhs);
+        load(&tmp);
+    }
+};
+
+}
+}
+
+
+
+
+#endif
+
+
+
+
+
diff --git a/rsMatrix3x3.cpp b/rsMatrix3x3.cpp
new file mode 100644
index 0000000..3f9a2d1
--- /dev/null
+++ b/rsMatrix3x3.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsMatrix2x2.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix4x4.h"
+
+#include "stdlib.h"
+#include "string.h"
+#include "math.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+void Matrix3x3::loadIdentity() {
+    // Overwrites all nine entries: ones at m[0], m[4] and m[8]
+    // (the get(i, i) slots), zeros everywhere else.
+    static const float kIdentity[9] = {
+        1.f, 0.f, 0.f,
+        0.f, 1.f, 0.f,
+        0.f, 0.f, 1.f
+    };
+    // kIdentity and m are both nine floats, so sizeof(m) covers the table.
+    memcpy(m, kIdentity, sizeof(m));
+}
+
+void Matrix3x3::load(const float *v) {  // overwrite every entry from a raw float array
+    memcpy(m, v, sizeof(m));  // reads sizeof(m) bytes, so v must supply at least as many floats as m holds
+}
+
+void Matrix3x3::load(const rs_matrix3x3 *v) {
+    load(v->m);  // raw-float overload performs the same sizeof(m) copy
+}
+
+void Matrix3x3::loadMultiply(const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) {
+    const Matrix3x3 *left = (const Matrix3x3 *)lhs;
+    const Matrix3x3 *right = (const Matrix3x3 *)rhs;
+    for (int row = 0; row < 3; ++row) {
+        // All three sums for this row finish before any of them is stored.
+        float acc[3] = { 0, 0, 0 };
+        for (int k = 0; k < 3; ++k) {
+            acc[0] += left->get(k, 0) * right->get(row, k);
+            acc[1] += left->get(k, 1) * right->get(row, k);
+            acc[2] += left->get(k, 2) * right->get(row, k);
+        }
+        set(row, 0, acc[0]);
+        set(row, 1, acc[1]);
+        set(row, 2, acc[2]);
+    }
+}
+
+void Matrix3x3::transpose() {
+    for (int row = 0; row < 2; ++row) {
+        for (int col = row + 1; col < 3; ++col) {
+            float &upper = m[row*3 + col];  // same slot as get(row, col)
+            float &lower = m[col*3 + row];  // its mirror, get(col, row)
+            const float held = upper;
+            upper = lower;
+            lower = held;
+        }
+    }
+}
+
diff --git a/rsMatrix3x3.h b/rsMatrix3x3.h
new file mode 100644
index 0000000..f96d270
--- /dev/null
+++ b/rsMatrix3x3.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_MATRIX_3x3_H
+#define ANDROID_RS_MATRIX_3x3_H
+
+#include "rsType.h"
+
+
+// ---------------------------------------------------------------------------
+namespace android {
+namespace renderscript {
+
+struct Matrix3x3 : public rs_matrix3x3 {  // adds methods on top of the inherited float array m
+    inline float get(uint32_t row, uint32_t col) const {  // reads m[row*3 + col]; no range check
+        return m[row*3 + col];
+    }
+    // Writes v to m[row*3 + col]; indices are not range-checked.
+    inline void set(uint32_t row, uint32_t col, float v) {
+        m[row*3 + col] = v;
+    }
+    // Loaders: identity, or a sizeof(m) copy from raw floats / another matrix.
+    void loadIdentity();
+    void load(const float *);
+    void load(const rs_matrix3x3 *);
+    // For each i and c: set(i, c, sum over j of lhs->get(j, c) * rhs->get(i, j)).
+    void loadMultiply(const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs);
+    // Swaps every get(i, j) with get(j, i) in place.
+    void transpose();
+    // In place: builds loadMultiply(this, rhs) in a temporary, then copies it back.
+    void multiply(const rs_matrix3x3 *rhs) {
+        Matrix3x3 tmp;
+        tmp.loadMultiply(this, rhs);
+        load(&tmp);
+    }
+};
+
+}
+}
+
+
+
+
+#endif
+
+
+
+
+
diff --git a/rsMatrix4x4.cpp b/rsMatrix4x4.cpp
new file mode 100644
index 0000000..2d90a98
--- /dev/null
+++ b/rsMatrix4x4.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsMatrix2x2.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix4x4.h"
+
+#include "stdlib.h"
+#include "string.h"
+#include "math.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+//////////////////////////////////////////////////////////////////////////////
+// Heavy math functions
+//////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+// Inverts this matrix in place; returns false and leaves it unchanged if |det| < 1e-6.
+bool Matrix4x4::inverse() {
+    rs_matrix4x4 result;  // scratch: m is only overwritten after the determinant check passes
+
+    int i, j;
+    for (i = 0; i < 4; ++i) {
+        for (j = 0; j < 4; ++j) {
+            // computeCofactor for int i, int j
+            int c0 = (i+1) % 4;  // c0..c2: the three indices other than i, in cyclic order
+            int c1 = (i+2) % 4;
+            int c2 = (i+3) % 4;
+            int r0 = (j+1) % 4;  // r0..r2: the three indices other than j, in cyclic order
+            int r1 = (j+2) % 4;
+            int r2 = (j+3) % 4;
+
+            float minor =  // 3x3 determinant over the entries m[c + 4*r] selected above
+                (m[c0 + 4*r0] * (m[c1 + 4*r1] * m[c2 + 4*r2] - m[c1 + 4*r2] * m[c2 + 4*r1]))
+                - (m[c0 + 4*r1] * (m[c1 + 4*r0] * m[c2 + 4*r2] - m[c1 + 4*r2] * m[c2 + 4*r0]))
+                + (m[c0 + 4*r2] * (m[c1 + 4*r0] * m[c2 + 4*r1] - m[c1 + 4*r1] * m[c2 + 4*r0]));
+
+            float cofactor = (i+j) & 1 ? -minor : minor;  // negated when i+j is odd
+
+            result.m[4*i + j] = cofactor;
+        }
+    }
+
+    // Dot product of 0th column of source and 0th row of result
+    float det = m[0]*result.m[0] + m[4]*result.m[1] +
+                 m[8]*result.m[2] + m[12]*result.m[3];
+
+    if (fabs(det) < 1e-6) {  // treated as not invertible; m has not been modified yet
+        return false;
+    }
+
+    det = 1.0f / det;  // from here on det holds the reciprocal of the determinant
+    for (i = 0; i < 16; ++i) {
+        m[i] = result.m[i] * det;
+    }
+
+    return true;
+}
+
+// Replaces m with its inverse transpose; returns false and leaves m unchanged if |det| < 1e-6.
+bool Matrix4x4::inverseTranspose() {
+    rs_matrix4x4 result;  // scratch: m is only overwritten after the determinant check passes
+
+    int i, j;
+    for (i = 0; i < 4; ++i) {
+        for (j = 0; j < 4; ++j) {
+            // computeCofactor for int i, int j
+            int c0 = (i+1) % 4;  // c0..c2: the three indices other than i, in cyclic order
+            int c1 = (i+2) % 4;
+            int c2 = (i+3) % 4;
+            int r0 = (j+1) % 4;  // r0..r2: the three indices other than j, in cyclic order
+            int r1 = (j+2) % 4;
+            int r2 = (j+3) % 4;
+
+            float minor = (m[c0 + 4*r0] * (m[c1 + 4*r1] * m[c2 + 4*r2] - m[c1 + 4*r2] * m[c2 + 4*r1]))
+                         - (m[c0 + 4*r1] * (m[c1 + 4*r0] * m[c2 + 4*r2] - m[c1 + 4*r2] * m[c2 + 4*r0]))
+                         + (m[c0 + 4*r2] * (m[c1 + 4*r0] * m[c2 + 4*r1] - m[c1 + 4*r1] * m[c2 + 4*r0]));
+
+            float cofactor = (i+j) & 1 ? -minor : minor;  // negated when i+j is odd
+
+            result.m[4*j + i] = cofactor;  // stored at the transposed position relative to inverse()
+        }
+    }
+
+    // Dot product of 0th column of source and 0th column of result
+    float det = m[0]*result.m[0] + m[4]*result.m[4] +
+                 m[8]*result.m[8] + m[12]*result.m[12];
+
+    if (fabs(det) < 1e-6) {  // treated as not invertible; m has not been modified yet
+        return false;
+    }
+
+    det = 1.0f / det;  // from here on det holds the reciprocal of the determinant
+    for (i = 0; i < 16; ++i) {
+        m[i] = result.m[i] * det;
+    }
+
+    return true;
+}
+
+// Transposes this 4x4 matrix in place: each element above the diagonal is
+// swapped with its mirror element below the diagonal.
+void Matrix4x4::transpose() {
+    for (int row = 0; row < 3; ++row) {
+        for (int col = row + 1; col < 4; ++col) {
+            const float upper = get(row, col);
+            set(row, col, get(col, row));
+            set(col, row, upper);
+        }
+    }
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////
+
+void Matrix4x4::loadIdentity() {  // overwrites all 16 entries with the 4x4 identity
+    m[0] = 1.f;   // diagonal
+    m[1] = 0.f;
+    m[2] = 0.f;
+    m[3] = 0.f;
+    m[4] = 0.f;
+    m[5] = 1.f;   // diagonal
+    m[6] = 0.f;
+    m[7] = 0.f;
+    m[8] = 0.f;
+    m[9] = 0.f;
+    m[10] = 1.f;  // diagonal
+    m[11] = 0.f;
+    m[12] = 0.f;
+    m[13] = 0.f;
+    m[14] = 0.f;
+    m[15] = 1.f;  // diagonal
+}
+
+void Matrix4x4::load(const float *v) {  // v must supply at least 16 floats
+    memcpy(m, v, sizeof(m));  // sizeof(m) covers the whole float[16] array
+}
+
+void Matrix4x4::load(const rs_matrix4x4 *v) {  // copies all 16 entries of *v into this matrix
+    memcpy(m, v->m, sizeof(m));
+}
+
+// Embeds the 3x3 matrix *v in the upper-left corner of this 4x4 matrix. The
+// fourth row and column are filled in from the identity: 0.f everywhere
+// except m[15], which is set to 1.f.
+void Matrix4x4::load(const rs_matrix3x3 *v) {
+    // Rows 0..2: three entries from *v followed by a zero.
+    for (int row = 0; row < 3; ++row) {
+        for (int col = 0; col < 3; ++col) {
+            m[row*4 + col] = v->m[row*3 + col];
+        }
+        // Pad the copied row out to four entries.
+        m[row*4 + 3] = 0.f;
+    }
+    // Fourth row has no counterpart in *v.
+    m[12] = 0.f;
+    m[13] = 0.f;
+    m[14] = 0.f;
+    m[15] = 1.f;
+}
+
+// Embeds the 2x2 matrix *v in the upper-left corner of this 4x4 matrix and
+// fills every remaining entry from the identity matrix.
+void Matrix4x4::load(const rs_matrix2x2 *v) {
+    // Rows 0 and 1: two entries taken from *v, then two zeros.
+    for (int row = 0; row < 2; ++row) {
+        m[row*4 + 0] = v->m[row*2 + 0];
+        m[row*4 + 1] = v->m[row*2 + 1];
+        m[row*4 + 2] = 0.f;
+        m[row*4 + 3] = 0.f;
+    }
+    // Rows 2 and 3 are plain identity rows: 1.f on the diagonal and 0.f
+    // in every other position.
+    for (int row = 2; row < 4; ++row) {
+        for (int col = 0; col < 4; ++col) {
+            m[row*4 + col] = (row == col) ? 1.f : 0.f;
+        }
+    }
+}
+
+
+// Loads a rotation of rot degrees about axis (x, y, z); the axis is normalized
+// unless its squared length is exactly 1. NOTE: an all-zero axis is not guarded.
+void Matrix4x4::loadRotate(float rot, float x, float y, float z) {
+    rot *= float(M_PI / 180.0f);  // degrees to radians
+    const float cosA = cosf(rot);
+    const float sinA = sinf(rot);
+    const float lenSq = x*x + y*y + z*z;
+    if (lenSq != 1) {
+        const float invLen = 1.f / sqrtf(lenSq);
+        x *= invLen;
+        y *= invLen;
+        z *= invLen;
+    }
+    const float oneMinusCos = 1.0f - cosA;
+    const float xy = x * y;
+    const float yz = y * z;
+    const float zx = z * x;
+    const float xSin = x * sinA;
+    const float ySin = y * sinA;
+    const float zSin = z * sinA;
+    m[ 0] = x*x*oneMinusCos + cosA;
+    m[ 1] =  xy*oneMinusCos + zSin;
+    m[ 2] =  zx*oneMinusCos - ySin;
+    m[ 3] = 0;   // m[3], m[7], m[11]..m[15] take their identity values
+    m[ 4] =  xy*oneMinusCos - zSin;
+    m[ 5] = y*y*oneMinusCos + cosA;
+    m[ 6] =  yz*oneMinusCos + xSin;
+    m[ 7] = 0;
+    m[ 8] =  zx*oneMinusCos + ySin;
+    m[ 9] =  yz*oneMinusCos - xSin;
+    m[10] = z*z*oneMinusCos + cosA;
+    m[11] = 0;
+    m[12] = 0;
+    m[13] = 0;
+    m[14] = 0;
+    m[15] = 1;
+}
+
+void Matrix4x4::loadScale(float x, float y, float z) {  // identity with (x, y, z) on the diagonal
+    loadIdentity();
+    m[0] = x;    // element (0, 0)
+    m[5] = y;    // element (1, 1)
+    m[10] = z;   // element (2, 2)
+}
+
+void Matrix4x4::loadTranslate(float x, float y, float z) {  // identity plus an offset in m[12..14]
+    loadIdentity();
+    set(3, 0, x);   // m[12]
+    set(3, 1, y);   // m[13]
+    set(3, 2, z);   // m[14]
+}
+
+// Loads the product of lhs and rhs into this matrix. In get()/set() terms,
+// element (i, k) of the result is the sum over j of lhs(j, k) * rhs(i, j).
+// The result is accumulated in a scratch array and copied into m only once it
+// is complete, so lhs or rhs may point at this matrix: storing each row into
+// m as soon as it is computed would clobber entries of an aliased lhs that the
+// remaining rows still have to read.
+void Matrix4x4::loadMultiply(const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) {
+    float product[16];
+    for (int i = 0; i < 4; i++) {
+        for (int k = 0; k < 4; k++) {
+            float sum = 0;
+            for (int j = 0; j < 4; j++) {
+                sum += lhs->m[j*4 + k] * rhs->m[i*4 + j];
+            }
+            product[i*4 + k] = sum;
+        }
+    }
+    memcpy(m, product, sizeof(m));
+}
+
+void Matrix4x4::loadOrtho(float left, float right, float bottom, float top, float near, float far) {
+    loadIdentity();  // entries not assigned below keep their identity values
+    m[0] = 2.f / (right - left);               // x: scale, then offset
+    m[12]= -(right + left) / (right - left);
+    m[5] = 2.f / (top - bottom);               // y: scale, then offset
+    m[13]= -(top + bottom) / (top - bottom);
+    m[10]= -2.f / (far - near);                // z: scale, then offset
+    m[14]= -(far + near) / (far - near);
+}
+
+void Matrix4x4::loadFrustum(float left, float right, float bottom, float top, float near, float far) {
+    loadIdentity();  // entries not assigned below keep their identity values
+    m[0] = 2.f * near / (right - left);        // m[0], m[8]: feed out[0] in vectorMultiply()
+    m[8] = (right + left) / (right - left);
+    m[5] = 2.f * near / (top - bottom);        // m[5], m[9]: feed out[1]
+    m[9] = (top + bottom) / (top - bottom);
+    m[10]= -(far + near) / (far - near);       // m[10], m[14]: feed out[2]
+    m[14]= -2.f * far * near / (far - near);
+    m[11]= -1.f;                               // m[11], m[15]: feed out[3]
+    m[15]= 0.f;
+}
+
+// Loads a perspective projection by deriving frustum bounds from fovy (in degrees) and aspect.
+void Matrix4x4::loadPerspective(float fovy, float aspect, float near, float far) {
+    // fovy * M_PI / 360 is half of fovy converted from degrees to radians.
+    const float top = near * tan((float) (fovy * M_PI / 360.0f));
+    const float bottom = -top;
+    loadFrustum(bottom * aspect, top * aspect, bottom, top, near, far);
+}
+
+void Matrix4x4::vectorMultiply(float *out, const float *in) const {
+    // Reads in[0..2] and writes out[0..3]; m[r + 12] is added as-is (implicit fourth input of 1).
+    for (int r = 0; r < 4; ++r) {
+        out[r] = (m[r] * in[0]) + (m[r + 4] * in[1]) + (m[r + 8] * in[2]) + m[r + 12];
+    }
+}
diff --git a/rsMatrix.h b/rsMatrix4x4.h
similarity index 68%
rename from rsMatrix.h
rename to rsMatrix4x4.h
index 4130b8e..abf34a3 100644
--- a/rsMatrix.h
+++ b/rsMatrix4x4.h
@@ -14,57 +14,65 @@
  * limitations under the License.
  */
 
-#ifndef ANDROID_RS_MATRIX_H
-#define ANDROID_RS_MATRIX_H
+#ifndef ANDROID_RS_MATRIX_4x4_H
+#define ANDROID_RS_MATRIX_4x4_H
 
+#include "rsType.h"
 
 
 // ---------------------------------------------------------------------------
 namespace android {
 namespace renderscript {
 
-struct Matrix {
-    float m[16];
-
-    inline float get(int i, int j) const {
-        return m[i*4 + j];
+struct Matrix4x4 : public rs_matrix4x4 {
+    float get(uint32_t row, uint32_t col) const {
+        return m[row*4 + col];
     }
 
-    inline void set(int i, int j, float v) {
-        m[i*4 + j] = v;
+    void set(uint32_t row, uint32_t col, float v) {
+        m[row*4 + col] = v;
     }
 
     void loadIdentity();
     void load(const float *);
-    void load(const Matrix *);
+    void load(const rs_matrix4x4 *);
+    void load(const rs_matrix3x3 *);
+    void load(const rs_matrix2x2 *);
 
     void loadRotate(float rot, float x, float y, float z);
     void loadScale(float x, float y, float z);
     void loadTranslate(float x, float y, float z);
-    void loadMultiply(const Matrix *lhs, const Matrix *rhs);
+    void loadMultiply(const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs);
 
     void loadOrtho(float l, float r, float b, float t, float n, float f);
     void loadFrustum(float l, float r, float b, float t, float n, float f);
+    void loadPerspective(float fovy, float aspect, float near, float far);
 
     void vectorMultiply(float *v4out, const float *v3in) const;
 
-    void multiply(const Matrix *rhs) {
-        Matrix tmp;
+    bool inverse();
+    bool inverseTranspose();
+    void transpose();
+
+
+
+    void multiply(const rs_matrix4x4 *rhs) {
+        Matrix4x4 tmp;
         tmp.loadMultiply(this, rhs);
         load(&tmp);
     }
     void rotate(float rot, float x, float y, float z) {
-        Matrix tmp;
+        Matrix4x4 tmp;
         tmp.loadRotate(rot, x, y, z);
         multiply(&tmp);
     }
     void scale(float x, float y, float z) {
-        Matrix tmp;
+        Matrix4x4 tmp;
         tmp.loadScale(x, y, z);
         multiply(&tmp);
     }
     void translate(float x, float y, float z) {
-        Matrix tmp;
+        Matrix4x4 tmp;
         tmp.loadTranslate(x, y, z);
         multiply(&tmp);
     }
diff --git a/rsProgramVertex.cpp b/rsProgramVertex.cpp
index 403c2a6..e407d3a 100644
--- a/rsProgramVertex.cpp
+++ b/rsProgramVertex.cpp
@@ -96,9 +96,9 @@
             return;
         }
         float *f = static_cast<float *>(mConstants[0]->getPtr());
-        Matrix mvp;
+        Matrix4x4 mvp;
         mvp.load(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
-        Matrix t;
+        Matrix4x4 t;
         t.load(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET]);
         mvp.multiply(&t);
         for (uint32_t i = 0; i < 16; i ++) {
@@ -181,9 +181,9 @@
         return;
     }
     float *f = static_cast<float *>(mConstants[0]->getPtr());
-    Matrix mvp;
-    mvp.loadMultiply((Matrix *)&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET],
-                     (Matrix *)&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
+    Matrix4x4 mvp;
+    mvp.loadMultiply((Matrix4x4 *)&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET],
+                     (Matrix4x4 *)&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
     mvp.vectorMultiply(v4out, v3in);
 }
 
@@ -269,7 +269,7 @@
 void ProgramVertexState::updateSize(Context *rsc) {
     float *f = static_cast<float *>(mDefaultAlloc->getPtr());
 
-    Matrix m;
+    Matrix4x4 m;
     m.loadOrtho(0,rsc->getWidth(), rsc->getHeight(),0, -1,1);
     memcpy(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET], m.m, sizeof(m));
     memcpy(&f[RS_PROGRAM_VERTEX_MVP_OFFSET], m.m, sizeof(m));
diff --git a/rsRuntime.h b/rsRuntime.h
new file mode 100644
index 0000000..884f7b6
--- /dev/null
+++ b/rsRuntime.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptC.h"
+
+#include "utils/Timers.h"
+
+#include <time.h>
+
+namespace android {
+namespace renderscript {
+
+// Runtime ("rsr") entry points. Context-dependent calls take the Context and
+// Script explicitly; the matrix helpers take only the matrices they operate on.
+
+//////////////////////////////////////////////////////////////////////////////
+// Context
+//////////////////////////////////////////////////////////////////////////////
+
+void rsrBindTexture(Context *, Script *, ProgramFragment *, uint32_t slot, Allocation *);
+void rsrBindSampler(Context *, Script *, ProgramFragment *, uint32_t slot, Sampler *);
+void rsrBindProgramStore(Context *, Script *, ProgramStore *);
+void rsrBindProgramFragment(Context *, Script *, ProgramFragment *);
+void rsrBindProgramVertex(Context *, Script *, ProgramVertex *);
+void rsrBindProgramRaster(Context *, Script *, ProgramRaster *);
+void rsrBindFrameBufferObjectColorTarget(Context *, Script *, Allocation *, uint32_t slot);
+void rsrBindFrameBufferObjectDepthTarget(Context *, Script *, Allocation *);
+void rsrClearFrameBufferObjectColorTarget(Context *, Script *, uint32_t slot);
+void rsrClearFrameBufferObjectDepthTarget(Context *, Script *);
+void rsrClearFrameBufferObjectTargets(Context *, Script *);
+
+//////////////////////////////////////////////////////////////////////////////
+// VP
+//////////////////////////////////////////////////////////////////////////////
+
+void rsrVpLoadProjectionMatrix(Context *, Script *, const rsc_Matrix *m);
+void rsrVpLoadModelMatrix(Context *, Script *, const rsc_Matrix *m);
+void rsrVpLoadTextureMatrix(Context *, Script *, const rsc_Matrix *m);
+void rsrPfConstantColor(Context *, Script *, ProgramFragment *, float r, float g, float b, float a);
+void rsrVpGetProjectionMatrix(Context *, Script *, rsc_Matrix *m);
+
+//////////////////////////////////////////////////////////////////////////////
+// Drawing
+//////////////////////////////////////////////////////////////////////////////
+
+void rsrDrawQuadTexCoords(Context *, Script *,
+                          float x1, float y1, float z1, float u1, float v1,
+                          float x2, float y2, float z2, float u2, float v2,
+                          float x3, float y3, float z3, float u3, float v3,
+                          float x4, float y4, float z4, float u4, float v4);
+void rsrDrawQuad(Context *, Script *,
+                 float x1, float y1, float z1,
+                 float x2, float y2, float z2,
+                 float x3, float y3, float z3,
+                 float x4, float y4, float z4);
+void rsrDrawSpriteScreenspace(Context *, Script *,
+                              float x, float y, float z, float w, float h);
+void rsrDrawRect(Context *, Script *, float x1, float y1, float x2, float y2, float z);
+void rsrDrawMesh(Context *, Script *, Mesh *);
+void rsrDrawMeshPrimitive(Context *, Script *, Mesh *, uint32_t primIndex);
+void rsrDrawMeshPrimitiveRange(Context *, Script *, Mesh *,
+                               uint32_t primIndex, uint32_t start, uint32_t len);
+void rsrMeshComputeBoundingBox(Context *, Script *, Mesh *,
+                               float *minX, float *minY, float *minZ,
+                               float *maxX, float *maxY, float *maxZ);
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Color, clear, surface size and text routines
+//////////////////////////////////////////////////////////////////////////////
+
+
+void rsrColor(Context *, Script *, float r, float g, float b, float a);
+void rsrFinish(Context *, Script *);
+void rsrAllocationSyncAll(Context *, Script *, Allocation *);
+void rsrClearColor(Context *, Script *, float r, float g, float b, float a);
+void rsrClearDepth(Context *, Script *, float v);
+uint32_t rsrGetWidth(Context *, Script *);
+uint32_t rsrGetHeight(Context *, Script *);
+void rsrDrawTextAlloc(Context *, Script *, Allocation *, int x, int y);
+void rsrDrawText(Context *, Script *, const char *text, int x, int y);
+void rsrSetMetrics(Context *, Script *, Font::Rect *metrics,
+                   int32_t *left, int32_t *right, int32_t *top, int32_t *bottom);
+void rsrMeasureTextAlloc(Context *, Script *, Allocation *,
+                         int32_t *left, int32_t *right, int32_t *top, int32_t *bottom);
+void rsrMeasureText(Context *, Script *, const char *text,
+                    int32_t *left, int32_t *right, int32_t *top, int32_t *bottom);
+void rsrBindFont(Context *, Script *, Font *);
+void rsrFontColor(Context *, Script *, float r, float g, float b, float a);
+
+//////////////////////////////////////////////////////////////////////////////
+// Time routines
+//////////////////////////////////////////////////////////////////////////////
+
+float rsrGetDt(Context *, Script *);
+time_t rsrTime(Context *, Script *, time_t *timer);
+tm* rsrLocalTime(Context *, Script *, tm *local, time_t *timer);
+int64_t rsrUptimeMillis(Context *, Script *);
+int64_t rsrUptimeNanos(Context *, Script *);
+
+//////////////////////////////////////////////////////////////////////////////
+// Message routines
+//////////////////////////////////////////////////////////////////////////////
+
+uint32_t rsrToClient(Context *, Script *, int cmdID, void *data, int len);
+uint32_t rsrToClientBlocking(Context *, Script *, int cmdID, void *data, int len);
+
+//////////////////////////////////////////////////////////////////////////////
+// Object, allocation and forEach routines
+//////////////////////////////////////////////////////////////////////////////
+
+void rsrSetObject(const Context *, const Script *, ObjectBase **dst, ObjectBase * src);
+void rsrClearObject(const Context *, const Script *, ObjectBase **dst);
+bool rsrIsObject(const Context *, const Script *, const ObjectBase *src);
+
+const Allocation * rsrGetAllocation(Context *, Script *, const void *ptr);
+
+void rsrAllocationMarkDirty(Context *, Script *, RsAllocation a);
+void rsrAllocationSyncAll(Context *, Script *, Allocation *a, RsAllocationUsageType source);
+
+
+void rsrForEach(Context *, Script *, Script *target,
+                Allocation *in,
+                Allocation *out,
+                const void *usr,
+                uint32_t usrBytes,
+                const RsScriptCall *call);
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Heavy math functions
+//////////////////////////////////////////////////////////////////////////////
+
+
+void rsrMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v);
+float rsrMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col);
+void rsrMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v);
+float rsrMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col);
+void rsrMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v);
+float rsrMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col);
+void rsrMatrixLoadIdentity_4x4(rs_matrix4x4 *m);
+void rsrMatrixLoadIdentity_3x3(rs_matrix3x3 *m);
+void rsrMatrixLoadIdentity_2x2(rs_matrix2x2 *m);
+void rsrMatrixLoad_4x4_f(rs_matrix4x4 *m, const float *v);
+void rsrMatrixLoad_3x3_f(rs_matrix3x3 *m, const float *v);
+void rsrMatrixLoad_2x2_f(rs_matrix2x2 *m, const float *v);
+void rsrMatrixLoad_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *v);
+void rsrMatrixLoad_4x4_3x3(rs_matrix4x4 *m, const rs_matrix3x3 *v);
+void rsrMatrixLoad_4x4_2x2(rs_matrix4x4 *m, const rs_matrix2x2 *v);
+void rsrMatrixLoad_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *v);
+void rsrMatrixLoad_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *v);
+void rsrMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
+void rsrMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z);
+void rsrMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z);
+void rsrMatrixLoadMultiply_4x4_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *lhs,
+                                       const rs_matrix4x4 *rhs);
+void rsrMatrixMultiply_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *rhs);
+void rsrMatrixLoadMultiply_3x3_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *lhs,
+                                       const rs_matrix3x3 *rhs);
+void rsrMatrixMultiply_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *rhs);
+void rsrMatrixLoadMultiply_2x2_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *lhs,
+                                       const rs_matrix2x2 *rhs);
+void rsrMatrixMultiply_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *rhs);
+void rsrMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z);
+void rsrMatrixScale(rs_matrix4x4 *m, float x, float y, float z);
+void rsrMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z);
+void rsrMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right,
+                        float bottom, float top, float near, float far);
+void rsrMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right,
+                          float bottom, float top, float near, float far);
+void rsrMatrixLoadPerspective(rs_matrix4x4 *m, float fovy, float aspect, float near, float far);
+
+// Returns true if the matrix was successfully inverted
+bool rsrMatrixInverse_4x4(rs_matrix4x4 *m);
+// Returns true if the matrix was successfully inverted
+bool rsrMatrixInverseTranspose_4x4(rs_matrix4x4 *m);
+
+void rsrMatrixTranspose_4x4(rs_matrix4x4 *m);
+void rsrMatrixTranspose_3x3(rs_matrix3x3 *m);
+void rsrMatrixTranspose_2x2(rs_matrix2x2 *m);
+
+}  // namespace renderscript
+}  // namespace android
diff --git a/rsScript.h b/rsScript.h
index 671fbe6..088c8d1 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -34,25 +34,6 @@
     struct Hal {
         void * drv;
 
-        struct State {
-            ObjectBaseRef<const Type> type;
-            void * mallocPtr;
-
-            uint32_t usageFlags;
-            RsAllocationMipmapControl mipmapControl;
-
-            // Cached fields from the Type and Element
-            // to prevent pointer chasing in critical loops.
-            uint32_t dimensionX;
-            uint32_t dimensionY;
-            uint32_t dimensionZ;
-            uint32_t elementSizeBytes;
-            bool hasMipmaps;
-            bool hasFaces;
-            bool hasReferences;
-        };
-        State state;
-
         struct DriverInfo {
             int mVersionMajor;
             int mVersionMinor;
@@ -96,6 +77,7 @@
                             const Allocation * ain,
                             Allocation * aout,
                             const void * usr,
+                            size_t usrBytes,
                             const RsScriptCall *sc = NULL) = 0;
 
     virtual void Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) = 0;
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index 8e95891..c379b8b 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -16,7 +16,6 @@
 
 #include "rsContext.h"
 #include "rsScriptC.h"
-#include "rsMatrix.h"
 #include "utils/Timers.h"
 #include "utils/StopWatch.h"
 
@@ -120,13 +119,14 @@
                          const Allocation * ain,
                          Allocation * aout,
                          const void * usr,
+                         size_t usrBytes,
                          const RsScriptCall *sc) {
 
     Context::PushState ps(rsc);
 
     setupGLState(rsc);
     setupScript(rsc);
-    rsc->mHal.funcs.script.invokeForEach(rsc, this, ain, aout, usr, 0, sc);
+    rsc->mHal.funcs.script.invokeForEach(rsc, this, ain, aout, usr, usrBytes, sc);
 }
 
 void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) {
@@ -148,6 +148,7 @@
 ScriptCState::~ScriptCState() {
 }
 
+/*
 static void* symbolLookup(void* pContext, char const* name) {
     const ScriptCState::SymbolTable_t *sym;
     ScriptC *s = (ScriptC *)pContext;
@@ -171,6 +172,7 @@
     LOGE("ScriptC sym lookup failed for %s", name);
     return NULL;
 }
+*/
 
 #if 0
 extern const char rs_runtime_lib_bc[];
@@ -185,7 +187,7 @@
 
     //LOGE("runCompiler %p %p %p %p %p %i", rsc, this, resName, cacheDir, bitcode, bitcodeLen);
 
-    rsc->mHal.funcs.script.init(rsc, this, resName, cacheDir, bitcode, bitcodeLen, 0, symbolLookup);
+    rsc->mHal.funcs.script.init(rsc, this, resName, cacheDir, bitcode, bitcodeLen, 0);
 
     mEnviroment.mFragment.set(rsc->getDefaultProgramFragment());
     mEnviroment.mVertex.set(rsc->getDefaultProgramVertex());
diff --git a/rsScriptC.h b/rsScriptC.h
index 2edeb9b..4c85745 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -47,6 +47,7 @@
                             const Allocation * ain,
                             Allocation * aout,
                             const void * usr,
+                            size_t usrBytes,
                             const RsScriptCall *sc = NULL);
 
     virtual void serialize(OStream *stream) const {    }
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index 4e8cbdc..4ee0a3e 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -16,7 +16,9 @@
 
 #include "rsContext.h"
 #include "rsScriptC.h"
-#include "rsMatrix.h"
+#include "rsMatrix4x4.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix2x2.h"
 
 #include "utils/Timers.h"
 
@@ -25,10 +27,9 @@
 using namespace android;
 using namespace android::renderscript;
 
-#define GET_TLS()  ScriptTLSStruct * tls = \
-    (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
-    Context * rsc = tls->mContext; \
-    ScriptC * sc = (ScriptC *) tls->mScript
+
+namespace android {
+namespace renderscript {
 
 
 //////////////////////////////////////////////////////////////////////////////
@@ -73,42 +74,15 @@
 }
 #endif
 
-static float SC_randf(float max) {
-    float r = (float)rand();
-    r *= max;
-    return r / RAND_MAX;
-}
-
-static float SC_randf2(float min, float max) {
-    float r = (float)rand();
-    r = r * (max - min) + min;
-    return r / RAND_MAX;
-}
-
-static int SC_randi(int max) {
-    return (int)SC_randf(max);
-}
-
-static int SC_randi2(int min, int max) {
-    return (int)SC_randf2(min, max);
-}
-
-static float SC_frac(float v) {
-    int i = (int)floor(v);
-    return fmin(v - i, 0x1.fffffep-1f);
-}
-
 //////////////////////////////////////////////////////////////////////////////
 // Time routines
 //////////////////////////////////////////////////////////////////////////////
 
-static time_t SC_time(time_t *timer) {
-    GET_TLS();
+time_t rsrTime(Context *rsc, Script *sc, time_t *timer) {
     return time(timer);
 }
 
-static tm* SC_localtime(tm *local, time_t *timer) {
-    GET_TLS();
+tm* rsrLocalTime(Context *rsc, Script *sc, tm *local, time_t *timer) {
     if (!local) {
       return NULL;
     }
@@ -122,16 +96,15 @@
     return local;
 }
 
-static int64_t SC_uptimeMillis() {
+int64_t rsrUptimeMillis(Context *rsc, Script *sc) {
     return nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
 }
 
-static int64_t SC_uptimeNanos() {
+int64_t rsrUptimeNanos(Context *rsc, Script *sc) {
     return systemTime(SYSTEM_TIME_MONOTONIC);
 }
 
-static float SC_getDt() {
-    GET_TLS();
+float rsrGetDt(Context *rsc, Script *sc) {
     int64_t l = sc->mEnviroment.mLastDtTime;
     sc->mEnviroment.mLastDtTime = systemTime(SYSTEM_TIME_MONOTONIC);
     return ((float)(sc->mEnviroment.mLastDtTime - l)) / 1.0e9;
@@ -141,51 +114,7 @@
 //
 //////////////////////////////////////////////////////////////////////////////
 
-static uint32_t SC_allocGetDimX(Allocation *a) {
-    CHECK_OBJ(a);
-    return a->mHal.state.dimensionX;
-}
-
-static uint32_t SC_allocGetDimY(Allocation *a) {
-    CHECK_OBJ(a);
-    return a->mHal.state.dimensionY;
-}
-
-static uint32_t SC_allocGetDimZ(Allocation *a) {
-    CHECK_OBJ(a);
-    return a->mHal.state.dimensionZ;
-}
-
-static uint32_t SC_allocGetDimLOD(Allocation *a) {
-    CHECK_OBJ(a);
-    return a->mHal.state.hasMipmaps;
-}
-
-static uint32_t SC_allocGetDimFaces(Allocation *a) {
-    CHECK_OBJ(a);
-    return a->mHal.state.hasFaces;
-}
-
-static const void * SC_getElementAtX(Allocation *a, uint32_t x) {
-    CHECK_OBJ(a);
-    const uint8_t *p = (const uint8_t *)a->getPtr();
-    return &p[a->mHal.state.elementSizeBytes * x];
-}
-
-static const void * SC_getElementAtXY(Allocation *a, uint32_t x, uint32_t y) {
-    CHECK_OBJ(a);
-    const uint8_t *p = (const uint8_t *)a->getPtr();
-    return &p[a->mHal.state.elementSizeBytes * (x + y * a->mHal.state.dimensionX)];
-}
-
-static const void * SC_getElementAtXYZ(Allocation *a, uint32_t x, uint32_t y, uint32_t z) {
-    CHECK_OBJ(a);
-    const uint8_t *p = (const uint8_t *)a->getPtr();
-    return &p[a->mHal.state.elementSizeBytes * (x + y * a->mHal.state.dimensionX +
-              z * a->mHal.state.dimensionX * a->mHal.state.dimensionY)];
-}
-
-void android::renderscript::rsiSetObject(ObjectBase **dst, ObjectBase * src) {
+void rsrSetObject(const Context *rsc, const Script *sc, ObjectBase **dst, ObjectBase * src) {
     //LOGE("rsiSetObject  %p,%p  %p", vdst, *vdst, vsrc);
     if (src) {
         CHECK_OBJ(src);
@@ -198,7 +127,7 @@
     *dst = src;
 }
 
-void android::renderscript::rsiClearObject(ObjectBase **dst) {
+void rsrClearObject(const Context *rsc, const Script *sc, ObjectBase **dst) {
     //LOGE("rsiClearObject  %p,%p", vdst, *vdst);
     if (dst[0]) {
         CHECK_OBJ(dst[0]);
@@ -207,802 +136,39 @@
     *dst = NULL;
 }
 
-bool android::renderscript::rsiIsObject(const ObjectBase *src) {
+bool rsrIsObject(const Context *rsc, const Script *sc, const ObjectBase *src) {
     return src != NULL;
 }
 
-static void SC_debugF(const char *s, float f) {
-    LOGD("%s %f, 0x%08x", s, f, *((int *) (&f)));
-}
-static void SC_debugFv2(const char *s, float f1, float f2) {
-    LOGD("%s {%f, %f}", s, f1, f2);
-}
-static void SC_debugFv3(const char *s, float f1, float f2, float f3) {
-    LOGD("%s {%f, %f, %f}", s, f1, f2, f3);
-}
-static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {
-    LOGD("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
-}
-static void SC_debugD(const char *s, double d) {
-    LOGD("%s %f, 0x%08llx", s, d, *((long long *) (&d)));
-}
-static void SC_debugFM4v4(const char *s, const float *f) {
-    LOGD("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]);
-    LOGD("%s  %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]);
-    LOGD("%s  %f, %f, %f, %f", s, f[2], f[6], f[10], f[14]);
-    LOGD("%s  %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]);
-}
-static void SC_debugFM3v3(const char *s, const float *f) {
-    LOGD("%s {%f, %f, %f", s, f[0], f[3], f[6]);
-    LOGD("%s  %f, %f, %f", s, f[1], f[4], f[7]);
-    LOGD("%s  %f, %f, %f}",s, f[2], f[5], f[8]);
-}
-static void SC_debugFM2v2(const char *s, const float *f) {
-    LOGD("%s {%f, %f", s, f[0], f[2]);
-    LOGD("%s  %f, %f}",s, f[1], f[3]);
-}
 
-static void SC_debugI32(const char *s, int32_t i) {
-    LOGD("%s %i  0x%x", s, i, i);
-}
-static void SC_debugU32(const char *s, uint32_t i) {
-    LOGD("%s %u  0x%x", s, i, i);
-}
-static void SC_debugLL64(const char *s, long long ll) {
-    LOGD("%s %lld  0x%llx", s, ll, ll);
-}
-static void SC_debugULL64(const char *s, unsigned long long ll) {
-    LOGD("%s %llu  0x%llx", s, ll, ll);
-}
-
-static void SC_debugP(const char *s, const void *p) {
-    LOGD("%s %p", s, p);
-}
-
-static uint32_t SC_toClient2(int cmdID, void *data, int len) {
-    GET_TLS();
+uint32_t rsrToClient(Context *rsc, Script *sc, int cmdID, void *data, int len) {
     //LOGE("SC_toClient %i %i %i", cmdID, len);
     return rsc->sendMessageToClient(data, RS_MESSAGE_TO_CLIENT_USER, cmdID, len, false);
 }
 
-static uint32_t SC_toClient(int cmdID) {
-    GET_TLS();
-    //LOGE("SC_toClient %i", cmdID);
-    return rsc->sendMessageToClient(NULL, RS_MESSAGE_TO_CLIENT_USER, cmdID, 0, false);
-}
-
-static uint32_t SC_toClientBlocking2(int cmdID, void *data, int len) {
-    GET_TLS();
+uint32_t rsrToClientBlocking(Context *rsc, Script *sc, int cmdID, void *data, int len) {
     //LOGE("SC_toClientBlocking %i %i", cmdID, len);
     return rsc->sendMessageToClient(data, RS_MESSAGE_TO_CLIENT_USER, cmdID, len, true);
 }
 
-static uint32_t SC_toClientBlocking(int cmdID) {
-    GET_TLS();
-    //LOGE("SC_toClientBlocking %i", cmdID);
-    return rsc->sendMessageToClient(NULL, RS_MESSAGE_TO_CLIENT_USER, cmdID, 0, true);
-}
 
-int SC_divsi3(int a, int b) {
-    return a / b;
-}
-
-int SC_modsi3(int a, int b) {
-    return a % b;
-}
-
-unsigned int SC_udivsi3(unsigned int a, unsigned int b) {
-    return a / b;
-}
-
-unsigned int SC_umodsi3(unsigned int a, unsigned int b) {
-    return a % b;
-}
-
-int SC_getAllocation(const void *ptr) {
-    GET_TLS();
-    const Allocation *alloc = sc->ptrToAllocation(ptr);
-    return (int)alloc;
-}
-
-void SC_allocationMarkDirty(RsAllocation a) {
-    Allocation *alloc = static_cast<Allocation *>(a);
-    alloc->sendDirty();
-}
-
-void SC_ForEach(RsScript vs,
-                RsAllocation vin,
-                RsAllocation vout,
-                const void *usr) {
-    GET_TLS();
-    const Allocation *ain = static_cast<const Allocation *>(vin);
-    Allocation *aout = static_cast<Allocation *>(vout);
-    Script *s = static_cast<Script *>(vs);
-    s->runForEach(rsc, ain, aout, usr);
-}
-
-void SC_ForEach2(RsScript vs,
-                RsAllocation vin,
-                RsAllocation vout,
-                const void *usr,
+void rsrForEach(Context *rsc, Script *sc,
+                Script *target,
+                Allocation *in, Allocation *out,
+                const void *usr, uint32_t usrBytes,
                 const RsScriptCall *call) {
-    GET_TLS();
-    const Allocation *ain = static_cast<const Allocation *>(vin);
-    Allocation *aout = static_cast<Allocation *>(vout);
-    Script *s = static_cast<Script *>(vs);
-    s->runForEach(rsc, ain, aout, usr, call);
+    target->runForEach(rsc, in, out, usr, usrBytes, call);
 }
 
-
-//////////////////////////////////////////////////////////////////////////////
-// Heavy math functions
-//////////////////////////////////////////////////////////////////////////////
-
-typedef struct {
-    float m[16];
-} rs_matrix4x4;
-
-typedef struct {
-    float m[9];
-} rs_matrix3x3;
-
-typedef struct {
-    float m[4];
-} rs_matrix2x2;
-
-static inline void
-rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) {
-    m->m[row * 4 + col] = v;
+void rsrAllocationSyncAll(Context *rsc, Script *sc, Allocation *a, RsAllocationUsageType usage) {
+    a->syncAll(rsc, usage);
 }
 
-static inline float
-rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) {
-    return m->m[row * 4 + col];
+const Allocation * rsrGetAllocation(Context *rsc, Script *s, const void *ptr) {
+    ScriptC *sc = (ScriptC *)s;
+    return sc->ptrToAllocation(ptr);
 }
 
-static inline void
-rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) {
-    m->m[row * 3 + col] = v;
 }
-
-static inline float
-rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) {
-    return m->m[row * 3 + col];
-}
-
-static inline void
-rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) {
-    m->m[row * 2 + col] = v;
-}
-
-static inline float
-rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) {
-    return m->m[row * 2 + col];
-}
-
-
-static void SC_MatrixLoadIdentity_4x4(rs_matrix4x4 *m) {
-    m->m[0] = 1.f;
-    m->m[1] = 0.f;
-    m->m[2] = 0.f;
-    m->m[3] = 0.f;
-    m->m[4] = 0.f;
-    m->m[5] = 1.f;
-    m->m[6] = 0.f;
-    m->m[7] = 0.f;
-    m->m[8] = 0.f;
-    m->m[9] = 0.f;
-    m->m[10] = 1.f;
-    m->m[11] = 0.f;
-    m->m[12] = 0.f;
-    m->m[13] = 0.f;
-    m->m[14] = 0.f;
-    m->m[15] = 1.f;
-}
-
-static void SC_MatrixLoadIdentity_3x3(rs_matrix3x3 *m) {
-    m->m[0] = 1.f;
-    m->m[1] = 0.f;
-    m->m[2] = 0.f;
-    m->m[3] = 0.f;
-    m->m[4] = 1.f;
-    m->m[5] = 0.f;
-    m->m[6] = 0.f;
-    m->m[7] = 0.f;
-    m->m[8] = 1.f;
-}
-
-static void SC_MatrixLoadIdentity_2x2(rs_matrix2x2 *m) {
-    m->m[0] = 1.f;
-    m->m[1] = 0.f;
-    m->m[2] = 0.f;
-    m->m[3] = 1.f;
-}
-
-static void SC_MatrixLoad_4x4_f(rs_matrix4x4 *m, const float *v) {
-    m->m[0] = v[0];
-    m->m[1] = v[1];
-    m->m[2] = v[2];
-    m->m[3] = v[3];
-    m->m[4] = v[4];
-    m->m[5] = v[5];
-    m->m[6] = v[6];
-    m->m[7] = v[7];
-    m->m[8] = v[8];
-    m->m[9] = v[9];
-    m->m[10] = v[10];
-    m->m[11] = v[11];
-    m->m[12] = v[12];
-    m->m[13] = v[13];
-    m->m[14] = v[14];
-    m->m[15] = v[15];
-}
-
-static void SC_MatrixLoad_3x3_f(rs_matrix3x3 *m, const float *v) {
-    m->m[0] = v[0];
-    m->m[1] = v[1];
-    m->m[2] = v[2];
-    m->m[3] = v[3];
-    m->m[4] = v[4];
-    m->m[5] = v[5];
-    m->m[6] = v[6];
-    m->m[7] = v[7];
-    m->m[8] = v[8];
-}
-
-static void SC_MatrixLoad_2x2_f(rs_matrix2x2 *m, const float *v) {
-    m->m[0] = v[0];
-    m->m[1] = v[1];
-    m->m[2] = v[2];
-    m->m[3] = v[3];
-}
-
-static void SC_MatrixLoad_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *v) {
-    m->m[0] = v->m[0];
-    m->m[1] = v->m[1];
-    m->m[2] = v->m[2];
-    m->m[3] = v->m[3];
-    m->m[4] = v->m[4];
-    m->m[5] = v->m[5];
-    m->m[6] = v->m[6];
-    m->m[7] = v->m[7];
-    m->m[8] = v->m[8];
-    m->m[9] = v->m[9];
-    m->m[10] = v->m[10];
-    m->m[11] = v->m[11];
-    m->m[12] = v->m[12];
-    m->m[13] = v->m[13];
-    m->m[14] = v->m[14];
-    m->m[15] = v->m[15];
-}
-
-static void SC_MatrixLoad_4x4_3x3(rs_matrix4x4 *m, const rs_matrix3x3 *v) {
-    m->m[0] = v->m[0];
-    m->m[1] = v->m[1];
-    m->m[2] = v->m[2];
-    m->m[3] = 0.f;
-    m->m[4] = v->m[3];
-    m->m[5] = v->m[4];
-    m->m[6] = v->m[5];
-    m->m[7] = 0.f;
-    m->m[8] = v->m[6];
-    m->m[9] = v->m[7];
-    m->m[10] = v->m[8];
-    m->m[11] = 0.f;
-    m->m[12] = 0.f;
-    m->m[13] = 0.f;
-    m->m[14] = 0.f;
-    m->m[15] = 1.f;
-}
-
-static void SC_MatrixLoad_4x4_2x2(rs_matrix4x4 *m, const rs_matrix2x2 *v) {
-    m->m[0] = v->m[0];
-    m->m[1] = v->m[1];
-    m->m[2] = 0.f;
-    m->m[3] = 0.f;
-    m->m[4] = v->m[2];
-    m->m[5] = v->m[3];
-    m->m[6] = 0.f;
-    m->m[7] = 0.f;
-    m->m[8] = 0.f;
-    m->m[9] = 0.f;
-    m->m[10] = 1.f;
-    m->m[11] = 0.f;
-    m->m[12] = 0.f;
-    m->m[13] = 0.f;
-    m->m[14] = 0.f;
-    m->m[15] = 1.f;
-}
-
-static void SC_MatrixLoad_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *v) {
-    m->m[0] = v->m[0];
-    m->m[1] = v->m[1];
-    m->m[2] = v->m[2];
-    m->m[3] = v->m[3];
-    m->m[4] = v->m[4];
-    m->m[5] = v->m[5];
-    m->m[6] = v->m[6];
-    m->m[7] = v->m[7];
-    m->m[8] = v->m[8];
-}
-
-static void SC_MatrixLoad_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *v) {
-    m->m[0] = v->m[0];
-    m->m[1] = v->m[1];
-    m->m[2] = v->m[2];
-    m->m[3] = v->m[3];
-}
-
-static void SC_MatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) {
-    float c, s;
-    m->m[3] = 0;
-    m->m[7] = 0;
-    m->m[11]= 0;
-    m->m[12]= 0;
-    m->m[13]= 0;
-    m->m[14]= 0;
-    m->m[15]= 1;
-    rot *= (float)(M_PI / 180.0f);
-    c = cos(rot);
-    s = sin(rot);
-
-    const float len = x*x + y*y + z*z;
-    if (len != 1) {
-        const float recipLen = 1.f / sqrt(len);
-        x *= recipLen;
-        y *= recipLen;
-        z *= recipLen;
-    }
-    const float nc = 1.0f - c;
-    const float xy = x * y;
-    const float yz = y * z;
-    const float zx = z * x;
-    const float xs = x * s;
-    const float ys = y * s;
-    const float zs = z * s;
-    m->m[ 0] = x*x*nc +  c;
-    m->m[ 4] =  xy*nc - zs;
-    m->m[ 8] =  zx*nc + ys;
-    m->m[ 1] =  xy*nc + zs;
-    m->m[ 5] = y*y*nc +  c;
-    m->m[ 9] =  yz*nc - xs;
-    m->m[ 2] =  zx*nc - ys;
-    m->m[ 6] =  yz*nc + xs;
-    m->m[10] = z*z*nc +  c;
-}
-
-static void SC_MatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z) {
-    SC_MatrixLoadIdentity_4x4(m);
-    m->m[0] = x;
-    m->m[5] = y;
-    m->m[10] = z;
-}
-
-static void SC_MatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z) {
-    SC_MatrixLoadIdentity_4x4(m);
-    m->m[12] = x;
-    m->m[13] = y;
-    m->m[14] = z;
-}
-
-static void SC_MatrixLoadMultiply_4x4_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) {
-    for (int i=0 ; i<4 ; i++) {
-        float ri0 = 0;
-        float ri1 = 0;
-        float ri2 = 0;
-        float ri3 = 0;
-        for (int j=0 ; j<4 ; j++) {
-            const float rhs_ij = rsMatrixGet(rhs, i,j);
-            ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij;
-            ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij;
-            ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij;
-            ri3 += rsMatrixGet(lhs, j, 3) * rhs_ij;
-        }
-        rsMatrixSet(m, i, 0, ri0);
-        rsMatrixSet(m, i, 1, ri1);
-        rsMatrixSet(m, i, 2, ri2);
-        rsMatrixSet(m, i, 3, ri3);
-    }
-}
-
-static void SC_MatrixMultiply_4x4_4x4(rs_matrix4x4 *m, const rs_matrix4x4 *rhs) {
-    rs_matrix4x4 mt;
-    SC_MatrixLoadMultiply_4x4_4x4_4x4(&mt, m, rhs);
-    SC_MatrixLoad_4x4_4x4(m, &mt);
-}
-
-static void SC_MatrixLoadMultiply_3x3_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) {
-    for (int i=0 ; i<3 ; i++) {
-        float ri0 = 0;
-        float ri1 = 0;
-        float ri2 = 0;
-        for (int j=0 ; j<3 ; j++) {
-            const float rhs_ij = rsMatrixGet(rhs, i,j);
-            ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij;
-            ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij;
-            ri2 += rsMatrixGet(lhs, j, 2) * rhs_ij;
-        }
-        rsMatrixSet(m, i, 0, ri0);
-        rsMatrixSet(m, i, 1, ri1);
-        rsMatrixSet(m, i, 2, ri2);
-    }
-}
-
-static void SC_MatrixMultiply_3x3_3x3(rs_matrix3x3 *m, const rs_matrix3x3 *rhs) {
-    rs_matrix3x3 mt;
-    SC_MatrixLoadMultiply_3x3_3x3_3x3(&mt, m, rhs);
-    SC_MatrixLoad_3x3_3x3(m, &mt);
-}
-
-static void SC_MatrixLoadMultiply_2x2_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) {
-    for (int i=0 ; i<2 ; i++) {
-        float ri0 = 0;
-        float ri1 = 0;
-        for (int j=0 ; j<2 ; j++) {
-            const float rhs_ij = rsMatrixGet(rhs, i,j);
-            ri0 += rsMatrixGet(lhs, j, 0) * rhs_ij;
-            ri1 += rsMatrixGet(lhs, j, 1) * rhs_ij;
-        }
-        rsMatrixSet(m, i, 0, ri0);
-        rsMatrixSet(m, i, 1, ri1);
-    }
-}
-
-static void SC_MatrixMultiply_2x2_2x2(rs_matrix2x2 *m, const rs_matrix2x2 *rhs) {
-    rs_matrix2x2 mt;
-    SC_MatrixLoadMultiply_2x2_2x2_2x2(&mt, m, rhs);
-    SC_MatrixLoad_2x2_2x2(m, &mt);
-}
-
-static void SC_MatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) {
-    rs_matrix4x4 m1;
-    SC_MatrixLoadRotate(&m1, rot, x, y, z);
-    SC_MatrixMultiply_4x4_4x4(m, &m1);
-}
-
-static void SC_MatrixScale(rs_matrix4x4 *m, float x, float y, float z) {
-    rs_matrix4x4 m1;
-    SC_MatrixLoadScale(&m1, x, y, z);
-    SC_MatrixMultiply_4x4_4x4(m, &m1);
-}
-
-static void SC_MatrixTranslate(rs_matrix4x4 *m, float x, float y, float z) {
-    rs_matrix4x4 m1;
-    SC_MatrixLoadTranslate(&m1, x, y, z);
-    SC_MatrixMultiply_4x4_4x4(m, &m1);
-}
-
-static void SC_MatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) {
-    SC_MatrixLoadIdentity_4x4(m);
-    m->m[0] = 2.f / (right - left);
-    m->m[5] = 2.f / (top - bottom);
-    m->m[10]= -2.f / (far - near);
-    m->m[12]= -(right + left) / (right - left);
-    m->m[13]= -(top + bottom) / (top - bottom);
-    m->m[14]= -(far + near) / (far - near);
-}
-
-static void SC_MatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) {
-    SC_MatrixLoadIdentity_4x4(m);
-    m->m[0] = 2.f * near / (right - left);
-    m->m[5] = 2.f * near / (top - bottom);
-    m->m[8] = (right + left) / (right - left);
-    m->m[9] = (top + bottom) / (top - bottom);
-    m->m[10]= -(far + near) / (far - near);
-    m->m[11]= -1.f;
-    m->m[14]= -2.f * far * near / (far - near);
-    m->m[15]= 0.f;
-}
-
-static void SC_MatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far) {
-    float top = near * tan((float) (fovy * M_PI / 360.0f));
-    float bottom = -top;
-    float left = bottom * aspect;
-    float right = top * aspect;
-    SC_MatrixLoadFrustum(m, left, right, bottom, top, near, far);
-}
-
-
-// Returns true if the matrix was successfully inversed
-static bool SC_MatrixInverse_4x4(rs_matrix4x4 *m) {
-    rs_matrix4x4 result;
-
-    int i, j;
-    for (i = 0; i < 4; ++i) {
-        for (j = 0; j < 4; ++j) {
-            // computeCofactor for int i, int j
-            int c0 = (i+1) % 4;
-            int c1 = (i+2) % 4;
-            int c2 = (i+3) % 4;
-            int r0 = (j+1) % 4;
-            int r1 = (j+2) % 4;
-            int r2 = (j+3) % 4;
-
-            float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1]))
-                         - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0]))
-                         + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0]));
-
-            float cofactor = (i+j) & 1 ? -minor : minor;
-
-            result.m[4*i + j] = cofactor;
-        }
-    }
-
-    // Dot product of 0th column of source and 0th row of result
-    float det = m->m[0]*result.m[0] + m->m[4]*result.m[1] +
-                 m->m[8]*result.m[2] + m->m[12]*result.m[3];
-
-    if (fabs(det) < 1e-6) {
-        return false;
-    }
-
-    det = 1.0f / det;
-    for (i = 0; i < 16; ++i) {
-        m->m[i] = result.m[i] * det;
-    }
-
-    return true;
-}
-
-// Returns true if the matrix was successfully inversed
-static bool SC_MatrixInverseTranspose_4x4(rs_matrix4x4 *m) {
-    rs_matrix4x4 result;
-
-    int i, j;
-    for (i = 0; i < 4; ++i) {
-        for (j = 0; j < 4; ++j) {
-            // computeCofactor for int i, int j
-            int c0 = (i+1) % 4;
-            int c1 = (i+2) % 4;
-            int c2 = (i+3) % 4;
-            int r0 = (j+1) % 4;
-            int r1 = (j+2) % 4;
-            int r2 = (j+3) % 4;
-
-            float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1]))
-                         - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0]))
-                         + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0]));
-
-            float cofactor = (i+j) & 1 ? -minor : minor;
-
-            result.m[4*j + i] = cofactor;
-        }
-    }
-
-    // Dot product of 0th column of source and 0th column of result
-    float det = m->m[0]*result.m[0] + m->m[4]*result.m[4] +
-                 m->m[8]*result.m[8] + m->m[12]*result.m[12];
-
-    if (fabs(det) < 1e-6) {
-        return false;
-    }
-
-    det = 1.0f / det;
-    for (i = 0; i < 16; ++i) {
-        m->m[i] = result.m[i] * det;
-    }
-
-    return true;
-}
-
-static void SC_MatrixTranspose_4x4(rs_matrix4x4 *m) {
-    int i, j;
-    float temp;
-    for (i = 0; i < 3; ++i) {
-        for (j = i + 1; j < 4; ++j) {
-            temp = m->m[i*4 + j];
-            m->m[i*4 + j] = m->m[j*4 + i];
-            m->m[j*4 + i] = temp;
-        }
-    }
-}
-
-static void SC_MatrixTranspose_3x3(rs_matrix3x3 *m) {
-    int i, j;
-    float temp;
-    for (i = 0; i < 2; ++i) {
-        for (j = i + 1; j < 3; ++j) {
-            temp = m->m[i*3 + j];
-            m->m[i*3 + j] = m->m[j*4 + i];
-            m->m[j*3 + i] = temp;
-        }
-    }
-}
-
-static void SC_MatrixTranspose_2x2(rs_matrix2x2 *m) {
-    float temp = m->m[1];
-    m->m[1] = m->m[2];
-    m->m[2] = temp;
-}
-
-
-//////////////////////////////////////////////////////////////////////////////
-// Class implementation
-//////////////////////////////////////////////////////////////////////////////
-
-// llvm name mangling ref
-//  <builtin-type> ::= v  # void
-//                 ::= b  # bool
-//                 ::= c  # char
-//                 ::= a  # signed char
-//                 ::= h  # unsigned char
-//                 ::= s  # short
-//                 ::= t  # unsigned short
-//                 ::= i  # int
-//                 ::= j  # unsigned int
-//                 ::= l  # long
-//                 ::= m  # unsigned long
-//                 ::= x  # long long, __int64
-//                 ::= y  # unsigned long long, __int64
-//                 ::= f  # float
-//                 ::= d  # double
-
-static ScriptCState::SymbolTable_t gSyms[] = {
-    { "__divsi3", (void *)&SC_divsi3, true },
-    { "__modsi3", (void *)&SC_modsi3, true },
-    { "__udivsi3", (void *)&SC_udivsi3, true },
-    { "__umodsi3", (void *)&SC_umodsi3, true },
-    { "memset", (void *)&memset, true },
-    { "memcpy", (void *)&memcpy, true },
-
-    // allocation
-    { "_Z19rsAllocationGetDimX13rs_allocation", (void *)&SC_allocGetDimX, true },
-    { "_Z19rsAllocationGetDimY13rs_allocation", (void *)&SC_allocGetDimY, true },
-    { "_Z19rsAllocationGetDimZ13rs_allocation", (void *)&SC_allocGetDimZ, true },
-    { "_Z21rsAllocationGetDimLOD13rs_allocation", (void *)&SC_allocGetDimLOD, true },
-    { "_Z23rsAllocationGetDimFaces13rs_allocation", (void *)&SC_allocGetDimFaces, true },
-    { "_Z15rsGetAllocationPKv", (void *)&SC_getAllocation, true },
-
-    { "_Z14rsGetElementAt13rs_allocationj", (void *)&SC_getElementAtX, true },
-    { "_Z14rsGetElementAt13rs_allocationjj", (void *)&SC_getElementAtXY, true },
-    { "_Z14rsGetElementAt13rs_allocationjjj", (void *)&SC_getElementAtXYZ, true },
-
-    { "_Z11rsSetObjectP10rs_elementS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP10rs_element", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject10rs_element", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP7rs_typeS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP7rs_type", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject7rs_type", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP13rs_allocationS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP13rs_allocation", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject13rs_allocation", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP10rs_samplerS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP10rs_sampler", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject10rs_sampler", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP9rs_scriptS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP9rs_script", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject9rs_script", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP7rs_meshS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP7rs_mesh", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject7rs_mesh", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP19rs_program_fragmentS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP19rs_program_fragment", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject19rs_program_fragment", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP17rs_program_vertexS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP17rs_program_vertex", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject17rs_program_vertex", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP17rs_program_rasterS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP17rs_program_raster", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject17rs_program_raster", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP16rs_program_storeS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP16rs_program_store", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject16rs_program_store", (void *)&rsiIsObject, true },
-
-    { "_Z11rsSetObjectP7rs_fontS_", (void *)&rsiSetObject, true },
-    { "_Z13rsClearObjectP7rs_font", (void *)&rsiClearObject, true },
-    { "_Z10rsIsObject7rs_font", (void *)&rsiIsObject, true },
-
-
-    { "_Z21rsAllocationMarkDirty13rs_allocation", (void *)&SC_allocationMarkDirty, true },
-
-
-    // Debug
-    { "_Z7rsDebugPKcf", (void *)&SC_debugF, true },
-    { "_Z7rsDebugPKcff", (void *)&SC_debugFv2, true },
-    { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3, true },
-    { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4, true },
-    { "_Z7rsDebugPKcd", (void *)&SC_debugD, true },
-    { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4, true },
-    { "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3, true },
-    { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2, true },
-    { "_Z7rsDebugPKci", (void *)&SC_debugI32, true },
-    { "_Z7rsDebugPKcj", (void *)&SC_debugU32, true },
-    // Both "long" and "unsigned long" need to be redirected to their
-    // 64-bit counterparts, since we have hacked Slang to use 64-bit
-    // for "long" on Arm (to be similar to Java).
-    { "_Z7rsDebugPKcl", (void *)&SC_debugLL64, true },
-    { "_Z7rsDebugPKcm", (void *)&SC_debugULL64, true },
-    { "_Z7rsDebugPKcx", (void *)&SC_debugLL64, true },
-    { "_Z7rsDebugPKcy", (void *)&SC_debugULL64, true },
-    { "_Z7rsDebugPKcPKv", (void *)&SC_debugP, true },
-
-    // RS Math
-    { "_Z6rsRandi", (void *)&SC_randi, true },
-    { "_Z6rsRandii", (void *)&SC_randi2, true },
-    { "_Z6rsRandf", (void *)&SC_randf, true },
-    { "_Z6rsRandff", (void *)&SC_randf2, true },
-    { "_Z6rsFracf", (void *)&SC_frac, true },
-
-    // time
-    { "_Z6rsTimePi", (void *)&SC_time, true },
-    { "_Z11rsLocaltimeP5rs_tmPKi", (void *)&SC_localtime, true },
-    { "_Z14rsUptimeMillisv", (void*)&SC_uptimeMillis, true },
-    { "_Z13rsUptimeNanosv", (void*)&SC_uptimeNanos, true },
-    { "_Z7rsGetDtv", (void*)&SC_getDt, false },
-
-    { "_Z14rsSendToClienti", (void *)&SC_toClient, false },
-    { "_Z14rsSendToClientiPKvj", (void *)&SC_toClient2, false },
-    { "_Z22rsSendToClientBlockingi", (void *)&SC_toClientBlocking, false },
-    { "_Z22rsSendToClientBlockingiPKvj", (void *)&SC_toClientBlocking2, false },
-
-    // matrix
-    { "_Z20rsMatrixLoadIdentityP12rs_matrix4x4", (void *)&SC_MatrixLoadIdentity_4x4, true },
-    { "_Z20rsMatrixLoadIdentityP12rs_matrix3x3", (void *)&SC_MatrixLoadIdentity_3x3, true },
-    { "_Z20rsMatrixLoadIdentityP12rs_matrix2x2", (void *)&SC_MatrixLoadIdentity_2x2, true },
-
-    { "_Z12rsMatrixLoadP12rs_matrix4x4PKf", (void *)&SC_MatrixLoad_4x4_f, true },
-    { "_Z12rsMatrixLoadP12rs_matrix3x3PKf", (void *)&SC_MatrixLoad_3x3_f, true },
-    { "_Z12rsMatrixLoadP12rs_matrix2x2PKf", (void *)&SC_MatrixLoad_2x2_f, true },
-
-    { "_Z12rsMatrixLoadP12rs_matrix4x4PKS_", (void *)&SC_MatrixLoad_4x4_4x4, true },
-    { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix3x3", (void *)&SC_MatrixLoad_4x4_3x3, true },
-    { "_Z12rsMatrixLoadP12rs_matrix4x4PK12rs_matrix2x2", (void *)&SC_MatrixLoad_4x4_2x2, true },
-    { "_Z12rsMatrixLoadP12rs_matrix3x3PKS_", (void *)&SC_MatrixLoad_3x3_3x3, true },
-    { "_Z12rsMatrixLoadP12rs_matrix2x2PKS_", (void *)&SC_MatrixLoad_2x2_2x2, true },
-
-    { "_Z18rsMatrixLoadRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadRotate, true },
-    { "_Z17rsMatrixLoadScaleP12rs_matrix4x4fff", (void *)&SC_MatrixLoadScale, true },
-    { "_Z21rsMatrixLoadTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixLoadTranslate, true },
-    { "_Z14rsMatrixRotateP12rs_matrix4x4ffff", (void *)&SC_MatrixRotate, true },
-    { "_Z13rsMatrixScaleP12rs_matrix4x4fff", (void *)&SC_MatrixScale, true },
-    { "_Z17rsMatrixTranslateP12rs_matrix4x4fff", (void *)&SC_MatrixTranslate, true },
-
-    { "_Z20rsMatrixLoadMultiplyP12rs_matrix4x4PKS_S2_", (void *)&SC_MatrixLoadMultiply_4x4_4x4_4x4, true },
-    { "_Z16rsMatrixMultiplyP12rs_matrix4x4PKS_", (void *)&SC_MatrixMultiply_4x4_4x4, true },
-    { "_Z20rsMatrixLoadMultiplyP12rs_matrix3x3PKS_S2_", (void *)&SC_MatrixLoadMultiply_3x3_3x3_3x3, true },
-    { "_Z16rsMatrixMultiplyP12rs_matrix3x3PKS_", (void *)&SC_MatrixMultiply_3x3_3x3, true },
-    { "_Z20rsMatrixLoadMultiplyP12rs_matrix2x2PKS_S2_", (void *)&SC_MatrixLoadMultiply_2x2_2x2_2x2, true },
-    { "_Z16rsMatrixMultiplyP12rs_matrix2x2PKS_", (void *)&SC_MatrixMultiply_2x2_2x2, true },
-
-    { "_Z17rsMatrixLoadOrthoP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadOrtho, true },
-    { "_Z19rsMatrixLoadFrustumP12rs_matrix4x4ffffff", (void *)&SC_MatrixLoadFrustum, true },
-    { "_Z23rsMatrixLoadPerspectiveP12rs_matrix4x4ffff", (void *)&SC_MatrixLoadPerspective, true },
-
-    { "_Z15rsMatrixInverseP12rs_matrix4x4", (void *)&SC_MatrixInverse_4x4, true },
-    { "_Z24rsMatrixInverseTransposeP12rs_matrix4x4", (void *)&SC_MatrixInverseTranspose_4x4, true },
-    { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_4x4, true },
-    { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_3x3, true },
-    { "_Z17rsMatrixTransposeP12rs_matrix4x4", (void *)&SC_MatrixTranspose_2x2, true },
-
-    { "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach, false },
-    //{ "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach2, false },
-
-////////////////////////////////////////////////////////////////////
-
-    //{ "sinf_fast", (void *)&SC_sinf_fast, true },
-    //{ "cosf_fast", (void *)&SC_cosf_fast, true },
-
-    { NULL, NULL, false }
-};
-
-const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbol(const char *sym) {
-    ScriptCState::SymbolTable_t *syms = gSyms;
-
-    while (syms->mPtr) {
-        if (!strcmp(syms->mName, sym)) {
-            return syms;
-        }
-        syms++;
-    }
-    return NULL;
 }
 
diff --git a/rsScriptC_LibCL.cpp b/rsScriptC_LibCL.cpp
deleted file mode 100644
index 8a0aa47..0000000
--- a/rsScriptC_LibCL.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rsContext.h"
-#include "rsScriptC.h"
-
-// Implements rs_cl.rsh
-
-
-using namespace android;
-using namespace android::renderscript;
-
-
-static float SC_exp10(float v) {
-    return pow(10.f, v);
-}
-
-static float SC_fract(float v, int *iptr) {
-    int i = (int)floor(v);
-    iptr[0] = i;
-    return fmin(v - i, 0x1.fffffep-1f);
-}
-
-static float SC_log2(float v) {
-    return log10(v) / log10(2.f);
-}
-
-static float SC_mad(float v1, float v2, float v3) {
-    return v1 * v2 + v3;
-}
-
-#if 0
-static float SC_pown(float v, int p) {
-    return powf(v, (float)p);
-}
-
-static float SC_powr(float v, float p) {
-    return powf(v, p);
-}
-#endif
-
-float SC_rootn(float v, int r) {
-    return pow(v, 1.f / r);
-}
-
-float SC_rsqrt(float v) {
-    return 1.f / sqrtf(v);
-}
-
-float SC_sincos(float v, float *cosptr) {
-    *cosptr = cosf(v);
-    return sinf(v);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Integer
-//////////////////////////////////////////////////////////////////////////////
-
-
-static uint32_t SC_abs_i32(int32_t v) {return abs(v);}
-static uint16_t SC_abs_i16(int16_t v) {return (uint16_t)abs(v);}
-static uint8_t SC_abs_i8(int8_t v) {return (uint8_t)abs(v);}
-
-static uint32_t SC_clz_u32(uint32_t v) {return __builtin_clz(v);}
-static uint16_t SC_clz_u16(uint16_t v) {return (uint16_t)__builtin_clz(v);}
-static uint8_t SC_clz_u8(uint8_t v) {return (uint8_t)__builtin_clz(v);}
-static int32_t SC_clz_i32(int32_t v) {return (int32_t)__builtin_clz((uint32_t)v);}
-static int16_t SC_clz_i16(int16_t v) {return (int16_t)__builtin_clz(v);}
-static int8_t SC_clz_i8(int8_t v) {return (int8_t)__builtin_clz(v);}
-
-static uint32_t SC_max_u32(uint32_t v, uint32_t v2) {return rsMax(v, v2);}
-static uint16_t SC_max_u16(uint16_t v, uint16_t v2) {return rsMax(v, v2);}
-static uint8_t SC_max_u8(uint8_t v, uint8_t v2) {return rsMax(v, v2);}
-static int32_t SC_max_i32(int32_t v, int32_t v2) {return rsMax(v, v2);}
-static int16_t SC_max_i16(int16_t v, int16_t v2) {return rsMax(v, v2);}
-static int8_t SC_max_i8(int8_t v, int8_t v2) {return rsMax(v, v2);}
-
-static uint32_t SC_min_u32(uint32_t v, uint32_t v2) {return rsMin(v, v2);}
-static uint16_t SC_min_u16(uint16_t v, uint16_t v2) {return rsMin(v, v2);}
-static uint8_t SC_min_u8(uint8_t v, uint8_t v2) {return rsMin(v, v2);}
-static int32_t SC_min_i32(int32_t v, int32_t v2) {return rsMin(v, v2);}
-static int16_t SC_min_i16(int16_t v, int16_t v2) {return rsMin(v, v2);}
-static int8_t SC_min_i8(int8_t v, int8_t v2) {return rsMin(v, v2);}
-
-//////////////////////////////////////////////////////////////////////////////
-// Float util
-//////////////////////////////////////////////////////////////////////////////
-
-static float SC_clamp_f32(float amount, float low, float high) {
-    return amount < low ? low : (amount > high ? high : amount);
-}
-
-static float SC_degrees(float radians) {
-    return radians * (180.f / M_PI);
-}
-
-static float SC_max_f32(float v, float v2) {
-    return rsMax(v, v2);
-}
-
-static float SC_min_f32(float v, float v2) {
-    return rsMin(v, v2);
-}
-
-static float SC_mix_f32(float start, float stop, float amount) {
-    //LOGE("lerpf %f  %f  %f", start, stop, amount);
-    return start + (stop - start) * amount;
-}
-
-static float SC_radians(float degrees) {
-    return degrees * (M_PI / 180.f);
-}
-
-static float SC_step_f32(float edge, float v) {
-    if (v < edge) return 0.f;
-    return 1.f;
-}
-
-static float SC_sign_f32(float value) {
-    if (value > 0) return 1.f;
-    if (value < 0) return -1.f;
-    return value;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Class implementation
-//////////////////////////////////////////////////////////////////////////////
-
-// llvm name mangling ref
-//  <builtin-type> ::= v  # void
-//                 ::= b  # bool
-//                 ::= c  # char
-//                 ::= a  # signed char
-//                 ::= h  # unsigned char
-//                 ::= s  # short
-//                 ::= t  # unsigned short
-//                 ::= i  # int
-//                 ::= j  # unsigned int
-//                 ::= l  # long
-//                 ::= m  # unsigned long
-//                 ::= x  # long long, __int64
-//                 ::= y  # unsigned long long, __int64
-//                 ::= f  # float
-//                 ::= d  # double
-
-static ScriptCState::SymbolTable_t gSyms[] = {
-    // OpenCL math
-    { "_Z4acosf", (void *)&acosf, true },
-    { "_Z5acoshf", (void *)&acoshf, true },
-    { "_Z4asinf", (void *)&asinf, true },
-    { "_Z5asinhf", (void *)&asinhf, true },
-    { "_Z4atanf", (void *)&atanf, true },
-    { "_Z5atan2ff", (void *)&atan2f, true },
-    { "_Z5atanhf", (void *)&atanhf, true },
-    { "_Z4cbrtf", (void *)&cbrtf, true },
-    { "_Z4ceilf", (void *)&ceilf, true },
-    { "_Z8copysignff", (void *)&copysignf, true },
-    { "_Z3cosf", (void *)&cosf, true },
-    { "_Z4coshf", (void *)&coshf, true },
-    { "_Z4erfcf", (void *)&erfcf, true },
-    { "_Z3erff", (void *)&erff, true },
-    { "_Z3expf", (void *)&expf, true },
-    { "_Z4exp2f", (void *)&exp2f, true },
-    { "_Z5exp10f", (void *)&SC_exp10, true },
-    { "_Z5expm1f", (void *)&expm1f, true },
-    { "_Z4fabsf", (void *)&fabsf, true },
-    { "_Z4fdimff", (void *)&fdimf, true },
-    { "_Z5floorf", (void *)&floorf, true },
-    { "_Z3fmafff", (void *)&fmaf, true },
-    { "_Z4fmaxff", (void *)&fmaxf, true },
-    { "_Z4fminff", (void *)&fminf, true },  // float fmin(float, float)
-    { "_Z4fmodff", (void *)&fmodf, true },
-    { "_Z5fractfPf", (void *)&SC_fract, true },
-    { "_Z5frexpfPi", (void *)&frexpf, true },
-    { "_Z5hypotff", (void *)&hypotf, true },
-    { "_Z5ilogbf", (void *)&ilogbf, true },
-    { "_Z5ldexpfi", (void *)&ldexpf, true },
-    { "_Z6lgammaf", (void *)&lgammaf, true },
-    { "_Z6lgammafPi", (void *)&lgammaf_r, true },
-    { "_Z3logf", (void *)&logf, true },
-    { "_Z4log2f", (void *)&SC_log2, true },
-    { "_Z5log10f", (void *)&log10f, true },
-    { "_Z5log1pf", (void *)&log1pf, true },
-    { "_Z4logbf", (void *)&logbf, true },
-    { "_Z3madfff", (void *)&SC_mad, true },
-    { "_Z4modffPf", (void *)&modff, true },
-    //{ "_Z3nanj", (void *)&SC_nan, true },
-    { "_Z9nextafterff", (void *)&nextafterf, true },
-    { "_Z3powff", (void *)&powf, true },
-    { "_Z9remainderff", (void *)&remainderf, true },
-    { "_Z6remquoffPi", (void *)&remquof, true },
-    { "_Z4rintf", (void *)&rintf, true },
-    { "_Z5rootnfi", (void *)&SC_rootn, true },
-    { "_Z5roundf", (void *)&roundf, true },
-    { "_Z5rsqrtf", (void *)&SC_rsqrt, true },
-    { "_Z3sinf", (void *)&sinf, true },
-    { "_Z6sincosfPf", (void *)&SC_sincos, true },
-    { "_Z4sinhf", (void *)&sinhf, true },
-    { "_Z4sqrtf", (void *)&sqrtf, true },
-    { "_Z3tanf", (void *)&tanf, true },
-    { "_Z4tanhf", (void *)&tanhf, true },
-    { "_Z6tgammaf", (void *)&tgammaf, true },
-    { "_Z5truncf", (void *)&truncf, true },
-
-    // OpenCL Int
-    { "_Z3absi", (void *)&SC_abs_i32, true },
-    { "_Z3abss", (void *)&SC_abs_i16, true },
-    { "_Z3absc", (void *)&SC_abs_i8, true },
-    { "_Z3clzj", (void *)&SC_clz_u32, true },
-    { "_Z3clzt", (void *)&SC_clz_u16, true },
-    { "_Z3clzh", (void *)&SC_clz_u8, true },
-    { "_Z3clzi", (void *)&SC_clz_i32, true },
-    { "_Z3clzs", (void *)&SC_clz_i16, true },
-    { "_Z3clzc", (void *)&SC_clz_i8, true },
-    { "_Z3maxjj", (void *)&SC_max_u32, true },
-    { "_Z3maxtt", (void *)&SC_max_u16, true },
-    { "_Z3maxhh", (void *)&SC_max_u8, true },
-    { "_Z3maxii", (void *)&SC_max_i32, true },
-    { "_Z3maxss", (void *)&SC_max_i16, true },
-    { "_Z3maxcc", (void *)&SC_max_i8, true },
-    { "_Z3minjj", (void *)&SC_min_u32, true },
-    { "_Z3mintt", (void *)&SC_min_u16, true },
-    { "_Z3minhh", (void *)&SC_min_u8, true },
-    { "_Z3minii", (void *)&SC_min_i32, true },
-    { "_Z3minss", (void *)&SC_min_i16, true },
-    { "_Z3mincc", (void *)&SC_min_i8, true },
-
-    // OpenCL 6.11.4
-    { "_Z5clampfff", (void *)&SC_clamp_f32, true },
-    { "_Z7degreesf", (void *)&SC_degrees, true },
-    { "_Z3maxff", (void *)&SC_max_f32, true },
-    { "_Z3minff", (void *)&SC_min_f32, true },
-    { "_Z3mixfff", (void *)&SC_mix_f32, true },
-    { "_Z7radiansf", (void *)&SC_radians, true },
-    { "_Z4stepff", (void *)&SC_step_f32, true },
-    //{ "smoothstep", (void *)&, true },
-    { "_Z4signf", (void *)&SC_sign_f32, true },
-
-    { NULL, NULL, false }
-};
-
-const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbolCL(const char *sym) {
-    ScriptCState::SymbolTable_t *syms = gSyms;
-
-    while (syms->mPtr) {
-        if (!strcmp(syms->mName, sym)) {
-            return syms;
-        }
-        syms++;
-    }
-    return NULL;
-}
-
diff --git a/rsScriptC_LibGL.cpp b/rsScriptC_LibGL.cpp
index 1ed0f31..71f1312 100644
--- a/rsScriptC_LibGL.cpp
+++ b/rsScriptC_LibGL.cpp
@@ -16,7 +16,9 @@
 
 #include "rsContext.h"
 #include "rsScriptC.h"
-#include "rsMatrix.h"
+#include "rsMatrix4x4.h"
+#include "rsMatrix3x3.h"
+#include "rsMatrix2x2.h"
 
 #include "utils/Timers.h"
 
@@ -32,84 +34,64 @@
 using namespace android;
 using namespace android::renderscript;
 
-#define GET_TLS()  ScriptTLSStruct * tls = \
-    (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
-    Context * rsc = tls->mContext; \
-    ScriptC * sc = (ScriptC *) tls->mScript
-
+namespace android {
+namespace renderscript {
 
 //////////////////////////////////////////////////////////////////////////////
 // Context
 //////////////////////////////////////////////////////////////////////////////
 
-static void SC_bindTexture(RsProgramFragment vpf, uint32_t slot, RsAllocation va) {
-    CHECK_OBJ_OR_NULL(va);
-    CHECK_OBJ(vpf);
-    GET_TLS();
-    rsi_ProgramBindTexture(rsc,
-                           static_cast<ProgramFragment *>(vpf),
-                           slot,
-                           static_cast<Allocation *>(va));
+void rsrBindTexture(Context *rsc, Script *sc, ProgramFragment *pf, uint32_t slot, Allocation *a) {
+    CHECK_OBJ_OR_NULL(a);
+    CHECK_OBJ(pf);
+    pf->bindTexture(rsc, slot, a);
 }
 
-static void SC_bindSampler(RsProgramFragment vpf, uint32_t slot, RsSampler vs) {
-    CHECK_OBJ_OR_NULL(vs);
-    CHECK_OBJ(vpf);
-    GET_TLS();
-    rsi_ProgramBindSampler(rsc,
-                           static_cast<ProgramFragment *>(vpf),
-                           slot,
-                           static_cast<Sampler *>(vs));
+void rsrBindSampler(Context *rsc, Script *sc, ProgramFragment *pf, uint32_t slot, Sampler *s) {
+    CHECK_OBJ_OR_NULL(s);
+    CHECK_OBJ(pf);
+    pf->bindSampler(rsc, slot, s);
 }
 
-static void SC_bindProgramStore(RsProgramStore pfs) {
-    CHECK_OBJ_OR_NULL(pfs);
-    GET_TLS();
-    rsi_ContextBindProgramStore(rsc, pfs);
+void rsrBindProgramStore(Context *rsc, Script *sc, ProgramStore *ps) {
+    CHECK_OBJ_OR_NULL(ps);
+    rsc->setProgramStore(ps);
 }
 
-static void SC_bindProgramFragment(RsProgramFragment pf) {
+void rsrBindProgramFragment(Context *rsc, Script *sc, ProgramFragment *pf) {
     CHECK_OBJ_OR_NULL(pf);
-    GET_TLS();
-    rsi_ContextBindProgramFragment(rsc, pf);
+    rsc->setProgramFragment(pf);
 }
 
-static void SC_bindProgramVertex(RsProgramVertex pv) {
+void rsrBindProgramVertex(Context *rsc, Script *sc, ProgramVertex *pv) {
     CHECK_OBJ_OR_NULL(pv);
-    GET_TLS();
-    rsi_ContextBindProgramVertex(rsc, pv);
+    rsc->setProgramVertex(pv);
 }
 
-static void SC_bindProgramRaster(RsProgramRaster pv) {
-    CHECK_OBJ_OR_NULL(pv);
-    GET_TLS();
-    rsi_ContextBindProgramRaster(rsc, pv);
+void rsrBindProgramRaster(Context *rsc, Script *sc, ProgramRaster *pr) {
+    CHECK_OBJ_OR_NULL(pr);
+    rsc->setProgramRaster(pr);
 }
 
-static void SC_bindFrameBufferObjectColorTarget(RsAllocation va, uint32_t slot) {
-    CHECK_OBJ(va);
-    GET_TLS();
-    rsc->mFBOCache.bindColorTarget(rsc, static_cast<Allocation *>(va), slot);
+void rsrBindFrameBufferObjectColorTarget(Context *rsc, Script *sc, Allocation *a, uint32_t slot) {
+    CHECK_OBJ(a);
+    rsc->mFBOCache.bindColorTarget(rsc, a, slot);
 }
 
-static void SC_bindFrameBufferObjectDepthTarget(RsAllocation va) {
-    CHECK_OBJ(va);
-    GET_TLS();
-    rsc->mFBOCache.bindDepthTarget(rsc, static_cast<Allocation *>(va));
+void rsrBindFrameBufferObjectDepthTarget(Context *rsc, Script *sc, Allocation *a) {
+    CHECK_OBJ(a);
+    rsc->mFBOCache.bindDepthTarget(rsc, a);
 }
 
-static void SC_clearFrameBufferObjectColorTarget(uint32_t slot) {
-    GET_TLS();
+void rsrClearFrameBufferObjectColorTarget(Context *rsc, Script *sc, uint32_t slot) {
     rsc->mFBOCache.bindColorTarget(rsc, NULL, slot);
 }
 
-static void SC_clearFrameBufferObjectDepthTarget() {
-    GET_TLS();
+void rsrClearFrameBufferObjectDepthTarget(Context *rsc, Script *sc) {
     rsc->mFBOCache.bindDepthTarget(rsc, NULL);
 }
 
-static void SC_clearFrameBufferObjectTargets() {
-    GET_TLS();
+void rsrClearFrameBufferObjectTargets(Context *rsc, Script *sc) {
     rsc->mFBOCache.resetAll(rsc);
 }
 
@@ -117,30 +99,25 @@
 // VP
 //////////////////////////////////////////////////////////////////////////////
 
-static void SC_vpLoadProjectionMatrix(const rsc_Matrix *m) {
-    GET_TLS();
+void rsrVpLoadProjectionMatrix(Context *rsc, Script *sc, const rsc_Matrix *m) {
     rsc->getProgramVertex()->setProjectionMatrix(rsc, m);
 }
 
-static void SC_vpLoadModelMatrix(const rsc_Matrix *m) {
-    GET_TLS();
+void rsrVpLoadModelMatrix(Context *rsc, Script *sc, const rsc_Matrix *m) {
     rsc->getProgramVertex()->setModelviewMatrix(rsc, m);
 }
 
-static void SC_vpLoadTextureMatrix(const rsc_Matrix *m) {
-    GET_TLS();
+void rsrVpLoadTextureMatrix(Context *rsc, Script *sc, const rsc_Matrix *m) {
     rsc->getProgramVertex()->setTextureMatrix(rsc, m);
 }
 
-static void SC_pfConstantColor(RsProgramFragment vpf, float r, float g, float b, float a) {
-    GET_TLS();
-    CHECK_OBJ(vpf);
-    ProgramFragment *pf = static_cast<ProgramFragment *>(vpf);
+void rsrPfConstantColor(Context *rsc, Script *sc, ProgramFragment *pf,
+                        float r, float g, float b, float a) {
+    CHECK_OBJ(pf);
     pf->setConstantColor(rsc, r, g, b, a);
 }
 
-static void SC_vpGetProjectionMatrix(rsc_Matrix *m) {
-    GET_TLS();
+void rsrVpGetProjectionMatrix(Context *rsc, Script *sc, rsc_Matrix *m) {
     rsc->getProgramVertex()->getProjectionMatrix(rsc, m);
 }
 
@@ -148,15 +125,11 @@
 // Drawing
 //////////////////////////////////////////////////////////////////////////////
 
-static void SC_drawQuadTexCoords(float x1, float y1, float z1,
-                                 float u1, float v1,
-                                 float x2, float y2, float z2,
-                                 float u2, float v2,
-                                 float x3, float y3, float z3,
-                                 float u3, float v3,
-                                 float x4, float y4, float z4,
-                                 float u4, float v4) {
-    GET_TLS();
+void rsrDrawQuadTexCoords(Context *rsc, Script *sc,
+                          float x1, float y1, float z1, float u1, float v1,
+                          float x2, float y2, float z2, float u2, float v2,
+                          float x3, float y3, float z3, float u3, float v3,
+                          float x4, float y4, float z4, float u4, float v4) {
     if (!rsc->setupCheck()) {
         return;
     }
@@ -180,18 +153,19 @@
     glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
 }
 
-static void SC_drawQuad(float x1, float y1, float z1,
-                        float x2, float y2, float z2,
-                        float x3, float y3, float z3,
-                        float x4, float y4, float z4) {
-    SC_drawQuadTexCoords(x1, y1, z1, 0, 1,
-                         x2, y2, z2, 1, 1,
-                         x3, y3, z3, 1, 0,
-                         x4, y4, z4, 0, 0);
+void rsrDrawQuad(Context *rsc, Script *sc,
+                 float x1, float y1, float z1,
+                 float x2, float y2, float z2,
+                 float x3, float y3, float z3,
+                 float x4, float y4, float z4) {
+    rsrDrawQuadTexCoords(rsc, sc, x1, y1, z1, 0, 1,
+                                  x2, y2, z2, 1, 1,
+                                  x3, y3, z3, 1, 0,
+                                  x4, y4, z4, 0, 0);
 }
 
-static void SC_drawSpriteScreenspace(float x, float y, float z, float w, float h) {
-    GET_TLS();
+void rsrDrawSpriteScreenspace(Context *rsc, Script *sc,
+                              float x, float y, float z, float w, float h) {
     ObjectBaseRef<const ProgramVertex> tmp(rsc->getProgramVertex());
     rsc->setProgramVertex(rsc->getDefaultProgramVertex());
     //rsc->setupCheck();
@@ -200,87 +174,48 @@
 
     float sh = rsc->getHeight();
 
-    SC_drawQuad(x,   sh - y,     z,
+    rsrDrawQuad(rsc, sc,
+                x,   sh - y,     z,
                 x+w, sh - y,     z,
                 x+w, sh - (y+h), z,
                 x,   sh - (y+h), z);
     rsc->setProgramVertex((ProgramVertex *)tmp.get());
 }
-/*
-static void SC_drawSprite(float x, float y, float z, float w, float h)
-{
-    GET_TLS();
-    float vin[3] = {x, y, z};
-    float vout[4];
 
-    //LOGE("ds  in %f %f %f", x, y, z);
-    rsc->getVertex()->transformToScreen(rsc, vout, vin);
-    //LOGE("ds  out %f %f %f %f", vout[0], vout[1], vout[2], vout[3]);
-    vout[0] /= vout[3];
-    vout[1] /= vout[3];
-    vout[2] /= vout[3];
-
-    vout[0] *= rsc->getWidth() / 2;
-    vout[1] *= rsc->getHeight() / 2;
-    vout[0] += rsc->getWidth() / 2;
-    vout[1] += rsc->getHeight() / 2;
-
-    vout[0] -= w/2;
-    vout[1] -= h/2;
-
-    //LOGE("ds  out2 %f %f %f", vout[0], vout[1], vout[2]);
-
-    // U, V, W, H
-    SC_drawSpriteScreenspace(vout[0], vout[1], z, h, w);
-    //rsc->setupCheck();
-}
-*/
-
-static void SC_drawRect(float x1, float y1,
-                        float x2, float y2, float z) {
+void rsrDrawRect(Context *rsc, Script *sc, float x1, float y1, float x2, float y2, float z) {
     //LOGE("SC_drawRect %f,%f  %f,%f  %f", x1, y1, x2, y2, z);
-    SC_drawQuad(x1, y2, z,
-                x2, y2, z,
-                x2, y1, z,
-                x1, y1, z);
+    rsrDrawQuad(rsc, sc, x1, y2, z, x2, y2, z, x2, y1, z, x1, y1, z);
 }
 
-static void SC_drawMesh(RsMesh vsm) {
-    CHECK_OBJ(vsm);
-    GET_TLS();
-    Mesh *sm = static_cast<Mesh *>(vsm);
+void rsrDrawMesh(Context *rsc, Script *sc, Mesh *sm) {
+    CHECK_OBJ(sm);
     if (!rsc->setupCheck()) {
         return;
     }
     sm->render(rsc);
 }
 
-static void SC_drawMeshPrimitive(RsMesh vsm, uint32_t primIndex) {
-    CHECK_OBJ(vsm);
-    GET_TLS();
-    Mesh *sm = static_cast<Mesh *>(vsm);
+void rsrDrawMeshPrimitive(Context *rsc, Script *sc, Mesh *sm, uint32_t primIndex) {
+    CHECK_OBJ(sm);
     if (!rsc->setupCheck()) {
         return;
     }
     sm->renderPrimitive(rsc, primIndex);
 }
 
-static void SC_drawMeshPrimitiveRange(RsMesh vsm, uint32_t primIndex, uint32_t start, uint32_t len) {
-    CHECK_OBJ(vsm);
-    GET_TLS();
-    Mesh *sm = static_cast<Mesh *>(vsm);
+void rsrDrawMeshPrimitiveRange(Context *rsc, Script *sc, Mesh *sm, uint32_t primIndex,
+                               uint32_t start, uint32_t len) {
+    CHECK_OBJ(sm);
     if (!rsc->setupCheck()) {
         return;
     }
     sm->renderPrimitiveRange(rsc, primIndex, start, len);
 }
 
-static void SC_meshComputeBoundingBox(RsMesh vsm,
-                                      float *minX, float *minY, float *minZ,
-                                      float *maxX, float *maxY, float *maxZ) {
-    CHECK_OBJ(vsm);
-    GET_TLS();
-    Mesh *sm = static_cast<Mesh *>(vsm);
+void rsrMeshComputeBoundingBox(Context *rsc, Script *sc, Mesh *sm,
+                               float *minX, float *minY, float *minZ,
+                               float *maxX, float *maxY, float *maxZ) {
+    CHECK_OBJ(sm);
     sm->computeBBox();
     *minX = sm->mBBoxMin[0];
     *minY = sm->mBBoxMin[1];
@@ -296,32 +231,17 @@
 //////////////////////////////////////////////////////////////////////////////
 
 
-static void SC_color(float r, float g, float b, float a) {
-    GET_TLS();
-    ProgramFragment *pf = (ProgramFragment *)rsc->getProgramFragment();
+void rsrColor(Context *rsc, Script *sc, float r, float g, float b, float a) {
+    ProgramFragment *pf = rsc->getProgramFragment();
     pf->setConstantColor(rsc, r, g, b, a);
 }
 
-static void SC_finish() {
+void rsrFinish(Context *rsc, Script *sc) {
     glFinish();
 }
 
-static void SC_allocationSyncAll(RsAllocation va) {
-    CHECK_OBJ(va);
-    GET_TLS();
-    static_cast<Allocation *>(va)->syncAll(rsc, RS_ALLOCATION_USAGE_SCRIPT);
-}
 
-#if 0
-static void SC_allocationSyncAll2(RsAllocation va, RsAllocationUsageType source) {
-    CHECK_OBJ(va);
-    GET_TLS();
-    static_cast<Allocation *>(va)->syncAll(rsc, source);
-}
-#endif
-
-static void SC_ClearColor(float r, float g, float b, float a) {
-    GET_TLS();
+void rsrClearColor(Context *rsc, Script *sc, float r, float g, float b, float a) {
     rsc->mFBOCache.setupGL2(rsc);
     rsc->setupProgramStore();
 
@@ -329,8 +249,7 @@
     glClear(GL_COLOR_BUFFER_BIT);
 }
 
-static void SC_ClearDepth(float v) {
-    GET_TLS();
+void rsrClearDepth(Context *rsc, Script *sc, float v) {
     rsc->mFBOCache.setupGL2(rsc);
     rsc->setupProgramStore();
 
@@ -338,34 +257,27 @@
     glClear(GL_DEPTH_BUFFER_BIT);
 }
 
-static uint32_t SC_getWidth() {
-    GET_TLS();
+uint32_t rsrGetWidth(Context *rsc, Script *sc) {
     return rsc->getWidth();
 }
 
-static uint32_t SC_getHeight() {
-    GET_TLS();
+uint32_t rsrGetHeight(Context *rsc, Script *sc) {
     return rsc->getHeight();
 }
 
-static void SC_DrawTextAlloc(RsAllocation va, int x, int y) {
-    CHECK_OBJ(va);
-    GET_TLS();
-    Allocation *alloc = static_cast<Allocation *>(va);
-    const char *text = (const char *)alloc->getPtr();
-    size_t allocSize = alloc->getType()->getSizeBytes();
+void rsrDrawTextAlloc(Context *rsc, Script *sc, Allocation *a, int x, int y) {
+    const char *text = (const char *)a->getPtr();
+    size_t allocSize = a->getType()->getSizeBytes();
     rsc->mStateFont.renderText(text, allocSize, x, y);
 }
 
-static void SC_DrawText(const char *text, int x, int y) {
-    GET_TLS();
+void rsrDrawText(Context *rsc, Script *sc, const char *text, int x, int y) {
     size_t textLen = strlen(text);
     rsc->mStateFont.renderText(text, textLen, x, y);
 }
 
-static void SC_setMetrics(Font::Rect *metrics,
-                          int32_t *left, int32_t *right,
-                          int32_t *top, int32_t *bottom) {
+static void SetMetrics(Font::Rect *metrics,
+                       int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
     if (left) {
         *left = metrics->left;
     }
@@ -380,125 +292,32 @@
     }
 }
 
-static void SC_MeasureTextAlloc(RsAllocation va,
-                                int32_t *left, int32_t *right,
-                                int32_t *top, int32_t *bottom) {
-    CHECK_OBJ(va);
-    GET_TLS();
-    Allocation *alloc = static_cast<Allocation *>(va);
-    const char *text = (const char *)alloc->getPtr();
-    size_t textLen = alloc->getType()->getSizeBytes();
+void rsrMeasureTextAlloc(Context *rsc, Script *sc, Allocation *a,
+                         int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
+    CHECK_OBJ(a);
+    const char *text = (const char *)a->getPtr();
+    size_t textLen = a->getType()->getSizeBytes();
     Font::Rect metrics;
     rsc->mStateFont.measureText(text, textLen, &metrics);
-    SC_setMetrics(&metrics, left, right, top, bottom);
+    SetMetrics(&metrics, left, right, top, bottom);
 }
 
-static void SC_MeasureText(const char *text,
-                           int32_t *left, int32_t *right,
-                           int32_t *top, int32_t *bottom) {
-    GET_TLS();
+void rsrMeasureText(Context *rsc, Script *sc, const char *text,
+                    int32_t *left, int32_t *right, int32_t *top, int32_t *bottom) {
     size_t textLen = strlen(text);
     Font::Rect metrics;
     rsc->mStateFont.measureText(text, textLen, &metrics);
-    SC_setMetrics(&metrics, left, right, top, bottom);
+    SetMetrics(&metrics, left, right, top, bottom);
 }
 
-static void SC_BindFont(RsFont font) {
+void rsrBindFont(Context *rsc, Script *sc, Font *font) {
     CHECK_OBJ(font);
-    GET_TLS();
     rsi_ContextBindFont(rsc, font);
 }
 
-static void SC_FontColor(float r, float g, float b, float a) {
-    GET_TLS();
+void rsrFontColor(Context *rsc, Script *sc, float r, float g, float b, float a) {
     rsc->mStateFont.setFontColor(r, g, b, a);
 }
 
-//////////////////////////////////////////////////////////////////////////////
-// Class implementation
-//////////////////////////////////////////////////////////////////////////////
-
-// llvm name mangling ref
-//  <builtin-type> ::= v  # void
-//                 ::= b  # bool
-//                 ::= c  # char
-//                 ::= a  # signed char
-//                 ::= h  # unsigned char
-//                 ::= s  # short
-//                 ::= t  # unsigned short
-//                 ::= i  # int
-//                 ::= j  # unsigned int
-//                 ::= l  # long
-//                 ::= m  # unsigned long
-//                 ::= x  # long long, __int64
-//                 ::= y  # unsigned long long, __int64
-//                 ::= f  # float
-//                 ::= d  # double
-
-static ScriptCState::SymbolTable_t gSyms[] = {
-    { "_Z22rsgBindProgramFragment19rs_program_fragment", (void *)&SC_bindProgramFragment, false },
-    { "_Z19rsgBindProgramStore16rs_program_store", (void *)&SC_bindProgramStore, false },
-    { "_Z20rsgBindProgramVertex17rs_program_vertex", (void *)&SC_bindProgramVertex, false },
-    { "_Z20rsgBindProgramRaster17rs_program_raster", (void *)&SC_bindProgramRaster, false },
-    { "_Z14rsgBindSampler19rs_program_fragmentj10rs_sampler", (void *)&SC_bindSampler, false },
-    { "_Z14rsgBindTexture19rs_program_fragmentj13rs_allocation", (void *)&SC_bindTexture, false },
-
-    { "_Z36rsgProgramVertexLoadProjectionMatrixPK12rs_matrix4x4", (void *)&SC_vpLoadProjectionMatrix, false },
-    { "_Z31rsgProgramVertexLoadModelMatrixPK12rs_matrix4x4", (void *)&SC_vpLoadModelMatrix, false },
-    { "_Z33rsgProgramVertexLoadTextureMatrixPK12rs_matrix4x4", (void *)&SC_vpLoadTextureMatrix, false },
-
-    { "_Z35rsgProgramVertexGetProjectionMatrixP12rs_matrix4x4", (void *)&SC_vpGetProjectionMatrix, false },
-
-    { "_Z31rsgProgramFragmentConstantColor19rs_program_fragmentffff", (void *)&SC_pfConstantColor, false },
-
-    { "_Z11rsgGetWidthv", (void *)&SC_getWidth, false },
-    { "_Z12rsgGetHeightv", (void *)&SC_getHeight, false },
-
-    { "_Z20rsgAllocationSyncAll13rs_allocation", (void *)&SC_allocationSyncAll, false },
-
-    { "_Z11rsgDrawRectfffff", (void *)&SC_drawRect, false },
-    { "_Z11rsgDrawQuadffffffffffff", (void *)&SC_drawQuad, false },
-    { "_Z20rsgDrawQuadTexCoordsffffffffffffffffffff", (void *)&SC_drawQuadTexCoords, false },
-    { "_Z24rsgDrawSpriteScreenspacefffff", (void *)&SC_drawSpriteScreenspace, false },
-
-    { "_Z11rsgDrawMesh7rs_mesh", (void *)&SC_drawMesh, false },
-    { "_Z11rsgDrawMesh7rs_meshj", (void *)&SC_drawMeshPrimitive, false },
-    { "_Z11rsgDrawMesh7rs_meshjjj", (void *)&SC_drawMeshPrimitiveRange, false },
-    { "_Z25rsgMeshComputeBoundingBox7rs_meshPfS0_S0_S0_S0_S0_", (void *)&SC_meshComputeBoundingBox, false },
-
-    { "_Z13rsgClearColorffff", (void *)&SC_ClearColor, false },
-    { "_Z13rsgClearDepthf", (void *)&SC_ClearDepth, false },
-
-    { "_Z11rsgDrawTextPKcii", (void *)&SC_DrawText, false },
-    { "_Z11rsgDrawText13rs_allocationii", (void *)&SC_DrawTextAlloc, false },
-    { "_Z14rsgMeasureTextPKcPiS1_S1_S1_", (void *)&SC_MeasureText, false },
-    { "_Z14rsgMeasureText13rs_allocationPiS0_S0_S0_", (void *)&SC_MeasureTextAlloc, false },
-
-    { "_Z11rsgBindFont7rs_font", (void *)&SC_BindFont, false },
-    { "_Z12rsgFontColorffff", (void *)&SC_FontColor, false },
-
-    { "_Z18rsgBindColorTarget13rs_allocationj", (void *)&SC_bindFrameBufferObjectColorTarget, false },
-    { "_Z18rsgBindDepthTarget13rs_allocation", (void *)&SC_bindFrameBufferObjectDepthTarget, false },
-    { "_Z19rsgClearColorTargetj", (void *)&SC_clearFrameBufferObjectColorTarget, false },
-    { "_Z19rsgClearDepthTargetv", (void *)&SC_clearFrameBufferObjectDepthTarget, false },
-    { "_Z24rsgClearAllRenderTargetsv", (void *)&SC_clearFrameBufferObjectTargets, false },
-
-    // misc
-    { "_Z5colorffff", (void *)&SC_color, false },
-    { "_Z9rsgFinishv", (void *)&SC_finish, false },
-
-    { NULL, NULL, false }
-};
-
-const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbolGL(const char *sym) {
-    ScriptCState::SymbolTable_t *syms = gSyms;
-
-    while (syms->mPtr) {
-        if (!strcmp(syms->mName, sym)) {
-            return syms;
-        }
-        syms++;
-    }
-    return NULL;
 }
-
+}
diff --git a/rs_hal.h b/rs_hal.h
index a4ca936..90abd85 100644
--- a/rs_hal.h
+++ b/rs_hal.h
@@ -62,8 +62,7 @@
                      char const *cacheDir,
                      uint8_t const *bitcode,
                      size_t bitcodeSize,
-                     uint32_t flags,
-                     RsHalSymbolLookupFunc lookupFunc);
+                     uint32_t flags);
 
         void (*invokeFunction)(const Context *rsc, Script *s,
                                uint32_t slot,
@@ -108,9 +107,6 @@
 
 } RsdHalFunctions;
 
-void rsiSetObject(ObjectBase **vdst, ObjectBase * vsrc);
-void rsiClearObject(ObjectBase **vdst);
-bool rsiIsObject(const ObjectBase *vdst);
 
 }
 }