am 4ceee5a6: (-s ours) am 73655bd2: Merge "propagate RS context priority to EGLContext when possible. DO NOT MERGE." into gingerbread

Merge commit '4ceee5a6b0089ffbec75690b7284f80d481ab212'

* commit '4ceee5a6b0089ffbec75690b7284f80d481ab212':
  propagate RS context priority to EGLContext when possible. DO NOT MERGE.
diff --git a/Android.mk b/Android.mk
index 6f2cd07..05c1a48 100644
--- a/Android.mk
+++ b/Android.mk
@@ -76,37 +76,43 @@
 LOCAL_SRC_FILES:= \
 	rsAdapter.cpp \
 	rsAllocation.cpp \
+	rsAnimation.cpp \
 	rsComponent.cpp \
 	rsContext.cpp \
 	rsDevice.cpp \
 	rsElement.cpp \
-        rsFileA3D.cpp \
-	rsLight.cpp \
+	rsFileA3D.cpp \
+	rsFont.cpp \
 	rsLocklessFifo.cpp \
 	rsObjectBase.cpp \
 	rsMatrix.cpp \
-        rsMesh.cpp \
-	rsNoise.cpp \
+	rsMesh.cpp \
+	rsMutex.cpp \
 	rsProgram.cpp \
 	rsProgramFragment.cpp \
-	rsProgramFragmentStore.cpp \
+	rsProgramStore.cpp \
 	rsProgramRaster.cpp \
 	rsProgramVertex.cpp \
 	rsSampler.cpp \
 	rsScript.cpp \
 	rsScriptC.cpp \
 	rsScriptC_Lib.cpp \
-        rsShaderCache.cpp \
-	rsSimpleMesh.cpp \
+	rsScriptC_LibCL.cpp \
+	rsScriptC_LibGL.cpp \
+	rsShaderCache.cpp \
+	rsSignal.cpp \
+	rsStream.cpp \
 	rsThreadIO.cpp \
 	rsType.cpp \
 	rsVertexArray.cpp
 
-ifeq ($(TARGET_BOARD_PLATFORM), s5pc110)
-	LOCAL_CFLAGS += -DHAS_CONTEXT_PRIORITY
-endif
 
-LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2 libui libacc
+LOCAL_SHARED_LIBRARIES += libcutils libutils libEGL libGLESv1_CM libGLESv2 libui libbcc
+
+LOCAL_STATIC_LIBRARIES := libft2
+
+LOCAL_C_INCLUDES += external/freetype/include
+
 LOCAL_LDLIBS := -lpthread -ldl
 LOCAL_MODULE:= libRS
 LOCAL_MODULE_TAGS := optional
diff --git a/RenderScript.h b/RenderScript.h
index d280f50..e63cc9b 100644
--- a/RenderScript.h
+++ b/RenderScript.h
@@ -27,23 +27,27 @@
 //////////////////////////////////////////////////////
 //
 
+typedef void * RsAsyncVoidPtr;
+
 typedef void * RsAdapter1D;
 typedef void * RsAdapter2D;
 typedef void * RsAllocation;
+typedef void * RsAnimation;
 typedef void * RsContext;
 typedef void * RsDevice;
 typedef void * RsElement;
 typedef void * RsFile;
+typedef void * RsFont;
 typedef void * RsSampler;
 typedef void * RsScript;
-typedef void * RsSimpleMesh;
+typedef void * RsMesh;
 typedef void * RsType;
-typedef void * RsLight;
+typedef void * RsObjectBase;
 
 typedef void * RsProgram;
 typedef void * RsProgramVertex;
 typedef void * RsProgramFragment;
-typedef void * RsProgramFragmentStore;
+typedef void * RsProgramStore;
 typedef void * RsProgramRaster;
 
 typedef void (* RsBitmapCallback_t)(void *);
@@ -53,14 +57,27 @@
     RS_DEVICE_PARAM_COUNT
 };
 
+typedef struct {  // NOTE(review): each *Min/*Pref pair presumably holds a minimum and a preferred value; units are not visible here, confirm
+    uint32_t colorMin;
+    uint32_t colorPref;
+    uint32_t alphaMin;
+    uint32_t alphaPref;
+    uint32_t depthMin;
+    uint32_t depthPref;
+    uint32_t stencilMin;
+    uint32_t stencilPref;
+    uint32_t samplesMin;
+    uint32_t samplesPref;
+    float samplesQ;
+} RsSurfaceConfig;  // passed by value as the third argument of rsContextCreateGL()
+
 RsDevice rsDeviceCreate();
 void rsDeviceDestroy(RsDevice);
 void rsDeviceSetConfig(RsDevice, RsDeviceParam, int32_t value);
 
 RsContext rsContextCreate(RsDevice, uint32_t version);
-RsContext rsContextCreateGL(RsDevice, uint32_t version, bool useDepth);
+RsContext rsContextCreateGL(RsDevice, uint32_t version, RsSurfaceConfig sc);
 void rsContextDestroy(RsContext);
-void rsObjDestroyOOB(RsContext, void *);
 
 uint32_t rsContextGetMessage(RsContext, void *data, size_t *receiveLen, size_t bufferLen, bool wait);
 void rsContextInitToClient(RsContext);
@@ -83,11 +100,17 @@
     RS_TYPE_UNSIGNED_32,
     RS_TYPE_UNSIGNED_64,
 
+    RS_TYPE_BOOLEAN,
+
     RS_TYPE_UNSIGNED_5_6_5,
     RS_TYPE_UNSIGNED_5_5_5_1,
     RS_TYPE_UNSIGNED_4_4_4_4,
 
-    RS_TYPE_ELEMENT,
+    RS_TYPE_MATRIX_4X4,
+    RS_TYPE_MATRIX_3X3,
+    RS_TYPE_MATRIX_2X2,
+
+    RS_TYPE_ELEMENT = 1000,
     RS_TYPE_TYPE,
     RS_TYPE_ALLOCATION,
     RS_TYPE_SAMPLER,
@@ -96,24 +119,17 @@
     RS_TYPE_PROGRAM_FRAGMENT,
     RS_TYPE_PROGRAM_VERTEX,
     RS_TYPE_PROGRAM_RASTER,
-    RS_TYPE_PROGRAM_STORE
+    RS_TYPE_PROGRAM_STORE,
 };
 
 enum RsDataKind {
     RS_KIND_USER,
-    RS_KIND_COLOR,
-    RS_KIND_POSITION,
-    RS_KIND_TEXTURE,
-    RS_KIND_NORMAL,
-    RS_KIND_INDEX,
-    RS_KIND_POINT_SIZE,
 
-    RS_KIND_PIXEL_L,
+    RS_KIND_PIXEL_L = 7,  // 7 is the value this enumerator had implicitly before the six kinds after RS_KIND_USER were removed
     RS_KIND_PIXEL_A,
     RS_KIND_PIXEL_LA,
     RS_KIND_PIXEL_RGB,
     RS_KIND_PIXEL_RGBA,
-
 };
 
 enum RsSamplerParam {
@@ -121,7 +137,8 @@
     RS_SAMPLER_MAG_FILTER,
     RS_SAMPLER_WRAP_S,
     RS_SAMPLER_WRAP_T,
-    RS_SAMPLER_WRAP_R
+    RS_SAMPLER_WRAP_R,
+    RS_SAMPLER_ANISO
 };
 
 enum RsSamplerValue {
@@ -205,9 +222,76 @@
 enum RsError {
     RS_ERROR_NONE,
     RS_ERROR_BAD_SHADER,
-    RS_ERROR_BAD_SCRIPT
+    RS_ERROR_BAD_SCRIPT,
+    RS_ERROR_BAD_VALUE,
+    RS_ERROR_OUT_OF_MEMORY
 };
 
+enum RsAnimationInterpolation {
+    RS_ANIMATION_INTERPOLATION_STEP,
+    RS_ANIMATION_INTERPOLATION_LINEAR,
+    RS_ANIMATION_INTERPOLATION_BEZIER,
+    RS_ANIMATION_INTERPOLATION_CARDINAL,
+    RS_ANIMATION_INTERPOLATION_HERMITE,
+    RS_ANIMATION_INTERPOLATION_BSPLINE
+};
+
+enum RsAnimationEdge {
+    RS_ANIMATION_EDGE_UNDEFINED,
+    RS_ANIMATION_EDGE_CONSTANT,
+    RS_ANIMATION_EDGE_GRADIENT,
+    RS_ANIMATION_EDGE_CYCLE,
+    RS_ANIMATION_EDGE_OSCILLATE,
+    RS_ANIMATION_EDGE_CYLE_RELATIVE  // NOTE(review): "CYLE" is presumably a typo for "CYCLE" (cf. RS_ANIMATION_EDGE_CYCLE); renaming changes a public identifier, so confirm all users first
+};
+
+enum RsA3DClassID {
+    RS_A3D_CLASS_ID_UNKNOWN,
+    RS_A3D_CLASS_ID_MESH,
+    RS_A3D_CLASS_ID_TYPE,
+    RS_A3D_CLASS_ID_ELEMENT,
+    RS_A3D_CLASS_ID_ALLOCATION,
+    RS_A3D_CLASS_ID_PROGRAM_VERTEX,
+    RS_A3D_CLASS_ID_PROGRAM_RASTER,
+    RS_A3D_CLASS_ID_PROGRAM_FRAGMENT,
+    RS_A3D_CLASS_ID_PROGRAM_STORE,
+    RS_A3D_CLASS_ID_SAMPLER,
+    RS_A3D_CLASS_ID_ANIMATION,
+    RS_A3D_CLASS_ID_ADAPTER_1D,
+    RS_A3D_CLASS_ID_ADAPTER_2D,
+    RS_A3D_CLASS_ID_SCRIPT_C
+};
+
+enum RsCullMode {
+    RS_CULL_BACK,
+    RS_CULL_FRONT,
+    RS_CULL_NONE
+};
+
+typedef struct {
+    RsA3DClassID classID;
+    const char* objectName;
+} RsFileIndexEntry;
+
+// Script to Script
+typedef struct {
+    uint32_t xStart;
+    uint32_t xEnd;
+    uint32_t yStart;
+    uint32_t yEnd;
+    uint32_t zStart;
+    uint32_t zEnd;
+    uint32_t arrayStart;
+    uint32_t arrayEnd;
+
+} RsScriptCall;
+
+
+// Async commands for returning new IDs
+void * rsaTypeCreate(RsContext, RsElement, uint32_t dimCount,
+                     const RsDimension *dims, const uint32_t *vals);
+
+
 #ifndef NO_RS_FUNCS
 #include "rsgApiFuncDecl.h"
 #endif
diff --git a/RenderScriptEnv.h b/RenderScriptEnv.h
index 99b8c04..b82eaf1 100644
--- a/RenderScriptEnv.h
+++ b/RenderScriptEnv.h
@@ -9,12 +9,10 @@
 typedef void * RsElement;
 typedef void * RsSampler;
 typedef void * RsScript;
-typedef void * RsSimpleMesh;
+typedef void * RsMesh;
 typedef void * RsType;
 typedef void * RsProgramFragment;
-typedef void * RsProgramFragmentStore;
-typedef void * RsLight;
-
+typedef void * RsProgramStore;
 
 typedef struct {
     float m[16];
@@ -28,4 +26,4 @@
 #define RS_PROGRAM_VERTEX_MODELVIEW_OFFSET 0
 #define RS_PROGRAM_VERTEX_PROJECTION_OFFSET 16
 #define RS_PROGRAM_VERTEX_TEXTURE_OFFSET 32
-
+#define RS_PROGRAM_VERTEX_MVP_OFFSET 48
diff --git a/java/Film/res/drawable/p01.png b/java/Film/res/drawable/p01.png
deleted file mode 100644
index a9b9bdb..0000000
--- a/java/Film/res/drawable/p01.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p02.png b/java/Film/res/drawable/p02.png
deleted file mode 100644
index 8162c82..0000000
--- a/java/Film/res/drawable/p02.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p03.png b/java/Film/res/drawable/p03.png
deleted file mode 100644
index e3e26c0..0000000
--- a/java/Film/res/drawable/p03.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p04.png b/java/Film/res/drawable/p04.png
deleted file mode 100644
index daee603..0000000
--- a/java/Film/res/drawable/p04.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p05.png b/java/Film/res/drawable/p05.png
deleted file mode 100644
index fac5248..0000000
--- a/java/Film/res/drawable/p05.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p06.png b/java/Film/res/drawable/p06.png
deleted file mode 100644
index 3b51261..0000000
--- a/java/Film/res/drawable/p06.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p07.png b/java/Film/res/drawable/p07.png
deleted file mode 100644
index d8bd938..0000000
--- a/java/Film/res/drawable/p07.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p08.png b/java/Film/res/drawable/p08.png
deleted file mode 100644
index ef175e8..0000000
--- a/java/Film/res/drawable/p08.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p09.png b/java/Film/res/drawable/p09.png
deleted file mode 100644
index 7bf3874..0000000
--- a/java/Film/res/drawable/p09.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p10.png b/java/Film/res/drawable/p10.png
deleted file mode 100644
index 908827d..0000000
--- a/java/Film/res/drawable/p10.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p11.png b/java/Film/res/drawable/p11.png
deleted file mode 100644
index 1289f71..0000000
--- a/java/Film/res/drawable/p11.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p12.png b/java/Film/res/drawable/p12.png
deleted file mode 100644
index e1af16a..0000000
--- a/java/Film/res/drawable/p12.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/drawable/p13.png b/java/Film/res/drawable/p13.png
deleted file mode 100644
index d08bcbe..0000000
--- a/java/Film/res/drawable/p13.png
+++ /dev/null
Binary files differ
diff --git a/java/Film/res/raw/filmimage.c b/java/Film/res/raw/filmimage.c
deleted file mode 100644
index d154c68..0000000
--- a/java/Film/res/raw/filmimage.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// Fountain test script
-
-#pragma version(1)
-#pragma stateVertex(orthoWindow)
-#pragma stateRaster(flat)
-#pragma stateFragment(PgmFragBackground)
-#pragma stateStore(MyBlend)
-
-
-int main(void* con, int ft, int launchID) {
-    int count, touch, x, y, rate, maxLife, lifeShift;
-    int life;
-    int ct, ct2;
-    int newPart;
-    int drawCount;
-    int dx, dy, idx;
-    int posx,posy;
-    int c;
-    int srcIdx;
-    int dstIdx;
-
-    count = loadI32(con, 0, 1);
-    touch = loadI32(con, 0, 2);
-    x = loadI32(con, 0, 3);
-    y = loadI32(con, 0, 4);
-
-    rate = 4;
-    maxLife = (count / rate) - 1;
-    lifeShift = 0;
-    {
-        life = maxLife;
-        while (life > 255) {
-            life = life >> 1;
-            lifeShift ++;
-        }
-    }
-
-    drawRect(con, 0, 256, 0, 512);
-    contextBindProgramFragment(con, NAMED_PgmFragParts);
-
-    if (touch) {
-        newPart = loadI32(con, 2, 0);
-        for (ct2=0; ct2<rate; ct2++) {
-            dx = scriptRand(con, 0x10000) - 0x8000;
-            dy = scriptRand(con, 0x10000) - 0x8000;
-
-            idx = newPart * 5 + 1;
-            storeI32(con, 2, idx, dx);
-            storeI32(con, 2, idx + 1, dy);
-            storeI32(con, 2, idx + 2, maxLife);
-            storeI32(con, 2, idx + 3, x << 16);
-            storeI32(con, 2, idx + 4, y << 16);
-
-            newPart++;
-            if (newPart >= count) {
-                newPart = 0;
-            }
-        }
-        storeI32(con, 2, 0, newPart);
-    }
-
-    drawCount = 0;
-    for (ct=0; ct < count; ct++) {
-        srcIdx = ct * 5 + 1;
-
-        dx = loadI32(con, 2, srcIdx);
-        dy = loadI32(con, 2, srcIdx + 1);
-        life = loadI32(con, 2, srcIdx + 2);
-        posx = loadI32(con, 2, srcIdx + 3);
-        posy = loadI32(con, 2, srcIdx + 4);
-
-        if (life) {
-            if (posy < (480 << 16)) {
-                dstIdx = drawCount * 9;
-                c = 0xffafcf | ((life >> lifeShift) << 24);
-
-                storeU32(con, 1, dstIdx, c);
-                storeI32(con, 1, dstIdx + 1, posx);
-                storeI32(con, 1, dstIdx + 2, posy);
-
-                storeU32(con, 1, dstIdx + 3, c);
-                storeI32(con, 1, dstIdx + 4, posx + 0x10000);
-                storeI32(con, 1, dstIdx + 5, posy + dy * 4);
-
-                storeU32(con, 1, dstIdx + 6, c);
-                storeI32(con, 1, dstIdx + 7, posx - 0x10000);
-                storeI32(con, 1, dstIdx + 8, posy + dy * 4);
-                drawCount ++;
-            } else {
-                if (dy > 0) {
-                    dy = (-dy) >> 1;
-                }
-            }
-
-            posx = posx + dx;
-            posy = posy + dy;
-            dy = dy + 0x400;
-            life --;
-
-            //storeI32(con, 2, srcIdx, dx);
-            storeI32(con, 2, srcIdx + 1, dy);
-            storeI32(con, 2, srcIdx + 2, life);
-            storeI32(con, 2, srcIdx + 3, posx);
-            storeI32(con, 2, srcIdx + 4, posy);
-        }
-    }
-
-    drawTriangleArray(con, NAMED_PartBuffer, drawCount);
-    return 1;
-}
diff --git a/java/Film/res/raw/filmstrip.c b/java/Film/res/raw/filmstrip.c
deleted file mode 100644
index bf75675..0000000
--- a/java/Film/res/raw/filmstrip.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// Fountain test script
-
-#pragma version(1)
-#pragma stateVertex(PVBackground)
-#pragma stateFragment(PFBackground)
-#pragma stateStore(PSBackground)
-
-#define STATE_TRIANGLE_OFFSET_COUNT 0
-#define STATE_LAST_FOCUS 1
-
-
-// The script enviroment has 3 env allocations.
-// bank0: (r) The enviroment structure
-// bank1: (r) The position information
-// bank2: (rw) The temporary texture state
-
-int lastFocus;
-
-int main(int index)
-{
-    float mat1[16];
-
-    float trans = Pos->translate;
-    float rot = Pos->rotate;
-
-    matrixLoadScale(mat1, 2.f, 2.f, 2.f);
-    matrixTranslate(mat1, 0.f, 0.f, trans);
-    matrixRotate(mat1, 90.f, 0.f, 0.f, 1.f);
-    matrixRotate(mat1, rot, 1.f, 0.f, 0.f);
-    vpLoadModelMatrix(mat1);
-
-    // Draw the lighting effect in the strip and fill the Z buffer.
-    drawSimpleMesh(NAMED_mesh);
-
-    // Start of images.
-    bindProgramStore(NAMED_PSImages);
-    bindProgramFragment(NAMED_PFImages);
-    bindProgramVertex(NAMED_PVImages);
-
-    float focusPos = Pos->focus;
-    int focusID = 0;
-    int lastFocusID = loadI32(2, STATE_LAST_FOCUS);
-    int imgCount = 13;
-
-    if (trans > (-.3f)) {
-        focusID = -1.0f - focusPos;
-        if (focusID >= imgCount) {
-            focusID = -1;
-        }
-    } else {
-        focusID = -1;
-    }
-
-    /*
-    if (focusID != lastFocusID) {
-        if (lastFocusID >= 0) {
-            uploadToTexture(con, env->tex[lastFocusID], 1);
-        }
-        if (focusID >= 0) {
-            uploadToTexture(con, env->tex[focusID], 0);
-        }
-    }
-    */
-    lastFocus = focusID;
-
-    int triangleOffsetsCount = Pos->triangleOffsetCount;
-
-    int imgId = 0;
-    for (imgId=1; imgId <= imgCount; imgId++) {
-        float pos = focusPos + imgId + 0.4f;
-        int offset = (int)floorf(pos * 2.f);
-        pos = pos - 0.75f;
-
-        offset = offset + triangleOffsetsCount / 2;
-        if (!((offset < 0) || (offset >= triangleOffsetsCount))) {
-            int start = offset -2;
-            int end = offset + 2;
-
-            if (start < 0) {
-                start = 0;
-            }
-            if (end >= triangleOffsetsCount) {
-                end = triangleOffsetsCount-1;
-            }
-
-            bindTexture(NAMED_PFImages, 0, loadI32(0, imgId - 1));
-            matrixLoadTranslate(mat1, -pos - loadF(5, triangleOffsetsCount / 2), 0, 0);
-            vpLoadTextureMatrix(mat1);
-            drawSimpleMeshRange(NAMED_mesh, loadI32(4, start), (loadI32(4, end) - loadI32(4, start)));
-        }
-    }
-    return 0;
-}
-
diff --git a/java/Film/src/com/android/film/FilmRS.java b/java/Film/src/com/android/film/FilmRS.java
deleted file mode 100644
index 7d04502..0000000
--- a/java/Film/src/com/android/film/FilmRS.java
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.film;
-
-import java.io.Writer;
-
-import android.content.Context;
-import android.content.res.Resources;
-import android.graphics.Bitmap;
-import android.util.Log;
-
-import android.renderscript.*;
-
-public class FilmRS {
-    class StripPosition {
-        public float translate;
-        public float rotate;
-        public float focus;
-        public int triangleOffsetCount;
-    }
-    StripPosition mPos = new StripPosition();
-
-
-    private final int STATE_LAST_FOCUS = 1;
-
-    public FilmRS() {
-    }
-
-    public void init(RenderScriptGL rs, Resources res, int width, int height) {
-        mRS = rs;
-        mRes = res;
-        initRS();
-    }
-
-    public void setFilmStripPosition(int x, int y)
-    {
-        if (x < 50) {
-            x = 50;
-        }
-        if (x > 270) {
-            x = 270;
-        }
-
-        float anim = ((float)x-50) / 270.f;
-        mPos.translate = 2f * anim + 0.5f;   // translation
-        mPos.rotate = (anim * 40);  // rotation
-        mPos.focus = ((float)y) / 16.f - 10.f;  // focusPos
-        mPos.triangleOffsetCount = mFSM.mTriangleOffsetsCount;
-        mAllocPos.data(mPos);
-    }
-
-
-    private Resources mRes;
-    private RenderScriptGL mRS;
-    private Script mScriptStrip;
-    private Script mScriptImage;
-    private Sampler mSampler;
-    private ProgramStore mPSBackground;
-    private ProgramStore mPSImages;
-    private ProgramFragment mPFBackground;
-    private ProgramFragment mPFImages;
-    private ProgramVertex mPVBackground;
-    private ProgramVertex mPVImages;
-    private ProgramVertex.MatrixAllocation mPVA;
-    private Type mStripPositionType;
-
-    private Allocation mImages[];
-    private Allocation mAllocIDs;
-    private Allocation mAllocPos;
-    private Allocation mAllocState;
-    private Allocation mAllocPV;
-    private Allocation mAllocOffsetsTex;
-    private Allocation mAllocOffsets;
-
-    private SimpleMesh mMesh;
-    private Light mLight;
-
-    private FilmStripMesh mFSM;
-
-    private int[] mBufferIDs;
-    private float[] mBufferPos = new float[3];
-    private int[] mBufferState;
-
-    private void initPFS() {
-        ProgramStore.Builder b = new ProgramStore.Builder(mRS, null, null);
-
-        b.setDepthFunc(ProgramStore.DepthFunc.LESS);
-        b.setDitherEnable(true);
-        b.setDepthMask(true);
-        mPSBackground = b.create();
-        mPSBackground.setName("PSBackground");
-
-        b.setDepthFunc(ProgramStore.DepthFunc.EQUAL);
-        b.setDitherEnable(false);
-        b.setDepthMask(false);
-        b.setBlendFunc(ProgramStore.BlendSrcFunc.ONE,
-                       ProgramStore.BlendDstFunc.ONE);
-        mPSImages = b.create();
-        mPSImages.setName("PSImages");
-    }
-
-    private void initPF() {
-        Sampler.Builder bs = new Sampler.Builder(mRS);
-        bs.setMin(Sampler.Value.LINEAR);//_MIP_LINEAR);
-        bs.setMag(Sampler.Value.LINEAR);
-        bs.setWrapS(Sampler.Value.CLAMP);
-        bs.setWrapT(Sampler.Value.WRAP);
-        mSampler = bs.create();
-
-        ProgramFragment.Builder b = new ProgramFragment.Builder(mRS);
-        mPFBackground = b.create();
-        mPFBackground.setName("PFBackground");
-
-        b = new ProgramFragment.Builder(mRS);
-        b.setTexture(ProgramFragment.Builder.EnvMode.REPLACE,
-                     ProgramFragment.Builder.Format.RGBA, 0);
-        mPFImages = b.create();
-        mPFImages.bindSampler(mSampler, 0);
-        mPFImages.setName("PFImages");
-    }
-
-    private void initPV() {
-        mLight = (new Light.Builder(mRS)).create();
-        mLight.setPosition(0, -0.5f, -1.0f);
-
-        ProgramVertex.Builder pvb = new ProgramVertex.Builder(mRS, null, null);
-        //pvb.addLight(mLight);
-        mPVBackground = pvb.create();
-        mPVBackground.setName("PVBackground");
-
-        pvb = new ProgramVertex.Builder(mRS, null, null);
-        pvb.setTextureMatrixEnable(true);
-        mPVImages = pvb.create();
-        mPVImages.setName("PVImages");
-    }
-
-    private void loadImages() {
-        mBufferIDs = new int[13];
-        mImages = new Allocation[13];
-        mAllocIDs = Allocation.createSized(mRS,
-            Element.createUser(mRS, Element.DataType.FLOAT_32),
-            mBufferIDs.length);
-
-        Element ie = Element.createPixel(mRS, Element.DataType.UNSIGNED_5_6_5, Element.DataKind.PIXEL_RGB);
-        mImages[0] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p01, ie, true);
-        mImages[1] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p02, ie, true);
-        mImages[2] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p03, ie, true);
-        mImages[3] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p04, ie, true);
-        mImages[4] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p05, ie, true);
-        mImages[5] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p06, ie, true);
-        mImages[6] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p07, ie, true);
-        mImages[7] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p08, ie, true);
-        mImages[8] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p09, ie, true);
-        mImages[9] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p10, ie, true);
-        mImages[10] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p11, ie, true);
-        mImages[11] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p12, ie, true);
-        mImages[12] = Allocation.createFromBitmapResourceBoxed(mRS, mRes, R.drawable.p13, ie, true);
-
-        int black[] = new int[1024];
-        for(int ct=0; ct < mImages.length; ct++) {
-            Allocation.Adapter2D a = mImages[ct].createAdapter2D();
-
-            int size = 512;
-            int mip = 0;
-            while(size >= 2) {
-                a.subData(0, 0, 2, size, black);
-                a.subData(size-2, 0, 2, size, black);
-                a.subData(0, 0, size, 2, black);
-                a.subData(0, size-2, size, 2, black);
-                size >>= 1;
-                mip++;
-                a.setConstraint(Dimension.LOD, mip);
-            }
-
-            mImages[ct].uploadToTexture(1);
-            mBufferIDs[ct] = mImages[ct].getID();
-        }
-        mAllocIDs.data(mBufferIDs);
-    }
-
-    private void initState()
-    {
-        mBufferState = new int[10];
-        mAllocState = Allocation.createSized(mRS,
-            Element.createUser(mRS, Element.DataType.FLOAT_32),
-            mBufferState.length);
-        mBufferState[STATE_LAST_FOCUS] = -1;
-        mAllocState.data(mBufferState);
-    }
-
-    private void initRS() {
-        mFSM = new FilmStripMesh();
-        mMesh = mFSM.init(mRS);
-        mMesh.setName("mesh");
-
-        initPFS();
-        initPF();
-        initPV();
-
-        Log.e("rs", "Done loading named");
-
-        mStripPositionType = Type.createFromClass(mRS, StripPosition.class, 1);
-
-        ScriptC.Builder sb = new ScriptC.Builder(mRS);
-        sb.setScript(mRes, R.raw.filmstrip);
-        sb.setRoot(true);
-        sb.setType(mStripPositionType, "Pos", 1);
-        mScriptStrip = sb.create();
-        mScriptStrip.setClearColor(0.0f, 0.0f, 0.0f, 1.0f);
-
-        mAllocPos = Allocation.createTyped(mRS, mStripPositionType);
-
-        loadImages();
-        initState();
-
-        mPVA = new ProgramVertex.MatrixAllocation(mRS);
-        mPVBackground.bindAllocation(mPVA);
-        mPVImages.bindAllocation(mPVA);
-        mPVA.setupProjectionNormalized(320, 480);
-
-
-        mScriptStrip.bindAllocation(mAllocIDs, 0);
-        mScriptStrip.bindAllocation(mAllocPos, 1);
-        mScriptStrip.bindAllocation(mAllocState, 2);
-        mScriptStrip.bindAllocation(mPVA.mAlloc, 3);
-
-
-        mAllocOffsets = Allocation.createSized(mRS,
-            Element.createUser(mRS, Element.DataType.SIGNED_32), mFSM.mTriangleOffsets.length);
-        mAllocOffsets.data(mFSM.mTriangleOffsets);
-        mScriptStrip.bindAllocation(mAllocOffsets, 4);
-
-        mAllocOffsetsTex = Allocation.createSized(mRS,
-            Element.createUser(mRS, Element.DataType.FLOAT_32), mFSM.mTriangleOffsetsTex.length);
-        mAllocOffsetsTex.data(mFSM.mTriangleOffsetsTex);
-        mScriptStrip.bindAllocation(mAllocOffsetsTex, 5);
-
-        setFilmStripPosition(0, 0);
-        mRS.contextBindRootScript(mScriptStrip);
-    }
-}
-
-
-
diff --git a/java/Film/src/com/android/film/FilmStripMesh.java b/java/Film/src/com/android/film/FilmStripMesh.java
deleted file mode 100644
index 448cce0..0000000
--- a/java/Film/src/com/android/film/FilmStripMesh.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package com.android.film;
-
-import java.io.Writer;
-import java.lang.Math;
-import android.util.Log;
-
-import android.renderscript.RenderScript;
-import android.renderscript.SimpleMesh;
-
-
-class FilmStripMesh {
-
-    class Vertex {
-        float nx;
-        float ny;
-        float nz;
-        float s;
-        float t;
-        float x;
-        float y;
-        float z;
-
-        Vertex() {
-            nx = 0;
-            ny = 0;
-            nz = 0;
-            s = 0;
-            t = 0;
-            x = 0;
-            y = 0;
-            z = 0;
-        }
-
-        void xyz(float _x, float _y, float _z) {
-            x = _x;
-            y = _y;
-            z = _z;
-        }
-
-        void nxyz(float _x, float _y, float _z) {
-            nx = _x;
-            ny = _y;
-            nz = _z;
-        }
-
-        void st(float _s, float _t) {
-            s = _s;
-            t = _t;
-        }
-
-        void computeNorm(Vertex v1, Vertex v2) {
-            float dx = v1.x - v2.x;
-            float dy = v1.y - v2.y;
-            float dz = v1.z - v2.z;
-            float len = (float)java.lang.Math.sqrt(dx*dx + dy*dy + dz*dz);
-            dx /= len;
-            dy /= len;
-            dz /= len;
-
-            nx = dx * dz;
-            ny = dy * dz;
-            nz = (float)java.lang.Math.sqrt(dx*dx + dy*dy);
-
-            len = (float)java.lang.Math.sqrt(nx*nx + ny*ny + nz*nz);
-            nx /= len;
-            ny /= len;
-            nz /= len;
-        }
-    }
-
-    int[] mTriangleOffsets;
-    float[] mTriangleOffsetsTex;
-    int mTriangleOffsetsCount;
-
-    SimpleMesh init(RenderScript rs)
-    {
-        float vtx[] = new float[] {
-            60.431003f, 124.482050f,
-            60.862074f, 120.872604f,
-            61.705303f, 117.336662f,
-            62.949505f, 113.921127f,
-            64.578177f, 110.671304f,
-            66.569716f, 107.630302f,
-            68.897703f, 104.838457f,
-            71.531259f, 102.332803f,
-            74.435452f, 100.146577f,
-            77.571757f, 98.308777f,
-            80.898574f, 96.843781f,
-            84.371773f, 95.771023f,
-            87.945283f, 95.104731f,
-            98.958994f, 95.267098f,
-            109.489523f, 98.497596f,
-            118.699582f, 104.539366f,
-            125.856872f, 112.912022f,
-            130.392311f, 122.949849f,
-            131.945283f, 133.854731f,
-            130.392311f, 144.759613f,
-            125.856872f, 154.797439f,
-            118.699582f, 163.170096f,
-            109.489523f, 169.211866f,
-            98.958994f, 172.442364f,
-            87.945283f, 172.604731f,
-            72.507313f, 172.672927f,
-            57.678920f, 168.377071f,
-            44.668135f, 160.067134f,
-            34.534908f, 148.420104f,
-            28.104767f, 134.384831f,
-            25.901557f, 119.104731f,
-            28.104767f, 103.824631f,
-            34.534908f, 89.789358f,
-            44.668135f, 78.142327f,
-            57.678920f, 69.832390f,
-            72.507313f, 65.536534f,
-            87.945283f, 65.604731f,
-            106.918117f, 65.688542f,
-            125.141795f, 60.409056f,
-            141.131686f, 50.196376f,
-            153.585137f, 35.882502f,
-            161.487600f, 18.633545f,
-            164.195283f, -0.145269f,
-            161.487600f, -18.924084f,
-            153.585137f, -36.173040f,
-            141.131686f, -50.486914f,
-            125.141795f, -60.699594f,
-            106.918117f, -65.979081f,
-            87.945283f, -65.895269f,
-            80f, -65.895269f,
-            60f, -65.895269f,
-            40f, -65.895269f,
-            20f, -65.895269f,
-            0f, -65.895269f,
-            -20f, -65.895269f,
-            -40f, -65.895269f,
-            -60f, -65.895269f,
-            -80f, -65.895269f,
-            -87.945283f, -65.895269f,
-            -106.918117f, -65.979081f,
-            -125.141795f, -60.699594f,
-            -141.131686f, -50.486914f,
-            -153.585137f, -36.173040f,
-            -161.487600f, -18.924084f,
-            -164.195283f, -0.145269f,
-            -161.487600f, 18.633545f,
-             -153.585137f, 35.882502f,
-             -141.131686f, 50.196376f,
-             -125.141795f, 60.409056f,
-             -106.918117f, 65.688542f,
-             -87.945283f, 65.604731f,
-             -72.507313f, 65.536534f,
-             -57.678920f, 69.832390f,
-             -44.668135f, 78.142327f,
-             -34.534908f, 89.789358f,
-             -28.104767f, 103.824631f,
-             -25.901557f, 119.104731f,
-             -28.104767f, 134.384831f,
-             -34.534908f, 148.420104f,
-             -44.668135f, 160.067134f,
-             -57.678920f, 168.377071f,
-             -72.507313f, 172.672927f,
-             -87.945283f, 172.604731f,
-             -98.958994f, 172.442364f,
-             -109.489523f, 169.211866f,
-             -118.699582f, 163.170096f,
-             -125.856872f, 154.797439f,
-             -130.392311f, 144.759613f,
-             -131.945283f, 133.854731f,
-             -130.392311f, 122.949849f,
-             -125.856872f, 112.912022f,
-             -118.699582f, 104.539366f,
-             -109.489523f, 98.497596f,
-             -98.958994f, 95.267098f,
-             -87.945283f, 95.104731f,
-             -84.371773f, 95.771023f,
-             -80.898574f, 96.843781f,
-             -77.571757f, 98.308777f,
-             -74.435452f, 100.146577f,
-             -71.531259f, 102.332803f,
-             -68.897703f, 104.838457f,
-             -66.569716f, 107.630302f,
-             -64.578177f, 110.671304f,
-             -62.949505f, 113.921127f,
-             -61.705303f, 117.336662f,
-             -60.862074f, 120.872604f,
-             -60.431003f, 124.482050f
-        };
-
-
-        mTriangleOffsets = new int[64];
-        mTriangleOffsetsTex = new float[64];
-
-        mTriangleOffsets[0] = 0;
-        mTriangleOffsetsCount = 1;
-
-        Vertex t = new Vertex();
-        t.nxyz(1, 0, 0);
-        int count = vtx.length / 2;
-
-        SimpleMesh.TriangleMeshBuilder tm = new SimpleMesh.TriangleMeshBuilder(
-            rs, 3,
-            SimpleMesh.TriangleMeshBuilder.NORMAL | SimpleMesh.TriangleMeshBuilder.TEXTURE_0);
-
-        float runningS = 0;
-        for (int ct=0; ct < (count-1); ct++) {
-            t.x = -vtx[ct*2] / 100.f;
-            t.z = vtx[ct*2+1] / 100.f;
-            t.s = runningS;
-            t.nx =  (vtx[ct*2+3] - vtx[ct*2 +1]);
-            t.ny =  (vtx[ct*2+2] - vtx[ct*2   ]);
-            float len = (float)java.lang.Math.sqrt(t.nx * t.nx + t.ny * t.ny);
-            runningS += len / 100;
-            t.nx /= len;
-            t.ny /= len;
-            t.y = -0.5f;
-            t.t = 0;
-            tm.setNormal(t.nx, t.ny, t.nz);
-            tm.setTexture(t.s, t.t);
-            tm.addVertex(t.x, t.y, t.z);
-            //android.util.Log.e("rs", "vtx x="+t.x+" y="+t.y+" z="+t.z+" s="+t.s+" t="+t.t);
-            t.y = .5f;
-            t.t = 1;
-            tm.setTexture(t.s, t.t);
-            tm.addVertex(t.x, t.y, t.z);
-            //android.util.Log.e("rs", "vtx x="+t.x+" y="+t.y+" z="+t.z+" s="+t.s+" t="+t.t);
-
-            if((runningS*2) > mTriangleOffsetsCount) {
-                mTriangleOffsets[mTriangleOffsetsCount] = ct*2 * 3;
-                mTriangleOffsetsTex[mTriangleOffsetsCount] = t.s;
-                mTriangleOffsetsCount ++;
-            }
-        }
-
-        count = (count * 2 - 2);
-        for (int ct=0; ct < (count-2); ct+= 2) {
-            tm.addTriangle(ct, ct+1, ct+2);
-            tm.addTriangle(ct+1, ct+3, ct+2);
-        }
-        return tm.create();
-    }
-
-
-}
-
diff --git a/java/Fountain/Android.mk b/java/Fountain/Android.mk
index f7e53a8..71944b2 100644
--- a/java/Fountain/Android.mk
+++ b/java/Fountain/Android.mk
@@ -14,14 +14,18 @@
 # limitations under the License.
 #
 
+ifneq ($(TARGET_SIMULATOR),true)
+
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 
 LOCAL_MODULE_TAGS := optional
 
-LOCAL_SRC_FILES := $(call all-java-files-under, src)
+LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src)
 #LOCAL_STATIC_JAVA_LIBRARIES := android.renderscript
 
 LOCAL_PACKAGE_NAME := Fountain
 
 include $(BUILD_PACKAGE)
+
+endif
diff --git a/java/Fountain/res/drawable/gadgets_clock_mp3.png b/java/Fountain/res/drawable/gadgets_clock_mp3.png
deleted file mode 100755
index e91bfb4..0000000
--- a/java/Fountain/res/drawable/gadgets_clock_mp3.png
+++ /dev/null
Binary files differ
diff --git a/java/Fountain/res/raw/fountain.c b/java/Fountain/res/raw/fountain.c
deleted file mode 100644
index 73b819b..0000000
--- a/java/Fountain/res/raw/fountain.c
+++ /dev/null
@@ -1,52 +0,0 @@
-// Fountain test script
-#pragma version(1)
-
-int newPart = 0;
-
-int main(int launchID) {
-    int ct;
-    int count = Control->count;
-    int rate = Control->rate;
-    float height = getHeight();
-    struct point_s * p = (struct point_s *)point;
-
-    if (rate) {
-        float rMax = ((float)rate) * 0.005f;
-        int x = Control->x;
-        int y = Control->y;
-        int color = ((int)(Control->r * 255.f)) |
-                    ((int)(Control->g * 255.f)) << 8 |
-                    ((int)(Control->b * 255.f)) << 16 |
-                    (0xf0 << 24);
-        struct point_s * np = &p[newPart];
-
-        while (rate--) {
-            vec2Rand((float *)&np->delta.x, rMax);
-            np->position.x = x;
-            np->position.y = y;
-            np->color = color;
-            newPart++;
-            np++;
-            if (newPart >= count) {
-                newPart = 0;
-                np = &p[newPart];
-            }
-        }
-    }
-
-    for (ct=0; ct < count; ct++) {
-        float dy = p->delta.y + 0.15f;
-        float posy = p->position.y + dy;
-        if ((posy > height) && (dy > 0)) {
-            dy *= -0.3f;
-        }
-        p->delta.y = dy;
-        p->position.x += p->delta.x;
-        p->position.y = posy;
-        p++;
-    }
-
-    uploadToBufferObject(NAMED_PartBuffer);
-    drawSimpleMesh(NAMED_PartMesh);
-    return 1;
-}
diff --git a/java/Fountain/res/raw/fountain2.rs b/java/Fountain/res/raw/fountain2.rs
deleted file mode 100644
index 3301140..0000000
--- a/java/Fountain/res/raw/fountain2.rs
+++ /dev/null
@@ -1,73 +0,0 @@
-// Fountain test script
-#pragma version(1)
-
-#include "rs_types.rsh"
-#include "rs_math.rsh"
-#include "rs_graphics.rsh"
-
-static int newPart = 0;
-
-typedef struct Control_s {
-    int x, y;
-    int rate;
-    int count;
-    float r, g, b;
-    rs_allocation partBuffer;
-    rs_mesh partMesh;
-} Control_t;
-Control_t *Control;
-
-typedef struct Point_s{
-    float2 delta;
-    float2 position;
-    unsigned int color;
-} Point_t;
-Point_t *point;
-
-int main(int launchID) {
-    int ct;
-    int count = Control->count;
-    int rate = Control->rate;
-    float height = getHeight();
-    Point_t * p = point;
-
-    if (rate) {
-        float rMax = ((float)rate) * 0.005f;
-        int x = Control->x;
-        int y = Control->y;
-        int color = ((int)(Control->r * 255.f)) |
-                    ((int)(Control->g * 255.f)) << 8 |
-                    ((int)(Control->b * 255.f)) << 16 |
-                    (0xf0 << 24);
-        Point_t * np = &p[newPart];
-
-        while (rate--) {
-            np->delta = vec2Rand(rMax);
-            np->position.x = x;
-            np->position.y = y;
-            np->color = color;
-            newPart++;
-            np++;
-            if (newPart >= count) {
-                newPart = 0;
-                np = &p[newPart];
-            }
-        }
-    }
-
-    for (ct=0; ct < count; ct++) {
-        float dy = p->delta.y + 0.15f;
-        float posy = p->position.y + dy;
-        if ((posy > height) && (dy > 0)) {
-            dy *= -0.3f;
-        }
-        p->delta.y = dy;
-        p->position.x += p->delta.x;
-        p->position.y = posy;
-        p++;
-    }
-
-    uploadToBufferObject(Control->partBuffer);
-    drawSimpleMesh(Control->partMesh);
-    return 1;
-}
diff --git a/java/Fountain/src/com/android/fountain/FountainRS.java b/java/Fountain/src/com/android/fountain/FountainRS.java
index 9356579..0b26cfd 100644
--- a/java/Fountain/src/com/android/fountain/FountainRS.java
+++ b/java/Fountain/src/com/android/fountain/FountainRS.java
@@ -22,94 +22,50 @@
 
 
 public class FountainRS {
-    public static final int PART_COUNT = 20000;
-
-    static class SomeData {
-        public int x;
-        public int y;
-        public int rate;
-        public int count;
-        public float r;
-        public float g;
-        public float b;
-    }
+    public static final int PART_COUNT = 50000;
 
     public FountainRS() {
     }
 
+    private Resources mRes;
+    private RenderScriptGL mRS;
+    private ScriptC_fountain mScript;
     public void init(RenderScriptGL rs, Resources res, int width, int height) {
         mRS = rs;
         mRes = res;
-        initRS();
+
+        ProgramFragment.Builder pfb = new ProgramFragment.Builder(rs);
+        pfb.setVaryingColor(true);
+        rs.contextBindProgramFragment(pfb.create());
+
+        ScriptField_Point points = new ScriptField_Point(mRS, PART_COUNT);
+
+        Mesh.AllocationBuilder smb = new Mesh.AllocationBuilder(mRS);
+        smb.addVertexAllocation(points.getAllocation());
+        smb.addIndexType(Primitive.POINT);
+        Mesh sm = smb.create();
+
+        mScript = new ScriptC_fountain(mRS, mRes, R.raw.fountain, true);
+        mScript.set_partMesh(sm);
+        mScript.bind_point(points);
+        mRS.contextBindRootScript(mScript);
     }
 
-    public void newTouchPosition(int x, int y, int rate) {
-        if (mSD.rate == 0) {
-            mSD.r = ((x & 0x1) != 0) ? 0.f : 1.f;
-            mSD.g = ((x & 0x2) != 0) ? 0.f : 1.f;
-            mSD.b = ((x & 0x4) != 0) ? 0.f : 1.f;
-            if ((mSD.r + mSD.g + mSD.b) < 0.9f) {
-                mSD.r = 0.8f;
-                mSD.g = 0.5f;
-                mSD.b = 1.f;
-            }
+    boolean holdingColor[] = new boolean[10];
+    public void newTouchPosition(float x, float y, float pressure, int id) {
+        if (id < 0 || id >= holdingColor.length) {  // id indexes holdingColor[] below; valid slots are 0..length-1
+            return;
         }
-        mSD.rate = rate;
-        mSD.x = x;
-        mSD.y = y;
-        mIntAlloc.data(mSD);
+        int rate = (int)(pressure * pressure * 500.f);
+        if(rate > 500) {
+            rate = 500;
+        }
+        if (rate > 0) {
+            mScript.invoke_addParticles(rate, x, y, id, !holdingColor[id]);
+            holdingColor[id] = true;
+        } else {
+            holdingColor[id] = false;
+        }
+
     }
-
-
-    /////////////////////////////////////////
-
-    private Resources mRes;
-
-    private RenderScriptGL mRS;
-    private Allocation mIntAlloc;
-    private SimpleMesh mSM;
-    private SomeData mSD;
-    private Type mSDType;
-
-    private void initRS() {
-        mSD = new SomeData();
-        mSDType = Type.createFromClass(mRS, SomeData.class, 1, "SomeData");
-        mIntAlloc = Allocation.createTyped(mRS, mSDType);
-        mSD.count = PART_COUNT;
-        mIntAlloc.data(mSD);
-
-        Element.Builder eb = new Element.Builder(mRS);
-        eb.add(Element.createVector(mRS, Element.DataType.FLOAT_32, 2), "delta");
-        eb.add(Element.createAttrib(mRS, Element.DataType.FLOAT_32, Element.DataKind.POSITION, 2), "position");
-        eb.add(Element.createAttrib(mRS, Element.DataType.UNSIGNED_8, Element.DataKind.COLOR, 4), "color");
-        Element primElement = eb.create();
-
-
-        SimpleMesh.Builder smb = new SimpleMesh.Builder(mRS);
-        int vtxSlot = smb.addVertexType(primElement, PART_COUNT);
-        smb.setPrimitive(Primitive.POINT);
-        mSM = smb.create();
-        mSM.setName("PartMesh");
-
-        Allocation partAlloc = mSM.createVertexAllocation(vtxSlot);
-        partAlloc.setName("PartBuffer");
-        mSM.bindVertexAllocation(partAlloc, 0);
-
-        // All setup of named objects should be done by this point
-        // because we are about to compile the script.
-        ScriptC.Builder sb = new ScriptC.Builder(mRS);
-        sb.setScript(mRes, R.raw.fountain);
-        sb.setRoot(true);
-        sb.setType(mSDType, "Control", 0);
-        sb.setType(mSM.getVertexType(0), "point", 1);
-        Script script = sb.create();
-        script.setClearColor(0.0f, 0.0f, 0.0f, 1.0f);
-
-        script.bindAllocation(mIntAlloc, 0);
-        script.bindAllocation(partAlloc, 1);
-        mRS.contextBindRootScript(script);
-    }
-
 }
-
-
diff --git a/java/Fountain/src/com/android/fountain/FountainView.java b/java/Fountain/src/com/android/fountain/FountainView.java
index dfd6a49..987bebe 100644
--- a/java/Fountain/src/com/android/fountain/FountainView.java
+++ b/java/Fountain/src/com/android/fountain/FountainView.java
@@ -52,7 +52,8 @@
     public void surfaceChanged(SurfaceHolder holder, int format, int w, int h) {
         super.surfaceChanged(holder, format, w, h);
         if (mRS == null) {
-            mRS = createRenderScript(false);
+            RenderScriptGL.SurfaceConfig sc = new RenderScriptGL.SurfaceConfig();
+            mRS = createRenderScript(sc);
             mRS.contextSetSurface(w, h, holder.getSurface());
             mRender = new FountainRS();
             mRender.init(mRS, getResources(), w, h);
@@ -71,17 +72,33 @@
     @Override
     public boolean onTouchEvent(MotionEvent ev)
     {
-        int act = ev.getAction();
+        int act = ev.getActionMasked();
         if (act == ev.ACTION_UP) {
-            mRender.newTouchPosition(0, 0, 0);
+            mRender.newTouchPosition(0, 0, 0, ev.getPointerId(0));
             return false;
+        } else if (act == MotionEvent.ACTION_POINTER_UP) {
+            // only one pointer going up, we can get the index like this
+            int pointerIndex = ev.getActionIndex();
+            int pointerId = ev.getPointerId(pointerIndex);
+            mRender.newTouchPosition(0, 0, 0, pointerId);
         }
-        float rate = (ev.getPressure() * 50.f);
-        rate *= rate;
-        if(rate > 2000.f) {
-            rate = 2000.f;
+        int count = ev.getHistorySize();
+        int pcount = ev.getPointerCount();
+
+        for (int p=0; p < pcount; p++) {
+            int id = ev.getPointerId(p);
+            mRender.newTouchPosition(ev.getX(p),
+                                     ev.getY(p),
+                                     ev.getPressure(p),
+                                     id);
+
+            for (int i=0; i < count; i++) {
+                mRender.newTouchPosition(ev.getHistoricalX(p, i),
+                                         ev.getHistoricalY(p, i),
+                                         ev.getHistoricalPressure(p, i),
+                                         id);
+            }
         }
-        mRender.newTouchPosition((int)ev.getX(), (int)ev.getY(), (int)rate);
         return true;
     }
 }
diff --git a/java/Fountain/src/com/android/fountain/fountain.rs b/java/Fountain/src/com/android/fountain/fountain.rs
new file mode 100644
index 0000000..5445744
--- /dev/null
+++ b/java/Fountain/src/com/android/fountain/fountain.rs
@@ -0,0 +1,71 @@
+// Fountain test script
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.fountain)
+
+#pragma stateFragment(parent)
+
+#include "rs_graphics.rsh"
+
+static int newPart = 0;
+rs_mesh partMesh;
+
+typedef struct __attribute__((packed, aligned(4))) Point {
+    float2 delta;
+    float2 position;
+    uchar4 color;
+} Point_t;
+Point_t *point;
+
+#pragma rs export_func(addParticles)
+
+int root() {  // Root function of the script: advances every particle in `point`, then draws partMesh.
+    float dt = min(rsGetDt(), 0.1f);  // clamp the time step to at most 0.1
+    rsgClearColor(0.f, 0.f, 0.f, 1.f);
+    const float height = rsgGetHeight();
+    const int size = rsAllocationGetDimX(rsGetAllocation(point));  // X dimension of the allocation behind `point`
+    float dy2 = dt * (10.f);  // amount added to every particle's delta.y on this call
+    Point_t * p = point;
+    for (int ct=0; ct < size; ct++) {
+        p->delta.y += dy2;
+        p->position += p->delta;  // NOTE(review): delta is applied unscaled by dt; only dy2 depends on dt
+        if ((p->position.y > height) && (p->delta.y > 0)) {
+            p->delta.y *= -0.3f;  // beyond `height` and still moving in +y: flip direction, keep 30% of the y speed
+        }
+        p++;
+    }
+
+    rsgDrawMesh(partMesh);
+    return 1;
+}
+
+static float4 partColor[10];  // one color slot per `index` value handed to addParticles
+void addParticles(int rate, float x, float y, int index, bool newColor)  // writes `rate` particles at (x, y) into `point`, starting at newPart
+{
+    if (newColor) {  // re-roll the color stored for this index; only x, y and z are written here
+        partColor[index].x = rsRand(0.5f, 1.0f);  // NOTE(review): index is not range-checked against the 10 slots — callers must pass 0..9
+        partColor[index].y = rsRand(1.0f);
+        partColor[index].z = rsRand(1.0f);
+    }
+    float rMax = ((float)rate) * 0.02f;  // upper bound passed to rsRand for each particle's speed; grows with rate
+    int size = rsAllocationGetDimX(rsGetAllocation(point));
+    uchar4 c = rsPackColorTo8888(partColor[index]);
+
+    Point_t * np = &point[newPart];
+    float2 p = {x, y};
+    while (rate--) {  // assumes rate >= 0; a negative rate would not stop at zero — TODO confirm callers
+        float angle = rsRand(3.14f * 2.f);  // random direction, up to roughly a full turn in radians
+        float len = rsRand(rMax);
+        np->delta.x = len * sin(angle);
+        np->delta.y = len * cos(angle);
+        np->position = p;
+        np->color = c;
+        newPart++;
+        np++;
+        if (newPart >= size) {  // ran off the end of `point`: wrap to slot 0 and keep overwriting from there
+            newPart = 0;
+            np = &point[newPart];
+        }
+    }
+}
+
diff --git a/java/ImageProcessing/Android.mk b/java/ImageProcessing/Android.mk
index 833427b..7fa30d0 100644
--- a/java/ImageProcessing/Android.mk
+++ b/java/ImageProcessing/Android.mk
@@ -14,14 +14,19 @@
 # limitations under the License.
 #
 
+ifneq ($(TARGET_SIMULATOR),true)
+
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 
 LOCAL_MODULE_TAGS := optional
 
-LOCAL_SRC_FILES := $(call all-java-files-under, src)
+LOCAL_SRC_FILES := $(call all-java-files-under, src) \
+                   $(call all-renderscript-files-under, src)
 #LOCAL_STATIC_JAVA_LIBRARIES := android.renderscript
 
 LOCAL_PACKAGE_NAME := ImageProcessing
 
 include $(BUILD_PACKAGE)
+
+endif
diff --git a/java/ImageProcessing/AndroidManifest.xml b/java/ImageProcessing/AndroidManifest.xml
index b48d208..d6a2db4 100644
--- a/java/ImageProcessing/AndroidManifest.xml
+++ b/java/ImageProcessing/AndroidManifest.xml
@@ -6,7 +6,8 @@
     <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
 
     <application android:label="Image Processing">
-        <activity android:name="ImageProcessingActivity">
+        <activity android:name="ImageProcessingActivity"
+                  android:screenOrientation="portrait">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
                 <category android:name="android.intent.category.LAUNCHER" />
diff --git a/java/ImageProcessing/res/drawable/data.jpg b/java/ImageProcessing/res/drawable/data.jpg
new file mode 100644
index 0000000..81a87b1
--- /dev/null
+++ b/java/ImageProcessing/res/drawable/data.jpg
Binary files differ
diff --git a/java/ImageProcessing/res/layout/main.xml b/java/ImageProcessing/res/layout/main.xml
index 6770c18..c6ec729 100644
--- a/java/ImageProcessing/res/layout/main.xml
+++ b/java/ImageProcessing/res/layout/main.xml
@@ -25,9 +25,147 @@
         android:id="@+id/display"
         android:layout_width="320dip"
         android:layout_height="266dip" />
-    
+
+    <Button
+        android:layout_marginBottom="170dip"
+        android:layout_width="wrap_content"
+        android:layout_height="40dip"
+        android:text="@string/benchmark"
+        android:onClick="benchmark"
+        android:layout_gravity="bottom"/>
+
+    <TextView
+        android:id="@+id/benchmarkText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="100dip"
+        android:layout_marginBottom="175dip"
+        android:layout_gravity="bottom"
+        android:text="@string/saturation"/>
+
+     <SeekBar
+        android:id="@+id/inSaturation"
+        android:layout_marginBottom="140dip"
+        android:layout_marginLeft="10dip"
+        android:layout_marginRight="10dip"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:layout_gravity="bottom" />
+
+    <TextView
+        android:id="@+id/inSaturationText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="50dip"
+        android:layout_marginBottom="142dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/saturation"/>
+
     <SeekBar
-        android:id="@+id/threshold"
+        android:id="@+id/inGamma"
+        android:layout_marginBottom="110dip"
+        android:layout_marginLeft="10dip"
+        android:layout_marginRight="10dip"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:layout_gravity="bottom" />
+
+    <TextView
+        android:id="@+id/inGammaText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="50dip"
+        android:layout_marginBottom="112dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/gamma"/>
+
+    <SeekBar
+        android:id="@+id/outWhite"
+        android:layout_marginBottom="80dip"
+        android:layout_marginLeft="170dip"
+        android:layout_marginRight="10dip"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:layout_gravity="bottom" />
+
+    <TextView
+        android:id="@+id/outWhiteText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="220dip"
+        android:layout_marginBottom="82dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/out_white"/>
+
+    <SeekBar
+        android:id="@+id/inWhite"
+        android:layout_marginBottom="80dip"
+        android:layout_marginLeft="10dip"
+        android:layout_marginRight="170dip"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:layout_gravity="bottom" />
+
+    <TextView
+        android:id="@+id/inWhiteText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="50dip"
+        android:layout_marginBottom="82dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/in_white"/>
+
+    <SeekBar
+        android:id="@+id/outBlack"
+        android:layout_marginBottom="50dip"
+        android:layout_marginLeft="170dip"
+        android:layout_marginRight="10dip"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:layout_gravity="bottom" />
+
+    <TextView
+        android:id="@+id/outBlackText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="220dip"
+        android:layout_marginBottom="52dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/out_black"/>
+
+    <SeekBar
+        android:id="@+id/inBlack"
+        android:layout_marginBottom="50dip"
+        android:layout_marginLeft="10dip"
+        android:layout_marginRight="170dip"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:layout_gravity="bottom" />
+
+    <TextView
+        android:id="@+id/inBlackText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="50dip"
+        android:layout_marginBottom="52dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/in_black"/>
+
+    <SeekBar
+        android:id="@+id/radius"
         android:layout_marginBottom="10dip"
         android:layout_marginLeft="10dip"
         android:layout_marginRight="10dip"
@@ -35,4 +173,15 @@
         android:layout_height="wrap_content"
         android:layout_gravity="bottom" />
 
+     <TextView
+        android:id="@+id/blurText"
+        android:layout_width="match_parent"
+        android:layout_height="wrap_content"
+        android:textSize="18sp"
+        android:layout_marginLeft="50dip"
+        android:layout_marginBottom="12dip"
+        android:textColor="#000"
+        android:layout_gravity="bottom"
+        android:text="@string/blur_description"/>
+
 </merge>
\ No newline at end of file
diff --git a/java/ImageProcessing/res/raw/threshold.rs b/java/ImageProcessing/res/raw/threshold.rs
deleted file mode 100644
index 888f0cd..0000000
--- a/java/ImageProcessing/res/raw/threshold.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-// block of defines matching what RS will insert at runtime.
-struct Params_s{
-    int inHeight;
-    int inWidth;
-    int outHeight;
-    int outWidth;
-    float threshold;
-};
-struct Params_s * Params;
-struct InPixel_s{
-    char a;
-    char b;
-    char g;
-    char r;
-};
-struct InPixel_s * InPixel;
-struct OutPixel_s{
-    char a;
-    char b;
-    char g;
-    char r;
-};
-struct OutPixel_s * OutPixel;
-*/
-
-struct color_s {
-    char b;
-    char g;
-    char r;
-    char a;
-};
-
-void main() {
-    int t = uptimeMillis();
-
-    struct color_s *in = (struct color_s *) InPixel;
-    struct color_s *out = (struct color_s *) OutPixel;
-
-    int count = Params->inWidth * Params->inHeight;
-    int i;
-    float threshold = (Params->threshold * 255.f);
-
-    for (i = 0; i < count; i++) {
-        float luminance = 0.2125f * in->r +
-                          0.7154f * in->g +
-                          0.0721f * in->b;
-        if (luminance > threshold) {
-            *out = *in;
-        } else {
-            *((int *)out) = *((int *)in) & 0xff000000;
-        }
-
-        in++;
-        out++;
-    }
-
-    t= uptimeMillis() - t;
-    debugI32("Filter time", t);
-
-    sendToClient(&count, 1, 4, 0);
-}
diff --git a/java/ImageProcessing/res/values/strings.xml b/java/ImageProcessing/res/values/strings.xml
new file mode 100644
index 0000000..cc5cc4d
--- /dev/null
+++ b/java/ImageProcessing/res/values/strings.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+/*
+* Copyright (C) 2008 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+-->
+
+<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
+    <!-- General -->
+    <skip />
+    <!--slider label -->
+    <string name="blur_description">Blur Radius</string>
+    <string name="in_white">In White</string>
+    <string name="out_white">Out White</string>
+    <string name="in_black">In Black</string>
+    <string name="out_black">Out Black</string>
+    <string name="gamma">Gamma</string>
+    <string name="saturation">Saturation</string>
+    <string name="benchmark">Benchmark</string>
+
+</resources>
diff --git a/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java b/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java
index 9ce53d8..e806969 100644
--- a/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java
+++ b/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java
@@ -31,53 +31,56 @@
 import android.view.SurfaceHolder;
 import android.widget.ImageView;
 import android.widget.SeekBar;
+import android.widget.TextView;
+import android.view.View;
 import java.lang.Math;
 
-public class ImageProcessingActivity extends Activity implements SurfaceHolder.Callback {
-    private Bitmap mBitmap;
-    private Params mParams;
-    private Script.Invokable mInvokable;
-    private int[] mInData;
-    private int[] mOutData;
+public class ImageProcessingActivity extends Activity
+                                       implements SurfaceHolder.Callback,
+                                       SeekBar.OnSeekBarChangeListener {
+    private Bitmap mBitmapIn;
+    private Bitmap mBitmapOut;
+    private Bitmap mBitmapScratch;
+    private ScriptC_threshold mScript;
+    private ScriptC_vertical_blur mScriptVBlur;
+    private ScriptC_horizontal_blur mScriptHBlur;
+    private int mRadius = 0;
+    private SeekBar mRadiusSeekBar;
+
+    private float mInBlack = 0.0f;
+    private SeekBar mInBlackSeekBar;
+    private float mOutBlack = 0.0f;
+    private SeekBar mOutBlackSeekBar;
+    private float mInWhite = 255.0f;
+    private SeekBar mInWhiteSeekBar;
+    private float mOutWhite = 255.0f;
+    private SeekBar mOutWhiteSeekBar;
+    private float mGamma = 1.0f;
+    private SeekBar mGammaSeekBar;
+
+    private float mSaturation = 1.0f;
+    private SeekBar mSaturationSeekBar;
+
+    private TextView mBenchmarkResult;
 
     @SuppressWarnings({"FieldCanBeLocal"})
     private RenderScript mRS;
     @SuppressWarnings({"FieldCanBeLocal"})
-    private Type mParamsType;
-    @SuppressWarnings({"FieldCanBeLocal"})
-    private Allocation mParamsAllocation;
-    @SuppressWarnings({"FieldCanBeLocal"})
     private Type mPixelType;
     @SuppressWarnings({"FieldCanBeLocal"})
     private Allocation mInPixelsAllocation;
     @SuppressWarnings({"FieldCanBeLocal"})
     private Allocation mOutPixelsAllocation;
+    @SuppressWarnings({"FieldCanBeLocal"})
+    private Allocation mScratchPixelsAllocation1;
+    private Allocation mScratchPixelsAllocation2;
 
     private SurfaceView mSurfaceView;
     private ImageView mDisplayView;
 
-    static class Params {
-        public int inWidth;
-        public int outWidth;
-        public int inHeight;
-        public int outHeight;
-
-        public float threshold;
-    }
-
-    static class Pixel {
-        public byte a;
-        public byte r;
-        public byte g;
-        public byte b;
-    }
-
     class FilterCallback extends RenderScript.RSMessage {
         private Runnable mAction = new Runnable() {
             public void run() {
-                mOutPixelsAllocation.readData(mOutData);
-                mBitmap.setPixels(mOutData, 0, mParams.outWidth, 0, 0,
-                        mParams.outWidth, mParams.outHeight);
                 mDisplayView.invalidate();
             }
         };
@@ -89,29 +92,218 @@
         }
     }
 
-    private void javaFilter() {
+    int in[];
+    int interm[];
+    int out[];
+    int MAX_RADIUS = 25;
+    // Store our coefficients here
+    float gaussian[];
+
+    private long javaFilter() {
+        final int width = mBitmapIn.getWidth();
+        final int height = mBitmapIn.getHeight();
+        final int count = width * height;
+
+        if (in == null) {
+            in = new int[count];
+            interm = new int[count];
+            out = new int[count];
+            gaussian = new float[MAX_RADIUS * 2 + 1];
+            mBitmapIn.getPixels(in, 0, width, 0, 0, width, height);
+        }
+
         long t = java.lang.System.currentTimeMillis();
-        int count = mParams.inWidth * mParams.inHeight;
-        float threshold = mParams.threshold * 255.f;
 
-        for (int i = 0; i < count; i++) {
-            final float r = (float)((mInData[i] >> 0) & 0xff);
-            final float g = (float)((mInData[i] >> 8) & 0xff);
-            final float b = (float)((mInData[i] >> 16) & 0xff);
+        int w, h, r;
 
-            final float luminance = 0.2125f * r +
-                              0.7154f * g +
-                              0.0721f * b;
-            if (luminance > threshold) {
-                mOutData[i] = mInData[i];
-            } else {
-                mOutData[i] = mInData[i] & 0xff000000;
+        float fRadius = (float)mRadius;
+        int radius = (int)mRadius;
+
+        // Compute gaussian weights for the blur
+        // e is Euler's number
+        float e = 2.718281828459045f;
+        float pi = 3.1415926535897932f;
+        // g(x) = ( 1 / ( sqrt( 2 * pi ) * sigma ) ) * e ^ ( -x^2 / ( 2 * sigma^2 ) )
+        // x is of the form [-radius .. 0 .. radius]
+        // and sigma varies with radius.
+        // Based on some experimental radius values and sigma's
+        // we approximately fit sigma = f(radius) as
+        // sigma = radius * 0.4  + 0.6
+        // The larger the radius gets, the more our gaussian blur
+        // will resemble a box blur since with large sigma
+        // the gaussian curve begins to lose its shape
+        float sigma = 0.4f * fRadius + 0.6f;
+        // Now compute the coefficients
+        // We will store some redundant values to save some math during
+        // the blur calculations
+        // precompute some values
+        float coeff1 = 1.0f / (float)(Math.sqrt( 2.0f * pi ) * sigma);
+        float coeff2 = - 1.0f / (2.0f * sigma * sigma);
+        float normalizeFactor = 0.0f;
+        float floatR = 0.0f;
+        for(r = -radius; r <= radius; r ++) {
+            floatR = (float)r;
+            gaussian[r + radius] = coeff1 * (float)Math.pow(e, floatR * floatR * coeff2);
+            normalizeFactor += gaussian[r + radius];
+        }
+
+        //Now we need to normalize the weights because all our coefficients need to add up to one
+        normalizeFactor = 1.0f / normalizeFactor;
+        for(r = -radius; r <= radius; r ++) {
+            floatR = (float)r;
+            gaussian[r + radius] *= normalizeFactor;
+        }
+
+        float blurredPixelR = 0.0f;
+        float blurredPixelG = 0.0f;
+        float blurredPixelB = 0.0f;
+        float blurredPixelA = 0.0f;
+
+        for(h = 0; h < height; h ++) {
+            for(w = 0; w < width; w ++) {
+
+                blurredPixelR = 0.0f;
+                blurredPixelG = 0.0f;
+                blurredPixelB = 0.0f;
+                blurredPixelA = 0.0f;
+
+                for(r = -radius; r <= radius; r ++) {
+                    // Stepping left and right away from the pixel
+                    int validW = w + r;
+                    // Clamp to [0, width - 1]; max() isn't exposed for ints yet
+                    if(validW < 0) {
+                        validW = 0;
+                    }
+                    if(validW > width - 1) {
+                        validW = width - 1;
+                    }
+
+                    int input = in[h*width + validW];
+
+                    int R = ((input >> 24) & 0xff);
+                    int G = ((input >> 16) & 0xff);
+                    int B = ((input >> 8) & 0xff);
+                    int A = (input & 0xff);
+
+                    float weight = gaussian[r + radius];
+
+                    blurredPixelR += (float)(R)*weight;
+                    blurredPixelG += (float)(G)*weight;
+                    blurredPixelB += (float)(B)*weight;
+                    blurredPixelA += (float)(A)*weight;
+                }
+
+                int R = (int)blurredPixelR;
+                int G = (int)blurredPixelG;
+                int B = (int)blurredPixelB;
+                int A = (int)blurredPixelA;
+
+                interm[h*width + w] = (R << 24) | (G << 16) | (B << 8) | (A);
+            }
+        }
+
+        for(h = 0; h < height; h ++) {
+            for(w = 0; w < width; w ++) {
+
+                blurredPixelR = 0.0f;
+                blurredPixelG = 0.0f;
+                blurredPixelB = 0.0f;
+                blurredPixelA = 0.0f;
+                for(r = -radius; r <= radius; r ++) {
+                    int validH = h + r;
+                    // Clamp to [0, height - 1]
+                    if(validH < 0) {
+                        validH = 0;
+                    }
+                    if(validH > height - 1) {
+                        validH = height - 1;
+                    }
+
+                    int input = interm[validH*width + w];
+
+                    int R = ((input >> 24) & 0xff);
+                    int G = ((input >> 16) & 0xff);
+                    int B = ((input >> 8) & 0xff);
+                    int A = (input & 0xff);
+
+                    float weight = gaussian[r + radius];
+
+                    blurredPixelR += (float)(R)*weight;
+                    blurredPixelG += (float)(G)*weight;
+                    blurredPixelB += (float)(B)*weight;
+                    blurredPixelA += (float)(A)*weight;
+                }
+
+                int R = (int)blurredPixelR;
+                int G = (int)blurredPixelG;
+                int B = (int)blurredPixelB;
+                int A = (int)blurredPixelA;
+
+                out[h*width + w] = (R << 24) | (G << 16) | (B << 8) | (A);
             }
         }
 
         t = java.lang.System.currentTimeMillis() - t;
+        android.util.Log.v("Img", "Java frame time ms " + t);
+        mBitmapOut.setPixels(out, 0, width, 0, 0, width, height);
+        return t;
+    }
 
-        android.util.Log.v("Img", "frame time ms " + t);
+    public void onProgressChanged(SeekBar seekBar, int progress, boolean fromUser) {  // SeekBar callback: store the moved slider's value, push it to a script, re-run the filter
+        if (fromUser) {  // nothing is done for changes that are not user-initiated
+
+            if(seekBar == mRadiusSeekBar) {
+                float fRadius = progress / 100.0f;  // assumes this bar's max is 100 — TODO confirm
+                fRadius *= (float)(MAX_RADIUS);
+                mRadius = (int)fRadius;
+
+                mScript.set_radius(mRadius);  // NOTE(review): radius goes to mScript; every other parameter goes to mScriptVBlur — confirm intended
+            }
+            else if(seekBar == mInBlackSeekBar) {
+                mInBlack = (float)progress;
+                mScriptVBlur.invoke_setLevels(mInBlack, mOutBlack, mInWhite, mOutWhite);
+            }
+            else if(seekBar == mOutBlackSeekBar) {
+                mOutBlack = (float)progress;
+                mScriptVBlur.invoke_setLevels(mInBlack, mOutBlack, mInWhite, mOutWhite);
+            }
+            else if(seekBar == mInWhiteSeekBar) {
+                mInWhite = (float)progress + 127.0f;  // slider position 0 corresponds to a level of 127
+                mScriptVBlur.invoke_setLevels(mInBlack, mOutBlack, mInWhite, mOutWhite);
+            }
+            else if(seekBar == mOutWhiteSeekBar) {
+                mOutWhite = (float)progress + 127.0f;  // slider position 0 corresponds to a level of 127
+                mScriptVBlur.invoke_setLevels(mInBlack, mOutBlack, mInWhite, mOutWhite);
+            }
+            else if(seekBar == mGammaSeekBar) {
+                mGamma = (float)progress/100.0f;
+                mGamma = Math.max(mGamma, 0.1f);  // floor at 0.1 so the reciprocal below never divides by zero
+                mGamma = 1.0f / mGamma;  // the script is handed the reciprocal of the slider value
+                mScriptVBlur.invoke_setGamma(mGamma);
+            }
+            else if(seekBar == mSaturationSeekBar) {
+                mSaturation = (float)progress / 50.0f;
+                mScriptVBlur.invoke_setSaturation(mSaturation);
+            }
+
+            long t = java.lang.System.currentTimeMillis();
+            if (true) {  // hard-wired to the RenderScript path; the else branch below never runs
+                mScript.invoke_filter();
+                mRS.finish();  // presumably waits for the script to complete so `t` covers the whole run — TODO confirm
+            } else {
+                javaFilter();
+                mDisplayView.invalidate();
+            }
+
+            t = java.lang.System.currentTimeMillis() - t;
+            android.util.Log.v("Img", "Renderscript frame time core ms " + t);
+        }
+    }
+
+    public void onStartTrackingTouch(SeekBar seekBar) {
+    }
+
+    public void onStopTrackingTouch(SeekBar seekBar) {
     }
 
     @Override
@@ -119,45 +311,54 @@
         super.onCreate(savedInstanceState);
         setContentView(R.layout.main);
 
-        mBitmap = loadBitmap(R.drawable.data);
+        mBitmapIn = loadBitmap(R.drawable.data);
+        mBitmapOut = loadBitmap(R.drawable.data);
+        mBitmapScratch = loadBitmap(R.drawable.data);
 
         mSurfaceView = (SurfaceView) findViewById(R.id.surface);
         mSurfaceView.getHolder().addCallback(this);
 
         mDisplayView = (ImageView) findViewById(R.id.display);
-        mDisplayView.setImageBitmap(mBitmap);
+        mDisplayView.setImageBitmap(mBitmapOut);
 
-        ((SeekBar) findViewById(R.id.threshold)).setOnSeekBarChangeListener(
-                new SeekBar.OnSeekBarChangeListener() {
-            public void onProgressChanged(SeekBar seekBar, int progress, boolean fromUser) {
-                if (fromUser) {
-                    mParams.threshold = progress / 100.0f;
-                    mParamsAllocation.data(mParams);
+        mRadiusSeekBar = (SeekBar) findViewById(R.id.radius);
+        mRadiusSeekBar.setOnSeekBarChangeListener(this);
 
-                    if (true) {
-                        mInvokable.execute();
-                    } else {
-                        javaFilter();
-                        mBitmap.setPixels(mOutData, 0, mParams.outWidth, 0, 0,
-                                mParams.outWidth, mParams.outHeight);
-                        mDisplayView.invalidate();
-                    }
-                }
-            }
+        mInBlackSeekBar = (SeekBar)findViewById(R.id.inBlack);
+        mInBlackSeekBar.setOnSeekBarChangeListener(this);
+        mInBlackSeekBar.setMax(128);
+        mInBlackSeekBar.setProgress(0);
+        mOutBlackSeekBar = (SeekBar)findViewById(R.id.outBlack);
+        mOutBlackSeekBar.setOnSeekBarChangeListener(this);
+        mOutBlackSeekBar.setMax(128);
+        mOutBlackSeekBar.setProgress(0);
 
-            public void onStartTrackingTouch(SeekBar seekBar) {
-            }
+        mInWhiteSeekBar = (SeekBar)findViewById(R.id.inWhite);
+        mInWhiteSeekBar.setOnSeekBarChangeListener(this);
+        mInWhiteSeekBar.setMax(128);
+        mInWhiteSeekBar.setProgress(128);
+        mOutWhiteSeekBar = (SeekBar)findViewById(R.id.outWhite);
+        mOutWhiteSeekBar.setOnSeekBarChangeListener(this);
+        mOutWhiteSeekBar.setMax(128);
+        mOutWhiteSeekBar.setProgress(128);
 
-            public void onStopTrackingTouch(SeekBar seekBar) {
-            }
-        });
+        mGammaSeekBar = (SeekBar)findViewById(R.id.inGamma);
+        mGammaSeekBar.setOnSeekBarChangeListener(this);
+        mGammaSeekBar.setMax(150);
+        mGammaSeekBar.setProgress(100);
+
+        mSaturationSeekBar = (SeekBar)findViewById(R.id.inSaturation);
+        mSaturationSeekBar.setOnSeekBarChangeListener(this);
+        mSaturationSeekBar.setProgress(50);
+
+        mBenchmarkResult = (TextView) findViewById(R.id.benchmarkText);
+        mBenchmarkResult.setText("Benchmark not yet run");
     }
 
     public void surfaceCreated(SurfaceHolder holder) {
-        mParams = createParams();
-        mInvokable = createScript();
-
-        mInvokable.execute();
+        createScript();  // creates mRS, the allocations and the three scripts
+        mScript.invoke_filter();  // run the filter once so the initial image is processed
+        mRS.finish();  // NOTE(review): presumably blocks until the queued RS work is done -- confirm
     }
 
     public void surfaceChanged(SurfaceHolder holder, int format, int width, int height) {
@@ -166,54 +367,38 @@
     public void surfaceDestroyed(SurfaceHolder holder) {
     }
 
-    private Script.Invokable createScript() {
+    private void createScript() {  // creates the RS context, the pixel allocations and the threshold/blur scripts
         mRS = RenderScript.create();
         mRS.mMessageCallback = new FilterCallback();
 
-        mParamsType = Type.createFromClass(mRS, Params.class, 1, "Parameters");
-        mParamsAllocation = Allocation.createTyped(mRS, mParamsType);
-        mParamsAllocation.data(mParams);
+        mInPixelsAllocation = Allocation.createBitmapRef(mRS, mBitmapIn);
+        mOutPixelsAllocation = Allocation.createBitmapRef(mRS, mBitmapOut);
 
-        final int pixelCount = mParams.inWidth * mParams.inHeight;
+        Type.Builder tb = new Type.Builder(mRS, Element.F32_4(mRS));  // F32_4 scratch type with the input bitmap's X/Y size
+        tb.add(android.renderscript.Dimension.X, mBitmapIn.getWidth());
+        tb.add(android.renderscript.Dimension.Y, mBitmapIn.getHeight());
+        mScratchPixelsAllocation1 = Allocation.createTyped(mRS, tb.create());
+        mScratchPixelsAllocation2 = Allocation.createTyped(mRS, tb.create());
 
-        mPixelType = Type.createFromClass(mRS, Pixel.class, 1, "Pixel");
-        mInPixelsAllocation = Allocation.createSized(mRS,
-                Element.createUser(mRS, Element.DataType.SIGNED_32),
-                pixelCount);
-        mOutPixelsAllocation = Allocation.createSized(mRS,
-                Element.createUser(mRS, Element.DataType.SIGNED_32),
-                pixelCount);
+        mScriptVBlur = new ScriptC_vertical_blur(mRS, getResources(), R.raw.vertical_blur, false);
+        mScriptHBlur = new ScriptC_horizontal_blur(mRS, getResources(), R.raw.horizontal_blur, false);
 
-        mInData = new int[pixelCount];
-        mBitmap.getPixels(mInData, 0, mParams.inWidth, 0, 0, mParams.inWidth, mParams.inHeight);
-        mInPixelsAllocation.data(mInData);
+        mScript = new ScriptC_threshold(mRS, getResources(), R.raw.threshold, false);  // script whose filter() is invoked from Java
+        mScript.set_width(mBitmapIn.getWidth());
+        mScript.set_height(mBitmapIn.getHeight());
+        mScript.set_radius(mRadius);
 
-        mOutData = new int[pixelCount];
-        mOutPixelsAllocation.data(mOutData);
+        mScriptVBlur.invoke_setLevels(mInBlack, mOutBlack, mInWhite, mOutWhite);  // push the current colour settings to the vertical blur script
+        mScriptVBlur.invoke_setGamma(mGamma);
+        mScriptVBlur.invoke_setSaturation(mSaturation);
 
-        ScriptC.Builder sb = new ScriptC.Builder(mRS);
-        sb.setType(mParamsType, "Params", 0);
-        sb.setType(mPixelType, "InPixel", 1);
-        sb.setType(mPixelType, "OutPixel", 2);
-        sb.setType(true, 2);
-        Script.Invokable invokable = sb.addInvokable("main");
-        sb.setScript(getResources(), R.raw.threshold);
-        //sb.setRoot(true);
+        mScript.bind_InPixel(mInPixelsAllocation);
+        mScript.bind_OutPixel(mOutPixelsAllocation);
+        mScript.bind_ScratchPixel1(mScratchPixelsAllocation1);
+        mScript.bind_ScratchPixel2(mScratchPixelsAllocation2);
 
-        ScriptC script = sb.create();
-        script.bindAllocation(mParamsAllocation, 0);
-        script.bindAllocation(mInPixelsAllocation, 1);
-        script.bindAllocation(mOutPixelsAllocation, 2);
-
-        return invokable;
-    }
-
-    private Params createParams() {
-        final Params params = new Params();
-        params.inWidth = params.outWidth = mBitmap.getWidth();
-        params.inHeight = params.outHeight = mBitmap.getHeight();
-        params.threshold = 0.5f;
-        return params;
+        mScript.set_vBlurScript(mScriptVBlur);  // hand both blur scripts to the threshold script
+        mScript.set_hBlurScript(mScriptHBlur);
     }
 
     private Bitmap loadBitmap(int resource) {
@@ -229,4 +414,30 @@
         source.recycle();
         return b;
     }
+
+    // button hook
+    public void benchmark(View v) {
+        // Times one filter pass at MAX_RADIUS, shows the result, then renders
+        // again with the radius that was active before the benchmark.
+        android.util.Log.v("Img", "Benchmarking");
+        final int savedRadius = mRadius;
+        mRadius = MAX_RADIUS;
+        mScript.set_radius(mRadius);
+
+        final long startMs = java.lang.System.currentTimeMillis();
+        mScript.invoke_filter();
+        mRS.finish();
+        final long elapsedMs = java.lang.System.currentTimeMillis() - startMs;
+
+        android.util.Log.v("Img", "Renderscript frame time core ms " + elapsedMs);
+        // Only the RenderScript time is reported; the Java filter is not timed here.
+        mBenchmarkResult.setText("RS: " + elapsedMs + " ms");
+
+        // Restore the previous radius and redraw with it.
+        mRadius = savedRadius;
+        mScript.set_radius(mRadius);
+
+        mScript.invoke_filter();
+        mRS.finish();
+    }
 }
diff --git a/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs b/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs
new file mode 100644
index 0000000..cfffac8
--- /dev/null
+++ b/java/ImageProcessing/src/com/android/rs/image/horizontal_blur.rs
@@ -0,0 +1,30 @@
+#pragma version(1)
+
+#include "ip.rsh"
+
+// Horizontal pass of the separable gaussian blur. Writes to v_out the weighted
+// sum of the 2 * radius + 1 pixels of row y of fs->ain centred on column x,
+// repeating the edge pixels where the window hangs over either end of the row.
+void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) {
+    float4 *output = (float4 *)v_out;
+    const FilterStruct *fs = (const FilterStruct *)usrData;
+    const float4 *input = (const float4 *)rsGetElementAt(fs->ain, 0, y);
+
+    float3 blurredPixel = 0;
+    const float *gPtr = fs->gaussian;
+    if ((x > fs->radius) && (x < (fs->width - fs->radius))) {
+        const float4 *i = input + (x - fs->radius);
+        for(int r = -fs->radius; r <= fs->radius; r ++, gPtr++, i++) {
+            blurredPixel += i->xyz * gPtr[0];
+        }
+    } else {
+        for(int r = -fs->radius; r <= fs->radius; r ++, gPtr++) {
+            // x is unsigned, so x + r must be formed as a signed int; otherwise
+            // taps left of column 0 wrap around and clamp to the right edge.
+            int validW = rsClamp((int)x + r, (int)0, (int)(fs->width - 1));
+            blurredPixel += input[validW].xyz * gPtr[0];
+        }
+    }
+    output->xyz = blurredPixel;
+}
+
diff --git a/java/ImageProcessing/src/com/android/rs/image/ip.rsh b/java/ImageProcessing/src/com/android/rs/image/ip.rsh
new file mode 100644
index 0000000..1d7a719
--- /dev/null
+++ b/java/ImageProcessing/src/com/android/rs/image/ip.rsh
@@ -0,0 +1,15 @@
+#pragma rs java_package_name(com.android.rs.image)
+
+#define MAX_RADIUS 25
+
+typedef struct FilterStruct_s {  // user data handed to the blur kernels through rsForEach
+    rs_allocation ain;  // allocation the kernel reads its source pixels from
+    // Blur weights; the kernels read 2 * radius + 1 consecutive entries.
+    float *gaussian; //[MAX_RADIUS * 2 + 1];
+    int height;
+    int width;
+    int radius;
+    // height/width bound the rows/columns the kernels touch; radius is the blur half-width.
+} FilterStruct;
+
+
diff --git a/java/ImageProcessing/src/com/android/rs/image/threshold.rs b/java/ImageProcessing/src/com/android/rs/image/threshold.rs
new file mode 100644
index 0000000..4f46810
--- /dev/null
+++ b/java/ImageProcessing/src/com/android/rs/image/threshold.rs
@@ -0,0 +1,94 @@
+#pragma version(1)
+
+#include "ip.rsh"
+
+int height;  // image size and blur radius, copied into FilterStruct by filter()
+int width;
+int radius;
+// Image buffers used by filter(): uchar4 input and output, float4 intermediates.
+uchar4 * InPixel;
+uchar4 * OutPixel;
+float4 * ScratchPixel1;
+float4 * ScratchPixel2;
+
+#pragma rs export_func(filter);
+// Scripts launched by filter() for the vertical and horizontal blur passes.
+rs_script vBlurScript;
+rs_script hBlurScript;
+// Message id sent to the client at the end of filter().
+const int CMD_FINISHED = 1;
+
+// Store our coefficients here
+static float gaussian[MAX_RADIUS * 2 + 1];
+
+
+// Fills gaussian[0 .. 2 * radius] with normalized gaussian blur weights for the
+// current radius; entry r + radius holds the weight of the tap at offset r.
+static void computeGaussianWeights() {
+    // Compute gaussian weights for the blur
+    // e is the euler's number
+    float e = 2.718281828459045f;
+    float pi = 3.1415926535897932f;
+    // g(x) = ( 1 / ( sqrt( 2 * pi ) * sigma ) ) * e ^ ( -x^2 / ( 2 * sigma^2 ) )
+    // x is of the form [-radius .. 0 .. radius]
+    // and sigma varies with radius.
+    // Based on some experimental radius values and sigma's
+    // we approximately fit sigma = f(radius) as
+    // sigma = radius * 0.4  + 0.6
+    // The larger the radius gets, the more our gaussian blur
+    // will resemble a box blur since with large sigma
+    // the gaussian curve begins to lose its shape
+    float sigma = 0.4f * (float)radius + 0.6f;
+
+    // Now compute the coefficients
+    // We will store some redundant values to save some math during
+    // the blur calculations
+    // precompute some values
+    float coeff1 = 1.0f / (sqrt( 2.0f * pi ) * sigma);
+    float coeff2 = - 1.0f / (2.0f * sigma * sigma);
+
+    float normalizeFactor = 0.0f;
+    for(int r = -radius; r <= radius; r ++) {
+        float floatR = (float)r;
+        gaussian[r + radius] = coeff1 * pow(e, floatR * floatR * coeff2);
+        normalizeFactor += gaussian[r + radius];
+    }
+
+    // Normalize so the weights add up to one. radius must not exceed MAX_RADIUS,
+    // otherwise the writes above run past the end of gaussian[].
+    normalizeFactor = 1.0f / normalizeFactor;
+    for(int r = -radius; r <= radius; r ++) {
+        gaussian[r + radius] *= normalizeFactor;
+    }
+}
+
+
+// Converts every uchar4 pixel of InPixel to float4 and stores it in ScratchPixel1.
+static void copyInput() {
+    rs_allocation ain = rsGetAllocation(InPixel);
+    const uint32_t w = rsAllocationGetDimX(ain);
+    const uint32_t h = rsAllocationGetDimY(ain);
+    // x + y * w covers 0 .. w * h - 1 in order, so one flat index is enough.
+    for(uint32_t idx = 0; idx < w * h; idx++) {
+        ScratchPixel1[idx] = convert_float4(InPixel[idx]);
+    }
+}
+
+// Exported entry point: blurs InPixel horizontally into ScratchPixel2, then
+// vertically into OutPixel, and finally sends CMD_FINISHED to the client.
+void filter() {
+    computeGaussianWeights();
+    copyInput();
+
+    FilterStruct fs;
+    fs.radius = radius;
+    fs.height = height;
+    fs.width = width;
+    fs.gaussian = gaussian;
+    fs.ain = rsGetAllocation(ScratchPixel1);
+    rsForEach(hBlurScript, fs.ain, rsGetAllocation(ScratchPixel2), &fs);
+    fs.ain = rsGetAllocation(ScratchPixel2);
+    rsForEach(vBlurScript, fs.ain, rsGetAllocation(OutPixel), &fs);
+    rsSendToClientBlocking(CMD_FINISHED);
+}
+
diff --git a/java/ImageProcessing/src/com/android/rs/image/vertical_blur.rs b/java/ImageProcessing/src/com/android/rs/image/vertical_blur.rs
new file mode 100644
index 0000000..d901d2a
--- /dev/null
+++ b/java/ImageProcessing/src/com/android/rs/image/vertical_blur.rs
@@ -0,0 +1,92 @@
+#pragma version(1)
+
+#include "ip.rsh"
+
+static float inBlack;  // levels, gamma and saturation settings stored by the setters below
+static float outBlack;
+static float inWhite;
+static float outWhite;
+static float3 gamma;
+static float saturation;
+// Cached values derived from the settings by setLevels() and setSaturation().
+static float inWMinInB;
+static float outWMinOutB;
+static float overInWMinInB;
+static rs_matrix3x3 colorMat;
+
+#pragma rs export_func(setLevels, setSaturation, setGamma);
+
+// Stores the four level end points and caches the ranges derived from them.
+void setLevels(float iBlk, float oBlk, float iWht, float oWht) {
+    inBlack = iBlk;
+    inWhite = iWht;
+    outBlack = oBlk;
+    outWhite = oWht;
+    outWMinOutB = oWht - oBlk;
+    inWMinInB = iWht - iBlk;
+    overInWMinInB = 1.f / inWMinInB;  // NOTE(review): infinite when iWht == iBlk -- confirm callers avoid it
+}
+
+// Rebuilds colorMat for saturation sat. Each entry is (1 - sat) times the luma
+// weight selected by the first index, with sat added where both indices are
+// equal, so sat = 1 yields the identity matrix.
+void setSaturation(float sat) {
+    saturation = sat;
+
+    // Gamma 2.2 weights (we haven't converted our image to linear space yet for
+    // perf reasons). The linear-space alternative would be 0.3086f, 0.6094f,
+    // 0.0820f.
+    const float lumaWeight[3] = { 0.299f, 0.587f, 0.114f };
+    const float oneMinusS = 1.0f - saturation;
+
+    // Entries are set in the order (0,0), (0,1), (0,2), (1,0), ... (2,2):
+    // all three second indices for one first index before moving on.
+    for (int a = 0; a < 3; a++) {
+        const float grey = oneMinusS * lumaWeight[a];
+        for (int b = 0; b < 3; b++) {
+            float entry = grey;
+            if (a == b) {
+                entry += saturation;
+            }
+            rsMatrixSet(&colorMat, a, b, entry);
+        }
+    }
+}
+
+void setGamma(float g) {  // stores g in the float3 gamma that root() uses as the pow() exponent
+    gamma = (float3)g;
+}
+
+// Vertical pass of the separable gaussian blur (edge rows are repeated), followed
+// by colorMat, the input levels, gamma and the output levels; result goes to v_out.
+void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) {
+    uchar4 *output = (uchar4 *)v_out;
+    const FilterStruct *fs = (const FilterStruct *)usrData;
+    const float4 *input = (const float4 *)rsGetElementAt(fs->ain, x, 0);
+
+    float3 blurredPixel = 0;
+    const float *gPtr = fs->gaussian;
+    if ((y > fs->radius) && (y < (fs->height - fs->radius))) {
+        const float4 *i = input + ((y - fs->radius) * fs->width);
+        for(int r = -fs->radius; r <= fs->radius; r ++, gPtr++, i += fs->width) {
+            blurredPixel += i->xyz * gPtr[0];
+        }
+    } else {
+        for(int r = -fs->radius; r <= fs->radius; r ++, gPtr++) {
+            // y is unsigned, so y + r must be formed as a signed int; otherwise
+            // taps above row 0 wrap around and clamp to the bottom row.
+            int validH = rsClamp((int)y + r, (int)0, (int)(fs->height - 1));
+            const float4 *i = input + validH * fs->width;
+            blurredPixel += i->xyz * gPtr[0];
+        }
+    }
+
+    float3 temp = rsMatrixMultiply(&colorMat, blurredPixel);
+    temp = (clamp(temp, 0.f, 255.f) - inBlack) * overInWMinInB;
+    if (gamma.x != 1.0f)
+        temp = pow(temp, (float3)gamma);
+    temp = clamp(temp * outWMinOutB + outBlack, 0.f, 255.f);
+    output->xyz = convert_uchar3(temp);
+    // The fourth component (output->w) is not written here.
+}
+
diff --git a/java/Film/Android.mk b/java/ModelViewer/Android.mk
similarity index 81%
copy from java/Film/Android.mk
copy to java/ModelViewer/Android.mk
index 9e6ed7e..efe77d7 100644
--- a/java/Film/Android.mk
+++ b/java/ModelViewer/Android.mk
@@ -14,14 +14,18 @@
 # limitations under the License.
 #
 
+ifneq ($(TARGET_SIMULATOR),true)
+
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 
 LOCAL_MODULE_TAGS := optional
 
-LOCAL_SRC_FILES := $(call all-java-files-under, src)
+LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src)
 #LOCAL_STATIC_JAVA_LIBRARIES := android.renderscript
 
-LOCAL_PACKAGE_NAME := Film
+LOCAL_PACKAGE_NAME := ModelViewer
 
 include $(BUILD_PACKAGE)
+
+endif
diff --git a/java/ModelViewer/AndroidManifest.xml b/java/ModelViewer/AndroidManifest.xml
new file mode 100644
index 0000000..959fe53
--- /dev/null
+++ b/java/ModelViewer/AndroidManifest.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="com.android.modelviewer">
+    <application android:label="ModelViewer">
+        <activity android:name="SimpleModel"
+                  android:label="SimpleModel"
+                  android:screenOrientation="portrait"
+                  android:theme="@android:style/Theme.Black.NoTitleBar">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+        <activity android:name="SceneGraph"
+                  android:label="SceneGraph"
+                  android:theme="@android:style/Theme.Black.NoTitleBar">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+</manifest>
diff --git a/java/ModelViewer/res/drawable/robot.png b/java/ModelViewer/res/drawable/robot.png
new file mode 100644
index 0000000..f7353fd
--- /dev/null
+++ b/java/ModelViewer/res/drawable/robot.png
Binary files differ
diff --git a/java/ModelViewer/res/raw/robot.a3d b/java/ModelViewer/res/raw/robot.a3d
new file mode 100644
index 0000000..f48895c
--- /dev/null
+++ b/java/ModelViewer/res/raw/robot.a3d
Binary files differ
diff --git a/java/Film/src/com/android/film/Film.java b/java/ModelViewer/src/com/android/modelviewer/SceneGraph.java
similarity index 75%
copy from java/Film/src/com/android/film/Film.java
copy to java/ModelViewer/src/com/android/modelviewer/SceneGraph.java
index 6e99816..557e0cc 100644
--- a/java/Film/src/com/android/film/Film.java
+++ b/java/ModelViewer/src/com/android/modelviewer/SceneGraph.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.modelviewer;
 
 import android.renderscript.RSSurfaceView;
 import android.renderscript.RenderScript;
@@ -37,18 +37,9 @@
 
 import java.lang.Runtime;
 
-public class Film extends Activity {
-    //EventListener mListener = new EventListener();
+public class SceneGraph extends Activity {
 
-    private static final String LOG_TAG = "libRS_jni";
-    private static final boolean DEBUG  = false;
-    private static final boolean LOG_ENABLED = DEBUG ? Config.LOGD : Config.LOGV;
-
-    private FilmView mView;
-
-    // get the current looper (from your Activity UI thread for instance
-
-
+    private SceneGraphView mView;
 
     @Override
     public void onCreate(Bundle icicle) {
@@ -56,7 +47,7 @@
 
         // Create our Preview view and set it as the content of our
         // Activity
-        mView = new FilmView(this);
+        mView = new SceneGraphView(this);
         setContentView(mView);
     }
 
@@ -74,17 +65,7 @@
         // to take appropriate action when the activity looses focus
         super.onPause();
         mView.onPause();
-
-        Runtime.getRuntime().exit(0);
     }
 
-
-    static void log(String message) {
-        if (LOG_ENABLED) {
-            Log.v(LOG_TAG, message);
-        }
-    }
-
-
 }
 
diff --git a/java/ModelViewer/src/com/android/modelviewer/SceneGraphRS.java b/java/ModelViewer/src/com/android/modelviewer/SceneGraphRS.java
new file mode 100644
index 0000000..81bd578
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/SceneGraphRS.java
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.modelviewer;
+
+import java.io.Writer;
+import java.util.Map;
+import java.util.Vector;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.renderscript.Element.Builder;
+import android.renderscript.ProgramStore.DepthFunc;
+import android.util.Log;
+
+
+public class SceneGraphRS {
+    // Sets up the scenegraph script: programs, texture, mesh, font and transform hierarchy.
+    private final int STATE_LAST_FOCUS = 1;
+
+    int mWidth;
+    int mHeight;
+    int mRotation;
+
+    public SceneGraphRS() {
+    }
+    // Stores the context, resources and surface size, resets the rotation and runs initRS().
+    public void init(RenderScriptGL rs, Resources res, int width, int height) {
+        mRS = rs;
+        mRes = res;
+        mWidth = width;
+        mHeight = height;
+        mRotation = 0;
+        initRS();
+    }
+
+    private Resources mRes;
+    private RenderScriptGL mRS;
+    private Sampler mSampler;
+    private ProgramStore mPSBackground;
+    private ProgramFragment mPFBackground;
+    private ProgramVertex mPVBackground;
+    private ProgramVertex.MatrixAllocation mPVA;
+
+    private Allocation mGridImage;
+    private Allocation mAllocPV;
+
+    private Mesh mMesh;
+
+    private Font mItalic;
+    private Allocation mTextAlloc;
+
+    private ScriptC_scenegraph mScript;
+    private ScriptC_transform mTransformScript;
+
+    int mLastX;
+    int mLastY;
+    // Turns the x delta since the last event into rotation; deltas under 3 or over 50 are ignored.
+    public void touchEvent(int x, int y) {
+        int dx = mLastX - x;
+        if(Math.abs(dx) > 50 || Math.abs(dx) < 3) {
+            dx = 0;
+        }
+
+        mRotation -= dx;
+        if(mRotation > 360) {
+            mRotation -= 360;
+        }
+        if(mRotation < 0) {
+            mRotation += 360;
+        }
+
+        mScript.set_gRotate(-(float)mRotation);
+
+        mLastX = x;
+        mLastY = y;
+    }
+    // Program store with depth func LESS, dithering off and depth mask true; passed to the script.
+    private void initPFS() {
+        ProgramStore.Builder b = new ProgramStore.Builder(mRS);
+
+        b.setDepthFunc(ProgramStore.DepthFunc.LESS);
+        b.setDitherEnable(false);
+        b.setDepthMask(true);
+        mPSBackground = b.create();
+
+        mScript.set_gPFSBackground(mPSBackground);
+    }
+    // Fragment program with one RGBA texture in REPLACE mode and a linear, clamped sampler.
+    private void initPF() {
+        Sampler.Builder bs = new Sampler.Builder(mRS);
+        bs.setMin(Sampler.Value.LINEAR);
+        bs.setMag(Sampler.Value.LINEAR);
+        bs.setWrapS(Sampler.Value.CLAMP);
+        bs.setWrapT(Sampler.Value.CLAMP);
+        mSampler = bs.create();
+
+        ProgramFragment.Builder b = new ProgramFragment.Builder(mRS);
+        b.setTexture(ProgramFragment.Builder.EnvMode.REPLACE,
+                     ProgramFragment.Builder.Format.RGBA, 0);
+        mPFBackground = b.create();
+        mPFBackground.bindSampler(mSampler, 0);
+
+        mScript.set_gPFBackground(mPFBackground);
+    }
+    // Vertex program built with builder defaults, bound to a fresh MatrixAllocation.
+    private void initPV() {
+        ProgramVertex.Builder pvb = new ProgramVertex.Builder(mRS);
+        mPVBackground = pvb.create();
+
+        mPVA = new ProgramVertex.MatrixAllocation(mRS);
+        mPVBackground.bindAllocation(mPVA);
+
+        mScript.set_gPVBackground(mPVBackground);
+    }
+    // Loads R.drawable.robot as an RGB_565 allocation, uploads it as a texture and hands it to the script.
+    private void loadImage() {
+        mGridImage = Allocation.createFromBitmapResource(mRS, mRes, R.drawable.robot, Element.RGB_565(mRS), true);
+        mGridImage.uploadToTexture(0);
+
+        mScript.set_gTGrid(mGridImage);
+    }
+    // Wraps a fixed caption string in an allocation and passes it to the script.
+    private void initTextAllocation() {
+        String allocString = "Displaying file: R.raw.robot";
+        mTextAlloc = Allocation.createFromString(mRS, allocString);
+        mScript.set_gTextAlloc(mTextAlloc);
+    }
+
+    SgTransform mRootTransform;
+    SgTransform mGroup1;
+
+    SgTransform mRobot1;
+    SgTransform mRobot2;
+    // Builds root -> group1 -> {robot1, robot2} and fills in each node's transform slots.
+    void initTransformHierarchy() {
+        mRootTransform = new SgTransform(mRS);
+
+        mGroup1 = new SgTransform(mRS);
+        mRootTransform.addChild(mGroup1);
+
+        mRobot1 = new SgTransform(mRS);
+        mRobot2 = new SgTransform(mRS);
+
+        mGroup1.addChild(mRobot1);
+        mGroup1.addChild(mRobot2);
+
+        mGroup1.setTransform(0, new Float4(0.0f, 0.0f, -15.0f, 0.0f), TransformType.TRANSLATE);
+        mGroup1.setTransform(1, new Float4(0.0f, 1.0f, 0.0f, 15.0f), TransformType.ROTATE);
+
+        mRobot1.setTransform(0, new Float4(-3.0f, -0.5f, 0.0f, 0.0f), TransformType.TRANSLATE);
+        mRobot1.setTransform(1, new Float4(0.0f, 1.0f, 0.0f, 20.0f), TransformType.ROTATE);
+        mRobot1.setTransform(2, new Float4(0.2f, 0.2f, 0.2f, 0.0f), TransformType.SCALE);
+
+        mRobot2.setTransform(0, new Float4(3.0f, 0.0f, 0.0f, 0.0f), TransformType.TRANSLATE);
+        mRobot2.setTransform(1, new Float4(0.0f, 1.0f, 0.0f, -20.0f), TransformType.ROTATE);
+        mRobot2.setTransform(2, new Float4(0.3f, 0.3f, 0.3f, 0.0f), TransformType.SCALE);
+    }
+    // Creates both scripts, builds the state above, loads the mesh and font, then binds the root script.
+    private void initRS() {
+
+        mScript = new ScriptC_scenegraph(mRS, mRes, R.raw.scenegraph, true);
+        mTransformScript = new ScriptC_transform(mRS, mRes, R.raw.transform, false);
+        mTransformScript.set_transformScript(mTransformScript);
+
+        mScript.set_gTransformRS(mTransformScript);
+
+        initPFS();
+        initPF();
+        initPV();
+
+        loadImage();
+
+        FileA3D model = FileA3D.createFromResource(mRS, mRes, R.raw.robot);
+        FileA3D.IndexEntry entry = model.getIndexEntry(0);
+        if(entry == null || entry.getClassID() != FileA3D.ClassID.MESH) {
+            Log.e("rs", "could not load model");
+        }
+        else {
+            mMesh = (Mesh)entry.getObject();
+            mScript.set_gTestMesh(mMesh);
+        }
+
+        mItalic = Font.create(mRS, mRes, "DroidSerif-Italic.ttf", 8);
+        mScript.set_gItalic(mItalic);
+
+        initTextAllocation();
+
+        initTransformHierarchy();
+
+        Log.v("========SceneGraph========", "transform hierarchy initialized");
+
+        mScript.bind_gRootNode(mRootTransform.getField());
+
+        mScript.bind_gGroup(mGroup1.mParent.mChildField);
+        mScript.bind_gRobot1(mRobot1.mParent.mChildField);
+        mScript.set_gRobot1Index(mRobot1.mIndexInParentGroup);
+        mScript.bind_gRobot2(mRobot2.mParent.mChildField);
+        mScript.set_gRobot2Index(mRobot2.mIndexInParentGroup);
+
+        mRS.contextBindRootScript(mScript);
+    }
+}
+
+
+
diff --git a/java/Film/src/com/android/film/FilmView.java b/java/ModelViewer/src/com/android/modelviewer/SceneGraphView.java
similarity index 84%
rename from java/Film/src/com/android/film/FilmView.java
rename to java/ModelViewer/src/com/android/modelviewer/SceneGraphView.java
index 5bc2811..9457fd7 100644
--- a/java/Film/src/com/android/film/FilmView.java
+++ b/java/ModelViewer/src/com/android/modelviewer/SceneGraphView.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.modelviewer;
 
 import java.io.Writer;
 import java.util.ArrayList;
@@ -39,23 +39,25 @@
 import android.view.KeyEvent;
 import android.view.MotionEvent;
 
-public class FilmView extends RSSurfaceView {
+public class SceneGraphView extends RSSurfaceView {
 
-    public FilmView(Context context) {
+    public SceneGraphView(Context context) {
         super(context);
         //setFocusable(true);
     }
 
     private RenderScriptGL mRS;
-    private FilmRS mRender;
+    private SceneGraphRS mRender;
 
 
     public void surfaceChanged(SurfaceHolder holder, int format, int w, int h) {
         super.surfaceChanged(holder, format, w, h);
         if (mRS == null) {
-            mRS = createRenderScript(true);
+            RenderScriptGL.SurfaceConfig sc = new RenderScriptGL.SurfaceConfig();
+            sc.setDepth(16, 24);
+            mRS = createRenderScript(sc);
             mRS.contextSetSurface(w, h, holder.getSurface());
-            mRender = new FilmRS();
+            mRender = new SceneGraphRS();
             mRender.init(mRS, getResources(), w, h);
         }
     }
@@ -85,7 +87,8 @@
         if (act == ev.ACTION_UP) {
             ret = false;
         }
-        mRender.setFilmStripPosition((int)ev.getX(), (int)ev.getY() / 5);
+
+        mRender.touchEvent((int)ev.getX(), (int)ev.getY());
         return ret;
     }
 }
diff --git a/java/ModelViewer/src/com/android/modelviewer/SgTransform.java b/java/ModelViewer/src/com/android/modelviewer/SgTransform.java
new file mode 100644
index 0000000..8351f42
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/SgTransform.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.modelviewer;
+
+import java.io.Writer;
+import java.util.Map;
+import java.util.Vector;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.renderscript.Element.Builder;
+import android.renderscript.ProgramStore.DepthFunc;
+import android.util.Log;
+
+enum TransformType {
+    // Kinds of transform a slot can hold; mID is the int written into transformTypes[].
+    NONE(0),
+    TRANSLATE(1),
+    ROTATE(2),
+    SCALE(3);
+
+    int mID;
+    TransformType(int value) {
+        this.mID = value;
+    }
+}
+
+public class SgTransform {
+    // Java-side node of the transform hierarchy. Holds one ScriptField_SgTransform.Item
+    // plus its child nodes, which getData() packs into mChildField.
+    ScriptField_SgTransform mTransformField;
+    ScriptField_SgTransform mChildField;
+    public ScriptField_SgTransform.Item mTransformData;
+
+    RenderScript mRS;
+
+    Vector<SgTransform> mChildren;
+    SgTransform mParent;
+    int mIndexInParentGroup;
+    // Records the owning node and this node's position in the owner's child list.
+    public void setParent(SgTransform parent, int parentIndex) {
+        mParent = parent;
+        mIndexInParentGroup = parentIndex;
+    }
+    // Appends child and tells it its parent and its index among the children.
+    public void addChild(SgTransform child) {
+        mChildren.add(child);
+        child.setParent(this, mChildren.size() - 1);
+    }
+    // Writes value and type into transform slot index of mTransformData.
+    public void setTransform(int index, Float4 value, TransformType type) {
+        mTransformData.transforms[index] = value;
+        mTransformData.transformTypes[index] = type.mID;
+    }
+    // Resets every transform slot to a zero vector of type NONE, sets isDirty to 1 and clears children.
+    void initData() {
+        int numElements = mTransformData.transforms.length;
+        mTransformData.transformTypes = new int[numElements];
+        for(int i = 0; i < numElements; i ++) {
+            mTransformData.transforms[i] = new Float4(0, 0, 0, 0);
+            mTransformData.transformTypes[i] = TransformType.NONE.mID;
+        }
+
+        mTransformData.isDirty = 1;
+        mTransformData.children = null;
+    }
+    // Creates a childless node whose item starts in the initData() state.
+    public SgTransform(RenderScript rs) {
+        mRS = rs;
+        mTransformData = new ScriptField_SgTransform.Item();
+        mChildren = new Vector<SgTransform>();
+        initData();
+    }
+    // Returns this node's item; with children present, first rebuilds mChildField from their items (recursively).
+    public ScriptField_SgTransform.Item getData() {
+        final int childCount = mChildren.size();
+        if(childCount != 0) {
+            mChildField = new ScriptField_SgTransform(mRS, childCount);
+            mTransformData.children = mChildField.getAllocation();
+            for(int i = 0; i < childCount; i ++) {
+                // Typed list: no cast needed to reach the child's getData().
+                mChildField.set(mChildren.get(i).getData(), i, false);
+            }
+            mChildField.copyAll();
+        }
+
+        return mTransformData;
+    }
+    // Wraps getData() in a new one-element field and returns that field.
+    public ScriptField_SgTransform getField() {
+        mTransformField = new ScriptField_SgTransform(mRS, 1);
+        mTransformField.set(getData(), 0, true);
+        return mTransformField;
+    }
+}
+
+
+
diff --git a/java/Film/src/com/android/film/Film.java b/java/ModelViewer/src/com/android/modelviewer/SimpleModel.java
similarity index 75%
copy from java/Film/src/com/android/film/Film.java
copy to java/ModelViewer/src/com/android/modelviewer/SimpleModel.java
index 6e99816..cb7c39c 100644
--- a/java/Film/src/com/android/film/Film.java
+++ b/java/ModelViewer/src/com/android/modelviewer/SimpleModel.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.modelviewer;
 
 import android.renderscript.RSSurfaceView;
 import android.renderscript.RenderScript;
@@ -37,18 +37,9 @@
 
 import java.lang.Runtime;
 
-public class Film extends Activity {
-    //EventListener mListener = new EventListener();
+public class SimpleModel extends Activity {
 
-    private static final String LOG_TAG = "libRS_jni";
-    private static final boolean DEBUG  = false;
-    private static final boolean LOG_ENABLED = DEBUG ? Config.LOGD : Config.LOGV;
-
-    private FilmView mView;
-
-    // get the current looper (from your Activity UI thread for instance
-
-
+    private SimpleModelView mView;
 
     @Override
     public void onCreate(Bundle icicle) {
@@ -56,7 +47,7 @@
 
         // Create our Preview view and set it as the content of our
         // Activity
-        mView = new FilmView(this);
+        mView = new SimpleModelView(this);
         setContentView(mView);
     }
 
@@ -74,17 +65,7 @@
         // to take appropriate action when the activity looses focus
         super.onPause();
         mView.onPause();
-
-        Runtime.getRuntime().exit(0);
     }
 
-
-    static void log(String message) {
-        if (LOG_ENABLED) {
-            Log.v(LOG_TAG, message);
-        }
-    }
-
-
 }
 
diff --git a/java/ModelViewer/src/com/android/modelviewer/SimpleModelRS.java b/java/ModelViewer/src/com/android/modelviewer/SimpleModelRS.java
new file mode 100644
index 0000000..ccbecd8
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/SimpleModelRS.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.modelviewer;
+
+import java.io.Writer;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.renderscript.ProgramStore.DepthFunc;
+import android.util.Log;
+
+
+public class SimpleModelRS {
+    // Host-side setup for the simplemodel script: render state, assets, and touch input.
+    private final int STATE_LAST_FOCUS = 1;
+
+    int mWidth;
+    int mHeight;
+    int mRotation;
+
+    public SimpleModelRS() {
+    }
+
+    public void init(RenderScriptGL rs, Resources res, int width, int height) {
+        mWidth = width;
+        mHeight = height;
+        mRotation = 0;
+        mRes = res;
+        mRS = rs;
+        initRS();  // reads mRS and mRes, so it runs last
+    }
+
+    private Resources mRes;
+    private RenderScriptGL mRS;
+    private Sampler mSampler;
+    private ProgramStore mPSBackground;
+    private ProgramFragment mPFBackground;
+    private ProgramVertex mPVBackground;
+    private ProgramVertex.MatrixAllocation mPVA;
+
+    private Allocation mGridImage;
+    private Allocation mAllocPV;
+
+    private Mesh mMesh;
+
+    private Font mItalic;
+    private Allocation mTextAlloc;
+
+    private ScriptC_simplemodel mScript;
+    // Last touch position recorded by touchEvent().
+    int mLastX;
+    int mLastY;
+
+    public void touchEvent(int x, int y) {
+        // Horizontal moves under 3px or over 50px do not rotate the model.
+        final int moved = mLastX - x;
+        final int distance = Math.abs(moved);
+        final int step = (distance > 50 || distance < 3) ? 0 : moved;
+
+        int angle = mRotation - step;
+        if (angle > 360) {
+            angle -= 360;
+        }
+        if (angle < 0) {
+            angle += 360;
+        }
+        mRotation = angle;
+        mScript.set_gRotate((float) angle);
+
+        mLastX = x;
+        mLastY = y;
+    }
+
+    private void initPFS() {
+        final ProgramStore.Builder storeBuilder = new ProgramStore.Builder(mRS);
+        // LESS depth test with depth writes on and dithering off.
+        storeBuilder.setDepthFunc(DepthFunc.LESS);
+        storeBuilder.setDitherEnable(false);
+        storeBuilder.setDepthMask(true);
+        mPSBackground = storeBuilder.create();
+        // Hand the finished store to the script.
+        mScript.set_gPFSBackground(mPSBackground);
+    }
+
+    private void initPF() {
+        final Sampler.Builder samplerBuilder = new Sampler.Builder(mRS);
+        samplerBuilder.setMin(Sampler.Value.LINEAR);
+        samplerBuilder.setMag(Sampler.Value.LINEAR);
+        samplerBuilder.setWrapS(Sampler.Value.CLAMP);
+        samplerBuilder.setWrapT(Sampler.Value.CLAMP);
+        mSampler = samplerBuilder.create();
+        // One REPLACE-mode RGBA texture in slot 0, sampled with the sampler built above.
+        final ProgramFragment.Builder fragmentBuilder = new ProgramFragment.Builder(mRS);
+        fragmentBuilder.setTexture(ProgramFragment.Builder.EnvMode.REPLACE,
+                                   ProgramFragment.Builder.Format.RGBA, 0);
+        mPFBackground = fragmentBuilder.create();
+        mPFBackground.bindSampler(mSampler, 0);
+
+        mScript.set_gPFBackground(mPFBackground);
+    }
+
+    private void initPV() {
+        final ProgramVertex.Builder vertexBuilder = new ProgramVertex.Builder(mRS);
+        mPVBackground = vertexBuilder.create();
+        // Attach a matrix allocation to the vertex program.
+        mPVA = new ProgramVertex.MatrixAllocation(mRS);
+        mPVBackground.bindAllocation(mPVA);
+
+        mScript.set_gPVBackground(mPVBackground);
+    }
+
+    private void loadImage() {
+        final Element pixelFormat = Element.RGB_565(mRS);
+        mGridImage = Allocation.createFromBitmapResource(mRS, mRes, R.drawable.robot, pixelFormat, true);
+        mGridImage.uploadToTexture(0);
+        mScript.set_gTGrid(mGridImage);
+    }
+
+    private void initTextAllocation() {
+        // Fixed caption that the script draws through gTextAlloc.
+        mTextAlloc = Allocation.createFromString(mRS, "Displaying file: R.raw.robot");
+        mScript.set_gTextAlloc(mTextAlloc);
+    }
+
+    private void initRS() {
+        // The script is created first: every init*/load* helper below pushes state into it.
+        mScript = new ScriptC_simplemodel(mRS, mRes, R.raw.simplemodel, true);
+
+        initPFS();
+        initPF();
+        initPV();
+
+        loadImage();
+
+        // Use the first index entry of the A3D file as the mesh, if it really is one.
+        final FileA3D a3dFile = FileA3D.createFromResource(mRS, mRes, R.raw.robot);
+        final FileA3D.IndexEntry firstEntry = a3dFile.getIndexEntry(0);
+        if (firstEntry != null && firstEntry.getClassID() == FileA3D.ClassID.MESH) {
+            mMesh = (Mesh) firstEntry.getObject();
+            mScript.set_gTestMesh(mMesh);
+        } else {
+            Log.e("rs", "could not load model");
+        }
+
+        mItalic = Font.create(mRS, mRes, "DroidSerif-Italic.ttf", 8);
+        mScript.set_gItalic(mItalic);
+
+        initTextAllocation();
+
+        mRS.contextBindRootScript(mScript);
+    }
+}
+
+
+
diff --git a/java/Film/src/com/android/film/FilmView.java b/java/ModelViewer/src/com/android/modelviewer/SimpleModelView.java
similarity index 83%
copy from java/Film/src/com/android/film/FilmView.java
copy to java/ModelViewer/src/com/android/modelviewer/SimpleModelView.java
index 5bc2811..4253085 100644
--- a/java/Film/src/com/android/film/FilmView.java
+++ b/java/ModelViewer/src/com/android/modelviewer/SimpleModelView.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.modelviewer;
 
 import java.io.Writer;
 import java.util.ArrayList;
@@ -39,23 +39,25 @@
 import android.view.KeyEvent;
 import android.view.MotionEvent;
 
-public class FilmView extends RSSurfaceView {
+public class SimpleModelView extends RSSurfaceView {
 
-    public FilmView(Context context) {
+    public SimpleModelView(Context context) {
         super(context);
         //setFocusable(true);
     }
 
     private RenderScriptGL mRS;
-    private FilmRS mRender;
+    private SimpleModelRS mRender;
 
 
     public void surfaceChanged(SurfaceHolder holder, int format, int w, int h) {
         super.surfaceChanged(holder, format, w, h);
         if (mRS == null) {
-            mRS = createRenderScript(true);
+            RenderScriptGL.SurfaceConfig sc = new RenderScriptGL.SurfaceConfig();
+            sc.setDepth(16, 24);
+            mRS = createRenderScript(sc);
             mRS.contextSetSurface(w, h, holder.getSurface());
-            mRender = new FilmRS();
+            mRender = new SimpleModelRS();
             mRender.init(mRS, getResources(), w, h);
         }
     }
@@ -85,7 +87,8 @@
         if (act == ev.ACTION_UP) {
             ret = false;
         }
-        mRender.setFilmStripPosition((int)ev.getX(), (int)ev.getY() / 5);
+
+        mRender.touchEvent((int)ev.getX(), (int)ev.getY());
         return ret;
     }
 }
diff --git a/java/ModelViewer/src/com/android/modelviewer/scenegraph.rs b/java/ModelViewer/src/com/android/modelviewer/scenegraph.rs
new file mode 100644
index 0000000..3bee8d6
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/scenegraph.rs
@@ -0,0 +1,91 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.modelviewer)
+
+#include "rs_graphics.rsh"
+#include "transform_def.rsh"
+
+rs_program_vertex gPVBackground;
+rs_program_fragment gPFBackground;
+
+rs_allocation gTGrid;
+rs_mesh gTestMesh;
+
+rs_program_store gPFSBackground;
+
+float gRotate;
+
+rs_font gItalic;
+rs_allocation gTextAlloc;
+
+rs_script gTransformRS;
+
+SgTransform *gGroup;
+SgTransform *gRobot1;
+int gRobot1Index;
+SgTransform *gRobot2;
+int gRobot2Index;
+
+SgTransform *gRootNode;
+
+void init() {
+    gRotate = 0.0f;  // reset only; root() in this script never reads gRotate
+}
+
+int root(int launchID) {
+    // Animate: bump the w component of transform slot 1 on the group and on both robots.
+    gGroup->transforms[1].w += 0.5f;
+    gGroup->isDirty = 1;
+
+    SgTransform *robot1Ptr = gRobot1 + gRobot1Index;
+
+    robot1Ptr->transforms[1].w -= 1.5f;
+    robot1Ptr->isDirty = 1;
+
+    SgTransform *robot2Ptr = gRobot2 + gRobot2Index;
+    robot2Ptr->transforms[1].w += 2.5f;
+    robot2Ptr->isDirty = 1;
+    // Run the transform script over the root node's children (usrData = 0, i.e. no parent data).
+    rsForEach(gTransformRS, gRootNode->children, gRootNode->children, 0);
+
+    rsgClearColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgClearDepth(1.0f);
+
+    rsgBindProgramVertex(gPVBackground);
+    rs_matrix4x4 proj;
+    float aspect = (float)rsgGetWidth() / (float)rsgGetHeight();
+    rsMatrixLoadPerspective(&proj, 30.0f, aspect, 0.1f, 100.0f);
+    rsgProgramVertexLoadProjectionMatrix(&proj);
+
+    rsgBindProgramFragment(gPFBackground);
+    rsgBindProgramStore(gPFSBackground);
+    rsgBindTexture(gPFBackground, 0, gTGrid);
+    // Draw the same mesh twice, once per robot node, using each node's global matrix.
+    rsgProgramVertexLoadModelMatrix(&robot1Ptr->globalMat);
+    rsgDrawMesh(gTestMesh);
+
+    rsgProgramVertexLoadModelMatrix(&robot2Ptr->globalMat);
+    rsgDrawMesh(gTestMesh);
+    // Text colour is set with rsgFontColor(), the same call simplemodel.rs uses before rsgDrawText().
+    rsgFontColor(0.3f, 0.3f, 0.3f, 1.0f);
+    rsgDrawText("Renderscript transform test", 30, 695);
+
+    rsgBindFont(gItalic);
+    rsgDrawText(gTextAlloc, 30, 730);
+
+    return 10;
+}
diff --git a/java/ModelViewer/src/com/android/modelviewer/simplemodel.rs b/java/ModelViewer/src/com/android/modelviewer/simplemodel.rs
new file mode 100644
index 0000000..419de62
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/simplemodel.rs
@@ -0,0 +1,71 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.modelviewer)
+
+#include "rs_graphics.rsh"
+
+rs_program_vertex gPVBackground;
+rs_program_fragment gPFBackground;
+
+rs_allocation gTGrid;
+rs_mesh gTestMesh;
+
+rs_program_store gPFSBackground;
+
+float gRotate;
+
+rs_font gItalic;
+rs_allocation gTextAlloc;
+
+void init() {
+    gRotate = 0.0f;  // initial Y rotation; root() feeds gRotate to rsMatrixRotate()
+}
+
+int root(int launchID) {
+    // Clear colour to opaque white and depth to 1.0 before drawing the frame.
+    rsgClearColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgClearDepth(1.0f);
+
+    rsgBindProgramVertex(gPVBackground);
+    rs_matrix4x4 projection;
+    const float aspectRatio = (float)rsgGetWidth() / (float)rsgGetHeight();
+    rsMatrixLoadPerspective(&projection, 30.0f, aspectRatio, 0.1f, 100.0f);
+    rsgProgramVertexLoadProjectionMatrix(&projection);
+
+    rsgBindProgramFragment(gPFBackground);
+    rsgBindProgramStore(gPFSBackground);
+    rsgBindTexture(gPFBackground, 0, gTGrid);
+
+    // Model matrix: translate, scale, fixed rotation of 25 about X, then gRotate about Y.
+    rs_matrix4x4 modelMat;
+    rsMatrixLoadIdentity(&modelMat);
+    rsMatrixTranslate(&modelMat, 0.0f, -0.3f, -10.0f);
+    rsMatrixScale(&modelMat, 0.2f, 0.2f, 0.2f);
+    rsMatrixRotate(&modelMat, 25.0f, 1.0f, 0.0f, 0.0f);
+    rsMatrixRotate(&modelMat, gRotate, 0.0f, 1.0f, 0.0f);
+    rsgProgramVertexLoadModelMatrix(&modelMat);
+
+    rsgDrawMesh(gTestMesh);
+
+    rsgFontColor(0.3f, 0.3f, 0.3f, 1.0f);
+    rsgDrawText("Renderscript model test", 30, 695);
+    // The second caption is drawn from the gTextAlloc allocation with the gItalic font bound.
+    rsgBindFont(gItalic);
+    rsgDrawText(gTextAlloc, 30, 730);
+
+    return 10;
+}
diff --git a/java/ModelViewer/src/com/android/modelviewer/transform.rs b/java/ModelViewer/src/com/android/modelviewer/transform.rs
new file mode 100644
index 0000000..e7c04de
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/transform.rs
@@ -0,0 +1,97 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.modelviewer)
+
+#include "transform_def.rsh"
+
+rs_script transformScript;
+
+typedef struct {
+    int changed;        // set to 1 by root() when the parent's matrices were rebuilt in this pass
+    rs_matrix4x4 *mat;  // root() stores the parent's &globalMat here
+} ParentData;
+
+void appendTransformation(int type, float4 data, rs_matrix4x4 *mat) {
+    rs_matrix4x4 temp;  // matrix for this one transform; multiplied onto *mat below
+    switch(type) {
+    case TRANSFORM_TRANSLATE:
+        rsMatrixLoadTranslate(&temp, data.x, data.y, data.z);
+        break;
+    case TRANSFORM_ROTATE:
+        rsMatrixLoadRotate(&temp, data.w, data.x, data.y, data.z);  // w is passed as the rotation, xyz as the axis
+        break;
+    case TRANSFORM_SCALE:
+        rsMatrixLoadScale(&temp, data.x, data.y, data.z);
+        break;
+    default: return;  // TRANSFORM_NONE or unknown: temp was never filled in, so leave *mat untouched
+    }
+    rsMatrixMultiply(mat, &temp);
+}
+
+void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) {
+    // Per-node kernel: rebuild this node's matrices when needed, then recurse into its children.
+    SgTransform *data = (SgTransform *)v_out;
+    const ParentData *parent = (const ParentData *)usrData;
+
+    // usrData is 0 when the launch comes without parent data; otherwise it is the
+    // ParentData that the parent node's invocation passed to rsForEach() below.
+
+    rs_matrix4x4 *localMat = &data->localMat;
+    rs_matrix4x4 *globalMat = &data->globalMat;
+
+    ParentData toChild;
+    toChild.changed = 0;
+    toChild.mat = globalMat;
+
+    //rsDebug("Transform is dirty", data->isDirty);
+
+    // Refresh matrices if dirty
+    if(data->isDirty) {
+        data->isDirty = 0;
+        toChild.changed = 1;
+
+        // Reset our local matrix
+        rsMatrixLoadIdentity(localMat);
+        // Apply the transform slots in order, stopping at the first unused (NONE) slot.
+        for(int i = 0; i < 16; i ++) {
+            if(data->transformTypes[i] == TRANSFORM_NONE) {
+                break;
+            }
+            //rsDebug("Transform adding transformation", transformTypes[i]);
+            appendTransformation(data->transformTypes[i], data->transforms[i], localMat);
+        }
+    }
+
+    // globalMat = parent's globalMat * localMat, so it must be recomputed when EITHER the
+    // parent changed or our own local matrix was just rebuilt (toChild.changed is 1 then).
+    if(parent) {
+        if(parent->changed || toChild.changed) {
+            toChild.changed = 1;
+
+            rsMatrixLoad(globalMat, parent->mat);
+            rsMatrixMultiply(globalMat, localMat);
+        }
+    }
+    else {
+        rsMatrixLoad(globalMat, localMat);
+    }
+
+    //rsDebug("Transform calling self with child ", (int)data->children.p);
+    if(data->children.p) {
+        rsForEach(transformScript, data->children, data->children, (void*)&toChild);
+    }
+}
diff --git a/java/ModelViewer/src/com/android/modelviewer/transform_def.rsh b/java/ModelViewer/src/com/android/modelviewer/transform_def.rsh
new file mode 100644
index 0000000..7d1721c
--- /dev/null
+++ b/java/ModelViewer/src/com/android/modelviewer/transform_def.rsh
@@ -0,0 +1,35 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.modelviewer)
+
+#define TRANSFORM_NONE 0
+#define TRANSFORM_TRANSLATE 1
+#define TRANSFORM_ROTATE 2
+#define TRANSFORM_SCALE 3
+
+typedef struct __attribute__((packed, aligned(4))) {
+    rs_matrix4x4 globalMat;  // parent's globalMat * localMat, or a copy of localMat without a parent (see transform.rs)
+    rs_matrix4x4 localMat;   // identity multiplied by each active entry of transforms[], in order
+    // Parallel arrays: transformTypes[i] is the TRANSFORM_* id that says how transforms[i] is used.
+    float4 transforms[16];
+    int transformTypes[16];
+    // Non-zero asks transform.rs to rebuild localMat; it clears the flag when it does.
+    int isDirty;
+    // Allocation holding the child nodes; transform.rs recurses only when children.p is non-null.
+    rs_allocation children;
+
+} SgTransform;
diff --git a/java/Film/Android.mk b/java/Samples/Android.mk
similarity index 82%
rename from java/Film/Android.mk
rename to java/Samples/Android.mk
index 9e6ed7e..65ae734 100644
--- a/java/Film/Android.mk
+++ b/java/Samples/Android.mk
@@ -14,14 +14,18 @@
 # limitations under the License.
 #
 
+ifneq ($(TARGET_SIMULATOR),true)
+
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 
 LOCAL_MODULE_TAGS := optional
 
-LOCAL_SRC_FILES := $(call all-java-files-under, src)
+LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src)
 #LOCAL_STATIC_JAVA_LIBRARIES := android.renderscript
 
-LOCAL_PACKAGE_NAME := Film
+LOCAL_PACKAGE_NAME := Samples
 
 include $(BUILD_PACKAGE)
+
+endif
diff --git a/java/Samples/AndroidManifest.xml b/java/Samples/AndroidManifest.xml
new file mode 100644
index 0000000..be191f2
--- /dev/null
+++ b/java/Samples/AndroidManifest.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="com.android.samples">
+    <application android:label="Samples"
+    android:icon="@drawable/test_pattern">
+        <activity android:name="RsList"
+                  android:label="RsList"                  
+                  android:theme="@android:style/Theme.Black.NoTitleBar">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+        
+        <activity android:name="RsRenderStates"
+                  android:label="RsStates"                  
+                  android:theme="@android:style/Theme.Black.NoTitleBar">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity>
+    </application>
+</manifest>
diff --git a/java/Samples/res/drawable/checker.png b/java/Samples/res/drawable/checker.png
new file mode 100644
index 0000000..b631e1e
--- /dev/null
+++ b/java/Samples/res/drawable/checker.png
Binary files differ
diff --git a/java/Samples/res/drawable/data.png b/java/Samples/res/drawable/data.png
new file mode 100644
index 0000000..8e34714
--- /dev/null
+++ b/java/Samples/res/drawable/data.png
Binary files differ
diff --git a/java/Samples/res/drawable/leaf.png b/java/Samples/res/drawable/leaf.png
new file mode 100644
index 0000000..3cd3775
--- /dev/null
+++ b/java/Samples/res/drawable/leaf.png
Binary files differ
diff --git a/java/Samples/res/drawable/test_pattern.png b/java/Samples/res/drawable/test_pattern.png
new file mode 100644
index 0000000..e7d1455
--- /dev/null
+++ b/java/Samples/res/drawable/test_pattern.png
Binary files differ
diff --git a/java/Samples/res/drawable/torusmap.png b/java/Samples/res/drawable/torusmap.png
new file mode 100644
index 0000000..1e08f3b
--- /dev/null
+++ b/java/Samples/res/drawable/torusmap.png
Binary files differ
diff --git a/java/Samples/res/raw/multitexf.glsl b/java/Samples/res/raw/multitexf.glsl
new file mode 100644
index 0000000..351ff9b
--- /dev/null
+++ b/java/Samples/res/raw/multitexf.glsl
@@ -0,0 +1,12 @@
+varying vec2 varTex0;
+
+void main() {
+   vec2 uv = varTex0;
+   lowp vec4 base = texture2D(UNI_Tex0, uv);          // base layer
+   lowp vec4 detail = texture2D(UNI_Tex1, uv * 4.0);  // sampled at 4x the texture coordinate
+   lowp vec4 overlay = texture2D(UNI_Tex2, uv);       // blended in by its own alpha
+   base.rgb = base.rgb * detail.rgb * 1.5;
+   base.rgb = mix(base.rgb, overlay.rgb, overlay.a);
+   gl_FragColor = base;
+}
+
diff --git a/java/Samples/res/raw/shaderf.glsl b/java/Samples/res/raw/shaderf.glsl
new file mode 100644
index 0000000..fcbe7ee
--- /dev/null
+++ b/java/Samples/res/raw/shaderf.glsl
@@ -0,0 +1,16 @@
+
+varying lowp float light0_Diffuse;
+varying lowp float light0_Specular;
+varying lowp float light1_Diffuse;
+varying lowp float light1_Specular;
+varying vec2 varTex0;
+
+void main() {
+   vec2 uv = varTex0;
+   lowp vec4 shaded = texture2D(UNI_Tex0, uv);  // texel to be lit
+   shaded.rgb = shaded.rgb * (light0_Diffuse * UNI_light0_DiffuseColor + light1_Diffuse * UNI_light1_DiffuseColor);  // diffuse scales the texel
+   shaded.rgb += light0_Specular * UNI_light0_SpecularColor;  // specular terms are added on top
+   shaded.rgb += light1_Specular * UNI_light1_SpecularColor;
+   gl_FragColor = shaded;
+}
+
diff --git a/java/Samples/res/raw/shaderv.glsl b/java/Samples/res/raw/shaderv.glsl
new file mode 100644
index 0000000..867589c
--- /dev/null
+++ b/java/Samples/res/raw/shaderv.glsl
@@ -0,0 +1,30 @@
+varying float light0_Diffuse;
+varying float light0_Specular;
+varying float light1_Diffuse;
+varying float light1_Specular;
+varying vec2 varTex0;
+
+// This is where actual shader code begins
+void main() {
+   vec4 worldPos = UNI_model * ATTRIB_position;
+   gl_Position = UNI_proj * worldPos;
+   // Rotate the normal with the upper 3x3 of the model matrix; V points from the vertex to the origin.
+   mat3 model3 = mat3(UNI_model[0].xyz, UNI_model[1].xyz, UNI_model[2].xyz);
+   vec3 worldNorm = model3 * ATTRIB_normal;
+   vec3 V = normalize(-worldPos.xyz);
+   // reflect() expects the incident vector (light -> surface), so the surface -> light vector is negated.
+   vec3 light0Vec = normalize(UNI_light0_Posision - worldPos.xyz);
+   vec3 light0R = reflect(-light0Vec, worldNorm);
+   light0_Diffuse = clamp(dot(worldNorm, light0Vec), 0.0, 1.0) * UNI_light0_Diffuse;
+   float light0Spec = clamp(dot(light0R, V), 0.001, 1.0);
+   light0_Specular = pow(light0Spec, UNI_light0_CosinePower) * UNI_light0_Specular;
+   // Second light: same computation as the first.
+   vec3 light1Vec = normalize(UNI_light1_Posision - worldPos.xyz);
+   vec3 light1R = reflect(-light1Vec, worldNorm);
+   light1_Diffuse = clamp(dot(worldNorm, light1Vec), 0.0, 1.0) * UNI_light1_Diffuse;
+   float light1Spec = clamp(dot(light1R, V), 0.001, 1.0);
+   light1_Specular = pow(light1Spec, UNI_light1_CosinePower) * UNI_light1_Specular;
+
+   gl_PointSize = 1.0;
+   varTex0 = ATTRIB_texture0;
+}
diff --git a/java/Samples/res/raw/torus.a3d b/java/Samples/res/raw/torus.a3d
new file mode 100644
index 0000000..0322b01
--- /dev/null
+++ b/java/Samples/res/raw/torus.a3d
Binary files differ
diff --git a/java/Film/src/com/android/film/Film.java b/java/Samples/src/com/android/samples/RsList.java
similarity index 69%
copy from java/Film/src/com/android/film/Film.java
copy to java/Samples/src/com/android/samples/RsList.java
index 6e99816..0f6b1ac 100644
--- a/java/Film/src/com/android/film/Film.java
+++ b/java/Samples/src/com/android/samples/RsList.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.samples;
 
 import android.renderscript.RSSurfaceView;
 import android.renderscript.RenderScript;
@@ -37,18 +37,9 @@
 
 import java.lang.Runtime;
 
-public class Film extends Activity {
-    //EventListener mListener = new EventListener();
+public class RsList extends Activity {
 
-    private static final String LOG_TAG = "libRS_jni";
-    private static final boolean DEBUG  = false;
-    private static final boolean LOG_ENABLED = DEBUG ? Config.LOGD : Config.LOGV;
-
-    private FilmView mView;
-
-    // get the current looper (from your Activity UI thread for instance
-
-
+    private RsListView mView;
 
     @Override
     public void onCreate(Bundle icicle) {
@@ -56,14 +47,14 @@
 
         // Create our Preview view and set it as the content of our
         // Activity
-        mView = new FilmView(this);
+        mView = new RsListView(this);
         setContentView(mView);
     }
 
     @Override
     protected void onResume() {
         // Ideally a game should implement onResume() and onPause()
-        // to take appropriate action when the activity looses focus
+        // to take appropriate action when the activity loses focus
         super.onResume();
         mView.onResume();
     }
@@ -71,20 +62,10 @@
     @Override
     protected void onPause() {
         // Ideally a game should implement onResume() and onPause()
-        // to take appropriate action when the activity looses focus
+        // to take appropriate action when the activity loses focus
         super.onPause();
         mView.onPause();
-
-        Runtime.getRuntime().exit(0);
     }
 
-
-    static void log(String message) {
-        if (LOG_ENABLED) {
-            Log.v(LOG_TAG, message);
-        }
-    }
-
-
 }
 
diff --git a/java/Samples/src/com/android/samples/RsListRS.java b/java/Samples/src/com/android/samples/RsListRS.java
new file mode 100644
index 0000000..aaeea87
--- /dev/null
+++ b/java/Samples/src/com/android/samples/RsListRS.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.samples;
+
+import java.io.Writer;
+import java.util.Vector;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.renderscript.ProgramStore.DepthFunc;
+import android.util.Log;
+
+
+public class RsListRS {
+    // Host-side driver for the rslist script: uploads the list strings and forwards touch deltas.
+    private final int STATE_LAST_FOCUS = 1;
+
+    private static final String[] DATA_LIST = {
+    "Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra",
+    "Angola", "Anguilla", "Antarctica", "Antigua and Barbuda", "Argentina",
+    "Armenia", "Aruba", "Australia", "Austria", "Azerbaijan",
+    "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium",
+    "Belize", "Benin", "Bermuda", "Bhutan", "Bolivia",
+    "Bosnia and Herzegovina", "Botswana", "Bouvet Island", "Brazil",
+    "British Indian Ocean Territory", "British Virgin Islands", "Brunei", "Bulgaria",
+    "Burkina Faso", "Burundi", "Cote d'Ivoire", "Cambodia", "Cameroon", "Canada", "Cape Verde",
+    "Cayman Islands", "Central African Republic", "Chad", "Chile", "China",
+    "Christmas Island", "Cocos (Keeling) Islands", "Colombia", "Comoros", "Congo",
+    "Cook Islands", "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic",
+    "Democratic Republic of the Congo", "Denmark", "Djibouti", "Dominica", "Dominican Republic",
+    "East Timor", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea",
+    "Estonia", "Ethiopia", "Faeroe Islands", "Falkland Islands", "Fiji", "Finland",
+    "Former Yugoslav Republic of Macedonia", "France", "French Guiana", "French Polynesia",
+    "French Southern Territories", "Gabon", "Georgia", "Germany", "Ghana", "Gibraltar",
+    "Greece", "Greenland", "Grenada", "Guadeloupe", "Guam", "Guatemala", "Guinea", "Guinea-Bissau",
+    "Guyana", "Haiti", "Heard Island and McDonald Islands", "Honduras", "Hong Kong", "Hungary",
+    "Iceland", "India", "Indonesia", "Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica",
+    "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati", "Kuwait", "Kyrgyzstan", "Laos",
+    "Latvia", "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein", "Lithuania", "Luxembourg",
+    "Macau", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", "Marshall Islands",
+    "Martinique", "Mauritania", "Mauritius", "Mayotte", "Mexico", "Micronesia", "Moldova",
+    "Monaco", "Mongolia", "Montserrat", "Morocco", "Mozambique", "Myanmar", "Namibia",
+    "Nauru", "Nepal", "Netherlands", "Netherlands Antilles", "New Caledonia", "New Zealand",
+    "Nicaragua", "Niger", "Nigeria", "Niue", "Norfolk Island", "North Korea", "Northern Marianas",
+    "Norway", "Oman", "Pakistan", "Palau", "Panama", "Papua New Guinea", "Paraguay", "Peru",
+    "Philippines", "Pitcairn Islands", "Poland", "Portugal", "Puerto Rico", "Qatar",
+    "Reunion", "Romania", "Russia", "Rwanda", "Sao Tome and Principe", "Saint Helena",
+    "Saint Kitts and Nevis", "Saint Lucia", "Saint Pierre and Miquelon",
+    "Saint Vincent and the Grenadines", "Samoa", "San Marino", "Saudi Arabia", "Senegal",
+    "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands",
+    "Somalia", "South Africa", "South Georgia and the South Sandwich Islands", "South Korea",
+    "Spain", "Sri Lanka", "Sudan", "Suriname", "Svalbard and Jan Mayen", "Swaziland", "Sweden",
+    "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania", "Thailand", "The Bahamas",
+    "The Gambia", "Togo", "Tokelau", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey",
+    "Turkmenistan", "Turks and Caicos Islands", "Tuvalu", "Virgin Islands", "Uganda",
+    "Ukraine", "United Arab Emirates", "United Kingdom",
+    "United States", "United States Minor Outlying Islands", "Uruguay", "Uzbekistan",
+    "Vanuatu", "Vatican City", "Venezuela", "Vietnam", "Wallis and Futuna", "Western Sahara",
+    "Yemen", "Yugoslavia", "Zambia", "Zimbabwe"
+    };
+
+    int mWidth;
+    int mHeight;
+
+    public RsListRS() {
+    }
+
+    public void init(RenderScriptGL rs, Resources res, int width, int height) {
+        mRS = rs;
+        mRes = res;
+        mWidth = width;
+        mHeight = height;
+        initRS();
+    }
+
+    private Resources mRes;
+    private RenderScriptGL mRS;
+    private Font mItalic;
+
+    ScriptField_ListAllocs_s mListAllocs;
+
+    private ScriptC_rslist mScript;
+    // Last touch position; onActionMove() derives its delta from it.
+    int mLastX;
+    int mLastY;
+
+    public void onActionDown(int x, int y) {
+        mScript.set_gDY(0.0f);  // touch-down resets the script's gDY
+
+        mLastX = x;
+        mLastY = y;
+    }
+
+    public void onActionMove(int x, int y) {
+        // Only the vertical delta is forwarded; moves of 2px or less are sent as 0.
+        int dy = mLastY - y;
+
+        if(Math.abs(dy) <= 2) {
+            dy = 0;
+        }
+
+        mScript.set_gDY(dy);
+
+        mLastX = x;
+        mLastY = y;
+    }
+
+    private void initRS() {
+        // Create the script, upload one text allocation per list entry, then make it the root.
+        mScript = new ScriptC_rslist(mRS, mRes, R.raw.rslist, true);
+
+        mListAllocs = new ScriptField_ListAllocs_s(mRS, DATA_LIST.length);
+        for(int i = 0; i < DATA_LIST.length; i ++) {
+            ScriptField_ListAllocs_s.Item listElem = new ScriptField_ListAllocs_s.Item();
+            listElem.text = Allocation.createFromString(mRS, DATA_LIST[i]);
+            mListAllocs.set(listElem, i, false);
+        }
+        // Items were set with copyNow == false, so push them all in one go.
+        mListAllocs.copyAll();
+
+        mScript.bind_gList(mListAllocs);
+
+        mItalic = Font.createFromFamily(mRS, mRes, "serif", Font.Style.BOLD_ITALIC, 8);
+        mScript.set_gItalic(mItalic);
+
+        mRS.contextBindRootScript(mScript);
+    }
+}
+
+
+
diff --git a/java/Film/src/com/android/film/FilmView.java b/java/Samples/src/com/android/samples/RsListView.java
similarity index 77%
copy from java/Film/src/com/android/film/FilmView.java
copy to java/Samples/src/com/android/samples/RsListView.java
index 5bc2811..cd66fbb 100644
--- a/java/Film/src/com/android/film/FilmView.java
+++ b/java/Samples/src/com/android/samples/RsListView.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.samples;
 
 import java.io.Writer;
 import java.util.ArrayList;
@@ -39,23 +39,25 @@
 import android.view.KeyEvent;
 import android.view.MotionEvent;
 
-public class FilmView extends RSSurfaceView {
+public class RsListView extends RSSurfaceView {
 
-    public FilmView(Context context) {
+    public RsListView(Context context) {
         super(context);
         //setFocusable(true);
     }
 
     private RenderScriptGL mRS;
-    private FilmRS mRender;
+    private RsListRS mRender;
 
 
     public void surfaceChanged(SurfaceHolder holder, int format, int w, int h) {
         super.surfaceChanged(holder, format, w, h);
         if (mRS == null) {
-            mRS = createRenderScript(true);
+            RenderScriptGL.SurfaceConfig sc = new RenderScriptGL.SurfaceConfig();
+            sc.setDepth(16, 24);
+            mRS = createRenderScript(sc);
             mRS.contextSetSurface(w, h, holder.getSurface());
-            mRender = new FilmRS();
+            mRender = new RsListRS();
             mRender.init(mRS, getResources(), w, h);
         }
     }
@@ -80,12 +82,17 @@
     @Override
     public boolean onTouchEvent(MotionEvent ev)
     {
-        boolean ret = true;
+        boolean ret = false;
         int act = ev.getAction();
-        if (act == ev.ACTION_UP) {
-            ret = false;
+        if (act == ev.ACTION_DOWN) {
+            mRender.onActionDown((int)ev.getX(), (int)ev.getY());
+            ret = true;
         }
-        mRender.setFilmStripPosition((int)ev.getX(), (int)ev.getY() / 5);
+        else if (act == ev.ACTION_MOVE) {
+            mRender.onActionMove((int)ev.getX(), (int)ev.getY());
+            ret = true;
+        }
+
         return ret;
     }
 }
diff --git a/java/Film/src/com/android/film/Film.java b/java/Samples/src/com/android/samples/RsRenderStates.java
similarity index 75%
copy from java/Film/src/com/android/film/Film.java
copy to java/Samples/src/com/android/samples/RsRenderStates.java
index 6e99816..391007e 100644
--- a/java/Film/src/com/android/film/Film.java
+++ b/java/Samples/src/com/android/samples/RsRenderStates.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.samples;
 
 import android.renderscript.RSSurfaceView;
 import android.renderscript.RenderScript;
@@ -37,18 +37,9 @@
 
 import java.lang.Runtime;
 
-public class Film extends Activity {
-    //EventListener mListener = new EventListener();
+public class RsRenderStates extends Activity {
 
-    private static final String LOG_TAG = "libRS_jni";
-    private static final boolean DEBUG  = false;
-    private static final boolean LOG_ENABLED = DEBUG ? Config.LOGD : Config.LOGV;
-
-    private FilmView mView;
-
-    // get the current looper (from your Activity UI thread for instance
-
-
+    private RsRenderStatesView mView;
 
     @Override
     public void onCreate(Bundle icicle) {
@@ -56,7 +47,7 @@
 
         // Create our Preview view and set it as the content of our
         // Activity
-        mView = new FilmView(this);
+        mView = new RsRenderStatesView(this);
         setContentView(mView);
     }
 
@@ -74,17 +65,7 @@
         // to take appropriate action when the activity looses focus
         super.onPause();
         mView.onPause();
-
-        Runtime.getRuntime().exit(0);
     }
 
-
-    static void log(String message) {
-        if (LOG_ENABLED) {
-            Log.v(LOG_TAG, message);
-        }
-    }
-
-
 }
 
diff --git a/java/Samples/src/com/android/samples/RsRenderStatesRS.java b/java/Samples/src/com/android/samples/RsRenderStatesRS.java
new file mode 100644
index 0000000..0990da3
--- /dev/null
+++ b/java/Samples/src/com/android/samples/RsRenderStatesRS.java
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.samples;
+
+import java.io.Writer;
+
+import android.content.res.Resources;
+import android.graphics.Bitmap;
+import android.graphics.BitmapFactory;
+import android.renderscript.*;
+import android.renderscript.ProgramStore.DepthFunc;
+import android.renderscript.Sampler.Value;
+import android.util.Log;
+
+
+public class RsRenderStatesRS {
+
+    int mWidth;
+    int mHeight;
+
+    public RsRenderStatesRS() {
+    }
+
+    public void init(RenderScriptGL rs, Resources res, int width, int height) {
+        mRS = rs;
+        mRes = res;
+        mWidth = width;
+        mHeight = height;
+        mOptionsARGB.inScaled = false;
+        mOptionsARGB.inPreferredConfig = Bitmap.Config.ARGB_8888;
+        mMode = 0;
+        mMaxModes = 9;
+        initRS();
+    }
+
+    private Resources mRes;
+    private RenderScriptGL mRS;
+
+    private Sampler mLinearClamp;
+    private Sampler mLinearWrap;
+    private Sampler mMipLinearWrap;
+    private Sampler mNearestClamp;
+    private Sampler mMipLinearAniso8;
+    private Sampler mMipLinearAniso15;
+
+    private ProgramStore mProgStoreBlendNoneDepth;
+    private ProgramStore mProgStoreBlendNone;
+    private ProgramStore mProgStoreBlendAlpha;
+    private ProgramStore mProgStoreBlendAdd;
+
+    private ProgramFragment mProgFragmentTexture;
+    private ProgramFragment mProgFragmentColor;
+
+    private ProgramVertex mProgVertex;
+    private ProgramVertex.MatrixAllocation mPVA;
+
+    // Custom shaders
+    private ProgramVertex mProgVertexCustom;
+    private ProgramFragment mProgFragmentCustom;
+    private ProgramFragment mProgFragmentMultitex;
+    private ScriptField_VertexShaderConstants_s mVSConst;
+    private ScriptField_FragentShaderConstants_s mFSConst;
+
+    private ProgramRaster mCullBack;
+    private ProgramRaster mCullFront;
+    private ProgramRaster mCullNone;
+
+    private Allocation mTexTorus;
+    private Allocation mTexOpaque;
+    private Allocation mTexTransparent;
+    private Allocation mTexChecker;
+
+    private Mesh mMbyNMesh;
+    private Mesh mTorus;
+
+    Font mFontSans;
+    Font mFontSerif;
+    Font mFontSerifBold;
+    Font mFontSerifItalic;
+    Font mFontSerifBoldItalic;
+    Font mFontMono;
+    private Allocation mTextAlloc;
+
+    private ScriptC_rsrenderstates mScript;
+
+    private final BitmapFactory.Options mOptionsARGB = new BitmapFactory.Options();
+
+    int mMode;
+    int mMaxModes;
+
+    public void onActionDown(int x, int y) {
+        mMode ++;
+        mMode = mMode % mMaxModes;
+        mScript.set_gDisplayMode(mMode);
+    }
+
+    private Mesh getMbyNMesh(float width, float height, int wResolution, int hResolution) {
+
+        Mesh.TriangleMeshBuilder tmb = new Mesh.TriangleMeshBuilder(mRS,
+                                           2, Mesh.TriangleMeshBuilder.TEXTURE_0);
+
+        for (int y = 0; y <= hResolution; y++) {
+            final float normalizedY = (float)y / hResolution;
+            final float yOffset = (normalizedY - 0.5f) * height;
+            for (int x = 0; x <= wResolution; x++) {
+                float normalizedX = (float)x / wResolution;
+                float xOffset = (normalizedX - 0.5f) * width;
+                tmb.setTexture(normalizedX, normalizedY);
+                tmb.addVertex(xOffset, yOffset);
+             }
+        }
+
+        for (int y = 0; y < hResolution; y++) {
+            final int curY = y * (wResolution + 1);
+            final int belowY = (y + 1) * (wResolution + 1);
+            for (int x = 0; x < wResolution; x++) {
+                int curV = curY + x;
+                int belowV = belowY + x;
+                tmb.addTriangle(curV, belowV, curV + 1);
+                tmb.addTriangle(belowV, belowV + 1, curV + 1);
+            }
+        }
+
+        return tmb.create(true);
+    }
+
+    private void initProgramStore() {
+        // Use the stock program store objects
+        mProgStoreBlendNoneDepth = ProgramStore.BLEND_NONE_DEPTH_TEST(mRS);
+        mProgStoreBlendNone = ProgramStore.BLEND_NONE_DEPTH_NO_DEPTH(mRS);
+
+        // Create a custom program store
+        ProgramStore.Builder builder = new ProgramStore.Builder(mRS);
+        builder.setDepthFunc(ProgramStore.DepthFunc.ALWAYS);
+        builder.setBlendFunc(ProgramStore.BlendSrcFunc.SRC_ALPHA,
+                             ProgramStore.BlendDstFunc.ONE_MINUS_SRC_ALPHA);
+        builder.setDitherEnable(false);
+        builder.setDepthMask(false);
+        mProgStoreBlendAlpha = builder.create();
+
+        mProgStoreBlendAdd = ProgramStore.BLEND_ADD_DEPTH_NO_DEPTH(mRS);
+
+        mScript.set_gProgStoreBlendNoneDepth(mProgStoreBlendNoneDepth);
+        mScript.set_gProgStoreBlendNone(mProgStoreBlendNone);
+        mScript.set_gProgStoreBlendAlpha(mProgStoreBlendAlpha);
+        mScript.set_gProgStoreBlendAdd(mProgStoreBlendAdd);
+    }
+
+    private void initProgramFragment() {
+
+        ProgramFragment.Builder texBuilder = new ProgramFragment.Builder(mRS);
+        texBuilder.setTexture(ProgramFragment.Builder.EnvMode.REPLACE,
+                              ProgramFragment.Builder.Format.RGBA, 0);
+        mProgFragmentTexture = texBuilder.create();
+        mProgFragmentTexture.bindSampler(mLinearClamp, 0);
+
+        ProgramFragment.Builder colBuilder = new ProgramFragment.Builder(mRS);
+        colBuilder.setVaryingColor(false);
+        mProgFragmentColor = colBuilder.create();
+
+        mScript.set_gProgFragmentColor(mProgFragmentColor);
+        mScript.set_gProgFragmentTexture(mProgFragmentTexture);
+    }
+
+    private void initProgramVertex() {
+        ProgramVertex.Builder pvb = new ProgramVertex.Builder(mRS);
+        mProgVertex = pvb.create();
+
+        mPVA = new ProgramVertex.MatrixAllocation(mRS);
+        mProgVertex.bindAllocation(mPVA);
+        mPVA.setupOrthoWindow(mWidth, mHeight);
+
+        mScript.set_gProgVertex(mProgVertex);
+    }
+
+    private void initCustomShaders() {
+        mVSConst = new ScriptField_VertexShaderConstants_s(mRS, 1);
+        mFSConst = new ScriptField_FragentShaderConstants_s(mRS, 1);
+
+        mScript.bind_gVSConstants(mVSConst);
+        mScript.bind_gFSConstants(mFSConst);
+
+        // Initialize the shader builder
+        ProgramVertex.ShaderBuilder pvbCustom = new ProgramVertex.ShaderBuilder(mRS);
+        // Specify the resource that contains the shader string
+        pvbCustom.setShader(mRes, R.raw.shaderv);
+        // Use a script field to specify the input layout
+        pvbCustom.addInput(ScriptField_VertexShaderInputs_s.createElement(mRS));
+        // Define the constant input layout
+        pvbCustom.addConstant(mVSConst.getAllocation().getType());
+        mProgVertexCustom = pvbCustom.create();
+        // Bind the source of constant data
+        mProgVertexCustom.bindConstants(mVSConst.getAllocation(), 0);
+
+        ProgramFragment.ShaderBuilder pfbCustom = new ProgramFragment.ShaderBuilder(mRS);
+        // Specify the resource that contains the shader string
+        pfbCustom.setShader(mRes, R.raw.shaderf);
+        // Tell the builder how many textures we have
+        pfbCustom.setTextureCount(1);
+        // Define the constant input layout
+        pfbCustom.addConstant(mFSConst.getAllocation().getType());
+        mProgFragmentCustom = pfbCustom.create();
+        // Bind the source of constant data
+        mProgFragmentCustom.bindConstants(mFSConst.getAllocation(), 0);
+
+        pfbCustom = new ProgramFragment.ShaderBuilder(mRS);
+        pfbCustom.setShader(mRes, R.raw.multitexf);
+        pfbCustom.setTextureCount(3);
+        mProgFragmentMultitex = pfbCustom.create();
+
+        mScript.set_gProgVertexCustom(mProgVertexCustom);
+        mScript.set_gProgFragmentCustom(mProgFragmentCustom);
+        mScript.set_gProgFragmentMultitex(mProgFragmentMultitex);
+    }
+
+    private Allocation loadTextureRGB(int id) {
+        final Allocation allocation = Allocation.createFromBitmapResource(mRS, mRes,
+                id, Element.RGB_565(mRS), true);
+        allocation.uploadToTexture(0);
+        return allocation;
+    }
+
+    private Allocation loadTextureARGB(int id) {
+        Bitmap b = BitmapFactory.decodeResource(mRes, id, mOptionsARGB);
+        final Allocation allocation = Allocation.createFromBitmap(mRS, b, Element.RGBA_8888(mRS), true);
+        allocation.uploadToTexture(0);
+        return allocation;
+    }
+
+    private void loadImages() {
+        mTexTorus = loadTextureRGB(R.drawable.torusmap);
+        mTexOpaque = loadTextureRGB(R.drawable.data);
+        mTexTransparent = loadTextureARGB(R.drawable.leaf);
+        mTexChecker = loadTextureRGB(R.drawable.checker);
+
+        mScript.set_gTexTorus(mTexTorus);
+        mScript.set_gTexOpaque(mTexOpaque);
+        mScript.set_gTexTransparent(mTexTransparent);
+        mScript.set_gTexChecker(mTexChecker);
+    }
+
+    private void initFonts() {
+        // Sans font by family name
+        mFontSans = Font.createFromFamily(mRS, mRes, "sans-serif", Font.Style.NORMAL, 8);
+        // Create font by file name
+        mFontSerif = Font.create(mRS, mRes, "DroidSerif-Regular.ttf", 8);
+        // Create fonts by family and style
+        mFontSerifBold = Font.createFromFamily(mRS, mRes, "serif", Font.Style.BOLD, 8);
+        mFontSerifItalic = Font.createFromFamily(mRS, mRes, "serif", Font.Style.ITALIC, 8);
+        mFontSerifBoldItalic = Font.createFromFamily(mRS, mRes, "serif", Font.Style.BOLD_ITALIC, 8);
+        mFontMono = Font.createFromFamily(mRS, mRes, "mono", Font.Style.NORMAL, 8);
+
+        mTextAlloc = Allocation.createFromString(mRS, "String from allocation");
+
+        mScript.set_gFontSans(mFontSans);
+        mScript.set_gFontSerif(mFontSerif);
+        mScript.set_gFontSerifBold(mFontSerifBold);
+        mScript.set_gFontSerifItalic(mFontSerifItalic);
+        mScript.set_gFontSerifBoldItalic(mFontSerifBoldItalic);
+        mScript.set_gFontMono(mFontMono);
+        mScript.set_gTextAlloc(mTextAlloc);
+    }
+
+    private void initMesh() {
+        mMbyNMesh = getMbyNMesh(256, 256, 10, 10);
+        mScript.set_gMbyNMesh(mMbyNMesh);
+
+        FileA3D model = FileA3D.createFromResource(mRS, mRes, R.raw.torus);
+        FileA3D.IndexEntry entry = model.getIndexEntry(0);
+        if(entry == null || entry.getClassID() != FileA3D.ClassID.MESH) {
+            Log.e("rs", "could not load model");
+        }
+        else {
+            mTorus = (Mesh)entry.getObject();
+            mScript.set_gTorusMesh(mTorus);
+        }
+    }
+
+    private void initSamplers() {
+        Sampler.Builder bs = new Sampler.Builder(mRS);
+        bs.setMin(Sampler.Value.LINEAR);
+        bs.setMag(Sampler.Value.LINEAR);
+        bs.setWrapS(Sampler.Value.WRAP);
+        bs.setWrapT(Sampler.Value.WRAP);
+        mLinearWrap = bs.create();
+
+        mLinearClamp = Sampler.CLAMP_LINEAR(mRS);
+        mNearestClamp = Sampler.CLAMP_NEAREST(mRS);
+        mMipLinearWrap = Sampler.WRAP_LINEAR_MIP_LINEAR(mRS);
+
+        bs = new Sampler.Builder(mRS);
+        bs.setMin(Sampler.Value.LINEAR_MIP_LINEAR);
+        bs.setMag(Sampler.Value.LINEAR);
+        bs.setWrapS(Sampler.Value.WRAP);
+        bs.setWrapT(Sampler.Value.WRAP);
+        bs.setAnisotropy(8.0f);
+        mMipLinearAniso8 = bs.create();
+        bs.setAnisotropy(15.0f);
+        mMipLinearAniso15 = bs.create();
+
+        mScript.set_gLinearClamp(mLinearClamp);
+        mScript.set_gLinearWrap(mLinearWrap);
+        mScript.set_gMipLinearWrap(mMipLinearWrap);
+        mScript.set_gMipLinearAniso8(mMipLinearAniso8);
+        mScript.set_gMipLinearAniso15(mMipLinearAniso15);
+        mScript.set_gNearestClamp(mNearestClamp);
+    }
+
+    private void initProgramRaster() {
+        mCullBack = ProgramRaster.CULL_BACK(mRS);
+        mCullFront = ProgramRaster.CULL_FRONT(mRS);
+        mCullNone = ProgramRaster.CULL_NONE(mRS);
+
+        mScript.set_gCullBack(mCullBack);
+        mScript.set_gCullFront(mCullFront);
+        mScript.set_gCullNone(mCullNone);
+    }
+
+    private void initRS() {
+
+        mScript = new ScriptC_rsrenderstates(mRS, mRes, R.raw.rsrenderstates, true);
+
+        initSamplers();
+        initProgramStore();
+        initProgramFragment();
+        initProgramVertex();
+        initFonts();
+        loadImages();
+        initMesh();
+        initProgramRaster();
+        initCustomShaders();
+
+        mRS.contextBindRootScript(mScript);
+    }
+}
+
+
+
diff --git a/java/Film/src/com/android/film/FilmView.java b/java/Samples/src/com/android/samples/RsRenderStatesView.java
similarity index 80%
copy from java/Film/src/com/android/film/FilmView.java
copy to java/Samples/src/com/android/samples/RsRenderStatesView.java
index 5bc2811..c434c09 100644
--- a/java/Film/src/com/android/film/FilmView.java
+++ b/java/Samples/src/com/android/samples/RsRenderStatesView.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.samples;
 
 import java.io.Writer;
 import java.util.ArrayList;
@@ -39,23 +39,25 @@
 import android.view.KeyEvent;
 import android.view.MotionEvent;
 
-public class FilmView extends RSSurfaceView {
+public class RsRenderStatesView extends RSSurfaceView {
 
-    public FilmView(Context context) {
+    public RsRenderStatesView(Context context) {
         super(context);
         //setFocusable(true);
     }
 
     private RenderScriptGL mRS;
-    private FilmRS mRender;
+    private RsRenderStatesRS mRender;
 
 
     public void surfaceChanged(SurfaceHolder holder, int format, int w, int h) {
         super.surfaceChanged(holder, format, w, h);
         if (mRS == null) {
-            mRS = createRenderScript(true);
+            RenderScriptGL.SurfaceConfig sc = new RenderScriptGL.SurfaceConfig();
+            sc.setDepth(16, 24);
+            mRS = createRenderScript(sc);
             mRS.contextSetSurface(w, h, holder.getSurface());
-            mRender = new FilmRS();
+            mRender = new RsRenderStatesRS();
             mRender.init(mRS, getResources(), w, h);
         }
     }
@@ -80,12 +82,13 @@
     @Override
     public boolean onTouchEvent(MotionEvent ev)
     {
-        boolean ret = true;
+        boolean ret = false;
         int act = ev.getAction();
-        if (act == ev.ACTION_UP) {
-            ret = false;
+        if (act == ev.ACTION_DOWN) {
+            mRender.onActionDown((int)ev.getX(), (int)ev.getY());
+            ret = true;
         }
-        mRender.setFilmStripPosition((int)ev.getX(), (int)ev.getY() / 5);
+
         return ret;
     }
 }
diff --git a/java/Samples/src/com/android/samples/rslist.rs b/java/Samples/src/com/android/samples/rslist.rs
new file mode 100644
index 0000000..f760ad0
--- /dev/null
+++ b/java/Samples/src/com/android/samples/rslist.rs
@@ -0,0 +1,71 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.samples)
+
+#include "rs_graphics.rsh"
+
+float gDY;
+
+rs_font gItalic;
+
+typedef struct ListAllocs_s {
+    rs_allocation text;
+} ListAllocs;
+
+ListAllocs *gList;
+
+void init() {
+    gDY = 0.0f;
+}
+
+int textPos = 0;
+
+int root(int launchID) {
+
+    rsgClearColor(0.0f, 0.0f, 0.0f, 0.0f);
+    rsgClearDepth(1.0f);
+
+    textPos -= (int)gDY*2;
+    gDY *= 0.95;
+
+    rsgFontColor(0.9f, 0.9f, 0.9f, 1.0f);
+    rsgBindFont(gItalic);
+    color(0.2, 0.2, 0.2, 0);
+
+    rs_allocation listAlloc = rsGetAllocation(gList);
+    int allocSize = rsAllocationGetDimX(listAlloc);
+
+    int width = rsgGetWidth();
+    int height = rsgGetHeight();
+
+    int itemHeight = 80;
+    int currentYPos = itemHeight + textPos;
+
+    for(int i = 0; i < allocSize; i ++) {
+        if(currentYPos - itemHeight > height) {
+            break;
+        }
+
+        if(currentYPos > 0) {
+            rsgDrawRect(0, currentYPos - 1, width, currentYPos, 0);
+            rsgDrawText(gList[i].text, 30, currentYPos - 32);
+        }
+        currentYPos += itemHeight;
+    }
+
+    return 10;
+}
diff --git a/java/Samples/src/com/android/samples/rsrenderstates.rs b/java/Samples/src/com/android/samples/rsrenderstates.rs
new file mode 100644
index 0000000..8be35f8
--- /dev/null
+++ b/java/Samples/src/com/android/samples/rsrenderstates.rs
@@ -0,0 +1,583 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.samples)
+
+#include "rs_graphics.rsh"
+#include "shader_def.rsh"
+
+rs_program_vertex gProgVertex;
+rs_program_fragment gProgFragmentColor;
+rs_program_fragment gProgFragmentTexture;
+
+rs_program_store gProgStoreBlendNoneDepth;
+rs_program_store gProgStoreBlendNone;
+rs_program_store gProgStoreBlendAlpha;
+rs_program_store gProgStoreBlendAdd;
+
+rs_allocation gTexOpaque;
+rs_allocation gTexTorus;
+rs_allocation gTexTransparent;
+rs_allocation gTexChecker;
+
+rs_mesh gMbyNMesh;
+rs_mesh gTorusMesh;
+
+rs_font gFontSans;
+rs_font gFontSerif;
+rs_font gFontSerifBold;
+rs_font gFontSerifItalic;
+rs_font gFontSerifBoldItalic;
+rs_font gFontMono;
+rs_allocation gTextAlloc;
+
+int gDisplayMode;
+
+rs_sampler gLinearClamp;
+rs_sampler gLinearWrap;
+rs_sampler gMipLinearWrap;
+rs_sampler gMipLinearAniso8;
+rs_sampler gMipLinearAniso15;
+rs_sampler gNearestClamp;
+
+rs_program_raster gCullBack;
+rs_program_raster gCullFront;
+rs_program_raster gCullNone;
+
+// Custom vertex shader components
+VertexShaderConstants *gVSConstants;
+FragentShaderConstants *gFSConstants;
+// Export these out to easily set the inputs to shader
+VertexShaderInputs *gVSInputs;
+// Custom shaders we use for lighting
+rs_program_vertex gProgVertexCustom;
+rs_program_fragment gProgFragmentCustom;
+rs_program_fragment gProgFragmentMultitex;
+
+float gDt = 0;
+
+void init() {
+}
+
+void displayFontSamples() {
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    int yPos = 100;
+    rsgBindFont(gFontSans);
+    rsgDrawText("Sans font sample", 30, yPos);
+    yPos += 30;
+    rsgFontColor(0.5f, 0.9f, 0.5f, 1.0f);
+    rsgBindFont(gFontSerif);
+    rsgDrawText("Serif font sample", 30, yPos);
+    yPos += 30;
+    rsgFontColor(0.7f, 0.7f, 0.7f, 1.0f);
+    rsgBindFont(gFontSerifBold);
+    rsgDrawText("Serif Bold font sample", 30, yPos);
+    yPos += 30;
+    rsgFontColor(0.5f, 0.5f, 0.9f, 1.0f);
+    rsgBindFont(gFontSerifItalic);
+    rsgDrawText("Serif Italic font sample", 30, yPos);
+    yPos += 30;
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontSerifBoldItalic);
+    rsgDrawText("Serif Bold Italic font sample", 30, yPos);
+    yPos += 30;
+    rsgBindFont(gFontMono);
+    rsgDrawText("Monospace font sample", 30, yPos);
+    yPos += 50;
+
+    // Now use text metrics to center the text
+    uint width = rsgGetWidth();
+    uint height = rsgGetHeight();
+    int left = 0, right = 0, top = 0, bottom = 0;
+
+    rsgFontColor(0.9f, 0.9f, 0.95f, 1.0f);
+    rsgBindFont(gFontSerifBoldItalic);
+
+    rsgMeasureText(gTextAlloc, &left, &right, &top, &bottom);
+    int centeredPos = width / 2 - (right - left) / 2;
+    rsgDrawText(gTextAlloc, centeredPos, yPos);
+    yPos += 30;
+
+    const char* text = "Centered Text Sample";
+    rsgMeasureText(text, &left, &right, &top, &bottom);
+    centeredPos = width / 2 - (right - left) / 2;
+    rsgDrawText(text, centeredPos, yPos);
+    yPos += 30;
+
+    rsgBindFont(gFontSans);
+    text = "More Centered Text Samples";
+    rsgMeasureText(text, &left, &right, &top, &bottom);
+    centeredPos = width / 2 - (right - left) / 2;
+    rsgDrawText(text, centeredPos, yPos);
+    yPos += 30;
+
+    // Now draw bottom and top right aligned text
+    text = "Top-right aligned text";
+    rsgMeasureText(text, &left, &right, &top, &bottom);
+    rsgDrawText(text, width - right, top);
+
+    text = "Top-left";
+    rsgMeasureText(text, &left, &right, &top, &bottom);
+    rsgDrawText(text, -left, top);
+
+    text = "Bottom-right aligned text";
+    rsgMeasureText(text, &left, &right, &top, &bottom);
+    rsgDrawText(text, width - right, height + bottom);
+
+}
+
+void bindProgramVertexOrtho() {
+    // Default vertex shader
+    rsgBindProgramVertex(gProgVertex);
+    // Set up the projection matrix
+    rs_matrix4x4 proj;
+    rsMatrixLoadOrtho(&proj, 0, rsgGetWidth(), rsgGetHeight(), 0, -500, 500);
+    rsgProgramVertexLoadProjectionMatrix(&proj);
+}
+
+void displayShaderSamples() {
+    bindProgramVertexOrtho();
+    rs_matrix4x4 matrix;
+    rsMatrixLoadIdentity(&matrix);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNone);
+    rsgBindProgramFragment(gProgFragmentTexture);
+    rsgBindSampler(gProgFragmentTexture, 0, gLinearClamp);
+    rsgBindTexture(gProgFragmentTexture, 0, gTexOpaque);
+
+    float startX = 0, startY = 0;
+    float width = 256, height = 256;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1,
+                         startX + width, startY + height, 0, 1, 1,
+                         startX + width, startY, 0, 1, 0);
+
+    startX = 200; startY = 0;
+    width = 128; height = 128;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1,
+                         startX + width, startY + height, 0, 1, 1,
+                         startX + width, startY, 0, 1, 0);
+
+    rsgBindProgramStore(gProgStoreBlendAlpha);
+    rsgBindTexture(gProgFragmentTexture, 0, gTexTransparent);
+    startX = 0; startY = 200;
+    width = 128; height = 128;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1,
+                         startX + width, startY + height, 0, 1, 1,
+                         startX + width, startY, 0, 1, 0);
+
+    // Fragment program with simple color
+    rsgBindProgramFragment(gProgFragmentColor);
+    rsgProgramFragmentConstantColor(gProgFragmentColor, 0.9, 0.3, 0.3, 1);
+    rsgDrawRect(200, 300, 350, 450, 0);
+    rsgProgramFragmentConstantColor(gProgFragmentColor, 0.3, 0.9, 0.3, 1);
+    rsgDrawRect(50, 400, 400, 600, 0);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("Texture shader", 10, 50);
+    rsgDrawText("Alpha-blended texture shader", 10, 280);
+    rsgDrawText("Flat color shader", 100, 450);
+}
+
+void displayBlendingSamples() {
+    int i;
+
+    bindProgramVertexOrtho();
+    rs_matrix4x4 matrix;
+    rsMatrixLoadIdentity(&matrix);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+
+    rsgBindProgramFragment(gProgFragmentColor);
+
+    rsgBindProgramStore(gProgStoreBlendNone);
+    for(i = 0; i < 3; i ++) {
+        float iPlusOne = (float)(i + 1);
+        rsgProgramFragmentConstantColor(gProgFragmentColor,
+                                        0.1f*iPlusOne, 0.2f*iPlusOne, 0.3f*iPlusOne, 1);
+        float yPos = 150 * (float)i;
+        rsgDrawRect(0, yPos, 200, yPos + 200, 0);
+    }
+
+    rsgBindProgramStore(gProgStoreBlendAlpha);
+    for(i = 0; i < 3; i ++) {
+        float iPlusOne = (float)(i + 1);
+        rsgProgramFragmentConstantColor(gProgFragmentColor,
+                                        0.2f*iPlusOne, 0.3f*iPlusOne, 0.1f*iPlusOne, 0.5);
+        float yPos = 150 * (float)i;
+        rsgDrawRect(150, yPos, 350, yPos + 200, 0);
+    }
+
+    rsgBindProgramStore(gProgStoreBlendAdd);
+    for(i = 0; i < 3; i ++) {
+        float iPlusOne = (float)(i + 1);
+        rsgProgramFragmentConstantColor(gProgFragmentColor,
+                                        0.3f*iPlusOne, 0.1f*iPlusOne, 0.2f*iPlusOne, 0.5);
+        float yPos = 150 * (float)i;
+        rsgDrawRect(300, yPos, 500, yPos + 200, 0);
+    }
+
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("No Blending", 10, 50);
+    rsgDrawText("Alpha Blending", 160, 150);
+    rsgDrawText("Additive Blending", 320, 250);
+
+}
+
+void displayMeshSamples() {
+
+    bindProgramVertexOrtho();
+    rs_matrix4x4 matrix;
+    rsMatrixLoadTranslate(&matrix, 128, 128, 0);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNone);
+    rsgBindProgramFragment(gProgFragmentTexture);
+    rsgBindSampler(gProgFragmentTexture, 0, gLinearClamp);
+    rsgBindTexture(gProgFragmentTexture, 0, gTexOpaque);
+
+    rsgDrawMesh(gMbyNMesh);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("User gen 10 by 10 grid mesh", 10, 250);
+}
+
+void displayTextureSamplers() {
+    // Draws gTexOpaque in four 300x300 quads, one per sampler, then labels each quad.
+    bindProgramVertexOrtho();
+    rs_matrix4x4 matrix;
+    rsMatrixLoadIdentity(&matrix);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNone);
+    rsgBindProgramFragment(gProgFragmentTexture);
+    rsgBindTexture(gProgFragmentTexture, 0, gTexOpaque);
+
+    // Linear clamp: quad at (0, 0); texture coordinates run past 1.0 (to 1.1)
+    rsgBindSampler(gProgFragmentTexture, 0, gLinearClamp);
+    float startX = 0, startY = 0;
+    float width = 300, height = 300;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1.1,
+                         startX + width, startY + height, 0, 1.1, 1.1,
+                         startX + width, startY, 0, 1.1, 0);
+
+    // Linear wrap: quad at (0, 300), same texture coordinates
+    rsgBindSampler(gProgFragmentTexture, 0, gLinearWrap);
+    startX = 0; startY = 300;
+    width = 300; height = 300;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1.1,
+                         startX + width, startY + height, 0, 1.1, 1.1,
+                         startX + width, startY, 0, 1.1, 0);
+
+    // Nearest clamp: quad at (300, 0), same texture coordinates
+    rsgBindSampler(gProgFragmentTexture, 0, gNearestClamp);
+    startX = 300; startY = 0;
+    width = 300; height = 300;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1.1,
+                         startX + width, startY + height, 0, 1.1, 1.1,
+                         startX + width, startY, 0, 1.1, 0);
+
+    rsgBindSampler(gProgFragmentTexture, 0, gMipLinearWrap);  // Mip-linear wrap: quad at (300, 300), coords to 1.5
+    startX = 300; startY = 300;
+    width = 300; height = 300;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1.5,
+                         startX + width, startY + height, 0, 1.5, 1.5,
+                         startX + width, startY, 0, 1.5, 0);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("Filtering: linear clamp", 10, 290);
+    rsgDrawText("Filtering: linear wrap", 10, 590);
+    rsgDrawText("Filtering: nearest clamp", 310, 290);
+    rsgDrawText("Filtering: miplinear wrap", 310, 590);
+}
+
+float gTorusRotation = 0;  // torus angle; advanced by 50 * gDt and wrapped at 360 by the torus samples
+
+void displayCullingSamples() {
+    rsgBindProgramVertex(gProgVertex);
+    // Setup the projection matrix (30.0f is passed as the field-of-view argument)
+    rs_matrix4x4 proj;
+    float aspect = (float)rsgGetWidth() / (float)rsgGetHeight();
+    rsMatrixLoadPerspective(&proj, 30.0f, aspect, 0.1f, 100.0f);
+    rsgProgramVertexLoadProjectionMatrix(&proj);
+
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNoneDepth);
+    rsgBindProgramFragment(gProgFragmentTexture);
+    rsgBindSampler(gProgFragmentTexture, 0, gLinearClamp);
+    rsgBindTexture(gProgFragmentTexture, 0, gTexTorus);
+
+    // Apply a rotation to our mesh
+    gTorusRotation += 50.0f * gDt;
+    if(gTorusRotation > 360.0f) {
+        gTorusRotation -= 360.0f;
+    }
+
+    rs_matrix4x4 matrix;
+    // Position our model on the screen (first torus at x = -2)
+    rsMatrixLoadTranslate(&matrix, -2.0f, 0.0f, -10.0f);
+    rsMatrixRotate(&matrix, gTorusRotation, 1.0f, 0.0f, 0.0f);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+    // Use front face culling
+    rsgBindProgramRaster(gCullFront);
+    rsgDrawMesh(gTorusMesh);
+
+    rsMatrixLoadTranslate(&matrix, 2.0f, 0.0f, -10.0f);  // second torus at x = +2, same rotation
+    rsMatrixRotate(&matrix, gTorusRotation, 1.0f, 0.0f, 0.0f);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+    // Use back face culling
+    rsgBindProgramRaster(gCullBack);
+    rsgDrawMesh(gTorusMesh);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("Displaying mesh front/back face culling", 10, rsgGetHeight() - 10);
+}
+
+float gLight0Rotation = 0;  // advanced by +50 * gDt per call
+float gLight1Rotation = 0;  // advanced by -50 * gDt per call (opposite direction)
+// Rotates both lights, then writes their positions and colors into gVSConstants / gFSConstants.
+void setupCustomShaderLights() {
+    float4 light0Pos = {-5.0f, 5.0f, -10.0f, 1.0f};
+    float4 light1Pos = {2.0f, 5.0f, 15.0f, 1.0f};
+    float3 light0DiffCol = {0.9f, 0.7f, 0.7f};
+    float3 light0SpecCol = {0.9f, 0.6f, 0.6f};
+    float3 light1DiffCol = {0.5f, 0.5f, 0.9f};
+    float3 light1SpecCol = {0.5f, 0.5f, 0.9f};
+
+    gLight0Rotation += 50.0f * gDt;
+    if(gLight0Rotation > 360.0f) {
+        gLight0Rotation -= 360.0f;
+    }
+    gLight1Rotation -= 50.0f * gDt;
+    if(gLight1Rotation < -360.0f) {  // this angle decreases, so wrap at the negative bound
+        gLight1Rotation += 360.0f;
+    }
+
+    rs_matrix4x4 l0Mat;
+    rsMatrixLoadRotate(&l0Mat, gLight0Rotation, 1.0f, 0.0f, 0.0f);
+    light0Pos = rsMatrixMultiply(&l0Mat, light0Pos);
+    rs_matrix4x4 l1Mat;
+    rsMatrixLoadRotate(&l1Mat, gLight1Rotation, 0.0f, 0.0f, 1.0f);
+    light1Pos = rsMatrixMultiply(&l1Mat, light1Pos);
+
+    // Set light 0 properties
+    gVSConstants->light0_Posision.x = light0Pos.x;
+    gVSConstants->light0_Posision.y = light0Pos.y;
+    gVSConstants->light0_Posision.z = light0Pos.z;
+    gVSConstants->light0_Diffuse = 1.0f;
+    gVSConstants->light0_Specular = 0.5f;
+    gVSConstants->light0_CosinePower = 40.0f;
+    // Set light 1 properties
+    gVSConstants->light1_Posision.x = light1Pos.x;
+    gVSConstants->light1_Posision.y = light1Pos.y;
+    gVSConstants->light1_Posision.z = light1Pos.z;
+    gVSConstants->light1_Diffuse = 1.0f;
+    gVSConstants->light1_Specular = 0.7f;
+    gVSConstants->light1_CosinePower = 50.0f;
+    rsAllocationMarkDirty(rsGetAllocation(gVSConstants));
+
+    // Update fragment shader constants
+    // Set light 0 colors
+    gFSConstants->light0_DiffuseColor = light0DiffCol;
+    gFSConstants->light0_SpecularColor = light0SpecCol;
+    // Set light 1 colors
+    gFSConstants->light1_DiffuseColor = light1DiffCol;
+    gFSConstants->light1_SpecularColor = light1SpecCol;
+    rsAllocationMarkDirty(rsGetAllocation(gFSConstants));
+}
+
+void displayCustomShaderSamples() {
+    // Draws the rotating torus with the custom vertex/fragment programs and a caption.
+    // Update vertex shader constants
+    // Load model matrix
+    // Apply a rotation to our mesh
+    gTorusRotation += 50.0f * gDt;
+    if(gTorusRotation > 360.0f) {
+        gTorusRotation -= 360.0f;
+    }
+
+    // Position our model on the screen
+    rsMatrixLoadTranslate(&gVSConstants->model, 0.0f, 0.0f, -10.0f);
+    rsMatrixRotate(&gVSConstants->model, gTorusRotation, 1.0f, 0.0f, 0.0f);
+    rsMatrixRotate(&gVSConstants->model, gTorusRotation, 0.0f, 0.0f, 1.0f);
+    // Setup the projection matrix
+    float aspect = (float)rsgGetWidth() / (float)rsgGetHeight();
+    rsMatrixLoadPerspective(&gVSConstants->proj, 30.0f, aspect, 0.1f, 100.0f);
+    setupCustomShaderLights();  // also marks gVSConstants dirty, after the matrix writes above
+
+    rsgBindProgramVertex(gProgVertexCustom);
+
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNoneDepth);
+    rsgBindProgramFragment(gProgFragmentCustom);
+    rsgBindSampler(gProgFragmentCustom, 0, gLinearClamp);
+    rsgBindTexture(gProgFragmentCustom, 0, gTexTorus);
+
+    // Use back face culling
+    rsgBindProgramRaster(gCullBack);
+    rsgDrawMesh(gTorusMesh);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("Custom shader sample", 10, rsgGetHeight() - 10);
+}
+
+void displayMultitextureSample() {  // draws one 256x256 quad with three textures bound, plus a caption
+    bindProgramVertexOrtho();
+    rs_matrix4x4 matrix;
+    rsMatrixLoadIdentity(&matrix);
+    rsgProgramVertexLoadModelMatrix(&matrix);
+
+    // Fragment shader with three texture slots, each given its own sampler
+    rsgBindProgramStore(gProgStoreBlendNone);
+    rsgBindProgramFragment(gProgFragmentMultitex);
+    rsgBindSampler(gProgFragmentMultitex, 0, gLinearClamp);
+    rsgBindSampler(gProgFragmentMultitex, 1, gLinearWrap);
+    rsgBindSampler(gProgFragmentMultitex, 2, gLinearClamp);
+    rsgBindTexture(gProgFragmentMultitex, 0, gTexChecker);
+    rsgBindTexture(gProgFragmentMultitex, 1, gTexTorus);
+    rsgBindTexture(gProgFragmentMultitex, 2, gTexTransparent);
+
+    float startX = 0, startY = 0;
+    float width = 256, height = 256;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 1,
+                         startX + width, startY + height, 0, 1, 1,
+                         startX + width, startY, 0, 1, 0);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("Custom shader with multitexturing", 10, 280);
+}
+
+float gAnisoTime = 0.0f;  // accumulated gDt since the sampler mode last changed
+uint anisoMode = 0;  // 0: gMipLinearAniso8, 1: gMipLinearAniso15, 2: gMipLinearWrap
+void displayAnisoSample() {
+    // Draws a tilted checker quad, cycling the sampler each time gAnisoTime reaches 5.0.
+    gAnisoTime += gDt;
+
+    rsgBindProgramVertex(gProgVertex);
+    float aspect = (float)rsgGetWidth() / (float)rsgGetHeight();
+    rs_matrix4x4 proj;
+    rsMatrixLoadPerspective(&proj, 30.0f, aspect, 0.1f, 100.0f);
+    rsgProgramVertexLoadProjectionMatrix(&proj);
+
+    rs_matrix4x4 matrix;
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNone);
+    rsgBindProgramFragment(gProgFragmentTexture);
+    rsMatrixLoadTranslate(&matrix, 0.0f, 0.0f, -10.0f);
+    rsMatrixRotate(&matrix, -80, 1.0f, 0.0f, 0.0f);  // tilt the quad by -80 about the x axis
+    rsgProgramVertexLoadModelMatrix(&matrix);
+
+    rsgBindProgramRaster(gCullNone);
+
+    rsgBindTexture(gProgFragmentTexture, 0, gTexChecker);
+
+    if(gAnisoTime >= 5.0f) {  // advance to the next of the three modes and restart the timer
+        gAnisoTime = 0.0f;
+        anisoMode ++;
+        anisoMode = anisoMode % 3;
+    }
+
+    if(anisoMode == 0) {
+        rsgBindSampler(gProgFragmentTexture, 0, gMipLinearAniso8);
+    }
+    else if(anisoMode == 1) {
+        rsgBindSampler(gProgFragmentTexture, 0, gMipLinearAniso15);
+    }
+    else {
+        rsgBindSampler(gProgFragmentTexture, 0, gMipLinearWrap);
+    }
+
+    float startX = -15;
+    float startY = -15;
+    float width = 30;
+    float height = 30;
+    rsgDrawQuadTexCoords(startX, startY, 0, 0, 0,
+                         startX, startY + height, 0, 0, 10,
+                         startX + width, startY + height, 0, 10, 10,
+                         startX + width, startY, 0, 10, 0);
+
+    rsgBindProgramRaster(gCullBack);  // leave the raster state on gCullBack after the gCullNone draw
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    if(anisoMode == 0) {
+        rsgDrawText("Anisotropic filtering 8", 10, 40);
+    }
+    else if(anisoMode == 1) {
+        rsgDrawText("Anisotropic filtering 15", 10, 40);
+    }
+    else {
+        rsgDrawText("Miplinear filtering", 10, 40);
+    }
+}
+
+int root(int launchID) {
+    // Script root: refreshes gDt, clears color/depth, then draws the sample chosen by gDisplayMode.
+    gDt = rsGetDt();
+
+    rsgClearColor(0.2f, 0.2f, 0.2f, 0.0f);
+    rsgClearDepth(1.0f);
+
+    switch(gDisplayMode) {  // no default case: values outside 0-8 draw nothing after the clear
+    case 0:
+        displayFontSamples();
+        break;
+    case 1:
+        displayShaderSamples();
+        break;
+    case 2:
+        displayBlendingSamples();
+        break;
+    case 3:
+        displayMeshSamples();
+        break;
+    case 4:
+        displayTextureSamplers();
+        break;
+    case 5:
+        displayCullingSamples();
+        break;
+    case 6:
+        displayCustomShaderSamples();
+        break;
+    case 7:
+        displayMultitextureSample();
+        break;
+    case 8:
+        displayAnisoSample();
+        break;
+    }
+
+    return 10;  // NOTE(review): presumably the requested redraw delay in ms - confirm against the RS root() contract
+}
diff --git a/java/Samples/src/com/android/samples/shader_def.rsh b/java/Samples/src/com/android/samples/shader_def.rsh
new file mode 100644
index 0000000..e3f6206
--- /dev/null
+++ b/java/Samples/src/com/android/samples/shader_def.rsh
@@ -0,0 +1,47 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.samples)
+
+typedef struct VertexShaderConstants_s {  // constants written by the script for the custom vertex program
+    rs_matrix4x4 model;
+    rs_matrix4x4 proj;
+    float3 light0_Posision;  // (sic) spelling is used as-is by script code, e.g. gVSConstants->light0_Posision
+    float light0_Diffuse;
+    float light0_Specular;
+    float light0_CosinePower;
+
+    float3 light1_Posision;  // (sic) see light0_Posision
+    float light1_Diffuse;
+    float light1_Specular;
+    float light1_CosinePower;
+} VertexShaderConstants;
+
+typedef struct FragentShaderConstants_s {  // (sic) "Fragent" is the declared type name; per-light colors
+    float3 light0_DiffuseColor;
+    float3 light0_SpecularColor;
+
+    float3 light1_DiffuseColor;
+    float3 light1_SpecularColor;
+
+} FragentShaderConstants;
+
+typedef struct VertexShaderInputs_s {  // per-vertex attributes: position, normal, one texture coordinate
+    float4 position;
+    float3 normal;
+    float2 texture0;
+} VertexShaderInputs;
+
diff --git a/java/Film/Android.mk b/java/tests/Android.mk
similarity index 81%
copy from java/Film/Android.mk
copy to java/tests/Android.mk
index 9e6ed7e..6c992d5 100644
--- a/java/Film/Android.mk
+++ b/java/tests/Android.mk
@@ -14,14 +14,17 @@
 # limitations under the License.
 #
 
+ifneq ($(TARGET_SIMULATOR),true)
+
 LOCAL_PATH := $(call my-dir)
 include $(CLEAR_VARS)
 
 LOCAL_MODULE_TAGS := optional
 
-LOCAL_SRC_FILES := $(call all-java-files-under, src)
-#LOCAL_STATIC_JAVA_LIBRARIES := android.renderscript
+LOCAL_SRC_FILES := $(call all-java-files-under, src) $(call all-renderscript-files-under, src)
 
-LOCAL_PACKAGE_NAME := Film
+LOCAL_PACKAGE_NAME := RSTest
 
 include $(BUILD_PACKAGE)
+
+endif
diff --git a/java/Film/AndroidManifest.xml b/java/tests/AndroidManifest.xml
similarity index 60%
rename from java/Film/AndroidManifest.xml
rename to java/tests/AndroidManifest.xml
index a5ce8a1..b660398 100644
--- a/java/Film/AndroidManifest.xml
+++ b/java/tests/AndroidManifest.xml
@@ -1,10 +1,11 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
-    package="com.android.film">
-    <application android:label="Film">
-        <activity android:name="Film"
-                  android:screenOrientation="portrait"
-                  android:theme="@android:style/Theme.Black.NoTitleBar">
+    package="com.android.rs.test">
+    <application 
+        android:label="_RS_Test"
+        android:icon="@drawable/test_pattern">
+        <activity android:name="RSTest"
+                  android:screenOrientation="portrait">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
                 <category android:name="android.intent.category.LAUNCHER" />
diff --git a/java/tests/res/drawable/test_pattern.png b/java/tests/res/drawable/test_pattern.png
new file mode 100644
index 0000000..e7d1455
--- /dev/null
+++ b/java/tests/res/drawable/test_pattern.png
Binary files differ
diff --git a/java/Film/src/com/android/film/Film.java b/java/tests/src/com/android/rs/test/RSTest.java
similarity index 86%
rename from java/Film/src/com/android/film/Film.java
rename to java/tests/src/com/android/rs/test/RSTest.java
index 6e99816..c264649 100644
--- a/java/Film/src/com/android/film/Film.java
+++ b/java/tests/src/com/android/rs/test/RSTest.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.rs.test;
 
 import android.renderscript.RSSurfaceView;
 import android.renderscript.RenderScript;
@@ -37,33 +37,31 @@
 
 import java.lang.Runtime;
 
-public class Film extends Activity {
+public class RSTest extends Activity {
     //EventListener mListener = new EventListener();
 
     private static final String LOG_TAG = "libRS_jni";
     private static final boolean DEBUG  = false;
     private static final boolean LOG_ENABLED = DEBUG ? Config.LOGD : Config.LOGV;
 
-    private FilmView mView;
+    private RSTestView mView;
 
     // get the current looper (from your Activity UI thread for instance
 
-
-
     @Override
     public void onCreate(Bundle icicle) {
         super.onCreate(icicle);
 
         // Create our Preview view and set it as the content of our
         // Activity
-        mView = new FilmView(this);
+        mView = new RSTestView(this);
         setContentView(mView);
     }
 
     @Override
     protected void onResume() {
         // Ideally a game should implement onResume() and onPause()
-        // to take appropriate action when the activity looses focus
+        // to take appropriate action when the activity loses focus
         super.onResume();
         mView.onResume();
     }
@@ -71,14 +69,11 @@
     @Override
     protected void onPause() {
         // Ideally a game should implement onResume() and onPause()
-        // to take appropriate action when the activity looses focus
+        // to take appropriate action when the activity loses focus
         super.onPause();
         mView.onPause();
-
-        Runtime.getRuntime().exit(0);
     }
 
-
     static void log(String message) {
         if (LOG_ENABLED) {
             Log.v(LOG_TAG, message);
@@ -87,4 +82,3 @@
 
 
 }
-
diff --git a/java/tests/src/com/android/rs/test/RSTestCore.java b/java/tests/src/com/android/rs/test/RSTestCore.java
new file mode 100644
index 0000000..789fa4d
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/RSTestCore.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+import android.util.Log;
+import java.util.ArrayList;
+import java.util.ListIterator;
+import java.util.Timer;
+import java.util.TimerTask;
+
+/** Builds the unit-test list, runs the tests one at a time on their own threads, and pushes results to the rslist script. */
+public class RSTestCore {
+    int mWidth;
+    int mHeight;
+
+    public RSTestCore() {
+    }
+
+    private Resources mRes;
+    private RenderScriptGL mRS;
+
+    private Font mFont;
+    ScriptField_ListAllocs_s mListAllocs;
+    int mLastX;
+    int mLastY;
+    private ScriptC_rslist mScript;
+
+    private ArrayList<UnitTest> unitTests;
+    private ListIterator<UnitTest> test_iter;  // advanced only inside checkAndRunNextTest()
+    private UnitTest activeTest;  // test thread currently running, or null
+    private boolean stopTesting;  // set under the lock by cleanup(); blocks further test starts
+
+    /* Periodic timer for ensuring future tests get scheduled */
+    private Timer mTimer;
+    public static final int RS_TIMER_PERIOD = 100;  // ms; used as both initial delay and period
+
+    public void init(RenderScriptGL rs, Resources res, int width, int height) {
+        mRS = rs;
+        mRes = res;
+        mWidth = width;
+        mHeight = height;
+        stopTesting = false;
+
+        mScript = new ScriptC_rslist(mRS, mRes, R.raw.rslist, true);
+
+        unitTests = new ArrayList<UnitTest>();
+
+        unitTests.add(new UT_primitives(this, mRes));
+        unitTests.add(new UT_rsdebug(this, mRes));
+        unitTests.add(new UT_fp_mad(this, mRes));
+        /*
+        unitTests.add(new UnitTest(null, "<Pass>", 1));
+        unitTests.add(new UnitTest());
+        unitTests.add(new UnitTest(null, "<Fail>", -1));
+
+        for (int i = 0; i < 20; i++) {
+            unitTests.add(new UnitTest(null, "<Pass>", 1));
+        }
+        */
+
+        UnitTest [] uta = new UnitTest[unitTests.size()];
+        uta = unitTests.toArray(uta);
+
+        mListAllocs = new ScriptField_ListAllocs_s(mRS, uta.length);  // one list item per test
+        for (int i = 0; i < uta.length; i++) {
+            ScriptField_ListAllocs_s.Item listElem = new ScriptField_ListAllocs_s.Item();
+            listElem.text = Allocation.createFromString(mRS, uta[i].name);
+            listElem.result = uta[i].result;
+            mListAllocs.set(listElem, i, false);
+            uta[i].setItem(listElem);  // the test writes its result into this item later
+        }
+
+        mListAllocs.copyAll();
+
+        mScript.bind_gList(mListAllocs);
+
+        mFont = Font.createFromFamily(mRS, mRes, "serif", Font.Style.BOLD, 8);
+        mScript.set_gFont(mFont);
+
+        mRS.contextBindRootScript(mScript);
+
+        test_iter = unitTests.listIterator();
+        refreshTestResults(); /* Kick off the first test */
+
+        TimerTask pTask = new TimerTask() {
+            public void run() {
+                refreshTestResults();
+            }
+        };
+
+        mTimer = new Timer();
+        mTimer.schedule(pTask, RS_TIMER_PERIOD, RS_TIMER_PERIOD);
+    }
+
+    public synchronized void checkAndRunNextTest() {
+        if (activeTest != null) {
+            if (!activeTest.isAlive()) {
+                /* Properly clean up on our last test */
+                try {
+                    activeTest.join();
+                }
+                catch (InterruptedException e) {
+                }
+                activeTest = null;
+            }
+        }
+
+        if (!stopTesting && activeTest == null) {
+            if (test_iter.hasNext()) {
+                activeTest = test_iter.next();
+                activeTest.start();
+                /* Reached from the timer task, the touch handlers and the
+                 * unit-test message callback (via refreshTestResults); the
+                 * method is synchronized so only one caller starts a test. */
+            }
+            else {
+                if (mTimer != null) {
+                    mTimer.cancel();
+                    mTimer.purge();
+                    mTimer = null;
+                }
+            }
+        }
+    }
+
+    public void refreshTestResults() {
+        checkAndRunNextTest();
+
+        if (mListAllocs != null && mScript != null && mRS != null) {
+            mListAllocs.copyAll();
+
+            mScript.bind_gList(mListAllocs);
+            mRS.contextBindRootScript(mScript);
+        }
+    }
+
+    public void cleanup() {
+        UnitTest t;
+        /* Stop scheduling under the lock so no new test can start afterwards */
+        synchronized (this) {
+            stopTesting = true;
+            t = activeTest;
+            if (mTimer != null) {
+                mTimer.cancel();
+                mTimer.purge();
+                mTimer = null;
+            }
+        }
+
+        /* Wait for the current test; join outside the lock so checkAndRunNextTest() stays callable */
+        if (t != null) {
+            try {
+                t.join();
+            }
+            catch (InterruptedException e) {
+            }
+        }
+    }
+
+    public void newTouchPosition(float x, float y, float pressure, int id) {
+    }
+
+    public void onActionDown(int x, int y) {
+        mScript.set_gDY(0.0f);
+        mLastX = x;
+        mLastY = y;
+        refreshTestResults();
+    }
+
+    public void onActionMove(int x, int y) {
+        int dx = mLastX - x;
+        int dy = mLastY - y;
+
+        if (Math.abs(dy) <= 2) {  // ignore vertical moves of 2 or less
+            dy = 0;
+        }
+
+        mScript.set_gDY(dy);
+
+        mLastX = x;
+        mLastY = y;
+        refreshTestResults();
+    }
+}
diff --git a/java/Film/src/com/android/film/FilmView.java b/java/tests/src/com/android/rs/test/RSTestView.java
similarity index 76%
copy from java/Film/src/com/android/film/FilmView.java
copy to java/tests/src/com/android/rs/test/RSTestView.java
index 5bc2811..c65f8c6 100644
--- a/java/Film/src/com/android/film/FilmView.java
+++ b/java/tests/src/com/android/rs/test/RSTestView.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package com.android.film;
+package com.android.rs.test;
 
 import java.io.Writer;
 import java.util.ArrayList;
@@ -39,23 +39,23 @@
 import android.view.KeyEvent;
 import android.view.MotionEvent;
 
-public class FilmView extends RSSurfaceView {
+public class RSTestView extends RSSurfaceView {
 
-    public FilmView(Context context) {
+    public RSTestView(Context context) {
         super(context);
         //setFocusable(true);
     }
 
     private RenderScriptGL mRS;
-    private FilmRS mRender;
-
+    private RSTestCore mRender;
 
     public void surfaceChanged(SurfaceHolder holder, int format, int w, int h) {
         super.surfaceChanged(holder, format, w, h);
         if (mRS == null) {
-            mRS = createRenderScript(true);
+            RenderScriptGL.SurfaceConfig sc = new RenderScriptGL.SurfaceConfig();
+            mRS = createRenderScript(sc);
             mRS.contextSetSurface(w, h, holder.getSurface());
-            mRender = new FilmRS();
+            mRender = new RSTestCore();
             mRender.init(mRS, getResources(), w, h);
         }
     }
@@ -63,6 +63,7 @@
     @Override
     protected void onDetachedFromWindow() {
         if(mRS != null) {
+            mRender.cleanup();
             mRS = null;
             destroyRenderScript();
         }
@@ -71,21 +72,23 @@
     @Override
     public boolean onKeyDown(int keyCode, KeyEvent event)
     {
-        // break point at here
-        // this method doesn't work when 'extends View' include 'extends ScrollView'.
         return super.onKeyDown(keyCode, event);
     }
 
-
     @Override
     public boolean onTouchEvent(MotionEvent ev)
     {
-        boolean ret = true;
+        boolean ret = false;
         int act = ev.getAction();
-        if (act == ev.ACTION_UP) {
-            ret = false;
+        if (act == ev.ACTION_DOWN) {
+            mRender.onActionDown((int)ev.getX(), (int)ev.getY());
+            ret = true;
         }
-        mRender.setFilmStripPosition((int)ev.getX(), (int)ev.getY() / 5);
+        else if (act == ev.ACTION_MOVE) {
+            mRender.onActionMove((int)ev.getX(), (int)ev.getY());
+            ret = true;
+        }
+
         return ret;
     }
 }
diff --git a/java/tests/src/com/android/rs/test/UT_fp_mad.java b/java/tests/src/com/android/rs/test/UT_fp_mad.java
new file mode 100644
index 0000000..9d57e90
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/UT_fp_mad.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+
+public class UT_fp_mad extends UnitTest {  // runs the fp_mad script in its own RenderScript context
+    private Resources mRes;
+
+    protected UT_fp_mad(RSTestCore rstc, Resources res) {
+        super(rstc, "Fp_Mad");
+        mRes = res;
+    }
+
+    public void run() {
+        RenderScript pRS = RenderScript.create();
+        ScriptC_fp_mad s = new ScriptC_fp_mad(pRS, mRes, R.raw.fp_mad, true);
+        pRS.mMessageCallback = mRsMessage;  // handler inherited from UnitTest
+        s.invoke_fp_mad_test(0, 0);
+        pRS.finish();
+        waitForMessage();  // returns once the handler has recorded a result
+        pRS.destroy();
+    }
+}
+
diff --git a/java/tests/src/com/android/rs/test/UT_primitives.java b/java/tests/src/com/android/rs/test/UT_primitives.java
new file mode 100644
index 0000000..da995da
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/UT_primitives.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+
+public class UT_primitives extends UnitTest {  // checks script globals of each primitive type, then runs the script test
+    private Resources mRes;
+
+    protected UT_primitives(RSTestCore rstc, Resources res) {
+        super(rstc, "Primitives");
+        mRes = res;
+    }
+    // Compares each script global with its expected initial value, then overwrites it; false on the first mismatch.
+    private boolean initializeGlobals(ScriptC_primitives s) {
+        float pF = s.get_floatTest();
+        if (pF != 1.99f) {
+            return false;
+        }
+        s.set_floatTest(2.99f);
+
+        double pD = s.get_doubleTest();
+        if (pD != 2.05) {
+            return false;
+        }
+        s.set_doubleTest(3.05);
+
+        byte pC = s.get_charTest();
+        if (pC != -8) {
+            return false;
+        }
+        s.set_charTest((byte)-16);
+
+        short pS = s.get_shortTest();
+        if (pS != -16) {
+            return false;
+        }
+        s.set_shortTest((short)-32);
+
+        int pI = s.get_intTest();
+        if (pI != -32) {
+            return false;
+        }
+        s.set_intTest(-64);
+
+        long pL = s.get_longTest();
+        if (pL != 17179869184L) {
+            return false;
+        }
+        s.set_longTest(17179869185L);
+
+        long puL = s.get_ulongTest();
+        if (puL != 4611686018427387904L) {
+            return false;
+        }
+        s.set_ulongTest(4611686018427387903L);
+
+
+        long pLL = s.get_longlongTest();
+        if (pLL != 68719476736L) {
+            return false;
+        }
+        s.set_longlongTest(68719476735L);
+
+        long pu64 = s.get_uint64_tTest();
+        if (pu64 != 117179869184L) {
+            return false;
+        }
+        s.set_uint64_tTest(117179869185L);
+
+        return true;
+    }
+
+    public void run() {
+        RenderScript pRS = RenderScript.create();
+        ScriptC_primitives s = new ScriptC_primitives(pRS, mRes, R.raw.primitives, true);
+        pRS.mMessageCallback = mRsMessage;
+        if (!initializeGlobals(s)) {
+            // initializeGlobals failed; NOTE(review): only the result field is set, the list item is not updated here
+            result = -1;
+        } else {
+            s.invoke_primitives_test(0, 0);
+            pRS.finish();
+            waitForMessage();
+        }
+        pRS.destroy();
+    }
+}
+
diff --git a/java/tests/src/com/android/rs/test/UT_rsdebug.java b/java/tests/src/com/android/rs/test/UT_rsdebug.java
new file mode 100644
index 0000000..c555658
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/UT_rsdebug.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+
+import android.content.res.Resources;
+import android.renderscript.*;
+
+public class UT_rsdebug extends UnitTest {  // runs the rsdebug script in its own RenderScript context
+    private Resources mRes;
+
+    protected UT_rsdebug(RSTestCore rstc, Resources res) {
+        super(rstc, "rsDebug");
+        mRes = res;
+    }
+
+    public void run() {
+        RenderScript pRS = RenderScript.create();
+        ScriptC_rsdebug s = new ScriptC_rsdebug(pRS, mRes, R.raw.rsdebug, true);
+        pRS.mMessageCallback = mRsMessage;  // handler inherited from UnitTest
+        s.invoke_test_rsdebug(0, 0);
+        pRS.finish();
+        waitForMessage();  // returns once the handler has recorded a result
+        pRS.destroy();
+    }
+}
+
diff --git a/java/tests/src/com/android/rs/test/UnitTest.java b/java/tests/src/com/android/rs/test/UnitTest.java
new file mode 100644
index 0000000..90bb8a3
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/UnitTest.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.test;
+import android.renderscript.RenderScript.RSMessage;
+import android.util.Log;
+
+public class UnitTest extends Thread {  // one unit test per thread; subclasses override run()
+    public String name;
+    public int result;  // starts at initResult (0 by default); the handler sets 1 on pass, -1 on fail
+    private ScriptField_ListAllocs_s.Item mItem;
+    private RSTestCore mRSTC;
+    private volatile boolean msgHandled;  // set by mRsMessage, polled in waitForMessage(); volatile for cross-thread visibility
+
+    /* These constants must match those in shared.rsh */
+    public static final int RS_MSG_TEST_PASSED = 100;
+    public static final int RS_MSG_TEST_FAILED = 101;
+
+    private static int numTests = 0;
+    public int testID;  // creation order: value of numTests when this test was constructed
+
+    protected UnitTest(RSTestCore rstc, String n, int initResult) {
+        super();
+        mRSTC = rstc;
+        name = n;
+        msgHandled = false;
+        result = initResult;
+        testID = numTests++;
+    }
+
+    protected UnitTest(RSTestCore rstc, String n) {
+        this(rstc, n, 0);
+    }
+
+    protected UnitTest(RSTestCore rstc) {
+        this (rstc, "<Unknown>");
+    }
+
+    protected UnitTest() {
+        this (null);
+    }
+
+    protected RSMessage mRsMessage = new RSMessage() {  // records pass/fail from the script's message ID
+        public void run() {
+            if (result == 0) {  // only the first pass/fail message sets the result
+                switch (mID) {
+                    case RS_MSG_TEST_PASSED:
+                        result = 1;
+                        break;
+                    case RS_MSG_TEST_FAILED:
+                        result = -1;
+                        break;
+                    default:
+                        android.util.Log.v("RenderScript", "Unit test got unexpected message");
+                        return;
+                }
+            }
+
+            if (mItem != null) {  // NOTE(review): with no item set, msgHandled is never raised and waitForMessage() spins
+                mItem.result = result;
+                msgHandled = true;
+                try {
+                    mRSTC.refreshTestResults();
+                }
+                catch (IllegalStateException e) {
+                    /* Ignore the case where our message receiver has been
+                       disconnected. This happens when we leave the application
+                       before it finishes running all of the unit tests. */
+                }
+            }
+        }
+    };
+
+    public void waitForMessage() {  // busy-waits (yielding) until the handler has recorded a result
+        while (!msgHandled) {
+            yield();
+        }
+    }
+
+    public void setItem(ScriptField_ListAllocs_s.Item item) {
+        mItem = item;
+    }
+
+    public void run() {
+        /* This method needs to be implemented for each subclass */
+        if (mRSTC != null) {
+            mRSTC.refreshTestResults();
+        }
+    }
+}
+
diff --git a/java/tests/src/com/android/rs/test/fp_mad.rs b/java/tests/src/com/android/rs/test/fp_mad.rs
new file mode 100644
index 0000000..066fab8
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/fp_mad.rs
@@ -0,0 +1,175 @@
+#include "shared.rsh"
+
+const int TEST_COUNT = 1;  // not referenced anywhere else in this file
+
+#pragma rs export_func(fp_mad_test)
+
+static float data_f1[1025];   // scalar benchmark data, filled by fp_mad_test()
+static float4 data_f4[1025];  // float4 benchmark data, filled by fp_mad_test()
+
+static void test_mad4(uint32_t index) {
+    // Times a 10-tap multiply-add pass over data_f4 and logs the rate via rsDebug.
+    start();
+
+    // Do ~1 billion ops: 12000 passes x 1000 elements x 10 taps x 4 lanes x (mul + add)
+    for (int ct=0; ct < 1000 * (1000 / 80); ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f4[i] = (data_f4[i] * 0.02f +
+                          data_f4[i+1] * 0.04f +
+                          data_f4[i+2] * 0.05f +
+                          data_f4[i+3] * 0.1f +
+                          data_f4[i+4] * 0.2f +
+                          data_f4[i+5] * 0.2f +
+                          data_f4[i+6] * 0.1f +
+                          data_f4[i+7] * 0.05f +
+                          data_f4[i+8] * 0.04f +
+                          data_f4[i+9] * 0.02f + 1.f);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_mad4 M ops", 1000.f / time);
+}
+
+static void test_mad(uint32_t index) {
+    // Times a 10-tap multiply-add pass over data_f1 and logs the rate via rsDebug.
+    start();
+
+    // Do ~1 billion ops: 50000 passes x 1000 elements x 10 taps x (mul + add)
+    for (int ct=0; ct < 1000 * (1000 / 20); ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f1[i] = (data_f1[i] * 0.02f +
+                          data_f1[i+1] * 0.04f +
+                          data_f1[i+2] * 0.05f +
+                          data_f1[i+3] * 0.1f +
+                          data_f1[i+4] * 0.2f +
+                          data_f1[i+5] * 0.2f +
+                          data_f1[i+6] * 0.1f +
+                          data_f1[i+7] * 0.05f +
+                          data_f1[i+8] * 0.04f +
+                          data_f1[i+9] * 0.02f + 1.f);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_mad M ops", 1000.f / time);
+}
+
+static void test_norm(uint32_t index) {
+    // Times normalize() applied in place to the first 1000 entries of data_f4.
+    start();
+
+    // Do ~10 M ops
+    for (int ct=0; ct < 1000 * 10; ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f4[i] = normalize(data_f4[i]);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_norm M ops", 10.f / time);
+}
+
+static void test_sincos4(uint32_t index) {
+    // Times sin(v) * cos(v) applied in place to the first 1000 entries of data_f4.
+    start();
+
+    // Do ~10 M ops
+    for (int ct=0; ct < 1000 * 10 / 4; ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f4[i] = sin(data_f4[i]) * cos(data_f4[i]);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_sincos4 M ops", 10.f / time);
+}
+
+static void test_sincos(uint32_t index) {
+    // Times sin(v) * cos(v) applied in place to the first 1000 entries of data_f1.
+    start();
+
+    // Do ~10 M ops
+    for (int ct=0; ct < 1000 * 10; ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f1[i] = sin(data_f1[i]) * cos(data_f1[i]);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_sincos M ops", 10.f / time);
+}
+
+static void test_clamp(uint32_t index) {
+    // Times the built-in clamp() and then a hand-written reference clamp on data_f1.
+    start();
+    // Do ~100 M ops
+    for (int ct=0; ct < 1000 * 100; ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f1[i] = clamp(data_f1[i], -1.f, 1.f);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_clamp M ops", 100.f / time);
+
+    start();
+    // Do ~100 M ops; ct is declared again because the first loop's ct is out of scope here
+    for (int ct=0; ct < 1000 * 100; ct++) {
+        for (int i=0; i < (1000); i++) {
+            if (data_f1[i] < -1.f) data_f1[i] = -1.f;
+            if (data_f1[i] > 1.f) data_f1[i] = 1.f;
+        }
+    }
+
+    time = end(index);
+    rsDebug("fp_clamp ref M ops", 100.f / time);
+}
+
+static void test_clamp4(uint32_t index) {
+    // Times clamp() to [-1, 1] applied in place to the first 1000 entries of data_f4.
+    start();
+
+    // Do ~100 M ops
+    for (int ct=0; ct < 1000 * 100 /4; ct++) {
+        for (int i=0; i < (1000); i++) {
+            data_f4[i] = clamp(data_f4[i], -1.f, 1.f);
+        }
+    }
+
+    float time = end(index);
+    rsDebug("fp_clamp4 M ops", 100.f / time);
+}
+
+void fp_mad_test(uint32_t index, int test_num) {
+    // Seed both arrays from masked index bits before the multiply-add runs; test_num is unused.
+    for (int x=0; x < 1025; x++) {
+        data_f1[x] = (x & 0xf) * 0.1f;
+        data_f4[x].x = (x & 0xf) * 0.1f;
+        data_f4[x].y = (x & 0xf0) * 0.1f;
+        data_f4[x].z = (x & 0x33) * 0.1f;
+        data_f4[x].w = (x & 0x77) * 0.1f;
+    }
+    test_mad4(index);
+    test_mad(index);
+    // Re-seed (same pattern + 1.0); x is declared again because the first loop's x is out of scope here.
+    for (int x=0; x < 1025; x++) {
+        data_f1[x] = (x & 0xf) * 0.1f + 1.f;
+        data_f4[x].x = (x & 0xf) * 0.1f + 1.f;
+        data_f4[x].y = (x & 0xf0) * 0.1f + 1.f;
+        data_f4[x].z = (x & 0x33) * 0.1f + 1.f;
+        data_f4[x].w = (x & 0x77) * 0.1f + 1.f;
+    }
+
+    test_norm(index);
+    test_sincos4(index);
+    test_sincos(index);
+    test_clamp4(index);
+    test_clamp(index);
+
+    // TODO Actually verify test result accuracy
+    rsDebug("fp_mad_test PASSED", 0);
+    rsSendToClientBlocking(RS_MSG_TEST_PASSED);
+}
+
+
diff --git a/java/tests/src/com/android/rs/test/primitives.rs b/java/tests/src/com/android/rs/test/primitives.rs
new file mode 100644
index 0000000..351a8a5
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/primitives.rs
@@ -0,0 +1,63 @@
+#include "shared.rsh"
+
+#pragma rs export_func(primitives_test)
+
+// Script globals, one per primitive type; test_primitive_types() compares each against a fixed value.
+float floatTest = 1.99f;
+double doubleTest = 2.05;
+char charTest = -8;
+short shortTest = -16;
+int intTest = -32;
+long longTest = 17179869184l; // 1 << 34
+long long longlongTest = 68719476736l; // 1 << 36
+
+uchar ucharTest = 8;
+ushort ushortTest = 16;
+uint uintTest = 32;
+ulong ulongTest = 4611686018427387904L; // 1 << 62
+int64_t int64_tTest = -17179869184l; // -(1 << 34)
+uint64_t uint64_tTest = 117179869184l;
+
+static bool test_primitive_types(uint32_t index) {
+    // Compares each script global against a fixed expected value. Several
+    // expectations differ from the initializers in this file, so the globals
+    // are presumably overwritten by the host before this runs -- TODO confirm.
+    bool failed = false;  // _RS_ASSERT sets this to true when a check fails
+    start();
+
+    // Floating-point and signed integer globals.
+    _RS_ASSERT(floatTest == 2.99f);
+    _RS_ASSERT(doubleTest == 3.05);
+    _RS_ASSERT(charTest == -16);
+    _RS_ASSERT(shortTest == -32);
+    _RS_ASSERT(intTest == -64);
+    _RS_ASSERT(longTest == 17179869185l);
+    _RS_ASSERT(longlongTest == 68719476735l);
+
+    _RS_ASSERT(ucharTest == 8);
+    _RS_ASSERT(ushortTest == 16);
+    _RS_ASSERT(uintTest == 32);
+    _RS_ASSERT(ulongTest == 4611686018427387903L);
+    _RS_ASSERT(int64_tTest == -17179869184l);
+    _RS_ASSERT(uint64_tTest == 117179869185l);
+
+    float elapsed = end(index);
+
+    // Log a single verdict line together with the elapsed time.
+    rsDebug(failed ? "test_primitives FAILED" : "test_primitives PASSED", elapsed);
+
+    return failed;
+}
+
+void primitives_test(uint32_t index, int test_num) {
+    // Entry point named in this file's export_func pragma: runs the primitive
+    // checks and sends exactly one verdict message. test_num is not used.
+    const bool anyFailed = test_primitive_types(index);
+
+    if (anyFailed) {
+        rsSendToClientBlocking(RS_MSG_TEST_FAILED);
+        return;
+    }
+    rsSendToClientBlocking(RS_MSG_TEST_PASSED);
+}
+
diff --git a/java/tests/src/com/android/rs/test/rsdebug.rs b/java/tests/src/com/android/rs/test/rsdebug.rs
new file mode 100644
index 0000000..a849234
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/rsdebug.rs
@@ -0,0 +1,58 @@
+#include "shared.rsh"
+
+#pragma rs export_func(test_rsdebug)
+
+// Script globals, one per primitive type; basic_test() passes each one to rsDebug().
+float floatTest = 1.99f;
+double doubleTest = 2.05;
+char charTest = -8;
+short shortTest = -16;
+int intTest = -32;
+long longTest = 17179869184l; // 1 << 34
+long long longlongTest = 68719476736l; // 1 << 36
+
+uchar ucharTest = 8;
+ushort ushortTest = 16;
+uint uintTest = 32;
+ulong ulongTest = 4611686018427387904L; // 1 << 62
+int64_t int64_tTest = -17179869184l; // -(1 << 34)
+uint64_t uint64_tTest = 117179869184l;
+
+static bool basic_test(uint32_t index) {
+    bool failed = false;  // never modified below, so this function always returns false; index is unused
+
+    // This test focuses primarily on compilation-time, not run-time: it calls
+    // rsDebug() once per primitive type, and none of the outputs are actually checked.
+
+    rsDebug("floatTest", floatTest);
+    rsDebug("doubleTest", doubleTest);
+    rsDebug("charTest", charTest);
+    rsDebug("shortTest", shortTest);
+    rsDebug("intTest", intTest);
+    rsDebug("longTest", longTest);
+    rsDebug("longlongTest", longlongTest);
+
+    rsDebug("ucharTest", ucharTest);
+    rsDebug("ushortTest", ushortTest);
+    rsDebug("uintTest", uintTest);
+    rsDebug("ulongTest", ulongTest);
+    rsDebug("int64_tTest", int64_tTest);
+    rsDebug("uint64_tTest", uint64_tTest);
+
+    return failed;
+}
+
+void test_rsdebug(uint32_t index, int test_num) {
+    // Entry point named in this file's export_func pragma: runs basic_test(),
+    // sends one verdict message, then logs it. test_num is not used.
+    const bool anyFailed = basic_test(index);
+    if (anyFailed) {
+        rsSendToClientBlocking(RS_MSG_TEST_FAILED);
+        rsDebug("rsdebug_test FAILED", -1);
+        return;
+    }
+
+    rsSendToClientBlocking(RS_MSG_TEST_PASSED);
+    rsDebug("rsdebug_test PASSED", 0);
+}
+
diff --git a/java/tests/src/com/android/rs/test/rslist.rs b/java/tests/src/com/android/rs/test/rslist.rs
new file mode 100644
index 0000000..b2d06fe
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/rslist.rs
@@ -0,0 +1,107 @@
+// Copyright (C) 2009 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.rs.test)
+
+#include "rs_graphics.rsh"
+
+float gDY;  // scroll delta that root() applies to textPos and then damps; presumably written by the host -- TODO confirm
+
+rs_font gFont;  // font bound by root() before the rows are drawn
+
+typedef struct ListAllocs_s {
+    rs_allocation text;  // passed to rsgDrawText() as the row label
+    int result;  // 1 = passed, -1 = failed, 0 = still testing (see the switch in root())
+} ListAllocs;
+
+ListAllocs *gList;  // one entry per row; root() takes the row count from its allocation's X dimension
+
+void init() {
+    gDY = 0.0f;  // start with no scroll delta
+}
+
+int textPos = 0;  // scroll offset; root() clamps it between lastItem and firstItem
+
+int root(int launchID) {
+    // Draws one frame of the scrollable result list: a separator rectangle and
+    // a text label per gList entry, coloured by that entry's result code.
+    rsgClearColor(0.0f, 0.0f, 0.0f, 0.0f);
+    rsgClearDepth(1.0f);
+
+    // Move the list by the current gDY, then damp gDY for the next frame.
+    textPos -= (int)gDY*2;
+    gDY *= 0.95;
+
+    rsgFontColor(0.9f, 0.9f, 0.9f, 1.0f);
+    rsgBindFont(gFont);
+    color(0.2, 0.2, 0.2, 0);
+
+    rs_allocation listAlloc = rsGetAllocation(gList);
+    const int rowCount = rsAllocationGetDimX(listAlloc);
+
+    const int width = rsgGetWidth();
+    const int height = rsgGetHeight();
+
+    const int itemHeight = 80;
+    // Surface height minus total list height: negative when the list is
+    // taller than the surface, positive when there is room to spare.
+    const int slack = height - itemHeight * rowCount;
+
+    /* Prevent scrolling above the top of the list */
+    const int firstItem = (slack < 0) ? 0 : slack;
+    /* Prevent scrolling past the last line of the list */
+    const int lastItem = (slack > 0) ? 0 : slack;
+
+    // Clamp the scroll offset into [lastItem, firstItem].
+    if (textPos > firstItem) {
+        textPos = firstItem;
+    }
+    else if (textPos < lastItem) {
+        textPos = lastItem;
+    }
+
+    // rowBottom is the y value handed to rsgDrawRect for the current row.
+    int rowBottom = itemHeight + textPos;
+    for (int row = 0; row < rowCount; row++, rowBottom += itemHeight) {
+        // This row and every later one start beyond the surface height.
+        if (rowBottom - itemHeight > height) {
+            break;
+        }
+        // Skip rows that end at or before y = 0.
+        if (rowBottom <= 0) {
+            continue;
+        }
+
+        switch (gList[row].result) {
+            case 1: /* Passed */
+                rsgFontColor(0.5f, 0.9f, 0.5f, 1.0f);
+                break;
+            case -1: /* Failed */
+                rsgFontColor(0.9f, 0.5f, 0.5f, 1.0f);
+                break;
+            case 0: /* Still Testing */
+                rsgFontColor(0.9f, 0.9f, 0.5f, 1.0f);
+                break;
+            default: /* Unknown */
+                rsgFontColor(0.9f, 0.9f, 0.9f, 1.0f);
+                break;
+        }
+        rsgDrawRect(0, rowBottom - 1, width, rowBottom, 0);
+        rsgDrawText(gList[row].text, 30, rowBottom - 32);
+    }
+
+    return 10;
+}
diff --git a/java/tests/src/com/android/rs/test/shared.rsh b/java/tests/src/com/android/rs/test/shared.rsh
new file mode 100644
index 0000000..21be9af
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/shared.rsh
@@ -0,0 +1,38 @@
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.rs.test)
+
+typedef struct TestResult_s {  // no instance exists in this header while g_results stays commented out
+    rs_allocation name;
+    bool pass;
+    float score;
+    int64_t time;  // only referenced by the commented-out store in end()
+} TestResult;
+//TestResult *g_results;
+
+static int64_t g_time;  // rsUptimeMillis() reading captured by the most recent start()
+
+static void start(void) {
+    g_time = rsUptimeMillis();  // timestamp that end() later subtracts from
+}
+
+static float end(uint32_t idx) {
+    int64_t t = rsUptimeMillis() - g_time;  // elapsed since start(); presumably milliseconds, going by the function name
+    //g_results[idx].time = t;   (disabled together with g_results, which leaves idx unused)
+    //rsDebug("test time", (int)t);
+    return ((float)t) / 1000.f;  // scaled by 1/1000 (seconds, if t is in milliseconds)
+}
+
+#define _RS_ASSERT(b) /* needs a local 'bool failed' in scope; on a false b it sets failed and logs the expression text */ \
+do { \
+    if (!(b)) { \
+        failed = true; \
+        rsDebug(#b " FAILED", 0); \
+    } \
+\
+} while (0)
+
+/* These constants must match those in UnitTest.java */
+static const int RS_MSG_TEST_PASSED = 100;
+static const int RS_MSG_TEST_FAILED = 101;
+
diff --git a/java/tests/src/com/android/rs/test/test_root.rs b/java/tests/src/com/android/rs/test/test_root.rs
new file mode 100644
index 0000000..6dc83ba
--- /dev/null
+++ b/java/tests/src/com/android/rs/test/test_root.rs
@@ -0,0 +1,23 @@
+// Fountain test script
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.rs.test)
+
+#pragma stateFragment(parent)
+
+#include "rs_graphics.rsh"
+
+
+typedef struct TestResult {  // layout of the entries behind the results pointer
+    rs_allocation name;
+    bool pass;
+    float score;
+} TestResult_t;
+TestResult_t *results;  // not referenced by root() in this file
+
+int root() {
+    // Does no drawing or other work; just returns 0.
+    return 0;
+}
+
+
diff --git a/rs.spec b/rs.spec
index 5ae8d01..eb2942e 100644
--- a/rs.spec
+++ b/rs.spec
@@ -1,11 +1,14 @@
 
+ContextFinish {
+	handcodeApi
+	}
 
 ContextBindRootScript {
 	param RsScript sampler
 	}
 
-ContextBindProgramFragmentStore {
-	param RsProgramFragmentStore pgm
+ContextBindProgramStore {
+	param RsProgramStore pgm
 	}
 
 ContextBindProgramFragment {
@@ -20,6 +23,10 @@
 	param RsProgramRaster pgm
 	}
 
+ContextBindFont {
+	param RsFont pgm
+	}
+
 ContextPause {
 	}
 
@@ -51,8 +58,13 @@
 	param size_t len
 	}
 
-ObjDestroy {
+GetName {
 	param void *obj
+	param const char **name
+	}
+
+ObjDestroy {
+	param RsAsyncVoidPtr objPtr
 	}
 
 ElementCreate {
@@ -68,20 +80,28 @@
 	param const RsElement * elements
 	param const char ** names
 	param const size_t * nameLengths
+	param const uint32_t * arraySize
 	ret RsElement
 	}
 
-TypeBegin {
-	param RsElement type
+ElementGetNativeData {
+	param RsElement elem
+	param uint32_t *elemData
+	param uint32_t elemDataSize
 	}
 
-TypeAdd {
-	param RsDimension dim
-	param size_t value
+ElementGetSubElements {
+	param RsElement elem
+	param uint32_t *ids
+	param const char **names
+	param uint32_t dataSize
 	}
 
-TypeCreate {
-	ret RsType
+
+TypeGetNativeData {
+	param RsType type
+	param uint32_t * typeData
+	param uint32_t typeDataSize
 	}
 
 AllocationCreateTyped {
@@ -95,10 +115,16 @@
 	ret RsAllocation
 	}
 
+AllocationUpdateFromBitmap {
+	param RsAllocation alloc
+	param RsElement srcFmt
+	param const void * data
+	}
+
 AllocationCreateBitmapRef {
 	param RsType type
-	param void * bmpPtr
-	param void * callbackData
+	param RsAsyncVoidPtr bmpPtr
+	param RsAsyncVoidPtr callbackData
 	param RsBitmapCallback_t callback
 	ret RsAllocation
 	}
@@ -113,16 +139,6 @@
 	ret RsAllocation
 	}
 
-AllocationCreateFromBitmapBoxed {
-	param uint32_t width
-	param uint32_t height
-	param RsElement dstFmt
-	param RsElement srcFmt
-	param bool genMips
-	param const void * data
-	ret RsAllocation
-	}
-
 
 AllocationUploadToTexture {
 	param RsAllocation alloc
@@ -153,6 +169,16 @@
 	togglePlay
 	}
 
+Allocation1DSubElementData {
+	param RsAllocation va
+	param uint32_t x
+	param const void *data
+	param uint32_t comp_offset
+	param uint32_t bytes
+	handcodeApi
+	togglePlay
+	}
+
 Allocation2DSubData {
 	param RsAllocation va
 	param uint32_t xoff
@@ -163,6 +189,15 @@
 	param uint32_t bytes
 	}
 
+Allocation2DSubElementData {
+	param RsAllocation va
+	param uint32_t x
+	param uint32_t y
+	param const void *data
+	param uint32_t element_offset
+	param uint32_t bytes
+	}
+
 AllocationRead {
 	param RsAllocation va
 	param void * data
@@ -224,6 +259,22 @@
 	param const void *data
 	}
 
+AllocationGetType {
+	param RsAllocation va
+	ret const void*
+	}
+
+AllocationResize1D {
+	param RsAllocation va
+	param uint32_t dimX
+	}
+
+AllocationResize2D {
+	param RsAllocation va
+	param uint32_t dimX
+	param uint32_t dimY
+	}
+
 SamplerBegin {
 	}
 
@@ -232,6 +283,11 @@
 	param RsSamplerValue value
 	}
 
+SamplerSet2 {
+	param RsSamplerParam p
+	param float value
+	}
+
 SamplerCreate {
 	ret RsSampler
 	}
@@ -248,13 +304,6 @@
 ScriptCBegin {
 	}
 
-ScriptSetClearColor {
-	param RsScript s
-	param float r
-	param float g
-	param float b
-	param float a
-	}
 
 ScriptSetTimeZone {
 	param RsScript s
@@ -262,43 +311,55 @@
 	param uint32_t length
 	}
 
-ScriptSetClearDepth {
-	param RsScript s
-	param float depth
-	}
-
-ScriptSetClearStencil {
-	param RsScript s
-	param uint32_t stencil
-	}
-
-ScriptSetType {
-	param RsType type
-	param uint32_t slot
-	param bool isWritable
-	param const char * name
-	}
-
-ScriptSetInvoke {
-	param const char * name
-	param uint32_t slot
-	}
 
 ScriptInvoke {
 	param RsScript s
 	param uint32_t slot
 	}
 
-ScriptSetRoot {
-	param bool isRoot
+ScriptInvokeV {
+	param RsScript s
+	param uint32_t slot
+	param const void * data
+	param uint32_t dataLen
+	handcodeApi
+	togglePlay
 	}
 
-
-
-ScriptCSetScript {
-	param void * codePtr
+ScriptSetVarI {
+	param RsScript s
+	param uint32_t slot
+	param int value
 	}
 
+ScriptSetVarJ {
+	param RsScript s
+	param uint32_t slot
+	param int64_t value
+	}
+
+ScriptSetVarF {
+	param RsScript s
+	param uint32_t slot
+	param float value
+	}
+
+ScriptSetVarD {
+	param RsScript s
+	param uint32_t slot
+	param double value
+	}
+
+ScriptSetVarV {
+	param RsScript s
+	param uint32_t slot
+	param const void * data
+	param uint32_t dataLen
+	handcodeApi
+	togglePlay
+	}
+
+
 ScriptCSetText {
 	param const char * text
 	param uint32_t length
@@ -308,52 +369,41 @@
 	ret RsScript
 	}
 
-ScriptCSetDefineF {
-    param const char* name
-    param float value
-    }
 
-ScriptCSetDefineI32 {
-    param const char* name
-    param int32_t value
-    }
-
-ProgramFragmentStoreBegin {
+ProgramStoreBegin {
 	param RsElement in
 	param RsElement out
 	}
 
-ProgramFragmentStoreColorMask {
+ProgramStoreColorMask {
 	param bool r
 	param bool g
 	param bool b
 	param bool a
 	}
 
-ProgramFragmentStoreBlendFunc {
+ProgramStoreBlendFunc {
 	param RsBlendSrcFunc srcFunc
 	param RsBlendDstFunc destFunc
 	}
 
-ProgramFragmentStoreDepthMask {
+ProgramStoreDepthMask {
 	param bool enable
 }
 
-ProgramFragmentStoreDither {
+ProgramStoreDither {
 	param bool enable
 }
 
-ProgramFragmentStoreDepthFunc {
+ProgramStoreDepthFunc {
 	param RsDepthFunc func
 }
 
-ProgramFragmentStoreCreate {
-	ret RsProgramFragmentStore
+ProgramStoreCreate {
+	ret RsProgramStore
 	}
 
 ProgramRasterCreate {
-	param RsElement in
-	param RsElement out
 	param bool pointSmooth
 	param bool lineSmooth
 	param bool pointSprite
@@ -365,12 +415,11 @@
 	param float lw
 }
 
-ProgramRasterSetPointSize{
+ProgramRasterSetCullMode {
 	param RsProgramRaster pr
-	param float ps
+	param RsCullMode mode
 }
 
-
 ProgramBindConstants {
 	param RsProgram vp
 	param uint32_t slot
@@ -391,12 +440,6 @@
 	}
 
 ProgramFragmentCreate {
-	param const uint32_t * params
-	param uint32_t paramLength
-	ret RsProgramFragment
-	}
-
-ProgramFragmentCreate2 {
 	param const char * shaderText
 	param uint32_t shaderLength
 	param const uint32_t * params
@@ -405,11 +448,6 @@
 	}
 
 ProgramVertexCreate {
-	param bool texMat
-	ret RsProgramVertex
-	}
-
-ProgramVertexCreate2 {
 	param const char * shaderText
 	param uint32_t shaderLength
 	param const uint32_t * params
@@ -417,34 +455,10 @@
 	ret RsProgramVertex
 	}
 
-LightBegin {
-	}
-
-LightSetLocal {
-	param bool isLocal
-	}
-
-LightSetMonochromatic {
-	param bool isMono
-	}
-
-LightCreate {
-	ret RsLight light
-	}
-
-
-LightSetPosition {
-	param RsLight light
-	param float x
-	param float y
-	param float z
-	}
-
-LightSetColor {
-	param RsLight light
-	param float r
-	param float g
-	param float b
+FileA3DCreateFromAssetStream {
+	param const void * data
+	param size_t len
+	ret RsFile
 	}
 
 FileOpen {
@@ -453,30 +467,79 @@
 	param size_t len
 	}
 
+FileA3DGetNumIndexEntries {
+	param int32_t * numEntries
+	param RsFile file
+	}
 
-SimpleMeshCreate {
-	ret RsSimpleMesh
-	param RsAllocation prim
-	param RsAllocation index
-	param RsAllocation *vtx
+FileA3DGetIndexEntries {
+	param RsFileIndexEntry * fileEntries
+	param uint32_t numEntries
+	param RsFile fileA3D
+	}
+
+FileA3DGetEntryByIndex {
+	param uint32_t index
+	param RsFile file
+	ret RsObjectBase
+	}
+
+FontCreateFromFile {
+	param const char *name
+	param uint32_t fontSize
+	param uint32_t dpi
+	ret RsFont
+	}
+
+MeshCreate {
+	ret RsMesh
 	param uint32_t vtxCount
-	param uint32_t primType
+	param uint32_t idxCount
 	}
 
-
-SimpleMeshBindIndex {
-	param RsSimpleMesh mesh
+MeshBindIndex {
+	param RsMesh mesh
 	param RsAllocation idx
+	param uint32_t primType
+	param uint32_t slot
 	}
 
-SimpleMeshBindPrimitive {
-	param RsSimpleMesh mesh
-	param RsAllocation prim
-	}
-
-SimpleMeshBindVertex {
-	param RsSimpleMesh mesh
+MeshBindVertex {
+	param RsMesh mesh
 	param RsAllocation vtx
 	param uint32_t slot
 	}
 
+MeshGetVertexBufferCount {
+	param RsMesh mesh
+	param int32_t *numVtx
+	}
+
+MeshGetIndexCount {
+	param RsMesh mesh
+	param int32_t *numIdx
+	}
+
+MeshGetVertices {
+	param RsMesh mv
+	param RsAllocation *vtxData
+	param uint32_t vtxDataCount
+	}
+
+MeshGetIndices {
+	param RsMesh mv
+	param RsAllocation *va
+	param uint32_t *primType
+	param uint32_t idxDataCount
+	}
+
+AnimationCreate {
+	param const float *inValues
+	param const float *outValues
+	param uint32_t valueCount
+	param RsAnimationInterpolation interp
+	param RsAnimationEdge pre
+	param RsAnimationEdge post
+	ret RsAnimation
+	}
+
diff --git a/rsAdapter.cpp b/rsAdapter.cpp
index 0d31fac..ef69b75 100644
--- a/rsAdapter.cpp
+++ b/rsAdapter.cpp
@@ -15,7 +15,11 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif
 
 using namespace android;
 using namespace android::renderscript;
@@ -70,6 +74,16 @@
            mAllocation.get()->getType()->getSizeBytes());
 }
 
+void Adapter1D::serialize(OStream *stream) const
+{
+    // Stub: writes nothing to the stream.
+}
+
+Adapter1D *Adapter1D::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL;  // stub: reads nothing from the stream and never creates an object
+}
+
 namespace android {
 namespace renderscript {
 
@@ -169,7 +183,6 @@
 
     uint32_t eSize = mAllocation.get()->getType()->getElementSizeBytes();
     uint32_t lineSize = eSize * w;
-    uint32_t destW = getDimX();
 
     const uint8_t *src = static_cast<const uint8_t *>(data);
     for (uint32_t line=yoff; line < (yoff+h); line++) {
@@ -185,6 +198,15 @@
            mAllocation.get()->getType()->getSizeBytes());
 }
 
+void Adapter2D::serialize(OStream *stream) const
+{
+    // Stub: writes nothing to the stream.
+}
+
+Adapter2D *Adapter2D::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL;  // stub: reads nothing from the stream and never creates an object
+}
 
 
 namespace android {
diff --git a/rsAdapter.h b/rsAdapter.h
index cb2872e..449e7ad 100644
--- a/rsAdapter.h
+++ b/rsAdapter.h
@@ -50,6 +50,10 @@
     void subData(uint32_t xoff, uint32_t count, const void *data);
     void data(const void *data);
 
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ADAPTER_1D; }
+    static Adapter1D *createFromStream(Context *rsc, IStream *stream);
+
 protected:
     ObjectBaseRef<Allocation> mAllocation;
     uint32_t mY;
@@ -82,6 +86,10 @@
     void data(const void *data);
     void subData(uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data);
 
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ADAPTER_2D; }
+    static Adapter2D *createFromStream(Context *rsc, IStream *stream);
+
 protected:
     ObjectBaseRef<Allocation> mAllocation;
     uint32_t mZ;
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index 4e8278d..6748bb4 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -13,12 +13,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
 
 #include <GLES/gl.h>
 #include <GLES2/gl2.h>
 #include <GLES/glext.h>
+#else
+#include "rsContextHostStub.h"
+
+#include <OpenGL/gl.h>
+#include <OpenGl/glext.h>
+#endif
+
+#include "utils/StopWatch.h"
 
 using namespace android;
 using namespace android::renderscript;
@@ -28,6 +36,9 @@
     init(rsc, type);
 
     mPtr = malloc(mType->getSizeBytes());
+    if (mPtr && mType->getElement()->getHasReferences()) {  // skip when malloc failed; the check below logs that case
+        memset(mPtr, 0, mType->getSizeBytes());  // zero the buffer when the element type holds references
+    }
     if (!mPtr) {
         LOGE("Allocation::Allocation, alloc failure");
     }
@@ -141,6 +152,8 @@
         return;
     }
 
+    bool isFirstUpload = false;
+
     if (!mTextureID) {
         glGenTextures(1, &mTextureID);
 
@@ -153,6 +166,7 @@
             mUploadDefered = true;
             return;
         }
+        isFirstUpload = true;
     }
     glBindTexture(GL_TEXTURE_2D, mTextureID);
     glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
@@ -162,14 +176,23 @@
         adapt.setLOD(lod+mTextureLOD);
 
         uint16_t * ptr = static_cast<uint16_t *>(adapt.getElement(0,0));
-        glTexImage2D(GL_TEXTURE_2D, lod, format,
-                     adapt.getDimX(), adapt.getDimY(),
-                     0, format, type, ptr);
+        if(isFirstUpload) {
+            glTexImage2D(GL_TEXTURE_2D, lod, format,
+                         adapt.getDimX(), adapt.getDimY(),
+                         0, format, type, ptr);
+        } else {
+            glTexSubImage2D(GL_TEXTURE_2D, lod, 0, 0,
+                         adapt.getDimX(), adapt.getDimY(),
+                         format, type, ptr);
+        }
     }
     if (mTextureGenMipmap) {
+#ifndef ANDROID_RS_BUILD_FOR_HOST
         glGenerateMipmap(GL_TEXTURE_2D);
+#endif //ANDROID_RS_BUILD_FOR_HOST
     }
 
+    rsc->checkError("Allocation::uploadToTexture");
 }
 
 void Allocation::deferedUploadToBufferObject(const Context *rsc)
@@ -201,6 +224,7 @@
     glBindBuffer(GL_ARRAY_BUFFER, mBufferID);
     glBufferData(GL_ARRAY_BUFFER, mType->getSizeBytes(), getPtr(), GL_DYNAMIC_DRAW);
     glBindBuffer(GL_ARRAY_BUFFER, 0);
+    rsc->checkError("Allocation::uploadToBufferObject");
 }
 
 void Allocation::uploadCheck(const Context *rsc)
@@ -217,13 +241,19 @@
 }
 
 
-void Allocation::data(const void *data, uint32_t sizeBytes)
+void Allocation::data(Context *rsc, const void *data, uint32_t sizeBytes)
 {
     uint32_t size = mType->getSizeBytes();
     if (size != sizeBytes) {
         LOGE("Allocation::data called with mismatched size expected %i, got %i", size, sizeBytes);
         return;
     }
+
+    if (mType->getElement()->getHasReferences()) {
+        incRefs(data, sizeBytes / mType->getElement()->getSizeBytes());
+        decRefs(mPtr, sizeBytes / mType->getElement()->getSizeBytes());
+    }
+
     memcpy(mPtr, data, size);
     sendDirty();
     mUploadDefered = true;
@@ -234,7 +264,7 @@
     memcpy(data, mPtr, mType->getSizeBytes());
 }
 
-void Allocation::subData(uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes)
+void Allocation::subData(Context *rsc, uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes)
 {
     uint32_t eSize = mType->getElementSizeBytes();
     uint8_t * ptr = static_cast<uint8_t *>(mPtr);
@@ -246,12 +276,18 @@
         mType->dumpLOGV("type info");
         return;
     }
+
+    if (mType->getElement()->getHasReferences()) {
+        incRefs(data, count);
+        decRefs(ptr, count);
+    }
+
     memcpy(ptr, data, size);
     sendDirty();
     mUploadDefered = true;
 }
 
-void Allocation::subData(uint32_t xoff, uint32_t yoff,
+void Allocation::subData(Context *rsc, uint32_t xoff, uint32_t yoff,
              uint32_t w, uint32_t h, const void *data, uint32_t sizeBytes)
 {
     uint32_t eSize = mType->getElementSizeBytes();
@@ -268,7 +304,10 @@
     }
 
     for (uint32_t line=yoff; line < (yoff+h); line++) {
-        uint8_t * ptr = static_cast<uint8_t *>(mPtr);
+        if (mType->getElement()->getHasReferences()) {
+            incRefs(src, w);
+            decRefs(dst, w);
+        }
         memcpy(dst, src, lineSize);
         src += lineSize;
         dst += destW * eSize;
@@ -277,14 +316,96 @@
     mUploadDefered = true;
 }
 
-void Allocation::subData(uint32_t xoff, uint32_t yoff, uint32_t zoff,
+void Allocation::subData(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff,
              uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes)
 {
 }
 
+void Allocation::subElementData(Context *rsc, uint32_t x, const void *data,
+                                uint32_t cIdx, uint32_t sizeBytes)
+{
+    // Overwrites field cIdx of the element at index x with sizeBytes bytes from data.
+    // Bad arguments are logged, passed to rsc->setError, and otherwise ignored.
+    if (cIdx >= mType->getElement()->getFieldCount()) {
+        LOGE("Error Allocation::subElementData component %i out of range.", cIdx);
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData component out of range.");
+        return;
+    }
+
+    if (x >= mType->getDimX()) {
+        LOGE("Error Allocation::subElementData X offset %i out of range.", x);
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData X offset out of range.");
+        return;
+    }
+
+    const Element * e = mType->getElement()->getField(cIdx);
+    if (sizeBytes != e->getSizeBytes()) {
+        LOGE("Error Allocation::subElementData data size %i does not match field size %i.", sizeBytes, e->getSizeBytes());
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData bad size.");
+        return;
+    }
+
+    // Form the destination address only after x and cIdx have been validated.
+    uint8_t * ptr = static_cast<uint8_t *>(mPtr);
+    ptr += mType->getElementSizeBytes() * x;
+    ptr += mType->getElement()->getFieldOffsetBytes(cIdx);
+    if (e->getHasReferences()) {
+        e->incRefs(data);
+        e->decRefs(ptr);
+    }
+
+    memcpy(ptr, data, sizeBytes);
+    sendDirty();
+    mUploadDefered = true;
+}
+
+void Allocation::subElementData(Context *rsc, uint32_t x, uint32_t y,
+                                const void *data, uint32_t cIdx, uint32_t sizeBytes)
+{
+    // Overwrites field cIdx of the element at (x, y) with sizeBytes bytes from data.
+    // Bad arguments are logged, passed to rsc->setError, and otherwise ignored.
+    if (x >= mType->getDimX()) {
+        LOGE("Error Allocation::subElementData X offset %i out of range.", x);
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData X offset out of range.");
+        return;
+    }
+
+    if (y >= mType->getDimY()) {
+        LOGE("Error Allocation::subElementData Y offset %i out of range.", y);
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData Y offset out of range.");
+        return;
+    }
+
+    if (cIdx >= mType->getElement()->getFieldCount()) {
+        LOGE("Error Allocation::subElementData component %i out of range.", cIdx);
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData component out of range.");
+        return;
+    }
+
+    const Element * e = mType->getElement()->getField(cIdx);
+    if (sizeBytes != e->getSizeBytes()) {
+        LOGE("Error Allocation::subElementData data size %i does not match field size %i.", sizeBytes, e->getSizeBytes());
+        rsc->setError(RS_ERROR_BAD_VALUE, "subElementData bad size.");
+        return;
+    }
+
+    // Form the destination address only after x, y and cIdx have been validated.
+    uint8_t * ptr = static_cast<uint8_t *>(mPtr);
+    ptr += mType->getElementSizeBytes() * (x + y * mType->getDimX());
+    ptr += mType->getElement()->getFieldOffsetBytes(cIdx);
+    if (e->getHasReferences()) {
+        e->incRefs(data);
+        e->decRefs(ptr);
+    }
+
+    memcpy(ptr, data, sizeBytes);
+    sendDirty();
+    mUploadDefered = true;
+}
+
 void Allocation::addProgramToDirty(const Program *p)
 {
-    mToDirtyList.add(p);
+    mToDirtyList.push(p);
 }
 
 void Allocation::removeProgramToDirty(const Program *p)
@@ -316,6 +437,61 @@
 
 }
 
+// Writes this allocation to stream as: class id, name, type, payload size, payload bytes.
+void Allocation::serialize(OStream *stream) const
+{
+    // The class id goes first; createFromStream() checks it before reading anything else.
+    stream->addU32((uint32_t)getClassId());
+
+    String8 objName(getName());
+    stream->addString(&objName);
+
+    // The type is stored ahead of the payload because the loader
+    // needs it to construct the allocation.
+    mType->serialize(stream);
+
+    // Payload: byte count, then the raw contents of mPtr.
+    const uint32_t payloadBytes = mType->getSizeBytes();
+    stream->addU32(payloadBytes);
+    stream->addByteArray(mPtr, payloadBytes);
+}
+
+Allocation *Allocation::createFromStream(Context *rsc, IStream *stream)
+{
+    // The record must start with the allocation class id, which serialize() writes first.
+    RsA3DClassID classID = (RsA3DClassID)stream->loadU32();
+    if(classID != RS_A3D_CLASS_ID_ALLOCATION) {
+        LOGE("allocation loading skipped due to invalid class id\n");
+        return NULL;
+    }
+
+    String8 name;
+    stream->loadString(&name);
+
+    Type *type = Type::createFromStream(rsc, stream);
+    if(!type) {
+        return NULL;
+    }
+    type->compute();
+
+    // Stored payload size; it has to equal the size reported by the loaded type.
+    uint32_t dataSize = stream->loadU32();
+    if(dataSize != type->getSizeBytes()) {
+        LOGE("failed to read allocation because numbytes written is not the same loaded type wants\n");
+        delete type;
+        return NULL;
+    }
+
+    Allocation *alloc = new Allocation(rsc, type);
+    alloc->setName(name.string(), name.size());
+
+    // Copy the payload from the current position. NOTE(review): remaining stream length is not checked here.
+    alloc->data(rsc, stream->getPtr() + stream->getPos(), dataSize);
+    stream->reset(stream->getPos() + dataSize);  // presumably repositions the stream just past the payload
+
+    return alloc;
+}
+
 void Allocation::sendDirty() const
 {
     for (size_t ct=0; ct < mToDirtyList.size(); ct++) {
@@ -323,6 +499,65 @@
     }
 }
 
+// Calls Element::incRefs on ct consecutive elements of ptr, starting at element index startOff.
+void Allocation::incRefs(const void *ptr, size_t ct, size_t startOff) const
+{
+    const Element *elem = mType->getElement();
+    uint32_t elemBytes = elem->getSizeBytes();
+    const uint8_t *cursor = static_cast<const uint8_t *>(ptr) + elemBytes * startOff;
+
+    // Step through the range one element at a time.
+    for (size_t remaining = ct; remaining > 0; remaining--) {
+        elem->incRefs(cursor);
+        cursor += elemBytes;
+    }
+}
+
+// Calls Element::decRefs on ct consecutive elements of ptr, starting at element index startOff.
+void Allocation::decRefs(const void *ptr, size_t ct, size_t startOff) const
+{
+    const Element *elem = mType->getElement();
+    uint32_t elemBytes = elem->getSizeBytes();
+    const uint8_t *cursor = static_cast<const uint8_t *>(ptr) + elemBytes * startOff;
+
+    // Step through the range one element at a time.
+    for (size_t remaining = ct; remaining > 0; remaining--) {
+        elem->decRefs(cursor);
+        cursor += elemBytes;
+    }
+}
+
+void Allocation::copyRange1D(Context *rsc, const Allocation *src, int32_t srcOff, int32_t destOff, int32_t len)
+{   // Empty body: no data is copied and no error is reported.
+}
+
+// Changes the X dimension: dropped elements get decRefs, the buffer is realloc'ed, new elements are zeroed.
+void Allocation::resize1D(Context *rsc, uint32_t dimX)
+{
+    uint32_t oldDimX = mType->getDimX();
+    if (dimX == oldDimX) {
+        return;
+    }
+    // Clone only once a resize is really needed, so no unused Type is created on the no-op path.
+    Type *t = mType->cloneAndResize1D(rsc, dimX);
+    if (dimX < oldDimX) {
+        decRefs(mPtr, oldDimX - dimX, dimX);
+    }
+    // NOTE(review): the realloc result is not checked; on failure the old buffer pointer would be lost.
+    mPtr = realloc(mPtr, t->getSizeBytes());
+    if (dimX > oldDimX) {
+        const Element *e = mType->getElement();
+        uint32_t stride = e->getSizeBytes();
+        memset(((uint8_t *)mPtr) + stride * oldDimX, 0, stride * (dimX - oldDimX));
+    }
+    mType.set(t);
+}
+
+void Allocation::resize2D(Context *rsc, uint32_t dimX, uint32_t dimY)
+{   // Stub: only logs an error; the allocation is not modified.
+    LOGE("not implemented");
+}
+
 /////////////////
 //
 
@@ -495,7 +730,7 @@
     if (srcGLType == GL_UNSIGNED_BYTE &&
         srcGLFmt == GL_RGB &&
         dstGLType == GL_UNSIGNED_SHORT_5_6_5 &&
-        dstGLType == GL_RGB) {
+        dstGLFmt == GL_RGB) {
 
         return elementConverter_888_to_565;
     }
@@ -503,15 +738,21 @@
     if (srcGLType == GL_UNSIGNED_BYTE &&
         srcGLFmt == GL_RGBA &&
         dstGLType == GL_UNSIGNED_SHORT_5_6_5 &&
-        dstGLType == GL_RGB) {
+        dstGLFmt == GL_RGB) {
 
         return elementConverter_8888_to_565;
     }
 
     LOGE("pickConverter, unsuported combo, src %p,  dst %p", src, dst);
+    LOGE("pickConverter, srcGLType = %x,  srcGLFmt = %x", srcGLType, srcGLFmt);
+    LOGE("pickConverter, dstGLType = %x,  dstGLFmt = %x", dstGLType, dstGLFmt);
+    src->dumpLOGV("SRC ");
+    dst->dumpLOGV("DST ");
     return 0;
 }
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
+
 RsAllocation rsi_AllocationCreateBitmapRef(Context *rsc, RsType vtype,
                                            void *bmp, void *callbackData, RsBitmapCallback_t callback)
 {
@@ -521,22 +762,41 @@
     return alloc;
 }
 
+// Converts bitmap pixels (element _src) into an existing allocation; runs mip() per LOD when getDimLOD() is set.
+void rsi_AllocationUpdateFromBitmap(Context *rsc, RsAllocation va, RsElement _src, const void *data)
+{
+    Allocation *target = static_cast<Allocation *>(va);
+    const Element *srcElem = static_cast<const Element *>(_src);
+    const Element *dstElem = target->getType()->getElement();
+    uint32_t width = target->getType()->getDimX();
+    uint32_t height = target->getType()->getDimY();
+    bool hasMips = target->getType()->getDimLOD();
+    ElementConverter_t convert = pickConverter(dstElem, srcElem);
+    if (!convert) {
+        rsc->setError(RS_ERROR_BAD_VALUE, "Unsupported bitmap format");
+        return;
+    }
+    convert(target->getPtr(), data, width * height);
+    if (hasMips) {
+        Adapter2D upper(rsc, target);
+        Adapter2D lower(rsc, target);
+        for (uint32_t level = 0; level < (target->getType()->getLODCount() - 1); level++) {
+            upper.setLOD(level);
+            lower.setLOD(level + 1);
+            mip(lower, upper);
+        }
+    }
+}
+
 RsAllocation rsi_AllocationCreateFromBitmap(Context *rsc, uint32_t w, uint32_t h, RsElement _dst, RsElement _src,  bool genMips, const void *data)
 {
     const Element *src = static_cast<const Element *>(_src);
     const Element *dst = static_cast<const Element *>(_dst);
 
-    // Check for pow2 on pre es 2.0 versions.
-    rsAssert(rsc->checkVersion2_0() || (!(w & (w-1)) && !(h & (h-1))));
-
-    //LOGE("rsi_AllocationCreateFromBitmap %i %i %i %i %i", w, h, dstFmt, srcFmt, genMips);
-    rsi_TypeBegin(rsc, _dst);
-    rsi_TypeAdd(rsc, RS_DIMENSION_X, w);
-    rsi_TypeAdd(rsc, RS_DIMENSION_Y, h);
-    if (genMips) {
-        rsi_TypeAdd(rsc, RS_DIMENSION_LOD, 1);
-    }
-    RsType type = rsi_TypeCreate(rsc);
+    //LOGE("%p rsi_AllocationCreateFromBitmap %i %i %i", rsc, w, h, genMips);
+    RsDimension dims[] = {RS_DIMENSION_X, RS_DIMENSION_Y, RS_DIMENSION_LOD};
+    uint32_t dimValues[] = {w, h, genMips};
+    RsType type = rsaTypeCreate(rsc, _dst, 3, dims, dimValues);
 
     RsAllocation vTexAlloc = rsi_AllocationCreateTyped(rsc, type);
     Allocation *texAlloc = static_cast<Allocation *>(vTexAlloc);
@@ -546,65 +806,52 @@
     }
 
     ElementConverter_t cvt = pickConverter(dst, src);
-    cvt(texAlloc->getPtr(), data, w * h);
-
-    if (genMips) {
-        Adapter2D adapt(rsc, texAlloc);
-        Adapter2D adapt2(rsc, texAlloc);
-        for(uint32_t lod=0; lod < (texAlloc->getType()->getLODCount() -1); lod++) {
-            adapt.setLOD(lod);
-            adapt2.setLOD(lod + 1);
-            mip(adapt2, adapt);
+    if (cvt) {
+        cvt(texAlloc->getPtr(), data, w * h);
+        if (genMips) {
+            Adapter2D adapt(rsc, texAlloc);
+            Adapter2D adapt2(rsc, texAlloc);
+            for(uint32_t lod=0; lod < (texAlloc->getType()->getLODCount() -1); lod++) {
+                adapt.setLOD(lod);
+                adapt2.setLOD(lod + 1);
+                mip(adapt2, adapt);
+            }
         }
+    } else {
+        rsc->setError(RS_ERROR_BAD_VALUE, "Unsupported bitmap format");
     }
 
     return texAlloc;
 }
 
-RsAllocation rsi_AllocationCreateFromBitmapBoxed(Context *rsc, uint32_t w, uint32_t h, RsElement _dst, RsElement _src, bool genMips, const void *data)
-{
-    const Element *srcE = static_cast<const Element *>(_src);
-    const Element *dstE = static_cast<const Element *>(_dst);
-    uint32_t w2 = rsHigherPow2(w);
-    uint32_t h2 = rsHigherPow2(h);
-
-    if ((w2 == w) && (h2 == h)) {
-        return rsi_AllocationCreateFromBitmap(rsc, w, h, _dst, _src, genMips, data);
-    }
-
-    uint32_t bpp = srcE->getSizeBytes();
-    size_t size = w2 * h2 * bpp;
-    uint8_t *tmp = static_cast<uint8_t *>(malloc(size));
-    memset(tmp, 0, size);
-
-    const uint8_t * src = static_cast<const uint8_t *>(data);
-    for (uint32_t y = 0; y < h; y++) {
-        uint8_t * ydst = &tmp[(y + ((h2 - h) >> 1)) * w2 * bpp];
-        memcpy(&ydst[((w2 - w) >> 1) * bpp], src, w * bpp);
-        src += w * bpp;
-    }
-
-    RsAllocation ret = rsi_AllocationCreateFromBitmap(rsc, w2, h2, _dst, _src, genMips, tmp);
-    free(tmp);
-    return ret;
-}
-
 void rsi_AllocationData(Context *rsc, RsAllocation va, const void *data, uint32_t sizeBytes)
 {
     Allocation *a = static_cast<Allocation *>(va);
-    a->data(data, sizeBytes);
+    a->data(rsc, data, sizeBytes);
 }
 
 void rsi_Allocation1DSubData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes)
 {
     Allocation *a = static_cast<Allocation *>(va);
-    a->subData(xoff, count, data, sizeBytes);
+    a->subData(rsc, xoff, count, data, sizeBytes);
+}
+
+void rsi_Allocation2DSubElementData(Context *rsc, RsAllocation va, uint32_t x, uint32_t y, const void *data, uint32_t eoff, uint32_t sizeBytes)
+{
+    // Forward to the (x, y) overload of Allocation::subElementData.
+    static_cast<Allocation *>(va)->subElementData(rsc, x, y, data, eoff, sizeBytes);
+}
+
+void rsi_Allocation1DSubElementData(Context *rsc, RsAllocation va, uint32_t x, const void *data, uint32_t eoff, uint32_t sizeBytes)
+{
+    // Forward to the x-only overload of Allocation::subElementData.
+    static_cast<Allocation *>(va)->subElementData(rsc, x, data, eoff, sizeBytes);
 }
 
 void rsi_Allocation2DSubData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data, uint32_t sizeBytes)
 {
     Allocation *a = static_cast<Allocation *>(va);
-    a->subData(xoff, yoff, w, h, data, sizeBytes);
+    a->subData(rsc, xoff, yoff, w, h, data, sizeBytes);
 }
 
 void rsi_AllocationRead(Context *rsc, RsAllocation va, void *data)
@@ -613,6 +860,27 @@
     a->read(data);
 }
 
+void rsi_AllocationResize1D(Context *rsc, RsAllocation va, uint32_t dimX)
+{
+    // Forward to Allocation::resize1D on the object behind the handle.
+    static_cast<Allocation *>(va)->resize1D(rsc, dimX);
+}
+
+void rsi_AllocationResize2D(Context *rsc, RsAllocation va, uint32_t dimX, uint32_t dimY)
+{
+    // Forward to Allocation::resize2D on the object behind the handle.
+    static_cast<Allocation *>(va)->resize2D(rsc, dimX, dimY);
+}
+
+// Returns the allocation's type after calling incUserRef() on it.
+const void* rsi_AllocationGetType(Context *rsc, RsAllocation va)
+{
+    Allocation *alloc = static_cast<Allocation *>(va);
+    alloc->getType()->incUserRef();
+    return alloc->getType();
+}
+
+#endif //ANDROID_RS_BUILD_FOR_HOST
 
 }
 }
diff --git a/rsAllocation.h b/rsAllocation.h
index 516f8b7..12cf832 100644
--- a/rsAllocation.h
+++ b/rsAllocation.h
@@ -30,8 +30,6 @@
     // The graphics equilivent of malloc.  The allocation contains a structure of elements.
 
 public:
-    // By policy this allocation will hold a pointer to the type
-    // but will not destroy it on destruction.
     Allocation(Context *rsc, const Type *);
     Allocation(Context *rsc, const Type *, void *bmp, void *callbackData, RsBitmapCallback_t callback);
 
@@ -55,14 +53,23 @@
     void uploadToBufferObject(const Context *rsc);
     uint32_t getBufferObjectID() const {return mBufferID;}
 
+    void copyRange1D(Context *rsc, const Allocation *src, int32_t srcOff, int32_t destOff, int32_t len);
 
-    void data(const void *data, uint32_t sizeBytes);
-    void subData(uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes);
-    void subData(uint32_t xoff, uint32_t yoff,
+    void resize1D(Context *rsc, uint32_t dimX);
+    void resize2D(Context *rsc, uint32_t dimX, uint32_t dimY);
+
+    void data(Context *rsc, const void *data, uint32_t sizeBytes);
+    void subData(Context *rsc, uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes);
+    void subData(Context *rsc, uint32_t xoff, uint32_t yoff,
                  uint32_t w, uint32_t h, const void *data, uint32_t sizeBytes);
-    void subData(uint32_t xoff, uint32_t yoff, uint32_t zoff,
+    void subData(Context *rsc, uint32_t xoff, uint32_t yoff, uint32_t zoff,
                  uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes);
 
+    void subElementData(Context *rsc, uint32_t x,
+                        const void *data, uint32_t elementOff, uint32_t sizeBytes);
+    void subElementData(Context *rsc, uint32_t x, uint32_t y,
+                        const void *data, uint32_t elementOff, uint32_t sizeBytes);
+
     void read(void *data);
 
     void enableGLVertexBuffers() const;
@@ -72,12 +79,22 @@
     void removeProgramToDirty(const Program *);
 
     virtual void dumpLOGV(const char *prefix) const;
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ALLOCATION; }
+    static Allocation *createFromStream(Context *rsc, IStream *stream);
 
     virtual void uploadCheck(const Context *rsc);
 
-protected:
-    void sendDirty() const;
+    bool getIsTexture() const {return mIsTexture;}
+    bool getIsBufferObject() const {return mIsVertexBuffer;}
 
+    void incRefs(const void *ptr, size_t ct, size_t startOff = 0) const;
+    void decRefs(const void *ptr, size_t ct, size_t startOff = 0) const;
+
+    void sendDirty() const;
+    bool getHasGraphicsMipmaps() const {return mTextureGenMipmap;}
+
+protected:
     ObjectBaseRef<const Type> mType;
     void * mPtr;
 
diff --git a/rsAnimation.cpp b/rsAnimation.cpp
new file mode 100644
index 0000000..6200715
--- /dev/null
+++ b/rsAnimation.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_BUILD_FOR_HOST
+#include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
+#include "rsAnimation.h"
+
+
+using namespace android;
+using namespace android::renderscript;
+
+void Animation::serialize(OStream *stream) const
+{
+    // Empty body: nothing is written to the stream.
+}
+
+Animation *Animation::createFromStream(Context *rsc, IStream *stream)
+{   // Always returns NULL; nothing is read from the stream.
+    return NULL;
+}
+
+/*
+Animation::Animation(Context *rsc) : ObjectBase(rsc)
+{
+    mAllocFile = __FILE__;
+    mAllocLine = __LINE__;
+
+    mValuesInput = NULL;
+    mValuesOutput = NULL;
+    mValueCount = 0;
+    mInterpolation = RS_ANIMATION_INTERPOLATION_STEP;
+    mEdgePre = RS_ANIMATION_EDGE_UNDEFINED;
+    mEdgePost = RS_ANIMATION_EDGE_UNDEFINED;
+    mInputMin = 0;
+    mInputMax = 0;
+}
+
+Animation * Animation::create(Context *rsc,
+                              const float *inValues, const float *outValues,
+                              uint32_t valueCount, RsAnimationInterpolation interp,
+                              RsAnimationEdge pre, RsAnimationEdge post)
+{
+    if (valueCount < 2) {
+        rsc->setError(RS_ERROR_BAD_VALUE, "Animations require more than 2 values.");
+        return NULL;
+    }
+    Animation *a = new Animation(rsc);
+    if (!a) {
+        rsc->setError(RS_ERROR_OUT_OF_MEMORY);
+        return NULL;
+    }
+
+    float *vin = (float *)malloc(valueCount * sizeof(float));
+    float *vout = (float *)malloc(valueCount * sizeof(float));
+    a->mValuesInput = vin;
+    a->mValuesOutput = vout;
+    if (a->mValuesInput == NULL || a->mValuesOutput == NULL) {
+        delete a;
+        rsc->setError(RS_ERROR_OUT_OF_MEMORY);
+        return NULL;
+    }
+
+    a->mEdgePre = pre;
+    a->mEdgePost = post;
+    a->mInterpolation = interp;
+    a->mValueCount = valueCount;
+
+    memcpy(vin, inValues, valueCount * sizeof(float));
+    memcpy(vout, outValues, valueCount * sizeof(float));
+    a->mInputMin = inValues[0];
+    a->mInputMax = inValues[0];
+
+    bool needSort = false;
+    for (uint32_t ct=1; ct < valueCount; ct++) {
+        if (a->mInputMin > vin[ct]) {
+            needSort = true;
+            a->mInputMin = vin[ct];
+        }
+        if (a->mInputMax < vin[ct]) {
+            a->mInputMax = vin[ct];
+        } else {
+            needSort = true;
+        }
+    }
+
+    while (1) {
+        bool changed = false;
+        for (uint32_t ct=1; ct < valueCount; ct++) {
+            if (vin[ct-1] > vin[ct]) {
+                float t = vin[ct-1];
+                vin[ct-1] = vin[ct];
+                vin[ct] = t;
+                t = vout[ct-1];
+                vout[ct-1] = vout[ct];
+                vout[ct] = t;
+                changed = true;
+            }
+        }
+        if (!changed) break;
+    }
+
+    return a;
+}
+*/
+
+
+/////////////////////////////////////////
+//
+
+namespace android {
+namespace renderscript {
+
+RsAnimation rsi_AnimationCreate(Context *rsc,
+                                const float *inValues,
+                                const float *outValues,
+                                uint32_t valueCount,
+                                RsAnimationInterpolation interp,
+                                RsAnimationEdge pre,
+                                RsAnimationEdge post)
+{
+    // The call to Animation::create is commented out, so 'a' stays NULL and NULL is returned.
+    Animation *a = NULL;//Animation::create(rsc, inValues, outValues, valueCount, interp, pre, post);
+    if (a != NULL) {
+        a->incUserRef();
+    }
+    return (RsAnimation)a;
+}
+
+
+}
+}
+
diff --git a/rsAnimation.h b/rsAnimation.h
new file mode 100644
index 0000000..340314e
--- /dev/null
+++ b/rsAnimation.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_ANIMATION_H
+#define ANDROID_RS_ANIMATION_H
+
+#include "rsUtils.h"
+#include "rsObjectBase.h"
+
+// ---------------------------------------------------------------------------
+namespace android {
+namespace renderscript {
+
+
+class Animation : public ObjectBase
+{
+public:
+    ~Animation();
+
+    static Animation * create(Context *rsc,
+                              const float *inValues, const float *outValues,
+                              uint32_t valueCount, RsAnimationInterpolation,
+                              RsAnimationEdge pre, RsAnimationEdge post);
+
+    float eval(float) const;
+
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ANIMATION; }
+    static Animation *createFromStream(Context *rsc, IStream *stream);
+
+protected:
+    Animation(Context *rsc);
+
+    // NOTE(review): the constructor, destructor, create(), eval() and evalInRange() are only
+    // declared here; rsAnimation.cpp has them commented out or missing — confirm before use.
+    float evalInRange(float) const;
+
+
+    // Presumably parallel arrays of mValueCount entries (see the disabled create()) — verify.
+    const float *mValuesInput;
+    const float *mValuesOutput;
+    uint32_t mValueCount;
+    RsAnimationInterpolation mInterpolation;
+    RsAnimationEdge mEdgePre;
+    RsAnimationEdge mEdgePost;
+
+    // derived: presumably the smallest and largest input values — verify
+    float mInputMin;
+    float mInputMax;
+};
+
+
+
+
+}
+}
+#endif //ANDROID_RS_ANIMATION_H
+
diff --git a/rsComponent.cpp b/rsComponent.cpp
index 15a56f7..f51b23e 100644
--- a/rsComponent.cpp
+++ b/rsComponent.cpp
@@ -16,7 +16,11 @@
 
 #include "rsComponent.h"
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include <GLES/gl.h>
+#else
+#include <OpenGL/gl.h>
+#endif
 
 using namespace android;
 using namespace android::renderscript;
@@ -91,6 +95,26 @@
         mNormalized = true;
         rsAssert(mKind == RS_KIND_PIXEL_RGBA);
         return;
+
+    case RS_TYPE_MATRIX_4X4:
+        mTypeBits = 16 * 32;
+        rsAssert(mVectorSize == 1);
+        rsAssert(mNormalized == false);
+        rsAssert(mKind == RS_KIND_USER);
+        break;
+    case RS_TYPE_MATRIX_3X3:
+        mTypeBits = 9 * 32;
+        rsAssert(mVectorSize == 1);
+        rsAssert(mNormalized == false);
+        rsAssert(mKind == RS_KIND_USER);
+        break;
+    case RS_TYPE_MATRIX_2X2:
+        mTypeBits = 4 * 32;
+        rsAssert(mVectorSize == 1);
+        rsAssert(mNormalized == false);
+        rsAssert(mKind == RS_KIND_USER);
+        break;
+
     case RS_TYPE_ELEMENT:
     case RS_TYPE_TYPE:
     case RS_TYPE_ALLOCATION:
@@ -148,11 +172,19 @@
     case RS_TYPE_UNSIGNED_64:
         mTypeBits = 64;
         break;
+
+    case RS_TYPE_BOOLEAN:
+        mTypeBits = 8;
+        break;
     }
 
     mBits = mTypeBits * mVectorSize;
 }
 
+bool Component::isReference() const
+{   // True when mType is RS_TYPE_ELEMENT or any data type enumerated after it.
+    return (mType >= RS_TYPE_ELEMENT);
+}
 
 
 
@@ -188,82 +220,6 @@
     return 0;
 }
 
-static const char * gCTypeStrings[] = {
-    0,
-    0,//"F16",
-    "float",
-    "double",
-    "char",
-    "short",
-    "int",
-    0,//"S64",
-    "char",//U8",
-    "short",//U16",
-    "int",//U32",
-    0,//"U64",
-    0,//"UP_565",
-    0,//"UP_5551",
-    0,//"UP_4444",
-    0,//"ELEMENT",
-    0,//"TYPE",
-    0,//"ALLOCATION",
-    0,//"SAMPLER",
-    0,//"SCRIPT",
-    0,//"MESH",
-    0,//"PROGRAM_FRAGMENT",
-    0,//"PROGRAM_VERTEX",
-    0,//"PROGRAM_RASTER",
-    0,//"PROGRAM_STORE",
-};
-
-static const char * gCVecTypeStrings[] = {
-    0,
-    0,//"F16",
-    "vecF32",
-    "vecF64",
-    "vecI8",
-    "vecI16",
-    "vecI32",
-    0,//"S64",
-    "vecU8",//U8",
-    "vecU16",//U16",
-    "vecU32",//U32",
-    0,//"U64",
-    0,//"UP_565",
-    0,//"UP_5551",
-    0,//"UP_4444",
-    0,//"ELEMENT",
-    0,//"TYPE",
-    0,//"ALLOCATION",
-    0,//"SAMPLER",
-    0,//"SCRIPT",
-    0,//"MESH",
-    0,//"PROGRAM_FRAGMENT",
-    0,//"PROGRAM_VERTEX",
-    0,//"PROGRAM_RASTER",
-    0,//"PROGRAM_STORE",
-};
-
-String8 Component::getCType() const
-{
-    char buf[64];
-    if (mVectorSize == 1) {
-        return String8(gCTypeStrings[mType]);
-    }
-
-    // Yuck, acc WAR
-    // Appears to have problems packing chars
-    if (mVectorSize == 4 && mType == RS_TYPE_UNSIGNED_8) {
-        return String8("int");
-    }
-
-
-    String8 s(gCVecTypeStrings[mType]);
-    sprintf(buf, "_%i_t", mVectorSize);
-    s.append(buf);
-    return s;
-}
-
 String8 Component::getGLSLType() const
 {
     if (mType == RS_TYPE_SIGNED_32) {
@@ -282,10 +238,19 @@
         case 4: return String8("vec4");
         }
     }
+    if ((mType == RS_TYPE_MATRIX_4X4) && (mVectorSize == 1)) {
+        return String8("mat4");
+    }
+    if ((mType == RS_TYPE_MATRIX_3X3) && (mVectorSize == 1)) {
+        return String8("mat3");
+    }
+    if ((mType == RS_TYPE_MATRIX_2X2) && (mVectorSize == 1)) {
+        return String8("mat2");
+    }
     return String8();
 }
 
-static const char * gTypeStrings[] = {
+static const char * gTypeBasicStrings[] = {
     "NONE",
     "F16",
     "F32",
@@ -298,9 +263,16 @@
     "U16",
     "U32",
     "U64",
+    "BOOLEAN",
     "UP_565",
     "UP_5551",
     "UP_4444",
+    "MATRIX_4X4",
+    "MATRIX_3X3",
+    "MATRIX_2X2",
+};
+
+static const char * gTypeObjStrings[] = {
     "ELEMENT",
     "TYPE",
     "ALLOCATION",
@@ -330,8 +302,34 @@
 
 void Component::dumpLOGV(const char *prefix) const
 {
-    LOGV("%s   Component: %s, %s, vectorSize=%i, bits=%i",
-         prefix, gTypeStrings[mType], gKindStrings[mKind], mVectorSize, mBits);
+    if (mType >= RS_TYPE_ELEMENT) {
+        LOGV("%s   Component: %s, %s, vectorSize=%i, bits=%i",
+             prefix, gTypeObjStrings[mType - RS_TYPE_ELEMENT], gKindStrings[mKind], mVectorSize, mBits);
+    } else {
+        LOGV("%s   Component: %s, %s, vectorSize=%i, bits=%i",
+             prefix, gTypeBasicStrings[mType], gKindStrings[mKind], mVectorSize, mBits);
+    }
 }
 
+void Component::serialize(OStream *stream) const
+{   // Field order and widths must match what loadFromStream() reads back.
+    stream->addU8((uint8_t)mType);
+    stream->addU8((uint8_t)mKind);
+    stream->addU8((uint8_t)(mNormalized ? 1 : 0));
+    stream->addU32(mVectorSize);
+}
+
+void Component::loadFromStream(IStream *stream)
+{   // Reads the fields in the same order serialize() writes them.
+    mType = (RsDataType)stream->loadU8();
+    mKind = (RsDataKind)stream->loadU8();
+    uint8_t temp = stream->loadU8();   // normalized flag, stored as a single byte
+    mNormalized = temp != 0;
+    mVectorSize = stream->loadU32();
+    // Re-apply the loaded values through set(); presumably this refreshes derived fields such as mBits.
+    set(mType, mKind, mNormalized, mVectorSize);
+}
+
+
+
 
diff --git a/rsComponent.h b/rsComponent.h
index 71de324..a775051 100644
--- a/rsComponent.h
+++ b/rsComponent.h
@@ -35,7 +35,6 @@
 
     uint32_t getGLType() const;
     uint32_t getGLFormat() const;
-    String8 getCType() const;
     String8 getGLSLType() const;
     void dumpLOGV(const char *prefix) const;
 
@@ -48,6 +47,12 @@
     bool getIsSigned() const {return mIsSigned;}
     uint32_t getBits() const {return mBits;}
 
+    // Helpers for reading / writing this class out
+    void serialize(OStream *stream) const;
+    void loadFromStream(IStream *stream);
+
+    bool isReference() const;
+
 protected:
     RsDataType mType;
     RsDataKind mKind;
diff --git a/rsContext.cpp b/rsContext.cpp
index 92c6619..944cd86 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -18,21 +18,24 @@
 #include "rsContext.h"
 #include "rsThreadIO.h"
 #include <ui/FramebufferNativeWindow.h>
+#include <ui/PixelFormat.h>
 #include <ui/EGLUtils.h>
 #include <ui/egl/android_natives.h>
 
 #include <sys/types.h>
 #include <sys/resource.h>
+#include <sched.h>
 
 #include <cutils/properties.h>
 
-#include <EGL/eglext.h>
 #include <GLES/gl.h>
 #include <GLES/glext.h>
 #include <GLES2/gl2.h>
 #include <GLES2/gl2ext.h>
 
 #include <cutils/sched_policy.h>
+#include <sys/syscall.h>
+#include <string.h>
 
 using namespace android;
 using namespace android::renderscript;
@@ -54,23 +57,65 @@
     }
 }
 
-void Context::initEGL(bool useGL2)
+void printEGLConfiguration(EGLDisplay dpy, EGLConfig config) {
+    // Logs (LOGV) every listed attribute of config that can be queried on dpy.
+#define X(VAL) {VAL, #VAL}
+    struct {EGLint attribute; const char* name;} names[] = {
+    X(EGL_BUFFER_SIZE),
+    X(EGL_ALPHA_SIZE),
+    X(EGL_BLUE_SIZE),
+    X(EGL_GREEN_SIZE),
+    X(EGL_RED_SIZE),
+    X(EGL_DEPTH_SIZE),
+    X(EGL_STENCIL_SIZE),
+    X(EGL_CONFIG_CAVEAT),
+    X(EGL_CONFIG_ID),
+    X(EGL_LEVEL),
+    X(EGL_MAX_PBUFFER_HEIGHT),
+    X(EGL_MAX_PBUFFER_PIXELS),
+    X(EGL_MAX_PBUFFER_WIDTH),
+    X(EGL_NATIVE_RENDERABLE),
+    X(EGL_NATIVE_VISUAL_ID),
+    X(EGL_NATIVE_VISUAL_TYPE),
+    X(EGL_SAMPLES),
+    X(EGL_SAMPLE_BUFFERS),
+    X(EGL_SURFACE_TYPE),
+    X(EGL_TRANSPARENT_TYPE),
+    X(EGL_TRANSPARENT_RED_VALUE),
+    X(EGL_TRANSPARENT_GREEN_VALUE),
+    X(EGL_TRANSPARENT_BLUE_VALUE),
+    X(EGL_BIND_TO_TEXTURE_RGB),
+    X(EGL_BIND_TO_TEXTURE_RGBA),
+    X(EGL_MIN_SWAP_INTERVAL),
+    X(EGL_MAX_SWAP_INTERVAL),
+    X(EGL_LUMINANCE_SIZE),
+    X(EGL_ALPHA_MASK_SIZE),
+    X(EGL_COLOR_BUFFER_TYPE),
+    X(EGL_RENDERABLE_TYPE),
+    X(EGL_CONFORMANT),
+   };
+#undef X
+    // Attributes whose query fails, or leaves an EGL error set, are skipped without logging.
+    for (size_t j = 0; j < sizeof(names) / sizeof(names[0]); j++) {
+        EGLint value = -1;
+        EGLint returnVal = eglGetConfigAttrib(dpy, config, names[j].attribute, &value);
+        EGLint error = eglGetError();
+        if (returnVal && error == EGL_SUCCESS) {
+            LOGV(" %s: %d (0x%x)", names[j].name, value, value);
+        }
+    }
+}
+
+
+void Context::initGLThread()
 {
+    pthread_mutex_lock(&gInitMutex);
+    LOGV("initGLThread start %p", this);
+
     mEGL.mNumConfigs = -1;
     EGLint configAttribs[128];
     EGLint *configAttribsPtr = configAttribs;
-    EGLint context_attribs2[] = { EGL_CONTEXT_CLIENT_VERSION, 2,
-            EGL_NONE, GL_NONE, EGL_NONE };
-
-#ifdef HAS_CONTEXT_PRIORITY
-#ifdef EGL_IMG_context_priority
-#warning "using EGL_IMG_context_priority"
-    if (mThreadPriority > 0) {
-        context_attribs2[2] = EGL_CONTEXT_PRIORITY_LEVEL_IMG;
-        context_attribs2[3] = EGL_CONTEXT_PRIORITY_LOW_IMG;
-    }
-#endif
-#endif
+    EGLint context_attribs2[] = { EGL_CONTEXT_CLIENT_VERSION, 2, EGL_NONE };
 
     memset(configAttribs, 0, sizeof(configAttribs));
 
@@ -78,15 +123,13 @@
     configAttribsPtr[1] = EGL_WINDOW_BIT;
     configAttribsPtr += 2;
 
-    if (useGL2) {
-        configAttribsPtr[0] = EGL_RENDERABLE_TYPE;
-        configAttribsPtr[1] = EGL_OPENGL_ES2_BIT;
-        configAttribsPtr += 2;
-    }
+    configAttribsPtr[0] = EGL_RENDERABLE_TYPE;
+    configAttribsPtr[1] = EGL_OPENGL_ES2_BIT;
+    configAttribsPtr += 2;
 
-    if (mUseDepth) {
+    if (mUserSurfaceConfig.depthMin > 0) {
         configAttribsPtr[0] = EGL_DEPTH_SIZE;
-        configAttribsPtr[1] = 16;
+        configAttribsPtr[1] = mUserSurfaceConfig.depthMin;
         configAttribsPtr += 2;
     }
 
@@ -99,36 +142,101 @@
     configAttribsPtr[0] = EGL_NONE;
     rsAssert(configAttribsPtr < (configAttribs + (sizeof(configAttribs) / sizeof(EGLint))));
 
-    LOGV("initEGL start");
+    LOGV("%p initEGL start", this);
     mEGL.mDisplay = eglGetDisplay(EGL_DEFAULT_DISPLAY);
     checkEglError("eglGetDisplay");
 
     eglInitialize(mEGL.mDisplay, &mEGL.mMajorVersion, &mEGL.mMinorVersion);
     checkEglError("eglInitialize");
 
-    status_t err = EGLUtils::selectConfigForNativeWindow(mEGL.mDisplay, configAttribs, mWndSurface, &mEGL.mConfig);
+#if 1
+    PixelFormat pf = PIXEL_FORMAT_RGBA_8888;
+    if (mUserSurfaceConfig.alphaMin == 0) {
+        pf = PIXEL_FORMAT_RGBX_8888;
+    }
+
+    status_t err = EGLUtils::selectConfigForPixelFormat(mEGL.mDisplay, configAttribs, pf, &mEGL.mConfig);
     if (err) {
-       LOGE("couldn't find an EGLConfig matching the screen format\n");
+       LOGE("%p, couldn't find an EGLConfig matching the screen format\n", this);
     }
-    //eglChooseConfig(mEGL.mDisplay, configAttribs, &mEGL.mConfig, 1, &mEGL.mNumConfigs);
-
-
-    if (useGL2) {
-        mEGL.mContext = eglCreateContext(mEGL.mDisplay, mEGL.mConfig, EGL_NO_CONTEXT, context_attribs2);
-    } else {
-        mEGL.mContext = eglCreateContext(mEGL.mDisplay, mEGL.mConfig, EGL_NO_CONTEXT, NULL);
+    if (props.mLogVisual) {
+        printEGLConfiguration(mEGL.mDisplay, mEGL.mConfig);
     }
+#else
+    eglChooseConfig(mEGL.mDisplay, configAttribs, &mEGL.mConfig, 1, &mEGL.mNumConfigs);
+#endif
+
+    mEGL.mContext = eglCreateContext(mEGL.mDisplay, mEGL.mConfig, EGL_NO_CONTEXT, context_attribs2);
     checkEglError("eglCreateContext");
     if (mEGL.mContext == EGL_NO_CONTEXT) {
-        LOGE("eglCreateContext returned EGL_NO_CONTEXT");
+        LOGE("%p, eglCreateContext returned EGL_NO_CONTEXT", this);
     }
     gGLContextCount++;
+
+
+    EGLint pbuffer_attribs[] = { EGL_WIDTH, 1, EGL_HEIGHT, 1, EGL_NONE };
+    mEGL.mSurfaceDefault = eglCreatePbufferSurface(mEGL.mDisplay, mEGL.mConfig, pbuffer_attribs);
+    checkEglError("eglCreatePbufferSurface");
+    if (mEGL.mSurfaceDefault == EGL_NO_SURFACE) {
+        LOGE("eglCreatePbufferSurface returned EGL_NO_SURFACE");
+    }
+
+    EGLBoolean ret = eglMakeCurrent(mEGL.mDisplay, mEGL.mSurfaceDefault, mEGL.mSurfaceDefault, mEGL.mContext);
+    checkEglError("eglMakeCurrent", ret);
+
+    mGL.mVersion = glGetString(GL_VERSION);
+    mGL.mVendor = glGetString(GL_VENDOR);
+    mGL.mRenderer = glGetString(GL_RENDERER);
+    mGL.mExtensions = glGetString(GL_EXTENSIONS);
+
+    //LOGV("EGL Version %i %i", mEGL.mMajorVersion, mEGL.mMinorVersion);
+    LOGV("GL Version %s", mGL.mVersion);
+    //LOGV("GL Vendor %s", mGL.mVendor);
+    LOGV("GL Renderer %s", mGL.mRenderer);
+    //LOGV("GL Extensions %s", mGL.mExtensions);
+
+    const char *verptr = NULL;
+    if (strlen((const char *)mGL.mVersion) > 9) {
+        if (!memcmp(mGL.mVersion, "OpenGL ES-CM", 12)) {
+            verptr = (const char *)mGL.mVersion + 12;
+        }
+        if (!memcmp(mGL.mVersion, "OpenGL ES ", 10)) {
+            verptr = (const char *)mGL.mVersion + 9;
+        }
+    }
+
+    if (!verptr) {
+        LOGE("Error, OpenGL ES Lite not supported");
+    } else {
+        sscanf(verptr, " %i.%i", &mGL.mMajorVersion, &mGL.mMinorVersion);
+    }
+
+    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &mGL.mMaxVertexAttribs);
+    glGetIntegerv(GL_MAX_VERTEX_UNIFORM_VECTORS, &mGL.mMaxVertexUniformVectors);
+    glGetIntegerv(GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, &mGL.mMaxVertexTextureUnits);
+
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &mGL.mMaxVaryingVectors);
+    glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &mGL.mMaxTextureImageUnits);
+
+    glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &mGL.mMaxFragmentTextureImageUnits);
+    glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &mGL.mMaxFragmentUniformVectors);
+
+    mGL.OES_texture_npot = NULL != strstr((const char *)mGL.mExtensions, "GL_OES_texture_npot");
+    mGL.EXT_texture_max_aniso = 1.0f;
+    bool hasAniso = NULL != strstr((const char *)mGL.mExtensions, "GL_EXT_texture_filter_anisotropic");
+    if(hasAniso) {
+        glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT, &mGL.EXT_texture_max_aniso);
+    }
+
+    LOGV("initGLThread end %p", this);
+    pthread_mutex_unlock(&gInitMutex);
 }
 
 void Context::deinitEGL()
 {
-    LOGV("deinitEGL");
-    setSurface(0, 0, NULL);
+    LOGV("%p, deinitEGL", this);
+
+    eglMakeCurrent(mEGL.mDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, mEGL.mContext);
     eglDestroyContext(mEGL.mDisplay, mEGL.mContext);
     checkEglError("eglDestroyContext");
 
@@ -139,19 +247,21 @@
 }
 
 
-uint32_t Context::runScript(Script *s, uint32_t launchID)
+uint32_t Context::runScript(Script *s)
 {
     ObjectBaseRef<ProgramFragment> frag(mFragment);
     ObjectBaseRef<ProgramVertex> vtx(mVertex);
-    ObjectBaseRef<ProgramFragmentStore> store(mFragmentStore);
+    ObjectBaseRef<ProgramStore> store(mFragmentStore);
     ObjectBaseRef<ProgramRaster> raster(mRaster);
+    ObjectBaseRef<Font> font(mFont);
 
-    uint32_t ret = s->run(this, launchID);
+    uint32_t ret = s->run(this);
 
     mFragment.set(frag);
     mVertex.set(vtx);
     mFragmentStore.set(store);
     mRaster.set(raster);
+    mFont.set(font);
     return ret;
 }
 
@@ -159,42 +269,19 @@
 {
     GLenum err = glGetError();
     if (err != GL_NO_ERROR) {
-        LOGE("GL Error, 0x%x, from %s", err, msg);
+        LOGE("%p, GL Error, 0x%x, from %s", this, err, msg);
     }
 }
 
 uint32_t Context::runRootScript()
 {
-    timerSet(RS_TIMER_CLEAR_SWAP);
-    rsAssert(mRootScript->mEnviroment.mIsRoot);
-
-    eglQuerySurface(mEGL.mDisplay, mEGL.mSurface, EGL_WIDTH, &mEGL.mWidth);
-    eglQuerySurface(mEGL.mDisplay, mEGL.mSurface, EGL_HEIGHT, &mEGL.mHeight);
-    glViewport(0, 0, mEGL.mWidth, mEGL.mHeight);
-    glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
-
-    glClearColor(mRootScript->mEnviroment.mClearColor[0],
-                 mRootScript->mEnviroment.mClearColor[1],
-                 mRootScript->mEnviroment.mClearColor[2],
-                 mRootScript->mEnviroment.mClearColor[3]);
-    if (mUseDepth) {
-        glDepthMask(GL_TRUE);
-        glClearDepthf(mRootScript->mEnviroment.mClearDepth);
-        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
-    } else {
-        glClear(GL_COLOR_BUFFER_BIT);
-    }
+    glViewport(0, 0, mWidth, mHeight);
 
     timerSet(RS_TIMER_SCRIPT);
     mStateFragmentStore.mLast.clear();
-    uint32_t ret = runScript(mRootScript.get(), 0);
+    uint32_t ret = runScript(mRootScript.get());
 
     checkError("runRootScript");
-    if (mError != RS_ERROR_NONE) {
-        // If we have an error condition we stop rendering until
-        // somthing changes that might fix it.
-        ret = 0;
-    }
     return ret;
 }
 
@@ -218,6 +305,9 @@
     mTimeFrame = mTimeLast;
     mTimeLastFrame = mTimeLast;
     mTimerActive = RS_TIMER_INTERNAL;
+    mAverageFPSFrameCount = 0;
+    mAverageFPSStartTime = mTimeLast;
+    mAverageFPS = 0;
     timerReset();
 }
 
@@ -225,6 +315,16 @@
 {
     mTimeLastFrame = mTimeFrame;
     mTimeFrame = getTime();
+    // Update average fps
+    const uint64_t averageFramerateInterval = 1000 * 1000000;
+    mAverageFPSFrameCount ++;
+    uint64_t inverval = mTimeFrame - mAverageFPSStartTime;
+    if(inverval >= averageFramerateInterval) {
+        inverval = inverval / 1000000;
+        mAverageFPS = (mAverageFPSFrameCount * 1000) / inverval;
+        mAverageFPSFrameCount = 0;
+        mAverageFPSStartTime = mTimeFrame;
+    }
 }
 
 void Context::timerSet(Timers tm)
@@ -248,37 +348,34 @@
 
 
     if (props.mLogTimes) {
-        LOGV("RS: Frame (%i),   Script %2.1f (%i),  Clear & Swap %2.1f (%i),  Idle %2.1f (%lli),  Internal %2.1f (%lli)",
+        LOGV("RS: Frame (%i),   Script %2.1f%% (%i),  Swap %2.1f%% (%i),  Idle %2.1f%% (%lli),  Internal %2.1f%% (%lli), Avg fps: %u",
              mTimeMSLastFrame,
              100.0 * mTimers[RS_TIMER_SCRIPT] / total, mTimeMSLastScript,
              100.0 * mTimers[RS_TIMER_CLEAR_SWAP] / total, mTimeMSLastSwap,
              100.0 * mTimers[RS_TIMER_IDLE] / total, mTimers[RS_TIMER_IDLE] / 1000000,
-             100.0 * mTimers[RS_TIMER_INTERNAL] / total, mTimers[RS_TIMER_INTERNAL] / 1000000);
+             100.0 * mTimers[RS_TIMER_INTERNAL] / total, mTimers[RS_TIMER_INTERNAL] / 1000000,
+             mAverageFPS);
     }
 }
 
 bool Context::setupCheck()
 {
-    if (checkVersion2_0()) {
-        if (!mShaderCache.lookup(this, mVertex.get(), mFragment.get())) {
-            LOGE("Context::setupCheck() 1 fail");
-            return false;
-        }
-
-        mFragmentStore->setupGL2(this, &mStateFragmentStore);
-        mFragment->setupGL2(this, &mStateFragment, &mShaderCache);
-        mRaster->setupGL2(this, &mStateRaster);
-        mVertex->setupGL2(this, &mStateVertex, &mShaderCache);
-
-    } else {
-        mFragmentStore->setupGL(this, &mStateFragmentStore);
-        mFragment->setupGL(this, &mStateFragment);
-        mRaster->setupGL(this, &mStateRaster);
-        mVertex->setupGL(this, &mStateVertex);
+    if (!mShaderCache.lookup(this, mVertex.get(), mFragment.get())) {
+        LOGE("Context::setupCheck() 1 fail");
+        return false;
     }
+
+    mFragmentStore->setupGL2(this, &mStateFragmentStore);
+    mFragment->setupGL2(this, &mStateFragment, &mShaderCache);
+    mRaster->setupGL2(this, &mStateRaster);
+    mVertex->setupGL2(this, &mStateVertex, &mShaderCache);
     return true;
 }
 
+void Context::setupProgramStore() {
+    mFragmentStore->setupGL2(this, &mStateFragmentStore);
+}
+
 static bool getProp(const char *str)
 {
     char buf[PROPERTY_VALUE_MAX];
@@ -286,6 +383,24 @@
     return 0 != strcmp(buf, "0");
 }
 
+void Context::displayDebugStats()
+{
+    char buffer[128];
+    sprintf(buffer, "Avg fps %u, Frame %i ms, Script %i ms", mAverageFPS, mTimeMSLastFrame, mTimeMSLastScript);
+    float oldR, oldG, oldB, oldA;
+    mStateFont.getFontColor(&oldR, &oldG, &oldB, &oldA);
+    uint32_t bufferLen = strlen(buffer);
+
+    float shadowCol = 0.1f;
+    mStateFont.setFontColor(shadowCol, shadowCol, shadowCol, 1.0f);
+    mStateFont.renderText(buffer, bufferLen, 5, getHeight() - 6);
+
+    mStateFont.setFontColor(1.0f, 0.7f, 0.0f, 1.0f);
+    mStateFont.renderText(buffer, bufferLen, 4, getHeight() - 7);
+
+    mStateFont.setFontColor(oldR, oldG, oldB, oldA);
+}
+
 void * Context::threadProc(void *vrsc)
 {
      Context *rsc = static_cast<Context *>(vrsc);
@@ -298,28 +413,36 @@
      rsc->props.mLogScripts = getProp("debug.rs.script");
      rsc->props.mLogObjects = getProp("debug.rs.object");
      rsc->props.mLogShaders = getProp("debug.rs.shader");
+     rsc->props.mLogShadersAttr = getProp("debug.rs.shader.attributes");
+     rsc->props.mLogShadersUniforms = getProp("debug.rs.shader.uniforms");
+     rsc->props.mLogVisual = getProp("debug.rs.visual");
 
-     ScriptTLSStruct *tlsStruct = new ScriptTLSStruct;
-     if (!tlsStruct) {
+     rsc->mTlsStruct = new ScriptTLSStruct;
+     if (!rsc->mTlsStruct) {
          LOGE("Error allocating tls storage");
          return NULL;
      }
-     tlsStruct->mContext = rsc;
-     tlsStruct->mScript = NULL;
-     int status = pthread_setspecific(rsc->gThreadTLSKey, tlsStruct);
+     rsc->mTlsStruct->mContext = rsc;
+     rsc->mTlsStruct->mScript = NULL;
+     int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
      if (status) {
          LOGE("pthread_setspecific %i", status);
      }
 
+     rsc->initGLThread();
+
+     rsc->mScriptC.init(rsc);
      if (rsc->mIsGraphicsContext) {
-         rsc->mStateRaster.init(rsc, rsc->mEGL.mWidth, rsc->mEGL.mHeight);
+         rsc->mStateRaster.init(rsc);
          rsc->setRaster(NULL);
-         rsc->mStateVertex.init(rsc, rsc->mEGL.mWidth, rsc->mEGL.mHeight);
+         rsc->mStateVertex.init(rsc);
          rsc->setVertex(NULL);
-         rsc->mStateFragment.init(rsc, rsc->mEGL.mWidth, rsc->mEGL.mHeight);
+         rsc->mStateFragment.init(rsc);
          rsc->setFragment(NULL);
-         rsc->mStateFragmentStore.init(rsc, rsc->mEGL.mWidth, rsc->mEGL.mHeight);
+         rsc->mStateFragmentStore.init(rsc);
          rsc->setFragmentStore(NULL);
+         rsc->mStateFont.init(rsc);
+         rsc->setFont(NULL);
          rsc->mStateVertexArray.init(rsc);
      }
 
@@ -333,6 +456,11 @@
          uint32_t targetTime = 0;
          if (mDraw && rsc->mIsGraphicsContext) {
              targetTime = rsc->runRootScript();
+
+             if(rsc->props.mLogVisual) {
+                 rsc->displayDebugStats();
+             }
+
              mDraw = targetTime && !rsc->mPaused;
              rsc->timerSet(RS_TIMER_CLEAR_SWAP);
              eglSwapBuffers(rsc->mEGL.mDisplay, rsc->mEGL.mSurface);
@@ -341,9 +469,6 @@
              rsc->timerPrint();
              rsc->timerReset();
          }
-         if (rsc->mObjDestroy.mNeedToEmpty) {
-             rsc->objDestroyOOBRun();
-         }
          if (rsc->mThreadPriority > 0 && targetTime) {
              int32_t t = (targetTime - (int32_t)(rsc->mTimeMSLastScript + rsc->mTimeMSLastSwap)) * 1000;
              if (t > 0) {
@@ -352,33 +477,85 @@
          }
      }
 
-     LOGV("RS Thread exiting");
+     LOGV("%p, RS Thread exiting", rsc);
      if (rsc->mIsGraphicsContext) {
          rsc->mRaster.clear();
          rsc->mFragment.clear();
          rsc->mVertex.clear();
          rsc->mFragmentStore.clear();
+         rsc->mFont.clear();
          rsc->mRootScript.clear();
          rsc->mStateRaster.deinit(rsc);
          rsc->mStateVertex.deinit(rsc);
          rsc->mStateFragment.deinit(rsc);
          rsc->mStateFragmentStore.deinit(rsc);
+         rsc->mStateFont.deinit(rsc);
      }
      ObjectBase::zeroAllUserRef(rsc);
 
-     rsc->mObjDestroy.mNeedToEmpty = true;
-     rsc->objDestroyOOBRun();
-
      if (rsc->mIsGraphicsContext) {
          pthread_mutex_lock(&gInitMutex);
          rsc->deinitEGL();
          pthread_mutex_unlock(&gInitMutex);
      }
+     delete rsc->mTlsStruct;
 
-     LOGV("RS Thread exited");
+     LOGV("%p, RS Thread exited", rsc);
      return NULL;
 }
 
+void * Context::helperThreadProc(void *vrsc)
+{
+     Context *rsc = static_cast<Context *>(vrsc);
+     uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount);
+
+     LOGV("RS helperThread starting %p idx=%i", rsc, idx);
+
+     rsc->mWorkers.mLaunchSignals[idx].init();
+     rsc->mWorkers.mNativeThreadId[idx] = gettid();
+
+#if 0
+     typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t;
+     cpu_set_t cpuset;
+     memset(&cpuset, 0, sizeof(cpuset));
+     cpuset.bits[idx / 64] |= 1ULL << (idx % 64);
+     int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx],
+               sizeof(cpuset), &cpuset);
+     LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret));
+#endif
+
+     setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority);
+     int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct);
+     if (status) {
+         LOGE("pthread_setspecific %i", status);
+     }
+
+     while(rsc->mRunning) {
+         rsc->mWorkers.mLaunchSignals[idx].wait();
+         if (rsc->mWorkers.mLaunchCallback) {
+            rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx);
+         }
+         android_atomic_dec(&rsc->mWorkers.mRunningCount);
+         rsc->mWorkers.mCompleteSignal.set();
+     }
+
+     LOGV("RS helperThread exiting %p idx=%i", rsc, idx);
+     return NULL;
+}
+
+void Context::launchThreads(WorkerCallback_t cbk, void *data)
+{
+    mWorkers.mLaunchData = data;
+    mWorkers.mLaunchCallback = cbk;
+    mWorkers.mRunningCount = (int)mWorkers.mCount;
+    for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) {
+        mWorkers.mLaunchSignals[ct].set();
+    }
+    while(mWorkers.mRunningCount) {
+        mWorkers.mCompleteSignal.wait();
+    }
+}
+
 void Context::setPriority(int32_t p)
 {
     // Note: If we put this in the proper "background" policy
@@ -395,11 +572,14 @@
         // success; reset the priority as well
     }
 #else
-        setpriority(PRIO_PROCESS, mNativeThreadId, p);
+    setpriority(PRIO_PROCESS, mNativeThreadId, p);
+    for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
+        setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p);
+    }
 #endif
 }
 
-Context::Context(Device *dev, bool isGraphics, bool useDepth)
+Context::Context(Device *dev, const RsSurfaceConfig *sc)
 {
     pthread_mutex_lock(&gInitMutex);
 
@@ -407,15 +587,19 @@
     mDev = dev;
     mRunning = false;
     mExit = false;
-    mUseDepth = useDepth;
     mPaused = false;
     mObjHead = NULL;
     mError = RS_ERROR_NONE;
     mErrorMsg = NULL;
+    if (sc) {
+        mUserSurfaceConfig = *sc;
+    } else {
+        memset(&mUserSurfaceConfig, 0, sizeof(mUserSurfaceConfig));
+    }
 
     memset(&mEGL, 0, sizeof(mEGL));
     memset(&mGL, 0, sizeof(mGL));
-    mIsGraphicsContext = isGraphics;
+    mIsGraphicsContext = sc != NULL;
 
     int status;
     pthread_attr_t threadAttr;
@@ -429,6 +613,7 @@
         }
     }
     gThreadTLSKeyCount++;
+
     pthread_mutex_unlock(&gInitMutex);
 
     // Global init done at this point.
@@ -441,20 +626,38 @@
 
     mWndSurface = NULL;
 
-    objDestroyOOBInit();
     timerInit();
     timerSet(RS_TIMER_INTERNAL);
 
-    LOGV("RS Launching thread");
+    int cpu = sysconf(_SC_NPROCESSORS_ONLN);
+    LOGV("RS Launching thread(s), reported CPU count %i", cpu);
+    if (cpu < 2) cpu = 0;
+
+    mWorkers.mCount = (uint32_t)cpu;
+    mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t));
+    mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t));
+    mWorkers.mLaunchSignals = new Signal[mWorkers.mCount];
+    mWorkers.mLaunchCallback = NULL;
     status = pthread_create(&mThreadId, &threadAttr, threadProc, this);
     if (status) {
         LOGE("Failed to start rs context thread.");
+        return;
     }
-
     while(!mRunning) {
         usleep(100);
     }
 
+    mWorkers.mCompleteSignal.init();
+    mWorkers.mRunningCount = 0;
+    mWorkers.mLaunchCount = 0;
+    for (uint32_t ct=0; ct < mWorkers.mCount; ct++) {
+        status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this);
+        if (status) {
+            mWorkers.mCount = ct;
+            LOGE("Created fewer than expected number of RS threads.");
+            break;
+        }
+    }
     pthread_attr_destroy(&threadAttr);
 }
 
@@ -467,8 +670,6 @@
 
     mIO.shutdown();
     int status = pthread_join(mThreadId, &res);
-    mObjDestroy.mNeedToEmpty = true;
-    objDestroyOOBRun();
 
     // Global structure cleanup.
     pthread_mutex_lock(&gInitMutex);
@@ -481,8 +682,6 @@
         mDev = NULL;
     }
     pthread_mutex_unlock(&gInitMutex);
-
-    objDestroyOOBDestroy();
 }
 
 void Context::setSurface(uint32_t w, uint32_t h, ANativeWindow *sur)
@@ -491,28 +690,21 @@
 
     EGLBoolean ret;
     if (mEGL.mSurface != NULL) {
-        ret = eglMakeCurrent(mEGL.mDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+        ret = eglMakeCurrent(mEGL.mDisplay, mEGL.mSurfaceDefault, mEGL.mSurfaceDefault, mEGL.mContext);
         checkEglError("eglMakeCurrent", ret);
 
         ret = eglDestroySurface(mEGL.mDisplay, mEGL.mSurface);
         checkEglError("eglDestroySurface", ret);
 
         mEGL.mSurface = NULL;
-        mEGL.mWidth = 0;
-        mEGL.mHeight = 0;
-        mWidth = 0;
-        mHeight = 0;
+        mWidth = 1;
+        mHeight = 1;
     }
 
     mWndSurface = sur;
     if (mWndSurface != NULL) {
-        bool first = false;
-        if (!mEGL.mContext) {
-            first = true;
-            pthread_mutex_lock(&gInitMutex);
-            initEGL(true);
-            pthread_mutex_unlock(&gInitMutex);
-        }
+        mWidth = w;
+        mHeight = h;
 
         mEGL.mSurface = eglCreateWindowSurface(mEGL.mDisplay, mEGL.mConfig, mWndSurface, NULL);
         checkEglError("eglCreateWindowSurface");
@@ -523,57 +715,7 @@
         ret = eglMakeCurrent(mEGL.mDisplay, mEGL.mSurface, mEGL.mSurface, mEGL.mContext);
         checkEglError("eglMakeCurrent", ret);
 
-        eglQuerySurface(mEGL.mDisplay, mEGL.mSurface, EGL_WIDTH, &mEGL.mWidth);
-        eglQuerySurface(mEGL.mDisplay, mEGL.mSurface, EGL_HEIGHT, &mEGL.mHeight);
-        mWidth = w;
-        mHeight = h;
-        mStateVertex.updateSize(this, w, h);
-
-        if ((int)mWidth != mEGL.mWidth || (int)mHeight != mEGL.mHeight) {
-            LOGE("EGL/Surface mismatch  EGL (%i x %i)  SF (%i x %i)", mEGL.mWidth, mEGL.mHeight, mWidth, mHeight);
-        }
-
-        if (first) {
-            mGL.mVersion = glGetString(GL_VERSION);
-            mGL.mVendor = glGetString(GL_VENDOR);
-            mGL.mRenderer = glGetString(GL_RENDERER);
-            mGL.mExtensions = glGetString(GL_EXTENSIONS);
-
-            //LOGV("EGL Version %i %i", mEGL.mMajorVersion, mEGL.mMinorVersion);
-            LOGV("GL Version %s", mGL.mVersion);
-            //LOGV("GL Vendor %s", mGL.mVendor);
-            LOGV("GL Renderer %s", mGL.mRenderer);
-            //LOGV("GL Extensions %s", mGL.mExtensions);
-
-            const char *verptr = NULL;
-            if (strlen((const char *)mGL.mVersion) > 9) {
-                if (!memcmp(mGL.mVersion, "OpenGL ES-CM", 12)) {
-                    verptr = (const char *)mGL.mVersion + 12;
-                }
-                if (!memcmp(mGL.mVersion, "OpenGL ES ", 10)) {
-                    verptr = (const char *)mGL.mVersion + 9;
-                }
-            }
-
-            if (!verptr) {
-                LOGE("Error, OpenGL ES Lite not supported");
-            } else {
-                sscanf(verptr, " %i.%i", &mGL.mMajorVersion, &mGL.mMinorVersion);
-            }
-
-            glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &mGL.mMaxVertexAttribs);
-            glGetIntegerv(GL_MAX_VERTEX_UNIFORM_VECTORS, &mGL.mMaxVertexUniformVectors);
-            glGetIntegerv(GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, &mGL.mMaxVertexTextureUnits);
-
-            glGetIntegerv(GL_MAX_VARYING_VECTORS, &mGL.mMaxVaryingVectors);
-            glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &mGL.mMaxTextureImageUnits);
-
-            glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &mGL.mMaxFragmentTextureImageUnits);
-            glGetIntegerv(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &mGL.mMaxFragmentUniformVectors);
-
-            mGL.OES_texture_npot = NULL != strstr((const char *)mGL.mExtensions, "GL_OES_texture_npot");
-        }
-
+        mStateVertex.updateSize(this);
     }
 }
 
@@ -595,7 +737,7 @@
     mRootScript.set(s);
 }
 
-void Context::setFragmentStore(ProgramFragmentStore *pfs)
+void Context::setFragmentStore(ProgramStore *pfs)
 {
     rsAssert(mIsGraphicsContext);
     if (pfs == NULL) {
@@ -635,6 +777,16 @@
     }
 }
 
+void Context::setFont(Font *f)
+{
+    rsAssert(mIsGraphicsContext);
+    if (f == NULL) {
+        mFont.set(mStateFont.mDefault);
+    } else {
+        mFont.set(f);
+    }
+}
+
 void Context::assignName(ObjectBase *obj, const char *name, uint32_t len)
 {
     rsAssert(!obj->getName());
@@ -652,90 +804,13 @@
     }
 }
 
-ObjectBase * Context::lookupName(const char *name) const
-{
-    for(size_t ct=0; ct < mNames.size(); ct++) {
-        if (!strcmp(name, mNames[ct]->getName())) {
-            return mNames[ct];
-        }
-    }
-    return NULL;
-}
-
-void Context::appendNameDefines(String8 *str) const
-{
-    char buf[256];
-    for (size_t ct=0; ct < mNames.size(); ct++) {
-        str->append("#define NAMED_");
-        str->append(mNames[ct]->getName());
-        str->append(" ");
-        sprintf(buf, "%i\n", (int)mNames[ct]);
-        str->append(buf);
-    }
-}
-
-bool Context::objDestroyOOBInit()
-{
-    int status = pthread_mutex_init(&mObjDestroy.mMutex, NULL);
-    if (status) {
-        LOGE("Context::ObjDestroyOOBInit mutex init failure");
-        return false;
-    }
-    return true;
-}
-
-void Context::objDestroyOOBRun()
-{
-    if (mObjDestroy.mNeedToEmpty) {
-        int status = pthread_mutex_lock(&mObjDestroy.mMutex);
-        if (status) {
-            LOGE("Context::ObjDestroyOOBRun: error %i locking for OOBRun.", status);
-            return;
-        }
-
-        for (size_t ct = 0; ct < mObjDestroy.mDestroyList.size(); ct++) {
-            mObjDestroy.mDestroyList[ct]->decUserRef();
-        }
-        mObjDestroy.mDestroyList.clear();
-        mObjDestroy.mNeedToEmpty = false;
-
-        status = pthread_mutex_unlock(&mObjDestroy.mMutex);
-        if (status) {
-            LOGE("Context::ObjDestroyOOBRun: error %i unlocking for set condition.", status);
-        }
-    }
-}
-
-void Context::objDestroyOOBDestroy()
-{
-    rsAssert(!mObjDestroy.mNeedToEmpty);
-    pthread_mutex_destroy(&mObjDestroy.mMutex);
-}
-
-void Context::objDestroyAdd(ObjectBase *obj)
-{
-    int status = pthread_mutex_lock(&mObjDestroy.mMutex);
-    if (status) {
-        LOGE("Context::ObjDestroyOOBRun: error %i locking for OOBRun.", status);
-        return;
-    }
-
-    mObjDestroy.mNeedToEmpty = true;
-    mObjDestroy.mDestroyList.add(obj);
-
-    status = pthread_mutex_unlock(&mObjDestroy.mMutex);
-    if (status) {
-        LOGE("Context::ObjDestroyOOBRun: error %i unlocking for set condition.", status);
-    }
-}
-
 uint32_t Context::getMessageToClient(void *data, size_t *receiveLen, size_t bufferLen, bool wait)
 {
     //LOGE("getMessageToClient %i %i", bufferLen, wait);
+    *receiveLen = 0;
     if (!wait) {
         if (mIO.mToClient.isEmpty()) {
             // No message to get and not going to wait for one.
-            receiveLen = 0;
             return 0;
         }
     }
@@ -763,15 +838,19 @@
         return false;
     }
     if (!waitForSpace) {
-        if (mIO.mToClient.getFreeSpace() < len) {
+        if (!mIO.mToClient.makeSpaceNonBlocking(len + 8)) {
             // Not enough room, and not waiting.
             return false;
         }
     }
     //LOGE("sendMessageToClient 2");
-    void *p = mIO.mToClient.reserve(len);
-    memcpy(p, data, len);
-    mIO.mToClient.commit(cmdID, len);
+    if (len > 0) {
+        void *p = mIO.mToClient.reserve(len);
+        memcpy(p, data, len);
+        mIO.mToClient.commit(cmdID, len);
+    } else {
+        mIO.mToClient.commit(cmdID, 0);
+    }
     //LOGE("sendMessageToClient 3");
     return true;
 }
@@ -811,15 +890,14 @@
     LOGE("RS Context debug");
 
     LOGE(" EGL ver %i %i", mEGL.mMajorVersion, mEGL.mMinorVersion);
-    LOGE(" EGL context %p  surface %p,  w=%i h=%i  Display=%p", mEGL.mContext,
-         mEGL.mSurface, mEGL.mWidth, mEGL.mHeight, mEGL.mDisplay);
+    LOGE(" EGL context %p  surface %p,  Display=%p", mEGL.mContext, mEGL.mSurface, mEGL.mDisplay);
     LOGE(" GL vendor: %s", mGL.mVendor);
     LOGE(" GL renderer: %s", mGL.mRenderer);
     LOGE(" GL Version: %s", mGL.mVersion);
     LOGE(" GL Extensions: %s", mGL.mExtensions);
     LOGE(" GL int Versions %i %i", mGL.mMajorVersion, mGL.mMinorVersion);
     LOGE(" RS width %i, height %i", mWidth, mHeight);
-    LOGE(" RS running %i, exit %i, useDepth %i, paused %i", mRunning, mExit, mUseDepth, mPaused);
+    LOGE(" RS running %i, exit %i, paused %i", mRunning, mExit, mPaused);
     LOGE(" RS pThreadID %li, nativeThreadID %i", mThreadId, mNativeThreadId);
 
     LOGV("MAX Textures %i, %i  %i", mGL.mMaxVertexTextureUnits, mGL.mMaxFragmentTextureImageUnits, mGL.mMaxTextureImageUnits);
@@ -834,6 +912,9 @@
 namespace android {
 namespace renderscript {
 
+void rsi_ContextFinish(Context *rsc)
+{
+}
 
 void rsi_ContextBindRootScript(Context *rsc, RsScript vs)
 {
@@ -853,9 +934,9 @@
     s->bindToContext(&rsc->mStateSampler, slot);
 }
 
-void rsi_ContextBindProgramFragmentStore(Context *rsc, RsProgramFragmentStore vpfs)
+void rsi_ContextBindProgramStore(Context *rsc, RsProgramStore vpfs)
 {
-    ProgramFragmentStore *pfs = static_cast<ProgramFragmentStore *>(vpfs);
+    ProgramStore *pfs = static_cast<ProgramStore *>(vpfs);
     rsc->setFragmentStore(pfs);
 }
 
@@ -877,15 +958,27 @@
     rsc->setVertex(pv);
 }
 
+void rsi_ContextBindFont(Context *rsc, RsFont vfont)
+{
+    Font *font = static_cast<Font *>(vfont);
+    rsc->setFont(font);
+}
+
 void rsi_AssignName(Context *rsc, void * obj, const char *name, uint32_t len)
 {
     ObjectBase *ob = static_cast<ObjectBase *>(obj);
     rsc->assignName(ob, name, len);
 }
 
-void rsi_ObjDestroy(Context *rsc, void *obj)
+void rsi_GetName(Context *rsc, void * obj, const char **name)
 {
     ObjectBase *ob = static_cast<ObjectBase *>(obj);
+    (*name) = ob->getName();
+}
+
+void rsi_ObjDestroy(Context *rsc, void *optr)
+{
+    ObjectBase *ob = static_cast<ObjectBase *>(optr);
     rsc->removeName(ob);
     ob->decUserRef();
 }
@@ -932,15 +1025,16 @@
 {
     LOGV("rsContextCreate %p", vdev);
     Device * dev = static_cast<Device *>(vdev);
-    Context *rsc = new Context(dev, false, false);
+    Context *rsc = new Context(dev, NULL);
     return rsc;
 }
 
-RsContext rsContextCreateGL(RsDevice vdev, uint32_t version, bool useDepth)
+RsContext rsContextCreateGL(RsDevice vdev, uint32_t version, RsSurfaceConfig sc)
 {
-    LOGV("rsContextCreateGL %p, %i", vdev, useDepth);
+    LOGV("rsContextCreateGL %p", vdev);
     Device * dev = static_cast<Device *>(vdev);
-    Context *rsc = new Context(dev, true, useDepth);
+    Context *rsc = new Context(dev, &sc);
+    LOGV("rsContextCreateGL ret %p ", rsc);
     return rsc;
 }
 
@@ -950,12 +1044,6 @@
     delete rsc;
 }
 
-void rsObjDestroyOOB(RsContext vrsc, void *obj)
-{
-    Context * rsc = static_cast<Context *>(vrsc);
-    rsc->objDestroyAdd(static_cast<ObjectBase *>(obj));
-}
-
 uint32_t rsContextGetMessage(RsContext vrsc, void *data, size_t *receiveLen, size_t bufferLen, bool wait)
 {
     Context * rsc = static_cast<Context *>(vrsc);
diff --git a/rsContext.h b/rsContext.h
index 709730e..dbe2c79 100644
--- a/rsContext.h
+++ b/rsContext.h
@@ -18,21 +18,21 @@
 #define ANDROID_RS_CONTEXT_H
 
 #include "rsUtils.h"
+#include "rsMutex.h"
 
 #include "rsThreadIO.h"
 #include "rsType.h"
 #include "rsMatrix.h"
 #include "rsAllocation.h"
-#include "rsSimpleMesh.h"
 #include "rsMesh.h"
 #include "rsDevice.h"
 #include "rsScriptC.h"
 #include "rsAllocation.h"
 #include "rsAdapter.h"
 #include "rsSampler.h"
-#include "rsLight.h"
+#include "rsFont.h"
 #include "rsProgramFragment.h"
-#include "rsProgramFragmentStore.h"
+#include "rsProgramStore.h"
 #include "rsProgramRaster.h"
 #include "rsProgramVertex.h"
 #include "rsShaderCache.h"
@@ -48,10 +48,28 @@
 
 namespace renderscript {
 
+#if 0
+#define CHECK_OBJ(o) { \
+    GET_TLS(); \
+    if(!ObjectBase::isValid(rsc, (const ObjectBase *)o)) {  \
+        LOGE("Bad object %p at %s, %i", o, __FILE__, __LINE__);  \
+    } \
+}
+#define CHECK_OBJ_OR_NULL(o) { \
+    GET_TLS(); \
+    if(o && !ObjectBase::isValid(rsc, (const ObjectBase *)o)) {  \
+        LOGE("Bad object %p at %s, %i", o, __FILE__, __LINE__);  \
+    } \
+}
+#else
+#define CHECK_OBJ(o)
+#define CHECK_OBJ_OR_NULL(o)
+#endif
+
 class Context
 {
 public:
-    Context(Device *, bool isGraphics, bool useDepth);
+    Context(Device *, const RsSurfaceConfig *sc);
     ~Context();
 
     static pthread_key_t gThreadTLSKey;
@@ -63,18 +81,21 @@
         Context * mContext;
         Script * mScript;
     };
+    ScriptTLSStruct *mTlsStruct;
+    RsSurfaceConfig mUserSurfaceConfig;
 
+    typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
 
     //StructuredAllocationContext mStateAllocation;
     ElementState mStateElement;
     TypeState mStateType;
     SamplerState mStateSampler;
     ProgramFragmentState mStateFragment;
-    ProgramFragmentStoreState mStateFragmentStore;
+    ProgramStoreState mStateFragmentStore;
     ProgramRasterState mStateRaster;
     ProgramVertexState mStateVertex;
-    LightState mStateLight;
     VertexArrayState mStateVertexArray;
+    FontState mStateFont;
 
     ScriptCState mScriptC;
     ShaderCache mShaderCache;
@@ -84,16 +105,19 @@
     void setRaster(ProgramRaster *);
     void setVertex(ProgramVertex *);
     void setFragment(ProgramFragment *);
-    void setFragmentStore(ProgramFragmentStore *);
+    void setFragmentStore(ProgramStore *);
+    void setFont(Font *);
 
     void updateSurface(void *sur);
 
     const ProgramFragment * getFragment() {return mFragment.get();}
-    const ProgramFragmentStore * getFragmentStore() {return mFragmentStore.get();}
+    const ProgramStore * getFragmentStore() {return mFragmentStore.get();}
     const ProgramRaster * getRaster() {return mRaster.get();}
     const ProgramVertex * getVertex() {return mVertex.get();}
+    Font * getFont() {return mFont.get();}
 
     bool setupCheck();
+    void setupProgramStore();
     bool checkDriver() const {return mEGL.mSurface != 0;}
 
     void pause();
@@ -103,12 +127,10 @@
 
     void assignName(ObjectBase *obj, const char *name, uint32_t len);
     void removeName(ObjectBase *obj);
-    ObjectBase * lookupName(const char *name) const;
-    void appendNameDefines(String8 *str) const;
 
     uint32_t getMessageToClient(void *data, size_t *receiveLen, size_t bufferLen, bool wait);
     bool sendMessageToClient(void *data, uint32_t cmdID, size_t len, bool waitForSpace);
-    uint32_t runScript(Script *s, uint32_t launchID);
+    uint32_t runScript(Script *s);
 
     void initToClient();
     void deinitToClient();
@@ -119,19 +141,21 @@
     ProgramVertex * getDefaultProgramVertex() const {
         return mStateVertex.mDefault.get();
     }
-    ProgramFragmentStore * getDefaultProgramFragmentStore() const {
+    ProgramStore * getDefaultProgramStore() const {
         return mStateFragmentStore.mDefault.get();
     }
     ProgramRaster * getDefaultProgramRaster() const {
         return mStateRaster.mDefault.get();
     }
+    Font* getDefaultFont() const {
+        return mStateFont.mDefault.get();
+    }
 
-    uint32_t getWidth() const {return mEGL.mWidth;}
-    uint32_t getHeight() const {return mEGL.mHeight;}
+    uint32_t getWidth() const {return mWidth;}
+    uint32_t getHeight() const {return mHeight;}
 
 
     ThreadIO mIO;
-    void objDestroyAdd(ObjectBase *);
 
     // Timers
     enum Timers {
@@ -148,24 +172,31 @@
     void timerPrint();
     void timerFrame();
 
-    bool checkVersion1_1() const {return (mGL.mMajorVersion > 1) || (mGL.mMinorVersion >= 1); }
-    bool checkVersion2_0() const {return mGL.mMajorVersion >= 2; }
-
     struct {
         bool mLogTimes;
         bool mLogScripts;
         bool mLogObjects;
         bool mLogShaders;
+        bool mLogShadersAttr;
+        bool mLogShadersUniforms;
+        bool mLogVisual;
     } props;
 
     void dumpDebug() const;
     void checkError(const char *) const;
     const char * getError(RsError *);
-    void setError(RsError e, const char *msg);
+    void setError(RsError e, const char *msg = NULL);
 
     mutable const ObjectBase * mObjHead;
 
     bool ext_OES_texture_npot() const {return mGL.OES_texture_npot;}
+    float ext_texture_max_aniso() const {return mGL.EXT_texture_max_aniso; }
+    uint32_t getMaxFragmentTextures() const {return mGL.mMaxFragmentTextureImageUnits;}
+    uint32_t getMaxFragmentUniformVectors() const {return mGL.mMaxFragmentUniformVectors;}
+    uint32_t getMaxVertexUniformVectors() const {return mGL.mMaxVertexUniformVectors;}
+
+    void launchThreads(WorkerCallback_t cbk, void *data);
+    uint32_t getWorkerPoolSize() const {return (uint32_t)mWorkers.mCount;}
 
 protected:
     Device *mDev;
@@ -177,8 +208,7 @@
         EGLConfig mConfig;
         EGLContext mContext;
         EGLSurface mSurface;
-        EGLint mWidth;
-        EGLint mHeight;
+        EGLSurface mSurfaceDefault;
         EGLDisplay mDisplay;
     } mEGL;
 
@@ -202,6 +232,7 @@
         int32_t mMaxVertexTextureUnits;
 
         bool OES_texture_npot;
+        float EXT_texture_max_aniso;
     } mGL;
 
     uint32_t mWidth;
@@ -211,7 +242,6 @@
 
     bool mRunning;
     bool mExit;
-    bool mUseDepth;
     bool mPaused;
     RsError mError;
     const char *mErrorMsg;
@@ -219,32 +249,40 @@
     pthread_t mThreadId;
     pid_t mNativeThreadId;
 
+    struct Workers {
+        volatile int mRunningCount;
+        volatile int mLaunchCount;
+        uint32_t mCount;
+        pthread_t *mThreadId;
+        pid_t *mNativeThreadId;
+        Signal mCompleteSignal;
+
+        Signal *mLaunchSignals;
+        WorkerCallback_t mLaunchCallback;
+        void *mLaunchData;
+    };
+    Workers mWorkers;
+
     ObjectBaseRef<Script> mRootScript;
     ObjectBaseRef<ProgramFragment> mFragment;
     ObjectBaseRef<ProgramVertex> mVertex;
-    ObjectBaseRef<ProgramFragmentStore> mFragmentStore;
+    ObjectBaseRef<ProgramStore> mFragmentStore;
     ObjectBaseRef<ProgramRaster> mRaster;
+    ObjectBaseRef<Font> mFont;
 
-
-    struct ObjDestroyOOB {
-        pthread_mutex_t mMutex;
-        Vector<ObjectBase *> mDestroyList;
-        bool mNeedToEmpty;
-    };
-    ObjDestroyOOB mObjDestroy;
-    bool objDestroyOOBInit();
-    void objDestroyOOBRun();
-    void objDestroyOOBDestroy();
+    void displayDebugStats();
 
 private:
     Context();
 
-    void initEGL(bool useGL2);
+    void initEGL();
+    void initGLThread();
     void deinitEGL();
 
     uint32_t runRootScript();
 
     static void * threadProc(void *);
+    static void * helperThreadProc(void *);
 
     ANativeWindow *mWndSurface;
 
@@ -258,6 +296,9 @@
     uint32_t mTimeMSLastFrame;
     uint32_t mTimeMSLastScript;
     uint32_t mTimeMSLastSwap;
+    uint32_t mAverageFPSFrameCount;
+    uint64_t mAverageFPSStartTime;
+    uint32_t mAverageFPS;
 };
 
 }
diff --git a/rsContextHostStub.h b/rsContextHostStub.h
new file mode 100644
index 0000000..f3e9dab
--- /dev/null
+++ b/rsContextHostStub.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_CONTEXT_HOST_STUB_H
+#define ANDROID_RS_CONTEXT_HOST_STUB_H
+
+#include "rsUtils.h"
+//#include "rsMutex.h"
+
+//#include "rsThreadIO.h"
+#include "rsType.h"
+#include "rsMatrix.h"
+#include "rsAllocation.h"
+#include "rsMesh.h"
+//#include "rsDevice.h"
+#include "rsScriptC.h"
+#include "rsAllocation.h"
+#include "rsAdapter.h"
+#include "rsSampler.h"
+#include "rsProgramFragment.h"
+#include "rsProgramStore.h"
+#include "rsProgramRaster.h"
+#include "rsProgramVertex.h"
+#include "rsShaderCache.h"
+#include "rsVertexArray.h"
+
+//#include "rsgApiStructs.h"
+//#include "rsLocklessFifo.h"
+
+//#include <ui/egl/android_natives.h>
+
+// ---------------------------------------------------------------------------
+namespace android {
+
+namespace renderscript {
+
+class Device;
+
+class Context
+{
+public:
+    Context(Device *, bool isGraphics, bool useDepth) : props(), mGL() {
+        mObjHead = NULL;  // props and mGL are zeroed by the initializer list so the getters never read garbage
+    }
+    ~Context() {
+    }
+
+
+    //StructuredAllocationContext mStateAllocation;
+    ElementState mStateElement;
+    TypeState mStateType;
+    SamplerState mStateSampler;
+    //ProgramFragmentState mStateFragment;
+    ProgramStoreState mStateFragmentStore;
+    //ProgramRasterState mStateRaster;
+    //ProgramVertexState mStateVertex;
+    VertexArrayState mStateVertexArray;
+
+    //ScriptCState mScriptC;
+    ShaderCache mShaderCache;
+
+    RsSurfaceConfig mUserSurfaceConfig;
+
+    //bool setupCheck();
+    bool checkDriver() const {return false;}
+
+    ProgramFragment * getDefaultProgramFragment() const {
+        return NULL;
+    }
+    ProgramVertex * getDefaultProgramVertex() const {
+        return NULL;
+    }
+    ProgramStore * getDefaultProgramStore() const {
+        return NULL;
+    }
+    ProgramRaster * getDefaultProgramRaster() const {
+        return NULL;
+    }
+
+    uint32_t getWidth() const {return 0;}
+    uint32_t getHeight() const {return 0;}
+
+    // Timers
+    enum Timers {
+        RS_TIMER_IDLE,
+        RS_TIMER_INTERNAL,
+        RS_TIMER_SCRIPT,
+        RS_TIMER_CLEAR_SWAP,
+        _RS_TIMER_TOTAL
+    };
+
+    bool checkVersion1_1() const {return false; }
+    bool checkVersion2_0() const {return false; }
+
+    struct {
+        bool mLogTimes;
+        bool mLogScripts;
+        bool mLogObjects;
+        bool mLogShaders;
+        bool mLogShadersAttr;
+        bool mLogShadersUniforms;
+        bool mLogVisual;
+    } props;
+
+    void dumpDebug() const {    }
+    void checkError(const char *) const {  };
+    void setError(RsError e, const char *msg = NULL) {  }
+
+    mutable const ObjectBase * mObjHead;
+
+    bool ext_OES_texture_npot() const {return mGL.OES_texture_npot;}
+    float ext_texture_max_aniso() const {return 1.0f;}
+    uint32_t getMaxFragmentTextures() const {return mGL.mMaxFragmentTextureImageUnits;}
+    uint32_t getMaxFragmentUniformVectors() const {return mGL.mMaxFragmentUniformVectors;}
+    uint32_t getMaxVertexUniformVectors() const {return mGL.mMaxVertexUniformVectors;}
+
+protected:
+
+    struct {
+        const uint8_t * mVendor;
+        const uint8_t * mRenderer;
+        const uint8_t * mVersion;
+        const uint8_t * mExtensions;
+
+        uint32_t mMajorVersion;
+        uint32_t mMinorVersion;
+
+        int32_t mMaxVaryingVectors;
+        int32_t mMaxTextureImageUnits;
+
+        int32_t mMaxFragmentTextureImageUnits;
+        int32_t mMaxFragmentUniformVectors;
+
+        int32_t mMaxVertexAttribs;
+        int32_t mMaxVertexUniformVectors;
+        int32_t mMaxVertexTextureUnits;
+
+        bool OES_texture_npot;
+    } mGL;
+
+};
+
+}
+}
+#endif
diff --git a/rsDevice.cpp b/rsDevice.cpp
index b670ad4..a96b114 100644
--- a/rsDevice.cpp
+++ b/rsDevice.cpp
@@ -15,7 +15,11 @@
  */
 
 #include "rsDevice.h"
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif
 
 using namespace android;
 using namespace android::renderscript;
@@ -33,7 +37,7 @@
 
 void Device::addContext(Context *rsc)
 {
-    mContexts.add(rsc);
+    mContexts.push(rsc);
 }
 
 void Device::removeContext(Context *rsc)
diff --git a/rsElement.cpp b/rsElement.cpp
index 6288bc4..9e6fbd5 100644
--- a/rsElement.cpp
+++ b/rsElement.cpp
@@ -14,9 +14,14 @@
  * limitations under the License.
  */
 
-#include "rsContext.h"
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
+#include "rsContext.h"
 #include <GLES/gl.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#endif
 
 using namespace android;
 using namespace android::renderscript;
@@ -29,6 +34,7 @@
     mAllocLine = __LINE__;
     mFields = NULL;
     mFieldCount = 0;
+    mHasReference = false;
 }
 
 
@@ -48,6 +54,7 @@
     delete [] mFields;
     mFields = NULL;
     mFieldCount = 0;
+    mHasReference = false;
 }
 
 size_t Element::getSizeBits() const
@@ -58,31 +65,117 @@
 
     size_t total = 0;
     for (size_t ct=0; ct < mFieldCount; ct++) {
-        total += mFields[ct].e->mBits;
+        total += mFields[ct].e->mBits * mFields[ct].arraySize;
     }
     return total;
 }
 
-size_t Element::getFieldOffsetBits(uint32_t componentNumber) const
-{
-    size_t offset = 0;
-    for (uint32_t ct = 0; ct < componentNumber; ct++) {
-        offset += mFields[ct].e->mBits;
-    }
-    return offset;
-}
-
 void Element::dumpLOGV(const char *prefix) const
 {
     ObjectBase::dumpLOGV(prefix);
-    LOGV("%s   Element: components %i,  size %i", prefix, mFieldCount, mBits);
+    LOGV("%s Element: fieldCount: %i,  size bytes: %i", prefix, mFieldCount, getSizeBytes());
     for (uint32_t ct = 0; ct < mFieldCount; ct++) {
-        char buf[1024];
-        sprintf(buf, "%s component %i: ", prefix, ct);
-        //mComponents[ct]->dumpLOGV(buf);
+        LOGV("%s Element field index: %u ------------------", prefix, ct);
+        LOGV("%s name: %s, offsetBits: %u, arraySize: %u",
+             prefix, mFields[ct].name.string(), mFields[ct].offsetBits, mFields[ct].arraySize);
+        mFields[ct].e->dumpLOGV(prefix);
     }
 }
 
+void Element::serialize(OStream *stream) const
+{
+    // Need to identify ourselves
+    stream->addU32((uint32_t)getClassId());
+
+    String8 name(getName());
+    stream->addString(&name);
+
+    mComponent.serialize(stream);
+
+    // Now serialize all the fields
+    stream->addU32(mFieldCount);
+    for(uint32_t ct = 0; ct < mFieldCount; ct++) {
+        stream->addString(&mFields[ct].name);
+        stream->addU32(mFields[ct].arraySize);
+        mFields[ct].e->serialize(stream);
+    }
+}
+
+Element *Element::createFromStream(Context *rsc, IStream *stream)
+{
+    // First make sure we are reading the correct object
+    RsA3DClassID classID = (RsA3DClassID)stream->loadU32();
+    if(classID != RS_A3D_CLASS_ID_ELEMENT) {
+        LOGE("element loading skipped due to invalid class id\n");
+        return NULL;
+    }
+
+    String8 name;
+    stream->loadString(&name);
+
+    Element *elem = new Element(rsc);
+    elem->mComponent.loadFromStream(stream);
+    elem->mBits = elem->mComponent.getBits();
+    elem->mHasReference = elem->mComponent.isReference();
+
+    elem->mFieldCount = stream->loadU32();
+    if(elem->mFieldCount) {
+        uint32_t offset = 0;
+        elem->mFields = new ElementField_t [elem->mFieldCount];
+        for(uint32_t ct = 0; ct < elem->mFieldCount; ct ++) {
+            stream->loadString(&elem->mFields[ct].name);
+            elem->mFields[ct].arraySize = stream->loadU32();
+            Element *fieldElem = Element::createFromStream(rsc, stream);
+            elem->mFields[ct].e.set(fieldElem);
+            elem->mFields[ct].offsetBits = offset;
+            offset += fieldElem->getSizeBits() * elem->mFields[ct].arraySize;  // a field spans arraySize copies of its element
+            // Check if our sub-elements have references
+            if(fieldElem->mHasReference) {
+                elem->mHasReference = true;
+            }
+        }
+    }
+
+    // We need to check if this already exists
+    for (uint32_t ct=0; ct < rsc->mStateElement.mElements.size(); ct++) {
+        Element *ee = rsc->mStateElement.mElements[ct];
+        if(ee->isEqual(elem)) {
+            delete elem;
+            ee->incUserRef();
+            return ee;
+        }
+    }
+
+    rsc->mStateElement.mElements.push(elem);
+    return elem;
+}
+
+bool Element::isEqual(const Element *other) const {
+    if(other == NULL) {
+        return false;
+    }
+    if (!other->getFieldCount() && !mFieldCount) {
+        if((other->getType() == getType()) &&
+           (other->getKind() == getKind()) &&
+           (other->getComponent().getIsNormalized() == getComponent().getIsNormalized()) &&
+           (other->getComponent().getVectorSize() == getComponent().getVectorSize())) {
+            return true;
+        }
+        return false;
+    }
+    if (other->getFieldCount() == mFieldCount) {
+        for (uint32_t i=0; i < mFieldCount; i++) {
+            if ((!other->mFields[i].e->isEqual(mFields[i].e.get())) ||
+                (other->mFields[i].name.length() != mFields[i].name.length()) ||
+                (other->mFields[i].name != mFields[i].name) ||
+                (other->mFields[i].arraySize != mFields[i].arraySize)) {
+                return false;
+            }
+        }
+        return true;
+    }
+    return false;
+}
 
 const Element * Element::create(Context *rsc, RsDataType dt, RsDataKind dk,
                             bool isNorm, uint32_t vecSize)
@@ -104,12 +197,13 @@
     Element *e = new Element(rsc);
     e->mComponent.set(dt, dk, isNorm, vecSize);
     e->mBits = e->mComponent.getBits();
+    e->mHasReference = e->mComponent.isReference();
     rsc->mStateElement.mElements.push(e);
     return e;
 }
 
 const Element * Element::create(Context *rsc, size_t count, const Element **ein,
-                            const char **nin, const size_t * lengths)
+                            const char **nin, const size_t * lengths, const uint32_t *asin)
 {
     // Look for an existing match.
     for (uint32_t ct=0; ct < rsc->mStateElement.mElements.size(); ct++) {
@@ -119,7 +213,8 @@
             for (uint32_t i=0; i < count; i++) {
                 if ((ee->mFields[i].e.get() != ein[i]) ||
                     (ee->mFields[i].name.length() != lengths[i]) ||
-                    (ee->mFields[i].name != nin[i])) {
+                    (ee->mFields[i].name != nin[i]) ||
+                    (ee->mFields[i].arraySize != asin[i])) {
                     match = false;
                     break;
                 }
@@ -134,54 +229,23 @@
     Element *e = new Element(rsc);
     e->mFields = new ElementField_t [count];
     e->mFieldCount = count;
+    size_t bits = 0;
     for (size_t ct=0; ct < count; ct++) {
         e->mFields[ct].e.set(ein[ct]);
         e->mFields[ct].name.setTo(nin[ct], lengths[ct]);
+        e->mFields[ct].offsetBits = bits;
+        e->mFields[ct].arraySize = asin[ct];
+        bits += ein[ct]->getSizeBits() * asin[ct];  // a field spans arraySize copies of its element
+
+        if (ein[ct]->mHasReference) {
+            e->mHasReference = true;
+        }
     }
 
     rsc->mStateElement.mElements.push(e);
     return e;
 }
 
-String8 Element::getCStructBody(uint32_t indent) const
-{
-    String8 si;
-    for (uint32_t ct=0; ct < indent; ct++) {
-        si.append(" ");
-    }
-
-    String8 s(si);
-    s.append("{\n");
-    for (uint32_t ct = 0; ct < mFieldCount; ct++) {
-        s.append(si);
-        s.append(mFields[ct].e->getCType(indent+4));
-        s.append(" ");
-        s.append(mFields[ct].name);
-        s.append(";\n");
-    }
-    s.append(si);
-    s.append("}");
-    return s;
-}
-
-String8 Element::getCType(uint32_t indent) const
-{
-    String8 s;
-    for (uint32_t ct=0; ct < indent; ct++) {
-        s.append(" ");
-    }
-
-    if (!mFieldCount) {
-        // Basic component.
-        s.append(mComponent.getCType());
-    } else {
-        s.append("struct ");
-        s.append(getCStructBody(indent));
-    }
-
-    return s;
-}
-
 String8 Element::getGLSLType(uint32_t indent) const
 {
     String8 s;
@@ -201,10 +265,60 @@
     return s;
 }
 
+void Element::incRefs(const void *ptr) const
+{
+    if (!mFieldCount) {
+        if (mComponent.isReference()) {
+            ObjectBase *const*obp = static_cast<ObjectBase *const*>(ptr);
+            ObjectBase *ob = obp[0];
+            if (ob) ob->incSysRef();
+        }
+        return;
+    }
+
+    const uint8_t *p = static_cast<const uint8_t *>(ptr);
+    for (uint32_t i=0; i < mFieldCount; i++) {
+        if (mFields[i].e->mHasReference) {
+            const uint8_t *fp = &p[mFields[i].offsetBits >> 3];  // offsetBits is relative to the struct base, so p must stay fixed
+            for (uint32_t ct=0; ct < mFields[i].arraySize; ct++) {
+                mFields[i].e->incRefs(fp);
+                fp += mFields[i].e->getSizeBytes();
+            }
+        }
+    }
+}
+
+void Element::decRefs(const void *ptr) const
+{
+    if (!mFieldCount) {
+        if (mComponent.isReference()) {
+            ObjectBase *const*obp = static_cast<ObjectBase *const*>(ptr);
+            ObjectBase *ob = obp[0];
+            if (ob) ob->decSysRef();
+        }
+        return;
+    }
+
+    const uint8_t *p = static_cast<const uint8_t *>(ptr);
+    for (uint32_t i=0; i < mFieldCount; i++) {
+        if (mFields[i].e->mHasReference) {
+            const uint8_t *fp = &p[mFields[i].offsetBits >> 3];  // offsetBits is relative to the struct base, so p must stay fixed
+            for (uint32_t ct=0; ct < mFields[i].arraySize; ct++) {
+                mFields[i].e->decRefs(fp);
+                fp += mFields[i].e->getSizeBytes();
+            }
+        }
+    }
+}
 
 
 ElementState::ElementState()
 {
+    const uint32_t initialCapacity = 32;
+    mBuilderElements.setCapacity(initialCapacity);
+    mBuilderNameStrings.setCapacity(initialCapacity);
+    mBuilderNameLengths.setCapacity(initialCapacity);
+    mBuilderArrays.setCapacity(initialCapacity);
 }
 
 ElementState::~ElementState()
@@ -212,6 +326,29 @@
     rsAssert(!mElements.size());
 }
 
+void ElementState::elementBuilderBegin() {
+    mBuilderElements.clear();
+    mBuilderNameStrings.clear();
+    mBuilderNameLengths.clear();
+    mBuilderArrays.clear();
+}
+
+void ElementState::elementBuilderAdd(const Element *e, const char *nameStr, uint32_t arraySize) {
+    mBuilderElements.push(e);
+    mBuilderNameStrings.push(nameStr);
+    mBuilderNameLengths.push(strlen(nameStr));
+    mBuilderArrays.push(arraySize);
+
+}
+
+const Element *ElementState::elementBuilderCreate(Context *rsc) {
+    return Element::create(rsc, mBuilderElements.size(),
+                                &(mBuilderElements.editArray()[0]),
+                                &(mBuilderNameStrings.editArray()[0]),
+                                mBuilderNameLengths.editArray(),
+                                mBuilderArrays.editArray());
+}
+
 
 /////////////////////////////////////////
 //
@@ -235,14 +372,41 @@
                              size_t count,
                              const RsElement * ein,
                              const char ** names,
-                             const size_t * nameLengths)
+                             const size_t * nameLengths,
+                             const uint32_t * arraySizes)
 {
     //LOGE("rsi_ElementCreate2 %i", count);
-    const Element *e = Element::create(rsc, count, (const Element **)ein, names, nameLengths);
+    const Element *e = Element::create(rsc, count, (const Element **)ein, names, nameLengths, arraySizes);
     e->incUserRef();
     return (RsElement)e;
 }
 
+void rsi_ElementGetNativeData(Context *rsc, RsElement elem, uint32_t *elemData, uint32_t elemDataSize)
+{
+    rsAssert(elemDataSize == 5);
+    // we will pack mType; mKind; mNormalized; mVectorSize; NumSubElements
+    Element *e = static_cast<Element *>(elem);
+
+    (*elemData++) = (uint32_t)e->getType();
+    (*elemData++) = (uint32_t)e->getKind();
+    (*elemData++) = e->getComponent().getIsNormalized() ? 1 : 0;
+    (*elemData++) = e->getComponent().getVectorSize();
+    (*elemData++) = e->getFieldCount();
+
+}
+
+void rsi_ElementGetSubElements(Context *rsc, RsElement elem, uint32_t *ids, const char **names, uint32_t dataSize)
+{
+    Element *e = static_cast<Element *>(elem);
+    rsAssert(e->getFieldCount() == dataSize);
+
+    for(uint32_t i = 0; i < dataSize; i ++) {
+        ids[i] = (uint32_t)e->getField(i);
+        names[i] = e->getFieldName(i);
+    }
+
+}
+
 
 }
 }
diff --git a/rsElement.h b/rsElement.h
index 02a1ca2..70e2619 100644
--- a/rsElement.h
+++ b/rsElement.h
@@ -40,30 +40,40 @@
         return (getSizeBits() + 7) >> 3;
     }
 
-    size_t getFieldOffsetBits(uint32_t componentNumber) const;
+    size_t getFieldOffsetBits(uint32_t componentNumber) const {
+        return mFields[componentNumber].offsetBits;
+    }
     size_t getFieldOffsetBytes(uint32_t componentNumber) const {
-        return (getFieldOffsetBits(componentNumber) + 7) >> 3;
+        return mFields[componentNumber].offsetBits >> 3;
     }
 
     uint32_t getFieldCount() const {return mFieldCount;}
     const Element * getField(uint32_t idx) const {return mFields[idx].e.get();}
     const char * getFieldName(uint32_t idx) const {return mFields[idx].name.string();}
+    uint32_t getFieldArraySize(uint32_t idx) const {return mFields[idx].arraySize;}
 
     const Component & getComponent() const {return mComponent;}
     RsDataType getType() const {return mComponent.getType();}
     RsDataKind getKind() const {return mComponent.getKind();}
     uint32_t getBits() const {return mBits;}
 
-    String8 getCType(uint32_t indent=0) const;
-    String8 getCStructBody(uint32_t indent=0) const;
     String8 getGLSLType(uint32_t indent=0) const;
 
     void dumpLOGV(const char *prefix) const;
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ELEMENT; }
+    static Element *createFromStream(Context *rsc, IStream *stream);
 
     static const Element * create(Context *rsc, RsDataType dt, RsDataKind dk,
                             bool isNorm, uint32_t vecSize);
     static const Element * create(Context *rsc, size_t count, const Element **,
-                            const char **, const size_t * lengths);
+                            const char **, const size_t * lengths, const uint32_t *asin);
+
+    void incRefs(const void *) const;
+    void decRefs(const void *) const;
+    bool getHasReferences() const {return mHasReference;}
+
+    bool isEqual(const Element *other) const;
 
 protected:
     // deallocate any components that are part of this element.
@@ -72,9 +82,12 @@
     typedef struct {
         String8 name;
         ObjectBaseRef<const Element> e;
+        uint32_t offsetBits;
+        uint32_t arraySize;
     } ElementField_t;
     ElementField_t *mFields;
     size_t mFieldCount;
+    bool mHasReference;
 
 
     Element(Context *);
@@ -89,8 +102,17 @@
     ElementState();
     ~ElementState();
 
+    void elementBuilderBegin();
+    void elementBuilderAdd(const Element *e, const char *nameStr, uint32_t arraySize);
+    const Element *elementBuilderCreate(Context *rsc);
+
     // Cache of all existing elements.
-    Vector<const Element *> mElements;
+    Vector<Element *> mElements;
+private:
+    Vector<const Element *> mBuilderElements;
+    Vector<const char*> mBuilderNameStrings;
+    Vector<size_t> mBuilderNameLengths;
+    Vector<uint32_t> mBuilderArrays;
 };
 
 
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index e3272c5..c90edc2 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -15,81 +15,193 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif
 
-
-#include <utils/String8.h>
 #include "rsFileA3D.h"
 
 #include "rsMesh.h"
+#include "rsAnimation.h"
+
 
 using namespace android;
 using namespace android::renderscript;
 
-
-
-FileA3D::FileA3D()
+FileA3D::FileA3D(Context *rsc) : ObjectBase(rsc)
 {
-    mRsc = NULL;
+    mAlloc = NULL;
+    mData = NULL;
+    mWriteStream = NULL;
+    mReadStream = NULL;
+
+    mMajorVersion = 0;
+    mMinorVersion = 1;
+    mDataSize = 0;
 }
 
 FileA3D::~FileA3D()
 {
+    for(size_t i = 0; i < mIndex.size(); i ++) {
+        delete mIndex[i];
+    }
+    for(size_t i = 0; i < mWriteIndex.size(); i ++) {
+        delete mWriteIndex[i];
+    }
+    if(mWriteStream) {
+        delete mWriteStream;
+    }
+    if(mReadStream) {
+        delete mReadStream;  // was deleting mWriteStream a second time and leaking the read stream
+    }
+    if(mAlloc) {
+        free(mAlloc);
+    }
 }
 
-bool FileA3D::load(Context *rsc, FILE *f)
+void FileA3D::parseHeader(IStream *headerStream)
+{
+    mMajorVersion = headerStream->loadU32();
+    mMinorVersion = headerStream->loadU32();
+    uint32_t flags = headerStream->loadU32();
+    mUse64BitOffsets = (flags & 1) != 0;
+
+    uint32_t numIndexEntries = headerStream->loadU32();
+    for(uint32_t i = 0; i < numIndexEntries; i ++) {
+        A3DIndexEntry *entry = new A3DIndexEntry();
+        headerStream->loadString(&entry->mObjectName);
+        LOGV("Header data, entry name = %s", entry->mObjectName.string());
+        entry->mType = (RsA3DClassID)headerStream->loadU32();
+        if(mUse64BitOffsets){
+            entry->mOffset = headerStream->loadOffset();
+            entry->mLength = headerStream->loadOffset();
+        }
+        else {
+            entry->mOffset = headerStream->loadU32();
+            entry->mLength = headerStream->loadU32();
+        }
+        entry->mRsObj = NULL;
+        mIndex.push(entry);
+    }
+}
+
+bool FileA3D::load(const void *data, size_t length)
+{
+    const uint8_t *localData = (const uint8_t *)data;
+
+    size_t lengthRemaining = length;
+    size_t magicStrLen = 12;
+    if ((length < magicStrLen) ||
+        memcmp(data, "Android3D_ff", magicStrLen)) {
+        return false;
+    }
+
+    localData += magicStrLen;
+    lengthRemaining -= magicStrLen;
+
+    // Next we get our header size
+    uint64_t headerSize = 0;
+    if(lengthRemaining < sizeof(headerSize)) {
+        return false;
+    }
+
+    memcpy(&headerSize, localData, sizeof(headerSize));
+    localData += sizeof(headerSize);
+    lengthRemaining -= sizeof(headerSize);
+
+    if(headerSize == 0 || lengthRemaining < headerSize) {  // reject empty headers, as the FILE* overload does
+        return false;
+    }
+
+    uint8_t *headerData = (uint8_t *)malloc(headerSize);
+    if(!headerData) {
+        return false;
+    }
+
+    memcpy(headerData, localData, headerSize);
+
+    // Now open the stream to parse the header
+    IStream headerStream(headerData, false);
+    parseHeader(&headerStream);
+
+    free(headerData);
+
+    localData += headerSize;
+    lengthRemaining -= headerSize;
+
+    if(lengthRemaining < sizeof(mDataSize)) {
+        return false;
+    }
+
+    // Read the size of the data
+    memcpy(&mDataSize, localData, sizeof(mDataSize));
+    localData += sizeof(mDataSize);
+    lengthRemaining -= sizeof(mDataSize);
+
+    if(mDataSize == 0 || lengthRemaining < mDataSize) {  // reject empty payloads, as the FILE* overload does
+        return false;
+    }
+
+    // We should know enough to read the file in at this point.
+    mAlloc = malloc(mDataSize);
+    if (!mAlloc) {
+        return false;
+    }
+    mData = (uint8_t *)mAlloc;
+    memcpy(mAlloc, localData, mDataSize);
+
+    mReadStream = new IStream(mData, mUse64BitOffsets);
+
+    return true;
+}
+
+bool FileA3D::load(FILE *f)
 {
     char magicString[12];
     size_t len;
 
-    LOGE("file open 1");
+    LOGV("file open 1");
     len = fread(magicString, 1, 12, f);
     if ((len != 12) ||
         memcmp(magicString, "Android3D_ff", 12)) {
         return false;
     }
 
-    LOGE("file open 2");
-    len = fread(&mMajorVersion, 1, sizeof(mMajorVersion), f);
-    if (len != sizeof(mMajorVersion)) {
+    // Next thing is the size of the header
+    uint64_t headerSize = 0;
+    len = fread(&headerSize, 1, sizeof(headerSize), f);
+    if (len != sizeof(headerSize) || headerSize == 0) {
         return false;
     }
 
-    LOGE("file open 3");
-    len = fread(&mMinorVersion, 1, sizeof(mMinorVersion), f);
-    if (len != sizeof(mMinorVersion)) {
+    uint8_t *headerData = (uint8_t *)malloc(headerSize);
+    if(!headerData) {
         return false;
     }
 
-    LOGE("file open 4");
-    uint32_t flags;
-    len = fread(&flags, 1, sizeof(flags), f);
-    if (len != sizeof(flags)) {
+    len = fread(headerData, 1, headerSize, f);
+    if (len != headerSize) {
         return false;
     }
-    mUse64BitOffsets = (flags & 1) != 0;
 
-    LOGE("file open 64bit = %i", mUse64BitOffsets);
+    // Now open the stream to parse the header
+    IStream headerStream(headerData, false);
+    parseHeader(&headerStream);
 
-    if (mUse64BitOffsets) {
-        len = fread(&mDataSize, 1, sizeof(mDataSize), f);
-        if (len != sizeof(mDataSize)) {
-            return false;
-        }
-    } else {
-        uint32_t tmp;
-        len = fread(&tmp, 1, sizeof(tmp), f);
-        if (len != sizeof(tmp)) {
-            return false;
-        }
-        mDataSize = tmp;
+    free(headerData);
+
+    // Next thing is the size of the data
+    len = fread(&mDataSize, 1, sizeof(mDataSize), f);
+    if (len != sizeof(mDataSize) || mDataSize == 0) {
+        return false;
     }
 
-    LOGE("file open size = %lli", mDataSize);
+    LOGV("file open size = %lli", mDataSize);
 
     // We should know enough to read the file in at this point.
-    fseek(f, SEEK_SET, 0);
-    mAlloc= malloc(mDataSize);
+    mAlloc = malloc(mDataSize);
     if (!mAlloc) {
         return false;
     }
@@ -99,282 +211,252 @@
         return false;
     }
 
-    LOGE("file start processing");
-    return process(rsc);
+    mReadStream = new IStream(mData, mUse64BitOffsets);
+
+    LOGV("Header is read an stream initialized");
+    return true;
 }
 
-bool FileA3D::processIndex(Context *rsc, A3DIndexEntry *ie)
-{
-    bool ret = false;
-    IO io(mData + ie->mOffset, mUse64BitOffsets);
+size_t FileA3D::getNumIndexEntries() const {
+    return mIndex.size();  // current element count of the mIndex vector
+}
 
-    LOGE("process index, type %i", ie->mType);
-
-    switch(ie->mType) {
-    case CHUNK_ELEMENT:
-        processChunk_Element(rsc, &io, ie);
-        break;
-    case CHUNK_ELEMENT_SOURCE:
-        processChunk_ElementSource(rsc, &io, ie);
-        break;
-    case CHUNK_VERTICIES:
-        processChunk_Verticies(rsc, &io, ie);
-        break;
-    case CHUNK_MESH:
-        processChunk_Mesh(rsc, &io, ie);
-        break;
-    case CHUNK_PRIMITIVE:
-        processChunk_Primitive(rsc, &io, ie);
-        break;
-    default:
-        LOGE("FileA3D Unknown chunk type");
-        break;
+const FileA3D::A3DIndexEntry *FileA3D::getIndexEntry(size_t index) const {
+    if(index < mIndex.size()) {
+        return mIndex[index];
     }
-    return (ie->mRsObj != NULL);
+    return NULL;
 }
 
-bool FileA3D::process(Context *rsc)
-{
-    LOGE("process");
-    IO io(mData + 12, mUse64BitOffsets);
-    bool ret = true;
-
-    // Build the index first
-    LOGE("process 1");
-    io.loadU32(); // major version, already loaded
-    io.loadU32(); // minor version, already loaded
-    LOGE("process 2");
-
-    io.loadU32();  // flags
-    io.loadOffset(); // filesize, already loaded.
-    LOGE("process 4");
-    uint64_t mIndexOffset = io.loadOffset();
-    uint64_t mStringOffset = io.loadOffset();
-
-    LOGE("process mIndexOffset= 0x%016llx", mIndexOffset);
-    LOGE("process mStringOffset= 0x%016llx", mStringOffset);
-
-    IO index(mData + mIndexOffset, mUse64BitOffsets);
-    IO stringTable(mData + mStringOffset, mUse64BitOffsets);
-
-    uint32_t stringEntryCount = stringTable.loadU32();
-    LOGE("stringEntryCount %i", stringEntryCount);
-    mStrings.setCapacity(stringEntryCount);
-    mStringIndexValues.setCapacity(stringEntryCount);
-    if (stringEntryCount) {
-        uint32_t stringType = stringTable.loadU32();
-        LOGE("stringType %i", stringType);
-        rsAssert(stringType==0);
-        for (uint32_t ct = 0; ct < stringEntryCount; ct++) {
-            uint64_t offset = stringTable.loadOffset();
-            LOGE("string offset 0x%016llx", offset);
-            IO tmp(mData + offset, mUse64BitOffsets);
-            String8 s;
-            tmp.loadString(&s);
-            LOGE("string %s", s.string());
-            mStrings.push(s);
-        }
+ObjectBase *FileA3D::initializeFromEntry(size_t index) {
+    if(index >= mIndex.size()) {
+        return NULL;
     }
 
-    LOGE("strings done");
-    uint32_t indexEntryCount = index.loadU32();
-    LOGE("index count %i", indexEntryCount);
-    mIndex.setCapacity(indexEntryCount);
-    for (uint32_t ct = 0; ct < indexEntryCount; ct++) {
-        A3DIndexEntry e;
-        uint32_t stringIndex = index.loadU32();
-        LOGE("index %i", ct);
-        LOGE("  string index %i", stringIndex);
-        e.mType = (A3DChunkType)index.loadU32();
-        LOGE("  type %i", e.mType);
-        e.mOffset = index.loadOffset();
-        LOGE("  offset 0x%016llx", e.mOffset);
-
-        if (stringIndex && (stringIndex < mStrings.size())) {
-            e.mID = mStrings[stringIndex];
-            mStringIndexValues.editItemAt(stringIndex) = ct;
-            LOGE("  id %s", e.mID.string());
-        }
-
-        mIndex.push(e);
-    }
-    LOGE("index done");
-
-    // At this point the index should be fully populated.
-    // We can now walk though it and load all the objects.
-    for (uint32_t ct = 0; ct < indexEntryCount; ct++) {
-        LOGE("processing index entry %i", ct);
-        processIndex(rsc, &mIndex.editItemAt(ct));
+    FileA3D::A3DIndexEntry *entry = mIndex[index];
+    if(!entry) {
+        return NULL;
     }
 
-    return ret;
-}
-
-
-FileA3D::IO::IO(const uint8_t *buf, bool use64)
-{
-    mData = buf;
-    mPos = 0;
-    mUse64 = use64;
-}
-
-uint64_t FileA3D::IO::loadOffset()
-{
-    uint64_t tmp;
-    if (mUse64) {
-        mPos = (mPos + 7) & (~7);
-        tmp = reinterpret_cast<const uint64_t *>(&mData[mPos])[0];
-        mPos += sizeof(uint64_t);
-        return tmp;
+    if(entry->mRsObj) {
+        entry->mRsObj->incUserRef();
+        return entry->mRsObj;
     }
-    return loadU32();
-}
 
-void FileA3D::IO::loadString(String8 *s)
-{
-    LOGE("loadString");
-    uint32_t len = loadU32();
-    LOGE("loadString len %i", len);
-    s->setTo((const char *)&mData[mPos], len);
-    mPos += len;
-}
-
-
-void FileA3D::processChunk_Mesh(Context *rsc, IO *io, A3DIndexEntry *ie)
-{
-    Mesh * m = new Mesh(rsc);
-
-    m->mPrimitivesCount = io->loadU32();
-    m->mPrimitives = new Mesh::Primitive_t *[m->mPrimitivesCount];
-
-    for (uint32_t ct = 0; ct < m->mPrimitivesCount; ct++) {
-        uint32_t index = io->loadU32();
-
-        m->mPrimitives[ct] = (Mesh::Primitive_t *)mIndex[index].mRsObj;
-    }
-    ie->mRsObj = m;
-}
-
-void FileA3D::processChunk_Primitive(Context *rsc, IO *io, A3DIndexEntry *ie)
-{
-    Mesh::Primitive_t * p = new Mesh::Primitive_t;
-
-    p->mIndexCount = io->loadU32();
-    uint32_t vertIdx = io->loadU32();
-    p->mRestartCounts = io->loadU16();
-    uint32_t bits = io->loadU8();
-    p->mType = (RsPrimitive)io->loadU8();
-
-    LOGE("processChunk_Primitive count %i, bits %i", p->mIndexCount, bits);
-
-    p->mVerticies = (Mesh::Verticies_t *)mIndex[vertIdx].mRsObj;
-
-    p->mIndicies = new uint16_t[p->mIndexCount];
-    for (uint32_t ct = 0; ct < p->mIndexCount; ct++) {
-        switch(bits) {
-        case 8:
-            p->mIndicies[ct] = io->loadU8();
+    // Seek to the beginning of object
+    mReadStream->reset(entry->mOffset);
+    switch (entry->mType) {
+        case RS_A3D_CLASS_ID_UNKNOWN:
+            return NULL;
+        case RS_A3D_CLASS_ID_MESH:
+            entry->mRsObj = Mesh::createFromStream(mRSC, mReadStream);
             break;
-        case 16:
-            p->mIndicies[ct] = io->loadU16();
+        case RS_A3D_CLASS_ID_TYPE:
+            entry->mRsObj = Type::createFromStream(mRSC, mReadStream);
             break;
-        case 32:
-            p->mIndicies[ct] = io->loadU32();
+        case RS_A3D_CLASS_ID_ELEMENT:
+            entry->mRsObj = Element::createFromStream(mRSC, mReadStream);
             break;
+        case RS_A3D_CLASS_ID_ALLOCATION:
+            entry->mRsObj = Allocation::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_PROGRAM_VERTEX:
+            entry->mRsObj = ProgramVertex::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_PROGRAM_RASTER:
+            entry->mRsObj = ProgramRaster::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_PROGRAM_FRAGMENT:
+            entry->mRsObj = ProgramFragment::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_PROGRAM_STORE:
+            entry->mRsObj = ProgramStore::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_SAMPLER:
+            entry->mRsObj = Sampler::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_ANIMATION:
+            entry->mRsObj = Animation::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_ADAPTER_1D:
+            entry->mRsObj = Adapter1D::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_ADAPTER_2D:
+            entry->mRsObj = Adapter2D::createFromStream(mRSC, mReadStream);
+            break;
+        case RS_A3D_CLASS_ID_SCRIPT_C:
+            return NULL;
+    }
+    if(entry->mRsObj) {
+        entry->mRsObj->incUserRef();
+    }
+    return entry->mRsObj;
+}
+
+bool FileA3D::writeFile(const char *filename)
+{
+    if(!mWriteStream) {
+        LOGE("No objects to write\n");
+        return false;
+    }
+    if(mWriteStream->getPos() == 0) {
+        LOGE("No objects to write\n");
+        return false;
+    }
+
+    FILE *writeHandle = fopen(filename, "wb");
+    if(!writeHandle) {
+        LOGE("Couldn't open the file for writing\n");
+        return false;
+    }
+
+    // Open a new stream to make writing the header easier
+    OStream headerStream(5*1024, false);
+    headerStream.addU32(mMajorVersion);
+    headerStream.addU32(mMinorVersion);
+    uint32_t is64Bit = 0;
+    headerStream.addU32(is64Bit);
+
+    uint32_t writeIndexSize = mWriteIndex.size();
+    headerStream.addU32(writeIndexSize);
+    for(uint32_t i = 0; i < writeIndexSize; i ++) {
+        headerStream.addString(&mWriteIndex[i]->mObjectName);
+        headerStream.addU32((uint32_t)mWriteIndex[i]->mType);
+        if(mUse64BitOffsets){
+            headerStream.addOffset(mWriteIndex[i]->mOffset);
+            headerStream.addOffset(mWriteIndex[i]->mLength);
         }
-        LOGE("  idx %i", p->mIndicies[ct]);
-    }
-
-    if (p->mRestartCounts) {
-        p->mRestarts = new uint16_t[p->mRestartCounts];
-        for (uint32_t ct = 0; ct < p->mRestartCounts; ct++) {
-            switch(bits) {
-            case 8:
-                p->mRestarts[ct] = io->loadU8();
-                break;
-            case 16:
-                p->mRestarts[ct] = io->loadU16();
-                break;
-            case 32:
-                p->mRestarts[ct] = io->loadU32();
-                break;
-            }
-            LOGE("  idx %i", p->mRestarts[ct]);
+        else {
+            uint32_t offset = (uint32_t)mWriteIndex[i]->mOffset;
+            headerStream.addU32(offset);
+            offset = (uint32_t)mWriteIndex[i]->mLength;
+            headerStream.addU32(offset);
         }
-    } else {
-        p->mRestarts = NULL;
     }
 
-    ie->mRsObj = p;
+    // Write our magic string so we know we are reading the right file
+    String8 magicString(A3D_MAGIC_KEY);
+    fwrite(magicString.string(), sizeof(char), magicString.size(), writeHandle);
+
+    // Store the size of the header to make it easier to parse when we read it
+    uint64_t headerSize = headerStream.getPos();
+    fwrite(&headerSize, sizeof(headerSize), 1, writeHandle);
+
+    // Now write our header
+    fwrite(headerStream.getPtr(), sizeof(uint8_t), headerStream.getPos(), writeHandle);
+
+    // Now write the size of the data part of the file for easier parsing later
+    uint64_t fileDataSize = mWriteStream->getPos();
+    fwrite(&fileDataSize, sizeof(fileDataSize), 1, writeHandle);
+
+    fwrite(mWriteStream->getPtr(), sizeof(uint8_t), mWriteStream->getPos(), writeHandle);
+
+    int status = fclose(writeHandle);
+
+    if(status != 0) {
+        LOGE("Couldn't close file\n");
+        return false;
+    }
+
+    return true;
 }
 
-void FileA3D::processChunk_Verticies(Context *rsc, IO *io, A3DIndexEntry *ie)
-{
-    Mesh::Verticies_t *cv = new Mesh::Verticies_t;
-    cv->mAllocationCount = io->loadU32();
-    cv->mAllocations = new Allocation *[cv->mAllocationCount];
-    LOGE("processChunk_Verticies count %i", cv->mAllocationCount);
-    for (uint32_t ct = 0; ct < cv->mAllocationCount; ct++) {
-        uint32_t i = io->loadU32();
-        cv->mAllocations[ct] = (Allocation *)mIndex[i].mRsObj;
-        LOGE("  idx %i", i);
+void FileA3D::appendToFile(ObjectBase *obj) {
+    if(!obj) {
+        return;
     }
-    ie->mRsObj = cv;
-}
-
-void FileA3D::processChunk_Element(Context *rsc, IO *io, A3DIndexEntry *ie)
-{
-    /*
-    rsi_ElementBegin(rsc);
-
-    uint32_t count = io->loadU32();
-    LOGE("processChunk_Element count %i", count);
-    while (count--) {
-        RsDataKind dk = (RsDataKind)io->loadU8();
-        RsDataType dt = (RsDataType)io->loadU8();
-        uint32_t bits = io->loadU8();
-        bool isNorm = io->loadU8() != 0;
-        LOGE("  %i %i %i %i", dk, dt, bits, isNorm);
-        rsi_ElementAdd(rsc, dk, dt, isNorm, bits, 0);
+    if(!mWriteStream) {
+        const uint64_t initialStreamSize = 256*1024;
+        mWriteStream = new OStream(initialStreamSize, false);
     }
-    LOGE("processChunk_Element create");
-    ie->mRsObj = rsi_ElementCreate(rsc);
-    */
-}
-
-void FileA3D::processChunk_ElementSource(Context *rsc, IO *io, A3DIndexEntry *ie)
-{
-    uint32_t index = io->loadU32();
-    uint32_t count = io->loadU32();
-
-    LOGE("processChunk_ElementSource count %i, index %i", count, index);
-
-    RsElement e = (RsElement)mIndex[index].mRsObj;
-
-    RsAllocation a = rsi_AllocationCreateSized(rsc, e, count);
-    Allocation * alloc = static_cast<Allocation *>(a);
-
-    float * data = (float *)alloc->getPtr();
-    while(count--) {
-        *data = io->loadF();
-        LOGE("  %f", *data);
-        data++;
-    }
-    ie->mRsObj = alloc;
+    A3DIndexEntry *indexEntry = new A3DIndexEntry();
+    indexEntry->mObjectName.setTo(obj->getName());
+    indexEntry->mType = obj->getClassId();
+    indexEntry->mOffset = mWriteStream->getPos();
+    indexEntry->mRsObj = obj;
+    mWriteIndex.push(indexEntry);
+    obj->serialize(mWriteStream);
+    indexEntry->mLength = mWriteStream->getPos() - indexEntry->mOffset;
+    mWriteStream->align(4);
 }
 
 namespace android {
 namespace renderscript {
 
+// Stores the number of index entries of |file| in |*numEntries|;
+// a NULL file handle is reported as having zero entries.
+void rsi_FileA3DGetNumIndexEntries(Context *rsc, int32_t *numEntries, RsFile file)
+{
+    FileA3D *a3dFile = static_cast<FileA3D *>(file);
+    if(!a3dFile) {
+        *numEntries = 0;
+        return;
+    }
+    *numEntries = a3dFile->getNumIndexEntries();
+}
+
+// Copies the class id and name of every index entry of |file| into |fileEntries|.
+// |numEntries| must equal the file's entry count; on a mismatch, a zero count,
+// or a NULL handle/array, an error is logged and nothing is written.
+void rsi_FileA3DGetIndexEntries(Context *rsc, RsFileIndexEntry *fileEntries, uint32_t numEntries, RsFile file)
+{
+    FileA3D *a3dFile = static_cast<FileA3D *>(file);
+    if(a3dFile == NULL) {
+        LOGE("Can't load index entries. No valid file");
+        return;
+    }
+
+    const uint32_t available = a3dFile->getNumIndexEntries();
+    if(!(available == numEntries && numEntries != 0 && fileEntries != NULL)) {
+        LOGE("Can't load index entries. Invalid number requested");
+        return;
+    }
+    for(uint32_t ct = 0; ct < available; ct ++) {
+        const FileA3D::A3DIndexEntry *src = a3dFile->getIndexEntry(ct);
+        fileEntries[ct].classID = src->getType();
+        fileEntries[ct].objectName = src->getObjectName().string();
+    }
+}
+
+// Returns the object at |index| in |file|, or NULL if the file is NULL or the entry cannot be loaded.
+RsObjectBase rsi_FileA3DGetEntryByIndex(Context *rsc, uint32_t index, RsFile file)
+{
+    FileA3D *fa3d = static_cast<FileA3D *>(file);
+    if(!fa3d) {
+        LOGE("Can't load entry. No valid file");
+        return NULL;
+    }
+    ObjectBase *obj = fa3d->initializeFromEntry(index);
+    // initializeFromEntry() yields NULL for bad indices and unsupported types; never dereference that.
+    LOGV("Returning object with name %s", obj ? obj->getName() : "(null)");
+    return obj;
+}
+
+// Builds a FileA3D from an in-memory asset and hands it back with one user
+// reference taken. The result of load() is not inspected here.
+RsFile rsi_FileA3DCreateFromAssetStream(Context *rsc, const void *data, uint32_t len)
+{
+    if (!data) {
+        LOGE("File load failed. Asset stream is NULL");
+        return NULL;
+    }
+
+    FileA3D *a3dFile = new FileA3D(rsc);
+    a3dFile->load(data, len);
+    a3dFile->incUserRef();
+    return a3dFile;
+}
+
 
 RsFile rsi_FileOpen(Context *rsc, char const *path, unsigned int len)
 {
-    FileA3D *fa3d = new FileA3D;
+    FileA3D *fa3d = new FileA3D(rsc);
 
     FILE *f = fopen("/sdcard/test.a3d", "rb");
     if (f) {
-        fa3d->load(rsc, f);
+        fa3d->load(f);
         fclose(f);
+        fa3d->incUserRef();
         return fa3d;
     }
     delete fa3d;
diff --git a/rsFileA3D.h b/rsFileA3D.h
index 9ee08ec..b985907 100644
--- a/rsFileA3D.h
+++ b/rsFileA3D.h
@@ -18,20 +18,23 @@
 #define ANDROID_RS_FILE_A3D_H
 
 #include "RenderScript.h"
-#include "rsFileA3DDecls.h"
 #include "rsMesh.h"
 
 #include <utils/String8.h>
+#include "rsStream.h"
 #include <stdio.h>
 
+#define A3D_MAGIC_KEY "Android3D_ff"
+
 // ---------------------------------------------------------------------------
 namespace android {
+
 namespace renderscript {
 
-class FileA3D
+class FileA3D : public ObjectBase
 {
 public:
-    FileA3D();
+    FileA3D(Context *rsc);
     ~FileA3D();
 
     uint32_t mMajorVersion;
@@ -40,78 +43,53 @@
     uint64_t mStringTableOffset;
     bool mUse64BitOffsets;
 
-    struct A3DIndexEntry {
-        String8 mID;
-        A3DChunkType mType;
+    class A3DIndexEntry {
+        String8 mObjectName;
+        RsA3DClassID mType;
         uint64_t mOffset;
-        void * mRsObj;
+        uint64_t mLength;
+        ObjectBase *mRsObj;
+    public:
+        friend class FileA3D;
+        const String8 &getObjectName() const {
+            return mObjectName;
+        }
+        RsA3DClassID getType() const {
+            return mType;
+        }
     };
 
-    bool load(Context *rsc, FILE *f);
+    bool load(FILE *f);
+    bool load(const void *data, size_t length);
+
+    size_t getNumIndexEntries() const;
+    const A3DIndexEntry* getIndexEntry(size_t index) const;
+    ObjectBase *initializeFromEntry(size_t index);
+
+    void appendToFile(ObjectBase *obj);
+    bool writeFile(const char *filename);
+
+    // Currently files do not get serialized,
+    // but we need to inherit from ObjectBase for ref tracking
+    virtual void serialize(OStream *stream) const {
+    }
+    virtual RsA3DClassID getClassId() const {
+        return RS_A3D_CLASS_ID_UNKNOWN;
+    }
 
 protected:
-    class IO
-    {
-    public:
-        IO(const uint8_t *, bool use64);
-    
-        float loadF() {
-            mPos = (mPos + 3) & (~3);
-            float tmp = reinterpret_cast<const float *>(&mData[mPos])[0];
-            mPos += sizeof(float);
-            return tmp;
-        }
-        int32_t loadI32() {
-            mPos = (mPos + 3) & (~3);
-            int32_t tmp = reinterpret_cast<const int32_t *>(&mData[mPos])[0];
-            mPos += sizeof(int32_t);
-            return tmp;
-        }
-        uint32_t loadU32() {
-            mPos = (mPos + 3) & (~3);
-            uint32_t tmp = reinterpret_cast<const uint32_t *>(&mData[mPos])[0];
-            mPos += sizeof(uint32_t);
-            return tmp;
-        }
-        uint16_t loadU16() {
-            mPos = (mPos + 1) & (~1);
-            uint16_t tmp = reinterpret_cast<const uint16_t *>(&mData[mPos])[0];
-            mPos += sizeof(uint16_t);
-            return tmp;
-        }
-        uint8_t loadU8() {
-            uint8_t tmp = reinterpret_cast<const uint8_t *>(&mData[mPos])[0];
-            mPos += sizeof(uint8_t);
-            return tmp;
-        }
-        uint64_t loadOffset();
-        void loadString(String8 *s);
-        uint64_t getPos() const {return mPos;}
-        const uint8_t * getPtr() const;
-    protected:
-        const uint8_t * mData;
-        uint64_t mPos;
-        bool mUse64;
-    };
 
-
-    bool process(Context *rsc);
-    bool processIndex(Context *rsc, A3DIndexEntry *);
-    void processChunk_Mesh(Context *rsc, IO *io, A3DIndexEntry *ie);
-    void processChunk_Primitive(Context *rsc, IO *io, A3DIndexEntry *ie);
-    void processChunk_Verticies(Context *rsc, IO *io, A3DIndexEntry *ie);
-    void processChunk_Element(Context *rsc, IO *io, A3DIndexEntry *ie);
-    void processChunk_ElementSource(Context *rsc, IO *io, A3DIndexEntry *ie);
+    void parseHeader(IStream *headerStream);
 
     const uint8_t * mData;
     void * mAlloc;
     uint64_t mDataSize;
-    Context * mRsc;
 
-    Vector<A3DIndexEntry> mIndex;
-    Vector<String8> mStrings;
-    Vector<uint32_t> mStringIndexValues;
+    OStream *mWriteStream;
+    Vector<A3DIndexEntry*> mWriteIndex;
 
+    IStream *mReadStream;
+    Vector<A3DIndexEntry*> mIndex;
 };
 
 
diff --git a/rsFont.cpp b/rsFont.cpp
new file mode 100644
index 0000000..633129a
--- /dev/null
+++ b/rsFont.cpp
@@ -0,0 +1,879 @@
+
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_BUILD_FOR_HOST
+#include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif
+
+#include "rsFont.h"
+#include "rsProgramFragment.h"
+#include <cutils/properties.h>
+#include FT_BITMAP_H
+
+#include <GLES/gl.h>
+#include <GLES/glext.h>
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+
+using namespace android;
+using namespace android::renderscript;
+
+Font::Font(Context *rsc) : ObjectBase(rsc), mCachedGlyphs(NULL)
+{
+    mAllocFile = __FILE__;   // records this source file in the object
+    mAllocLine = __LINE__;   // value depends on this statement's line position in the file
+    mInitialized = false;    // init() sets this to true on success
+    mHasKerning = false;     // init() fills this in from the loaded face
+    mFace = NULL;            // no FreeType face loaded yet
+}
+
+// Loads the face "$ANDROID_ROOT/fonts/<name>" and applies fontSize/dpi; returns false on any failure.
+bool Font::init(const char *name, uint32_t fontSize, uint32_t dpi)
+{
+    if(mInitialized) {
+        LOGE("Reinitialization of fonts not supported");
+        return false;
+    }
+    // getenv() returns NULL for an unset variable, which must not reach the String8 constructor
+    const char *androidRoot = getenv("ANDROID_ROOT");
+    if(!androidRoot) {
+        LOGE("Unable to initialize font %s: ANDROID_ROOT is not set", name);
+        return false;
+    }
+    String8 fullPath(androidRoot);
+    fullPath += "/fonts/";
+    fullPath += name;
+    FT_Error error = FT_New_Face(mRSC->mStateFont.getLib(), fullPath.string(), 0, &mFace);
+    if(error) {
+        LOGE("Unable to initialize font %s", fullPath.string());
+        return false;
+    }
+    mFontName = name;
+    mFontSize = fontSize;
+    mDpi = dpi;
+    error = FT_Set_Char_Size(mFace, fontSize * 64, 0, dpi, 0);  // char size is passed in 1/64 point units
+    if(error) {
+        LOGE("Unable to set font size on %s", fullPath.string());
+        return false;
+    }
+    mHasKerning = FT_HAS_KERNING(mFace);
+    mInitialized = true;
+    return true;
+}
+
+// Clears mIsValid on every cached glyph record of this font.
+void Font::invalidateTextureCache() {
+    for(size_t ct = 0, total = mCachedGlyphs.size(); ct < total; ct ++) {
+        mCachedGlyphs.valueAt(ct)->mIsValid = false;
+    }
+}
+
+// Appends one textured quad for |glyph| to the font state's mesh, with the pen at (x, y).
+void Font::drawCachedGlyph(CachedGlyphInfo *glyph, int32_t x, int32_t y) {
+    // Position of the quad's first corner and of the diagonally opposite one
+    const int32_t nearX = x + glyph->mBitmapLeft;
+    const int32_t nearY = y - glyph->mBitmapTop + glyph->mBitmapHeight;
+    const int32_t farX = nearX + (int32_t) glyph->mBitmapWidth;
+    const int32_t farY = nearY - (int32_t) glyph->mBitmapHeight;
+
+    // Texture coordinates stored on the glyph record
+    const float minU = glyph->mBitmapMinU;
+    const float maxU = glyph->mBitmapMaxU;
+    const float minV = glyph->mBitmapMinV;
+    const float maxV = glyph->mBitmapMaxV;
+
+    FontState *fontState = &mRSC->mStateFont;
+    fontState->appendMeshQuad(nearX, nearY, 0, minU, maxV,
+                              farX, nearY, 0, maxU, maxV,
+                              farX, farY, 0, maxU, minV,
+                              nearX, farY, 0, minU, minV);
+}
+
+// Copies |glyph|'s pixels from the cache texture data into the caller's bitmapW x bitmapH
+// |bitmap| for pen position (x, y); destination pixels outside the bitmap are logged and skipped.
+void Font::drawCachedGlyph(CachedGlyphInfo* glyph, int32_t x, int32_t y,
+                           uint8_t* bitmap, uint32_t bitmapW, uint32_t bitmapH) {
+    FontState *fontState = &mRSC->mStateFont;
+    const uint32_t cacheStride = fontState->getCacheTextureType()->getDimX();
+    const uint8_t *cachePixels = fontState->getTextTextureData();
+
+    const uint32_t srcEndX = glyph->mBitmapMinX + glyph->mBitmapWidth;
+    const uint32_t srcEndY = glyph->mBitmapMinY + glyph->mBitmapHeight;
+    const int32_t dstStartX = x + glyph->mBitmapLeft;
+    const int32_t dstStartY = y + glyph->mBitmapTop;
+
+    int32_t dstX = dstStartX;
+    for (uint32_t srcX = glyph->mBitmapMinX; srcX < srcEndX; srcX++, dstX++) {
+        int32_t dstY = dstStartY;
+        for (uint32_t srcY = glyph->mBitmapMinY; srcY < srcEndY; srcY++, dstY++) {
+            const bool inside = dstX >= 0 && dstY >= 0 && dstX < (int32_t) bitmapW && dstY < (int32_t) bitmapH;
+            if (!inside) {
+                LOGE("Skipping invalid index");
+                continue;
+            }
+            bitmap[dstY * bitmapW + dstX] = cachePixels[srcY * cacheStride + srcX];
+        }
+    }
+}
+
+// Grows |bounds| so that it also encloses |glyph| placed with the pen at (x, y).
+void Font::measureCachedGlyph(CachedGlyphInfo *glyph, int32_t x, int32_t y, Rect *bounds) {
+    const int32_t glyphW = (int32_t) glyph->mBitmapWidth;
+    const int32_t glyphH = (int32_t) glyph->mBitmapHeight;
+    const int32_t originX = x + glyph->mBitmapLeft;
+    const int32_t originY = y - glyph->mBitmapTop + glyph->mBitmapHeight;
+
+    if (originX < bounds->left) {
+        bounds->left = originX;
+    }
+    if (originY < bounds->bottom) {
+        bounds->bottom = originY;
+    }
+    if (originX + glyphW > bounds->right) {
+        bounds->right = originX + glyphW;
+    }
+    if (originY + glyphH > bounds->top) {
+        bounds->top = originY + glyphH;
+    }
+}
+
+// Steps through the UTF-8 |text| from byte offset |start| and handles each glyph
+// according to |mode|: FRAMEBUFFER appends a mesh quad, BITMAP copies pixels into
+// |bitmap|, MEASURE extends |bounds|. A positive |numGlyphs| limits how many
+// glyphs are processed; a negative value keeps going until utf32_at() fails.
+void Font::renderUTF(const char *text, uint32_t len, int32_t x, int32_t y,
+                     uint32_t start, int32_t numGlyphs,
+                     RenderMode mode, Rect *bounds,
+                     uint8_t *bitmap, uint32_t bitmapW, uint32_t bitmapH)
+{
+    // Nothing to do for an uninitialized font or an empty request
+    if(text == NULL || len == 0 || numGlyphs == 0 || !mInitialized) {
+        return;
+    }
+
+    if(mode == Font::MEASURE) {
+        if (!bounds) {
+            LOGE("No return rectangle provided to measure text");
+            return;
+        }
+        // Reset min and max of the bounding box to something large
+        bounds->set(1e6, -1e6, -1e6, 1e6);
+    }
+
+    // With no explicit limit the counter stays at 1 and is never decremented
+    const bool limited = numGlyphs > 0;
+    int32_t remaining = limited ? numGlyphs : 1;
+
+    int32_t curX = x;
+    size_t readPos = start;
+    size_t followingPos = 0;
+
+    while (remaining > 0) {
+        const int32_t codePoint = utf32_at(text, len, readPos, &followingPos);
+
+        // Stop on a negative result: the end of the string was reached or
+        // utf32_at() could not produce a character
+        if(codePoint < 0) {
+            break;
+        }
+        readPos = followingPos;
+
+        CachedGlyphInfo *info = getCachedUTFChar(codePoint);
+
+        // A glyph that could not be cached is not drawn, but still advances the pen
+        if(info->mIsValid) {
+            switch(mode) {
+            case FRAMEBUFFER:
+                drawCachedGlyph(info, curX, y);
+                break;
+            case BITMAP:
+                drawCachedGlyph(info, curX, y, bitmap, bitmapW, bitmapH);
+                break;
+            case MEASURE:
+                measureCachedGlyph(info, curX, y, bounds);
+                break;
+            }
+        }
+        curX += (info->mAdvance.x >> 6);
+
+        if(limited) {
+            remaining --;
+        }
+    }
+}
+
+// Returns the cache record for |utfChar|, creating it on a miss and re-caching its bitmap if it is not valid.
+Font::CachedGlyphInfo* Font::getCachedUTFChar(int32_t utfChar) {
+    const uint32_t key = (uint32_t)utfChar;
+    CachedGlyphInfo *info = mCachedGlyphs.valueFor(key);
+    if(!info) {
+        info = cacheGlyph(key);
+    }
+    if(info->mIsValid) {
+        return info;
+    }
+    updateGlyphCache(info);
+    return info;
+}
+
+// Renders |glyph| with FreeType, asks the font state for a spot in the cache
+// texture, and on success records the glyph's metrics, texel rectangle and
+// normalized texture coordinates. glyph->mIsValid holds cacheBitmap()'s result.
+void Font::updateGlyphCache(CachedGlyphInfo *glyph)
+{
+    if(FT_Load_Glyph( mFace, glyph->mGlyphIndex, FT_LOAD_RENDER )) {
+        LOGE("Couldn't load glyph.");
+        return;
+    }
+
+    glyph->mAdvance = mFace->glyph->advance;
+    glyph->mBitmapLeft = mFace->glyph->bitmap_left;
+    glyph->mBitmapTop = mFace->glyph->bitmap_top;
+
+    // Let the font state figure out where to put the bitmap
+    FT_Bitmap *rendered = &mFace->glyph->bitmap;
+    uint32_t originX = 0;
+    uint32_t originY = 0;
+
+    FontState *fontState = &mRSC->mStateFont;
+    glyph->mIsValid = fontState->cacheBitmap(rendered, &originX, &originY);
+    if(!glyph->mIsValid) {
+        return;
+    }
+
+    // Texel rectangle assigned inside the cache texture
+    glyph->mBitmapMinX = originX;
+    glyph->mBitmapMinY = originY;
+    glyph->mBitmapWidth = rendered->width;
+    glyph->mBitmapHeight = rendered->rows;
+
+    // The same rectangle divided by the cache texture dimensions
+    const uint32_t limitX = originX + rendered->width;
+    const uint32_t limitY = originY + rendered->rows;
+    const uint32_t texW = fontState->getCacheTextureType()->getDimX();
+    const uint32_t texH = fontState->getCacheTextureType()->getDimY();
+
+    glyph->mBitmapMinU = (float)originX / (float)texW;
+    glyph->mBitmapMinV = (float)originY / (float)texH;
+    glyph->mBitmapMaxU = (float)limitX / (float)texW;
+    glyph->mBitmapMaxV = (float)limitY / (float)texH;
+}
+
+// Creates the cache record for character code |glyph|, registers it in
+// mCachedGlyphs and makes a first attempt at caching its bitmap.
+Font::CachedGlyphInfo *Font::cacheGlyph(uint32_t glyph) {
+    CachedGlyphInfo *info = new CachedGlyphInfo();
+    mCachedGlyphs.add(glyph, info);
+
+    // Starts out invalid; updateGlyphCache() sets mIsValid from cacheBitmap()
+    info->mIsValid = false;
+    info->mGlyphIndex = FT_Get_Char_Index(mFace, glyph);
+    updateGlyphCache(info);
+    return info;
+}
+
+// Returns the active font matching name/size/dpi if there is one; otherwise creates one,
+// adds it to the active list and calls precacheLatin() on it. Returns NULL if init() fails.
+Font * Font::create(Context *rsc, const char *name, uint32_t fontSize, uint32_t dpi)
+{
+    FontState &fontState = rsc->mStateFont;
+    fontState.checkInit();
+
+    Vector<Font*> &fonts = fontState.mActiveFonts;
+    for(uint32_t ct = 0; ct < fonts.size(); ct ++) {
+        Font *candidate = fonts[ct];
+        if(candidate->mFontName == name && candidate->mFontSize == fontSize && candidate->mDpi == dpi) {
+            return candidate;
+        }
+    }
+
+    Font *created = new Font(rsc);
+    if(!created->init(name, fontSize, dpi)) {
+        delete created;
+        return NULL;
+    }
+    fonts.push(created);
+    fontState.precacheLatin(created);
+    return created;
+}
+
+// Releases the FreeType face, removes this font from the context's
+// active-font list and deletes every cached glyph record.
+Font::~Font() {
+    if(mFace != NULL) {
+        FT_Done_Face(mFace);
+    }
+    Vector<Font*> &fonts = mRSC->mStateFont.mActiveFonts;
+    for (uint32_t ct = 0; ct < fonts.size(); ct++) {
+        if (fonts[ct] != this) {
+            continue;
+        }
+        fonts.removeAt(ct);
+        break;
+    }
+    for(uint32_t ct = 0; ct < mCachedGlyphs.size(); ct ++) {
+        delete mCachedGlyphs.valueAt(ct);
+    }
+}
+
+// Sets defaults and reads the text gamma / black / white threshold system properties.
+FontState::FontState()
+{
+    mInitialized = false;
+    mMaxNumberOfQuads = 1024;
+    mCurrentQuadIndex = 0;
+    mRSC = NULL;
+    mLibrary = NULL;
+    // Get the renderer properties
+    char property[PROPERTY_VALUE_MAX];
+    // Get the gamma
+    float gamma = DEFAULT_TEXT_GAMMA;
+    if (property_get(PROPERTY_TEXT_GAMMA, property, NULL) > 0) {
+        LOGD("  Setting text gamma to %s", property);
+        gamma = atof(property);
+    } else {
+        LOGD("  Using default text gamma of %.2f", DEFAULT_TEXT_GAMMA);
+    }
+    // atof() yields 0 for unparsable input; a non-positive gamma would break 1.0f / gamma below
+    if (gamma <= 0.0f) {
+        gamma = DEFAULT_TEXT_GAMMA;
+    }
+    // Get the black gamma threshold
+    int32_t blackThreshold = DEFAULT_TEXT_BLACK_GAMMA_THRESHOLD;
+    if (property_get(PROPERTY_TEXT_BLACK_GAMMA_THRESHOLD, property, NULL) > 0) {
+        LOGD("  Setting text black gamma threshold to %s", property);
+        blackThreshold = atoi(property);
+    } else {
+        LOGD("  Using default text black gamma threshold of %d",
+                DEFAULT_TEXT_BLACK_GAMMA_THRESHOLD);
+    }
+    mBlackThreshold = (float)(blackThreshold) / 255.0f;
+
+    // Get the white gamma threshold
+    int32_t whiteThreshold = DEFAULT_TEXT_WHITE_GAMMA_THRESHOLD;
+    if (property_get(PROPERTY_TEXT_WHITE_GAMMA_THRESHOLD, property, NULL) > 0) {
+        LOGD("  Setting text white gamma threshold to %s", property);
+        whiteThreshold = atoi(property);
+    } else {
+        LOGD("  Using default text white gamma threshold of %d",
+                DEFAULT_TEXT_WHITE_GAMMA_THRESHOLD);
+    }
+    mWhiteThreshold = (float)(whiteThreshold) / 255.0f;
+    // Compute the gamma tables
+    mBlackGamma = gamma;
+    mWhiteGamma = 1.0f / gamma;
+    setFontColor(0.1f, 0.1f, 0.1f, 1.0f);
+}
+
+// Deletes the cache lines and asserts that no font is still registered as active.
+FontState::~FontState() {
+    for(size_t ct = 0, total = mCacheLines.size(); ct < total; ct ++) {
+        delete mCacheLines[ct];
+    }
+
+    rsAssert(!mActiveFonts.size());
+}
+
+// Initializes the FreeType library handle on first use and returns it,
+// or NULL (after logging) if FT_Init_FreeType() reports an error.
+FT_Library FontState::getLib() {
+    if(mLibrary) {
+        return mLibrary;
+    }
+    if(FT_Init_FreeType(&mLibrary)) {
+        LOGE("Unable to initialize freetype");
+        return NULL;
+    }
+    return mLibrary;
+}
+
+// Stores the context pointer in mRSC; nothing else is set up here.
+void FontState::init(Context *rsc) {
+    this->mRSC = rsc;
+}
+
+// Issues the pending draw (if any quads are queued), invalidates every active font's glyphs and rewinds each cache line to column 0.
+void FontState::flushAllAndInvalidate() {
+    if(mCurrentQuadIndex != 0) {
+        issueDrawCommand();
+        mCurrentQuadIndex = 0;
+    }
+    for(size_t font = 0; font < mActiveFonts.size(); font ++) {
+        mActiveFonts[font]->invalidateTextureCache();
+    }
+    for(size_t line = 0; line < mCacheLines.size(); line ++) {
+        mCacheLines[line]->mCurrentCol = 0;
+    }
+}
+
+// Copies a rendered glyph bitmap into the shared cache texture.
+// bitmap: glyph image; only its rows, width and buffer fields are read here.
+// On success writes the glyph's top-left texel to *retOriginX / *retOriginY and returns
+// true; returns false without copying anything when the glyph cannot be cached.
+bool FontState::cacheBitmap(FT_Bitmap *bitmap, uint32_t *retOriginX, uint32_t *retOriginY)
+{
+    // Without cache lines (initTextTexture() has not run) there is nowhere to put the
+    // glyph, and indexing the last line below would read out of bounds.
+    if(mCacheLines.size() == 0) {
+        LOGE("Font cache is not initialized");
+        return false;
+    }
+
+    // If the glyph is taller than the last cache line, don't cache it
+    if((uint32_t)bitmap->rows > mCacheLines[mCacheLines.size()-1]->mMaxHeight) {
+        LOGE("Font size too large to fit in cache. width, height = %i, %i",
+             (int)bitmap->width, (int)bitmap->rows);
+        return false;
+    }
+
+    // Origin of the slot handed out by the cache line that accepts the glyph
+    uint32_t startX = 0;
+    uint32_t startY = 0;
+
+    // At most two passes: if nothing fits the first time, flush the pending quads,
+    // invalidate every cached glyph and retry against the emptied cache.
+    bool bitmapFit = false;
+    for(uint32_t pass = 0; pass < 2 && !bitmapFit; pass ++) {
+        if(pass == 1) {
+            flushAllAndInvalidate();
+        }
+        for(uint32_t i = 0; i < mCacheLines.size() && !bitmapFit; i ++) {
+            bitmapFit = mCacheLines[i]->fitBitmap(bitmap, &startX, &startY);
+        }
+    }
+
+    // if we still don't fit, something is wrong and we shouldn't draw
+    if(!bitmapFit) {
+        LOGE("Bitmap doesn't fit in cache. width, height = %i, %i",
+             (int)bitmap->width, (int)bitmap->rows);
+        return false;
+    }
+
+    // Report where the glyph landed, then copy it into the texture's backing store
+    *retOriginX = startX;
+    *retOriginY = startY;
+
+    uint32_t endX = startX + bitmap->width;
+    uint32_t endY = startY + bitmap->rows;
+
+    uint32_t cacheWidth = getCacheTextureType()->getDimX();
+
+    uint8_t *cacheBuffer = (uint8_t*)mTextTexture->getPtr();
+    uint8_t *bitmapBuffer = bitmap->buffer;
+
+    // Copy row by row so both buffers are walked in memory order.
+    // NOTE(review): source rows are assumed to be bitmap->width bytes apart (tightly packed
+    // 8-bit glyphs); FT_Bitmap::pitch is not consulted - confirm for the render mode used.
+    uint32_t cacheX = 0, bX = 0, cacheY = 0, bY = 0;
+    for(cacheY = startY, bY = 0; cacheY < endY; cacheY ++, bY ++) {
+        for(cacheX = startX, bX = 0; cacheX < endX; cacheX ++, bX ++) {
+            uint8_t tempCol = bitmapBuffer[bY * bitmap->width + bX];
+            cacheBuffer[cacheY*cacheWidth + cacheX] = tempCol;
+        }
+    }
+
+    // This will dirty the texture and the shader so next time
+    // we draw it will upload the data
+    mTextTexture->deferedUploadToTexture(mRSC, false, 0);
+    mFontShaderF->bindTexture(mRSC, 0, mTextTexture.get());
+
+    return true;
+}
+// Builds the state used to draw text: fragment shader with its constants, sampler and program store.
+void FontState::initRenderState()
+{
+    String8 shaderString("varying vec2 varTex0;\n");
+    shaderString.append("void main() {\n");
+    shaderString.append("  lowp vec4 col = UNI_Color;\n");
+    shaderString.append("  col.a = texture2D(UNI_Tex0, varTex0.xy).a;\n");
+    shaderString.append("  col.a = pow(col.a, UNI_Gamma);\n");
+    shaderString.append("  gl_FragColor = col;\n");
+    shaderString.append("}\n");
+    // Shader constants: "Color" then "Gamma", the same order as the mConstants struct
+    const Element *colorElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
+    const Element *gammaElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 1);
+    mRSC->mStateElement.elementBuilderBegin();
+    mRSC->mStateElement.elementBuilderAdd(colorElem, "Color", 1);
+    mRSC->mStateElement.elementBuilderAdd(gammaElem, "Gamma", 1);
+    const Element *constInput = mRSC->mStateElement.elementBuilderCreate(mRSC);
+
+    Type *inputType = new Type(mRSC);
+    inputType->setElement(constInput);
+    inputType->setDimX(1);
+    inputType->compute();
+    // NOTE(review): the Type pointer is stored in a uint32_t below, which assumes 32-bit pointers
+    uint32_t tmp[4];
+    tmp[0] = RS_PROGRAM_PARAM_CONSTANT;
+    tmp[1] = (uint32_t)inputType;
+    tmp[2] = RS_PROGRAM_PARAM_TEXTURE_COUNT;
+    tmp[3] = 1;
+
+    mFontShaderFConstant.set(new Allocation(mRSC, inputType));
+    ProgramFragment *pf = new ProgramFragment(mRSC, shaderString.string(),
+                                              shaderString.length(), tmp, 4);
+    mFontShaderF.set(pf);
+    mFontShaderF->bindAllocation(mRSC, mFontShaderFConstant.get(), 0);
+
+    Sampler *sampler = new Sampler(mRSC, RS_SAMPLER_NEAREST, RS_SAMPLER_NEAREST,
+                                      RS_SAMPLER_CLAMP, RS_SAMPLER_CLAMP, RS_SAMPLER_CLAMP);
+    mFontSampler.set(sampler);
+    mFontShaderF->bindSampler(mRSC, 0, sampler);
+    // Program store for text: depth func ALWAYS, depth mask off, dithering off, source-alpha blending
+    ProgramStore *fontStore = new ProgramStore(mRSC);
+    mFontProgramStore.set(fontStore);
+    mFontProgramStore->setDepthFunc(RS_DEPTH_FUNC_ALWAYS);
+    mFontProgramStore->setBlendFunc(RS_BLEND_SRC_SRC_ALPHA, RS_BLEND_DST_ONE_MINUS_SRC_ALPHA);
+    mFontProgramStore->setDitherEnable(false);
+    mFontProgramStore->setDepthMask(false);
+}
+
+// Allocates the 1024x256 alpha-only glyph cache texture and carves it into cache lines.
+// Called from checkInit().
+void FontState::initTextTexture()
+{
+    const Element *alphaElem = Element::create(mRSC, RS_TYPE_UNSIGNED_8, RS_KIND_PIXEL_A, true, 1);
+
+    // Describe the cache texture
+    Type *texType = new Type(mRSC);
+    texType->setElement(alphaElem);
+    texType->setDimX(1024);
+    texType->setDimY(256);
+    texType->compute();
+
+    mTextTexture.set(new Allocation(mRSC, texType));
+    mTextTexture->deferedUploadToTexture(mRSC, false, 0);
+
+    // Slice the texture into full-width horizontal lines of fixed height, shortest first;
+    // each line starts at the row where the previous one ended.
+    static const uint32_t lineHeights[] = { 16, 24, 24, 32, 32, 40 };
+    const uint32_t fixedLines = sizeof(lineHeights) / sizeof(lineHeights[0]);
+    const uint32_t lineWidth = texType->getDimX();
+
+    uint32_t nextRow = 0;
+    for(uint32_t i = 0; i < fixedLines; i ++) {
+        mCacheLines.push(new CacheTextureLine(lineHeights[i], lineWidth, nextRow, 0));
+        nextRow += lineHeights[i];
+    }
+
+    // The last line takes whatever height is left over
+    mCacheLines.push(new CacheTextureLine(texType->getDimY() - nextRow, lineWidth, nextRow, 0));
+}
+
+// Avoid having to reallocate memory and render quad by quad:
+// builds the index buffer and the vertex buffer that every text draw reuses
+void FontState::initVertexArrayBuffers()
+{
+    // Index buffer: six 16-bit indices for every quad we can batch
+    const Element *indexElem = Element::create(mRSC, RS_TYPE_UNSIGNED_16, RS_KIND_USER, false, 1);
+    Type *indexType = new Type(mRSC);
+    indexType->setDimX(mMaxNumberOfQuads * 6);
+    indexType->setElement(indexElem);
+    indexType->compute();
+
+    Allocation *indexAlloc = new Allocation(mRSC, indexType);
+    uint16_t *nextIndex = (uint16_t*)indexAlloc->getPtr();
+
+    // Each quad uses vertices v0..v3 and is drawn as triangles (v0, v1, v2) and (v0, v2, v3)
+    for(uint32_t quad = 0; quad < mMaxNumberOfQuads; quad ++) {
+        const int32_t v0 = quad * 4;
+        *nextIndex++ = v0 + 0;
+        *nextIndex++ = v0 + 1;
+        *nextIndex++ = v0 + 2;
+
+        *nextIndex++ = v0 + 0;
+        *nextIndex++ = v0 + 2;
+        *nextIndex++ = v0 + 3;
+    }
+
+    indexAlloc->deferedUploadToBufferObject(mRSC);
+    mIndexBuffer.set(indexAlloc);
+
+    // Vertex buffer: four vertices per quad, each holding a 3-float "position"
+    // followed by a 2-float "texture0" coordinate
+    const Element *posElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 3);
+    const Element *texElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 2);
+
+    mRSC->mStateElement.elementBuilderBegin();
+    mRSC->mStateElement.elementBuilderAdd(posElem, "position", 1);
+    mRSC->mStateElement.elementBuilderAdd(texElem, "texture0", 1);
+    const Element *vertexElem = mRSC->mStateElement.elementBuilderCreate(mRSC);
+
+    Type *vertexType = new Type(mRSC);
+    vertexType->setDimX(mMaxNumberOfQuads * 4);
+    vertexType->setElement(vertexElem);
+    vertexType->compute();
+
+    Allocation *vertexAlloc = new Allocation(mRSC, vertexType);
+    // Keep the raw pointer so appendMeshQuad() can write vertices directly
+    mTextMeshPtr = (float*)vertexAlloc->getPtr();
+    mVertexArray.set(vertexAlloc);
+}
+
+// We don't want to allocate anything unless we actually draw text,
+// so everything is built on the first call and later calls return immediately
+// (until deinit() clears mInitialized).
+void FontState::checkInit()
+{
+    if(!mInitialized) {
+        // Cache texture, shader/sampler/store state, then index and vertex buffers
+        initTextTexture();
+        initRenderState();
+        initVertexArrayBuffers();
+
+        // We store a string with letters in a rough frequency of occurrence
+        mLatinPrecache = String8(" eisarntolcdugpmhbyfvkwzxjq"
+                                 "EISARNTOLCDUGPMHBYFVKWZXJQ"
+                                 ",.?!()-+@;:`'"
+                                 "0123456789");
+
+        mInitialized = true;
+    }
+}
+
+// Draws the quads batched so far (mCurrentQuadIndex of them) using the font shader and
+// font program store, then re-binds the programs that were active on entry.
+void FontState::issueDrawCommand() {
+
+    // Swap in the text pipeline, keeping a reference to each program we displace
+    ObjectBaseRef<const ProgramVertex> savedVertex(mRSC->getVertex());
+    mRSC->setVertex(mRSC->getDefaultProgramVertex());
+
+    ObjectBaseRef<const ProgramRaster> savedRaster(mRSC->getRaster());
+    mRSC->setRaster(mRSC->getDefaultProgramRaster());
+
+    ObjectBaseRef<const ProgramFragment> savedFragment(mRSC->getFragment());
+    mRSC->setFragment(mFontShaderF.get());
+
+    ObjectBaseRef<const ProgramStore> savedStore(mRSC->getFragmentStore());
+    mRSC->setFragmentStore(mFontProgramStore.get());
+
+    // Push color/gamma to the shader constants only when setFontColor() changed them
+    if(mConstantsDirty) {
+        mFontShaderFConstant->data(mRSC, &mConstants, sizeof(mConstants));
+        mConstantsDirty = false;
+    }
+
+    // When setupCheck() fails the draw is skipped, but the state is still restored below
+    if (mRSC->setupCheck()) {
+        // Interleaved vertices: 3 position floats then 2 texcoord floats (20 bytes each)
+        float *positions = (float*)mVertexArray->getPtr();
+        float *texCoords = positions + 3;
+
+        VertexArray va;
+        va.add(GL_FLOAT, 3, 20, false, (uint32_t)positions, "ATTRIB_position");
+        va.add(GL_FLOAT, 2, 20, false, (uint32_t)texCoords, "ATTRIB_texture0");
+        va.setupGL2(mRSC, &mRSC->mStateVertexArray, &mRSC->mShaderCache);
+
+        mIndexBuffer->uploadCheck(mRSC);
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer->getBufferObjectID());
+        glDrawElements(GL_TRIANGLES, mCurrentQuadIndex * 6, GL_UNSIGNED_SHORT, (uint16_t *)(0));
+    }
+
+    // Reset the state
+    mRSC->setVertex((ProgramVertex *)savedVertex.get());
+    mRSC->setRaster((ProgramRaster *)savedRaster.get());
+    mRSC->setFragment((ProgramFragment *)savedFragment.get());
+    mRSC->setFragmentStore((ProgramStore *)savedStore.get());
+}
+
+// Appends one textured quad (four vertices, each x, y, z, u, v) to the batch and
+// draws the batch once it holds mMaxNumberOfQuads quads.
+void FontState::appendMeshQuad(float x1, float y1, float z1,
+                                  float u1, float v1,
+                                  float x2, float y2, float z2,
+                                  float u2, float v2,
+                                  float x3, float y3, float z3,
+                                  float u3, float v3,
+                                  float x4, float y4, float z4,
+                                  float u4, float v4)
+{
+    // Cull things that are off the screen
+    const float screenW = (float)mRSC->getWidth();
+    const float screenH = (float)mRSC->getHeight();
+    const bool offScreen = x1 > screenW || y1 < 0.0f || x2 < 0 || y4 > screenH;
+    if(offScreen) {
+        return;
+    }
+
+    // Slot for this quad inside the shared vertex buffer
+    const uint32_t vertsPerQuad = 4;
+    const uint32_t floatsPerVert = 5;
+    float *quad = mTextMeshPtr + mCurrentQuadIndex * vertsPerQuad * floatsPerVert;
+
+    // Write the four vertices back to back: position (x, y, z) then texcoord (u, v)
+    quad[0] = x1;
+    quad[1] = y1;
+    quad[2] = z1;
+    quad[3] = u1;
+    quad[4] = v1;
+
+    quad[5] = x2;
+    quad[6] = y2;
+    quad[7] = z2;
+    quad[8] = u2;
+    quad[9] = v2;
+
+    quad[10] = x3;
+    quad[11] = y3;
+    quad[12] = z3;
+    quad[13] = u3;
+    quad[14] = v3;
+
+    quad[15] = x4;
+    quad[16] = y4;
+    quad[17] = z4;
+    quad[18] = u4;
+    quad[19] = v4;
+
+    mCurrentQuadIndex ++;
+
+    // Batch is full: draw it and start over
+    if(mCurrentQuadIndex == mMaxNumberOfQuads) {
+        issueDrawCommand();
+        mCurrentQuadIndex = 0;
+    }
+}
+
+// Returns the share of cache-line columns still unused, as an integer percentage (0-100).
+uint32_t FontState::getRemainingCacheCapacity() {
+    uint32_t freeColumns = 0, totalColumns = 0;
+    for(uint32_t i = 0; i < mCacheLines.size(); i ++) {
+         freeColumns += (mCacheLines[i]->mMaxWidth - mCacheLines[i]->mCurrentCol);
+         totalColumns += mCacheLines[i]->mMaxWidth;
+    }
+    // No cache lines (before initTextTexture() or after deinit()): report 0 instead of dividing by zero.
+    return totalColumns ? (freeColumns * 100) / totalColumns : 0;
+}
+
+// Caches glyphs for the characters of mLatinPrecache, in order, until the list is
+// exhausted or no more than 25% of the cache remains free.
+void FontState::precacheLatin(Font *font) {
+    // Remaining capacity is measured in %
+    uint32_t percentFree = getRemainingCacheCapacity();
+    for(uint32_t idx = 0; percentFree > 25 && idx < mLatinPrecache.size(); idx ++) {
+        font->getCachedUTFChar((int32_t)mLatinPrecache[idx]);
+        percentFree = getRemainingCacheCapacity();
+    }
+}
+
+
+// Hands the text to the context's current font (or a lazily created default) and flushes queued quads.
+void FontState::renderText(const char *text, uint32_t len, int32_t x, int32_t y,
+                           uint32_t startIndex, int32_t numGlyphs,
+                           Font::RenderMode mode,
+                           Font::Rect *bounds,
+                           uint8_t *bitmap, uint32_t bitmapW, uint32_t bitmapH)
+{
+    checkInit();
+
+    Font *font = mRSC->getFont();
+    if(font == NULL) {
+        // No font bound to the context: create the default one on first need
+        if(mDefault.get() == NULL) {
+            mDefault.set(Font::create(mRSC, "DroidSans.ttf", 16, 96));
+        }
+        font = mDefault.get();
+        if(font == NULL) {
+            LOGE("Unable to initialize any fonts");
+            return;
+        }
+    }
+
+    font->renderUTF(text, len, x, y, startIndex, numGlyphs,
+                    mode, bounds, bitmap, bitmapW, bitmapH);
+    if(mCurrentQuadIndex != 0) {  // draw whatever is still sitting in the batch
+        issueDrawCommand();
+        mCurrentQuadIndex = 0;
+    }
+}
+
+void FontState::measureText(const char *text, uint32_t len, Font::Rect *bounds) {
+    renderText(text, len, 0, 0, 0, -1, Font::MEASURE, bounds);  // MEASURE mode from pen (0, 0), startIndex 0, numGlyphs -1
+}
+
+// Stores the text color and picks the gamma exponent that goes with its brightness.
+void FontState::setFontColor(float r, float g, float b, float a) {
+    float *rgba = mConstants.mFontColor;
+    rgba[0] = r;
+    rgba[1] = g;
+    rgba[2] = b;
+    rgba[3] = a;
+
+    // Dark text gets mBlackGamma, bright text mWhiteGamma, everything in between 1.0
+    const float luminance = (r * 2.0f + g * 5.0f + b) / 8.0f;
+    const bool isDark = luminance <= mBlackThreshold;
+    const bool isBright = luminance >= mWhiteThreshold;
+    mConstants.mGamma = isDark ? mBlackGamma : (isBright ? mWhiteGamma : 1.0f);
+
+    mConstantsDirty = true;
+}
+// Copies the stored RGBA font color into the four out-parameters; each is dereferenced unconditionally.
+void FontState::getFontColor(float *r, float *g, float *b, float *a) const {
+    *r = mConstants.mFontColor[0];
+    *g = mConstants.mFontColor[1];
+    *b = mConstants.mFontColor[2];
+    *a = mConstants.mFontColor[3];
+}
+// Releases everything checkInit() and getLib() created so the state can be rebuilt on next use.
+void FontState::deinit(Context *rsc)
+{
+    mInitialized = false;
+
+    mFontShaderFConstant.clear();
+
+    mIndexBuffer.clear();
+    mVertexArray.clear();
+
+    mFontShaderF.clear();
+    mFontSampler.clear();
+    mFontProgramStore.clear();
+
+    mTextTexture.clear();
+    for(uint32_t i = 0; i < mCacheLines.size(); i ++) {
+        delete mCacheLines[i];
+    }
+    mCacheLines.clear();
+
+    mDefault.clear();
+    // NOTE(review): iterates a copy, presumably because zeroUserRef() can alter mActiveFonts - confirm
+    Vector<Font*> fontsToDereference = mActiveFonts;
+    for(uint32_t i = 0; i < fontsToDereference.size(); i ++) {
+        fontsToDereference[i]->zeroUserRef();
+    }
+    // FreeType goes last; getLib() re-creates the library when mLibrary is NULL
+    if(mLibrary) {
+        FT_Done_FreeType( mLibrary );
+        mLibrary = NULL;
+    }
+}
+
+namespace android {
+namespace renderscript {
+// Creates a font via Font::create() and, on success, takes a user reference; returns NULL on failure.
+RsFont rsi_FontCreateFromFile(Context *rsc, char const *name, uint32_t fontSize, uint32_t dpi)
+{
+    Font *newFont = Font::create(rsc, name, fontSize, dpi);
+    if(newFont) {
+        newFont->incUserRef();
+    }
+    return newFont;
+}
+
+} // renderscript
+} // android
diff --git a/rsFont.h b/rsFont.h
new file mode 100644
index 0000000..0012b84
--- /dev/null
+++ b/rsFont.h
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_FONT_H
+#define ANDROID_RS_FONT_H
+
+#include "RenderScript.h"
+#include "rsStream.h"
+#include <utils/String8.h>
+#include <utils/Vector.h>
+#include <utils/KeyedVector.h>
+
+#include <ft2build.h>
+#include FT_FREETYPE_H
+
+// ---------------------------------------------------------------------------
+namespace android {
+
+namespace renderscript {
+
+// Gamma (>= 1.0, <= 10.0)
+#define PROPERTY_TEXT_GAMMA "ro.text_gamma"
+#define PROPERTY_TEXT_BLACK_GAMMA_THRESHOLD "ro.text_gamma.black_threshold"
+#define PROPERTY_TEXT_WHITE_GAMMA_THRESHOLD "ro.text_gamma.white_threshold"
+
+#define DEFAULT_TEXT_GAMMA 1.4f
+#define DEFAULT_TEXT_BLACK_GAMMA_THRESHOLD 64
+#define DEFAULT_TEXT_WHITE_GAMMA_THRESHOLD 192
+
+class FontState;
+// A FreeType face plus per-glyph cache records; obtained through Font::create(), driven by FontState.
+class Font : public ObjectBase
+{
+public:
+    enum RenderMode {  // passed through FontState::renderText() to renderUTF()
+        FRAMEBUFFER,
+        BITMAP,
+        MEASURE,
+    };
+
+    struct Rect {
+        int32_t left;
+        int32_t top;
+        int32_t right;
+        int32_t bottom;
+        void set(int32_t l, int32_t r, int32_t t, int32_t b) {  // NOTE: argument order is left, right, top, bottom
+            left = l;
+            right = r;
+            top = t;
+            bottom = b;
+        }
+    };
+
+    ~Font();
+
+    // Fonts are currently not serialized,
+    // but we need to inherit from ObjectBase for ref tracking
+    virtual void serialize(OStream *stream) const {
+    }
+    virtual RsA3DClassID getClassId() const {
+        return RS_A3D_CLASS_ID_UNKNOWN;
+    }
+
+    static Font * create(Context *rsc, const char *name, uint32_t fontSize, uint32_t dpi);
+
+protected:
+
+    friend class FontState;
+
+    // Pointer to the utf data, length of data, where to start, number of glyphs to read
+    // (each glyph may be longer than a char because we are dealing with utf data)
+    // x and y are the initial pen position
+    void renderUTF(const char *text, uint32_t len, int32_t x, int32_t y,
+                   uint32_t start, int32_t numGlyphs,
+                   RenderMode mode = FRAMEBUFFER, Rect *bounds = NULL,
+                   uint8_t *bitmap = NULL, uint32_t bitmapW = 0, uint32_t bitmapH = 0);
+
+    void invalidateTextureCache();
+    struct CachedGlyphInfo
+    {
+        // Has the cache been invalidated?
+        bool mIsValid;
+        // Location of the cached glyph in the bitmap
+        // in case we need to resize the texture
+        uint32_t mBitmapMinX;
+        uint32_t mBitmapMinY;
+        uint32_t mBitmapWidth;
+        uint32_t mBitmapHeight;
+        // Also cache texture coords for the quad
+        float mBitmapMinU;
+        float mBitmapMinV;
+        float mBitmapMaxU;
+        float mBitmapMaxV;
+        // Minimize how much we call freetype
+        FT_UInt mGlyphIndex;
+        FT_Vector mAdvance;
+        // Values below contain a glyph's origin in the bitmap
+        FT_Int mBitmapLeft;
+        FT_Int mBitmapTop;
+    };
+
+    String8 mFontName;
+    uint32_t mFontSize;
+    uint32_t mDpi;
+
+    Font(Context *rsc);
+    bool init(const char *name, uint32_t fontSize, uint32_t dpi);
+
+    FT_Face mFace;
+    bool mInitialized;
+    bool mHasKerning;
+
+    DefaultKeyedVector<uint32_t, CachedGlyphInfo* > mCachedGlyphs;
+    CachedGlyphInfo* getCachedUTFChar(int32_t utfChar);
+
+    CachedGlyphInfo *cacheGlyph(uint32_t glyph);
+    void updateGlyphCache(CachedGlyphInfo *glyph);
+    void measureCachedGlyph(CachedGlyphInfo *glyph, int32_t x, int32_t y, Rect *bounds);
+    void drawCachedGlyph(CachedGlyphInfo *glyph, int32_t x, int32_t y);
+    void drawCachedGlyph(CachedGlyphInfo *glyph, int32_t x, int32_t y,
+                         uint8_t *bitmap, uint32_t bitmapW, uint32_t bitmapH);
+};
+// Per-context text state: glyph cache texture, font render state, and the quad batch used to draw text.
+class FontState
+{
+public:
+    FontState();
+    ~FontState();
+
+    void init(Context *rsc);
+    void deinit(Context *rsc);
+
+    ObjectBaseRef<Font> mDefault;
+    ObjectBaseRef<Font> mLast;
+
+    void renderText(const char *text, uint32_t len, int32_t x, int32_t y,
+                    uint32_t startIndex = 0, int numGlyphs = -1,
+                    Font::RenderMode mode = Font::FRAMEBUFFER,
+                    Font::Rect *bounds = NULL,
+                    uint8_t *bitmap = NULL, uint32_t bitmapW = 0, uint32_t bitmapH = 0);
+
+    void measureText(const char *text, uint32_t len, Font::Rect *bounds);
+
+    void setFontColor(float r, float g, float b, float a);
+    void getFontColor(float *r, float *g, float *b, float *a) const;
+
+protected:
+
+    friend class Font;
+    // One horizontal strip of the cache texture; glyphs are packed into it left to right
+    struct CacheTextureLine
+    {
+        uint32_t mMaxHeight;
+        uint32_t mMaxWidth;
+        uint32_t mCurrentRow;
+        uint32_t mCurrentCol;
+        bool mDirty;
+
+        CacheTextureLine(uint32_t maxHeight, uint32_t maxWidth, uint32_t currentRow, uint32_t currentCol) :
+            mMaxHeight(maxHeight), mMaxWidth(maxWidth), mCurrentRow(currentRow), mCurrentCol(currentCol),
+            mDirty(false)  {
+        }
+        // Reserves bitmap->width columns in this line and returns their origin; false if it cannot fit
+        bool fitBitmap(FT_Bitmap *bitmap, uint32_t *retOriginX, uint32_t *retOriginY) {
+            if((uint32_t)bitmap->rows > mMaxHeight) {
+                return false;
+            }
+
+            if(mCurrentCol + (uint32_t)bitmap->width < mMaxWidth) {  // strict '<': a glyph ending exactly at mMaxWidth is rejected
+               *retOriginX = mCurrentCol;
+               *retOriginY = mCurrentRow;
+               mCurrentCol += bitmap->width;
+               mDirty = true;
+               return true;
+            }
+
+            return false;
+        }
+    };
+
+    Vector<CacheTextureLine*> mCacheLines;
+    uint32_t getRemainingCacheCapacity();
+
+    void precacheLatin(Font *font);
+    String8 mLatinPrecache;
+
+    Context *mRSC;
+    // Uploaded as-is to the shader constants ("Color" then "Gamma") when mConstantsDirty is set
+    struct {
+        float mFontColor[4];
+        float mGamma;
+    } mConstants;
+    bool mConstantsDirty;
+
+    float mBlackGamma;
+    float mWhiteGamma;
+
+    float mBlackThreshold;
+    float mWhiteThreshold;
+
+    // Free type library, we only need one copy
+    FT_Library mLibrary;
+    FT_Library getLib();
+    Vector<Font*> mActiveFonts;
+
+    // Render state for the font
+    ObjectBaseRef<Allocation> mFontShaderFConstant;
+    ObjectBaseRef<ProgramFragment> mFontShaderF;
+    ObjectBaseRef<Sampler> mFontSampler;
+    ObjectBaseRef<ProgramStore> mFontProgramStore;
+    void initRenderState();
+
+    // Texture to cache glyph bitmaps
+    ObjectBaseRef<Allocation> mTextTexture;
+    void initTextTexture();
+    const uint8_t* getTextTextureData() const {
+        return (uint8_t*)mTextTexture->getPtr();
+    }
+
+    bool cacheBitmap(FT_Bitmap *bitmap, uint32_t *retOriginX, uint32_t *retOriginY);
+    const Type* getCacheTextureType() {
+        return mTextTexture->getType();
+    }
+
+    void flushAllAndInvalidate();
+
+    // Pointer to vertex data to speed up frame to frame work
+    float *mTextMeshPtr;
+    uint32_t mCurrentQuadIndex;
+    uint32_t mMaxNumberOfQuads;
+
+    void initVertexArrayBuffers();
+    ObjectBaseRef<Allocation> mIndexBuffer;
+    ObjectBaseRef<Allocation> mVertexArray;
+
+    // Set by checkInit() once texture, render state and buffers exist; cleared by deinit()
+    bool mInitialized;
+
+    void checkInit();
+
+    void issueDrawCommand();
+
+    void appendMeshQuad(float x1, float y1, float z1,
+                          float u1, float v1,
+                          float x2, float y2, float z2,
+                          float u2, float v2,
+                          float x3, float y3, float z3,
+                          float u3, float v3,
+                          float x4, float y4, float z4,
+                          float u4, float v4);
+
+};
+
+
+}
+}
+
+#endif
diff --git a/rsHandcode.h b/rsHandcode.h
index 800eddd..c02fd42 100644
--- a/rsHandcode.h
+++ b/rsHandcode.h
@@ -1,6 +1,57 @@
 
 #define DATA_SYNC_SIZE 1024
 
+static inline void rsHCAPI_ContextFinish (RsContext rsc)
+{
+    ThreadIO *io = &((Context *)rsc)->mIO;
+    const uint32_t size = sizeof(RS_CMD_ContextFinish);
+    io->mToCore.reserve(size);  // space is reserved, but nothing is written into the command here
+    io->mToCore.commitSync(RS_CMD_ID_ContextFinish, size);  // commitSync() is commit() followed by flush()
+}
+
+// Queues a ScriptInvokeV command. Payloads under DATA_SYNC_SIZE bytes are copied into the FIFO right
+// behind the command (padded to 4 bytes); larger ones are passed by pointer and sent with commitSync().
+static inline void rsHCAPI_ScriptInvokeV (RsContext rsc, RsScript va, uint32_t slot, const void * data, uint32_t sizeBytes)
+{
+    ThreadIO *io = &((Context *)rsc)->mIO;
+    const bool copyInline = sizeBytes < DATA_SYNC_SIZE;
+    uint32_t size = sizeof(RS_CMD_ScriptInvokeV) + (copyInline ? ((sizeBytes + 3) & ~3) : 0);
+    RS_CMD_ScriptInvokeV *cmd = static_cast<RS_CMD_ScriptInvokeV *>(io->mToCore.reserve(size));
+    cmd->s = va;
+    cmd->slot = slot;
+    cmd->dataLen = sizeBytes;
+    if (copyInline) {
+        cmd->data = (void *)(cmd+1);
+        memcpy(cmd+1, data, sizeBytes);
+        io->mToCore.commit(RS_CMD_ID_ScriptInvokeV, size);
+    } else {
+        cmd->data = data;
+        io->mToCore.commitSync(RS_CMD_ID_ScriptInvokeV, size);
+    }
+}
+
+
+// Queues a ScriptSetVarV command. Payloads under DATA_SYNC_SIZE bytes are copied into the FIFO right
+// behind the command (padded to 4 bytes); larger ones are passed by pointer and sent with commitSync().
+static inline void rsHCAPI_ScriptSetVarV (RsContext rsc, RsScript va, uint32_t slot, const void * data, uint32_t sizeBytes)
+{
+    ThreadIO *io = &((Context *)rsc)->mIO;
+    const bool copyInline = sizeBytes < DATA_SYNC_SIZE;
+    uint32_t size = sizeof(RS_CMD_ScriptSetVarV) + (copyInline ? ((sizeBytes + 3) & ~3) : 0);
+    RS_CMD_ScriptSetVarV *cmd = static_cast<RS_CMD_ScriptSetVarV *>(io->mToCore.reserve(size));
+    cmd->s = va;
+    cmd->slot = slot;
+    cmd->dataLen = sizeBytes;
+    if (copyInline) {
+        cmd->data = (void *)(cmd+1);
+        memcpy(cmd+1, data, sizeBytes);
+        io->mToCore.commit(RS_CMD_ID_ScriptSetVarV, size);
+    } else {
+        cmd->data = data;
+        io->mToCore.commitSync(RS_CMD_ID_ScriptSetVarV, size);
+    }
+}
+
 static inline void rsHCAPI_AllocationData (RsContext rsc, RsAllocation va, const void * data, uint32_t sizeBytes)
 {
     ThreadIO *io = &((Context *)rsc)->mIO;
@@ -45,3 +96,26 @@
 
 }
 
+// Queues an Allocation1DSubElementData command. Payloads under DATA_SYNC_SIZE bytes are copied into the
+// FIFO right behind the command (padded to 4 bytes); larger ones go by pointer with commitSync().
+static inline void rsHCAPI_Allocation1DSubElementData (RsContext rsc, RsAllocation va, uint32_t x, const void * data, uint32_t comp_offset, uint32_t sizeBytes)
+{
+    ThreadIO *io = &((Context *)rsc)->mIO;
+    const bool copyInline = sizeBytes < DATA_SYNC_SIZE;
+    uint32_t size = sizeof(RS_CMD_Allocation1DSubElementData) + (copyInline ? ((sizeBytes + 3) & ~3) : 0);
+    RS_CMD_Allocation1DSubElementData *cmd = static_cast<RS_CMD_Allocation1DSubElementData *>(io->mToCore.reserve(size));
+    cmd->va = va;
+    cmd->x = x;
+    cmd->comp_offset = comp_offset;
+    cmd->bytes = sizeBytes;
+    if (copyInline) {
+        cmd->data = (void *)(cmd+1);
+        memcpy(cmd+1, data, sizeBytes);
+        io->mToCore.commit(RS_CMD_ID_Allocation1DSubElementData, size);
+    } else {
+        cmd->data = data;
+        io->mToCore.commitSync(RS_CMD_ID_Allocation1DSubElementData, size);
+    }
+
+}
+
diff --git a/rsLight.cpp b/rsLight.cpp
deleted file mode 100644
index 6f2cf3e..0000000
--- a/rsLight.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rsContext.h"
-
-#include <GLES/gl.h>
-
-using namespace android;
-using namespace android::renderscript;
-
-
-Light::Light(Context *rsc, bool isLocal, bool isMono) : ObjectBase(rsc)
-{
-    mAllocFile = __FILE__;
-    mAllocLine = __LINE__;
-    mIsLocal = isLocal;
-    mIsMono = isMono;
-
-    mPosition[0] = 0;
-    mPosition[1] = 0;
-    mPosition[2] = 1;
-    mPosition[3] = 0;
-
-    mColor[0] = 1.f;
-    mColor[1] = 1.f;
-    mColor[2] = 1.f;
-    mColor[3] = 1.f;
-}
-
-Light::~Light()
-{
-}
-
-void Light::setPosition(float x, float y, float z)
-{
-    mPosition[0] = x;
-    mPosition[1] = y;
-    mPosition[2] = z;
-}
-
-void Light::setColor(float r, float g, float b)
-{
-    mColor[0] = r;
-    mColor[1] = g;
-    mColor[2] = b;
-}
-
-void Light::setupGL(uint32_t num) const
-{
-    glLightfv(GL_LIGHT0 + num, GL_DIFFUSE, mColor);
-    glLightfv(GL_LIGHT0 + num, GL_SPECULAR, mColor);
-    glLightfv(GL_LIGHT0 + num, GL_POSITION, mPosition);
-}
-
-////////////////////////////////////////////
-
-LightState::LightState()
-{
-    clear();
-}
-
-LightState::~LightState()
-{
-}
-
-void LightState::clear()
-{
-    mIsLocal = false;
-    mIsMono = false;
-}
-
-
-////////////////////////////////////////////////////
-//
-
-namespace android {
-namespace renderscript {
-
-void rsi_LightBegin(Context *rsc)
-{
-    rsc->mStateLight.clear();
-}
-
-void rsi_LightSetLocal(Context *rsc, bool isLocal)
-{
-    rsc->mStateLight.mIsLocal = isLocal;
-}
-
-void rsi_LightSetMonochromatic(Context *rsc, bool isMono)
-{
-    rsc->mStateLight.mIsMono = isMono;
-}
-
-RsLight rsi_LightCreate(Context *rsc)
-{
-    Light *l = new Light(rsc, rsc->mStateLight.mIsLocal,
-                         rsc->mStateLight.mIsMono);
-    l->incUserRef();
-    return l;
-}
-
-void rsi_LightSetColor(Context *rsc, RsLight vl, float r, float g, float b)
-{
-    Light *l = static_cast<Light *>(vl);
-    l->setColor(r, g, b);
-}
-
-void rsi_LightSetPosition(Context *rsc, RsLight vl, float x, float y, float z)
-{
-    Light *l = static_cast<Light *>(vl);
-    l->setPosition(x, y, z);
-}
-
-
-
-}
-}
diff --git a/rsLight.h b/rsLight.h
deleted file mode 100644
index d8796e6..0000000
--- a/rsLight.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ANDROID_LIGHT_H
-#define ANDROID_LIGHT_H
-
-
-#include "rsObjectBase.h"
-
-// ---------------------------------------------------------------------------
-namespace android {
-namespace renderscript {
-
-
-// An element is a group of Components that occupies one cell in a structure.
-class Light : public ObjectBase
-{
-public:
-    Light(Context *, bool isLocal, bool isMono);
-    virtual ~Light();
-
-    // Values, mutable after creation.
-    void setPosition(float x, float y, float z);
-    void setColor(float r, float g, float b);
-
-    void setupGL(uint32_t num) const;
-
-protected:
-    float mColor[4];
-    float mPosition[4];
-    bool mIsLocal;
-    bool mIsMono;
-};
-
-
-class LightState {
-public:
-    LightState();
-    ~LightState();
-
-    void clear();
-
-    bool mIsMono;
-    bool mIsLocal;
-};
-
-
-}
-}
-#endif //ANDROID_LIGHT_H
-
diff --git a/rsLocklessFifo.cpp b/rsLocklessFifo.cpp
index c796520..10b38af 100644
--- a/rsLocklessFifo.cpp
+++ b/rsLocklessFifo.cpp
@@ -15,9 +15,11 @@
  */
 
 #include "rsLocklessFifo.h"
+#include "utils/Timers.h"
+#include "utils/StopWatch.h"
 
 using namespace android;
-
+using namespace android::renderscript;
 
 LocklessCommandFifo::LocklessCommandFifo()
 {
@@ -115,6 +117,10 @@
     if (mInShutdown) {
         return;
     }
+
+    //char buf[1024];
+    //sprintf(buf, "RenderScript LocklessCommandFifo::commitSync  %p %i  %i", this, command, sizeInBytes);
+    //StopWatch compileTimer(buf);
     commit(command, sizeInBytes);
     flush();
 }
@@ -128,15 +134,19 @@
     //dumpState("flush 2");
 }
 
+void LocklessCommandFifo::wait()
+{
+    while(isEmpty() && !mInShutdown) {
+        mSignalToControl.set();
+        mSignalToWorker.wait();
+    }
+}
+
 const void * LocklessCommandFifo::get(uint32_t *command, uint32_t *bytesData)
 {
     while(1) {
         //dumpState("get");
-        while(isEmpty() && !mInShutdown) {
-            mSignalToControl.set();
-            mSignalToWorker.wait();
-        }
-
+        wait();
         if (mInShutdown) {
             *command = 0;
             *bytesData = 0;
@@ -165,6 +175,30 @@
     //dumpState("next");
 }
 
+bool LocklessCommandFifo::makeSpaceNonBlocking(uint32_t bytes)
+{
+    //dumpState("make space non-blocking");
+    if ((mPut+bytes) > mEnd) {
+        // Need to loop regardless of where get is.
+        if((mGet > mPut) && (mBuffer+4 >= mGet)) {
+            return false;
+        }
+
+        // Toss in a reset then the normal wait for space will do the rest.
+        reinterpret_cast<uint16_t *>(mPut)[0] = 0;
+        reinterpret_cast<uint16_t *>(mPut)[1] = 0;
+        mPut = mBuffer;
+        mSignalToWorker.set();
+    }
+
+    // It will fit here, so just check for free space; never block waiting for it.
+    if(getFreeSpace() < bytes) {
+        return false;
+    }
+
+    return true;
+}
+
 void LocklessCommandFifo::makeSpace(uint32_t bytes)
 {
     //dumpState("make space");
@@ -178,6 +212,7 @@
         reinterpret_cast<uint16_t *>(mPut)[0] = 0;
         reinterpret_cast<uint16_t *>(mPut)[1] = 0;
         mPut = mBuffer;
+        mSignalToWorker.set();
     }
 
     // it will fit here so we just need to wait for space.
@@ -189,82 +224,6 @@
 
 void LocklessCommandFifo::dumpState(const char *s) const
 {
-    LOGV("%s  put %p, get %p,  buf %p,  end %p", s, mPut, mGet, mBuffer, mEnd);
-}
-
-LocklessCommandFifo::Signal::Signal()
-{
-    mSet = true;
-}
-
-LocklessCommandFifo::Signal::~Signal()
-{
-    pthread_mutex_destroy(&mMutex);
-    pthread_cond_destroy(&mCondition);
-}
-
-bool LocklessCommandFifo::Signal::init()
-{
-    int status = pthread_mutex_init(&mMutex, NULL);
-    if (status) {
-        LOGE("LocklessFifo mutex init failure");
-        return false;
-    }
-
-    status = pthread_cond_init(&mCondition, NULL);
-    if (status) {
-        LOGE("LocklessFifo condition init failure");
-        pthread_mutex_destroy(&mMutex);
-        return false;
-    }
-
-    return true;
-}
-
-void LocklessCommandFifo::Signal::set()
-{
-    int status;
-
-    status = pthread_mutex_lock(&mMutex);
-    if (status) {
-        LOGE("LocklessCommandFifo: error %i locking for set condition.", status);
-        return;
-    }
-
-    mSet = true;
-
-    status = pthread_cond_signal(&mCondition);
-    if (status) {
-        LOGE("LocklessCommandFifo: error %i on set condition.", status);
-    }
-
-    status = pthread_mutex_unlock(&mMutex);
-    if (status) {
-        LOGE("LocklessCommandFifo: error %i unlocking for set condition.", status);
-    }
-}
-
-void LocklessCommandFifo::Signal::wait()
-{
-    int status;
-
-    status = pthread_mutex_lock(&mMutex);
-    if (status) {
-        LOGE("LocklessCommandFifo: error %i locking for condition.", status);
-        return;
-    }
-
-    if (!mSet) {
-        status = pthread_cond_wait(&mCondition, &mMutex);
-        if (status) {
-            LOGE("LocklessCommandFifo: error %i waiting on condition.", status);
-        }
-    }
-    mSet = false;
-
-    status = pthread_mutex_unlock(&mMutex);
-    if (status) {
-        LOGE("LocklessCommandFifo: error %i unlocking for condition.", status);
-    }
+    LOGV("%s %p  put %p, get %p,  buf %p,  end %p", s, this, mPut, mGet, mBuffer, mEnd);
 }
 
diff --git a/rsLocklessFifo.h b/rsLocklessFifo.h
index d0a4356..b8ceeed 100644
--- a/rsLocklessFifo.h
+++ b/rsLocklessFifo.h
@@ -19,8 +19,10 @@
 
 
 #include "rsUtils.h"
+#include "rsSignal.h"
 
 namespace android {
+namespace renderscript {
 
 
 // A simple FIFO to be used as a producer / consumer between two
@@ -37,24 +39,7 @@
     LocklessCommandFifo();
     ~LocklessCommandFifo();
 
-
 protected:
-    class Signal {
-    public:
-        Signal();
-        ~Signal();
-
-        bool init();
-
-        void set();
-        void wait();
-
-    protected:
-        bool mSet;
-        pthread_mutex_t mMutex;
-        pthread_cond_t mCondition;
-    };
-
     uint8_t * volatile mPut;
     uint8_t * volatile mGet;
     uint8_t * mBuffer;
@@ -65,18 +50,19 @@
     Signal mSignalToWorker;
     Signal mSignalToControl;
 
-
-
 public:
     void * reserve(uint32_t bytes);
     void commit(uint32_t command, uint32_t bytes);
     void commitSync(uint32_t command, uint32_t bytes);
 
     void flush();
+    void wait();
+
     const void * get(uint32_t *command, uint32_t *bytesData);
     void next();
 
     void makeSpace(uint32_t bytes);
+    bool makeSpaceNonBlocking(uint32_t bytes);
 
     bool isEmpty() const;
     uint32_t getFreeSpace() const;
@@ -88,4 +74,5 @@
 
 
 }
+}
 #endif
diff --git a/rsMatrix.cpp b/rsMatrix.cpp
index 2f21405..94eef13 100644
--- a/rsMatrix.cpp
+++ b/rsMatrix.cpp
@@ -73,7 +73,7 @@
     s = sinf(rot);
 
     const float len = sqrtf(x*x + y*y + z*z);
-    if (!(len != 1)) {
+    if (len != 1) {
         const float recipLen = 1.f / len;
         x *= recipLen;
         y *= recipLen;
diff --git a/rsMesh.cpp b/rsMesh.cpp
index d595b4e..8e43f24 100644
--- a/rsMesh.cpp
+++ b/rsMesh.cpp
@@ -14,28 +14,256 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
 
+#include <GLES/gl.h>
+#include <GLES2/gl2.h>
+#include <GLES/glext.h>
+#else
+#include "rsContextHostStub.h"
+
+#include <OpenGL/gl.h>
+#include <OpenGl/glext.h>
+#endif
+
+
 using namespace android;
 using namespace android::renderscript;
 
-#include <GLES/gl.h>
-#include <GLES/glext.h>
-
 Mesh::Mesh(Context *rsc) : ObjectBase(rsc)
 {
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
-    mVerticies = NULL;
-    mVerticiesCount = 0;
     mPrimitives = NULL;
     mPrimitivesCount = 0;
+    mVertexBuffers = NULL;
+    mVertexBufferCount = 0;
 }
 
 Mesh::~Mesh()
 {
+    if(mVertexBuffers) {
+        delete[] mVertexBuffers;
+    }
+
+    if(mPrimitives) {
+        for(uint32_t i = 0; i < mPrimitivesCount; i ++) {
+            delete mPrimitives[i];
+        }
+        delete[] mPrimitives;
+    }
 }
 
+void Mesh::render(Context *rsc) const
+{
+    for(uint32_t ct = 0; ct < mPrimitivesCount; ct ++) {
+        renderPrimitive(rsc, ct);
+    }
+}
+
+void Mesh::renderPrimitive(Context *rsc, uint32_t primIndex) const {
+    if (primIndex >= mPrimitivesCount) {
+        LOGE("Invalid primitive index");
+        return;
+    }
+
+    Primitive_t *prim = mPrimitives[primIndex];
+
+    if (prim->mIndexBuffer.get()) {
+        renderPrimitiveRange(rsc, primIndex, 0, prim->mIndexBuffer->getType()->getDimX());
+        return;
+    }
+
+    renderPrimitiveRange(rsc, primIndex, 0, mVertexBuffers[0]->getType()->getDimX());
+}
+
+void Mesh::renderPrimitiveRange(Context *rsc, uint32_t primIndex, uint32_t start, uint32_t len) const
+{
+    if (len < 1 || primIndex >= mPrimitivesCount) {
+        return;
+    }
+
+    rsc->checkError("Mesh::renderPrimitiveRange 1");
+    VertexArray va;
+    for (uint32_t ct=0; ct < mVertexBufferCount; ct++) {
+        mVertexBuffers[ct]->uploadCheck(rsc);
+        if (mVertexBuffers[ct]->getIsBufferObject()) {
+            va.setActiveBuffer(mVertexBuffers[ct]->getBufferObjectID());
+        } else {
+            va.setActiveBuffer(mVertexBuffers[ct]->getPtr());
+        }
+        mVertexBuffers[ct]->getType()->enableGLVertexBuffer(&va);
+    }
+    va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
+
+    rsc->checkError("Mesh::renderPrimitiveRange 2");
+    Primitive_t *prim = mPrimitives[primIndex];
+    if (prim->mIndexBuffer.get()) {
+        prim->mIndexBuffer->uploadCheck(rsc);
+        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, prim->mIndexBuffer->getBufferObjectID());
+        glDrawElements(prim->mGLPrimitive, len, GL_UNSIGNED_SHORT, (uint16_t *)(start * 2));
+    } else {
+        glDrawArrays(prim->mGLPrimitive, start, len);
+    }
+
+    rsc->checkError("Mesh::renderPrimitiveRange");
+}
+
+
+void Mesh::uploadAll(Context *rsc)
+{
+    for (uint32_t ct = 0; ct < mVertexBufferCount; ct ++) {
+        if (mVertexBuffers[ct].get()) {
+            mVertexBuffers[ct]->deferedUploadToBufferObject(rsc);
+        }
+    }
+
+    for (uint32_t ct = 0; ct < mPrimitivesCount; ct ++) {
+        if (mPrimitives[ct]->mIndexBuffer.get()) {
+            mPrimitives[ct]->mIndexBuffer->deferedUploadToBufferObject(rsc);
+        }
+    }
+}
+
+void Mesh::updateGLPrimitives()
+{
+    for(uint32_t i = 0; i < mPrimitivesCount; i ++) {
+        switch(mPrimitives[i]->mPrimitive) {
+            case RS_PRIMITIVE_POINT:          mPrimitives[i]->mGLPrimitive = GL_POINTS; break;
+            case RS_PRIMITIVE_LINE:           mPrimitives[i]->mGLPrimitive = GL_LINES; break;
+            case RS_PRIMITIVE_LINE_STRIP:     mPrimitives[i]->mGLPrimitive = GL_LINE_STRIP; break;
+            case RS_PRIMITIVE_TRIANGLE:       mPrimitives[i]->mGLPrimitive = GL_TRIANGLES; break;
+            case RS_PRIMITIVE_TRIANGLE_STRIP: mPrimitives[i]->mGLPrimitive = GL_TRIANGLE_STRIP; break;
+            case RS_PRIMITIVE_TRIANGLE_FAN:   mPrimitives[i]->mGLPrimitive = GL_TRIANGLE_FAN; break;
+        }
+    }
+}
+
+void Mesh::serialize(OStream *stream) const
+{
+    // Need to identify ourselves
+    stream->addU32((uint32_t)getClassId());
+
+    String8 name(getName());
+    stream->addString(&name);
+
+    // Store number of vertex streams
+    stream->addU32(mVertexBufferCount);
+    for(uint32_t vCount = 0; vCount < mVertexBufferCount; vCount ++) {
+        mVertexBuffers[vCount]->serialize(stream);
+    }
+
+    stream->addU32(mPrimitivesCount);
+    // Store the primitives
+    for (uint32_t pCount = 0; pCount < mPrimitivesCount; pCount ++) {
+        Primitive_t * prim = mPrimitives[pCount];
+
+        stream->addU8((uint8_t)prim->mPrimitive);
+
+        if(prim->mIndexBuffer.get()) {
+            stream->addU32(1);
+            prim->mIndexBuffer->serialize(stream);
+        }
+        else {
+            stream->addU32(0);
+        }
+    }
+}
+
+Mesh *Mesh::createFromStream(Context *rsc, IStream *stream)
+{
+    // First make sure we are reading the correct object
+    RsA3DClassID classID = (RsA3DClassID)stream->loadU32();
+    if(classID != RS_A3D_CLASS_ID_MESH) {
+        LOGE("mesh loading skipped due to invalid class id");
+        return NULL;
+    }
+
+    Mesh * mesh = new Mesh(rsc);
+
+    String8 name;
+    stream->loadString(&name);
+    mesh->setName(name.string(), name.size());
+
+    mesh->mVertexBufferCount = stream->loadU32();
+    if(mesh->mVertexBufferCount) {
+        mesh->mVertexBuffers = new ObjectBaseRef<Allocation>[mesh->mVertexBufferCount];
+
+        for(uint32_t vCount = 0; vCount < mesh->mVertexBufferCount; vCount ++) {
+            Allocation *vertexAlloc = Allocation::createFromStream(rsc, stream);
+            mesh->mVertexBuffers[vCount].set(vertexAlloc);
+        }
+    }
+
+    mesh->mPrimitivesCount = stream->loadU32();
+    if(mesh->mPrimitivesCount) {
+        mesh->mPrimitives = new Primitive_t *[mesh->mPrimitivesCount];
+
+        // load all primitives
+        for (uint32_t pCount = 0; pCount < mesh->mPrimitivesCount; pCount ++) {
+            Primitive_t * prim = new Primitive_t;
+            mesh->mPrimitives[pCount] = prim;
+
+            prim->mPrimitive = (RsPrimitive)stream->loadU8();
+
+            // Check to see if the index buffer was stored
+            uint32_t isIndexPresent = stream->loadU32();
+            if(isIndexPresent) {
+                Allocation *indexAlloc = Allocation::createFromStream(rsc, stream);
+                prim->mIndexBuffer.set(indexAlloc);
+            }
+        }
+    }
+
+    mesh->updateGLPrimitives();
+    mesh->uploadAll(rsc);
+
+    return mesh;
+}
+
+void Mesh::computeBBox() {
+    float *posPtr = NULL;
+    uint32_t vectorSize = 0;
+    uint32_t stride = 0;
+    uint32_t numVerts = 0;
+    // First we need to find the position ptr and stride
+    for (uint32_t ct=0; ct < mVertexBufferCount; ct++) {
+        const Type *bufferType = mVertexBuffers[ct]->getType();
+        const Element *bufferElem = bufferType->getElement();
+
+        for (uint32_t ct=0; ct < bufferElem->getFieldCount(); ct++) {
+            if(strcmp(bufferElem->getFieldName(ct), "position") == 0) {
+                vectorSize = bufferElem->getField(ct)->getComponent().getVectorSize();
+                stride = bufferElem->getSizeBytes() / sizeof(float);
+                uint32_t offset = bufferElem->getFieldOffsetBytes(ct);
+                posPtr = (float*)((uint8_t*)mVertexBuffers[ct]->getPtr() + offset);
+                numVerts = bufferType->getDimX();
+                break;
+            }
+        }
+        if(posPtr) {
+            break;
+        }
+    }
+
+    mBBoxMin[0] = mBBoxMin[1] = mBBoxMin[2] = 1e6;
+    mBBoxMax[0] = mBBoxMax[1] = mBBoxMax[2] = -1e6;
+    if(!posPtr) {
+        LOGE("Unable to compute bounding box");
+        mBBoxMin[0] = mBBoxMin[1] = mBBoxMin[2] = 0.0f;
+        mBBoxMax[0] = mBBoxMax[1] = mBBoxMax[2] = 0.0f;
+        return;
+    }
+
+    for(uint32_t i = 0; i < numVerts; i ++) {
+        for(uint32_t v = 0; v < vectorSize; v ++) {
+            mBBoxMin[v] = rsMin(mBBoxMin[v], posPtr[v]);
+            mBBoxMax[v] = rsMax(mBBoxMax[v], posPtr[v]);
+        }
+        posPtr += stride;
+    }
+}
 
 
 MeshContext::MeshContext()
@@ -46,3 +274,83 @@
 {
 }
 
+namespace android {
+namespace renderscript {
+
+RsMesh rsi_MeshCreate(Context *rsc, uint32_t vtxCount, uint32_t idxCount)
+{
+    Mesh *sm = new Mesh(rsc);
+    sm->incUserRef();
+
+    sm->mPrimitivesCount = idxCount;
+    sm->mPrimitives = new Mesh::Primitive_t *[sm->mPrimitivesCount];
+    for(uint32_t ct = 0; ct < idxCount; ct ++) {
+        sm->mPrimitives[ct] = new Mesh::Primitive_t;
+    }
+
+    sm->mVertexBufferCount = vtxCount;
+    sm->mVertexBuffers = new ObjectBaseRef<Allocation>[vtxCount];
+
+    return sm;
+}
+
+void rsi_MeshBindVertex(Context *rsc, RsMesh mv, RsAllocation va, uint32_t slot)
+{
+    Mesh *sm = static_cast<Mesh *>(mv);
+    rsAssert(slot < sm->mVertexBufferCount);
+
+    sm->mVertexBuffers[slot].set((Allocation *)va);
+}
+
+void rsi_MeshBindIndex(Context *rsc, RsMesh mv, RsAllocation va, uint32_t primType, uint32_t slot)
+{
+    Mesh *sm = static_cast<Mesh *>(mv);
+    rsAssert(slot < sm->mPrimitivesCount);
+
+    sm->mPrimitives[slot]->mIndexBuffer.set((Allocation *)va);
+    sm->mPrimitives[slot]->mPrimitive = (RsPrimitive)primType;
+    sm->updateGLPrimitives();
+}
+
+void rsi_MeshGetVertexBufferCount(Context *rsc, RsMesh mv, int32_t *numVtx)
+{
+    Mesh *sm = static_cast<Mesh *>(mv);
+    *numVtx = sm->mVertexBufferCount;
+}
+
+void rsi_MeshGetIndexCount(Context *rsc, RsMesh mv, int32_t *numIdx)
+{
+    Mesh *sm = static_cast<Mesh *>(mv);
+    *numIdx = sm->mPrimitivesCount;
+}
+
+void rsi_MeshGetVertices(Context *rsc, RsMesh mv, RsAllocation *vtxData, uint32_t vtxDataCount)
+{
+    Mesh *sm = static_cast<Mesh *>(mv);
+    rsAssert(vtxDataCount == sm->mVertexBufferCount);
+
+    for(uint32_t ct = 0; ct < vtxDataCount; ct ++) {
+        vtxData[ct] = sm->mVertexBuffers[ct].get();
+        sm->mVertexBuffers[ct]->incUserRef();
+    }
+}
+
+void rsi_MeshGetIndices(Context *rsc, RsMesh mv, RsAllocation *va, uint32_t *primType, uint32_t idxDataCount)
+{
+    Mesh *sm = static_cast<Mesh *>(mv);
+    rsAssert(idxDataCount == sm->mPrimitivesCount);
+
+    for(uint32_t ct = 0; ct < idxDataCount; ct ++) {
+        va[ct] = sm->mPrimitives[ct]->mIndexBuffer.get();
+        primType[ct] = sm->mPrimitives[ct]->mPrimitive;
+        if(sm->mPrimitives[ct]->mIndexBuffer.get()) {
+            sm->mPrimitives[ct]->mIndexBuffer->incUserRef();
+        }
+    }
+
+}
+
+
+
+
+}}
diff --git a/rsMesh.h b/rsMesh.h
index 5201abd..ed01c38 100644
--- a/rsMesh.h
+++ b/rsMesh.h
@@ -32,45 +32,40 @@
     Mesh(Context *);
     ~Mesh();
 
-    struct Verticies_t
-    {
-        Allocation ** mAllocations;
-        uint32_t mAllocationCount;
+    // Contains vertex data
+    // Position, normal, texcoord, etc could either be strided in one allocation
+    // or provided separately in multiple ones
+    ObjectBaseRef<Allocation> *mVertexBuffers;
+    uint32_t mVertexBufferCount;
 
-        size_t mVertexDataSize;
-
-        size_t mOffsetCoord;
-        size_t mOffsetTex;
-        size_t mOffsetNorm;
-
-        size_t mSizeCoord;
-        size_t mSizeTex;
-        size_t mSizeNorm;
-
-        uint32_t mBufferObject;
-    };
-
+    // mIndexBuffer could have a NULL reference
+    // If it is null, mPrimitive only would be used to render the mesh
     struct Primitive_t
     {
-        RsPrimitive mType;
-        Verticies_t *mVerticies;
+        ObjectBaseRef<Allocation> mIndexBuffer;
 
-        uint32_t mIndexCount;
-        uint16_t *mIndicies;
-
-        uint32_t mRestartCounts;
-        uint16_t *mRestarts;
+        RsPrimitive mPrimitive;
+        uint32_t mGLPrimitive;
     };
 
-    Verticies_t * mVerticies;
-    uint32_t mVerticiesCount;
-
     Primitive_t ** mPrimitives;
     uint32_t mPrimitivesCount;
 
+    void render(Context *) const;
+    void renderPrimitive(Context *, uint32_t primIndex) const;
+    void renderPrimitiveRange(Context *, uint32_t primIndex, uint32_t start, uint32_t len) const;
+    void uploadAll(Context *);
+    void updateGLPrimitives();
 
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_MESH; }
+    static Mesh *createFromStream(Context *rsc, IStream *stream);
 
-    void analyzeElement();
+    // Bounding volumes
+    float mBBoxMin[3];
+    float mBBoxMax[3];
+    void computeBBox();
+
 protected:
 };
 
@@ -88,3 +83,4 @@
 #endif //ANDROID_RS_TRIANGLE_MESH_H
 
 
+
diff --git a/rsMutex.cpp b/rsMutex.cpp
new file mode 100644
index 0000000..37752f2
--- /dev/null
+++ b/rsMutex.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsMutex.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+
+Mutex::Mutex()
+{
+}
+
+Mutex::~Mutex()
+{
+    pthread_mutex_destroy(&mMutex);
+}
+
+bool Mutex::init()
+{
+    int status = pthread_mutex_init(&mMutex, NULL);
+    if (status) {
+        LOGE("Mutex::Mutex init failure");
+        return false;
+    }
+    return true;
+}
+
+bool Mutex::lock()
+{
+    int status;
+    status = pthread_mutex_lock(&mMutex);
+    if (status) {
+        LOGE("Mutex: error %i locking.", status);
+        return false;
+    }
+    return true;
+}
+
+bool Mutex::unlock()
+{
+    int status;
+    status = pthread_mutex_unlock(&mMutex);
+    if (status) {
+        LOGE("Mutex error %i unlocking.", status);
+        return false;
+    }
+    return true;
+}
+
+
diff --git a/rsFileA3DDecls.h b/rsMutex.h
similarity index 66%
rename from rsFileA3DDecls.h
rename to rsMutex.h
index 2a08bd3..47725d7 100644
--- a/rsFileA3DDecls.h
+++ b/rsMutex.h
@@ -14,31 +14,30 @@
  * limitations under the License.
  */
 
-#ifndef ANDROID_RS_FILE_A3D_DECLS_H
-#define ANDROID_RS_FILE_A3D_DECLS_H
+#ifndef ANDROID_RS_MUTEX_H
+#define ANDROID_RS_MUTEX_H
 
 
-#define A3D_MAGIC_KEY "Android3D_ff"
+#include "rsUtils.h"
 
 namespace android {
 namespace renderscript {
 
-    enum A3DChunkType {
-        CHUNK_EMPTY,
+class Mutex {
+public:
+    Mutex();
+    ~Mutex();
 
-        CHUNK_ELEMENT,
-        CHUNK_ELEMENT_SOURCE,
-        CHUNK_VERTICIES,
-        CHUNK_MESH,
-        CHUNK_PRIMITIVE,
+    bool init();
+    bool lock();
+    bool unlock();
 
-        CHUNK_LAST
-    };
-
+protected:
+    pthread_mutex_t mMutex;
+};
 
 }
 }
-#endif //ANDROID_RS_FILE_A3D_H
 
-
+#endif
 
diff --git a/rsNoise.cpp b/rsNoise.cpp
deleted file mode 100644
index 4b67586..0000000
--- a/rsNoise.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * This implementation of the noise functions was ported from the Java
- * implementation by Jerry Huxtable (http://www.jhlabs.com) under
- * Apache License 2.0 (see http://jhlabs.com/ip/filters/download.html)
- *
- * Original header:
- *
- * Copyright 2006 Jerry Huxtable
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rsNoise.h"
-
-#include <math.h>
-#include <stdlib.h>
-#include <time.h>
-
-namespace android {
-namespace renderscript {
-
-#define B 0x100
-#define BM 0xff
-#define N 0x1000
-
-static int p[B + B + 2];
-static float g3[B + B + 2][3];
-static float g2[B + B + 2][2];
-static float g1[B + B + 2];
-static bool noise_start = true;
-
-#define lerpf(start, stop, amount) start + (stop - start) * amount
-
-static inline float noise_sCurve(float t)
-{
-    return t * t * (3.0f - 2.0f * t);
-}
-
-inline void SC_normalizef2(float v[])
-{
-    float s = (float)sqrtf(v[0] * v[0] + v[1] * v[1]);
-    v[0] = v[0] / s;
-    v[1] = v[1] / s;
-}
-
-inline void SC_normalizef3(float v[])
-{
-    float s = (float)sqrtf(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
-    v[0] = v[0] / s;
-    v[1] = v[1] / s;
-    v[2] = v[2] / s;
-}
-
-static void noise_init()
-{
-    int i, j, k;
-    
-    for (i = 0; i < B; i++) {
-        p[i] = i;
-        
-        g1[i] = (float)((rand() % (B + B)) - B) / B;
-        
-        for (j = 0; j < 2; j++)
-            g2[i][j] = (float)((rand() % (B + B)) - B) / B;
-        SC_normalizef2(g2[i]);
-        
-        for (j = 0; j < 3; j++)
-            g3[i][j] = (float)((rand() % (B + B)) - B) / B;
-        SC_normalizef3(g3[i]);
-    }
-    
-    for (i = B-1; i >= 0; i--) {
-        k = p[i];
-        p[i] = p[j = rand() % B];
-        p[j] = k;
-    }
-    
-    for (i = 0; i < B + 2; i++) {
-        p[B + i] = p[i];
-        g1[B + i] = g1[i];
-        for (j = 0; j < 2; j++)
-            g2[B + i][j] = g2[i][j];
-        for (j = 0; j < 3; j++)
-            g3[B + i][j] = g3[i][j];
-    }
-}
-
-float SC_noisef(float x)
-{
-    srand(time(NULL));
-    int bx0, bx1;
-    float rx0, rx1, sx, t, u, v;
-    
-    if (noise_start) {
-        noise_start = false;
-        noise_init();
-    }
-    
-    t = x + N;
-    bx0 = ((int)t) & BM;
-    bx1 = (bx0+1) & BM;
-    rx0 = t - (int)t;
-    rx1 = rx0 - 1.0f;
-    
-    sx = noise_sCurve(rx0);
-    
-    u = rx0 * g1[p[bx0]];
-    v = rx1 * g1[p[bx1]];
-    return 2.3f * lerpf(u, v, sx);
-}
-
-float SC_noisef2(float x, float y)
-{
-    srand(time(NULL));
-    int bx0, bx1, by0, by1, b00, b10, b01, b11;
-    float rx0, rx1, ry0, ry1, sx, sy, a, b, t, u, v;
-    float *q;
-    int i, j;
-    
-    if (noise_start) {
-        noise_start = false;
-        noise_init();
-    }
-    
-    t = x + N;
-    bx0 = ((int)t) & BM;
-    bx1 = (bx0+1) & BM;
-    rx0 = t - (int)t;
-    rx1 = rx0 - 1.0f;
-	
-    t = y + N;
-    by0 = ((int)t) & BM;
-    by1 = (by0+1) & BM;
-    ry0 = t - (int)t;
-    ry1 = ry0 - 1.0f;
-	
-    i = p[bx0];
-    j = p[bx1];
-    
-    b00 = p[i + by0];
-    b10 = p[j + by0];
-    b01 = p[i + by1];
-    b11 = p[j + by1];
-    
-    sx = noise_sCurve(rx0);
-    sy = noise_sCurve(ry0);
-    
-    q = g2[b00]; u = rx0 * q[0] + ry0 * q[1];
-    q = g2[b10]; v = rx1 * q[0] + ry0 * q[1];
-    a = lerpf(u, v, sx);
-    
-    q = g2[b01]; u = rx0 * q[0] + ry1 * q[1];
-    q = g2[b11]; v = rx1 * q[0] + ry1 * q[1];
-    b = lerpf(u, v, sx);
-    
-    return 1.5f*lerpf(a, b, sy);
-}
-
-float SC_noisef3(float x, float y, float z)
-{
-    srand(time(NULL));
-    int bx0, bx1, by0, by1, bz0, bz1, b00, b10, b01, b11;
-    float rx0, rx1, ry0, ry1, rz0, rz1, sy, sz, a, b, c, d, t, u, v;
-    float *q;
-    int i, j;
-    
-    if (noise_start) {
-        noise_start = false;
-        noise_init();
-    }
-    
-    t = x + N;
-    bx0 = ((int)t) & BM;
-    bx1 = (bx0+1) & BM;
-    rx0 = t - (int)t;
-    rx1 = rx0 - 1.0f;
-    
-    t = y + N;
-    by0 = ((int)t) & BM;
-    by1 = (by0+1) & BM;
-    ry0 = t - (int)t;
-    ry1 = ry0 - 1.0f;
-	
-    t = z + N;
-    bz0 = ((int)t) & BM;
-    bz1 = (bz0+1) & BM;
-    rz0 = t - (int)t;
-    rz1 = rz0 - 1.0f;
-	
-    i = p[bx0];
-    j = p[bx1];
-    
-    b00 = p[i + by0];
-    b10 = p[j + by0];
-    b01 = p[i + by1];
-    b11 = p[j + by1];
-    
-    t  = noise_sCurve(rx0);
-    sy = noise_sCurve(ry0);
-    sz = noise_sCurve(rz0);
-    
-    q = g3[b00 + bz0]; u = rx0 * q[0] + ry0 * q[1] + rz0 * q[2];
-    q = g3[b10 + bz0]; v = rx1 * q[0] + ry0 * q[1] + rz0 * q[2];
-    a = lerpf(u, v, t);
-    
-    q = g3[b01 + bz0]; u = rx0 * q[0] + ry1 * q[1] + rz0 * q[2];
-    q = g3[b11 + bz0]; v = rx1 * q[0] + ry1 * q[1] + rz0 * q[2];
-    b = lerpf(u, v, t);
-    
-    c = lerpf(a, b, sy);
-    
-    q = g3[b00 + bz1]; u = rx0 * q[0] + ry0 * q[1] + rz1 * q[2];
-    q = g3[b10 + bz1]; v = rx1 * q[0] + ry0 * q[1] + rz1 * q[2];
-    a = lerpf(u, v, t);
-    
-    q = g3[b01 + bz1]; u = rx0 * q[0] + ry1 * q[1] + rz1 * q[2];
-    q = g3[b11 + bz1]; v = rx1 * q[0] + ry1 * q[1] + rz1 * q[2];
-    b = lerpf(u, v, t);
-    
-    d = lerpf(a, b, sy);
-    
-    return 1.5f*lerpf(c, d, sz);
-}
-
-float SC_turbulencef2(float x, float y, float octaves)
-{
-    srand(time(NULL));
-    float t = 0.0f;
-    
-    for (float f = 1.0f; f <= octaves; f *= 2)
-        t += fabs(SC_noisef2(f * x, f * y)) / f;
-    return t;
-}
-
-float SC_turbulencef3(float x, float y, float z, float octaves)
-{
-    srand(time(NULL));
-    float t = 0.0f;
-    
-    for (float f = 1.0f; f <= octaves; f *= 2)
-        t += fabs(SC_noisef3(f * x, f * y, f * z)) / f;
-    return t;
-}
-
-}
-}
diff --git a/rsNoise.h b/rsNoise.h
deleted file mode 100644
index 9040751..0000000
--- a/rsNoise.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ANDROID_RS_NOISE_H
-#define ANDROID_RS_NOISE_H
-
-// ---------------------------------------------------------------------------
-namespace android {
-namespace renderscript {
-
-void SC_normalizef2(float v[]);
-void SC_normalizef3(float v[]);
-float SC_noisef(float x);
-float SC_noisef2(float x, float y);
-float SC_noisef3(float x, float y, float z);
-float SC_turbulencef2(float x, float y, float octaves);
-float SC_turbulencef3(float x, float y, float z, float octaves);
-
-}
-}
-
-#endif
diff --git a/rsObjectBase.cpp b/rsObjectBase.cpp
index 677413e..46b1750 100644
--- a/rsObjectBase.cpp
+++ b/rsObjectBase.cpp
@@ -15,22 +15,30 @@
  */
 
 #include "rsObjectBase.h"
+
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif
 
 using namespace android;
 using namespace android::renderscript;
 
+pthread_mutex_t ObjectBase::gObjectInitMutex = PTHREAD_MUTEX_INITIALIZER;
+
 ObjectBase::ObjectBase(Context *rsc)
 {
     mUserRefCount = 0;
     mSysRefCount = 0;
-    mName = NULL;
-    mRSC = NULL;
+    mRSC = rsc;
     mNext = NULL;
     mPrev = NULL;
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
-    setContext(rsc);
+
+    rsAssert(rsc);
+    add();
 }
 
 ObjectBase::~ObjectBase()
@@ -39,49 +47,53 @@
     rsAssert(!mUserRefCount);
     rsAssert(!mSysRefCount);
     remove();
-    delete[] mName;
 }
 
 void ObjectBase::dumpLOGV(const char *op) const
 {
-    if (mName) {
+    if (mName.size()) {
         LOGV("%s RSobj %p, name %s, refs %i,%i  from %s,%i links %p,%p,%p",
-             op, this, mName, mUserRefCount, mSysRefCount, mAllocFile, mAllocLine, mNext, mPrev, mRSC);
+             op, this, mName.string(), mUserRefCount, mSysRefCount, mAllocFile, mAllocLine, mNext, mPrev, mRSC);
     } else {
         LOGV("%s RSobj %p, no-name, refs %i,%i  from %s,%i links %p,%p,%p",
              op, this, mUserRefCount, mSysRefCount, mAllocFile, mAllocLine, mNext, mPrev, mRSC);
     }
 }
 
-void ObjectBase::setContext(Context *rsc)
-{
-    if (mRSC) {
-        remove();
-    }
-    mRSC = rsc;
-    if (rsc) {
-        add();
-    }
-}
-
 void ObjectBase::incUserRef() const
 {
-    mUserRefCount ++;
-    //LOGV("ObjectBase %p inc ref %i", this, mRefCount);
+    lockUserRef();
+    mUserRefCount++;
+    unlockUserRef();
+    //LOGV("ObjectBase %p inc ref %i", this, mUserRefCount);
+}
+
+void ObjectBase::prelockedIncUserRef() const
+{
+    mUserRefCount++;
 }
 
 void ObjectBase::incSysRef() const
 {
     mSysRefCount ++;
-    //LOGV("ObjectBase %p inc ref %i", this, mRefCount);
+    //LOGV("ObjectBase %p inc ref %i", this, mSysRefCount);
 }
 
 bool ObjectBase::checkDelete() const
 {
     if (!(mSysRefCount | mUserRefCount)) {
+        lockUserRef();
+
+        // Recheck the user ref count since it can be incremented from other threads.
+        if (mUserRefCount) {
+            unlockUserRef();
+            return false;
+        }
+
         if (mRSC && mRSC->props.mLogObjects) {
             dumpLOGV("checkDelete");
         }
+        unlockUserRef();
         delete this;
         return true;
     }
@@ -90,45 +102,59 @@
 
 bool ObjectBase::decUserRef() const
 {
+    lockUserRef();
     rsAssert(mUserRefCount > 0);
-    mUserRefCount --;
-    //dumpObj("decUserRef");
-    return checkDelete();
+    //dumpLOGV("decUserRef");
+    mUserRefCount--;
+    unlockUserRef();
+    bool ret = checkDelete();
+    return ret;
 }
 
 bool ObjectBase::zeroUserRef() const
 {
+    lockUserRef();
+    // This can only happen during cleanup and is therefore
+    // thread safe.
     mUserRefCount = 0;
-    //dumpObj("zeroUserRef");
-    return checkDelete();
+    //dumpLOGV("zeroUserRef");
+    unlockUserRef();
+    bool ret = checkDelete();
+    return ret;
 }
 
 bool ObjectBase::decSysRef() const
 {
     rsAssert(mSysRefCount > 0);
     mSysRefCount --;
-    //dumpObj("decSysRef");
+    //dumpLOGV("decSysRef");
     return checkDelete();
 }
 
 void ObjectBase::setName(const char *name)
 {
-    setName(name, strlen(name));
+    mName.setTo(name);
 }
 
 void ObjectBase::setName(const char *name, uint32_t len)
 {
-    delete mName;
-    mName = NULL;
-    if (name) {
-        mName = new char[len + 1];
-        memcpy(mName, name, len);
-        mName[len] = 0;
-    }
+    mName.setTo(name, len);
+}
+
+void ObjectBase::lockUserRef()
+{
+    pthread_mutex_lock(&gObjectInitMutex);
+}
+
+void ObjectBase::unlockUserRef()
+{
+    pthread_mutex_unlock(&gObjectInitMutex);
 }
 
 void ObjectBase::add() const
 {
+    pthread_mutex_lock(&gObjectInitMutex);
+
     rsAssert(!mNext);
     rsAssert(!mPrev);
     //LOGV("calling add  rsc %p", mRSC);
@@ -137,16 +163,20 @@
         mRSC->mObjHead->mPrev = this;
     }
     mRSC->mObjHead = this;
+
+    pthread_mutex_unlock(&gObjectInitMutex);
 }
 
 void ObjectBase::remove() const
 {
+    // Take the lock only after the no-context early-out so that path cannot leave it held.
     //LOGV("calling remove  rsc %p", mRSC);
     if (!mRSC) {
         rsAssert(!mPrev);
         rsAssert(!mNext);
         return;
     }
+    lockUserRef();
     if (mRSC->mObjHead == this) {
         mRSC->mObjHead = mNext;
     }
@@ -158,6 +188,7 @@
     }
     mPrev = NULL;
     mNext = NULL;
+    unlockUserRef();
 }
 
 void ObjectBase::zeroAllUserRef(Context *rsc)
@@ -188,6 +219,8 @@
 
 void ObjectBase::dumpAll(Context *rsc)
 {
+    lockUserRef();
+
     LOGV("Dumping all objects");
     const ObjectBase * o = rsc->mObjHead;
     while (o) {
@@ -195,5 +228,23 @@
         o->dumpLOGV("  ");
         o = o->mNext;
     }
+
+    unlockUserRef();
+}
+
+bool ObjectBase::isValid(const Context *rsc, const ObjectBase *obj)
+{
+    lockUserRef();
+
+    const ObjectBase * o = rsc->mObjHead;
+    while (o) {
+        if (o == obj) {
+            unlockUserRef();
+            return true;
+        }
+        o = o->mNext;
+    }
+    unlockUserRef();
+    return false;
 }
 
diff --git a/rsObjectBase.h b/rsObjectBase.h
index bb03b87..8d1ace1 100644
--- a/rsObjectBase.h
+++ b/rsObjectBase.h
@@ -24,6 +24,7 @@
 namespace renderscript {
 
 class Context;
+class OStream;
 
 // An element is a group of Components that occupies one cell in a structure.
 class ObjectBase
@@ -38,20 +39,27 @@
     void incUserRef() const;
     bool decUserRef() const;
     bool zeroUserRef() const;
+    void prelockedIncUserRef() const;
 
     const char * getName() const {
-        return mName;
+        return mName.string();
     }
     void setName(const char *);
     void setName(const char *, uint32_t len);
 
     Context * getContext() const {return mRSC;}
-    void setContext(Context *);
 
     static void zeroAllUserRef(Context *rsc);
     static void dumpAll(Context *rsc);
 
     virtual void dumpLOGV(const char *prefix) const;
+    virtual void serialize(OStream *stream) const = 0;
+    virtual RsA3DClassID getClassId() const = 0;
+
+    static bool isValid(const Context *rsc, const ObjectBase *obj);
+
+    static void lockUserRef();
+    static void unlockUserRef();
 
 protected:
     const char *mAllocFile;
@@ -59,12 +67,14 @@
     Context *mRSC;
 
 private:
+    static pthread_mutex_t gObjectInitMutex;
+
     void add() const;
     void remove() const;
 
     bool checkDelete() const;
 
-    char * mName;
+    String8 mName;
     mutable int32_t mSysRefCount;
     mutable int32_t mUserRefCount;
 
diff --git a/rsProgram.cpp b/rsProgram.cpp
index 70e2868..10e00e6 100644
--- a/rsProgram.cpp
+++ b/rsProgram.cpp
@@ -14,16 +14,21 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-#include "rsProgram.h"
-
 #include <GLES2/gl2.h>
 #include <GLES2/gl2ext.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#include <OpenGL/glext.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
+#include "rsProgram.h"
 
 using namespace android;
 using namespace android::renderscript;
 
-
 Program::Program(Context *rsc) : ObjectBase(rsc)
 {
     mAllocFile = __FILE__;
@@ -32,7 +37,10 @@
     mShaderID = 0;
     mAttribCount = 0;
     mUniformCount = 0;
+    mTextureCount = 0;
 
+    mTextures = NULL;
+    mSamplers = NULL;
     mInputElements = NULL;
     mOutputElements = NULL;
     mConstantTypes = NULL;
@@ -40,6 +48,7 @@
     mOutputCount = 0;
     mConstantCount = 0;
     mIsValid = false;
+    mIsInternal = false;
 }
 
 Program::Program(Context *rsc, const char * shaderText, uint32_t shaderLength,
@@ -73,6 +82,8 @@
         }
     }
 
+    mTextures = new ObjectBaseRef<Allocation>[mTextureCount];
+    mSamplers = new ObjectBaseRef<Sampler>[mTextureCount];
     mInputElements = new ObjectBaseRef<Element>[mInputCount];
     mOutputElements = new ObjectBaseRef<Element>[mOutputCount];
     mConstantTypes = new ObjectBaseRef<Type>[mConstantCount];
@@ -91,15 +102,37 @@
             mConstantTypes[constant++].set(reinterpret_cast<Type *>(params[ct+1]));
         }
     }
+    mIsInternal = false;
+    uint32_t internalTokenLen = strlen(RS_SHADER_INTERNAL);
+    if(shaderLength > internalTokenLen &&
+       strncmp(RS_SHADER_INTERNAL, shaderText, internalTokenLen) == 0) {
+        mIsInternal = true;
+        shaderText += internalTokenLen;
+        shaderLength -= internalTokenLen;
+    }
     mUserShader.setTo(shaderText, shaderLength);
 }
 
 Program::~Program()
 {
-    for (uint32_t ct=0; ct < MAX_UNIFORMS; ct++) {
-        bindAllocation(NULL, ct);
+    if(mRSC->props.mLogShaders) {
+        LOGV("Program::~Program with shader id %u", mShaderID);
     }
 
+    if(mShaderID) {
+        glDeleteShader(mShaderID);
+    }
+
+    for (uint32_t ct=0; ct < MAX_UNIFORMS; ct++) {
+        bindAllocation(NULL, NULL, ct);
+    }
+
+    for (uint32_t ct=0; ct < mTextureCount; ct++) {
+        bindTexture(NULL, ct, NULL);
+        bindSampler(NULL, ct, NULL);
+    }
+    delete[] mTextures;
+    delete[] mSamplers;
     delete[] mInputElements;
     delete[] mOutputElements;
     delete[] mConstantTypes;
@@ -109,8 +142,22 @@
 }
 
 
-void Program::bindAllocation(Allocation *alloc, uint32_t slot)
+void Program::bindAllocation(Context *rsc, Allocation *alloc, uint32_t slot)
 {
+    // Unbinding (alloc == NULL) skips the type check below, so bound the slot
+    // against the fixed mConstants[MAX_UNIFORMS] array before it is indexed.
+    if (slot >= MAX_UNIFORMS || (alloc != NULL && slot >= mConstantCount)) {
+        LOGE("Attempt to bind alloc at slot %u, on shader id %u, but const count is %u",
+             slot, (uint32_t)this, mConstantCount);
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind allocation");
+        return;
+    }
+    if (alloc != NULL && !alloc->getType()->isEqual(mConstantTypes[slot].get())) {
+        LOGE("Attempt to bind alloc at slot %u, on shader id %u, but types mismatch",
+             slot, (uint32_t)this);
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind allocation");
+        return;
+    }
     if (mConstants[slot].get() == alloc) {
         return;
     }
@@ -124,10 +171,11 @@
     mDirty = true;
 }
 
-void Program::bindTexture(uint32_t slot, Allocation *a)
+void Program::bindTexture(Context *rsc, uint32_t slot, Allocation *a)
 {
-    if (slot >= MAX_TEXTURE) {
-        LOGE("Attempt to bind a texture to a slot > MAX_TEXTURE");
+    if (slot >= mTextureCount) {
+        LOGE("Attempt to bind texture to slot %u but tex count is %u", slot, mTextureCount);
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind texture");
         return;
     }
 
@@ -136,10 +184,11 @@
     mDirty = true;
 }
 
-void Program::bindSampler(uint32_t slot, Sampler *s)
+void Program::bindSampler(Context *rsc, uint32_t slot, Sampler *s)
 {
-    if (slot >= MAX_TEXTURE) {
-        LOGE("Attempt to bind a Sampler to a slot > MAX_TEXTURE");
+    if (slot >= mTextureCount) {
+        LOGE("Attempt to bind sampler to slot %u but tex count is %u", slot, mTextureCount);
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind sampler");
         return;
     }
 
@@ -235,7 +284,159 @@
     mUserShader.setTo(txt, len);
 }
 
+void Program::appendUserConstants() {
+    for (uint32_t ct=0; ct < mConstantCount; ct++) {
+        const Element *e = mConstantTypes[ct]->getElement();
+        for (uint32_t field=0; field < e->getFieldCount(); field++) {
+            const Element *f = e->getField(field);
+            const char *fn = e->getFieldName(field);
 
+            if (fn[0] == '#') {
+                continue;
+            }
+
+            // Cannot be complex
+            rsAssert(!f->getFieldCount());
+            if(f->getType() == RS_TYPE_MATRIX_4X4) {
+                mShader.append("uniform mat4 UNI_");
+            }
+            else if(f->getType() == RS_TYPE_MATRIX_3X3) {
+                mShader.append("uniform mat3 UNI_");
+            }
+            else if(f->getType() == RS_TYPE_MATRIX_2X2) {
+                mShader.append("uniform mat2 UNI_");
+            }
+            else {
+                switch(f->getComponent().getVectorSize()) {
+                case 1: mShader.append("uniform float UNI_"); break;
+                case 2: mShader.append("uniform vec2 UNI_"); break;
+                case 3: mShader.append("uniform vec3 UNI_"); break;
+                case 4: mShader.append("uniform vec4 UNI_"); break;
+                default:
+                    rsAssert(0);
+                }
+            }
+
+            mShader.append(fn);
+            mShader.append(";\n");
+        }
+    }
+}
+
+void Program::setupUserConstants(Context *rsc, ShaderCache *sc, bool isFragment) {
+    uint32_t uidx = 0;
+    for (uint32_t ct=0; ct < mConstantCount; ct++) {
+        Allocation *alloc = mConstants[ct].get();
+        if (!alloc) {
+            LOGE("Attempting to set constants on shader id %u, but alloc at slot %u is not set", (uint32_t)this, ct);
+            rsc->setError(RS_ERROR_BAD_SHADER, "No constant allocation bound");
+            continue;
+        }
+
+        const uint8_t *data = static_cast<const uint8_t *>(alloc->getPtr());
+        const Element *e = mConstantTypes[ct]->getElement();
+        for (uint32_t field=0; field < e->getFieldCount(); field++) {
+            const Element *f = e->getField(field);
+            const char *fieldName = e->getFieldName(field);
+            // If this field is padding, skip it
+            if(fieldName[0] == '#') {
+                continue;
+            }
+
+            uint32_t offset = e->getFieldOffsetBytes(field);
+            const float *fd = reinterpret_cast<const float *>(&data[offset]);
+
+            int32_t slot = -1;
+            if(!isFragment) {
+                slot = sc->vtxUniformSlot(uidx);
+            }
+            else {
+                slot = sc->fragUniformSlot(uidx);
+            }
+
+            if(rsc->props.mLogShadersUniforms) {
+                LOGV("Uniform  slot=%i, offset=%i, constant=%i, field=%i, uidx=%i, name=%s", slot, offset, ct, field, uidx, fieldName);
+            }
+            if (slot >= 0) {
+                if(f->getType() == RS_TYPE_MATRIX_4X4) {
+                    if(rsc->props.mLogShadersUniforms) {
+                        LOGV("Matrix4x4");
+                        LOGV("{%f, %f, %f, %f",  fd[0], fd[4], fd[8], fd[12]);
+                        LOGV(" %f, %f, %f, %f",  fd[1], fd[5], fd[9], fd[13]);
+                        LOGV(" %f, %f, %f, %f",  fd[2], fd[6], fd[10], fd[14]);
+                        LOGV(" %f, %f, %f, %f}", fd[3], fd[7], fd[11], fd[15]);
+                    }
+                    glUniformMatrix4fv(slot, 1, GL_FALSE, fd);
+                }
+                else if(f->getType() == RS_TYPE_MATRIX_3X3) {
+                    if(rsc->props.mLogShadersUniforms) {
+                        LOGV("Matrix3x3");
+                        LOGV("{%f, %f, %f",  fd[0], fd[3], fd[6]);
+                        LOGV(" %f, %f, %f",  fd[1], fd[4], fd[7]);
+                        LOGV(" %f, %f, %f}", fd[2], fd[5], fd[8]);
+                    }
+                    glUniformMatrix3fv(slot, 1, GL_FALSE, fd);
+                }
+                else if(f->getType() == RS_TYPE_MATRIX_2X2) {
+                    if(rsc->props.mLogShadersUniforms){
+                        LOGV("Matrix2x2");
+                        LOGV("{%f, %f",  fd[0], fd[2]);
+                        LOGV(" %f, %f}", fd[1], fd[3]);
+                    }
+                    glUniformMatrix2fv(slot, 1, GL_FALSE, fd);
+                }
+                else {
+                    switch(f->getComponent().getVectorSize()) {
+                    case 1:
+                        if(rsc->props.mLogShadersUniforms) {
+                            LOGV("Uniform 1 = %f", fd[0]);
+                        }
+                        glUniform1fv(slot, 1, fd);
+                        break;
+                    case 2:
+                        if(rsc->props.mLogShadersUniforms) {
+                            LOGV("Uniform 2 = %f %f", fd[0], fd[1]);
+                        }
+                        glUniform2fv(slot, 1, fd);
+                        break;
+                    case 3:
+                        if(rsc->props.mLogShadersUniforms) {
+                            LOGV("Uniform 3 = %f %f %f", fd[0], fd[1], fd[2]);
+                        }
+                        glUniform3fv(slot, 1, fd);
+                        break;
+                    case 4:
+                        if(rsc->props.mLogShadersUniforms) {
+                            LOGV("Uniform 4 = %f %f %f %f", fd[0], fd[1], fd[2], fd[3]);
+                        }
+                        glUniform4fv(slot, 1, fd);
+                        break;
+                    default:
+                        rsAssert(0);
+                    }
+                }
+            }
+            uidx ++;
+        }
+    }
+}
+
+void Program::initAddUserElement(const Element *e, String8 *names, uint32_t *count, const char *prefix)
+{
+    rsAssert(e->getFieldCount());
+    for (uint32_t ct=0; ct < e->getFieldCount(); ct++) {
+        const Element *ce = e->getField(ct);
+        if (ce->getFieldCount()) {
+            initAddUserElement(ce, names, count, prefix);
+        }
+        else if(e->getFieldName(ct)[0] != '#') {
+            String8 tmp(prefix);
+            tmp.append(e->getFieldName(ct));
+            names[*count].setTo(tmp.string());
+            (*count)++;
+        }
+    }
+}
 
 namespace android {
 namespace renderscript {
@@ -244,19 +445,19 @@
 void rsi_ProgramBindConstants(Context *rsc, RsProgram vp, uint32_t slot, RsAllocation constants)
 {
     Program *p = static_cast<Program *>(vp);
-    p->bindAllocation(static_cast<Allocation *>(constants), slot);
+    p->bindAllocation(rsc, static_cast<Allocation *>(constants), slot);
 }
 
 void rsi_ProgramBindTexture(Context *rsc, RsProgram vpf, uint32_t slot, RsAllocation a)
 {
     Program *p = static_cast<Program *>(vpf);
-    p->bindTexture(slot, static_cast<Allocation *>(a));
+    p->bindTexture(rsc, slot, static_cast<Allocation *>(a));
 }
 
 void rsi_ProgramBindSampler(Context *rsc, RsProgram vpf, uint32_t slot, RsSampler s)
 {
     Program *p = static_cast<Program *>(vpf);
-    p->bindSampler(slot, static_cast<Sampler *>(s));
+    p->bindSampler(rsc, slot, static_cast<Sampler *>(s));
 }
 
 }
diff --git a/rsProgram.h b/rsProgram.h
index 86f85fb..c93033b 100644
--- a/rsProgram.h
+++ b/rsProgram.h
@@ -23,29 +23,30 @@
 // ---------------------------------------------------------------------------
 namespace android {
 namespace renderscript {
-
-
 class ShaderCache;
 
+#define RS_SHADER_INTERNAL "//rs_shader_internal\n"
+#define RS_SHADER_ATTR "ATTRIB_"
+#define RS_SHADER_UNI "UNI_"
+
 class Program : public ObjectBase
 {
 public:
     const static uint32_t MAX_ATTRIBS = 8;
     const static uint32_t MAX_UNIFORMS = 16;
-    const static uint32_t MAX_TEXTURE = 2;
 
     Program(Context *);
     Program(Context *, const char * shaderText, uint32_t shaderLength,
                        const uint32_t * params, uint32_t paramLength);
     virtual ~Program();
 
-    void bindAllocation(Allocation *, uint32_t slot);
+    void bindAllocation(Context *, Allocation *, uint32_t slot);
     virtual void createShader();
 
-    bool isUserProgram() const {return mUserShader.size() > 0;}
+    bool isUserProgram() const {return !mIsInternal;}
 
-    void bindTexture(uint32_t slot, Allocation *);
-    void bindSampler(uint32_t slot, Sampler *);
+    void bindTexture(Context *, uint32_t slot, Allocation *);
+    void bindSampler(Context *, uint32_t slot, Sampler *);
 
     uint32_t getShaderID() const {return mShaderID;}
     void setShader(const char *, uint32_t len);
@@ -71,6 +72,12 @@
     uint32_t mOutputCount;
     uint32_t mConstantCount;
     bool mIsValid;
+    bool mIsInternal;
+
+    // Applies to vertex and fragment shaders only
+    void appendUserConstants();
+    void setupUserConstants(Context *rsc, ShaderCache *sc, bool isFragment);
+    void initAddUserElement(const Element *e, String8 *names, uint32_t *count, const char *prefix);
 
     ObjectBaseRef<Allocation> mConstants[MAX_UNIFORMS];
 
@@ -91,8 +98,8 @@
     // and filtered.
     //
     // Constants are strictly accessed by programetic loads.
-    ObjectBaseRef<Allocation> mTextures[MAX_TEXTURE];
-    ObjectBaseRef<Sampler> mSamplers[MAX_TEXTURE];
+    ObjectBaseRef<Allocation> *mTextures;
+    ObjectBaseRef<Sampler> *mSamplers;
 
     bool loadShader(Context *, uint32_t type);
 
diff --git a/rsProgramFragment.cpp b/rsProgramFragment.cpp
index c17b94c..81b4fa4 100644
--- a/rsProgramFragment.cpp
+++ b/rsProgramFragment.cpp
@@ -14,42 +14,23 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-#include "rsProgramFragment.h"
-
 #include <GLES/gl.h>
 #include <GLES/glext.h>
 #include <GLES2/gl2.h>
 #include <GLES2/gl2ext.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#include <OpenGL/glext.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
+#include "rsProgramFragment.h"
 
 using namespace android;
 using namespace android::renderscript;
 
-
-ProgramFragment::ProgramFragment(Context *rsc, const uint32_t * params,
-                                 uint32_t paramLength) :
-    Program(rsc)
-{
-    mAllocFile = __FILE__;
-    mAllocLine = __LINE__;
-    rsAssert(paramLength = 5);
-
-    mEnvModes[0] = (RsTexEnvMode)params[0];
-    mTextureFormats[0] = params[1];
-    mEnvModes[1] = (RsTexEnvMode)params[2];
-    mTextureFormats[1] = params[3];
-    mPointSpriteEnable = params[4] != 0;
-
-    mTextureEnableMask = 0;
-    if (mEnvModes[0]) {
-        mTextureEnableMask |= 1;
-    }
-    if (mEnvModes[1]) {
-        mTextureEnableMask |= 2;
-    }
-    init(rsc);
-}
-
 ProgramFragment::ProgramFragment(Context *rsc, const char * shaderText,
                                  uint32_t shaderLength, const uint32_t * params,
                                  uint32_t paramLength) :
@@ -58,100 +39,68 @@
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
 
-    init(rsc);
-    mTextureEnableMask = (1 << mTextureCount) -1;
-}
+    mConstantColor[0] = 1.f;
+    mConstantColor[1] = 1.f;
+    mConstantColor[2] = 1.f;
+    mConstantColor[3] = 1.f;
 
+    init(rsc);
+}
 
 ProgramFragment::~ProgramFragment()
 {
+    if(mShaderID) {
+        mRSC->mShaderCache.cleanupFragment(mShaderID);
+    }
 }
 
-void ProgramFragment::setupGL(const Context *rsc, ProgramFragmentState *state)
+void ProgramFragment::setConstantColor(Context *rsc, float r, float g, float b, float a)
 {
+    if(isUserProgram()) {
+        LOGE("Attempting to set fixed function emulation color on user program");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot  set fixed function emulation color on user program");
+        return;
+    }
+    if(mConstants[0].get() == NULL) {
+        LOGE("Unable to set fixed function emulation color because allocation is missing");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Unable to set fixed function emulation color because allocation is missing");
+        return;
+    }
+    mConstantColor[0] = r;
+    mConstantColor[1] = g;
+    mConstantColor[2] = b;
+    mConstantColor[3] = a;
+    memcpy(mConstants[0]->getPtr(), mConstantColor, 4*sizeof(float));
+    mDirty = true;
+}
+
+void ProgramFragment::setupGL2(Context *rsc, ProgramFragmentState *state, ShaderCache *sc)
+{
+    //LOGE("sgl2 frag1 %x", glGetError());
     if ((state->mLast.get() == this) && !mDirty) {
         return;
     }
     state->mLast.set(this);
 
-    for (uint32_t ct=0; ct < MAX_TEXTURE; ct++) {
-        glActiveTexture(GL_TEXTURE0 + ct);
-        if (!(mTextureEnableMask & (1 << ct)) || !mTextures[ct].get()) {
-            glDisable(GL_TEXTURE_2D);
-            continue;
-        }
-
-        glEnable(GL_TEXTURE_2D);
-        if (rsc->checkVersion1_1()) {
-            if (mPointSpriteEnable) {
-                glEnable(GL_POINT_SPRITE_OES);
-            } else {
-                glDisable(GL_POINT_SPRITE_OES);
-            }
-            glTexEnvi(GL_POINT_SPRITE_OES, GL_COORD_REPLACE_OES, mPointSpriteEnable);
-        }
-        mTextures[ct]->uploadCheck(rsc);
-        glBindTexture(GL_TEXTURE_2D, mTextures[ct]->getTextureID());
-
-        switch(mEnvModes[ct]) {
-        case RS_TEX_ENV_MODE_NONE:
-            rsAssert(0);
-            break;
-        case RS_TEX_ENV_MODE_REPLACE:
-            glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
-            break;
-        case RS_TEX_ENV_MODE_MODULATE:
-            glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);
-            break;
-        case RS_TEX_ENV_MODE_DECAL:
-            glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_DECAL);
-            break;
-        }
-
-        if (mSamplers[ct].get()) {
-            mSamplers[ct]->setupGL(rsc, mTextures[ct]->getType()->getIsNp2());
-        } else {
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
-        }
-
-        // Gross hack.
-        if (ct == 2) {
-            glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_COMBINE);
-
-            glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_RGB, GL_ADD);
-            glTexEnvi(GL_TEXTURE_ENV, GL_SRC0_RGB, GL_PREVIOUS);
-            glTexEnvi(GL_TEXTURE_ENV, GL_SRC1_RGB, GL_TEXTURE);
-            glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_RGB, GL_SRC_COLOR);
-            glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND1_RGB, GL_SRC_COLOR);
-
-            glTexEnvi(GL_TEXTURE_ENV, GL_COMBINE_ALPHA, GL_ADD);
-            glTexEnvi(GL_TEXTURE_ENV, GL_SRC0_ALPHA, GL_PREVIOUS);
-            glTexEnvi(GL_TEXTURE_ENV, GL_SRC1_ALPHA, GL_TEXTURE);
-            glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND0_ALPHA, GL_SRC_ALPHA);
-            glTexEnvi(GL_TEXTURE_ENV, GL_OPERAND1_ALPHA, GL_SRC_ALPHA);
-        }
-    }
-    glActiveTexture(GL_TEXTURE0);
-    mDirty = false;
-    rsc->checkError("ProgramFragment::setupGL");
-}
-
-void ProgramFragment::setupGL2(const Context *rsc, ProgramFragmentState *state, ShaderCache *sc)
-{
-
-    //LOGE("sgl2 frag1 %x", glGetError());
-    if ((state->mLast.get() == this) && !mDirty) {
-        //return;
-    }
-    state->mLast.set(this);
-
     rsc->checkError("ProgramFragment::setupGL2 start");
-    for (uint32_t ct=0; ct < MAX_TEXTURE; ct++) {
+
+    rsc->checkError("ProgramFragment::setupGL2 begin uniforms");
+    setupUserConstants(rsc, sc, true);
+    // Clamp (and report) only when more units are requested than exist; using all of them is legal.
+    uint32_t numTexturesToBind = mTextureCount;
+    uint32_t numTexturesAvailable = rsc->getMaxFragmentTextures();
+    if(numTexturesToBind > numTexturesAvailable) {
+        LOGE("Attempting to bind %u textures on shader id %u, but only %u are available",
+             mTextureCount, (uint32_t)this, numTexturesAvailable);
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind more textuers than available");
+        numTexturesToBind = numTexturesAvailable;
+    }
+
+    for (uint32_t ct=0; ct < numTexturesToBind; ct++) {
         glActiveTexture(GL_TEXTURE0 + ct);
-        if (!(mTextureEnableMask & (1 << ct)) || !mTextures[ct].get()) {
+        if (!mTextures[ct].get()) {
+            LOGE("No texture bound for shader id %u, texture unit %u", (uint)this, ct);
+            rsc->setError(RS_ERROR_BAD_SHADER, "No texture bound");
             continue;
         }
 
@@ -159,16 +108,16 @@
         glBindTexture(GL_TEXTURE_2D, mTextures[ct]->getTextureID());
         rsc->checkError("ProgramFragment::setupGL2 tex bind");
         if (mSamplers[ct].get()) {
-            mSamplers[ct]->setupGL(rsc, mTextures[ct]->getType()->getIsNp2());
+            mSamplers[ct]->setupGL(rsc, mTextures[ct].get());
         } else {
             glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
             glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
-            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
             rsc->checkError("ProgramFragment::setupGL2 tex env");
         }
 
-        glUniform1i(sc->fragUniformSlot(ct), ct);
+        glUniform1i(sc->fragUniformSlot(mTextureUniformIndexStart + ct), ct);
         rsc->checkError("ProgramFragment::setupGL2 uniforms");
     }
 
@@ -183,112 +132,49 @@
 
 void ProgramFragment::createShader()
 {
-    mShader.setTo("precision mediump float;\n");
-    mShader.append("varying vec4 varColor;\n");
-    mShader.append("varying vec4 varTex0;\n");
-
     if (mUserShader.length() > 1) {
+        mShader.append("precision mediump float;\n");
+        appendUserConstants();
+        char buf[256];
         for (uint32_t ct=0; ct < mTextureCount; ct++) {
-            char buf[256];
-            sprintf(buf, "uniform sampler2D uni_Tex%i;\n", ct);
+            sprintf(buf, "uniform sampler2D UNI_Tex%i;\n", ct);
             mShader.append(buf);
         }
-
         mShader.append(mUserShader);
     } else {
-        uint32_t mask = mTextureEnableMask;
-        uint32_t texNum = 0;
-        while (mask) {
-            if (mask & 1) {
-                char buf[64];
-                mShader.append("uniform sampler2D uni_Tex");
-                sprintf(buf, "%i", texNum);
-                mShader.append(buf);
-                mShader.append(";\n");
-            }
-            mask >>= 1;
-            texNum++;
-        }
-
-
-        mShader.append("void main() {\n");
-        mShader.append("  vec4 col = varColor;\n");
-
-        if (mTextureEnableMask) {
-            if (mPointSpriteEnable) {
-                mShader.append("  vec2 t0 = gl_PointCoord;\n");
-            } else {
-                mShader.append("  vec2 t0 = varTex0.xy;\n");
-            }
-        }
-
-        mask = mTextureEnableMask;
-        texNum = 0;
-        while (mask) {
-            if (mask & 1) {
-                switch(mEnvModes[texNum]) {
-                case RS_TEX_ENV_MODE_NONE:
-                    rsAssert(0);
-                    break;
-                case RS_TEX_ENV_MODE_REPLACE:
-                    switch(mTextureFormats[texNum]) {
-                    case 1:
-                        mShader.append("  col.a = texture2D(uni_Tex0, t0).a;\n");
-                        break;
-                    case 2:
-                        mShader.append("  col.rgba = texture2D(uni_Tex0, t0).rgba;\n");
-                        break;
-                    case 3:
-                        mShader.append("  col.rgb = texture2D(uni_Tex0, t0).rgb;\n");
-                        break;
-                    case 4:
-                        mShader.append("  col.rgba = texture2D(uni_Tex0, t0).rgba;\n");
-                        break;
-                    }
-                    break;
-                case RS_TEX_ENV_MODE_MODULATE:
-                    switch(mTextureFormats[texNum]) {
-                    case 1:
-                        mShader.append("  col.a *= texture2D(uni_Tex0, t0).a;\n");
-                        break;
-                    case 2:
-                        mShader.append("  col.rgba *= texture2D(uni_Tex0, t0).rgba;\n");
-                        break;
-                    case 3:
-                        mShader.append("  col.rgb *= texture2D(uni_Tex0, t0).rgb;\n");
-                        break;
-                    case 4:
-                        mShader.append("  col.rgba *= texture2D(uni_Tex0, t0).rgba;\n");
-                        break;
-                    }
-                    break;
-                case RS_TEX_ENV_MODE_DECAL:
-                    mShader.append("  col = texture2D(uni_Tex0, t0);\n");
-                    break;
-                }
-
-            }
-            mask >>= 1;
-            texNum++;
-        }
-
-        //mShader.append("  col.a = 1.0;\n");
-        //mShader.append("  col.r = 0.5;\n");
-
-        mShader.append("  gl_FragColor = col;\n");
-        mShader.append("}\n");
+        LOGE("ProgramFragment::createShader cannot create program, shader code not defined");
+        rsAssert(0);
     }
 }
 
 void ProgramFragment::init(Context *rsc)
 {
-    mUniformCount = 2;
-    mUniformNames[0].setTo("uni_Tex0");
-    mUniformNames[1].setTo("uni_Tex1");
+    mUniformCount = 0;
+    if (mUserShader.size() > 0) {
+        for (uint32_t ct=0; ct < mConstantCount; ct++) {
+            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, &mUniformCount, RS_SHADER_UNI);
+        }
+    }
+    mTextureUniformIndexStart = mUniformCount;
+    char buf[256];
+    for (uint32_t ct=0; ct < mTextureCount; ct++) {
+        sprintf(buf, "UNI_Tex%i", ct);
+        mUniformNames[mUniformCount++].setTo(buf);
+    }
 
     createShader();
 }
 
+void ProgramFragment::serialize(OStream *stream) const
+{
+
+}
+
+ProgramFragment *ProgramFragment::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL;
+}
+
 ProgramFragmentState::ProgramFragmentState()
 {
     mPF = NULL;
@@ -300,16 +186,39 @@
 
 }
 
-void ProgramFragmentState::init(Context *rsc, int32_t w, int32_t h)
+void ProgramFragmentState::init(Context *rsc)
 {
-    uint32_t tmp[5] = {
-        RS_TEX_ENV_MODE_NONE, 0,
-        RS_TEX_ENV_MODE_NONE, 0,
-        0
-    };
-    ProgramFragment *pf = new ProgramFragment(rsc, tmp, 5);
+    String8 shaderString(RS_SHADER_INTERNAL);
+    shaderString.append("varying lowp vec4 varColor;\n");
+    shaderString.append("varying vec2 varTex0;\n");
+    shaderString.append("void main() {\n");
+    shaderString.append("  lowp vec4 col = UNI_Color;\n");
+    shaderString.append("  gl_FragColor = col;\n");
+    shaderString.append("}\n");
+
+    const Element *colorElem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
+    rsc->mStateElement.elementBuilderBegin();
+    rsc->mStateElement.elementBuilderAdd(colorElem, "Color", 1);
+    const Element *constInput = rsc->mStateElement.elementBuilderCreate(rsc);
+
+    Type *inputType = new Type(rsc);
+    inputType->setElement(constInput);
+    inputType->setDimX(1);
+    inputType->compute();
+
+    uint32_t tmp[4];
+    tmp[0] = RS_PROGRAM_PARAM_CONSTANT;
+    tmp[1] = (uint32_t)inputType;
+    tmp[2] = RS_PROGRAM_PARAM_TEXTURE_COUNT;
+    tmp[3] = 0;
+
+    Allocation *constAlloc = new Allocation(rsc, inputType);
+    ProgramFragment *pf = new ProgramFragment(rsc, shaderString.string(),
+                                              shaderString.length(), tmp, 4);
+    pf->bindAllocation(rsc, constAlloc, 0);
+    pf->setConstantColor(rsc, 1.0f, 1.0f, 1.0f, 1.0f);
+
     mDefault.set(pf);
-    pf->init(rsc);
 }
 
 void ProgramFragmentState::deinit(Context *rsc)
@@ -322,21 +231,13 @@
 namespace android {
 namespace renderscript {
 
-RsProgramFragment rsi_ProgramFragmentCreate(Context *rsc,
-                                            const uint32_t * params,
-                                            uint32_t paramLength)
-{
-    ProgramFragment *pf = new ProgramFragment(rsc, params, paramLength);
-    pf->incUserRef();
-    return pf;
-}
-
-RsProgramFragment rsi_ProgramFragmentCreate2(Context *rsc, const char * shaderText,
+RsProgramFragment rsi_ProgramFragmentCreate(Context *rsc, const char * shaderText,
                              uint32_t shaderLength, const uint32_t * params,
                              uint32_t paramLength)
 {
     ProgramFragment *pf = new ProgramFragment(rsc, shaderText, shaderLength, params, paramLength);
     pf->incUserRef();
+    //LOGE("rsi_ProgramFragmentCreate %p", pf);
     return pf;
 }
 
diff --git a/rsProgramFragment.h b/rsProgramFragment.h
index 9fa565d..1cf9ca7 100644
--- a/rsProgramFragment.h
+++ b/rsProgramFragment.h
@@ -28,26 +28,25 @@
 class ProgramFragment : public Program
 {
 public:
-    ProgramFragment(Context *, const uint32_t * params, uint32_t paramLength);
     ProgramFragment(Context *rsc, const char * shaderText,
                              uint32_t shaderLength, const uint32_t * params,
                              uint32_t paramLength);
     virtual ~ProgramFragment();
 
-    virtual void setupGL(const Context *, ProgramFragmentState *);
-    virtual void setupGL2(const Context *, ProgramFragmentState *, ShaderCache *sc);
+    virtual void setupGL2(Context *, ProgramFragmentState *, ShaderCache *sc);
 
     virtual void createShader();
     virtual void loadShader(Context *rsc);
     virtual void init(Context *rsc);
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_FRAGMENT; }
+    static ProgramFragment *createFromStream(Context *rsc, IStream *stream);
+
+    void setConstantColor(Context *, float, float, float, float);
 
 protected:
-    // Hacks to create a program for now
-    uint32_t mTextureFormats[MAX_TEXTURE];
-    uint32_t mTextureDimensions[MAX_TEXTURE];
-    RsTexEnvMode mEnvModes[MAX_TEXTURE];
-    uint32_t mTextureEnableMask;
-    bool mPointSpriteEnable;
+    float mConstantColor[4];
+    int32_t mTextureUniformIndexStart;
 };
 
 class ProgramFragmentState
@@ -57,10 +56,9 @@
     ~ProgramFragmentState();
 
     ProgramFragment *mPF;
-    void init(Context *rsc, int32_t w, int32_t h);
+    void init(Context *rsc);
     void deinit(Context *rsc);
 
-    ObjectBaseRef<Type> mTextureTypes[ProgramFragment::MAX_TEXTURE];
     ObjectBaseRef<ProgramFragment> mDefault;
     Vector<ProgramFragment *> mPrograms;
 
diff --git a/rsProgramRaster.cpp b/rsProgramRaster.cpp
index 13887d1..62d060d 100644
--- a/rsProgramRaster.cpp
+++ b/rsProgramRaster.cpp
@@ -14,11 +14,17 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-#include "rsProgramRaster.h"
-
 #include <GLES/gl.h>
 #include <GLES/glext.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#include <OpenGL/glext.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
+#include "rsProgramRaster.h"
 
 using namespace android;
 using namespace android::renderscript;
@@ -35,9 +41,8 @@
     mPointSmooth = pointSmooth;
     mLineSmooth = lineSmooth;
     mPointSprite = pointSprite;
-
-    mPointSize = 1.0f;
     mLineWidth = 1.0f;
+    mCull = RS_CULL_BACK;
 }
 
 ProgramRaster::~ProgramRaster()
@@ -47,52 +52,47 @@
 void ProgramRaster::setLineWidth(float s)
 {
     mLineWidth = s;
+    mDirty = true;
 }
 
-void ProgramRaster::setPointSize(float s)
+void ProgramRaster::setCullMode(RsCullMode mode)
 {
-    mPointSize = s;
-}
-
-void ProgramRaster::setupGL(const Context *rsc, ProgramRasterState *state)
-{
-    if (state->mLast.get() == this) {
-        return;
-    }
-    state->mLast.set(this);
-
-    glPointSize(mPointSize);
-    if (mPointSmooth) {
-        glEnable(GL_POINT_SMOOTH);
-    } else {
-        glDisable(GL_POINT_SMOOTH);
-    }
-
-    glLineWidth(mLineWidth);
-    if (mLineSmooth) {
-        glEnable(GL_LINE_SMOOTH);
-    } else {
-        glDisable(GL_LINE_SMOOTH);
-    }
-
-    if (rsc->checkVersion1_1()) {
-        if (mPointSprite) {
-            glEnable(GL_POINT_SPRITE_OES);
-        } else {
-            glDisable(GL_POINT_SPRITE_OES);
-        }
-    }
+    mCull = mode;
+    mDirty = true;
 }
 
 void ProgramRaster::setupGL2(const Context *rsc, ProgramRasterState *state)
 {
-    if (state->mLast.get() == this) {
+    if (state->mLast.get() == this && !mDirty) {
         return;
     }
     state->mLast.set(this);
+    mDirty = false;
+
+    switch(mCull) {
+        case RS_CULL_BACK:
+            glEnable(GL_CULL_FACE);
+            glCullFace(GL_BACK);
+            break;
+        case RS_CULL_FRONT:
+            glEnable(GL_CULL_FACE);
+            glCullFace(GL_FRONT);
+            break;
+        case RS_CULL_NONE:
+            glDisable(GL_CULL_FACE);
+            break;
+    }
 }
 
+void ProgramRaster::serialize(OStream *stream) const
+{
 
+}
+
+ProgramRaster *ProgramRaster::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL;
+}
 
 ProgramRasterState::ProgramRasterState()
 {
@@ -102,7 +102,7 @@
 {
 }
 
-void ProgramRasterState::init(Context *rsc, int32_t w, int32_t h)
+void ProgramRasterState::init(Context *rsc)
 {
     ProgramRaster *pr = new ProgramRaster(rsc, false, false, false);
     mDefault.set(pr);
@@ -118,7 +118,7 @@
 namespace android {
 namespace renderscript {
 
-RsProgramRaster rsi_ProgramRasterCreate(Context * rsc, RsElement in, RsElement out,
+RsProgramRaster rsi_ProgramRasterCreate(Context * rsc,
                                       bool pointSmooth,
                                       bool lineSmooth,
                                       bool pointSprite)
@@ -131,18 +131,18 @@
     return pr;
 }
 
-void rsi_ProgramRasterSetPointSize(Context * rsc, RsProgramRaster vpr, float s)
-{
-    ProgramRaster *pr = static_cast<ProgramRaster *>(vpr);
-    pr->setPointSize(s);
-}
-
 void rsi_ProgramRasterSetLineWidth(Context * rsc, RsProgramRaster vpr, float s)
 {
     ProgramRaster *pr = static_cast<ProgramRaster *>(vpr);
     pr->setLineWidth(s);
 }
 
+void rsi_ProgramRasterSetCullMode(Context * rsc, RsProgramRaster vpr, RsCullMode mode)
+{
+    ProgramRaster *pr = static_cast<ProgramRaster *>(vpr);
+    pr->setCullMode(mode);
+}
+
 
 }
 }
diff --git a/rsProgramRaster.h b/rsProgramRaster.h
index c3a9c90..d5ed686 100644
--- a/rsProgramRaster.h
+++ b/rsProgramRaster.h
@@ -34,21 +34,20 @@
                   bool pointSprite);
     virtual ~ProgramRaster();
 
-    virtual void setupGL(const Context *, ProgramRasterState *);
     virtual void setupGL2(const Context *, ProgramRasterState *);
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_RASTER; }
+    static ProgramRaster *createFromStream(Context *rsc, IStream *stream);
 
     void setLineWidth(float w);
-    void setPointSize(float s);
+    void setCullMode(RsCullMode mode);
 
 protected:
     bool mPointSmooth;
     bool mLineSmooth;
     bool mPointSprite;
-
-    float mPointSize;
     float mLineWidth;
-
-
+    RsCullMode mCull;
 };
 
 class ProgramRasterState
@@ -56,7 +55,7 @@
 public:
     ProgramRasterState();
     ~ProgramRasterState();
-    void init(Context *rsc, int32_t w, int32_t h);
+    void init(Context *rsc);
     void deinit(Context *rsc);
 
     ObjectBaseRef<ProgramRaster> mDefault;
diff --git a/rsProgramFragmentStore.cpp b/rsProgramStore.cpp
similarity index 66%
rename from rsProgramFragmentStore.cpp
rename to rsProgramStore.cpp
index 8a2157f..586a89f 100644
--- a/rsProgramFragmentStore.cpp
+++ b/rsProgramStore.cpp
@@ -14,17 +14,23 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-#include "rsProgramFragmentStore.h"
-
 #include <GLES/gl.h>
 #include <GLES/glext.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#include <OpenGL/glext.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
+#include "rsProgramStore.h"
 
 using namespace android;
 using namespace android::renderscript;
 
 
-ProgramFragmentStore::ProgramFragmentStore(Context *rsc) :
+ProgramStore::ProgramStore(Context *rsc) :
     Program(rsc)
 {
     mAllocFile = __FILE__;
@@ -46,11 +52,11 @@
 
 }
 
-ProgramFragmentStore::~ProgramFragmentStore()
+ProgramStore::~ProgramStore()
 {
 }
 
-void ProgramFragmentStore::setupGL(const Context *rsc, ProgramFragmentStoreState *state)
+void ProgramStore::setupGL2(const Context *rsc, ProgramStoreState *state)
 {
     if (state->mLast.get() == this) {
         return;
@@ -70,47 +76,23 @@
 
     //LOGE("pfs  %i, %i, %x", mDepthWriteEnable, mDepthTestEnable, mDepthFunc);
 
-    glDepthMask(mDepthWriteEnable);
-    if(mDepthTestEnable || mDepthWriteEnable) {
-        glEnable(GL_DEPTH_TEST);
-        glDepthFunc(mDepthFunc);
+    if (rsc->mUserSurfaceConfig.depthMin > 0) {
+        glDepthMask(mDepthWriteEnable);
+        if(mDepthTestEnable || mDepthWriteEnable) {
+            glEnable(GL_DEPTH_TEST);
+            glDepthFunc(mDepthFunc);
+        } else {
+            glDisable(GL_DEPTH_TEST);
+        }
     } else {
+        glDepthMask(false);
         glDisable(GL_DEPTH_TEST);
     }
 
-    if (mDitherEnable) {
-        glEnable(GL_DITHER);
+    if (rsc->mUserSurfaceConfig.stencilMin > 0) {
     } else {
-        glDisable(GL_DITHER);
-    }
-}
-
-void ProgramFragmentStore::setupGL2(const Context *rsc, ProgramFragmentStoreState *state)
-{
-    if (state->mLast.get() == this) {
-        return;
-    }
-    state->mLast.set(this);
-
-    glColorMask(mColorRWriteEnable,
-                mColorGWriteEnable,
-                mColorBWriteEnable,
-                mColorAWriteEnable);
-    if (mBlendEnable) {
-        glEnable(GL_BLEND);
-        glBlendFunc(mBlendSrc, mBlendDst);
-    } else {
-        glDisable(GL_BLEND);
-    }
-
-    //LOGE("pfs  %i, %i, %x", mDepthWriteEnable, mDepthTestEnable, mDepthFunc);
-
-    glDepthMask(mDepthWriteEnable);
-    if(mDepthTestEnable || mDepthWriteEnable) {
-        glEnable(GL_DEPTH_TEST);
-        glDepthFunc(mDepthFunc);
-    } else {
-        glDisable(GL_DEPTH_TEST);
+        glStencilMask(0);
+        glDisable(GL_STENCIL_TEST);
     }
 
     if (mDitherEnable) {
@@ -121,12 +103,23 @@
 }
 
 
-void ProgramFragmentStore::setDitherEnable(bool enable)
+void ProgramStore::setDitherEnable(bool enable)
 {
     mDitherEnable = enable;
 }
 
-void ProgramFragmentStore::setDepthFunc(RsDepthFunc func)
+void ProgramStore::serialize(OStream *stream) const
+{
+
+}
+
+ProgramStore *ProgramStore::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL;
+}
+
+
+void ProgramStore::setDepthFunc(RsDepthFunc func)
 {
     mDepthTestEnable = true;
 
@@ -156,12 +149,12 @@
     }
 }
 
-void ProgramFragmentStore::setDepthMask(bool mask)
+void ProgramStore::setDepthMask(bool mask)
 {
     mDepthWriteEnable = mask;
 }
 
-void ProgramFragmentStore::setBlendFunc(RsBlendSrcFunc src, RsBlendDstFunc dst)
+void ProgramStore::setBlendFunc(RsBlendSrcFunc src, RsBlendDstFunc dst)
 {
     mBlendEnable = true;
     if ((src == RS_BLEND_SRC_ONE) &&
@@ -227,7 +220,7 @@
     }
 }
 
-void ProgramFragmentStore::setColorMask(bool r, bool g, bool b, bool a)
+void ProgramStore::setColorMask(bool r, bool g, bool b, bool a)
 {
     mColorRWriteEnable = r;
     mColorGWriteEnable = g;
@@ -236,24 +229,24 @@
 }
 
 
-ProgramFragmentStoreState::ProgramFragmentStoreState()
+ProgramStoreState::ProgramStoreState()
 {
     mPFS = NULL;
 }
 
-ProgramFragmentStoreState::~ProgramFragmentStoreState()
+ProgramStoreState::~ProgramStoreState()
 {
     delete mPFS;
 
 }
 
-void ProgramFragmentStoreState::init(Context *rsc, int32_t w, int32_t h)
+void ProgramStoreState::init(Context *rsc)
 {
-    ProgramFragmentStore *pfs = new ProgramFragmentStore(rsc);
+    ProgramStore *pfs = new ProgramStore(rsc);
     mDefault.set(pfs);
 }
 
-void ProgramFragmentStoreState::deinit(Context *rsc)
+void ProgramStoreState::deinit(Context *rsc)
 {
     mDefault.clear();
     mLast.clear();
@@ -263,42 +256,42 @@
 namespace android {
 namespace renderscript {
 
-void rsi_ProgramFragmentStoreBegin(Context * rsc, RsElement in, RsElement out)
+void rsi_ProgramStoreBegin(Context * rsc, RsElement in, RsElement out)
 {
     delete rsc->mStateFragmentStore.mPFS;
-    rsc->mStateFragmentStore.mPFS = new ProgramFragmentStore(rsc);
+    rsc->mStateFragmentStore.mPFS = new ProgramStore(rsc);
 
 }
 
-void rsi_ProgramFragmentStoreDepthFunc(Context *rsc, RsDepthFunc func)
+void rsi_ProgramStoreDepthFunc(Context *rsc, RsDepthFunc func)
 {
     rsc->mStateFragmentStore.mPFS->setDepthFunc(func);
 }
 
-void rsi_ProgramFragmentStoreDepthMask(Context *rsc, bool mask)
+void rsi_ProgramStoreDepthMask(Context *rsc, bool mask)
 {
     rsc->mStateFragmentStore.mPFS->setDepthMask(mask);
 }
 
-void rsi_ProgramFragmentStoreColorMask(Context *rsc, bool r, bool g, bool b, bool a)
+void rsi_ProgramStoreColorMask(Context *rsc, bool r, bool g, bool b, bool a)
 {
     rsc->mStateFragmentStore.mPFS->setColorMask(r, g, b, a);
 }
 
-void rsi_ProgramFragmentStoreBlendFunc(Context *rsc, RsBlendSrcFunc src, RsBlendDstFunc dst)
+void rsi_ProgramStoreBlendFunc(Context *rsc, RsBlendSrcFunc src, RsBlendDstFunc dst)
 {
     rsc->mStateFragmentStore.mPFS->setBlendFunc(src, dst);
 }
 
-RsProgramFragmentStore rsi_ProgramFragmentStoreCreate(Context *rsc)
+RsProgramStore rsi_ProgramStoreCreate(Context *rsc)
 {
-    ProgramFragmentStore *pfs = rsc->mStateFragmentStore.mPFS;
+    ProgramStore *pfs = rsc->mStateFragmentStore.mPFS;
     pfs->incUserRef();
     rsc->mStateFragmentStore.mPFS = 0;
     return pfs;
 }
 
-void rsi_ProgramFragmentStoreDither(Context *rsc, bool enable)
+void rsi_ProgramStoreDither(Context *rsc, bool enable)
 {
     rsc->mStateFragmentStore.mPFS->setDitherEnable(enable);
 }
diff --git a/rsProgramFragmentStore.h b/rsProgramStore.h
similarity index 70%
rename from rsProgramFragmentStore.h
rename to rsProgramStore.h
index 3412c99..95bcf3c 100644
--- a/rsProgramFragmentStore.h
+++ b/rsProgramStore.h
@@ -18,21 +18,21 @@
 #define ANDROID_RS_PROGRAM_FRAGMENT_STORE_H
 
 #include "rsProgram.h"
+#include "rsStream.h"
 
 // ---------------------------------------------------------------------------
 namespace android {
 namespace renderscript {
 
-class ProgramFragmentStoreState;
+class ProgramStoreState;
 
-class ProgramFragmentStore : public Program
+class ProgramStore : public Program
 {
 public:
-    ProgramFragmentStore(Context *);
-    virtual ~ProgramFragmentStore();
+    ProgramStore(Context *);
+    virtual ~ProgramStore();
 
-    virtual void setupGL(const Context *, ProgramFragmentStoreState *);
-    virtual void setupGL2(const Context *, ProgramFragmentStoreState *);
+    virtual void setupGL2(const Context *, ProgramStoreState *);
 
     void setDepthFunc(RsDepthFunc);
     void setDepthMask(bool);
@@ -42,6 +42,10 @@
 
     void setDitherEnable(bool);
 
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_STORE; }
+    static ProgramStore *createFromStream(Context *rsc, IStream *stream);
+
 protected:
     bool mDitherEnable;
 
@@ -60,19 +64,19 @@
     bool mStencilTestEnable;
 };
 
-class ProgramFragmentStoreState
+class ProgramStoreState
 {
 public:
-    ProgramFragmentStoreState();
-    ~ProgramFragmentStoreState();
-    void init(Context *rsc, int32_t w, int32_t h);
+    ProgramStoreState();
+    ~ProgramStoreState();
+    void init(Context *rsc);
     void deinit(Context *rsc);
 
-    ObjectBaseRef<ProgramFragmentStore> mDefault;
-    ObjectBaseRef<ProgramFragmentStore> mLast;
+    ObjectBaseRef<ProgramStore> mDefault;
+    ObjectBaseRef<ProgramStore> mLast;
 
 
-    ProgramFragmentStore *mPFS;
+    ProgramStore *mPFS;
 };
 
 
diff --git a/rsProgramVertex.cpp b/rsProgramVertex.cpp
index a2b2df4..a785262 100644
--- a/rsProgramVertex.cpp
+++ b/rsProgramVertex.cpp
@@ -14,28 +14,24 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-#include "rsProgramVertex.h"
-
 #include <GLES/gl.h>
 #include <GLES/glext.h>
 #include <GLES2/gl2.h>
 #include <GLES2/gl2ext.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#include <OpenGL/glext.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
+#include "rsProgramVertex.h"
 
 using namespace android;
 using namespace android::renderscript;
 
 
-ProgramVertex::ProgramVertex(Context *rsc, bool texMat) :
-    Program(rsc)
-{
-    mAllocFile = __FILE__;
-    mAllocLine = __LINE__;
-    mTextureMatrixEnable = texMat;
-    mLightCount = 0;
-    init(rsc);
-}
-
 ProgramVertex::ProgramVertex(Context *rsc, const char * shaderText,
                              uint32_t shaderLength, const uint32_t * params,
                              uint32_t paramLength) :
@@ -43,69 +39,15 @@
 {
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
-    mTextureMatrixEnable = false;
-    mLightCount = 0;
 
     init(rsc);
 }
 
 ProgramVertex::~ProgramVertex()
 {
-}
-
-static void logMatrix(const char *txt, const float *f)
-{
-    LOGV("Matrix %s, %p", txt, f);
-    LOGV("%6.4f, %6.4f, %6.4f, %6.4f", f[0], f[4], f[8], f[12]);
-    LOGV("%6.4f, %6.4f, %6.4f, %6.4f", f[1], f[5], f[9], f[13]);
-    LOGV("%6.4f, %6.4f, %6.4f, %6.4f", f[2], f[6], f[10], f[14]);
-    LOGV("%6.4f, %6.4f, %6.4f, %6.4f", f[3], f[7], f[11], f[15]);
-}
-
-void ProgramVertex::setupGL(const Context *rsc, ProgramVertexState *state)
-{
-    if ((state->mLast.get() == this) && !mDirty) {
-        return;
+    if(mShaderID) {
+        mRSC->mShaderCache.cleanupVertex(mShaderID);
     }
-    state->mLast.set(this);
-
-    const float *f = static_cast<const float *>(mConstants[0]->getPtr());
-
-    glMatrixMode(GL_TEXTURE);
-    if (mTextureMatrixEnable) {
-        glLoadMatrixf(&f[RS_PROGRAM_VERTEX_TEXTURE_OFFSET]);
-    } else {
-        glLoadIdentity();
-    }
-
-    glMatrixMode(GL_MODELVIEW);
-    glLoadIdentity();
-    if (mLightCount) {
-        int v = 0;
-        glEnable(GL_LIGHTING);
-        glLightModelxv(GL_LIGHT_MODEL_TWO_SIDE, &v);
-        for (uint32_t ct = 0; ct < mLightCount; ct++) {
-            const Light *l = mLights[ct].get();
-            glEnable(GL_LIGHT0 + ct);
-            l->setupGL(ct);
-        }
-        for (uint32_t ct = mLightCount; ct < MAX_LIGHTS; ct++) {
-            glDisable(GL_LIGHT0 + ct);
-        }
-    } else {
-        glDisable(GL_LIGHTING);
-    }
-
-    if (!f) {
-        LOGE("Must bind constants to vertex program");
-    }
-
-    glMatrixMode(GL_PROJECTION);
-    glLoadMatrixf(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
-    glMatrixMode(GL_MODELVIEW);
-    glLoadMatrixf(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET]);
-
-    mDirty = false;
 }
 
 void ProgramVertex::loadShader(Context *rsc) {
@@ -114,42 +56,19 @@
 
 void ProgramVertex::createShader()
 {
-    mShader.setTo("");
-
-    mShader.append("varying vec4 varColor;\n");
-    mShader.append("varying vec4 varTex0;\n");
-
     if (mUserShader.length() > 1) {
-        mShader.append("uniform mat4 ");
-        mShader.append(mUniformNames[0]);
-        mShader.append(";\n");
 
-        for (uint32_t ct=0; ct < mConstantCount; ct++) {
-            const Element *e = mConstantTypes[ct]->getElement();
-            for (uint32_t field=0; field < e->getFieldCount(); field++) {
-                const Element *f = e->getField(field);
-
-                // Cannot be complex
-                rsAssert(!f->getFieldCount());
-                switch(f->getComponent().getVectorSize()) {
-                case 1: mShader.append("uniform float UNI_"); break;
-                case 2: mShader.append("uniform vec2 UNI_"); break;
-                case 3: mShader.append("uniform vec3 UNI_"); break;
-                case 4: mShader.append("uniform vec4 UNI_"); break;
-                default:
-                    rsAssert(0);
-                }
-
-                mShader.append(e->getFieldName(field));
-                mShader.append(";\n");
-            }
-        }
-
+        appendUserConstants();
 
         for (uint32_t ct=0; ct < mInputCount; ct++) {
             const Element *e = mInputElements[ct].get();
             for (uint32_t field=0; field < e->getFieldCount(); field++) {
                 const Element *f = e->getField(field);
+                const char *fn = e->getFieldName(field);
+
+                if (fn[0] == '#') {
+                    continue;
+                }
 
                 // Cannot be complex
                 rsAssert(!f->getFieldCount());
@@ -162,149 +81,121 @@
                     rsAssert(0);
                 }
 
-                mShader.append(e->getFieldName(field));
+                mShader.append(fn);
                 mShader.append(";\n");
             }
         }
         mShader.append(mUserShader);
     } else {
-        mShader.append("attribute vec4 ATTRIB_LegacyPosition;\n");
-        mShader.append("attribute vec4 ATTRIB_LegacyColor;\n");
-        mShader.append("attribute vec3 ATTRIB_LegacyNormal;\n");
-        mShader.append("attribute float ATTRIB_LegacyPointSize;\n");
-        mShader.append("attribute vec4 ATTRIB_LegacyTexture;\n");
-
-        for (uint32_t ct=0; ct < mUniformCount; ct++) {
-            mShader.append("uniform mat4 ");
-            mShader.append(mUniformNames[ct]);
-            mShader.append(";\n");
-        }
-
-        mShader.append("void main() {\n");
-        mShader.append("  gl_Position = UNI_MVP * ATTRIB_LegacyPosition;\n");
-        mShader.append("  gl_PointSize = ATTRIB_LegacyPointSize;\n");
-
-        mShader.append("  varColor = ATTRIB_LegacyColor;\n");
-        if (mTextureMatrixEnable) {
-            mShader.append("  varTex0 = UNI_TexMatrix * ATTRIB_LegacyTexture;\n");
-        } else {
-            mShader.append("  varTex0 = ATTRIB_LegacyTexture;\n");
-        }
-        //mShader.append("  pos.x = pos.x / 480.0;\n");
-        //mShader.append("  pos.y = pos.y / 800.0;\n");
-        //mShader.append("  gl_Position = pos;\n");
-        mShader.append("}\n");
+        LOGE("ProgramVertex::createShader cannot create program, shader code not defined");
+        rsAssert(0);
     }
 }
 
-void ProgramVertex::setupGL2(const Context *rsc, ProgramVertexState *state, ShaderCache *sc)
+void ProgramVertex::setupGL2(Context *rsc, ProgramVertexState *state, ShaderCache *sc)
 {
     //LOGE("sgl2 vtx1 %x", glGetError());
     if ((state->mLast.get() == this) && !mDirty) {
-        //return;
+        return;
     }
 
     rsc->checkError("ProgramVertex::setupGL2 start");
-    glVertexAttrib4f(1, state->color[0], state->color[1], state->color[2], state->color[3]);
 
-    const float *f = static_cast<const float *>(mConstants[0]->getPtr());
-
-    Matrix mvp;
-    mvp.load(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
-    Matrix t;
-    t.load(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET]);
-    mvp.multiply(&t);
-
-    glUniformMatrix4fv(sc->vtxUniformSlot(0), 1, GL_FALSE, mvp.m);
-    if (mTextureMatrixEnable) {
-        glUniformMatrix4fv(sc->vtxUniformSlot(1), 1, GL_FALSE,
-                           &f[RS_PROGRAM_VERTEX_TEXTURE_OFFSET]);
+    if(!isUserProgram()) {
+        if(mConstants[0].get() == NULL) {
+            LOGE("Unable to set fixed function emulation matrices because allocation is missing");
+            rsc->setError(RS_ERROR_BAD_SHADER, "Fixed function allocation missing");
+            return;
+        }
+        float *f = static_cast<float *>(mConstants[0]->getPtr());
+        Matrix mvp;
+        mvp.load(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET]);
+        Matrix t;
+        t.load(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET]);
+        mvp.multiply(&t);
+        for(uint32_t i = 0; i < 16; i ++) {
+            f[RS_PROGRAM_VERTEX_MVP_OFFSET + i] = mvp.m[i];
+        }
     }
 
     rsc->checkError("ProgramVertex::setupGL2 begin uniforms");
-    uint32_t uidx = 1;
-    for (uint32_t ct=0; ct < mConstantCount; ct++) {
-        Allocation *alloc = mConstants[ct+1].get();
-        if (!alloc) {
-            continue;
-        }
-
-        const uint8_t *data = static_cast<const uint8_t *>(alloc->getPtr());
-        const Element *e = mConstantTypes[ct]->getElement();
-        for (uint32_t field=0; field < e->getFieldCount(); field++) {
-            const Element *f = e->getField(field);
-            uint32_t offset = e->getFieldOffsetBytes(field);
-            int32_t slot = sc->vtxUniformSlot(uidx);
-
-            const float *fd = reinterpret_cast<const float *>(&data[offset]);
-
-            //LOGE("Uniform  slot=%i, offset=%i, constant=%i, field=%i, uidx=%i", slot, offset, ct, field, uidx);
-            if (slot >= 0) {
-                switch(f->getComponent().getVectorSize()) {
-                case 1:
-                    //LOGE("Uniform 1 = %f", fd[0]);
-                    glUniform1fv(slot, 1, fd);
-                    break;
-                case 2:
-                    //LOGE("Uniform 2 = %f %f", fd[0], fd[1]);
-                    glUniform2fv(slot, 1, fd);
-                    break;
-                case 3:
-                    //LOGE("Uniform 3 = %f %f %f", fd[0], fd[1], fd[2]);
-                    glUniform3fv(slot, 1, fd);
-                    break;
-                case 4:
-                    //LOGE("Uniform 4 = %f %f %f %f", fd[0], fd[1], fd[2], fd[3]);
-                    glUniform4fv(slot, 1, fd);
-                    break;
-                default:
-                    rsAssert(0);
-                }
-            }
-            uidx ++;
-        }
-    }
-
-    for (uint32_t ct=0; ct < mConstantCount; ct++) {
-        uint32_t glSlot = sc->vtxUniformSlot(ct + 1);
-
-    }
+    setupUserConstants(rsc, sc, false);
 
     state->mLast.set(this);
     rsc->checkError("ProgramVertex::setupGL2");
 }
 
-void ProgramVertex::addLight(const Light *l)
+void ProgramVertex::setProjectionMatrix(Context *rsc, const rsc_Matrix *m) const
 {
-    if (mLightCount < MAX_LIGHTS) {
-        mLights[mLightCount].set(l);
-        mLightCount++;
+    if(isUserProgram()) {
+        LOGE("Attempting to set fixed function emulation matrix projection on user program");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot set emulation matrix on user shader");
+        return;
     }
-}
-
-void ProgramVertex::setProjectionMatrix(const rsc_Matrix *m) const
-{
+    if(mConstants[0].get() == NULL) {
+        LOGE("Unable to set fixed function emulation matrix projection because allocation is missing");
+        return;
+    }
     float *f = static_cast<float *>(mConstants[0]->getPtr());
     memcpy(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET], m, sizeof(rsc_Matrix));
     mDirty = true;
 }
 
-void ProgramVertex::setModelviewMatrix(const rsc_Matrix *m) const
+void ProgramVertex::setModelviewMatrix(Context *rsc, const rsc_Matrix *m) const
 {
+    if(isUserProgram()) {
+        LOGE("Attempting to set fixed function emulation matrix modelview on user program");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot set emulation matrix on user shader");
+        return;
+    }
+    if(mConstants[0].get() == NULL) {
+        LOGE("Unable to set fixed function emulation matrix modelview because allocation is missing");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Fixed function allocation missing");
+        return;
+    }
     float *f = static_cast<float *>(mConstants[0]->getPtr());
     memcpy(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET], m, sizeof(rsc_Matrix));
     mDirty = true;
 }
 
-void ProgramVertex::setTextureMatrix(const rsc_Matrix *m) const
+void ProgramVertex::setTextureMatrix(Context *rsc, const rsc_Matrix *m) const
 {
+    if(isUserProgram()) {
+        LOGE("Attempting to set fixed function emulation matrix texture on user program");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot set emulation matrix on user shader");
+        return;
+    }
+    if(mConstants[0].get() == NULL) {
+        LOGE("Unable to set fixed function emulation matrix texture because allocation is missing");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Fixed function allocation missing");
+        return;
+    }
     float *f = static_cast<float *>(mConstants[0]->getPtr());
     memcpy(&f[RS_PROGRAM_VERTEX_TEXTURE_OFFSET], m, sizeof(rsc_Matrix));
     mDirty = true;
 }
 
-void ProgramVertex::transformToScreen(const Context *rsc, float *v4out, const float *v3in) const
+void ProgramVertex::getProjectionMatrix(Context *rsc, rsc_Matrix *m) const
 {
+    if(isUserProgram()) {
+        LOGE("Attempting to get fixed function emulation matrix projection on user program");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Cannot get emulation matrix on user shader");
+        return;
+    }
+    if(mConstants[0].get() == NULL) {
+        LOGE("Unable to get fixed function emulation matrix projection because allocation is missing");
+        rsc->setError(RS_ERROR_BAD_SHADER, "Fixed function allocation missing");
+        return;
+    }
+    float *f = static_cast<float *>(mConstants[0]->getPtr());
+    memcpy(m, &f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET], sizeof(rsc_Matrix));
+}
+
+void ProgramVertex::transformToScreen(Context *rsc, float *v4out, const float *v3in) const
+{
+    if(isUserProgram()) {
+        return;
+    }
     float *f = static_cast<float *>(mConstants[0]->getPtr());
     Matrix mvp;
     mvp.loadMultiply((Matrix *)&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET],
@@ -312,45 +203,31 @@
     mvp.vectorMultiply(v4out, v3in);
 }
 
-void ProgramVertex::initAddUserElement(const Element *e, String8 *names, uint32_t *count, const char *prefix)
-{
-    rsAssert(e->getFieldCount());
-    for (uint32_t ct=0; ct < e->getFieldCount(); ct++) {
-        const Element *ce = e->getField(ct);
-        if (ce->getFieldCount()) {
-            initAddUserElement(ce, names, count, prefix);
-        } else {
-            String8 tmp(prefix);
-            tmp.append(e->getFieldName(ct));
-            names[*count].setTo(tmp.string());
-            (*count)++;
-        }
-    }
-}
-
-
 void ProgramVertex::init(Context *rsc)
 {
     mAttribCount = 0;
     if (mUserShader.size() > 0) {
         for (uint32_t ct=0; ct < mInputCount; ct++) {
-            initAddUserElement(mInputElements[ct].get(), mAttribNames, &mAttribCount, "ATTRIB_");
+            initAddUserElement(mInputElements[ct].get(), mAttribNames, &mAttribCount, RS_SHADER_ATTR);
         }
-
-        mUniformCount = 1;
-        mUniformNames[0].setTo("UNI_MVP");
+        mUniformCount = 0;
         for (uint32_t ct=0; ct < mConstantCount; ct++) {
-            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, &mUniformCount, "UNI_");
+            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, &mUniformCount, RS_SHADER_UNI);
         }
-    } else {
-        mUniformCount = 2;
-        mUniformNames[0].setTo("UNI_MVP");
-        mUniformNames[1].setTo("UNI_TexMatrix");
     }
-
     createShader();
 }
 
+void ProgramVertex::serialize(OStream *stream) const
+{
+
+}
+
+ProgramVertex *ProgramVertex::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL;
+}
+
 
 ///////////////////////////////////////////////////////////////////////
 
@@ -362,44 +239,80 @@
 {
 }
 
-void ProgramVertexState::init(Context *rsc, int32_t w, int32_t h)
+void ProgramVertexState::init(Context *rsc)
 {
-    RsElement e = (RsElement) Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 1);
+    const Element *matrixElem = Element::create(rsc, RS_TYPE_MATRIX_4X4, RS_KIND_USER, false, 1);
+    const Element *f2Elem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 2);
+    const Element *f3Elem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 3);
+    const Element *f4Elem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
 
-    rsi_TypeBegin(rsc, e);
-    rsi_TypeAdd(rsc, RS_DIMENSION_X, 48);
-    mAllocType.set((Type *)rsi_TypeCreate(rsc));
+    rsc->mStateElement.elementBuilderBegin();
+    rsc->mStateElement.elementBuilderAdd(matrixElem, "MV", 1);
+    rsc->mStateElement.elementBuilderAdd(matrixElem, "P", 1);
+    rsc->mStateElement.elementBuilderAdd(matrixElem, "TexMatrix", 1);
+    rsc->mStateElement.elementBuilderAdd(matrixElem, "MVP", 1);
+    const Element *constInput = rsc->mStateElement.elementBuilderCreate(rsc);
 
-    ProgramVertex *pv = new ProgramVertex(rsc, false);
-    Allocation *alloc = (Allocation *)rsi_AllocationCreateTyped(rsc, mAllocType.get());
+    rsc->mStateElement.elementBuilderBegin();
+    rsc->mStateElement.elementBuilderAdd(f4Elem, "position", 1);
+    rsc->mStateElement.elementBuilderAdd(f4Elem, "color", 1);
+    rsc->mStateElement.elementBuilderAdd(f3Elem, "normal", 1);
+    rsc->mStateElement.elementBuilderAdd(f2Elem, "texture0", 1);
+    const Element *attrElem = rsc->mStateElement.elementBuilderCreate(rsc);
+
+    Type *inputType = new Type(rsc);
+    inputType->setElement(constInput);
+    inputType->setDimX(1);
+    inputType->compute();
+
+    String8 shaderString(RS_SHADER_INTERNAL);
+    shaderString.append("varying vec4 varColor;\n");
+    shaderString.append("varying vec2 varTex0;\n");
+    shaderString.append("void main() {\n");
+    shaderString.append("  gl_Position = UNI_MVP * ATTRIB_position;\n");
+    shaderString.append("  gl_PointSize = 1.0;\n");
+    shaderString.append("  varColor = ATTRIB_color;\n");
+    shaderString.append("  varTex0 = ATTRIB_texture0;\n");
+    shaderString.append("}\n");
+
+    uint32_t tmp[6];
+    tmp[0] = RS_PROGRAM_PARAM_CONSTANT;
+    tmp[1] = (uint32_t)inputType;
+    tmp[2] = RS_PROGRAM_PARAM_INPUT;
+    tmp[3] = (uint32_t)attrElem;
+    tmp[4] = RS_PROGRAM_PARAM_TEXTURE_COUNT;
+    tmp[5] = 0;
+
+    ProgramVertex *pv = new ProgramVertex(rsc, shaderString.string(),
+                                          shaderString.length(), tmp, 6);
+    Allocation *alloc = new Allocation(rsc, inputType);
+    pv->bindAllocation(rsc, alloc, 0);
+
     mDefaultAlloc.set(alloc);
     mDefault.set(pv);
-    pv->init(rsc);
-    pv->bindAllocation(alloc, 0);
 
-    color[0] = 1.f;
-    color[1] = 1.f;
-    color[2] = 1.f;
-    color[3] = 1.f;
+    updateSize(rsc);
 
-    updateSize(rsc, w, h);
 }
 
-void ProgramVertexState::updateSize(Context *rsc, int32_t w, int32_t h)
+void ProgramVertexState::updateSize(Context *rsc)
 {
+    float *f = static_cast<float *>(mDefaultAlloc->getPtr());
+
     Matrix m;
-    m.loadOrtho(0,w, h,0, -1,1);
-    mDefaultAlloc->subData(RS_PROGRAM_VERTEX_PROJECTION_OFFSET, 16, &m.m[0], 16*4);
+    m.loadOrtho(0,rsc->getWidth(), rsc->getHeight(),0, -1,1);
+    memcpy(&f[RS_PROGRAM_VERTEX_PROJECTION_OFFSET], m.m, sizeof(m));
+    memcpy(&f[RS_PROGRAM_VERTEX_MVP_OFFSET], m.m, sizeof(m));
 
     m.loadIdentity();
-    mDefaultAlloc->subData(RS_PROGRAM_VERTEX_MODELVIEW_OFFSET, 16, &m.m[0], 16*4);
+    memcpy(&f[RS_PROGRAM_VERTEX_MODELVIEW_OFFSET], m.m, sizeof(m));
+    memcpy(&f[RS_PROGRAM_VERTEX_TEXTURE_OFFSET], m.m, sizeof(m));
 }
 
 void ProgramVertexState::deinit(Context *rsc)
 {
     mDefaultAlloc.clear();
     mDefault.clear();
-    mAllocType.clear();
     mLast.clear();
 }
 
@@ -407,15 +320,7 @@
 namespace android {
 namespace renderscript {
 
-
-RsProgramVertex rsi_ProgramVertexCreate(Context *rsc, bool texMat)
-{
-    ProgramVertex *pv = new ProgramVertex(rsc, texMat);
-    pv->incUserRef();
-    return pv;
-}
-
-RsProgramVertex rsi_ProgramVertexCreate2(Context *rsc, const char * shaderText,
+RsProgramVertex rsi_ProgramVertexCreate(Context *rsc, const char * shaderText,
                              uint32_t shaderLength, const uint32_t * params,
                              uint32_t paramLength)
 {
diff --git a/rsProgramVertex.h b/rsProgramVertex.h
index 28554cc..355df2b 100644
--- a/rsProgramVertex.h
+++ b/rsProgramVertex.h
@@ -28,40 +28,26 @@
 class ProgramVertex : public Program
 {
 public:
-    const static uint32_t MAX_LIGHTS = 8;
-
     ProgramVertex(Context *,const char * shaderText, uint32_t shaderLength,
                   const uint32_t * params, uint32_t paramLength);
-    ProgramVertex(Context *, bool texMat);
     virtual ~ProgramVertex();
 
-    virtual void setupGL(const Context *rsc, ProgramVertexState *state);
-    virtual void setupGL2(const Context *rsc, ProgramVertexState *state, ShaderCache *sc);
+    virtual void setupGL2(Context *rsc, ProgramVertexState *state, ShaderCache *sc);
 
+    void setProjectionMatrix(Context *, const rsc_Matrix *) const;
+    void getProjectionMatrix(Context *, rsc_Matrix *) const;
+    void setModelviewMatrix(Context *, const rsc_Matrix *) const;
+    void setTextureMatrix(Context *, const rsc_Matrix *) const;
 
-    void setTextureMatrixEnable(bool e) {mTextureMatrixEnable = e;}
-    void addLight(const Light *);
-
-    void setProjectionMatrix(const rsc_Matrix *) const;
-    void setModelviewMatrix(const rsc_Matrix *) const;
-    void setTextureMatrix(const rsc_Matrix *) const;
-
-    void transformToScreen(const Context *, float *v4out, const float *v3in) const;
+    void transformToScreen(Context *, float *v4out, const float *v3in) const;
 
     virtual void createShader();
     virtual void loadShader(Context *);
     virtual void init(Context *);
 
-
-protected:
-    uint32_t mLightCount;
-    ObjectBaseRef<const Light> mLights[MAX_LIGHTS];
-
-    // Hacks to create a program for now
-    bool mTextureMatrixEnable;
-
-private:
-    void initAddUserElement(const Element *e, String8 *names, uint32_t *count, const char *prefix);
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_VERTEX; }
+    static ProgramVertex *createFromStream(Context *rsc, IStream *stream);
 };
 
 
@@ -71,18 +57,13 @@
     ProgramVertexState();
     ~ProgramVertexState();
 
-    void init(Context *rsc, int32_t w, int32_t h);
+    void init(Context *rsc);
     void deinit(Context *rsc);
-    void updateSize(Context *rsc, int32_t w, int32_t h);
+    void updateSize(Context *rsc);
 
     ObjectBaseRef<ProgramVertex> mDefault;
     ObjectBaseRef<ProgramVertex> mLast;
     ObjectBaseRef<Allocation> mDefaultAlloc;
-
-    ObjectBaseRef<Type> mAllocType;
-
-
-    float color[4];
 };
 
 
diff --git a/rsSampler.cpp b/rsSampler.cpp
index 71f508f..180d78e 100644
--- a/rsSampler.cpp
+++ b/rsSampler.cpp
@@ -14,10 +14,16 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include <GLES/gl.h>
 #include <GLES/glext.h>
-
 #include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#include <OpenGL/glext.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
+
 #include "rsSampler.h"
 
 
@@ -38,7 +44,8 @@
                  RsSamplerValue minFilter,
                  RsSamplerValue wrapS,
                  RsSamplerValue wrapT,
-                 RsSamplerValue wrapR) : ObjectBase(rsc)
+                 RsSamplerValue wrapR,
+                 float aniso) : ObjectBase(rsc)
 {
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
@@ -47,13 +54,14 @@
     mWrapS = wrapS;
     mWrapT = wrapT;
     mWrapR = wrapR;
+    mAniso = aniso;
 }
 
 Sampler::~Sampler()
 {
 }
 
-void Sampler::setupGL(const Context *rsc, bool npot)
+void Sampler::setupGL(const Context *rsc, const Allocation *tex)
 {
     GLenum trans[] = {
         GL_NEAREST, //RS_SAMPLER_NEAREST,
@@ -61,25 +69,38 @@
         GL_LINEAR_MIPMAP_LINEAR, //RS_SAMPLER_LINEAR_MIP_LINEAR,
         GL_REPEAT, //RS_SAMPLER_WRAP,
         GL_CLAMP_TO_EDGE, //RS_SAMPLER_CLAMP
-
     };
 
-    bool forceNonMip = false;
-    if (!rsc->ext_OES_texture_npot() && npot) {
-        forceNonMip = true;
-    }
+    GLenum transNP[] = {
+        GL_NEAREST, //RS_SAMPLER_NEAREST,
+        GL_LINEAR, //RS_SAMPLER_LINEAR,
+        GL_LINEAR, //RS_SAMPLER_LINEAR_MIP_LINEAR,
+        GL_CLAMP_TO_EDGE, //RS_SAMPLER_WRAP,
+        GL_CLAMP_TO_EDGE, //RS_SAMPLER_CLAMP
+    };
 
-    if ((mMinFilter == RS_SAMPLER_LINEAR_MIP_LINEAR) && forceNonMip) {
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    if (!rsc->ext_OES_texture_npot() && tex->getType()->getIsNp2()) {
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, transNP[mMinFilter]);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, transNP[mMagFilter]);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, transNP[mWrapS]);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, transNP[mWrapT]);
     } else {
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, trans[mMinFilter]);
+        if (tex->getHasGraphicsMipmaps()) {
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, trans[mMinFilter]);
+        } else {
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, transNP[mMinFilter]);
+        }
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, trans[mMagFilter]);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, trans[mWrapS]);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, trans[mWrapT]);
     }
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, trans[mMagFilter]);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, trans[mWrapS]);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, trans[mWrapT]);
 
+    float anisoValue = rsMin(rsc->ext_texture_max_aniso(), mAniso);
+    if(rsc->ext_texture_max_aniso() > 1.0f) {
+        glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY_EXT, anisoValue);
+    }
 
-    rsc->checkError("ProgramFragment::setupGL2 tex env");
+    rsc->checkError("Sampler::setupGL2 tex env");
 }
 
 void Sampler::bindToContext(SamplerState *ss, uint32_t slot)
@@ -94,6 +115,17 @@
     mBoundSlot = -1;
     ss->mSamplers[slot].clear();
 }
+
+void Sampler::serialize(OStream *stream) const
+{
+    // Empty body: nothing is written to stream.
+}
+
+Sampler *Sampler::createFromStream(Context *rsc, IStream *stream)
+{
+    return NULL; // Stub: always returns NULL; stream is never read.
+}
+
 /*
 void SamplerState::setupGL()
 {
@@ -122,6 +154,7 @@
     ss->mWrapS = RS_SAMPLER_WRAP;
     ss->mWrapT = RS_SAMPLER_WRAP;
     ss->mWrapR = RS_SAMPLER_WRAP;
+    ss->mAniso = 1.0f;
 }
 
 void rsi_SamplerSet(Context *rsc, RsSamplerParam param, RsSamplerValue value)
@@ -144,21 +177,37 @@
     case RS_SAMPLER_WRAP_R:
         ss->mWrapR = value;
         break;
+    default:
+        LOGE("Attempting to set invalid value on sampler");
+        break;
     }
+}
 
+void rsi_SamplerSet2(Context *rsc, RsSamplerParam param, float value)
+{
+    SamplerState * ss = &rsc->mStateSampler;
+
+    switch(param) {
+    case RS_SAMPLER_ANISO:
+        ss->mAniso = value;
+        break;
+    default:
+        LOGE("Attempting to set invalid value on sampler");
+        break;
+    }
 }
 
 RsSampler rsi_SamplerCreate(Context *rsc)
 {
     SamplerState * ss = &rsc->mStateSampler;
 
-
     Sampler * s = new Sampler(rsc,
                               ss->mMagFilter,
                               ss->mMinFilter,
                               ss->mWrapS,
                               ss->mWrapT,
-                              ss->mWrapR);
+                              ss->mWrapR,
+                              ss->mAniso);
     s->incUserRef();
     return s;
 }
diff --git a/rsSampler.h b/rsSampler.h
index 0506081..4946355 100644
--- a/rsSampler.h
+++ b/rsSampler.h
@@ -36,22 +36,28 @@
             RsSamplerValue minFilter,
             RsSamplerValue wrapS,
             RsSamplerValue wrapT,
-            RsSamplerValue wrapR);
+            RsSamplerValue wrapR,
+            float aniso = 1.0f);
 
     virtual ~Sampler();
 
     void bind(Allocation *);
-    void setupGL(const Context *, bool npot);
+    void setupGL(const Context *, const Allocation *);
 
     void bindToContext(SamplerState *, uint32_t slot);
     void unbindFromContext(SamplerState *);
 
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SAMPLER; }
+    static Sampler *createFromStream(Context *rsc, IStream *stream);
+
 protected:
     RsSamplerValue mMagFilter;
     RsSamplerValue mMinFilter;
     RsSamplerValue mWrapS;
     RsSamplerValue mWrapT;
     RsSamplerValue mWrapR;
+    float mAniso;
 
     int32_t mBoundSlot;
 
@@ -70,6 +76,7 @@
     RsSamplerValue mWrapS;
     RsSamplerValue mWrapT;
     RsSamplerValue mWrapR;
+    float mAniso;
 
 
     ObjectBaseRef<Sampler> mSamplers[RS_MAX_SAMPLER_SLOT];
diff --git a/rsScript.cpp b/rsScript.cpp
index a33933b..c5632b5 100644
--- a/rsScript.cpp
+++ b/rsScript.cpp
@@ -24,17 +24,52 @@
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
     memset(&mEnviroment, 0, sizeof(mEnviroment));
-    mEnviroment.mClearColor[0] = 0;
-    mEnviroment.mClearColor[1] = 0;
-    mEnviroment.mClearColor[2] = 0;
-    mEnviroment.mClearColor[3] = 1;
-    mEnviroment.mClearDepth = 1;
-    mEnviroment.mClearStencil = 0;
-    mEnviroment.mIsRoot = false;
+
+    mSlots = NULL;
+    mTypes = NULL;
 }
 
 Script::~Script()
 {
+    if(mSlots) {
+        delete [] mSlots;
+        mSlots = NULL;
+    }
+    if(mTypes) {
+        delete [] mTypes;
+        mTypes = NULL;
+    }
+}
+
+void Script::initSlots() {
+    if(mEnviroment.mFieldCount > 0) {
+        mSlots = new ObjectBaseRef<Allocation>[mEnviroment.mFieldCount];
+        mTypes = new ObjectBaseRef<const Type>[mEnviroment.mFieldCount];
+    }
+}
+
+void Script::setSlot(uint32_t slot, Allocation *a) {
+    if(slot >= mEnviroment.mFieldCount) {
+        LOGE("Script::setSlot unable to set allocation, invalid slot index");
+        return;
+    }
+    // slot is below mFieldCount here, so it indexes a valid mSlots entry.
+    mSlots[slot].set(a);
+}
+
+void Script::setVar(uint32_t slot, const void *val, uint32_t len)
+{
+    // Copies len bytes from val into the field at slot; bad slots are rejected as in setSlot().
+    if (slot >= mEnviroment.mFieldCount) {
+        LOGE("Script::setVar unable to set variable, invalid slot index");
+        return;
+    }
+    int32_t *destPtr = ((int32_t **)mEnviroment.mFieldAddress)[slot];
+    if (destPtr) {
+        memcpy(destPtr, val, len);
+    } else {
+        LOGV("Calling setVar on slot = %i which is null", slot);
+    }
+}
 
 namespace android {
@@ -44,16 +79,9 @@
 void rsi_ScriptBindAllocation(Context * rsc, RsScript vs, RsAllocation va, uint32_t slot)
 {
     Script *s = static_cast<Script *>(vs);
-    s->mSlots[slot].set(static_cast<Allocation *>(va));
-}
-
-void rsi_ScriptSetClearColor(Context * rsc, RsScript vs, float r, float g, float b, float a)
-{
-    Script *s = static_cast<Script *>(vs);
-    s->mEnviroment.mClearColor[0] = r;
-    s->mEnviroment.mClearColor[1] = g;
-    s->mEnviroment.mClearColor[2] = b;
-    s->mEnviroment.mClearColor[3] = a;
+    Allocation *a = static_cast<Allocation *>(va);
+    s->setSlot(slot, a);
+    //LOGE("rsi_ScriptBindAllocation %i  %p  %p", slot, a, a->getPtr());
 }
 
 void rsi_ScriptSetTimeZone(Context * rsc, RsScript vs, const char * timeZone, uint32_t length)
@@ -62,53 +90,54 @@
     s->mEnviroment.mTimeZone = timeZone;
 }
 
-void rsi_ScriptSetClearDepth(Context * rsc, RsScript vs, float v)
-{
-    Script *s = static_cast<Script *>(vs);
-    s->mEnviroment.mClearDepth = v;
-}
-
-void rsi_ScriptSetClearStencil(Context * rsc, RsScript vs, uint32_t v)
-{
-    Script *s = static_cast<Script *>(vs);
-    s->mEnviroment.mClearStencil = v;
-}
-
-void rsi_ScriptSetType(Context * rsc, RsType vt, uint32_t slot, bool writable, const char *name)
-{
-    ScriptCState *ss = &rsc->mScriptC;
-    const Type *t = static_cast<const Type *>(vt);
-    ss->mConstantBufferTypes[slot].set(t);
-    ss->mSlotWritable[slot] = writable;
-    if (name) {
-        ss->mSlotNames[slot].setTo(name);
-    } else {
-        ss->mSlotNames[slot].setTo("");
-    }
-}
-
-void rsi_ScriptSetInvoke(Context *rsc, const char *name, uint32_t slot)
-{
-    ScriptCState *ss = &rsc->mScriptC;
-    ss->mInvokableNames[slot] = name;
-}
-
 void rsi_ScriptInvoke(Context *rsc, RsScript vs, uint32_t slot)
 {
     Script *s = static_cast<Script *>(vs);
-    if (s->mEnviroment.mInvokables[slot] == NULL) {
-        rsc->setError(RS_ERROR_BAD_SCRIPT, "Calling invoke on bad script");
-        return;
-    }
-    s->setupScript();
-    s->mEnviroment.mInvokables[slot]();
+    s->Invoke(rsc, slot, NULL, 0);
 }
 
 
-void rsi_ScriptSetRoot(Context * rsc, bool isRoot)
+void rsi_ScriptInvokeData(Context *rsc, RsScript vs, uint32_t slot, void *data)
 {
-    ScriptCState *ss = &rsc->mScriptC;
-    ss->mScript->mEnviroment.mIsRoot = isRoot;
+    Script *s = static_cast<Script *>(vs);
+    s->Invoke(rsc, slot, NULL, 0); // NOTE(review): 'data' is not forwarded (NULL, 0 is passed) - confirm intended
+}
+
+void rsi_ScriptInvokeV(Context *rsc, RsScript vs, uint32_t slot, const void *data, uint32_t len)
+{
+    Script *s = static_cast<Script *>(vs);
+    s->Invoke(rsc, slot, data, len);
+}
+
+void rsi_ScriptSetVarI(Context *rsc, RsScript vs, uint32_t slot, int value)
+{
+    Script *s = static_cast<Script *>(vs);
+    s->setVar(slot, &value, sizeof(value));
+}
+
+void rsi_ScriptSetVarJ(Context *rsc, RsScript vs, uint32_t slot, long long value)
+{
+    Script *s = static_cast<Script *>(vs);
+    s->setVar(slot, &value, sizeof(value));
+}
+
+void rsi_ScriptSetVarF(Context *rsc, RsScript vs, uint32_t slot, float value)
+{
+    Script *s = static_cast<Script *>(vs);
+    s->setVar(slot, &value, sizeof(value));
+}
+
+void rsi_ScriptSetVarD(Context *rsc, RsScript vs, uint32_t slot, double value)
+{
+    Script *s = static_cast<Script *>(vs);
+    s->setVar(slot, &value, sizeof(value));
+}
+
+void rsi_ScriptSetVarV(Context *rsc, RsScript vs, uint32_t slot, const void *data, uint32_t len)
+{
+    // Forwards the raw buffer (data, len) to Script::setVar for the given slot.
+    Script *s = static_cast<Script *>(vs);
+    s->setVar(slot, data, len);
 }
 
 
diff --git a/rsScript.h b/rsScript.h
index 5f4a536..c73bb5e 100644
--- a/rsScript.h
+++ b/rsScript.h
@@ -27,9 +27,7 @@
 class ProgramVertex;
 class ProgramFragment;
 class ProgramRaster;
-class ProgramFragmentStore;
-
-#define MAX_SCRIPT_BANKS 16
+class ProgramStore;
 
 class Script : public ObjectBase
 {
@@ -39,38 +37,45 @@
     Script(Context *);
     virtual ~Script();
 
-
     struct Enviroment_t {
-        bool mIsRoot;
-        float mClearColor[4];
-        float mClearDepth;
-        uint32_t mClearStencil;
-
-        uint32_t mStartTimeMillis;
+        int64_t mStartTimeMillis;
+        int64_t mLastDtTime;
         const char* mTimeZone;
 
         ObjectBaseRef<ProgramVertex> mVertex;
         ObjectBaseRef<ProgramFragment> mFragment;
         ObjectBaseRef<ProgramRaster> mRaster;
-        ObjectBaseRef<ProgramFragmentStore> mFragmentStore;
-        InvokeFunc_t mInvokables[MAX_SCRIPT_BANKS];
+        ObjectBaseRef<ProgramStore> mFragmentStore;
+
+        uint32_t mInvokeFunctionCount;
+        InvokeFunc_t *mInvokeFunctions;
+        uint32_t mFieldCount;
+        void ** mFieldAddress;
+
         char * mScriptText;
         uint32_t mScriptTextLength;
+
+        bool mIsThreadable;
     };
     Enviroment_t mEnviroment;
 
-    uint32_t mCounstantBufferCount;
+    void initSlots();
+    void setSlot(uint32_t slot, Allocation *a);
+    void setVar(uint32_t slot, const void *val, uint32_t len);
 
+    virtual void runForEach(Context *rsc,
+                            const Allocation * ain,
+                            Allocation * aout,
+                            const void * usr,
+                            const RsScriptCall *sc = NULL) = 0;
 
-    ObjectBaseRef<Allocation> mSlots[MAX_SCRIPT_BANKS];
-    ObjectBaseRef<const Type> mTypes[MAX_SCRIPT_BANKS];
-    String8 mSlotNames[MAX_SCRIPT_BANKS];
-    bool mSlotWritable[MAX_SCRIPT_BANKS];
+    virtual void Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) = 0;
+    virtual void setupScript(Context *rsc) = 0;
+    virtual uint32_t run(Context *) = 0;
+protected:
+    ObjectBaseRef<Allocation> *mSlots;
+    ObjectBaseRef<const Type> *mTypes;
 
-
-
-    virtual void setupScript() = 0;
-    virtual uint32_t run(Context *, uint32_t launchID) = 0;
 };
 
 
diff --git a/rsScriptC.cpp b/rsScriptC.cpp
index f4d2451..a2910d7 100644
--- a/rsScriptC.cpp
+++ b/rsScriptC.cpp
@@ -17,9 +17,9 @@
 #include "rsContext.h"
 #include "rsScriptC.h"
 #include "rsMatrix.h"
-
-#include "acc/acc.h"
+#include "../../compile/libbcc/include/bcc/bcc.h"
 #include "utils/Timers.h"
+#include "utils/StopWatch.h"
 
 #include <GLES/gl.h>
 #include <GLES/glext.h>
@@ -37,40 +37,86 @@
 {
     mAllocFile = __FILE__;
     mAllocLine = __LINE__;
-    mAccScript = NULL;
+    mBccScript = NULL;
     memset(&mProgram, 0, sizeof(mProgram));
 }
 
 ScriptC::~ScriptC()
 {
-    if (mAccScript) {
-        accDeleteScript(mAccScript);
+    if (mBccScript) {
+        bccDeleteScript(mBccScript);
     }
     free(mEnviroment.mScriptText);
     mEnviroment.mScriptText = NULL;
 }
 
-void ScriptC::setupScript()
+void ScriptC::setupScript(Context *rsc)
 {
-    for (int ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
-        if (mProgram.mSlotPointers[ct]) {
-            *mProgram.mSlotPointers[ct] = mSlots[ct]->getPtr();
+    // Applies the script's GL state, stamps the start time, then writes each
+    // bound allocation's data pointer into the matching exported field address.
+    setupGLState(rsc);
+    mEnviroment.mStartTimeMillis
+                = nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
+
+    for (uint32_t ct=0; ct < mEnviroment.mFieldCount; ct++) {
+        // Adopt the bound allocation's type if this slot has none recorded yet.
+        if (mSlots[ct].get() && !mTypes[ct].get()) {
+            mTypes[ct].set(mSlots[ct]->getType());
+        }
+
+        if (!mTypes[ct].get())
+            continue;
+        void *ptr = NULL;
+        if (mSlots[ct].get()) {
+            ptr = mSlots[ct]->getPtr();
+        }
+        void **dest = ((void ***)mEnviroment.mFieldAddress)[ct];
+
+        if (rsc->props.mLogScripts) {
+            // The slot can be empty here even though its type is known, so guard the dereference.
+            LOGV("%p ScriptC::setupScript slot=%i  dst=%p  src=%p  type=%p", rsc, ct, dest, ptr, mSlots[ct].get() ? mSlots[ct]->getType() : NULL);
+        }
+
+        // Skip the store when no address was recorded for this field.
+        if (dest) {
+            *dest = ptr;
+        } else {
+            if (rsc->props.mLogScripts) {
+                LOGV("ScriptC::setupScript, NULL var binding address.");
+            }
         }
     }
 }
 
-
-uint32_t ScriptC::run(Context *rsc, uint32_t launchIndex)
+const Allocation *ScriptC::ptrToAllocation(const void *ptr) const
 {
-    if (mProgram.mScript == NULL) {
-        rsc->setError(RS_ERROR_BAD_SCRIPT, "Attempted to run bad script");
-        return 0;
+    if (!ptr) {
+        return NULL;
     }
+    for (uint32_t ct=0; ct < mEnviroment.mFieldCount; ct++) {
+        if (!mSlots[ct].get())
+            continue;
+        if (mSlots[ct]->getPtr() == ptr) {
+            return mSlots[ct].get();
+        }
+    }
+    LOGE("ScriptC::ptrToAllocation, failed to find %p", ptr);
+    return NULL;
+}
 
-    Context::ScriptTLSStruct * tls =
-    (Context::ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey);
+Script * ScriptC::setTLS(Script *sc)
+{
+    Context::ScriptTLSStruct * tls = (Context::ScriptTLSStruct *)
+                                  pthread_getspecific(Context::gThreadTLSKey);
     rsAssert(tls);
+    Script *old = tls->mScript;
+    tls->mScript = sc;
+    return old;
+}
 
+
+void ScriptC::setupGLState(Context *rsc)
+{
     if (mEnviroment.mFragmentStore.get()) {
         rsc->setFragmentStore(mEnviroment.mFragmentStore.get());
     }
@@ -83,51 +129,262 @@
     if (mEnviroment.mRaster.get()) {
         rsc->setRaster(mEnviroment.mRaster.get());
     }
+}
 
-    if (launchIndex == 0) {
-        mEnviroment.mStartTimeMillis
-                = nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
+uint32_t ScriptC::run(Context *rsc)
+{
+    if (mProgram.mRoot == NULL) {
+        rsc->setError(RS_ERROR_BAD_SCRIPT, "Attempted to run bad script");
+        return 0;
     }
-    setupScript();
+
+    setupScript(rsc);
 
     uint32_t ret = 0;
-    tls->mScript = this;
-    ret = mProgram.mScript(launchIndex);
-    tls->mScript = NULL;
+    Script * oldTLS = setTLS(this);
+
+    if (rsc->props.mLogScripts) {
+        LOGV("%p ScriptC::run invoking root,  ptr %p", rsc, mProgram.mRoot);
+    }
+
+    ret = mProgram.mRoot();
+
+    if (rsc->props.mLogScripts) {
+        LOGV("%p ScriptC::run invoking complete, ret=%i", rsc, ret);
+    }
+
+    setTLS(oldTLS);
     return ret;
 }
 
+
+typedef struct {
+    Context *rsc;
+    ScriptC *script;
+    const Allocation * ain;
+    Allocation * aout;
+    const void * usr;
+    // Work splitting for wc_xy: rows per slice and the shared next-slice counter.
+    uint32_t mSliceSize;
+    volatile int mSliceNum;
+    // Base data pointers and per-element byte strides of the input/output allocations.
+    const uint8_t *ptrIn;
+    uint32_t eStrideIn;
+    uint8_t *ptrOut;
+    uint32_t eStrideOut;
+    // Iteration bounds; the launch loops run from each *Start up to (not including) *End.
+    uint32_t xStart;
+    uint32_t xEnd;
+    uint32_t yStart;
+    uint32_t yEnd;
+    uint32_t zStart;
+    uint32_t zEnd;
+    uint32_t arrayStart;
+    uint32_t arrayEnd;
+    // Dimensions copied from the type of the allocation that defines the launch.
+    uint32_t dimX;
+    uint32_t dimY;
+    uint32_t dimZ;
+    uint32_t dimArray;
+} MTLaunchStruct;
+typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
+
+static void wc_xy(void *usr, uint32_t idx)
+{
+    // Worker callback: repeatedly takes a slice of mSliceSize rows and runs root on each cell.
+    MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
+    while (1) {
+        // android_atomic_inc hands out the next slice index from the shared mSliceNum counter.
+        uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
+        uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
+        uint32_t yEnd = yStart + mtls->mSliceSize;
+        yEnd = rsMin(yEnd, mtls->yEnd);
+        if (yEnd <= yStart) {
+            return;
+        }
+
+        for (uint32_t y = yStart; y < yEnd; y++) {
+            // Start at xStart so the element pointers match the x index passed to root.
+            uint32_t offset = mtls->dimX * y + mtls->xStart;
+            uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
+            const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
+
+            for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) {
+                ((rs_t)mtls->script->mProgram.mRoot) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0);
+                xPtrIn += mtls->eStrideIn;
+                xPtrOut += mtls->eStrideOut;
+            }
+        }
+    }
+
+}
+
+void ScriptC::runForEach(Context *rsc,
+                         const Allocation * ain,
+                         Allocation * aout,
+                         const void * usr,
+                         const RsScriptCall *sc)
+{
+    // Runs root once per cell of ain (or of aout when ain is NULL), optionally limited by sc.
+    MTLaunchStruct mtls;
+    memset(&mtls, 0, sizeof(mtls));
+    if (ain) {
+        mtls.dimX = ain->getType()->getDimX();
+        mtls.dimY = ain->getType()->getDimY();
+        mtls.dimZ = ain->getType()->getDimZ();
+        //mtls.dimArray = ain->getType()->getDimArray();
+    } else if (aout) {
+        mtls.dimX = aout->getType()->getDimX();
+        mtls.dimY = aout->getType()->getDimY();
+        mtls.dimZ = aout->getType()->getDimZ();
+        //mtls.dimArray = aout->getType()->getDimArray();
+    } else {
+        rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
+        return;
+    }
+
+    if (!sc || (sc->xEnd == 0)) {
+        mtls.xEnd = mtls.dimX;
+    } else {
+        rsAssert(sc->xStart < mtls.dimX);
+        rsAssert(sc->xEnd <= mtls.dimX);
+        rsAssert(sc->xStart < sc->xEnd);
+        mtls.xStart = rsMin(mtls.dimX, sc->xStart);
+        mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
+        if (mtls.xStart >= mtls.xEnd) return;
+    }
+
+    if (!sc || (sc->yEnd == 0)) {
+        mtls.yEnd = mtls.dimY;
+    } else {
+        rsAssert(sc->yStart < mtls.dimY);
+        rsAssert(sc->yEnd <= mtls.dimY);
+        rsAssert(sc->yStart < sc->yEnd);
+        mtls.yStart = rsMin(mtls.dimY, sc->yStart);
+        mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
+        if (mtls.yStart >= mtls.yEnd) return;
+    }
+
+    mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
+    mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
+    mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
+    mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
+    // Check the cached dimZ rather than ain, which is NULL for output-only launches.
+    rsAssert(mtls.dimZ == 0);
+
+    setupScript(rsc);
+    Script * oldTLS = setTLS(this);
+
+    // Fill in the launch description that wc_xy reads on the worker threads.
+    mtls.rsc = rsc;
+    mtls.ain = ain;
+    mtls.aout = aout;
+    mtls.script = this;
+    mtls.usr = usr;
+    mtls.mSliceSize = 10;
+    mtls.mSliceNum = 0;
+
+    mtls.ptrIn = NULL;
+    mtls.eStrideIn = 0;
+    if (ain) {
+        mtls.ptrIn = (const uint8_t *)ain->getPtr();
+        mtls.eStrideIn = ain->getType()->getElementSizeBytes();
+    }
+
+    mtls.ptrOut = NULL;
+    mtls.eStrideOut = 0;
+    if (aout) {
+        mtls.ptrOut = (uint8_t *)aout->getPtr();
+        mtls.eStrideOut = aout->getType()->getElementSizeBytes();
+    }
+
+    if ((rsc->getWorkerPoolSize() > 1) && mEnviroment.mIsThreadable && (mtls.dimY > 1)) {
+
+        // Parallel path: hand the launch to the context's worker threads via wc_xy.
+        rsc->launchThreads(wc_xy, &mtls);
+    } else {
+        // Serial path: walk array, z, y and x in order on the calling thread.
+        for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) {
+            for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) {
+                for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) {
+                    uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar +
+                                      mtls.dimX * mtls.dimY * z +
+                                      mtls.dimX * y + mtls.xStart;
+                    uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
+                    const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
+                    // offset includes xStart, so the element pointers line up with x below.
+                    for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) {
+                        ((rs_t)mProgram.mRoot) (xPtrIn, xPtrOut, usr, x, y, z, ar);
+                        xPtrIn += mtls.eStrideIn;
+                        xPtrOut += mtls.eStrideOut;
+                    }
+                }
+            }
+        }
+    }
+
+    setTLS(oldTLS);
+}
+
+void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len)
+{
+    //LOGE("rsi_ScriptInvoke %i", slot);
+    if ((slot >= mEnviroment.mInvokeFunctionCount) ||
+        (mEnviroment.mInvokeFunctions[slot] == NULL)) {
+        rsc->setError(RS_ERROR_BAD_SCRIPT, "Calling invoke on bad script");
+        return;
+    }
+    setupScript(rsc);
+    Script * oldTLS = setTLS(this);
+
+    if (rsc->props.mLogScripts) {
+        LOGV("%p ScriptC::Invoke invoking slot %i,  ptr %p", rsc, slot, mEnviroment.mInvokeFunctions[slot]);
+    }
+    ((void (*)(const void *, uint32_t))
+        mEnviroment.mInvokeFunctions[slot])(data, len);
+    if (rsc->props.mLogScripts) {
+        LOGV("%p ScriptC::Invoke complete", rsc);
+    }
+
+    setTLS(oldTLS);
+}
+
 ScriptCState::ScriptCState()
 {
-    mScript = NULL;
-    clear();
+    mScript.clear();
 }
 
 ScriptCState::~ScriptCState()
 {
-    delete mScript;
-    mScript = NULL;
+    mScript.clear();
 }
 
-void ScriptCState::clear()
+void ScriptCState::init(Context *rsc)
 {
-    for (uint32_t ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
-        mConstantBufferTypes[ct].clear();
-        mSlotNames[ct].setTo("");
-        mInvokableNames[ct].setTo("");
-        mSlotWritable[ct] = false;
+    clear(rsc);
+}
+
+void ScriptCState::clear(Context *rsc)
+{
+    rsAssert(rsc);
+    mScript.clear();
+    mScript.set(new ScriptC(rsc));
+}
+
+static BCCvoid* symbolLookup(BCCvoid* pContext, const BCCchar* name)
+{
+    const ScriptCState::SymbolTable_t *sym;
+    ScriptC *s = (ScriptC *)pContext;
+    sym = ScriptCState::lookupSymbol(name);
+    if (sym) {
+        return sym->mPtr;
     }
-
-    delete mScript;
-    mScript = new ScriptC(NULL);
-
-    mInt32Defines.clear();
-    mFloatDefines.clear();
-}
-
-static ACCvoid* symbolLookup(ACCvoid* pContext, const ACCchar* name)
-{
-    const ScriptCState::SymbolTable_t *sym = ScriptCState::lookupSymbol(name);
+    sym = ScriptCState::lookupSymbolCL(name);
+    if (sym) {
+        return sym->mPtr;
+    }
+    s->mEnviroment.mIsThreadable = false;
+    sym = ScriptCState::lookupSymbolGL(name);
     if (sym) {
         return sym->mPtr;
     }
@@ -137,65 +394,53 @@
 
 void ScriptCState::runCompiler(Context *rsc, ScriptC *s)
 {
-    s->mAccScript = accCreateScript();
-    String8 tmp;
-
-    rsc->appendNameDefines(&tmp);
-    appendDecls(&tmp);
-    appendVarDefines(rsc, &tmp);
-    appendTypes(rsc, &tmp);
-    tmp.append("#line 1\n");
-
-    const char* scriptSource[] = {tmp.string(), s->mEnviroment.mScriptText};
-    int scriptLength[] = {tmp.length(), s->mEnviroment.mScriptTextLength} ;
-    accScriptSource(s->mAccScript, sizeof(scriptLength) / sizeof(int), scriptSource, scriptLength);
-    accRegisterSymbolCallback(s->mAccScript, symbolLookup, NULL);
-    accCompileScript(s->mAccScript);
-    accGetScriptLabel(s->mAccScript, "main", (ACCvoid**) &s->mProgram.mScript);
-    accGetScriptLabel(s->mAccScript, "init", (ACCvoid**) &s->mProgram.mInit);
-    rsAssert(s->mProgram.mScript);
-
-    if (!s->mProgram.mScript) {
-        ACCchar buf[4096];
-        ACCsizei len;
-        accGetScriptInfoLog(s->mAccScript, sizeof(buf), &len, buf);
-        LOGE("%s", buf);
-        rsc->setError(RS_ERROR_BAD_SCRIPT, "Error compiling user script.");
-        return;
+    LOGV("%p ScriptCState::runCompiler ", rsc);
+    {
+        StopWatch compileTimer("RenderScript compile time");
+        s->mBccScript = bccCreateScript();
+        s->mEnviroment.mIsThreadable = true;
+        bccScriptBitcode(s->mBccScript, s->mEnviroment.mScriptText, s->mEnviroment.mScriptTextLength);
+        bccRegisterSymbolCallback(s->mBccScript, symbolLookup, s);
+        bccCompileScript(s->mBccScript);
+        bccGetScriptLabel(s->mBccScript, "root", (BCCvoid**) &s->mProgram.mRoot);
+        bccGetScriptLabel(s->mBccScript, "init", (BCCvoid**) &s->mProgram.mInit);
     }
+    LOGV("%p ScriptCState::runCompiler root %p,  init %p", rsc, s->mProgram.mRoot, s->mProgram.mInit);
 
     if (s->mProgram.mInit) {
         s->mProgram.mInit();
     }
 
-    for (int ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
-        if (mSlotNames[ct].length() > 0) {
-            accGetScriptLabel(s->mAccScript,
-                              mSlotNames[ct].string(),
-                              (ACCvoid**) &s->mProgram.mSlotPointers[ct]);
-        }
+    bccGetExportFuncs(s->mBccScript, (BCCsizei*) &s->mEnviroment.mInvokeFunctionCount, 0, NULL);
+    if(s->mEnviroment.mInvokeFunctionCount <= 0)
+        s->mEnviroment.mInvokeFunctions = NULL;
+    else {
+        s->mEnviroment.mInvokeFunctions = (Script::InvokeFunc_t*) calloc(s->mEnviroment.mInvokeFunctionCount, sizeof(Script::InvokeFunc_t));
+        bccGetExportFuncs(s->mBccScript, NULL, s->mEnviroment.mInvokeFunctionCount, (BCCvoid **) s->mEnviroment.mInvokeFunctions);
     }
 
-    for (int ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
-        if (mInvokableNames[ct].length() > 0) {
-            accGetScriptLabel(s->mAccScript,
-                              mInvokableNames[ct].string(),
-                              (ACCvoid**) &s->mEnviroment.mInvokables[ct]);
-        }
+    bccGetExportVars(s->mBccScript, (BCCsizei*) &s->mEnviroment.mFieldCount, 0, NULL);
+    if(s->mEnviroment.mFieldCount <= 0)
+        s->mEnviroment.mFieldAddress = NULL;
+    else {
+        s->mEnviroment.mFieldAddress = (void **) calloc(s->mEnviroment.mFieldCount, sizeof(void *));
+        bccGetExportVars(s->mBccScript, NULL, s->mEnviroment.mFieldCount, (BCCvoid **) s->mEnviroment.mFieldAddress);
+        s->initSlots();
     }
 
     s->mEnviroment.mFragment.set(rsc->getDefaultProgramFragment());
     s->mEnviroment.mVertex.set(rsc->getDefaultProgramVertex());
-    s->mEnviroment.mFragmentStore.set(rsc->getDefaultProgramFragmentStore());
+    s->mEnviroment.mFragmentStore.set(rsc->getDefaultProgramStore());
     s->mEnviroment.mRaster.set(rsc->getDefaultProgramRaster());
 
-    if (s->mProgram.mScript) {
+    if (s->mProgram.mRoot) {
         const static int pragmaMax = 16;
-        ACCsizei pragmaCount;
-        ACCchar * str[pragmaMax];
-        accGetPragmas(s->mAccScript, &pragmaCount, pragmaMax, &str[0]);
+        BCCsizei pragmaCount;
+        BCCchar * str[pragmaMax];
+        bccGetPragmas(s->mBccScript, &pragmaCount, pragmaMax, &str[0]);
 
         for (int ct=0; ct < pragmaCount; ct+=2) {
+            //LOGE("pragme %s %s", str[ct], str[ct+1]);
             if (!strcmp(str[ct], "version")) {
                 continue;
             }
@@ -208,11 +453,6 @@
                     s->mEnviroment.mVertex.clear();
                     continue;
                 }
-                ProgramVertex * pv = (ProgramVertex *)rsc->lookupName(str[ct+1]);
-                if (pv != NULL) {
-                    s->mEnviroment.mVertex.set(pv);
-                    continue;
-                }
                 LOGE("Unreconized value %s passed to stateVertex", str[ct+1]);
             }
 
@@ -224,11 +464,6 @@
                     s->mEnviroment.mRaster.clear();
                     continue;
                 }
-                ProgramRaster * pr = (ProgramRaster *)rsc->lookupName(str[ct+1]);
-                if (pr != NULL) {
-                    s->mEnviroment.mRaster.set(pr);
-                    continue;
-                }
                 LOGE("Unreconized value %s passed to stateRaster", str[ct+1]);
             }
 
@@ -240,11 +475,6 @@
                     s->mEnviroment.mFragment.clear();
                     continue;
                 }
-                ProgramFragment * pf = (ProgramFragment *)rsc->lookupName(str[ct+1]);
-                if (pf != NULL) {
-                    s->mEnviroment.mFragment.set(pf);
-                    continue;
-                }
                 LOGE("Unreconized value %s passed to stateFragment", str[ct+1]);
             }
 
@@ -256,12 +486,6 @@
                     s->mEnviroment.mFragmentStore.clear();
                     continue;
                 }
-                ProgramFragmentStore * pfs =
-                    (ProgramFragmentStore *)rsc->lookupName(str[ct+1]);
-                if (pfs != NULL) {
-                    s->mEnviroment.mFragmentStore.set(pfs);
-                    continue;
-                }
                 LOGE("Unreconized value %s passed to stateStore", str[ct+1]);
             }
 
@@ -273,111 +497,6 @@
     }
 }
 
-static void appendElementBody(String8 *s, const Element *e)
-{
-    s->append(" {\n");
-    for (size_t ct2=0; ct2 < e->getFieldCount(); ct2++) {
-        const Element *c = e->getField(ct2);
-        s->append("    ");
-        s->append(c->getCType());
-        s->append(" ");
-        s->append(e->getFieldName(ct2));
-        s->append(";\n");
-    }
-    s->append("}");
-}
-
-void ScriptCState::appendVarDefines(const Context *rsc, String8 *str)
-{
-    char buf[256];
-    if (rsc->props.mLogScripts) {
-        LOGD("appendVarDefines mInt32Defines.size()=%d mFloatDefines.size()=%d\n",
-                mInt32Defines.size(), mFloatDefines.size());
-    }
-    for (size_t ct=0; ct < mInt32Defines.size(); ct++) {
-        str->append("#define ");
-        str->append(mInt32Defines.keyAt(ct));
-        str->append(" ");
-        sprintf(buf, "%i\n", (int)mInt32Defines.valueAt(ct));
-        str->append(buf);
-    }
-    for (size_t ct=0; ct < mFloatDefines.size(); ct++) {
-        str->append("#define ");
-        str->append(mFloatDefines.keyAt(ct));
-        str->append(" ");
-        sprintf(buf, "%ff\n", mFloatDefines.valueAt(ct));
-        str->append(buf);
-    }
-}
-
-
-
-void ScriptCState::appendTypes(const Context *rsc, String8 *str)
-{
-    char buf[256];
-    String8 tmp;
-
-    str->append("struct vecF32_2_s {float x; float y;};\n");
-    str->append("struct vecF32_3_s {float x; float y; float z;};\n");
-    str->append("struct vecF32_4_s {float x; float y; float z; float w;};\n");
-    str->append("struct vecU8_4_s {char r; char g; char b; char a;};\n");
-    str->append("#define vecF32_2_t struct vecF32_2_s\n");
-    str->append("#define vecF32_3_t struct vecF32_3_s\n");
-    str->append("#define vecF32_4_t struct vecF32_4_s\n");
-    str->append("#define vecU8_4_t struct vecU8_4_s\n");
-    str->append("#define vecI8_4_t struct vecU8_4_s\n");
-
-    for (size_t ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
-        const Type *t = mConstantBufferTypes[ct].get();
-        if (!t) {
-            continue;
-        }
-        const Element *e = t->getElement();
-        if (e->getName() && (e->getFieldCount() > 1)) {
-            String8 s("struct struct_");
-            s.append(e->getName());
-            s.append(e->getCStructBody());
-            s.append(";\n");
-
-            s.append("#define ");
-            s.append(e->getName());
-            s.append("_t struct struct_");
-            s.append(e->getName());
-            s.append("\n\n");
-            if (rsc->props.mLogScripts) {
-                LOGV("%s", static_cast<const char*>(s));
-            }
-            str->append(s);
-        }
-
-        if (mSlotNames[ct].length() > 0) {
-            String8 s;
-            if (e->getName()) {
-                // Use the named struct
-                s.setTo(e->getName());
-            } else {
-                // create an struct named from the slot.
-                s.setTo("struct ");
-                s.append(mSlotNames[ct]);
-                s.append("_s");
-                s.append(e->getCStructBody());
-                //appendElementBody(&s, e);
-                s.append(";\n");
-                s.append("struct ");
-                s.append(mSlotNames[ct]);
-                s.append("_s");
-            }
-
-            s.append(" * ");
-            s.append(mSlotNames[ct]);
-            s.append(";\n");
-            if (rsc->props.mLogScripts) {
-                LOGV("%s", static_cast<const char*>(s));
-            }
-            str->append(s);
-        }
-    }
-}
 
 
 namespace android {
@@ -386,14 +505,7 @@
 void rsi_ScriptCBegin(Context * rsc)
 {
     ScriptCState *ss = &rsc->mScriptC;
-    ss->clear();
-}
-
-void rsi_ScriptCSetScript(Context * rsc, void *vp)
-{
-    rsAssert(0);
-    //ScriptCState *ss = &rsc->mScriptC;
-    //ss->mProgram.mScript = reinterpret_cast<ScriptC::RunScript_t>(vp);
+    ss->clear(rsc);
 }
 
 void rsi_ScriptCSetText(Context *rsc, const char *text, uint32_t len)
@@ -412,32 +524,13 @@
 {
     ScriptCState *ss = &rsc->mScriptC;
 
-    ScriptC *s = ss->mScript;
-    ss->mScript = NULL;
+    ObjectBaseRef<ScriptC> s = ss->mScript.get();
+    ss->mScript.clear();
 
-    ss->runCompiler(rsc, s);
+    ss->runCompiler(rsc, s.get());
     s->incUserRef();
-    s->setContext(rsc);
-    for (int ct=0; ct < MAX_SCRIPT_BANKS; ct++) {
-        s->mTypes[ct].set(ss->mConstantBufferTypes[ct].get());
-        s->mSlotNames[ct] = ss->mSlotNames[ct];
-        s->mSlotWritable[ct] = ss->mSlotWritable[ct];
-    }
-
-    ss->clear();
-    return s;
-}
-
-void rsi_ScriptCSetDefineF(Context *rsc, const char* name, float value)
-{
-    ScriptCState *ss = &rsc->mScriptC;
-    ss->mFloatDefines.add(String8(name), value);
-}
-
-void rsi_ScriptCSetDefineI32(Context *rsc, const char* name, int32_t value)
-{
-    ScriptCState *ss = &rsc->mScriptC;
-    ss->mInt32Defines.add(String8(name), value);
+    ss->clear(rsc);
+    return s.get();
 }
 
 }
diff --git a/rsScriptC.h b/rsScriptC.h
index 35abadf..e5b5ba9 100644
--- a/rsScriptC.h
+++ b/rsScriptC.h
@@ -21,9 +21,7 @@
 
 #include "RenderScriptEnv.h"
 
-#include <utils/KeyedVector.h>
-
-struct ACCscript;
+struct BCCscript;
 
 // ---------------------------------------------------------------------------
 namespace android {
@@ -34,7 +32,7 @@
 class ScriptC : public Script
 {
 public:
-    typedef int (*RunScript_t)(uint32_t launchIndex);
+    typedef int (*RunScript_t)();
     typedef void (*VoidFunc_t)();
 
     ScriptC(Context *);
@@ -44,18 +42,35 @@
         int mVersionMajor;
         int mVersionMinor;
 
-        RunScript_t mScript;
+        RunScript_t mRoot;
         VoidFunc_t mInit;
-
-        void ** mSlotPointers[MAX_SCRIPT_BANKS];
     };
 
     Program_t mProgram;
 
-    ACCscript*    mAccScript;
+    BCCscript*    mBccScript;
 
-    virtual void setupScript();
-    virtual uint32_t run(Context *, uint32_t launchID);
+    const Allocation *ptrToAllocation(const void *) const;
+
+
+    virtual void Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len);
+
+    virtual uint32_t run(Context *);
+
+    virtual void runForEach(Context *rsc,
+                            const Allocation * ain,
+                            Allocation * aout,
+                            const void * usr,
+                            const RsScriptCall *sc = NULL);
+
+    virtual void serialize(OStream *stream) const {    }
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_SCRIPT_C; }
+    static Type *createFromStream(Context *rsc, IStream *stream) { return NULL; }
+
+protected:
+    void setupScript(Context *);
+    void setupGLState(Context *);
+    Script * setTLS(Script *);
 };
 
 class ScriptCState
@@ -64,30 +79,21 @@
     ScriptCState();
     ~ScriptCState();
 
-    ScriptC *mScript;
+    ObjectBaseRef<ScriptC> mScript;
 
-    ObjectBaseRef<const Type> mConstantBufferTypes[MAX_SCRIPT_BANKS];
-    String8 mSlotNames[MAX_SCRIPT_BANKS];
-    bool mSlotWritable[MAX_SCRIPT_BANKS];
-    String8 mInvokableNames[MAX_SCRIPT_BANKS];
+    void init(Context *rsc);
 
-    void clear();
+    void clear(Context *rsc);
     void runCompiler(Context *rsc, ScriptC *s);
-    void appendVarDefines(const Context *rsc, String8 *str);
-    void appendTypes(const Context *rsc, String8 *str);
 
     struct SymbolTable_t {
         const char * mName;
         void * mPtr;
-        const char * mRet;
-        const char * mParam;
     };
-    static SymbolTable_t gSyms[];
+    //static SymbolTable_t gSyms[];
     static const SymbolTable_t * lookupSymbol(const char *);
-    static void appendDecls(String8 *str);
-
-    KeyedVector<String8,int> mInt32Defines;
-    KeyedVector<String8,float> mFloatDefines;
+    static const SymbolTable_t * lookupSymbolCL(const char *);
+    static const SymbolTable_t * lookupSymbolGL(const char *);
 };
 
 
diff --git a/rsScriptC_Lib.cpp b/rsScriptC_Lib.cpp
index 202ca3d..9fadee0 100644
--- a/rsScriptC_Lib.cpp
+++ b/rsScriptC_Lib.cpp
@@ -17,18 +17,9 @@
 #include "rsContext.h"
 #include "rsScriptC.h"
 #include "rsMatrix.h"
-#include "rsNoise.h"
 
-#include "acc/acc.h"
 #include "utils/Timers.h"
 
-#define GL_GLEXT_PROTOTYPES
-
-#include <GLES/gl.h>
-#include <GLES/glext.h>
-#include <GLES2/gl2.h>
-#include <GLES2/gl2ext.h>
-
 #include <time.h>
 
 using namespace android;
@@ -39,252 +30,11 @@
     Context * rsc = tls->mContext; \
     ScriptC * sc = (ScriptC *) tls->mScript
 
-typedef struct {
-    float x;
-    float y;
-    float z;
-} vec3_t;
-
-typedef struct {
-    float x;
-    float y;
-    float z;
-    float w;
-} vec4_t;
-
-typedef struct {
-    float x;
-    float y;
-} vec2_t;
-
-//////////////////////////////////////////////////////////////////////////////
-// IO routines
-//////////////////////////////////////////////////////////////////////////////
-
-static float SC_loadF(uint32_t bank, uint32_t offset)
-{
-    GET_TLS();
-    const void *vp = sc->mSlots[bank]->getPtr();
-    const float *f = static_cast<const float *>(vp);
-    //LOGE("loadF %i %i = %f %x", bank, offset, f, ((int *)&f)[0]);
-    return f[offset];
-}
-
-static int32_t SC_loadI32(uint32_t bank, uint32_t offset)
-{
-    GET_TLS();
-    const void *vp = sc->mSlots[bank]->getPtr();
-    const int32_t *i = static_cast<const int32_t *>(vp);
-    //LOGE("loadI32 %i %i = %i", bank, offset, t);
-    return i[offset];
-}
-
-static float* SC_loadArrayF(uint32_t bank, uint32_t offset)
-{
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    float *f = static_cast<float *>(vp);
-    return f + offset;
-}
-
-static int32_t* SC_loadArrayI32(uint32_t bank, uint32_t offset)
-{
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    int32_t *i = static_cast<int32_t *>(vp);
-    return i + offset;
-}
-
-static float* SC_loadSimpleMeshVerticesF(RsSimpleMesh mesh, uint32_t idx)
-{
-    SimpleMesh *tm = static_cast<SimpleMesh *>(mesh);
-    void *vp = tm->mVertexBuffers[idx]->getPtr();;
-    return static_cast<float *>(vp);
-}
-
-static void SC_updateSimpleMesh(RsSimpleMesh mesh)
-{
-    GET_TLS();
-    SimpleMesh *sm = static_cast<SimpleMesh *>(mesh);
-    sm->uploadAll(rsc);
-}
-
-static uint32_t SC_loadU32(uint32_t bank, uint32_t offset)
-{
-    GET_TLS();
-    const void *vp = sc->mSlots[bank]->getPtr();
-    const uint32_t *i = static_cast<const uint32_t *>(vp);
-    return i[offset];
-}
-
-static void SC_loadVec4(uint32_t bank, uint32_t offset, rsc_Vector4 *v)
-{
-    GET_TLS();
-    const void *vp = sc->mSlots[bank]->getPtr();
-    const float *f = static_cast<const float *>(vp);
-    memcpy(v, &f[offset], sizeof(rsc_Vector4));
-}
-
-static void SC_loadMatrix(uint32_t bank, uint32_t offset, rsc_Matrix *m)
-{
-    GET_TLS();
-    const void *vp = sc->mSlots[bank]->getPtr();
-    const float *f = static_cast<const float *>(vp);
-    memcpy(m, &f[offset], sizeof(rsc_Matrix));
-}
-
-
-static void SC_storeF(uint32_t bank, uint32_t offset, float v)
-{
-    //LOGE("storeF %i %i %f", bank, offset, v);
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    float *f = static_cast<float *>(vp);
-    f[offset] = v;
-}
-
-static void SC_storeI32(uint32_t bank, uint32_t offset, int32_t v)
-{
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    int32_t *f = static_cast<int32_t *>(vp);
-    static_cast<int32_t *>(sc->mSlots[bank]->getPtr())[offset] = v;
-}
-
-static void SC_storeU32(uint32_t bank, uint32_t offset, uint32_t v)
-{
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    uint32_t *f = static_cast<uint32_t *>(vp);
-    static_cast<uint32_t *>(sc->mSlots[bank]->getPtr())[offset] = v;
-}
-
-static void SC_storeVec4(uint32_t bank, uint32_t offset, const rsc_Vector4 *v)
-{
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    float *f = static_cast<float *>(vp);
-    memcpy(&f[offset], v, sizeof(rsc_Vector4));
-}
-
-static void SC_storeMatrix(uint32_t bank, uint32_t offset, const rsc_Matrix *m)
-{
-    GET_TLS();
-    void *vp = sc->mSlots[bank]->getPtr();
-    float *f = static_cast<float *>(vp);
-    memcpy(&f[offset], m, sizeof(rsc_Matrix));
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Vec3 routines
-//////////////////////////////////////////////////////////////////////////////
-
-static void SC_vec3Norm(vec3_t *v)
-{
-    float len = sqrtf(v->x * v->x + v->y * v->y + v->z * v->z);
-    len = 1 / len;
-    v->x *= len;
-    v->y *= len;
-    v->z *= len;
-}
-
-static float SC_vec3Length(const vec3_t *v)
-{
-    return sqrtf(v->x * v->x + v->y * v->y + v->z * v->z);
-}
-
-static void SC_vec3Add(vec3_t *dest, const vec3_t *lhs, const vec3_t *rhs)
-{
-    dest->x = lhs->x + rhs->x;
-    dest->y = lhs->y + rhs->y;
-    dest->z = lhs->z + rhs->z;
-}
-
-static void SC_vec3Sub(vec3_t *dest, const vec3_t *lhs, const vec3_t *rhs)
-{
-    dest->x = lhs->x - rhs->x;
-    dest->y = lhs->y - rhs->y;
-    dest->z = lhs->z - rhs->z;
-}
-
-static void SC_vec3Cross(vec3_t *dest, const vec3_t *lhs, const vec3_t *rhs)
-{
-    float x = lhs->y * rhs->z  - lhs->z * rhs->y;
-    float y = lhs->z * rhs->x  - lhs->x * rhs->z;
-    float z = lhs->x * rhs->y  - lhs->y * rhs->x;
-    dest->x = x;
-    dest->y = y;
-    dest->z = z;
-}
-
-static float SC_vec3Dot(const vec3_t *lhs, const vec3_t *rhs)
-{
-    return lhs->x * rhs->x + lhs->y * rhs->y + lhs->z * rhs->z;
-}
-
-static void SC_vec3Scale(vec3_t *lhs, float scale)
-{
-    lhs->x *= scale;
-    lhs->y *= scale;
-    lhs->z *= scale;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Vec4 routines
-//////////////////////////////////////////////////////////////////////////////
-
-static void SC_vec4Norm(vec4_t *v)
-{
-    float len = sqrtf(v->x * v->x + v->y * v->y + v->z * v->z + v->w * v->w);
-    len = 1 / len;
-    v->x *= len;
-    v->y *= len;
-    v->z *= len;
-    v->w *= len;
-}
-
-static float SC_vec4Length(const vec4_t *v)
-{
-    return sqrtf(v->x * v->x + v->y * v->y + v->z * v->z + v->w * v->w);
-}
-
-static void SC_vec4Add(vec4_t *dest, const vec4_t *lhs, const vec4_t *rhs)
-{
-    dest->x = lhs->x + rhs->x;
-    dest->y = lhs->y + rhs->y;
-    dest->z = lhs->z + rhs->z;
-    dest->w = lhs->w + rhs->w;
-}
-
-static void SC_vec4Sub(vec4_t *dest, const vec4_t *lhs, const vec4_t *rhs)
-{
-    dest->x = lhs->x - rhs->x;
-    dest->y = lhs->y - rhs->y;
-    dest->z = lhs->z - rhs->z;
-    dest->w = lhs->w - rhs->w;
-}
-
-static float SC_vec4Dot(const vec4_t *lhs, const vec4_t *rhs)
-{
-    return lhs->x * rhs->x + lhs->y * rhs->y + lhs->z * rhs->z + lhs->w * rhs->w;
-}
-
-static void SC_vec4Scale(vec4_t *lhs, float scale)
-{
-    lhs->x *= scale;
-    lhs->y *= scale;
-    lhs->z *= scale;
-    lhs->w *= scale;
-}
 
 //////////////////////////////////////////////////////////////////////////////
 // Math routines
 //////////////////////////////////////////////////////////////////////////////
 
-#define PI 3.1415926f
-#define DEG_TO_RAD PI / 180.0f
-#define RAD_TO_DEG 180.0f / PI
-
 static float SC_sinf_fast(float x)
 {
     const float A =   1.0f / (2.0f * M_PI);
@@ -323,6 +73,7 @@
     return 0.2215f * (y * fabsf(y) - y) + y;
 }
 
+
 static float SC_randf(float max)
 {
     float r = (float)rand();
@@ -335,104 +86,20 @@
     return r / RAND_MAX * (max - min) + min;
 }
 
-static int SC_sign(int value)
+static int SC_randi(int max)
 {
-    return (value > 0) - (value < 0);
+    return (int)SC_randf(max);
 }
 
-static float SC_signf(float value)
+static int SC_randi2(int min, int max)
 {
-    return (value > 0) - (value < 0);
+    return (int)SC_randf2(min, max);
 }
 
-static float SC_clampf(float amount, float low, float high)
+static float SC_frac(float v)
 {
-    return amount < low ? low : (amount > high ? high : amount);
-}
-
-static int SC_clamp(int amount, int low, int high)
-{
-    return amount < low ? low : (amount > high ? high : amount);
-}
-
-static float SC_maxf(float a, float b)
-{
-    return a > b ? a : b;
-}
-
-static float SC_minf(float a, float b)
-{
-    return a < b ? a : b;
-}
-
-static float SC_sqrf(float v)
-{
-    return v * v;
-}
-
-static int SC_sqr(int v)
-{
-    return v * v;
-}
-
-static float SC_fracf(float v)
-{
-    return v - floorf(v);
-}
-
-static float SC_roundf(float v)
-{
-    return floorf(v + 0.4999999999);
-}
-
-static float SC_distf2(float x1, float y1, float x2, float y2)
-{
-    float x = x2 - x1;
-    float y = y2 - y1;
-    return sqrtf(x * x + y * y);
-}
-
-static float SC_distf3(float x1, float y1, float z1, float x2, float y2, float z2)
-{
-    float x = x2 - x1;
-    float y = y2 - y1;
-    float z = z2 - z1;
-    return sqrtf(x * x + y * y + z * z);
-}
-
-static float SC_magf2(float a, float b)
-{
-    return sqrtf(a * a + b * b);
-}
-
-static float SC_magf3(float a, float b, float c)
-{
-    return sqrtf(a * a + b * b + c * c);
-}
-
-static float SC_radf(float degrees)
-{
-    return degrees * DEG_TO_RAD;
-}
-
-static float SC_degf(float radians)
-{
-    return radians * RAD_TO_DEG;
-}
-
-static float SC_lerpf(float start, float stop, float amount)
-{
-    return start + (stop - start) * amount;
-}
-
-static float SC_normf(float start, float stop, float value)
-{
-    return (value - start) / (stop - start);
-}
-
-static float SC_mapf(float minStart, float minStop, float maxStart, float maxStop, float value)
-{
-    return maxStart + (maxStart - maxStop) * ((value - minStart) / (minStop - minStart));
+    int i = (int)floor(v);
+    return fmin(v - i, 0x1.fffffep-1f);
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -511,348 +178,22 @@
     return timeinfo->tm_year;
 }
 
-static int32_t SC_uptimeMillis()
+static int64_t SC_uptimeMillis()
 {
     return nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC));
 }
 
-static int32_t SC_startTimeMillis()
+static int64_t SC_uptimeNanos()
+{
+    return systemTime(SYSTEM_TIME_MONOTONIC);
+}
+
+static float SC_getDt()
 {
     GET_TLS();
-    return sc->mEnviroment.mStartTimeMillis;
-}
-
-static int32_t SC_elapsedTimeMillis()
-{
-    GET_TLS();
-    return nanoseconds_to_milliseconds(systemTime(SYSTEM_TIME_MONOTONIC))
-            - sc->mEnviroment.mStartTimeMillis;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// Matrix routines
-//////////////////////////////////////////////////////////////////////////////
-
-
-static void SC_matrixLoadIdentity(rsc_Matrix *mat)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->loadIdentity();
-}
-
-static void SC_matrixLoadFloat(rsc_Matrix *mat, const float *f)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->load(f);
-}
-
-static void SC_matrixLoadMat(rsc_Matrix *mat, const rsc_Matrix *newmat)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->load(reinterpret_cast<const Matrix *>(newmat));
-}
-
-static void SC_matrixLoadRotate(rsc_Matrix *mat, float rot, float x, float y, float z)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->loadRotate(rot, x, y, z);
-}
-
-static void SC_matrixLoadScale(rsc_Matrix *mat, float x, float y, float z)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->loadScale(x, y, z);
-}
-
-static void SC_matrixLoadTranslate(rsc_Matrix *mat, float x, float y, float z)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->loadTranslate(x, y, z);
-}
-
-static void SC_matrixLoadMultiply(rsc_Matrix *mat, const rsc_Matrix *lhs, const rsc_Matrix *rhs)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->loadMultiply(reinterpret_cast<const Matrix *>(lhs),
-                    reinterpret_cast<const Matrix *>(rhs));
-}
-
-static void SC_matrixMultiply(rsc_Matrix *mat, const rsc_Matrix *rhs)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->multiply(reinterpret_cast<const Matrix *>(rhs));
-}
-
-static void SC_matrixRotate(rsc_Matrix *mat, float rot, float x, float y, float z)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->rotate(rot, x, y, z);
-}
-
-static void SC_matrixScale(rsc_Matrix *mat, float x, float y, float z)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->scale(x, y, z);
-}
-
-static void SC_matrixTranslate(rsc_Matrix *mat, float x, float y, float z)
-{
-    Matrix *m = reinterpret_cast<Matrix *>(mat);
-    m->translate(x, y, z);
-}
-
-
-static void SC_vec2Rand(float *vec, float maxLen)
-{
-    float angle = SC_randf(PI * 2);
-    float len = SC_randf(maxLen);
-    vec[0] = len * sinf(angle);
-    vec[1] = len * cosf(angle);
-}
-
-
-
-//////////////////////////////////////////////////////////////////////////////
-// Context
-//////////////////////////////////////////////////////////////////////////////
-
-static void SC_bindTexture(RsProgramFragment vpf, uint32_t slot, RsAllocation va)
-{
-    GET_TLS();
-    rsi_ProgramBindTexture(rsc,
-                           static_cast<ProgramFragment *>(vpf),
-                           slot,
-                           static_cast<Allocation *>(va));
-
-}
-
-static void SC_bindSampler(RsProgramFragment vpf, uint32_t slot, RsSampler vs)
-{
-    GET_TLS();
-    rsi_ProgramBindSampler(rsc,
-                           static_cast<ProgramFragment *>(vpf),
-                           slot,
-                           static_cast<Sampler *>(vs));
-
-}
-
-static void SC_bindProgramFragmentStore(RsProgramFragmentStore pfs)
-{
-    GET_TLS();
-    rsi_ContextBindProgramFragmentStore(rsc, pfs);
-
-}
-
-static void SC_bindProgramFragment(RsProgramFragment pf)
-{
-    GET_TLS();
-    rsi_ContextBindProgramFragment(rsc, pf);
-
-}
-
-static void SC_bindProgramVertex(RsProgramVertex pv)
-{
-    GET_TLS();
-    rsi_ContextBindProgramVertex(rsc, pv);
-
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// VP
-//////////////////////////////////////////////////////////////////////////////
-
-static void SC_vpLoadModelMatrix(const rsc_Matrix *m)
-{
-    GET_TLS();
-    rsc->getVertex()->setModelviewMatrix(m);
-}
-
-static void SC_vpLoadTextureMatrix(const rsc_Matrix *m)
-{
-    GET_TLS();
-    rsc->getVertex()->setTextureMatrix(m);
-}
-
-
-
-//////////////////////////////////////////////////////////////////////////////
-// Drawing
-//////////////////////////////////////////////////////////////////////////////
-
-static void SC_drawLine(float x1, float y1, float z1,
-                        float x2, float y2, float z2)
-{
-    GET_TLS();
-    if (!rsc->setupCheck()) {
-        return;
-    }
-
-    float vtx[] = { x1, y1, z1, x2, y2, z2 };
-    VertexArray va;
-    va.addLegacy(GL_FLOAT, 3, 12, RS_KIND_POSITION, false, (uint32_t)vtx);
-    if (rsc->checkVersion2_0()) {
-        va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
-    } else {
-        va.setupGL(rsc, &rsc->mStateVertexArray);
-    }
-
-    glDrawArrays(GL_LINES, 0, 2);
-}
-
-static void SC_drawPoint(float x, float y, float z)
-{
-    GET_TLS();
-    if (!rsc->setupCheck()) {
-        return;
-    }
-
-    float vtx[] = { x, y, z };
-
-    VertexArray va;
-    va.addLegacy(GL_FLOAT, 3, 12, RS_KIND_POSITION, false, (uint32_t)vtx);
-    if (rsc->checkVersion2_0()) {
-        va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
-    } else {
-        va.setupGL(rsc, &rsc->mStateVertexArray);
-    }
-
-    glDrawArrays(GL_POINTS, 0, 1);
-}
-
-static void SC_drawQuadTexCoords(float x1, float y1, float z1,
-                                 float u1, float v1,
-                                 float x2, float y2, float z2,
-                                 float u2, float v2,
-                                 float x3, float y3, float z3,
-                                 float u3, float v3,
-                                 float x4, float y4, float z4,
-                                 float u4, float v4)
-{
-    GET_TLS();
-    if (!rsc->setupCheck()) {
-        return;
-    }
-
-    //LOGE("Quad");
-    //LOGE("%4.2f, %4.2f, %4.2f", x1, y1, z1);
-    //LOGE("%4.2f, %4.2f, %4.2f", x2, y2, z2);
-    //LOGE("%4.2f, %4.2f, %4.2f", x3, y3, z3);
-    //LOGE("%4.2f, %4.2f, %4.2f", x4, y4, z4);
-
-    float vtx[] = {x1,y1,z1, x2,y2,z2, x3,y3,z3, x4,y4,z4};
-    const float tex[] = {u1,v1, u2,v2, u3,v3, u4,v4};
-
-    VertexArray va;
-    va.addLegacy(GL_FLOAT, 3, 12, RS_KIND_POSITION, false, (uint32_t)vtx);
-    va.addLegacy(GL_FLOAT, 2, 8, RS_KIND_TEXTURE, false, (uint32_t)tex);
-    if (rsc->checkVersion2_0()) {
-        va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
-    } else {
-        va.setupGL(rsc, &rsc->mStateVertexArray);
-    }
-
-
-    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-}
-
-static void SC_drawQuad(float x1, float y1, float z1,
-                        float x2, float y2, float z2,
-                        float x3, float y3, float z3,
-                        float x4, float y4, float z4)
-{
-    SC_drawQuadTexCoords(x1, y1, z1, 0, 1,
-                         x2, y2, z2, 1, 1,
-                         x3, y3, z3, 1, 0,
-                         x4, y4, z4, 0, 0);
-}
-
-static void SC_drawSpriteScreenspace(float x, float y, float z, float w, float h)
-{
-    GET_TLS();
-    ObjectBaseRef<const ProgramVertex> tmp(rsc->getVertex());
-    rsc->setVertex(rsc->getDefaultProgramVertex());
-    //rsc->setupCheck();
-
-    //GLint crop[4] = {0, h, w, -h};
-
-    float sh = rsc->getHeight();
-
-    SC_drawQuad(x,   sh - y,     z,
-                x+w, sh - y,     z,
-                x+w, sh - (y+h), z,
-                x,   sh - (y+h), z);
-    rsc->setVertex((ProgramVertex *)tmp.get());
-}
-
-static void SC_drawSpriteScreenspaceCropped(float x, float y, float z, float w, float h,
-        float cx0, float cy0, float cx1, float cy1)
-{
-    GET_TLS();
-    if (!rsc->setupCheck()) {
-        return;
-    }
-
-    GLint crop[4] = {cx0, cy0, cx1, cy1};
-    glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_CROP_RECT_OES, crop);
-    glDrawTexfOES(x, y, z, w, h);
-}
-
-static void SC_drawSprite(float x, float y, float z, float w, float h)
-{
-    GET_TLS();
-    float vin[3] = {x, y, z};
-    float vout[4];
-
-    //LOGE("ds  in %f %f %f", x, y, z);
-    rsc->getVertex()->transformToScreen(rsc, vout, vin);
-    //LOGE("ds  out %f %f %f %f", vout[0], vout[1], vout[2], vout[3]);
-    vout[0] /= vout[3];
-    vout[1] /= vout[3];
-    vout[2] /= vout[3];
-
-    vout[0] *= rsc->getWidth() / 2;
-    vout[1] *= rsc->getHeight() / 2;
-    vout[0] += rsc->getWidth() / 2;
-    vout[1] += rsc->getHeight() / 2;
-
-    vout[0] -= w/2;
-    vout[1] -= h/2;
-
-    //LOGE("ds  out2 %f %f %f", vout[0], vout[1], vout[2]);
-
-    // U, V, W, H
-    SC_drawSpriteScreenspace(vout[0], vout[1], z, h, w);
-    //rsc->setupCheck();
-}
-
-
-static void SC_drawRect(float x1, float y1,
-                        float x2, float y2, float z)
-{
-    SC_drawQuad(x1, y2, z,
-                x2, y2, z,
-                x2, y1, z,
-                x1, y1, z);
-}
-
-static void SC_drawSimpleMesh(RsSimpleMesh vsm)
-{
-    GET_TLS();
-    SimpleMesh *sm = static_cast<SimpleMesh *>(vsm);
-    if (!rsc->setupCheck()) {
-        return;
-    }
-    sm->render(rsc);
-}
-
-static void SC_drawSimpleMeshRange(RsSimpleMesh vsm, uint32_t start, uint32_t len)
-{
-    GET_TLS();
-    SimpleMesh *sm = static_cast<SimpleMesh *>(vsm);
-    if (!rsc->setupCheck()) {
-        return;
-    }
-    sm->renderRange(rsc, start, len);
+    int64_t l = sc->mEnviroment.mLastDtTime;
+    sc->mEnviroment.mLastDtTime = systemTime(SYSTEM_TIME_MONOTONIC);
+    return ((float)(sc->mEnviroment.mLastDtTime - l)) / 1.0e9;
 }
 
 
@@ -860,531 +201,355 @@
 //
 //////////////////////////////////////////////////////////////////////////////
 
-static void SC_color(float r, float g, float b, float a)
+static uint32_t SC_allocGetDimX(RsAllocation va)
 {
-    GET_TLS();
-    rsc->mStateVertex.color[0] = r;
-    rsc->mStateVertex.color[1] = g;
-    rsc->mStateVertex.color[2] = b;
-    rsc->mStateVertex.color[3] = a;
-    if (!rsc->checkVersion2_0()) {
-        glColor4f(r, g, b, a);
+    const Allocation *a = static_cast<const Allocation *>(va);
+    CHECK_OBJ(a);
+    //LOGE("SC_allocGetDimX a=%p  type=%p", a, a->getType());
+    return a->getType()->getDimX();
+}
+
+// Reports getDimY() of the Type attached to the allocation handle |va|.
+static uint32_t SC_allocGetDimY(RsAllocation va) {
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    CHECK_OBJ(alloc);
+    return alloc->getType()->getDimY();
+}
+
+// Reports getDimZ() of the Type attached to the allocation handle |va|.
+static uint32_t SC_allocGetDimZ(RsAllocation va) {
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    CHECK_OBJ(alloc);
+    return alloc->getType()->getDimZ();
+}
+
+// Reports getDimLOD() of the Type attached to the allocation handle |va|.
+static uint32_t SC_allocGetDimLOD(RsAllocation va) {
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    CHECK_OBJ(alloc);
+    return alloc->getType()->getDimLOD();
+}
+
+// Reports getDimFaces() of the Type attached to the allocation handle |va|.
+static uint32_t SC_allocGetDimFaces(RsAllocation va) {
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    CHECK_OBJ(alloc);
+    return alloc->getType()->getDimFaces();
+}
+
+// Address of element |x|: getPtr() advanced by x * getElementSizeBytes() bytes.
+static const void * SC_getElementAtX(RsAllocation va, uint32_t x) {
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    CHECK_OBJ(alloc);
+    const Type *type = alloc->getType();
+    CHECK_OBJ(type);
+    const uint8_t *base = (const uint8_t *)alloc->getPtr();
+    return base + type->getElementSizeBytes() * x;
+}
+
+// Address of element (x, y): rows are getDimX() elements apart, x is the offset within a row.
+static const void * SC_getElementAtXY(RsAllocation va, uint32_t x, uint32_t y) {
+    const Allocation *alloc = static_cast<const Allocation *>(va);
+    CHECK_OBJ(alloc);
+    const Type *type = alloc->getType();
+    CHECK_OBJ(type);
+    const uint8_t *base = (const uint8_t *)alloc->getPtr();
+    return base + type->getElementSizeBytes() * (x + y*type->getDimX());
+}
+
+// Address of element (x, y, z). Previously z was ignored, so every slice aliased slice 0.
+static const void * SC_getElementAtXYZ(RsAllocation va, uint32_t x, uint32_t y, uint32_t z) {
+    const Allocation *a = static_cast<const Allocation *>(va);
+    CHECK_OBJ(a);
+    const Type *t = a->getType();
+    CHECK_OBJ(t);
+    const uint8_t *p = (const uint8_t *)a->getPtr();  // byte view of the allocation's storage
+    return &p[t->getElementSizeBytes() * (x + t->getDimX() * (y + z * t->getDimY()))];  // assumes dense x-then-y-then-z layout, as in the XY accessor -- TODO confirm
+}
+
+static void SC_setObject(void **vdst, void * vsrc) {
+    //LOGE("SC_setObject  %p,%p  %p", vdst, *vdst, vsrc);
+    if (vsrc) {
+        CHECK_OBJ(vsrc);
+        static_cast<ObjectBase *>(vsrc)->incSysRef();
     }
-}
-
-static void SC_ambient(float r, float g, float b, float a)
-{
-    GLfloat params[] = { r, g, b, a };
-    glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT, params);
-}
-
-static void SC_diffuse(float r, float g, float b, float a)
-{
-    GLfloat params[] = { r, g, b, a };
-    glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, params);
-}
-
-static void SC_specular(float r, float g, float b, float a)
-{
-    GLfloat params[] = { r, g, b, a };
-    glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, params);
-}
-
-static void SC_emission(float r, float g, float b, float a)
-{
-    GLfloat params[] = { r, g, b, a };
-    glMaterialfv(GL_FRONT_AND_BACK, GL_EMISSION, params);
-}
-
-static void SC_shininess(float s)
-{
-    glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, s);
-}
-
-static void SC_pointAttenuation(float a, float b, float c)
-{
-    GLfloat params[] = { a, b, c };
-    glPointParameterfv(GL_POINT_DISTANCE_ATTENUATION, params);
-}
-
-static void SC_hsbToRgb(float h, float s, float b, float* rgb)
-{
-    float red = 0.0f;
-    float green = 0.0f;
-    float blue = 0.0f;
-
-    float x = h;
-    float y = s;
-    float z = b;
-
-    float hf = (x - (int) x) * 6.0f;
-    int ihf = (int) hf;
-    float f = hf - ihf;
-    float pv = z * (1.0f - y);
-    float qv = z * (1.0f - y * f);
-    float tv = z * (1.0f - y * (1.0f - f));
-
-    switch (ihf) {
-        case 0:         // Red is the dominant color
-            red = z;
-            green = tv;
-            blue = pv;
-            break;
-        case 1:         // Green is the dominant color
-            red = qv;
-            green = z;
-            blue = pv;
-            break;
-        case 2:
-            red = pv;
-            green = z;
-            blue = tv;
-            break;
-        case 3:         // Blue is the dominant color
-            red = pv;
-            green = qv;
-            blue = z;
-            break;
-        case 4:
-            red = tv;
-            green = pv;
-            blue = z;
-            break;
-        case 5:         // Red is the dominant color
-            red = z;
-            green = pv;
-            blue = qv;
-            break;
+    if (vdst[0]) {
+        CHECK_OBJ(vdst[0]);
+        static_cast<ObjectBase *>(vdst[0])->decSysRef();
     }
-
-    rgb[0] = red;
-    rgb[1] = green;
-    rgb[2] = blue;
+    *vdst = vsrc;
+    //LOGE("SC_setObject *");
 }
-
-static int SC_hsbToAbgr(float h, float s, float b, float a)
-{
-    float rgb[3];
-    SC_hsbToRgb(h, s, b, rgb);
-    return int(a      * 255.0f) << 24 |
-           int(rgb[2] * 255.0f) << 16 |
-           int(rgb[1] * 255.0f) <<  8 |
-           int(rgb[0] * 255.0f);
-}
-
-static void SC_hsb(float h, float s, float b, float a)
-{
-    GET_TLS();
-    float rgb[3];
-    SC_hsbToRgb(h, s, b, rgb);
-    if (rsc->checkVersion2_0()) {
-        glVertexAttrib4f(1, rgb[0], rgb[1], rgb[2], a);
-    } else {
-        glColor4f(rgb[0], rgb[1], rgb[2], a);
+static void SC_clearObject(void **vdst) {
+    //LOGE("SC_clearObject  %p,%p", vdst, *vdst);
+    if (vdst[0]) {
+        CHECK_OBJ(vdst[0]);
+        static_cast<ObjectBase *>(vdst[0])->decSysRef();
     }
+    *vdst = NULL;
+    //LOGE("SC_clearObject *");
+}
+static bool SC_isObject(RsAllocation vsrc) {
+    return vsrc != NULL;
 }
 
-static void SC_uploadToTexture(RsAllocation va, uint32_t baseMipLevel)
+
+
+static void SC_debugF(const char *s, float f) {  // logs label s, then f as %f and as a hex bit pattern
+    LOGE("%s %f, 0x%08x", s, f, *((int *) (&f)));  // NOTE(review): reads f through an int* cast (type pun); assumes int and float have the same size
+}
+static void SC_debugFv2(const char *s, float f1, float f2) {  // logs label s and two floats as {f1, f2}
+    LOGE("%s {%f, %f}", s, f1, f2);
+}
+static void SC_debugFv3(const char *s, float f1, float f2, float f3) {  // logs label s and three floats as {f1, f2, f3}
+    LOGE("%s {%f, %f, %f}", s, f1, f2, f3);
+}
+static void SC_debugFv4(const char *s, float f1, float f2, float f3, float f4) {  // logs label s and four floats as {f1, f2, f3, f4}
+    LOGE("%s {%f, %f, %f, %f}", s, f1, f2, f3, f4);
+}
+static void SC_debugD(const char *s, double d) {  // logs label s, then d as %f and as a hex bit pattern
+    LOGE("%s %f, 0x%08llx", s, d, *((long long *) (&d)));  // NOTE(review): type pun via long long*; %08llx pads to only 8 digits, a 64-bit pattern can need 16
+}
+static void SC_debugFM4v4(const char *s, const float *f) {  // logs 16 floats from f as four labelled lines
+    LOGE("%s {%f, %f, %f, %f", s, f[0], f[4], f[8], f[12]);  // line i prints f[i], f[i+4], f[i+8], f[i+12]
+    LOGE("%s  %f, %f, %f, %f", s, f[1], f[5], f[9], f[13]);
+    LOGE("%s  %f, %f, %f, %f", s, f[2], f[6], f[10], f[14]);
+    LOGE("%s  %f, %f, %f, %f}", s, f[3], f[7], f[11], f[15]);
+}
+static void SC_debugFM3v3(const char *s, const float *f) {  // logs 9 floats from f as three labelled lines
+    LOGE("%s {%f, %f, %f", s, f[0], f[3], f[6]);  // line i prints f[i], f[i+3], f[i+6]
+    LOGE("%s  %f, %f, %f", s, f[1], f[4], f[7]);
+    LOGE("%s  %f, %f, %f}",s, f[2], f[5], f[8]);
+}
+static void SC_debugFM2v2(const char *s, const float *f) {  // logs 4 floats from f as two labelled lines
+    LOGE("%s {%f, %f", s, f[0], f[2]);  // line i prints f[i], f[i+2]
+    LOGE("%s  %f, %f}",s, f[1], f[3]);
+}
+
+static void SC_debugI32(const char *s, int32_t i) {  // logs label s, then i in signed decimal and in hex
+    LOGE("%s %i  0x%x", s, i, i);
+}
+static void SC_debugU32(const char *s, uint32_t i) {  // logs label s, then i in unsigned decimal and in hex
+    LOGE("%s %u  0x%x", s, i, i);
+}
+static void SC_debugLL64(const char *s, long long ll) {  // logs label s, then ll in signed decimal and in hex
+    LOGE("%s %lld  0x%llx", s, ll, ll);
+}
+static void SC_debugULL64(const char *s, unsigned long long ll) {  // logs label s, then ll in unsigned decimal and in hex
+    LOGE("%s %llu  0x%llx", s, ll, ll);
+}
+
+static void SC_debugP(const char *s, const void *p) {  // logs label s followed by the pointer value p
+    LOGE("%s %p", s, p);
+}
+
+static uint32_t SC_toClient2(int cmdID, void *data, int len)
 {
     GET_TLS();
-    rsi_AllocationUploadToTexture(rsc, va, false, baseMipLevel);
+    //LOGE("SC_toClient2 %i %i", cmdID, len);
+    return rsc->sendMessageToClient(data, cmdID, len, false);
 }
 
-static void SC_uploadToBufferObject(RsAllocation va)
+static uint32_t SC_toClient(int cmdID)
 {
     GET_TLS();
-    rsi_AllocationUploadToBufferObject(rsc, va);
+    //LOGE("SC_toClient %i", cmdID);
+    return rsc->sendMessageToClient(NULL, cmdID, 0, false);
 }
 
-static void SC_syncToGL(RsAllocation va)
+static uint32_t SC_toClientBlocking2(int cmdID, void *data, int len)
 {
     GET_TLS();
-    Allocation *a = static_cast<Allocation *>(va);
-
+    //LOGE("SC_toClientBlocking %i %i", cmdID, len);
+    return rsc->sendMessageToClient(data, cmdID, len, true);
 }
 
-static void SC_ClearColor(float r, float g, float b, float a)
-{
-    //LOGE("c %f %f %f %f", r, g, b, a);
-    GET_TLS();
-    sc->mEnviroment.mClearColor[0] = r;
-    sc->mEnviroment.mClearColor[1] = g;
-    sc->mEnviroment.mClearColor[2] = b;
-    sc->mEnviroment.mClearColor[3] = a;
-}
-
-static void SC_debugF(const char *s, float f)
-{
-    LOGE("%s %f", s, f);
-}
-
-static void SC_debugHexF(const char *s, float f)
-{
-    LOGE("%s 0x%x", s, *((int *) (&f)));
-}
-
-static void SC_debugI32(const char *s, int32_t i)
-{
-    LOGE("%s %i", s, i);
-}
-
-static void SC_debugHexI32(const char *s, int32_t i)
-{
-    LOGE("%s 0x%x", s, i);
-}
-
-static uint32_t SC_getWidth()
+static uint32_t SC_toClientBlocking(int cmdID)
 {
     GET_TLS();
-    return rsc->getWidth();
+    //LOGE("SC_toClientBlocking %i", cmdID);
+    return rsc->sendMessageToClient(NULL, cmdID, 0, true);
 }
 
-static uint32_t SC_getHeight()
+// Signed integer division helper: yields a / b exactly as the built-in '/' operator does.
+int SC_divsi3(int a, int b) {
+    return a / b;
+}
+
+int SC_getAllocation(const void *ptr)
 {
     GET_TLS();
-    return rsc->getHeight();
+    const Allocation *alloc = sc->ptrToAllocation(ptr);
+    return (int)alloc;
 }
 
-static uint32_t SC_colorFloatRGBAtoUNorm8(float r, float g, float b, float a)
+void SC_allocationMarkDirty(RsAllocation a)
 {
-    uint32_t c = 0;
-    c |= (uint32_t)(r * 255.f + 0.5f);
-    c |= ((uint32_t)(g * 255.f + 0.5f)) << 8;
-    c |= ((uint32_t)(b * 255.f + 0.5f)) << 16;
-    c |= ((uint32_t)(a * 255.f + 0.5f)) << 24;
-    return c;
+    Allocation *alloc = static_cast<Allocation *>(a);
+    alloc->sendDirty();
 }
 
-static uint32_t SC_colorFloatRGBAto565(float r, float g, float b)
-{
-    uint32_t ir = (uint32_t)(r * 255.f + 0.5f);
-    uint32_t ig = (uint32_t)(g * 255.f + 0.5f);
-    uint32_t ib = (uint32_t)(b * 255.f + 0.5f);
-    return rs888to565(ir, ig, ib);
-}
-
-static uint32_t SC_toClient(void *data, int cmdID, int len, int waitForSpace)
+void SC_ForEach(RsScript vs,
+                RsAllocation vin,
+                RsAllocation vout,
+                const void *usr)
 {
     GET_TLS();
-    return rsc->sendMessageToClient(data, cmdID, len, waitForSpace != 0);
+    const Allocation *ain = static_cast<const Allocation *>(vin);
+    Allocation *aout = static_cast<Allocation *>(vout);
+    Script *s = static_cast<Script *>(vs);
+    s->runForEach(rsc, ain, aout, usr);
 }
 
-static void SC_scriptCall(int scriptID)
+void SC_ForEach2(RsScript vs,
+                RsAllocation vin,
+                RsAllocation vout,
+                const void *usr,
+                const RsScriptCall *call)
 {
     GET_TLS();
-    rsc->runScript((Script *)scriptID, 0);
+    const Allocation *ain = static_cast<const Allocation *>(vin);
+    Allocation *aout = static_cast<Allocation *>(vout);
+    Script *s = static_cast<Script *>(vs);
+    s->runForEach(rsc, ain, aout, usr, call);
 }
 
-
 //////////////////////////////////////////////////////////////////////////////
 // Class implementation
 //////////////////////////////////////////////////////////////////////////////
 
-ScriptCState::SymbolTable_t ScriptCState::gSyms[] = {
-    // IO
-    { "loadI32", (void *)&SC_loadI32,
-        "int", "(int, int)" },
-    //{ "loadU32", (void *)&SC_loadU32, "unsigned int", "(int, int)" },
-    { "loadF", (void *)&SC_loadF,
-        "float", "(int, int)" },
-    { "loadArrayF", (void *)&SC_loadArrayF,
-        "float*", "(int, int)" },
-    { "loadArrayI32", (void *)&SC_loadArrayI32,
-        "int*", "(int, int)" },
-    { "loadVec4", (void *)&SC_loadVec4,
-        "void", "(int, int, float *)" },
-    { "loadMatrix", (void *)&SC_loadMatrix,
-        "void", "(int, int, float *)" },
-    { "storeI32", (void *)&SC_storeI32,
-        "void", "(int, int, int)" },
-    //{ "storeU32", (void *)&SC_storeU32, "void", "(int, int, unsigned int)" },
-    { "storeF", (void *)&SC_storeF,
-        "void", "(int, int, float)" },
-    { "storeVec4", (void *)&SC_storeVec4,
-        "void", "(int, int, float *)" },
-    { "storeMatrix", (void *)&SC_storeMatrix,
-        "void", "(int, int, float *)" },
-    { "loadSimpleMeshVerticesF", (void *)&SC_loadSimpleMeshVerticesF,
-        "float*", "(int, int)" },
-    { "updateSimpleMesh", (void *)&SC_updateSimpleMesh,
-        "void", "(int)" },
+// llvm name mangling ref
+//  <builtin-type> ::= v  # void
+//                 ::= b  # bool
+//                 ::= c  # char
+//                 ::= a  # signed char
+//                 ::= h  # unsigned char
+//                 ::= s  # short
+//                 ::= t  # unsigned short
+//                 ::= i  # int
+//                 ::= j  # unsigned int
+//                 ::= l  # long
+//                 ::= m  # unsigned long
+//                 ::= x  # long long, __int64
+//                 ::= y  # unsigned long long, __int64
+//                 ::= f  # float
+//                 ::= d  # double
 
-    // math
-    { "modf", (void *)&fmod,
-        "float", "(float, float)" },
-    { "abs", (void *)&abs,
-        "int", "(int)" },
-    { "absf", (void *)&fabsf,
-        "float", "(float)" },
-    { "sinf_fast", (void *)&SC_sinf_fast,
-        "float", "(float)" },
-    { "cosf_fast", (void *)&SC_cosf_fast,
-        "float", "(float)" },
-    { "sinf", (void *)&sinf,
-        "float", "(float)" },
-    { "cosf", (void *)&cosf,
-        "float", "(float)" },
-    { "asinf", (void *)&asinf,
-        "float", "(float)" },
-    { "acosf", (void *)&acosf,
-        "float", "(float)" },
-    { "atanf", (void *)&atanf,
-        "float", "(float)" },
-    { "atan2f", (void *)&atan2f,
-        "float", "(float, float)" },
-    { "fabsf", (void *)&fabsf,
-        "float", "(float)" },
-    { "randf", (void *)&SC_randf,
-        "float", "(float)" },
-    { "randf2", (void *)&SC_randf2,
-        "float", "(float, float)" },
-    { "floorf", (void *)&floorf,
-        "float", "(float)" },
-    { "fracf", (void *)&SC_fracf,
-        "float", "(float)" },
-    { "ceilf", (void *)&ceilf,
-        "float", "(float)" },
-    { "roundf", (void *)&SC_roundf,
-        "float", "(float)" },
-    { "expf", (void *)&expf,
-        "float", "(float)" },
-    { "logf", (void *)&logf,
-        "float", "(float)" },
-    { "powf", (void *)&powf,
-        "float", "(float, float)" },
-    { "maxf", (void *)&SC_maxf,
-        "float", "(float, float)" },
-    { "minf", (void *)&SC_minf,
-        "float", "(float, float)" },
-    { "sqrt", (void *)&sqrt,
-        "int", "(int)" },
-    { "sqrtf", (void *)&sqrtf,
-        "float", "(float)" },
-    { "sqr", (void *)&SC_sqr,
-        "int", "(int)" },
-    { "sqrf", (void *)&SC_sqrf,
-        "float", "(float)" },
-    { "sign", (void *)&SC_sign,
-        "int", "(int)" },
-    { "signf", (void *)&SC_signf,
-        "float", "(float)" },
-    { "clamp", (void *)&SC_clamp,
-        "int", "(int, int, int)" },
-    { "clampf", (void *)&SC_clampf,
-        "float", "(float, float, float)" },
-    { "distf2", (void *)&SC_distf2,
-        "float", "(float, float, float, float)" },
-    { "distf3", (void *)&SC_distf3,
-        "float", "(float, float, float, float, float, float)" },
-    { "magf2", (void *)&SC_magf2,
-        "float", "(float, float)" },
-    { "magf3", (void *)&SC_magf3,
-        "float", "(float, float, float)" },
-    { "radf", (void *)&SC_radf,
-        "float", "(float)" },
-    { "degf", (void *)&SC_degf,
-        "float", "(float)" },
-    { "lerpf", (void *)&SC_lerpf,
-        "float", "(float, float, float)" },
-    { "normf", (void *)&SC_normf,
-        "float", "(float, float, float)" },
-    { "mapf", (void *)&SC_mapf,
-        "float", "(float, float, float, float, float)" },
-    { "noisef", (void *)&SC_noisef,
-        "float", "(float)" },
-    { "noisef2", (void *)&SC_noisef2,
-        "float", "(float, float)" },
-    { "noisef3", (void *)&SC_noisef3,
-        "float", "(float, float, float)" },
-    { "turbulencef2", (void *)&SC_turbulencef2,
-        "float", "(float, float, float)" },
-    { "turbulencef3", (void *)&SC_turbulencef3,
-        "float", "(float, float, float, float)" },
+static ScriptCState::SymbolTable_t gSyms[] = {
+    { "__divsi3", (void *)&SC_divsi3 },
+
+    // allocation
+    { "_Z19rsAllocationGetDimX13rs_allocation", (void *)&SC_allocGetDimX },
+    { "_Z19rsAllocationGetDimY13rs_allocation", (void *)&SC_allocGetDimY },
+    { "_Z19rsAllocationGetDimZ13rs_allocation", (void *)&SC_allocGetDimZ },
+    { "_Z21rsAllocationGetDimLOD13rs_allocation", (void *)&SC_allocGetDimLOD },
+    { "_Z23rsAllocationGetDimFaces13rs_allocation", (void *)&SC_allocGetDimFaces },
+    { "_Z15rsGetAllocationPKv", (void *)&SC_getAllocation },
+
+    { "_Z14rsGetElementAt13rs_allocationj", (void *)&SC_getElementAtX },
+    { "_Z14rsGetElementAt13rs_allocationjj", (void *)&SC_getElementAtXY },
+    { "_Z14rsGetElementAt13rs_allocationjjj", (void *)&SC_getElementAtXYZ },
+
+    { "_Z11rsSetObjectP10rs_elementS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP10rs_element", (void *)&SC_clearObject },
+    { "_Z10rsIsObject10rs_element", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP7rs_typeS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP7rs_type", (void *)&SC_clearObject },
+    { "_Z10rsIsObject7rs_type", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP13rs_allocationS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP13rs_allocation", (void *)&SC_clearObject },
+    { "_Z10rsIsObject13rs_allocation", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP10rs_samplerS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP10rs_sampler", (void *)&SC_clearObject },
+    { "_Z10rsIsObject10rs_sampler", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP9rs_scriptS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP9rs_script", (void *)&SC_clearObject },
+    { "_Z10rsIsObject9rs_script", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP7rs_meshS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP7rs_mesh", (void *)&SC_clearObject },
+    { "_Z10rsIsObject7rs_mesh", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP19rs_program_fragmentS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP19rs_program_fragment", (void *)&SC_clearObject },
+    { "_Z10rsIsObject19rs_program_fragment", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP17rs_program_vertexS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP17rs_program_vertex", (void *)&SC_clearObject },
+    { "_Z10rsIsObject17rs_program_vertex", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP17rs_program_rasterS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP17rs_program_raster", (void *)&SC_clearObject },
+    { "_Z10rsIsObject17rs_program_raster", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP16rs_program_storeS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP16rs_program_store", (void *)&SC_clearObject },
+    { "_Z10rsIsObject16rs_program_store", (void *)&SC_isObject },
+
+    { "_Z11rsSetObjectP7rs_fontS_", (void *)&SC_setObject },
+    { "_Z13rsClearObjectP7rs_font", (void *)&SC_clearObject },
+    { "_Z10rsIsObject7rs_font", (void *)&SC_isObject },
+
+
+    { "_Z21rsAllocationMarkDirty13rs_allocation", (void *)&SC_allocationMarkDirty },
+
+
+    // Debug
+    { "_Z7rsDebugPKcf", (void *)&SC_debugF },
+    { "_Z7rsDebugPKcff", (void *)&SC_debugFv2 },
+    { "_Z7rsDebugPKcfff", (void *)&SC_debugFv3 },
+    { "_Z7rsDebugPKcffff", (void *)&SC_debugFv4 },
+    { "_Z7rsDebugPKcd", (void *)&SC_debugD },
+    { "_Z7rsDebugPKcPK12rs_matrix4x4", (void *)&SC_debugFM4v4 },
+    { "_Z7rsDebugPKcPK12rs_matrix3x3", (void *)&SC_debugFM3v3 },
+    { "_Z7rsDebugPKcPK12rs_matrix2x2", (void *)&SC_debugFM2v2 },
+    { "_Z7rsDebugPKci", (void *)&SC_debugI32 },
+    { "_Z7rsDebugPKcj", (void *)&SC_debugU32 },
+    // Both "long" and "unsigned long" need to be redirected to their
+    // 64-bit counterparts, since we have hacked Slang to use 64-bit
+    // for "long" on Arm (to be similar to Java).
+    { "_Z7rsDebugPKcl", (void *)&SC_debugLL64 },
+    { "_Z7rsDebugPKcm", (void *)&SC_debugULL64 },
+    { "_Z7rsDebugPKcx", (void *)&SC_debugLL64 },
+    { "_Z7rsDebugPKcy", (void *)&SC_debugULL64 },
+    { "_Z7rsDebugPKcPKv", (void *)&SC_debugP },
+
+    // RS Math
+    { "_Z6rsRandi", (void *)&SC_randi },
+    { "_Z6rsRandii", (void *)&SC_randi2 },
+    { "_Z6rsRandf", (void *)&SC_randf },
+    { "_Z6rsRandff", (void *)&SC_randf2 },
+    { "_Z6rsFracf", (void *)&SC_frac },
 
     // time
-    { "second", (void *)&SC_second,
-        "int", "()" },
-    { "minute", (void *)&SC_minute,
-        "int", "()" },
-    { "hour", (void *)&SC_hour,
-        "int", "()" },
-    { "day", (void *)&SC_day,
-        "int", "()" },
-    { "month", (void *)&SC_month,
-        "int", "()" },
-    { "year", (void *)&SC_year,
-        "int", "()" },
-    { "uptimeMillis", (void*)&SC_uptimeMillis,
-        "int", "()" },      // TODO: use long instead
-    { "startTimeMillis", (void*)&SC_startTimeMillis,
-        "int", "()" },      // TODO: use long instead
-    { "elapsedTimeMillis", (void*)&SC_elapsedTimeMillis,
-        "int", "()" },      // TODO: use long instead
+    { "_Z8rsSecondv", (void *)&SC_second },
+    { "_Z8rsMinutev", (void *)&SC_minute },
+    { "_Z6rsHourv", (void *)&SC_hour },
+    { "_Z5rsDayv", (void *)&SC_day },
+    { "_Z7rsMonthv", (void *)&SC_month },
+    { "_Z6rsYearv", (void *)&SC_year },
+    { "_Z14rsUptimeMillisv", (void*)&SC_uptimeMillis },
+    { "_Z13rsUptimeNanosv", (void*)&SC_uptimeNanos },
+    { "_Z7rsGetDtv", (void*)&SC_getDt },
 
-    // matrix
-    { "matrixLoadIdentity", (void *)&SC_matrixLoadIdentity,
-        "void", "(float *mat)" },
-    { "matrixLoadFloat", (void *)&SC_matrixLoadFloat,
-        "void", "(float *mat, float *f)" },
-    { "matrixLoadMat", (void *)&SC_matrixLoadMat,
-        "void", "(float *mat, float *newmat)" },
-    { "matrixLoadRotate", (void *)&SC_matrixLoadRotate,
-        "void", "(float *mat, float rot, float x, float y, float z)" },
-    { "matrixLoadScale", (void *)&SC_matrixLoadScale,
-        "void", "(float *mat, float x, float y, float z)" },
-    { "matrixLoadTranslate", (void *)&SC_matrixLoadTranslate,
-        "void", "(float *mat, float x, float y, float z)" },
-    { "matrixLoadMultiply", (void *)&SC_matrixLoadMultiply,
-        "void", "(float *mat, float *lhs, float *rhs)" },
-    { "matrixMultiply", (void *)&SC_matrixMultiply,
-        "void", "(float *mat, float *rhs)" },
-    { "matrixRotate", (void *)&SC_matrixRotate,
-        "void", "(float *mat, float rot, float x, float y, float z)" },
-    { "matrixScale", (void *)&SC_matrixScale,
-        "void", "(float *mat, float x, float y, float z)" },
-    { "matrixTranslate", (void *)&SC_matrixTranslate,
-        "void", "(float *mat, float x, float y, float z)" },
+    { "_Z14rsSendToClienti", (void *)&SC_toClient },
+    { "_Z14rsSendToClientiPKvj", (void *)&SC_toClient2 },
+    { "_Z22rsSendToClientBlockingi", (void *)&SC_toClientBlocking },
+    { "_Z22rsSendToClientBlockingiPKvj", (void *)&SC_toClientBlocking2 },
 
-    // vector
-    { "vec2Rand", (void *)&SC_vec2Rand,
-        "void", "(float *vec, float maxLen)" },
+    { "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach },
+    //{ "_Z9rsForEach9rs_script13rs_allocationS0_PKv", (void *)&SC_ForEach2 },
 
-    // vec3
-    { "vec3Norm", (void *)&SC_vec3Norm,
-        "void", "(struct vecF32_3_s *)" },
-    { "vec3Length", (void *)&SC_vec3Length,
-        "float", "(struct vecF32_3_s *)" },
-    { "vec3Add", (void *)&SC_vec3Add,
-        "void", "(struct vecF32_3_s *dest, struct vecF32_3_s *lhs, struct vecF32_3_s *rhs)" },
-    { "vec3Sub", (void *)&SC_vec3Sub,
-        "void", "(struct vecF32_3_s *dest, struct vecF32_3_s *lhs, struct vecF32_3_s *rhs)" },
-    { "vec3Cross", (void *)&SC_vec3Cross,
-        "void", "(struct vecF32_3_s *dest, struct vecF32_3_s *lhs, struct vecF32_3_s *rhs)" },
-    { "vec3Dot", (void *)&SC_vec3Dot,
-        "float", "(struct vecF32_3_s *lhs, struct vecF32_3_s *rhs)" },
-    { "vec3Scale", (void *)&SC_vec3Scale,
-        "void", "(struct vecF32_3_s *lhs, float scale)" },
+////////////////////////////////////////////////////////////////////
 
-    // vec4
-    { "vec4Norm", (void *)&SC_vec4Norm,
-        "void", "(struct vecF32_4_s *)" },
-    { "vec4Length", (void *)&SC_vec4Length,
-        "float", "(struct vecF32_4_s *)" },
-    { "vec4Add", (void *)&SC_vec4Add,
-        "void", "(struct vecF32_4_s *dest, struct vecF32_4_s *lhs, struct vecF32_4_s *rhs)" },
-    { "vec4Sub", (void *)&SC_vec4Sub,
-        "void", "(struct vecF32_4_s *dest, struct vecF32_4_s *lhs, struct vecF32_4_s *rhs)" },
-    { "vec4Dot", (void *)&SC_vec4Dot,
-        "float", "(struct vecF32_4_s *lhs, struct vecF32_4_s *rhs)" },
-    { "vec4Scale", (void *)&SC_vec4Scale,
-        "void", "(struct vecF32_4_s *lhs, float scale)" },
+    //{ "sinf_fast", (void *)&SC_sinf_fast },
+    //{ "cosf_fast", (void *)&SC_cosf_fast },
 
-    // context
-    { "bindProgramFragment", (void *)&SC_bindProgramFragment,
-        "void", "(int)" },
-    { "bindProgramFragmentStore", (void *)&SC_bindProgramFragmentStore,
-        "void", "(int)" },
-    { "bindProgramStore", (void *)&SC_bindProgramFragmentStore,
-        "void", "(int)" },
-    { "bindProgramVertex", (void *)&SC_bindProgramVertex,
-        "void", "(int)" },
-    { "bindSampler", (void *)&SC_bindSampler,
-        "void", "(int, int, int)" },
-    { "bindTexture", (void *)&SC_bindTexture,
-        "void", "(int, int, int)" },
-
-    // vp
-    { "vpLoadModelMatrix", (void *)&SC_vpLoadModelMatrix,
-        "void", "(void *)" },
-    { "vpLoadTextureMatrix", (void *)&SC_vpLoadTextureMatrix,
-        "void", "(void *)" },
-
-
-
-    // drawing
-    { "drawRect", (void *)&SC_drawRect,
-        "void", "(float x1, float y1, float x2, float y2, float z)" },
-    { "drawQuad", (void *)&SC_drawQuad,
-        "void", "(float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3, float x4, float y4, float z4)" },
-    { "drawQuadTexCoords", (void *)&SC_drawQuadTexCoords,
-        "void", "(float x1, float y1, float z1, float u1, float v1, float x2, float y2, float z2, float u2, float v2, float x3, float y3, float z3, float u3, float v3, float x4, float y4, float z4, float u4, float v4)" },
-    { "drawSprite", (void *)&SC_drawSprite,
-        "void", "(float x, float y, float z, float w, float h)" },
-    { "drawSpriteScreenspace", (void *)&SC_drawSpriteScreenspace,
-        "void", "(float x, float y, float z, float w, float h)" },
-    { "drawSpriteScreenspaceCropped", (void *)&SC_drawSpriteScreenspaceCropped,
-        "void", "(float x, float y, float z, float w, float h, float cx0, float cy0, float cx1, float cy1)" },
-    { "drawLine", (void *)&SC_drawLine,
-        "void", "(float x1, float y1, float z1, float x2, float y2, float z2)" },
-    { "drawPoint", (void *)&SC_drawPoint,
-        "void", "(float x1, float y1, float z1)" },
-    { "drawSimpleMesh", (void *)&SC_drawSimpleMesh,
-        "void", "(int ism)" },
-    { "drawSimpleMeshRange", (void *)&SC_drawSimpleMeshRange,
-        "void", "(int ism, int start, int len)" },
-
-
-    // misc
-    { "pfClearColor", (void *)&SC_ClearColor,
-        "void", "(float, float, float, float)" },
-    { "color", (void *)&SC_color,
-        "void", "(float, float, float, float)" },
-    { "hsb", (void *)&SC_hsb,
-        "void", "(float, float, float, float)" },
-    { "hsbToRgb", (void *)&SC_hsbToRgb,
-        "void", "(float, float, float, float*)" },
-    { "hsbToAbgr", (void *)&SC_hsbToAbgr,
-        "int", "(float, float, float, float)" },
-    { "ambient", (void *)&SC_ambient,
-        "void", "(float, float, float, float)" },
-    { "diffuse", (void *)&SC_diffuse,
-        "void", "(float, float, float, float)" },
-    { "specular", (void *)&SC_specular,
-        "void", "(float, float, float, float)" },
-    { "emission", (void *)&SC_emission,
-        "void", "(float, float, float, float)" },
-    { "shininess", (void *)&SC_shininess,
-        "void", "(float)" },
-    { "pointAttenuation", (void *)&SC_pointAttenuation,
-        "void", "(float, float, float)" },
-
-    { "uploadToTexture", (void *)&SC_uploadToTexture,
-        "void", "(int, int)" },
-    { "uploadToBufferObject", (void *)&SC_uploadToBufferObject,
-        "void", "(int)" },
-
-    { "syncToGL", (void *)&SC_syncToGL,
-        "void", "(int)" },
-
-    { "colorFloatRGBAtoUNorm8", (void *)&SC_colorFloatRGBAtoUNorm8,
-        "int", "(float, float, float, float)" },
-    { "colorFloatRGBto565", (void *)&SC_colorFloatRGBAto565,
-        "int", "(float, float, float)" },
-
-
-    { "getWidth", (void *)&SC_getWidth,
-        "int", "()" },
-    { "getHeight", (void *)&SC_getHeight,
-        "int", "()" },
-
-    { "sendToClient", (void *)&SC_toClient,
-        "int", "(void *data, int cmdID, int len, int waitForSpace)" },
-
-
-    { "debugF", (void *)&SC_debugF,
-        "void", "(void *, float)" },
-    { "debugI32", (void *)&SC_debugI32,
-        "void", "(void *, int)" },
-    { "debugHexF", (void *)&SC_debugHexF,
-        "void", "(void *, float)" },
-    { "debugHexI32", (void *)&SC_debugHexI32,
-        "void", "(void *, int)" },
-
-    { "scriptCall", (void *)&SC_scriptCall,
-        "void", "(int)" },
-
-
-    { NULL, NULL, NULL, NULL }
+    { NULL, NULL }
 };
 
 const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbol(const char *sym)
@@ -1400,17 +565,3 @@
     return NULL;
 }
 
-void ScriptCState::appendDecls(String8 *str)
-{
-    ScriptCState::SymbolTable_t *syms = gSyms;
-    while (syms->mPtr) {
-        str->append(syms->mRet);
-        str->append(" ");
-        str->append(syms->mName);
-        str->append(syms->mParam);
-        str->append(";\n");
-        syms++;
-    }
-}
-
-
diff --git a/rsScriptC_LibCL.cpp b/rsScriptC_LibCL.cpp
new file mode 100644
index 0000000..ce8e7b2
--- /dev/null
+++ b/rsScriptC_LibCL.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptC.h"
+
+// Implements rs_cl.rsh
+
+
+using namespace android;
+using namespace android::renderscript;
+
+
+static float SC_acospi(float v) {  // acos(v) divided by pi
+    return acosf(v)/ M_PI;
+}
+
+static float SC_asinpi(float v) {  // asin(v) divided by pi
+    return asinf(v) / M_PI;
+}
+
+static float SC_atanpi(float v) {  // atan(v) divided by pi
+    return atanf(v) / M_PI;
+}
+
+static float SC_atan2pi(float y, float x) {  // atan2(y, x) divided by pi
+    return atan2f(y, x) / M_PI;
+}
+
+static float SC_cospi(float v) {  // cosine of v * pi
+    return cosf(v * M_PI);
+}
+
+static float SC_exp10(float v) {
+    // Base-10 exponential: 10 raised to the power v.
+    return pow(10.f, v);
+}
+
+static float SC_fract(float v, float *iptr) {  // returns v - floor(v); floor(v) is stored through iptr
+    float whole = floorf(v);  // stays in float: the old (int) cast overflowed for large |v|
+    iptr[0] = whole;          // iptr is float* to match the registered "_Z5fractfPf" (Pf) signature
+    return fminf(v - whole, 0x1.fffffep-1f);  // capped at the largest float below 1.0
+}
+
+static float SC_log2(float v) {  // base-2 logarithm via change of base: log10(v) / log10(2)
+    return log10(v) / log10(2.f);
+}
+
+static float SC_pown(float v, int p) {  // v raised to the integer power p (p is converted to float first)
+    return powf(v, (float)p);
+}
+
+static float SC_powr(float v, float p) {  // forwards straight to powf; no extra domain check is made here
+    return powf(v, p);
+}
+
+float SC_rootn(float v, int r) {  // v raised to 1/r; NOTE(review): not static unlike its neighbours - confirm nothing else links to it
+    return pow(v, 1.f / r);
+}
+
+float SC_rsqrt(float v) {  // reciprocal square root; NOTE(review): not static unlike its neighbours - confirm this is intended
+    return 1.f / sqrtf(v);
+}
+
+float SC_sincos(float v, float *cosptr) {  // returns sin(v); cos(v) is written through cosptr
+    *cosptr = cosf(v);  // cosptr is dereferenced without a NULL check
+    return sinf(v);
+}
+
+static float SC_sinpi(float v) {  // sine of v * pi
+    return sinf(v * M_PI);
+}
+
+static float SC_tanpi(float v) {  // tangent of v * pi
+    return tanf(v * M_PI);
+}
+
+    //{ "logb", (void *)& },
+    //{ "mad", (void *)& },
+    //{ "nan", (void *)& },
+    //{ "tgamma", (void *)& },
+
+//////////////////////////////////////////////////////////////////////////////
+// Integer
+//////////////////////////////////////////////////////////////////////////////
+
+
+static uint32_t SC_abs_i32(int32_t v) {return v < 0 ? 0u - (uint32_t)v : (uint32_t)v;}  // negated as unsigned: abs(INT32_MIN) overflows int
+static uint16_t SC_abs_i16(int16_t v) {return (uint16_t)abs(v);}  // operand promotes to int, so abs() cannot overflow here
+static uint8_t SC_abs_i8(int8_t v) {return (uint8_t)abs(v);}  // same promotion argument as the 16-bit variant
+
+static uint32_t SC_clz_u32(uint32_t v) {return __builtin_clz(v);}
+static uint16_t SC_clz_u16(uint16_t v) {return (uint16_t)__builtin_clz(v);}
+static uint8_t SC_clz_u8(uint8_t v) {return (uint8_t)__builtin_clz(v);}
+static int32_t SC_clz_i32(int32_t v) {return (int32_t)__builtin_clz((uint32_t)v);}
+static int16_t SC_clz_i16(int16_t v) {return (int16_t)__builtin_clz(v);}
+static int8_t SC_clz_i8(int8_t v) {return (int8_t)__builtin_clz(v);}
+
+static uint32_t SC_max_u32(uint32_t v, uint32_t v2) {return rsMax(v, v2);}  // one rsMax wrapper per integer width and signedness
+static uint16_t SC_max_u16(uint16_t v, uint16_t v2) {return rsMax(v, v2);}
+static uint8_t SC_max_u8(uint8_t v, uint8_t v2) {return rsMax(v, v2);}
+static int32_t SC_max_i32(int32_t v, int32_t v2) {return rsMax(v, v2);}
+static int16_t SC_max_i16(int16_t v, int16_t v2) {return rsMax(v, v2);}
+static int8_t SC_max_i8(int8_t v, int8_t v2) {return rsMax(v, v2);}
+
+static uint32_t SC_min_u32(uint32_t v, uint32_t v2) {return rsMin(v, v2);}  // one rsMin wrapper per integer width and signedness
+static uint16_t SC_min_u16(uint16_t v, uint16_t v2) {return rsMin(v, v2);}
+static uint8_t SC_min_u8(uint8_t v, uint8_t v2) {return rsMin(v, v2);}
+static int32_t SC_min_i32(int32_t v, int32_t v2) {return rsMin(v, v2);}
+static int16_t SC_min_i16(int16_t v, int16_t v2) {return rsMin(v, v2);}
+static int8_t SC_min_i8(int8_t v, int8_t v2) {return rsMin(v, v2);}
+
+//////////////////////////////////////////////////////////////////////////////
+// Float util
+//////////////////////////////////////////////////////////////////////////////
+
+static float SC_clamp_f32(float amount, float low, float high) {
+    if (amount < low) return low;  // the lower bound is tested first; NaN fails both tests and is returned as is
+    return (amount > high) ? high : amount;
+}
+
+static float SC_degrees(float radians) {
+    const double degPerRad = 180.f / M_PI;  // radians-to-degrees factor; M_PI is double, so this is too
+    return radians * degPerRad;
+}
+
+static float SC_max_f32(float v, float v2)  // float overload: defers to rsMax
+{
+    return rsMax(v, v2);
+}
+
+static float SC_min_f32(float v, float v2)  // float overload: defers to rsMin
+{
+    return rsMin(v, v2);
+}
+
+static float SC_mix_f32(float start, float stop, float amount)  // linear blend between start and stop
+{
+    //LOGE("lerpf %f  %f  %f", start, stop, amount);
+    return start + (stop - start) * amount;  // amount == 0 yields start; amount is not clamped
+}
+
+static float SC_radians(float degrees) {
+    const double radPerDeg = M_PI / 180.f;  // degrees-to-radians factor; M_PI is double, so this is too
+    return degrees * radPerDeg;
+}
+
+static float SC_step_f32(float edge, float v)
+{
+    // 0 strictly below the edge, 1 otherwise (a NaN operand fails the test and yields 1).
+    return (v < edge) ? 0.f : 1.f;
+}
+
+static float SC_sign_f32(float value)
+{
+    // -1 or +1 by sign; zero of either sign and NaN match neither test and come back unchanged.
+    if (value < 0) return -1.f;
+    return (value > 0) ? 1.f : value;
+}
+
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Class implementation
+//////////////////////////////////////////////////////////////////////////////
+
+// llvm name mangling ref
+//  <builtin-type> ::= v  # void
+//                 ::= b  # bool
+//                 ::= c  # char
+//                 ::= a  # signed char
+//                 ::= h  # unsigned char
+//                 ::= s  # short
+//                 ::= t  # unsigned short
+//                 ::= i  # int
+//                 ::= j  # unsigned int
+//                 ::= l  # long
+//                 ::= m  # unsigned long
+//                 ::= x  # long long, __int64
+//                 ::= y  # unsigned long long, __int64
+//                 ::= f  # float
+//                 ::= d  # double
+
+static ScriptCState::SymbolTable_t gSyms[] = {  // mangled script symbol -> native implementation; ends at the NULL pair
+    // OpenCL math
+    { "_Z4acosf", (void *)&acosf },
+    { "_Z5acoshf", (void *)&acoshf },
+    { "_Z6acospif", (void *)&SC_acospi },
+    { "_Z4asinf", (void *)&asinf },
+    { "_Z5asinhf", (void *)&asinhf },
+    { "_Z6asinpif", (void *)&SC_asinpi },
+    { "_Z4atanf", (void *)&atanf },
+    { "_Z5atan2ff", (void *)&atan2f },  // two float parameters, hence "ff"
+    { "_Z6atanpif", (void *)&SC_atanpi },
+    { "_Z7atan2piff", (void *)&SC_atan2pi },  // two float parameters, hence "ff"
+    { "_Z4cbrtf", (void *)&cbrtf },
+    { "_Z4ceilf", (void *)&ceilf },
+    { "_Z8copysignff", (void *)&copysignf },
+    { "_Z3cosf", (void *)&cosf },
+    { "_Z4coshf", (void *)&coshf },
+    { "_Z5cospif", (void *)&SC_cospi },
+    { "_Z4erfcf", (void *)&erfcf },
+    { "_Z3erff", (void *)&erff },
+    { "_Z3expf", (void *)&expf },
+    { "_Z4exp2f", (void *)&exp2f },
+    { "_Z5exp10f", (void *)&SC_exp10 },
+    { "_Z5expm1f", (void *)&expm1f },
+    { "_Z4fabsf", (void *)&fabsf },
+    { "_Z4fdimff", (void *)&fdimf },
+    { "_Z5floorf", (void *)&floorf },
+    { "_Z3fmafff", (void *)&fmaf },
+    { "_Z4fmaxff", (void *)&fmaxf },
+    { "_Z4fminff", (void *)&fminf },  // float fmin(float, float)
+    { "_Z4fmodff", (void *)&fmodf },
+    { "_Z5fractfPf", (void *)&SC_fract },
+    { "_Z5frexpfPi", (void *)&frexpf },
+    { "_Z5hypotff", (void *)&hypotf },
+    { "_Z5ilogbf", (void *)&ilogbf },
+    { "_Z5ldexpfi", (void *)&ldexpf },
+    { "_Z6lgammaf", (void *)&lgammaf },
+    { "_Z3logf", (void *)&logf },
+    { "_Z4log2f", (void *)&SC_log2 },
+    { "_Z5log10f", (void *)&log10f },
+    { "_Z5log1pf", (void *)&log1pf },
+    //{ "logb", (void *)& },
+    //{ "mad", (void *)& },
+    { "_Z4modffPf", (void *)&modff },  // mangled like its siblings: modf(float, float *)
+    //{ "nan", (void *)& },
+    { "_Z9nextafterff", (void *)&nextafterf },
+    { "_Z3powff", (void *)&powf },
+    { "_Z4pownfi", (void *)&SC_pown },
+    { "_Z4powrff", (void *)&SC_powr },
+    { "_Z9remainderff", (void *)&remainderf },
+    { "_Z6remquoffPi", (void *)&remquof },  // mangled like its siblings: remquo(float, float, int *)
+    { "_Z4rintf", (void *)&rintf },
+    { "_Z5rootnfi", (void *)&SC_rootn },
+    { "_Z5roundf", (void *)&roundf },
+    { "_Z5rsqrtf", (void *)&SC_rsqrt },
+    { "_Z3sinf", (void *)&sinf },
+    { "_Z6sincosfPf", (void *)&SC_sincos },  // mangled like its siblings: sincos(float, float *)
+    { "_Z4sinhf", (void *)&sinhf },
+    { "_Z5sinpif", (void *)&SC_sinpi },
+    { "_Z4sqrtf", (void *)&sqrtf },
+    { "_Z3tanf", (void *)&tanf },
+    { "_Z4tanhf", (void *)&tanhf },
+    { "_Z5tanpif", (void *)&SC_tanpi },
+    //{ "tgamma", (void *)& },
+    { "_Z5truncf", (void *)&truncf },
+
+    // OpenCL Int
+    { "_Z3absi", (void *)&SC_abs_i32 },
+    { "_Z3abss", (void *)&SC_abs_i16 },
+    { "_Z3absc", (void *)&SC_abs_i8 },
+    { "_Z3clzj", (void *)&SC_clz_u32 },
+    { "_Z3clzt", (void *)&SC_clz_u16 },
+    { "_Z3clzh", (void *)&SC_clz_u8 },
+    { "_Z3clzi", (void *)&SC_clz_i32 },
+    { "_Z3clzs", (void *)&SC_clz_i16 },
+    { "_Z3clzc", (void *)&SC_clz_i8 },
+    { "_Z3maxjj", (void *)&SC_max_u32 },
+    { "_Z3maxtt", (void *)&SC_max_u16 },
+    { "_Z3maxhh", (void *)&SC_max_u8 },
+    { "_Z3maxii", (void *)&SC_max_i32 },
+    { "_Z3maxss", (void *)&SC_max_i16 },
+    { "_Z3maxcc", (void *)&SC_max_i8 },
+    { "_Z3minjj", (void *)&SC_min_u32 },
+    { "_Z3mintt", (void *)&SC_min_u16 },
+    { "_Z3minhh", (void *)&SC_min_u8 },
+    { "_Z3minii", (void *)&SC_min_i32 },
+    { "_Z3minss", (void *)&SC_min_i16 },
+    { "_Z3mincc", (void *)&SC_min_i8 },
+
+    // OpenCL 6.11.4
+    { "_Z5clampfff", (void *)&SC_clamp_f32 },
+    { "_Z7degreesf", (void *)&SC_degrees },
+    { "_Z3maxff", (void *)&SC_max_f32 },
+    { "_Z3minff", (void *)&SC_min_f32 },
+    { "_Z3mixfff", (void *)&SC_mix_f32 },
+    { "_Z7radiansf", (void *)&SC_radians },
+    { "_Z4stepff", (void *)&SC_step_f32 },
+    //{ "smoothstep", (void *)& },
+    { "_Z4signf", (void *)&SC_sign_f32 },
+
+    { NULL, NULL }
+};
+
+const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbolCL(const char *sym)
+{
+    // Walk the CL table front to back; the entry whose mPtr is NULL
+    // is the terminator and ends the scan.
+    for (ScriptCState::SymbolTable_t *entry = gSyms; entry->mPtr; entry++) {
+        if (strcmp(entry->mName, sym) == 0) {
+            return entry;
+        }
+    }
+    // Nothing in this table is registered under sym.
+    return NULL;
+}
+
diff --git a/rsScriptC_LibGL.cpp b/rsScriptC_LibGL.cpp
new file mode 100644
index 0000000..b991cab
--- /dev/null
+++ b/rsScriptC_LibGL.cpp
@@ -0,0 +1,505 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsContext.h"
+#include "rsScriptC.h"
+#include "rsMatrix.h"
+
+#include "utils/Timers.h"
+
+#define GL_GLEXT_PROTOTYPES
+
+#include <GLES/gl.h>
+#include <GLES/glext.h>
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+
+#include <time.h>
+
+using namespace android;
+using namespace android::renderscript;
+
+#define GET_TLS()  Context::ScriptTLSStruct * tls = \
+    (Context::ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \
+    Context * rsc = tls->mContext; \
+    ScriptC * sc = (ScriptC *) tls->mScript
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Context
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_bindTexture(RsProgramFragment vpf, uint32_t slot, RsAllocation va)
+{
+    // Script entry point: forwards to rsi_ProgramBindTexture for fragment program vpf, slot and allocation va.
+    CHECK_OBJ_OR_NULL(va);
+    CHECK_OBJ(vpf);
+    GET_TLS();
+    rsi_ProgramBindTexture(rsc,
+                           static_cast<ProgramFragment *>(vpf),
+                           slot,
+                           static_cast<Allocation *>(va));
+}
+
+static void SC_bindSampler(RsProgramFragment vpf, uint32_t slot, RsSampler vs)
+{
+    // Script entry point: forwards to rsi_ProgramBindSampler for fragment program vpf, slot and sampler vs.
+    CHECK_OBJ_OR_NULL(vs);
+    CHECK_OBJ(vpf);
+    GET_TLS();
+    rsi_ProgramBindSampler(rsc,
+                           static_cast<ProgramFragment *>(vpf),
+                           slot,
+                           static_cast<Sampler *>(vs));
+}
+
+static void SC_bindProgramStore(RsProgramStore pfs)  // script entry point: bind pfs as the context's program store
+{
+    CHECK_OBJ_OR_NULL(pfs);
+    GET_TLS();  // yields rsc, the Context recorded in this thread's TLS slot
+    rsi_ContextBindProgramStore(rsc, pfs);
+}
+
+static void SC_bindProgramFragment(RsProgramFragment pf)  // script entry point: bind pf as the context's fragment program
+{
+    CHECK_OBJ_OR_NULL(pf);
+    GET_TLS();  // yields rsc, the Context recorded in this thread's TLS slot
+    rsi_ContextBindProgramFragment(rsc, pf);
+}
+
+static void SC_bindProgramVertex(RsProgramVertex pv)  // script entry point: bind pv as the context's vertex program
+{
+    CHECK_OBJ_OR_NULL(pv);
+    GET_TLS();  // yields rsc, the Context recorded in this thread's TLS slot
+    rsi_ContextBindProgramVertex(rsc, pv);
+}
+
+static void SC_bindProgramRaster(RsProgramRaster pv)  // script entry point: bind pv as the context's raster program
+{
+    CHECK_OBJ_OR_NULL(pv);
+    GET_TLS();  // yields rsc, the Context recorded in this thread's TLS slot
+    rsi_ContextBindProgramRaster(rsc, pv);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// VP
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_vpLoadProjectionMatrix(const rsc_Matrix *m)  // hands m to setProjectionMatrix on the current vertex program
+{
+    GET_TLS();
+    rsc->getVertex()->setProjectionMatrix(rsc, m);  // getVertex() result is used without a NULL check
+}
+
+static void SC_vpLoadModelMatrix(const rsc_Matrix *m)  // hands m to setModelviewMatrix on the current vertex program
+{
+    GET_TLS();
+    rsc->getVertex()->setModelviewMatrix(rsc, m);  // getVertex() result is used without a NULL check
+}
+
+static void SC_vpLoadTextureMatrix(const rsc_Matrix *m)  // hands m to setTextureMatrix on the current vertex program
+{
+    GET_TLS();
+    rsc->getVertex()->setTextureMatrix(rsc, m);  // getVertex() result is used without a NULL check
+}
+
+
+static void SC_pfConstantColor(RsProgramFragment vpf, float r, float g, float b, float a)  // set the constant colour of fragment program vpf
+{
+    GET_TLS();
+    CHECK_OBJ(vpf);
+    ProgramFragment *pf = static_cast<ProgramFragment *>(vpf);
+    pf->setConstantColor(rsc, r, g, b, a);
+}
+
+static void SC_vpGetProjectionMatrix(rsc_Matrix *m)  // passes m to getProjectionMatrix on the current vertex program
+{
+    GET_TLS();
+    rsc->getVertex()->getProjectionMatrix(rsc, m);  // m is non-const here: presumably the output - confirm
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Drawing
+//////////////////////////////////////////////////////////////////////////////
+
+static void SC_drawQuadTexCoords(float x1, float y1, float z1,
+                                 float u1, float v1,
+                                 float x2, float y2, float z2,
+                                 float u2, float v2,
+                                 float x3, float y3, float z3,
+                                 float u3, float v3,
+                                 float x4, float y4, float z4,
+                                 float u4, float v4)
+{
+    GET_TLS();
+    if (!rsc->setupCheck()) {  // nothing is drawn when setupCheck() reports failure
+        return;
+    }
+
+    //LOGE("Quad");
+    //LOGE("%4.2f, %4.2f, %4.2f", x1, y1, z1);
+    //LOGE("%4.2f, %4.2f, %4.2f", x2, y2, z2);
+    //LOGE("%4.2f, %4.2f, %4.2f", x3, y3, z3);
+    //LOGE("%4.2f, %4.2f, %4.2f", x4, y4, z4);
+
+    float vtx[] = {x1,y1,z1, x2,y2,z2, x3,y3,z3, x4,y4,z4};  // four corners, xyz each
+    const float tex[] = {u1,v1, u2,v2, u3,v3, u4,v4};  // matching uv pair per corner
+
+    VertexArray va;
+    va.add(GL_FLOAT, 3, 12, false, (uint32_t)vtx, "ATTRIB_position");  // NOTE(review): pointer narrowed to uint32_t - lossless only with 32-bit pointers
+    va.add(GL_FLOAT, 2, 8, false, (uint32_t)tex, "ATTRIB_texture0");  // 12 and 8 are presumably byte strides (3 and 2 floats) - confirm
+    va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
+
+    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);  // the four vertices are drawn as one triangle fan
+}
+
+static void SC_drawQuad(float x1, float y1, float z1,
+                        float x2, float y2, float z2,
+                        float x3, float y3, float z3,
+                        float x4, float y4, float z4)
+{
+    SC_drawQuadTexCoords(x1, y1, z1, 0, 1,  // fixed texture coordinates: (0,1), (1,1), (1,0), (0,0)
+                         x2, y2, z2, 1, 1,
+                         x3, y3, z3, 1, 0,
+                         x4, y4, z4, 0, 0);
+}
+
+static void SC_drawSpriteScreenspace(float x, float y, float z, float w, float h)
+{
+    GET_TLS();
+    ObjectBaseRef<const ProgramVertex> tmp(rsc->getVertex());  // remember the caller's vertex program
+    rsc->setVertex(rsc->getDefaultProgramVertex());  // draw with the default vertex program instead
+    //rsc->setupCheck();
+
+    //GLint crop[4] = {0, h, w, -h};
+
+    float sh = rsc->getHeight();
+
+    SC_drawQuad(x,   sh - y,     z,  // every y is subtracted from the context height
+                x+w, sh - y,     z,
+                x+w, sh - (y+h), z,
+                x,   sh - (y+h), z);
+    rsc->setVertex((ProgramVertex *)tmp.get());  // restore the saved program (cast drops const)
+}
+/*
+static void SC_drawSprite(float x, float y, float z, float w, float h)
+{
+    GET_TLS();
+    float vin[3] = {x, y, z};
+    float vout[4];
+
+    //LOGE("ds  in %f %f %f", x, y, z);
+    rsc->getVertex()->transformToScreen(rsc, vout, vin);
+    //LOGE("ds  out %f %f %f %f", vout[0], vout[1], vout[2], vout[3]);
+    vout[0] /= vout[3];
+    vout[1] /= vout[3];
+    vout[2] /= vout[3];
+
+    vout[0] *= rsc->getWidth() / 2;
+    vout[1] *= rsc->getHeight() / 2;
+    vout[0] += rsc->getWidth() / 2;
+    vout[1] += rsc->getHeight() / 2;
+
+    vout[0] -= w/2;
+    vout[1] -= h/2;
+
+    //LOGE("ds  out2 %f %f %f", vout[0], vout[1], vout[2]);
+
+    // U, V, W, H
+    SC_drawSpriteScreenspace(vout[0], vout[1], z, h, w);
+    //rsc->setupCheck();
+}
+*/
+
+static void SC_drawRect(float x1, float y1,
+                        float x2, float y2, float z)
+{
+    //LOGE("SC_drawRect %f,%f  %f,%f  %f", x1, y1, x2, y2, z);
+    SC_drawQuad(x1, y2, z,  // corner order: (x1,y2), (x2,y2), (x2,y1), (x1,y1), all at depth z
+                x2, y2, z,
+                x2, y1, z,
+                x1, y1, z);
+}
+
+static void SC_drawMesh(RsMesh vsm)  // script entry point: render the whole mesh
+{
+    CHECK_OBJ(vsm);
+    GET_TLS();
+    Mesh *sm = static_cast<Mesh *>(vsm);
+    if (!rsc->setupCheck()) {  // nothing is rendered when setupCheck() reports failure
+        return;
+    }
+    sm->render(rsc);
+}
+
+static void SC_drawMeshPrimitive(RsMesh vsm, uint32_t primIndex)  // script entry point: render one primitive of the mesh
+{
+    CHECK_OBJ(vsm);
+    GET_TLS();
+    Mesh *sm = static_cast<Mesh *>(vsm);
+    if (!rsc->setupCheck()) {  // nothing is rendered when setupCheck() reports failure
+        return;
+    }
+    sm->renderPrimitive(rsc, primIndex);  // primIndex is forwarded unchecked
+}
+
+static void SC_drawMeshPrimitiveRange(RsMesh vsm, uint32_t primIndex, uint32_t start, uint32_t len)  // render part of one primitive
+{
+    CHECK_OBJ(vsm);
+    GET_TLS();
+    Mesh *sm = static_cast<Mesh *>(vsm);
+    if (!rsc->setupCheck()) {  // nothing is rendered when setupCheck() reports failure
+        return;
+    }
+    sm->renderPrimitiveRange(rsc, primIndex, start, len);  // primIndex, start and len are forwarded unchecked
+}
+
+static void SC_meshComputeBoundingBox(RsMesh vsm, float *minX, float *minY, float *minZ,
+                                                     float *maxX, float *maxY, float *maxZ)
+{
+    CHECK_OBJ(vsm);
+    GET_TLS();
+    Mesh *sm = static_cast<Mesh *>(vsm);
+    sm->computeBBox();  // called before the mBBoxMin / mBBoxMax reads below
+    *minX = sm->mBBoxMin[0];  // all six output pointers are written without NULL checks
+    *minY = sm->mBBoxMin[1];
+    *minZ = sm->mBBoxMin[2];
+    *maxX = sm->mBBoxMax[0];
+    *maxY = sm->mBBoxMax[1];
+    *maxZ = sm->mBBoxMax[2];
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//////////////////////////////////////////////////////////////////////////////
+
+
+static void SC_color(float r, float g, float b, float a)  // set the constant colour on the context's current fragment program
+{
+    GET_TLS();
+    ProgramFragment *pf = (ProgramFragment *)rsc->getFragment();  // used below without a NULL check
+    pf->setConstantColor(rsc, r, g, b, a);
+}
+
+static void SC_uploadToTexture2(RsAllocation va, uint32_t baseMipLevel)  // texture upload with a caller-chosen base mip level
+{
+    CHECK_OBJ(va);
+    GET_TLS();
+    rsi_AllocationUploadToTexture(rsc, va, false, baseMipLevel);  // third argument is always false here
+}
+static void SC_uploadToTexture(RsAllocation va)  // texture upload with base mip level 0
+{
+    CHECK_OBJ(va);
+    GET_TLS();
+    rsi_AllocationUploadToTexture(rsc, va, false, 0);  // third argument is always false here
+}
+
+static void SC_uploadToBufferObject(RsAllocation va)  // script entry point: forwards va to rsi_AllocationUploadToBufferObject
+{
+    CHECK_OBJ(va);
+    GET_TLS();
+    rsi_AllocationUploadToBufferObject(rsc, va);
+}
+
+static void SC_ClearColor(float r, float g, float b, float a)  // clear the colour buffer to (r, g, b, a)
+{
+    GET_TLS();
+    rsc->setupProgramStore();  // presumably so the bound store's state is applied before clearing - confirm
+
+    glClearColor(r, g, b, a);
+    glClear(GL_COLOR_BUFFER_BIT);
+}
+
+static void SC_ClearDepth(float v)  // clear the depth buffer to v
+{
+    GET_TLS();
+    rsc->setupProgramStore();  // presumably so the bound store's state is applied before clearing - confirm
+
+    glClearDepthf(v);
+    glClear(GL_DEPTH_BUFFER_BIT);
+}
+
+static uint32_t SC_getWidth()  // width reported by the calling thread's Context
+{
+    GET_TLS();
+    return rsc->getWidth();
+}
+
+static uint32_t SC_getHeight()  // height reported by the calling thread's Context
+{
+    GET_TLS();
+    return rsc->getHeight();
+}
+
+static void SC_DrawTextAlloc(RsAllocation va, int x, int y)  // draw the text held in allocation va at (x, y)
+{
+    CHECK_OBJ(va);
+    GET_TLS();
+    Allocation *alloc = static_cast<Allocation *>(va);
+    const char *text = (const char *)alloc->getPtr();
+    size_t allocSize = alloc->getType()->getSizeBytes();
+    rsc->mStateFont.renderText(text, allocSize, x, y);  // NOTE(review): whole allocation size is passed as the length - confirm renderText stops at a terminator
+}
+
+static void SC_DrawText(const char *text, int x, int y)  // draw a NUL-terminated string at (x, y)
+{
+    GET_TLS();
+    size_t textLen = strlen(text);  // text must be non-NULL; no check is made
+    rsc->mStateFont.renderText(text, textLen, x, y);
+}
+
+static void SC_setMetrics(Font::Rect *metrics,
+                          int32_t *left, int32_t *right,
+                          int32_t *top, int32_t *bottom) {
+    // Copy each edge of *metrics into the matching output; NULL outputs are skipped.
+    if (left != NULL) {
+        *left = metrics->left;
+    }
+    if (right != NULL) {
+        *right = metrics->right;
+    }
+    if (top != NULL) {
+        *top = metrics->top;
+    }
+    if (bottom != NULL) {
+        *bottom = metrics->bottom;
+    }
+}
+
+static void SC_MeasureTextAlloc(RsAllocation va,
+                                int32_t *left, int32_t *right,
+                                int32_t *top, int32_t *bottom)
+{
+    CHECK_OBJ(va);
+    GET_TLS();
+    Allocation *alloc = static_cast<Allocation *>(va);
+    const char *text = (const char *)alloc->getPtr();
+    size_t textLen = alloc->getType()->getSizeBytes();  // NOTE(review): allocation size, not string length - confirm measureText stops at a terminator
+    Font::Rect metrics;
+    rsc->mStateFont.measureText(text, textLen, &metrics);
+    SC_setMetrics(&metrics, left, right, top, bottom);  // any of the four outputs may be NULL
+}
+
+static void SC_MeasureText(const char *text,
+                           int32_t *left, int32_t *right,
+                           int32_t *top, int32_t *bottom)
+{
+    GET_TLS();
+    size_t textLen = strlen(text);  // text must be non-NULL; no check is made
+    Font::Rect metrics;
+    rsc->mStateFont.measureText(text, textLen, &metrics);
+    SC_setMetrics(&metrics, left, right, top, bottom);  // any of the four outputs may be NULL
+}
+
+static void SC_BindFont(RsFont font)  // script entry point: forwards font to rsi_ContextBindFont
+{
+    CHECK_OBJ(font);
+    GET_TLS();
+    rsi_ContextBindFont(rsc, font);
+}
+
+static void SC_FontColor(float r, float g, float b, float a)  // set the colour held by the context's font state
+{
+    GET_TLS();
+    rsc->mStateFont.setFontColor(r, g, b, a);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Class implementation
+//////////////////////////////////////////////////////////////////////////////
+
+// llvm name mangling ref
+//  <builtin-type> ::= v  # void
+//                 ::= b  # bool
+//                 ::= c  # char
+//                 ::= a  # signed char
+//                 ::= h  # unsigned char
+//                 ::= s  # short
+//                 ::= t  # unsigned short
+//                 ::= i  # int
+//                 ::= j  # unsigned int
+//                 ::= l  # long
+//                 ::= m  # unsigned long
+//                 ::= x  # long long, __int64
+//                 ::= y  # unsigned long long, __int64
+//                 ::= f  # float
+//                 ::= d  # double
+
+static ScriptCState::SymbolTable_t gSyms[] = {  // mangled rsg* script symbol -> native implementation; ends at the NULL pair
+    { "_Z22rsgBindProgramFragment19rs_program_fragment", (void *)&SC_bindProgramFragment },
+    { "_Z19rsgBindProgramStore16rs_program_store", (void *)&SC_bindProgramStore },
+    { "_Z20rsgBindProgramVertex17rs_program_vertex", (void *)&SC_bindProgramVertex },
+    { "_Z20rsgBindProgramRaster17rs_program_raster", (void *)&SC_bindProgramRaster },
+    { "_Z14rsgBindSampler19rs_program_fragmentj10rs_sampler", (void *)&SC_bindSampler },
+    { "_Z14rsgBindTexture19rs_program_fragmentj13rs_allocation", (void *)&SC_bindTexture },
+
+    { "_Z36rsgProgramVertexLoadProjectionMatrixPK12rs_matrix4x4", (void *)&SC_vpLoadProjectionMatrix },
+    { "_Z31rsgProgramVertexLoadModelMatrixPK12rs_matrix4x4", (void *)&SC_vpLoadModelMatrix },
+    { "_Z33rsgProgramVertexLoadTextureMatrixPK12rs_matrix4x4", (void *)&SC_vpLoadTextureMatrix },
+
+    { "_Z35rsgProgramVertexGetProjectionMatrixP12rs_matrix4x4", (void *)&SC_vpGetProjectionMatrix },
+
+    { "_Z31rsgProgramFragmentConstantColor19rs_program_fragmentffff", (void *)&SC_pfConstantColor },
+
+    { "_Z11rsgGetWidthv", (void *)&SC_getWidth },
+    { "_Z12rsgGetHeightv", (void *)&SC_getHeight },
+
+    { "_Z18rsgUploadToTexture13rs_allocationj", (void *)&SC_uploadToTexture2 },  // overload with a mip-level argument
+    { "_Z18rsgUploadToTexture13rs_allocation", (void *)&SC_uploadToTexture },
+    { "_Z23rsgUploadToBufferObject13rs_allocation", (void *)&SC_uploadToBufferObject },
+
+    { "_Z11rsgDrawRectfffff", (void *)&SC_drawRect },
+    { "_Z11rsgDrawQuadffffffffffff", (void *)&SC_drawQuad },
+    { "_Z20rsgDrawQuadTexCoordsffffffffffffffffffff", (void *)&SC_drawQuadTexCoords },
+    { "_Z24rsgDrawSpriteScreenspacefffff", (void *)&SC_drawSpriteScreenspace },
+
+    { "_Z11rsgDrawMesh7rs_mesh", (void *)&SC_drawMesh },  // three rsgDrawMesh overloads, told apart by their mangled suffix
+    { "_Z11rsgDrawMesh7rs_meshj", (void *)&SC_drawMeshPrimitive },
+    { "_Z11rsgDrawMesh7rs_meshjjj", (void *)&SC_drawMeshPrimitiveRange },
+    { "_Z25rsgMeshComputeBoundingBox7rs_meshPfS0_S0_S0_S0_S0_", (void *)&SC_meshComputeBoundingBox },
+
+    { "_Z13rsgClearColorffff", (void *)&SC_ClearColor },
+    { "_Z13rsgClearDepthf", (void *)&SC_ClearDepth },
+
+    { "_Z11rsgDrawTextPKcii", (void *)&SC_DrawText },  // C-string and allocation overloads of the text calls
+    { "_Z11rsgDrawText13rs_allocationii", (void *)&SC_DrawTextAlloc },
+    { "_Z14rsgMeasureTextPKcPiS1_S1_S1_", (void *)&SC_MeasureText },
+    { "_Z14rsgMeasureText13rs_allocationPiS0_S0_S0_", (void *)&SC_MeasureTextAlloc },
+
+    { "_Z11rsgBindFont7rs_font", (void *)&SC_BindFont },
+    { "_Z12rsgFontColorffff", (void *)&SC_FontColor },
+
+    // misc
+    { "_Z5colorffff", (void *)&SC_color },
+
+    { NULL, NULL }
+};
+
+const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbolGL(const char *sym)
+{
+    // Walk the GL table front to back; the entry whose mPtr is NULL
+    // is the terminator and ends the scan.
+    for (ScriptCState::SymbolTable_t *entry = gSyms; entry->mPtr; entry++) {
+        if (strcmp(entry->mName, sym) == 0) {
+            return entry;
+        }
+    }
+    // Nothing in this table is registered under sym.
+    return NULL;
+}
+
diff --git a/rsShaderCache.cpp b/rsShaderCache.cpp
index 4711d1b..45f6207 100644
--- a/rsShaderCache.cpp
+++ b/rsShaderCache.cpp
@@ -14,10 +14,14 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-
 #include <GLES/gl.h>
 #include <GLES2/gl2.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#endif //ANDROID_RS_BUILD_FOR_HOST
 
 using namespace android;
 using namespace android::renderscript;
@@ -25,20 +29,15 @@
 
 ShaderCache::ShaderCache()
 {
-    mEntryCount = 0;
-    mEntryAllocationCount = 16;
-    mEntries = (entry_t *)calloc(mEntryAllocationCount, sizeof(entry_t));
+    mEntries.setCapacity(16);  // same starting size as the old 16-slot array; NOTE(review): mCurrent is not initialised here
 }
 
 ShaderCache::~ShaderCache()
 {
-    for (uint32_t ct=0; ct < mEntryCount; ct++) {
-        glDeleteProgram(mEntries[ct].program);
+    for (uint32_t ct=0; ct < mEntries.size(); ct++) {  // mEntries now holds pointers to separately allocated entries
+        glDeleteProgram(mEntries[ct]->program);
+        free(mEntries[ct]);  // release the entry_t itself after its GL program
     }
-
-    mEntryCount = 0;
-    mEntryAllocationCount = 0;
-    free(mEntries);
 }
 
 bool ShaderCache::lookup(Context *rsc, ProgramVertex *vtx, ProgramFragment *frag)
@@ -49,61 +48,46 @@
     if (!frag->getShaderID()) {
         frag->loadShader(rsc);
     }
+
+    // Don't try to cache if shaders failed to load
+    if(!vtx->getShaderID() || !frag->getShaderID()) {
+        return false;
+    }
     //LOGV("ShaderCache lookup  vtx %i, frag %i", vtx->getShaderID(), frag->getShaderID());
+    uint32_t entryCount = mEntries.size();
+    for(uint32_t ct = 0; ct < entryCount; ct ++) {
+        if ((mEntries[ct]->vtx == vtx->getShaderID()) &&
+            (mEntries[ct]->frag == frag->getShaderID())) {
 
-    for (uint32_t ct=0; ct < mEntryCount; ct++) {
-        if ((mEntries[ct].vtx == vtx->getShaderID()) &&
-            (mEntries[ct].frag == frag->getShaderID())) {
-
-            //LOGV("SC using program %i", mEntries[ct].program);
-            glUseProgram(mEntries[ct].program);
-            mCurrent = &mEntries[ct];
+            //LOGV("SC using program %i", mEntries[ct]->program);
+            glUseProgram(mEntries[ct]->program);
+            mCurrent = mEntries[ct];
             //LOGV("ShaderCache hit, using %i", ct);
             rsc->checkError("ShaderCache::lookup (hit)");
             return true;
         }
     }
-    // Not in cache, add it.
 
-    if (mEntryAllocationCount == mEntryCount) {
-        // Out of space, make some.
-        mEntryAllocationCount *= 2;
-        entry_t *e = (entry_t *)calloc(mEntryAllocationCount, sizeof(entry_t));
-        if (!e) {
-            LOGE("Out of memory for ShaderCache::lookup");
-            return false;
-        }
-        memcpy(e, mEntries, sizeof(entry_t) * mEntryCount);
-        free(mEntries);
-        mEntries = e;
-    }
-
-    //LOGV("ShaderCache miss, using %i", mEntryCount);
+    //LOGV("ShaderCache miss");
     //LOGE("e0 %x", glGetError());
-
-    entry_t *e = &mEntries[mEntryCount];
+    entry_t *e = (entry_t *)malloc(sizeof(entry_t));
+    mEntries.push(e);
     mCurrent = e;
     e->vtx = vtx->getShaderID();
     e->frag = frag->getShaderID();
     e->program = glCreateProgram();
-    e->mUserVertexProgram = vtx->isUserProgram();
-    if (mEntries[mEntryCount].program) {
+    e->vtxAttrCount = vtx->getAttribCount();
+    if (e->program) {
         GLuint pgm = e->program;
         glAttachShader(pgm, vtx->getShaderID());
         //LOGE("e1 %x", glGetError());
         glAttachShader(pgm, frag->getShaderID());
 
         if (!vtx->isUserProgram()) {
-            glBindAttribLocation(pgm, 0, "ATTRIB_LegacyPosition");
-            glBindAttribLocation(pgm, 1, "ATTRIB_LegacyColor");
-            glBindAttribLocation(pgm, 2, "ATTRIB_LegacyNormal");
-            glBindAttribLocation(pgm, 3, "ATTRIB_LegacyPointSize");
-            glBindAttribLocation(pgm, 4, "ATTRIB_LegacyTexture");
-            e->mVtxAttribSlots[RS_KIND_POSITION] = 0;
-            e->mVtxAttribSlots[RS_KIND_COLOR] = 1;
-            e->mVtxAttribSlots[RS_KIND_NORMAL] = 2;
-            e->mVtxAttribSlots[RS_KIND_POINT_SIZE] = 3;
-            e->mVtxAttribSlots[RS_KIND_TEXTURE] = 4;
+            glBindAttribLocation(pgm, 0, "ATTRIB_position");
+            glBindAttribLocation(pgm, 1, "ATTRIB_color");
+            glBindAttribLocation(pgm, 2, "ATTRIB_normal");
+            glBindAttribLocation(pgm, 3, "ATTRIB_texture0");
         }
 
         //LOGE("e2 %x", glGetError());
@@ -126,14 +110,15 @@
             rsc->setError(RS_ERROR_BAD_SHADER, "Error linking GL Programs");
             return false;
         }
-        if (vtx->isUserProgram()) {
-            for (uint32_t ct=0; ct < vtx->getAttribCount(); ct++) {
-                e->mVtxAttribSlots[ct] = glGetAttribLocation(pgm, vtx->getAttribName(ct));
-                if (rsc->props.mLogShaders) {
-                    LOGV("vtx A %i, %s = %d\n", ct, vtx->getAttribName(ct).string(), e->mVtxAttribSlots[ct]);
-                }
+
+        for (uint32_t ct=0; ct < e->vtxAttrCount; ct++) {
+            e->mVtxAttribSlots[ct] = glGetAttribLocation(pgm, vtx->getAttribName(ct));
+            e->mVtxAttribNames[ct] = vtx->getAttribName(ct).string();
+            if (rsc->props.mLogShaders) {
+                LOGV("vtx A %i, %s = %d\n", ct, vtx->getAttribName(ct).string(), e->mVtxAttribSlots[ct]);
             }
         }
+
         for (uint32_t ct=0; ct < vtx->getUniformCount(); ct++) {
             e->mVtxUniformSlots[ct] = glGetUniformLocation(pgm, vtx->getUniformName(ct));
             if (rsc->props.mLogShaders) {
@@ -151,17 +136,47 @@
     e->mIsValid = true;
     //LOGV("SC made program %i", e->program);
     glUseProgram(e->program);
-    mEntryCount++;
     rsc->checkError("ShaderCache::lookup (miss)");
     return true;
 }
 
+int32_t ShaderCache::vtxAttribSlot(const String8 &attrName) const {  // slot recorded for attrName in the current entry, or -1
+    for (uint32_t ct=0; ct < mCurrent->vtxAttrCount; ct++) {  // mCurrent is dereferenced without a NULL check
+        if(attrName == mCurrent->mVtxAttribNames[ct]) {
+            return mCurrent->mVtxAttribSlots[ct];
+        }
+    }
+    return -1;  // name not recorded for the current entry
+}
+
 void ShaderCache::cleanupVertex(uint32_t id)
 {
+    int32_t numEntries = (int32_t)mEntries.size();  // signed, so ct-- below may step to -1 at index 0
+    for(int32_t ct = 0; ct < numEntries; ct ++) {  // drop every cached program built from vertex shader id
+        if (mEntries[ct]->vtx == id) {
+            glDeleteProgram(mEntries[ct]->program);
+            if (mCurrent == mEntries[ct]) { mCurrent = NULL; }  // never leave mCurrent pointing at freed memory
+            free(mEntries[ct]);
+            mEntries.removeAt(ct);
+            numEntries = (int32_t)mEntries.size();
+            ct --;  // the following entry slid into this index; revisit it
+        }
+    }
 }
 
 void ShaderCache::cleanupFragment(uint32_t id)
 {
+    int32_t numEntries = (int32_t)mEntries.size();  // signed, so ct-- below may step to -1 at index 0
+    for(int32_t ct = 0; ct < numEntries; ct ++) {  // drop every cached program built from fragment shader id
+        if (mEntries[ct]->frag == id) {
+            glDeleteProgram(mEntries[ct]->program);
+            if (mCurrent == mEntries[ct]) { mCurrent = NULL; }  // never leave mCurrent pointing at freed memory
+            free(mEntries[ct]);
+            mEntries.removeAt(ct);
+            numEntries = (int32_t)mEntries.size();
+            ct --;  // the following entry slid into this index; revisit it
+        }
+    }
 }
 
 void ShaderCache::cleanupAll()
diff --git a/rsShaderCache.h b/rsShaderCache.h
index df99ccc..35ff95b 100644
--- a/rsShaderCache.h
+++ b/rsShaderCache.h
@@ -40,29 +40,30 @@
 
     void cleanupAll();
 
-    int32_t vtxAttribSlot(uint32_t a) const {return mCurrent->mVtxAttribSlots[a];}
+    int32_t vtxAttribSlot(const String8 &attrName) const;
     int32_t vtxUniformSlot(uint32_t a) const {return mCurrent->mVtxUniformSlots[a];}
     int32_t fragAttribSlot(uint32_t a) const {return mCurrent->mFragAttribSlots[a];}
     int32_t fragUniformSlot(uint32_t a) const {return mCurrent->mFragUniformSlots[a];}
-    bool isUserVertexProgram() const {return mCurrent->mUserVertexProgram;}
 
 protected:
     typedef struct {
         uint32_t vtx;
         uint32_t frag;
         uint32_t program;
+        uint32_t vtxAttrCount;
+        const char* mVtxAttribNames[Program::MAX_ATTRIBS];
         int32_t mVtxAttribSlots[Program::MAX_ATTRIBS];
         int32_t mVtxUniformSlots[Program::MAX_UNIFORMS];
         int32_t mFragAttribSlots[Program::MAX_ATTRIBS];
         int32_t mFragUniformSlots[Program::MAX_UNIFORMS];
-        bool mUserVertexProgram;
         bool mIsValid;
     } entry_t;
-    entry_t *mEntries;
+    // Cached programs; each entry is released with free() during cleanup.
+    Vector<entry_t*> mEntries;
     entry_t *mCurrent;
 
-    uint32_t mEntryCount;
-    uint32_t mEntryAllocationCount;
+    // The number of entries is now mEntries.size(); the separate
+    // mEntryCount / mEntryAllocationCount bookkeeping is gone.
 
 };
 
diff --git a/rsSignal.cpp b/rsSignal.cpp
new file mode 100644
index 0000000..9239bfd
--- /dev/null
+++ b/rsSignal.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "rsSignal.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+
+// Starts in the "set" state, so the first wait() returns without blocking.
+// The mutex and condition variable are only created by init().
+Signal::Signal()
+{
+    mSet = true;
+}
+
+// NOTE(review): both primitives are destroyed unconditionally, which presumes
+// that init() succeeded before the object is torn down -- confirm in callers.
+Signal::~Signal()
+{
+    pthread_mutex_destroy(&mMutex);
+    pthread_cond_destroy(&mCondition);
+}
+
+// Creates the mutex and the condition variable with default attributes.
+// Returns false if either cannot be created; nothing stays initialized then.
+bool Signal::init()
+{
+    int status = pthread_mutex_init(&mMutex, NULL);
+    if (status) {
+        LOGE("Signal mutex init failure");
+        return false;
+    }
+
+    status = pthread_cond_init(&mCondition, NULL);
+    if (status) {
+        LOGE("Signal condition init failure");
+        pthread_mutex_destroy(&mMutex);
+        return false;
+    }
+
+    return true;
+}
+
+// Marks the signal as set and signals the condition variable, under the mutex.
+void Signal::set()
+{
+    int status;
+
+    status = pthread_mutex_lock(&mMutex);
+    if (status) {
+        LOGE("Signal: error %i locking for set condition.", status);
+        return;
+    }
+
+    mSet = true;
+
+    status = pthread_cond_signal(&mCondition);
+    if (status) {
+        LOGE("Signal: error %i on set condition.", status);
+    }
+
+    status = pthread_mutex_unlock(&mMutex);
+    if (status) {
+        LOGE("Signal: error %i unlocking for set condition.", status);
+    }
+}
+
+// Blocks until the signal is set, then clears it again before returning.
+void Signal::wait()
+{
+    int status;
+
+    status = pthread_mutex_lock(&mMutex);
+    if (status) {
+        LOGE("Signal: error %i locking for condition.", status);
+        return;
+    }
+
+    // pthread_cond_wait() may return spuriously, so re-check mSet in a loop.
+    while (!mSet) {
+        status = pthread_cond_wait(&mCondition, &mMutex);
+        if (status) {
+            LOGE("Signal: error %i waiting on condition.", status);
+            // Bail out instead of spinning on a wait that keeps failing.
+            break;
+        }
+    }
+    mSet = false;
+
+    status = pthread_mutex_unlock(&mMutex);
+    if (status) {
+        LOGE("Signal: error %i unlocking for condition.", status);
+    }
+}
+
diff --git a/rsFileA3DDecls.h b/rsSignal.h
similarity index 66%
copy from rsFileA3DDecls.h
copy to rsSignal.h
index 2a08bd3..2e760f1 100644
--- a/rsFileA3DDecls.h
+++ b/rsSignal.h
@@ -14,31 +14,33 @@
  * limitations under the License.
  */
 
-#ifndef ANDROID_RS_FILE_A3D_DECLS_H
-#define ANDROID_RS_FILE_A3D_DECLS_H
+#ifndef ANDROID_RS_SIGNAL_H
+#define ANDROID_RS_SIGNAL_H
 
 
-#define A3D_MAGIC_KEY "Android3D_ff"
+#include "rsUtils.h"
 
 namespace android {
 namespace renderscript {
 
-    enum A3DChunkType {
-        CHUNK_EMPTY,
+class Signal {
+public:
+    Signal();
+    ~Signal();
 
-        CHUNK_ELEMENT,
-        CHUNK_ELEMENT_SOURCE,
-        CHUNK_VERTICIES,
-        CHUNK_MESH,
-        CHUNK_PRIMITIVE,
+    bool init();
 
-        CHUNK_LAST
-    };
+    void set();
+    void wait();
 
+protected:
+    bool mSet;
+    pthread_mutex_t mMutex;
+    pthread_cond_t mCondition;
+};
 
 }
 }
-#endif //ANDROID_RS_FILE_A3D_H
 
-
+#endif
 
diff --git a/rsSimpleMesh.cpp b/rsSimpleMesh.cpp
deleted file mode 100644
index 53ce5cd..0000000
--- a/rsSimpleMesh.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "rsContext.h"
-
-using namespace android;
-using namespace android::renderscript;
-
-#include <GLES/gl.h>
-#include <GLES/glext.h>
-
-SimpleMesh::SimpleMesh(Context *rsc) : ObjectBase(rsc)
-{
-    mAllocFile = __FILE__;
-    mAllocLine = __LINE__;
-}
-
-SimpleMesh::~SimpleMesh()
-{
-    delete[] mVertexTypes;
-    delete[] mVertexBuffers;
-}
-
-void SimpleMesh::render(Context *rsc) const
-{
-    if (mPrimitiveType.get()) {
-        renderRange(rsc, 0, mPrimitiveType->getDimX());
-        return;
-    }
-
-    if (mIndexType.get()) {
-        renderRange(rsc, 0, mIndexType->getDimX());
-        return;
-    }
-
-    renderRange(rsc, 0, mVertexTypes[0]->getDimX());
-}
-
-void SimpleMesh::renderRange(Context *rsc, uint32_t start, uint32_t len) const
-{
-    if (len < 1) {
-        return;
-    }
-
-    rsc->checkError("SimpleMesh::renderRange 1");
-    VertexArray va;
-    if (rsc->checkVersion2_0()) {
-        for (uint32_t ct=0; ct < mVertexTypeCount; ct++) {
-            mVertexBuffers[ct]->uploadCheck(rsc);
-            va.setActiveBuffer(mVertexBuffers[ct]->getBufferObjectID());
-            mVertexTypes[ct]->enableGLVertexBuffer2(&va);
-        }
-        va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
-    } else {
-        for (uint32_t ct=0; ct < mVertexTypeCount; ct++) {
-            mVertexBuffers[ct]->uploadCheck(rsc);
-            va.setActiveBuffer(mVertexBuffers[ct]->getBufferObjectID());
-            mVertexTypes[ct]->enableGLVertexBuffer(&va);
-        }
-        va.setupGL(rsc, 0);
-    }
-
-    rsc->checkError("SimpleMesh::renderRange 2");
-    if (mIndexType.get()) {
-        mIndexBuffer->uploadCheck(rsc);
-        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer->getBufferObjectID());
-        glDrawElements(mGLPrimitive, len, GL_UNSIGNED_SHORT, (uint16_t *)(start * 2));
-    } else {
-        glDrawArrays(mGLPrimitive, start, len);
-    }
-
-    rsc->checkError("SimpleMesh::renderRange");
-}
-
-void SimpleMesh::uploadAll(Context *rsc)
-{
-    for (uint32_t ct=0; ct < mVertexTypeCount; ct++) {
-        if (mVertexBuffers[ct].get()) {
-            mVertexBuffers[ct]->deferedUploadToBufferObject(rsc);
-        }
-    }
-    if (mIndexBuffer.get()) {
-        mIndexBuffer->deferedUploadToBufferObject(rsc);
-    }
-    if (mPrimitiveBuffer.get()) {
-        mPrimitiveBuffer->deferedUploadToBufferObject(rsc);
-    }
-    rsc->checkError("SimpleMesh::uploadAll");
-}
-
-
-SimpleMeshContext::SimpleMeshContext()
-{
-}
-
-SimpleMeshContext::~SimpleMeshContext()
-{
-}
-
-
-namespace android {
-namespace renderscript {
-
-
-RsSimpleMesh rsi_SimpleMeshCreate(Context *rsc, RsType prim, RsType idx, RsType *vtx, uint32_t vtxCount, uint32_t primType)
-{
-    SimpleMesh *sm = new SimpleMesh(rsc);
-    sm->incUserRef();
-
-    sm->mIndexType.set((const Type *)idx);
-    sm->mPrimitiveType.set((const Type *)prim);
-
-    sm->mVertexTypeCount = vtxCount;
-    sm->mVertexTypes = new ObjectBaseRef<const Type>[vtxCount];
-    sm->mVertexBuffers = new ObjectBaseRef<Allocation>[vtxCount];
-    for (uint32_t ct=0; ct < vtxCount; ct++) {
-        sm->mVertexTypes[ct].set((const Type *)vtx[ct]);
-    }
-
-    sm->mPrimitive = (RsPrimitive)primType;
-    switch(sm->mPrimitive) {
-    case RS_PRIMITIVE_POINT:          sm->mGLPrimitive = GL_POINTS; break;
-    case RS_PRIMITIVE_LINE:           sm->mGLPrimitive = GL_LINES; break;
-    case RS_PRIMITIVE_LINE_STRIP:     sm->mGLPrimitive = GL_LINE_STRIP; break;
-    case RS_PRIMITIVE_TRIANGLE:       sm->mGLPrimitive = GL_TRIANGLES; break;
-    case RS_PRIMITIVE_TRIANGLE_STRIP: sm->mGLPrimitive = GL_TRIANGLE_STRIP; break;
-    case RS_PRIMITIVE_TRIANGLE_FAN:   sm->mGLPrimitive = GL_TRIANGLE_FAN; break;
-    }
-    return sm;
-}
-
-void rsi_SimpleMeshBindVertex(Context *rsc, RsSimpleMesh mv, RsAllocation va, uint32_t slot)
-{
-    SimpleMesh *sm = static_cast<SimpleMesh *>(mv);
-    rsAssert(slot < sm->mVertexTypeCount);
-
-    sm->mVertexBuffers[slot].set((Allocation *)va);
-}
-
-void rsi_SimpleMeshBindIndex(Context *rsc, RsSimpleMesh mv, RsAllocation va)
-{
-    SimpleMesh *sm = static_cast<SimpleMesh *>(mv);
-    sm->mIndexBuffer.set((Allocation *)va);
-}
-
-void rsi_SimpleMeshBindPrimitive(Context *rsc, RsSimpleMesh mv, RsAllocation va)
-{
-    SimpleMesh *sm = static_cast<SimpleMesh *>(mv);
-    sm->mPrimitiveBuffer.set((Allocation *)va);
-}
-
-
-
-
-}}
-
diff --git a/rsSimpleMesh.h b/rsSimpleMesh.h
deleted file mode 100644
index 6defbda..0000000
--- a/rsSimpleMesh.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ANDROID_RS_SIMPLE_MESH_H
-#define ANDROID_RS_SIMPLE_MESH_H
-
-
-#include "RenderScript.h"
-
-// ---------------------------------------------------------------------------
-namespace android {
-namespace renderscript {
-
-
-// An element is a group of Components that occupies one cell in a structure.
-class SimpleMesh : public ObjectBase
-{
-public:
-    SimpleMesh(Context *);
-    ~SimpleMesh();
-
-    ObjectBaseRef<const Type> mIndexType;
-    ObjectBaseRef<const Type> mPrimitiveType;
-    ObjectBaseRef<const Type> *mVertexTypes;
-    uint32_t mVertexTypeCount;
-
-    ObjectBaseRef<Allocation> mIndexBuffer;
-    ObjectBaseRef<Allocation> mPrimitiveBuffer;
-    ObjectBaseRef<Allocation> *mVertexBuffers;
-
-    RsPrimitive mPrimitive;
-    uint32_t mGLPrimitive;
-
-
-    void render(Context *) const;
-    void renderRange(Context *, uint32_t start, uint32_t len) const;
-    void uploadAll(Context *);
-
-
-protected:
-};
-
-class SimpleMeshContext
-{
-public:
-    SimpleMeshContext();
-    ~SimpleMeshContext();
-
-
-};
-
-
-}
-}
-#endif //ANDROID_RS_SIMPLE_MESH_H
-
diff --git a/rsStream.cpp b/rsStream.cpp
new file mode 100644
index 0000000..68241fa
--- /dev/null
+++ b/rsStream.cpp
@@ -0,0 +1,141 @@
+
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_BUILD_FOR_HOST
+#include "rsContext.h"
+#else
+#include "rsContextHostStub.h"
+#endif
+
+#include "rsStream.h"
+
+using namespace android;
+using namespace android::renderscript;
+
+// Wraps buf for sequential reads. The bytes are not copied and no length is
+// stored, so staying within the buffer is the caller's responsibility.
+IStream::IStream(const uint8_t *buf, bool use64)
+{
+    mData = buf;
+    mPos = 0;
+    mUse64 = use64;
+}
+
+// Copies numBytes from the current position into dest and advances past them.
+void IStream::loadByteArray(void *dest, size_t numBytes)
+{
+    memcpy(dest, mData + mPos, numBytes);
+    mPos += numBytes;
+}
+
+// Reads an offset: in 64-bit mode an 8-byte aligned 64-bit value, otherwise
+// the 32-bit value produced by loadU32().
+uint64_t IStream::loadOffset()
+{
+    if (!mUse64) {
+        return loadU32();
+    }
+    mPos = (mPos + 7) & (~7);
+    // memcpy rather than a pointer cast: mData itself carries no alignment
+    // guarantee, and reading it through a uint64_t* would be undefined.
+    uint64_t tmp;
+    memcpy(&tmp, &mData[mPos], sizeof(tmp));
+    mPos += sizeof(uint64_t);
+    return tmp;
+}
+
+// Reads a 32-bit length followed by that many characters into s.
+void IStream::loadString(String8 *s)
+{
+    uint32_t len = loadU32();
+    s->setTo((const char *)&mData[mPos], len);
+    mPos += len;
+}
+
+
+// Output stream implementation
+
+// Allocates an output buffer of len bytes; writes grow it on demand.
+OStream::OStream(uint64_t len, bool use64)
+{
+    mData = (uint8_t*)malloc(len);
+    mLength = len;
+    mPos = 0;
+    mUse64 = use64;
+}
+
+OStream::~OStream()
+{
+    free(mData);
+}
+
+// Appends numBytes raw bytes from src at the current position (no alignment).
+void OStream::addByteArray(const void *src, size_t numBytes)
+{
+    // We may need to grow more than once if the number of bytes is substantial
+    while(mPos + numBytes >= mLength) {
+        growSize();
+    }
+    memcpy(mData + mPos, src, numBytes);
+    mPos += numBytes;
+}
+
+// Writes an offset: in 64-bit mode an 8-byte aligned little-endian 64-bit
+// value, otherwise the low 32 bits through addU32().
+void OStream::addOffset(uint64_t v)
+{
+    if (!mUse64) {
+        addU32((uint32_t)v);
+        return;
+    }
+    mPos = (mPos + 7) & (~7);
+    // One doubling may not be enough for a small buffer, so loop.
+    while(mPos + sizeof(v) >= mLength) {
+        growSize();
+    }
+    for (uint32_t shift = 0; shift < 64; shift += 8) {
+        mData[mPos++] = (uint8_t)((v >> shift) & 0xff);
+    }
+}
+
+// Writes the string as a 32-bit length followed by its characters (no NUL).
+void OStream::addString(String8 *s)
+{
+    uint32_t len = s->size();
+    addU32(len);
+    // addByteArray() keeps growing until the whole string fits; growing only
+    // once here would overflow the buffer for strings longer than its size.
+    addByteArray(s->string(), len);
+}
+
+// Doubles the buffer capacity, keeping the bytes written so far.
+// NOTE(review): a failed malloc is not handled here and will crash in memcpy.
+void OStream::growSize()
+{
+    // Doubling a zero-length buffer would never make room, and callers loop
+    // until there is room, so start such a buffer at a small fixed size.
+    uint64_t newLength = mLength ? mLength * 2 : 16;
+    uint8_t *newData = (uint8_t*)malloc(newLength);
+    if (mLength) {
+        memcpy(newData, mData, mLength*sizeof(uint8_t));
+    }
+    mLength = newLength;
+    free(mData);
+    mData = newData;
+}
+
+
diff --git a/rsStream.h b/rsStream.h
new file mode 100644
index 0000000..d401cd1
--- /dev/null
+++ b/rsStream.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_RS_STREAM_H
+#define ANDROID_RS_STREAM_H
+
+#include <utils/String8.h>
+#include <stdio.h>
+#include <string.h>
+
+// ---------------------------------------------------------------------------
+namespace android {
+namespace renderscript {
+
+// Sequential reader over a caller-supplied byte buffer. Multi-byte loads first
+// round the position up to the value's size; no bounds checking is done.
+// NOTE(review): values are read in host byte order while OStream writes
+// little-endian bytes, so a big-endian host would not round-trip -- confirm.
+class IStream
+{
+public:
+    IStream(const uint8_t *, bool use64);
+
+    float loadF() {
+        return loadAligned<float>();
+    }
+    int32_t loadI32() {
+        return loadAligned<int32_t>();
+    }
+    uint32_t loadU32() {
+        return loadAligned<uint32_t>();
+    }
+    uint16_t loadU16() {
+        return loadAligned<uint16_t>();
+    }
+    inline uint8_t loadU8() {
+        return loadAligned<uint8_t>();
+    }
+    void loadByteArray(void *dest, size_t numBytes);
+    uint64_t loadOffset();
+    void loadString(String8 *s);
+    uint64_t getPos() const {
+        return mPos;
+    }
+    void reset(uint64_t pos) {
+        mPos = pos;
+    }
+    void reset() {
+        mPos = 0;
+    }
+
+    const uint8_t * getPtr() const {
+        return mData;
+    }
+protected:
+    // Aligns the position to sizeof(T), then reads one T and advances past it.
+    // memcpy is used because mData carries no alignment guarantee and reading
+    // it through a T* would also violate the strict-aliasing rules.
+    template <typename T> T loadAligned() {
+        mPos = (mPos + (sizeof(T) - 1)) & ~(uint64_t)(sizeof(T) - 1);
+        T tmp;
+        memcpy(&tmp, &mData[mPos], sizeof(T));
+        mPos += sizeof(T);
+        return tmp;
+    }
+
+    const uint8_t * mData;
+    uint64_t mPos;
+    bool mUse64;
+};
+
+// Growable writer that emits multi-byte values low byte first, aligning each
+// one to its own size before it is written.
+class OStream
+{
+public:
+    OStream(uint64_t length, bool use64);
+    ~OStream();
+
+    // Rounds the position up to a multiple of bytes (expected to be a power of
+    // two). The mask is widened to 64 bits so the upper half of mPos survives.
+    void align(uint32_t bytes) {
+        mPos = (mPos + (bytes - 1)) & ~(uint64_t)(bytes - 1);
+        reserve(0);
+    }
+
+    void addF(float v) {
+        // Copy the bit pattern; casting &v to uint32_t* breaks strict aliasing.
+        uint32_t uintV;
+        memcpy(&uintV, &v, sizeof(uintV));
+        addU32(uintV);
+    }
+    void addI32(int32_t v) {
+        // Same bytes as the unsigned case, without shifting a negative value.
+        addU32((uint32_t)v);
+    }
+    void addU32(uint32_t v) {
+        mPos = (mPos + 3) & (~3);
+        reserve(sizeof(v));
+        mData[mPos++] = (uint8_t)(v & 0xff);
+        mData[mPos++] = (uint8_t)((v >> 8) & 0xff);
+        mData[mPos++] = (uint8_t)((v >> 16) & 0xff);
+        mData[mPos++] = (uint8_t)((v >> 24) & 0xff);
+    }
+    void addU16(uint16_t v) {
+        mPos = (mPos + 1) & (~1);
+        reserve(sizeof(v));
+        mData[mPos++] = (uint8_t)(v & 0xff);
+        mData[mPos++] = (uint8_t)(v >> 8);
+    }
+    inline void addU8(uint8_t v) {
+        reserve(1);
+        mData[mPos++] = v;
+    }
+    void addByteArray(const void *src, size_t numBytes);
+    void addOffset(uint64_t v);
+    void addString(String8 *s);
+    uint64_t getPos() const {
+        return mPos;
+    }
+    void reset(uint64_t pos) {
+        mPos = pos;
+    }
+    void reset() {
+        mPos = 0;
+    }
+    const uint8_t * getPtr() const {
+        return mData;
+    }
+protected:
+    // Grows the buffer until numBytes more bytes fit at the current position;
+    // one growSize() call is not always enough, e.g. after a large align().
+    void reserve(uint64_t numBytes) {
+        while (mPos + numBytes >= mLength) {
+            growSize();
+        }
+    }
+    void growSize();
+    uint8_t * mData;
+    uint64_t mLength;
+    uint64_t mPos;
+    bool mUse64;
+};
+
+
+} // renderscript
+} // android
+#endif //ANDROID_RS_STREAM_H
+
+
diff --git a/rsType.cpp b/rsType.cpp
index c09e979..27b1b4f 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -14,8 +14,13 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
 #include <GLES/gl.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#endif
 
 using namespace android;
 using namespace android::renderscript;
@@ -26,6 +31,8 @@
     mAllocLine = __LINE__;
     mLODs = 0;
     mLODCount = 0;
+    mAttribs = NULL;
+    mAttribsSize = 0;
     clear();
 }
 
@@ -39,6 +46,11 @@
     }
     if (mLODs) {
         delete [] mLODs;
+        mLODs = NULL;
+    }
+    if(mAttribs) {
+        delete [] mAttribs;
+        mAttribs = NULL;
     }
 }
 
@@ -84,7 +96,9 @@
         mLODCount = 1;
     }
     if (mLODCount != oldLODCount) {
-        delete [] mLODs;
+        if(mLODs){
+            delete [] mLODs;
+        }
         mLODs = new LOD[mLODCount];
     }
 
@@ -135,139 +149,81 @@
     return offset;
 }
 
+bool Type::isValidGLComponent(uint32_t fieldIdx) {
+    // Padding fields (names starting with '#') never become attribs.
+    if (mElement->getFieldName(fieldIdx)[0] == '#') {
+        return false;
+    }
+
+    // GL only takes GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT,
+    // GL_FIXED and GL_FLOAT, so every other rs data type is filtered out.
+    switch (mElement->getField(fieldIdx)->getComponent().getType()) {
+    case RS_TYPE_FLOAT_32:
+    case RS_TYPE_UNSIGNED_8:
+    case RS_TYPE_UNSIGNED_16:
+    case RS_TYPE_SIGNED_8:
+    case RS_TYPE_SIGNED_16:
+        break;
+    default:
+        return false;
+    }
+
+    // Array fields are rejected too: the field's array size must be
+    // exactly 1 for it to be accepted.
+    return mElement->getFieldArraySize(fieldIdx) == 1;
+}
 
 void Type::makeGLComponents()
 {
-    uint32_t userNum = 0;
+    // Count the number of gl attrs to initialize
+    mAttribsSize = 0;
 
-    for (uint32_t ct=0; ct < getElement()->getFieldCount(); ct++) {
-        const Component &c = getElement()->getField(ct)->getComponent();
-
-        switch(c.getKind()) {
-        case RS_KIND_USER:
-            mGL.mUser[userNum].size = c.getVectorSize();
-            mGL.mUser[userNum].offset = mElement->getFieldOffsetBytes(ct);
-            mGL.mUser[userNum].type = c.getGLType();
-            mGL.mUser[userNum].normalized = c.getType() != RS_TYPE_FLOAT_32;//c.getIsNormalized();
-            mGL.mUser[userNum].name.setTo(getElement()->getFieldName(ct));
-            userNum ++;
-            break;
-
-        case RS_KIND_POSITION:
-            rsAssert(mGL.mVtx.size == 0);
-            mGL.mVtx.size = c.getVectorSize();
-            mGL.mVtx.offset = mElement->getFieldOffsetBytes(ct);
-            mGL.mVtx.type = c.getGLType();
-            mGL.mVtx.normalized = false;
-            mGL.mVtx.name.setTo("Position");
-            break;
-
-        case RS_KIND_COLOR:
-            rsAssert(mGL.mColor.size == 0);
-            mGL.mColor.size = c.getVectorSize();
-            mGL.mColor.offset = mElement->getFieldOffsetBytes(ct);
-            mGL.mColor.type = c.getGLType();
-            mGL.mColor.normalized = c.getType() != RS_TYPE_FLOAT_32;
-            mGL.mColor.name.setTo("Color");
-            break;
-
-        case RS_KIND_NORMAL:
-            rsAssert(mGL.mNorm.size == 0);
-            mGL.mNorm.size = c.getVectorSize();
-            mGL.mNorm.offset = mElement->getFieldOffsetBytes(ct);
-            mGL.mNorm.type = c.getGLType();
-            mGL.mNorm.normalized = false;
-            mGL.mNorm.name.setTo("Normal");
-            break;
-
-        case RS_KIND_TEXTURE:
-            rsAssert(mGL.mTex.size == 0);
-            mGL.mTex.size = c.getVectorSize();
-            mGL.mTex.offset = mElement->getFieldOffsetBytes(ct);
-            mGL.mTex.type = c.getGLType();
-            mGL.mTex.normalized = false;
-            mGL.mTex.name.setTo("Texture");
-            break;
-
-        case RS_KIND_POINT_SIZE:
-            rsAssert(!mGL.mPointSize.size);
-            mGL.mPointSize.size = c.getVectorSize();
-            mGL.mPointSize.offset = mElement->getFieldOffsetBytes(ct);
-            mGL.mPointSize.type = c.getGLType();
-            mGL.mPointSize.normalized = false;
-            mGL.mPointSize.name.setTo("PointSize");
-        break;
-
-        default:
-            break;
+    for (uint32_t ct=0; ct < mElement->getFieldCount(); ct++) {
+        if(isValidGLComponent(ct)) {
+            mAttribsSize ++;
         }
     }
+    if(mAttribs) {
+        delete [] mAttribs;
+        mAttribs = NULL;
+    }
+    if(mAttribsSize) {
+        mAttribs = new VertexArray::Attrib[mAttribsSize];
+    }
+
+    uint32_t userNum = 0;
+    for (uint32_t ct=0; ct < mElement->getFieldCount(); ct++) {
+        const Component &c = mElement->getField(ct)->getComponent();
+
+        if(!isValidGLComponent(ct)) {
+            continue;
+        }
+
+        mAttribs[userNum].size = c.getVectorSize();
+        mAttribs[userNum].offset = mElement->getFieldOffsetBytes(ct);
+        mAttribs[userNum].type = c.getGLType();
+        mAttribs[userNum].normalized = c.getType() != RS_TYPE_FLOAT_32;//c.getIsNormalized();
+        String8 tmp(RS_SHADER_ATTR);
+        tmp.append(mElement->getFieldName(ct));
+        mAttribs[userNum].name.setTo(tmp.string());
+
+        userNum ++;
+    }
 }
 
+
 void Type::enableGLVertexBuffer(VertexArray *va) const
 {
-    // Note: We are only going to enable buffers and never disable them
-    // here.  The reason is more than one Allocation may be used as a vertex
-    // source.  So we cannot disable arrays that may have been in use by
-    // another allocation.
-
     uint32_t stride = mElement->getSizeBytes();
-    if (mGL.mVtx.size) {
-        va->addLegacy(mGL.mVtx.type,
-                      mGL.mVtx.size,
-                      stride,
-                      RS_KIND_POSITION,
-                      false,
-                      mGL.mVtx.offset);
-    }
-
-    if (mGL.mNorm.size) {
-        va->addLegacy(mGL.mNorm.type,
-                     3,
-                     stride,
-                     RS_KIND_NORMAL,
-                     false,
-                     mGL.mNorm.offset);
-    }
-
-    if (mGL.mColor.size) {
-        va->addLegacy(mGL.mColor.type,
-                     mGL.mColor.size,
-                     stride,
-                     RS_KIND_COLOR,
-                     true,
-                     mGL.mColor.offset);
-    }
-
-    if (mGL.mTex.size) {
-        va->addLegacy(mGL.mTex.type,
-                     mGL.mTex.size,
-                     stride,
-                     RS_KIND_TEXTURE,
-                     false,
-                     mGL.mTex.offset);
-    }
-
-    if (mGL.mPointSize.size) {
-        va->addLegacy(mGL.mPointSize.type,
-                     1,
-                     stride,
-                     RS_KIND_POINT_SIZE,
-                     false,
-                     mGL.mPointSize.offset);
-    }
-
-}
-
-void Type::enableGLVertexBuffer2(VertexArray *va) const
-{
-    // Do legacy buffers
-    enableGLVertexBuffer(va);
-
-    uint32_t stride = mElement->getSizeBytes();
-    for (uint32_t ct=0; ct < RS_MAX_ATTRIBS; ct++) {
-        if (mGL.mUser[ct].size) {
-            va->addUser(mGL.mUser[ct], stride);
+    for (uint32_t ct=0; ct < mAttribsSize; ct++) {
+        // Load up to RS_MAX_ATTRIBS inputs
+        // TODO: grow vertexarray dynamically
+        if(ct >= RS_MAX_ATTRIBS) {
+            LOGE("More GL attributes than we can handle");
+            break;
+        }
+        if (mAttribs[ct].size) {
+            va->add(mAttribs[ct], stride);
         }
     }
 }
@@ -283,6 +239,57 @@
     mElement->dumpLOGV(buf);
 }
 
+void Type::serialize(OStream *stream) const
+{
+    // Need to identify ourselves
+    stream->addU32((uint32_t)getClassId());
+
+    String8 name(getName());
+    stream->addString(&name);
+
+    mElement->serialize(stream);
+
+    stream->addU32(mDimX);
+    stream->addU32(mDimY);
+    stream->addU32(mDimZ);
+
+    stream->addU8((uint8_t)(mDimLOD ? 1 : 0));
+    stream->addU8((uint8_t)(mFaces ? 1 : 0));
+}
+
+Type *Type::createFromStream(Context *rsc, IStream *stream)
+{
+    // First make sure we are reading the correct object
+    RsA3DClassID classID = (RsA3DClassID)stream->loadU32();
+    if(classID != RS_A3D_CLASS_ID_TYPE) {
+        LOGE("type loading skipped due to invalid class id\n");
+        return NULL;
+    }
+
+    String8 name;
+    stream->loadString(&name);
+
+    Element *elem = Element::createFromStream(rsc, stream);
+    if(!elem) {
+        return NULL;
+    }
+
+    Type *type = new Type(rsc);
+    type->mDimX = stream->loadU32();
+    type->mDimY = stream->loadU32();
+    type->mDimZ = stream->loadU32();
+
+    uint8_t temp = stream->loadU8();
+    type->mDimLOD = temp != 0;
+
+    temp = stream->loadU8();
+    type->mFaces = temp != 0;
+
+    type->setElement(elem);
+
+    return type;
+}
+
 bool Type::getIsNp2() const
 {
     uint32_t x = getDimX();
@@ -301,97 +308,154 @@
     return false;
 }
 
+// Structural comparison: equal elements plus identical X/Y/Z dimensions and
+// identical LOD / faces settings. A NULL argument never compares equal.
+bool Type::isEqual(const Type *other) const {
+    if (other == NULL) {
+        return false;
+    }
+    if (!other->getElement()->isEqual(getElement())) {
+        return false;
+    }
+    return other->getDimX() == mDimX && other->getDimY() == mDimY &&
+           other->getDimZ() == mDimZ &&
+           other->getDimLOD() == mDimLOD &&
+           other->getDimFaces() == mFaces;
+}
+
+Type * Type::cloneAndResize1D(Context *rsc, uint32_t dimX) const
+{
+    TypeState * stc = &rsc->mStateType;
+    for (uint32_t ct=0; ct < stc->mTypes.size(); ct++) {
+        Type *t = stc->mTypes[ct];
+        if (t->getElement() != mElement.get()) continue;
+        if (t->getDimX() != dimX) continue;
+        if (t->getDimY() != mDimY) continue;
+        if (t->getDimZ() != mDimZ) continue;
+        if (t->getDimLOD() != mDimLOD) continue;
+        if (t->getDimFaces() != mFaces) continue;
+        t->incUserRef();
+        return t;
+    }
+
+    Type *nt = new Type(rsc);
+    nt->mElement.set(mElement);
+    nt->mDimX = dimX;
+    nt->mDimY = mDimY;
+    nt->mDimZ = mDimZ;
+    nt->mDimLOD = mDimLOD;
+    nt->mFaces = mFaces;
+    nt->compute();
+    return nt;
+}
+
+Type * Type::cloneAndResize2D(Context *rsc, uint32_t dimX, uint32_t dimY) const
+{
+    TypeState * stc = &rsc->mStateType;
+    for (uint32_t ct=0; ct < stc->mTypes.size(); ct++) {
+        Type *t = stc->mTypes[ct];
+        if (t->getElement() != mElement.get()) continue;
+        if (t->getDimX() != dimX) continue;
+        if (t->getDimY() != dimY) continue;
+        if (t->getDimZ() != mDimZ) continue;
+        if (t->getDimLOD() != mDimLOD) continue;
+        if (t->getDimFaces() != mFaces) continue;
+        t->incUserRef();
+        return t;
+    }
+
+    Type *nt = new Type(rsc);
+    nt->mElement.set(mElement);
+    nt->mDimX = dimX;
+    nt->mDimY = dimY;
+    nt->mDimZ = mDimZ;
+    nt->mDimLOD = mDimLOD;
+    nt->mFaces = mFaces;
+    nt->compute();
+    return nt;
+}
+
 
 //////////////////////////////////////////////////
 //
 namespace android {
 namespace renderscript {
 
-void rsi_TypeBegin(Context *rsc, RsElement vse)
+void rsi_TypeGetNativeData(Context *rsc, RsType type, uint32_t *typeData, uint32_t typeDataSize)
 {
-    TypeState * stc = &rsc->mStateType;
+    rsAssert(typeDataSize == 6);
+    // Pack the data into typeData in the following order: mDimX, mDimY, mDimZ,
+    // mDimLOD, mDimFaces, mElement.
+    Type *t = static_cast<Type *>(type);
 
-    stc->mX = 0;
-    stc->mY = 0;
-    stc->mZ = 0;
-    stc->mLOD = false;
-    stc->mFaces = false;
-    stc->mElement.set(static_cast<const Element *>(vse));
-}
-
-void rsi_TypeAdd(Context *rsc, RsDimension dim, size_t value)
-{
-    TypeState * stc = &rsc->mStateType;
-
-    if (dim < 0) {
-        //error
-        return;
-    }
-
-
-    switch (dim) {
-    case RS_DIMENSION_X:
-        stc->mX = value;
-        return;
-    case RS_DIMENSION_Y:
-        stc->mY = value;
-        return;
-    case RS_DIMENSION_Z:
-        stc->mZ = value;
-        return;
-    case RS_DIMENSION_FACE:
-        stc->mFaces = (value != 0);
-        return;
-    case RS_DIMENSION_LOD:
-        stc->mLOD = (value != 0);
-        return;
-    default:
-        break;
-    }
-
-
-    int32_t arrayNum = dim - RS_DIMENSION_ARRAY_0;
-    if ((dim < 0) || (dim > RS_DIMENSION_MAX)) {
-        LOGE("rsTypeAdd: Bad dimension");
-        //error
-        return;
-    }
-
-    // todo: implement array support
+    (*typeData++) = t->getDimX();
+    (*typeData++) = t->getDimY();
+    (*typeData++) = t->getDimZ();
+    (*typeData++) = t->getDimLOD();
+    (*typeData++) = t->getDimFaces() ? 1 : 0;
+    (*typeData++) = (uint32_t)t->getElement();
 
 }
 
-RsType rsi_TypeCreate(Context *rsc)
+
+}
+}
+
+void * rsaTypeCreate(RsContext con, RsElement _e, uint32_t dimCount,
+                     const RsDimension *dims, const uint32_t *vals)
 {
+    Context *rsc = static_cast<Context *>(con);
+    Element *e = static_cast<Element *>(_e);
     TypeState * stc = &rsc->mStateType;
 
+    uint32_t dimX = 0;
+    uint32_t dimY = 0;
+    uint32_t dimZ = 0;
+    uint32_t dimLOD = 0;
+    uint32_t dimFaces = 0;
+
+    for (uint32_t ct=0; ct < dimCount; ct++) {
+        switch(dims[ct]) {
+        case RS_DIMENSION_X: dimX = vals[ct]; break;
+        case RS_DIMENSION_Y: dimY = vals[ct]; break;
+        case RS_DIMENSION_Z: dimZ = vals[ct]; break;
+        case RS_DIMENSION_LOD: dimLOD = vals[ct]; break;
+        case RS_DIMENSION_FACE: dimFaces = vals[ct]; break;
+
+        default:
+            LOGE("rsaTypeCreate: Bad dimension");
+            rsAssert(0);
+        }
+    }
+
+    ObjectBase::lockUserRef();
     for (uint32_t ct=0; ct < stc->mTypes.size(); ct++) {
         Type *t = stc->mTypes[ct];
-        if (t->getElement() != stc->mElement.get()) continue;
-        if (t->getDimX() != stc->mX) continue;
-        if (t->getDimY() != stc->mY) continue;
-        if (t->getDimZ() != stc->mZ) continue;
-        if (t->getDimLOD() != stc->mLOD) continue;
-        if (t->getDimFaces() != stc->mFaces) continue;
-        t->incUserRef();
+        if (t->getElement() != e) continue;
+        if (t->getDimX() != dimX) continue;
+        if (t->getDimY() != dimY) continue;
+        if (t->getDimZ() != dimZ) continue;
+        if (t->getDimLOD() != dimLOD) continue;
+        if (t->getDimFaces() != dimFaces) continue;
+        t->prelockedIncUserRef();
+        ObjectBase::unlockUserRef();
         return t;
     }
+    ObjectBase::unlockUserRef();
 
     Type * st = new Type(rsc);
     st->incUserRef();
-    st->setDimX(stc->mX);
-    st->setDimY(stc->mY);
-    st->setDimZ(stc->mZ);
-    st->setElement(stc->mElement.get());
-    st->setDimLOD(stc->mLOD);
-    st->setDimFaces(stc->mFaces);
+    st->setDimX(dimX);
+    st->setDimY(dimY);
+    st->setDimZ(dimZ);
+    st->setElement(e);
+    st->setDimLOD(dimLOD);
+    st->setDimFaces(dimFaces);
     st->compute();
-    stc->mElement.clear();
+
+    ObjectBase::lockUserRef();
     stc->mTypes.push(st);
+    ObjectBase::unlockUserRef();
     return st;
 }
 
-
-}
-}
-
diff --git a/rsType.h b/rsType.h
index c25577c..a0c77ab 100644
--- a/rsType.h
+++ b/rsType.h
@@ -71,9 +71,16 @@
     void compute();
 
     void enableGLVertexBuffer(class VertexArray *) const;
-    void enableGLVertexBuffer2(class VertexArray *) const;
 
     void dumpLOGV(const char *prefix) const;
+    virtual void serialize(OStream *stream) const;
+    virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_TYPE; }
+    static Type *createFromStream(Context *rsc, IStream *stream);
+
+    bool isEqual(const Type *other) const;
+
+    Type * cloneAndResize1D(Context *rsc, uint32_t dimX) const;
+    Type * cloneAndResize2D(Context *rsc, uint32_t dimX, uint32_t dimY) const;
 
 protected:
     struct LOD {
@@ -112,15 +119,9 @@
     LOD *mLODs;
     uint32_t mLODCount;
 
-    struct GLState_t {
-        VertexArray::Attrib mUser[RS_MAX_ATTRIBS];
-        VertexArray::Attrib mVtx;
-        VertexArray::Attrib mNorm;
-        VertexArray::Attrib mColor;
-        VertexArray::Attrib mTex;
-        VertexArray::Attrib mPointSize;
-    };
-    GLState_t mGL;
+    VertexArray::Attrib *mAttribs;
+    uint32_t mAttribsSize;
+    bool isValidGLComponent(uint32_t fieldIdx);
     void makeGLComponents();
 
 private:
@@ -133,14 +134,6 @@
     TypeState();
     ~TypeState();
 
-    size_t mX;
-    size_t mY;
-    size_t mZ;
-    uint32_t mLOD;
-    bool mFaces;
-    ObjectBaseRef<const Element> mElement;
-
-
     // Cache of all existing types.
     Vector<Type *> mTypes;
 };
diff --git a/rsUtils.h b/rsUtils.h
index 07f8933..17feb22 100644
--- a/rsUtils.h
+++ b/rsUtils.h
@@ -19,15 +19,23 @@
 
 #define LOG_NDEBUG 0
 #define LOG_TAG "RenderScript"
+
 #include <utils/Log.h>
-#include <utils/Vector.h>
-#include <utils/KeyedVector.h>
+
+#include "rsStream.h"
+
 #include <utils/String8.h>
+#include <utils/Vector.h>
+
 #include <stdlib.h>
 #include <pthread.h>
 #include <time.h>
+#include <cutils/atomic.h>
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include <EGL/egl.h>
+#endif
+
 #include <math.h>
 
 #include "RenderScript.h"
@@ -41,6 +49,26 @@
 #define rsAssert(v) while(0)
 #endif
 
+typedef float rsvF_2 __attribute__ ((vector_size (8)));
+typedef float rsvF_4 __attribute__ ((vector_size (16)));
+typedef uint8_t rsvU8_4 __attribute__ ((vector_size (4)));
+
+union float2 {
+    rsvF_2 v;
+    float f[2];
+};
+
+union float4 {
+    rsvF_4 v;
+    float f[4];
+};
+
+union uchar4 {
+    rsvU8_4 v;
+    uint8_t f[4];
+    uint32_t packed;
+};
+
 template<typename T>
 T rsMin(T in1, T in2)
 {
diff --git a/rsVertexArray.cpp b/rsVertexArray.cpp
index 6c2002d..4d50124 100644
--- a/rsVertexArray.cpp
+++ b/rsVertexArray.cpp
@@ -14,10 +14,15 @@
  * limitations under the License.
  */
 
+#ifndef ANDROID_RS_BUILD_FOR_HOST
 #include "rsContext.h"
-
 #include <GLES/gl.h>
 #include <GLES2/gl2.h>
+#else
+#include "rsContextHostStub.h"
+#include <OpenGL/gl.h>
+#endif
+
 
 using namespace android;
 using namespace android::renderscript;
@@ -39,6 +44,7 @@
         mAttribs[ct].clear();
     }
     mActiveBuffer = 0;
+    mActivePointer = NULL;
     mCount = 0;
 }
 
@@ -50,12 +56,12 @@
 void VertexArray::Attrib::set(const Attrib &a)
 {
     buffer = a.buffer;
+    ptr = a.ptr;
     offset = a.offset;
     type = a.type;
     size = a.size;
     stride = a.stride;
     normalized = a.normalized;
-    kind = RS_KIND_USER;
     name.setTo(a.name);
 }
 
@@ -66,6 +72,7 @@
     type = 0;
     size = 0;
     stride = 0;
+    ptr = NULL;
     normalized = false;
     name.setTo("");
 }
@@ -75,138 +82,72 @@
     mAttribs[n].clear();
 }
 
-void VertexArray::addUser(const Attrib &a, uint32_t stride)
+void VertexArray::add(const Attrib &a, uint32_t stride)
 {
-    assert(mCount < RS_MAX_ATTRIBS);
+    rsAssert(mCount < RS_MAX_ATTRIBS);
     mAttribs[mCount].set(a);
     mAttribs[mCount].buffer = mActiveBuffer;
+    mAttribs[mCount].ptr = mActivePointer;
     mAttribs[mCount].stride = stride;
-    mAttribs[mCount].kind = RS_KIND_USER;
     mCount ++;
 }
 
-void VertexArray::addLegacy(uint32_t type, uint32_t size, uint32_t stride, RsDataKind kind, bool normalized, uint32_t offset)
+void VertexArray::add(uint32_t type, uint32_t size, uint32_t stride, bool normalized, uint32_t offset, const char *name)
 {
-    assert(mCount < RS_MAX_ATTRIBS);
+    rsAssert(mCount < RS_MAX_ATTRIBS);
     mAttribs[mCount].clear();
     mAttribs[mCount].type = type;
     mAttribs[mCount].size = size;
     mAttribs[mCount].offset = offset;
     mAttribs[mCount].normalized = normalized;
-    mAttribs[mCount].buffer = mActiveBuffer;
     mAttribs[mCount].stride = stride;
-    mAttribs[mCount].kind = kind;
+    mAttribs[mCount].name.setTo(name);
+
+    mAttribs[mCount].buffer = mActiveBuffer;
+    mAttribs[mCount].ptr = mActivePointer;
     mCount ++;
 }
 
 void VertexArray::logAttrib(uint32_t idx, uint32_t slot) const {
-    LOGE("va %i: slot=%i name=%s buf=%i  size=%i  type=0x%x  kind=%i  stride=0x%x  norm=%i  offset=0x%x", idx, slot,
+    if(idx == 0) {
+        LOGV("Starting vertex attribute binding");
+    }
+    LOGV("va %i: slot=%i name=%s buf=%i ptr=%p size=%i  type=0x%x  stride=0x%x  norm=%i  offset=0x%x",
+         idx, slot,
          mAttribs[idx].name.string(),
          mAttribs[idx].buffer,
+         mAttribs[idx].ptr,
          mAttribs[idx].size,
          mAttribs[idx].type,
-         mAttribs[idx].kind,
          mAttribs[idx].stride,
          mAttribs[idx].normalized,
          mAttribs[idx].offset);
 }
 
-void VertexArray::setupGL(const Context *rsc, class VertexArrayState *state) const
-{
-    glClientActiveTexture(GL_TEXTURE0);
-    glDisableClientState(GL_NORMAL_ARRAY);
-    glDisableClientState(GL_COLOR_ARRAY);
-    glDisableClientState(GL_TEXTURE_COORD_ARRAY);
-    glDisableClientState(GL_POINT_SIZE_ARRAY_OES);
-
-    for (uint32_t ct=0; ct < mCount; ct++) {
-        switch(mAttribs[ct].kind) {
-        case RS_KIND_POSITION:
-            //logAttrib(POSITION);
-            glEnableClientState(GL_VERTEX_ARRAY);
-            glBindBuffer(GL_ARRAY_BUFFER, mAttribs[ct].buffer);
-            glVertexPointer(mAttribs[ct].size,
-                            mAttribs[ct].type,
-                            mAttribs[ct].stride,
-                            (void *)mAttribs[ct].offset);
-            break;
-
-        case RS_KIND_NORMAL:
-            //logAttrib(NORMAL);
-            glEnableClientState(GL_NORMAL_ARRAY);
-            rsAssert(mAttribs[ct].size == 3);
-            glBindBuffer(GL_ARRAY_BUFFER, mAttribs[ct].buffer);
-            glNormalPointer(mAttribs[ct].type,
-                            mAttribs[ct].stride,
-                            (void *)mAttribs[ct].offset);
-            break;
-
-        case RS_KIND_COLOR:
-            //logAttrib(COLOR);
-            glEnableClientState(GL_COLOR_ARRAY);
-            glBindBuffer(GL_ARRAY_BUFFER, mAttribs[ct].buffer);
-            glColorPointer(mAttribs[ct].size,
-                           mAttribs[ct].type,
-                           mAttribs[ct].stride,
-                           (void *)mAttribs[ct].offset);
-            break;
-
-        case RS_KIND_TEXTURE:
-            //logAttrib(TEXTURE);
-            glEnableClientState(GL_TEXTURE_COORD_ARRAY);
-            glBindBuffer(GL_ARRAY_BUFFER, mAttribs[ct].buffer);
-            glTexCoordPointer(mAttribs[ct].size,
-                              mAttribs[ct].type,
-                              mAttribs[ct].stride,
-                              (void *)mAttribs[ct].offset);
-            break;
-
-        case RS_KIND_POINT_SIZE:
-            //logAttrib(POINT_SIZE);
-            glEnableClientState(GL_POINT_SIZE_ARRAY_OES);
-            glBindBuffer(GL_ARRAY_BUFFER, mAttribs[ct].buffer);
-            glPointSizePointerOES(mAttribs[ct].type,
-                                  mAttribs[ct].stride,
-                                  (void *)mAttribs[ct].offset);
-            break;
-
-        default:
-            rsAssert(0);
-        }
-    }
-
-    rsc->checkError("VertexArray::setupGL");
-}
-
 void VertexArray::setupGL2(const Context *rsc, class VertexArrayState *state, ShaderCache *sc) const
 {
     rsc->checkError("VertexArray::setupGL2 start");
-    for (uint32_t ct=1; ct <= state->mLastEnableCount; ct++) {
+    for (uint32_t ct=1; ct <= 0xf/*state->mLastEnableCount*/; ct++) {
         glDisableVertexAttribArray(ct);
     }
 
     rsc->checkError("VertexArray::setupGL2 disabled");
     for (uint32_t ct=0; ct < mCount; ct++) {
-        uint32_t slot = 0;
-        if (sc->isUserVertexProgram()) {
-            slot = sc->vtxAttribSlot(ct);
-        } else {
-            if (mAttribs[ct].kind == RS_KIND_USER) {
-                continue;
-            }
-            slot = sc->vtxAttribSlot(mAttribs[ct].kind);
+        int32_t slot = sc->vtxAttribSlot(mAttribs[ct].name);
+        if(rsc->props.mLogShadersAttr) {
+            logAttrib(ct, slot);
         }
-
-        //logAttrib(ct, slot);
+        if(slot < 0) {
+            continue;
+        }
         glEnableVertexAttribArray(slot);
         glBindBuffer(GL_ARRAY_BUFFER, mAttribs[ct].buffer);
-
         glVertexAttribPointer(slot,
                               mAttribs[ct].size,
                               mAttribs[ct].type,
                               mAttribs[ct].normalized,
                               mAttribs[ct].stride,
-                              (void *)mAttribs[ct].offset);
+                              mAttribs[ct].ptr + mAttribs[ct].offset);
     }
     state->mLastEnableCount = mCount;
     rsc->checkError("VertexArray::setupGL2 done");
diff --git a/rsVertexArray.h b/rsVertexArray.h
index 3904cb6..dea7d41 100644
--- a/rsVertexArray.h
+++ b/rsVertexArray.h
@@ -37,13 +37,13 @@
     class Attrib {
     public:
         uint32_t buffer;
+        const uint8_t * ptr;
         uint32_t offset;
         uint32_t type;
         uint32_t size;
         uint32_t stride;
         bool normalized;
         String8 name;
-        RsDataKind kind;
 
         Attrib();
         void set(const Attrib &);
@@ -52,17 +52,25 @@
 
 
     void clearAll();
-    void setActiveBuffer(uint32_t id) {mActiveBuffer = id;}
-    void addUser(const Attrib &, uint32_t stride);
-    void addLegacy(uint32_t type, uint32_t size, uint32_t stride, RsDataKind kind, bool normalized, uint32_t offset);
+    void setActiveBuffer(uint32_t id) {
+        mActiveBuffer = id;
+        mActivePointer = NULL;
+    }
+    void setActiveBuffer(const void *ptr) {
+        mActiveBuffer = 0;
+        mActivePointer = (const uint8_t *)ptr;
+    }
 
-    void setupGL(const Context *rsc, class VertexArrayState *) const;
+    void add(const Attrib &, uint32_t stride);
+    void add(uint32_t type, uint32_t size, uint32_t stride, bool normalized, uint32_t offset, const char *name);
+
     void setupGL2(const Context *rsc, class VertexArrayState *, ShaderCache *) const;
     void logAttrib(uint32_t idx, uint32_t slot) const;
 
 protected:
     void clear(uint32_t index);
     uint32_t mActiveBuffer;
+    const uint8_t * mActivePointer;
     uint32_t mCount;
 
     Attrib mAttribs[RS_MAX_ATTRIBS];
@@ -80,7 +88,7 @@
 
 }
 }
-#endif //ANDROID_LIGHT_H
+#endif //ANDROID_VERTEX_ARRAY_H
 
 
 
diff --git a/rsg_ScriptJavaClass.cpp b/rsg_ScriptJavaClass.cpp
index cee9f52..0169b98 100644
--- a/rsg_ScriptJavaClass.cpp
+++ b/rsg_ScriptJavaClass.cpp
@@ -7,8 +7,12 @@
 struct Element;
 
 struct ElementField {
+    // An Element Field is a combination of an Element with a name assigned.
+
     const char *name;
     Element *e;
+
+
     ElementField(const char *n, Element *_e) {
         name = n;
         e = _e;
@@ -20,12 +24,21 @@
 };
 
 struct Element {
+    // An Element can take one of two forms.
+    // 1: Basic.  It contains a single basic type and vector size.
+    // 2: Complex.  It contains a list of fields with names.  Each field
+    // will in turn be another element.
+
     ElementField *fields;
-    size_t fieldCount;
+    size_t fieldCount;  // If field count is 0, the element is a Basic type.
     const char *name;
     bool generated;
 
+    // The basic data type from RenderScript.h
     RsDataType compType;
+
+    // The vector size of the data type for float2, float3, ....
+    // Allowed sizes are 2,3,4,8,16
     uint32_t compVectorSize;
 
     Element() {
diff --git a/scriptc/rs_cl.rsh b/scriptc/rs_cl.rsh
new file mode 100644
index 0000000..64844a4
--- /dev/null
+++ b/scriptc/rs_cl.rsh
@@ -0,0 +1,785 @@
+#ifndef __RS_CL_RSH__
+#define __RS_CL_RSH__
+
+#define M_PI        3.14159265358979323846264338327950288f   /* pi */
+
+
+// Conversions
+#define CVT_FUNC_2(typeout, typein) /* Defines convert_<typeout>2/3/4 overloads that cast each component of a typein vector to typeout. */ \
+static typeout##2 __attribute__((overloadable)) convert_##typeout##2(typein##2 v) { \
+    typeout##2 r = {(typeout)v.x, (typeout)v.y}; \
+    return r; \
+} \
+static typeout##3 __attribute__((overloadable)) convert_##typeout##3(typein##3 v) { \
+    typeout##3 r = {(typeout)v.x, (typeout)v.y, (typeout)v.z}; \
+    return r; \
+} \
+static typeout##4 __attribute__((overloadable)) convert_##typeout##4(typein##4 v) { \
+    typeout##4 r = {(typeout)v.x, (typeout)v.y, (typeout)v.z, (typeout)v.w}; \
+    return r; \
+}
+
+#define CVT_FUNC(type)      CVT_FUNC_2(type, uchar) \
+                            CVT_FUNC_2(type, char) \
+                            CVT_FUNC_2(type, ushort) \
+                            CVT_FUNC_2(type, short) \
+                            CVT_FUNC_2(type, int) \
+                            CVT_FUNC_2(type, uint) \
+                            CVT_FUNC_2(type, float)
+
+CVT_FUNC(char)
+CVT_FUNC(uchar)
+CVT_FUNC(short)
+CVT_FUNC(ushort)
+CVT_FUNC(int)
+CVT_FUNC(uint)
+CVT_FUNC(float)
+
+
+
+// Float ops, 6.11.2
+
+#define DEF_FUNC_1(fnc) /* Defines float2/float3/float4 overloads of fnc that apply fnc to each component in turn. */ \
+static float2 __attribute__((overloadable)) fnc(float2 v) { \
+    float2 r; \
+    r.x = fnc(v.x); \
+    r.y = fnc(v.y); \
+    return r; \
+} \
+static float3 __attribute__((overloadable)) fnc(float3 v) { \
+    float3 r; \
+    r.x = fnc(v.x); \
+    r.y = fnc(v.y); \
+    r.z = fnc(v.z); \
+    return r; \
+} \
+static float4 __attribute__((overloadable)) fnc(float4 v) { \
+    float4 r; \
+    r.x = fnc(v.x); \
+    r.y = fnc(v.y); \
+    r.z = fnc(v.z); \
+    r.w = fnc(v.w); \
+    return r; \
+}
+
+#define DEF_FUNC_2(fnc) /* Defines float2/float3/float4 overloads of binary fnc, pairing matching components of v1 and v2. */ \
+static float2 __attribute__((overloadable)) fnc(float2 v1, float2 v2) { \
+    float2 r; \
+    r.x = fnc(v1.x, v2.x); \
+    r.y = fnc(v1.y, v2.y); \
+    return r; \
+} \
+static float3 __attribute__((overloadable)) fnc(float3 v1, float3 v2) { \
+    float3 r; \
+    r.x = fnc(v1.x, v2.x); \
+    r.y = fnc(v1.y, v2.y); \
+    r.z = fnc(v1.z, v2.z); \
+    return r; \
+} \
+static float4 __attribute__((overloadable)) fnc(float4 v1, float4 v2) { \
+    float4 r; \
+    r.x = fnc(v1.x, v2.x); \
+    r.y = fnc(v1.y, v2.y); \
+    r.z = fnc(v1.z, v2.z); \
+    r.w = fnc(v1.w, v2.w); /* w must pair with w, like the x/y/z lanes above */ \
+    return r; \
+}
+
+#define DEF_FUNC_2F(fnc) /* Defines float2/float3/float4 overloads of fnc(vector, float); the scalar v2 is reused for every component. */ \
+static float2 __attribute__((overloadable)) fnc(float2 v1, float v2) { \
+    float2 r; \
+    r.x = fnc(v1.x, v2); \
+    r.y = fnc(v1.y, v2); \
+    return r; \
+} \
+static float3 __attribute__((overloadable)) fnc(float3 v1, float v2) { \
+    float3 r; \
+    r.x = fnc(v1.x, v2); \
+    r.y = fnc(v1.y, v2); \
+    r.z = fnc(v1.z, v2); \
+    return r; \
+} \
+static float4 __attribute__((overloadable)) fnc(float4 v1, float v2) { \
+    float4 r; \
+    r.x = fnc(v1.x, v2); \
+    r.y = fnc(v1.y, v2); \
+    r.z = fnc(v1.z, v2); \
+    r.w = fnc(v1.w, v2); \
+    return r; \
+}
+
+
+extern float __attribute__((overloadable)) acos(float);
+DEF_FUNC_1(acos)
+
+extern float __attribute__((overloadable)) acosh(float);
+DEF_FUNC_1(acosh)
+
+static float __attribute__((overloadable)) acospi(float v) {
+    return acos(v) / M_PI;
+}
+DEF_FUNC_1(acospi)
+
+extern float __attribute__((overloadable)) asin(float);
+DEF_FUNC_1(asin)
+
+extern float __attribute__((overloadable)) asinh(float);
+DEF_FUNC_1(asinh)
+
+static float __attribute__((overloadable)) asinpi(float v) {
+    return asin(v) / M_PI;
+}
+DEF_FUNC_1(asinpi)
+
+extern float __attribute__((overloadable)) atan(float);
+DEF_FUNC_1(atan)
+
+extern float __attribute__((overloadable)) atan2(float, float);
+DEF_FUNC_2(atan2)
+
+extern float __attribute__((overloadable)) atanh(float);
+DEF_FUNC_1(atanh)
+
+static float __attribute__((overloadable)) atanpi(float v) {
+    return atan(v) / M_PI;
+}
+DEF_FUNC_1(atanpi)
+
+static float __attribute__((overloadable)) atan2pi(float y, float x) {
+    return atan2(y, x) / M_PI;
+}
+DEF_FUNC_2(atan2pi)
+
+extern float __attribute__((overloadable)) cbrt(float);
+DEF_FUNC_1(cbrt)
+
+extern float __attribute__((overloadable)) ceil(float);
+DEF_FUNC_1(ceil)
+
+extern float __attribute__((overloadable)) copysign(float, float);
+DEF_FUNC_2(copysign)
+
+extern float __attribute__((overloadable)) cos(float);
+DEF_FUNC_1(cos)
+
+extern float __attribute__((overloadable)) cosh(float);
+DEF_FUNC_1(cosh)
+
+static float __attribute__((overloadable)) cospi(float v) {
+    return cos(v * M_PI);
+}
+DEF_FUNC_1(cospi)
+
+extern float __attribute__((overloadable)) erfc(float);
+DEF_FUNC_1(erfc)
+
+extern float __attribute__((overloadable)) erf(float);
+DEF_FUNC_1(erf)
+
+extern float __attribute__((overloadable)) exp(float);
+DEF_FUNC_1(exp)
+
+extern float __attribute__((overloadable)) exp2(float);
+DEF_FUNC_1(exp2)
+
+extern float __attribute__((overloadable)) pow(float, float);
+static float __attribute__((overloadable)) exp10(float v) { // 10 raised to the power v, computed with pow
+    return pow(10.f, v);
+}
+DEF_FUNC_1(exp10)
+
+extern float __attribute__((overloadable)) expm1(float);
+DEF_FUNC_1(expm1)
+
+extern float __attribute__((overloadable)) fabs(float);
+DEF_FUNC_1(fabs)
+
+extern float __attribute__((overloadable)) fdim(float, float);
+DEF_FUNC_2(fdim)
+
+extern float __attribute__((overloadable)) floor(float);
+DEF_FUNC_1(floor)
+
+extern float __attribute__((overloadable)) fma(float, float, float);
+extern float2 __attribute__((overloadable)) fma(float2, float2, float2);
+extern float3 __attribute__((overloadable)) fma(float3, float3, float3);
+extern float4 __attribute__((overloadable)) fma(float4, float4, float4);
+
+extern float __attribute__((overloadable)) fmax(float, float);
+DEF_FUNC_2(fmax)   // fmax(floatN, floatN) overloads
+DEF_FUNC_2F(fmax)  // fmax(floatN, float) overloads
+
+extern float __attribute__((overloadable)) fmin(float, float);
+DEF_FUNC_2(fmin)   // fmin(floatN, floatN) overloads
+DEF_FUNC_2F(fmin)  // fmin(floatN, float) overloads
+
+extern float __attribute__((overloadable)) fmod(float, float);
+DEF_FUNC_2(fmod)
+
+static float __attribute__((overloadable)) fract(float v, float *iptr) { // returns v - floor(v); floor(v) is stored in *iptr
+    float whole = floor(v);  // stay in float: an int round-trip is undefined outside int range
+    iptr[0] = whole;
+    return fmin(v - whole, 0x1.fffffep-1f);  // clamp to the largest float below 1.0
+}
+static float2 __attribute__((overloadable)) fract(float2 v, float2 *iptr) { // per-component fract; floors go to *iptr
+    float t[2];
+    float2 r;
+    r.x = fract(v.x, &t[0]);
+    r.y = fract(v.y, &t[1]);
+    iptr->x = t[0];  // write components of the single output vector;
+    iptr->y = t[1];  // iptr[1] would address memory past *iptr
+    return r;
+}
+static float3 __attribute__((overloadable)) fract(float3 v, float3 *iptr) { // per-component fract; floors go to *iptr
+    float t[3];
+    float3 r;
+    r.x = fract(v.x, &t[0]);
+    r.y = fract(v.y, &t[1]);
+    r.z = fract(v.z, &t[2]);
+    iptr->x = t[0];  // write components of the single output vector;
+    iptr->y = t[1];  // iptr[1]/iptr[2] would address memory past *iptr
+    iptr->z = t[2];
+    return r;
+}
+static float4 __attribute__((overloadable)) fract(float4 v, float4 *iptr) { // per-component fract; floors go to *iptr
+    float t[4];
+    float4 r;
+    r.x = fract(v.x, &t[0]);
+    r.y = fract(v.y, &t[1]);
+    r.z = fract(v.z, &t[2]);
+    r.w = fract(v.w, &t[3]);
+    iptr->x = t[0];  // write components of the single output vector;
+    iptr->y = t[1];  // iptr[1..3] would address memory past *iptr
+    iptr->z = t[2];
+    iptr->w = t[3];
+    return r;
+}
+
+extern float __attribute__((overloadable)) frexp(float, float *);
+extern float2 __attribute__((overloadable)) frexp(float2, float2 *);
+extern float3 __attribute__((overloadable)) frexp(float3, float3 *);
+extern float4 __attribute__((overloadable)) frexp(float4, float4 *);
+
+extern float __attribute__((overloadable)) hypot(float, float);
+DEF_FUNC_2(hypot)
+
+extern int __attribute__((overloadable)) ilogb(float);
+DEF_FUNC_1(ilogb)
+
+extern float __attribute__((overloadable)) ldexp(float, int);
+extern float2 __attribute__((overloadable)) ldexp(float2, int2);
+extern float3 __attribute__((overloadable)) ldexp(float3, int3);
+extern float4 __attribute__((overloadable)) ldexp(float4, int4);
+extern float2 __attribute__((overloadable)) ldexp(float2, int);
+extern float3 __attribute__((overloadable)) ldexp(float3, int);
+extern float4 __attribute__((overloadable)) ldexp(float4, int);
+
+extern float __attribute__((overloadable)) lgamma(float);
+DEF_FUNC_1(lgamma)
+extern float __attribute__((overloadable)) lgamma(float, float *);
+extern float2 __attribute__((overloadable)) lgamma(float2, float2 *);
+extern float3 __attribute__((overloadable)) lgamma(float3, float3 *);
+extern float4 __attribute__((overloadable)) lgamma(float4, float4 *);
+
+extern float __attribute__((overloadable)) log(float);
+DEF_FUNC_1(log)
+
+
+extern float __attribute__((overloadable)) log10(float);
+DEF_FUNC_1(log10)
+
+static float __attribute__((overloadable)) log2(float v) { // base-2 logarithm via change of base from log10
+    return log10(v) / log10(2.f);
+}
+DEF_FUNC_1(log2)
+
+extern float __attribute__((overloadable)) log1p(float);
+DEF_FUNC_1(log1p)
+
+extern float __attribute__((overloadable)) logb(float);
+DEF_FUNC_1(logb)
+
+extern float __attribute__((overloadable)) mad(float, float, float);
+extern float2 __attribute__((overloadable)) mad(float2, float2, float2);
+extern float3 __attribute__((overloadable)) mad(float3, float3, float3);
+extern float4 __attribute__((overloadable)) mad(float4, float4, float4);
+
+extern float __attribute__((overloadable)) modf(float, float *);
+extern float2 __attribute__((overloadable)) modf(float2, float2 *);
+extern float3 __attribute__((overloadable)) modf(float3, float3 *);
+extern float4 __attribute__((overloadable)) modf(float4, float4 *);
+
+//extern float __attribute__((overloadable)) nan(uint);
+
+extern float __attribute__((overloadable)) nextafter(float, float);
+DEF_FUNC_2(nextafter)
+
+DEF_FUNC_2(pow)
+
+static float __attribute__((overloadable)) pown(float v, int p) {
+    return pow(v, (float)p);
+}
+static float2 __attribute__((overloadable)) pown(float2 v, int2 p) { // v raised to integer powers p, per component
+    return pow(v, convert_float2(p));  // convert values; a (float2) cast on int2 would reinterpret the bits
+}
+static float3 __attribute__((overloadable)) pown(float3 v, int3 p) { // v raised to integer powers p, per component
+    return pow(v, convert_float3(p));  // convert values; a (float3) cast on int3 would reinterpret the bits
+}
+static float4 __attribute__((overloadable)) pown(float4 v, int4 p) { // v raised to integer powers p, per component
+    return pow(v, convert_float4(p));  // convert values; a (float4) cast on int4 would reinterpret the bits
+}
+
+static float __attribute__((overloadable)) powr(float v, float p) {
+    return pow(v, p);
+}
+static float2 __attribute__((overloadable)) powr(float2 v, float2 p) {
+    return pow(v, p);
+}
+static float3 __attribute__((overloadable)) powr(float3 v, float3 p) {
+    return pow(v, p);
+}
+static float4 __attribute__((overloadable)) powr(float4 v, float4 p) {
+    return pow(v, p);
+}
+
+extern float __attribute__((overloadable)) remainder(float, float);
+DEF_FUNC_2(remainder)
+
+extern float __attribute__((overloadable)) remquo(float, float, float *);
+extern float2 __attribute__((overloadable)) remquo(float2, float2, float2 *);
+extern float3 __attribute__((overloadable)) remquo(float3, float3, float3 *);
+extern float4 __attribute__((overloadable)) remquo(float4, float4, float4 *);
+
+extern float __attribute__((overloadable)) rint(float);
+DEF_FUNC_1(rint)
+
+static float __attribute__((overloadable)) rootn(float v, int r) {
+    return pow(v, 1.f / r);
+}
+static float2 __attribute__((overloadable)) rootn(float2 v, int2 r) {
+    float2 t = {1.f / r.x, 1.f / r.y};
+    return pow(v, t);
+}
+static float3 __attribute__((overloadable)) rootn(float3 v, int3 r) {
+    float3 t = {1.f / r.x, 1.f / r.y, 1.f / r.z};
+    return pow(v, t);
+}
+static float4 __attribute__((overloadable)) rootn(float4 v, int4 r) {
+    float4 t = {1.f / r.x, 1.f / r.y, 1.f / r.z, 1.f / r.w};
+    return pow(v, t);
+}
+
+extern float __attribute__((overloadable)) round(float);
+DEF_FUNC_1(round)
+
+extern float __attribute__((overloadable)) sqrt(float);
+/*static float __attribute__((overloadable)) rsqrt(float v) {
+    return 1.f / sqrt(v);
+}
+DEF_FUNC_1(rsqrt)*/
+
+extern float __attribute__((overloadable)) sin(float);
+DEF_FUNC_1(sin)
+
+static float __attribute__((overloadable)) sincos(float v, float *cosptr) {
+    *cosptr = cos(v);
+    return sin(v);
+}
+static float2 __attribute__((overloadable)) sincos(float2 v, float2 *cosptr) {
+    *cosptr = cos(v);
+    return sin(v);
+}
+static float3 __attribute__((overloadable)) sincos(float3 v, float3 *cosptr) {
+    *cosptr = cos(v);
+    return sin(v);
+}
+static float4 __attribute__((overloadable)) sincos(float4 v, float4 *cosptr) {
+    *cosptr = cos(v);
+    return sin(v);
+}
+
+extern float __attribute__((overloadable)) sinh(float);
+DEF_FUNC_1(sinh)
+
+static float __attribute__((overloadable)) sinpi(float v) {
+    return sin(v * M_PI);
+}
+DEF_FUNC_1(sinpi)
+
+DEF_FUNC_1(sqrt)
+
+extern float __attribute__((overloadable)) tan(float);
+DEF_FUNC_1(tan)
+
+extern float __attribute__((overloadable)) tanh(float);
+DEF_FUNC_1(tanh)
+
+static float __attribute__((overloadable)) tanpi(float v) {
+    return tan(v * M_PI);
+}
+DEF_FUNC_1(tanpi)
+
+extern float __attribute__((overloadable)) tgamma(float);
+DEF_FUNC_1(tgamma)
+
+extern float __attribute__((overloadable)) trunc(float);
+DEF_FUNC_1(trunc)
+
+// Int ops (partial), 6.11.3
+extern uint __attribute__((overloadable)) abs(int);
+extern ushort __attribute__((overloadable)) abs(short);
+extern uchar __attribute__((overloadable)) abs(char);
+
+extern uint __attribute__((overloadable)) clz(uint);
+extern int __attribute__((overloadable)) clz(int);
+extern ushort __attribute__((overloadable)) clz(ushort);
+extern short __attribute__((overloadable)) clz(short);
+extern uchar __attribute__((overloadable)) clz(uchar);
+extern char __attribute__((overloadable)) clz(char);
+
+static uint __attribute__((overloadable)) min(uint v1, uint v2) {
+    return v1 < v2 ? v1 : v2;
+}
+static int __attribute__((overloadable)) min(int v1, int v2) {
+    return v1 < v2 ? v1 : v2;
+}
+static ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
+    return v1 < v2 ? v1 : v2;
+}
+static short __attribute__((overloadable)) min(short v1, short v2) {
+    return v1 < v2 ? v1 : v2;
+}
+static uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
+    return v1 < v2 ? v1 : v2;
+}
+static char __attribute__((overloadable)) min(char v1, char v2) {
+    return v1 < v2 ? v1 : v2;
+}
+
+static uint __attribute__((overloadable)) max(uint v1, uint v2) {
+    return v1 > v2 ? v1 : v2;
+}
+static int __attribute__((overloadable)) max(int v1, int v2) {
+    return v1 > v2 ? v1 : v2;
+}
+static ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
+    return v1 > v2 ? v1 : v2;
+}
+static short __attribute__((overloadable)) max(short v1, short v2) {
+    return v1 > v2 ? v1 : v2;
+}
+static uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
+    return v1 > v2 ? v1 : v2;
+}
+static char __attribute__((overloadable)) max(char v1, char v2) {
+    return v1 > v2 ? v1 : v2;
+}
+
+
+
+
+// 6.11.4
+
+static float __attribute__((overloadable)) clamp(float amount, float low, float high) {
+    return amount < low ? low : (amount > high ? high : amount);
+}
+static float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high) {
+    float2 r;
+    r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
+    r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
+    return r;
+}
+static float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high) {
+    float3 r;
+    r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
+    r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
+    r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);
+    return r;
+}
+static float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high) {
+    float4 r;
+    r.x = amount.x < low.x ? low.x : (amount.x > high.x ? high.x : amount.x);
+    r.y = amount.y < low.y ? low.y : (amount.y > high.y ? high.y : amount.y);
+    r.z = amount.z < low.z ? low.z : (amount.z > high.z ? high.z : amount.z);
+    r.w = amount.w < low.w ? low.w : (amount.w > high.w ? high.w : amount.w);
+    return r;
+}
+static float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high) {
+    float2 r;
+    r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
+    r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
+    return r;
+}
+static float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high) {
+    float3 r;
+    r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
+    r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
+    r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
+    return r;
+}
+static float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high) {
+    float4 r;
+    r.x = amount.x < low ? low : (amount.x > high ? high : amount.x);
+    r.y = amount.y < low ? low : (amount.y > high ? high : amount.y);
+    r.z = amount.z < low ? low : (amount.z > high ? high : amount.z);
+    r.w = amount.w < low ? low : (amount.w > high ? high : amount.w);
+    return r;
+}
+
+static float __attribute__((overloadable)) degrees(float radians) {
+    return radians * (180.f / M_PI);
+}
+DEF_FUNC_1(degrees)
+
+static float __attribute__((overloadable)) max(float v1, float v2) {
+    return v1 > v2 ? v1 : v2;
+}
+static float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
+    float2 r;
+    r.x = v1.x > v2.x ? v1.x : v2.x;
+    r.y = v1.y > v2.y ? v1.y : v2.y;
+    return r;
+}
+static float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
+    float3 r;
+    r.x = v1.x > v2.x ? v1.x : v2.x;
+    r.y = v1.y > v2.y ? v1.y : v2.y;
+    r.z = v1.z > v2.z ? v1.z : v2.z;
+    return r;
+}
+static float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
+    float4 r;
+    r.x = v1.x > v2.x ? v1.x : v2.x;
+    r.y = v1.y > v2.y ? v1.y : v2.y;
+    r.z = v1.z > v2.z ? v1.z : v2.z;
+    r.w = v1.w > v2.w ? v1.w : v2.w;
+    return r;
+}
+static float2 __attribute__((overloadable)) max(float2 v1, float v2) {
+    float2 r;
+    r.x = v1.x > v2 ? v1.x : v2;
+    r.y = v1.y > v2 ? v1.y : v2;
+    return r;
+}
+static float3 __attribute__((overloadable)) max(float3 v1, float v2) {
+    float3 r;
+    r.x = v1.x > v2 ? v1.x : v2;
+    r.y = v1.y > v2 ? v1.y : v2;
+    r.z = v1.z > v2 ? v1.z : v2;
+    return r;
+}
+static float4 __attribute__((overloadable)) max(float4 v1, float v2) {
+    float4 r;
+    r.x = v1.x > v2 ? v1.x : v2;
+    r.y = v1.y > v2 ? v1.y : v2;
+    r.z = v1.z > v2 ? v1.z : v2;
+    r.w = v1.w > v2 ? v1.w : v2;
+    return r;
+}
+
+static float __attribute__((overloadable)) min(float v1, float v2) {
+    return v1 < v2 ? v1 : v2;
+}
+static float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
+    float2 r;
+    r.x = v1.x < v2.x ? v1.x : v2.x;
+    r.y = v1.y < v2.y ? v1.y : v2.y;
+    return r;
+}
+static float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
+    float3 r;
+    r.x = v1.x < v2.x ? v1.x : v2.x;
+    r.y = v1.y < v2.y ? v1.y : v2.y;
+    r.z = v1.z < v2.z ? v1.z : v2.z;
+    return r;
+}
+static float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
+    float4 r;
+    r.x = v1.x < v2.x ? v1.x : v2.x;
+    r.y = v1.y < v2.y ? v1.y : v2.y;
+    r.z = v1.z < v2.z ? v1.z : v2.z;
+    r.w = v1.w < v2.w ? v1.w : v2.w;
+    return r;
+}
+static float2 __attribute__((overloadable)) min(float2 v1, float v2) {
+    float2 r;
+    r.x = v1.x < v2 ? v1.x : v2;
+    r.y = v1.y < v2 ? v1.y : v2;
+    return r;
+}
+static float3 __attribute__((overloadable)) min(float3 v1, float v2) {
+    float3 r;
+    r.x = v1.x < v2 ? v1.x : v2;
+    r.y = v1.y < v2 ? v1.y : v2;
+    r.z = v1.z < v2 ? v1.z : v2;
+    return r;
+}
+static float4 __attribute__((overloadable)) min(float4 v1, float v2) {
+    float4 r;
+    r.x = v1.x < v2 ? v1.x : v2;
+    r.y = v1.y < v2 ? v1.y : v2;
+    r.z = v1.z < v2 ? v1.z : v2;
+    r.w = v1.w < v2 ? v1.w : v2;
+    return r;
+}
+
+// Linear blend: start + (stop - start) * amount.
+static float __attribute__((overloadable)) mix(float start, float stop, float amount) {
+    const float span = stop - start;
+    return start + span * amount;
+}
+// float2 blend with a per-lane amount.
+static float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float2 amount) {
+    const float2 span = stop - start;
+    return start + span * amount;
+}
+// float3 blend with a per-lane amount.
+static float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float3 amount) {
+    const float3 span = stop - start;
+    return start + span * amount;
+}
+// float4 blend with a per-lane amount.
+static float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float4 amount) {
+    const float4 span = stop - start;
+    return start + span * amount;
+}
+// float2 blend with one scalar amount shared by both lanes.
+static float2 __attribute__((overloadable)) mix(float2 start, float2 stop, float amount) {
+    const float2 span = stop - start;
+    return start + span * amount;
+}
+// float3 blend with one scalar amount shared by all lanes.
+static float3 __attribute__((overloadable)) mix(float3 start, float3 stop, float amount) {
+    const float3 span = stop - start;
+    return start + span * amount;
+}
+// float4 blend with one scalar amount shared by all lanes.
+static float4 __attribute__((overloadable)) mix(float4 start, float4 stop, float amount) {
+    const float4 span = stop - start;
+    return start + span * amount;
+}
+
+// Converts an angle from degrees to radians by scaling with M_PI / 180.
+static float __attribute__((overloadable)) radians(float degrees) {
+    return degrees * (M_PI / 180.f);
+}
+// NOTE(review): DEF_FUNC_1 is defined outside this chunk; presumably it generates the
+// float2/float3/float4 overloads from the scalar one above -- confirm against the macro.
+DEF_FUNC_1(radians)
+
+// Step function: 0 when v < edge, otherwise 1.
+static float __attribute__((overloadable)) step(float edge, float v) {
+    if (v < edge) {
+        return 0.f;
+    }
+    return 1.f;
+}
+// Per-lane step with a per-lane edge (float2).
+static float2 __attribute__((overloadable)) step(float2 edge, float2 v) {
+    float2 out;
+    if (v.x < edge.x) { out.x = 0.f; } else { out.x = 1.f; }
+    if (v.y < edge.y) { out.y = 0.f; } else { out.y = 1.f; }
+    return out;
+}
+// Per-lane step with a per-lane edge (float3).
+static float3 __attribute__((overloadable)) step(float3 edge, float3 v) {
+    float3 out;
+    if (v.x < edge.x) { out.x = 0.f; } else { out.x = 1.f; }
+    if (v.y < edge.y) { out.y = 0.f; } else { out.y = 1.f; }
+    if (v.z < edge.z) { out.z = 0.f; } else { out.z = 1.f; }
+    return out;
+}
+// Per-lane step with a per-lane edge (float4).
+static float4 __attribute__((overloadable)) step(float4 edge, float4 v) {
+    float4 out;
+    if (v.x < edge.x) { out.x = 0.f; } else { out.x = 1.f; }
+    if (v.y < edge.y) { out.y = 0.f; } else { out.y = 1.f; }
+    if (v.z < edge.z) { out.z = 0.f; } else { out.z = 1.f; }
+    if (v.w < edge.w) { out.w = 0.f; } else { out.w = 1.f; }
+    return out;
+}
+// Compares one scalar v against each lane of a float2 edge.
+static float2 __attribute__((overloadable)) step(float2 edge, float v) {
+    float2 out;
+    if (v < edge.x) { out.x = 0.f; } else { out.x = 1.f; }
+    if (v < edge.y) { out.y = 0.f; } else { out.y = 1.f; }
+    return out;
+}
+// Compares one scalar v against each lane of a float3 edge.
+static float3 __attribute__((overloadable)) step(float3 edge, float v) {
+    float3 out;
+    if (v < edge.x) { out.x = 0.f; } else { out.x = 1.f; }
+    if (v < edge.y) { out.y = 0.f; } else { out.y = 1.f; }
+    if (v < edge.z) { out.z = 0.f; } else { out.z = 1.f; }
+    return out;
+}
+// Compares one scalar v against each lane of a float4 edge.
+static float4 __attribute__((overloadable)) step(float4 edge, float v) {
+    float4 out;
+    if (v < edge.x) { out.x = 0.f; } else { out.x = 1.f; }
+    if (v < edge.y) { out.y = 0.f; } else { out.y = 1.f; }
+    if (v < edge.z) { out.z = 0.f; } else { out.z = 1.f; }
+    if (v < edge.w) { out.w = 0.f; } else { out.w = 1.f; }
+    return out;
+}
+
+// smoothstep is only declared here; the implementations live outside this header.
+// Overloads taking three values of the same type:
+extern float __attribute__((overloadable)) smoothstep(float, float, float);
+extern float2 __attribute__((overloadable)) smoothstep(float2, float2, float2);
+extern float3 __attribute__((overloadable)) smoothstep(float3, float3, float3);
+extern float4 __attribute__((overloadable)) smoothstep(float4, float4, float4);
+// Overloads taking two scalars followed by a vector:
+extern float2 __attribute__((overloadable)) smoothstep(float, float, float2);
+extern float3 __attribute__((overloadable)) smoothstep(float, float, float3);
+extern float4 __attribute__((overloadable)) smoothstep(float, float, float4);
+
+// Returns 1 for positive input, -1 for negative input, and the input itself otherwise
+// (so a zero or an unordered value is passed through unchanged).
+static float __attribute__((overloadable)) sign(float v) {
+    return (v > 0) ? 1.f : ((v < 0) ? -1.f : v);
+}
+DEF_FUNC_1(sign)
+
+// 6.11.5
+// Cross product of two 3-component vectors.
+static float3 __attribute__((overloadable)) cross(float3 lhs, float3 rhs) {
+    float3 out;
+    out.z = lhs.x * rhs.y  - lhs.y * rhs.x;
+    out.y = lhs.z * rhs.x  - lhs.x * rhs.z;
+    out.x = lhs.y * rhs.z  - lhs.z * rhs.y;
+    return out;
+}
+
+// Cross product of the xyz parts of two float4 values; the w lane of the result is 0.
+static float4 __attribute__((overloadable)) cross(float4 lhs, float4 rhs) {
+    float4 out;
+    out.w = 0.f;
+    out.z = lhs.x * rhs.y  - lhs.y * rhs.x;
+    out.y = lhs.z * rhs.x  - lhs.x * rhs.z;
+    out.x = lhs.y * rhs.z  - lhs.z * rhs.y;
+    return out;
+}
+
+// Dot product of two scalars is their product.
+static float __attribute__((overloadable)) dot(float lhs, float rhs) {
+    const float product = lhs * rhs;
+    return product;
+}
+// Dot product of two float2 values.
+static float __attribute__((overloadable)) dot(float2 lhs, float2 rhs) {
+    const float px = lhs.x*rhs.x;
+    const float py = lhs.y*rhs.y;
+    return px + py;
+}
+// Dot product of two float3 values.
+static float __attribute__((overloadable)) dot(float3 lhs, float3 rhs) {
+    const float px = lhs.x*rhs.x;
+    const float py = lhs.y*rhs.y;
+    const float pz = lhs.z*rhs.z;
+    return px + py + pz;
+}
+// Dot product of two float4 values.
+static float __attribute__((overloadable)) dot(float4 lhs, float4 rhs) {
+    const float px = lhs.x*rhs.x;
+    const float py = lhs.y*rhs.y;
+    const float pz = lhs.z*rhs.z;
+    const float pw = lhs.w*rhs.w;
+    return px + py + pz + pw;
+}
+
+// Length of a scalar is its magnitude, so the result is never negative.
+static float __attribute__((overloadable)) length(float v) {
+    return fabs(v);
+}
+// Euclidean length of a float2.
+static float __attribute__((overloadable)) length(float2 v) {
+    return sqrt(v.x*v.x + v.y*v.y);
+}
+// Euclidean length of a float3.
+static float __attribute__((overloadable)) length(float3 v) {
+    return sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
+}
+// Euclidean length of a float4 (all four lanes contribute).
+static float __attribute__((overloadable)) length(float4 v) {
+    return sqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+}
+
+// Distance between two scalars: the magnitude of their difference, never negative.
+static float __attribute__((overloadable)) distance(float lhs, float rhs) {
+    return fabs(lhs - rhs);
+}
+// Distance between two float2 points: length of the difference vector.
+static float __attribute__((overloadable)) distance(float2 lhs, float2 rhs) {
+    return length(lhs - rhs);
+}
+// Distance between two float3 points: length of the difference vector.
+static float __attribute__((overloadable)) distance(float3 lhs, float3 rhs) {
+    return length(lhs - rhs);
+}
+// Distance between two float4 points: length of the difference vector.
+static float __attribute__((overloadable)) distance(float4 lhs, float4 rhs) {
+    return length(lhs - rhs);
+}
+
+// Normalizing a scalar keeps only its direction: 1 for positive, -1 for negative.
+// Zero (and any unordered value) is returned unchanged because it has no direction.
+static float __attribute__((overloadable)) normalize(float v) {
+    if (v > 0.f) {
+        return 1.f;
+    }
+    if (v < 0.f) {
+        return -1.f;
+    }
+    return v;
+}
+// Scales a float2 to unit length; a zero-length input is returned unchanged
+// instead of being divided by zero.
+static float2 __attribute__((overloadable)) normalize(float2 v) {
+    const float len = length(v);
+    if (len == 0.f) {
+        return v;
+    }
+    return v / len;
+}
+// Scales a float3 to unit length; a zero-length input is returned unchanged.
+static float3 __attribute__((overloadable)) normalize(float3 v) {
+    const float len = length(v);
+    if (len == 0.f) {
+        return v;
+    }
+    return v / len;
+}
+// Scales a float4 to unit length; a zero-length input is returned unchanged.
+static float4 __attribute__((overloadable)) normalize(float4 v) {
+    const float len = length(v);
+    if (len == 0.f) {
+        return v;
+    }
+    return v / len;
+}
+
+
+#endif
diff --git a/scriptc/rs_core.rsh b/scriptc/rs_core.rsh
new file mode 100644
index 0000000..0e0532c
--- /dev/null
+++ b/scriptc/rs_core.rsh
@@ -0,0 +1,926 @@
+#ifndef __RS_CORE_RSH__
+#define __RS_CORE_RSH__
+
+// Debugging: print a description string and a value to the log.
+// Only declarations appear here; one overload exists per supported value type.
+// Overloads taking one to four floats:
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float, float);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float, float, float);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, float, float, float, float);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, double);
+// Matrix overloads take the matrix by pointer.
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const rs_matrix4x4 *);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const rs_matrix3x3 *);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const rs_matrix2x2 *);
+// Integer overloads, signed and unsigned, at each width.
+extern void __attribute__((overloadable))
+    rsDebug(const char *, int);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, uint);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, long);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, unsigned long);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, long long);
+extern void __attribute__((overloadable))
+    rsDebug(const char *, unsigned long long);
+// Raw pointer overload.
+extern void __attribute__((overloadable))
+    rsDebug(const char *, const void *);
+// RS_DEBUG(a) logs expression a, using its own source text (#a) as the description.
+#define RS_DEBUG(a) rsDebug(#a, a)
+// RS_DEBUG_MARKER logs the current file name as the description and the current line number as the value.
+#define RS_DEBUG_MARKER rsDebug(__FILE__, __LINE__)
+
+// Logs a float2 by forwarding its two lanes to the two-float rsDebug overload.
+static void __attribute__((overloadable)) rsDebug(const char *s, float2 v) {
+    const float x = v.x;
+    const float y = v.y;
+    rsDebug(s, x, y);
+}
+// Logs a float3 by forwarding its three lanes to the three-float rsDebug overload.
+static void __attribute__((overloadable)) rsDebug(const char *s, float3 v) {
+    const float x = v.x;
+    const float y = v.y;
+    const float z = v.z;
+    rsDebug(s, x, y, z);
+}
+// Logs a float4 by forwarding its four lanes to the four-float rsDebug overload.
+static void __attribute__((overloadable)) rsDebug(const char *s, float4 v) {
+    const float x = v.x;
+    const float y = v.y;
+    const float z = v.z;
+    const float w = v.w;
+    rsDebug(s, x, y, z, w);
+}
+
+// Packs three float channels into a uchar4 by scaling each by 255 and converting to
+// uchar; the fourth (w) channel is set to 255. Inputs are not clamped.
+static uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
+{
+    uchar4 packed;
+    packed.w = 255;
+    packed.z = (uchar)(b * 255.f);
+    packed.y = (uchar)(g * 255.f);
+    packed.x = (uchar)(r * 255.f);
+    return packed;
+}
+
+// Packs four float channels into a uchar4 by scaling each by 255 and converting to
+// uchar. Inputs are not clamped.
+static uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
+{
+    uchar4 packed;
+    packed.w = (uchar)(a * 255.f);
+    packed.z = (uchar)(b * 255.f);
+    packed.y = (uchar)(g * 255.f);
+    packed.x = (uchar)(r * 255.f);
+    return packed;
+}
+
+// Packs a float3 color: the lanes are scaled by 255 and stored in x, y, z; w is 255.
+static uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
+{
+    const float3 scaled = color * 255.f;
+    uchar4 packed = {scaled.x, scaled.y, scaled.z, 255};
+    return packed;
+}
+
+// Packs a float4 color: all four lanes are scaled by 255 and stored in order.
+static uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
+{
+    const float4 scaled = color * 255.f;
+    uchar4 packed = {scaled.x, scaled.y, scaled.z, scaled.w};
+    return packed;
+}
+
+// Expands a packed uchar4 color to a float4 by converting each channel to float and
+// scaling it by 1/255, so a channel value of 255 unpacks to 1.0.
+static float4 rsUnpackColor8888(uchar4 c)
+{
+    // 1.0 / 255.0 rounded to float precision.
+    float4 ret = (float4)0.003921569f;
+    ret *= convert_float4(c);
+    return ret;
+}
+
+//extern uchar4 __attribute__((overloadable)) rsPackColorTo565(float r, float g, float b);
+//extern uchar4 __attribute__((overloadable)) rsPackColorTo565(float3);
+//extern float4 rsUnpackColor565(uchar4);
+
+
+/////////////////////////////////////////////////////
+// Matrix ops
+/////////////////////////////////////////////////////
+
+// Stores v at flat index row * 4 + col of a 4x4 matrix. No bounds checking is done.
+static void __attribute__((overloadable))
+rsMatrixSet(rs_matrix4x4 *m, uint32_t row, uint32_t col, float v) {
+    const uint32_t slot = row * 4 + col;
+    m->m[slot] = v;
+}
+
+// Reads the element at flat index row * 4 + col of a 4x4 matrix.
+static float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix4x4 *m, uint32_t row, uint32_t col) {
+    const uint32_t slot = row * 4 + col;
+    return m->m[slot];
+}
+
+// Stores v at flat index row * 3 + col of a 3x3 matrix. No bounds checking is done.
+static void __attribute__((overloadable))
+rsMatrixSet(rs_matrix3x3 *m, uint32_t row, uint32_t col, float v) {
+    const uint32_t slot = row * 3 + col;
+    m->m[slot] = v;
+}
+
+// Reads the element at flat index row * 3 + col of a 3x3 matrix.
+static float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix3x3 *m, uint32_t row, uint32_t col) {
+    const uint32_t slot = row * 3 + col;
+    return m->m[slot];
+}
+
+// Stores v at flat index row * 2 + col of a 2x2 matrix. No bounds checking is done.
+static void __attribute__((overloadable))
+rsMatrixSet(rs_matrix2x2 *m, uint32_t row, uint32_t col, float v) {
+    const uint32_t slot = row * 2 + col;
+    m->m[slot] = v;
+}
+
+// Reads the element at flat index row * 2 + col of a 2x2 matrix.
+static float __attribute__((overloadable))
+rsMatrixGet(const rs_matrix2x2 *m, uint32_t row, uint32_t col) {
+    const uint32_t slot = row * 2 + col;
+    return m->m[slot];
+}
+
+// Overwrites a 4x4 matrix with the identity.
+static void __attribute__((overloadable))
+rsMatrixLoadIdentity(rs_matrix4x4 *m) {
+    // In the flat 16-element layout the diagonal sits at 0, 5, 10, 15 (multiples of 5).
+    for (int i = 0; i < 16; i++) {
+        m->m[i] = (i % 5 == 0) ? 1.f : 0.f;
+    }
+}
+
+// Overwrites a 3x3 matrix with the identity.
+static void __attribute__((overloadable))
+rsMatrixLoadIdentity(rs_matrix3x3 *m) {
+    // In the flat 9-element layout the diagonal sits at 0, 4, 8 (multiples of 4).
+    for (int i = 0; i < 9; i++) {
+        m->m[i] = (i % 4 == 0) ? 1.f : 0.f;
+    }
+}
+
+// Overwrites a 2x2 matrix with the identity.
+static void __attribute__((overloadable))
+rsMatrixLoadIdentity(rs_matrix2x2 *m) {
+    // In the flat 4-element layout the diagonal sits at 0 and 3 (multiples of 3).
+    for (int i = 0; i < 4; i++) {
+        m->m[i] = (i % 3 == 0) ? 1.f : 0.f;
+    }
+}
+
+// Copies 16 consecutive floats from v into a 4x4 matrix, preserving their order.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const float *v) {
+    for (int i = 0; i < 16; i++) {
+        m->m[i] = v[i];
+    }
+}
+
+// Copies 9 consecutive floats from v into a 3x3 matrix, preserving their order.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix3x3 *m, const float *v) {
+    for (int i = 0; i < 9; i++) {
+        m->m[i] = v[i];
+    }
+}
+
+// Copies 4 consecutive floats from v into a 2x2 matrix, preserving their order.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix2x2 *m, const float *v) {
+    for (int i = 0; i < 4; i++) {
+        m->m[i] = v[i];
+    }
+}
+
+// Copies every element of the 4x4 matrix v into m.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix4x4 *v) {
+    for (int i = 0; i < 16; i++) {
+        m->m[i] = v->m[i];
+    }
+}
+
+// Embeds the 3x3 matrix v in the upper-left corner of m. Each group of three source
+// elements fills the first three slots of a group of four; the fourth slot of each
+// group is 0, and the last group is 0, 0, 0, 1.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix3x3 *v) {
+    for (int grp = 0; grp < 3; grp++) {
+        for (int k = 0; k < 3; k++) {
+            m->m[grp * 4 + k] = v->m[grp * 3 + k];
+        }
+        m->m[grp * 4 + 3] = 0.f;
+    }
+    m->m[12] = 0.f;
+    m->m[13] = 0.f;
+    m->m[14] = 0.f;
+    m->m[15] = 1.f;
+}
+
+// Embeds the 2x2 matrix v in the upper-left corner of m and fills the rest of m from
+// the identity. The source has only four elements (indices 0..3): each pair fills the
+// first two slots of a group of four in m.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix4x4 *m, const rs_matrix2x2 *v) {
+    m->m[0] = v->m[0];
+    m->m[1] = v->m[1];
+    m->m[2] = 0.f;
+    m->m[3] = 0.f;
+    m->m[4] = v->m[2];
+    m->m[5] = v->m[3];
+    m->m[6] = 0.f;
+    m->m[7] = 0.f;
+    // The third and fourth groups have no counterpart in a 2x2 source.
+    m->m[8] = 0.f;
+    m->m[9] = 0.f;
+    m->m[10] = 1.f;
+    m->m[11] = 0.f;
+    m->m[12] = 0.f;
+    m->m[13] = 0.f;
+    m->m[14] = 0.f;
+    m->m[15] = 1.f;
+}
+
+// Copies every element of the 3x3 matrix v into m.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix3x3 *m, const rs_matrix3x3 *v) {
+    for (int i = 0; i < 9; i++) {
+        m->m[i] = v->m[i];
+    }
+}
+
+// Copies every element of the 2x2 matrix v into m.
+static void __attribute__((overloadable))
+rsMatrixLoad(rs_matrix2x2 *m, const rs_matrix2x2 *v) {
+    for (int i = 0; i < 4; i++) {
+        m->m[i] = v->m[i];
+    }
+}
+
+// Overwrites m with a rotation of rot degrees about the axis (x, y, z).
+// The axis is rescaled to unit length unless its squared length is already exactly 1;
+// a zero axis is not guarded against.
+static void __attribute__((overloadable))
+rsMatrixLoadRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) {
+    // Degrees to radians.
+    rot *= (float)(M_PI / 180.0f);
+    const float c = cos(rot);
+    const float s = sin(rot);
+
+    const float lenSq = x*x + y*y + z*z;
+    if (lenSq != 1) {
+        const float invLen = 1.f / sqrt(lenSq);
+        x *= invLen;
+        y *= invLen;
+        z *= invLen;
+    }
+
+    const float nc = 1.0f - c;
+    const float xy = x * y;
+    const float yz = y * z;
+    const float zx = z * x;
+    const float xs = x * s;
+    const float ys = y * s;
+    const float zs = z * s;
+
+    // Elements 0..3
+    m->m[ 0] = x*x*nc +  c;
+    m->m[ 1] =  xy*nc + zs;
+    m->m[ 2] =  zx*nc - ys;
+    m->m[ 3] = 0;
+    // Elements 4..7
+    m->m[ 4] =  xy*nc - zs;
+    m->m[ 5] = y*y*nc +  c;
+    m->m[ 6] =  yz*nc + xs;
+    m->m[ 7] = 0;
+    // Elements 8..11
+    m->m[ 8] =  zx*nc + ys;
+    m->m[ 9] =  yz*nc - xs;
+    m->m[10] = z*z*nc +  c;
+    m->m[11] = 0;
+    // Elements 12..15: no translation.
+    m->m[12] = 0;
+    m->m[13] = 0;
+    m->m[14] = 0;
+    m->m[15] = 1;
+}
+
+// Overwrites m with a scale matrix: identity with x, y, z at elements 0, 5, 10.
+static void __attribute__((overloadable))
+rsMatrixLoadScale(rs_matrix4x4 *m, float x, float y, float z) {
+    rsMatrixLoadIdentity(m);
+    m->m[10] = z;
+    m->m[5] = y;
+    m->m[0] = x;
+}
+
+// Overwrites m with a translation matrix: identity with x, y, z at elements 12, 13, 14.
+static void __attribute__((overloadable))
+rsMatrixLoadTranslate(rs_matrix4x4 *m, float x, float y, float z) {
+    rsMatrixLoadIdentity(m);
+    m->m[14] = z;
+    m->m[13] = y;
+    m->m[12] = x;
+}
+
+// Writes the product of lhs and rhs into m, one group of four output elements at a
+// time. m is written while lhs and rhs are still being read, so it must not alias them.
+static void __attribute__((overloadable))
+rsMatrixLoadMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *lhs, const rs_matrix4x4 *rhs) {
+    for (int i = 0; i < 4; i++) {
+        float acc[4] = {0, 0, 0, 0};
+        for (int j = 0; j < 4; j++) {
+            const float weight = rhs->m[i * 4 + j];
+            for (int k = 0; k < 4; k++) {
+                acc[k] += lhs->m[j * 4 + k] * weight;
+            }
+        }
+        for (int k = 0; k < 4; k++) {
+            m->m[i * 4 + k] = acc[k];
+        }
+    }
+}
+
+// In-place multiply: m = m * rhs, computed through a temporary so that m can be read
+// safely while the product is built.
+static void __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, const rs_matrix4x4 *rhs) {
+    rs_matrix4x4 product;
+    rsMatrixLoadMultiply(&product, m, rhs);
+    rsMatrixLoad(m, &product);
+}
+
+// Writes the product of lhs and rhs into m, one group of three output elements at a
+// time. m is written while lhs and rhs are still being read, so it must not alias them.
+static void __attribute__((overloadable))
+rsMatrixLoadMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *lhs, const rs_matrix3x3 *rhs) {
+    for (int i = 0; i < 3; i++) {
+        float acc[3] = {0, 0, 0};
+        for (int j = 0; j < 3; j++) {
+            const float weight = rhs->m[i * 3 + j];
+            for (int k = 0; k < 3; k++) {
+                acc[k] += lhs->m[j * 3 + k] * weight;
+            }
+        }
+        for (int k = 0; k < 3; k++) {
+            m->m[i * 3 + k] = acc[k];
+        }
+    }
+}
+
+// In-place multiply: m = m * rhs, computed through a temporary.
+static void __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, const rs_matrix3x3 *rhs) {
+    rs_matrix3x3 product;
+    rsMatrixLoadMultiply(&product, m, rhs);
+    rsMatrixLoad(m, &product);
+}
+
+// Writes the product of lhs and rhs into m, one pair of output elements at a time.
+// m is written while lhs and rhs are still being read, so it must not alias them.
+static void __attribute__((overloadable))
+rsMatrixLoadMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *lhs, const rs_matrix2x2 *rhs) {
+    for (int i = 0; i < 2; i++) {
+        float acc[2] = {0, 0};
+        for (int j = 0; j < 2; j++) {
+            const float weight = rhs->m[i * 2 + j];
+            for (int k = 0; k < 2; k++) {
+                acc[k] += lhs->m[j * 2 + k] * weight;
+            }
+        }
+        for (int k = 0; k < 2; k++) {
+            m->m[i * 2 + k] = acc[k];
+        }
+    }
+}
+
+// In-place multiply: m = m * rhs, computed through a temporary.
+static void __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix2x2 *m, const rs_matrix2x2 *rhs) {
+    rs_matrix2x2 product;
+    rsMatrixLoadMultiply(&product, m, rhs);
+    rsMatrixLoad(m, &product);
+}
+
+// Multiplies m in place by a rotation of rot degrees about the axis (x, y, z).
+static void __attribute__((overloadable))
+rsMatrixRotate(rs_matrix4x4 *m, float rot, float x, float y, float z) {
+    rs_matrix4x4 rotation;
+    rsMatrixLoadRotate(&rotation, rot, x, y, z);
+    rsMatrixMultiply(m, &rotation);
+}
+
+// Multiplies m in place by a scale of (x, y, z).
+static void __attribute__((overloadable))
+rsMatrixScale(rs_matrix4x4 *m, float x, float y, float z) {
+    rs_matrix4x4 scaling;
+    rsMatrixLoadScale(&scaling, x, y, z);
+    rsMatrixMultiply(m, &scaling);
+}
+
+// Multiplies m in place by a translation of (x, y, z).
+static void __attribute__((overloadable))
+rsMatrixTranslate(rs_matrix4x4 *m, float x, float y, float z) {
+    rs_matrix4x4 translation;
+    rsMatrixLoadTranslate(&translation, x, y, z);
+    rsMatrixMultiply(m, &translation);
+}
+
+// Overwrites m with an orthographic projection for the given clipping box.
+// Degenerate boxes (right == left, top == bottom, far == near) are not guarded against.
+static void __attribute__((overloadable))
+rsMatrixLoadOrtho(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) {
+    const float width = right - left;
+    const float height = top - bottom;
+    const float depth = far - near;
+
+    rsMatrixLoadIdentity(m);
+    // Scale terms.
+    m->m[0] = 2.f / width;
+    m->m[5] = 2.f / height;
+    m->m[10]= -2.f / depth;
+    // Offset terms.
+    m->m[12]= -(right + left) / width;
+    m->m[13]= -(top + bottom) / height;
+    m->m[14]= -(far + near) / depth;
+}
+
+// Overwrites m with a perspective projection described by the near-plane rectangle
+// (left, right, bottom, top) and the near/far distances.
+static void __attribute__((overloadable))
+rsMatrixLoadFrustum(rs_matrix4x4 *m, float left, float right, float bottom, float top, float near, float far) {
+    const float width = right - left;
+    const float height = top - bottom;
+    const float depth = far - near;
+
+    rsMatrixLoadIdentity(m);
+    m->m[0] = 2.f * near / width;
+    m->m[5] = 2.f * near / height;
+    m->m[8] = (right + left) / width;
+    m->m[9] = (top + bottom) / height;
+    m->m[10]= -(far + near) / depth;
+    m->m[11]= -1.f;
+    m->m[14]= -2.f * far * near / depth;
+    m->m[15]= 0.f;
+}
+
+// Overwrites m with a symmetric perspective projection. fovy is the full vertical
+// field of view in degrees (the code uses tan of fovy * M_PI / 360); aspect scales
+// the half-height into the half-width.
+static void __attribute__((overloadable))
+rsMatrixLoadPerspective(rs_matrix4x4* m, float fovy, float aspect, float near, float far) {
+    const float halfHeight = near * tan((float) (fovy * M_PI / 360.0f));
+    const float lowerEdge = -halfHeight;
+    const float leftEdge = lowerEdge * aspect;
+    const float rightEdge = halfHeight * aspect;
+    rsMatrixLoadFrustum(m, leftEdge, rightEdge, lowerEdge, halfHeight, near, far);
+}
+
+// Transforms a float4 by a 4x4 matrix. Output lane k combines elements k, k+4, k+8 and
+// k+12 of the matrix with in.x, in.y, in.z and in.w respectively.
+static float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float4 in) {
+    const float x = in.x;
+    const float y = in.y;
+    const float z = in.z;
+    const float w = in.w;
+    float4 out;
+    out.x = (m->m[0] * x) + (m->m[4] * y) + (m->m[8] * z) + (m->m[12] * w);
+    out.y = (m->m[1] * x) + (m->m[5] * y) + (m->m[9] * z) + (m->m[13] * w);
+    out.z = (m->m[2] * x) + (m->m[6] * y) + (m->m[10] * z) + (m->m[14] * w);
+    out.w = (m->m[3] * x) + (m->m[7] * y) + (m->m[11] * z) + (m->m[15] * w);
+    return out;
+}
+
+// Transforms a float3 by a 4x4 matrix; elements 12..15 are added unscaled, which is
+// equivalent to treating the missing w input as 1.
+static float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float3 in) {
+    const float x = in.x;
+    const float y = in.y;
+    const float z = in.z;
+    float4 out;
+    out.x = (m->m[0] * x) + (m->m[4] * y) + (m->m[8] * z) + m->m[12];
+    out.y = (m->m[1] * x) + (m->m[5] * y) + (m->m[9] * z) + m->m[13];
+    out.z = (m->m[2] * x) + (m->m[6] * y) + (m->m[10] * z) + m->m[14];
+    out.w = (m->m[3] * x) + (m->m[7] * y) + (m->m[11] * z) + m->m[15];
+    return out;
+}
+
+// Transforms a float2 by a 4x4 matrix; elements 8..11 are not used (missing z acts as
+// 0) and elements 12..15 are added unscaled (missing w acts as 1).
+static float4 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix4x4 *m, float2 in) {
+    const float x = in.x;
+    const float y = in.y;
+    float4 out;
+    out.x = (m->m[0] * x) + (m->m[4] * y) + m->m[12];
+    out.y = (m->m[1] * x) + (m->m[5] * y) + m->m[13];
+    out.z = (m->m[2] * x) + (m->m[6] * y) + m->m[14];
+    out.w = (m->m[3] * x) + (m->m[7] * y) + m->m[15];
+    return out;
+}
+
+// Transforms a float3 by a 3x3 matrix.
+static float3 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, float3 in) {
+    const float x = in.x;
+    const float y = in.y;
+    const float z = in.z;
+    float3 out;
+    out.x = (m->m[0] * x) + (m->m[3] * y) + (m->m[6] * z);
+    out.y = (m->m[1] * x) + (m->m[4] * y) + (m->m[7] * z);
+    out.z = (m->m[2] * x) + (m->m[5] * y) + (m->m[8] * z);
+    return out;
+}
+
+// Transforms a float2 by a 3x3 matrix; elements 6..8 are not used, so the missing
+// third input acts as 0.
+static float3 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix3x3 *m, float2 in) {
+    const float x = in.x;
+    const float y = in.y;
+    float3 out;
+    out.x = (m->m[0] * x) + (m->m[3] * y);
+    out.y = (m->m[1] * x) + (m->m[4] * y);
+    out.z = (m->m[2] * x) + (m->m[5] * y);
+    return out;
+}
+
+// Transforms a float2 by a 2x2 matrix.
+static float2 __attribute__((overloadable))
+rsMatrixMultiply(rs_matrix2x2 *m, float2 in) {
+    const float x = in.x;
+    const float y = in.y;
+    float2 out;
+    out.x = (m->m[0] * x) + (m->m[2] * y);
+    out.y = (m->m[1] * x) + (m->m[3] * y);
+    return out;
+}
+
+// Inverts a 4x4 matrix in place using cofactors.
+// Returns true if the matrix was successfully inverted; returns false, leaving m
+// untouched, when the magnitude of the determinant is below 1e-6.
+static bool __attribute__((overloadable))
+rsMatrixInverse(rs_matrix4x4 *m) {
+    rs_matrix4x4 result;
+
+    int i, j;
+    for (i = 0; i < 4; ++i) {
+        for (j = 0; j < 4; ++j) {
+            // computeCofactor for int i, int j
+            // c0..c2 and r0..r2 are the three indices other than i and j, taken in
+            // cyclic order; together they select the 3x3 sub-matrix left after
+            // removing element i + 4*j's line in each direction.
+            int c0 = (i+1) % 4;
+            int c1 = (i+2) % 4;
+            int c2 = (i+3) % 4;
+            int r0 = (j+1) % 4;
+            int r1 = (j+2) % 4;
+            int r2 = (j+3) % 4;
+
+            // 3x3 determinant expanded along the c0 line.
+            float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1]))
+                         - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0]))
+                         + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0]));
+
+            // Checkerboard sign: negate when i + j is odd.
+            float cofactor = (i+j) & 1 ? -minor : minor;
+
+            // The cofactor of element i + 4*j is stored at the swapped position 4*i + j.
+            result.m[4*i + j] = cofactor;
+        }
+    }
+
+    // Dot product of 0th column of source and 0th row of result
+    float det = m->m[0]*result.m[0] + m->m[4]*result.m[1] +
+                 m->m[8]*result.m[2] + m->m[12]*result.m[3];
+
+    // Treat a near-zero determinant as singular; m has not been modified yet.
+    if (fabs(det) < 1e-6) {
+        return false;
+    }
+
+    // Scale every stored cofactor by 1/det and write the result back into m.
+    det = 1.0f / det;
+    for (i = 0; i < 16; ++i) {
+        m->m[i] = result.m[i] * det;
+    }
+
+    return true;
+}
+
+// Replaces a 4x4 matrix with the transpose of its inverse, using cofactors.
+// Returns true if the matrix was successfully inverted; returns false, leaving m
+// untouched, when the magnitude of the determinant is below 1e-6.
+static bool __attribute__((overloadable))
+rsMatrixInverseTranspose(rs_matrix4x4 *m) {
+    rs_matrix4x4 result;
+
+    int i, j;
+    for (i = 0; i < 4; ++i) {
+        for (j = 0; j < 4; ++j) {
+            // computeCofactor for int i, int j
+            // c0..c2 and r0..r2 are the three indices other than i and j, in cyclic order.
+            int c0 = (i+1) % 4;
+            int c1 = (i+2) % 4;
+            int c2 = (i+3) % 4;
+            int r0 = (j+1) % 4;
+            int r1 = (j+2) % 4;
+            int r2 = (j+3) % 4;
+
+            // 3x3 determinant expanded along the c0 line.
+            float minor = (m->m[c0 + 4*r0] * (m->m[c1 + 4*r1] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r1]))
+                         - (m->m[c0 + 4*r1] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r2] - m->m[c1 + 4*r2] * m->m[c2 + 4*r0]))
+                         + (m->m[c0 + 4*r2] * (m->m[c1 + 4*r0] * m->m[c2 + 4*r1] - m->m[c1 + 4*r1] * m->m[c2 + 4*r0]));
+
+            // Checkerboard sign: negate when i + j is odd.
+            float cofactor = (i+j) & 1 ? -minor : minor;
+
+            // Unlike rsMatrixInverse, the cofactor stays at the position of the
+            // element it belongs to (i + 4*j), which yields the transposed inverse.
+            result.m[4*j + i] = cofactor;
+        }
+    }
+
+    // Dot product of 0th column of source and 0th column of result
+    float det = m->m[0]*result.m[0] + m->m[4]*result.m[4] +
+                 m->m[8]*result.m[8] + m->m[12]*result.m[12];
+
+    // Treat a near-zero determinant as singular; m has not been modified yet.
+    if (fabs(det) < 1e-6) {
+        return false;
+    }
+
+    // Scale every stored cofactor by 1/det and write the result back into m.
+    det = 1.0f / det;
+    for (i = 0; i < 16; ++i) {
+        m->m[i] = result.m[i] * det;
+    }
+
+    return true;
+}
+
+// Transposes a 4x4 matrix in place by swapping each element above the diagonal with
+// its mirror below the diagonal.
+static void __attribute__((overloadable))
+rsMatrixTranspose(rs_matrix4x4 *m) {
+    for (int a = 0; a < 3; ++a) {
+        for (int b = a + 1; b < 4; ++b) {
+            const int upper = a*4 + b;
+            const int lower = b*4 + a;
+            const float held = m->m[upper];
+            m->m[upper] = m->m[lower];
+            m->m[lower] = held;
+        }
+    }
+}
+
+// Transposes a 3x3 matrix in place by swapping each element above the diagonal with
+// its mirror below the diagonal. Both indices use a stride of 3, matching the
+// 9-element layout of rs_matrix3x3.
+static void __attribute__((overloadable))
+rsMatrixTranspose(rs_matrix3x3 *m) {
+    int i, j;
+    float temp;
+    for (i = 0; i < 2; ++i) {
+        for (j = i + 1; j < 3; ++j) {
+            temp = m->m[i*3 + j];
+            m->m[i*3 + j] = m->m[j*3 + i];
+            m->m[j*3 + i] = temp;
+        }
+    }
+}
+
+// Transposes a 2x2 matrix in place: only the two off-diagonal elements trade places.
+static void __attribute__((overloadable))
+rsMatrixTranspose(rs_matrix2x2 *m) {
+    const float first = m->m[1];
+    const float second = m->m[2];
+    m->m[1] = second;
+    m->m[2] = first;
+}
+
+/////////////////////////////////////////////////////
+// quaternion ops
+/////////////////////////////////////////////////////
+
+// Sets the four components of q. Note the argument order: w comes first.
+static void __attribute__((overloadable))
+rsQuaternionSet(rs_quaternion *q, float w, float x, float y, float z) {
+    q->x = x;
+    q->y = y;
+    q->z = z;
+    q->w = w;
+}
+
+// Copies all four components of rhs into q.
+static void __attribute__((overloadable))
+rsQuaternionSet(rs_quaternion *q, const rs_quaternion *rhs) {
+    const float w = rhs->w;
+    const float x = rhs->x;
+    const float y = rhs->y;
+    const float z = rhs->z;
+    q->w = w;
+    q->x = x;
+    q->y = y;
+    q->z = z;
+}
+
+// Scales every component of q by s, in place.
+static void __attribute__((overloadable))
+rsQuaternionMultiply(rs_quaternion *q, float s) {
+    q->w = q->w * s;
+    q->x = q->x * s;
+    q->y = q->y * s;
+    q->z = q->z * s;
+}
+
+// Replaces q with the quaternion (Hamilton) product q * rhs.
+// The result is not normalized.
+static void __attribute__((overloadable))
+rsQuaternionMultiply(rs_quaternion *q, const rs_quaternion *rhs) {
+    // Snapshot both operands first: every output component depends on all four
+    // original components of q, and rhs may point at q itself.
+    const float qw = q->w;
+    const float qx = q->x;
+    const float qy = q->y;
+    const float qz = q->z;
+    const float rw = rhs->w;
+    const float rx = rhs->x;
+    const float ry = rhs->y;
+    const float rz = rhs->z;
+
+    q->w = -qx*rx - qy*ry - qz*rz + qw*rw;
+    q->x =  qx*rw + qy*rz - qz*ry + qw*rx;
+    q->y = -qx*rz + qy*rw + qz*rx + qw*ry;
+    q->z =  qx*ry - qy*rx + qz*rw + qw*rz;
+}
+
+// Adds rhs to q component by component, in place.
+static void
+rsQuaternionAdd(rs_quaternion *q, const rs_quaternion *rhs) {
+    q->w += rhs->w;
+    q->x += rhs->x;
+    q->y += rhs->y;
+    q->z += rhs->z;
+}
+
+// Loads q with a rotation of rot degrees about the axis (x, y, z).
+// The axis is used as given, so the caller must supply a unit-length axis.
+static void
+rsQuaternionLoadRotateUnit(rs_quaternion *q, float rot, float x, float y, float z) {
+    // Convert degrees to radians and halve the angle in one step.
+    rot *= (float)(M_PI / 180.0f) * 0.5f;
+    const float cosHalf = cos(rot);
+    const float sinHalf = sin(rot);
+
+    q->w = cosHalf;
+    q->x = x * sinHalf;
+    q->y = y * sinHalf;
+    q->z = z * sinHalf;
+}
+
+// Loads q with a rotation of rot degrees about the axis (x, y, z). The axis is
+// rescaled to unit length unless its squared length is already exactly 1; a zero
+// axis is not guarded against.
+static void
+rsQuaternionLoadRotate(rs_quaternion *q, float rot, float x, float y, float z) {
+    const float lenSq = x*x + y*y + z*z;
+    if (lenSq != 1) {
+        const float invLen = 1.f / sqrt(lenSq);
+        x *= invLen;
+        y *= invLen;
+        z *= invLen;
+    }
+    rsQuaternionLoadRotateUnit(q, rot, x, y, z);
+}
+
+// Conjugates q in place: x, y and z are negated, w is left alone.
+static void
+rsQuaternionConjugate(rs_quaternion *q) {
+    q->z = -q->z;
+    q->y = -q->y;
+    q->x = -q->x;
+}
+
+// Four-component dot product of two quaternions.
+static float
+rsQuaternionDot(const rs_quaternion *q0, const rs_quaternion *q1) {
+    const float ww = q0->w*q1->w;
+    const float xx = q0->x*q1->x;
+    const float yy = q0->y*q1->y;
+    const float zz = q0->z*q1->z;
+    return ww + xx + yy + zz;
+}
+
+// Rescales q to unit length in place. Nothing is done when the squared length is
+// already exactly 1; a zero quaternion is not guarded against.
+static void
+rsQuaternionNormalize(rs_quaternion *q) {
+    const float lenSq = rsQuaternionDot(q, q);
+    if (lenSq == 1) {
+        return;
+    }
+    const float invLen = 1.f / sqrt(lenSq);
+    rsQuaternionMultiply(q, invLen);
+}
+
+// Interpolates between q0 and q1 by t and stores the result in q.
+// t <= 0 yields a copy of q0 and t >= 1 yields a copy of q1. In between, the result is
+// a weighted sum of the two inputs (with q0 negated when the two point in opposite
+// half-spaces); the result is not renormalized here.
+static void
+rsQuaternionSlerp(rs_quaternion *q, const rs_quaternion *q0, const rs_quaternion *q1, float t) {
+    if(t <= 0.0f) {
+        rsQuaternionSet(q, q0);
+        return;
+    }
+    if(t >= 1.0f) {
+        rsQuaternionSet(q, q1);
+        return;
+    }
+
+    // Work on copies so the inputs are never modified.
+    rs_quaternion tempq0, tempq1;
+    rsQuaternionSet(&tempq0, q0);
+    rsQuaternionSet(&tempq1, q1);
+
+    // Despite its name, "angle" holds the dot product of the inputs (it is passed to
+    // acos below). A negative value is flipped by negating the q0 copy.
+    float angle = rsQuaternionDot(q0, q1);
+    if(angle < 0) {
+        rsQuaternionMultiply(&tempq0, -1.0f);
+        angle *= -1.0f;
+    }
+
+    float scale, invScale;
+    // NOTE(review): after the flip above angle is never negative, so this condition
+    // holds for every ordered value and the outer else branch looks unreachable
+    // except for NaN input -- confirm whether that branch is still wanted.
+    if (angle + 1.0f > 0.05f) {
+        if (1.0f - angle >= 0.05f) {
+            // General case: weights from the sines of the partial angles.
+            float theta = acos(angle);
+            float invSinTheta = 1.0f / sin(theta);
+            scale = sin(theta * (1.0f - t)) * invSinTheta;
+            invScale = sin(theta * t) * invSinTheta;
+        }
+        else {
+            // Inputs nearly coincide: fall back to plain linear weights.
+            scale = 1.0f - t;
+            invScale = t;
+        }
+    }
+    else {
+        rsQuaternionSet(&tempq1, tempq0.z, -tempq0.y, tempq0.x, -tempq0.w);
+        scale = sin(M_PI * (0.5f - t));
+        invScale = sin(M_PI * t);
+    }
+
+    // Weighted sum of the two working copies, component by component.
+    rsQuaternionSet(q, tempq0.w*scale + tempq1.w*invScale, tempq0.x*scale + tempq1.x*invScale,
+                        tempq0.y*scale + tempq1.y*invScale, tempq0.z*scale + tempq1.z*invScale);
+}
+
+// Writes into m the 4x4 rotation matrix that corresponds to the quaternion q.
+// q must already be unit length; no normalization is performed here.
+// The element layout matches rsMatrixLoadRotate and rsMatrixMultiply(matrix, vector):
+// elements 0..2, 4..6 and 8..10 hold the images of the x, y and z axes respectively.
+static void rsQuaternionGetMatrixUnit(rs_matrix4x4 *m, const rs_quaternion *q) {
+    float x2 = 2.0f * q->x * q->x;
+    float y2 = 2.0f * q->y * q->y;
+    float z2 = 2.0f * q->z * q->z;
+    float xy = 2.0f * q->x * q->y;
+    float wz = 2.0f * q->w * q->z;
+    float xz = 2.0f * q->x * q->z;
+    float wy = 2.0f * q->w * q->y;
+    float wx = 2.0f * q->w * q->x;
+    float yz = 2.0f * q->y * q->z;
+
+    // Image of the x axis.
+    m->m[0] = 1.0f - y2 - z2;
+    m->m[1] = xy + wz;
+    m->m[2] = xz - wy;
+    m->m[3] = 0.0f;
+
+    // Image of the y axis.
+    m->m[4] = xy - wz;
+    m->m[5] = 1.0f - x2 - z2;
+    m->m[6] = yz + wx;
+    m->m[7] = 0.0f;
+
+    // Image of the z axis. Each mirrored off-diagonal pair must differ in the sign of
+    // its w term, otherwise the matrix is not a rotation.
+    m->m[8] = xz + wy;
+    m->m[9] = yz - wx;
+    m->m[10] = 1.0f - x2 - y2;
+    m->m[11] = 0.0f;
+
+    // No translation.
+    m->m[12] = 0.0f;
+    m->m[13] = 0.0f;
+    m->m[14] = 0.0f;
+    m->m[15] = 1.0f;
+}
+
+/////////////////////////////////////////////////////
+// utility funcs
+/////////////////////////////////////////////////////
+// Derives the six clipping planes from a combined model-view-projection matrix.
+// Each plane is written as (a, b, c, d) of the plane equation into x, y, z, w, then
+// the whole float4 is divided by the length of its xyz part.
+__inline__ static void __attribute__((overloadable, always_inline))
+rsExtractFrustumPlanes(const rs_matrix4x4 *modelViewProj,
+                         float4 *left, float4 *right,
+                         float4 *top, float4 *bottom,
+                         float4 *near, float4 *far) {
+    float4 plane;
+
+    // Left: elements 3/7/11/15 plus elements 0/4/8/12.
+    plane.x = modelViewProj->m[3] + modelViewProj->m[0];
+    plane.y = modelViewProj->m[7] + modelViewProj->m[4];
+    plane.z = modelViewProj->m[11] + modelViewProj->m[8];
+    plane.w = modelViewProj->m[15] + modelViewProj->m[12];
+    *left = plane / length(plane.xyz);
+
+    // Right: the same elements, subtracted.
+    plane.x = modelViewProj->m[3] - modelViewProj->m[0];
+    plane.y = modelViewProj->m[7] - modelViewProj->m[4];
+    plane.z = modelViewProj->m[11] - modelViewProj->m[8];
+    plane.w = modelViewProj->m[15] - modelViewProj->m[12];
+    *right = plane / length(plane.xyz);
+
+    // Top: elements 3/7/11/15 minus elements 1/5/9/13.
+    plane.x = modelViewProj->m[3] - modelViewProj->m[1];
+    plane.y = modelViewProj->m[7] - modelViewProj->m[5];
+    plane.z = modelViewProj->m[11] - modelViewProj->m[9];
+    plane.w = modelViewProj->m[15] - modelViewProj->m[13];
+    *top = plane / length(plane.xyz);
+
+    // Bottom: the same elements, added.
+    plane.x = modelViewProj->m[3] + modelViewProj->m[1];
+    plane.y = modelViewProj->m[7] + modelViewProj->m[5];
+    plane.z = modelViewProj->m[11] + modelViewProj->m[9];
+    plane.w = modelViewProj->m[15] + modelViewProj->m[13];
+    *bottom = plane / length(plane.xyz);
+
+    // Near: elements 3/7/11/15 plus elements 2/6/10/14.
+    plane.x = modelViewProj->m[3] + modelViewProj->m[2];
+    plane.y = modelViewProj->m[7] + modelViewProj->m[6];
+    plane.z = modelViewProj->m[11] + modelViewProj->m[10];
+    plane.w = modelViewProj->m[15] + modelViewProj->m[14];
+    *near = plane / length(plane.xyz);
+
+    // Far: the same elements, subtracted.
+    plane.x = modelViewProj->m[3] - modelViewProj->m[2];
+    plane.y = modelViewProj->m[7] - modelViewProj->m[6];
+    plane.z = modelViewProj->m[11] - modelViewProj->m[10];
+    plane.w = modelViewProj->m[15] - modelViewProj->m[14];
+    *far = plane / length(plane.xyz);
+}
+
+// Tests a sphere (center in xyz, radius in w) against six planes given as
+// (a, b, c, d) in x, y, z, w. Returns false as soon as the center lies further than
+// one radius on the negative side of any plane, and true otherwise.
+__inline__ static bool __attribute__((overloadable, always_inline))
+rsIsSphereInFrustum(float4 *sphere,
+                      float4 *left, float4 *right,
+                      float4 *top, float4 *bottom,
+                      float4 *near, float4 *far) {
+    // Planes are checked in this fixed order.
+    float4 *planes[6] = {left, right, top, bottom, near, far};
+
+    for (int i = 0; i < 6; i++) {
+        float distToCenter = dot(planes[i]->xyz, sphere->xyz) + planes[i]->w;
+        if(distToCenter < -sphere->w) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+/////////////////////////////////////////////////////
+// int ops
+/////////////////////////////////////////////////////
+
+// rsClamp: returns low when amount < low, high when amount > high, otherwise amount.
+// The lower bound is tested first, so low wins if the bounds are crossed.
+__inline__ static uint __attribute__((overloadable, always_inline)) rsClamp(uint amount, uint low, uint high) {
+    if (amount < low) {
+        return low;
+    }
+    if (amount > high) {
+        return high;
+    }
+    return amount;
+}
+__inline__ static int __attribute__((overloadable, always_inline)) rsClamp(int amount, int low, int high) {
+    if (amount < low) {
+        return low;
+    }
+    if (amount > high) {
+        return high;
+    }
+    return amount;
+}
+__inline__ static ushort __attribute__((overloadable, always_inline)) rsClamp(ushort amount, ushort low, ushort high) {
+    if (amount < low) {
+        return low;
+    }
+    if (amount > high) {
+        return high;
+    }
+    return amount;
+}
+__inline__ static short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high) {
+    if (amount < low) {
+        return low;
+    }
+    if (amount > high) {
+        return high;
+    }
+    return amount;
+}
+__inline__ static uchar __attribute__((overloadable, always_inline)) rsClamp(uchar amount, uchar low, uchar high) {
+    if (amount < low) {
+        return low;
+    }
+    if (amount > high) {
+        return high;
+    }
+    return amount;
+}
+__inline__ static char __attribute__((overloadable, always_inline)) rsClamp(char amount, char low, char high) {
+    if (amount < low) {
+        return low;
+    }
+    if (amount > high) {
+        return high;
+    }
+    return amount;
+}
+
+
+
+#endif
+
diff --git a/scriptc/rs_graphics.rsh b/scriptc/rs_graphics.rsh
index 70cd562..54e6328 100644
--- a/scriptc/rs_graphics.rsh
+++ b/scriptc/rs_graphics.rsh
@@ -1,65 +1,109 @@
+#ifndef __RS_GRAPHICS_RSH__
+#define __RS_GRAPHICS_RSH__
 
+// Bind a ProgramFragment to the RS context.
+extern void __attribute__((overloadable))
+    rsgBindProgramFragment(rs_program_fragment);
+extern void __attribute__((overloadable))
+    rsgBindProgramStore(rs_program_store);
+extern void __attribute__((overloadable))
+    rsgBindProgramVertex(rs_program_vertex);
+extern void __attribute__((overloadable))
+    rsgBindProgramRaster(rs_program_raster);
 
-extern float2 vec2Rand(float len);
+extern void __attribute__((overloadable))
+    rsgBindSampler(rs_program_fragment, uint slot, rs_sampler);
+extern void __attribute__((overloadable))
+    rsgBindTexture(rs_program_fragment, uint slot, rs_allocation);
 
-extern float3 float3Norm(float3);
-extern float float3Length(float3);
-extern float3 float3Add(float3 lhs, float3 rhs);
-extern float3 float3Sub(float3 lhs, float3 rhs);
-extern float3 float3Cross(float3 lhs, float3 rhs);
-extern float float3Dot(float3 lhs, float3 rhs);
-extern float3 float3Scale(float3 v, float scale);
+extern void __attribute__((overloadable))
+    rsgProgramVertexLoadProjectionMatrix(const rs_matrix4x4 *);
+extern void __attribute__((overloadable))
+    rsgProgramVertexLoadModelMatrix(const rs_matrix4x4 *);
+extern void __attribute__((overloadable))
+    rsgProgramVertexLoadTextureMatrix(const rs_matrix4x4 *);
 
-extern float4 float4Add(float4 lhs, float4 rhs);
-extern float4 float4Sub(float4 lhs, float4 rhs);
-extern float4 float4Cross(float4 lhs, float4 rhs);
-extern float float4Dot(float4 lhs, float4 rhs);
-extern float4 float4Scale(float4 v, float scale);
+extern void __attribute__((overloadable))
+    rsgProgramVertexGetProjectionMatrix(rs_matrix4x4 *);
 
-    // context
-extern void bindProgramFragment(rs_program_fragment);
-extern void bindProgramStore(rs_program_store);
-extern void bindProgramVertex(rs_program_vertex);
+extern void __attribute__((overloadable))
+    rsgProgramFragmentConstantColor(rs_program_fragment, float, float, float, float);
 
-extern void bindSampler(rs_program_fragment, int slot, rs_sampler);
-extern void bindSampler(rs_program_fragment, int slot, rs_allocation);
+extern uint __attribute__((overloadable))
+    rsgGetWidth(void);
+extern uint __attribute__((overloadable))
+    rsgGetHeight(void);
 
-extern void vpLoadModelMatrix(const float *);
-extern void vpLoadTextureMatrix(const float *);
+extern void __attribute__((overloadable))
+    rsgUploadToTexture(rs_allocation);
+extern void __attribute__((overloadable))
+    rsgUploadToTexture(rs_allocation, uint mipLevel);
+extern void __attribute__((overloadable))
+    rsgUploadToBufferObject(rs_allocation);
 
+extern void __attribute__((overloadable))
+    rsgDrawRect(float x1, float y1, float x2, float y2, float z);
+extern void __attribute__((overloadable))
+    rsgDrawQuad(float x1, float y1, float z1,
+                float x2, float y2, float z2,
+                float x3, float y3, float z3,
+                float x4, float y4, float z4);
+extern void __attribute__((overloadable))
+    rsgDrawQuadTexCoords(float x1, float y1, float z1, float u1, float v1,
+                         float x2, float y2, float z2, float u2, float v2,
+                         float x3, float y3, float z3, float u3, float v3,
+                         float x4, float y4, float z4, float u4, float v4);
+extern void __attribute__((overloadable))
+    rsgDrawSpriteScreenspace(float x, float y, float z, float w, float h);
 
-// drawing
-extern void drawRect(float x1, float y1, float x2, float y2, float z);
-extern void drawQuad(float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3, float x4, float y4, float z4);
-extern void drawQuadTexCoords(float x1, float y1, float z1, float u1, float v1, float x2, float y2, float z2, float u2, float v2, float x3, float y3, float z3, float u3, float v3, float x4, float y4, float z4, float u4, float v4);
-extern void drawSprite(float x, float y, float z, float w, float h);
-extern void drawSpriteScreenspace(float x, float y, float z, float w, float h);
-extern void drawLine(float x1, float y1, float z1, float x2, float y2, float z2);
-extern void drawPoint(float x1, float y1, float z1);
-extern void drawSimpleMesh(int ism);
-extern void drawSimpleMeshRange(int ism, int start, int len);
+extern void __attribute__((overloadable))
+    rsgDrawMesh(rs_mesh ism);
+extern void __attribute__((overloadable))
+    rsgDrawMesh(rs_mesh ism, uint primitiveIndex);
+extern void __attribute__((overloadable))
+    rsgDrawMesh(rs_mesh ism, uint primitiveIndex, uint start, uint len);
 
+extern void __attribute__((overloadable))
+    rsgClearColor(float, float, float, float);
+extern void __attribute__((overloadable))
+    rsgClearDepth(float);
+
+extern void __attribute__((overloadable))
+    rsgDrawText(const char *, int x, int y);
+extern void __attribute__((overloadable))
+    rsgDrawText(rs_allocation, int x, int y);
+extern void __attribute__((overloadable))
+    rsgBindFont(rs_font);
+extern void __attribute__((overloadable))
+    rsgFontColor(float, float, float, float);
+// Returns the bounding box of the text relative to (0, 0)
+// Any of left, right, top, bottom could be NULL
+extern void __attribute__((overloadable))
+    rsgMeasureText(const char *, int *left, int *right, int *top, int *bottom);
+extern void __attribute__((overloadable))
+    rsgMeasureText(rs_allocation, int *left, int *right, int *top, int *bottom);
+
+extern void __attribute__((overloadable))
+    rsgMeshComputeBoundingBox(rs_mesh mesh, float *minX, float *minY, float *minZ,
+                                                float *maxX, float *maxY, float *maxZ);
+__inline__ static void __attribute__((overloadable, always_inline))
+rsgMeshComputeBoundingBox(rs_mesh mesh, float3 *bBoxMin, float3 *bBoxMax) {
+    float minX, minY, minZ, maxX, maxY, maxZ;  // scalar results filled in by the six-pointer overload
+    rsgMeshComputeBoundingBox(mesh, &minX, &minY, &minZ, &maxX, &maxY, &maxZ);
+    bBoxMin->x = minX;  // copy the scalars into the caller's two float3 corners
+    bBoxMin->y = minY;
+    bBoxMin->z = minZ;
+    bBoxMax->x = maxX;
+    bBoxMax->y = maxY;
+    bBoxMax->z = maxZ;
+}
+
+///////////////////////////////////////////////////////
 // misc
-extern void pfClearColor(float, float, float, float);
-extern void color(float, float, float, float);
-extern void hsb(float, float, float, float);
-extern void hsbToRgb(float, float, float, float*);
-extern int hsbToAbgr(float, float, float, float);
 
-extern void uploadToTexture(int, int);
-extern void uploadToBufferObject(int);
+// Deprecated
+extern void __attribute__((overloadable))
+    color(float, float, float, float);
 
-extern int colorFloatRGBAtoUNorm8(float, float, float, float);
-extern int colorFloatRGBto565(float, float, float);
-
-extern int getWidth();
-extern int getHeight();
-
-extern int sendToClient(void *data, int cmdID, int len, int waitForSpace);
-
-extern void debugF(const char *, float);
-extern void debugI32(const char *, int);
-extern void debugHexI32(const char *, int);
-
-
+#endif
 
diff --git a/scriptc/rs_math.rsh b/scriptc/rs_math.rsh
index 613c7ca..d059997 100644
--- a/scriptc/rs_math.rsh
+++ b/scriptc/rs_math.rsh
@@ -1,287 +1,194 @@
-// Float ops
+#ifndef __RS_MATH_RSH__
+#define __RS_MATH_RSH__
 
-extern float __attribute__((overloadable)) abs(float);
-extern float2 __attribute__((overloadable)) abs(float2);
-extern float3 __attribute__((overloadable)) abs(float3);
-extern float4 __attribute__((overloadable)) abs(float4);
-extern float8 __attribute__((overloadable)) abs(float8);
-extern float16 __attribute__((overloadable)) abs(float16);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_element *dst, rs_element src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_type *dst, rs_type src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_allocation *dst, rs_allocation src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_sampler *dst, rs_sampler src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_script *dst, rs_script src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_mesh *dst, rs_mesh src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_fragment *dst, rs_program_fragment src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_vertex *dst, rs_program_vertex src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_raster *dst, rs_program_raster src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_program_store *dst, rs_program_store src);
+extern void __attribute__((overloadable))
+    rsSetObject(rs_font *dst, rs_font src);
 
-extern float __attribute__((overloadable)) acos(float);
-extern float2 __attribute__((overloadable)) acos(float2);
-extern float3 __attribute__((overloadable)) acos(float3);
-extern float4 __attribute__((overloadable)) acos(float4);
-extern float8 __attribute__((overloadable)) acos(float8);
-extern float16 __attribute__((overloadable)) acos(float16);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_element *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_type *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_allocation *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_sampler *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_script *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_mesh *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_fragment *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_vertex *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_raster *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_program_store *dst);
+extern void __attribute__((overloadable))
+    rsClearObject(rs_font *dst);
 
-extern float __attribute__((overloadable)) asin(float);
-extern float2 __attribute__((overloadable)) asin(float2);
-extern float3 __attribute__((overloadable)) asin(float3);
-extern float4 __attribute__((overloadable)) asin(float4);
-extern float8 __attribute__((overloadable)) asin(float8);
-extern float16 __attribute__((overloadable)) asin(float16);
-
-extern float __attribute__((overloadable)) atan(float);
-extern float2 __attribute__((overloadable)) atan(float2);
-extern float3 __attribute__((overloadable)) atan(float3);
-extern float4 __attribute__((overloadable)) atan(float4);
-extern float8 __attribute__((overloadable)) atan(float8);
-extern float16 __attribute__((overloadable)) atan(float16);
-
-extern float __attribute__((overloadable)) atan2(float, float);
-extern float2 __attribute__((overloadable)) atan2(float2, float2);
-extern float3 __attribute__((overloadable)) atan2(float3, float3);
-extern float4 __attribute__((overloadable)) atan2(float4, float4);
-extern float8 __attribute__((overloadable)) atan2(float8, float8);
-extern float16 __attribute__((overloadable)) atan2(float16, float16);
-
-extern float __attribute__((overloadable)) ceil(float);
-extern float2 __attribute__((overloadable)) ceil(float2);
-extern float3 __attribute__((overloadable)) ceil(float3);
-extern float4 __attribute__((overloadable)) ceil(float4);
-extern float8 __attribute__((overloadable)) ceil(float8);
-extern float16 __attribute__((overloadable)) ceil(float16);
-
-extern float __attribute__((overloadable)) clamp(float, float, float);
-extern float2 __attribute__((overloadable)) clamp(float2, float2, float2);
-extern float3 __attribute__((overloadable)) clamp(float3, float3, float3);
-extern float4 __attribute__((overloadable)) clamp(float4, float4, float4);
-extern float8 __attribute__((overloadable)) clamp(float8, float8, float8);
-extern float16 __attribute__((overloadable)) clamp(float16, float16, float16);
-extern float __attribute__((overloadable)) clamp(float, float, float);
-extern float2 __attribute__((overloadable)) clamp(float2, float, float);
-extern float3 __attribute__((overloadable)) clamp(float3, float, float);
-extern float4 __attribute__((overloadable)) clamp(float4, float, float);
-extern float8 __attribute__((overloadable)) clamp(float8, float, float);
-extern float16 __attribute__((overloadable)) clamp(float16, float, float);
-
-extern float __attribute__((overloadable)) copysign(float, float);
-extern float2 __attribute__((overloadable)) copysign(float2, float2);
-extern float3 __attribute__((overloadable)) copysign(float3, float3);
-extern float4 __attribute__((overloadable)) copysign(float4, float4);
-extern float8 __attribute__((overloadable)) copysign(float8, float8);
-extern float16 __attribute__((overloadable)) copysign(float16, float16);
-
-extern float __attribute__((overloadable)) cos(float);
-extern float2 __attribute__((overloadable)) cos(float2);
-extern float3 __attribute__((overloadable)) cos(float3);
-extern float4 __attribute__((overloadable)) cos(float4);
-extern float8 __attribute__((overloadable)) cos(float8);
-extern float16 __attribute__((overloadable)) cos(float16);
-
-extern float __attribute__((overloadable)) degrees(float);
-extern float2 __attribute__((overloadable)) degrees(float2);
-extern float3 __attribute__((overloadable)) degrees(float3);
-extern float4 __attribute__((overloadable)) degrees(float4);
-extern float8 __attribute__((overloadable)) degrees(float8);
-extern float16 __attribute__((overloadable)) degrees(float16);
-
-extern float __attribute__((overloadable)) exp(float);
-extern float2 __attribute__((overloadable)) exp(float2);
-extern float3 __attribute__((overloadable)) exp(float3);
-extern float4 __attribute__((overloadable)) exp(float4);
-extern float8 __attribute__((overloadable)) exp(float8);
-extern float16 __attribute__((overloadable)) exp(float16);
-
-extern float __attribute__((overloadable)) exp2(float);
-extern float2 __attribute__((overloadable)) exp2(float2);
-extern float3 __attribute__((overloadable)) exp2(float3);
-extern float4 __attribute__((overloadable)) exp2(float4);
-extern float8 __attribute__((overloadable)) exp2(float8);
-extern float16 __attribute__((overloadable)) exp2(float16);
-
-extern float __attribute__((overloadable)) exp10(float);
-extern float2 __attribute__((overloadable)) exp10(float2);
-extern float3 __attribute__((overloadable)) exp10(float3);
-extern float4 __attribute__((overloadable)) exp10(float4);
-extern float8 __attribute__((overloadable)) exp10(float8);
-extern float16 __attribute__((overloadable)) exp10(float16);
-
-extern float __attribute__((overloadable)) fabs(float);
-extern float2 __attribute__((overloadable)) fabs(float2);
-extern float3 __attribute__((overloadable)) fabs(float3);
-extern float4 __attribute__((overloadable)) fabs(float4);
-extern float8 __attribute__((overloadable)) fabs(float8);
-extern float16 __attribute__((overloadable)) fabs(float16);
-
-extern float __attribute__((overloadable)) floor(float);
-extern float2 __attribute__((overloadable)) floor(float2);
-extern float3 __attribute__((overloadable)) floor(float3);
-extern float4 __attribute__((overloadable)) floor(float4);
-extern float8 __attribute__((overloadable)) floor(float8);
-extern float16 __attribute__((overloadable)) floor(float16);
-
-extern float __attribute__((overloadable)) fmax(float, float);
-extern float2 __attribute__((overloadable)) fmax(float2, float2);
-extern float3 __attribute__((overloadable)) fmax(float3, float3);
-extern float4 __attribute__((overloadable)) fmax(float4, float4);
-extern float8 __attribute__((overloadable)) fmax(float8, float8);
-extern float16 __attribute__((overloadable)) fmax(float16, float16);
-extern float2 __attribute__((overloadable)) fmax(float2, float);
-extern float3 __attribute__((overloadable)) fmax(float3, float);
-extern float4 __attribute__((overloadable)) fmax(float4, float);
-extern float8 __attribute__((overloadable)) fmax(float8, float);
-extern float16 __attribute__((overloadable)) fmax(float16, float);
-
-extern float __attribute__((overloadable)) fmin(float, float);
-extern float2 __attribute__((overloadable)) fmin(float2, float2);
-extern float3 __attribute__((overloadable)) fmin(float3, float3);
-extern float4 __attribute__((overloadable)) fmin(float4, float4);
-extern float8 __attribute__((overloadable)) fmin(float8, float8);
-extern float16 __attribute__((overloadable)) fmin(float16, float16);
-extern float2 __attribute__((overloadable)) fmin(float2, float);
-extern float3 __attribute__((overloadable)) fmin(float3, float);
-extern float4 __attribute__((overloadable)) fmin(float4, float);
-extern float8 __attribute__((overloadable)) fmin(float8, float);
-extern float16 __attribute__((overloadable)) fmin(float16, float);
-
-extern float __attribute__((overloadable)) fmod(float, float);
-extern float2 __attribute__((overloadable)) fmod(float2, float2);
-extern float3 __attribute__((overloadable)) fmod(float3, float3);
-extern float4 __attribute__((overloadable)) fmod(float4, float4);
-extern float8 __attribute__((overloadable)) fmod(float8, float8);
-extern float16 __attribute__((overloadable)) fmod(float16, float16);
-
-extern float __attribute__((overloadable)) log(float);
-extern float2 __attribute__((overloadable)) log(float2);
-extern float3 __attribute__((overloadable)) log(float3);
-extern float4 __attribute__((overloadable)) log(float4);
-extern float8 __attribute__((overloadable)) log(float8);
-extern float16 __attribute__((overloadable)) log(float16);
-
-extern float __attribute__((overloadable)) log2(float);
-extern float2 __attribute__((overloadable)) log2(float2);
-extern float3 __attribute__((overloadable)) log2(float3);
-extern float4 __attribute__((overloadable)) log2(float4);
-extern float8 __attribute__((overloadable)) log2(float8);
-extern float16 __attribute__((overloadable)) log2(float16);
-
-extern float __attribute__((overloadable)) log10(float);
-extern float2 __attribute__((overloadable)) log10(float2);
-extern float3 __attribute__((overloadable)) log10(float3);
-extern float4 __attribute__((overloadable)) log10(float4);
-extern float8 __attribute__((overloadable)) log10(float8);
-extern float16 __attribute__((overloadable)) log10(float16);
-
-extern float __attribute__((overloadable)) max(float, float);
-extern float2 __attribute__((overloadable)) max(float2, float2);
-extern float3 __attribute__((overloadable)) max(float3, float3);
-extern float4 __attribute__((overloadable)) max(float4, float4);
-extern float8 __attribute__((overloadable)) max(float8, float8);
-extern float16 __attribute__((overloadable)) max(float16, float16);
-
-extern float __attribute__((overloadable)) min(float, float);
-extern float2 __attribute__((overloadable)) min(float2, float2);
-extern float3 __attribute__((overloadable)) min(float3, float3);
-extern float4 __attribute__((overloadable)) min(float4, float4);
-extern float8 __attribute__((overloadable)) min(float8, float8);
-extern float16 __attribute__((overloadable)) min(float16, float16);
-
-extern float __attribute__((overloadable)) mix(float, float, float);
-extern float2 __attribute__((overloadable)) mix(float2, float2, float2);
-extern float3 __attribute__((overloadable)) mix(float3, float3, float3);
-extern float4 __attribute__((overloadable)) mix(float4, float4, float4);
-extern float8 __attribute__((overloadable)) mix(float8, float8, float8);
-extern float16 __attribute__((overloadable)) mix(float16, float16, float16);
-extern float __attribute__((overloadable)) mix(float, float, float);
-extern float2 __attribute__((overloadable)) mix(float2, float2, float);
-extern float3 __attribute__((overloadable)) mix(float3, float3, float);
-extern float4 __attribute__((overloadable)) mix(float4, float4, float);
-extern float8 __attribute__((overloadable)) mix(float8, float8, float);
-extern float16 __attribute__((overloadable)) mix(float16, float16, float);
-
-extern float __attribute__((overloadable)) pow(float, float);
-extern float2 __attribute__((overloadable)) pow(float2, float2);
-extern float3 __attribute__((overloadable)) pow(float3, float3);
-extern float4 __attribute__((overloadable)) pow(float4, float4);
-extern float8 __attribute__((overloadable)) pow(float8, float8);
-extern float16 __attribute__((overloadable)) pow(float16, float16);
-
-extern float __attribute__((overloadable)) radians(float);
-extern float2 __attribute__((overloadable)) radians(float2);
-extern float3 __attribute__((overloadable)) radians(float3);
-extern float4 __attribute__((overloadable)) radians(float4);
-extern float8 __attribute__((overloadable)) radians(float8);
-extern float16 __attribute__((overloadable)) radians(float16);
-
-extern float __attribute__((overloadable)) rint(float);
-extern float2 __attribute__((overloadable)) rint(float2);
-extern float3 __attribute__((overloadable)) rint(float3);
-extern float4 __attribute__((overloadable)) rint(float4);
-extern float8 __attribute__((overloadable)) rint(float8);
-extern float16 __attribute__((overloadable)) rint(float16);
-
-extern float __attribute__((overloadable)) round(float);
-extern float2 __attribute__((overloadable)) round(float2);
-extern float3 __attribute__((overloadable)) round(float3);
-extern float4 __attribute__((overloadable)) round(float4);
-extern float8 __attribute__((overloadable)) round(float8);
-extern float16 __attribute__((overloadable)) round(float16);
-
-extern float __attribute__((overloadable)) rsqrt(float);
-extern float2 __attribute__((overloadable)) rsqrt(float2);
-extern float3 __attribute__((overloadable)) rsqrt(float3);
-extern float4 __attribute__((overloadable)) rsqrt(float4);
-extern float8 __attribute__((overloadable)) rsqrt(float8);
-extern float16 __attribute__((overloadable)) rsqrt(float16);
-
-extern float __attribute__((overloadable)) sign(float);
-extern float2 __attribute__((overloadable)) sign(float2);
-extern float3 __attribute__((overloadable)) sign(float3);
-extern float4 __attribute__((overloadable)) sign(float4);
-extern float8 __attribute__((overloadable)) sign(float8);
-extern float16 __attribute__((overloadable)) sign(float16);
-
-extern float __attribute__((overloadable)) sin(float);
-extern float2 __attribute__((overloadable)) sin(float2);
-extern float3 __attribute__((overloadable)) sin(float3);
-extern float4 __attribute__((overloadable)) sin(float4);
-extern float8 __attribute__((overloadable)) sin(float8);
-extern float16 __attribute__((overloadable)) sin(float16);
-
-extern float __attribute__((overloadable)) sqrt(float);
-extern float2 __attribute__((overloadable)) sqrt(float2);
-extern float3 __attribute__((overloadable)) sqrt(float3);
-extern float4 __attribute__((overloadable)) sqrt(float4);
-extern float8 __attribute__((overloadable)) sqrt(float8);
-extern float16 __attribute__((overloadable)) sqrt(float16);
-
-extern float __attribute__((overloadable)) tan(float);
-extern float2 __attribute__((overloadable)) tan(float2);
-extern float3 __attribute__((overloadable)) tan(float3);
-extern float4 __attribute__((overloadable)) tan(float4);
-extern float8 __attribute__((overloadable)) tan(float8);
-extern float16 __attribute__((overloadable)) tan(float16);
-
-extern float __attribute__((overloadable)) trunc(float);
-extern float2 __attribute__((overloadable)) trunc(float2);
-extern float3 __attribute__((overloadable)) trunc(float3);
-extern float4 __attribute__((overloadable)) trunc(float4);
-extern float8 __attribute__((overloadable)) trunc(float8);
-extern float16 __attribute__((overloadable)) trunc(float16);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_element);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_type);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_allocation);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_sampler);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_script);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_mesh);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_fragment);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_vertex);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_raster);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_program_store);
+extern bool __attribute__((overloadable))
+    rsIsObject(rs_font);
 
 
 
+// Allocations
+
+// Return the rs_allocation associated with a bound data
+// pointer.
+extern rs_allocation __attribute__((overloadable))
+    rsGetAllocation(const void *);
+
+// Mark the allocation dirty and notify those using it
+extern void __attribute__((overloadable))
+    rsAllocationMarkDirty(rs_allocation);
+
+// Return the dimensions associated with an allocation.
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimX(rs_allocation);
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimY(rs_allocation);
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimZ(rs_allocation);
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimLOD(rs_allocation);
+extern uint32_t __attribute__((overloadable))
+    rsAllocationGetDimFaces(rs_allocation);
+
+// Extract a single element from an allocation.
+extern const void * __attribute__((overloadable))
+    rsGetElementAt(rs_allocation, uint32_t x);
+extern const void * __attribute__((overloadable))
+    rsGetElementAt(rs_allocation, uint32_t x, uint32_t y);
+extern const void * __attribute__((overloadable))
+    rsGetElementAt(rs_allocation, uint32_t x, uint32_t y, uint32_t z);
+
+// Return a random value between 0 (or min_value) and max_value.
+extern int __attribute__((overloadable))
+    rsRand(int max_value);
+extern int __attribute__((overloadable))
+    rsRand(int min_value, int max_value);
+extern float __attribute__((overloadable))
+    rsRand(float max_value);
+extern float __attribute__((overloadable))
+    rsRand(float min_value, float max_value);
+
+// return the fractional part of a float
+// min(v - ((int)floor(v)), 0x1.fffffep-1f);
+extern float __attribute__((overloadable))
+    rsFrac(float);
+
+// time
+extern int32_t __attribute__((overloadable))
+    rsSecond(void);
+extern int32_t __attribute__((overloadable))
+    rsMinute(void);
+extern int32_t __attribute__((overloadable))
+    rsHour(void);
+extern int32_t __attribute__((overloadable))
+    rsDay(void);
+extern int32_t __attribute__((overloadable))
+    rsMonth(void);
+extern int32_t __attribute__((overloadable))
+    rsYear(void);
+
+// Return the current system clock in milliseconds
+extern int64_t __attribute__((overloadable))
+    rsUptimeMillis(void);
+
+// Return the current system clock in nanoseconds
+extern int64_t __attribute__((overloadable))
+    rsUptimeNanos(void);
+
+// Return the time in seconds since this function was last called in this script.
+extern float __attribute__((overloadable))
+    rsGetDt(void);
+
+// Send a message back to the client.  Will not block and returns true
+// if the message was sendable and false if the fifo was full.
+// A message ID is required.  Data payload is optional.
+extern bool __attribute__((overloadable))
+    rsSendToClient(int cmdID);
+extern bool __attribute__((overloadable))
+    rsSendToClient(int cmdID, const void *data, uint len);
+
+// Send a message back to the client, blocking until the message is queued.
+// A message ID is required.  Data payload is optional.
+extern void __attribute__((overloadable))
+    rsSendToClientBlocking(int cmdID);
+extern void __attribute__((overloadable))
+    rsSendToClientBlocking(int cmdID, const void *data, uint len);
 
 
+// Script to Script
+enum rs_for_each_strategy {
+    RS_FOR_EACH_STRATEGY_SERIAL,
+    RS_FOR_EACH_STRATEGY_DONT_CARE,
+    RS_FOR_EACH_STRATEGY_DST_LINEAR,
+    RS_FOR_EACH_STRATEGY_TILE_SMALL,
+    RS_FOR_EACH_STRATEGY_TILE_MEDIUM,
+    RS_FOR_EACH_STRATEGY_TILE_LARGE
+};
 
-// Int ops
+typedef struct rs_script_call {  // passed by const pointer as the last argument of the five-argument rsForEach overload
+    enum rs_for_each_strategy strategy;  // one of the RS_FOR_EACH_STRATEGY_* values
+    uint32_t xStart;  // NOTE(review): the Start/End pairs presumably bound the launch range per dimension - confirm
+    uint32_t xEnd;  // NOTE(review): whether End is inclusive or exclusive is not visible here - confirm
+    uint32_t yStart;
+    uint32_t yEnd;
+    uint32_t zStart;
+    uint32_t zEnd;
+    uint32_t arrayStart;
+    uint32_t arrayEnd;
+} rs_script_call_t;
 
-extern int __attribute__((overloadable)) abs(int);
-extern int2 __attribute__((overloadable)) abs(int2);
-extern int3 __attribute__((overloadable)) abs(int3);
-extern int4 __attribute__((overloadable)) abs(int4);
-extern int8 __attribute__((overloadable)) abs(int8);
-extern int16 __attribute__((overloadable)) abs(int16);
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input,
+              rs_allocation output, const void * usrData);
 
+extern void __attribute__((overloadable))
+    rsForEach(rs_script script, rs_allocation input,
+              rs_allocation output, const void * usrData,
+              const rs_script_call_t *);
 
-
-/*
-extern float modf(float, float);
-extern float randf(float);
-extern float randf2(float, float);
-extern float fracf(float);
-extern float lerpf(float, float, float);
-extern float mapf(float, float, float, float, float);
-*/
-
+#endif
diff --git a/scriptc/rs_types.rsh b/scriptc/rs_types.rsh
index 4198a74..212eb83 100644
--- a/scriptc/rs_types.rsh
+++ b/scriptc/rs_types.rsh
@@ -1,71 +1,77 @@
+#ifndef __RS_TYPES_RSH__
+#define __RS_TYPES_RSH__
 
 typedef char int8_t;
 typedef short int16_t;
 typedef int int32_t;
-//typedef long int64_t;
+typedef long long int64_t;
 
 typedef unsigned char uint8_t;
 typedef unsigned short uint16_t;
 typedef unsigned int uint32_t;
-//typedef long uint64_t;
+typedef unsigned long long uint64_t;
 
 typedef uint8_t uchar;
 typedef uint16_t ushort;
 typedef uint32_t uint;
-//typedef uint64_t ulong;
+typedef uint64_t ulong;
 
-typedef int rs_element;
-typedef int rs_type;
-typedef int rs_allocation;
-typedef int rs_sampler;
-typedef int rs_script;
-typedef int rs_mesh;
-typedef int rs_program_fragment;
-typedef int rs_program_vertex;
-typedef int rs_program_raster;
-typedef int rs_program_store;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_element;  // each rs_* handle in this group is its own anonymous struct wrapping one const pointer, so different handle kinds are distinct, non-interchangeable types
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_type;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_allocation;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_sampler;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_script;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_mesh;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_fragment;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_vertex;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_raster;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_program_store;
+typedef struct { const int* const p; } __attribute__((packed, aligned(4))) rs_font;
+
 
 typedef float float2 __attribute__((ext_vector_type(2)));
 typedef float float3 __attribute__((ext_vector_type(3)));
 typedef float float4 __attribute__((ext_vector_type(4)));
-typedef float float8 __attribute__((ext_vector_type(8)));
-typedef float float16 __attribute__((ext_vector_type(16)));
 
 typedef uchar uchar2 __attribute__((ext_vector_type(2)));
 typedef uchar uchar3 __attribute__((ext_vector_type(3)));
 typedef uchar uchar4 __attribute__((ext_vector_type(4)));
-typedef uchar uchar8 __attribute__((ext_vector_type(8)));
-typedef uchar uchar16 __attribute__((ext_vector_type(16)));
 
 typedef ushort ushort2 __attribute__((ext_vector_type(2)));
 typedef ushort ushort3 __attribute__((ext_vector_type(3)));
 typedef ushort ushort4 __attribute__((ext_vector_type(4)));
-typedef ushort ushort8 __attribute__((ext_vector_type(8)));
-typedef ushort ushort16 __attribute__((ext_vector_type(16)));
 
 typedef uint uint2 __attribute__((ext_vector_type(2)));
 typedef uint uint3 __attribute__((ext_vector_type(3)));
 typedef uint uint4 __attribute__((ext_vector_type(4)));
-typedef uint uint8 __attribute__((ext_vector_type(8)));
-typedef uint uint16 __attribute__((ext_vector_type(16)));
 
 typedef char char2 __attribute__((ext_vector_type(2)));
 typedef char char3 __attribute__((ext_vector_type(3)));
 typedef char char4 __attribute__((ext_vector_type(4)));
-typedef char char8 __attribute__((ext_vector_type(8)));
-typedef char char16 __attribute__((ext_vector_type(16)));
 
 typedef short short2 __attribute__((ext_vector_type(2)));
 typedef short short3 __attribute__((ext_vector_type(3)));
 typedef short short4 __attribute__((ext_vector_type(4)));
-typedef short short8 __attribute__((ext_vector_type(8)));
-typedef short short16 __attribute__((ext_vector_type(16)));
 
 typedef int int2 __attribute__((ext_vector_type(2)));
 typedef int int3 __attribute__((ext_vector_type(3)));
 typedef int int4 __attribute__((ext_vector_type(4)));
-typedef int int8 __attribute__((ext_vector_type(8)));
-typedef int int16 __attribute__((ext_vector_type(16)));
 
 
+typedef struct {
+    float m[16];  // 4x4 = 16 floats; NOTE(review): row- vs column-major element order is not shown here
+} rs_matrix4x4;
 
+typedef struct {
+    float m[9];  // 3x3 = 9 floats; NOTE(review): row- vs column-major element order is not shown here
+} rs_matrix3x3;
+
+typedef struct {
+    float m[4];  // 2x2 = 4 floats; NOTE(review): row- vs column-major element order is not shown here
+} rs_matrix2x2;
+
+typedef float4 rs_quaternion;  // alias of the 4-float vector type; NOTE(review): component order (e.g. w first or last) is not shown here
+
+#define RS_PACKED __attribute__((packed, aligned(4)))  // packed layout with 4-byte alignment; same attribute list as the rs_* handle structs in this header
+
+#endif