Implement async data/subData.  Implement TriangleMeshBuilder in SimpleMesh which replaces TriangleMesh.  Update Film to use new builder.
diff --git a/graphics/java/android/renderscript/Allocation.java b/graphics/java/android/renderscript/Allocation.java
index 81848b9..3001c743 100644
--- a/graphics/java/android/renderscript/Allocation.java
+++ b/graphics/java/android/renderscript/Allocation.java
@@ -47,28 +47,62 @@
         mRS.nAllocationUploadToTexture(mID, baseMipLevel);
     }
 
+    public void uploadToBufferObject() {
+        mRS.nAllocationUploadToBufferObject(mID);
+    }
+
     public void data(int[] d) {
-        mRS.nAllocationData(mID, d);
+        int size = 0;
+        if(mType != null && mType.mElement != null) {
+            size = mType.mElement.mSize;
+            for(int ct=0; ct < mType.mValues.length; ct++) {
+                if(mType.mValues[ct] != 0) {
+                    size *= mType.mValues[ct];
+                }
+            }
+            if((d.length * 4) < size) {
+                throw new IllegalArgumentException("Array too small for allocation type.");
+            }
+            Log.e("rs", "Alloc data size=" + size);
+            mRS.nAllocationData(mID, d, size);
+            return;
+        }
+        mRS.nAllocationData(mID, d, d.length * 4);
     }
 
     public void data(float[] d) {
-        mRS.nAllocationData(mID, d);
+        int size = 0;
+        if(mType != null && mType.mElement != null) {
+            size = mType.mElement.mSize;
+            for(int ct=0; ct < mType.mValues.length; ct++) {
+                if(mType.mValues[ct] != 0) {
+                    size *= mType.mValues[ct];
+                }
+            }
+            if((d.length * 4) < size) {
+                throw new IllegalArgumentException("Array too small for allocation type.");
+            }
+            Log.e("rs", "Alloc data size=" + size);
+            mRS.nAllocationData(mID, d, size);
+            return;
+        }
+        mRS.nAllocationData(mID, d, d.length * 4);
     }
 
     public void subData1D(int off, int count, int[] d) {
-        mRS.nAllocationSubData1D(mID, off, count, d);
+        mRS.nAllocationSubData1D(mID, off, count, d, count * 4);
     }
 
     public void subData1D(int off, int count, float[] d) {
-        mRS.nAllocationSubData1D(mID, off, count, d);
+        mRS.nAllocationSubData1D(mID, off, count, d, d.length * 4);
     }
 
     public void subData2D(int xoff, int yoff, int w, int h, int[] d) {
-        mRS.nAllocationSubData2D(mID, xoff, yoff, w, h, d);
+        mRS.nAllocationSubData2D(mID, xoff, yoff, w, h, d, d.length * 4);
     }
 
     public void subData2D(int xoff, int yoff, int w, int h, float[] d) {
-        mRS.nAllocationSubData2D(mID, xoff, yoff, w, h, d);
+        mRS.nAllocationSubData2D(mID, xoff, yoff, w, h, d, d.length * 4);
     }
 
     public void readData(int[] d) {
@@ -221,20 +255,6 @@
         Bitmap b = BitmapFactory.decodeResource(res, id, mBitmapOptions);
         return createFromBitmapBoxed(rs, b, dstFmt, genMips);
     }
-/*
-    public static Allocation createFromObject(RenderScript rs, Object o) {
-        Class c = o.getClass();
-        Type t;
-        if(c.isArray()) {
-            t = Type.createFromClass(rs, c, Array.getLength(o));
-        } else {
-            t = Type.createFromClass(rs, c, 1);
-        }
-        Allocation alloc = createTyped(rs, t);
-        t.destroy();
-        return alloc;
-    }
-*/
 }
 
 
diff --git a/graphics/java/android/renderscript/Element.java b/graphics/java/android/renderscript/Element.java
index aeec739..0ca112c 100644
--- a/graphics/java/android/renderscript/Element.java
+++ b/graphics/java/android/renderscript/Element.java
@@ -25,30 +25,31 @@
 public class Element extends BaseObj {
     final int mPredefinedID;
     final boolean mIsPredefined;
+    final int mSize;
 
-    public static final Element USER_U8 = new Element(0);
-    public static final Element USER_I8 = new Element(1);
-    public static final Element USER_U16 = new Element(2);
-    public static final Element USER_I16 = new Element(3);
-    public static final Element USER_U32 = new Element(4);
-    public static final Element USER_I32 = new Element(5);
-    public static final Element USER_FLOAT = new Element(6);
+    public static final Element USER_U8 = new Element(0, 1);
+    public static final Element USER_I8 = new Element(1, 1);
+    public static final Element USER_U16 = new Element(2, 2);
+    public static final Element USER_I16 = new Element(3, 2);
+    public static final Element USER_U32 = new Element(4, 4);
+    public static final Element USER_I32 = new Element(5, 4);
+    public static final Element USER_FLOAT = new Element(6, 4);
 
-    public static final Element A_8 = new Element(7);
-    public static final Element RGB_565 = new Element(8);
-    public static final Element RGB_888 = new Element(11);
-    public static final Element RGBA_5551 = new Element(9);
-    public static final Element RGBA_4444 = new Element(10);
-    public static final Element RGBA_8888 = new Element(12);
+    public static final Element A_8 = new Element(7, 1);
+    public static final Element RGB_565 = new Element(8, 2);
+    public static final Element RGB_888 = new Element(11, 2);
+    public static final Element RGBA_5551 = new Element(9, 2);
+    public static final Element RGBA_4444 = new Element(10, 2);
+    public static final Element RGBA_8888 = new Element(12, 4);
 
-    public static final Element INDEX_16 = new Element(13);
-    public static final Element INDEX_32 = new Element(14);
-    public static final Element XY_F32 = new Element(15);
-    public static final Element XYZ_F32 = new Element(16);
-    public static final Element ST_XY_F32 = new Element(17);
-    public static final Element ST_XYZ_F32 = new Element(18);
-    public static final Element NORM_XYZ_F32 = new Element(19);
-    public static final Element NORM_ST_XYZ_F32 = new Element(20);
+    public static final Element INDEX_16 = new Element(13, 2);
+    public static final Element INDEX_32 = new Element(14, 2);
+    public static final Element XY_F32 = new Element(15, 8);
+    public static final Element XYZ_F32 = new Element(16, 12);
+    public static final Element ST_XY_F32 = new Element(17, 16);
+    public static final Element ST_XYZ_F32 = new Element(18, 20);
+    public static final Element NORM_XYZ_F32 = new Element(19, 24);
+    public static final Element NORM_ST_XYZ_F32 = new Element(20, 32);
 
     void initPredef(RenderScript rs) {
         mID = rs.nElementGetPredefined(mPredefinedID);
@@ -121,18 +122,20 @@
     }
 
 
-    Element(int predef) {
+    Element(int predef, int size) {
         super(null);
         mID = 0;
         mPredefinedID = predef;
         mIsPredefined = true;
+        mSize = size;
     }
 
-    Element(int id, RenderScript rs) {
+    Element(int id, RenderScript rs, int size) {
         super(rs);
         mID = id;
         mPredefinedID = 0;
         mIsPredefined = false;
+        mSize = size;
     }
 
     public void destroy() throws IllegalStateException {
@@ -168,6 +171,7 @@
         RenderScript mRS;
         Entry[] mEntries;
         int mEntryCount;
+        int mSizeBits;
 
         private class Entry {
             Element mElement;
@@ -182,6 +186,7 @@
             mRS = rs;
             mEntryCount = 0;
             mEntries = new Entry[8];
+            mSizeBits = 0;
         }
 
         void addEntry(Entry e) {
@@ -201,6 +206,7 @@
             Entry en = new Entry();
             en.mElement = e;
             addEntry(en);
+            mSizeBits += e.mSize * 8;
             return this;
         }
 
@@ -211,6 +217,7 @@
             en.mIsNormalized = isNormalized;
             en.mBits = bits;
             en.mName = name;
+            mSizeBits += bits;
             addEntry(en);
             return this;
         }
@@ -236,6 +243,12 @@
             return this;
         }
 
+        public Builder addFloatXY(String prefix) {
+            add(DataType.FLOAT, DataKind.X, false, 32, prefix + "X");
+            add(DataType.FLOAT, DataKind.Y, false, 32, prefix + "Y");
+            return this;
+        }
+
         public Builder addFloatXYZ() {
             add(DataType.FLOAT, DataKind.X, false, 32, null);
             add(DataType.FLOAT, DataKind.Y, false, 32, null);
@@ -243,17 +256,49 @@
             return this;
         }
 
+        public Builder addFloatXYZ(String prefix) {
+            add(DataType.FLOAT, DataKind.X, false, 32, prefix + "X");
+            add(DataType.FLOAT, DataKind.Y, false, 32, prefix + "Y");
+            add(DataType.FLOAT, DataKind.Z, false, 32, prefix + "Z");
+            return this;
+        }
+
         public Builder addFloatST() {
             add(DataType.FLOAT, DataKind.S, false, 32, null);
             add(DataType.FLOAT, DataKind.T, false, 32, null);
             return this;
         }
 
+        public Builder addFloatST(String prefix) {
+            add(DataType.FLOAT, DataKind.S, false, 32, prefix + "S");
+            add(DataType.FLOAT, DataKind.T, false, 32, prefix + "T");
+            return this;
+        }
+
+        public Builder addFloatNorm() {
+            add(DataType.FLOAT, DataKind.NX, false, 32, null);
+            add(DataType.FLOAT, DataKind.NY, false, 32, null);
+            add(DataType.FLOAT, DataKind.NZ, false, 32, null);
+            return this;
+        }
+
+        public Builder addFloatNorm(String prefix) {
+            add(DataType.FLOAT, DataKind.NX, false, 32, prefix + "NX");
+            add(DataType.FLOAT, DataKind.NY, false, 32, prefix + "NY");
+            add(DataType.FLOAT, DataKind.NZ, false, 32, prefix + "NZ");
+            return this;
+        }
+
         public Builder addFloatPointSize() {
             add(DataType.FLOAT, DataKind.POINT_SIZE, false, 32, null);
             return this;
         }
 
+        public Builder addFloatPointSize(String name) {
+            add(DataType.FLOAT, DataKind.POINT_SIZE, false, 32, name);
+            return this;
+        }
+
         public Builder addFloatRGB() {
             add(DataType.FLOAT, DataKind.RED, false, 32, null);
             add(DataType.FLOAT, DataKind.GREEN, false, 32, null);
@@ -261,6 +306,13 @@
             return this;
         }
 
+        public Builder addFloatRGB(String prefix) {
+            add(DataType.FLOAT, DataKind.RED, false, 32, prefix + "R");
+            add(DataType.FLOAT, DataKind.GREEN, false, 32, prefix + "G");
+            add(DataType.FLOAT, DataKind.BLUE, false, 32, prefix + "B");
+            return this;
+        }
+
         public Builder addFloatRGBA() {
             add(DataType.FLOAT, DataKind.RED, false, 32, null);
             add(DataType.FLOAT, DataKind.GREEN, false, 32, null);
@@ -269,6 +321,14 @@
             return this;
         }
 
+        public Builder addFloatRGBA(String prefix) {
+            add(DataType.FLOAT, DataKind.RED, false, 32, prefix + "R");
+            add(DataType.FLOAT, DataKind.GREEN, false, 32, prefix + "G");
+            add(DataType.FLOAT, DataKind.BLUE, false, 32, prefix + "B");
+            add(DataType.FLOAT, DataKind.ALPHA, false, 32, prefix + "A");
+            return this;
+        }
+
         public Builder addUNorm8RGBA() {
             add(DataType.UNSIGNED, DataKind.RED, true, 8, null);
             add(DataType.UNSIGNED, DataKind.GREEN, true, 8, null);
@@ -277,6 +337,14 @@
             return this;
         }
 
+        public Builder addUNorm8RGBA(String prefix) {
+            add(DataType.UNSIGNED, DataKind.RED, true, 8, prefix + "R");
+            add(DataType.UNSIGNED, DataKind.GREEN, true, 8, prefix + "G");
+            add(DataType.UNSIGNED, DataKind.BLUE, true, 8, prefix + "B");
+            add(DataType.UNSIGNED, DataKind.ALPHA, true, 8, prefix + "A");
+            return this;
+        }
+
         static synchronized Element internalCreate(RenderScript rs, Builder b) {
             rs.nElementBegin();
             for (int ct=0; ct < b.mEntryCount; ct++) {
@@ -292,7 +360,7 @@
                 }
             }
             int id = rs.nElementCreate();
-            return new Element(id, rs);
+            return new Element(id, rs, (b.mSizeBits + 7) >> 3);
         }
 
         public Element create() {
diff --git a/graphics/java/android/renderscript/RenderScript.java b/graphics/java/android/renderscript/RenderScript.java
index 8489003..ee7b702 100644
--- a/graphics/java/android/renderscript/RenderScript.java
+++ b/graphics/java/android/renderscript/RenderScript.java
@@ -97,12 +97,13 @@
     native int  nAllocationCreateFromBitmapBoxed(int dstFmt, boolean genMips, Bitmap bmp);
 
     native void nAllocationUploadToTexture(int alloc, int baseMioLevel);
-    native void nAllocationData(int id, int[] d);
-    native void nAllocationData(int id, float[] d);
-    native void nAllocationSubData1D(int id, int off, int count, int[] d);
-    native void nAllocationSubData1D(int id, int off, int count, float[] d);
-    native void nAllocationSubData2D(int id, int xoff, int yoff, int w, int h, int[] d);
-    native void nAllocationSubData2D(int id, int xoff, int yoff, int w, int h, float[] d);
+    native void nAllocationUploadToBufferObject(int alloc);
+    native void nAllocationData(int id, int[] d, int sizeBytes);
+    native void nAllocationData(int id, float[] d, int sizeBytes);
+    native void nAllocationSubData1D(int id, int off, int count, int[] d, int sizeBytes);
+    native void nAllocationSubData1D(int id, int off, int count, float[] d, int sizeBytes);
+    native void nAllocationSubData2D(int id, int xoff, int yoff, int w, int h, int[] d, int sizeBytes);
+    native void nAllocationSubData2D(int id, int xoff, int yoff, int w, int h, float[] d, int sizeBytes);
     native void nAllocationRead(int id, int[] d);
     native void nAllocationRead(int id, float[] d);
     native void nAllocationDataFromObject(int id, Type t, Object o);
diff --git a/graphics/java/android/renderscript/SimpleMesh.java b/graphics/java/android/renderscript/SimpleMesh.java
index d80551e..e66fb8a 100644
--- a/graphics/java/android/renderscript/SimpleMesh.java
+++ b/graphics/java/android/renderscript/SimpleMesh.java
@@ -167,5 +167,150 @@
         }
     }
 
+    public static class TriangleMeshBuilder {
+        float mVtxData[];
+        int mVtxCount;
+        int mIndexData[];
+        int mIndexCount;
+        RenderScript mRS;
+        Element mElement;
+
+        int mVtxSize;
+        boolean mNorm;
+        boolean mTex;
+
+        public TriangleMeshBuilder(RenderScript rs, int vtxSize, boolean norm, boolean tex) {
+            mRS = rs;
+            mVtxCount = 0;
+            mIndexCount = 0;
+            mVtxData = new float[128];
+            mIndexData = new int[128];
+            mVtxSize = vtxSize;
+            mNorm = norm;
+            mTex = tex;
+
+            if(vtxSize < 2 || vtxSize > 3) {
+                throw new IllegalArgumentException("Vertex size out of range.");
+            }
+        }
+
+        private void makeSpace(int count) {
+            if((mVtxCount + count) >= mVtxData.length) {
+                float t[] = new float[mVtxData.length * 2];
+                System.arraycopy(mVtxData, 0, t, 0, mVtxData.length);
+                mVtxData = t;
+            }
+        }
+
+        public void add_XY(float x, float y) {
+            if((mVtxSize != 2) || mNorm || mTex) {
+                throw new IllegalStateException("add mistmatch with declaired components.");
+            }
+            makeSpace(2);
+            mVtxData[mVtxCount++] = x;
+            mVtxData[mVtxCount++] = y;
+        }
+
+        public void add_XYZ(float x, float y, float z) {
+            if((mVtxSize != 3) || mNorm || mTex) {
+                throw new IllegalStateException("add mistmatch with declaired components.");
+            }
+            makeSpace(3);
+            mVtxData[mVtxCount++] = x;
+            mVtxData[mVtxCount++] = y;
+            mVtxData[mVtxCount++] = z;
+        }
+
+        public void add_XY_ST(float x, float y, float s, float t) {
+            if((mVtxSize != 2) || mNorm || !mTex) {
+                throw new IllegalStateException("add mistmatch with declaired components.");
+            }
+            makeSpace(4);
+            mVtxData[mVtxCount++] = x;
+            mVtxData[mVtxCount++] = y;
+            mVtxData[mVtxCount++] = s;
+            mVtxData[mVtxCount++] = t;
+        }
+
+        public void add_XYZ_ST(float x, float y, float z, float s, float t) {
+            if((mVtxSize != 3) || mNorm || !mTex) {
+                throw new IllegalStateException("add mistmatch with declaired components.");
+            }
+            makeSpace(5);
+            mVtxData[mVtxCount++] = x;
+            mVtxData[mVtxCount++] = y;
+            mVtxData[mVtxCount++] = z;
+            mVtxData[mVtxCount++] = s;
+            mVtxData[mVtxCount++] = t;
+        }
+
+        public void add_XYZ_ST_NORM(float x, float y, float z, float s, float t, float nx, float ny, float nz) {
+            if((mVtxSize != 3) || !mNorm || !mTex) {
+                throw new IllegalStateException("add mistmatch with declaired components.");
+            }
+            makeSpace(8);
+            mVtxData[mVtxCount++] = x;
+            mVtxData[mVtxCount++] = y;
+            mVtxData[mVtxCount++] = z;
+            mVtxData[mVtxCount++] = s;
+            mVtxData[mVtxCount++] = t;
+            mVtxData[mVtxCount++] = nx;
+            mVtxData[mVtxCount++] = ny;
+            mVtxData[mVtxCount++] = nz;
+        }
+
+        public void addTriangle(int idx1, int idx2, int idx3) {
+            if((mIndexCount + 3) >= mIndexData.length) {
+                int t[] = new int[mIndexData.length * 2];
+                System.arraycopy(mIndexData, 0, t, 0, mIndexData.length);
+                mIndexData = t;
+            }
+            mIndexData[mIndexCount++] = idx1;
+            mIndexData[mIndexCount++] = idx2;
+            mIndexData[mIndexCount++] = idx3;
+        }
+
+        public SimpleMesh create() {
+            Element.Builder b = new Element.Builder(mRS);
+            int floatCount = mVtxSize;
+            if(mVtxSize == 2) {
+                b.addFloatXY();
+            } else {
+                b.addFloatXYZ();
+            }
+            if(mTex) {
+                floatCount += 2;
+                b.addFloatST();
+            }
+            if(mNorm) {
+                floatCount += 3;
+                b.addFloatNorm();
+            }
+            mElement = b.create();
+
+            Builder smb = new Builder(mRS);
+            smb.addVertexType(mElement, mVtxCount / floatCount);
+            smb.setIndexType(Element.INDEX_16, mIndexCount);
+            smb.setPrimitive(Primitive.TRIANGLE);
+            SimpleMesh sm = smb.create();
+
+            Allocation vertexAlloc = sm.createVertexAllocation(0);
+            Allocation indexAlloc = sm.createIndexAllocation();
+            sm.bindVertexAllocation(vertexAlloc, 0);
+            sm.bindIndexAllocation(indexAlloc);
+
+            vertexAlloc.data(mVtxData);
+            vertexAlloc.uploadToBufferObject();
+
+            // This is safe because length is a pow2
+            for(int ct=0; ct < (mIndexCount+1); ct += 2) {
+                mIndexData[ct >> 1] = mIndexData[ct] | (mIndexData[ct+1] << 16);
+            }
+            indexAlloc.data(mIndexData);
+            indexAlloc.uploadToBufferObject();
+
+            return sm;
+        }
+    }
 }
 
diff --git a/graphics/jni/android_renderscript_RenderScript.cpp b/graphics/jni/android_renderscript_RenderScript.cpp
index 2393f74..2550181 100644
--- a/graphics/jni/android_renderscript_RenderScript.cpp
+++ b/graphics/jni/android_renderscript_RenderScript.cpp
@@ -70,7 +70,7 @@
 nAssignName(JNIEnv *_env, jobject _this, jint obj, jbyteArray str)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
-    LOG_API("nAssignName, con(%p), obj(%p)", con, obj);
+    LOG_API("nAssignName, con(%p), obj(%p)", con, (void *)obj);
 
     jint len = _env->GetArrayLength(str);
     jbyte * cptr = (jbyte *) _env->GetPrimitiveArrayCritical(str, 0);
@@ -345,6 +345,14 @@
     rsAllocationUploadToTexture(con, (RsAllocation)a, mip);
 }
 
+static void
+nAllocationUploadToBufferObject(JNIEnv *_env, jobject _this, jint a)
+{
+    RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
+    LOG_API("nAllocationUploadToBufferObject, con(%p), a(%p)", con, (RsAllocation)a);
+    rsAllocationUploadToBufferObject(con, (RsAllocation)a);
+}
+
 static RsElementPredefined SkBitmapToPredefined(SkBitmap::Config cfg)
 {
     switch (cfg) {
@@ -413,68 +421,68 @@
 
 
 static void
-nAllocationData_i(JNIEnv *_env, jobject _this, jint alloc, jintArray data)
+nAllocationData_i(JNIEnv *_env, jobject _this, jint alloc, jintArray data, int sizeBytes)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
     jint len = _env->GetArrayLength(data);
     LOG_API("nAllocationData_i, con(%p), alloc(%p), len(%i)", con, (RsAllocation)alloc, len);
     jint *ptr = _env->GetIntArrayElements(data, NULL);
-    rsAllocationData(con, (RsAllocation)alloc, ptr);
+    rsAllocationData(con, (RsAllocation)alloc, ptr, sizeBytes);
     _env->ReleaseIntArrayElements(data, ptr, JNI_ABORT);
 }
 
 static void
-nAllocationData_f(JNIEnv *_env, jobject _this, jint alloc, jfloatArray data)
+nAllocationData_f(JNIEnv *_env, jobject _this, jint alloc, jfloatArray data, int sizeBytes)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
     jint len = _env->GetArrayLength(data);
     LOG_API("nAllocationData_i, con(%p), alloc(%p), len(%i)", con, (RsAllocation)alloc, len);
     jfloat *ptr = _env->GetFloatArrayElements(data, NULL);
-    rsAllocationData(con, (RsAllocation)alloc, ptr);
+    rsAllocationData(con, (RsAllocation)alloc, ptr, sizeBytes);
     _env->ReleaseFloatArrayElements(data, ptr, JNI_ABORT);
 }
 
 static void
-nAllocationSubData1D_i(JNIEnv *_env, jobject _this, jint alloc, jint offset, jint count, jintArray data)
+nAllocationSubData1D_i(JNIEnv *_env, jobject _this, jint alloc, jint offset, jint count, jintArray data, int sizeBytes)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
     jint len = _env->GetArrayLength(data);
     LOG_API("nAllocation1DSubData_i, con(%p), adapter(%p), offset(%i), count(%i), len(%i)", con, (RsAllocation)alloc, offset, count, len);
     jint *ptr = _env->GetIntArrayElements(data, NULL);
-    rsAllocation1DSubData(con, (RsAllocation)alloc, offset, count, ptr);
+    rsAllocation1DSubData(con, (RsAllocation)alloc, offset, count, ptr, sizeBytes);
     _env->ReleaseIntArrayElements(data, ptr, JNI_ABORT);
 }
 
 static void
-nAllocationSubData1D_f(JNIEnv *_env, jobject _this, jint alloc, jint offset, jint count, jfloatArray data)
+nAllocationSubData1D_f(JNIEnv *_env, jobject _this, jint alloc, jint offset, jint count, jfloatArray data, int sizeBytes)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
     jint len = _env->GetArrayLength(data);
     LOG_API("nAllocation1DSubData_f, con(%p), adapter(%p), offset(%i), count(%i), len(%i)", con, (RsAllocation)alloc, offset, count, len);
     jfloat *ptr = _env->GetFloatArrayElements(data, NULL);
-    rsAllocation1DSubData(con, (RsAllocation)alloc, offset, count, ptr);
+    rsAllocation1DSubData(con, (RsAllocation)alloc, offset, count, ptr, sizeBytes);
     _env->ReleaseFloatArrayElements(data, ptr, JNI_ABORT);
 }
 
 static void
-nAllocationSubData2D_i(JNIEnv *_env, jobject _this, jint alloc, jint xoff, jint yoff, jint w, jint h, jintArray data)
+nAllocationSubData2D_i(JNIEnv *_env, jobject _this, jint alloc, jint xoff, jint yoff, jint w, jint h, jintArray data, int sizeBytes)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
     jint len = _env->GetArrayLength(data);
     LOG_API("nAllocation2DSubData_i, con(%p), adapter(%p), xoff(%i), yoff(%i), w(%i), h(%i), len(%i)", con, (RsAllocation)alloc, xoff, yoff, w, h, len);
     jint *ptr = _env->GetIntArrayElements(data, NULL);
-    rsAllocation2DSubData(con, (RsAllocation)alloc, xoff, yoff, w, h, ptr);
+    rsAllocation2DSubData(con, (RsAllocation)alloc, xoff, yoff, w, h, ptr, sizeBytes);
     _env->ReleaseIntArrayElements(data, ptr, JNI_ABORT);
 }
 
 static void
-nAllocationSubData2D_f(JNIEnv *_env, jobject _this, jint alloc, jint xoff, jint yoff, jint w, jint h, jfloatArray data)
+nAllocationSubData2D_f(JNIEnv *_env, jobject _this, jint alloc, jint xoff, jint yoff, jint w, jint h, jfloatArray data, int sizeBytes)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
     jint len = _env->GetArrayLength(data);
     LOG_API("nAllocation2DSubData_i, con(%p), adapter(%p), xoff(%i), yoff(%i), w(%i), h(%i), len(%i)", con, (RsAllocation)alloc, xoff, yoff, w, h, len);
     jfloat *ptr = _env->GetFloatArrayElements(data, NULL);
-    rsAllocation2DSubData(con, (RsAllocation)alloc, xoff, yoff, w, h, ptr);
+    rsAllocation2DSubData(con, (RsAllocation)alloc, xoff, yoff, w, h, ptr, sizeBytes);
     _env->ReleaseFloatArrayElements(data, ptr, JNI_ABORT);
 }
 
@@ -516,7 +524,7 @@
         const TypeFieldCache *tfc = &tc->fields[ct];
         buf = tfc->ptr(_env, _o, tfc->field, buf);
     }
-    rsAllocationData(con, (RsAllocation)alloc, bufAlloc);
+    rsAllocationData(con, (RsAllocation)alloc, bufAlloc, tc->size);
     const uint32_t * tmp = (const uint32_t *)bufAlloc;
     free(bufAlloc);
 }
@@ -748,7 +756,7 @@
 nScriptSetClearColor(JNIEnv *_env, jobject _this, jint script, jfloat r, jfloat g, jfloat b, jfloat a)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
-    LOG_API("nScriptSetClearColor, con(%p), s(%p), r(%f), g(%f), b(%f), a(%f)", con, script, r, g, b, a);
+    LOG_API("nScriptSetClearColor, con(%p), s(%p), r(%f), g(%f), b(%f), a(%f)", con, (void *)script, r, g, b, a);
     rsScriptSetClearColor(con, (RsScript)script, r, g, b, a);
 }
 
@@ -756,7 +764,7 @@
 nScriptSetClearDepth(JNIEnv *_env, jobject _this, jint script, jfloat d)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
-    LOG_API("nScriptCSetClearDepth, con(%p), s(%p), depth(%f)", con, script, d);
+    LOG_API("nScriptCSetClearDepth, con(%p), s(%p), depth(%f)", con, (void *)script, d);
     rsScriptSetClearDepth(con, (RsScript)script, d);
 }
 
@@ -764,7 +772,7 @@
 nScriptSetClearStencil(JNIEnv *_env, jobject _this, jint script, jint stencil)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
-    LOG_API("nScriptCSetClearStencil, con(%p), s(%p), stencil(%i)", con, script, stencil);
+    LOG_API("nScriptCSetClearStencil, con(%p), s(%p), stencil(%i)", con, (void *)script, stencil);
     rsScriptSetClearStencil(con, (RsScript)script, stencil);
 }
 
@@ -772,7 +780,7 @@
 nScriptSetTimeZone(JNIEnv *_env, jobject _this, jint script, jbyteArray timeZone)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
-    LOG_API("nScriptCSetTimeZone, con(%p), s(%p), timeZone(%s)", con, script, timeZone);
+    LOG_API("nScriptCSetTimeZone, con(%p), s(%p), timeZone(%s)", con, (void *)script, (const char *)timeZone);
 
     jint length = _env->GetArrayLength(timeZone);
     jbyte* timeZone_ptr;
@@ -1005,7 +1013,7 @@
 nProgramVertexBindAllocation(JNIEnv *_env, jobject _this, jint vpv, jint a)
 {
     RsContext con = (RsContext)(_env->GetIntField(_this, gContextId));
-    LOG_API("nProgramVertexBindAllocation, con(%p), vpf(%p), slot(%i), a(%p)", con, (RsProgramVertex)vpv, slot, (RsAllocation)a);
+    LOG_API("nProgramVertexBindAllocation, con(%p), vpf(%p), a(%p)", con, (RsProgramVertex)vpv, (RsAllocation)a);
     rsProgramVertexBindAllocation(con, (RsProgramFragment)vpv, (RsAllocation)a);
 }
 
@@ -1230,14 +1238,15 @@
 {"nAllocationCreatePredefSized",   "(II)I",                                (void*)nAllocationCreatePredefSized },
 {"nAllocationCreateSized",         "(II)I",                                (void*)nAllocationCreateSized },
 {"nAllocationCreateFromBitmap",    "(IZLandroid/graphics/Bitmap;)I",       (void*)nAllocationCreateFromBitmap },
-{"nAllocationCreateFromBitmapBoxed","(IZLandroid/graphics/Bitmap;)I",       (void*)nAllocationCreateFromBitmapBoxed },
+{"nAllocationCreateFromBitmapBoxed","(IZLandroid/graphics/Bitmap;)I",      (void*)nAllocationCreateFromBitmapBoxed },
 {"nAllocationUploadToTexture",     "(II)V",                                (void*)nAllocationUploadToTexture },
-{"nAllocationData",                "(I[I)V",                               (void*)nAllocationData_i },
-{"nAllocationData",                "(I[F)V",                               (void*)nAllocationData_f },
-{"nAllocationSubData1D",           "(III[I)V",                             (void*)nAllocationSubData1D_i },
-{"nAllocationSubData1D",           "(III[F)V",                             (void*)nAllocationSubData1D_f },
-{"nAllocationSubData2D",           "(IIIII[I)V",                           (void*)nAllocationSubData2D_i },
-{"nAllocationSubData2D",           "(IIIII[F)V",                           (void*)nAllocationSubData2D_f },
+{"nAllocationUploadToBufferObject","(I)V",                                 (void*)nAllocationUploadToBufferObject },
+{"nAllocationData",                "(I[II)V",                              (void*)nAllocationData_i },
+{"nAllocationData",                "(I[FI)V",                              (void*)nAllocationData_f },
+{"nAllocationSubData1D",           "(III[II)V",                            (void*)nAllocationSubData1D_i },
+{"nAllocationSubData1D",           "(III[FI)V",                            (void*)nAllocationSubData1D_f },
+{"nAllocationSubData2D",           "(IIIII[II)V",                          (void*)nAllocationSubData2D_i },
+{"nAllocationSubData2D",           "(IIIII[FI)V",                          (void*)nAllocationSubData2D_f },
 {"nAllocationRead",                "(I[I)V",                               (void*)nAllocationRead_i },
 {"nAllocationRead",                "(I[F)V",                               (void*)nAllocationRead_f },
 {"nAllocationDataFromObject",      "(ILandroid/renderscript/Type;Ljava/lang/Object;)V",   (void*)nAllocationDataFromObject },
diff --git a/libs/rs/java/Film/res/raw/filmstrip.c b/libs/rs/java/Film/res/raw/filmstrip.c
index 255d908..8f3d930 100644
--- a/libs/rs/java/Film/res/raw/filmstrip.c
+++ b/libs/rs/java/Film/res/raw/filmstrip.c
@@ -24,15 +24,15 @@
 
     float trans = Pos_translate;
     float rot = Pos_rotate;
+
     matrixLoadScale(mat1, 2.f, 2.f, 2.f);
     matrixTranslate(mat1, 0.f, 0.f, trans);
     matrixRotate(mat1, 90.f, 0.f, 0.f, 1.f);
     matrixRotate(mat1, rot, 1.f, 0.f, 0.f);
-    storeMatrix(3, 0, mat1);
+    vpLoadModelMatrix(mat1);
 
     // Draw the lighting effect in the strip and fill the Z buffer.
-    drawTriangleMesh(NAMED_mesh);
-
+    drawSimpleMesh(NAMED_mesh);
 
     // Start of images.
     bindProgramFragmentStore(NAMED_PSImages);
@@ -74,31 +74,21 @@
         pos = pos - 0.75f;
 
         offset = offset + triangleOffsetsCount / 2;
-
-    int drawit = 1;
-    if (offset < 0) {
-        drawit = 0;
-    }
-    if (offset >= triangleOffsetsCount) {
-        drawit = 0;
-    }
-
-        //if (!((offset < 0) || (offset >= triangleOffsetsCount))) {
-        if (drawit) {
+        if (!((offset < 0) || (offset >= triangleOffsetsCount))) {
             int start = offset -2;
             int end = offset + 2;
 
             if (start < 0) {
                 start = 0;
             }
-            if (end > triangleOffsetsCount) {
-                end = triangleOffsetsCount;
+            if (end >= triangleOffsetsCount) {
+                end = triangleOffsetsCount-1;
             }
 
             bindTexture(NAMED_PFImages, 0, loadI32(0, imgId - 1));
             matrixLoadTranslate(mat1, -pos - loadF(5, triangleOffsetsCount / 2), 0, 0);
             vpLoadTextureMatrix(mat1);
-            drawTriangleMeshRange(NAMED_mesh, loadI32(4, start), loadI32(4, end) - loadI32(4, start));
+            drawSimpleMeshRange(NAMED_mesh, loadI32(4, start), (loadI32(4, end) - loadI32(4, start)));
         }
     }
     return 0;
diff --git a/libs/rs/java/Film/src/com/android/film/FilmRS.java b/libs/rs/java/Film/src/com/android/film/FilmRS.java
index e6cd52d..cee827b 100644
--- a/libs/rs/java/Film/src/com/android/film/FilmRS.java
+++ b/libs/rs/java/Film/src/com/android/film/FilmRS.java
@@ -68,8 +68,6 @@
     private RenderScript mRS;
     private Script mScriptStrip;
     private Script mScriptImage;
-    private Element mElementVertex;
-    private Element mElementIndex;
     private Sampler mSampler;
     private ProgramStore mPSBackground;
     private ProgramStore mPSImages;
@@ -88,7 +86,7 @@
     private Allocation mAllocOffsetsTex;
     private Allocation mAllocOffsets;
 
-    private RenderScript.TriangleMesh mMesh;
+    private SimpleMesh mMesh;
     private Light mLight;
 
     private FilmStripMesh mFSM;
@@ -186,7 +184,6 @@
                 mip++;
                 a.setConstraint(Dimension.LOD, mip);
             }
-            a.destroy();
 
             mImages[ct].uploadToTexture(1);
             mBufferIDs[ct] = mImages[ct].getID();
@@ -204,13 +201,8 @@
     }
 
     private void initRS() {
-        mElementVertex = Element.NORM_ST_XYZ_F32;
-        mElementIndex = Element.INDEX_16;
-
-        mRS.triangleMeshBegin(mElementVertex, mElementIndex);
         mFSM = new FilmStripMesh();
-        mFSM.init(mRS);
-        mMesh = mRS.triangleMeshCreate();
+        mMesh = mFSM.init(mRS);
         mMesh.setName("mesh");
 
         initPFS();
diff --git a/libs/rs/java/Film/src/com/android/film/FilmStripMesh.java b/libs/rs/java/Film/src/com/android/film/FilmStripMesh.java
index 02bffd8..64aac26 100644
--- a/libs/rs/java/Film/src/com/android/film/FilmStripMesh.java
+++ b/libs/rs/java/Film/src/com/android/film/FilmStripMesh.java
@@ -22,6 +22,7 @@
 import android.util.Log;
 
 import android.renderscript.RenderScript;
+import android.renderscript.SimpleMesh;
 
 
 class FilmStripMesh {
@@ -72,27 +73,23 @@
             dx /= len;
             dy /= len;
             dz /= len;
-        
+
             nx = dx * dz;
             ny = dy * dz;
             nz = (float)java.lang.Math.sqrt(dx*dx + dy*dy);
-        
+
             len = (float)java.lang.Math.sqrt(nx*nx + ny*ny + nz*nz);
             nx /= len;
             ny /= len;
             nz /= len;
         }
-
-        void addToRS(RenderScript rs) {
-            rs.triangleMeshAddVertex_XYZ_ST_NORM(x, y, z, s, t, nx, ny, nz);
-        }
     }
 
     int[] mTriangleOffsets;
     float[] mTriangleOffsetsTex;
     int mTriangleOffsetsCount;
 
-    void init(RenderScript rs)
+    SimpleMesh init(RenderScript rs)
     {
         float vtx[] = new float[] {
             60.431003f, 124.482050f,
@@ -203,11 +200,11 @@
              -60.862074f, 120.872604f,
              -60.431003f, 124.482050f
         };
-    
-    
+
+
         mTriangleOffsets = new int[64];
         mTriangleOffsetsTex = new float[64];
-    
+
         mTriangleOffsets[0] = 0;
         mTriangleOffsetsCount = 1;
 
@@ -215,6 +212,8 @@
         t.nxyz(1, 0, 0);
         int count = vtx.length / 2;
 
+        SimpleMesh.TriangleMeshBuilder tm = new SimpleMesh.TriangleMeshBuilder(rs, 3, true, true);
+
         float runningS = 0;
         for (int ct=0; ct < (count-1); ct++) {
             t.x = -vtx[ct*2] / 100.f;
@@ -228,16 +227,15 @@
             t.ny /= len;
             t.y = -0.5f;
             t.t = 0;
-            //Log.e("xx", "vtx " + t.x + "  " + t.y + "  " + t.z);
-            t.addToRS(rs);
+            tm.add_XYZ_ST_NORM(t.x, t.y, t.z, t.s, t.t, t.nx, t.ny, t.nz);
+            //android.util.Log.e("rs", "vtx x="+t.x+" y="+t.y+" z="+t.z+" s="+t.s+" t="+t.t);
             t.y = .5f;
             t.t = 1;
-            t.addToRS(rs);
+            tm.add_XYZ_ST_NORM(t.x, t.y, t.z, t.s, t.t, t.nx, t.ny, t.nz);
+            //android.util.Log.e("rs", "vtx x="+t.x+" y="+t.y+" z="+t.z+" s="+t.s+" t="+t.t);
 
-            //LOGE(" %f", runningS);
             if((runningS*2) > mTriangleOffsetsCount) {
-                //LOGE("**** img %i  %i", gTriangleOffsetsCount, ct*2);
-                mTriangleOffsets[mTriangleOffsetsCount] = ct*2;
+                mTriangleOffsets[mTriangleOffsetsCount] = ct*2 * 3;
                 mTriangleOffsetsTex[mTriangleOffsetsCount] = t.s;
                 mTriangleOffsetsCount ++;
             }
@@ -245,9 +243,10 @@
 
         count = (count * 2 - 2);
         for (int ct=0; ct < (count-2); ct+= 2) {
-            rs.triangleMeshAddTriangle(ct, ct+1, ct+2);
-            rs.triangleMeshAddTriangle(ct+1, ct+3, ct+2);
+            tm.addTriangle(ct, ct+1, ct+2);
+            tm.addTriangle(ct+1, ct+3, ct+2);
         }
+        return tm.create();
     }
 
 
diff --git a/libs/rs/rs.spec b/libs/rs/rs.spec
index e275f27..cb4dd00 100644
--- a/libs/rs/rs.spec
+++ b/libs/rs/rs.spec
@@ -130,6 +130,9 @@
 AllocationData {
 	param RsAllocation va
 	param const void * data
+	param uint32_t bytes
+	handcodeApi
+	togglePlay
 	}
 
 Allocation1DSubData {
@@ -137,6 +140,9 @@
 	param uint32_t xoff
 	param uint32_t count
 	param const void *data
+	param uint32_t bytes
+	handcodeApi
+	togglePlay
 	}
 
 Allocation2DSubData {
@@ -146,6 +152,7 @@
 	param uint32_t w
 	param uint32_t h
 	param const void *data
+	param uint32_t bytes
 	}
 
 AllocationRead {
diff --git a/libs/rs/rsAdapter.cpp b/libs/rs/rsAdapter.cpp
index 3242e11..d20e910 100644
--- a/libs/rs/rsAdapter.cpp
+++ b/libs/rs/rsAdapter.cpp
@@ -72,7 +72,7 @@
 RsAdapter1D rsi_Adapter1DCreate(Context *rsc)
 {
     Adapter1D *a = new Adapter1D();
-    a->incRef();
+    a->incUserRef();
     return a;
 }
 
@@ -185,7 +185,7 @@
 RsAdapter2D rsi_Adapter2DCreate(Context *rsc)
 {
     Adapter2D *a = new Adapter2D();
-    a->incRef();
+    a->incUserRef();
     return a;
 }
 
diff --git a/libs/rs/rsAllocation.cpp b/libs/rs/rsAllocation.cpp
index 3cb76bc..1f49ca1 100644
--- a/libs/rs/rsAllocation.cpp
+++ b/libs/rs/rsAllocation.cpp
@@ -115,9 +115,14 @@
 }
 
 
-void Allocation::data(const void *data)
+void Allocation::data(const void *data, uint32_t sizeBytes)
 {
-    memcpy(mPtr, data, mType->getSizeBytes());
+    uint32_t size = mType->getSizeBytes();
+    if (size != sizeBytes) {
+        LOGE("Allocation::data called with mismatched size expected %i, got %i", size, sizeBytes);
+        return;
+    }
+    memcpy(mPtr, data, size);
 }
 
 void Allocation::read(void *data)
@@ -125,16 +130,22 @@
     memcpy(data, mPtr, mType->getSizeBytes());
 }
 
-void Allocation::subData(uint32_t xoff, uint32_t count, const void *data)
+void Allocation::subData(uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes)
 {
     uint32_t eSize = mType->getElementSizeBytes();
     uint8_t * ptr = static_cast<uint8_t *>(mPtr);
     ptr += eSize * xoff;
-    memcpy(ptr, data, count * eSize);
+    uint32_t size = count * eSize;
+
+    if (size != sizeBytes) {
+        LOGE("Allocation::subData called with mismatched size expected %i, got %i", size, sizeBytes);
+        return;
+    }
+    memcpy(ptr, data, size);
 }
 
 void Allocation::subData(uint32_t xoff, uint32_t yoff,
-             uint32_t w, uint32_t h, const void *data)
+             uint32_t w, uint32_t h, const void *data, uint32_t sizeBytes)
 {
     uint32_t eSize = mType->getElementSizeBytes();
     uint32_t lineSize = eSize * w;
@@ -143,6 +154,12 @@
     const uint8_t *src = static_cast<const uint8_t *>(data);
     uint8_t *dst = static_cast<uint8_t *>(mPtr);
     dst += eSize * (xoff + yoff * destW);
+
+    if ((lineSize * eSize * h) != sizeBytes) {
+        rsAssert(!"Allocation::subData called with mismatched size");
+        return;
+    }
+
     for (uint32_t line=yoff; line < (yoff+h); line++) {
         uint8_t * ptr = static_cast<uint8_t *>(mPtr);
         memcpy(dst, src, lineSize);
@@ -152,7 +169,7 @@
 }
 
 void Allocation::subData(uint32_t xoff, uint32_t yoff, uint32_t zoff,
-             uint32_t w, uint32_t h, uint32_t d, const void *data)
+             uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes)
 {
 }
 
@@ -170,7 +187,7 @@
     const Type * type = static_cast<const Type *>(vtype);
 
     Allocation * alloc = new Allocation(type);
-    alloc->incRef();
+    alloc->incUserRef();
     return alloc;
 }
 
@@ -340,7 +357,7 @@
         LOGE("Memory allocation failure");
         return NULL;
     }
-    texAlloc->incRef();
+    texAlloc->incUserRef();
 
     ElementConverter_t cvt = pickConverter(dstFmt, srcFmt);
     cvt(texAlloc->getPtr(), data, w * h);
@@ -451,7 +468,7 @@
 
     RsAllocation vTexAlloc = rsi_AllocationCreateTyped(rsc, type);
     Allocation *texAlloc = static_cast<Allocation *>(vTexAlloc);
-    texAlloc->incRef();
+    texAlloc->incUserRef();
     if (texAlloc == NULL) {
         LOGE("Memory allocation failure");
         fclose(f);
@@ -503,24 +520,24 @@
     return texAlloc;
 }
 
-void rsi_AllocationData(Context *rsc, RsAllocation va, const void *data)
+void rsi_AllocationData(Context *rsc, RsAllocation va, const void *data, uint32_t sizeBytes)
 {
     Allocation *a = static_cast<Allocation *>(va);
-    a->data(data);
+    a->data(data, sizeBytes);
     rsc->allocationCheck(a);
 }
 
-void rsi_Allocation1DSubData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t count, const void *data)
+void rsi_Allocation1DSubData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes)
 {
     Allocation *a = static_cast<Allocation *>(va);
-    a->subData(xoff, count, data);
+    a->subData(xoff, count, data, sizeBytes);
     rsc->allocationCheck(a);
 }
 
-void rsi_Allocation2DSubData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data)
+void rsi_Allocation2DSubData(Context *rsc, RsAllocation va, uint32_t xoff, uint32_t yoff, uint32_t w, uint32_t h, const void *data, uint32_t sizeBytes)
 {
     Allocation *a = static_cast<Allocation *>(va);
-    a->subData(xoff, yoff, w, h, data);
+    a->subData(xoff, yoff, w, h, data, sizeBytes);
     rsc->allocationCheck(a);
 }
 
diff --git a/libs/rs/rsAllocation.h b/libs/rs/rsAllocation.h
index 00af9ed..1f58ec5 100644
--- a/libs/rs/rsAllocation.h
+++ b/libs/rs/rsAllocation.h
@@ -53,12 +53,12 @@
     uint32_t getBufferObjectID() const {return mBufferID;}
 
 
-    void data(const void *data);
-    void subData(uint32_t xoff, uint32_t count, const void *data);
+    void data(const void *data, uint32_t sizeBytes);
+    void subData(uint32_t xoff, uint32_t count, const void *data, uint32_t sizeBytes);
     void subData(uint32_t xoff, uint32_t yoff,
-                 uint32_t w, uint32_t h, const void *data);
+                 uint32_t w, uint32_t h, const void *data, uint32_t sizeBytes);
     void subData(uint32_t xoff, uint32_t yoff, uint32_t zoff,
-                 uint32_t w, uint32_t h, uint32_t d, const void *data);
+                 uint32_t w, uint32_t h, uint32_t d, const void *data, uint32_t sizeBytes);
 
     void read(void *data);
 
diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp
index 52c2b78..c28bd02 100644
--- a/libs/rs/rsContext.cpp
+++ b/libs/rs/rsContext.cpp
@@ -45,6 +45,7 @@
         configAttribsPtr[1] = 16;
         configAttribsPtr += 2;
     }
+
     configAttribsPtr[0] = EGL_NONE;
     rsAssert(configAttribsPtr < (configAttribs + (sizeof(configAttribs) / sizeof(EGLint))));
 
@@ -53,7 +54,7 @@
 
     status_t err = EGLUtils::selectConfigForNativeWindow(mEGL.mDisplay, configAttribs, mWndSurface, &mEGL.mConfig);
     if (err) {
-     LOGE("couldn't find an EGLConfig matching the screen format\n");
+       LOGE("couldn't find an EGLConfig matching the screen format\n");
     }
     //eglChooseConfig(mEGL.mDisplay, configAttribs, &mEGL.mConfig, 1, &mEGL.mNumConfigs);
 
@@ -76,11 +77,11 @@
     mGL.mRenderer = glGetString(GL_RENDERER);
     mGL.mExtensions = glGetString(GL_EXTENSIONS);
 
-    //LOGV("EGL Version %i %i", mEGL.mMajorVersion, mEGL.mMinorVersion);
-    //LOGV("GL Version %s", mGL.mVersion);
-    //LOGV("GL Vendor %s", mGL.mVendor);
-    //LOGV("GL Renderer %s", mGL.mRenderer);
-    //LOGV("GL Extensions %s", mGL.mExtensions);
+    LOGV("EGL Version %i %i", mEGL.mMajorVersion, mEGL.mMinorVersion);
+    LOGV("GL Version %s", mGL.mVersion);
+    LOGV("GL Vendor %s", mGL.mVendor);
+    LOGV("GL Renderer %s", mGL.mRenderer);
+    LOGV("GL Extensions %s", mGL.mExtensions);
 
     if ((strlen((const char *)mGL.mVersion) < 12) || memcmp(mGL.mVersion, "OpenGL ES-CM", 12)) {
         LOGE("Error, OpenGL ES Lite not supported");
@@ -432,7 +433,7 @@
         }
 
         for (size_t ct = 0; ct < mObjDestroy.mDestroyList.size(); ct++) {
-            mObjDestroy.mDestroyList[ct]->decRef();
+            mObjDestroy.mDestroyList[ct]->decUserRef();
         }
         mObjDestroy.mDestroyList.clear();
         mObjDestroy.mNeedToEmpty = false;
@@ -522,7 +523,7 @@
 {
     ObjectBase *ob = static_cast<ObjectBase *>(obj);
     rsc->removeName(ob);
-    ob->decRef();
+    ob->decUserRef();
 }
 
 void rsi_ContextSetDefineF(Context *rsc, const char* name, float value)
diff --git a/libs/rs/rsElement.cpp b/libs/rs/rsElement.cpp
index 389b2c0..6794522 100644
--- a/libs/rs/rsElement.cpp
+++ b/libs/rs/rsElement.cpp
@@ -215,7 +215,7 @@
     rsAssert(!mComponents[idx].get());
     rsAssert(idx < mComponentCount);
     mComponents[idx].set(c);
-    c->incRef();
+    c->incUserRef();
 }
 
 
@@ -387,7 +387,7 @@
 
     rsAssert(sec->mPredefinedList[predef].mEnum == predef);
     Element * e = sec->mPredefinedList[predef].mElement;
-    e->incRef();
+    e->incUserRef();
     return e;
 }
 
@@ -412,7 +412,7 @@
     }
 
     rsc->mStateElement.mComponentBuildList.clear();
-    se->incRef();
+    se->incUserRef();
     return se;
 }
 
diff --git a/libs/rs/rsHandcode.h b/libs/rs/rsHandcode.h
new file mode 100644
index 0000000..800eddd
--- /dev/null
+++ b/libs/rs/rsHandcode.h
@@ -0,0 +1,47 @@
+
+#define DATA_SYNC_SIZE 1024
+
+static inline void rsHCAPI_AllocationData (RsContext rsc, RsAllocation va, const void * data, uint32_t sizeBytes)
+{
+    ThreadIO *io = &((Context *)rsc)->mIO;
+    uint32_t size = sizeof(RS_CMD_AllocationData);
+    if (sizeBytes < DATA_SYNC_SIZE) {
+        size += (sizeBytes + 3) & ~3;
+    }
+    RS_CMD_AllocationData *cmd = static_cast<RS_CMD_AllocationData *>(io->mToCore.reserve(size));
+    cmd->va = va;
+    cmd->bytes = sizeBytes;
+    cmd->data = data;
+    if (sizeBytes < DATA_SYNC_SIZE) {
+        cmd->data = (void *)(cmd+1);
+        memcpy(cmd+1, data, sizeBytes);
+        io->mToCore.commit(RS_CMD_ID_AllocationData, size);
+    } else {
+        io->mToCore.commitSync(RS_CMD_ID_AllocationData, size);
+    }
+}
+
+
+static inline void rsHCAPI_Allocation1DSubData (RsContext rsc, RsAllocation va, uint32_t xoff, uint32_t count, const void * data, uint32_t sizeBytes)
+{
+    ThreadIO *io = &((Context *)rsc)->mIO;
+    uint32_t size = sizeof(RS_CMD_Allocation1DSubData);
+    if (sizeBytes < DATA_SYNC_SIZE) {
+        size += (sizeBytes + 3) & ~3;
+    }
+    RS_CMD_Allocation1DSubData *cmd = static_cast<RS_CMD_Allocation1DSubData *>(io->mToCore.reserve(size));
+    cmd->va = va;
+    cmd->xoff = xoff;
+    cmd->count = count;
+    cmd->data = data;
+    cmd->bytes = sizeBytes;
+    if (sizeBytes < DATA_SYNC_SIZE) {
+        cmd->data = (void *)(cmd+1);
+        memcpy(cmd+1, data, sizeBytes);
+        io->mToCore.commit(RS_CMD_ID_Allocation1DSubData, size);
+    } else {
+        io->mToCore.commitSync(RS_CMD_ID_Allocation1DSubData, size);
+    }
+
+}
+
diff --git a/libs/rs/rsLight.cpp b/libs/rs/rsLight.cpp
index f780e52..ad06c1f 100644
--- a/libs/rs/rsLight.cpp
+++ b/libs/rs/rsLight.cpp
@@ -106,7 +106,7 @@
 {
     Light *l = new Light(rsc->mStateLight.mIsLocal,
                          rsc->mStateLight.mIsMono);
-    l->incRef();
+    l->incUserRef();
     return l;
 }
 
diff --git a/libs/rs/rsObjectBase.cpp b/libs/rs/rsObjectBase.cpp
index 07bbc1e..7e7afab 100644
--- a/libs/rs/rsObjectBase.cpp
+++ b/libs/rs/rsObjectBase.cpp
@@ -21,28 +21,51 @@
 
 ObjectBase::ObjectBase()
 {
-    mRefCount = 0;
+    mUserRefCount = 0;
+    mSysRefCount = 0;
     mName = NULL;
 }
 
 ObjectBase::~ObjectBase()
 {
     //LOGV("~ObjectBase %p  ref %i", this, mRefCount);
-    rsAssert(!mRefCount);
+    rsAssert(!mUserRefCount);
+    rsAssert(!mSysRefCount);
 }
 
-void ObjectBase::incRef() const
+void ObjectBase::incUserRef() const
 {
-    mRefCount ++;
+    mUserRefCount ++;
     //LOGV("ObjectBase %p inc ref %i", this, mRefCount);
 }
 
-void ObjectBase::decRef() const
+void ObjectBase::incSysRef() const
 {
-    rsAssert(mRefCount > 0);
-    mRefCount --;
+    mSysRefCount ++;
+    //LOGV("ObjectBase %p inc ref %i", this, mRefCount);
+}
+
+void ObjectBase::decUserRef() const
+{
+    rsAssert(mUserRefCount > 0);
+    mUserRefCount --;
     //LOGV("ObjectBase %p dec ref %i", this, mRefCount);
-    if (!mRefCount) {
+    if (!(mSysRefCount | mUserRefCount)) {
+        if (mName) {
+            LOGV("Deleting RS object %p, name %s", this, mName);
+        } else {
+            LOGV("Deleting RS object %p, no name", this);
+        }
+        delete this;
+    }
+}
+
+void ObjectBase::decSysRef() const
+{
+    rsAssert(mSysRefCount > 0);
+    mSysRefCount --;
+    //LOGV("ObjectBase %p dec ref %i", this, mRefCount);
+    if (!(mSysRefCount | mUserRefCount)) {
         if (mName) {
             LOGV("Deleting RS object %p, name %s", this, mName);
         } else {
diff --git a/libs/rs/rsObjectBase.h b/libs/rs/rsObjectBase.h
index b2c3338..d1e6baa 100644
--- a/libs/rs/rsObjectBase.h
+++ b/libs/rs/rsObjectBase.h
@@ -30,8 +30,11 @@
     ObjectBase();
     virtual ~ObjectBase();
 
-    void incRef() const;
-    void decRef() const;
+    void incSysRef() const;
+    void decSysRef() const;
+
+    void incUserRef() const;
+    void decUserRef() const;
 
     const char * getName() const {
         return mName;
@@ -41,13 +44,14 @@
 
 private:
     char * mName;
-    mutable int32_t mRefCount;
+    mutable int32_t mSysRefCount;
+    mutable int32_t mUserRefCount;
 
 
 };
 
-template<class T> 
-class ObjectBaseRef 
+template<class T>
+class ObjectBaseRef
 {
 public:
     ObjectBaseRef() {
@@ -57,14 +61,14 @@
     ObjectBaseRef(const ObjectBaseRef &ref) {
         mRef = ref.get();
         if (mRef) {
-            mRef->incRef();
+            mRef->incSysRef();
         }
     }
 
     ObjectBaseRef(T *ref) {
         mRef = ref;
         if (mRef) {
-            ref->incRef();
+            ref->incSysRef();
         }
     }
 
@@ -77,7 +81,7 @@
             clear();
             mRef = ref;
             if (mRef) {
-                ref->incRef();
+                ref->incSysRef();
             }
         }
     }
@@ -88,7 +92,7 @@
 
     void clear() {
         if (mRef) {
-            mRef->decRef();
+            mRef->decSysRef();
         }
         mRef = NULL;
     }
@@ -97,8 +101,8 @@
         return mRef;
     }
 
-    inline T * operator-> () const { 
-        return mRef;  
+    inline T * operator-> () const {
+        return mRef;
     }
 
 protected:
diff --git a/libs/rs/rsProgramFragment.cpp b/libs/rs/rsProgramFragment.cpp
index 654974f..0adce75 100644
--- a/libs/rs/rsProgramFragment.cpp
+++ b/libs/rs/rsProgramFragment.cpp
@@ -227,7 +227,7 @@
 RsProgramFragment rsi_ProgramFragmentCreate(Context *rsc)
 {
     ProgramFragment *pf = rsc->mStateFragment.mPF;
-    pf->incRef();
+    pf->incUserRef();
     rsc->mStateFragment.mPF = 0;
     return pf;
 }
diff --git a/libs/rs/rsProgramFragmentStore.cpp b/libs/rs/rsProgramFragmentStore.cpp
index 36ec615..3179484 100644
--- a/libs/rs/rsProgramFragmentStore.cpp
+++ b/libs/rs/rsProgramFragmentStore.cpp
@@ -251,7 +251,7 @@
 RsProgramFragmentStore rsi_ProgramFragmentStoreCreate(Context *rsc)
 {
     ProgramFragmentStore *pfs = rsc->mStateFragmentStore.mPFS;
-    pfs->incRef();
+    pfs->incUserRef();
     rsc->mStateFragmentStore.mPFS = 0;
     return pfs;
 }
diff --git a/libs/rs/rsProgramVertex.cpp b/libs/rs/rsProgramVertex.cpp
index dc57d34..a07e166 100644
--- a/libs/rs/rsProgramVertex.cpp
+++ b/libs/rs/rsProgramVertex.cpp
@@ -143,10 +143,10 @@
 
     Matrix m;
     m.loadOrtho(0,w, h,0, -1,1);
-    alloc->subData(RS_PROGRAM_VERTEX_PROJECTION_OFFSET, 16, &m.m[0]);
+    alloc->subData(RS_PROGRAM_VERTEX_PROJECTION_OFFSET, 16, &m.m[0], 16*4);
 
     m.loadIdentity();
-    alloc->subData(RS_PROGRAM_VERTEX_MODELVIEW_OFFSET, 16, &m.m[0]);
+    alloc->subData(RS_PROGRAM_VERTEX_MODELVIEW_OFFSET, 16, &m.m[0], 16*4);
 }
 
 
@@ -162,7 +162,7 @@
 RsProgramVertex rsi_ProgramVertexCreate(Context *rsc)
 {
     ProgramVertex *pv = rsc->mStateVertex.mPV;
-    pv->incRef();
+    pv->incUserRef();
     rsc->mStateVertex.mPV = 0;
     return pv;
 }
diff --git a/libs/rs/rsSampler.cpp b/libs/rs/rsSampler.cpp
index 332d532..3f56faa 100644
--- a/libs/rs/rsSampler.cpp
+++ b/libs/rs/rsSampler.cpp
@@ -143,7 +143,7 @@
                               ss->mWrapS,
                               ss->mWrapT,
                               ss->mWrapR);
-    s->incRef();
+    s->incUserRef();
     return s;
 }
 
diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp
index 9419829..0c7ac18 100644
--- a/libs/rs/rsScriptC.cpp
+++ b/libs/rs/rsScriptC.cpp
@@ -334,7 +334,7 @@
     ss->runCompiler(rsc);
 
     ScriptC *s = new ScriptC();
-    s->incRef();
+    s->incUserRef();
     s->mAccScript = ss->mAccScript;
     ss->mAccScript = NULL;
     s->mEnviroment = ss->mEnviroment;
diff --git a/libs/rs/rsSimpleMesh.cpp b/libs/rs/rsSimpleMesh.cpp
index 0b745eb..7c73eb9 100644
--- a/libs/rs/rsSimpleMesh.cpp
+++ b/libs/rs/rsSimpleMesh.cpp
@@ -67,7 +67,7 @@
 
     if (mIndexType.get()) {
         glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, mIndexBuffer->getBufferObjectID());
-        glDrawElements(mGLPrimitive, len, GL_UNSIGNED_SHORT, (GLvoid *)(start * 2));
+        glDrawElements(mGLPrimitive, len, GL_UNSIGNED_SHORT, (uint16_t *)(start * 2));
     } else {
         glDrawArrays(mGLPrimitive, start, len);
     }
@@ -91,7 +91,7 @@
 RsSimpleMesh rsi_SimpleMeshCreate(Context *rsc, RsType prim, RsType idx, RsType *vtx, uint32_t vtxCount, uint32_t primType)
 {
     SimpleMesh *sm = new SimpleMesh();
-    sm->incRef();
+    sm->incUserRef();
 
     sm->mIndexType.set((const Type *)idx);
     sm->mPrimitiveType.set((const Type *)prim);
diff --git a/libs/rs/rsTriangleMesh.cpp b/libs/rs/rsTriangleMesh.cpp
index 99f8adb..64bb71b 100644
--- a/libs/rs/rsTriangleMesh.cpp
+++ b/libs/rs/rsTriangleMesh.cpp
@@ -199,7 +199,7 @@
     memcpy(tm->mIndexData, tmc->mIndexData.array(), tm->mIndexDataSize);
     tm->analyzeElement();
 
-    tm->incRef();
+    tm->incUserRef();
     return tm;
 }
 
diff --git a/libs/rs/rsType.cpp b/libs/rs/rsType.cpp
index 5a9090e..1838fa6 100644
--- a/libs/rs/rsType.cpp
+++ b/libs/rs/rsType.cpp
@@ -252,6 +252,7 @@
 
     uint32_t stride = mElement->getSizeBytes();
     if (mGL.mVtx.size) {
+        //LOGE("va vtx %i %x, %i, %p", mGL.mVtx.size, mGL.mVtx.type, stride, (void *)mGL.mVtx.offset);
         glEnableClientState(GL_VERTEX_ARRAY);
         glVertexPointer(mGL.mVtx.size,
                         mGL.mVtx.type,
@@ -260,9 +261,10 @@
     }
 
     if (mGL.mNorm.size) {
+        //LOGE("va norm %i %x, %i, %p", mGL.mNorm.size, mGL.mNorm.type, stride, (void *)mGL.mNorm.offset);
         glEnableClientState(GL_NORMAL_ARRAY);
         rsAssert(mGL.mNorm.size == 3);
-        glNormalPointer(mGL.mNorm.size,
+        glNormalPointer(mGL.mNorm.type,
                         stride,
                         (void *)mGL.mNorm.offset);
     }
@@ -277,6 +279,7 @@
 
     for (uint32_t ct=0; ct < RS_MAX_TEXTURE; ct++) {
         if (mGL.mTex[ct].size) {
+            //LOGE("va tex%i %i %x, %i, %p", ct, mGL.mTex[ct].size, mGL.mTex[ct].type, stride, (void *)mGL.mTex[ct].offset);
             glClientActiveTexture(GL_TEXTURE0 + ct);
             glEnableClientState(GL_TEXTURE_COORD_ARRAY);
             glTexCoordPointer(mGL.mTex[ct].size,
@@ -361,7 +364,7 @@
     TypeState * stc = &rsc->mStateType;
 
     Type * st = new Type();
-    st->incRef();
+    st->incUserRef();
     st->setDimX(stc->mX);
     st->setDimY(stc->mY);
     st->setDimZ(stc->mZ);
diff --git a/libs/rs/rsg_generator.c b/libs/rs/rsg_generator.c
index e3f816f..74ba248 100644
--- a/libs/rs/rsg_generator.c
+++ b/libs/rs/rsg_generator.c
@@ -141,6 +141,7 @@
     fprintf(f, "\n");
     fprintf(f, "using namespace android;\n");
     fprintf(f, "using namespace android::renderscript;\n");
+    fprintf(f, "#include \"rsHandcode.h\"\n");
     fprintf(f, "\n");
 
     for(ct=0; ct < apiCount; ct++) {
@@ -149,30 +150,39 @@
 
         printFuncDecl(f, api, "rs", 0);
         fprintf(f, "\n{\n");
-        fprintf(f, "    ThreadIO *io = &((Context *)rsc)->mIO;\n");
-        //fprintf(f, "    LOGE(\"add command %s\\n\");\n", api->name);
-        fprintf(f, "    RS_CMD_%s *cmd = static_cast<RS_CMD_%s *>(io->mToCore.reserve(sizeof(RS_CMD_%s)));\n", api->name, api->name, api->name);
-        fprintf(f, "    uint32_t size = sizeof(RS_CMD_%s);\n", api->name);
+        if (api->handcodeApi) {
+            fprintf(f, "    rsHCAPI_%s(rsc", api->name);
+            for(ct2=0; ct2 < api->paramCount; ct2++) {
+                const VarType *vt = &api->params[ct2];
+                fprintf(f, ", %s", vt->name);
+            }
+            fprintf(f, ");\n");
+        } else {
+            fprintf(f, "    ThreadIO *io = &((Context *)rsc)->mIO;\n");
+            //fprintf(f, "    LOGE(\"add command %s\\n\");\n", api->name);
+            fprintf(f, "    RS_CMD_%s *cmd = static_cast<RS_CMD_%s *>(io->mToCore.reserve(sizeof(RS_CMD_%s)));\n", api->name, api->name, api->name);
+            fprintf(f, "    uint32_t size = sizeof(RS_CMD_%s);\n", api->name);
 
-        for(ct2=0; ct2 < api->paramCount; ct2++) {
-            const VarType *vt = &api->params[ct2];
-            needFlush += vt->ptrLevel;
-            fprintf(f, "    cmd->%s = %s;\n", vt->name, vt->name);
-        }
-        if (api->ret.typeName[0]) {
-            needFlush = 1;
-        }
+            for(ct2=0; ct2 < api->paramCount; ct2++) {
+                const VarType *vt = &api->params[ct2];
+                needFlush += vt->ptrLevel;
+                fprintf(f, "    cmd->%s = %s;\n", vt->name, vt->name);
+            }
+            if (api->ret.typeName[0]) {
+                needFlush = 1;
+            }
 
-        fprintf(f, "    io->mToCore.commit");
-        if (needFlush) {
-            fprintf(f, "Sync");
-        }
-        fprintf(f, "(RS_CMD_ID_%s, size);\n", api->name);
+            fprintf(f, "    io->mToCore.commit");
+            if (needFlush) {
+                fprintf(f, "Sync");
+            }
+            fprintf(f, "(RS_CMD_ID_%s, size);\n", api->name);
 
-        if (api->ret.typeName[0]) {
-            fprintf(f, "    return reinterpret_cast<");
-            printVarType(f, &api->ret);
-            fprintf(f, ">(io->mToCoreRet);\n");
+            if (api->ret.typeName[0]) {
+                fprintf(f, "    return reinterpret_cast<");
+                printVarType(f, &api->ret);
+                fprintf(f, ">(io->mToCoreRet);\n");
+            }
         }
         fprintf(f, "};\n\n");
     }
@@ -191,6 +201,7 @@
     fprintf(f, "\n");
     fprintf(f, "namespace android {\n");
     fprintf(f, "namespace renderscript {\n");
+    fprintf(f, "#include \"rsHandcode.h\"\n");
     fprintf(f, "\n");
 
     for(ct=0; ct < apiCount; ct++) {
@@ -198,20 +209,22 @@
 
         fprintf(f, "void rsp_%s(Context *con, const void *vp)\n", api->name);
         fprintf(f, "{\n");
-        //fprintf(f, "    LOGE(\"play command %s\\n\");\n", api->name);
-        fprintf(f, "    const RS_CMD_%s *cmd = static_cast<const RS_CMD_%s *>(vp);\n", api->name, api->name);
-        fprintf(f, "    ");
-        if (api->ret.typeName[0]) {
-            fprintf(f, "con->mIO.mToCoreRet = (intptr_t)");
+        if (api->handcodePlay) {
+            fprintf(f, "    rsHCPLAY_%s(con, vp);\n", api->name);
+        } else {
+            //fprintf(f, "    LOGE(\"play command %s\\n\");\n", api->name);
+            fprintf(f, "    const RS_CMD_%s *cmd = static_cast<const RS_CMD_%s *>(vp);\n", api->name, api->name);
+            fprintf(f, "    ");
+            if (api->ret.typeName[0]) {
+                fprintf(f, "con->mIO.mToCoreRet = (intptr_t)");
+            }
+            fprintf(f, "rsi_%s(con", api->name);
+            for(ct2=0; ct2 < api->paramCount; ct2++) {
+                const VarType *vt = &api->params[ct2];
+                fprintf(f, ",\n           cmd->%s", vt->name);
+            }
+            fprintf(f, ");\n");
         }
-        fprintf(f, "rsi_%s(con", api->name);
-        for(ct2=0; ct2 < api->paramCount; ct2++) {
-            const VarType *vt = &api->params[ct2];
-            fprintf(f, ",");
-            fprintf(f, "\n           cmd->%s", vt->name);
-        }
-        fprintf(f, ");\n");
-
         fprintf(f, "};\n\n");
     }
 
diff --git a/libs/rs/spec.h b/libs/rs/spec.h
index ba802f7..82650a7 100644
--- a/libs/rs/spec.h
+++ b/libs/rs/spec.h
@@ -24,6 +24,8 @@
 typedef struct {
   char name[256];
   int sync;
+  int handcodeApi;
+  int handcodePlay;
   int paramCount;
   VarType ret;
   VarType params[16];
diff --git a/libs/rs/spec.l b/libs/rs/spec.l
index 62fcb63..d81d47e 100644
--- a/libs/rs/spec.l
+++ b/libs/rs/spec.l
@@ -47,6 +47,14 @@
     apis[apiCount].sync = 1;
     }
 
+<api_entry2>"handcodeApi" {
+    apis[apiCount].handcodeApi = 1;
+    }
+
+<api_entry2>"handcodePlay" {
+    apis[apiCount].handcodePlay = 1;
+    }
+
 <api_entry2>"ret" {
     currType = &apis[apiCount].ret;
     typeNextState = api_entry2;