diff --git a/java/Samples/res/raw/shaderarrayf.glsl b/java/Samples/res/raw/shaderarrayf.glsl
new file mode 100644
index 0000000..238ecad
--- /dev/null
+++ b/java/Samples/res/raw/shaderarrayf.glsl
@@ -0,0 +1,16 @@
+
+varying lowp float light0_Diffuse;
+varying lowp float light0_Specular;
+varying lowp float light1_Diffuse;
+varying lowp float light1_Specular;
+varying vec2 varTex0;
+
+void main() {  // Fragment stage: modulates the texel by the two lights' diffuse terms, then adds both specular terms
+   vec2 t0 = varTex0.xy;
+   lowp vec4 col = texture2D(UNI_Tex0, t0).rgba;
+   col.xyz = col.xyz * (light0_Diffuse * UNI_light_DiffuseColor[0].xyz + light1_Diffuse * UNI_light_DiffuseColor[1].xyz);  // diffuse: texel * sum of per-light (intensity * color)
+   col.xyz += light0_Specular * UNI_light_SpecularColor[0].xyz;  // specular is added on top, not multiplied by the texel
+   col.xyz += light1_Specular * UNI_light_SpecularColor[1].xyz;
+   gl_FragColor = col;  // alpha is the sampled texel alpha, untouched by lighting
+}
+
diff --git a/java/Samples/res/raw/shaderarrayv.glsl b/java/Samples/res/raw/shaderarrayv.glsl
new file mode 100644
index 0000000..7a1310a
--- /dev/null
+++ b/java/Samples/res/raw/shaderarrayv.glsl
@@ -0,0 +1,32 @@
+varying float light0_Diffuse;
+varying float light0_Specular;
+varying float light1_Diffuse;
+varying float light1_Specular;
+varying vec2 varTex0;
+
+// This is where actual shader code begins
+void main() {  // Vertex stage: transforms the vertex and computes per-vertex diffuse/specular terms for two lights
+   vec4 worldPos = UNI_model[0] * ATTRIB_position;
+   worldPos = UNI_model[1] * worldPos;  // model[1] is applied after model[0]
+   gl_Position = UNI_proj * worldPos;
+
+   mat4 model0 = UNI_model[0];
+   mat3 model3 = mat3(model0[0].xyz, model0[1].xyz, model0[2].xyz);  // upper-left 3x3 of model[0], used to transform the normal
+   vec3 worldNorm = model3 * ATTRIB_normal;
+   vec3 V = normalize(-worldPos.xyz);  // direction from the vertex toward the origin
+
+   vec3 light0Vec = normalize(UNI_light_Posision[0].xyz - worldPos.xyz);  // points from the vertex toward light 0
+   vec3 light0R = -reflect(light0Vec, worldNorm);  // reflect() expects an incident vector; light0Vec points the other way, so negate
+   light0_Diffuse = clamp(dot(worldNorm, light0Vec), 0.0, 1.0) * UNI_light_Diffuse[0];
+   float light0Spec = clamp(dot(light0R, V), 0.001, 1.0);  // lower bound 0.001 keeps the pow() base strictly positive
+   light0_Specular = pow(light0Spec, UNI_light_CosinePower[0]) * UNI_light_Specular[0];
+
+   vec3 light1Vec = normalize(UNI_light_Posision[1].xyz - worldPos.xyz);  // points from the vertex toward light 1
+   vec3 light1R = -reflect(light1Vec, worldNorm);  // negated exactly like light0R so both lights use the same reflection convention
+   light1_Diffuse = clamp(dot(worldNorm, light1Vec), 0.0, 1.0) * UNI_light_Diffuse[1];
+   float light1Spec = clamp(dot(light1R, V), 0.001, 1.0);  // lower bound 0.001 keeps the pow() base strictly positive
+   light1_Specular = pow(light1Spec, UNI_light_CosinePower[1]) * UNI_light_Specular[1];
+
+   gl_PointSize = 1.0;
+   varTex0 = ATTRIB_texture0;  // pass the texture coordinate through to the fragment stage
+}
diff --git a/java/Samples/res/raw/shaderf.glsl b/java/Samples/res/raw/shaderf.glsl
index fcbe7ee..d56e203 100644
--- a/java/Samples/res/raw/shaderf.glsl
+++ b/java/Samples/res/raw/shaderf.glsl
@@ -8,9 +8,9 @@
 void main() {
    vec2 t0 = varTex0.xy;
    lowp vec4 col = texture2D(UNI_Tex0, t0).rgba;
-   col.xyz = col.xyz * (light0_Diffuse * UNI_light0_DiffuseColor + light1_Diffuse * UNI_light1_DiffuseColor);
-   col.xyz += light0_Specular * UNI_light0_SpecularColor;
-   col.xyz += light1_Specular * UNI_light1_SpecularColor;
+   col.xyz = col.xyz * (light0_Diffuse * UNI_light0_DiffuseColor.xyz + light1_Diffuse * UNI_light1_DiffuseColor.xyz);
+   col.xyz += light0_Specular * UNI_light0_SpecularColor.xyz;
+   col.xyz += light1_Specular * UNI_light1_SpecularColor.xyz;
    gl_FragColor = col;
 }
 
diff --git a/java/Samples/res/raw/shaderv.glsl b/java/Samples/res/raw/shaderv.glsl
index 867589c..f7d01de 100644
--- a/java/Samples/res/raw/shaderv.glsl
+++ b/java/Samples/res/raw/shaderv.glsl
@@ -13,13 +13,13 @@
    vec3 worldNorm = model3 * ATTRIB_normal;
    vec3 V = normalize(-worldPos.xyz);
 
-   vec3 light0Vec = normalize(UNI_light0_Posision - worldPos.xyz);
-   vec3 light0R = reflect(light0Vec, worldNorm);
+   vec3 light0Vec = normalize(UNI_light0_Posision.xyz - worldPos.xyz);
+   vec3 light0R = -reflect(light0Vec, worldNorm);
    light0_Diffuse = clamp(dot(worldNorm, light0Vec), 0.0, 1.0) * UNI_light0_Diffuse;
    float light0Spec = clamp(dot(light0R, V), 0.001, 1.0);
    light0_Specular = pow(light0Spec, UNI_light0_CosinePower) * UNI_light0_Specular;
 
-   vec3 light1Vec = normalize(UNI_light1_Posision - worldPos.xyz);
+   vec3 light1Vec = normalize(UNI_light1_Posision.xyz - worldPos.xyz);
    vec3 light1R = reflect(light1Vec, worldNorm);
    light1_Diffuse = clamp(dot(worldNorm, light1Vec), 0.0, 1.0) * UNI_light1_Diffuse;
    float light1Spec = clamp(dot(light1R, V), 0.001, 1.0);
diff --git a/java/Samples/src/com/android/samples/RsRenderStatesRS.java b/java/Samples/src/com/android/samples/RsRenderStatesRS.java
index 85c2557..dd2daa7 100644
--- a/java/Samples/src/com/android/samples/RsRenderStatesRS.java
+++ b/java/Samples/src/com/android/samples/RsRenderStatesRS.java
@@ -43,7 +43,7 @@
         mOptionsARGB.inScaled = false;
         mOptionsARGB.inPreferredConfig = Bitmap.Config.ARGB_8888;
         mMode = 0;
-        mMaxModes = 9;
+        mMaxModes = 0;
         initRS();
     }
 
@@ -73,7 +73,12 @@
     private ProgramFragment mProgFragmentCustom;
     private ProgramFragment mProgFragmentMultitex;
     private ScriptField_VertexShaderConstants_s mVSConst;
+    private ScriptField_VertexShaderConstants2_s mVSConst2;
     private ScriptField_FragentShaderConstants_s mFSConst;
+    private ScriptField_FragentShaderConstants2_s mFSConst2;
+
+    private ProgramVertex mProgVertexCustom2;
+    private ProgramFragment mProgFragmentCustom2;
 
     private ProgramRaster mCullBack;
     private ProgramRaster mCullFront;
@@ -189,10 +194,14 @@
 
     private void initCustomShaders() {
         mVSConst = new ScriptField_VertexShaderConstants_s(mRS, 1);
+        mVSConst2 = new ScriptField_VertexShaderConstants2_s(mRS, 1);
         mFSConst = new ScriptField_FragentShaderConstants_s(mRS, 1);
+        mFSConst2 = new ScriptField_FragentShaderConstants2_s(mRS, 1);
 
         mScript.bind_gVSConstants(mVSConst);
+        mScript.bind_gVSConstants2(mVSConst2);
         mScript.bind_gFSConstants(mFSConst);
+        mScript.bind_gFSConstants2(mFSConst2);
 
         // Initialize the shader builder
         ProgramVertex.ShaderBuilder pvbCustom = new ProgramVertex.ShaderBuilder(mRS);
@@ -217,6 +226,20 @@
         // Bind the source of constant data
         mProgFragmentCustom.bindConstants(mFSConst.getAllocation(), 0);
 
+        pvbCustom = new ProgramVertex.ShaderBuilder(mRS);
+        pvbCustom.setShader(mRes, R.raw.shaderarrayv);
+        pvbCustom.addInput(ScriptField_VertexShaderInputs_s.createElement(mRS));
+        pvbCustom.addConstant(mVSConst2.getAllocation().getType());
+        mProgVertexCustom2 = pvbCustom.create();
+        mProgVertexCustom2.bindConstants(mVSConst2.getAllocation(), 0);
+
+        pfbCustom = new ProgramFragment.ShaderBuilder(mRS);
+        pfbCustom.setShader(mRes, R.raw.shaderarrayf);
+        pfbCustom.setTextureCount(1);
+        pfbCustom.addConstant(mFSConst2.getAllocation().getType());
+        mProgFragmentCustom2 = pfbCustom.create();
+        mProgFragmentCustom2.bindConstants(mFSConst2.getAllocation(), 0);
+
         pfbCustom = new ProgramFragment.ShaderBuilder(mRS);
         pfbCustom.setShader(mRes, R.raw.multitexf);
         pfbCustom.setTextureCount(3);
@@ -225,6 +248,9 @@
         mScript.set_gProgVertexCustom(mProgVertexCustom);
         mScript.set_gProgFragmentCustom(mProgFragmentCustom);
         mScript.set_gProgFragmentMultitex(mProgFragmentMultitex);
+
+        mScript.set_gProgVertexCustom2(mProgVertexCustom2);
+        mScript.set_gProgFragmentCustom2(mProgFragmentCustom2);
     }
 
     private Allocation loadTextureRGB(int id) {
@@ -334,6 +360,8 @@
 
         mScript = new ScriptC_rsrenderstates(mRS, mRes, R.raw.rsrenderstates);
 
+        mMaxModes = mScript.get_gMaxModes();
+
         initSamplers();
         initProgramStore();
         initProgramFragment();
diff --git a/java/Samples/src/com/android/samples/rsrenderstates.rs b/java/Samples/src/com/android/samples/rsrenderstates.rs
index 8be35f8..4f8eada 100644
--- a/java/Samples/src/com/android/samples/rsrenderstates.rs
+++ b/java/Samples/src/com/android/samples/rsrenderstates.rs
@@ -19,6 +19,8 @@
 #include "rs_graphics.rsh"
 #include "shader_def.rsh"
 
+const int gMaxModes = 10;
+
 rs_program_vertex gProgVertex;
 rs_program_fragment gProgFragmentColor;
 rs_program_fragment gProgFragmentTexture;
@@ -59,12 +61,16 @@
 
 // Custom vertex shader compunents
 VertexShaderConstants *gVSConstants;
+VertexShaderConstants2 *gVSConstants2;
 FragentShaderConstants *gFSConstants;
+FragentShaderConstants2 *gFSConstants2;
 // Export these out to easily set the inputs to shader
 VertexShaderInputs *gVSInputs;
 // Custom shaders we use for lighting
 rs_program_vertex gProgVertexCustom;
 rs_program_fragment gProgFragmentCustom;
+rs_program_vertex gProgVertexCustom2;
+rs_program_fragment gProgFragmentCustom2;
 rs_program_fragment gProgFragmentMultitex;
 
 float gDt = 0;
@@ -367,10 +373,10 @@
 void setupCustomShaderLights() {
     float4 light0Pos = {-5.0f, 5.0f, -10.0f, 1.0f};
     float4 light1Pos = {2.0f, 5.0f, 15.0f, 1.0f};
-    float3 light0DiffCol = {0.9f, 0.7f, 0.7f};
-    float3 light0SpecCol = {0.9f, 0.6f, 0.6f};
-    float3 light1DiffCol = {0.5f, 0.5f, 0.9f};
-    float3 light1SpecCol = {0.5f, 0.5f, 0.9f};
+    float4 light0DiffCol = {0.9f, 0.7f, 0.7f, 1.0f};
+    float4 light0SpecCol = {0.9f, 0.6f, 0.6f, 1.0f};
+    float4 light1DiffCol = {0.5f, 0.5f, 0.9f, 1.0f};
+    float4 light1SpecCol = {0.5f, 0.5f, 0.9f, 1.0f};
 
     gLight0Rotation += 50.0f * gDt;
     if(gLight0Rotation > 360.0f) {
@@ -389,21 +395,27 @@
     light1Pos = rsMatrixMultiply(&l1Mat, light1Pos);
 
     // Set light 0 properties
-    gVSConstants->light0_Posision.x = light0Pos.x;
-    gVSConstants->light0_Posision.y = light0Pos.y;
-    gVSConstants->light0_Posision.z = light0Pos.z;
+    gVSConstants->light0_Posision = light0Pos;
     gVSConstants->light0_Diffuse = 1.0f;
     gVSConstants->light0_Specular = 0.5f;
-    gVSConstants->light0_CosinePower = 40.0f;
+    gVSConstants->light0_CosinePower = 10.0f;
     // Set light 1 properties
-    gVSConstants->light1_Posision.x = light1Pos.x;
-    gVSConstants->light1_Posision.y = light1Pos.y;
-    gVSConstants->light1_Posision.z = light1Pos.z;
+    gVSConstants->light1_Posision = light1Pos;
     gVSConstants->light1_Diffuse = 1.0f;
     gVSConstants->light1_Specular = 0.7f;
-    gVSConstants->light1_CosinePower = 50.0f;
+    gVSConstants->light1_CosinePower = 25.0f;
     rsAllocationMarkDirty(rsGetAllocation(gVSConstants));
 
+    gVSConstants2->light_Posision[0] = light0Pos;
+    gVSConstants2->light_Diffuse[0] = 1.0f;
+    gVSConstants2->light_Specular[0] = 0.5f;
+    gVSConstants2->light_CosinePower[0] = 10.0f;
+    gVSConstants2->light_Posision[1] = light1Pos;
+    gVSConstants2->light_Diffuse[1] = 1.0f;
+    gVSConstants2->light_Specular[1] = 0.7f;
+    gVSConstants2->light_CosinePower[1] = 25.0f;
+    rsAllocationMarkDirty(rsGetAllocation(gVSConstants2));
+
     // Update fragmetn shader constants
     // Set light 0 colors
     gFSConstants->light0_DiffuseColor = light0DiffCol;
@@ -412,6 +424,13 @@
     gFSConstants->light1_DiffuseColor = light1DiffCol;
     gFSConstants->light1_SpecularColor = light1SpecCol;
     rsAllocationMarkDirty(rsGetAllocation(gFSConstants));
+
+    gFSConstants2->light_DiffuseColor[0] = light0DiffCol;
+    gFSConstants2->light_SpecularColor[0] = light0SpecCol;
+    // Set light 1 colors
+    gFSConstants2->light_DiffuseColor[1] = light1DiffCol;
+    gFSConstants2->light_SpecularColor[1] = light1SpecCol;
+    rsAllocationMarkDirty(rsGetAllocation(gFSConstants2));
 }
 
 void displayCustomShaderSamples() {
@@ -450,6 +469,43 @@
     rsgDrawText("Custom shader sample", 10, rsgGetHeight() - 10);
 }
 
+void displayCustomShaderSamples2() {
+    // Draws the torus mesh using the array-uniform shader pair (gProgVertexCustom2 / gProgFragmentCustom2)
+    // Update vertex shader constants
+    // Load model matrix
+    // Apply a rotation to our mesh
+    gTorusRotation += 50.0f * gDt;
+    if(gTorusRotation > 360.0f) {
+        gTorusRotation -= 360.0f;
+    }
+
+    // Position our model on the screen
+    rsMatrixLoadTranslate(&gVSConstants2->model[1], 0.0f, 0.0f, -10.0f);
+    rsMatrixLoadIdentity(&gVSConstants2->model[0]);
+    rsMatrixRotate(&gVSConstants2->model[0], gTorusRotation, 1.0f, 0.0f, 0.0f);
+    rsMatrixRotate(&gVSConstants2->model[0], gTorusRotation, 0.0f, 0.0f, 1.0f);
+    // Set up the projection matrix
+    float aspect = (float)rsgGetWidth() / (float)rsgGetHeight();
+    rsMatrixLoadPerspective(&gVSConstants2->proj, 30.0f, aspect, 0.1f, 100.0f);
+    setupCustomShaderLights();
+
+    rsgBindProgramVertex(gProgVertexCustom2);
+
+    // Fragment shader with texture
+    rsgBindProgramStore(gProgStoreBlendNoneDepth);
+    rsgBindProgramFragment(gProgFragmentCustom2);
+    rsgBindSampler(gProgFragmentCustom2, 0, gLinearClamp);
+    rsgBindTexture(gProgFragmentCustom2, 0, gTexTorus);
+
+    // Use back face culling
+    rsgBindProgramRaster(gCullBack);
+    rsgDrawMesh(gTorusMesh);
+
+    rsgFontColor(1.0f, 1.0f, 1.0f, 1.0f);
+    rsgBindFont(gFontMono);
+    rsgDrawText("Custom shader sample with array uniforms", 10, rsgGetHeight() - 10);
+}
+
 void displayMultitextureSample() {
     bindProgramVertexOrtho();
     rs_matrix4x4 matrix;
@@ -577,6 +633,9 @@
     case 8:
         displayAnisoSample();
         break;
+    case 9:
+        displayCustomShaderSamples2();
+        break;
     }
 
     return 10;
diff --git a/java/Samples/src/com/android/samples/shader_def.rsh b/java/Samples/src/com/android/samples/shader_def.rsh
index e3f6206..3f51785 100644
--- a/java/Samples/src/com/android/samples/shader_def.rsh
+++ b/java/Samples/src/com/android/samples/shader_def.rsh
@@ -19,26 +19,39 @@
 typedef struct VertexShaderConstants_s {
     rs_matrix4x4 model;
     rs_matrix4x4 proj;
-    float3 light0_Posision;
+    float4 light0_Posision;
     float light0_Diffuse;
     float light0_Specular;
     float light0_CosinePower;
 
-    float3 light1_Posision;
+    float4 light1_Posision;
     float light1_Diffuse;
     float light1_Specular;
     float light1_CosinePower;
 } VertexShaderConstants;
 
+typedef struct VertexShaderConstants2_s {  // Array-based variant of VertexShaderConstants, read by shaderarrayv.glsl as UNI_* uniforms
+    rs_matrix4x4 model[2];       // the shader applies model[0] first, then model[1]
+    rs_matrix4x4 proj;           // projection matrix
+    float4 light_Posision[2];    // one entry per light; the shader reads only .xyz
+    float light_Diffuse[2];      // per-light diffuse intensity
+    float light_Specular[2];     // per-light specular intensity
+    float light_CosinePower[2];  // per-light specular exponent passed to pow()
+} VertexShaderConstants2;
+
 typedef struct FragentShaderConstants_s {
-    float3 light0_DiffuseColor;
-    float3 light0_SpecularColor;
+    float4 light0_DiffuseColor;
+    float4 light0_SpecularColor;
 
-    float3 light1_DiffuseColor;
-    float3 light1_SpecularColor;
-
+    float4 light1_DiffuseColor;
+    float4 light1_SpecularColor;
 } FragentShaderConstants;
 
+typedef struct FragentShaderConstants2_s {  // Array-based variant of FragentShaderConstants, read by shaderarrayf.glsl
+    float4 light_DiffuseColor[2];   // one color per light; the shader reads only .xyz
+    float4 light_SpecularColor[2];  // one color per light; the shader reads only .xyz
+} FragentShaderConstants2;
+
 typedef struct VertexShaderInputs_s {
     float4 position;
     float3 normal;
diff --git a/rs.spec b/rs.spec
index 14809e9..ac9abe0 100644
--- a/rs.spec
+++ b/rs.spec
@@ -435,6 +435,10 @@
 	param uint32_t slot
 	}
 
+MeshInitVertexAttribs {
+	param RsMesh mesh
+	}
+
 AnimationCreate {
 	param const float *inValues
 	param const float *outValues
diff --git a/rsContextHostStub.h b/rsContextHostStub.h
index f3e9dab..ee32b57 100644
--- a/rsContextHostStub.h
+++ b/rsContextHostStub.h
@@ -122,6 +122,7 @@
     mutable const ObjectBase * mObjHead;
 
     bool ext_OES_texture_npot() const {return mGL.OES_texture_npot;}
+    bool ext_GL_NV_texture_npot_2D_mipmap() const {return false;}
     float ext_texture_max_aniso() const {return 1.0f;}
     uint32_t getMaxFragmentTextures() const {return mGL.mMaxFragmentTextureImageUnits;}
     uint32_t getMaxFragmentUniformVectors() const {return mGL.mMaxFragmentUniformVectors;}
diff --git a/rsElement.cpp b/rsElement.cpp
index d207dcf..2ab3729 100644
--- a/rsElement.cpp
+++ b/rsElement.cpp
@@ -113,24 +113,15 @@
 
     Element *elem = new Element(rsc);
     elem->mComponent.loadFromStream(stream);
-    elem->mBits = elem->mComponent.getBits();
-    elem->mHasReference = elem->mComponent.isReference();
 
     elem->mFieldCount = stream->loadU32();
     if(elem->mFieldCount) {
-        uint32_t offset = 0;
         elem->mFields = new ElementField_t [elem->mFieldCount];
         for(uint32_t ct = 0; ct < elem->mFieldCount; ct ++) {
             stream->loadString(&elem->mFields[ct].name);
             elem->mFields[ct].arraySize = stream->loadU32();
             Element *fieldElem = Element::createFromStream(rsc, stream);
             elem->mFields[ct].e.set(fieldElem);
-            elem->mFields[ct].offsetBits = offset;
-            offset += fieldElem->getSizeBits();
-            // Check if our sub-elements have references
-            if(fieldElem->mHasReference) {
-                elem->mHasReference = true;
-            }
         }
     }
 
@@ -144,6 +135,7 @@
         }
     }
 
+    elem->compute();
     rsc->mStateElement.mElements.push(elem);
     return elem;
 }
@@ -175,6 +167,25 @@
     return false;
 }
 
+void Element::compute() {  // Derives size, reference flag and per-field bit offsets from the component or the field list
+    if(mFieldCount == 0) {  // no sub-fields: size and reference flag come straight from the component
+        mBits = mComponent.getBits();
+        mHasReference = mComponent.isReference();
+        return;
+    }
+
+    size_t bits = 0;  // running bit offset of the next field
+    for (size_t ct=0; ct < mFieldCount; ct++) {
+        mFields[ct].offsetBits = bits;
+        bits += mFields[ct].e->getSizeBits() * mFields[ct].arraySize;  // an array field occupies arraySize consecutive copies of its element
+
+        if (mFields[ct].e->mHasReference) {  // any field holding a reference marks the whole element
+            mHasReference = true;
+        }
+    }
+    // NOTE(review): mBits is not assigned on this path; presumably getSizeBits() derives the size from the fields - confirm
+}
+
 const Element * Element::create(Context *rsc, RsDataType dt, RsDataKind dk,
                             bool isNorm, uint32_t vecSize)
 {
@@ -194,8 +205,7 @@
 
     Element *e = new Element(rsc);
     e->mComponent.set(dt, dk, isNorm, vecSize);
-    e->mBits = e->mComponent.getBits();
-    e->mHasReference = e->mComponent.isReference();
+    e->compute();
     rsc->mStateElement.mElements.push(e);
     return e;
 }
@@ -227,18 +237,12 @@
     Element *e = new Element(rsc);
     e->mFields = new ElementField_t [count];
     e->mFieldCount = count;
-    size_t bits = 0;
     for (size_t ct=0; ct < count; ct++) {
         e->mFields[ct].e.set(ein[ct]);
         e->mFields[ct].name.setTo(nin[ct], lengths[ct]);
-        e->mFields[ct].offsetBits = bits;
         e->mFields[ct].arraySize = asin[ct];
-        bits += ein[ct]->getSizeBits();
-
-        if (ein[ct]->mHasReference) {
-            e->mHasReference = true;
-        }
     }
+    e->compute();
 
     rsc->mStateElement.mElements.push(e);
     return e;
diff --git a/rsElement.h b/rsElement.h
index 70e2619..506a530 100644
--- a/rsElement.h
+++ b/rsElement.h
@@ -94,6 +94,8 @@
 
     Component mComponent;
     uint32_t mBits;
+
+    void compute();
 };
 
 
diff --git a/rsFont.cpp b/rsFont.cpp
index 9e76215..66f455c 100644
--- a/rsFont.cpp
+++ b/rsFont.cpp
@@ -663,9 +663,10 @@
     float *vtx = (float*)mVertexArray->getPtr();
     float *tex = vtx + 3;
 
-    VertexArray va;
-    va.add(GL_FLOAT, 3, 20, false, (uint32_t)vtx, "ATTRIB_position");
-    va.add(GL_FLOAT, 2, 20, false, (uint32_t)tex, "ATTRIB_texture0");
+    VertexArray::Attrib attribs[2];
+    attribs[0].set(GL_FLOAT, 3, 20, false, (uint32_t)vtx, "ATTRIB_position");
+    attribs[1].set(GL_FLOAT, 2, 20, false, (uint32_t)tex, "ATTRIB_texture0");
+    VertexArray va(attribs, 2);
     va.setupGL2(mRSC, &mRSC->mStateVertexArray, &mRSC->mShaderCache);
 
     mIndexBuffer->uploadCheck(mRSC);
diff --git a/rsMesh.cpp b/rsMesh.cpp
index fd604e5..f2dd687 100644
--- a/rsMesh.cpp
+++ b/rsMesh.cpp
@@ -37,6 +37,10 @@
     mPrimitivesCount = 0;
     mVertexBuffers = NULL;
     mVertexBufferCount = 0;
+    mAttribs = NULL;
+    mAttribAllocationIndex = NULL;
+
+    mAttribCount = 0;
 }
 
 Mesh::~Mesh()
@@ -51,6 +55,87 @@
         }
         delete[] mPrimitives;
     }
+
+    if(mAttribs) {
+        delete[] mAttribs;
+        delete[] mAttribAllocationIndex;
+    }
+}
+
+bool Mesh::isValidGLComponent(const Element *elem, uint32_t fieldIdx) {  // true if field fieldIdx of elem may become a GL vertex attribute
+    // Do not create attribs for padding
+    if(elem->getFieldName(fieldIdx)[0] == '#') {  // a field whose name starts with '#' is treated as padding
+        return false;
+    }
+
+    // Only GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_FIXED, GL_FLOAT are accepted.
+    // Filter rs types accordingly
+    RsDataType dt = elem->getField(fieldIdx)->getComponent().getType();
+    if(dt != RS_TYPE_FLOAT_32 && dt != RS_TYPE_UNSIGNED_8 &&
+       dt != RS_TYPE_UNSIGNED_16 && dt != RS_TYPE_SIGNED_8 &&
+       dt != RS_TYPE_SIGNED_16) {
+        return false;
+    }
+
+    // Now make sure they are not arrays
+    uint32_t arraySize = elem->getFieldArraySize(fieldIdx);
+    if(arraySize != 1) {  // only non-array fields (array size exactly 1) are accepted
+        return false;
+    }
+
+    return true;
+}
+
+void Mesh::initVertexAttribs() {
+    // Count the number of gl attrs to initialize
+    mAttribCount = 0;
+    for (uint32_t ct=0; ct < mVertexBufferCount; ct++) {
+        const Element *elem = mVertexBuffers[ct]->getType()->getElement();
+        for (uint32_t fieldI=0; fieldI < elem->getFieldCount(); fieldI++) {
+            if(isValidGLComponent(elem, fieldI)) {
+                mAttribCount ++;
+            }
+        }
+    }
+    // Drop the tables built by a previous call before rebuilding them
+    if(mAttribs) {
+        delete [] mAttribs;
+        delete [] mAttribAllocationIndex;
+        mAttribs = NULL;
+        mAttribAllocationIndex = NULL;
+    }
+    if(!mAttribCount) {
+        return;
+    }
+    // Parallel arrays: mAttribAllocationIndex[i] is the vertex buffer that mAttribs[i] reads from
+    mAttribs = new VertexArray::Attrib[mAttribCount];
+    mAttribAllocationIndex = new uint32_t[mAttribCount];
+
+    uint32_t userNum = 0;  // next free slot in mAttribs
+    for (uint32_t ct=0; ct < mVertexBufferCount; ct++) {
+        const Element *elem = mVertexBuffers[ct]->getType()->getElement();
+        uint32_t stride = elem->getSizeBytes();  // every attribute of this buffer strides by the whole element size
+        for (uint32_t fieldI=0; fieldI < elem->getFieldCount(); fieldI++) {
+            const Component &c = elem->getField(fieldI)->getComponent();
+
+            if(!isValidGLComponent(elem, fieldI)) {  // same filter as the counting pass, so userNum never exceeds mAttribCount
+                continue;
+            }
+
+            mAttribs[userNum].size = c.getVectorSize();
+            mAttribs[userNum].offset = elem->getFieldOffsetBytes(fieldI);
+            mAttribs[userNum].type = c.getGLType();
+            mAttribs[userNum].normalized = c.getType() != RS_TYPE_FLOAT_32;//c.getIsNormalized();
+            mAttribs[userNum].stride = stride;
+            String8 tmp(RS_SHADER_ATTR);  // attribute name is the RS_SHADER_ATTR prefix followed by the field name
+            tmp.append(elem->getFieldName(fieldI));
+            mAttribs[userNum].name.setTo(tmp.string());
+
+            // Remember which allocation this attribute came from
+            mAttribAllocationIndex[userNum] = ct;
+            userNum ++;
+        }
+    }
+}
 
 void Mesh::render(Context *rsc) const
@@ -78,21 +163,29 @@
 
 void Mesh::renderPrimitiveRange(Context *rsc, uint32_t primIndex, uint32_t start, uint32_t len) const
 {
-    if (len < 1 || primIndex >= mPrimitivesCount) {
+    if (len < 1 || primIndex >= mPrimitivesCount || mAttribCount == 0) {
+        LOGE("Invalid mesh or parameters");
         return;
     }
 
     rsc->checkError("Mesh::renderPrimitiveRange 1");
-    VertexArray va;
     for (uint32_t ct=0; ct < mVertexBufferCount; ct++) {
         mVertexBuffers[ct]->uploadCheck(rsc);
-        if (mVertexBuffers[ct]->getIsBufferObject()) {
-            va.setActiveBuffer(mVertexBuffers[ct]->getBufferObjectID());
-        } else {
-            va.setActiveBuffer(mVertexBuffers[ct]->getPtr());
-        }
-        mVertexBuffers[ct]->getType()->enableGLVertexBuffer(&va);
     }
+    // update attributes with either buffer information or data ptr based on their current state
+    for (uint32_t ct=0; ct < mAttribCount; ct++) {
+        uint32_t allocIndex = mAttribAllocationIndex[ct];
+        Allocation *alloc = mVertexBuffers[allocIndex].get();
+        if (alloc->getIsBufferObject()) {
+            mAttribs[ct].buffer = alloc->getBufferObjectID();
+            mAttribs[ct].ptr = NULL;
+        } else {
+            mAttribs[ct].buffer = 0;
+            mAttribs[ct].ptr = (const uint8_t*)alloc->getPtr();
+        }
+    }
+
+    VertexArray va(mAttribs, mAttribCount);
     va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
 
     rsc->checkError("Mesh::renderPrimitiveRange 2");
@@ -215,6 +308,7 @@
     }
 
     mesh->updateGLPrimitives();
+    mesh->initVertexAttribs();
     mesh->uploadAll(rsc);
 
     return mesh;
@@ -310,6 +404,12 @@
     sm->updateGLPrimitives();
 }
 
+void rsi_MeshInitVertexAttribs(Context *rsc, RsMesh mv)  // Rebuilds the vertex attribute table of mesh mv; rsc is not used here
+{
+    Mesh *sm = static_cast<Mesh *>(mv);  // mv is dereferenced without a null check
+    sm->initVertexAttribs();
+}
+
 }}
 
 void rsaMeshGetVertexBufferCount(RsContext con, RsMesh mv, int32_t *numVtx)
diff --git a/rsMesh.h b/rsMesh.h
index ed01c38..261b5a6 100644
--- a/rsMesh.h
+++ b/rsMesh.h
@@ -66,7 +66,17 @@
     float mBBoxMax[3];
     void computeBBox();
 
+    void initVertexAttribs();
+
 protected:
+    bool isValidGLComponent(const Element *elem, uint32_t fieldIdx);
+    // Attributes that allow us to map to GL
+    VertexArray::Attrib *mAttribs;
+    // This allows us to figure out which allocation the attribute
+    // belongs to. In the event the allocation is uploaded to GL
+    // buffer, it lets us properly map it
+    uint32_t *mAttribAllocationIndex;
+    uint32_t mAttribCount;
 };
 
 class MeshContext
diff --git a/rsProgram.cpp b/rsProgram.cpp
index 72d1b02..ce3af2d 100644
--- a/rsProgram.cpp
+++ b/rsProgram.cpp
@@ -31,37 +31,14 @@
 
 Program::Program(Context *rsc) : ObjectBase(rsc)
 {
-    mDirty = true;
-    mShaderID = 0;
-    mAttribCount = 0;
-    mUniformCount = 0;
-    mTextureCount = 0;
-
-    mTextures = NULL;
-    mSamplers = NULL;
-    mInputElements = NULL;
-    mOutputElements = NULL;
-    mConstantTypes = NULL;
-    mInputCount = 0;
-    mOutputCount = 0;
-    mConstantCount = 0;
-    mIsValid = false;
-    mIsInternal = false;
+   initMemberVars();
 }
 
 Program::Program(Context *rsc, const char * shaderText, uint32_t shaderLength,
                  const uint32_t * params, uint32_t paramLength) :
     ObjectBase(rsc)
 {
-    mDirty = true;
-    mShaderID = 0;
-    mAttribCount = 0;
-    mUniformCount = 0;
-    mTextureCount = 0;
-
-    mInputCount = 0;
-    mOutputCount = 0;
-    mConstantCount = 0;
+    initMemberVars();
 
     for (uint32_t ct=0; ct < paramLength; ct+=2) {
         if (params[ct] == RS_PROGRAM_PARAM_INPUT) {
@@ -83,6 +60,7 @@
     mInputElements = new ObjectBaseRef<Element>[mInputCount];
     mOutputElements = new ObjectBaseRef<Element>[mOutputCount];
     mConstantTypes = new ObjectBaseRef<Type>[mConstantCount];
+    mConstants = new ObjectBaseRef<Allocation>[mConstantCount];
 
     uint32_t input = 0;
     uint32_t output = 0;
@@ -107,6 +85,8 @@
         shaderLength -= internalTokenLen;
     }
     mUserShader.setTo(shaderText, shaderLength);
+
+    initAttribAndUniformArray();
 }
 
 Program::~Program()
@@ -119,7 +99,7 @@
         glDeleteShader(mShaderID);
     }
 
-    for (uint32_t ct=0; ct < MAX_UNIFORMS; ct++) {
+    for (uint32_t ct=0; ct < mConstantCount; ct++) {
         bindAllocation(NULL, NULL, ct);
     }
 
@@ -132,11 +112,37 @@
     delete[] mInputElements;
     delete[] mOutputElements;
     delete[] mConstantTypes;
+    delete[] mConstants;
+    delete[] mAttribNames;
+    delete[] mUniformNames;
+    delete[] mUniformArraySizes;
     mInputCount = 0;
     mOutputCount = 0;
     mConstantCount = 0;
 }
 
+void Program::initMemberVars() {  // Resets every member to its empty default; called first by both constructors
+    mDirty = true;
+    mShaderID = 0;
+    mAttribCount = 0;
+    mUniformCount = 0;
+    mTextureCount = 0;
+
+    mTextures = NULL;  // all array members start NULL so the destructor's delete[] calls are safe
+    mSamplers = NULL;
+    mInputElements = NULL;
+    mOutputElements = NULL;
+    mConstantTypes = NULL;
+    mConstants = NULL;
+    mAttribNames = NULL;
+    mUniformNames = NULL;
+    mUniformArraySizes = NULL;
+    mInputCount = 0;
+    mOutputCount = 0;
+    mConstantCount = 0;
+    mIsValid = false;
+    mIsInternal = false;
+}
 
 void Program::bindAllocation(Context *rsc, Allocation *alloc, uint32_t slot)
 {
@@ -314,11 +320,95 @@
             }
 
             mShader.append(fn);
+            if(e->getFieldArraySize(field) > 1) {
+                mShader.appendFormat("[%d]", e->getFieldArraySize(field));
+            }
             mShader.append(";\n");
         }
     }
 }
 
+void Program::logUniform(const Element *field, const float *fd, uint32_t arraySize ) {  // Dumps arraySize consecutive values of type field, starting at fd, to the verbose log
+    RsDataType dataType = field->getType();
+    uint32_t elementSize = field->getSizeBytes() / sizeof(float);  // number of floats occupied by one array element
+    for(uint32_t i = 0; i < arraySize; i ++) {
+        if(arraySize > 1) {
+            LOGV("Array Element [%u]", i);
+        }
+        if(dataType == RS_TYPE_MATRIX_4X4) {
+            LOGV("Matrix4x4");
+            LOGV("{%f, %f, %f, %f",  fd[0], fd[4], fd[8], fd[12]);
+            LOGV(" %f, %f, %f, %f",  fd[1], fd[5], fd[9], fd[13]);
+            LOGV(" %f, %f, %f, %f",  fd[2], fd[6], fd[10], fd[14]);
+            LOGV(" %f, %f, %f, %f}", fd[3], fd[7], fd[11], fd[15]);
+        }
+        else if(dataType == RS_TYPE_MATRIX_3X3) {
+            LOGV("Matrix3x3");
+            LOGV("{%f, %f, %f",  fd[0], fd[3], fd[6]);
+            LOGV(" %f, %f, %f",  fd[1], fd[4], fd[7]);
+            LOGV(" %f, %f, %f}", fd[2], fd[5], fd[8]);
+        }
+        else if(dataType == RS_TYPE_MATRIX_2X2) {
+            LOGV("Matrix2x2");
+            LOGV("{%f, %f",  fd[0], fd[2]);
+            LOGV(" %f, %f}", fd[1], fd[3]);
+        }
+        else {
+            switch(field->getComponent().getVectorSize()) {  // non-matrix types are printed by vector width
+            case 1:
+                LOGV("Uniform 1 = %f", fd[0]);
+                break;
+            case 2:
+                LOGV("Uniform 2 = %f %f", fd[0], fd[1]);
+                break;
+            case 3:
+                LOGV("Uniform 3 = %f %f %f", fd[0], fd[1], fd[2]);
+                break;
+            case 4:
+                LOGV("Uniform 4 = %f %f %f %f", fd[0], fd[1], fd[2], fd[3]);
+                break;
+            default:
+                rsAssert(0);
+            }
+        }
+        LOGV("Element size %u data=%p", elementSize, fd);  // trace output, so verbose level rather than error level
+        fd += elementSize;  // advance to the next array element
+        LOGV("New data=%p", fd);
+    }
+}
+
+void Program::setUniform(Context *rsc, const Element *field, const float *fd,  // Uploads arraySize values of type field from fd to GL uniform location slot
+                         int32_t slot, uint32_t arraySize ) {
+    RsDataType dataType = field->getType();
+    if(dataType == RS_TYPE_MATRIX_4X4) {
+        glUniformMatrix4fv(slot, arraySize, GL_FALSE, fd);  // GL_FALSE: matrices are passed without transposition
+    }
+    else if(dataType == RS_TYPE_MATRIX_3X3) {
+        glUniformMatrix3fv(slot, arraySize, GL_FALSE, fd);
+    }
+    else if(dataType == RS_TYPE_MATRIX_2X2) {
+        glUniformMatrix2fv(slot, arraySize, GL_FALSE, fd);
+    }
+    else {
+        switch(field->getComponent().getVectorSize()) {  // non-matrix types are dispatched on vector width
+        case 1:
+            glUniform1fv(slot, arraySize, fd);
+            break;
+        case 2:
+            glUniform2fv(slot, arraySize, fd);
+            break;
+        case 3:
+            glUniform3fv(slot, arraySize, fd);
+            break;
+        case 4:
+            glUniform4fv(slot, arraySize, fd);
+            break;
+        default:
+            rsAssert(0);  // any other vector width is unexpected
+        }
+    }
+}
+
 void Program::setupUserConstants(Context *rsc, ShaderCache *sc, bool isFragment) {
     uint32_t uidx = 0;
     for (uint32_t ct=0; ct < mConstantCount; ct++) {
@@ -343,92 +433,77 @@
             const float *fd = reinterpret_cast<const float *>(&data[offset]);
 
             int32_t slot = -1;
+            uint32_t arraySize = 1;
             if(!isFragment) {
                 slot = sc->vtxUniformSlot(uidx);
-            }
-            else {
+                arraySize = sc->vtxUniformSize(uidx);
+            } else {
                 slot = sc->fragUniformSlot(uidx);
+                arraySize = sc->fragUniformSize(uidx);
             }
-
             if(rsc->props.mLogShadersUniforms) {
                 LOGV("Uniform  slot=%i, offset=%i, constant=%i, field=%i, uidx=%i, name=%s", slot, offset, ct, field, uidx, fieldName);
             }
-            if (slot >= 0) {
-                if(f->getType() == RS_TYPE_MATRIX_4X4) {
-                    if(rsc->props.mLogShadersUniforms) {
-                        LOGV("Matrix4x4");
-                        LOGV("{%f, %f, %f, %f",  fd[0], fd[4], fd[8], fd[12]);
-                        LOGV(" %f, %f, %f, %f",  fd[1], fd[5], fd[9], fd[13]);
-                        LOGV(" %f, %f, %f, %f",  fd[2], fd[6], fd[10], fd[14]);
-                        LOGV(" %f, %f, %f, %f}", fd[3], fd[7], fd[11], fd[15]);
-                    }
-                    glUniformMatrix4fv(slot, 1, GL_FALSE, fd);
-                }
-                else if(f->getType() == RS_TYPE_MATRIX_3X3) {
-                    if(rsc->props.mLogShadersUniforms) {
-                        LOGV("Matrix3x3");
-                        LOGV("{%f, %f, %f",  fd[0], fd[3], fd[6]);
-                        LOGV(" %f, %f, %f",  fd[1], fd[4], fd[7]);
-                        LOGV(" %f, %f, %f}", fd[2], fd[5], fd[8]);
-                    }
-                    glUniformMatrix3fv(slot, 1, GL_FALSE, fd);
-                }
-                else if(f->getType() == RS_TYPE_MATRIX_2X2) {
-                    if(rsc->props.mLogShadersUniforms){
-                        LOGV("Matrix2x2");
-                        LOGV("{%f, %f",  fd[0], fd[2]);
-                        LOGV(" %f, %f}", fd[1], fd[3]);
-                    }
-                    glUniformMatrix2fv(slot, 1, GL_FALSE, fd);
-                }
-                else {
-                    switch(f->getComponent().getVectorSize()) {
-                    case 1:
-                        if(rsc->props.mLogShadersUniforms) {
-                            LOGV("Uniform 1 = %f", fd[0]);
-                        }
-                        glUniform1fv(slot, 1, fd);
-                        break;
-                    case 2:
-                        if(rsc->props.mLogShadersUniforms) {
-                            LOGV("Uniform 2 = %f %f", fd[0], fd[1]);
-                        }
-                        glUniform2fv(slot, 1, fd);
-                        break;
-                    case 3:
-                        if(rsc->props.mLogShadersUniforms) {
-                            LOGV("Uniform 3 = %f %f %f", fd[0], fd[1], fd[2]);
-                        }
-                        glUniform3fv(slot, 1, fd);
-                        break;
-                    case 4:
-                        if(rsc->props.mLogShadersUniforms) {
-                            LOGV("Uniform 4 = %f %f %f %f", fd[0], fd[1], fd[2], fd[3]);
-                        }
-                        glUniform4fv(slot, 1, fd);
-                        break;
-                    default:
-                        rsAssert(0);
-                    }
-                }
-            }
             uidx ++;
+            if (slot < 0) {
+                continue;
+            }
+
+            if(rsc->props.mLogShadersUniforms) {
+                logUniform(f, fd, arraySize);
+            }
+            setUniform(rsc, f, fd, slot, arraySize);
         }
     }
 }
 
-void Program::initAddUserElement(const Element *e, String8 *names, uint32_t *count, const char *prefix)
+// Counts the named attributes and uniforms (fields whose name starts with '#'
+// are skipped) and allocates the tables that hold their names and array sizes.
+void Program::initAttribAndUniformArray() {
+    mAttribCount = 0;
+    for (uint32_t i=0; i < mInputCount; i++) {
+        const Element *input = mInputElements[i].get();
+        for (uint32_t f=0; f < input->getFieldCount(); f++) {
+            const bool named = input->getFieldName(f)[0] != '#';
+            mAttribCount += named ? 1 : 0;
+        }
+    }
+
+    // NOTE(review): only top-level fields are counted, but initAddUserElement()
+    // recurses into sub-elements -- confirm the tables cannot be overrun.
+    mUniformCount = mTextureCount; // each texture takes one uniform entry
+    for (uint32_t c=0; c < mConstantCount; c++) {
+        const Element *constant = mConstantTypes[c]->getElement();
+        for (uint32_t f=0; f < constant->getFieldCount(); f++) {
+            const bool named = constant->getFieldName(f)[0] != '#';
+            mUniformCount += named ? 1 : 0;
+        }
+    }
+
+    if(mAttribCount != 0) {
+        mAttribNames = new String8[mAttribCount];
+    }
+    if(mUniformCount != 0) {
+        mUniformNames = new String8[mUniformCount];
+        mUniformArraySizes = new uint32_t[mUniformCount];
+    }
+}
+
+void Program::initAddUserElement(const Element *e, String8 *names, uint32_t *arrayLengths, uint32_t *count, const char *prefix)
 {
     rsAssert(e->getFieldCount());
     for (uint32_t ct=0; ct < e->getFieldCount(); ct++) {
         const Element *ce = e->getField(ct);
         if (ce->getFieldCount()) {
-            initAddUserElement(ce, names, count, prefix);
+            initAddUserElement(ce, names, arrayLengths, count, prefix);
         }
         else if(e->getFieldName(ct)[0] != '#') {
             String8 tmp(prefix);
             tmp.append(e->getFieldName(ct));
             names[*count].setTo(tmp.string());
+            if(arrayLengths) {
+                arrayLengths[*count] = e->getFieldArraySize(ct);
+            }
             (*count)++;
         }
     }
diff --git a/rsProgram.h b/rsProgram.h
index c93033b..46692fd 100644
--- a/rsProgram.h
+++ b/rsProgram.h
@@ -32,8 +32,6 @@
 class Program : public ObjectBase
 {
 public:
-    const static uint32_t MAX_ATTRIBS = 8;
-    const static uint32_t MAX_UNIFORMS = 16;
 
     Program(Context *);
     Program(Context *, const char * shaderText, uint32_t shaderLength,
@@ -55,12 +53,14 @@
     uint32_t getUniformCount() const {return mUniformCount;}
     const String8 & getAttribName(uint32_t i) const {return mAttribNames[i];}
     const String8 & getUniformName(uint32_t i) const {return mUniformNames[i];}
+    uint32_t getUniformArraySize(uint32_t i) const {return mUniformArraySizes[i];}
 
     String8 getGLSLInputString() const;
     String8 getGLSLOutputString() const;
     String8 getGLSLConstantString() const;
 
     bool isValid() const {return mIsValid;}
+    void forceDirty() const {mDirty = true;}
 
 protected:
     // Components not listed in "in" will be passed though
@@ -68,6 +68,7 @@
     ObjectBaseRef<Element> *mInputElements;
     ObjectBaseRef<Element> *mOutputElements;
     ObjectBaseRef<Type> *mConstantTypes;
+    ObjectBaseRef<Allocation> *mConstants;
     uint32_t mInputCount;
     uint32_t mOutputCount;
     uint32_t mConstantCount;
@@ -77,9 +78,9 @@
     // Applies to vertex and fragment shaders only
     void appendUserConstants();
     void setupUserConstants(Context *rsc, ShaderCache *sc, bool isFragment);
-    void initAddUserElement(const Element *e, String8 *names, uint32_t *count, const char *prefix);
+    void initAddUserElement(const Element *e, String8 *names, uint32_t *arrayLengths, uint32_t *count, const char *prefix);
 
-    ObjectBaseRef<Allocation> mConstants[MAX_UNIFORMS];
+    void initAttribAndUniformArray();
 
     mutable bool mDirty;
     String8 mShader;
@@ -89,8 +90,13 @@
     uint32_t mTextureCount;
     uint32_t mAttribCount;
     uint32_t mUniformCount;
-    String8 mAttribNames[MAX_ATTRIBS];
-    String8 mUniformNames[MAX_UNIFORMS];
+    String8 *mAttribNames;
+    String8 *mUniformNames;
+    uint32_t *mUniformArraySizes;
+
+    void logUniform(const Element *field, const float *fd, uint32_t arraySize );
+    void setUniform(Context *rsc, const Element *field, const float *fd, int32_t slot, uint32_t arraySize );
+    void initMemberVars();
 
     // The difference between Textures and Constants is how they are accessed
     // Texture lookups go though a sampler which in effect converts normalized
@@ -102,9 +108,6 @@
     ObjectBaseRef<Sampler> *mSamplers;
 
     bool loadShader(Context *, uint32_t type);
-
-public:
-    void forceDirty() const {mDirty = true;}
 };
 
 
diff --git a/rsProgramFragment.cpp b/rsProgramFragment.cpp
index 800854b..d089a5a 100644
--- a/rsProgramFragment.cpp
+++ b/rsProgramFragment.cpp
@@ -146,17 +146,19 @@
 
 void ProgramFragment::init(Context *rsc)
 {
-    mUniformCount = 0;
+    uint32_t uniformIndex = 0;
     if (mUserShader.size() > 0) {
         for (uint32_t ct=0; ct < mConstantCount; ct++) {
-            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, &mUniformCount, RS_SHADER_UNI);
+            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, mUniformArraySizes, &uniformIndex, RS_SHADER_UNI);
         }
     }
-    mTextureUniformIndexStart = mUniformCount;
+    mTextureUniformIndexStart = uniformIndex;
     char buf[256];
     for (uint32_t ct=0; ct < mTextureCount; ct++) {
         sprintf(buf, "UNI_Tex%i", ct);
-        mUniformNames[mUniformCount++].setTo(buf);
+        mUniformNames[uniformIndex].setTo(buf);
+        mUniformArraySizes[uniformIndex] = 1;
+        uniformIndex++;
     }
 
     createShader();
diff --git a/rsProgramVertex.cpp b/rsProgramVertex.cpp
index 4e64008..9ee2eeb 100644
--- a/rsProgramVertex.cpp
+++ b/rsProgramVertex.cpp
@@ -202,14 +202,14 @@
 
 void ProgramVertex::init(Context *rsc)
 {
-    mAttribCount = 0;
+    uint32_t attribCount = 0;
+    uint32_t uniformCount = 0;
     if (mUserShader.size() > 0) {
         for (uint32_t ct=0; ct < mInputCount; ct++) {
-            initAddUserElement(mInputElements[ct].get(), mAttribNames, &mAttribCount, RS_SHADER_ATTR);
+            initAddUserElement(mInputElements[ct].get(), mAttribNames, NULL, &attribCount, RS_SHADER_ATTR);
         }
-        mUniformCount = 0;
         for (uint32_t ct=0; ct < mConstantCount; ct++) {
-            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, &mUniformCount, RS_SHADER_UNI);
+            initAddUserElement(mConstantTypes[ct]->getElement(), mUniformNames, mUniformArraySizes, &uniformCount, RS_SHADER_UNI);
         }
     }
     createShader();
diff --git a/rsScriptC_LibGL.cpp b/rsScriptC_LibGL.cpp
index 5b07e7b..d877ebd 100644
--- a/rsScriptC_LibGL.cpp
+++ b/rsScriptC_LibGL.cpp
@@ -159,9 +159,11 @@
     float vtx[] = {x1,y1,z1, x2,y2,z2, x3,y3,z3, x4,y4,z4};
     const float tex[] = {u1,v1, u2,v2, u3,v3, u4,v4};
 
-    VertexArray va;
-    va.add(GL_FLOAT, 3, 12, false, (uint32_t)vtx, "ATTRIB_position");
-    va.add(GL_FLOAT, 2, 8, false, (uint32_t)tex, "ATTRIB_texture0");
+    VertexArray::Attrib attribs[2];
+    attribs[0].set(GL_FLOAT, 3, 12, false, (uint32_t)vtx, "ATTRIB_position");
+    attribs[1].set(GL_FLOAT, 2, 8, false, (uint32_t)tex, "ATTRIB_texture0");
+
+    VertexArray va(attribs, 2);
     va.setupGL2(rsc, &rsc->mStateVertexArray, &rsc->mShaderCache);
 
     glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
diff --git a/rsShaderCache.cpp b/rsShaderCache.cpp
index 45f6207..193ced3 100644
--- a/rsShaderCache.cpp
+++ b/rsShaderCache.cpp
@@ -27,19 +27,62 @@
 using namespace android::renderscript;
 
 
-ShaderCache::ShaderCache()
-{
+ShaderCache::ShaderCache() { // starts with capacity reserved for 16 cached program entries
     mEntries.setCapacity(16);
 }
 
-ShaderCache::~ShaderCache()
-{
+ShaderCache::~ShaderCache() { // deletes every cached GL program and destroys its entry
     for (uint32_t ct=0; ct < mEntries.size(); ct++) {
         glDeleteProgram(mEntries[ct]->program);
-        free(mEntries[ct]);
+        delete mEntries[ct]; // entries come from new in lookup(); free() would skip ~ProgramEntry
     }
 }
 
+// Replaces the declared length of each bound array uniform with the size GL
+// reports for the active uniform of the same name, then optionally logs it.
+void ShaderCache::updateUniformArrayData(Context *rsc, Program *prog, uint32_t linkedID,
+                                         UniformData *data, const char* logTag,
+                                         UniformQueryData **uniformList, uint32_t uniListSize) {
+    const uint32_t listSize = (uniformList != NULL) ? uniListSize : 0; // lookup() may pass NULL with a non-zero size
+    for (uint32_t ct=0; ct < prog->getUniformCount(); ct++) {
+        if(data[ct].slot >= 0 && data[ct].arraySize > 1) {
+            for(uint32_t ui = 0; ui < listSize; ui ++) {
+                if(prog->getUniformName(ct) == uniformList[ui]->name) {
+                    data[ct].arraySize = (uint32_t)uniformList[ui]->arraySize;
+                    break;
+                }
+            }
+        }
+        if (rsc->props.mLogShaders) {
+             LOGV("%s U, %s = %d, arraySize = %u\n", logTag,
+                  prog->getUniformName(ct).string(), data[ct].slot, data[ct].arraySize);
+        }
+    }
+}
+
+void ShaderCache::populateUniformData(Program *prog, uint32_t linkedID, UniformData *data) {
+    for (uint32_t idx=0; idx < prog->getUniformCount(); idx++, data++) { // one entry per uniform
+        data->arraySize = prog->getUniformArraySize(idx); // length recorded by the program
+        data->slot = glGetUniformLocation(linkedID, prog->getUniformName(idx));
+    }
+}
+
+// Reports whether the current entry has any bound uniform declared as an array.
+bool ShaderCache::hasArrayUniforms(ProgramVertex *vtx, ProgramFragment *frag) {
+    // The vertex table is scanned first, then the fragment table.
+    const UniformData *tables[2] = { mCurrent->vtxUniforms, mCurrent->fragUniforms };
+    const uint32_t counts[2] = { vtx->getUniformCount(), frag->getUniformCount() };
+    for (uint32_t t=0; t < 2; t++) {
+        for (uint32_t u=0; u < counts[t]; u++) {
+            const UniformData &uni = tables[t][u];
+            if(uni.slot >= 0 && uni.arraySize > 1) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 bool ShaderCache::lookup(Context *rsc, ProgramVertex *vtx, ProgramFragment *frag)
 {
     if (!vtx->getShaderID()) {
@@ -70,13 +113,14 @@
 
     //LOGV("ShaderCache miss");
     //LOGE("e0 %x", glGetError());
-    entry_t *e = (entry_t *)malloc(sizeof(entry_t));
+    ProgramEntry *e = new ProgramEntry(vtx->getAttribCount(),
+                                       vtx->getUniformCount(),
+                                       frag->getUniformCount());
     mEntries.push(e);
     mCurrent = e;
     e->vtx = vtx->getShaderID();
     e->frag = frag->getShaderID();
     e->program = glCreateProgram();
-    e->vtxAttrCount = vtx->getAttribCount();
     if (e->program) {
         GLuint pgm = e->program;
         glAttachShader(pgm, vtx->getShaderID());
@@ -112,28 +156,58 @@
         }
 
         for (uint32_t ct=0; ct < e->vtxAttrCount; ct++) {
-            e->mVtxAttribSlots[ct] = glGetAttribLocation(pgm, vtx->getAttribName(ct));
-            e->mVtxAttribNames[ct] = vtx->getAttribName(ct).string();
+            e->vtxAttrs[ct].slot = glGetAttribLocation(pgm, vtx->getAttribName(ct));
+            e->vtxAttrs[ct].name = vtx->getAttribName(ct).string();
             if (rsc->props.mLogShaders) {
-                LOGV("vtx A %i, %s = %d\n", ct, vtx->getAttribName(ct).string(), e->mVtxAttribSlots[ct]);
+                LOGV("vtx A %i, %s = %d\n", ct, vtx->getAttribName(ct).string(), e->vtxAttrs[ct].slot);
             }
         }
 
-        for (uint32_t ct=0; ct < vtx->getUniformCount(); ct++) {
-            e->mVtxUniformSlots[ct] = glGetUniformLocation(pgm, vtx->getUniformName(ct));
-            if (rsc->props.mLogShaders) {
-                LOGV("vtx U, %s = %d\n", vtx->getUniformName(ct).string(), e->mVtxUniformSlots[ct]);
+        populateUniformData(vtx, pgm, e->vtxUniforms);
+        populateUniformData(frag, pgm, e->fragUniforms);
+
+        // Only populate this list if we have arrays in our uniforms
+        UniformQueryData **uniformList = NULL;
+        GLint numUniforms = 0;
+        bool hasArrays = hasArrayUniforms(vtx, frag);
+        if(hasArrays) {
+            // Get the number of active uniforms and the length of the longest name
+            glGetProgramiv(pgm, GL_ACTIVE_UNIFORMS, &numUniforms);
+            GLint maxNameLength = 0;
+            glGetProgramiv(pgm, GL_ACTIVE_UNIFORM_MAX_LENGTH, &maxNameLength);
+            if(numUniforms > 0 && maxNameLength > 0) {
+                uniformList = new UniformQueryData*[numUniforms];
+                // Iterate over all the uniforms and build the list we
+                // can later use to match our uniforms to
+                for(uint32_t ct = 0; ct < (uint32_t)numUniforms; ct++) {
+                    uniformList[ct] = new UniformQueryData(maxNameLength);
+                    glGetActiveUniform(pgm, ct, maxNameLength, &uniformList[ct]->writtenLength,
+                                       &uniformList[ct]->arraySize, &uniformList[ct]->type,
+                                       uniformList[ct]->name);
+                    //LOGE("GL UNI idx=%u, arraySize=%u, name=%s", ct,
+                    //     uniformList[ct]->arraySize, uniformList[ct]->name);
+                }
             }
         }
-        for (uint32_t ct=0; ct < frag->getUniformCount(); ct++) {
-            e->mFragUniformSlots[ct] = glGetUniformLocation(pgm, frag->getUniformName(ct));
-            if (rsc->props.mLogShaders) {
-                LOGV("frag U, %s = %d\n", frag->getUniformName(ct).string(), e->mFragUniformSlots[ct]);
+
+        // We now know the highest index of all of the array uniforms
+        // and we need to update our cache to reflect that
+        // we may have declared [n], but only m < n elements are used
+        updateUniformArrayData(rsc, vtx, pgm, e->vtxUniforms, "vtx",
+                               uniformList, (uint32_t)numUniforms);
+        updateUniformArrayData(rsc, frag, pgm, e->fragUniforms, "frag",
+                               uniformList, (uint32_t)numUniforms);
+
+        // Clean up the uniform data from GL
+        if(uniformList != NULL) {
+            for(uint32_t ct = 0; ct < (uint32_t)numUniforms; ct++) {
+                delete uniformList[ct];
             }
+            delete[] uniformList;
+            uniformList = NULL;
         }
     }
 
-    e->mIsValid = true;
     //LOGV("SC made program %i", e->program);
     glUseProgram(e->program);
     rsc->checkError("ShaderCache::lookup (miss)");
@@ -142,8 +216,8 @@
 
 int32_t ShaderCache::vtxAttribSlot(const String8 &attrName) const {
     for (uint32_t ct=0; ct < mCurrent->vtxAttrCount; ct++) {
-        if(attrName == mCurrent->mVtxAttribNames[ct]) {
-            return mCurrent->mVtxAttribSlots[ct];
+        if(attrName == mCurrent->vtxAttrs[ct].name) {
+            return mCurrent->vtxAttrs[ct].slot;
         }
     }
     return -1;
@@ -156,7 +230,7 @@
         if (mEntries[ct]->vtx == id) {
             glDeleteProgram(mEntries[ct]->program);
 
-            free(mEntries[ct]);
+            delete mEntries[ct];
             mEntries.removeAt(ct);
             numEntries = (int32_t)mEntries.size();
             ct --;
@@ -171,7 +245,7 @@
         if (mEntries[ct]->frag == id) {
             glDeleteProgram(mEntries[ct]->program);
 
-            free(mEntries[ct]);
+            delete mEntries[ct];
             mEntries.removeAt(ct);
             numEntries = (int32_t)mEntries.size();
             ct --;
diff --git a/rsShaderCache.h b/rsShaderCache.h
index 35ff95b..6b49e5f 100644
--- a/rsShaderCache.h
+++ b/rsShaderCache.h
@@ -41,30 +41,86 @@
     void cleanupAll();
 
     int32_t vtxAttribSlot(const String8 &attrName) const;
-    int32_t vtxUniformSlot(uint32_t a) const {return mCurrent->mVtxUniformSlots[a];}
-    int32_t fragAttribSlot(uint32_t a) const {return mCurrent->mFragAttribSlots[a];}
-    int32_t fragUniformSlot(uint32_t a) const {return mCurrent->mFragUniformSlots[a];}
+    int32_t vtxUniformSlot(uint32_t a) const {return mCurrent->vtxUniforms[a].slot;} // GL location of vertex uniform a in the current entry; a is not range-checked
+    uint32_t vtxUniformSize(uint32_t a) const {return mCurrent->vtxUniforms[a].arraySize;} // cached element count of vertex uniform a; a is not range-checked
+    int32_t fragUniformSlot(uint32_t a) const {return mCurrent->fragUniforms[a].slot;} // GL location of fragment uniform a in the current entry; a is not range-checked
+    uint32_t fragUniformSize(uint32_t a) const {return mCurrent->fragUniforms[a].arraySize;} // cached element count of fragment uniform a; a is not range-checked
 
 protected:
-    typedef struct {
+    // Scratch record for one glGetActiveUniform() result. It owns the name buffer,
+    // so it must not be copied: the implicit copy would delete[] name twice.
+    struct UniformQueryData {
+        char *name;
+        uint32_t nameLength;
+        int32_t writtenLength;
+        int32_t arraySize;
+        uint32_t type;
+        // Every field gets a defined value so a failed GL query never leaves garbage behind.
+        UniformQueryData(uint32_t maxName)
+            : name(NULL), nameLength(maxName), writtenLength(0),
+              arraySize(0), type(0) {
+            if(nameLength > 0 ) {
+                name = new char[nameLength](); // zero-filled, so always NUL-terminated
+            }
+        }
+        ~UniformQueryData() {
+            delete[] name; // delete[] on NULL is a no-op
+        }
+    };
+    struct UniformData { // cached per-uniform lookup result for one linked program
+        int32_t slot;       // value returned by glGetUniformLocation (negative when not found)
+        uint32_t arraySize; // element count later passed to the glUniform*fv calls
+    };
+    struct AttrData { // cached per-attribute lookup result for one linked program
+        int32_t slot;     // value returned by glGetAttribLocation
+        const char* name; // points at the vertex program's attribute name string; not copied or owned
+    };
+    struct ProgramEntry { // one linked vertex+fragment program plus its cached lookup tables
+        ProgramEntry(uint32_t numVtxAttr, uint32_t numVtxUnis,
+                     uint32_t numFragUnis) : vtx(0), frag(0), program(0), vtxAttrCount(0),
+                                             vtxAttrs(0), vtxUniforms(0), fragUniforms(0) {
+            vtxAttrCount = numVtxAttr;
+            if(numVtxAttr) { // a table stays NULL when its count is zero
+                vtxAttrs = new AttrData[numVtxAttr]; // elements are uninitialized until lookup() fills them
+            }
+            if(numVtxUnis) {
+                vtxUniforms = new UniformData[numVtxUnis];
+            }
+            if(numFragUnis) {
+                fragUniforms = new UniformData[numFragUnis];
+            }
+        }
+        ~ProgramEntry() { // frees the three tables only; the GL program object is not deleted here
+            if(vtxAttrs) {
+                delete[] vtxAttrs;
+                vtxAttrs = NULL;
+            }
+            if(vtxUniforms) {
+                delete[] vtxUniforms;
+                vtxUniforms = NULL;
+            }
+            if(fragUniforms) {
+                delete[] fragUniforms;
+                fragUniforms = NULL;
+            }
+        }
         uint32_t vtx;
         uint32_t frag;
         uint32_t program;
         uint32_t vtxAttrCount;
-        const char* mVtxAttribNames[Program::MAX_ATTRIBS];
-        int32_t mVtxAttribSlots[Program::MAX_ATTRIBS];
-        int32_t mVtxUniformSlots[Program::MAX_UNIFORMS];
-        int32_t mFragAttribSlots[Program::MAX_ATTRIBS];
-        int32_t mFragUniformSlots[Program::MAX_UNIFORMS];
-        bool mIsValid;
-    } entry_t;
-    //entry_t *mEntries;
-    Vector<entry_t*> mEntries;
-    entry_t *mCurrent;
+        AttrData *vtxAttrs;        // vtxAttrCount elements, or NULL
+        UniformData *vtxUniforms;  // one element per vertex-program uniform, or NULL
+        UniformData *fragUniforms; // one element per fragment-program uniform, or NULL
 
-    /*uint32_t mEntryCount;
-    uint32_t mEntryAllocationCount;*/
+    }; // NOTE(review): owns raw arrays yet remains copyable -- a copy would double-free them
+    Vector<ProgramEntry*> mEntries;
+    ProgramEntry *mCurrent;
 
+    bool hasArrayUniforms(ProgramVertex *vtx, ProgramFragment *frag);
+    void populateUniformData(Program *prog, uint32_t linkedID, UniformData *data);
+    void updateUniformArrayData(Context *rsc, Program *prog, uint32_t linkedID,
+                                UniformData *data, const char* logTag,
+                                UniformQueryData **uniformList, uint32_t uniListSize);
 };
 
 
diff --git a/rsType.cpp b/rsType.cpp
index caaa9f5..cc07412 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -29,8 +29,6 @@
 {
     mLODs = 0;
     mLODCount = 0;
-    mAttribs = NULL;
-    mAttribsSize = 0;
     clear();
 }
 
@@ -50,10 +48,6 @@
         delete [] mLODs;
         mLODs = NULL;
     }
-    if(mAttribs) {
-        delete [] mAttribs;
-        mAttribs = NULL;
-    }
 }
 
 void Type::clear()
@@ -126,8 +120,6 @@
         offset *= 6;
     }
     mTotalSizeBytes = offset;
-
-    makeGLComponents();
 }
 
 uint32_t Type::getLODOffset(uint32_t lod, uint32_t x) const
@@ -151,87 +143,6 @@
     return offset;
 }
 
-bool Type::isValidGLComponent(uint32_t fieldIdx) {
-    // Do not create attribs for padding
-    if(mElement->getFieldName(fieldIdx)[0] == '#') {
-        return false;
-    }
-
-    // Only GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT, GL_FIXED, GL_FLOAT are accepted.
-    // Filter rs types accordingly
-    RsDataType dt = mElement->getField(fieldIdx)->getComponent().getType();
-    if(dt != RS_TYPE_FLOAT_32 && dt != RS_TYPE_UNSIGNED_8 &&
-       dt != RS_TYPE_UNSIGNED_16 && dt != RS_TYPE_SIGNED_8 &&
-       dt != RS_TYPE_SIGNED_16) {
-        return false;
-    }
-
-    // Now make sure they are not arrays
-    uint32_t arraySize = mElement->getFieldArraySize(fieldIdx);
-    if(arraySize != 1) {
-        return false;
-    }
-
-    return true;
-}
-
-void Type::makeGLComponents()
-{
-    // Count the number of gl attrs to initialize
-    mAttribsSize = 0;
-
-    for (uint32_t ct=0; ct < mElement->getFieldCount(); ct++) {
-        if(isValidGLComponent(ct)) {
-            mAttribsSize ++;
-        }
-    }
-    if(mAttribs) {
-        delete [] mAttribs;
-        mAttribs = NULL;
-    }
-    if(mAttribsSize) {
-        mAttribs = new VertexArray::Attrib[mAttribsSize];
-    }
-
-    uint32_t userNum = 0;
-    for (uint32_t ct=0; ct < mElement->getFieldCount(); ct++) {
-        const Component &c = mElement->getField(ct)->getComponent();
-
-        if(!isValidGLComponent(ct)) {
-            continue;
-        }
-
-        mAttribs[userNum].size = c.getVectorSize();
-        mAttribs[userNum].offset = mElement->getFieldOffsetBytes(ct);
-        mAttribs[userNum].type = c.getGLType();
-        mAttribs[userNum].normalized = c.getType() != RS_TYPE_FLOAT_32;//c.getIsNormalized();
-        String8 tmp(RS_SHADER_ATTR);
-        tmp.append(mElement->getFieldName(ct));
-        mAttribs[userNum].name.setTo(tmp.string());
-
-        userNum ++;
-    }
-}
-
-
-void Type::enableGLVertexBuffer(VertexArray *va) const
-{
-    uint32_t stride = mElement->getSizeBytes();
-    for (uint32_t ct=0; ct < mAttribsSize; ct++) {
-        // Load up to RS_MAX_ATTRIBS inputs
-        // TODO: grow vertexarray dynamically
-        if(ct >= RS_MAX_ATTRIBS) {
-            LOGE("More GL attributes than we can handle");
-            break;
-        }
-        if (mAttribs[ct].size) {
-            va->add(mAttribs[ct], stride);
-        }
-    }
-}
-
-
-
 void Type::dumpLOGV(const char *prefix) const
 {
     char buf[1024];
diff --git a/rsType.h b/rsType.h
index 0ca5bb6..fd626ed 100644
--- a/rsType.h
+++ b/rsType.h
@@ -58,8 +58,6 @@
     void clear();
     void compute();
 
-    void enableGLVertexBuffer(class VertexArray *) const;
-
     void dumpLOGV(const char *prefix) const;
     virtual void serialize(OStream *stream) const;
     virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_TYPE; }
@@ -100,10 +98,6 @@
     bool mDimLOD;
     bool mFaces;
 
-    // A list of array dimensions.  The count is the number of array dimensions and the
-    // sizes is a per array size.
-    //Vector<size_t> mDimArraysSizes;
-
     // count of mipmap levels, 0 indicates no mipmapping
 
     size_t mMipChainSizeBytes;
@@ -111,12 +105,6 @@
     LOD *mLODs;
     uint32_t mLODCount;
 
-    VertexArray::Attrib *mAttribs;
-    uint32_t mAttribsSize;
-    bool isValidGLComponent(uint32_t fieldIdx);
-    void makeGLComponents();
-
-
 protected:
     virtual void preDestroy();
     virtual ~Type();
diff --git a/rsVertexArray.cpp b/rsVertexArray.cpp
index 4d50124..8eb94d0 100644
--- a/rsVertexArray.cpp
+++ b/rsVertexArray.cpp
@@ -28,43 +28,21 @@
 using namespace android::renderscript;
 
 
-VertexArray::VertexArray()
+VertexArray::VertexArray(const Attrib *attribs, uint32_t numAttribs)
 {
-    clearAll();
+    mAttribs = attribs; // pointer stored as-is, array not copied -- presumably the caller must keep it alive
+    mCount = numAttribs; // NOTE(review): mActivePointer is still declared in the header but is no longer set here
 }
 
 VertexArray::~VertexArray()
 {
 }
 
-
-void VertexArray::clearAll()
-{
-    for (uint32_t ct=0; ct < RS_MAX_ATTRIBS; ct++) {
-        mAttribs[ct].clear();
-    }
-    mActiveBuffer = 0;
-    mActivePointer = NULL;
-    mCount = 0;
-}
-
 VertexArray::Attrib::Attrib()
 {
     clear();
 }
 
-void VertexArray::Attrib::set(const Attrib &a)
-{
-    buffer = a.buffer;
-    ptr = a.ptr;
-    offset = a.offset;
-    type = a.type;
-    size = a.size;
-    stride = a.stride;
-    normalized = a.normalized;
-    name.setTo(a.name);
-}
-
 void VertexArray::Attrib::clear()
 {
     buffer = 0;
@@ -77,35 +55,15 @@
     name.setTo("");
 }
 
-void VertexArray::clear(uint32_t n)
+void VertexArray::Attrib::set(uint32_t type, uint32_t size, uint32_t stride, bool normalized, uint32_t offset, const char *name)
 {
-    mAttribs[n].clear();
-}
-
-void VertexArray::add(const Attrib &a, uint32_t stride)
-{
-    rsAssert(mCount < RS_MAX_ATTRIBS);
-    mAttribs[mCount].set(a);
-    mAttribs[mCount].buffer = mActiveBuffer;
-    mAttribs[mCount].ptr = mActivePointer;
-    mAttribs[mCount].stride = stride;
-    mCount ++;
-}
-
-void VertexArray::add(uint32_t type, uint32_t size, uint32_t stride, bool normalized, uint32_t offset, const char *name)
-{
-    rsAssert(mCount < RS_MAX_ATTRIBS);
-    mAttribs[mCount].clear();
-    mAttribs[mCount].type = type;
-    mAttribs[mCount].size = size;
-    mAttribs[mCount].offset = offset;
-    mAttribs[mCount].normalized = normalized;
-    mAttribs[mCount].stride = stride;
-    mAttribs[mCount].name.setTo(name);
-
-    mAttribs[mCount].buffer = mActiveBuffer;
-    mAttribs[mCount].ptr = mActivePointer;
-    mCount ++;
+    clear(); // reset to the cleared state first, so fields not listed below (e.g. buffer) keep their cleared values
+    this->type = type; // parameters shadow the members, hence the explicit this->
+    this->size = size;
+    this->offset = offset;
+    this->normalized = normalized;
+    this->stride = stride;
+    this->name.setTo(name);
 }
 
 void VertexArray::logAttrib(uint32_t idx, uint32_t slot) const {
diff --git a/rsVertexArray.h b/rsVertexArray.h
index dea7d41..c2d10e6 100644
--- a/rsVertexArray.h
+++ b/rsVertexArray.h
@@ -30,10 +30,6 @@
 class VertexArray
 {
 public:
-    VertexArray();
-    virtual ~VertexArray();
-
-
     class Attrib {
     public:
         uint32_t buffer;
@@ -46,23 +42,12 @@
         String8 name;
 
         Attrib();
-        void set(const Attrib &);
         void clear();
+        void set(uint32_t type, uint32_t size, uint32_t stride, bool normalized, uint32_t offset, const char *name);
     };
 
-
-    void clearAll();
-    void setActiveBuffer(uint32_t id) {
-        mActiveBuffer = id;
-        mActivePointer = NULL;
-    }
-    void setActiveBuffer(const void *ptr) {
-        mActiveBuffer = 0;
-        mActivePointer = (const uint8_t *)ptr;
-    }
-
-    void add(const Attrib &, uint32_t stride);
-    void add(uint32_t type, uint32_t size, uint32_t stride, bool normalized, uint32_t offset, const char *name);
+    VertexArray(const Attrib *attribs, uint32_t numAttribs);
+    virtual ~VertexArray();
 
     void setupGL2(const Context *rsc, class VertexArrayState *, ShaderCache *) const;
     void logAttrib(uint32_t idx, uint32_t slot) const;
@@ -73,7 +58,7 @@
     const uint8_t * mActivePointer;
     uint32_t mCount;
 
-    Attrib mAttribs[RS_MAX_ATTRIBS];
+    const Attrib *mAttribs;
 };
 
 
@@ -82,7 +67,6 @@
     void init(Context *);
 
     uint32_t mLastEnableCount;
-    //VertexArray::Attrib mAttribs[VertexArray::_LAST];
 };
 
 
