Merge "Fixing asynchronous performance issues."
diff --git a/rs.spec b/rs.spec
index 0dea971..f277582 100644
--- a/rs.spec
+++ b/rs.spec
@@ -127,6 +127,7 @@
 	}
 
 ElementCreate {
+        direct
 	param RsDataType mType
 	param RsDataKind mKind
 	param bool mNormalized
@@ -135,6 +136,7 @@
 	}
 
 ElementCreate2 {
+        direct
 	param const RsElement * elements
 	param const char ** names
 	param const uint32_t * arraySize
@@ -226,6 +228,7 @@
 	}
 
 SamplerCreate {
+    direct
     param RsSamplerValue magFilter
     param RsSamplerValue minFilter
     param RsSamplerValue wrapS
@@ -311,6 +314,7 @@
 
 
 ProgramStoreCreate {
+	direct
 	param bool colorMaskR
 	param bool colorMaskG
 	param bool colorMaskB
@@ -324,6 +328,7 @@
 	}
 
 ProgramRasterCreate {
+	direct
 	param bool pointSmooth
 	param bool lineSmooth
 	param bool pointSprite
@@ -352,12 +357,14 @@
 	}
 
 ProgramFragmentCreate {
+	direct
 	param const char * shaderText
 	param const uint32_t * params
 	ret RsProgramFragment
 	}
 
 ProgramVertexCreate {
+	direct
 	param const char * shaderText
 	param const uint32_t * params
 	ret RsProgramVertex
diff --git a/rsAllocation.cpp b/rsAllocation.cpp
index b59ade8..a366d49 100644
--- a/rsAllocation.cpp
+++ b/rsAllocation.cpp
@@ -252,6 +252,7 @@
 
     Allocation *alloc = Allocation::createAllocation(rsc, type, RS_ALLOCATION_USAGE_SCRIPT);
     alloc->setName(name.string(), name.size());
+    type->decUserRef();
 
     uint32_t count = dataSize / type->getElementSizeBytes();
 
@@ -307,12 +308,12 @@
         return;
     }
 
-    Type *t = mHal.state.type->cloneAndResize1D(rsc, dimX);
+    ObjectBaseRef<Type> t = mHal.state.type->cloneAndResize1D(rsc, dimX);
     if (dimX < oldDimX) {
         decRefs(getPtr(), oldDimX - dimX, dimX);
     }
-    rsc->mHal.funcs.allocation.resize(rsc, this, t, mHal.state.hasReferences);
-    mHal.state.type.set(t);
+    rsc->mHal.funcs.allocation.resize(rsc, this, t.get(), mHal.state.hasReferences);
+    mHal.state.type.set(t.get());
     updateCache();
 }
 
diff --git a/rsComponent.cpp b/rsComponent.cpp
index e65febb..ce06306 100644
--- a/rsComponent.cpp
+++ b/rsComponent.cpp
@@ -176,36 +176,6 @@
     return (mType >= RS_TYPE_ELEMENT);
 }
 
-String8 Component::getGLSLType() const {
-    if (mType == RS_TYPE_SIGNED_32) {
-        switch (mVectorSize) {
-        case 1: return String8("int");
-        case 2: return String8("ivec2");
-        case 3: return String8("ivec3");
-        case 4: return String8("ivec4");
-        }
-    }
-    if (mType == RS_TYPE_FLOAT_32) {
-        switch (mVectorSize) {
-        case 1: return String8("float");
-        case 2: return String8("vec2");
-        case 3: return String8("vec3");
-        case 4: return String8("vec4");
-        }
-    }
-    if ((mType == RS_TYPE_MATRIX_4X4) && (mVectorSize == 1)) {
-        return String8("mat4");
-    }
-    if ((mType == RS_TYPE_MATRIX_3X3) && (mVectorSize == 1)) {
-        return String8("mat3");
-    }
-    if ((mType == RS_TYPE_MATRIX_2X2) && (mVectorSize == 1)) {
-        return String8("mat2");
-    }
-    return String8();
-}
-
-
 static const char * gTypeBasicStrings[] = {
     "NONE",
     "F16",
diff --git a/rsComponent.h b/rsComponent.h
index a448f0e..6ddc990 100644
--- a/rsComponent.h
+++ b/rsComponent.h
@@ -32,10 +32,8 @@
 
     void set(RsDataType dt, RsDataKind dk, bool norm, uint32_t vecSize=1);
 
-    String8 getGLSLType() const;
     void dumpLOGV(const char *prefix) const;
 
-
     RsDataType getType() const {return mType;}
     RsDataKind getKind() const {return mKind;}
     bool getIsNormalized() const {return mNormalized;}
diff --git a/rsContext.cpp b/rsContext.cpp
index bffe3c0..f65dd47 100644
--- a/rsContext.cpp
+++ b/rsContext.cpp
@@ -240,6 +240,7 @@
         rsc->setProgramStore(NULL);
         rsc->mStateFont.init(rsc);
         rsc->setFont(NULL);
+        rsc->mStateSampler.init(rsc);
         rsc->mFBOCache.init(rsc);
     }
 
@@ -307,6 +308,7 @@
          mStateFragment.deinit(this);
          mStateFragmentStore.deinit(this);
          mStateFont.deinit(this);
+         mStateSampler.deinit(this);
          mFBOCache.deinit(this);
     }
     //LOGV("destroyWorkerThreadResources 2");
diff --git a/rsElement.cpp b/rsElement.cpp
index b77b18a..36bbdf0 100644
--- a/rsElement.cpp
+++ b/rsElement.cpp
@@ -29,13 +29,16 @@
 }
 
 Element::~Element() {
+    clear();
+}
+
+void Element::preDestroy() const {  // unregisters this element from mRSC->mStateElement.mElements (first match only)
     for (uint32_t ct = 0; ct < mRSC->mStateElement.mElements.size(); ct++) {
         if (mRSC->mStateElement.mElements[ct] == this) {
             mRSC->mStateElement.mElements.removeAt(ct);
             break;
         }
     }
-    clear();
 }
 
 void Element::clear() {
@@ -60,6 +63,7 @@
 void Element::dumpLOGV(const char *prefix) const {
     ObjectBase::dumpLOGV(prefix);
     LOGV("%s Element: fieldCount: %zu,  size bytes: %zu", prefix, mFieldCount, getSizeBytes());
+    mComponent.dumpLOGV(prefix);
     for (uint32_t ct = 0; ct < mFieldCount; ct++) {
         LOGV("%s Element field index: %u ------------------", prefix, ct);
         LOGV("%s name: %s, offsetBits: %u, arraySize: %u",
@@ -97,60 +101,46 @@
     String8 name;
     stream->loadString(&name);
 
-    Element *elem = new Element(rsc);
-    elem->mComponent.loadFromStream(stream);
+    Component component;
+    component.loadFromStream(stream);
 
-    elem->mFieldCount = stream->loadU32();
-    if (elem->mFieldCount) {
-        elem->mFields = new ElementField_t [elem->mFieldCount];
-        for (uint32_t ct = 0; ct < elem->mFieldCount; ct ++) {
-            stream->loadString(&elem->mFields[ct].name);
-            elem->mFields[ct].arraySize = stream->loadU32();
-            Element *fieldElem = Element::createFromStream(rsc, stream);
-            elem->mFields[ct].e.set(fieldElem);
-        }
+    uint32_t fieldCount = stream->loadU32();
+    if (!fieldCount) {  // no sub-fields: build the element from the loaded component alone
+        return (Element *)Element::create(rsc,
+                                          component.getType(),
+                                          component.getKind(),
+                                          component.getIsNormalized(),
+                                          component.getVectorSize());
     }
 
-    // We need to check if this already exists
-    for (uint32_t ct=0; ct < rsc->mStateElement.mElements.size(); ct++) {
-        Element *ee = rsc->mStateElement.mElements[ct];
-        if (ee->isEqual(elem)) {
-            ObjectBase::checkDelete(elem);
-            ee->incUserRef();
-            return ee;
-        }
+    const Element **subElems = new const Element *[fieldCount];
+    const char **subElemNames = new const char *[fieldCount];
+    size_t *subElemNamesLengths = new size_t[fieldCount];
+    uint32_t *arraySizes = new uint32_t[fieldCount];
+
+    String8 elemName;
+    for (uint32_t ct = 0; ct < fieldCount; ct ++) {
+        stream->loadString(&elemName);
+        subElemNamesLengths[ct] = elemName.length();
+        char *tmpName = new char[subElemNamesLengths[ct]];
+        memcpy(tmpName, elemName.string(), subElemNamesLengths[ct]);
+        subElemNames[ct] = tmpName;
+        arraySizes[ct] = stream->loadU32();
+        subElems[ct] = Element::createFromStream(rsc, stream);
     }
 
-    elem->compute();
-    rsc->mStateElement.mElements.push(elem);
-    return elem;
-}
+    const Element *elem = Element::create(rsc, fieldCount, subElems, subElemNames,
+                                          subElemNamesLengths, arraySizes);
+    for (uint32_t ct = 0; ct < fieldCount; ct ++) {
+        delete [] subElemNames[ct];
+        subElems[ct]->decUserRef();
+    }
+    delete[] subElems;
+    delete[] subElemNames;
+    delete[] subElemNamesLengths;
+    delete[] arraySizes;
 
-bool Element::isEqual(const Element *other) const {
-    if (other == NULL) {
-        return false;
-    }
-    if (!other->getFieldCount() && !mFieldCount) {
-        if ((other->getType() == getType()) &&
-           (other->getKind() == getKind()) &&
-           (other->getComponent().getIsNormalized() == getComponent().getIsNormalized()) &&
-           (other->getComponent().getVectorSize() == getComponent().getVectorSize())) {
-            return true;
-        }
-        return false;
-    }
-    if (other->getFieldCount() == mFieldCount) {
-        for (uint32_t i=0; i < mFieldCount; i++) {
-            if ((!other->mFields[i].e->isEqual(mFields[i].e.get())) ||
-                (other->mFields[i].name.length() != mFields[i].name.length()) ||
-                (other->mFields[i].name != mFields[i].name) ||
-                (other->mFields[i].arraySize != mFields[i].arraySize)) {
-                return false;
-            }
-        }
-        return true;
-    }
-    return false;
+    return (Element *)elem;
 }
 
 void Element::compute() {
@@ -172,9 +162,11 @@
 
 }
 
-const Element * Element::create(Context *rsc, RsDataType dt, RsDataKind dk,
+ObjectBaseRef<const Element> Element::createRef(Context *rsc, RsDataType dt, RsDataKind dk,
                                 bool isNorm, uint32_t vecSize) {
+    ObjectBaseRef<const Element> returnRef;
     // Look for an existing match.
+    ObjectBase::asyncLock();
     for (uint32_t ct=0; ct < rsc->mStateElement.mElements.size(); ct++) {
         const Element *ee = rsc->mStateElement.mElements[ct];
         if (!ee->getFieldCount() &&
@@ -183,21 +175,31 @@
             (ee->getComponent().getIsNormalized() == isNorm) &&
             (ee->getComponent().getVectorSize() == vecSize)) {
             // Match
-            ee->incUserRef();
-            return ee;
+            returnRef.set(ee);
+            ObjectBase::asyncUnlock();
+            return returnRef;  // hand back the ref taken under the lock, as the compound overload does
         }
     }
+    ObjectBase::asyncUnlock();
 
     Element *e = new Element(rsc);
+    returnRef.set(e);
     e->mComponent.set(dt, dk, isNorm, vecSize);
     e->compute();
+
+    ObjectBase::asyncLock();
     rsc->mStateElement.mElements.push(e);
-    return e;
+    ObjectBase::asyncUnlock();
+
+    return returnRef;
 }
 
-const Element * Element::create(Context *rsc, size_t count, const Element **ein,
+ObjectBaseRef<const Element> Element::createRef(Context *rsc, size_t count, const Element **ein,
                             const char **nin, const size_t * lengths, const uint32_t *asin) {
+
+    ObjectBaseRef<const Element> returnRef;
     // Look for an existing match.
+    ObjectBase::asyncLock();
     for (uint32_t ct=0; ct < rsc->mStateElement.mElements.size(); ct++) {
         const Element *ee = rsc->mStateElement.mElements[ct];
         if (ee->getFieldCount() == count) {
@@ -212,13 +214,16 @@
                 }
             }
             if (match) {
-                ee->incUserRef();
-                return ee;
+                returnRef.set(ee);
+                ObjectBase::asyncUnlock();
+                return returnRef;
             }
         }
     }
+    ObjectBase::asyncUnlock();
 
     Element *e = new Element(rsc);
+    returnRef.set(e);
     e->mFields = new ElementField_t [count];
     e->mFieldCount = count;
     for (size_t ct=0; ct < count; ct++) {
@@ -228,26 +233,11 @@
     }
     e->compute();
 
+    ObjectBase::asyncLock();
     rsc->mStateElement.mElements.push(e);
-    return e;
-}
+    ObjectBase::asyncUnlock();
 
-String8 Element::getGLSLType(uint32_t indent) const {
-    String8 s;
-    for (uint32_t ct=0; ct < indent; ct++) {
-        s.append(" ");
-    }
-
-    if (!mFieldCount) {
-        // Basic component.
-        s.append(mComponent.getGLSLType());
-    } else {
-        rsAssert(0);
-        //s.append("struct ");
-        //s.append(getCStructBody(indent));
-    }
-
-    return s;
+    return returnRef;
 }
 
 void Element::incRefs(const void *ptr) const {
@@ -294,6 +284,23 @@
     }
 }
 
+void Element::Builder::add(const Element *e, const char *nameStr, uint32_t arraySize) {
+    mBuilderElementRefs.push(ObjectBaseRef<const Element>(e));  // ref wrapper kept alongside the raw pointer below; presumably keeps e alive -- confirm
+    mBuilderElements.push(e);
+    mBuilderNameStrings.push(nameStr);  // the pointer is stored, not copied; presumably nameStr must stay valid until create() -- confirm
+    mBuilderNameLengths.push(strlen(nameStr));  // nameStr is dereferenced here, so it must be non-NULL
+    mBuilderArrays.push(arraySize);
+
+}
+
+ObjectBaseRef<const Element> Element::Builder::create(Context *rsc) {  // forwards the parallel arrays filled by add() to Element::createRef
+    return Element::createRef(rsc, mBuilderElements.size(),
+                              &(mBuilderElements.editArray()[0]),  // NOTE(review): indexes [0] even if add() was never called -- confirm callers always add a field
+                              &(mBuilderNameStrings.editArray()[0]),
+                              mBuilderNameLengths.editArray(),
+                              mBuilderArrays.editArray());
+}
+
 
 ElementState::ElementState() {
     const uint32_t initialCapacity = 32;
@@ -324,10 +331,10 @@
 
 const Element *ElementState::elementBuilderCreate(Context *rsc) {
     return Element::create(rsc, mBuilderElements.size(),
-                                &(mBuilderElements.editArray()[0]),
-                                &(mBuilderNameStrings.editArray()[0]),
-                                mBuilderNameLengths.editArray(),
-                                mBuilderArrays.editArray());
+                           &(mBuilderElements.editArray()[0]),
+                           &(mBuilderNameStrings.editArray()[0]),
+                           mBuilderNameLengths.editArray(),
+                           mBuilderArrays.editArray());
 }
 
 
@@ -342,9 +349,7 @@
                             RsDataKind dk,
                             bool norm,
                             uint32_t vecSize) {
-    const Element *e = Element::create(rsc, dt, dk, norm, vecSize);
-    e->incUserRef();
-    return (RsElement)e;
+    return (RsElement)Element::create(rsc, dt, dk, norm, vecSize);
 }
 
 
@@ -358,15 +363,15 @@
 
                              const uint32_t * arraySizes,
                              size_t arraySizes_length) {
-    const Element *e = Element::create(rsc, ein_length, (const Element **)ein, names, nameLengths, arraySizes);
-    e->incUserRef();
-    return (RsElement)e;
+    return (RsElement)Element::create(rsc, ein_length, (const Element **)ein,
+                                      names, nameLengths, arraySizes);
 }
 
 }
 }
 
-void rsaElementGetNativeData(RsContext con, RsElement elem, uint32_t *elemData, uint32_t elemDataSize) {
+void rsaElementGetNativeData(RsContext con, RsElement elem,
+                             uint32_t *elemData, uint32_t elemDataSize) {
     rsAssert(elemDataSize == 5);
     // we will pack mType; mKind; mNormalized; mVectorSize; NumSubElements
     Element *e = static_cast<Element *>(elem);
@@ -378,7 +383,8 @@
     (*elemData++) = e->getFieldCount();
 }
 
-void rsaElementGetSubElements(RsContext con, RsElement elem, uint32_t *ids, const char **names, uint32_t dataSize) {
+void rsaElementGetSubElements(RsContext con, RsElement elem, uint32_t *ids,
+                              const char **names, uint32_t dataSize) {
     Element *e = static_cast<Element *>(elem);
     rsAssert(e->getFieldCount() == dataSize);
 
diff --git a/rsElement.h b/rsElement.h
index 26e2760..c3ef250 100644
--- a/rsElement.h
+++ b/rsElement.h
@@ -28,8 +28,17 @@
 // An element is a group of Components that occupies one cell in a structure.
 class Element : public ObjectBase {
 public:
-    ~Element();
-
+    class Builder {  // collects (element, name, arraySize) fields, then builds a compound Element from them
+    public:
+        void add(const Element *e, const char *nameStr, uint32_t arraySize);  // appends one field to every vector below
+        ObjectBaseRef<const Element> create(Context *rsc);  // calls Element::createRef with the collected fields
+    private:
+        Vector<ObjectBaseRef<const Element> > mBuilderElementRefs;  // ref wrappers for the same elements as mBuilderElements
+        Vector<const Element *> mBuilderElements;  // raw pointers, passed to createRef as a contiguous array
+        Vector<const char*> mBuilderNameStrings;  // field name pointers as given to add(); not copied
+        Vector<size_t> mBuilderNameLengths;  // strlen() of each name, same index as mBuilderNameStrings
+        Vector<uint32_t> mBuilderArrays;  // arraySize of each field
+    };
     uint32_t getGLType() const;
     uint32_t getGLFormat() const;
 
@@ -55,24 +64,45 @@
     RsDataKind getKind() const {return mComponent.getKind();}
     uint32_t getBits() const {return mBits;}
 
-    String8 getGLSLType(uint32_t indent=0) const;
-
     void dumpLOGV(const char *prefix) const;
     virtual void serialize(OStream *stream) const;
     virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_ELEMENT; }
     static Element *createFromStream(Context *rsc, IStream *stream);
 
-    static const Element * create(Context *rsc, RsDataType dt, RsDataKind dk,
-                            bool isNorm, uint32_t vecSize);
-    static const Element * create(Context *rsc, size_t count, const Element **,
-                            const char **, const size_t * lengths, const uint32_t *asin);
+    static ObjectBaseRef<const Element> createRef(Context *rsc,
+                                                  RsDataType dt,
+                                                  RsDataKind dk,
+                                                  bool isNorm,
+                                                  uint32_t vecSize);
+    static ObjectBaseRef<const Element> createRef(Context *rsc, size_t count,
+                                                  const Element **,
+                                                  const char **,
+                                                  const size_t * lengths,
+                                                  const uint32_t *asin);
+
+    static const Element* create(Context *rsc,  // raw-pointer wrapper around createRef() for a basic element
+                                 RsDataType dt,
+                                 RsDataKind dk,
+                                 bool isNorm,
+                                 uint32_t vecSize) {
+        ObjectBaseRef<const Element> elem = createRef(rsc, dt, dk, isNorm, vecSize);
+        elem->incUserRef();  // the returned pointer carries this user reference; presumably the caller releases it with decUserRef() -- confirm
+        return elem.get();
+    }
+    static const Element* create(Context *rsc, size_t count,  // raw-pointer wrapper around createRef() for a compound element
+                                 const Element **ein,
+                                 const char **nin,
+                                 const size_t * lengths,
+                                 const uint32_t *asin) {
+        ObjectBaseRef<const Element> elem = createRef(rsc, count, ein, nin, lengths, asin);
+        elem->incUserRef();  // the returned pointer carries this user reference; presumably the caller releases it with decUserRef() -- confirm
+        return elem.get();
+    }
 
     void incRefs(const void *) const;
     void decRefs(const void *) const;
     bool getHasReferences() const {return mHasReference;}
 
-    bool isEqual(const Element *other) const;
-
 protected:
     // deallocate any components that are part of this element.
     void clear();
@@ -88,12 +118,15 @@
     bool mHasReference;
 
 
+    virtual ~Element();
     Element(Context *);
 
     Component mComponent;
     uint32_t mBits;
 
     void compute();
+
+    virtual void preDestroy() const;
 };
 
 
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index cd02c24..df5dc12 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -68,7 +68,7 @@
     for (uint32_t i = 0; i < numIndexEntries; i ++) {
         A3DIndexEntry *entry = new A3DIndexEntry();
         headerStream->loadString(&entry->mObjectName);
-        LOGV("Header data, entry name = %s", entry->mObjectName.string());
+        //LOGV("Header data, entry name = %s", entry->mObjectName.string());
         entry->mType = (RsA3DClassID)headerStream->loadU32();
         if (mUse64BitOffsets){
             entry->mOffset = headerStream->loadOffset();
@@ -369,7 +369,7 @@
     }
 
     ObjectBase *obj = fa3d->initializeFromEntry(index);
-    LOGV("Returning object with name %s", obj->getName());
+    //LOGV("Returning object with name %s", obj->getName());
 
     return obj;
 }
diff --git a/rsFont.cpp b/rsFont.cpp
index 3917ca1..7efed9d 100644
--- a/rsFont.cpp
+++ b/rsFont.cpp
@@ -490,49 +490,47 @@
     shaderString.append("  gl_FragColor = col;\n");
     shaderString.append("}\n");
 
-    const Element *colorElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
-    const Element *gammaElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 1);
-    mRSC->mStateElement.elementBuilderBegin();
-    mRSC->mStateElement.elementBuilderAdd(colorElem, "Color", 1);
-    mRSC->mStateElement.elementBuilderAdd(gammaElem, "Gamma", 1);
-    const Element *constInput = mRSC->mStateElement.elementBuilderCreate(mRSC);
+    ObjectBaseRef<const Element> colorElem = Element::createRef(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
+    ObjectBaseRef<const Element> gammaElem = Element::createRef(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 1);
+    Element::Builder builder;
+    builder.add(colorElem.get(), "Color", 1);
+    builder.add(gammaElem.get(), "Gamma", 1);
+    ObjectBaseRef<const Element> constInput = builder.create(mRSC);
 
-    Type *inputType = Type::getType(mRSC, constInput, 1, 0, 0, false, false);
+    ObjectBaseRef<Type> inputType = Type::getTypeRef(mRSC, constInput.get(), 1, 0, 0, false, false);
 
     uint32_t tmp[4];
     tmp[0] = RS_PROGRAM_PARAM_CONSTANT;
-    tmp[1] = (uint32_t)inputType;
+    tmp[1] = (uint32_t)inputType.get();
     tmp[2] = RS_PROGRAM_PARAM_TEXTURE_TYPE;
     tmp[3] = RS_TEXTURE_2D;
 
-    mFontShaderFConstant.set(Allocation::createAllocation(mRSC, inputType,
+    mFontShaderFConstant.set(Allocation::createAllocation(mRSC, inputType.get(),
                                             RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_GRAPHICS_CONSTANTS));
     ProgramFragment *pf = new ProgramFragment(mRSC, shaderString.string(),
                                               shaderString.length(), tmp, 4);
     mFontShaderF.set(pf);
     mFontShaderF->bindAllocation(mRSC, mFontShaderFConstant.get(), 0);
 
-    Sampler *sampler = new Sampler(mRSC, RS_SAMPLER_NEAREST, RS_SAMPLER_NEAREST,
-                                      RS_SAMPLER_CLAMP, RS_SAMPLER_CLAMP, RS_SAMPLER_CLAMP);
-    mFontSampler.set(sampler);
-    mFontShaderF->bindSampler(mRSC, 0, sampler);
+    mFontSampler.set(Sampler::getSampler(mRSC, RS_SAMPLER_NEAREST, RS_SAMPLER_NEAREST,
+                                         RS_SAMPLER_CLAMP, RS_SAMPLER_CLAMP, RS_SAMPLER_CLAMP).get());
+    mFontShaderF->bindSampler(mRSC, 0, mFontSampler.get());
 
-    ProgramStore *fontStore = new ProgramStore(mRSC, true, true, true, true,
-                                               false, false,
-                                               RS_BLEND_SRC_SRC_ALPHA,
-                                               RS_BLEND_DST_ONE_MINUS_SRC_ALPHA,
-                                               RS_DEPTH_FUNC_ALWAYS);
-    mFontProgramStore.set(fontStore);
+    mFontProgramStore.set(ProgramStore::getProgramStore(mRSC, true, true, true, true,
+                                                        false, false,
+                                                        RS_BLEND_SRC_SRC_ALPHA,
+                                                        RS_BLEND_DST_ONE_MINUS_SRC_ALPHA,
+                                                        RS_DEPTH_FUNC_ALWAYS).get());
     mFontProgramStore->init();
 }
 
 void FontState::initTextTexture() {
-    const Element *alphaElem = Element::create(mRSC, RS_TYPE_UNSIGNED_8, RS_KIND_PIXEL_A, true, 1);
+    ObjectBaseRef<const Element> alphaElem = Element::createRef(mRSC, RS_TYPE_UNSIGNED_8, RS_KIND_PIXEL_A, true, 1);
 
     // We will allocate a texture to initially hold 32 character bitmaps
-    Type *texType = Type::getType(mRSC, alphaElem, 1024, 256, 0, false, false);
+    ObjectBaseRef<Type> texType = Type::getTypeRef(mRSC, alphaElem.get(), 1024, 256, 0, false, false);
 
-    Allocation *cacheAlloc = Allocation::createAllocation(mRSC, texType,
+    Allocation *cacheAlloc = Allocation::createAllocation(mRSC, texType.get(),
                                 RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE);
     mTextTexture.set(cacheAlloc);
     mTextTexture->syncAll(mRSC, RS_ALLOCATION_USAGE_SCRIPT);
@@ -557,11 +555,11 @@
 // Avoid having to reallocate memory and render quad by quad
 void FontState::initVertexArrayBuffers() {
     // Now lets write index data
-    const Element *indexElem = Element::create(mRSC, RS_TYPE_UNSIGNED_16, RS_KIND_USER, false, 1);
+    ObjectBaseRef<const Element> indexElem = Element::createRef(mRSC, RS_TYPE_UNSIGNED_16, RS_KIND_USER, false, 1);
     uint32_t numIndicies = mMaxNumberOfQuads * 6;
-    Type *indexType = Type::getType(mRSC, indexElem, numIndicies, 0, 0, false, false);
+    ObjectBaseRef<Type> indexType = Type::getTypeRef(mRSC, indexElem.get(), numIndicies, 0, 0, false, false);
 
-    Allocation *indexAlloc = Allocation::createAllocation(mRSC, indexType,
+    Allocation *indexAlloc = Allocation::createAllocation(mRSC, indexType.get(),
                                                           RS_ALLOCATION_USAGE_SCRIPT |
                                                           RS_ALLOCATION_USAGE_GRAPHICS_VERTEX);
     uint16_t *indexPtr = (uint16_t*)indexAlloc->getPtr();
@@ -582,19 +580,19 @@
 
     indexAlloc->sendDirty(mRSC);
 
-    const Element *posElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 3);
-    const Element *texElem = Element::create(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 2);
+    ObjectBaseRef<const Element> posElem = Element::createRef(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 3);
+    ObjectBaseRef<const Element> texElem = Element::createRef(mRSC, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 2);
 
-    mRSC->mStateElement.elementBuilderBegin();
-    mRSC->mStateElement.elementBuilderAdd(posElem, "position", 1);
-    mRSC->mStateElement.elementBuilderAdd(texElem, "texture0", 1);
-    const Element *vertexDataElem = mRSC->mStateElement.elementBuilderCreate(mRSC);
+    Element::Builder builder;
+    builder.add(posElem.get(), "position", 1);
+    builder.add(texElem.get(), "texture0", 1);
+    ObjectBaseRef<const Element> vertexDataElem = builder.create(mRSC);
 
-    Type *vertexDataType = Type::getType(mRSC, vertexDataElem,
-                                         mMaxNumberOfQuads * 4,
-                                         0, 0, false, false);
+    ObjectBaseRef<Type> vertexDataType = Type::getTypeRef(mRSC, vertexDataElem.get(),
+                                                          mMaxNumberOfQuads * 4,
+                                                          0, 0, false, false);
 
-    Allocation *vertexAlloc = Allocation::createAllocation(mRSC, vertexDataType,
+    Allocation *vertexAlloc = Allocation::createAllocation(mRSC, vertexDataType.get(),
                                                            RS_ALLOCATION_USAGE_SCRIPT);
     mTextMeshPtr = (float*)vertexAlloc->getPtr();
 
diff --git a/rsFont.h b/rsFont.h
index b0e1430..679591c 100644
--- a/rsFont.h
+++ b/rsFont.h
@@ -146,7 +146,6 @@
     void deinit(Context *rsc);
 
     ObjectBaseRef<Font> mDefault;
-    ObjectBaseRef<Font> mLast;
 
     void renderText(const char *text, uint32_t len, int32_t x, int32_t y,
                     uint32_t startIndex = 0, int numGlyphs = -1,
diff --git a/rsObjectBase.h b/rsObjectBase.h
index 01850f1..c7cfb0e 100644
--- a/rsObjectBase.h
+++ b/rsObjectBase.h
@@ -114,7 +114,10 @@
     }
 
     ObjectBaseRef & operator= (const ObjectBaseRef &ref) {
-        return ObjectBaseRef(ref);
+        if (&ref != this) {  // self-assignment is a no-op
+            set(ref);
+        }
+        return *this;  // return the assignee itself, not a temporary built from ref
     }
 
     ~ObjectBaseRef() {
diff --git a/rsProgram.cpp b/rsProgram.cpp
index b1d8f48..33eb422 100644
--- a/rsProgram.cpp
+++ b/rsProgram.cpp
@@ -116,7 +116,7 @@
             rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind allocation");
             return;
         }
-        if (!alloc->getType()->isEqual(mHal.state.constantTypes[slot].get())) {
+        if (alloc->getType() != mHal.state.constantTypes[slot].get()) {
             LOGE("Attempt to bind alloc at slot %u, on shader id %u, but types mismatch",
                  slot, (uint32_t)this);
             rsc->setError(RS_ERROR_BAD_SHADER, "Cannot bind allocation");
diff --git a/rsProgramFragment.cpp b/rsProgramFragment.cpp
index 356ff77..ff29520 100644
--- a/rsProgramFragment.cpp
+++ b/rsProgramFragment.cpp
@@ -97,18 +97,18 @@
     shaderString.append("  gl_FragColor = col;\n");
     shaderString.append("}\n");
 
-    const Element *colorElem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
-    rsc->mStateElement.elementBuilderBegin();
-    rsc->mStateElement.elementBuilderAdd(colorElem, "Color", 1);
-    const Element *constInput = rsc->mStateElement.elementBuilderCreate(rsc);
+    ObjectBaseRef<const Element> colorElem = Element::createRef(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
+    Element::Builder builder;
+    builder.add(colorElem.get(), "Color", 1);
+    ObjectBaseRef<const Element> constInput = builder.create(rsc);
 
-    Type *inputType = Type::getType(rsc, constInput, 1, 0, 0, false, false);
+    ObjectBaseRef<Type> inputType = Type::getTypeRef(rsc, constInput.get(), 1, 0, 0, false, false);
 
     uint32_t tmp[2];
     tmp[0] = RS_PROGRAM_PARAM_CONSTANT;
-    tmp[1] = (uint32_t)inputType;
+    tmp[1] = (uint32_t)inputType.get();
 
-    Allocation *constAlloc = Allocation::createAllocation(rsc, inputType,
+    Allocation *constAlloc = Allocation::createAllocation(rsc, inputType.get(),
                               RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_GRAPHICS_CONSTANTS);
     ProgramFragment *pf = new ProgramFragment(rsc, shaderString.string(),
                                               shaderString.length(), tmp, 2);
diff --git a/rsProgramRaster.cpp b/rsProgramRaster.cpp
index 435561d..945b5ec 100644
--- a/rsProgramRaster.cpp
+++ b/rsProgramRaster.cpp
@@ -37,6 +37,15 @@
     rsc->mHal.funcs.raster.init(rsc, this);
 }
 
+void ProgramRaster::preDestroy() const {  // removes this program from the context's mRasterPrograms cache
+    for (uint32_t ct = 0; ct < mRSC->mStateRaster.mRasterPrograms.size(); ct++) {  // NOTE(review): no asyncLock() taken here; presumably the caller holds it -- confirm
+        if (mRSC->mStateRaster.mRasterPrograms[ct] == this) {
+            mRSC->mStateRaster.mRasterPrograms.removeAt(ct);
+            break;  // at most one entry is removed
+        }
+    }
+}
+
 ProgramRaster::~ProgramRaster() {
     mRSC->mHal.funcs.raster.destroy(mRSC, this);
 }
@@ -65,8 +74,8 @@
 }
 
 void ProgramRasterState::init(Context *rsc) {
-    ProgramRaster *pr = new ProgramRaster(rsc, false, false, false, 1.f, RS_CULL_BACK);
-    mDefault.set(pr);
+    mDefault.set(ProgramRaster::getProgramRaster(rsc, false, false,
+                                                 false, 1.f, RS_CULL_BACK).get());
 }
 
 void ProgramRasterState::deinit(Context *rsc) {
@@ -74,19 +83,47 @@
     mLast.clear();
 }
 
+ObjectBaseRef<ProgramRaster> ProgramRaster::getProgramRaster(Context *rsc,  // returns a cached program with identical state, or creates and caches a new one
+                                                             bool pointSmooth,
+                                                             bool lineSmooth,
+                                                             bool pointSprite,
+                                                             float lineWidth,
+                                                             RsCullMode cull) {
+    ObjectBaseRef<ProgramRaster> returnRef;
+    ObjectBase::asyncLock();  // the cache scan runs under the async lock
+    for (uint32_t ct = 0; ct < rsc->mStateRaster.mRasterPrograms.size(); ct++) {
+        ProgramRaster *existing = rsc->mStateRaster.mRasterPrograms[ct];
+        if (existing->mHal.state.pointSmooth != pointSmooth) continue;
+        if (existing->mHal.state.lineSmooth != lineSmooth) continue;
+        if (existing->mHal.state.pointSprite != pointSprite) continue;
+        if (existing->mHal.state.lineWidth != lineWidth) continue;  // exact float comparison
+        if (existing->mHal.state.cull != cull) continue;
+        returnRef.set(existing);  // take the ref before releasing the lock
+        ObjectBase::asyncUnlock();
+        return returnRef;
+    }
+    ObjectBase::asyncUnlock();
+    // No match: the new program is constructed with the lock released.
+    ProgramRaster *pr = new ProgramRaster(rsc, pointSmooth,
+                                          lineSmooth, pointSprite, lineWidth, cull);
+    returnRef.set(pr);
+    // NOTE(review): another thread could cache an equal program in this window; presumably duplicates are tolerated -- confirm
+    ObjectBase::asyncLock();
+    rsc->mStateRaster.mRasterPrograms.push(pr);
+    ObjectBase::asyncUnlock();
+
+    return returnRef;
+}
+
 namespace android {
 namespace renderscript {
 
-RsProgramRaster rsi_ProgramRasterCreate(Context * rsc,
-                                      bool pointSmooth,
-                                      bool lineSmooth,
-                                      bool pointSprite,
-                                      float lineWidth,
-                                      RsCullMode cull) {
-    ProgramRaster *pr = new ProgramRaster(rsc, pointSmooth,
-                                          lineSmooth, pointSprite, lineWidth, cull);
+RsProgramRaster rsi_ProgramRasterCreate(Context * rsc, bool pointSmooth, bool lineSmooth,
+                                        bool pointSprite, float lineWidth, RsCullMode cull) {
+    ObjectBaseRef<ProgramRaster> pr = ProgramRaster::getProgramRaster(rsc, pointSmooth, lineSmooth,  // may return an already-cached program
+                                                                      pointSprite, lineWidth, cull);
     pr->incUserRef();
-    return pr;
+    return pr.get();  // raw handle, returned after the incUserRef() above
 }
 
 }
diff --git a/rsProgramRaster.h b/rsProgramRaster.h
index efdb948..09d7d54 100644
--- a/rsProgramRaster.h
+++ b/rsProgramRaster.h
@@ -27,19 +27,17 @@
 
 class ProgramRaster : public ProgramBase {
 public:
-    ProgramRaster(Context *rsc,
-                  bool pointSmooth,
-                  bool lineSmooth,
-                  bool pointSprite,
-                  float lineWidth,
-                  RsCullMode cull);
-    virtual ~ProgramRaster();
-
     virtual void setup(const Context *, ProgramRasterState *);
     virtual void serialize(OStream *stream) const;
     virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_RASTER; }
     static ProgramRaster *createFromStream(Context *rsc, IStream *stream);
 
+    static ObjectBaseRef<ProgramRaster> getProgramRaster(Context *rsc,
+                                                         bool pointSmooth,
+                                                         bool lineSmooth,
+                                                         bool pointSprite,
+                                                         float lineWidth,
+                                                         RsCullMode cull);
     struct Hal {
         mutable void *drv;
 
@@ -55,6 +53,17 @@
     Hal mHal;
 
 protected:
+    virtual void preDestroy() const;
+    virtual ~ProgramRaster();
+
+private:
+    ProgramRaster(Context *rsc,
+                  bool pointSmooth,
+                  bool lineSmooth,
+                  bool pointSprite,
+                  float lineWidth,
+                  RsCullMode cull);
+
 };
 
 class ProgramRasterState {
@@ -66,6 +75,9 @@
 
     ObjectBaseRef<ProgramRaster> mDefault;
     ObjectBaseRef<ProgramRaster> mLast;
+
+    // Cache of all existing raster programs.
+    Vector<ProgramRaster *> mRasterPrograms;
 };
 
 
diff --git a/rsProgramStore.cpp b/rsProgramStore.cpp
index 8fe890b..7e25a22 100644
--- a/rsProgramStore.cpp
+++ b/rsProgramStore.cpp
@@ -41,6 +41,15 @@
     mHal.state.depthFunc = depthFunc;
 }
 
+void ProgramStore::preDestroy() const {
+    for (uint32_t ct = 0; ct < mRSC->mStateFragmentStore.mStorePrograms.size(); ct++) {
+        if (mRSC->mStateFragmentStore.mStorePrograms[ct] == this) {
+            mRSC->mStateFragmentStore.mStorePrograms.removeAt(ct);
+            break;
+        }
+    }
+}
+
 ProgramStore::~ProgramStore() {
     mRSC->mHal.funcs.store.destroy(mRSC, this);
 }
@@ -71,14 +80,58 @@
 ProgramStoreState::~ProgramStoreState() {
 }
 
+ObjectBaseRef<ProgramStore> ProgramStore::getProgramStore(Context *rsc,
+                                                          bool colorMaskR,
+                                                          bool colorMaskG,
+                                                          bool colorMaskB,
+                                                          bool colorMaskA,
+                                                          bool depthMask, bool ditherEnable,
+                                                          RsBlendSrcFunc srcFunc,
+                                                          RsBlendDstFunc destFunc,
+                                                          RsDepthFunc depthFunc) {
+    ObjectBaseRef<ProgramStore> returnRef;
+    ObjectBase::asyncLock();
+    for (uint32_t ct = 0; ct < rsc->mStateFragmentStore.mStorePrograms.size(); ct++) {
+        ProgramStore *existing = rsc->mStateFragmentStore.mStorePrograms[ct];
+        if (existing->mHal.state.ditherEnable != ditherEnable) continue;
+        if (existing->mHal.state.colorRWriteEnable != colorMaskR) continue;
+        if (existing->mHal.state.colorGWriteEnable != colorMaskG) continue;
+        if (existing->mHal.state.colorBWriteEnable != colorMaskB) continue;
+        if (existing->mHal.state.colorAWriteEnable != colorMaskA) continue;
+        if (existing->mHal.state.blendSrc != srcFunc) continue;
+        if (existing->mHal.state.blendDst != destFunc) continue;
+        if (existing->mHal.state.depthWriteEnable != depthMask) continue;
+        if (existing->mHal.state.depthFunc != depthFunc) continue;
+
+        returnRef.set(existing);
+        ObjectBase::asyncUnlock();
+        return returnRef;
+    }
+    ObjectBase::asyncUnlock();
+
+    ProgramStore *pfs = new ProgramStore(rsc,
+                                         colorMaskR, colorMaskG, colorMaskB, colorMaskA,
+                                         depthMask, ditherEnable,
+                                         srcFunc, destFunc, depthFunc);
+    returnRef.set(pfs);
+
+    pfs->init();
+
+    ObjectBase::asyncLock();
+    rsc->mStateFragmentStore.mStorePrograms.push(pfs);
+    ObjectBase::asyncUnlock();
+
+    return returnRef;
+}
+
+
+
 void ProgramStoreState::init(Context *rsc) {
-    ProgramStore *ps = new ProgramStore(rsc,
-                                        true, true, true, true,
-                                        true, true,
-                                        RS_BLEND_SRC_ONE, RS_BLEND_DST_ZERO,
-                                        RS_DEPTH_FUNC_LESS);
-    ps->init();
-    mDefault.set(ps);
+    mDefault.set(ProgramStore::getProgramStore(rsc,
+                                               true, true, true, true,
+                                               true, true,
+                                               RS_BLEND_SRC_ONE, RS_BLEND_DST_ZERO,
+                                               RS_DEPTH_FUNC_LESS).get());
 }
 
 void ProgramStoreState::deinit(Context *rsc) {
@@ -96,13 +149,14 @@
                                       RsBlendSrcFunc srcFunc, RsBlendDstFunc destFunc,
                                       RsDepthFunc depthFunc) {
 
-    ProgramStore *pfs = new ProgramStore(rsc,
-                                         colorMaskR, colorMaskG, colorMaskB, colorMaskA,
-                                         depthMask, ditherEnable,
-                                         srcFunc, destFunc, depthFunc);
-    pfs->init();
-    pfs->incUserRef();
-    return pfs;
+
+    ObjectBaseRef<ProgramStore> ps = ProgramStore::getProgramStore(rsc,
+                                                                   colorMaskR, colorMaskG,
+                                                                   colorMaskB, colorMaskA,
+                                                                   depthMask, ditherEnable,
+                                                                   srcFunc, destFunc, depthFunc);
+    ps->incUserRef();
+    return ps.get();
 }
 
 }
diff --git a/rsProgramStore.h b/rsProgramStore.h
index 77b3881..e21f039 100644
--- a/rsProgramStore.h
+++ b/rsProgramStore.h
@@ -28,18 +28,17 @@
 
 class ProgramStore : public ProgramBase {
 public:
-    ProgramStore(Context *,
-                 bool colorMaskR, bool colorMaskG, bool colorMaskB, bool colorMaskA,
-                 bool depthMask, bool ditherEnable,
-                 RsBlendSrcFunc srcFunc, RsBlendDstFunc destFunc,
-                 RsDepthFunc depthFunc);
-    virtual ~ProgramStore();
-
     virtual void setup(const Context *, ProgramStoreState *);
 
     virtual void serialize(OStream *stream) const;
     virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_PROGRAM_STORE; }
     static ProgramStore *createFromStream(Context *rsc, IStream *stream);
+    static ObjectBaseRef<ProgramStore> getProgramStore(Context *,
+                                                       bool colorMaskR, bool colorMaskG,
+                                                       bool colorMaskB, bool colorMaskA,
+                                                       bool depthMask, bool ditherEnable,
+                                                       RsBlendSrcFunc srcFunc, RsBlendDstFunc destFunc,
+                                                       RsDepthFunc depthFunc);
 
     void init();
 
@@ -66,6 +65,15 @@
     Hal mHal;
 
 protected:
+    virtual void preDestroy() const;
+    virtual ~ProgramStore();
+
+private:
+    ProgramStore(Context *,
+                 bool colorMaskR, bool colorMaskG, bool colorMaskB, bool colorMaskA,
+                 bool depthMask, bool ditherEnable,
+                 RsBlendSrcFunc srcFunc, RsBlendDstFunc destFunc,
+                 RsDepthFunc depthFunc);
 };
 
 class ProgramStoreState {
@@ -77,6 +85,9 @@
 
     ObjectBaseRef<ProgramStore> mDefault;
     ObjectBaseRef<ProgramStore> mLast;
+
+    // Cache of all existing store programs.
+    Vector<ProgramStore *> mStorePrograms;
 };
 
 }
diff --git a/rsProgramVertex.cpp b/rsProgramVertex.cpp
index 058a456..51cb2a8 100644
--- a/rsProgramVertex.cpp
+++ b/rsProgramVertex.cpp
@@ -150,26 +150,30 @@
 }
 
 void ProgramVertexState::init(Context *rsc) {
-    const Element *matrixElem = Element::create(rsc, RS_TYPE_MATRIX_4X4, RS_KIND_USER, false, 1);
-    const Element *f2Elem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 2);
-    const Element *f3Elem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 3);
-    const Element *f4Elem = Element::create(rsc, RS_TYPE_FLOAT_32, RS_KIND_USER, false, 4);
+    ObjectBaseRef<const Element> matrixElem = Element::createRef(rsc, RS_TYPE_MATRIX_4X4,
+                                                                 RS_KIND_USER, false, 1);
+    ObjectBaseRef<const Element> f2Elem = Element::createRef(rsc, RS_TYPE_FLOAT_32,
+                                                             RS_KIND_USER, false, 2);
+    ObjectBaseRef<const Element> f3Elem = Element::createRef(rsc, RS_TYPE_FLOAT_32,
+                                                             RS_KIND_USER, false, 3);
+    ObjectBaseRef<const Element> f4Elem = Element::createRef(rsc, RS_TYPE_FLOAT_32,
+                                                             RS_KIND_USER, false, 4);
 
-    rsc->mStateElement.elementBuilderBegin();
-    rsc->mStateElement.elementBuilderAdd(matrixElem, "MV", 1);
-    rsc->mStateElement.elementBuilderAdd(matrixElem, "P", 1);
-    rsc->mStateElement.elementBuilderAdd(matrixElem, "TexMatrix", 1);
-    rsc->mStateElement.elementBuilderAdd(matrixElem, "MVP", 1);
-    const Element *constInput = rsc->mStateElement.elementBuilderCreate(rsc);
+    Element::Builder constBuilder;
+    constBuilder.add(matrixElem.get(), "MV", 1);
+    constBuilder.add(matrixElem.get(), "P", 1);
+    constBuilder.add(matrixElem.get(), "TexMatrix", 1);
+    constBuilder.add(matrixElem.get(), "MVP", 1);
+    ObjectBaseRef<const Element> constInput = constBuilder.create(rsc);
 
-    rsc->mStateElement.elementBuilderBegin();
-    rsc->mStateElement.elementBuilderAdd(f4Elem, "position", 1);
-    rsc->mStateElement.elementBuilderAdd(f4Elem, "color", 1);
-    rsc->mStateElement.elementBuilderAdd(f3Elem, "normal", 1);
-    rsc->mStateElement.elementBuilderAdd(f2Elem, "texture0", 1);
-    const Element *attrElem = rsc->mStateElement.elementBuilderCreate(rsc);
+    Element::Builder inputBuilder;
+    inputBuilder.add(f4Elem.get(), "position", 1);
+    inputBuilder.add(f4Elem.get(), "color", 1);
+    inputBuilder.add(f3Elem.get(), "normal", 1);
+    inputBuilder.add(f2Elem.get(), "texture0", 1);
+    ObjectBaseRef<const Element> attrElem = inputBuilder.create(rsc);
 
-    Type *inputType = Type::getType(rsc, constInput, 1, 0, 0, false, false);
+    ObjectBaseRef<Type> inputType = Type::getTypeRef(rsc, constInput.get(), 1, 0, 0, false, false);
 
     String8 shaderString(RS_SHADER_INTERNAL);
     shaderString.append("varying vec4 varColor;\n");
@@ -183,13 +187,13 @@
 
     uint32_t tmp[4];
     tmp[0] = RS_PROGRAM_PARAM_CONSTANT;
-    tmp[1] = (uint32_t)inputType;
+    tmp[1] = (uint32_t)inputType.get();
     tmp[2] = RS_PROGRAM_PARAM_INPUT;
-    tmp[3] = (uint32_t)attrElem;
+    tmp[3] = (uint32_t)attrElem.get();
 
     ProgramVertex *pv = new ProgramVertex(rsc, shaderString.string(),
                                           shaderString.length(), tmp, 4);
-    Allocation *alloc = Allocation::createAllocation(rsc, inputType,
+    Allocation *alloc = Allocation::createAllocation(rsc, inputType.get(),
                               RS_ALLOCATION_USAGE_SCRIPT | RS_ALLOCATION_USAGE_GRAPHICS_CONSTANTS);
     pv->bindAllocation(rsc, alloc, 0);
 
diff --git a/rsSampler.cpp b/rsSampler.cpp
index 2a05d16..5fc64a4 100644
--- a/rsSampler.cpp
+++ b/rsSampler.cpp
@@ -48,6 +48,15 @@
     mRSC->mHal.funcs.sampler.destroy(mRSC, this);
 }
 
+void Sampler::preDestroy() const {
+    for (uint32_t ct = 0; ct < mRSC->mStateSampler.mAllSamplers.size(); ct++) {
+        if (mRSC->mStateSampler.mAllSamplers[ct] == this) {
+            mRSC->mStateSampler.mAllSamplers.removeAt(ct);
+            break;
+        }
+    }
+}
+
 void Sampler::bindToContext(SamplerState *ss, uint32_t slot) {
     ss->mSamplers[slot].set(this);
     mBoundSlot = slot;
@@ -66,6 +75,39 @@
     return NULL;
 }
 
+ObjectBaseRef<Sampler> Sampler::getSampler(Context *rsc,
+                                           RsSamplerValue magFilter,
+                                           RsSamplerValue minFilter,
+                                           RsSamplerValue wrapS,
+                                           RsSamplerValue wrapT,
+                                           RsSamplerValue wrapR,
+                                           float aniso) {
+    ObjectBaseRef<Sampler> returnRef;
+    ObjectBase::asyncLock();
+    for (uint32_t ct = 0; ct < rsc->mStateSampler.mAllSamplers.size(); ct++) {
+        Sampler *existing = rsc->mStateSampler.mAllSamplers[ct];
+        if (existing->mHal.state.magFilter != magFilter) continue;
+        if (existing->mHal.state.minFilter != minFilter ) continue;
+        if (existing->mHal.state.wrapS != wrapS) continue;
+        if (existing->mHal.state.wrapT != wrapT) continue;
+        if (existing->mHal.state.wrapR != wrapR) continue;
+        if (existing->mHal.state.aniso != aniso) continue;
+        returnRef.set(existing);
+        ObjectBase::asyncUnlock();
+        return returnRef;
+    }
+    ObjectBase::asyncUnlock();
+
+    Sampler *s = new Sampler(rsc, magFilter, minFilter, wrapS, wrapT, wrapR, aniso);
+    returnRef.set(s);
+
+    ObjectBase::asyncLock();
+    rsc->mStateSampler.mAllSamplers.push(s);
+    ObjectBase::asyncUnlock();
+
+    return returnRef;
+}
+
 ////////////////////////////////
 
 namespace android {
@@ -78,9 +120,10 @@
                             RsSamplerValue wrapT,
                             RsSamplerValue wrapR,
                             float aniso) {
-    Sampler * s = new Sampler(rsc, magFilter, minFilter, wrapS, wrapT, wrapR, aniso);
+    ObjectBaseRef<Sampler> s = Sampler::getSampler(rsc, magFilter, minFilter,
+                                                   wrapS, wrapT, wrapR, aniso);
     s->incUserRef();
-    return s;
+    return s.get();
 }
 
 }}
diff --git a/rsSampler.h b/rsSampler.h
index 90b6082..e698132 100644
--- a/rsSampler.h
+++ b/rsSampler.h
@@ -30,16 +30,13 @@
 
 class Sampler : public ObjectBase {
 public:
-    Sampler(Context *,
-            RsSamplerValue magFilter,
-            RsSamplerValue minFilter,
-            RsSamplerValue wrapS,
-            RsSamplerValue wrapT,
-            RsSamplerValue wrapR,
-            float aniso = 1.0f);
-
-    virtual ~Sampler();
-
+    static ObjectBaseRef<Sampler> getSampler(Context *,
+                                             RsSamplerValue magFilter,
+                                             RsSamplerValue minFilter,
+                                             RsSamplerValue wrapS,
+                                             RsSamplerValue wrapT,
+                                             RsSamplerValue wrapR,
+                                             float aniso = 1.0f);
     void bindToContext(SamplerState *, uint32_t slot);
     void unbindFromContext(SamplerState *);
 
@@ -65,14 +62,33 @@
 protected:
     int32_t mBoundSlot;
 
+    virtual void preDestroy() const;
+    virtual ~Sampler();
+
 private:
     Sampler(Context *);
+    Sampler(Context *,
+            RsSamplerValue magFilter,
+            RsSamplerValue minFilter,
+            RsSamplerValue wrapS,
+            RsSamplerValue wrapT,
+            RsSamplerValue wrapR,
+            float aniso = 1.0f);
 };
 
 
 class SamplerState {
 public:
     ObjectBaseRef<Sampler> mSamplers[RS_MAX_SAMPLER_SLOT];
+    void init(Context *rsc) {
+    }
+    void deinit(Context *rsc) {
+        for (uint32_t i = 0; i < RS_MAX_SAMPLER_SLOT; i ++) {
+            mSamplers[i].clear();
+        }
+    }
+    // Cache of all existing samplers.
+    Vector<Sampler *> mAllSamplers;
 };
 
 }
diff --git a/rsType.cpp b/rsType.cpp
index 10e3182..9a6a31b 100644
--- a/rsType.cpp
+++ b/rsType.cpp
@@ -25,7 +25,7 @@
     clear();
 }
 
-void Type::preDestroy() {
+void Type::preDestroy() const {
     for (uint32_t ct = 0; ct < mRSC->mStateType.mTypes.size(); ct++) {
         if (mRSC->mStateType.mTypes[ct] == this) {
             mRSC->mStateType.mTypes.removeAt(ct);
@@ -58,6 +58,7 @@
 }
 
 TypeState::~TypeState() {
+    rsAssert(!mTypes.size());
 }
 
 size_t Type::getOffsetForFace(uint32_t face) const {
@@ -183,7 +184,9 @@
     uint32_t z = stream->loadU32();
     uint8_t lod = stream->loadU8();
     uint8_t faces = stream->loadU8();
-    return Type::getType(rsc, elem, x, y, z, lod != 0, faces !=0 );
+    Type *type = Type::getType(rsc, elem, x, y, z, lod != 0, faces !=0 );
+    elem->decUserRef();
+    return type;
 }
 
 bool Type::getIsNp2() const {
@@ -203,24 +206,11 @@
     return false;
 }
 
-bool Type::isEqual(const Type *other) const {
-    if (other == NULL) {
-        return false;
-    }
-    if (other->getElement()->isEqual(getElement()) &&
-        other->getDimX() == mDimX &&
-        other->getDimY() == mDimY &&
-        other->getDimZ() == mDimZ &&
-        other->getDimLOD() == mDimLOD &&
-        other->getDimFaces() == mFaces) {
-        return true;
-    }
-    return false;
-}
+ObjectBaseRef<Type> Type::getTypeRef(Context *rsc, const Element *e,
+                                     uint32_t dimX, uint32_t dimY, uint32_t dimZ,
+                                     bool dimLOD, bool dimFaces) {
+    ObjectBaseRef<Type> returnRef;
 
-Type * Type::getType(Context *rsc, const Element *e,
-                     uint32_t dimX, uint32_t dimY, uint32_t dimZ,
-                     bool dimLOD, bool dimFaces) {
     TypeState * stc = &rsc->mStateType;
 
     ObjectBase::asyncLock();
@@ -232,14 +222,15 @@
         if (t->getDimZ() != dimZ) continue;
         if (t->getDimLOD() != dimLOD) continue;
         if (t->getDimFaces() != dimFaces) continue;
-        t->incUserRef();
+        returnRef.set(t);
         ObjectBase::asyncUnlock();
-        return t;
+        return returnRef;
     }
     ObjectBase::asyncUnlock();
 
 
     Type *nt = new Type(rsc);
+    returnRef.set(nt);
     nt->mElement.set(e);
     nt->mDimX = dimX;
     nt->mDimY = dimY;
@@ -247,25 +238,24 @@
     nt->mDimLOD = dimLOD;
     nt->mFaces = dimFaces;
     nt->compute();
-    nt->incUserRef();
 
     ObjectBase::asyncLock();
     stc->mTypes.push(nt);
     ObjectBase::asyncUnlock();
 
-    return nt;
+    return returnRef;
 }
 
-Type * Type::cloneAndResize1D(Context *rsc, uint32_t dimX) const {
-    return getType(rsc, mElement.get(), dimX,
-                   mDimY, mDimZ, mDimLOD, mFaces);
+ObjectBaseRef<Type> Type::cloneAndResize1D(Context *rsc, uint32_t dimX) const {
+    return getTypeRef(rsc, mElement.get(), dimX,
+                      mDimY, mDimZ, mDimLOD, mFaces);
 }
 
-Type * Type::cloneAndResize2D(Context *rsc,
+ObjectBaseRef<Type> Type::cloneAndResize2D(Context *rsc,
                               uint32_t dimX,
                               uint32_t dimY) const {
-    return getType(rsc, mElement.get(), dimX, dimY,
-                   mDimZ, mDimLOD, mFaces);
+    return getTypeRef(rsc, mElement.get(), dimX, dimY,
+                      mDimZ, mDimLOD, mFaces);
 }
 
 
diff --git a/rsType.h b/rsType.h
index 086db33..bc0d9ff 100644
--- a/rsType.h
+++ b/rsType.h
@@ -62,14 +62,20 @@
     virtual RsA3DClassID getClassId() const { return RS_A3D_CLASS_ID_TYPE; }
     static Type *createFromStream(Context *rsc, IStream *stream);
 
-    bool isEqual(const Type *other) const;
+    ObjectBaseRef<Type> cloneAndResize1D(Context *rsc, uint32_t dimX) const;
+    ObjectBaseRef<Type> cloneAndResize2D(Context *rsc, uint32_t dimX, uint32_t dimY) const;
 
-    Type * cloneAndResize1D(Context *rsc, uint32_t dimX) const;
-    Type * cloneAndResize2D(Context *rsc, uint32_t dimX, uint32_t dimY) const;
+    static ObjectBaseRef<Type> getTypeRef(Context *rsc, const Element *e,
+                                          uint32_t dimX, uint32_t dimY, uint32_t dimZ,
+                                          bool dimLOD, bool dimFaces);
 
-    static Type * getType(Context *rsc, const Element *e,
-                      uint32_t dimX, uint32_t dimY, uint32_t dimZ,
-                      bool dimLOD, bool dimFaces);
+    static Type* getType(Context *rsc, const Element *e,
+                         uint32_t dimX, uint32_t dimY, uint32_t dimZ,
+                         bool dimLOD, bool dimFaces) {
+        ObjectBaseRef<Type> type = getTypeRef(rsc, e, dimX, dimY, dimZ, dimLOD, dimFaces);
+        type->incUserRef();
+        return type.get();
+    }
 
 protected:
     struct LOD {
@@ -105,7 +111,7 @@
     uint32_t mLODCount;
 
 protected:
-    virtual void preDestroy();
+    virtual void preDestroy() const;
     virtual ~Type();
 
 private: