diff --git a/include/gpu/SkGpuDevice.h b/include/gpu/SkGpuDevice.h
index 409e3b3..ffc24a8 100644
--- a/include/gpu/SkGpuDevice.h
+++ b/include/gpu/SkGpuDevice.h
@@ -29,14 +29,11 @@
 public:
     /**
      *  New device that will create an offscreen renderTarget based on the
-     *  config, width, height.
-     *
-     *  usage is a special flag that should only be set by SkCanvas
-     *  internally.
+     *  config, width, height. The device's storage will not count against
+     *  the GrContext's texture cache budget. The device's pixels will be
+     *  uninitialized.
      */
-    SkGpuDevice(GrContext*, SkBitmap::Config,
-                int width, int height,
-                SkDevice::Usage usage = SkDevice::kGeneral_Usage);
+    SkGpuDevice(GrContext*, SkBitmap::Config, int width, int height);
 
     /**
      *  New device that will render to the specified renderTarget.
@@ -118,14 +115,8 @@
 
 protected:
     typedef GrContext::TextureCacheEntry TexCache;
-    enum TexType {
-        kBitmap_TexType,
-        kDeviceRenderTarget_TexType,
-        kSaveLayerDeviceRenderTarget_TexType
-    };
     TexCache lockCachedTexture(const SkBitmap& bitmap,
-                               const GrSamplerState* sampler,
-                               TexType type = kBitmap_TexType);
+                               const GrSamplerState* sampler);
     bool isBitmapInTextureCache(const SkBitmap& bitmap,
                                 const GrSamplerState& sampler) const;
     void unlockCachedTexture(TexCache);
@@ -152,6 +143,9 @@
     // called from rt and tex cons
     void initFromRenderTarget(GrContext*, GrRenderTarget*);
 
+    // used by onCreateCompatibleDevice
+    SkGpuDevice(GrContext*, GrTexture* texture, TexCache, bool needClear);
+
     // override from SkDevice
     virtual SkDevice* onCreateCompatibleDevice(SkBitmap::Config config,
                                                int width, int height,
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index e13a0ea..9e4a7b6 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -22,7 +22,7 @@
 #include "SkTLazy.h"
 #include "SkUtils.h"
 
-#define CACHE_LAYER_TEXTURES 1
+#define CACHE_COMPATIBLE_DEVICE_TEXTURES 1
 
 #if 0
     extern bool (*gShouldDrawProc)();
@@ -30,9 +30,17 @@
         do {                                                    \
             if (gShouldDrawProc && !gShouldDrawProc()) return;  \
             this->prepareRenderTarget(draw);                    \
+            GrAssert(!fNeedClear);                              \
         } while (0)
 #else
-    #define CHECK_SHOULD_DRAW(draw) this->prepareRenderTarget(draw)
+    // Prepares the render target for 'draw', then asserts that no deferred
+    // clear is still pending. Wrapped in do/while so the two statements expand
+    // as one and stay together under an unbraced if/else.
+    #define CHECK_SHOULD_DRAW(draw)                 \
+        do {                                        \
+            this->prepareRenderTarget(draw);        \
+            GrAssert(!fNeedClear);                  \
+        } while (0)
 #endif
 
 // we use the same texture slot on GrPaint for bitmaps and shaders
@@ -59,6 +61,15 @@
 // requiring texture domain clamping to prevent color bleeding when drawing 
 // a sub region of a larger source image.
 #define COLOR_BLEED_TOLERANCE SkFloatToScalar(0.001f)
+
+// Performs the clear that fNeedClear marks as still pending, if any.
+#define DO_DEFERRED_CLEAR       \
+    do {                        \
+        if (fNeedClear) {       \
+            this->clear(0x0);   \
+            fNeedClear = false; \
+        }                       \
+    } while (false)
 ///////////////////////////////////////////////////////////////////////////////
 
 class SkGpuDevice::SkAutoCachedTexture : public ::SkNoncopyable {
@@ -193,9 +204,11 @@
     fTextContext = NULL;
 }
 
-SkGpuDevice::SkGpuDevice(GrContext* context, SkBitmap::Config config, int width,
-                         int height, Usage usage)
-: SkDevice(config, width, height, false /*isOpaque*/) {
+SkGpuDevice::SkGpuDevice(GrContext* context,
+                         SkBitmap::Config config,
+                         int width,
+                         int height)
+    : SkDevice(config, width, height, false /*isOpaque*/) {
     fNeedPrepareRenderTarget = false;
     fDrawProcs = NULL;
 
@@ -212,19 +225,6 @@
     SkBitmap bm;
     bm.setConfig(config, width, height);
 
-#if CACHE_LAYER_TEXTURES
-    TexType type = (kSaveLayer_Usage == usage) ?
-                            kSaveLayerDeviceRenderTarget_TexType :
-                            kDeviceRenderTarget_TexType;
-    fCache = this->lockCachedTexture(bm, NULL, type);
-    fTexture = fCache.texture();
-    if (fTexture) {
-        SkASSERT(NULL != fTexture->asRenderTarget());
-        // hold a ref directly on fTexture (even though fCache has one) to match
-        // other constructor paths. Simplifies cleanup.
-        fTexture->ref();
-    }
-#else
     const GrTextureDesc desc = {
         kRenderTarget_GrTextureFlagBit,
         width,
@@ -234,16 +234,13 @@
     };
 
     fTexture = fContext->createUncachedTexture(desc, NULL, 0);
-#endif
+
     if (NULL != fTexture) {
         fRenderTarget = fTexture->asRenderTarget();
         fRenderTarget->ref();
 
         GrAssert(NULL != fRenderTarget);
 
-        // we defer the actual clear until our gainFocus()
-        fNeedClear = true;
-
         // wrap the bitmap with a pixelref to expose our texture
         SkGrTexturePixelRef* pr = new SkGrTexturePixelRef(fTexture);
         this->setPixelRef(pr, 0)->unref();
@@ -278,6 +275,7 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkGpuDevice::makeRenderTargetCurrent() {
+    DO_DEFERRED_CLEAR;
     fContext->setRenderTarget(fRenderTarget);
     fContext->flush(true);
     fNeedPrepareRenderTarget = true;
@@ -310,6 +308,7 @@
 bool SkGpuDevice::onReadPixels(const SkBitmap& bitmap,
                                int x, int y,
                                SkCanvas::Config8888 config8888) {
+    DO_DEFERRED_CLEAR;
     SkASSERT(SkBitmap::kARGB_8888_Config == bitmap.config());
     SkASSERT(!bitmap.isNull());
     SkASSERT(SkIRect::MakeWH(this->width(), this->height()).contains(SkIRect::MakeXYWH(x, y, bitmap.width(), bitmap.height())));
@@ -396,13 +395,11 @@
 
     convert_matrixclip(fContext, matrix, clipStack, clip, this->getOrigin());
 
-    if (fNeedClear) {
-        fContext->clear(NULL, 0x0);
-        fNeedClear = false;
-    }
+    DO_DEFERRED_CLEAR;
 }
 
 SkGpuRenderTarget* SkGpuDevice::accessRenderTarget() {
+    DO_DEFERRED_CLEAR;
     return (SkGpuRenderTarget*)fRenderTarget;
 }
 
@@ -596,6 +593,10 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 void SkGpuDevice::clear(SkColor color) {
+    // Target this device explicitly; the context may currently point elsewhere.
+    fContext->setRenderTarget(fRenderTarget);
     fContext->clear(NULL, color);
+    // An explicit clear supersedes a pending deferred clear; without this reset
+    // the next DO_DEFERRED_CLEAR would wipe 'color' back to transparent.
+    fNeedClear = false;
 }
 
@@ -1509,12 +1507,19 @@
                                         GR_Scalar1 * h / texture->height()));
 }
 
-void SkGpuDevice::drawDevice(const SkDraw& draw, SkDevice* dev,
+void SkGpuDevice::drawDevice(const SkDraw& draw, SkDevice* device,
                             int x, int y, const SkPaint& paint) {
+    // clear of the source device must occur before CHECK_SHOULD_DRAW
+    SkGpuDevice* dev = static_cast<SkGpuDevice*>(device);
+    if (dev->fNeedClear) {
+        // TODO: could check here whether we really need to draw at all
+        dev->clear(0x0);
+    }
+
     CHECK_SHOULD_DRAW(draw);
 
     GrPaint grPaint;
-    if (!((SkGpuDevice*)dev)->bindDeviceAsTexture(&grPaint) ||
+    if (!dev->bindDeviceAsTexture(&grPaint) ||
         !skPaint2GrPaintNoShader(paint, true, false, &grPaint)) {
         return;
     }
@@ -1809,55 +1814,35 @@
 }
 
 void SkGpuDevice::flush() {
+    DO_DEFERRED_CLEAR;
     fContext->resolveRenderTarget(fRenderTarget);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
 
-SkGpuDevice::TexCache SkGpuDevice::lockCachedTexture(const SkBitmap& bitmap,
-                                            const GrSamplerState* sampler,
-                                            TexType type) {
+SkGpuDevice::TexCache SkGpuDevice::lockCachedTexture(
+                                            const SkBitmap& bitmap,
+                                            const GrSamplerState* sampler) {
     GrContext::TextureCacheEntry entry;
     GrContext* ctx = this->context();
 
-    if (kBitmap_TexType != type) {
-        const GrTextureDesc desc = {
-            kRenderTarget_GrTextureFlagBit,
-            bitmap.width(),
-            bitmap.height(),
-            SkGr::Bitmap2PixelConfig(bitmap),
-            0 // samples
-        };
-        GrContext::ScratchTexMatch match;
-        if (kSaveLayerDeviceRenderTarget_TexType == type) {
-            // we know layers will only be drawn through drawDevice.
-            // drawDevice has been made to work with content embedded in a
-            // larger texture so its okay to use the approximate version.
-            match = GrContext::kApprox_ScratchTexMatch;
-        } else {
-            SkASSERT(kDeviceRenderTarget_TexType == type);
-            match = GrContext::kExact_ScratchTexMatch;
-        }
-        entry = ctx->lockScratchTexture(desc, match);
-    } else {
-        if (!bitmap.isVolatile()) {
-            GrContext::TextureKey key = bitmap.getGenerationID();
-            key |= ((uint64_t) bitmap.pixelRefOffset()) << 32;
+    if (!bitmap.isVolatile()) {
+        GrContext::TextureKey key = bitmap.getGenerationID();
+        key |= ((uint64_t) bitmap.pixelRefOffset()) << 32;
 
-            entry = ctx->findAndLockTexture(key, bitmap.width(),
-                                            bitmap.height(), sampler);
-            if (NULL == entry.texture()) {
-                entry = sk_gr_create_bitmap_texture(ctx, key, sampler,
-                                                    bitmap);
-            }
-        } else {
-            entry = sk_gr_create_bitmap_texture(ctx, gUNCACHED_KEY,
-                                                sampler, bitmap);
-        }
+        entry = ctx->findAndLockTexture(key, bitmap.width(),
+                                        bitmap.height(), sampler);
         if (NULL == entry.texture()) {
-            GrPrintf("---- failed to create texture for cache [%d %d]\n",
-                     bitmap.width(), bitmap.height());
+            entry = sk_gr_create_bitmap_texture(ctx, key, sampler,
+                                                bitmap);
         }
+    } else {
+        entry = sk_gr_create_bitmap_texture(ctx, gUNCACHED_KEY,
+                                            sampler, bitmap);
+    }
+    if (NULL == entry.texture()) {
+        GrPrintf("---- failed to create texture for cache [%d %d]\n",
+                    bitmap.width(), bitmap.height());
     }
     return entry;
 }
@@ -1880,8 +1865,53 @@
                                                 int width, int height,
                                                 bool isOpaque,
                                                 Usage usage) {
-    return SkNEW_ARGS(SkGpuDevice,(this->context(), config,
-                                   width, height, usage));
+    GrTextureDesc desc;
+    desc.fConfig = fRenderTarget->config();
+    desc.fFlags = kRenderTarget_GrTextureFlagBit;
+    desc.fWidth = width;
+    desc.fHeight = height;
+    desc.fSampleCnt = fRenderTarget->numSamples();
+
+    GrContext::TextureCacheEntry cacheEntry;
+    GrTexture* texture;
+    SkAutoTUnref<GrTexture> tunref;
+    // Skia's convention is to only clear a device if it is a non-opaque layer.
+    bool needClear = !isOpaque && kSaveLayer_Usage == usage;
+
+#if CACHE_COMPATIBLE_DEVICE_TEXTURES
+    // layers are never drawn in repeat modes, so we can request an approx
+    // match and ignore any padding.
+    GrContext::ScratchTexMatch matchType = (kSaveLayer_Usage == usage) ?
+                                    GrContext::kApprox_ScratchTexMatch :
+                                    GrContext::kExact_ScratchTexMatch;
+    cacheEntry = fContext->lockScratchTexture(desc, matchType);
+    texture = cacheEntry.texture();
+#else
+    tunref.reset(fContext->createUncachedTexture(desc, NULL, 0));
+    texture = tunref.get();
+#endif
+    if (texture) {
+        return SkNEW_ARGS(SkGpuDevice,(fContext,
+                                       texture,
+                                       cacheEntry,
+                                       needClear));
+    } else {
+        GrPrintf("---- failed to create compatible device texture [%d %d]\n",
+                    width, height);
+        return NULL;
+    }
+}
+
+SkGpuDevice::SkGpuDevice(GrContext* context,
+                         GrTexture* texture,
+                         TexCache cacheEntry,
+                         bool needClear)
+    : SkDevice(make_bitmap(context, texture->asRenderTarget())) {
+    GrAssert(texture && texture->asRenderTarget());
+    GrAssert(NULL == cacheEntry.texture() || texture == cacheEntry.texture());
+    this->initFromRenderTarget(context, texture->asRenderTarget());
+    fCache = cacheEntry;
+    fNeedClear = needClear;
 }
 
 GrTextContext* SkGpuDevice::getTextContext() {
