diff --git a/src/core/SkRecordDraw.cpp b/src/core/SkRecordDraw.cpp
index e1975e1..8bd12d0 100644
--- a/src/core/SkRecordDraw.cpp
+++ b/src/core/SkRecordDraw.cpp
@@ -48,11 +48,12 @@
 void SkRecordPartialDraw(const SkRecord& record,
                          SkCanvas* canvas,
                          const SkRect& clearRect,
-                         unsigned start, unsigned stop) {
+                         unsigned start, unsigned stop,
+                         const SkMatrix& initialCTM) {
     SkAutoCanvasRestore saveRestore(canvas, true /*save now, restore at exit*/);
 
     stop = SkTMin(stop, record.count());
-    SkRecords::PartialDraw draw(canvas, clearRect);
+    SkRecords::PartialDraw draw(canvas, clearRect, initialCTM);
     for (unsigned i = start; i < stop; i++) {
         record.visit<void>(i, draw);
     }
diff --git a/src/core/SkRecordDraw.h b/src/core/SkRecordDraw.h
index 033b76d..75921d1 100644
--- a/src/core/SkRecordDraw.h
+++ b/src/core/SkRecordDraw.h
@@ -11,6 +11,7 @@
 #include "SkBBoxHierarchy.h"
 #include "SkCanvas.h"
 #include "SkDrawPictureCallback.h"
+#include "SkMatrix.h"
 #include "SkRecord.h"
 
 // Fill a BBH to be used by SkRecordDraw to accelerate playback.
@@ -20,15 +21,21 @@
 void SkRecordDraw(const SkRecord&, SkCanvas*, const SkBBoxHierarchy*, SkDrawPictureCallback*);
 
 // Draw a portion of an SkRecord into an SkCanvas while replacing clears with drawRects.
-void SkRecordPartialDraw(const SkRecord&, SkCanvas*, const SkRect&, unsigned start, unsigned stop);
+// When drawing a portion of an SkRecord, the CTM on the passed-in canvas must be
+// the composition of the replay matrix with the record-time CTM (for the portion
+// of the record that is being replayed). For setMatrix calls to behave correctly,
+// the initialCTM parameter must be set to just the replay matrix.
+void SkRecordPartialDraw(const SkRecord&, SkCanvas*, const SkRect&, unsigned start, unsigned stop,
+                         const SkMatrix& initialCTM);
 
 namespace SkRecords {
 
 // This is an SkRecord visitor that will draw that SkRecord to an SkCanvas.
 class Draw : SkNoncopyable {
 public:
-    explicit Draw(SkCanvas* canvas)
-        : fInitialCTM(canvas->getTotalMatrix()), fCanvas(canvas) {}
+    explicit Draw(SkCanvas* canvas, const SkMatrix* initialCTM = NULL)
+        : fInitialCTM(initialCTM ? *initialCTM : canvas->getTotalMatrix())
+        , fCanvas(canvas) {}
 
     template <typename T> void operator()(const T& r) {
         this->draw(r);
@@ -45,8 +52,8 @@
 // Used by SkRecordPartialDraw.
 class PartialDraw : public Draw {
 public:
-    PartialDraw(SkCanvas* canvas, const SkRect& clearRect)
-        : INHERITED(canvas), fClearRect(clearRect) {}
+    PartialDraw(SkCanvas* canvas, const SkRect& clearRect, const SkMatrix& initialCTM)
+        : INHERITED(canvas, &initialCTM), fClearRect(clearRect) {}
 
     // Same as Draw for all ops except Clear.
     template <typename T> void operator()(const T& r) {
diff --git a/src/gpu/GrLayerCache.cpp b/src/gpu/GrLayerCache.cpp
index 0c2e6b0..8e61cb7 100644
--- a/src/gpu/GrLayerCache.cpp
+++ b/src/gpu/GrLayerCache.cpp
@@ -119,28 +119,31 @@
 
 GrCachedLayer* GrLayerCache::createLayer(uint32_t pictureID, 
                                          int start, int stop, 
+                                         const SkIPoint& offset,
                                          const SkMatrix& ctm) {
     SkASSERT(pictureID != SK_InvalidGenID && start > 0 && stop > 0);
 
-    GrCachedLayer* layer = SkNEW_ARGS(GrCachedLayer, (pictureID, start, stop, ctm));
+    GrCachedLayer* layer = SkNEW_ARGS(GrCachedLayer, (pictureID, start, stop, offset, ctm));
     fLayerHash.add(layer);
     return layer;
 }
 
 GrCachedLayer* GrLayerCache::findLayer(uint32_t pictureID,
                                        int start, int stop, 
+                                       const SkIPoint& offset,
                                        const SkMatrix& ctm) {
     SkASSERT(pictureID != SK_InvalidGenID && start > 0 && stop > 0);
-    return fLayerHash.find(GrCachedLayer::Key(pictureID, start, stop, ctm));
+    return fLayerHash.find(GrCachedLayer::Key(pictureID, start, stop, offset, ctm));
 }
 
 GrCachedLayer* GrLayerCache::findLayerOrCreate(uint32_t pictureID,
                                                int start, int stop,
+                                               const SkIPoint& offset,
                                                const SkMatrix& ctm) {
     SkASSERT(pictureID != SK_InvalidGenID && start > 0 && stop > 0);
-    GrCachedLayer* layer = fLayerHash.find(GrCachedLayer::Key(pictureID, start, stop, ctm));
+    GrCachedLayer* layer = fLayerHash.find(GrCachedLayer::Key(pictureID, start, stop, offset, ctm));
     if (NULL == layer) {
-        layer = this->createLayer(pictureID, start, stop, ctm);
+        layer = this->createLayer(pictureID, start, stop, offset, ctm);
     }
 
     return layer;
diff --git a/src/gpu/GrLayerCache.h b/src/gpu/GrLayerCache.h
index fc1467e..15fa246 100644
--- a/src/gpu/GrLayerCache.h
+++ b/src/gpu/GrLayerCache.h
@@ -51,15 +51,17 @@
 public:
     // For SkTDynamicHash
     struct Key {
-        Key(uint32_t pictureID, int start, int stop, const SkMatrix& ctm) 
+        Key(uint32_t pictureID, int start, int stop, const SkIPoint& offset, const SkMatrix& ctm) 
         : fPictureID(pictureID)
         , fStart(start)
         , fStop(stop)
+        , fOffset(offset)
         , fCTM(ctm) {
             fCTM.getType(); // force initialization of type so hashes match
 
             // Key needs to be tightly packed.
             GR_STATIC_ASSERT(sizeof(Key) == sizeof(uint32_t) + 2 * sizeof(int) + 
+                                            2 * sizeof(int32_t) +
                                             9 * sizeof(SkScalar) + sizeof(uint32_t));
         }
 
@@ -67,12 +69,14 @@
             return fPictureID == other.fPictureID &&
                    fStart == other.fStart &&
                    fStop == other.fStop &&
+                   fOffset == other.fOffset &&
                    fCTM.cheapEqualTo(other.fCTM);
         }
 
         uint32_t pictureID() const { return fPictureID; }
         int start() const { return fStart; }
         int stop() const { return fStop; }
+        const SkIPoint& offset() const { return fOffset; }
         const SkMatrix& ctm() const { return fCTM; }
 
     private:
@@ -81,6 +85,8 @@
         // The range of commands in the picture this layer represents
         const int      fStart;
         const int      fStop;
+        // The offset of the layer in device space
+        const SkIPoint fOffset;
         // The CTM applied to this layer in the picture
         SkMatrix       fCTM;
     };
@@ -91,8 +97,9 @@
     }
 
     // GrCachedLayer proper
-    GrCachedLayer(uint32_t pictureID, int start, int stop, const SkMatrix& ctm) 
-        : fKey(pictureID, start, stop, ctm)
+    GrCachedLayer(uint32_t pictureID, int start, int stop,
+                  const SkIPoint& offset, const SkMatrix& ctm) 
+        : fKey(pictureID, start, stop, offset, ctm)
         , fTexture(NULL)
         , fRect(GrIRect16::MakeEmpty())
         , fPlot(NULL)
@@ -107,6 +114,7 @@
     uint32_t pictureID() const { return fKey.pictureID(); }
     int start() const { return fKey.start(); }
     int stop() const { return fKey.stop(); }
+    const SkIPoint& offset() const { return fKey.offset(); }
     const SkMatrix& ctm() const { return fKey.ctm(); }
 
     void setTexture(GrTexture* texture, const GrIRect16& rect) {
@@ -171,9 +179,11 @@
     // elements by the GrContext
     void freeAll();
 
-    GrCachedLayer* findLayer(uint32_t pictureID, int start, int stop, const SkMatrix& ctm);
+    GrCachedLayer* findLayer(uint32_t pictureID, int start, int stop, 
+                             const SkIPoint& offset, const SkMatrix& ctm);
     GrCachedLayer* findLayerOrCreate(uint32_t pictureID,
                                      int start, int stop, 
+                                     const SkIPoint& offset,
                                      const SkMatrix& ctm);
 
     // Inform the cache that layer's cached image is now required. 
@@ -227,9 +237,9 @@
     int fPlotLocks[kNumPlotsX * kNumPlotsY];
 
     void initAtlas();
-    GrCachedLayer* createLayer(uint32_t pictureID, int start, int stop, const SkMatrix& ctm);
+    GrCachedLayer* createLayer(uint32_t pictureID, int start, int stop, 
+                               const SkIPoint& offset, const SkMatrix& ctm);
 
-public:
     void purgeAll();
 
     // Remove all the layers (and unlock any resources) associated with 'pictureID'
diff --git a/src/gpu/GrLayerHoister.cpp b/src/gpu/GrLayerHoister.cpp
index 49ca338..a29d91a 100644
--- a/src/gpu/GrLayerHoister.cpp
+++ b/src/gpu/GrLayerHoister.cpp
@@ -91,11 +91,18 @@
             // info.fCTM maps the layer's top/left to the origin.
             // Since this layer is atlased, the top/left corner needs
             // to be offset to the correct location in the backing texture.
+            SkMatrix initialCTM;
+            initialCTM.setTranslate(SkIntToScalar(-layer->offset().fX), 
+                                    SkIntToScalar(-layer->offset().fY));
+            initialCTM.postTranslate(bound.fLeft, bound.fTop);
+            
+            atlasCanvas->translate(SkIntToScalar(-layer->offset().fX), 
+                                   SkIntToScalar(-layer->offset().fY));
             atlasCanvas->translate(bound.fLeft, bound.fTop);
             atlasCanvas->concat(layer->ctm());
 
             SkRecordPartialDraw(*picture->fRecord.get(), atlasCanvas, bound,
-                                layer->start()+1, layer->stop());
+                                layer->start()+1, layer->stop(), initialCTM);
 
             atlasCanvas->restore();
         }
@@ -126,10 +133,16 @@
 
         layerCanvas->clear(SK_ColorTRANSPARENT);
 
+        SkMatrix initialCTM;
+        initialCTM.setTranslate(SkIntToScalar(-layer->offset().fX), 
+                                SkIntToScalar(-layer->offset().fY));
+
+        layerCanvas->translate(SkIntToScalar(-layer->offset().fX), 
+                               SkIntToScalar(-layer->offset().fY));
         layerCanvas->concat(layer->ctm());
 
         SkRecordPartialDraw(*picture->fRecord.get(), layerCanvas, bound,
-                            layer->start()+1, layer->stop());
+                            layer->start()+1, layer->stop(), initialCTM);
 
         layerCanvas->flush();
     }
@@ -151,6 +164,7 @@
         GrCachedLayer* layer = layerCache->findLayer(picture->uniqueID(),
                                                      info.fSaveLayerOpID,
                                                      info.fRestoreOpID,
+                                                     info.fOffset,
                                                      info.fOriginXform);
         layerCache->unlock(layer);
     }
diff --git a/src/gpu/GrPictureUtils.cpp b/src/gpu/GrPictureUtils.cpp
index 8810eb8..0cc1f1e 100644
--- a/src/gpu/GrPictureUtils.cpp
+++ b/src/gpu/GrPictureUtils.cpp
@@ -126,8 +126,6 @@
             dst.fOffset = SkIPoint::Make(newClip.fLeft, newClip.fTop);
             dst.fOriginXform = *fCTM;
             dst.fOriginXform.postConcat(src.fOriginXform);
-            dst.fOriginXform.postTranslate(SkIntToScalar(-newClip.fLeft), 
-                                           SkIntToScalar(-newClip.fTop));
 
             if (NULL == src.fPaint) {
                 dst.fPaint = NULL;
@@ -191,8 +189,6 @@
         slInfo.fSize = SkISize::Make(si.fBounds.width(), si.fBounds.height());
         slInfo.fOffset = SkIPoint::Make(si.fBounds.fLeft, si.fBounds.fTop);
         slInfo.fOriginXform = *fCTM;
-        slInfo.fOriginXform.postTranslate(SkIntToScalar(-si.fBounds.fLeft),
-                                          SkIntToScalar(-si.fBounds.fTop));
 
         if (NULL == si.fPaint) {
             slInfo.fPaint = NULL;
diff --git a/src/gpu/GrPictureUtils.h b/src/gpu/GrPictureUtils.h
index 1c6897c..55b294a 100644
--- a/src/gpu/GrPictureUtils.h
+++ b/src/gpu/GrPictureUtils.h
@@ -26,8 +26,8 @@
         uint32_t fPictureID;
         // The size of the saveLayer
         SkISize fSize;
-        // The matrix state in which this layer's draws must occur. It already incorporates
-        // the translation needed to map the layer's top-left point to the origin.
+        // The matrix state in which this layer's draws must occur. It does not
+        // include the translation needed to map the layer's top-left point to the origin.
         SkMatrix fOriginXform;
         // The offset that needs to be passed to drawBitmap to correctly
         // position the pre-rendered layer. It is in device space.
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 122af65..b719f84 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -1887,7 +1887,8 @@
 
             GrCachedLayer* layer = fContext->getLayerCache()->findLayerOrCreate(picture->uniqueID(), 
                                                                                 info.fSaveLayerOpID, 
-                                                                                info.fRestoreOpID, 
+                                                                                info.fRestoreOpID,
+                                                                                info.fOffset,
                                                                                 info.fOriginXform);
 
             GrReplacements::ReplacementInfo* layerInfo = replacements.push();
diff --git a/tests/GpuLayerCacheTest.cpp b/tests/GpuLayerCacheTest.cpp
index 83f31f5..66117e6 100644
--- a/tests/GpuLayerCacheTest.cpp
+++ b/tests/GpuLayerCacheTest.cpp
@@ -33,9 +33,11 @@
     for (int i = 0; i < numToAdd; ++i) {
         GrCachedLayer* layer = cache->findLayerOrCreate(picture.uniqueID(), 
                                                         idOffset+i+1, idOffset+i+2, 
+                                                        SkIPoint::Make(0, 0),
                                                         SkMatrix::I());
         REPORTER_ASSERT(reporter, layer);
-        GrCachedLayer* temp = cache->findLayer(picture.uniqueID(), idOffset+i+1, idOffset+i+2, SkMatrix::I());
+        GrCachedLayer* temp = cache->findLayer(picture.uniqueID(), idOffset+i+1, idOffset+i+2, 
+                                               SkIPoint::Make(0, 0), SkMatrix::I());
         REPORTER_ASSERT(reporter, temp == layer);
 
         REPORTER_ASSERT(reporter, TestingAccess::NumLayers(cache) == idOffset + i + 1);
@@ -99,7 +101,8 @@
         create_layers(reporter, &cache, *picture, kInitialNumLayers, 0);
 
         for (int i = 0; i < kInitialNumLayers; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, SkMatrix::I());
+            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, 
+                                                   SkIPoint::Make(0, 0), SkMatrix::I());
             REPORTER_ASSERT(reporter, layer);
 
             lock_layer(reporter, &cache, layer);
@@ -116,14 +119,15 @@
 
         // Unlock the textures
         for (int i = 0; i < kInitialNumLayers; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, SkMatrix::I());
+            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, 
+                                                   SkIPoint::Make(0, 0), SkMatrix::I());
             REPORTER_ASSERT(reporter, layer);
-
             cache.unlock(layer);
         }
 
         for (int i = 0; i < kInitialNumLayers; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, SkMatrix::I());
+            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, 
+                                                   SkIPoint::Make(0, 0), SkMatrix::I());
             REPORTER_ASSERT(reporter, layer);
 
             REPORTER_ASSERT(reporter, !layer->locked());
@@ -144,7 +148,7 @@
             create_layers(reporter, &cache, *picture, 1, kInitialNumLayers);
             GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), 
                                                    kInitialNumLayers+1, kInitialNumLayers+2, 
-                                                   SkMatrix::I());
+                                                   SkIPoint::Make(0, 0), SkMatrix::I());
             REPORTER_ASSERT(reporter, layer);
 
             lock_layer(reporter, &cache, layer);
@@ -152,7 +156,8 @@
         }
 
         for (int i = 0; i < kInitialNumLayers+1; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, SkMatrix::I());
+            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, i+2, 
+                                                   SkIPoint::Make(0, 0), SkMatrix::I());
             // 3 old layers plus the new one should be in the atlas.
             if (1 == i || 2 == i || 3 == i || 5 == i) {
                 REPORTER_ASSERT(reporter, layer);
diff --git a/tests/PictureTest.cpp b/tests/PictureTest.cpp
index 3bc91d7..ca2d620 100644
--- a/tests/PictureTest.cpp
+++ b/tests/PictureTest.cpp
@@ -1014,7 +1014,7 @@
             REPORTER_ASSERT(reporter, pict->uniqueID() == info2.fPictureID);
             REPORTER_ASSERT(reporter, kWidth / 2 == info2.fSize.fWidth &&
                                       kHeight/2 == info2.fSize.fHeight); // bound reduces size
-            REPORTER_ASSERT(reporter, info2.fOriginXform.isIdentity());
+            REPORTER_ASSERT(reporter, !info2.fOriginXform.isIdentity());
             REPORTER_ASSERT(reporter, kWidth/2 == info2.fOffset.fX &&   // translated
                                       kHeight/2 == info2.fOffset.fY);
             REPORTER_ASSERT(reporter, NULL == info1.fPaint);
diff --git a/tests/RecordDrawTest.cpp b/tests/RecordDrawTest.cpp
index 9e42e0a..70f0250 100644
--- a/tests/RecordDrawTest.cpp
+++ b/tests/RecordDrawTest.cpp
@@ -190,7 +190,7 @@
 
     SkRecord rerecord;
     SkRecorder canvas(&rerecord, kWidth, kHeight);
-    SkRecordPartialDraw(record, &canvas, r1, 1, 2); // replay just drawRect of r2
+    SkRecordPartialDraw(record, &canvas, r1, 1, 2, SkMatrix::I()); // replay just drawRect of r2
 
     REPORTER_ASSERT(r, 3 == rerecord.count());
     assert_type<SkRecords::Save>     (r, rerecord, 0);
@@ -213,7 +213,7 @@
 
     SkRecord rerecord;
     SkRecorder canvas(&rerecord, kWidth, kHeight);
-    SkRecordPartialDraw(record, &canvas, rect, 0, 1); // replay just the clear
+    SkRecordPartialDraw(record, &canvas, rect, 0, 1, SkMatrix::I()); // replay just the clear
 
     REPORTER_ASSERT(r, 3 == rerecord.count());
     assert_type<SkRecords::Save>    (r, rerecord, 0);
