Use a variable-length key (rather than the accumulated matrix) as the saveLayer hoisting key

Adding the rendering canvas' CTM to the layer hoisting key (see "Add support for hoisting layers in pictures drawn with a matrix" - https://codereview.chromium.org/748853002/) increased the cache miss rate due to accumulated floating-point error. This CL fixes part of the issue by using the chain of operation indices leading to each saveLayer as the key. The canvas' CTM must still form part of the key but, because only the initial matrix is stored rather than the matrix accumulated down to each saveLayer, it should be less subject to accumulated error.

BUG=skia:2315

Review URL: https://codereview.chromium.org/753253002
diff --git a/expectations/gm/ignored-tests.txt b/expectations/gm/ignored-tests.txt
index 4b7e750..66ddaa6 100644
--- a/expectations/gm/ignored-tests.txt
+++ b/expectations/gm/ignored-tests.txt
@@ -57,5 +57,7 @@
 #mtklein
 verylargebitmap
 
-
+#robertphillips
+multipicturedraw_sierpinski_simple
+multipicturedraw_sierpinski_tiled
 
diff --git a/src/core/SkLayerInfo.h b/src/core/SkLayerInfo.h
index dd0eaf0..6ed77c9 100644
--- a/src/core/SkLayerInfo.h
+++ b/src/core/SkLayerInfo.h
@@ -18,8 +18,8 @@
     // Information about a given saveLayer/restore block in an SkPicture
     class BlockInfo {
     public:
-        BlockInfo() : fPicture(NULL), fPaint(NULL) {}
-        ~BlockInfo() { SkSafeUnref(fPicture); SkDELETE(fPaint); }
+        BlockInfo() : fPicture(NULL), fPaint(NULL), fKey(NULL), fKeySize(0) {}
+        ~BlockInfo() { SkSafeUnref(fPicture); SkDELETE(fPaint); SkDELETE_ARRAY(fKey); }
 
         // The picture owning the layer. If the owning picture is the top-most
         // one (i.e., the picture for which this SkLayerInfo was created) then
@@ -50,6 +50,11 @@
         bool    fHasNestedLayers;
         // True if this saveLayer is nested within another. False otherwise.
         bool    fIsNested;
+        // The variable length key for this saveLayer block. It stores the
+        // thread of drawPicture and saveLayer operation indices that lead to this
+        // saveLayer (including its own op index). The BlockInfo owns this memory.
+        int*    fKey;
+        int     fKeySize;  // # of ints
     };
 
     SkLayerInfo(Key key) : INHERITED(key) { }
diff --git a/src/core/SkMultiPictureDraw.cpp b/src/core/SkMultiPictureDraw.cpp
index 51feae5..bd1e8fe 100644
--- a/src/core/SkMultiPictureDraw.cpp
+++ b/src/core/SkMultiPictureDraw.cpp
@@ -170,8 +170,8 @@
 
             GrReplacements replacements;
 
-            GrLayerHoister::ConvertLayersToReplacements(needRendering, &replacements);
-            GrLayerHoister::ConvertLayersToReplacements(recycled, &replacements);
+            GrLayerHoister::ConvertLayersToReplacements(picture, needRendering, &replacements);
+            GrLayerHoister::ConvertLayersToReplacements(picture, recycled, &replacements);
 
             // Render the entire picture using new layers
             GrRecordReplaceDraw(picture, canvas, &replacements, initialMatrix, NULL);
diff --git a/src/core/SkRecordDraw.cpp b/src/core/SkRecordDraw.cpp
index 5e1fe7f..5407480 100644
--- a/src/core/SkRecordDraw.cpp
+++ b/src/core/SkRecordDraw.cpp
@@ -692,6 +692,13 @@
             dst.fRestoreOpID = src.fRestoreOpID;
             dst.fHasNestedLayers = src.fHasNestedLayers;
             dst.fIsNested = fSaveLayersInStack > 0 || src.fIsNested;
+
+            // Store 'saveLayer ops from enclosing picture' + drawPict op + 'ops from sub-picture'
+            dst.fKeySize = fSaveLayerOpStack.count() + src.fKeySize + 1;
+            dst.fKey = SkNEW_ARRAY(int, dst.fKeySize);
+            memcpy(dst.fKey, fSaveLayerOpStack.begin(), fSaveLayerOpStack.count() * sizeof(int));
+            dst.fKey[fSaveLayerOpStack.count()] = fFillBounds.currentOp();
+            memcpy(&dst.fKey[fSaveLayerOpStack.count()+1], src.fKey, src.fKeySize * sizeof(int));
         }
     }
 
@@ -724,6 +731,7 @@
         if (isSaveLayer) {
             this->updateStackForSaveLayer();
             ++fSaveLayersInStack;
+            fSaveLayerOpStack.push(fFillBounds.currentOp());
         }
 
         fSaveLayerStack.push(SaveLayerInfo(fFillBounds.currentOp(), isSaveLayer, paint));
@@ -735,6 +743,8 @@
             return;
         }
 
+        SkASSERT(fSaveLayersInStack == fSaveLayerOpStack.count());
+
         SaveLayerInfo sli;
         fSaveLayerStack.pop(&sli);
 
@@ -758,12 +768,20 @@
         block.fRestoreOpID = fFillBounds.currentOp();
         block.fHasNestedLayers = sli.fHasNestedSaveLayer;
         block.fIsNested = fSaveLayersInStack > 0;
+
+        block.fKeySize = fSaveLayerOpStack.count();
+        block.fKey = SkNEW_ARRAY(int, block.fKeySize);
+        memcpy(block.fKey, fSaveLayerOpStack.begin(), block.fKeySize * sizeof(int));
+
+        fSaveLayerOpStack.pop();
     }
 
     // Used to collect saveLayer information for layer hoisting
-    int                   fSaveLayersInStack;
+    int                      fSaveLayersInStack;
     SkTDArray<SaveLayerInfo> fSaveLayerStack;
-    SkLayerInfo*          fAccelData;
+    // The op code indices of all the currently active saveLayers
+    SkTDArray<int>           fSaveLayerOpStack;
+    SkLayerInfo*             fAccelData;
     const SkPicture::SnapshotArray* fPictList;
 
     SkRecords::FillBounds fFillBounds;
diff --git a/src/gpu/GrLayerCache.cpp b/src/gpu/GrLayerCache.cpp
index 6b17675..5374d11 100644
--- a/src/gpu/GrLayerCache.cpp
+++ b/src/gpu/GrLayerCache.cpp
@@ -13,7 +13,6 @@
 #ifdef SK_DEBUG
 void GrCachedLayer::validate(const GrTexture* backingTexture) const {
     SkASSERT(SK_InvalidGenID != fKey.pictureID());
-    SkASSERT(fKey.start() >= 0);
 
     if (fTexture) {
         // If the layer is in some texture then it must occupy some rectangle
@@ -125,32 +124,35 @@
 GrCachedLayer* GrLayerCache::createLayer(uint32_t pictureID,
                                          int start, int stop,
                                          const SkIRect& bounds,
-                                         const SkMatrix& ctm,
+                                         const SkMatrix& initialMat,
+                                         const int* key,
+                                         int keySize,
                                          const SkPaint* paint) {
     SkASSERT(pictureID != SK_InvalidGenID && start >= 0 && stop > 0);
 
-    GrCachedLayer* layer = SkNEW_ARGS(GrCachedLayer, (pictureID, start, stop, bounds, ctm, paint));
+    GrCachedLayer* layer = SkNEW_ARGS(GrCachedLayer, (pictureID, start, stop, bounds, initialMat,
+                                                      key, keySize, paint));
     fLayerHash.add(layer);
     return layer;
 }
 
-GrCachedLayer* GrLayerCache::findLayer(uint32_t pictureID,
-                                       int start,
-                                       const SkIRect& bounds,
-                                       const SkMatrix& ctm) {
-    SkASSERT(pictureID != SK_InvalidGenID && start > 0);
-    return fLayerHash.find(GrCachedLayer::Key(pictureID, start, bounds, ctm));
+GrCachedLayer* GrLayerCache::findLayer(uint32_t pictureID, const SkMatrix& initialMat,
+                                       const int* key, int keySize) {
+    SkASSERT(pictureID != SK_InvalidGenID);
+    return fLayerHash.find(GrCachedLayer::Key(pictureID, initialMat, key, keySize));
 }
 
 GrCachedLayer* GrLayerCache::findLayerOrCreate(uint32_t pictureID,
                                                int start, int stop,
                                                const SkIRect& bounds,
-                                               const SkMatrix& ctm,
+                                               const SkMatrix& initialMat,
+                                               const int* key,
+                                               int keySize,
                                                const SkPaint* paint) {
     SkASSERT(pictureID != SK_InvalidGenID && start >= 0 && stop > 0);
-    GrCachedLayer* layer = fLayerHash.find(GrCachedLayer::Key(pictureID, start, bounds, ctm));
+    GrCachedLayer* layer = fLayerHash.find(GrCachedLayer::Key(pictureID, initialMat, key, keySize));
     if (NULL == layer) {
-        layer = this->createLayer(pictureID, start, stop, bounds, ctm, paint);
+        layer = this->createLayer(pictureID, start, stop, bounds, initialMat, key, keySize, paint);
     }
 
     return layer;
@@ -485,7 +487,11 @@
         }
 
         SkString fileName(dirName);
-        fileName.appendf("\\%d-%d.png", layer->fKey.pictureID(), layer->fKey.start());
+        fileName.appendf("\\%d", layer->fKey.pictureID());
+        for (int i = 0; i < layer->fKey.keySize(); ++i) {
+            fileName.appendf("-%d", layer->fKey.key()[i]);
+        }
+        fileName.appendf(".png");
 
         layer->texture()->surfacePriv().savePixels(fileName.c_str());
     }
diff --git a/src/gpu/GrLayerCache.h b/src/gpu/GrLayerCache.h
index 75e9130..967503d 100644
--- a/src/gpu/GrLayerCache.h
+++ b/src/gpu/GrLayerCache.h
@@ -76,53 +76,80 @@
 public:
     // For SkTDynamicHash
     struct Key {
-        Key(uint32_t pictureID, int start, const SkIRect& bounds, const SkMatrix& ctm)
-        : fPictureID(pictureID)
-        , fStart(start)
-        , fBounds(bounds)
-        , fCTM(ctm) {
-            fCTM.getType(); // force initialization of type so hashes match
+        Key(uint32_t pictureID, const SkMatrix& initialMat, 
+            const int* key, int keySize, bool copyKey = false)
+        : fKeySize(keySize) 
+        , fFreeKey(copyKey) {
+            fIDMatrix.fPictureID = pictureID;
+            fIDMatrix.fInitialMat = initialMat;
+            fIDMatrix.fInitialMat.getType(); // force initialization of type so hashes match
 
-            // Key needs to be tightly packed.
-            GR_STATIC_ASSERT(sizeof(Key) == sizeof(uint32_t) +      // picture ID
-                                            sizeof(int) +           // start index
-                                            4 * sizeof(uint32_t) +  // bounds
-                                            9 * sizeof(SkScalar) + sizeof(uint32_t)); // matrix
+            if (copyKey) {
+                int* tempKey = SkNEW_ARRAY(int, keySize);
+                memcpy(tempKey, key, keySize*sizeof(int));
+                fKey = tempKey;
+            } else {
+                fKey = key;
+            }
+
+            // The pictureID/matrix portion needs to be tightly packed.
+            GR_STATIC_ASSERT(sizeof(IDMatrix) == sizeof(uint32_t)+                     // pictureID
+                                             9 * sizeof(SkScalar) + sizeof(uint32_t)); // matrix
+        }
+
+        ~Key() {
+            if (fFreeKey) {
+                SkDELETE_ARRAY(fKey);
+            }
         }
 
         bool operator==(const Key& other) const {
-            return fPictureID == other.fPictureID &&
-                   fStart == other.fStart &&
-                   fBounds == other.fBounds &&
-                   fCTM.cheapEqualTo(other.fCTM);
+            if (fKeySize != other.fKeySize) {
+                return false;
+            }
+            return fIDMatrix.fPictureID == other.fIDMatrix.fPictureID &&
+                   fIDMatrix.fInitialMat.cheapEqualTo(other.fIDMatrix.fInitialMat) &&
+                   !memcmp(fKey, other.fKey, fKeySize * sizeof(int));
         }
 
-        uint32_t pictureID() const { return fPictureID; }
-        int start() const { return fStart; }
-        const SkIRect& bound() const { return fBounds; }
+        uint32_t pictureID() const { return fIDMatrix.fPictureID; }
+
+        // TODO: remove these when GrCachedLayer & ReplacementInfo fuse
+        const int* key() const { SkASSERT(fFreeKey);  return fKey; }
+        int keySize() const { SkASSERT(fFreeKey); return fKeySize; }
+
+        uint32_t hash() const {
+            uint32_t hash = SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(fKey),
+                                                fKeySize * sizeof(int));
+            return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&fIDMatrix), 
+                                       sizeof(IDMatrix), hash);
+        }
 
     private:
-        // ID of the picture of which this layer is a part
-        const uint32_t fPictureID;
-        // The the index of the saveLayer command in the picture
-        const int      fStart;
-        // The bounds of the layer. The TL corner is its offset.
-        const SkIRect  fBounds;
-        // The 2x2 portion of the CTM applied to this layer in the picture
-        SkMatrix       fCTM;
+        struct IDMatrix {
+            // ID of the picture of which this layer is a part
+            uint32_t fPictureID;
+            // The initial matrix passed into drawPicture
+            SkMatrix fInitialMat;
+        }              fIDMatrix;
+
+        const int*     fKey;
+        const int      fKeySize;
+        bool           fFreeKey;
     };
 
     static const Key& GetKey(const GrCachedLayer& layer) { return layer.fKey; }
-    static uint32_t Hash(const Key& key) { 
-        return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&key), sizeof(Key));
-    }
+    static uint32_t Hash(const Key& key) { return key.hash(); }
 
     // GrCachedLayer proper
     GrCachedLayer(uint32_t pictureID, int start, int stop,
                   const SkIRect& bounds, const SkMatrix& ctm,
+                  const int* key, int keySize,
                   const SkPaint* paint)
-        : fKey(pictureID, start, bounds, ctm)
+        : fKey(pictureID, ctm, key, keySize, true)
+        , fStart(start)
         , fStop(stop)
+        , fBounds(bounds)
         , fPaint(paint ? SkNEW_ARGS(SkPaint, (*paint)) : NULL)
         , fTexture(NULL)
         , fRect(GrIRect16::MakeEmpty())
@@ -138,9 +165,13 @@
     }
 
     uint32_t pictureID() const { return fKey.pictureID(); }
-    int start() const { return fKey.start(); }
-    const SkIRect& bound() const { return fKey.bound(); }
+    // TODO: remove these when GrCachedLayer & ReplacementInfo fuse
+    const int* key() const { return fKey.key(); }
+    int keySize() const { return fKey.keySize(); }
 
+    int start() const { return fStart; }
+    // TODO: make bound debug only
+    const SkIRect& bound() const { return fBounds; }
     int stop() const { return fStop; }
     void setTexture(GrTexture* texture, const GrIRect16& rect) {
         SkRefCnt_SafeAssign(fTexture, texture);
@@ -167,9 +198,13 @@
 private:
     const Key       fKey;
 
+    // The "saveLayer" operation index of the cached layer
+    const int       fStart;
     // The final "restore" operation index of the cached layer
     const int       fStop;
 
+    const SkIRect   fBounds;
+
     // The paint used when dropping the layer down into the owning canvas.
     // Can be NULL. This class makes a copy for itself.
     const SkPaint*  fPaint;
@@ -224,12 +259,11 @@
     // elements by the GrContext
     void freeAll();
 
-    GrCachedLayer* findLayer(uint32_t pictureID, int start, 
-                             const SkIRect& bounds, const SkMatrix& ctm);
     GrCachedLayer* findLayerOrCreate(uint32_t pictureID,
                                      int start, int stop, 
                                      const SkIRect& bounds,
-                                     const SkMatrix& ctm,
+                                     const SkMatrix& initialMat,
+                                     const int* key, int keySize,
                                      const SkPaint* paint);
 
     // Attempt to place 'layer' in the atlas. Return true on success; false on failure.
@@ -310,8 +344,9 @@
     void unlock(GrCachedLayer* layer);
 
     void initAtlas();
-    GrCachedLayer* createLayer(uint32_t pictureID, int start, int stop, 
-                               const SkIRect& bounds, const SkMatrix& ctm, 
+    GrCachedLayer* createLayer(uint32_t pictureID, int start, int stop,
+                               const SkIRect& bounds, const SkMatrix& initialMat,
+                               const int* key, int keySize,
                                const SkPaint* paint);
 
     // Remove all the layers (and unlock any resources) associated with 'pictureID'
@@ -332,6 +367,8 @@
     // for testing
     friend class TestingAccess;
     int numLayers() const { return fLayerHash.count(); }
+    GrCachedLayer* findLayer(uint32_t pictureID, const SkMatrix& ctm,
+                             const int* key, int keySize);
 };
 
 #endif
diff --git a/src/gpu/GrLayerHoister.cpp b/src/gpu/GrLayerHoister.cpp
index b7653ee..4e78368 100644
--- a/src/gpu/GrLayerHoister.cpp
+++ b/src/gpu/GrLayerHoister.cpp
@@ -19,7 +19,7 @@
 // required texture/render target resources.
 static void prepare_for_hoisting(GrLayerCache* layerCache, 
                                  const SkPicture* topLevelPicture,
-                                 const SkMatrix& matrix,
+                                 const SkMatrix& initialMat,
                                  const SkLayerInfo::BlockInfo& info,
                                  const SkIRect& layerRect,
                                  SkTDArray<GrHoistedLayer>* needRendering,
@@ -28,15 +28,13 @@
                                  int numSamples) {
     const SkPicture* pict = info.fPicture ? info.fPicture : topLevelPicture;
 
-    SkMatrix combined = matrix;
-    combined.preConcat(info.fPreMat);
-    combined.preConcat(info.fLocalMat);
-
-    GrCachedLayer* layer = layerCache->findLayerOrCreate(pict->uniqueID(),
+    GrCachedLayer* layer = layerCache->findLayerOrCreate(topLevelPicture->uniqueID(),
                                                          info.fSaveLayerOpID,
                                                          info.fRestoreOpID,
                                                          layerRect,
-                                                         combined,
+                                                         initialMat,
+                                                         info.fKey,
+                                                         info.fKeySize,
                                                          info.fPaint);
     GrSurfaceDesc desc;
     desc.fFlags = kRenderTarget_GrSurfaceFlag;
@@ -76,7 +74,8 @@
     hl->fPicture = pict;
     hl->fOffset = SkIPoint::Make(layerRect.fLeft, layerRect.fTop);
     hl->fLocalMat = info.fLocalMat;
-    hl->fPreMat = matrix;
+    hl->fInitialMat = initialMat;
+    hl->fPreMat = initialMat;
     hl->fPreMat.preConcat(info.fPreMat);
 }
 
@@ -192,19 +191,17 @@
     result->setPixelRef(SkNEW_ARGS(SkGrPixelRef, (info, texture)))->unref();
 }
 
-void GrLayerHoister::ConvertLayersToReplacements(const SkTDArray<GrHoistedLayer>& layers,
+void GrLayerHoister::ConvertLayersToReplacements(const SkPicture* topLevelPicture,
+                                                 const SkTDArray<GrHoistedLayer>& layers,
                                                  GrReplacements* replacements) {
     // TODO: just replace GrReplacements::ReplacementInfo with GrCachedLayer?
     for (int i = 0; i < layers.count(); ++i) {
         GrCachedLayer* layer = layers[i].fLayer;
-        const SkPicture* picture = layers[i].fPicture;
-
-        SkMatrix combined = SkMatrix::Concat(layers[i].fPreMat, layers[i].fLocalMat);
 
         GrReplacements::ReplacementInfo* layerInfo =
-                    replacements->newReplacement(picture->uniqueID(),
-                                                 layer->start(),
-                                                 combined);
+                    replacements->newReplacement(topLevelPicture->uniqueID(),
+                                                 layers[i].fInitialMat,
+                                                 layer->key(), layer->keySize());
         layerInfo->fStop = layer->stop();
         layerInfo->fPos = layers[i].fOffset;
 
diff --git a/src/gpu/GrLayerHoister.h b/src/gpu/GrLayerHoister.h
index 84c7896..a28f887 100644
--- a/src/gpu/GrLayerHoister.h
+++ b/src/gpu/GrLayerHoister.h
@@ -17,9 +17,11 @@
 
 class GrHoistedLayer {
 public:
-    const SkPicture* fPicture;
+    const SkPicture* fPicture;  // the picture that actually contains the layer
+                                // (not necessarily the top-most picture)
     GrCachedLayer*   fLayer;
     SkIPoint         fOffset;
+    SkMatrix         fInitialMat;
     SkMatrix         fPreMat;
     SkMatrix         fLocalMat;
 };
@@ -86,7 +88,8 @@
         @param layers       The hoisted layers
         @param replacements Replacement object that will be used for a replacement draw
     */
-    static void ConvertLayersToReplacements(const SkTDArray<GrHoistedLayer>& layers,
+    static void ConvertLayersToReplacements(const SkPicture* topLevelPicture, 
+                                            const SkTDArray<GrHoistedLayer>& layers,
                                             GrReplacements* replacements);
 
     /** Unlock a group of layers in the layer cache.
diff --git a/src/gpu/GrRecordReplaceDraw.cpp b/src/gpu/GrRecordReplaceDraw.cpp
index dd686d3..9748f13 100644
--- a/src/gpu/GrRecordReplaceDraw.cpp
+++ b/src/gpu/GrRecordReplaceDraw.cpp
@@ -12,9 +12,10 @@
 #include "SkRecords.h"
 
 GrReplacements::ReplacementInfo* GrReplacements::newReplacement(uint32_t pictureID,
-                                                                unsigned start,
-                                                                const SkMatrix& ctm) {
-    ReplacementInfo* replacement = SkNEW_ARGS(ReplacementInfo, (pictureID, start, ctm));
+                                                                const SkMatrix& initialMat,
+                                                                const int* key, int keySize) {
+    ReplacementInfo* replacement = SkNEW_ARGS(ReplacementInfo, (pictureID, initialMat, 
+                                                                key, keySize));
     fReplacementHash.add(replacement);
     return replacement;
 }
@@ -30,10 +31,11 @@
     fReplacementHash.reset();
 }
 
-const GrReplacements::ReplacementInfo* GrReplacements::lookupByStart(uint32_t pictureID,
-                                                                     unsigned start,
-                                                                     const SkMatrix& ctm) const {
-    return fReplacementHash.find(ReplacementInfo::Key(pictureID, start, ctm));
+const GrReplacements::ReplacementInfo* GrReplacements::lookup(uint32_t pictureID,
+                                                              const SkMatrix& initialMat,
+                                                              const int* key,
+                                                              int keySize) const {
+    return fReplacementHash.find(ReplacementInfo::Key(pictureID, initialMat, key, keySize));
 }
 
 static inline void draw_replacement_bitmap(const GrReplacements::ReplacementInfo* ri,
@@ -56,18 +58,22 @@
 public:
     ReplaceDraw(SkCanvas* canvas,
                 SkPicture const* const drawablePicts[], int drawableCount,
+                const SkPicture* topLevelPicture,
                 const SkPicture* picture,
                 const GrReplacements* replacements,
                 const SkMatrix& initialMatrix,
-                SkDrawPictureCallback* callback)
+                SkDrawPictureCallback* callback,
+                const int* opIndices, int numIndices)
         : INHERITED(canvas, drawablePicts, NULL, drawableCount)
         , fCanvas(canvas)
+        , fTopLevelPicture(topLevelPicture)
         , fPicture(picture)
         , fReplacements(replacements)
         , fInitialMatrix(initialMatrix)
         , fCallback(callback)
         , fIndex(0)
         , fNumReplaced(0) {
+        fOpIndexStack.append(numIndices, opIndices);
     }
 
     int draw() {
@@ -118,13 +124,26 @@
         this->INHERITED::operator()(r);
     }
     void operator()(const SkRecords::DrawPicture& dp) {
+
+        int drawPictureOffset;
+        if (fOps.count()) {
+            drawPictureOffset = fOps[fIndex];
+        } else {
+            drawPictureOffset = fIndex;
+        }
+
+        fOpIndexStack.push(drawPictureOffset);
+
         SkAutoCanvasMatrixPaint acmp(fCanvas, dp.matrix, dp.paint, dp.picture->cullRect());
 
         // Draw sub-pictures with the same replacement list but a different picture
         ReplaceDraw draw(fCanvas, this->drawablePicts(), this->drawableCount(),
-                         dp.picture, fReplacements, fInitialMatrix, fCallback);
+                         fTopLevelPicture, dp.picture, fReplacements, fInitialMatrix, fCallback,
+                         fOpIndexStack.begin(), fOpIndexStack.count());
 
         fNumReplaced += draw.draw();
+
+        fOpIndexStack.pop();
     }
     void operator()(const SkRecords::SaveLayer& sl) {
 
@@ -137,11 +156,13 @@
             startOffset = fIndex;
         }
 
-        const SkMatrix& ctm = fCanvas->getTotalMatrix();
-        const GrReplacements::ReplacementInfo* ri = fReplacements->lookupByStart(
-                                                            fPicture->uniqueID(),
-                                                            startOffset,
-                                                            ctm);
+        fOpIndexStack.push(startOffset);
+
+        const GrReplacements::ReplacementInfo* ri = fReplacements->lookup(
+                                                                    fTopLevelPicture->uniqueID(),
+                                                                    fInitialMatrix,
+                                                                    fOpIndexStack.begin(),
+                                                                    fOpIndexStack.count());
 
         if (ri) {
             fNumReplaced++;
@@ -155,15 +176,19 @@
             } else {
                 fIndex = ri->fStop;
             }
+            fOpIndexStack.pop();
             return;
         }
 
         // This is a fail for layer hoisting
         this->INHERITED::operator()(sl);
+
+        fOpIndexStack.pop();
     }
 
 private:
     SkCanvas*              fCanvas;
+    const SkPicture*       fTopLevelPicture;
     const SkPicture*       fPicture;
     const GrReplacements*  fReplacements;
     const SkMatrix         fInitialMatrix;
@@ -173,6 +198,9 @@
     int                    fIndex;
     int                    fNumReplaced;
 
+    // The op code indices of all the enclosing drawPicture and saveLayer calls
+    SkTDArray<int>         fOpIndexStack;
+
     typedef Draw INHERITED;
 };
 
@@ -184,7 +212,9 @@
     SkAutoCanvasRestore saveRestore(canvas, true /*save now, restore at exit*/);
 
     // TODO: drawablePicts?
-    ReplaceDraw draw(canvas, NULL, 0, picture, replacements, initialMatrix, callback);
+    ReplaceDraw draw(canvas, NULL, 0, 
+                     picture, picture, 
+                     replacements, initialMatrix, callback, NULL, 0);
 
     return draw.draw();
 }
diff --git a/src/gpu/GrRecordReplaceDraw.h b/src/gpu/GrRecordReplaceDraw.h
index fabeec1..538661a 100644
--- a/src/gpu/GrRecordReplaceDraw.h
+++ b/src/gpu/GrRecordReplaceDraw.h
@@ -32,48 +32,70 @@
     class ReplacementInfo {
     public:
         struct Key {
-            Key(uint32_t pictureID, unsigned start, const SkMatrix& ctm)
-            : fPictureID(pictureID)
-            , fStart(start)
-            , fCTM(ctm) {
-                fCTM.getType(); // force initialization of type so hashes match
+            Key(uint32_t pictureID, const SkMatrix& initialMat,
+                const int* key, int keySize, bool copyKey = false)
+            : fKeySize(keySize)
+            , fFreeKey(copyKey) {
+                fIDMatrix.fPictureID = pictureID;
+                fIDMatrix.fInitialMat = initialMat;
+                fIDMatrix.fInitialMat.getType(); // force initialization of type so hashes match
 
-                // Key needs to be tightly packed.
-                GR_STATIC_ASSERT(sizeof(Key) == sizeof(uint32_t) +      // picture ID
-                                                sizeof(int) +           // start
-                                                9 * sizeof(SkScalar)    // 3x3 from CTM
-                                                +sizeof(uint32_t));     // matrix's type
+                if (copyKey) {
+                    int* tempKey = SkNEW_ARRAY(int, keySize);
+                    memcpy(tempKey, key, keySize * sizeof(int));
+                    fKey = tempKey;
+                } else {
+                    fKey = key;
+                }
+
+                // The pictureID/matrix portion needs to be tightly packed.
+                GR_STATIC_ASSERT(sizeof(IDMatrix) == sizeof(uint32_t)+                // pictureID
+                                              9 * sizeof(SkScalar)+sizeof(uint32_t)); // matrix
             }
 
-            bool operator==(const Key& other) const { 
-                return fPictureID == other.fPictureID &&
-                       fStart == other.fStart &&
-                       fCTM.cheapEqualTo(other.fCTM); // TODO: should be fuzzy
+            ~Key() {
+                if (fFreeKey) {
+                    SkDELETE_ARRAY(fKey);
+                }
+            }
+            bool operator==(const Key& other) const {
+                if (fKeySize != other.fKeySize) {
+                    return false;
+                }
+                return fIDMatrix.fPictureID == other.fIDMatrix.fPictureID &&
+                       fIDMatrix.fInitialMat.cheapEqualTo(other.fIDMatrix.fInitialMat) &&
+                       !memcmp(fKey, other.fKey, fKeySize * sizeof(int));
             }
 
-            uint32_t     pictureID() const { return fPictureID; }
-            unsigned int start() const { return fStart; }
+            uint32_t hash() const {
+                uint32_t hash = SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(fKey),
+                                                    fKeySize * sizeof(int));
+                return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&fIDMatrix),
+                                           sizeof(IDMatrix), hash);
+            }
 
         private:
-            const uint32_t fPictureID;
-            const unsigned fStart;
-            const SkMatrix fCTM;
+            struct IDMatrix {
+                uint32_t fPictureID;
+                SkMatrix fInitialMat;
+            }              fIDMatrix;
+
+            const int*     fKey;
+            const int      fKeySize;
+            const bool     fFreeKey;
         };
 
         static const Key& GetKey(const ReplacementInfo& layer) { return layer.fKey; }
-        static uint32_t Hash(const Key& key) {
-            return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&key), sizeof(Key));
-        }
+        static uint32_t Hash(const Key& key) { return key.hash(); }
 
-        ReplacementInfo(uint32_t pictureID, unsigned int start, const SkMatrix& ctm)
-            : fKey(pictureID, start, ctm)
+        ReplacementInfo(uint32_t pictureID, const SkMatrix& initialMat,
+                        const int* key, int keySize)
+            : fKey(pictureID, initialMat, key, keySize, true)
             , fImage(NULL)
             , fPaint(NULL) {
         }
         ~ReplacementInfo() { fImage->unref(); SkDELETE(fPaint); }
 
-        unsigned int start() const { return fKey.start(); }
-
         const Key       fKey;
         unsigned        fStop;
         SkIPoint        fPos;
@@ -86,12 +108,11 @@
     ~GrReplacements() { this->freeAll(); }
 
     // Add a new replacement range.
-    ReplacementInfo* newReplacement(uint32_t pictureID, unsigned start, const SkMatrix& ctm);
+    ReplacementInfo* newReplacement(uint32_t pictureID, const SkMatrix& initialMat,
+                                    const int* key, int keySize);
 
-    // look up a replacement range by its pictureID, start offset and the CTM
-    // TODO: also need to add clip to lookup
-    const ReplacementInfo* lookupByStart(uint32_t pictureID, unsigned start, 
-                                         const SkMatrix& ctm) const;
+    const ReplacementInfo* lookup(uint32_t pictureID, const SkMatrix& initalMat,
+                                  const int* key, int keySize) const;
 
 private:
     SkTDynamicHash<ReplacementInfo, ReplacementInfo::Key> fReplacementHash;
diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp
index 158fa91..9a78048 100644
--- a/src/gpu/SkGpuDevice.cpp
+++ b/src/gpu/SkGpuDevice.cpp
@@ -1832,8 +1832,8 @@
 
     GrReplacements replacements;
 
-    GrLayerHoister::ConvertLayersToReplacements(needRendering, &replacements);
-    GrLayerHoister::ConvertLayersToReplacements(recycled, &replacements);
+    GrLayerHoister::ConvertLayersToReplacements(mainPicture, needRendering, &replacements);
+    GrLayerHoister::ConvertLayersToReplacements(mainPicture, recycled, &replacements);
 
     // Render the entire picture using new layers
     GrRecordReplaceDraw(mainPicture, mainCanvas, &replacements, initialMatrix, NULL);
diff --git a/tests/GpuLayerCacheTest.cpp b/tests/GpuLayerCacheTest.cpp
index 1043a20..43a845b 100644
--- a/tests/GpuLayerCacheTest.cpp
+++ b/tests/GpuLayerCacheTest.cpp
@@ -24,6 +24,10 @@
     static int Uses(GrCachedLayer* layer) {
         return layer->uses();
     }
+    static GrCachedLayer* Find(GrLayerCache* cache, uint32_t pictureID,
+                               const SkMatrix& initialMat, const int* key, int keySize) {
+        return cache->findLayer(pictureID, initialMat, key, keySize);
+    }
 };
 
 // Add several layers to the cache
@@ -34,14 +38,16 @@
                           int idOffset) {
 
     for (int i = 0; i < numToAdd; ++i) {
+        int indices[1] = { idOffset+i+1 };
         GrCachedLayer* layer = cache->findLayerOrCreate(picture.uniqueID(), 
                                                         idOffset+i+1, idOffset+i+2, 
                                                         SkIRect::MakeEmpty(),
                                                         SkMatrix::I(),
+                                                        indices, 1,
                                                         NULL);
         REPORTER_ASSERT(reporter, layer);
-        GrCachedLayer* temp = cache->findLayer(picture.uniqueID(), idOffset + i + 1,
-                                               SkIRect::MakeEmpty(), SkMatrix::I());
+        GrCachedLayer* temp = TestingAccess::Find(cache, picture.uniqueID(), SkMatrix::I(), 
+                                                  indices, 1);
         REPORTER_ASSERT(reporter, temp == layer);
 
         REPORTER_ASSERT(reporter, TestingAccess::NumLayers(cache) == idOffset + i + 1);
@@ -111,8 +117,9 @@
         create_layers(reporter, &cache, *picture, kInitialNumLayers, 0);
 
         for (int i = 0; i < kInitialNumLayers; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, 
-                                                   SkIRect::MakeEmpty(), SkMatrix::I());
+            int indices[1] = { i + 1 };
+            GrCachedLayer* layer = TestingAccess::Find(&cache, picture->uniqueID(), SkMatrix::I(),
+                                                       indices, 1);
             REPORTER_ASSERT(reporter, layer);
 
             lock_layer(reporter, &cache, layer);
@@ -129,15 +136,19 @@
 
         // Unlock the textures
         for (int i = 0; i < kInitialNumLayers; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, 
-                                                   SkIRect::MakeEmpty(), SkMatrix::I());
+            int indices[1] = { i+1 };
+
+            GrCachedLayer* layer = TestingAccess::Find(&cache, picture->uniqueID(), SkMatrix::I(),
+                                                       indices, 1);
             REPORTER_ASSERT(reporter, layer);
             cache.removeUse(layer);
         }
 
         for (int i = 0; i < kInitialNumLayers; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i+1, 
-                                                   SkIRect::MakeEmpty(), SkMatrix::I());
+            int indices[1] = { i+1 };
+
+            GrCachedLayer* layer = TestingAccess::Find(&cache, picture->uniqueID(), SkMatrix::I(),
+                                                       indices, 1);
             REPORTER_ASSERT(reporter, layer);
 
             // All the layers should be unlocked
@@ -161,12 +172,13 @@
         }
 
         {
+            int indices[1] = { kInitialNumLayers+1 };
+
             // Add an additional layer. Since all the layers are unlocked this 
             // will force out the first atlased layer
             create_layers(reporter, &cache, *picture, 1, kInitialNumLayers);
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), 
-                                                   kInitialNumLayers+1, 
-                                                   SkIRect::MakeEmpty(), SkMatrix::I());
+            GrCachedLayer* layer = TestingAccess::Find(&cache, picture->uniqueID(), SkMatrix::I(),
+                                                       indices, 1);
             REPORTER_ASSERT(reporter, layer);
 
             lock_layer(reporter, &cache, layer);
@@ -174,8 +186,10 @@
         }
 
         for (int i = 0; i < kInitialNumLayers+1; ++i) {
-            GrCachedLayer* layer = cache.findLayer(picture->uniqueID(), i + 1,
-                                                   SkIRect::MakeEmpty(), SkMatrix::I());
+            int indices[1] = { i+1 };
+
+            GrCachedLayer* layer = TestingAccess::Find(&cache, picture->uniqueID(), SkMatrix::I(),
+                                                       indices, 1);
 #if GR_CACHE_HOISTED_LAYERS
             // 3 old layers plus the new one should be in the atlas.
             if (1 == i || 2 == i || 3 == i || 5 == i) {
diff --git a/tests/PictureTest.cpp b/tests/PictureTest.cpp
index 64073d3..7a8c8fa 100644
--- a/tests/PictureTest.cpp
+++ b/tests/PictureTest.cpp
@@ -938,7 +938,7 @@
 
         // 2)
         c->saveLayer(NULL, NULL); // layer #1
-            c->translate(kWidth/2.0f, kHeight/2.0f);
+            c->translate(kWidth / 2.0f, kHeight / 2.0f);
             SkRect r = SkRect::MakeXYWH(0, 0, kWidth/2, kHeight/2);
             c->saveLayer(&r, &complexPaint); // layer #2
             c->restore();
diff --git a/tests/RecordReplaceDrawTest.cpp b/tests/RecordReplaceDrawTest.cpp
index 5d40738..8fc824d 100644
--- a/tests/RecordReplaceDrawTest.cpp
+++ b/tests/RecordReplaceDrawTest.cpp
@@ -115,9 +115,11 @@
         pic.reset(recorder.endRecording());
     }
 
+    int key[1] = { 0 };
+
     GrReplacements replacements;
-    GrReplacements::ReplacementInfo* ri = replacements.newReplacement(pic->uniqueID(),
-                                                                      0, SkMatrix::I());
+    GrReplacements::ReplacementInfo* ri = replacements.newReplacement(pic->uniqueID(), 
+                                                                      SkMatrix::I(), key, 1);
     ri->fStop = 2;
     ri->fPos.set(0, 0);
     ri->fImage = make_image(SK_ColorRED);