Fully deferred displaylist replay

bug:8037003

A recursive drawDisplayList call is now entirely deferred before
playing back to the screen and issuing GL commands. This way, the
entire stream can be inspected and optimized, and batch work (such as
uploading textures) can be done before issuing commands.

Additionally, this fixes an issue where draw operations could move
across restores corresponding to saveLayer(alpha). Those and other
similar cases (such as complex clipping, requiring the stencil) are
now treated as batching barriers, and the operations that change
renderer state in a way that's difficult to defer are just re-issued
at flush time.

Change-Id: Ie7348166662a5ad89fb9b1e87558334fb826b01e
diff --git a/libs/hwui/Caches.cpp b/libs/hwui/Caches.cpp
index a1cc2e8..4642a4f 100644
--- a/libs/hwui/Caches.cpp
+++ b/libs/hwui/Caches.cpp
@@ -103,14 +103,9 @@
 void Caches::initExtensions() {
     if (mExtensions.hasDebugMarker()) {
         eventMark = glInsertEventMarkerEXT;
-        if ((drawDeferDisabled || drawReorderDisabled)) {
-            startMark = glPushGroupMarkerEXT;
-            endMark = glPopGroupMarkerEXT;
-        } else {
-            startMark = startMarkNull;
-            endMark = endMarkNull;
-        }
 
+        startMark = glPushGroupMarkerEXT;
+        endMark = glPopGroupMarkerEXT;
     } else {
         eventMark = eventMarkNull;
         startMark = startMarkNull;
diff --git a/libs/hwui/DeferredDisplayList.cpp b/libs/hwui/DeferredDisplayList.cpp
index a4e9950..2027fc8 100644
--- a/libs/hwui/DeferredDisplayList.cpp
+++ b/libs/hwui/DeferredDisplayList.cpp
@@ -32,15 +32,15 @@
 namespace android {
 namespace uirenderer {
 
+/////////////////////////////////////////////////////////////////////////////////
+// Operation Batches
+/////////////////////////////////////////////////////////////////////////////////
+
 class DrawOpBatch {
 public:
-    DrawOpBatch() {
-        mOps.clear();
-    }
+    DrawOpBatch() { mOps.clear(); }
 
-    ~DrawOpBatch() {
-        mOps.clear();
-    }
+    virtual ~DrawOpBatch() { mOps.clear(); }
 
     void add(DrawOp* op) {
         // NOTE: ignore empty bounds special case, since we don't merge across those ops
@@ -48,8 +48,9 @@
         mOps.add(op);
     }
 
-    bool intersects(Rect& rect) {
+    virtual bool intersects(Rect& rect) {
         if (!rect.intersects(mBounds)) return false;
+
         for (unsigned int i = 0; i < mOps.size(); i++) {
             if (rect.intersects(mOps[i]->state.mBounds)) {
 #if DEBUG_DEFER
@@ -64,27 +65,217 @@
         return false;
     }
 
-    Vector<DrawOp*> mOps;
+    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty) {
+        DEFER_LOGD("replaying draw batch %p", this);
+
+        status_t status = DrawGlInfo::kStatusDone;
+        DisplayListLogBuffer& logBuffer = DisplayListLogBuffer::getInstance();
+        for (unsigned int i = 0; i < mOps.size(); i++) {
+            DrawOp* op = mOps[i];
+
+            renderer.restoreDisplayState(op->state, kStateDeferFlag_Draw);
+
+#if DEBUG_DISPLAY_LIST_OPS_AS_EVENTS
+            renderer.eventMark(strlen(op->name()), op->name());
+#endif
+            status |= op->applyDraw(renderer, dirty, 0, op->state.mMultipliedAlpha);
+            logBuffer.writeCommand(0, op->name());
+        }
+        return status;
+    }
+
+    inline int count() const { return mOps.size(); }
 private:
+    Vector<DrawOp*> mOps;
     Rect mBounds;
 };
 
-void DeferredDisplayList::clear() {
+class StateOpBatch : public DrawOpBatch {
+public:
+    // creates a single operation batch
+    StateOpBatch(StateOp* op) : mOp(op) {}
+
+    bool intersects(Rect& rect) {
+        // if something checks for intersection, it's trying to go backwards across a state op,
+        // something not currently supported - state ops are always barriers
+        CRASH();
+        return false;
+    }
+
+    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty) {
+        DEFER_LOGD("replaying state op batch %p", this);
+        renderer.restoreDisplayState(mOp->state, 0);
+
+        // use invalid save count because it won't be used at flush time - RestoreToCountOp is the
+        // only one to use it, and we don't use that class at flush time, instead calling
+        // renderer.restoreToCount directly
+        int saveCount = -1;
+        mOp->applyState(renderer, saveCount);
+        return DrawGlInfo::kStatusDone;
+    }
+
+private:
+    StateOp* mOp;
+};
+
+class RestoreToCountBatch : public DrawOpBatch {
+public:
+    RestoreToCountBatch(int restoreCount) : mRestoreCount(restoreCount) {}
+
+    bool intersects(Rect& rect) {
+        // if something checks for intersection, it's trying to go backwards across a state op,
+        // something not currently supported - state ops are always barriers
+        CRASH();
+        return false;
+    }
+
+    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty) {
+        DEFER_LOGD("batch %p restoring to count %d", this, mRestoreCount);
+        renderer.restoreToCount(mRestoreCount);
+
+        return DrawGlInfo::kStatusDone;
+    }
+
+private:
+    /*
+     * The count used here represents the flush() time saveCount. This is as opposed to the
+     * DisplayList record time, or defer() time values (which are RestoreToCountOp's mCount, and
+     * (saveCount + mCount) respectively). Since the count is different from the original
+     * RestoreToCountOp, we don't store a pointer to the op, as elsewhere.
+     */
+    const int mRestoreCount;
+};
+
+/////////////////////////////////////////////////////////////////////////////////
+// DeferredDisplayList
+/////////////////////////////////////////////////////////////////////////////////
+
+void DeferredDisplayList::resetBatchingState() {
     for (int i = 0; i < kOpBatch_Count; i++) {
         mBatchIndices[i] = -1;
     }
+}
+
+void DeferredDisplayList::clear() {
+    resetBatchingState();
+    mComplexClipStackStart = -1;
+
     for (unsigned int i = 0; i < mBatches.size(); i++) {
         delete mBatches[i];
     }
     mBatches.clear();
+    mSaveStack.clear();
 }
 
-void DeferredDisplayList::add(DrawOp* op, bool disallowReorder) {
-    if (CC_UNLIKELY(disallowReorder)) {
-        if (!mBatches.isEmpty()) {
-            mBatches[0]->add(op);
-            return;
+/////////////////////////////////////////////////////////////////////////////////
+// Operation adding
+/////////////////////////////////////////////////////////////////////////////////
+
+int DeferredDisplayList::getStateOpDeferFlags() const {
+    // For both clipOp and save(Layer)Op, we don't want to save drawing info, and only want to save
+    // the clip if we aren't recording a complex clip (and can thus trust it to be a rect)
+    return recordingComplexClip() ? 0 : kStateDeferFlag_Clip;
+}
+
+int DeferredDisplayList::getDrawOpDeferFlags() const {
+    return kStateDeferFlag_Draw | getStateOpDeferFlags();
+}
+
+/**
+ * When a clipping operation occurs that could cause a complex clip, record the operation and all
+ * subsequent clipOps, save/restores (if the clip flag is set). During a flush, instead of loading
+ * the clip from deferred state, we play back all of the relevant state operations that generated
+ * the complex clip.
+ *
+ * Note that we don't need to record the associated restore operation, since operations at defer
+ * time record whether they should store the renderer's current clip
+ */
+void DeferredDisplayList::addClip(OpenGLRenderer& renderer, ClipOp* op) {
+    if (recordingComplexClip() || op->canCauseComplexClip() || !renderer.hasRectToRectTransform()) {
+        DEFER_LOGD("%p Received complex clip operation %p", this, op);
+
+        // NOTE: defer clip op before setting mComplexClipStackStart so previous clip is recorded
+        storeStateOpBarrier(renderer, op);
+
+        if (!recordingComplexClip()) {
+            mComplexClipStackStart = renderer.getSaveCount() - 1;
+            DEFER_LOGD("    Starting complex clip region, start is %d", mComplexClipStackStart);
         }
+    }
+}
+
+/**
+ * For now, we record save layer operations as barriers in the batch list, preventing drawing
+ * operations from reordering around the saveLayer and its associated restore()
+ *
+ * In the future, we should send saveLayer commands (if they can be played out of order) and their
+ * contained drawing operations to a separate list of batches, so that they may draw at the
+ * beginning of the frame. This would avoid targeting and removing an FBO in the middle of a frame.
+ *
+ * saveLayer operations should be pulled to the beginning of the frame if the canvas doesn't have a
+ * complex clip, and if the flags (kClip_SaveFlag & kClipToLayer_SaveFlag) are set.
+ */
+void DeferredDisplayList::addSaveLayer(OpenGLRenderer& renderer,
+        SaveLayerOp* op, int newSaveCount) {
+    DEFER_LOGD("%p adding saveLayerOp %p, flags %x, new count %d",
+            this, op, op->getFlags(), newSaveCount);
+
+    storeStateOpBarrier(renderer, op);
+    mSaveStack.push(newSaveCount);
+}
+
+/**
+ * Takes save op and its return value - the new save count - and stores it into the stream as a
+ * barrier if it's needed to properly modify a complex clip
+ */
+void DeferredDisplayList::addSave(OpenGLRenderer& renderer, SaveOp* op, int newSaveCount) {
+    int saveFlags = op->getFlags();
+    DEFER_LOGD("%p adding saveOp %p, flags %x, new count %d", this, op, saveFlags, newSaveCount);
+
+    if (recordingComplexClip() && (saveFlags & SkCanvas::kClip_SaveFlag)) {
+        // store and replay the save operation, as it may be needed to correctly playback the clip
+        DEFER_LOGD("    adding save barrier with new save count %d", newSaveCount);
+        storeStateOpBarrier(renderer, op);
+        mSaveStack.push(newSaveCount);
+    }
+}
+
+/**
+ * saveLayer() commands must be associated with a restoreToCount batch that will clean up and draw
+ * the layer in the deferred list
+ *
+ * other save() commands which occur as children of a snapshot with complex clip will be deferred,
+ * and must be restored
+ *
+ * Either will act as a barrier to draw operation reordering, as we want to play back layer
+ * save/restore and complex canvas modifications (including save/restore) in order.
+ */
+void DeferredDisplayList::addRestoreToCount(OpenGLRenderer& renderer, int newSaveCount) {
+    DEFER_LOGD("%p addRestoreToCount %d", this, newSaveCount);
+
+    if (recordingComplexClip() && newSaveCount <= mComplexClipStackStart) {
+        mComplexClipStackStart = -1;
+        resetBatchingState();
+    }
+
+    if (mSaveStack.isEmpty() || newSaveCount > mSaveStack.top()) {
+        return;
+    }
+
+    while (!mSaveStack.isEmpty() && mSaveStack.top() >= newSaveCount) mSaveStack.pop();
+
+    storeRestoreToCountBarrier(mSaveStack.size() + 1);
+}
+
+void DeferredDisplayList::addDrawOp(OpenGLRenderer& renderer, DrawOp* op) {
+    if (renderer.storeDisplayState(op->state, getDrawOpDeferFlags())) {
+        return; // quick rejected
+    }
+
+    op->onDrawOpDeferred(renderer);
+
+    if (CC_UNLIKELY(renderer.getCaches().drawReorderDisabled)) {
+        // TODO: elegant way to reuse batches?
         DrawOpBatch* b = new DrawOpBatch();
         b->add(op);
         mBatches.add(b);
@@ -138,9 +329,41 @@
     targetBatch->add(op);
 }
 
-status_t DeferredDisplayList::flush(OpenGLRenderer& renderer, Rect& dirty, int32_t flags,
-        uint32_t level) {
-    ATRACE_CALL();
+void DeferredDisplayList::storeStateOpBarrier(OpenGLRenderer& renderer, StateOp* op) {
+    DEFER_LOGD("%p adding state op barrier at pos %d", this, mBatches.size());
+
+    renderer.storeDisplayState(op->state, getStateOpDeferFlags());
+    mBatches.add(new StateOpBatch(op));
+    resetBatchingState();
+}
+
+void DeferredDisplayList::storeRestoreToCountBarrier(int newSaveCount) {
+    DEFER_LOGD("%p adding restore to count %d barrier, pos %d",
+            this, newSaveCount, mBatches.size());
+
+    mBatches.add(new RestoreToCountBatch(newSaveCount));
+    resetBatchingState();
+}
+
+/////////////////////////////////////////////////////////////////////////////////
+// Replay / flush
+/////////////////////////////////////////////////////////////////////////////////
+
+static status_t replayBatchList(Vector<DrawOpBatch*>& batchList,
+        OpenGLRenderer& renderer, Rect& dirty) {
+    status_t status = DrawGlInfo::kStatusDone;
+
+    int opCount = 0;
+    for (unsigned int i = 0; i < batchList.size(); i++) {
+        status |= batchList[i]->replay(renderer, dirty);
+        opCount += batchList[i]->count();
+    }
+    DEFER_LOGD("--flushed, drew %d batches (total %d ops)", batchList.size(), opCount);
+    return status;
+}
+
+status_t DeferredDisplayList::flush(OpenGLRenderer& renderer, Rect& dirty) {
+    ATRACE_NAME("flush drawing commands");
     status_t status = DrawGlInfo::kStatusDone;
 
     if (isEmpty()) return status; // nothing to flush
@@ -148,29 +371,12 @@
     DEFER_LOGD("--flushing");
     renderer.eventMark("Flush");
 
-    DrawModifiers restoreDrawModifiers = renderer.getDrawModifiers();
-    int restoreTo = renderer.save(SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag);
-    int opCount = 0;
-    for (unsigned int i = 0; i < mBatches.size(); i++) {
-        DrawOpBatch* batch = mBatches[i];
-        for (unsigned int j = 0; j < batch->mOps.size(); j++) {
-            DrawOp* op = batch->mOps[j];
+    renderer.restoreToCount(1);
 
-            renderer.restoreDisplayState(op->state);
+    status |= replayBatchList(mBatches, renderer, dirty);
 
-#if DEBUG_DEFER
-            op->output(2);
-#endif
-            status |= op->applyDraw(renderer, dirty, level,
-                    op->state.mMultipliedAlpha >= 0, op->state.mMultipliedAlpha);
-            opCount++;
-        }
-    }
+    DEFER_LOGD("--flush complete, returning %x", status);
 
-    DEFER_LOGD("--flushed, drew %d batches (total %d ops)", mBatches.size(), opCount);
-
-    renderer.restoreToCount(restoreTo);
-    renderer.setDrawModifiers(restoreDrawModifiers);
     clear();
     return status;
 }
diff --git a/libs/hwui/DeferredDisplayList.h b/libs/hwui/DeferredDisplayList.h
index 4fcb297..8e908fa 100644
--- a/libs/hwui/DeferredDisplayList.h
+++ b/libs/hwui/DeferredDisplayList.h
@@ -26,10 +26,13 @@
 namespace android {
 namespace uirenderer {
 
+class ClipOp;
 class DrawOp;
+class SaveOp;
+class SaveLayerOp;
+class StateOp;
 class DrawOpBatch;
 class OpenGLRenderer;
-class SkiaShader;
 
 class DeferredDisplayList {
 public:
@@ -55,18 +58,42 @@
      * Plays back all of the draw ops recorded into batches to the renderer.
      * Adjusts the state of the renderer as necessary, and restores it when complete
      */
-    status_t flush(OpenGLRenderer& renderer, Rect& dirty, int32_t flags,
-            uint32_t level);
+    status_t flush(OpenGLRenderer& renderer, Rect& dirty);
+
+    void addClip(OpenGLRenderer& renderer, ClipOp* op);
+    void addSaveLayer(OpenGLRenderer& renderer, SaveLayerOp* op, int newSaveCount);
+    void addSave(OpenGLRenderer& renderer, SaveOp* op, int newSaveCount);
+    void addRestoreToCount(OpenGLRenderer& renderer, int newSaveCount);
 
     /**
      * Add a draw op into the DeferredDisplayList, reordering as needed (for performance) if
      * disallowReorder is false, respecting draw order when overlaps occur
      */
-    void add(DrawOp* op, bool disallowReorder);
+    void addDrawOp(OpenGLRenderer& renderer, DrawOp* op);
 
 private:
+    /*
+     * Resets the batching back-pointers, creating a barrier in the operation stream so that no ops
+     * added in the future will be inserted into a batch that already exists.
+     */
+    void resetBatchingState();
+
     void clear();
 
+    void storeStateOpBarrier(OpenGLRenderer& renderer, StateOp* op);
+    void storeRestoreToCountBarrier(int newSaveCount);
+
+    bool recordingComplexClip() const { return mComplexClipStackStart >= 0; }
+
+    int getStateOpDeferFlags() const;
+    int getDrawOpDeferFlags() const;
+
+    /*
+     * At defer time, stores the save counts of the save/saveLayer ops that were stored as
+     * barriers, so that the matching restoreToCount can be stored as a barrier as well.
+     */
+    Vector<int> mSaveStack;
+    int mComplexClipStackStart;
 
     Vector<DrawOpBatch*> mBatches;
     int mBatchIndices[kOpBatch_Count];
diff --git a/libs/hwui/DisplayList.cpp b/libs/hwui/DisplayList.cpp
index 5781f4d..4743f58 100644
--- a/libs/hwui/DisplayList.cpp
+++ b/libs/hwui/DisplayList.cpp
@@ -61,6 +61,12 @@
 
 void DisplayList::clearResources() {
     mDisplayListData = NULL;
+
+    mClipRectOp = NULL;
+    mSaveLayerOp = NULL;
+    mSaveOp = NULL;
+    mRestoreToCountOp = NULL;
+
     delete mTransformMatrix;
     delete mTransformCamera;
     delete mTransformMatrix3D;
@@ -156,6 +162,13 @@
         return;
     }
 
+    // allocate reusable ops for state-deferral
+    LinearAllocator& alloc = mDisplayListData->allocator;
+    mClipRectOp = new (alloc) ClipRectOp();
+    mSaveLayerOp = new (alloc) SaveLayerOp();
+    mSaveOp = new (alloc) SaveOp();
+    mRestoreToCountOp = new (alloc) RestoreToCountOp();
+
     mFunctorCount = recorder.getFunctorCount();
 
     Caches& caches = Caches::getInstance();
@@ -318,7 +331,7 @@
     }
 }
 
-void DisplayList::outputViewProperties(uint32_t level) {
+void DisplayList::outputViewProperties(const int level) {
     updateMatrix();
     if (mLeft != 0 || mTop != 0) {
         ALOGD("%*sTranslate (left, top) %d, %d", level * 2, "", mLeft, mTop);
@@ -358,10 +371,17 @@
     }
 }
 
-status_t DisplayList::setViewProperties(OpenGLRenderer& renderer, Rect& dirty,
-        int32_t flags, uint32_t level, DeferredDisplayList* deferredList) {
-    status_t status = DrawGlInfo::kStatusDone;
-#if DEBUG_DISPLAYLIST
+/*
+ * For property operations, we pass a savecount of 0, since the operations aren't part of the
+ * displaylist, and thus don't have to compensate for the record-time/playback-time discrepancy in
+ * base saveCount (i.e., how RestoreToCount uses saveCount + mCount)
+ */
+#define PROPERTY_SAVECOUNT 0
+
+template <class T>
+void DisplayList::setViewProperties(OpenGLRenderer& renderer, T& handler,
+        const int level) {
+#if DEBUG_DISPLAY_LIST
     outputViewProperties(level);
 #endif
     updateMatrix();
@@ -381,86 +401,121 @@
         }
     }
     if (mAlpha < 1 && !mCaching) {
-        if (deferredList) {
-            // flush since we'll either enter a Layer, or set alpha, both not supported in deferral
-            status |= deferredList->flush(renderer, dirty, flags, level);
-        }
-
         if (!mHasOverlappingRendering) {
             renderer.setAlpha(mAlpha);
         } else {
             // TODO: should be able to store the size of a DL at record time and not
             // have to pass it into this call. In fact, this information might be in the
             // location/size info that we store with the new native transform data.
-            int flags = SkCanvas::kHasAlphaLayer_SaveFlag;
+            int saveFlags = SkCanvas::kHasAlphaLayer_SaveFlag;
             if (mClipChildren) {
-                flags |= SkCanvas::kClipToLayer_SaveFlag;
+                saveFlags |= SkCanvas::kClipToLayer_SaveFlag;
             }
-            renderer.saveLayerAlpha(0, 0, mRight - mLeft, mBottom - mTop,
-                    mMultipliedAlpha, flags);
+            handler(mSaveLayerOp->reinit(0, 0, mRight - mLeft, mBottom - mTop,
+                    mMultipliedAlpha, SkXfermode::kSrcOver_Mode, saveFlags), PROPERTY_SAVECOUNT);
         }
     }
     if (mClipChildren && !mCaching) {
-        if (deferredList && CC_UNLIKELY(!renderer.hasRectToRectTransform())) {
-            // flush, since clip will likely be a region
-            status |= deferredList->flush(renderer, dirty, flags, level);
-        }
-        renderer.clipRect(0, 0, mRight - mLeft, mBottom - mTop,
-                SkRegion::kIntersect_Op);
+        handler(mClipRectOp->reinit(0, 0, mRight - mLeft, mBottom - mTop, SkRegion::kIntersect_Op),
+                PROPERTY_SAVECOUNT);
     }
-    return status;
 }
 
-status_t DisplayList::replay(OpenGLRenderer& renderer, Rect& dirty, int32_t flags, uint32_t level,
-        DeferredDisplayList* deferredList) {
-    status_t drawGlStatus = DrawGlInfo::kStatusDone;
+class DeferOperationHandler {
+public:
+    DeferOperationHandler(DeferStateStruct& deferStruct, int multipliedAlpha, int level)
+        : mDeferStruct(deferStruct), mMultipliedAlpha(multipliedAlpha), mLevel(level) {}
+    inline void operator()(DisplayListOp* operation, int saveCount) {
+        operation->defer(mDeferStruct, saveCount, mLevel, mMultipliedAlpha);
+    }
+private:
+    DeferStateStruct& mDeferStruct;
+    const int mMultipliedAlpha;
+    const int mLevel;
+};
+
+void DisplayList::defer(DeferStateStruct& deferStruct, const int level) {
+    DeferOperationHandler handler(deferStruct, mCaching ? mMultipliedAlpha : -1, level);
+    iterate<DeferOperationHandler>(deferStruct.mRenderer, handler, level);
+}
+
+class ReplayOperationHandler {
+public:
+    ReplayOperationHandler(ReplayStateStruct& replayStruct, int multipliedAlpha, int level)
+        : mReplayStruct(replayStruct), mMultipliedAlpha(multipliedAlpha), mLevel(level) {}
+    inline void operator()(DisplayListOp* operation, int saveCount) {
+#if DEBUG_DISPLAY_LIST_OPS_AS_EVENTS
+        replayStruct.mRenderer.eventMark(operation->name());
+#endif
+        operation->replay(mReplayStruct, saveCount, mLevel, mMultipliedAlpha);
+    }
+private:
+    ReplayStateStruct& mReplayStruct;
+    const int mMultipliedAlpha;
+    const int mLevel;
+};
+
+void DisplayList::replay(ReplayStateStruct& replayStruct, const int level) {
+    ReplayOperationHandler handler(replayStruct, mCaching ? mMultipliedAlpha : -1, level);
+
+    replayStruct.mRenderer.startMark(mName.string());
+    iterate<ReplayOperationHandler>(replayStruct.mRenderer, handler, level);
+    replayStruct.mRenderer.endMark();
+
+    DISPLAY_LIST_LOGD("%*sDone (%p, %s), returning %d", level * 2, "", this, mName.string(),
+            replayStruct.mDrawGlStatus);
+}
+
+/**
+ * This function serves both defer and replay modes, and will organize the displayList's component
+ * operations for a single frame:
+ *
+ * Every 'simple' operation that affects just the matrix and alpha (or other factors of
+ * DeferredDisplayState) may be issued directly to the renderer, but complex operations (with custom
+ * defer logic) and operations in displayListOps are issued through the 'handler' which handles the
+ * defer vs replay logic, per operation
+ */
+template <class T>
+void DisplayList::iterate(OpenGLRenderer& renderer, T& handler, const int level) {
+    if (mSize == 0 || mAlpha <= 0) {
+        DISPLAY_LIST_LOGD("%*sEmpty display list (%p, %s)", level * 2, "", this, mName.string());
+        return;
+    }
 
 #if DEBUG_DISPLAY_LIST
     Rect* clipRect = renderer.getClipRect();
     DISPLAY_LIST_LOGD("%*sStart display list (%p, %s), clipRect: %.0f, %.f, %.0f, %.0f",
-            (level+1)*2, "", this, mName.string(), clipRect->left, clipRect->top,
+            level * 2, "", this, mName.string(), clipRect->left, clipRect->top,
             clipRect->right, clipRect->bottom);
 #endif
 
-    renderer.startMark(mName.string());
+    int restoreTo = renderer.getSaveCount();
+    handler(mSaveOp->reinit(SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag),
+            PROPERTY_SAVECOUNT);
 
-    int restoreTo = renderer.save(SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag);
-    DISPLAY_LIST_LOGD("%*sSave %d %d", level * 2, "",
+    DISPLAY_LIST_LOGD("%*sSave %d %d", (level + 1) * 2, "",
             SkCanvas::kMatrix_SaveFlag | SkCanvas::kClip_SaveFlag, restoreTo);
 
-    drawGlStatus |= setViewProperties(renderer, dirty, flags, level, deferredList);
+    setViewProperties<T>(renderer, handler, level + 1);
 
     if (renderer.quickRejectNoScissor(0, 0, mWidth, mHeight)) {
         DISPLAY_LIST_LOGD("%*sRestoreToCount %d", level * 2, "", restoreTo);
-        renderer.restoreToCount(restoreTo);
-        renderer.endMark();
-        return drawGlStatus;
+        handler(mRestoreToCountOp->reinit(restoreTo), PROPERTY_SAVECOUNT);
+        return;
     }
 
     DisplayListLogBuffer& logBuffer = DisplayListLogBuffer::getInstance();
     int saveCount = renderer.getSaveCount() - 1;
     for (unsigned int i = 0; i < mDisplayListData->displayListOps.size(); i++) {
         DisplayListOp *op = mDisplayListData->displayListOps[i];
-#if DEBUG_DISPLAY_LIST_OPS_AS_EVENTS
-        renderer.eventMark(strlen(op->name()), op->name());
-#endif
-        drawGlStatus |= op->replay(renderer, dirty, flags,
-                saveCount, level, mCaching, mMultipliedAlpha, deferredList);
+
+        handler(op, saveCount);
         logBuffer.writeCommand(level, op->name());
     }
 
-    DISPLAY_LIST_LOGD("%*sRestoreToCount %d", level * 2, "", restoreTo);
+    DISPLAY_LIST_LOGD("%*sRestoreToCount %d", (level + 1) * 2, "", restoreTo);
+    handler(mRestoreToCountOp->reinit(restoreTo), PROPERTY_SAVECOUNT);
     renderer.restoreToCount(restoreTo);
-    renderer.endMark();
-
-    DISPLAY_LIST_LOGD("%*sDone (%p, %s), returning %d", (level + 1) * 2, "", this, mName.string(),
-            drawGlStatus);
-
-    if (!level && CC_LIKELY(deferredList)) {
-        drawGlStatus |= deferredList->flush(renderer, dirty, flags, level);
-    }
-
-    return drawGlStatus;
 }
 
 }; // namespace uirenderer
diff --git a/libs/hwui/DisplayList.h b/libs/hwui/DisplayList.h
index feee69c..5392587 100644
--- a/libs/hwui/DisplayList.h
+++ b/libs/hwui/DisplayList.h
@@ -24,6 +24,8 @@
 #include <SkCamera.h>
 #include <SkMatrix.h>
 
+#include <private/hwui/DrawGlInfo.h>
+
 #include <utils/RefBase.h>
 #include <utils/SortedVector.h>
 #include <utils/String8.h>
@@ -57,10 +59,33 @@
 class SkiaColorFilter;
 class SkiaShader;
 
+class ClipRectOp;
+class SaveLayerOp;
+class SaveOp;
+class RestoreToCountOp;
+
+struct DeferStateStruct {
+    DeferStateStruct(DeferredDisplayList& deferredList, OpenGLRenderer& renderer, int replayFlags)
+            : mDeferredList(deferredList), mRenderer(renderer), mReplayFlags(replayFlags) {}
+    DeferredDisplayList& mDeferredList;
+    OpenGLRenderer& mRenderer;
+    const int mReplayFlags;
+};
+
+struct ReplayStateStruct {
+    ReplayStateStruct(OpenGLRenderer& renderer, Rect& dirty, int replayFlags)
+            : mRenderer(renderer), mDirty(dirty), mReplayFlags(replayFlags),
+            mDrawGlStatus(DrawGlInfo::kStatusDone) {}
+    OpenGLRenderer& mRenderer;
+    Rect& mDirty;
+    const int mReplayFlags;
+    status_t mDrawGlStatus;
+};
+
 /**
  * Refcounted structure that holds data used in display list stream
  */
-class DisplayListData: public LightRefBase<DisplayListData> {
+class DisplayListData : public LightRefBase<DisplayListData> {
 public:
     LinearAllocator allocator;
     Vector<DisplayListOp*> displayListOps;
@@ -79,9 +104,6 @@
         kReplayFlag_ClipChildren = 0x1
     };
 
-    status_t setViewProperties(OpenGLRenderer& renderer, Rect& dirty,
-            int32_t flags, uint32_t level, DeferredDisplayList* deferredList);
-    void outputViewProperties(uint32_t level);
 
     ANDROID_API size_t getSize();
     ANDROID_API static void destroyDisplayListDeferred(DisplayList* displayList);
@@ -89,8 +111,9 @@
 
     void initFromDisplayListRenderer(const DisplayListRenderer& recorder, bool reusing = false);
 
-    status_t replay(OpenGLRenderer& renderer, Rect& dirty, int32_t flags, uint32_t level = 0,
-            DeferredDisplayList* deferredList = NULL);
+
+    void defer(DeferStateStruct& deferStruct, const int level);
+    void replay(ReplayStateStruct& replayStruct, const int level);
 
     void output(uint32_t level = 0);
 
@@ -426,6 +449,14 @@
     }
 
 private:
+    void outputViewProperties(const int level);
+
+    template <class T>
+    inline void setViewProperties(OpenGLRenderer& renderer, T& handler, const int level);
+
+    template <class T>
+    inline void iterate(OpenGLRenderer& renderer, T& handler, const int level);
+
     void init();
 
     void clearResources();
@@ -490,6 +521,22 @@
     SkMatrix* mStaticMatrix;
     SkMatrix* mAnimationMatrix;
     bool mCaching;
+
+    /**
+     * State operations - needed to defer displayList property operations (for example, when setting
+     * an alpha causes a SaveLayerAlpha to occur). These operations point into mDisplayListData's
+     * allocation, or null if uninitialized.
+     *
+     * These are initialized (via friend constructors) when a displayList is issued in either replay
+     * or deferred mode. If replaying, the ops are not used until the next frame. If deferring, the
+     * ops may be stored in the DeferredDisplayList to be played back a second time.
+     *
+     * They should be used at most once per frame (one call to iterate)
+     */
+    ClipRectOp* mClipRectOp;
+    SaveLayerOp* mSaveLayerOp;
+    SaveOp* mSaveOp;
+    RestoreToCountOp* mRestoreToCountOp;
 }; // class DisplayList
 
 }; // namespace uirenderer
diff --git a/libs/hwui/DisplayListOp.h b/libs/hwui/DisplayListOp.h
index 105f45f..14b476f 100644
--- a/libs/hwui/DisplayListOp.h
+++ b/libs/hwui/DisplayListOp.h
@@ -78,17 +78,26 @@
         kOpLogFlag_JSON = 0x2 // TODO: add?
     };
 
-    // If a DeferredDisplayList is supplied, DrawOps will be stored until the list is flushed
-    // NOTE: complex clips and layers prevent deferral
-    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int32_t flags, int saveCount,
-            uint32_t level, bool caching, int multipliedAlpha,
-            DeferredDisplayList* deferredList) = 0;
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) = 0;
 
-    virtual void output(int level, uint32_t flags = 0) = 0;
+    virtual void replay(ReplayStateStruct& replayStruct, int saveCount,
+            int level, int multipliedAlpha) = 0;
+
+    virtual void output(int level, uint32_t logFlags = 0) = 0;
 
     // NOTE: it would be nice to declare constants and overriding the implementation in each op to
     // point at the constants, but that seems to require a .cpp file
     virtual const char* name() = 0;
+
+    /**
+     * Stores the relevant canvas state of the object between deferral and replay (if the canvas
+     * state supports being stored) See OpenGLRenderer::simpleClipAndState()
+     *
+     * TODO: don't reserve space for StateOps that won't be deferred
+     */
+    DeferredDisplayState state;
+
 };
 
 class StateOp : public DisplayListOp {
@@ -97,28 +106,22 @@
 
     virtual ~StateOp() {}
 
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        // default behavior only affects immediate, deferrable state, issue directly to renderer
+        applyState(deferStruct.mRenderer, saveCount);
+    }
+
     /**
      * State operations are applied directly to the renderer, but can cause the deferred drawing op
      * list to flush
      */
-    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int32_t flags, int saveCount,
-            uint32_t level, bool caching, int multipliedAlpha, DeferredDisplayList* deferredList) {
-        status_t status = DrawGlInfo::kStatusDone;
-        if (deferredList && requiresDrawOpFlush(renderer)) {
-            // will be setting renderer state that affects ops in deferredList, so flush list first
-            status |= deferredList->flush(renderer, dirty, flags, level);
-        }
-        applyState(renderer, saveCount);
-        return status;
+    virtual void replay(ReplayStateStruct& replayStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        applyState(replayStruct.mRenderer, saveCount);
     }
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) = 0;
-
-    /**
-     * Returns true if it affects renderer drawing state in such a way to break deferral
-     * see OpenGLRenderer::disallowDeferral()
-     */
-    virtual bool requiresDrawOpFlush(OpenGLRenderer& renderer) { return false; }
 };
 
 class DrawOp : public DisplayListOp {
@@ -126,36 +129,35 @@
     DrawOp(SkPaint* paint)
             : mPaint(paint), mQuickRejected(false) {}
 
-    /** Draw operations are stored in the deferredList with information necessary for playback */
-    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int32_t flags, int saveCount,
-            uint32_t level, bool caching, int multipliedAlpha, DeferredDisplayList* deferredList) {
-        if (mQuickRejected && CC_LIKELY(flags & DisplayList::kReplayFlag_ClipChildren)) {
-            return DrawGlInfo::kStatusDone;
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        if (mQuickRejected &&
+                CC_LIKELY(deferStruct.mReplayFlags & DisplayList::kReplayFlag_ClipChildren)) {
+            return;
         }
 
-        if (!deferredList || renderer.disallowDeferral()) {
-            // dispatch draw immediately, since the renderer's state is too complex for deferral
-            return applyDraw(renderer, dirty, level, caching, multipliedAlpha);
-        }
-
-        if (!caching) multipliedAlpha = -1;
         state.mMultipliedAlpha = multipliedAlpha;
         if (!getLocalBounds(state.mBounds)) {
             // empty bounds signify bounds can't be calculated
             state.mBounds.setEmpty();
         }
 
-        if (!renderer.storeDisplayState(state)) {
-            // op wasn't quick-rejected, so defer
-            deferredList->add(this, renderer.getCaches().drawReorderDisabled);
-            onDrawOpDeferred(renderer);
-        }
-
-        return DrawGlInfo::kStatusDone;
+        deferStruct.mDeferredList.addDrawOp(deferStruct.mRenderer, this);
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) = 0;
+    virtual void replay(ReplayStateStruct& replayStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        if (mQuickRejected &&
+                CC_LIKELY(replayStruct.mReplayFlags & DisplayList::kReplayFlag_ClipChildren)) {
+            return;
+        }
+
+        replayStruct.mDrawGlStatus |= applyDraw(replayStruct.mRenderer, replayStruct.mDirty,
+                level, multipliedAlpha);
+    }
+
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) = 0;
 
     virtual void onDrawOpDeferred(OpenGLRenderer& renderer) {
     }
@@ -174,11 +176,6 @@
 
     float strokeWidthOutset() { return mPaint->getStrokeWidth() * 0.5f; }
 
-    /**
-     * Stores the relevant canvas state of the object between deferral and replay (if the canvas
-     * state supports being stored) See OpenGLRenderer::simpleClipAndState()
-     */
-    DeferredDisplayState state;
 protected:
     SkPaint* getPaint(OpenGLRenderer& renderer, bool alwaysCopy = false) {
         return renderer.filterPaint(mPaint, alwaysCopy);
@@ -225,88 +222,113 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 class SaveOp : public StateOp {
+    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     SaveOp(int flags)
             : mFlags(flags) {}
 
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        int newSaveCount = deferStruct.mRenderer.save(mFlags);
+        deferStruct.mDeferredList.addSave(deferStruct.mRenderer, this, newSaveCount);
+    }
+
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
         renderer.save(mFlags);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Save flags %x", mFlags);
     }
 
     virtual const char* name() { return "Save"; }
 
+    int getFlags() const { return mFlags; }
 private:
+    SaveOp() {}
+    DisplayListOp* reinit(int flags) {
+        mFlags = flags;
+        return this;
+    }
+
     int mFlags;
 };
 
 class RestoreToCountOp : public StateOp {
+    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     RestoreToCountOp(int count)
             : mCount(count) {}
 
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
-        renderer.restoreToCount(saveCount + mCount);
-
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        deferStruct.mDeferredList.addRestoreToCount(deferStruct.mRenderer, saveCount + mCount);
+        deferStruct.mRenderer.restoreToCount(saveCount + mCount);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
+        renderer.restoreToCount(saveCount + mCount);
+    }
+
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Restore to count %d", mCount);
     }
 
     virtual const char* name() { return "RestoreToCount"; }
-    // Note: don't have to return true for requiresDrawOpFlush - even though restore can create a
-    // complex clip, the clip and matrix are overridden by DeferredDisplayList::flush()
 
 private:
+    RestoreToCountOp() {}
+    DisplayListOp* reinit(int count) {
+        mCount = count;
+        return this;
+    }
+
     int mCount;
 };
 
 class SaveLayerOp : public StateOp {
+    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
-    SaveLayerOp(float left, float top, float right, float bottom, SkPaint* paint, int flags)
-            : mArea(left, top, right, bottom), mPaint(paint), mFlags(flags) {}
+    SaveLayerOp(float left, float top, float right, float bottom,
+            int alpha, SkXfermode::Mode mode, int flags)
+            : mArea(left, top, right, bottom), mAlpha(alpha), mMode(mode), mFlags(flags) {}
+
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        // NOTE: don't bother with actual saveLayer, instead issuing it at flush time
+        int newSaveCount = deferStruct.mRenderer.save(mFlags);
+        deferStruct.mDeferredList.addSaveLayer(deferStruct.mRenderer, this, newSaveCount);
+    }
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
-        SkPaint* paint = renderer.filterPaint(mPaint);
-        renderer.saveLayer(mArea.left, mArea.top, mArea.right, mArea.bottom, paint, mFlags);
+        renderer.saveLayer(mArea.left, mArea.top, mArea.right, mArea.bottom, mAlpha, mMode, mFlags);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
-        OP_LOG("SaveLayer of area " RECT_STRING, RECT_ARGS(mArea));
+    virtual void output(int level, uint32_t logFlags) {
+        OP_LOG("SaveLayer%s of area " RECT_STRING,
+                (isSaveLayerAlpha() ? "Alpha" : ""),RECT_ARGS(mArea));
     }
 
-    virtual const char* name() { return "SaveLayer"; }
-    virtual bool requiresDrawOpFlush(OpenGLRenderer& renderer) { return true; }
+    virtual const char* name() { return isSaveLayerAlpha() ? "SaveLayerAlpha" : "SaveLayer"; }
+
+    int getFlags() { return mFlags; }
 
 private:
-    Rect mArea;
-    SkPaint* mPaint;
-    int mFlags;
-};
-
-class SaveLayerAlphaOp : public StateOp {
-public:
-    SaveLayerAlphaOp(float left, float top, float right, float bottom, int alpha, int flags)
-            : mArea(left, top, right, bottom), mAlpha(alpha), mFlags(flags) {}
-
-    virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
-        renderer.saveLayerAlpha(mArea.left, mArea.top, mArea.right, mArea.bottom, mAlpha, mFlags);
+    // Special case, reserved for direct DisplayList usage
+    SaveLayerOp() {}
+    DisplayListOp* reinit(float left, float top, float right, float bottom,
+            int alpha, SkXfermode::Mode mode, int flags) {
+        mArea.set(left, top, right, bottom);
+        mAlpha = alpha;
+        mMode = mode;
+        mFlags = flags;
+        return this;
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
-        OP_LOG("SaveLayerAlpha of area " RECT_STRING, RECT_ARGS(mArea));
-    }
-
-    virtual const char* name() { return "SaveLayerAlpha"; }
-    virtual bool requiresDrawOpFlush(OpenGLRenderer& renderer) { return true; }
-
-private:
+    bool isSaveLayerAlpha() { return mAlpha < 255 && mMode == SkXfermode::kSrcOver_Mode; }
     Rect mArea;
     int mAlpha;
+    SkXfermode::Mode mMode;
     int mFlags;
 };
 
@@ -319,7 +341,7 @@
         renderer.translate(mDx, mDy);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Translate by %f %f", mDx, mDy);
     }
 
@@ -339,7 +361,7 @@
         renderer.rotate(mDegrees);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Rotate by %f degrees", mDegrees);
     }
 
@@ -358,7 +380,7 @@
         renderer.scale(mSx, mSy);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Scale by %f %f", mSx, mSy);
     }
 
@@ -378,7 +400,7 @@
         renderer.skew(mSx, mSy);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Skew by %f %f", mSx, mSy);
     }
 
@@ -398,7 +420,7 @@
         renderer.setMatrix(mMatrix);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("SetMatrix " MATRIX_STRING, MATRIX_ARGS(mMatrix));
     }
 
@@ -417,7 +439,7 @@
         renderer.concatMatrix(mMatrix);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("ConcatMatrix " MATRIX_STRING, MATRIX_ARGS(mMatrix));
     }
 
@@ -427,75 +449,97 @@
     SkMatrix* mMatrix;
 };
 
-class ClipRectOp : public StateOp {
+class ClipOp : public StateOp {
+public:
+    ClipOp(SkRegion::Op op) : mOp(op) {}
+
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        // NOTE: must defer op BEFORE applying state, since it may read clip
+        deferStruct.mDeferredList.addClip(deferStruct.mRenderer, this);
+
+        // TODO: Can we avoid applying complex clips at defer time?
+        applyState(deferStruct.mRenderer, saveCount);
+    }
+
+    bool canCauseComplexClip() {
+        return ((mOp != SkRegion::kIntersect_Op) && (mOp != SkRegion::kReplace_Op)) || !isRect();
+    }
+
+protected:
+    ClipOp() {}
+    virtual bool isRect() { return false; }
+
+    SkRegion::Op mOp;
+};
+
+class ClipRectOp : public ClipOp {
+    friend class DisplayList; // give DisplayList private constructor/reinit access
 public:
     ClipRectOp(float left, float top, float right, float bottom, SkRegion::Op op)
-            : mArea(left, top, right, bottom), mOp(op) {}
+            : ClipOp(op), mArea(left, top, right, bottom) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
         renderer.clipRect(mArea.left, mArea.top, mArea.right, mArea.bottom, mOp);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("ClipRect " RECT_STRING, RECT_ARGS(mArea));
     }
 
     virtual const char* name() { return "ClipRect"; }
 
-    virtual bool requiresDrawOpFlush(OpenGLRenderer& renderer) {
-        // TODO: currently, we flush when we *might* cause a clip region to exist. Ideally, we
-        // should only flush when a non-rectangular clip would result
-        return !renderer.hasRectToRectTransform() || !hasRectToRectOp();
-    }
+protected:
+    virtual bool isRect() { return true; }
 
 private:
-    inline bool hasRectToRectOp() {
-        return mOp == SkRegion::kIntersect_Op || mOp == SkRegion::kReplace_Op;
+    ClipRectOp() {}
+    DisplayListOp* reinit(float left, float top, float right, float bottom, SkRegion::Op op) {
+        mOp = op;
+        mArea.set(left, top, right, bottom);
+        return this;
     }
+
     Rect mArea;
-    SkRegion::Op mOp;
 };
 
-class ClipPathOp : public StateOp {
+class ClipPathOp : public ClipOp {
 public:
     ClipPathOp(SkPath* path, SkRegion::Op op)
-            : mPath(path), mOp(op) {}
+            : ClipOp(op), mPath(path) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
         renderer.clipPath(mPath, mOp);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         SkRect bounds = mPath->getBounds();
         OP_LOG("ClipPath bounds " RECT_STRING,
                 bounds.left(), bounds.top(), bounds.right(), bounds.bottom());
     }
 
     virtual const char* name() { return "ClipPath"; }
-    virtual bool requiresDrawOpFlush(OpenGLRenderer& renderer) { return true; }
 
 private:
     SkPath* mPath;
-    SkRegion::Op mOp;
 };
 
-class ClipRegionOp : public StateOp {
+class ClipRegionOp : public ClipOp {
 public:
     ClipRegionOp(SkRegion* region, SkRegion::Op op)
-            : mRegion(region), mOp(op) {}
+            : ClipOp(op), mRegion(region) {}
 
     virtual void applyState(OpenGLRenderer& renderer, int saveCount) {
         renderer.clipRegion(mRegion, mOp);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         SkIRect bounds = mRegion->getBounds();
         OP_LOG("ClipRegion bounds %d %d %d %d",
                 bounds.left(), bounds.top(), bounds.right(), bounds.bottom());
     }
 
     virtual const char* name() { return "ClipRegion"; }
-    virtual bool requiresDrawOpFlush(OpenGLRenderer& renderer) { return true; }
 
 private:
     SkRegion* mRegion;
@@ -508,7 +552,7 @@
         renderer.resetShader();
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOGS("ResetShader");
     }
 
@@ -523,7 +567,7 @@
         renderer.setupShader(mShader);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("SetupShader, shader %p", mShader);
     }
 
@@ -539,7 +583,7 @@
         renderer.resetColorFilter();
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOGS("ResetColorFilter");
     }
 
@@ -555,7 +599,7 @@
         renderer.setupColorFilter(mColorFilter);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("SetupColorFilter, filter %p", mColorFilter);
     }
 
@@ -571,7 +615,7 @@
         renderer.resetShadow();
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOGS("ResetShadow");
     }
 
@@ -587,7 +631,7 @@
         renderer.setupShadow(mRadius, mDx, mDy, mColor);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("SetupShadow, radius %f, %f, %f, color %#x", mRadius, mDx, mDy, mColor);
     }
 
@@ -606,7 +650,7 @@
         renderer.resetPaintFilter();
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOGS("ResetPaintFilter");
     }
 
@@ -622,7 +666,7 @@
         renderer.setupPaintFilter(mClearBits, mSetBits);
     }
 
-    virtual void output(int level, uint32_t flags = 0) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("SetupPaintFilter, clear %#x, set %#x", mClearBits, mSetBits);
     }
 
@@ -645,9 +689,9 @@
                     paint),
             mBitmap(bitmap) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
-        bool makeCopy = caching && multipliedAlpha < 255;
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
+        bool makeCopy = multipliedAlpha >= 0 && multipliedAlpha < 255;
         SkPaint* paint = getPaint(renderer, makeCopy);
         if (makeCopy) {
             // The paint is safe to modify since we're working on a copy
@@ -657,7 +701,7 @@
         return ret;
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw bitmap %p at %f %f", mBitmap, mLocalBounds.left, mLocalBounds.top);
     }
 
@@ -679,12 +723,12 @@
         transform.mapRect(mLocalBounds);
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawBitmap(mBitmap, mMatrix, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw bitmap %p matrix " MATRIX_STRING, mBitmap, MATRIX_ARGS(mMatrix));
     }
 
@@ -705,14 +749,14 @@
             : DrawBoundedOp(dstLeft, dstTop, dstRight, dstBottom, paint),
             mBitmap(bitmap), mSrc(srcLeft, srcTop, srcRight, srcBottom) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawBitmap(mBitmap, mSrc.left, mSrc.top, mSrc.right, mSrc.bottom,
                 mLocalBounds.left, mLocalBounds.top, mLocalBounds.right, mLocalBounds.bottom,
                 getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw bitmap %p src="RECT_STRING", dst="RECT_STRING,
                 mBitmap, RECT_ARGS(mSrc), RECT_ARGS(mLocalBounds));
     }
@@ -732,13 +776,13 @@
     DrawBitmapDataOp(SkBitmap* bitmap, float left, float top, SkPaint* paint)
             : DrawBitmapOp(bitmap, left, top, paint) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawBitmapData(mBitmap, mLocalBounds.left,
                 mLocalBounds.top, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw bitmap %p", mBitmap);
     }
 
@@ -756,13 +800,13 @@
             mBitmap(bitmap), mMeshWidth(meshWidth), mMeshHeight(meshHeight),
             mVertices(vertices), mColors(colors) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawBitmapMesh(mBitmap, mMeshWidth, mMeshHeight,
                 mVertices, mColors, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw bitmap %p mesh %d x %d", mBitmap, mMeshWidth, mMeshHeight);
     }
 
@@ -790,8 +834,8 @@
             mColors(colors), mxDivsCount(width), myDivsCount(height),
             mNumColors(numColors), mAlpha(alpha), mMode(mode) {};
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         // NOTE: not calling the virtual method, which takes a paint
         return renderer.drawPatch(mBitmap, mxDivs, myDivs, mColors,
                 mxDivsCount, myDivsCount, mNumColors,
@@ -799,7 +843,7 @@
                 mLocalBounds.right, mLocalBounds.bottom, mAlpha, mMode);
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw patch "RECT_STRING, RECT_ARGS(mLocalBounds));
     }
 
@@ -825,12 +869,12 @@
     DrawColorOp(int color, SkXfermode::Mode mode)
             : DrawOp(0), mColor(color), mMode(mode) {};
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawColor(mColor, mMode);
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw color %#x, mode %d", mColor, mMode);
     }
 
@@ -869,13 +913,13 @@
     DrawRectOp(float left, float top, float right, float bottom, SkPaint* paint)
             : DrawStrokableOp(left, top, right, bottom, paint) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawRect(mLocalBounds.left, mLocalBounds.top,
                 mLocalBounds.right, mLocalBounds.bottom, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Rect "RECT_STRING, RECT_ARGS(mLocalBounds));
     }
 
@@ -888,12 +932,12 @@
             : DrawBoundedOp(rects, count, paint),
             mRects(rects), mCount(count) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawRects(mRects, mCount, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Rects count %d", mCount);
     }
 
@@ -914,13 +958,13 @@
             float rx, float ry, SkPaint* paint)
             : DrawStrokableOp(left, top, right, bottom, paint), mRx(rx), mRy(ry) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawRoundRect(mLocalBounds.left, mLocalBounds.top,
                 mLocalBounds.right, mLocalBounds.bottom, mRx, mRy, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw RoundRect "RECT_STRING", rx %f, ry %f", RECT_ARGS(mLocalBounds), mRx, mRy);
     }
 
@@ -937,12 +981,12 @@
             : DrawStrokableOp(x - radius, y - radius, x + radius, y + radius, paint),
             mX(x), mY(y), mRadius(radius) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawCircle(mX, mY, mRadius, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Circle x %f, y %f, r %f", mX, mY, mRadius);
     }
 
@@ -959,13 +1003,13 @@
     DrawOvalOp(float left, float top, float right, float bottom, SkPaint* paint)
             : DrawStrokableOp(left, top, right, bottom, paint) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawOval(mLocalBounds.left, mLocalBounds.top,
                 mLocalBounds.right, mLocalBounds.bottom, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Oval "RECT_STRING, RECT_ARGS(mLocalBounds));
     }
 
@@ -979,14 +1023,14 @@
             : DrawStrokableOp(left, top, right, bottom, paint),
             mStartAngle(startAngle), mSweepAngle(sweepAngle), mUseCenter(useCenter) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawArc(mLocalBounds.left, mLocalBounds.top,
                 mLocalBounds.right, mLocalBounds.bottom,
                 mStartAngle, mSweepAngle, mUseCenter, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Arc "RECT_STRING", start %f, sweep %f, useCenter %d",
                 RECT_ARGS(mLocalBounds), mStartAngle, mSweepAngle, mUseCenter);
     }
@@ -1011,8 +1055,8 @@
         mLocalBounds.set(left, top, left + width, top + height);
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawPath(mPath, getPaint(renderer));
     }
 
@@ -1021,7 +1065,7 @@
         renderer.getCaches().pathCache.precache(mPath, paint);
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Path %p in "RECT_STRING, mPath, RECT_ARGS(mLocalBounds));
     }
 
@@ -1042,12 +1086,12 @@
         mLocalBounds.outset(strokeWidthOutset());
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawLines(mPoints, mCount, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Lines count %d", mCount);
     }
 
@@ -1069,12 +1113,12 @@
     DrawPointsOp(float* points, int count, SkPaint* paint)
             : DrawLinesOp(points, count, paint) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawPoints(mPoints, mCount, getPaint(renderer));
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Points count %d", mCount);
     }
 
@@ -1086,7 +1130,7 @@
     DrawSomeTextOp(const char* text, int bytesCount, int count, SkPaint* paint)
             : DrawOp(paint), mText(text), mBytesCount(bytesCount), mCount(count) {};
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw some text, %d bytes", mBytesCount);
     }
 
@@ -1116,8 +1160,8 @@
         /* TODO: inherit from DrawBounded and init mLocalBounds */
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawTextOnPath(mText, mBytesCount, mCount, mPath,
                 mHOffset, mVOffset, getPaint(renderer));
     }
@@ -1138,8 +1182,8 @@
         /* TODO: inherit from DrawBounded and init mLocalBounds */
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawPosText(mText, mBytesCount, mCount, mPositions, getPaint(renderer));
     }
 
@@ -1184,13 +1228,13 @@
         fontRenderer.precache(paint, mText, mCount, transform);
     }
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         return renderer.drawText(mText, mBytesCount, mCount, mX, mY,
                 mPositions, getPaint(renderer), mLength);
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Text of count %d, bytes %d", mCount, mBytesCount);
     }
 
@@ -1221,15 +1265,15 @@
     DrawFunctorOp(Functor* functor)
             : DrawOp(0), mFunctor(functor) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         renderer.startMark("GL functor");
         status_t ret = renderer.callDrawGLFunction(mFunctor, dirty);
         renderer.endMark();
         return ret;
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Functor %p", mFunctor);
     }
 
@@ -1245,21 +1289,26 @@
             : DrawBoundedOp(0, 0, displayList->getWidth(), displayList->getHeight(), 0),
             mDisplayList(displayList), mFlags(flags) {}
 
-    virtual status_t replay(OpenGLRenderer& renderer, Rect& dirty, int32_t flags, int saveCount,
-            uint32_t level, bool caching, int multipliedAlpha, DeferredDisplayList* deferredList) {
+    virtual void defer(DeferStateStruct& deferStruct, int saveCount,
+            int level, int multipliedAlpha) {
         if (mDisplayList && mDisplayList->isRenderable()) {
-            return mDisplayList->replay(renderer, dirty, mFlags, level + 1, deferredList);
+            mDisplayList->defer(deferStruct, level + 1);
         }
-        return DrawGlInfo::kStatusDone;
     }
 
-    // NOT USED, since replay is overridden
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) { return DrawGlInfo::kStatusDone; }
+    virtual void replay(ReplayStateStruct& replayStruct, int saveCount,
+            int level, int multipliedAlpha) {
+        if (mDisplayList && mDisplayList->isRenderable()) {
+            mDisplayList->replay(replayStruct, level + 1);
+        }
+    }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) { return DrawGlInfo::kStatusDone; }
+
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Display List %p, flags %#x", mDisplayList, mFlags);
-        if (mDisplayList && (flags & kOpLogFlag_Recurse)) {
+        if (mDisplayList && (logFlags & kOpLogFlag_Recurse)) {
             mDisplayList->output(level + 1);
         }
     }
@@ -1276,11 +1325,11 @@
     DrawLayerOp(Layer* layer, float x, float y, SkPaint* paint)
             : DrawOp(paint), mLayer(layer), mX(x), mY(y) {}
 
-    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, uint32_t level,
-            bool caching, int multipliedAlpha) {
+    virtual status_t applyDraw(OpenGLRenderer& renderer, Rect& dirty, int level,
+            int multipliedAlpha) {
         int oldAlpha = -1;
 
-        if (caching && multipliedAlpha < 255) {
+        if (multipliedAlpha >= 0 && multipliedAlpha < 255) {
             oldAlpha = mLayer->getAlpha();
             mLayer->setAlpha(multipliedAlpha);
         }
@@ -1291,7 +1340,7 @@
         return ret;
     }
 
-    virtual void output(int level, uint32_t flags) {
+    virtual void output(int level, uint32_t logFlags) {
         OP_LOG("Draw Layer %p at %f %f", mLayer, mX, mY);
     }
 
diff --git a/libs/hwui/DisplayListRenderer.cpp b/libs/hwui/DisplayListRenderer.cpp
index b011443..11a655e 100644
--- a/libs/hwui/DisplayListRenderer.cpp
+++ b/libs/hwui/DisplayListRenderer.cpp
@@ -175,14 +175,8 @@
 }
 
 int DisplayListRenderer::saveLayer(float left, float top, float right, float bottom,
-        SkPaint* p, int flags) {
-    addStateOp(new (alloc()) SaveLayerOp(left, top, right, bottom, p, flags));
-    return OpenGLRenderer::save(flags);
-}
-
-int DisplayListRenderer::saveLayerAlpha(float left, float top, float right, float bottom,
-        int alpha, int flags) {
-    addStateOp(new (alloc()) SaveLayerAlphaOp(left, top, right, bottom, alpha, flags));
+        int alpha, SkXfermode::Mode mode, int flags) {
+    addStateOp(new (alloc()) SaveLayerOp(left, top, right, bottom, alpha, mode, flags));
     return OpenGLRenderer::save(flags);
 }
 
diff --git a/libs/hwui/DisplayListRenderer.h b/libs/hwui/DisplayListRenderer.h
index 38619bf..73b9b66 100644
--- a/libs/hwui/DisplayListRenderer.h
+++ b/libs/hwui/DisplayListRenderer.h
@@ -79,9 +79,7 @@
     virtual void restoreToCount(int saveCount);
 
     virtual int saveLayer(float left, float top, float right, float bottom,
-            SkPaint* p, int flags);
-    virtual int saveLayerAlpha(float left, float top, float right, float bottom,
-                int alpha, int flags);
+            int alpha, SkXfermode::Mode mode, int flags);
 
     virtual void translate(float dx, float dy);
     virtual void rotate(float degrees);
diff --git a/libs/hwui/OpenGLRenderer.cpp b/libs/hwui/OpenGLRenderer.cpp
index 7fe0a69..428980e 100644
--- a/libs/hwui/OpenGLRenderer.cpp
+++ b/libs/hwui/OpenGLRenderer.cpp
@@ -635,38 +635,17 @@
 ///////////////////////////////////////////////////////////////////////////////
 
 int OpenGLRenderer::saveLayer(float left, float top, float right, float bottom,
-        SkPaint* p, int flags) {
+        int alpha, SkXfermode::Mode mode, int flags) {
     const GLuint previousFbo = mSnapshot->fbo;
     const int count = saveSnapshot(flags);
 
     if (!mSnapshot->isIgnored()) {
-        int alpha = 255;
-        SkXfermode::Mode mode;
-
-        if (p) {
-            alpha = p->getAlpha();
-            mode = getXfermode(p->getXfermode());
-        } else {
-            mode = SkXfermode::kSrcOver_Mode;
-        }
-
         createLayer(left, top, right, bottom, alpha, mode, flags, previousFbo);
     }
 
     return count;
 }
 
-int OpenGLRenderer::saveLayerAlpha(float left, float top, float right, float bottom,
-        int alpha, int flags) {
-    if (alpha >= 255) {
-        return saveLayer(left, top, right, bottom, NULL, flags);
-    } else {
-        SkPaint paint;
-        paint.setAlpha(alpha);
-        return saveLayer(left, top, right, bottom, &paint, flags);
-    }
-}
-
 /**
  * Layers are viewed by Skia are slightly different than layers in image editing
  * programs (for instance.) When a layer is created, previously created layers
@@ -1225,36 +1204,48 @@
 // State Deferral
 ///////////////////////////////////////////////////////////////////////////////
 
-bool OpenGLRenderer::storeDisplayState(DeferredDisplayState& state) {
+bool OpenGLRenderer::storeDisplayState(DeferredDisplayState& state, int stateDeferFlags) {
     const Rect& currentClip = *(mSnapshot->clipRect);
     const mat4& currentMatrix = *(mSnapshot->transform);
 
-    // state only has bounds initialized in local coordinates
-    if (!state.mBounds.isEmpty()) {
-        currentMatrix.mapRect(state.mBounds);
-        if (!state.mBounds.intersect(currentClip)) {
-            // quick rejected
-            return true;
+    if (stateDeferFlags & kStateDeferFlag_Draw) {
+        // state.mBounds arrives in local coordinates: map through the matrix, then clip
+        if (!state.mBounds.isEmpty()) {
+            currentMatrix.mapRect(state.mBounds);
+            if (!state.mBounds.intersect(currentClip)) {
+                // quick rejected (bounds do not intersect the clip)
+                return true;
+            }
+        } else {
+            state.mBounds.set(currentClip); // no bounds given: use the whole clip
         }
-    } else {
-        state.mBounds.set(currentClip);
+        state.mDrawModifiers = mDrawModifiers;
+        state.mAlpha = mSnapshot->alpha;
     }
 
-    state.mClip.set(currentClip);
+    if (stateDeferFlags & kStateDeferFlag_Clip) {
+        state.mClip.set(currentClip);
+    } else {
+        state.mClip.setEmpty(); // restoreDisplayState() skips an empty mClip
+    }
+
+    // the transform is always deferred, whatever stateDeferFlags says
     state.mMatrix.load(currentMatrix);
-    state.mDrawModifiers = mDrawModifiers;
     return false;
 }
 
-void OpenGLRenderer::restoreDisplayState(const DeferredDisplayState& state) {
+void OpenGLRenderer::restoreDisplayState(const DeferredDisplayState& state, int stateDeferFlags) {
     currentTransform().load(state.mMatrix);
 
-    // NOTE: a clip RECT will be saved and restored, but DeferredDisplayState doesn't support
-    // complex clips. In the future, we should add support for deferral of operations clipped by
-    // these. for now, we don't defer with complex clips (see OpenGLRenderer::disallowDeferral())
-    mSnapshot->setClip(state.mClip.left, state.mClip.top, state.mClip.right, state.mClip.bottom);
-    dirtyClip();
-    mDrawModifiers = state.mDrawModifiers;
+    if (stateDeferFlags & kStateDeferFlag_Draw) {
+        mDrawModifiers = state.mDrawModifiers;
+        mSnapshot->alpha = state.mAlpha;
+    }
+
+    if (!state.mClip.isEmpty()) { // empty mClip is how storeDisplayState() marks an undeferred clip
+        mSnapshot->setClip(state.mClip.left, state.mClip.top, state.mClip.right, state.mClip.bottom);
+        dirtyClip();
+    }
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -1805,16 +1796,21 @@
 // Drawing
 ///////////////////////////////////////////////////////////////////////////////
 
-status_t OpenGLRenderer::drawDisplayList(DisplayList* displayList, Rect& dirty, int32_t flags) {
+status_t OpenGLRenderer::drawDisplayList(DisplayList* displayList, Rect& dirty,
+        int32_t replayFlags) {
     // All the usual checks and setup operations (quickReject, setupDraw, etc.)
     // will be performed by the display list itself
     if (displayList && displayList->isRenderable()) {
         if (CC_UNLIKELY(mCaches.drawDeferDisabled)) {
-            return displayList->replay(*this, dirty, flags, 0);
+            ReplayStateStruct replayStruct(*this, dirty, replayFlags);
+            displayList->replay(replayStruct, 0);
+            return replayStruct.mDrawGlStatus;
         }
 
         DeferredDisplayList deferredList;
-        return displayList->replay(*this, dirty, flags, 0, &deferredList);
+        DeferStateStruct deferStruct(deferredList, *this, replayFlags);
+        displayList->defer(deferStruct, 0);
+        return deferredList.flush(*this, dirty);
     }
 
     return DrawGlInfo::kStatusDone;
diff --git a/libs/hwui/OpenGLRenderer.h b/libs/hwui/OpenGLRenderer.h
index e961af2..e6c636c 100644
--- a/libs/hwui/OpenGLRenderer.h
+++ b/libs/hwui/OpenGLRenderer.h
@@ -65,6 +65,11 @@
     int mPaintFilterSetBits;
 };
 
+enum StateDeferFlags {
+    kStateDeferFlag_Draw = 0x1, // defer bounds, draw modifiers and snapshot alpha
+    kStateDeferFlag_Clip = 0x2 // defer the clip rect
+};
+
 struct DeferredDisplayState {
     Rect mBounds; // local bounds, mapped with matrix to be in screen space coordinates, clipped.
     int mMultipliedAlpha; // -1 if invalid (because caching not set)
@@ -72,8 +77,8 @@
     // the below are set and used by the OpenGLRenderer at record and deferred playback
     Rect mClip;
     mat4 mMatrix;
-    SkiaShader* mShader;
     DrawModifiers mDrawModifiers;
+    float mAlpha; // snapshot alpha saved by storeDisplayState()
 };
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -188,10 +193,18 @@
     virtual void restore();
     virtual void restoreToCount(int saveCount);
 
+    ANDROID_API int saveLayer(float left, float top, float right, float bottom,
+            SkPaint* paint, int flags) { // NULL paint: SrcOver mode, alpha 255
+        SkXfermode::Mode mode = SkXfermode::kSrcOver_Mode;
+        if (paint) mode = getXfermode(paint->getXfermode());
+        return saveLayer(left, top, right, bottom, paint ? paint->getAlpha() : 255, mode, flags);
+    }
+    ANDROID_API int saveLayerAlpha(float left, float top, float right, float bottom,
+            int alpha, int flags) { // always uses SrcOver blending
+        return saveLayer(left, top, right, bottom, alpha, SkXfermode::kSrcOver_Mode, flags);
+    }
     virtual int saveLayer(float left, float top, float right, float bottom,
-            SkPaint* p, int flags);
-    virtual int saveLayerAlpha(float left, float top, float right, float bottom,
-            int alpha, int flags);
+            int alpha, SkXfermode::Mode mode, int flags);
 
     virtual void translate(float dx, float dy);
     virtual void rotate(float degrees);
@@ -211,7 +224,7 @@
     virtual bool clipRegion(SkRegion* region, SkRegion::Op op);
     virtual Rect* getClipRect();
 
-    virtual status_t drawDisplayList(DisplayList* displayList, Rect& dirty, int32_t flags);
+    virtual status_t drawDisplayList(DisplayList* displayList, Rect& dirty, int32_t replayFlags);
     virtual void outputDisplayList(DisplayList* displayList);
     virtual status_t drawLayer(Layer* layer, float x, float y, SkPaint* paint);
     virtual status_t drawBitmap(SkBitmap* bitmap, float left, float top, SkPaint* paint);
@@ -261,21 +274,10 @@
 
     SkPaint* filterPaint(SkPaint* paint, bool alwaysCopy = false);
 
-    bool disallowDeferral() {
-        // returns true if the OpenGLRenderer's state can be completely represented by
-        // a DeferredDisplayState object
-        return !mSnapshot->clipRegion->isEmpty() ||
-                mSnapshot->alpha < 1.0 ||
-                (mSnapshot->flags & Snapshot::kFlagIsLayer) ||
-                (mSnapshot->flags & Snapshot::kFlagFboTarget); // ensure we're not in a layer
-    }
+    bool storeDisplayState(DeferredDisplayState& state, int stateDeferFlags);
+    void restoreDisplayState(const DeferredDisplayState& state, int stateDeferFlags);
 
-    bool storeDisplayState(DeferredDisplayState& state);
-    void restoreDisplayState(const DeferredDisplayState& state);
-
-    const DrawModifiers& getDrawModifiers() { return mDrawModifiers; }
-    void setDrawModifiers(const DrawModifiers& drawModifiers) { mDrawModifiers = drawModifiers; }
-
+    // TODO: what does this mean? no perspective? no rotate?
     ANDROID_API bool isCurrentTransformSimple() {
         return mSnapshot->transform->isSimple();
     }
@@ -284,6 +286,11 @@
         return mCaches;
     }
 
+    // true if the clip is a simple rect, i.e. the snapshot's clipRegion is empty
+    bool isCurrentClipSimple() {
+        return mSnapshot->clipRegion->isEmpty();
+    }
+
     /**
      * Sets the alpha on the current snapshot. This alpha value will be modulated
      * with other alpha values when drawing primitives.