Refactor SkRecord opts, converting playback optimizations where possible.

This adds back two optimizations from SkPicture: drawPosText strength reduction to drawPosTextH, and pointless save-foo-restore blocks are noop'd away.

The small-T optimization in SkRecord gets in the way of implementing replace(), so I removed it.

Just to keep the API focused, I removed the methods on SkRecord that iterate over i for you; it's just as efficient to do it yourself, and all of the interesting code does its own custom iteration.

BUG=skia:2378
R=fmalita@chromium.org, mtklein@google.com

Author: mtklein@chromium.org

Review URL: https://codereview.chromium.org/245853002

git-svn-id: http://skia.googlecode.com/svn/trunk@14300 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/record/SkRecord.h b/src/record/SkRecord.h
index 4c2f5b5..ddf1b3d 100644
--- a/src/record/SkRecord.h
+++ b/src/record/SkRecord.h
@@ -31,59 +31,43 @@
 
     ~SkRecord() {
         Destroyer destroyer;
-        this->mutate(destroyer);
+        for (unsigned i = 0; i < this->count(); i++) {
+            this->mutate(i, destroyer);
+        }
     }
 
+    // Returns the number of canvas commands in this SkRecord.
     unsigned count() const { return fCount; }
 
-    // Accepts a visitor functor with this interface:
+    // Visit the i-th canvas command with a functor matching this interface:
     //   template <typename T>
     //   void operator()(const T& record) { ... }
-    // This operator() must be defined for at least all SkRecords::*; your compiler will help you
-    // get this right.
+    // This operator() must be defined for at least all SkRecords::*.
     template <typename F>
     void visit(unsigned i, F& f) const {
         SkASSERT(i < this->count());
         fRecords[i].visit(fTypes[i], f);
     }
 
-    // As above.  f will be called on each recorded canvas call in the order they were append()ed.
-    template <typename F>
-    void visit(F& f) const {
-        for (unsigned i = 0; i < fCount; i++) {
-            this->visit(i, f);
-        }
-    }
-
-    // Accepts a visitor functor with this interface:
+    // Mutate the i-th canvas command with a functor matching this interface:
     //   template <typename T>
     //   void operator()(T* record) { ... }
-    // This operator() must be defined for at least all SkRecords::*; again, your compiler will help
-    // you get this right.
+    // This operator() must be defined for at least all SkRecords::*.
     template <typename F>
     void mutate(unsigned i, F& f) {
         SkASSERT(i < this->count());
         fRecords[i].mutate(fTypes[i], f);
     }
 
-    // As above.  f will be called on each recorded canvas call in the order they were append()ed.
-    template <typename F>
-    void mutate(F& f) {
-        for (unsigned i = 0; i < fCount; i++) {
-            this->mutate(i, f);
-        }
-    }
-
-    // Allocate contiguous space for count Ts, to be destroyed (not just freed) when the SkRecord is
-    // destroyed.  For classes with constructors, placement new into this array.  Throws on failure.
-    // Here T can really be any class, not just those from SkRecords.
+    // Allocate contiguous space for count Ts, to be freed when the SkRecord is destroyed.
+    // Here T can be any class, not just those from SkRecords.  Throws on failure.
     template <typename T>
     T* alloc(unsigned count = 1) {
         return (T*)fAlloc.allocThrow(sizeof(T) * count);
     }
 
-    // Allocate space to record a canvas call of type T at the end of this SkRecord.  You are
-    // expected to placement new an object of type T onto this pointer.
+    // Add a new command of type T to the end of this SkRecord.
+    // You are expected to placement new an object of type T onto this pointer.
     template <typename T>
     T* append() {
         if (fCount == fReserved) {
@@ -93,9 +77,26 @@
         }
 
         fTypes[fCount] = T::kType;
-        return fRecords[fCount++].alloc<T>(this);
+        return fRecords[fCount++].set(this->alloc<T>());
     }
 
+    // Replace the i-th command with a new command of type T.
+    // You are expected to placement new an object of type T onto this pointer.
+    // References to the old command remain valid for the life of the SkRecord, but
+    // you must destroy the old command.  (It's okay to destroy it first before calling replace.)
+    template <typename T>
+    T* replace(unsigned i) {
+        SkASSERT(i < this->count());
+        fTypes[i] = T::kType;
+        return fRecords[i].set(this->alloc<T>());
+    }
+
+    // A mutator that can be used with replace to destroy canvas commands.
+    struct Destroyer {
+        template <typename T>
+        void operator()(T* record) { record->~T(); }
+    };
+
 private:
     // Implementation notes!
     //
@@ -131,23 +132,9 @@
     // SkRecord looking for just patterns of draw commands (or using this as a quick reject
     // mechanism) though there's admittedly not a very good API exposed publically for this.
     //
-    // We pull one final sneaky trick in the implementation.  When recording canvas calls that need
-    // to store less than a pointer of data, we don't go through the usual path of allocating the
-    // draw command in fAlloc and a pointer to it in fRecords; instead, we ignore fAlloc and
-    // directly allocate the object in the space we would have put the pointer in fRecords.  This is
-    // why you'll see uintptr_t instead of void* in Record below.
-    //
-    // The cost of appending a single record into this structure is then:
-    //   - 1 + sizeof(void*) + sizeof(T) if sizeof(T) >  sizeof(void*)
-    //   - 1 + sizeof(void*)             if sizeof(T) <= sizeof(void*)
+    // The cost to append a T into this structure is 1 + sizeof(void*) + sizeof(T).
 
 
-    // A mutator that calls destructors of all the canvas calls we've recorded.
-    struct Destroyer {
-        template <typename T>
-        void operator()(T* record) { record->~T(); }
-    };
-
     // Logically the same as SkRecords::Type, but packed into 8 bits.
     struct Type8 {
     public:
@@ -159,19 +146,15 @@
         uint8_t fType;
     };
 
-    // Logically a void* to some bytes in fAlloc, but maybe has the bytes stored immediately
-    // instead.  This is also the main interface for devirtualized polymorphic dispatch: see visit()
-    // and mutate(), which essentially do the work of the missing vtable.
+    // An untyped pointer to some bytes in fAlloc.  This is the interface for polymorphic dispatch:
+    // visit() and mutate() work with the parallel fTypes array to do the work of a vtable.
     struct Record {
     public:
-
-        // Allocate space for a T, perhaps using the SkRecord to allocate that space.
+        // Point this record to its data in fAlloc.  Returns ptr for convenience.
         template <typename T>
-        T* alloc(SkRecord* record) {
-            if (IsLarge<T>()) {
-                fRecord = (uintptr_t)record->alloc<T>();
-            }
-            return this->ptr<T>();
+        T* set(T* ptr) {
+            fPtr = ptr;
+            return ptr;
         }
 
         // Visit this record with functor F (see public API above) assuming the record we're
@@ -194,13 +177,9 @@
 
     private:
         template <typename T>
-        T* ptr() const { return (T*)(IsLarge<T>() ? (void*)fRecord : &fRecord); }
+        T* ptr() const { return (T*)fPtr; }
 
-        // Is T too big to fit directly into a uintptr_t, neededing external allocation?
-        template <typename T>
-        static bool IsLarge() { return sizeof(T) > sizeof(uintptr_t); }
-
-        uintptr_t fRecord;
+        void* fPtr;
     };
 
     // fAlloc needs to be a data structure which can append variable length data in contiguous
diff --git a/src/record/SkRecordDraw.cpp b/src/record/SkRecordDraw.cpp
index c6e8550..21b2c7a 100644
--- a/src/record/SkRecordDraw.cpp
+++ b/src/record/SkRecordDraw.cpp
@@ -18,15 +18,16 @@
     void next() { ++fIndex; }
 
     template <typename T> void operator()(const T& r) {
-        if (!this->canSkip(r)) {
+        if (!this->skip(r)) {
             this->draw(r);
             this->updateClip<T>();
         }
     }
 
 private:
-    // Can we skip this command right now?
-    template <typename T> bool canSkip(const T&) const {
+    // Return true if we can skip this command, false if not.
+    // Update fIndex here directly to skip more than just this one command.
+    template <typename T> bool skip(const T&) {
         // We can skip most commands if the clip is empty.  Exceptions are specialized below.
         return fClipEmpty;
     }
@@ -44,6 +45,8 @@
     bool fClipEmpty;
 };
 
+// TODO(mtklein): do this specialization with template traits instead of macros
+
 // These commands may change the clip.
 #define UPDATE_CLIP(T) template <> void Draw::updateClip<SkRecords::T>() \
     { fClipEmpty = fCanvas->isClipEmpty(); }
@@ -56,75 +59,34 @@
 #undef UPDATE_CLIP
 
 // These commands must always run.
-#define CAN_SKIP(T) template <> bool Draw::canSkip(const SkRecords::T&) const { return false; }
-CAN_SKIP(Restore);
-CAN_SKIP(Save);
-CAN_SKIP(SaveLayer);
-CAN_SKIP(Clear);
-CAN_SKIP(PushCull);
-CAN_SKIP(PopCull);
-#undef CAN_SKIP
+#define SKIP(T) template <> bool Draw::skip(const SkRecords::T&) { return false; }
+SKIP(Restore);
+SKIP(Save);
+SKIP(SaveLayer);
+SKIP(Clear);
+SKIP(PushCull);
+SKIP(PopCull);
+#undef SKIP
 
 // We can skip these commands if they're intersecting with a clip that's already empty.
-#define CAN_SKIP(T) template <> bool Draw::canSkip(const SkRecords::T& r) const \
+#define SKIP(T) template <> bool Draw::skip(const SkRecords::T& r) \
     { return fClipEmpty && SkRegion::kIntersect_Op == r.op; }
-CAN_SKIP(ClipPath);
-CAN_SKIP(ClipRRect);
-CAN_SKIP(ClipRect);
-CAN_SKIP(ClipRegion);
-#undef CAN_SKIP
+SKIP(ClipPath);
+SKIP(ClipRRect);
+SKIP(ClipRect);
+SKIP(ClipRegion);
+#undef SKIP
 
-static bool can_skip_text(const SkCanvas& c, const SkPaint& p, SkScalar minY, SkScalar maxY) {
-    // If we're drawing vertical text, none of the checks we're about to do make any sense.
-    // We'll need to call SkPaint::computeFastBounds() later, so bail out if that's not possible.
-    if (p.isVerticalText() || !p.canComputeFastBounds()) {
-        return false;
-    }
-
-    // Rather than checking the top and bottom font metrics, we guess.  Actually looking up the top
-    // and bottom metrics is slow, and this overapproximation should be good enough.
-    const SkScalar buffer = p.getTextSize() * 1.5f;
-    SkDEBUGCODE(SkPaint::FontMetrics metrics;)
-    SkDEBUGCODE(p.getFontMetrics(&metrics);)
-    SkASSERT(-buffer <= metrics.fTop);
-    SkASSERT(+buffer >= metrics.fBottom);
-
-    // Let the paint adjust the text bounds.  We don't care about left and right here, so we use
-    // 0 and 1 respectively just so the bounds rectangle isn't empty.
-    SkRect bounds;
-    bounds.set(0, -buffer, SK_Scalar1, buffer);
-    SkRect adjusted = p.computeFastBounds(bounds, &bounds);
-    return c.quickRejectY(minY + adjusted.fTop, maxY + adjusted.fBottom);
-}
-
-template <> bool Draw::canSkip(const SkRecords::DrawPosTextH& r) const {
-    return fClipEmpty || can_skip_text(*fCanvas, r.paint, r.y, r.y);
-}
-
-template <> bool Draw::canSkip(const SkRecords::DrawPosText& r) const {
-    if (fClipEmpty) {
-        return true;
-    }
-
-    // TODO(mtklein): may want to move this minY/maxY calculation into a one-time pass
-    const unsigned points = r.paint.countText(r.text, r.byteLength);
-    if (points == 0) {
-        return true;
-    }
-    SkScalar minY = SK_ScalarInfinity, maxY = SK_ScalarNegativeInfinity;
-    for (unsigned i = 0; i < points; i++) {
-        minY = SkTMin(minY, r.pos[i].fY);
-        maxY = SkTMax(maxY, r.pos[i].fY);
-    }
-
-    return can_skip_text(*fCanvas, r.paint, minY, maxY);
-}
+// NoOps can always be skipped and draw nothing.
+template <> bool Draw::skip(const SkRecords::NoOp&) { return true; }
+template <> void Draw::draw(const SkRecords::NoOp&) {}
 
 #define DRAW(T, call) template <> void Draw::draw(const SkRecords::T& r) { fCanvas->call; }
 DRAW(Restore, restore());
 DRAW(Save, save(r.flags));
 DRAW(SaveLayer, saveLayer(r.bounds, r.paint, r.flags));
 DRAW(PopCull, popCull());
+DRAW(PushCull, pushCull(r.rect));
 DRAW(Clear, clear(r.color));
 DRAW(Concat, concat(r.matrix));
 DRAW(SetMatrix, setMatrix(r.matrix));
@@ -154,15 +116,26 @@
                                 r.xmode.get(), r.indices, r.indexCount, r.paint));
 #undef DRAW
 
-// PushCull is a bit of a oddball.  We might be able to just skip until just past its popCull.
-template <> void Draw::draw(const SkRecords::PushCull& r) {
-    if (r.popOffset != SkRecords::kUnsetPopOffset && fCanvas->quickReject(r.rect)) {
-        fIndex += r.popOffset;
-    } else {
-        fCanvas->pushCull(r.rect);
+// Added by SkRecordAnnotateCullingPairs.
+template <> bool Draw::skip(const SkRecords::PairedPushCull& r) {
+    if (fCanvas->quickReject(r.base->rect)) {
+        fIndex += r.skip;
+        return true;
     }
+    return false;
 }
 
+// Added by SkRecordBoundDrawPosTextH
+template <> bool Draw::skip(const SkRecords::BoundedDrawPosTextH& r) {
+    return fClipEmpty || fCanvas->quickRejectY(r.minY, r.maxY);
+}
+
+// These draw by proxying to the commands they wrap.  (All the optimization is for skip().)
+#define DRAW(T) template <> void Draw::draw(const SkRecords::T& r) { this->draw(*r.base); }
+DRAW(PairedPushCull);
+DRAW(BoundedDrawPosTextH);
+#undef DRAW
+
 }  // namespace
 
 void SkRecordDraw(const SkRecord& record, SkCanvas* canvas) {
diff --git a/src/record/SkRecordOpts.cpp b/src/record/SkRecordOpts.cpp
index 8963c07..1b23acd 100644
--- a/src/record/SkRecordOpts.cpp
+++ b/src/record/SkRecordOpts.cpp
@@ -11,39 +11,236 @@
 #include "SkTDArray.h"
 
 void SkRecordOptimize(SkRecord* record) {
+    // TODO(mtklein): fuse independent optimizations to reduce number of passes?
+    SkRecordNoopSaveRestores(record);
     SkRecordAnnotateCullingPairs(record);
+    SkRecordReduceDrawPosTextStrength(record);  // Helpful to run this before BoundDrawPosTextH.
+    SkRecordBoundDrawPosTextH(record);
 }
 
+// Streamline replacing one command with another.
+#define REPLACE(record, index, T, ...) \
+    SkNEW_PLACEMENT_ARGS(record->replace<SkRecords::T>(index), SkRecords::T, (__VA_ARGS__))
+
 namespace {
 
-struct Annotator {
-    unsigned index;
-    SkTDArray<SkRecords::PushCull*> pushStack;
+// Convenience base class to share some common implementation code.
+class Common : SkNoncopyable {
+public:
+    explicit Common(SkRecord* record) : fRecord(record), fIndex(0) {}
 
-    // Do nothing to most record types.
-    template <typename T> void operator()(T*) {}
+    unsigned index() const { return fIndex; }
+    void next() { ++fIndex; }
+
+protected:
+    SkRecord* fRecord;
+    unsigned fIndex;
 };
 
-template <> void Annotator::operator()(SkRecords::PushCull* push) {
-    // Store the push's index for now.  We'll calculate the offset using this in the paired pop.
-    push->popOffset = index;
-    pushStack.push(push);
+// Turns logical no-op Save-[non-drawing command]*-Restore patterns into actual no-ops.
+// TODO(mtklein): state machine diagram
+class SaveRestoreNooper : public Common {
+public:
+    explicit SaveRestoreNooper(SkRecord* record)
+        : Common(record), fSave(kInactive), fChanged(false) {}
+
+    // Most drawing commands reset to inactive state without nooping anything.
+    template <typename T>
+    void operator()(T*) { fSave = kInactive; }
+
+    bool changed() const { return fChanged; }
+
+private:
+    static const unsigned kInactive = ~0;
+    unsigned fSave;
+    bool fChanged;
+};
+
+// If the command doesn't draw anything, that doesn't reset the state back to inactive.
+// TODO(mtklein): do this with some sort of template-based trait mechanism instead of macros
+#define IGNORE(T) template <> void SaveRestoreNooper::operator()(SkRecords::T*) {}
+IGNORE(NoOp)
+IGNORE(Concat)
+IGNORE(SetMatrix)
+IGNORE(ClipRect)
+IGNORE(ClipRRect)
+IGNORE(ClipPath)
+IGNORE(ClipRegion)
+IGNORE(PairedPushCull)
+IGNORE(PushCull)
+IGNORE(PopCull)
+#undef CLIP
+
+template <>
+void SaveRestoreNooper::operator()(SkRecords::Save* r) {
+    fSave = SkCanvas::kMatrixClip_SaveFlag == r->flags ? this->index() : kInactive;
 }
 
-template <> void Annotator::operator()(SkRecords::PopCull* pop) {
-    SkRecords::PushCull* push = pushStack.top();
-    pushStack.pop();
+template <>
+void SaveRestoreNooper::operator()(SkRecords::Restore* r) {
+    if (fSave != kInactive) {
+        // Remove everything between the save and restore, inclusive on both sides.
+        fChanged = true;
+        SkRecord::Destroyer destroyer;
+        for (unsigned i = fSave; i <= this->index(); i++) {
+            fRecord->mutate(i, destroyer);
+            REPLACE(fRecord, i, NoOp);
+        }
+        fSave = kInactive;
+    }
+}
 
-    SkASSERT(index > push->popOffset);          // push->popOffset holds the index of the push.
-    push->popOffset = index - push->popOffset;  // Now it's the offset between push and pop.
+
+// Tries to replace PushCull with PairedPushCull, which lets us skip to the paired PopCull
+// when the canvas can quickReject the cull rect.
+class CullAnnotator : public Common {
+public:
+    explicit CullAnnotator(SkRecord* record) : Common(record) {}
+
+    // Do nothing to most ops.
+    template <typename T>
+    void operator()(T*) {}
+
+private:
+    struct Pair {
+        unsigned index;
+        SkRecords::PushCull* command;
+    };
+
+    SkTDArray<Pair> fPushStack;
+};
+
+template <>
+void CullAnnotator::operator()(SkRecords::PushCull* push) {
+    Pair pair = { this->index(), push };
+    fPushStack.push(pair);
+}
+
+template <>
+void CullAnnotator::operator()(SkRecords::PopCull* pop) {
+    Pair push = fPushStack.top();
+    fPushStack.pop();
+
+    SkASSERT(this->index() > push.index);
+    unsigned skip = this->index() - push.index;
+
+    // PairedPushCull adopts push.command.
+    REPLACE(fRecord, push.index, PairedPushCull, push.command, skip);
+}
+
+// Replaces DrawPosText with DrawPosTextH when all Y coordinates are equal.
+class StrengthReducer : public Common {
+public:
+    explicit StrengthReducer(SkRecord* record) : Common(record) {}
+
+    // Do nothing to most ops.
+    template <typename T>
+    void operator()(T*) {}
+};
+
+template <>
+void StrengthReducer::operator()(SkRecords::DrawPosText* r) {
+    const unsigned points = r->paint.countText(r->text, r->byteLength);
+    if (points == 0) {
+        // No point (ha!).
+        return;
+    }
+
+    const SkScalar firstY = r->pos[0].fY;
+    for (unsigned i = 1; i < points; i++) {
+        if (r->pos[i].fY != firstY) {
+            // Needs the full strength of DrawPosText.
+            return;
+        }
+    }
+    // All ys are the same.  We can replace DrawPosText with DrawPosTextH.
+
+    // r->pos is points SkPoints, [(x,y),(x,y),(x,y),(x,y), ... ].
+    // We're going to squint and look at that as 2*points SkScalars, [x,y,x,y,x,y,x,y, ...].
+    // Then we'll rearrange things so all the xs are in order up front, clobbering the ys.
+    SK_COMPILE_ASSERT(sizeof(SkPoint) == 2 * sizeof(SkScalar), SquintingIsNotSafe);
+    SkScalar* scalars = &r->pos[0].fX;
+    for (unsigned i = 0; i < 2*points; i += 2) {
+        scalars[i/2] = scalars[i];
+    }
+
+    SkRecord::Destroyer destroyer;
+    fRecord->mutate(this->index(), destroyer);
+    REPLACE(fRecord, this->index(),
+            DrawPosTextH, (char*)r->text, r->byteLength, scalars, firstY, r->paint);
+}
+
+
+// Tries to replace DrawPosTextH with BoundedDrawPosTextH, which knows conservative upper and lower
+// bounds to use with SkCanvas::quickRejectY.
+class TextBounder : public Common {
+public:
+    explicit TextBounder(SkRecord* record) : Common(record) {}
+
+    // Do nothing to most ops.
+    template <typename T>
+    void operator()(T*) {}
+};
+
+template <>
+void TextBounder::operator()(SkRecords::DrawPosTextH* r) {
+    // If we're drawing vertical text, none of the checks we're about to do make any sense.
+    // We'll need to call SkPaint::computeFastBounds() later, so bail if that's not possible.
+    if (r->paint.isVerticalText() || !r->paint.canComputeFastBounds()) {
+        return;
+    }
+
+    // Rather than checking the top and bottom font metrics, we guess.  Actually looking up the
+    // top and bottom metrics is slow, and this overapproximation should be good enough.
+    const SkScalar buffer = r->paint.getTextSize() * 1.5f;
+    SkDEBUGCODE(SkPaint::FontMetrics metrics;)
+    SkDEBUGCODE(r->paint.getFontMetrics(&metrics);)
+    SkASSERT(-buffer <= metrics.fTop);
+    SkASSERT(+buffer >= metrics.fBottom);
+
+    // Let the paint adjust the text bounds.  We don't care about left and right here, so we use
+    // 0 and 1 respectively just so the bounds rectangle isn't empty.
+    SkRect bounds;
+    bounds.set(0, r->y - buffer, SK_Scalar1, r->y + buffer);
+    SkRect adjusted = r->paint.computeFastBounds(bounds, &bounds);
+
+    // BoundedDrawPosTextH adopts r.
+    REPLACE(fRecord, this->index(), BoundedDrawPosTextH, r, adjusted.fTop, adjusted.fBottom);
+}
+
+template <typename Pass>
+static void run_pass(Pass& pass, SkRecord* record) {
+    for (; pass.index() < record->count(); pass.next()) {
+        record->mutate(pass.index(), pass);
+    }
 }
 
 }  // namespace
 
-void SkRecordAnnotateCullingPairs(SkRecord* record) {
-    Annotator annotator;
 
-    for (annotator.index = 0; annotator.index < record->count(); annotator.index++) {
-        record->mutate(annotator.index, annotator);
-    }
+void SkRecordNoopSaveRestores(SkRecord* record) {
+    // Run SaveRestoreNooper until it doesn't make any more changes.
+    bool changed;
+    do {
+        SaveRestoreNooper nooper(record);
+        run_pass(nooper, record);
+        changed = nooper.changed();
+    } while (changed);
 }
+
+void SkRecordAnnotateCullingPairs(SkRecord* record) {
+    CullAnnotator annotator(record);
+    run_pass(annotator, record);
+}
+
+void SkRecordReduceDrawPosTextStrength(SkRecord* record) {
+    StrengthReducer reducer(record);
+    run_pass(reducer, record);
+}
+
+void SkRecordBoundDrawPosTextH(SkRecord* record) {
+    TextBounder bounder(record);
+    run_pass(bounder, record);
+}
+
+#undef REPLACE
diff --git a/src/record/SkRecordOpts.h b/src/record/SkRecordOpts.h
index bc19a1f..c9cbccf 100644
--- a/src/record/SkRecordOpts.h
+++ b/src/record/SkRecordOpts.h
@@ -14,7 +14,16 @@
 void SkRecordOptimize(SkRecord*);
 
 
-// Annotates PushCull records in record with the relative offset of their paired PopCull.
+// Turns logical no-op Save-[non-drawing command]*-Restore patterns into actual no-ops.
+void SkRecordNoopSaveRestores(SkRecord*);  // TODO(mtklein): add unit tests
+
+// Annotates PushCull commands with the relative offset of their paired PopCull.
 void SkRecordAnnotateCullingPairs(SkRecord*);
 
+// Convert DrawPosText to DrawPosTextH when all the Y coordinates are equal.
+void SkRecordReduceDrawPosTextStrength(SkRecord*);  // TODO(mtklein): add unit tests
+
+// Calculate min and max Y bounds for DrawPosTextH commands, for use with SkCanvas::quickRejectY.
+void SkRecordBoundDrawPosTextH(SkRecord*);  // TODO(mtklein): add unit tests
+
 #endif//SkRecordOpts_DEFINED
diff --git a/src/record/SkRecorder.cpp b/src/record/SkRecorder.cpp
index aff6609..d5ca01f 100644
--- a/src/record/SkRecorder.cpp
+++ b/src/record/SkRecorder.cpp
@@ -213,7 +213,7 @@
 }
 
 void SkRecorder::onPushCull(const SkRect& rect) {
-    APPEND(PushCull, rect, SkRecords::kUnsetPopOffset);
+    APPEND(PushCull, rect);
 }
 
 void SkRecorder::onPopCull() {
diff --git a/src/record/SkRecords.h b/src/record/SkRecords.h
index 1080656..7a89740 100644
--- a/src/record/SkRecords.h
+++ b/src/record/SkRecords.h
@@ -19,36 +19,39 @@
 //
 // We leave this SK_RECORD_TYPES macro defined for use by code that wants to operate on SkRecords
 // types polymorphically.  (See SkRecord::Record::{visit,mutate} for an example.)
-#define SK_RECORD_TYPES(M)  \
-    M(Restore)              \
-    M(Save)                 \
-    M(SaveLayer)            \
-    M(Concat)               \
-    M(SetMatrix)            \
-    M(ClipPath)             \
-    M(ClipRRect)            \
-    M(ClipRect)             \
-    M(ClipRegion)           \
-    M(Clear)                \
-    M(DrawBitmap)           \
-    M(DrawBitmapMatrix)     \
-    M(DrawBitmapNine)       \
-    M(DrawBitmapRectToRect) \
-    M(DrawDRRect)           \
-    M(DrawOval)             \
-    M(DrawPaint)            \
-    M(DrawPath)             \
-    M(DrawPoints)           \
-    M(DrawPosText)          \
-    M(DrawPosTextH)         \
-    M(DrawRRect)            \
-    M(DrawRect)             \
-    M(DrawSprite)           \
-    M(DrawText)             \
-    M(DrawTextOnPath)       \
-    M(DrawVertices)         \
-    M(PushCull)             \
-    M(PopCull)
+#define SK_RECORD_TYPES(M)                                          \
+    M(NoOp)                                                         \
+    M(Restore)                                                      \
+    M(Save)                                                         \
+    M(SaveLayer)                                                    \
+    M(Concat)                                                       \
+    M(SetMatrix)                                                    \
+    M(ClipPath)                                                     \
+    M(ClipRRect)                                                    \
+    M(ClipRect)                                                     \
+    M(ClipRegion)                                                   \
+    M(Clear)                                                        \
+    M(DrawBitmap)                                                   \
+    M(DrawBitmapMatrix)                                             \
+    M(DrawBitmapNine)                                               \
+    M(DrawBitmapRectToRect)                                         \
+    M(DrawDRRect)                                                   \
+    M(DrawOval)                                                     \
+    M(DrawPaint)                                                    \
+    M(DrawPath)                                                     \
+    M(DrawPoints)                                                   \
+    M(DrawPosText)                                                  \
+    M(DrawPosTextH)                                                 \
+    M(DrawRRect)                                                    \
+    M(DrawRect)                                                     \
+    M(DrawSprite)                                                   \
+    M(DrawText)                                                     \
+    M(DrawTextOnPath)                                               \
+    M(DrawVertices)                                                 \
+    M(PushCull)                                                     \
+    M(PopCull)                                                      \
+    M(PairedPushCull)         /*From SkRecordAnnotateCullingPairs*/ \
+    M(BoundedDrawPosTextH)    /*From SkRecordBoundDrawPosTextH*/
 
 // Defines SkRecords::Type, an enum of all record types.
 #define ENUM(T) T##_Type,
@@ -107,6 +110,12 @@
     A a; B b; C c; D d; E e;                                              \
 };
 
+#define ACT_AS_PTR(ptr)                       \
+    operator T*() { return ptr; }             \
+    operator const T*() const { return ptr; } \
+    T* operator->() { return ptr; }           \
+    const T* operator->() const { return ptr; }
+
 // An Optional doesn't own the pointer's memory, but may need to destroy non-POD data.
 template <typename T>
 class Optional : SkNoncopyable {
@@ -114,8 +123,19 @@
     Optional(T* ptr) : fPtr(ptr) {}
     ~Optional() { if (fPtr) fPtr->~T(); }
 
-    operator T*() { return fPtr; }
-    operator const T*() const { return fPtr; }
+    ACT_AS_PTR(fPtr);
+private:
+    T* fPtr;
+};
+
+// Like Optional, but ptr must not be NULL.
+template <typename T>
+class Adopted : SkNoncopyable {
+public:
+    Adopted(T* ptr) : fPtr(ptr) { SkASSERT(fPtr); }
+    ~Adopted() { fPtr->~T(); }
+
+    ACT_AS_PTR(fPtr);
 private:
     T* fPtr;
 };
@@ -126,8 +146,7 @@
 public:
     PODArray(T* ptr) : fPtr(ptr) {}
 
-    operator T*() { return fPtr; }
-    operator const T*() const { return fPtr; }
+    ACT_AS_PTR(fPtr);
 private:
     T* fPtr;
 };
@@ -151,15 +170,13 @@
     SkBitmap fBitmap;
 };
 
-// None of these records manages the lifetimes of pointers, except for DrawVertices handling its
-// Xfermode specially.
+RECORD0(NoOp);
 
 RECORD0(Restore);
 RECORD1(Save, SkCanvas::SaveFlags, flags);
 RECORD3(SaveLayer, Optional<SkRect>, bounds, Optional<SkPaint>, paint, SkCanvas::SaveFlags, flags);
 
-static const unsigned kUnsetPopOffset = 0;
-RECORD2(PushCull, SkRect, rect, unsigned, popOffset);
+RECORD1(PushCull, SkRect, rect);
 RECORD0(PopCull);
 
 RECORD1(Concat, SkMatrix, matrix);
@@ -247,6 +264,10 @@
     SkPaint paint;
 };
 
+// Records added by optimizations.
+RECORD2(PairedPushCull, Adopted<PushCull>, base, unsigned, skip);
+RECORD3(BoundedDrawPosTextH, Adopted<DrawPosTextH>, base, SkScalar, minY, SkScalar, maxY);
+
 #undef RECORD0
 #undef RECORD1
 #undef RECORD2