Deterministic SkTSet and PDF Output

Addresses this issue: https://code.google.com/p/skia/issues/detail?id=1277

R=edisonn@google.com, vandebo@chromium.org

Author: richardlin@chromium.org

Review URL: https://chromiumcodereview.appspot.com/19283005

git-svn-id: http://skia.googlecode.com/svn/trunk@10298 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/src/pdf/SkTSet.h b/src/pdf/SkTSet.h
index 8d5bbb6..c2d2785 100644
--- a/src/pdf/SkTSet.h
+++ b/src/pdf/SkTSet.h
@@ -8,12 +8,14 @@
 #ifndef SkTSet_DEFINED
 #define SkTSet_DEFINED
 
+#include "SkTSort.h"
 #include "SkTDArray.h"
 #include "SkTypes.h"
 
 /** \class SkTSet<T>
 
-    The SkTSet template class defines a set.
+    The SkTSet template class defines a set. Elements are additionally
+    guaranteed to be sorted by their insertion order.
     Main operations supported now are: add, merge, find and contains.
 
     TSet<T> is mutable.
@@ -24,23 +26,28 @@
 template <typename T> class SkTSet {
 public:
     SkTSet() {
-        fArray = SkNEW(SkTDArray<T>);
+        fSetArray = SkNEW(SkTDArray<T>);
+        fOrderedArray = SkNEW(SkTDArray<T>);
     }
 
     ~SkTSet() {
-        SkASSERT(fArray);
-        SkDELETE(fArray);
+        SkASSERT(fSetArray);
+        SkDELETE(fSetArray);
+        SkASSERT(fOrderedArray);
+        SkDELETE(fOrderedArray);
     }
 
     SkTSet(const SkTSet<T>& src) {
-        this->fArray = SkNEW_ARGS(SkTDArray<T>, (*src.fArray));
+        this->fSetArray = SkNEW_ARGS(SkTDArray<T>, (*src.fSetArray));
+        this->fOrderedArray = SkNEW_ARGS(SkTDArray<T>, (*src.fOrderedArray));
 #ifdef SK_DEBUG
         validate();
 #endif
     }
 
     SkTSet<T>& operator=(const SkTSet<T>& src) {
-        *this->fArray = *src.fArray;
+        *this->fSetArray = *src.fSetArray;
+        *this->fOrderedArray = *src.fOrderedArray;
 #ifdef SK_DEBUG
         validate();
 #endif
@@ -48,23 +55,39 @@
     }
 
     /** Merges src elements into this, and returns the number of duplicates
-     * found.
-    */
+     * found. Elements from src will retain their ordering and will be ordered
+     * after the elements currently in this set.
+     *
+     * Implementation note: this uses a 2-stage merge to obtain O(n log n) time.
+     * The first stage goes through src.fOrderedArray, checking if
+     * this->contains() is false before adding to this.fOrderedArray.
+     * The second stage does a standard sorted list merge on the fSetArrays.
+     */
     int mergeInto(const SkTSet<T>& src) {
-        SkASSERT(fArray);
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+
+        // Do fOrderedArray merge.
+        for (int i = 0; i < src.count(); ++i) {
+            if (!contains((*src.fOrderedArray)[i])) {
+                fOrderedArray->push((*src.fOrderedArray)[i]);
+            }
+        }
+
+        // Do fSetArray merge.
         int duplicates = 0;
 
         SkTDArray<T>* fArrayNew = new SkTDArray<T>();
-        fArrayNew->setReserve(count() + src.count());
+        fArrayNew->setReserve(fOrderedArray->count());
         int i = 0;
         int j = 0;
 
-        while (i < count() && j < src.count()) {
-            if ((*fArray)[i] < (*src.fArray)[j]) {
-                fArrayNew->push((*fArray)[i]);
+        while (i < fSetArray->count() && j < src.count()) {
+            if ((*fSetArray)[i] < (*src.fSetArray)[j]) {
+                fArrayNew->push((*fSetArray)[i]);
                 i++;
-            } else if ((*fArray)[i] > (*src.fArray)[j]) {
-                fArrayNew->push((*src.fArray)[j]);
+            } else if ((*fSetArray)[i] > (*src.fSetArray)[j]) {
+                fArrayNew->push((*src.fSetArray)[j]);
                 j++;
             } else {
                 duplicates++;
@@ -72,17 +95,17 @@
             }
         }
 
-        while (i < count()) {
-            fArrayNew->push((*fArray)[i]);
+        while (i < fSetArray->count()) {
+            fArrayNew->push((*fSetArray)[i]);
             i++;
         }
 
         while (j < src.count()) {
-            fArrayNew->push((*src.fArray)[j]);
+            fArrayNew->push((*src.fSetArray)[j]);
             j++;
         }
-        SkDELETE(fArray);
-        fArray = fArrayNew;
+        SkDELETE(fSetArray);
+        fSetArray = fArrayNew;
         fArrayNew = NULL;
 
 #ifdef SK_DEBUG
@@ -91,18 +114,20 @@
         return duplicates;
     }
 
-    /** Adds a new element into set and returns true if the element is already
+    /** Adds a new element into set and returns false if the element is already
      * in this set.
     */
     bool add(const T& elem) {
-        SkASSERT(fArray);
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
 
         int pos = 0;
         int i = find(elem, &pos);
         if (i >= 0) {
             return false;
         }
-        *fArray->insert(pos) = elem;
+        *fSetArray->insert(pos) = elem;
+        fOrderedArray->push(elem);
 #ifdef SK_DEBUG
         validate();
 #endif
@@ -112,96 +137,212 @@
     /** Returns true if this set is empty.
     */
     bool isEmpty() const {
-        SkASSERT(fArray);
-        return fArray->isEmpty();
+        SkASSERT(fOrderedArray);
+        SkASSERT(fSetArray);
+        SkASSERT(fSetArray->isEmpty() == fOrderedArray->isEmpty());
+        return fOrderedArray->isEmpty();
     }
 
     /** Return the number of elements in the set.
      */
     int count() const {
-        SkASSERT(fArray);
-        return fArray->count();
+        SkASSERT(fOrderedArray);
+        SkASSERT(fSetArray);
+        SkASSERT(fSetArray->count() == fOrderedArray->count());
+        return fOrderedArray->count();
     }
 
     /** Return the number of bytes in the set: count * sizeof(T).
      */
     size_t bytes() const {
-        SkASSERT(fArray);
-        return fArray->bytes();
+        SkASSERT(fOrderedArray);
+        return fOrderedArray->bytes();
     }
 
     /** Return the beginning of a set iterator.
      * Elements in the iterator will be sorted ascending.
      */
     const T*  begin() const {
-        SkASSERT(fArray);
-        return fArray->begin();
+        SkASSERT(fOrderedArray);
+        return fOrderedArray->begin();
     }
 
     /** Return the end of a set iterator.
      */
     const T*  end() const {
-        SkASSERT(fArray);
-        return fArray->end();
+        SkASSERT(fOrderedArray);
+        return fOrderedArray->end();
     }
 
     const T&  operator[](int index) const {
-        SkASSERT(fArray);
-        return (*fArray)[index];
+        SkASSERT(fOrderedArray);
+        return (*fOrderedArray)[index];
     }
 
     /** Resets the set (deletes memory and initiates an empty set).
      */
     void reset() {
-        SkASSERT(fArray);
-        fArray->reset();
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+        fSetArray->reset();
+        fOrderedArray->reset();
     }
 
     /** Rewinds the set (preserves memory and initiates an empty set).
      */
     void rewind() {
-        SkASSERT(fArray);
-        fArray->rewind();
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+        fSetArray->rewind();
+        fOrderedArray->rewind();
     }
 
     /** Reserves memory for the set.
      */
     void setReserve(size_t reserve) {
-        SkASSERT(fArray);
-        fArray->setReserve(reserve);
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+        fSetArray->setReserve(reserve);
+        fOrderedArray->setReserve(reserve);
     }
 
-    /** Returns the index where an element was found.
+    /** Returns true if the array contains this element.
+     */
+    bool contains(const T& elem) const {
+        SkASSERT(fSetArray);
+        return (this->find(elem) >= 0);
+    }
+
+    /** Copies internal array to destination.
+     */
+    void copy(T* dst) const {
+        SkASSERT(fOrderedArray);
+        fOrderedArray->copyRange(dst, 0, fOrderedArray->count());
+    }
+
+    /** Returns a const reference to the internal vector.
+     */
+    const SkTDArray<T>& toArray() {
+        SkASSERT(fOrderedArray);
+        return *fOrderedArray;
+    }
+
+    /** Unref all elements in the set.
+     */
+    void unrefAll() {
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+        fOrderedArray->unrefAll();
+        // Also reset the other array, as SkTDArray::unrefAll does an
+        // implcit reset
+        fSetArray->reset();
+    }
+
+    /** safeUnref all elements in the set.
+     */
+    void safeUnrefAll() {
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+        fOrderedArray->safeUnrefAll();
+        // Also reset the other array, as SkTDArray::safeUnrefAll does an
+        // implcit reset
+        fSetArray->reset();
+    }
+
+#ifdef SK_DEBUG
+    void validate() const {
+        SkASSERT(fSetArray);
+        SkASSERT(fOrderedArray);
+        fSetArray->validate();
+        fOrderedArray->validate();
+        SkASSERT(isSorted() && !hasDuplicates() && arraysConsistent());
+    }
+
+    bool hasDuplicates() const {
+        for (int i = 0; i < fSetArray->count() - 1; ++i) {
+            if ((*fSetArray)[i] == (*fSetArray)[i + 1]) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    bool isSorted() const {
+        for (int i = 0; i < fSetArray->count() - 1; ++i) {
+            // Use only < operator
+            if (!((*fSetArray)[i] < (*fSetArray)[i + 1])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Checks if fSetArray is consistent with fOrderedArray
+     */
+    bool arraysConsistent() const {
+        if (fSetArray->count() != fOrderedArray->count()) {
+            return false;
+        }
+        if (fOrderedArray->count() == 0) {
+            return true;
+        }
+
+        // Copy and sort fOrderedArray, then compare to fSetArray.
+        // A O(n log n) algorithm is necessary as O(n^2) will choke some GMs.
+        SkAutoMalloc sortedArray(fOrderedArray->bytes());
+        T* sortedBase = reinterpret_cast<T*>(sortedArray.get());
+        size_t count = fOrderedArray->count();
+        fOrderedArray->copyRange(sortedBase, 0, count);
+
+        SkTQSort<T>(sortedBase, sortedBase + count - 1);
+
+        for (size_t i = 0; i < count; ++i) {
+            if (sortedBase[i] != (*fSetArray)[i]) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+#endif
+
+private:
+    SkTDArray<T>* fSetArray;        // Sorted by pointer address for fast
+                                    // lookup.
+    SkTDArray<T>* fOrderedArray;    // Sorted by insertion order for
+                                    // deterministic output.
+
+    /** Returns the index in fSetArray where an element was found.
      * Returns -1 if the element was not found, and it fills *posToInsertSorted
      * with the index of the place where elem should be inserted to preserve the
      * internal array sorted.
      * If element was found, *posToInsertSorted is undefined.
      */
     int find(const T& elem, int* posToInsertSorted = NULL) const {
-        SkASSERT(fArray);
+        SkASSERT(fSetArray);
 
-        if (fArray->count() == 0) {
+        if (fSetArray->count() == 0) {
             if (posToInsertSorted) {
                 *posToInsertSorted = 0;
             }
             return -1;
         }
         int iMin = 0;
-        int iMax = fArray->count();
+        int iMax = fSetArray->count();
 
         while (iMin < iMax - 1) {
             int iMid = (iMin + iMax) / 2;
-            if (elem < (*fArray)[iMid]) {
+            if (elem < (*fSetArray)[iMid]) {
                 iMax = iMid;
             } else {
                 iMin = iMid;
             }
         }
-        if (elem == (*fArray)[iMin]) {
+        if (elem == (*fSetArray)[iMin]) {
             return iMin;
         }
         if (posToInsertSorted) {
-            if (elem < (*fArray)[iMin]) {
+            if (elem < (*fSetArray)[iMin]) {
                 *posToInsertSorted = iMin;
             } else {
                 *posToInsertSorted = iMin + 1;
@@ -210,71 +351,6 @@
 
         return -1;
     }
-
-    /** Returns true if the array contains this element.
-     */
-    bool contains(const T& elem) const {
-        SkASSERT(fArray);
-        return (this->find(elem) >= 0);
-    }
-
-    /** Copies internal array to destination.
-     */
-    void copy(T* dst) const {
-        SkASSERT(fArray);
-        fArray->copyRange(0, fArray->count(), dst);
-    }
-
-    /** Returns a const reference to the internal vector.
-     */
-    const SkTDArray<T>& toArray() {
-        SkASSERT(fArray);
-        return *fArray;
-    }
-
-    /** Unref all elements in the set.
-     */
-    void unrefAll() {
-        SkASSERT(fArray);
-        fArray->unrefAll();
-    }
-
-    /** safeUnref all elements in the set.
-     */
-     void safeUnrefAll() {
-        SkASSERT(fArray);
-        fArray->safeUnrefAll();
-    }
-
-#ifdef SK_DEBUG
-    void validate() const {
-        SkASSERT(fArray);
-        fArray->validate();
-        SkASSERT(isSorted() && !hasDuplicates());
-    }
-
-    bool hasDuplicates() const {
-        for (int i = 0; i < fArray->count() - 1; ++i) {
-            if ((*fArray)[i] == (*fArray)[i + 1]) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    bool isSorted() const {
-        for (int i = 0; i < fArray->count() - 1; ++i) {
-            // Use only < operator
-            if (!((*fArray)[i] < (*fArray)[i + 1])) {
-                return false;
-            }
-        }
-        return true;
-    }
-#endif
-
-private:
-    SkTDArray<T>* fArray;
 };
 
 #endif
diff --git a/tests/TSetTest.cpp b/tests/TSetTest.cpp
index 1cb9056..8f276f7 100644
--- a/tests/TSetTest.cpp
+++ b/tests/TSetTest.cpp
@@ -41,7 +41,7 @@
     return (long(i) * PRIME1) % PRIME2;
 }
 
-// Will expose contains() and find() too.
+// Will expose contains() too.
 static void TestTSet_advanced(skiatest::Reporter* reporter) {
     SkTSet<int> set0;
 
@@ -60,6 +60,11 @@
         REPORTER_ASSERT(reporter, !set0.add(f(i)));
     }
 
+    // Test deterministic output
+    for (int i = 0; i < COUNT; i++) {
+        REPORTER_ASSERT(reporter, set0[i] == f(i));
+    }
+
     // Test copy constructor too.
     SkTSet<int> set1 = set0;
 
@@ -68,6 +73,7 @@
 
     for (int i = 0; i < COUNT; i++) {
         REPORTER_ASSERT(reporter, set1.contains(f(i)));
+        REPORTER_ASSERT(reporter, set1[i] == f(i));
     }
 
     // Test operator= too.
@@ -79,6 +85,7 @@
 
     for (int i = 0; i < COUNT; i++) {
         REPORTER_ASSERT(reporter, set2.contains(f(i)));
+        REPORTER_ASSERT(reporter, set2[i] == f(i));
     }
 
 #ifdef SK_DEBUG
@@ -108,6 +115,12 @@
         REPORTER_ASSERT(reporter, set.contains(i));
     }
 
+    // check deterministic output
+    for (int i = 0; i < COUNT; i++) {
+        REPORTER_ASSERT(reporter, set[i] == 2 * i);
+        REPORTER_ASSERT(reporter, set[COUNT + i] == 2 * i + 1);
+    }
+
 #ifdef SK_DEBUG
     set.validate();
     setOdd.validate();