SkPDF/Clusterator: expose to the light of a unit test

Change-Id: I5667da133f608ab42f83daba3424134b8e956b1e
Reviewed-on: https://skia-review.googlesource.com/117006
Reviewed-by: Florin Malita <fmalita@chromium.org>
Commit-Queue: Hal Canary <halcanary@google.com>
diff --git a/gn/pdf.gni b/gn/pdf.gni
index 665e35c..25c5a29 100644
--- a/gn/pdf.gni
+++ b/gn/pdf.gni
@@ -8,6 +8,8 @@
 
 skia_pdf_sources = [
   "$_src/pdf/SkBitmapKey.h",
+  "$_src/pdf/SkClusterator.cpp",
+  "$_src/pdf/SkClusterator.h",
   "$_src/pdf/SkDeflate.cpp",
   "$_src/pdf/SkDeflate.h",
   "$_src/pdf/SkJpegInfo.cpp",
diff --git a/src/pdf/SkClusterator.cpp b/src/pdf/SkClusterator.cpp
new file mode 100644
index 0000000..3d6800b
--- /dev/null
+++ b/src/pdf/SkClusterator.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkClusterator.h"
+
+#include "SkUtils.h"
+
+// "ReversedChars" is how PDF marks right-to-left text.  A cluster array qualifies when
+// it has two or more entries, starts above zero, never increases, and ends at zero.
+static bool is_reversed(const uint32_t* clusters, uint32_t count) {
+    if (!(count >= 2 && clusters[0] != 0 && clusters[count - 1] == 0)) {
+        return false;
+    }
+    for (uint32_t next = 1; next < count; ++next) {
+        if (clusters[next] > clusters[next - 1]) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Glyph-ID input borrows the caller's glyphs, clusters, and utf8Text.  Any other
+// encoding is converted to glyphs here, with one generated cluster per glyph.
+SkClusterator::SkClusterator(const void* sourceText, size_t sourceByteCount,
+                             const SkPaint& paint, const uint32_t* clusters,
+                             uint32_t utf8TextByteLength, const char* utf8Text) {
+    if (SkPaint::kGlyphID_TextEncoding == paint.getTextEncoding()) {
+        fGlyphs = reinterpret_cast<const SkGlyphID*>(sourceText);
+        fClusters = clusters;
+        fUtf8Text = utf8Text;
+        fGlyphCount = SkToU32(sourceByteCount / sizeof(SkGlyphID));
+        fTextByteLength = utf8TextByteLength;
+        if (fClusters) {
+            SkASSERT(fUtf8Text && fTextByteLength > 0 && fGlyphCount > 0);
+            fReversedChars = is_reversed(fClusters, fGlyphCount);
+        } else {
+            SkASSERT(!fUtf8Text && fTextByteLength == 0);
+        }
+        return;
+    }
+
+    // If Skia is given text (not glyphs), then our fallback primitive shaping will
+    // produce a simple 1-1 cluster mapping.
+    fGlyphCount = SkToU32(paint.textToGlyphs(sourceText, sourceByteCount, nullptr));
+    fGlyphStorage.resize(fGlyphCount);
+    (void)paint.textToGlyphs(sourceText, sourceByteCount, fGlyphStorage.data());
+    fGlyphs = fGlyphStorage.data();
+    fClusterStorage.resize(fGlyphCount);
+    fClusters = fClusterStorage.data();
+    // next() reads both text members, so define them even if no case below matches.
+    fUtf8Text = nullptr;
+    fTextByteLength = 0;
+
+    switch (paint.getTextEncoding()) {
+        case SkPaint::kUTF8_TextEncoding: {
+            fUtf8Text = reinterpret_cast<const char*>(sourceText);
+            fTextByteLength = SkToU32(sourceByteCount);
+            const char* txtPtr = fUtf8Text;
+            for (uint32_t i = 0; i < fGlyphCount; ++i) {
+                fClusterStorage[i] = SkToU32(txtPtr - fUtf8Text);
+                txtPtr += SkUTF8_LeadByteToCount(*(const unsigned char*)txtPtr);
+                SkASSERT(txtPtr <= fUtf8Text + sourceByteCount);
+            }
+            SkASSERT(txtPtr == fUtf8Text + sourceByteCount);
+            return;
+        }
+        case SkPaint::kUTF16_TextEncoding: {
+            // Transcode into fUtf8textStorage, noting where each code point begins.
+            const uint16_t* utf16ptr = reinterpret_cast<const uint16_t*>(sourceText);
+            int utf16count = SkToInt(sourceByteCount / sizeof(uint16_t));
+            fTextByteLength = SkToU32(SkUTF16_ToUTF8(utf16ptr, utf16count));
+            fUtf8textStorage.resize(fTextByteLength);
+            fUtf8Text = fUtf8textStorage.data();
+            char* txtPtr = fUtf8textStorage.data();
+            uint32_t clusterIndex = 0;
+            while (utf16ptr < (const uint16_t*)sourceText + utf16count) {
+                fClusterStorage[clusterIndex++] = SkToU32(txtPtr - fUtf8Text);
+                SkUnichar uni = SkUTF16_NextUnichar(&utf16ptr);
+                txtPtr += SkUTF8_FromUnichar(uni, txtPtr);
+            }
+            SkASSERT(clusterIndex == fGlyphCount);
+            SkASSERT(txtPtr == fUtf8textStorage.data() + fTextByteLength);
+            SkASSERT(utf16ptr == (const uint16_t*)sourceText + utf16count);
+            return;
+        }
+        case SkPaint::kUTF32_TextEncoding: {
+            const SkUnichar* utf32 = reinterpret_cast<const SkUnichar*>(sourceText);
+            uint32_t utf32count = SkToU32(sourceByteCount / sizeof(SkUnichar));
+            SkASSERT(fGlyphCount == utf32count);
+            // First pass sizes the UTF-8 buffer; fTextByteLength was zeroed above.
+            for (uint32_t i = 0; i < utf32count; ++i) {
+                fTextByteLength += SkToU32(SkUTF8_FromUnichar(utf32[i]));
+            }
+            fUtf8textStorage.resize(SkToSizeT(fTextByteLength));
+            fUtf8Text = fUtf8textStorage.data();
+            char* txtPtr = fUtf8textStorage.data();
+            for (uint32_t i = 0; i < utf32count; ++i) {
+                fClusterStorage[i] = SkToU32(txtPtr - fUtf8Text);
+                txtPtr += SkUTF8_FromUnichar(utf32[i], txtPtr);
+            }
+            return;
+        }
+        default:
+            SkDEBUGFAIL("");
+            break;
+    }
+}
+
+// Returns the next cluster in glyph order; the all-zero Cluster signals the end.
+SkClusterator::Cluster SkClusterator::next() {
+    if (fCurrentGlyphIndex >= fGlyphCount) {
+        return Cluster{nullptr, 0, 0, 0};
+    }
+    if (!(fClusters && fUtf8Text)) {
+        return Cluster{nullptr, 0, fCurrentGlyphIndex++, 1};  // one glyph, no text
+    }
+    const uint32_t firstGlyph = fCurrentGlyphIndex;
+    const uint32_t textStart = fClusters[firstGlyph];
+    // Consume the run of consecutive glyphs that share this text offset.
+    while (++fCurrentGlyphIndex < fGlyphCount && fClusters[fCurrentGlyphIndex] == textStart) {}
+    // The text ends at the smallest offset above textStart, else at the end of the text.
+    uint32_t textEnd = fTextByteLength;
+    for (uint32_t i = 0; i < fGlyphCount; ++i) {
+        const uint32_t offset = fClusters[i];
+        if (offset > textStart && offset < textEnd) {
+            textEnd = offset;
+        }
+    }
+    const uint32_t glyphsInCluster = fCurrentGlyphIndex - firstGlyph;
+    return Cluster{fUtf8Text + textStart, textEnd - textStart, firstGlyph, glyphsInCluster};
+}
+
diff --git a/src/pdf/SkClusterator.h b/src/pdf/SkClusterator.h
new file mode 100644
index 0000000..db47a5d
--- /dev/null
+++ b/src/pdf/SkClusterator.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SkClusterator_DEFINED
+#define SkClusterator_DEFINED
+
+#include <vector>
+
+#include "SkTypes.h"
+#include "SkPaint.h"
+
+/** Given the m-to-n glyph-to-character mapping data (as returned by
+    harfbuzz), iterate over the clusters. */
+class SkClusterator {
+public:
+    // clusters[] holds one offset into utf8Text[] per glyph.  When paint's encoding is
+    // not glyph IDs, sourceText is converted and a 1-1 cluster mapping is generated.
+    SkClusterator(const void* sourceText, size_t sourceByteCount,
+                  const SkPaint& paint, const uint32_t* clusters,
+                  uint32_t utf8TextByteLength, const char* utf8Text);
+    const SkGlyphID* glyphs() const { return fGlyphs; }
+    uint32_t glyphCount() const { return fGlyphCount; }
+    bool reversedChars() const { return fReversedChars; }  // true for RTL-looking runs
+    struct Cluster {
+        const char* fUtf8Text;     // start of the cluster's text, or nullptr
+        uint32_t fTextByteLength;  // bytes of text in the cluster
+        uint32_t fGlyphIndex;      // index of the cluster's first glyph
+        uint32_t fGlyphCount;      // glyphs in the cluster; 0 marks end of iteration
+        explicit operator bool() const { return fGlyphCount != 0; }
+    };
+    Cluster next();  // advances the iteration by one cluster
+
+private:
+    // Backing storage, filled only when the constructor converts text itself.
+    std::vector<SkGlyphID> fGlyphStorage;
+    std::vector<char> fUtf8textStorage;
+    std::vector<uint32_t> fClusterStorage;
+    const SkGlyphID* fGlyphs = nullptr;
+    const uint32_t* fClusters = nullptr;
+    const char* fUtf8Text = nullptr;
+    uint32_t fGlyphCount = 0;
+    uint32_t fTextByteLength = 0;
+    uint32_t fCurrentGlyphIndex = 0;
+    bool fReversedChars = false;
+};
+
+
+#endif  // SkClusterator_DEFINED
diff --git a/src/pdf/SkPDFDevice.cpp b/src/pdf/SkPDFDevice.cpp
index 76d6005..98e34b0 100644
--- a/src/pdf/SkPDFDevice.cpp
+++ b/src/pdf/SkPDFDevice.cpp
@@ -13,6 +13,7 @@
 #include "SkBitmapKey.h"
 #include "SkCanvas.h"
 #include "SkClipOpPriv.h"
+#include "SkClusterator.h"
 #include "SkColor.h"
 #include "SkColorFilter.h"
 #include "SkDraw.h"
@@ -1049,164 +1050,8 @@
     bool fInitialized = false;
     const bool fDefaultPositioning;
 };
-
-/** Given the m-to-n glyph-to-character mapping data (as returned by
-    harfbuzz), iterate over the clusters. */
-class Clusterator {
-public:
-    Clusterator() : fClusters(nullptr), fUtf8Text(nullptr), fGlyphCount(0), fTextByteLength(0) {}
-    explicit Clusterator(uint32_t glyphCount)
-        : fClusters(nullptr)
-        , fUtf8Text(nullptr)
-        , fGlyphCount(glyphCount)
-        , fTextByteLength(0) {}
-    // The clusters[] array is an array of offsets into utf8Text[],
-    // one offset for each glyph.  See SkTextBlobBuilder for more info.
-    Clusterator(const uint32_t* clusters,
-                const char* utf8Text,
-                uint32_t glyphCount,
-                uint32_t textByteLength)
-        : fClusters(clusters)
-        , fUtf8Text(utf8Text)
-        , fGlyphCount(glyphCount)
-        , fTextByteLength(textByteLength)
-    {
-        // "ReversedChars" is how PDF deals with RTL text.
-        // We set it to true only if all cluster indices are decreasing.
-        if (fUtf8Text && fClusters && fGlyphCount > 1) {
-            int clusterCount = 1;
-            for (uint32_t i = 0; i + 1 < fGlyphCount; ++i) {
-                if (fClusters[i + 1] != fClusters[i]) {
-                    if (fClusters[i + 1] > fClusters[i]) {
-                        return;
-                    }
-                    ++clusterCount;
-                }
-            }
-            fReversedChars = clusterCount > 1;
-        }
-    }
-    struct Cluster {
-        const char* fUtf8Text;
-        uint32_t fTextByteLength;
-        uint32_t fGlyphIndex;
-        uint32_t fGlyphCount;
-        explicit operator bool() const { return fGlyphCount != 0; }
-    };
-    // True if this looks like right-to-left text.
-    bool reversedChars() const { return fReversedChars; }
-    Cluster next() {
-        if (fGlyphIndex >= fGlyphCount) {
-            return Cluster{nullptr, 0, 0, 0};
-        }
-        if (!fClusters || !fUtf8Text) {
-            return Cluster{nullptr, 0, fGlyphIndex++, 1};
-        }
-        uint32_t clusterGlyphIndex = fGlyphIndex;
-        uint32_t cluster = fClusters[clusterGlyphIndex];
-        do {
-            ++fGlyphIndex;
-        } while (fGlyphIndex < fGlyphCount && cluster == fClusters[fGlyphIndex]);
-        uint32_t clusterGlyphCount = fGlyphIndex - clusterGlyphIndex;
-        uint32_t clusterEnd = fTextByteLength;
-        for (unsigned i = 0; i < fGlyphCount; ++i) {
-           uint32_t c = fClusters[i];
-           if (c > cluster && c < clusterEnd) {
-               clusterEnd = c;
-           }
-        }
-        uint32_t clusterLen = clusterEnd - cluster;
-        return Cluster{fUtf8Text + cluster, clusterLen, clusterGlyphIndex, clusterGlyphCount};
-    }
-
-private:
-    const uint32_t* fClusters;
-    const char* fUtf8Text;
-    uint32_t fGlyphCount;
-    uint32_t fTextByteLength;
-    uint32_t fGlyphIndex = 0;
-    bool fReversedChars = false;
-};
-
-struct TextStorage {
-    SkAutoTMalloc<char> fUtf8textStorage;
-    SkAutoTMalloc<uint32_t> fClusterStorage;
-    SkAutoTMalloc<SkGlyphID> fGlyphStorage;
-};
 }  // namespace
 
-/** Given some unicode text (as passed to drawText(), convert to
-    glyphs (via primitive shaping), while preserving
-    glyph-to-character mapping information. */
-static Clusterator make_clusterator(
-        const void* sourceText,
-        size_t sourceByteCount,
-        const SkPaint& paint,
-        TextStorage* storage,
-        int glyphCount) {
-    SkASSERT(SkPaint::kGlyphID_TextEncoding != paint.getTextEncoding());
-    SkASSERT(glyphCount == paint.textToGlyphs(sourceText, sourceByteCount, nullptr));
-    SkASSERT(glyphCount > 0);
-    storage->fGlyphStorage.reset(SkToSizeT(glyphCount));
-    (void)paint.textToGlyphs(sourceText, sourceByteCount, storage->fGlyphStorage.get());
-    storage->fClusterStorage.reset(SkToSizeT(glyphCount));
-    uint32_t* clusters = storage->fClusterStorage.get();
-    uint32_t utf8ByteCount = 0;
-    const char* utf8Text = nullptr;
-    switch (paint.getTextEncoding()) {
-        case SkPaint::kUTF8_TextEncoding: {
-            const char* txtPtr = (const char*)sourceText;
-            for (int i = 0; i < glyphCount; ++i) {
-                clusters[i] = SkToU32(txtPtr - (const char*)sourceText);
-                txtPtr += SkUTF8_LeadByteToCount(*(const unsigned char*)txtPtr);
-                SkASSERT(txtPtr <= (const char*)sourceText + sourceByteCount);
-            }
-            SkASSERT(txtPtr == (const char*)sourceText + sourceByteCount);
-            utf8ByteCount = SkToU32(sourceByteCount);
-            utf8Text = (const char*)sourceText;
-            break;
-        }
-        case SkPaint::kUTF16_TextEncoding: {
-            const uint16_t* utf16ptr = (const uint16_t*)sourceText;
-            int utf16count = SkToInt(sourceByteCount / sizeof(uint16_t));
-            utf8ByteCount = SkToU32(SkUTF16_ToUTF8(utf16ptr, utf16count));
-            storage->fUtf8textStorage.reset(utf8ByteCount);
-            char* txtPtr = storage->fUtf8textStorage.get();
-            utf8Text = txtPtr;
-            int clusterIndex = 0;
-            while (utf16ptr < (const uint16_t*)sourceText + utf16count) {
-                clusters[clusterIndex++] = SkToU32(txtPtr - utf8Text);
-                SkUnichar uni = SkUTF16_NextUnichar(&utf16ptr);
-                txtPtr += SkUTF8_FromUnichar(uni, txtPtr);
-            }
-            SkASSERT(clusterIndex == glyphCount);
-            SkASSERT(txtPtr == storage->fUtf8textStorage.get() + utf8ByteCount);
-            SkASSERT(utf16ptr == (const uint16_t*)sourceText + utf16count);
-            break;
-        }
-        case SkPaint::kUTF32_TextEncoding: {
-            const SkUnichar* utf32 = (const SkUnichar*)sourceText;
-            int utf32count = SkToInt(sourceByteCount / sizeof(SkUnichar));
-            SkASSERT(glyphCount == utf32count);
-            for (int i = 0; i < utf32count; ++i) {
-                utf8ByteCount += SkToU32(SkUTF8_FromUnichar(utf32[i]));
-            }
-            storage->fUtf8textStorage.reset(SkToSizeT(utf8ByteCount));
-            char* txtPtr = storage->fUtf8textStorage.get();
-            utf8Text = txtPtr;
-            for (int i = 0; i < utf32count; ++i) {
-                clusters[i] = SkToU32(txtPtr - utf8Text);
-                txtPtr += SkUTF8_FromUnichar(utf32[i], txtPtr);
-            }
-            break;
-        }
-        default:
-            SkDEBUGFAIL("");
-            break;
-    }
-    return Clusterator(clusters, utf8Text, SkToU32(glyphCount), utf8ByteCount);
-}
-
 static SkUnichar map_glyph(const SkTDArray<SkUnichar>& glyphToUnicode, SkGlyphID glyph) {
     return SkToInt(glyph) < glyphToUnicode.count() ? glyphToUnicode[SkToInt(glyph)] : -1;
 }
@@ -1355,38 +1200,14 @@
     if (!metrics) {
         return;
     }
-    int glyphCount = paint.textToGlyphs(sourceText, sourceByteCount, nullptr);
-    if (glyphCount <= 0) {
+    SkClusterator clusterator(sourceText, sourceByteCount, paint,
+                              clusters, textByteLength, utf8Text);
+    const SkGlyphID* glyphs = clusterator.glyphs();
+    uint32_t glyphCount = clusterator.glyphCount();
+    if (glyphCount == 0) {
         return;
     }
 
-    // These three heap buffers are only used in the case where no glyphs
-    // are passed to drawText() (most clients pass glyphs or a textblob).
-    TextStorage storage;
-    const SkGlyphID* glyphs = nullptr;
-    Clusterator clusterator;
-    if (textByteLength > 0) {
-        SkASSERT(glyphCount == SkToInt(sourceByteCount / sizeof(SkGlyphID)));
-        glyphs = (const SkGlyphID*)sourceText;
-        clusterator = Clusterator(clusters, utf8Text, SkToU32(glyphCount), textByteLength);
-        SkASSERT(clusters);
-        SkASSERT(utf8Text);
-        SkASSERT(srcPaint.getTextEncoding() == SkPaint::kGlyphID_TextEncoding);
-        SkASSERT(glyphCount == paint.textToGlyphs(sourceText, sourceByteCount, nullptr));
-    } else if (SkPaint::kGlyphID_TextEncoding == srcPaint.getTextEncoding()) {
-        SkASSERT(glyphCount == SkToInt(sourceByteCount / sizeof(SkGlyphID)));
-        glyphs = (const SkGlyphID*)sourceText;
-        clusterator = Clusterator(SkToU32(glyphCount));
-        SkASSERT(glyphCount == paint.textToGlyphs(sourceText, sourceByteCount, nullptr));
-        SkASSERT(nullptr == clusters);
-        SkASSERT(nullptr == utf8Text);
-    } else {
-        SkASSERT(nullptr == clusters);
-        SkASSERT(nullptr == utf8Text);
-        clusterator = make_clusterator(sourceText, sourceByteCount, srcPaint,
-                                       &storage, glyphCount);
-        glyphs = storage.fGlyphStorage;
-    }
     bool defaultPositioning = (positioning == SkTextBlob::kDefault_Positioning);
     paint.setHinting(SkPaint::kNo_Hinting);
 
@@ -1406,7 +1227,7 @@
                             /* SkPaint::kRight_Align */           -1.0f;
     if (defaultPositioning && alignment != SkPaint::kLeft_Align) {
         SkScalar advance = 0;
-        for (int i = 0; i < glyphCount; ++i) {
+        for (uint32_t i = 0; i < glyphCount; ++i) {
             advance += advanceScale * glyphCache->getGlyphIDAdvance(glyphs[i]).fAdvanceX;
         }
         offset.offset(alignmentFactor * advance, 0);
@@ -1442,7 +1263,7 @@
                                         offset);
         SkPDFFont* font = nullptr;
 
-        while (Clusterator::Cluster c = clusterator.next()) {
+        while (SkClusterator::Cluster c = clusterator.next()) {
             int index = c.fGlyphIndex;
             int glyphLimit = index + c.fGlyphCount;
 
diff --git a/tests/PDFPrimitivesTest.cpp b/tests/PDFPrimitivesTest.cpp
index 34eada9..f16c669 100644
--- a/tests/PDFPrimitivesTest.cpp
+++ b/tests/PDFPrimitivesTest.cpp
@@ -12,6 +12,7 @@
 #include "Resources.h"
 #include "SkBitmap.h"
 #include "SkCanvas.h"
+#include "SkClusterator.h"
 #include "SkData.h"
 #include "SkDocument.h"
 #include "SkDeflate.h"
@@ -493,4 +494,32 @@
         REPORTER_ASSERT(reporter, roundTrip == i);
     }
 }
+
+// Feeds an 11-glyph run with out-of-order clusters through SkClusterator and checks
+// each Cluster that next() returns, ending with the empty one.
+DEF_TEST(SkPDF_Clusterator, reporter) {
+    const char text[] = "abcdefgh";
+    const SkGlyphID glyphs[11] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
+    const uint32_t clusters[11] = { 3, 2, 2, 1, 0, 4, 4, 7, 6, 6, 5 };
+    SkPaint paint;
+    paint.setTextEncoding(SkPaint::kGlyphID_TextEncoding);
+    SkClusterator clusterator(glyphs, sizeof(glyphs), paint, clusters, strlen(text), text);
+    // Fields: text pointer, text byte length, first glyph index, glyph count.
+    SkClusterator::Cluster expectations[] = {
+        {text + 3, 1, 0, 1}, {text + 2, 1, 1, 2},
+        {text + 1, 1, 3, 1}, {text + 0, 1, 4, 1},
+        {text + 4, 1, 5, 2}, {text + 7, 1, 7, 1},
+        {text + 6, 1, 8, 2}, {text + 5, 1, 10, 1},
+        {nullptr, 0, 0, 0},  // what next() returns once the glyphs run out
+    };
+    // One next() call per entry; the last call must report the end of the run.
+    for (const SkClusterator::Cluster& expectation : expectations) {
+        const SkClusterator::Cluster c = clusterator.next();
+        REPORTER_ASSERT(reporter, c.fUtf8Text       == expectation.fUtf8Text);
+        REPORTER_ASSERT(reporter, c.fTextByteLength == expectation.fTextByteLength);
+        REPORTER_ASSERT(reporter, c.fGlyphIndex     == expectation.fGlyphIndex);
+        REPORTER_ASSERT(reporter, c.fGlyphCount     == expectation.fGlyphCount);
+    }
+}
+
 #endif