Reland "SkShaper JSON output with cluster visualization"

This is a reland of 53610832a04157e4edf18347c3e69ddd9d799e3f

Fixes conversion of size_t to int.

Original change's description:
> SkShaper JSON output with cluster visualization
>
> A simple JSON output for diagnostic purposes.
> If the run is not 1:1 code points to glyphs, then
> break the run into clusters.
>
> Change-Id: I06980e0bac2cdca8a69b5b5ba0759a021fd4eb3b
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/209740
> Commit-Queue: Herb Derby <herb@google.com>
> Reviewed-by: Julia Lavrova <jlavrova@google.com>

Change-Id: I712293c4820eb23234d64fa019d28bac8b105637
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/211986
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Herb Derby <herb@google.com>
diff --git a/src/core/SkSpan.h b/src/core/SkSpan.h
index 42d536e..c651d1c 100644
--- a/src/core/SkSpan.h
+++ b/src/core/SkSpan.h
@@ -20,6 +20,7 @@
     constexpr SkSpan(T* ptr, size_t size) : fPtr{ptr}, fSize{size} {}
     template <typename U>
     constexpr explicit SkSpan(std::vector<U>& v) : fPtr{v.data()}, fSize{v.size()} {}
+    constexpr explicit SkSpan(std::string& s) : fPtr{s.c_str()}, fSize{s.size()} {}
     constexpr SkSpan(const SkSpan& o) = default;
     constexpr SkSpan& operator=(const SkSpan& that) {
         fPtr = that.fPtr;
@@ -27,6 +28,8 @@
         return *this;
     }
     constexpr T& operator [] (size_t i) const { return fPtr[i]; }
+    constexpr T& front() const { return fPtr[0]; }
+    constexpr T& back()  const { return fPtr[fSize - 1]; }
     constexpr T* begin() const { return fPtr; }
     constexpr T* end() const { return fPtr + fSize; }
     constexpr const T* cbegin() const { return fPtr; }
diff --git a/src/utils/SkShaperJSONWriter.cpp b/src/utils/SkShaperJSONWriter.cpp
new file mode 100644
index 0000000..e078880
--- /dev/null
+++ b/src/utils/SkShaperJSONWriter.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkShaperJSONWriter.h"
+
+#include <algorithm>
+#include <limits>
+#include <string>
+
+#include "src/utils/SkJSONWriter.h"
+#include "src/utils/SkUTF.h"
+
+// Keeps the writer pointer as given and copies the first `size` bytes of `utf8` into fUTF8.
+SkShaperJSONWriter::SkShaperJSONWriter(SkJSONWriter* JSONWriter, const char* utf8, size_t size)
+        : fJSONWriter(JSONWriter), fUTF8(utf8, size) {}
+
+// Hooks with nothing to record: the JSON for a run is written in commitRunBuffer().
+void SkShaperJSONWriter::beginLine() {}
+
+void SkShaperJSONWriter::runInfo(const SkShaper::RunHandler::RunInfo& /*info*/) {}
+void SkShaperJSONWriter::commitRunInfo() {}
+
+// Resizes the three scratch vectors to info.glyphCount and returns pointers into their storage.
+SkShaper::RunHandler::Buffer SkShaperJSONWriter::runBuffer(const RunInfo& info) {
+    fClusters.resize(info.glyphCount);
+    fPositions.resize(info.glyphCount);
+    fGlyphs.resize(info.glyphCount);
+    return Buffer{fGlyphs.data(), fPositions.data(), nullptr, fClusters.data(), {0, 0}};
+}
+
+// Reports whether each glyph cluster of the run spans exactly one UTF-8 code point.
+// Clusters are visited from the highest byte offset to the lowest, so a cluster's bytes are
+// [clusters[i], lastUtf8Index), with lastUtf8Index starting at utf8End. utf8Begin is not read.
+static bool is_one_to_one(const char utf8[], size_t utf8Begin, size_t utf8End,
+        const std::vector<uint32_t>& clusters) {
+    // A run without glyphs has nothing that breaks the 1:1 mapping; returning here also keeps
+    // front()/back() below from being called on an empty vector, which is undefined behavior.
+    if (clusters.empty()) { return true; }
+    size_t lastUtf8Index = utf8End;
+    auto checkCluster = [&](size_t clusterIndex) {
+        const size_t clusterStart = clusters[clusterIndex];
+        if (clusterStart >= lastUtf8Index) { return false; }
+        if (SkUTF::CountUTF8(&utf8[clusterStart], lastUtf8Index - clusterStart) != 1) {
+            return false;
+        }
+        lastUtf8Index = clusterStart;
+        return true;
+    };
+
+    if (clusters.front() <= clusters.back()) {
+        // left-to-right clusters: offsets grow toward the back, so walk from the back.
+        for (size_t i = clusters.size(); i > 0; --i) {
+            if (!checkCluster(i - 1)) { return false; }
+        }
+    } else {
+        // right-to-left clusters: offsets grow toward the front, so walk from the front.
+        for (size_t i = 0; i < clusters.size(); ++i) {
+            if (!checkCluster(i)) { return false; }
+        }
+    }
+    return true;
+}
+
+// Writes one "run" JSON object: font name and size, the BiDi level when it is non-zero,
+// the text/glyph/cluster data (flat when 1:1, otherwise as M:N cluster objects), the
+// horizontal positions when there is more than one glyph, and the per-glyph advances.
+void SkShaperJSONWriter::commitRunBuffer(const SkShaper::RunHandler::RunInfo& info) {
+    fJSONWriter->beginObject("run", true);
+
+    // Font name; getTypeface() may be nullptr (no typeface set), which leaves the name empty.
+    SkString fontName;
+    if (auto* typeface = info.fFont.getTypeface()) { typeface->getFamilyName(&fontName); }
+    fJSONWriter->appendString("font name", fontName.c_str());
+
+    // Font size
+    fJSONWriter->appendFloat("font size", info.fFont.getSize());
+
+    if (info.fBidiLevel > 0) {
+        std::string bidiType = info.fBidiLevel % 2 == 0 ? "left-to-right" : "right-to-left";
+        std::string bidiOutput = bidiType + " lvl " + std::to_string(info.fBidiLevel);
+        fJSONWriter->appendString("BiDi", bidiOutput.c_str());
+    }
+
+    // An empty run is written in the flat form; the cluster helpers read front()/back().
+    if (fClusters.empty() ||
+        is_one_to_one(fUTF8.c_str(), info.utf8Range.begin(), info.utf8Range.end(), fClusters)) {
+        std::string utf8{&fUTF8[info.utf8Range.begin()], info.utf8Range.size()};
+        fJSONWriter->appendString("UTF8", utf8.c_str());
+        fJSONWriter->beginArray("glyphs", false);
+        for (auto glyphID : fGlyphs) { fJSONWriter->appendU32(glyphID); }
+        fJSONWriter->endArray();
+        fJSONWriter->beginArray("clusters", false);
+        for (auto cluster : fClusters) { fJSONWriter->appendU32(cluster); }
+        fJSONWriter->endArray();
+    } else {
+        VisualizeClusters(fUTF8.c_str(),
+                          info.utf8Range.begin(), info.utf8Range.end(),
+                          SkSpan<const SkGlyphID>{fGlyphs},
+                          SkSpan<const uint32_t>{fClusters},
+                          [this](size_t codePointCount, SkSpan<const char> utf1to1,
+                                 SkSpan<const SkGlyphID> glyph1to1) {
+                              this->displayMToN(codePointCount, utf1to1, glyph1to1);
+                          });
+    }
+
+    if (info.glyphCount > 1) {
+        fJSONWriter->beginArray("horizontal positions", false);
+        for (auto position : fPositions) { fJSONWriter->appendFloat(position.x()); }
+        fJSONWriter->endArray();
+    }
+
+    fJSONWriter->beginArray("advances", false);
+    for (size_t i = 1; i < info.glyphCount; i++) {
+        fJSONWriter->appendFloat(fPositions[i].fX - fPositions[i-1].fX);
+    }
+    // Last advance: the rest of the run advance. An empty run has no front()/back() to read.
+    if (!fPositions.empty()) {
+        SkPoint lastAdvance = info.fAdvance - (fPositions.back() - fPositions.front());
+        fJSONWriter->appendFloat(lastAdvance.fX);
+    }
+    fJSONWriter->endArray();
+
+    fJSONWriter->endObject();
+}
+
+// Groups consecutive glyphs that share a cluster value and reports each group, in glyph order,
+// as processMToN(glyphBegin, glyphEnd, utf8Begin, utf8End) with half-open ranges.
+void SkShaperJSONWriter::BreakupClusters(size_t utf8Begin, size_t utf8End,
+                                         SkSpan<const uint32_t> clusters,
+                                         const BreakupCluastersCallback& processMToN) {
+    // No glyphs, nothing to report; front()/back() and clusters[0] below need an element.
+    if (clusters.size() == 0) { return; }
+
+    if (clusters.front() <= clusters.back()) {
+        // Handle left-to-right text direction: a group's text ends where the next group begins.
+        size_t glyphStartIndex = 0;
+        for (size_t glyphEndIndex = 0; glyphEndIndex < clusters.size(); glyphEndIndex++) {
+            if (clusters[glyphStartIndex] == clusters[glyphEndIndex]) { continue; }
+
+            processMToN(glyphStartIndex, glyphEndIndex,
+                        clusters[glyphStartIndex], clusters[glyphEndIndex]);
+            glyphStartIndex = glyphEndIndex;
+        }
+        // The last group's text runs to the end of the run.
+        processMToN(glyphStartIndex, clusters.size(), clusters[glyphStartIndex], utf8End);
+    } else {
+        // Handle right-to-left text direction: a group's text ends where the previous one began.
+        SkASSERT(clusters.size() >= 2);
+        size_t glyphStartIndex = 0;
+        uint32_t utf8EndIndex = utf8End;
+        for (size_t glyphEndIndex = 0; glyphEndIndex < clusters.size(); glyphEndIndex++) {
+            if (clusters[glyphStartIndex] == clusters[glyphEndIndex]) { continue; }
+
+            processMToN(glyphStartIndex, glyphEndIndex,
+                        clusters[glyphStartIndex], utf8EndIndex);
+            utf8EndIndex = clusters[glyphStartIndex];
+            glyphStartIndex = glyphEndIndex;
+        }
+        // front() > back() forces the loop to advance glyphStartIndex, so the -1 is in range.
+        processMToN(glyphStartIndex, clusters.size(), utf8Begin, clusters[glyphStartIndex-1]);
+    }
+}
+
+// Reports the run's clusters through processMToN: consecutive clusters that are one code point
+// to one glyph are merged into a single call; any other cluster gets a call of its own.
+void SkShaperJSONWriter::VisualizeClusters(const char* utf8, size_t utf8Begin, size_t utf8End,
+                                           SkSpan<const SkGlyphID> glyphIDs,
+                                           SkSpan<const uint32_t> clusters,
+                                           const VisualizeClustersCallback& processMToN) {
+    // Bounds of the 1:1 stretch gathered so far; lo > hi means nothing has been gathered.
+    size_t   glyphLo = std::numeric_limits<size_t>::max();
+    size_t   glyphHi = 0;
+    uint32_t utf8Lo  = std::numeric_limits<uint32_t>::max();
+    uint32_t utf8Hi  = 0;
+    auto resetPending = [&]() {
+        glyphLo = std::numeric_limits<size_t>::max();
+        glyphHi = 0;
+        utf8Lo  = std::numeric_limits<uint32_t>::max();
+        utf8Hi  = 0;
+    };
+
+    // Reports the gathered 1:1 stretch, if any, and starts a new one.
+    auto flushPending = [&]() {
+        if (glyphLo < glyphHi) {
+            size_t pendingCount = glyphHi - glyphLo;
+            SkSpan<const char> text{&utf8[utf8Lo], utf8Hi - utf8Lo};
+            SkSpan<const SkGlyphID> glyphs{&glyphIDs[glyphLo], pendingCount};
+            // Glyph count is the same as codepoint count for 1:1.
+            processMToN(pendingCount, text, glyphs);
+        }
+        resetPending();
+    };
+
+    auto onCluster = [&](size_t glyphBegin, size_t glyphEnd,
+                         uint32_t textBegin, uint32_t textEnd) {
+        int possibleCount = SkUTF::CountUTF8(&utf8[textBegin], textEnd - textBegin);
+        if (possibleCount == -1) { return; }
+        size_t codePointCount = SkTo<size_t>(possibleCount);
+        if (codePointCount != 1 || glyphEnd - glyphBegin != 1) {
+            flushPending();
+            processMToN(codePointCount,
+                        SkSpan<const char>{&utf8[textBegin], textEnd - textBegin},
+                        SkSpan<const SkGlyphID>{&glyphIDs[glyphBegin], glyphEnd - glyphBegin});
+            return;
+        }
+        // 1:1 cluster: widen the gathered stretch to include it.
+        glyphLo = std::min(glyphLo, glyphBegin);
+        glyphHi = std::max(glyphHi, glyphEnd);
+        utf8Lo  = std::min(utf8Lo,  textBegin);
+        utf8Hi  = std::max(utf8Hi,  textEnd);
+    };
+
+    BreakupClusters(utf8Begin, utf8End, clusters, onCluster);
+    flushPending();
+}
+
+// Writes a "cluster N to M" object (N code points, M glyphs) with the cluster's text and IDs.
+void SkShaperJSONWriter::displayMToN(size_t codePointCount,
+                                     SkSpan<const char> utf8,
+                                     SkSpan<const SkGlyphID> glyphIDs) {
+    const std::string clusterName = "cluster " + std::to_string(codePointCount) +
+                                    " to " + std::to_string(glyphIDs.size());
+    fJSONWriter->beginObject(clusterName.c_str(), true);
+    // The span carries a pointer and a length only, so copy it to obtain a C string.
+    const std::string text(utf8.data(), utf8.size());
+    fJSONWriter->appendString("UTF", text.c_str());
+    fJSONWriter->beginArray("glyphsIDs", false);
+    std::for_each(glyphIDs.begin(), glyphIDs.end(),
+                  [this](SkGlyphID glyphID) { fJSONWriter->appendU32(glyphID); });
+    fJSONWriter->endArray();
+    fJSONWriter->endObject();
+}
diff --git a/src/utils/SkShaperJSONWriter.h b/src/utils/SkShaperJSONWriter.h
new file mode 100644
index 0000000..bca0cd1
--- /dev/null
+++ b/src/utils/SkShaperJSONWriter.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkShaperJSONWriter_DEFINED
+#define SkShaperJSONWriter_DEFINED
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "modules/skshaper/include/SkShaper.h"
+#include "src/core/SkSpan.h"
+
+class SkJSONWriter;
+
+// SkShaper::RunHandler that writes each committed run to an SkJSONWriter for diagnostics.
+class SkShaperJSONWriter final : public SkShaper::RunHandler {
+public:
+    // Copies `size` bytes of `utf8`; run utf8Range offsets are later used to index that copy.
+    SkShaperJSONWriter(SkJSONWriter* JSONWriter, const char* utf8, size_t size);
+
+    void beginLine() override;
+    void runInfo(const RunInfo& info) override;
+    void commitRunInfo() override;
+    // Resizes the glyph/position/cluster scratch vectors and returns pointers into them.
+    Buffer runBuffer(const RunInfo& info) override;
+    // Writes the "run" JSON object from the contents of those scratch vectors.
+    void commitRunBuffer(const RunInfo& info) override;
+    void commitLine() override {}
+
+    // Called as (glyphBegin, glyphEnd, utf8Begin, utf8End), once per cluster.
+    using BreakupCluastersCallback =
+            std::function<void(size_t, size_t, uint32_t, uint32_t)>;
+    // Break up the clusters into matching ranges of the UTF8 and of the glyphIDs.
+    static void BreakupClusters(size_t utf8Begin, size_t utf8End,
+                                SkSpan<const uint32_t> clusters,
+                                const BreakupCluastersCallback& processMToN);
+
+    // Called as (codePointCount, UTF8 bytes, glyph IDs), once per reported entry.
+    using VisualizeClustersCallback =
+        std::function<void(size_t, SkSpan<const char>, SkSpan<const SkGlyphID>)>;
+    // Gather runs of 1:1 into larger runs, and display M:N as single entries.
+    static void VisualizeClusters(const char utf8[],
+                                  size_t utf8Begin, size_t utf8End,
+                                  SkSpan<const SkGlyphID> glyphIDs,
+                                  SkSpan<const uint32_t> clusters,
+                                  const VisualizeClustersCallback& processMToN);
+
+private:
+    // Writes one "cluster N to M" JSON object for the given text and glyph IDs.
+    void displayMToN(size_t codePointCount,
+                     SkSpan<const char> utf8,
+                     SkSpan<const SkGlyphID> glyphIDs);
+
+    SkJSONWriter* fJSONWriter;        // Output sink, stored as passed to the constructor.
+    std::vector<SkGlyphID> fGlyphs;   // Per-run scratch, resized in runBuffer().
+    std::vector<SkPoint> fPositions;  // Per-run scratch, resized in runBuffer().
+    std::vector<uint32_t> fClusters;  // Per-run scratch, resized in runBuffer().
+    std::string fUTF8;                // Copy of the text given to the constructor.
+};
+
+#endif  // SkShaperJSONWriter_DEFINED