Reland "SkShaper JSON output with cluster visualization"

This is a reland of 53610832a04157e4edf18347c3e69ddd9d799e3f

Fixes conversion of size_t to int.

Original change's description:
> SkShaper JSON output with cluster visualization
>
> A simple JSON output for diagnostic purposes.
> If the run is not 1:1 code points to glyphs, then
> break the run into clusters.
>
> Change-Id: I06980e0bac2cdca8a69b5b5ba0759a021fd4eb3b
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/209740
> Commit-Queue: Herb Derby <herb@google.com>
> Reviewed-by: Julia Lavrova <jlavrova@google.com>

Change-Id: I712293c4820eb23234d64fa019d28bac8b105637
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/211986
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Herb Derby <herb@google.com>
diff --git a/gn/tests.gni b/gn/tests.gni
index ebed984..bcc540a 100644
--- a/gn/tests.gni
+++ b/gn/tests.gni
@@ -235,6 +235,7 @@
   "$_tests/SkRasterPipelineTest.cpp",
   "$_tests/SkRemoteGlyphCacheTest.cpp",
   "$_tests/SkResourceCacheTest.cpp",
+  "$_tests/SkShaperJSONWriterTest.cpp",
   "$_tests/SkSharedMutexTest.cpp",
   "$_tests/SkSLErrorTest.cpp",
   "$_tests/SkSLFPTest.cpp",
diff --git a/gn/utils.gni b/gn/utils.gni
index e5b3e84..7dcb013 100644
--- a/gn/utils.gni
+++ b/gn/utils.gni
@@ -71,6 +71,8 @@
   "$_src/utils/SkShadowTessellator.cpp",
   "$_src/utils/SkShadowTessellator.h",
   "$_src/utils/SkShadowUtils.cpp",
+  "$_src/utils/SkShaperJSONWriter.h",
+  "$_src/utils/SkShaperJSONWriter.cpp",
   "$_src/utils/SkTextUtils.cpp",
   "$_src/utils/SkThreadUtils_pthread.cpp",
   "$_src/utils/SkThreadUtils_win.cpp",
diff --git a/src/core/SkSpan.h b/src/core/SkSpan.h
index 42d536e..c651d1c 100644
--- a/src/core/SkSpan.h
+++ b/src/core/SkSpan.h
@@ -20,6 +20,7 @@
     constexpr SkSpan(T* ptr, size_t size) : fPtr{ptr}, fSize{size} {}
     template <typename U>
     constexpr explicit SkSpan(std::vector<U>& v) : fPtr{v.data()}, fSize{v.size()} {}
+    constexpr explicit SkSpan(std::string& s) : fPtr{s.c_str()}, fSize{s.size()} {}
     constexpr SkSpan(const SkSpan& o) = default;
     constexpr SkSpan& operator=(const SkSpan& that) {
         fPtr = that.fPtr;
@@ -27,6 +28,8 @@
         return *this;
     }
     constexpr T& operator [] (size_t i) const { return fPtr[i]; }
+    constexpr T& front() const { return fPtr[0]; }
+    constexpr T& back()  const { return fPtr[fSize - 1]; }
     constexpr T* begin() const { return fPtr; }
     constexpr T* end() const { return fPtr + fSize; }
     constexpr const T* cbegin() const { return fPtr; }
diff --git a/src/utils/SkShaperJSONWriter.cpp b/src/utils/SkShaperJSONWriter.cpp
new file mode 100644
index 0000000..e078880
--- /dev/null
+++ b/src/utils/SkShaperJSONWriter.cpp
@@ -0,0 +1,236 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkShaperJSONWriter.h"
+
+#include <algorithm>
+#include <limits>
+#include <string>
+
+#include "src/utils/SkJSONWriter.h"
+#include "src/utils/SkUTF.h"
+
+SkShaperJSONWriter::SkShaperJSONWriter(SkJSONWriter* JSONWriter, const char* utf8, size_t size)
+        : fJSONWriter{JSONWriter}  // raw pointer to the writer that receives the output
+        , fUTF8{utf8, size} {}     // copy of the first `size` bytes of `utf8`
+
+void SkShaperJSONWriter::beginLine() { }  // no-op: nothing is written when a line starts
+
+void SkShaperJSONWriter::runInfo(const SkShaper::RunHandler::RunInfo& info) { }  // no-op
+
+void SkShaperJSONWriter::commitRunInfo() { }  // no-op
+
+// Sizes the per-glyph vectors to info.glyphCount and returns a Buffer over their storage.
+auto SkShaperJSONWriter::runBuffer(const RunInfo& info) -> Buffer {
+    fClusters.resize(info.glyphCount);
+    fPositions.resize(info.glyphCount);
+    fGlyphs.resize(info.glyphCount);
+    return {fGlyphs.data(), fPositions.data(), nullptr, fClusters.data(), {0, 0}};
+}
+
+// True when every glyph of the run maps to exactly one code point of utf8[utf8Begin, utf8End).
+// `clusters` holds, per glyph, the index into `utf8` at which that glyph's cluster starts.
+static bool is_one_to_one(const char utf8[], size_t utf8Begin, size_t utf8End,
+        const std::vector<uint32_t>& clusters) {
+    // front()/back() on an empty vector is undefined; no glyphs is 1:1 only for empty text.
+    if (clusters.empty()) { return utf8Begin == utf8End; }
+    size_t lastUtf8Index = utf8End;
+    // Consumes one cluster; clusters are visited from the end of the text towards its start.
+    auto checkCluster = [&](size_t clusterIndex) {
+        const size_t clusterStart = clusters[clusterIndex];
+        if (clusterStart >= lastUtf8Index) { return false; }
+        // The bytes up to the previously visited cluster must hold exactly one code point.
+        if (SkUTF::CountUTF8(&utf8[clusterStart], lastUtf8Index - clusterStart) != 1) {
+            return false;
+        }
+        lastUtf8Index = clusterStart;
+        return true;
+    };
+    // Either way the clusters are walked from the highest text index down to the lowest.
+    if (clusters.front() <= clusters.back()) {
+        // left-to-right clusters: the highest index is last, so go backwards.
+        for (size_t i = clusters.size(); i > 0; --i) {
+            if (!checkCluster(i - 1)) { return false; }
+        }
+    } else {
+        // right-to-left clusters: the highest index is first, so go forwards.
+        for (size_t i = 0; i < clusters.size(); ++i) {
+            if (!checkCluster(i)) { return false; }
+        }
+    }
+    return true;
+}
+
+// Writes one "run" object for the run just shaped into fGlyphs/fPositions/fClusters: font name
+// and size, BiDi level, the text-to-glyph mapping, horizontal positions and advances.
+void SkShaperJSONWriter::commitRunBuffer(const SkShaper::RunHandler::RunInfo& info) {
+    fJSONWriter->beginObject("run", true);
+    // Font name; stays empty when the font carries no typeface (getTypeface() returns null).
+    SkString fontName;
+    if (auto* typeface = info.fFont.getTypeface()) { typeface->getFamilyName(&fontName); }
+    fJSONWriter->appendString("font name", fontName.c_str());
+    // Font size
+    fJSONWriter->appendFloat("font size", info.fFont.getSize());
+    // BiDi level, reported only when non-zero; odd levels are right-to-left.
+    if (info.fBidiLevel > 0) {
+        std::string bidiType = info.fBidiLevel % 2 == 0 ? "left-to-right" : "right-to-left";
+        std::string bidiOutput = bidiType + " lvl " + std::to_string(info.fBidiLevel);
+        fJSONWriter->appendString("BiDi", bidiOutput.c_str());
+    }
+    // Text-to-glyph mapping: flat arrays when it is 1:1, otherwise one object per cluster group.
+    if (is_one_to_one(fUTF8.c_str(), info.utf8Range.begin(), info.utf8Range.end(), fClusters)) {
+        std::string utf8{&fUTF8[info.utf8Range.begin()], info.utf8Range.size()};
+        fJSONWriter->appendString("UTF8", utf8.c_str());
+        fJSONWriter->beginArray("glyphs", false);
+        for (auto glyphID : fGlyphs) {
+            fJSONWriter->appendU32(glyphID);
+        }
+        fJSONWriter->endArray();
+        fJSONWriter->beginArray("clusters", false);
+        for (auto cluster : fClusters) {
+            fJSONWriter->appendU32(cluster);
+        }
+        fJSONWriter->endArray();
+    } else {
+        VisualizeClusters(fUTF8.c_str(),
+                          info.utf8Range.begin(), info.utf8Range.end(),
+                          SkSpan<const SkGlyphID>{fGlyphs},
+                          SkSpan<const uint32_t>{fClusters},
+                          [this](size_t codePointCount, SkSpan<const char> utf1to1,
+                                 SkSpan<const SkGlyphID> glyph1to1) {
+                              this->displayMToN(codePointCount, utf1to1, glyph1to1);
+                          });
+    }
+
+    if (info.glyphCount > 1) {
+        fJSONWriter->beginArray("horizontal positions", false);
+        for (auto position : fPositions) {
+            fJSONWriter->appendFloat(position.x());
+        }
+        fJSONWriter->endArray();
+    }
+
+    // Advances are x differences of neighbours; the last is what remains of info.fAdvance.
+    fJSONWriter->beginArray("advances", false);
+    for (size_t i = 1; i < info.glyphCount; i++) {
+        fJSONWriter->appendFloat(fPositions[i].fX - fPositions[i-1].fX);
+    }
+    if (!fPositions.empty()) {  // back()/front() are undefined for a run without glyphs
+        SkPoint lastAdvance = info.fAdvance - (fPositions.back() - fPositions.front());
+        fJSONWriter->appendFloat(lastAdvance.fX);
+    }
+    fJSONWriter->endArray();
+    fJSONWriter->endObject();
+}
+
+// Splits `clusters` (one text index per glyph) into maximal groups of glyphs that share a
+// cluster value and reports each as processMToN(glyphBegin, glyphEnd, utf8Begin, utf8End),
+// both ranges half-open. Groups are reported in glyph order for either text direction.
+void SkShaperJSONWriter::BreakupClusters(size_t utf8Begin, size_t utf8End,
+                                         SkSpan<const uint32_t> clusters,
+                                         const BreakupCluastersCallback& processMToN) {
+    // front()/back() of an empty span read out of bounds; no glyphs means nothing to report.
+    if (clusters.size() == 0) { return; }
+    // The callback takes 32-bit text indices, so make the narrowing from size_t explicit.
+    const uint32_t textBegin = static_cast<uint32_t>(utf8Begin);
+    const uint32_t textEnd   = static_cast<uint32_t>(utf8End);
+    size_t glyphStartIndex = 0;
+    if (clusters.front() <= clusters.back()) {
+        // Left-to-right: a group's text ends where the next group's text starts.
+        for (size_t glyphEndIndex = 0; glyphEndIndex < clusters.size(); glyphEndIndex++) {
+            if (clusters[glyphStartIndex] == clusters[glyphEndIndex]) { continue; }
+            processMToN(glyphStartIndex, glyphEndIndex,
+                        clusters[glyphStartIndex], clusters[glyphEndIndex]);
+            glyphStartIndex = glyphEndIndex;
+        }
+        // The last group runs to the end of the text range.
+        processMToN(glyphStartIndex, clusters.size(), clusters[glyphStartIndex], textEnd);
+    } else {
+        // Right-to-left: text indices decrease, so a group's text ends where the previous
+        // group's text started.
+        SkASSERT(clusters.size() >= 2);
+        uint32_t utf8EndIndex = textEnd;
+        for (size_t glyphEndIndex = 0; glyphEndIndex < clusters.size(); glyphEndIndex++) {
+            if (clusters[glyphStartIndex] == clusters[glyphEndIndex]) { continue; }
+            processMToN(glyphStartIndex, glyphEndIndex, clusters[glyphStartIndex], utf8EndIndex);
+            utf8EndIndex = clusters[glyphStartIndex];
+            glyphStartIndex = glyphEndIndex;
+        }
+        // The last group starts at the beginning of the text range.
+        processMToN(glyphStartIndex, clusters.size(), textBegin, utf8EndIndex);
+    }
+}
+
+// Reports the clusters of a run through processMToN(codePointCount, utf8, glyphs), in glyph
+// order: consecutive 1:1 clusters are merged into a single call, and every other cluster
+// (M code points to N glyphs) gets a call of its own.
+void SkShaperJSONWriter::VisualizeClusters(const char* utf8, size_t utf8Begin, size_t utf8End,
+                                           SkSpan<const SkGlyphID> glyphIDs,
+                                           SkSpan<const uint32_t> clusters,
+                                           const VisualizeClustersCallback& processMToN) {
+    // Union of the 1:1 clusters seen since the last report; empty while start >= end.
+    size_t glyphRangeStart, glyphRangeEnd;
+    uint32_t utf8RangeStart, utf8RangeEnd;
+    auto resetRanges = [&]() {
+        glyphRangeStart = std::numeric_limits<size_t>::max();
+        glyphRangeEnd   = 0;
+        utf8RangeStart  = std::numeric_limits<uint32_t>::max();
+        utf8RangeEnd    = 0;
+    };
+    // Reports the pending 1:1 range, if there is one, and starts a fresh range.
+    auto checkRangesAndProcess = [&]() {
+        if (glyphRangeStart < glyphRangeEnd) {
+            size_t glyphRangeCount = glyphRangeEnd - glyphRangeStart;
+            SkSpan<const char> utf8Span{&utf8[utf8RangeStart], utf8RangeEnd - utf8RangeStart};
+            SkSpan<const SkGlyphID> glyphSpan{&glyphIDs[glyphRangeStart], glyphRangeCount};
+            // Glyph count is the same as codepoint count for 1:1.
+            processMToN(glyphRangeCount, utf8Span, glyphSpan);
+        }
+        resetRanges();
+    };
+    // Receives each cluster from BreakupClusters as half-open glyph and utf8 ranges.
+    auto gatherRuns = [&](size_t glyphStartIndex, size_t glyphEndIndex,
+                          uint32_t utf8StartIndex, uint32_t utf8EndIndex) {
+        int possibleCount = SkUTF::CountUTF8(&utf8[utf8StartIndex], utf8EndIndex - utf8StartIndex);
+        // Undecodable text is skipped; flush first so the 1:1 range cannot grow across it.
+        if (possibleCount < 0) { checkRangesAndProcess(); return; }
+        size_t codePointCount = SkTo<size_t>(possibleCount);
+        if (codePointCount == 1 && glyphEndIndex - glyphStartIndex == 1) {
+            glyphRangeStart = std::min(glyphRangeStart, glyphStartIndex);
+            glyphRangeEnd   = std::max(glyphRangeEnd,   glyphEndIndex  );
+            utf8RangeStart  = std::min(utf8RangeStart,  utf8StartIndex );
+            utf8RangeEnd    = std::max(utf8RangeEnd,    utf8EndIndex   );
+        } else {
+            checkRangesAndProcess();
+            SkSpan<const char> utf8Span{&utf8[utf8StartIndex], utf8EndIndex - utf8StartIndex};
+            SkSpan<const SkGlyphID> glyphSpan{&glyphIDs[glyphStartIndex],
+                                              glyphEndIndex - glyphStartIndex};
+            processMToN(codePointCount, utf8Span, glyphSpan);
+        }
+    };
+
+    resetRanges();
+    BreakupClusters(utf8Begin, utf8End, clusters, gatherRuns);
+    checkRangesAndProcess();
+}
+
+// Emits one object named "cluster N to M" (N code points, M glyphs) that holds the cluster's
+// text under "UTF" and its glyph IDs under "glyphsIDs".
+void SkShaperJSONWriter::displayMToN(size_t codePointCount,
+                                     SkSpan<const char> utf8,
+                                     SkSpan<const SkGlyphID> glyphIDs) {
+    const std::string label = "cluster " + std::to_string(codePointCount) + " to " +
+                              std::to_string(glyphIDs.size());
+    fJSONWriter->beginObject(label.c_str(), true);
+    // The span carries no terminator, so copy it into a std::string before handing it over.
+    const std::string text{utf8.data(), utf8.size()};
+    fJSONWriter->appendString("UTF", text.c_str());
+    fJSONWriter->beginArray("glyphsIDs", false);
+    for (size_t i = 0; i < glyphIDs.size(); ++i) { fJSONWriter->appendU32(glyphIDs[i]); }
+    fJSONWriter->endArray();
+    fJSONWriter->endObject();
+}
diff --git a/src/utils/SkShaperJSONWriter.h b/src/utils/SkShaperJSONWriter.h
new file mode 100644
index 0000000..bca0cd1
--- /dev/null
+++ b/src/utils/SkShaperJSONWriter.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2019 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkShaperJSONWriter_DEFINED
+#define SkShaperJSONWriter_DEFINED
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "modules/skshaper/include/SkShaper.h"
+#include "src/core/SkSpan.h"
+
+class SkJSONWriter;
+
+// SkShaper::RunHandler that writes every committed run to an SkJSONWriter for diagnostics.
+class SkShaperJSONWriter final : public SkShaper::RunHandler {
+public:
+    // Copies `size` bytes of `utf8`; keeps JSONWriter as a raw pointer for later output.
+    SkShaperJSONWriter(SkJSONWriter* JSONWriter, const char* utf8, size_t size);
+
+    void beginLine() override;
+    void runInfo(const RunInfo& info) override;
+    void commitRunInfo() override;
+    Buffer runBuffer(const RunInfo& info) override;
+    void commitRunBuffer(const RunInfo& info) override;
+    void commitLine() override {}
+
+    // Receives one cluster as (glyphBegin, glyphEnd, utf8Begin, utf8End), both half-open.
+    using BreakupClustersCallback =
+            std::function<void(size_t, size_t, uint32_t, uint32_t)>;
+    // Misspelled original name, kept as an alias so existing users still compile.
+    using BreakupCluastersCallback = BreakupClustersCallback;
+
+    // Break up cluster into a set of ranges for the UTF8, and the glyphIDs.
+    static void BreakupClusters(size_t utf8Begin, size_t utf8End,
+                                SkSpan<const uint32_t> clusters,
+                                const BreakupClustersCallback& processMToN);
+
+    // Receives (code point count, utf8 bytes, glyph IDs) for one reported group.
+    using VisualizeClustersCallback =
+        std::function<void(size_t, SkSpan<const char>, SkSpan<const SkGlyphID>)>;
+
+    // Gather runs of 1:1 into larger runs, and display M:N as single entries.
+    static void VisualizeClusters(const char utf8[],
+                                  size_t utf8Begin, size_t utf8End,
+                                  SkSpan<const SkGlyphID> glyphIDs,
+                                  SkSpan<const uint32_t> clusters,
+                                  const VisualizeClustersCallback& processMToN);
+
+private:
+    void displayMToN(size_t codePointCount, SkSpan<const char> utf8,
+                     SkSpan<const SkGlyphID> glyphIDs);
+
+    SkJSONWriter* fJSONWriter;        // destination for all output
+    std::vector<SkGlyphID> fGlyphs;   // per-glyph storage handed out by runBuffer()
+    std::vector<SkPoint> fPositions;
+    std::vector<uint32_t> fClusters;
+    std::string fUTF8;                // copy of the text given to the constructor
+};
+
+#endif  // SkShaperJSONWriter_DEFINED
diff --git a/tests/SkShaperJSONWriterTest.cpp b/tests/SkShaperJSONWriterTest.cpp
new file mode 100644
index 0000000..15956d7
--- /dev/null
+++ b/tests/SkShaperJSONWriterTest.cpp
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2019 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "src/utils/SkShaperJSONWriter.h"
+
+#include "Test.h"
+
+#include "src/core/SkSpan.h"
+#include "src/utils/SkJSONWriter.h"
+#include "src/utils/SkUTF.h"
+
+DEF_TEST(SkShaperTest_cluster, reporter) {
+    // Feeds hand-written cluster arrays to BreakupClusters and checks each callback invocation.
+    struct Answer {
+        size_t glyphStartIndex, glyphEndIndex;
+        uint32_t utf8StartIndex, utf8EndIndex;
+    };
+    // utf8Len is passed as utf8End (utf8Begin is 0); clusters has one entry per glyph.
+    struct TestCase {
+        size_t utf8Len;
+        std::vector<uint32_t> clusters;
+        std::vector<Answer> answers;
+    };
+    // Each answers list is the expected sequence of callbacks, in call order.
+    std::vector<TestCase> cases = {
+            /*1:1*/ { 1, {0}, {{0, 1, 0, 1}} },
+            /*1:2*/ { 1, {0, 0}, {{0, 2, 0, 1}} },
+            /*2:1*/ { 2, {0}, {{0, 1, 0, 2}} },
+            /*2:3*/ { 2, {0, 0, 0}, {{0, 3, 0, 2}} },
+            /*3:2*/ { 3, {0, 0}, {{0, 2, 0, 3}} },
+
+            // cluster runs
+            { 2, {0, 1}, {{0, 1, 0, 1}, {1, 2, 1, 2}} },
+            { 2, {1, 0}, {{0, 1, 1, 2}, {1, 2, 0, 1}} },
+            { 2, {0, 0, 1}, {{0, 2, 0, 1}, {2, 3, 1, 2}} },
+            { 2, {1, 0, 0}, {{0, 1, 1, 2}, {1, 3, 0, 1}} },
+            { 2, {0, 1, 1}, {{0, 1, 0, 1}, {1, 3, 1, 2}} },
+            { 2, {1, 1, 0}, {{0, 2, 1, 2}, {2, 3, 0, 1}} },
+            { 3, {0, 0, 1}, {{0, 2, 0, 1}, {2, 3, 1, 3}} },
+            { 3, {1, 0, 0}, {{0, 1, 1, 3}, {1, 3, 0, 1}} },
+            { 3, {0, 1, 1}, {{0, 1, 0, 1}, {1, 3, 1, 3}} },
+            { 3, {1, 1, 0}, {{0, 2, 1, 3}, {2, 3, 0, 1}} },
+            { 4, {3, 2, 1, 0}, {{0, 1, 3, 4}, {1, 2, 2, 3}, {2, 3, 1, 2}, {3, 4, 0, 1}} },
+    };
+
+    for (auto& oneCase : cases) {
+        size_t answerCount = 0;
+        auto checker = [&](size_t glyphStartIndex, size_t glyphEndIndex,
+                           uint32_t utf8StartIndex, uint32_t utf8EndIndex) {
+            if (answerCount < oneCase.answers.size()) {
+                Answer a = oneCase.answers[answerCount];
+                REPORTER_ASSERT(reporter, a.glyphStartIndex == glyphStartIndex);
+                REPORTER_ASSERT(reporter,   a.glyphEndIndex == glyphEndIndex  );
+                REPORTER_ASSERT(reporter,  a.utf8StartIndex == utf8StartIndex );
+                REPORTER_ASSERT(reporter,    a.utf8EndIndex == utf8EndIndex   );
+
+            } else {
+                REPORTER_ASSERT(reporter, false, "Too many clusters");
+            }
+            answerCount++;
+        };
+
+        SkShaperJSONWriter::BreakupClusters(
+                0, oneCase.utf8Len, SkSpan<const uint32_t>{oneCase.clusters}, checker);
+        REPORTER_ASSERT(reporter, answerCount == oneCase.answers.size());
+    }
+}
+
+DEF_TEST(SkShaperTest_VisualizeCluster, reporter) {
+    // Runs VisualizeClusters on small inputs and checks the reported groups, in call order.
+    struct Answer {
+        std::string utf8;
+        std::vector<SkGlyphID> glyphIDs;
+    };
+    struct TestCase {
+        std::string utf8;
+        std::vector<SkGlyphID> glyphIDs;
+        std::vector<uint32_t> clusters;
+        std::vector<Answer> answers;
+    };
+
+    std::vector<TestCase> cases = {
+            { "A", {7}, {0}, {{"A", {7}}} },
+            { "ABCD", {7, 8, 9, 10}, {0, 1, 2, 3}, {{"ABCD", {7, 8, 9, 10}}} },
+            { "A", {7, 8}, {0, 0}, {{"A", {7, 8}}} },
+            { "AB", {7}, {0}, {{"AB", {7}}} },
+            { "AB", {7, 8, 9}, {0, 0, 0}, {{"AB", {7, 8, 9}}} },
+            { "ABC", {7, 8}, {0, 0}, {{"ABC", {7, 8}}} },
+            { "ABCD", {7, 8, 9, 10}, {3, 2, 1, 0}, {{"ABCD", {7, 8, 9, 10}}} },
+            { "المادة", {246, 268, 241, 205, 240}, {10, 8, 6, 2, 0},
+                        {{"ادة",  {246, 268, 241}}, {"لم", {205}}, {"ا", {240}}} },
+    };
+
+    for (auto& oneCase : cases) {
+        size_t answerCount = 0;
+        auto checker = [&](size_t codePointCount, SkSpan<const char> utf1to1,
+                           SkSpan<const SkGlyphID> glyph1to1) {
+            if (answerCount < oneCase.answers.size()) {
+                Answer a = oneCase.answers[answerCount];
+                std::string toCheckUtf8{utf1to1.data(), utf1to1.size()};
+                REPORTER_ASSERT(reporter, a.utf8 == toCheckUtf8);
+                std::vector<SkGlyphID> toCheckGlyphIDs{glyph1to1.begin(), glyph1to1.end()};
+                REPORTER_ASSERT(reporter, a.glyphIDs == toCheckGlyphIDs);
+            } else {
+                REPORTER_ASSERT(reporter, false, "Too many clusters");
+            }
+            answerCount++;
+        };
+        SkShaperJSONWriter::VisualizeClusters(oneCase.utf8.c_str(),
+                                              0, oneCase.utf8.size(),
+                                              SkSpan<const SkGlyphID>{oneCase.glyphIDs},
+                                              SkSpan<const uint32_t>{oneCase.clusters},
+                                              checker);
+        // Too few reported groups is as much a failure as too many.
+        REPORTER_ASSERT(reporter, answerCount == oneCase.answers.size());
+    }
+}
+
+// Example use of the SkShaperJSONWriter.
+// Change "#if 0" below to "#if 1" to build and run it.
+#if 0
+DEF_TEST(SkShaperTest_basic, reporter) {
+    std::unique_ptr<SkShaper> shaper = SkShaper::Make();
+    SkFont font(nullptr, 14);
+    // The JSON goes to `out` through a writer in pretty mode.
+    SkDynamicMemoryWStream out;
+    SkJSONWriter jsonWriter{&out, SkJSONWriter::Mode::kPretty};
+    std::string s = "المادة 1 يولد جميع الناس أحرارًا متساوين في الكرامة والحقوق. وقد وهبوا "
+                    "عقلاً وضميرًا وعليهم أن يعامل بعضهم بعضًا بروح الإخاء.";
+
+    SkShaperJSONWriter shaperJSON{&jsonWriter, s.c_str(), s.size()};
+
+    jsonWriter.beginObject();
+    shaper->shape(s.c_str(), s.size(), font, true /* right to left */,  256, &shaperJSON);
+    jsonWriter.endObject();
+    jsonWriter.flush();
+    // Copy what was written into a std::string so it can be printed.
+    std::string sout(out.bytesWritten(), 0);
+    out.copyTo(&sout[0]);
+    // Print the JSON.
+    SkDebugf("%s", sout.c_str());
+}
+#endif
\ No newline at end of file