Reland #3 "ICU API: only in SkParagraph, simplified."

This is the fix for the google3 failure.
This reverts commit e36a4667237dd4672f68cd4cb411d41881cd4be2.

Reason for revert: Trying to fix the google3 build break

Original change's description:
> Revert "Reland "ICU API: only in SkParagraph, simplified (relanding reverted).""
>
> This reverts commit 16fbc2477205aa06efb03e9bdaa0e4a5e94ee865.
>
> Reason for revert: Checking to see if this is blocking the G3 roll
>
> Original change's description:
> > Reland "ICU API: only in SkParagraph, simplified (relanding reverted)."
> >
> > This reverts commit a30095d17c879f0af0bfa799b25c97bcacd0b4fc.
> >
> > Reason for revert: Fixing the build
> >
> > Original change's description:
> > > Revert "ICU API: only in SkParagraph, simplified (relanding reverted)."
> > >
> > > This reverts commit 7479eda3b6d2884c089b62d9fc489574716ab3b7.
> > >
> > > Reason for revert: Breaking build
> > >
> > > Original change's description:
> > > > ICU API: only in SkParagraph, simplified (relanding reverted).
> > > >
> > > > Reverted commit: https://skia-review.googlesource.com/c/skia/+/296128/
> > > >
> > > > Change-Id: Iaf793bff94a6060579c7d6176d477e598c047be6
> > > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303261
> > > > Reviewed-by: Mike Reed <reed@google.com>
> > > > Commit-Queue: Julia Lavrova <jlavrova@google.com>
> > >
> > > TBR=reed@google.com,jlavrova@google.com
> > >
> > > Change-Id: Idd4c41e22aa59e24bdbd07f2fa5e9258c1bbb7a7
> > > No-Presubmit: true
> > > No-Tree-Checks: true
> > > No-Try: true
> > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303358
> > > Reviewed-by: Julia Lavrova <jlavrova@google.com>
> > > Commit-Queue: Julia Lavrova <jlavrova@google.com>
> >
> > TBR=reed@google.com,jlavrova@google.com
> >
> > Change-Id: Iea5da4535ea2e388e8e632e6c556b66c8781631a
> > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303377
> > Reviewed-by: Ben Wagner <bungeman@google.com>
> > Reviewed-by: Julia Lavrova <jlavrova@google.com>
> > Commit-Queue: Julia Lavrova <jlavrova@google.com>
>
> TBR=bungeman@google.com,reed@google.com,jlavrova@google.com
>
> Change-Id: I1edfecc56add670b251adf44892265088fd32c42
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/304058
> Reviewed-by: Robert Phillips <robertphillips@google.com>
> Commit-Queue: Robert Phillips <robertphillips@google.com>

TBR=bungeman@google.com,robertphillips@google.com,reed@google.com,jlavrova@google.com

Change-Id: Ife73aa21539e870d69bda6b5892979646732d778
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/304060
Commit-Queue: Julia Lavrova <jlavrova@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
new file mode 100644
index 0000000..8900b4e
--- /dev/null
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -0,0 +1,243 @@
+/*
+* Copyright 2020 Google Inc.
+*
+* Use of this source code is governed by a BSD-style license that can be
+* found in the LICENSE file.
+*/
+#include "include/private/SkTFitsIn.h"
+#include "include/private/SkTemplates.h"
+#include "modules/skshaper/src/SkUnicode.h"
+#include "src/utils/SkUTF.h"
+#include <unicode/ubidi.h>
+#include <unicode/ubrk.h>
+#include <unicode/utext.h>
+#include <unicode/utypes.h>
+#include <vector>
+#include <functional>
+// Owning ICU handles: each unique_ptr's deleter type wraps the matching ICU close function.
+using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
+using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
+using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>>;
+
+/** Decodes the next code point at *ptr (bounded by end); invalid UTF-8 yields U+FFFD instead. */
+static inline SkUnichar utf8_next(const char** ptr, const char* end) {
+    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
+    return decoded >= 0 ? decoded : 0xFFFD;
+}
+
+namespace skia {
+
+/** ICU-backed SkUnicode: bidi regions, line/word/grapheme breaks and whitespace positions. */
+class SkUnicode_icu : public SkUnicode {
+    /** Maps a UBreakType onto the ICU iterator type; unknown values fall back to characters. */
+    static UBreakIteratorType convertType(UBreakType type) {
+        if (type == UBreakType::kLines) { return UBRK_LINE; }
+        if (type == UBreakType::kWords) { return UBRK_WORD; }
+        if (type == UBreakType::kGraphemes) { return UBRK_CHARACTER; }
+        SkDEBUGF("Convert error: wrong break type");
+        return UBRK_CHARACTER;
+    }
+
+    /** Converts utf8 to a new UTF-16 buffer in *utf16; returns its length, or < 0 if invalid. */
+    static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
+        int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
+        if (utf16Units < 0) {
+            SkDEBUGF("Convert error: Invalid utf8 input");
+            return utf16Units;
+        }
+        *utf16 = std::unique_ptr<uint16_t[]>(new uint16_t[utf16Units]);
+        SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units);
+        SkASSERT(dstLen == utf16Units);
+        return utf16Units;
+    }
+
+    /** Appends to *bidiRegions the runs of constant bidi level in utf8, as utf8 offset ranges.
+     *  dir picks the paragraph level. Returns false on invalid utf8 or when an ICU call fails. */
+    static bool extractBidi(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* bidiRegions) {
+        // Convert to UTF16 since for now bidi iterator only operates on utf16
+        std::unique_ptr<uint16_t[]> utf16;
+        auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
+        if (utf16Units < 0) {
+            return false;
+        }
+
+        // Create bidi iterator
+        UErrorCode status = U_ZERO_ERROR;
+        ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Bidi error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(bidi);
+        uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL;
+        // The required lifetime of utf16 isn't well documented.
+        // It appears it isn't used after ubidi_setPara except through ubidi_getText.
+        ubidi_setPara(bidi.get(), (const UChar*)utf16.get(), utf16Units, bidiLevel, nullptr, &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Bidi error: %s", u_errorName(status));
+            return false;
+        }
+
+        // Walk utf16 (levels) and utf8 (offsets) in lockstep, emitting a region per level change
+        const char* start8 = utf8;
+        const char* end8 = utf8 + utf8Units;
+        BidiLevel currentLevel = 0;
+        Position pos8 = 0;
+        Position pos16 = 0;
+        Position end16 = ubidi_getLength(bidi.get());
+        while (pos16 < end16) {
+            auto level = ubidi_getLevelAt(bidi.get(), pos16);
+            if (pos16 == 0) {
+                currentLevel = level;
+            } else if (level != currentLevel) {
+                Position end = start8 - utf8;
+                bidiRegions->emplace_back(pos8, end, currentLevel);
+                currentLevel = level;
+                pos8 = end;
+            }
+            SkUnichar u = utf8_next(&start8, end8);
+            pos16 += SkUTF::ToUTF16(u);
+        }
+        Position end = start8 - utf8;
+        if (end != pos8) {
+            bidiRegions->emplace_back(pos8, end, currentLevel);
+        }
+        return true;
+    }
+
+    /** Appends the ICU word-break positions of utf16 to *words, as UTF-16 indices.
+     *  Returns false when an ICU call fails. */
+    static bool extractWords(uint16_t utf16[], int utf16Units, std::vector<Position>* words) {
+        UErrorCode status = U_ZERO_ERROR;
+        UBreakIteratorType breakType = convertType(UBreakType::kWords);
+        ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(iterator);
+
+        UText sUtf16UText = UTEXT_INITIALIZER;
+        ICUUText utf16UText(utext_openUChars(&sUtf16UText, (UChar*)utf16, utf16Units, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+
+        ubrk_setUText(iterator.get(), utf16UText.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+
+        // Get the words
+        int32_t pos = ubrk_first(iterator.get());
+        while (pos != UBRK_DONE) {
+            words->emplace_back(pos);
+            pos = ubrk_next(iterator.get());
+        }
+        return true;
+    }
+
+    /** Calls add(position, ruleStatus) for every ICU break of the given type in utf8; positions
+     *  are utf8 offsets. Returns false when an ICU call fails. */
+    static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
+        UErrorCode status = U_ZERO_ERROR;
+        UText sUtf8UText = UTEXT_INITIALIZER;
+        ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[0], utf8Units, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(text);
+
+        ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;  // do not continue with an iterator that failed to open
+        }
+        SkASSERT(iterator);
+
+        ubrk_setUText(iterator.get(), text.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+
+        int32_t pos = ubrk_first(iterator.get());
+        while (pos != UBRK_DONE) {
+            add(pos, ubrk_getRuleStatus(iterator.get()));
+            pos = ubrk_next(iterator.get());
+        }
+        return true;
+    }
+
+    /** Appends to *whitespaces the utf8 offset of every byte of each u_isWhitespace code point. */
+    static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
+        const char* start = utf8;
+        const char* end = utf8 + utf8Units;
+        const char* ch = start;
+        while (ch < end) {
+            auto index = ch - start;
+            auto unichar = utf8_next(&ch, end);
+            if (u_isWhitespace(unichar)) {
+                auto ending = ch - start;
+                for (auto k = index; k < ending; ++k) {
+                  whitespaces->emplace_back(k);
+                }
+            }
+        }
+        return true;
+    }
+
+public:
+    ~SkUnicode_icu() override = default;
+
+    /** Appends the bidi regions of utf8 for base direction dir to *results; false on failure. */
+    bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) override {
+        return extractBidi(utf8, utf8Units, dir, results);
+    }
+
+    /** Appends every ICU line-break position (utf8 offset) to *results, tagged hard or soft. */
+    bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
+        return extractPositions(utf8, utf8Units, UBreakType::kLines,
+            [results](int pos, int status) {
+                // ICU defines tag ranges, so test [UBRK_LINE_HARD, UBRK_LINE_HARD_LIMIT), not ==.
+                const bool isHard = status >= UBRK_LINE_HARD && status < UBRK_LINE_HARD_LIMIT;
+                results->emplace_back(pos, isHard ? LineBreakType::kHardLineBreak
+                                                  : LineBreakType::kSoftLineBreak);
+            });
+    }
+
+    /** Appends the word-break positions of utf8 to *results, expressed as UTF-16 indices. */
+    bool getWords(const char utf8[], int utf8Units, std::vector<Position>* results) override {
+        // Convert to UTF16 since we want the results in utf16
+        std::unique_ptr<uint16_t[]> utf16;
+        auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
+        if (utf16Units < 0) {
+            return false;
+        }
+        return extractWords(utf16.get(), utf16Units, results);
+    }
+
+    /** Appends the grapheme boundaries of utf8 (utf8 offsets) to *results; rule status unused. */
+    bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
+        return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
+            [results](int pos, int) { results->emplace_back(pos); });
+    }
+
+    /** Appends to *results the utf8 offset of every byte belonging to a whitespace code point. */
+    bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
+        return extractWhitespaces(utf8, utf8Units, results);
+    }
+
+    /** Forwards to ubidi_reorderVisual, passing runLevels and the logicalFromVisual output map. */
+    void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override {
+        ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
+    }
+};
+/** Factory for SkUnicode: returns a newly created SkUnicode_icu instance. */
+std::unique_ptr<SkUnicode> SkUnicode::Make() { return std::make_unique<SkUnicode_icu>(); }
+
+}
+