ICU project: text break iterators in SkShaper

Change-Id: I8a0dd71298331b608fbe874cc610a80fc7815b0e
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/313082
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
index 13de14d..7eb0f86 100644
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -118,20 +118,84 @@
     ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
 }
 
+class SkBreakIterator_icu : public SkBreakIterator {  // SkBreakIterator on top of ICU ubrk_*
+    ICUBreakIterator fBreakIterator;  // ICU iterator handle that every method forwards to
+    Position fLastResult;  // result of the latest navigation call; isDone() tests it
+ public:
+    explicit SkBreakIterator_icu(ICUBreakIterator iter)  // iter is moved into the member
+        : fBreakIterator(std::move(iter)), fLastResult(0) {}
+    Position first() override  // navigation calls cache their result in fLastResult
+      { return fLastResult = ubrk_first(fBreakIterator.get()); }
+    Position current() override
+      { return fLastResult = ubrk_current(fBreakIterator.get()); }
+    Position next() override
+      { return fLastResult = ubrk_next(fBreakIterator.get()); }
+    Position preceding(Position offset) override
+        { return fLastResult = ubrk_preceding(fBreakIterator.get(), offset); }
+    Position following(Position offset) override
+        { return fLastResult = ubrk_following(fBreakIterator.get(), offset);}
+    Status status() override { return ubrk_getRuleStatus(fBreakIterator.get()); }
+    bool isDone() override { return fLastResult == UBRK_DONE; }  // last call hit UBRK_DONE
+    // Points the iterator at utf8Units code units of UTF-8 text; returns false on ICU error.
+    bool setText(const char utftext8[], int utf8Units) override {
+        UErrorCode status = U_ZERO_ERROR;
+        // NOTE(review): ICU shallow-clones the UText below; utftext8 likely must outlive use.
+        UText sUtf8UText = UTEXT_INITIALIZER;
+        ICUUText text(utext_openUTF8(&sUtf8UText, &utftext8[0], utf8Units, &status));
+        // Any failure is reported through SkDEBUGF and leaves fLastResult untouched.
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(text);
+        ubrk_setUText(fBreakIterator.get(), text.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        fLastResult = 0;  // reset the cached position for the new text
+        return true;
+    }
+    // Maps SkUnicode::BreakType to ICU's type; unknown values silently become UBRK_CHARACTER.
+    static UBreakIteratorType convertType(SkUnicode::BreakType type) {
+        switch (type) {
+            case SkUnicode::BreakType::kLines: return UBRK_LINE;
+            case SkUnicode::BreakType::kGraphemes: return UBRK_CHARACTER;
+            case SkUnicode::BreakType::kWords: return UBRK_WORD;
+            default:  // no debug message here, unlike SkUnicode_icu::convertType
+              return UBRK_CHARACTER;
+        }
+    }
+    // Opens an ICU break iterator for locale/type without text; returns nullptr if ICU fails.
+    static std::unique_ptr<SkBreakIterator> makeUtf8BreakIterator
+        (const char locale[], SkUnicode::BreakType type) {
+        UErrorCode status = U_ZERO_ERROR;
+        ICUBreakIterator iterator(ubrk_open(convertType(type), locale, nullptr, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return nullptr;
+        }
+        return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
+    }
+};
+
 class SkUnicode_icu : public SkUnicode {
 
-    static UBreakIteratorType convertType(UBreakType type) {
+    static UBreakIteratorType convertType(BreakType type) {  // BreakType -> ICU iterator type
         switch (type) {
-            case UBreakType::kLines: return UBRK_LINE;
-            case UBreakType::kGraphemes: return UBRK_CHARACTER;
-            case UBreakType::kWords: return UBRK_WORD;
+            case BreakType::kLines: return UBRK_LINE;
+            case BreakType::kGraphemes: return UBRK_CHARACTER;
+            case BreakType::kWords: return UBRK_WORD;  // other values: log, use UBRK_CHARACTER
             default:
               SkDEBUGF("Convert error: wrong break type");
               return UBRK_CHARACTER;
         }
     }
 
-    static bool extractBidi(const char utf8[], int utf8Units, TextDirection dir, std::vector<BidiRegion>* bidiRegions) {
+    static bool extractBidi(const char utf8[],
+                            int utf8Units,
+                            TextDirection dir,
+                            std::vector<BidiRegion>* bidiRegions) {
 
         // Convert to UTF16 since for now bidi iterator only operates on utf16
         std::unique_ptr<uint16_t[]> utf16;
@@ -189,7 +253,7 @@
 
         UErrorCode status = U_ZERO_ERROR;
 
-        UBreakIteratorType breakType = convertType(UBreakType::kWords);
+        UBreakIteratorType breakType = convertType(BreakType::kWords);
         ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
         if (U_FAILURE(status)) {
             SkDEBUGF("Break error: %s", u_errorName(status));
@@ -220,7 +284,8 @@
         return true;
     }
 
-    static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
+    static bool extractPositions
+        (const char utf8[], int utf8Units, BreakType type, std::function<void(int, int)> add) {
 
         UErrorCode status = U_ZERO_ERROR;
         UText sUtf8UText = UTEXT_INITIALIZER;
@@ -252,7 +317,9 @@
         return true;
     }
 
-    static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
+    static bool extractWhitespaces(const char utf8[],
+                                   int utf8Units,
+                                   std::vector<Position>* whitespaces) {
 
         const char* start = utf8;
         const char* end = utf8 + utf8Units;
@@ -293,16 +360,22 @@
         SkASSERT(dstLen == utf8Units);
         return utf8Units;
    }
+
 public:
     ~SkUnicode_icu() override { }
     std::unique_ptr<SkBidiIterator> makeBidiIterator(const uint16_t text[], int count,
                                                      SkBidiIterator::Direction dir) override {
         return SkBidiIterator_icu::makeBidiIterator(text, count, dir);
     }
-    std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[], int count,
+    std::unique_ptr<SkBidiIterator> makeBidiIterator(const char text[],  // char-array overload
+                                                     int count,
                                                      SkBidiIterator::Direction dir) override {
         return SkBidiIterator_icu::makeBidiIterator(text, count, dir);
     }
+    std::unique_ptr<SkBreakIterator> makeBreakIterator(const char locale[],
+                                                       BreakType breakType) override {
+        return SkBreakIterator_icu::makeUtf8BreakIterator(locale, breakType);  // null on ICU error
+    }
 
     // TODO: Use ICU data file to detect controls and whitespaces
     bool isControl(SkUnichar utf8) override {
@@ -323,13 +396,18 @@
         }
     }
 
-    bool getBidiRegions(const char utf8[], int utf8Units, TextDirection dir, std::vector<BidiRegion>* results) override {
+    bool getBidiRegions(const char utf8[],  // forwards all arguments to extractBidi()
+                        int utf8Units,
+                        TextDirection dir,
+                        std::vector<BidiRegion>* results) override {
         return extractBidi(utf8, utf8Units, dir, results);
     }
 
-    bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
+    bool getLineBreaks(const char utf8[],
+                       int utf8Units,
+                       std::vector<LineBreakBefore>* results) override {
 
-        return extractPositions(utf8, utf8Units, UBreakType::kLines,
+        return extractPositions(utf8, utf8Units, BreakType::kLines,
             [results](int pos, int status) {
                     results->emplace_back(pos,status == UBRK_LINE_HARD
                                                         ? LineBreakType::kHardLineBreak
@@ -351,7 +429,7 @@
 
     bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
 
-        return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
+        return extractPositions(utf8, utf8Units, BreakType::kGraphemes,  // keep positions only
             [results](int pos, int status) { results->emplace_back(pos);
         });
     }
@@ -361,7 +439,9 @@
         return extractWhitespaces(utf8, utf8Units, results);
     }
 
-    void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override {
+    void reorderVisual(const BidiLevel runLevels[],  // forwards to ICU ubidi_reorderVisual()
+                       int levelsCount,
+                       int32_t logicalFromVisual[]) override {
         ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
     }
 };