| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "utils/utf8/unicodetext.h" |
| |
| #include "gtest/gtest.h" |
| |
| namespace libtextclassifier3 { |
| namespace { |
| |
| class UnicodeTextTest : public testing::Test { |
| protected: |
| UnicodeTextTest() : empty_text_() { |
| text_.push_back(0x1C0); |
| text_.push_back(0x4E8C); |
| text_.push_back(0xD7DB); |
| text_.push_back(0x34); |
| text_.push_back(0x1D11E); |
| } |
| |
| UnicodeText empty_text_; |
| UnicodeText text_; |
| }; |
| |
| // Tests for our modifications of UnicodeText. |
| TEST(UnicodeTextTest, Custom) { |
| UnicodeText text = UTF8ToUnicodeText("1234๐hello", /*do_copy=*/false); |
| EXPECT_EQ(text.ToUTF8String(), "1234๐hello"); |
| EXPECT_EQ(text.size_codepoints(), 10); |
| EXPECT_EQ(text.size_bytes(), 13); |
| |
| auto it_begin = text.begin(); |
| std::advance(it_begin, 4); |
| auto it_end = text.begin(); |
| std::advance(it_end, 6); |
| EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "๐h"); |
| } |
| |
| TEST(UnicodeTextTest, Substring) { |
| UnicodeText text = UTF8ToUnicodeText("1234๐hello", /*do_copy=*/false); |
| |
| EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/true), |
| UTF8ToUnicodeText("๐h")); |
| EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/false), |
| UTF8ToUnicodeText("๐h")); |
| } |
| |
| TEST(UnicodeTextTest, Ownership) { |
| const std::string src = "\u304A\u00B0\u106B"; |
| |
| UnicodeText alias; |
| alias.PointToUTF8(src.data(), src.size()); |
| EXPECT_EQ(alias.data(), src.data()); |
| UnicodeText::const_iterator it = alias.begin(); |
| EXPECT_EQ(*it++, 0x304A); |
| EXPECT_EQ(*it++, 0x00B0); |
| EXPECT_EQ(*it++, 0x106B); |
| EXPECT_EQ(it, alias.end()); |
| |
| UnicodeText t = alias; // Copy initialization copies the data. |
| EXPECT_NE(t.data(), alias.data()); |
| } |
| |
| TEST(UnicodeTextTest, Validation) { |
| EXPECT_TRUE(UTF8ToUnicodeText("1234๐hello", /*do_copy=*/false).is_valid()); |
| EXPECT_TRUE( |
| UTF8ToUnicodeText("\u304A\u00B0\u106B", /*do_copy=*/false).is_valid()); |
| EXPECT_TRUE( |
| UTF8ToUnicodeText("this is a test๐๐๐", /*do_copy=*/false).is_valid()); |
| EXPECT_TRUE( |
| UTF8ToUnicodeText("\xf0\x9f\x98\x8b", /*do_copy=*/false).is_valid()); |
| // Too short (string is too short). |
| EXPECT_FALSE(UTF8ToUnicodeText("\xf0\x9f", /*do_copy=*/false).is_valid()); |
| // Too long (too many trailing bytes). |
| EXPECT_FALSE( |
| UTF8ToUnicodeText("\xf0\x9f\x98\x8b\x8b", /*do_copy=*/false).is_valid()); |
| // Too short (too few trailing bytes). |
| EXPECT_FALSE( |
| UTF8ToUnicodeText("\xf0\x9f\x98\x61\x61", /*do_copy=*/false).is_valid()); |
| // Invalid with context. |
| EXPECT_FALSE( |
| UTF8ToUnicodeText("hello \xf0\x9f\x98\x61\x61 world1", /*do_copy=*/false) |
| .is_valid()); |
| } |
| |
| class IteratorTest : public UnicodeTextTest {}; |
| |
| TEST_F(IteratorTest, Iterates) { |
| UnicodeText::const_iterator iter = text_.begin(); |
| EXPECT_EQ(0x1C0, *iter); |
| EXPECT_EQ(&iter, &++iter); // operator++ returns *this. |
| EXPECT_EQ(0x4E8C, *iter++); |
| EXPECT_EQ(0xD7DB, *iter); |
| // Make sure you can dereference more than once. |
| EXPECT_EQ(0xD7DB, *iter); |
| EXPECT_EQ(0x34, *++iter); |
| EXPECT_EQ(0x1D11E, *++iter); |
| ASSERT_TRUE(iter != text_.end()); |
| iter++; |
| EXPECT_TRUE(iter == text_.end()); |
| } |
| |
| TEST_F(IteratorTest, MultiPass) { |
| // Also tests Default Constructible and Assignable. |
| UnicodeText::const_iterator i1, i2; |
| i1 = text_.begin(); |
| i2 = i1; |
| EXPECT_EQ(0x4E8C, *++i1); |
| EXPECT_TRUE(i1 != i2); |
| EXPECT_EQ(0x1C0, *i2); |
| ++i2; |
| EXPECT_TRUE(i1 == i2); |
| EXPECT_EQ(0x4E8C, *i2); |
| } |
| |
| TEST_F(IteratorTest, ReverseIterates) { |
| UnicodeText::const_iterator iter = text_.end(); |
| EXPECT_TRUE(iter == text_.end()); |
| iter--; |
| ASSERT_TRUE(iter != text_.end()); |
| EXPECT_EQ(0x1D11E, *iter--); |
| EXPECT_EQ(0x34, *iter); |
| EXPECT_EQ(0xD7DB, *--iter); |
| // Make sure you can dereference more than once. |
| EXPECT_EQ(0xD7DB, *iter); |
| --iter; |
| EXPECT_EQ(0x4E8C, *iter--); |
| EXPECT_EQ(0x1C0, *iter); |
| EXPECT_TRUE(iter == text_.begin()); |
| } |
| |
| TEST_F(IteratorTest, Comparable) { |
| UnicodeText::const_iterator i1, i2; |
| i1 = text_.begin(); |
| i2 = i1; |
| ++i2; |
| |
| EXPECT_TRUE(i1 < i2); |
| EXPECT_TRUE(text_.begin() <= i1); |
| EXPECT_FALSE(i1 >= i2); |
| EXPECT_FALSE(i1 > text_.end()); |
| } |
| |
| TEST_F(IteratorTest, Advance) { |
| UnicodeText::const_iterator iter = text_.begin(); |
| EXPECT_EQ(0x1C0, *iter); |
| std::advance(iter, 4); |
| EXPECT_EQ(0x1D11E, *iter); |
| ++iter; |
| EXPECT_TRUE(iter == text_.end()); |
| } |
| |
| TEST_F(IteratorTest, Distance) { |
| UnicodeText::const_iterator iter = text_.begin(); |
| EXPECT_EQ(0, std::distance(text_.begin(), iter)); |
| EXPECT_EQ(5, std::distance(iter, text_.end())); |
| ++iter; |
| ++iter; |
| EXPECT_EQ(2, std::distance(text_.begin(), iter)); |
| EXPECT_EQ(3, std::distance(iter, text_.end())); |
| ++iter; |
| ++iter; |
| EXPECT_EQ(4, std::distance(text_.begin(), iter)); |
| ++iter; |
| EXPECT_EQ(0, std::distance(iter, text_.end())); |
| } |
| |
| class OperatorTest : public UnicodeTextTest {}; |
| |
| TEST_F(OperatorTest, Clear) { |
| UnicodeText empty_text(UTF8ToUnicodeText("", /*do_copy=*/false)); |
| EXPECT_FALSE(text_ == empty_text); |
| text_.clear(); |
| EXPECT_TRUE(text_ == empty_text); |
| } |
| |
| TEST_F(OperatorTest, Empty) { |
| EXPECT_TRUE(empty_text_.empty()); |
| EXPECT_FALSE(text_.empty()); |
| text_.clear(); |
| EXPECT_TRUE(text_.empty()); |
| } |
| |
| } // namespace |
| } // namespace libtextclassifier3 |