blob: d23e43a71be483beb6e3ec287c7ac6cc22afab33 [file] [log] [blame]
Kenny Rootba0165b2010-11-09 14:37:23 -08001/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "Unicode_test"
18#include <utils/Log.h>
19#include <utils/Unicode.h>
20
21#include <gtest/gtest.h>
22
23namespace android {
24
25class UnicodeTest : public testing::Test {
26protected:
27 virtual void SetUp() {
28 }
29
30 virtual void TearDown() {
31 }
Michael Wright5bacef32016-05-09 14:43:31 +010032
33 char16_t const * const kSearchString = u"I am a leaf on the wind.";
Kenny Rootba0165b2010-11-09 14:37:23 -080034};
35
36TEST_F(UnicodeTest, UTF8toUTF16ZeroLength) {
37 ssize_t measured;
38
39 const uint8_t str[] = { };
40
41 measured = utf8_to_utf16_length(str, 0);
42 EXPECT_EQ(0, measured)
43 << "Zero length input should return zero length output.";
44}
45
46TEST_F(UnicodeTest, UTF8toUTF16ASCIILength) {
47 ssize_t measured;
48
49 // U+0030 or ASCII '0'
50 const uint8_t str[] = { 0x30 };
51
52 measured = utf8_to_utf16_length(str, sizeof(str));
53 EXPECT_EQ(1, measured)
54 << "ASCII glyphs should have a length of 1 char16_t";
55}
56
57TEST_F(UnicodeTest, UTF8toUTF16Plane1Length) {
58 ssize_t measured;
59
60 // U+2323 SMILE
61 const uint8_t str[] = { 0xE2, 0x8C, 0xA3 };
62
63 measured = utf8_to_utf16_length(str, sizeof(str));
64 EXPECT_EQ(1, measured)
65 << "Plane 1 glyphs should have a length of 1 char16_t";
66}
67
68TEST_F(UnicodeTest, UTF8toUTF16SurrogateLength) {
69 ssize_t measured;
70
71 // U+10000
72 const uint8_t str[] = { 0xF0, 0x90, 0x80, 0x80 };
73
74 measured = utf8_to_utf16_length(str, sizeof(str));
75 EXPECT_EQ(2, measured)
76 << "Surrogate pairs should have a length of 2 char16_t";
77}
78
79TEST_F(UnicodeTest, UTF8toUTF16TruncatedUTF8) {
80 ssize_t measured;
81
82 // Truncated U+2323 SMILE
83 // U+2323 SMILE
84 const uint8_t str[] = { 0xE2, 0x8C };
85
86 measured = utf8_to_utf16_length(str, sizeof(str));
87 EXPECT_EQ(-1, measured)
88 << "Truncated UTF-8 should return -1 to indicate invalid";
89}
90
91TEST_F(UnicodeTest, UTF8toUTF16Normal) {
92 const uint8_t str[] = {
93 0x30, // U+0030, 1 UTF-16 character
94 0xC4, 0x80, // U+0100, 1 UTF-16 character
95 0xE2, 0x8C, 0xA3, // U+2323, 1 UTF-16 character
96 0xF0, 0x90, 0x80, 0x80, // U+10000, 2 UTF-16 character
97 };
98
99 char16_t output[1 + 1 + 1 + 2 + 1]; // Room for NULL
100
Sergio Giro9de67762016-07-20 20:01:33 +0100101 utf8_to_utf16(str, sizeof(str), output, sizeof(output) / sizeof(output[0]));
Kenny Rootba0165b2010-11-09 14:37:23 -0800102
103 EXPECT_EQ(0x0030, output[0])
104 << "should be U+0030";
105 EXPECT_EQ(0x0100, output[1])
106 << "should be U+0100";
107 EXPECT_EQ(0x2323, output[2])
108 << "should be U+2323";
109 EXPECT_EQ(0xD800, output[3])
110 << "should be first half of surrogate U+10000";
111 EXPECT_EQ(0xDC00, output[4])
112 << "should be second half of surrogate U+10000";
113 EXPECT_EQ(NULL, output[5])
114 << "should be NULL terminated";
115}
116
Michael Wright5bacef32016-05-09 14:43:31 +0100117TEST_F(UnicodeTest, strstr16EmptyTarget) {
118 EXPECT_EQ(strstr16(kSearchString, u""), kSearchString)
119 << "should return the original pointer";
120}
121
122TEST_F(UnicodeTest, strstr16SameString) {
123 const char16_t* result = strstr16(kSearchString, kSearchString);
124 EXPECT_EQ(kSearchString, result)
125 << "should return the original pointer";
126}
127
128TEST_F(UnicodeTest, strstr16TargetStartOfString) {
129 const char16_t* result = strstr16(kSearchString, u"I am");
130 EXPECT_EQ(kSearchString, result)
131 << "should return the original pointer";
132}
133
134
135TEST_F(UnicodeTest, strstr16TargetEndOfString) {
136 const char16_t* result = strstr16(kSearchString, u"wind.");
137 EXPECT_EQ(kSearchString+19, result);
138}
139
140TEST_F(UnicodeTest, strstr16TargetWithinString) {
141 const char16_t* result = strstr16(kSearchString, u"leaf");
142 EXPECT_EQ(kSearchString+7, result);
143}
144
145TEST_F(UnicodeTest, strstr16TargetNotPresent) {
146 const char16_t* result = strstr16(kSearchString, u"soar");
147 EXPECT_EQ(nullptr, result);
148}
149
Sergio Giro9de67762016-07-20 20:01:33 +0100150// http://b/29267949
151// Test that overreading in utf8_to_utf16_length is detected
152TEST_F(UnicodeTest, InvalidUtf8OverreadDetected) {
153 // An utf8 char starting with \xc4 is two bytes long.
154 // Add extra zeros so no extra memory is read in case the code doesn't
155 // work as expected.
156 static char utf8[] = "\xc4\x00\x00\x00";
157 ASSERT_DEATH(utf8_to_utf16_length((uint8_t *) utf8, strlen(utf8),
158 true /* overreadIsFatal */), "" /* regex for ASSERT_DEATH */);
159}
160
Kenny Rootba0165b2010-11-09 14:37:23 -0800161}