blob: d436fc02289634a90fc56912d37af827c862ef8a [file] [log] [blame]
Reid Kleckner7df03c22013-07-16 17:14:33 +00001//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/Support/ConvertUTF.h"
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000011#include "llvm/Support/Format.h"
Reid Kleckner7df03c22013-07-16 17:14:33 +000012#include "gtest/gtest.h"
13#include <string>
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +000014#include <utility>
Chandler Carruthd9903882015-01-14 11:23:27 +000015#include <vector>
Reid Kleckner7df03c22013-07-16 17:14:33 +000016
17using namespace llvm;
18
19TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
20 // Src is the look of disapproval.
21 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
22 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
23 std::string Result;
24 bool Success = convertUTF16ToUTF8String(Ref, Result);
25 EXPECT_TRUE(Success);
26 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
27 EXPECT_EQ(Expected, Result);
28}
29
30TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
31 // Src is the look of disapproval.
32 static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
33 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
34 std::string Result;
35 bool Success = convertUTF16ToUTF8String(Ref, Result);
36 EXPECT_TRUE(Success);
37 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
38 EXPECT_EQ(Expected, Result);
39}
40
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000041TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
42 // Src is the look of disapproval.
43 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
44 StringRef Ref(Src, sizeof(Src) - 1);
45 SmallVector<UTF16, 5> Result;
46 bool Success = convertUTF8ToUTF16String(Ref, Result);
47 EXPECT_TRUE(Success);
48 static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
Eric Christopher7aebb322015-01-27 01:01:39 +000049 ASSERT_EQ(3u, Result.size());
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000050 for (int I = 0, E = 3; I != E; ++I)
51 EXPECT_EQ(Expected[I], Result[I]);
52}
53
Reid Kleckner7df03c22013-07-16 17:14:33 +000054TEST(ConvertUTFTest, OddLengthInput) {
55 std::string Result;
Craig Toppere1d12942014-08-27 05:25:25 +000056 bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000057 EXPECT_FALSE(Success);
58}
59
60TEST(ConvertUTFTest, Empty) {
61 std::string Result;
Craig Toppere1d12942014-08-27 05:25:25 +000062 bool Success = convertUTF16ToUTF8String(None, Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000063 EXPECT_TRUE(Success);
64 EXPECT_TRUE(Result.empty());
65}
66
67TEST(ConvertUTFTest, HasUTF16BOM) {
Craig Toppere1d12942014-08-27 05:25:25 +000068 bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000069 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000070 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000071 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000072 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
Reid Kleckner7df03c22013-07-16 17:14:33 +000073 EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
Craig Toppere1d12942014-08-27 05:25:25 +000074 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
Reid Kleckner7df03c22013-07-16 17:14:33 +000075 EXPECT_TRUE(HasBOM);
76
Craig Toppere1d12942014-08-27 05:25:25 +000077 HasBOM = hasUTF16ByteOrderMark(None);
Reid Kleckner7df03c22013-07-16 17:14:33 +000078 EXPECT_FALSE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000079 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
Reid Kleckner7df03c22013-07-16 17:14:33 +000080 EXPECT_FALSE(HasBOM);
81}
Dmitri Gribenko1089db02014-06-16 11:09:46 +000082
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +000083struct ConvertUTFResultContainer {
84 ConversionResult ErrorCode;
85 std::vector<unsigned> UnicodeScalars;
86
87 ConvertUTFResultContainer(ConversionResult ErrorCode)
88 : ErrorCode(ErrorCode) {}
89
90 ConvertUTFResultContainer
91 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
92 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
93 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
94 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
95 ConvertUTFResultContainer Result(*this);
96 if (US0 != 0x110000)
97 Result.UnicodeScalars.push_back(US0);
98 if (US1 != 0x110000)
99 Result.UnicodeScalars.push_back(US1);
100 if (US2 != 0x110000)
101 Result.UnicodeScalars.push_back(US2);
102 if (US3 != 0x110000)
103 Result.UnicodeScalars.push_back(US3);
104 if (US4 != 0x110000)
105 Result.UnicodeScalars.push_back(US4);
106 if (US5 != 0x110000)
107 Result.UnicodeScalars.push_back(US5);
108 if (US6 != 0x110000)
109 Result.UnicodeScalars.push_back(US6);
110 if (US7 != 0x110000)
111 Result.UnicodeScalars.push_back(US7);
112 return Result;
113 }
114};
115
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000116std::pair<ConversionResult, std::vector<unsigned>>
117ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
118 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
119
120 const UTF8 *SourceNext = SourceStart;
121 std::vector<UTF32> Decoded(S.size(), 0);
122 UTF32 *TargetStart = Decoded.data();
123
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000124 auto ErrorCode =
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000125 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
126 Decoded.data() + Decoded.size(), lenientConversion);
127
128 Decoded.resize(TargetStart - Decoded.data());
129
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000130 return std::make_pair(ErrorCode, Decoded);
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000131}
132
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000133std::pair<ConversionResult, std::vector<unsigned>>
134ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
135 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
136
137 const UTF8 *SourceNext = SourceStart;
138 std::vector<UTF32> Decoded(S.size(), 0);
139 UTF32 *TargetStart = Decoded.data();
140
141 auto ErrorCode = ConvertUTF8toUTF32Partial(
142 &SourceNext, SourceStart + S.size(), &TargetStart,
143 Decoded.data() + Decoded.size(), lenientConversion);
144
145 Decoded.resize(TargetStart - Decoded.data());
146
147 return std::make_pair(ErrorCode, Decoded);
148}
149
150::testing::AssertionResult
151CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
152 StringRef S, bool Partial = false) {
153 ConversionResult ErrorCode;
154 std::vector<unsigned> Decoded;
155 if (!Partial)
156 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
157 else
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000158 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
Dmitri Gribenkocbc7ae22015-01-10 05:03:29 +0000159
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000160 if (Expected.ErrorCode != ErrorCode)
161 return ::testing::AssertionFailure() << "Expected error code "
162 << Expected.ErrorCode << ", actual "
163 << ErrorCode;
164
165 if (Expected.UnicodeScalars != Decoded)
166 return ::testing::AssertionFailure()
167 << "Expected lenient decoded result:\n"
168 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
169 << "Actual result:\n" << ::testing::PrintToString(Decoded);
170
171 return ::testing::AssertionSuccess();
172}
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000173
174TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
175
176 //
177 // 1-byte sequences
178 //
179
180 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000181 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
182 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000183
184 //
185 // 2-byte sequences
186 //
187
188 // U+0283 LATIN SMALL LETTER ESH
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000189 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
190 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
191 "\xca\x83"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000192
193 // U+03BA GREEK SMALL LETTER KAPPA
194 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
195 // U+03C3 GREEK SMALL LETTER SIGMA
196 // U+03BC GREEK SMALL LETTER MU
197 // U+03B5 GREEK SMALL LETTER EPSILON
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000198 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
199 ConvertUTFResultContainer(conversionOK)
200 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
201 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000202
203 //
204 // 3-byte sequences
205 //
206
207 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
208 // U+6587 CJK UNIFIED IDEOGRAPH-6587
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000209 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
210 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
211 "\xe4\xbe\x8b\xe6\x96\x87"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000212
213 // U+D55C HANGUL SYLLABLE HAN
214 // U+AE00 HANGUL SYLLABLE GEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
216 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
217 "\xed\x95\x9c\xea\xb8\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000218
219 // U+1112 HANGUL CHOSEONG HIEUH
220 // U+1161 HANGUL JUNGSEONG A
221 // U+11AB HANGUL JONGSEONG NIEUN
222 // U+1100 HANGUL CHOSEONG KIYEOK
223 // U+1173 HANGUL JUNGSEONG EU
224 // U+11AF HANGUL JONGSEONG RIEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000225 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
226 ConvertUTFResultContainer(conversionOK)
227 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
228 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
229 "\xe1\x86\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000230
231 //
232 // 4-byte sequences
233 //
234
235 // U+E0100 VARIATION SELECTOR-17
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000236 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
237 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
238 "\xf3\xa0\x84\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000239
240 //
241 // First possible sequence of a certain length
242 //
243
244 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
246 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
247 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000248
249 // U+0080 PADDING CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000250 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
251 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
252 "\xc2\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000253
254 // U+0800 SAMARITAN LETTER ALAF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000255 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
256 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
257 "\xe0\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000258
259 // U+10000 LINEAR B SYLLABLE B008 A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000260 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
261 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
262 "\xf0\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000263
264 // U+200000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000265 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
266 ConvertUTFResultContainer(sourceIllegal)
267 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
268 "\xf8\x88\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000269
270 // U+4000000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
272 ConvertUTFResultContainer(sourceIllegal)
273 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
274 "\xfc\x84\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000275
276 //
277 // Last possible sequence of a certain length
278 //
279
280 // U+007F DELETE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000281 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
282 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000283
284 // U+07FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000285 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
286 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
287 "\xdf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000288
289 // U+FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000290 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
291 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
292 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000293
294 // U+1FFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000295 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
296 ConvertUTFResultContainer(sourceIllegal)
297 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
298 "\xf7\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000299
300 // U+3FFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000301 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
302 ConvertUTFResultContainer(sourceIllegal)
303 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
304 "\xfb\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000305
306 // U+7FFFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000307 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
308 ConvertUTFResultContainer(sourceIllegal)
309 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
310 "\xfd\xbf\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000311
312 //
313 // Other boundary conditions
314 //
315
316 // U+D7FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000317 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
318 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
319 "\xed\x9f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000320
321 // U+E000 (private use)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000322 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
323 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
324 "\xee\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000325
326 // U+FFFD REPLACEMENT CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000327 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
328 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
329 "\xef\xbf\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000330
331 // U+10FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000332 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
333 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
334 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000335
336 // U+110000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000337 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
338 ConvertUTFResultContainer(sourceIllegal)
339 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
340 "\xf4\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000341
342 //
343 // Unexpected continuation bytes
344 //
345
346 // A sequence of unexpected continuation bytes that don't follow a first
347 // byte, every byte is a maximal subpart.
348
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000349 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
350 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
351 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
352 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
353 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
354 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
355 "\x80\x80"));
356 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
357 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
358 "\x80\xbf"));
359 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
360 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
361 "\xbf\x80"));
362 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
363 ConvertUTFResultContainer(sourceIllegal)
364 .withScalars(0xfffd, 0xfffd, 0xfffd),
365 "\x80\xbf\x80"));
366 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
367 ConvertUTFResultContainer(sourceIllegal)
368 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
369 "\x80\xbf\x80\xbf"));
370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
371 ConvertUTFResultContainer(sourceIllegal)
372 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
373 "\x80\xbf\x82\xbf\xaa"));
374 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
375 ConvertUTFResultContainer(sourceIllegal)
376 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
377 "\xaa\xb0\xbb\xbf\xaa\xa0"));
378 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
379 ConvertUTFResultContainer(sourceIllegal)
380 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
381 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000382
383 // All continuation bytes (0x80--0xbf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
385 ConvertUTFResultContainer(sourceIllegal)
386 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
387 0xfffd, 0xfffd, 0xfffd, 0xfffd)
388 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
389 0xfffd, 0xfffd, 0xfffd, 0xfffd)
390 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
391 0xfffd, 0xfffd, 0xfffd, 0xfffd)
392 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
393 0xfffd, 0xfffd, 0xfffd, 0xfffd)
394 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
395 0xfffd, 0xfffd, 0xfffd, 0xfffd)
396 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
397 0xfffd, 0xfffd, 0xfffd, 0xfffd)
398 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
399 0xfffd, 0xfffd, 0xfffd, 0xfffd)
400 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
401 0xfffd, 0xfffd, 0xfffd, 0xfffd),
402 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
403 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
404 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
405 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000406
407 //
408 // Lonely start bytes
409 //
410
411 // Start bytes of 2-byte sequences (0xc0--0xdf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000412 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
413 ConvertUTFResultContainer(sourceIllegal)
414 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
415 0xfffd, 0xfffd, 0xfffd, 0xfffd)
416 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
417 0xfffd, 0xfffd, 0xfffd, 0xfffd)
418 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
419 0xfffd, 0xfffd, 0xfffd, 0xfffd)
420 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
421 0xfffd, 0xfffd, 0xfffd, 0xfffd),
422 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
423 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000424
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000425 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
426 ConvertUTFResultContainer(sourceIllegal)
427 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
428 0xfffd, 0x0020, 0xfffd, 0x0020)
429 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
430 0xfffd, 0x0020, 0xfffd, 0x0020)
431 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
432 0xfffd, 0x0020, 0xfffd, 0x0020)
433 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
434 0xfffd, 0x0020, 0xfffd, 0x0020)
435 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
436 0xfffd, 0x0020, 0xfffd, 0x0020)
437 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
438 0xfffd, 0x0020, 0xfffd, 0x0020)
439 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
440 0xfffd, 0x0020, 0xfffd, 0x0020)
441 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
442 0xfffd, 0x0020, 0xfffd, 0x0020),
443 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
444 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
445 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
446 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000447
448 // Start bytes of 3-byte sequences (0xe0--0xef).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000449 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
450 ConvertUTFResultContainer(sourceIllegal)
451 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
452 0xfffd, 0xfffd, 0xfffd, 0xfffd)
453 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
454 0xfffd, 0xfffd, 0xfffd, 0xfffd),
455 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000456
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000457 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
458 ConvertUTFResultContainer(sourceIllegal)
459 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
460 0xfffd, 0x0020, 0xfffd, 0x0020)
461 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
462 0xfffd, 0x0020, 0xfffd, 0x0020)
463 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
464 0xfffd, 0x0020, 0xfffd, 0x0020)
465 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
466 0xfffd, 0x0020, 0xfffd, 0x0020),
467 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
468 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000469
470 // Start bytes of 4-byte sequences (0xf0--0xf7).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000471 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
472 ConvertUTFResultContainer(sourceIllegal)
473 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
474 0xfffd, 0xfffd, 0xfffd, 0xfffd),
475 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000476
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000477 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
478 ConvertUTFResultContainer(sourceIllegal)
479 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
480 0xfffd, 0x0020, 0xfffd, 0x0020)
481 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
482 0xfffd, 0x0020, 0xfffd, 0x0020),
483 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000484
485 // Start bytes of 5-byte sequences (0xf8--0xfb).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000486 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
487 ConvertUTFResultContainer(sourceIllegal)
488 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
489 "\xf8\xf9\xfa\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000490
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000491 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
492 ConvertUTFResultContainer(sourceIllegal)
493 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
494 0xfffd, 0x0020, 0xfffd, 0x0020),
495 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000496
497 // Start bytes of 6-byte sequences (0xfc--0xfd).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000498 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
499 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
500 "\xfc\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000501
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000502 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
503 ConvertUTFResultContainer(sourceIllegal)
504 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
505 "\xfc\x20\xfd\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000506
507 //
508 // Other bytes (0xc0--0xc1, 0xfe--0xff).
509 //
510
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000511 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
512 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
513 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
514 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
515 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
516 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
517 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
518 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000519
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
521 ConvertUTFResultContainer(sourceIllegal)
522 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
523 "\xc0\xc1\xfe\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000524
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
526 ConvertUTFResultContainer(sourceIllegal)
527 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
528 "\xfe\xfe\xff\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000529
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000530 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
531 ConvertUTFResultContainer(sourceIllegal)
532 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
533 "\xfe\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000534
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000535 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
536 ConvertUTFResultContainer(sourceIllegal)
537 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
538 "\xff\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000539
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000540 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
541 ConvertUTFResultContainer(sourceIllegal)
542 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
543 0xfffd, 0x0020, 0xfffd, 0x0020),
544 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000545
546 //
547 // Sequences with one continuation byte missing
548 //
549
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
551 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
552 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
553 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
554 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
555 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
556 "\xe0\xa0"));
557 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
558 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
559 "\xe0\xbf"));
560 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
561 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
562 "\xe1\x80"));
563 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
564 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
565 "\xec\xbf"));
566 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
567 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
568 "\xed\x80"));
569 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
570 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
571 "\xed\x9f"));
572 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
573 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
574 "\xee\x80"));
575 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
576 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
577 "\xef\xbf"));
578 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
579 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
580 "\xf0\x90\x80"));
581 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
582 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
583 "\xf0\xbf\xbf"));
584 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
585 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
586 "\xf1\x80\x80"));
587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
589 "\xf3\xbf\xbf"));
590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
591 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
592 "\xf4\x80\x80"));
593 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
594 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
595 "\xf4\x8f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000596
597 // Overlong sequences with one trailing byte missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000598 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
599 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
600 "\xc0"));
601 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
602 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
603 "\xc1"));
604 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
605 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
606 "\xe0\x80"));
607 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
608 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
609 "\xe0\x9f"));
610 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
611 ConvertUTFResultContainer(sourceIllegal)
612 .withScalars(0xfffd, 0xfffd, 0xfffd),
613 "\xf0\x80\x80"));
614 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
615 ConvertUTFResultContainer(sourceIllegal)
616 .withScalars(0xfffd, 0xfffd, 0xfffd),
617 "\xf0\x8f\x80"));
618 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
619 ConvertUTFResultContainer(sourceIllegal)
620 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
621 "\xf8\x80\x80\x80"));
622 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
623 ConvertUTFResultContainer(sourceIllegal)
624 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
625 "\xfc\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000626
627 // Sequences that represent surrogates with one trailing byte missing.
628 // High surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000629 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
630 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
631 "\xed\xa0"));
632 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
633 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
634 "\xed\xac"));
635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
636 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
637 "\xed\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000638 // Low surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000639 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
640 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
641 "\xed\xb0"));
642 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
643 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
644 "\xed\xb4"));
645 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
646 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
647 "\xed\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000648
649 // Ill-formed 4-byte sequences.
650 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
651 // U+1100xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
653 ConvertUTFResultContainer(sourceIllegal)
654 .withScalars(0xfffd, 0xfffd, 0xfffd),
655 "\xf4\x90\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000656 // U+13FBxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000657 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
658 ConvertUTFResultContainer(sourceIllegal)
659 .withScalars(0xfffd, 0xfffd, 0xfffd),
660 "\xf4\xbf\xbf"));
661 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
662 ConvertUTFResultContainer(sourceIllegal)
663 .withScalars(0xfffd, 0xfffd, 0xfffd),
664 "\xf5\x80\x80"));
665 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
666 ConvertUTFResultContainer(sourceIllegal)
667 .withScalars(0xfffd, 0xfffd, 0xfffd),
668 "\xf6\x80\x80"));
669 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
670 ConvertUTFResultContainer(sourceIllegal)
671 .withScalars(0xfffd, 0xfffd, 0xfffd),
672 "\xf7\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000673 // U+1FFBxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
675 ConvertUTFResultContainer(sourceIllegal)
676 .withScalars(0xfffd, 0xfffd, 0xfffd),
677 "\xf7\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000678
679 // Ill-formed 5-byte sequences.
680 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
681 // U+2000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000682 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
683 ConvertUTFResultContainer(sourceIllegal)
684 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
685 "\xf8\x88\x80\x80"));
686 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
687 ConvertUTFResultContainer(sourceIllegal)
688 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
689 "\xf8\xbf\xbf\xbf"));
690 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
691 ConvertUTFResultContainer(sourceIllegal)
692 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
693 "\xf9\x80\x80\x80"));
694 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
695 ConvertUTFResultContainer(sourceIllegal)
696 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
697 "\xfa\x80\x80\x80"));
698 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
699 ConvertUTFResultContainer(sourceIllegal)
700 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
701 "\xfb\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000702 // U+3FFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000703 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
704 ConvertUTFResultContainer(sourceIllegal)
705 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
706 "\xfb\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000707
708 // Ill-formed 6-byte sequences.
709 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
710 // U+40000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000711 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
712 ConvertUTFResultContainer(sourceIllegal)
713 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
714 "\xfc\x84\x80\x80\x80"));
715 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
716 ConvertUTFResultContainer(sourceIllegal)
717 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
718 "\xfc\xbf\xbf\xbf\xbf"));
719 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
720 ConvertUTFResultContainer(sourceIllegal)
721 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
722 "\xfd\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000723 // U+7FFFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000724 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
725 ConvertUTFResultContainer(sourceIllegal)
726 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
727 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000728
729 //
730 // Sequences with two continuation bytes missing
731 //
732
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000733 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
734 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
735 "\xf0\x90"));
736 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
737 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
738 "\xf0\xbf"));
739 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
740 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
741 "\xf1\x80"));
742 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
743 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
744 "\xf3\xbf"));
745 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
746 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
747 "\xf4\x80"));
748 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
749 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
750 "\xf4\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000751
752 // Overlong sequences with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000753 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
754 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
755 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
756 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
757 "\xf0\x80"));
758 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
759 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
760 "\xf0\x8f"));
761 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
762 ConvertUTFResultContainer(sourceIllegal)
763 .withScalars(0xfffd, 0xfffd, 0xfffd),
764 "\xf8\x80\x80"));
765 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
766 ConvertUTFResultContainer(sourceIllegal)
767 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
768 "\xfc\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000769
770 // Sequences that represent surrogates with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000771 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
772 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000773
774 // Ill-formed 4-byte sequences.
775 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
776 // U+110yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000777 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
778 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
779 "\xf4\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000780 // U+13Fyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000781 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
782 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
783 "\xf4\xbf"));
784 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
785 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
786 "\xf5\x80"));
787 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
788 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
789 "\xf6\x80"));
790 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
791 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
792 "\xf7\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000793 // U+1FFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000794 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
795 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
796 "\xf7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000797
798 // Ill-formed 5-byte sequences.
799 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
800 // U+200yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000801 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
802 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
803 "\xf8\x88\x80"));
804 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
805 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
806 "\xf8\xbf\xbf"));
807 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
808 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
809 "\xf9\x80\x80"));
810 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
811 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
812 "\xfa\x80\x80"));
813 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
814 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
815 "\xfb\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000816 // U+3FFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000817 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
818 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
819 "\xfb\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000820
821 // Ill-formed 6-byte sequences.
822 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
823 // U+4000yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000824 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
825 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
826 "\xfc\x84\x80\x80"));
827 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
828 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
829 "\xfc\xbf\xbf\xbf"));
830 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
831 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
832 "\xfd\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000833 // U+7FFFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000834 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
835 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
836 "\xfd\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000837
838 //
839 // Sequences with three continuation bytes missing
840 //
841
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000842 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
843 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
844 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
845 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
846 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
847 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
848 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
849 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
850 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
851 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000852
853 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000854 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
855 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
856 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
857 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
858 "\xf8\x80"));
859 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
860 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
861 "\xfc\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000862
863 // Ill-formed 4-byte sequences.
864 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
865 // U+14yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000866 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
867 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
868 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
869 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000870 // U+1Cyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000871 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
872 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000873
874 // Ill-formed 5-byte sequences.
875 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
876 // U+20yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000877 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
878 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
879 "\xf8\x88"));
880 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
881 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
882 "\xf8\xbf"));
883 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
884 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
885 "\xf9\x80"));
886 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
887 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
888 "\xfa\x80"));
889 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
890 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
891 "\xfb\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000892 // U+3FCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000893 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
894 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
895 "\xfb\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000896
897 // Ill-formed 6-byte sequences.
898 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
899 // U+400yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000900 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
901 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
902 "\xfc\x84\x80"));
903 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
904 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
905 "\xfc\xbf\xbf"));
906 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
907 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
908 "\xfd\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000909 // U+7FFCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000910 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
911 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
912 "\xfd\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000913
914 //
915 // Sequences with four continuation bytes missing
916 //
917
918 // Ill-formed 5-byte sequences.
919 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
920 // U+uzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000921 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
922 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
923 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
924 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
925 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
926 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
927 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
928 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000929 // U+3zyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000930 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
931 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000932
933 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000934 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
935 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
936 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
937 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
938 "\xfc\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000939
940 // Ill-formed 6-byte sequences.
941 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
942 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000943 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
944 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
945 "\xfc\x84"));
946 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
947 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
948 "\xfc\xbf"));
949 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
950 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
951 "\xfd\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000952 // U+7Fzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000953 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
954 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
955 "\xfd\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000956
957 //
958 // Sequences with five continuation bytes missing
959 //
960
961 // Ill-formed 6-byte sequences.
962 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
963 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000964 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
965 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000966 // U+uuzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000967 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
968 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000969
970 //
971 // Consecutive sequences with trailing bytes missing
972 //
973
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000974 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
975 ConvertUTFResultContainer(sourceIllegal)
976 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
977 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
978 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
979 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
980 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
981 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
982 "\xc0" "\xe0\x80" "\xf0\x80\x80"
983 "\xf8\x80\x80\x80"
984 "\xfc\x80\x80\x80\x80"
985 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
986 "\xfb\xbf\xbf\xbf"
987 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000988
989 //
990 // Overlong UTF-8 sequences
991 //
992
993 // U+002F SOLIDUS
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000994 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
995 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000996
997 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000998 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
999 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1000 "\xc0\xaf"));
1001 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1002 ConvertUTFResultContainer(sourceIllegal)
1003 .withScalars(0xfffd, 0xfffd, 0xfffd),
1004 "\xe0\x80\xaf"));
1005 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1006 ConvertUTFResultContainer(sourceIllegal)
1007 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1008 "\xf0\x80\x80\xaf"));
1009 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1010 ConvertUTFResultContainer(sourceIllegal)
1011 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1012 "\xf8\x80\x80\x80\xaf"));
1013 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1014 ConvertUTFResultContainer(sourceIllegal)
1015 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1016 "\xfc\x80\x80\x80\x80\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001017
1018 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001019 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1020 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1021 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001022
1023 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001024 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1025 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1026 "\xc0\x80"));
1027 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1028 ConvertUTFResultContainer(sourceIllegal)
1029 .withScalars(0xfffd, 0xfffd, 0xfffd),
1030 "\xe0\x80\x80"));
1031 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1032 ConvertUTFResultContainer(sourceIllegal)
1033 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1034 "\xf0\x80\x80\x80"));
1035 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1036 ConvertUTFResultContainer(sourceIllegal)
1037 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1038 "\xf8\x80\x80\x80\x80"));
1039 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1040 ConvertUTFResultContainer(sourceIllegal)
1041 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1042 "\xfc\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001043
1044 // Other overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001045 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1046 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1047 "\xc0\xbf"));
1048 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1049 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1050 "\xc1\x80"));
1051 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1052 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1053 "\xc1\xbf"));
1054 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1055 ConvertUTFResultContainer(sourceIllegal)
1056 .withScalars(0xfffd, 0xfffd, 0xfffd),
1057 "\xe0\x9f\xbf"));
1058 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1059 ConvertUTFResultContainer(sourceIllegal)
1060 .withScalars(0xfffd, 0xfffd, 0xfffd),
1061 "\xed\xa0\x80"));
1062 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1063 ConvertUTFResultContainer(sourceIllegal)
1064 .withScalars(0xfffd, 0xfffd, 0xfffd),
1065 "\xed\xbf\xbf"));
1066 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1067 ConvertUTFResultContainer(sourceIllegal)
1068 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1069 "\xf0\x8f\x80\x80"));
1070 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1071 ConvertUTFResultContainer(sourceIllegal)
1072 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1073 "\xf0\x8f\xbf\xbf"));
1074 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1075 ConvertUTFResultContainer(sourceIllegal)
1076 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1077 "\xf8\x87\xbf\xbf\xbf"));
1078 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1079 ConvertUTFResultContainer(sourceIllegal)
1080 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1081 "\xfc\x83\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001082
1083 //
1084 // Isolated surrogates
1085 //
1086
1087 // Unicode 6.3.0:
1088 //
1089 // D71. High-surrogate code point: A Unicode code point in the range
1090 // U+D800 to U+DBFF.
1091 //
1092 // D73. Low-surrogate code point: A Unicode code point in the range
1093 // U+DC00 to U+DFFF.
1094
1095 // Note: U+E0100 is <DB40 DD00> in UTF16.
1096
1097 // High surrogates
1098
1099 // U+D800
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001100 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1101 ConvertUTFResultContainer(sourceIllegal)
1102 .withScalars(0xfffd, 0xfffd, 0xfffd),
1103 "\xed\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001104
1105 // U+DB40
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001106 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1107 ConvertUTFResultContainer(sourceIllegal)
1108 .withScalars(0xfffd, 0xfffd, 0xfffd),
1109 "\xed\xac\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001110
1111 // U+DBFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001112 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1113 ConvertUTFResultContainer(sourceIllegal)
1114 .withScalars(0xfffd, 0xfffd, 0xfffd),
1115 "\xed\xaf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001116
1117 // Low surrogates
1118
1119 // U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001120 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1121 ConvertUTFResultContainer(sourceIllegal)
1122 .withScalars(0xfffd, 0xfffd, 0xfffd),
1123 "\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001124
1125 // U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001126 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1127 ConvertUTFResultContainer(sourceIllegal)
1128 .withScalars(0xfffd, 0xfffd, 0xfffd),
1129 "\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001130
1131 // U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001132 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1133 ConvertUTFResultContainer(sourceIllegal)
1134 .withScalars(0xfffd, 0xfffd, 0xfffd),
1135 "\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001136
1137 // Surrogate pairs
1138
1139 // U+D800 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001140 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1141 ConvertUTFResultContainer(sourceIllegal)
1142 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1143 "\xed\xa0\x80\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001144
1145 // U+D800 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001146 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1147 ConvertUTFResultContainer(sourceIllegal)
1148 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1149 "\xed\xa0\x80\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001150
1151 // U+D800 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001152 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1153 ConvertUTFResultContainer(sourceIllegal)
1154 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1155 "\xed\xa0\x80\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001156
1157 // U+DB40 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001158 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1159 ConvertUTFResultContainer(sourceIllegal)
1160 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1161 "\xed\xac\xa0\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001162
1163 // U+DB40 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001164 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1165 ConvertUTFResultContainer(sourceIllegal)
1166 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1167 "\xed\xac\xa0\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001168
1169 // U+DB40 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001170 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1171 ConvertUTFResultContainer(sourceIllegal)
1172 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1173 "\xed\xac\xa0\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001174
1175 // U+DBFF U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001176 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1177 ConvertUTFResultContainer(sourceIllegal)
1178 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1179 "\xed\xaf\xbf\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001180
1181 // U+DBFF U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001182 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1183 ConvertUTFResultContainer(sourceIllegal)
1184 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1185 "\xed\xaf\xbf\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001186
1187 // U+DBFF U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001188 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1189 ConvertUTFResultContainer(sourceIllegal)
1190 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1191 "\xed\xaf\xbf\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001192
1193 //
1194 // Noncharacters
1195 //
1196
1197 // Unicode 6.3.0:
1198 //
1199 // D14. Noncharacter: A code point that is permanently reserved for
1200 // internal use and that should never be interchanged. Noncharacters
1201 // consist of the values U+nFFFE and U+nFFFF (where n is from 0x0 to 0x10)
1202 // and the values U+FDD0..U+FDEF.
1203
1204 // U+FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001205 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1206 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1207 "\xef\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001208
1209 // U+FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001210 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1211 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1212 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001213
1214 // U+1FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001215 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1216 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1217 "\xf0\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001218
1219 // U+1FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001220 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1221 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1222 "\xf0\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001223
1224 // U+2FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001225 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1226 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1227 "\xf0\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001228
1229 // U+2FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001230 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1231 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1232 "\xf0\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001233
1234 // U+3FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001235 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1236 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1237 "\xf0\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001238
1239 // U+3FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001240 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1241 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1242 "\xf0\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001243
1244 // U+4FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1246 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1247 "\xf1\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001248
1249 // U+4FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001250 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1251 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1252 "\xf1\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001253
1254 // U+5FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001255 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1256 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1257 "\xf1\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001258
1259 // U+5FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001260 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1261 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1262 "\xf1\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001263
1264 // U+6FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001265 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1266 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1267 "\xf1\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001268
1269 // U+6FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001270 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1271 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1272 "\xf1\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001273
1274 // U+7FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001275 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1276 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1277 "\xf1\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001278
1279 // U+7FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001280 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1281 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1282 "\xf1\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001283
1284 // U+8FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001285 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1286 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1287 "\xf2\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001288
1289 // U+8FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001290 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1291 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1292 "\xf2\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001293
1294 // U+9FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001295 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1296 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1297 "\xf2\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001298
1299 // U+9FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001300 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1301 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1302 "\xf2\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001303
1304 // U+AFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001305 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1306 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1307 "\xf2\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001308
1309 // U+AFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001310 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1311 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1312 "\xf2\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001313
1314 // U+BFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001315 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1316 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1317 "\xf2\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001318
1319 // U+BFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001320 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1321 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1322 "\xf2\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001323
1324 // U+CFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001325 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1326 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1327 "\xf3\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001328
1329 // U+CFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001330 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1331 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1332 "\xf3\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001333
1334 // U+DFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001335 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1336 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1337 "\xf3\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001338
1339 // U+DFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001340 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1341 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1342 "\xf3\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001343
1344 // U+EFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001345 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1346 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1347 "\xf3\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001348
1349 // U+EFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001350 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1351 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1352 "\xf3\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001353
1354 // U+FFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001355 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1356 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1357 "\xf3\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001358
1359 // U+FFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001360 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1361 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1362 "\xf3\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001363
1364 // U+10FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001365 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1366 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1367 "\xf4\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001368
1369 // U+10FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1371 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1372 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001373
1374 // U+FDD0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001375 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1376 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1377 "\xef\xb7\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001378
1379 // U+FDD1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001380 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1381 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1382 "\xef\xb7\x91"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001383
1384 // U+FDD2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1386 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1387 "\xef\xb7\x92"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001388
1389 // U+FDD3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001390 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1391 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1392 "\xef\xb7\x93"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001393
1394 // U+FDD4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001395 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1396 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1397 "\xef\xb7\x94"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001398
1399 // U+FDD5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001400 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1401 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1402 "\xef\xb7\x95"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001403
1404 // U+FDD6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001405 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1406 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1407 "\xef\xb7\x96"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001408
1409 // U+FDD7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001410 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1411 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1412 "\xef\xb7\x97"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001413
1414 // U+FDD8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001415 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1416 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1417 "\xef\xb7\x98"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001418
1419 // U+FDD9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001420 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1421 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1422 "\xef\xb7\x99"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001423
1424 // U+FDDA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001425 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1426 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1427 "\xef\xb7\x9a"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001428
1429 // U+FDDB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001430 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1431 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1432 "\xef\xb7\x9b"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001433
1434 // U+FDDC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001435 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1436 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1437 "\xef\xb7\x9c"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001438
1439 // U+FDDD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001440 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1441 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1442 "\xef\xb7\x9d"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001443
1444 // U+FDDE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001445 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1446 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1447 "\xef\xb7\x9e"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001448
1449 // U+FDDF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001450 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1451 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1452 "\xef\xb7\x9f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001453
1454 // U+FDE0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001455 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1456 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1457 "\xef\xb7\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001458
1459 // U+FDE1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001460 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1461 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1462 "\xef\xb7\xa1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001463
1464 // U+FDE2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001465 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1466 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1467 "\xef\xb7\xa2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001468
1469 // U+FDE3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001470 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1471 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1472 "\xef\xb7\xa3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001473
1474 // U+FDE4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001475 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1476 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1477 "\xef\xb7\xa4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001478
1479 // U+FDE5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001480 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1481 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1482 "\xef\xb7\xa5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001483
1484 // U+FDE6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001485 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1486 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1487 "\xef\xb7\xa6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001488
1489 // U+FDE7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001490 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1491 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1492 "\xef\xb7\xa7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001493
1494 // U+FDE8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001495 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1496 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1497 "\xef\xb7\xa8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001498
1499 // U+FDE9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001500 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1501 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1502 "\xef\xb7\xa9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001503
1504 // U+FDEA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001505 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1506 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1507 "\xef\xb7\xaa"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001508
1509 // U+FDEB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001510 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1511 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1512 "\xef\xb7\xab"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001513
1514 // U+FDEC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001515 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1516 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1517 "\xef\xb7\xac"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001518
1519 // U+FDED
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1521 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1522 "\xef\xb7\xad"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001523
1524 // U+FDEE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1526 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1527 "\xef\xb7\xae"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001528
1529 // U+FDEF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001530 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1531 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1532 "\xef\xb7\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001533
1534 // U+FDF0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001535 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1536 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1537 "\xef\xb7\xb0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001538
1539 // U+FDF1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001540 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1541 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1542 "\xef\xb7\xb1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001543
1544 // U+FDF2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001545 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1546 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1547 "\xef\xb7\xb2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001548
1549 // U+FDF3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1551 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1552 "\xef\xb7\xb3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001553
1554 // U+FDF4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001555 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1556 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1557 "\xef\xb7\xb4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001558
1559 // U+FDF5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001560 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1561 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1562 "\xef\xb7\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001563
1564 // U+FDF6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001565 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1566 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1567 "\xef\xb7\xb6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001568
1569 // U+FDF7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001570 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1571 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1572 "\xef\xb7\xb7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001573
1574 // U+FDF8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001575 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1576 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1577 "\xef\xb7\xb8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001578
1579 // U+FDF9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001580 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1581 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1582 "\xef\xb7\xb9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001583
1584 // U+FDFA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001585 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1586 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1587 "\xef\xb7\xba"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001588
1589 // U+FDFB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1591 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1592 "\xef\xb7\xbb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001593
1594 // U+FDFC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001595 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1596 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1597 "\xef\xb7\xbc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001598
1599 // U+FDFD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001600 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1601 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1602 "\xef\xb7\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001603
1604 // U+FDFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001605 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1606 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1607 "\xef\xb7\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001608
1609 // U+FDFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001610 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1611 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1612 "\xef\xb7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001613}
1614
1615TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1616 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001617 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1618 ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1619 "\x41", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001620
1621 //
1622 // Sequences with one continuation byte missing
1623 //
1624
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001625 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1626 ConvertUTFResultContainer(sourceExhausted),
1627 "\xc2", true));
1628 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1629 ConvertUTFResultContainer(sourceExhausted),
1630 "\xdf", true));
1631 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1632 ConvertUTFResultContainer(sourceExhausted),
1633 "\xe0\xa0", true));
1634 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1635 ConvertUTFResultContainer(sourceExhausted),
1636 "\xe0\xbf", true));
1637 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1638 ConvertUTFResultContainer(sourceExhausted),
1639 "\xe1\x80", true));
1640 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1641 ConvertUTFResultContainer(sourceExhausted),
1642 "\xec\xbf", true));
1643 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1644 ConvertUTFResultContainer(sourceExhausted),
1645 "\xed\x80", true));
1646 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1647 ConvertUTFResultContainer(sourceExhausted),
1648 "\xed\x9f", true));
1649 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1650 ConvertUTFResultContainer(sourceExhausted),
1651 "\xee\x80", true));
1652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1653 ConvertUTFResultContainer(sourceExhausted),
1654 "\xef\xbf", true));
1655 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1656 ConvertUTFResultContainer(sourceExhausted),
1657 "\xf0\x90\x80", true));
1658 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1659 ConvertUTFResultContainer(sourceExhausted),
1660 "\xf0\xbf\xbf", true));
1661 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1662 ConvertUTFResultContainer(sourceExhausted),
1663 "\xf1\x80\x80", true));
1664 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1665 ConvertUTFResultContainer(sourceExhausted),
1666 "\xf3\xbf\xbf", true));
1667 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1668 ConvertUTFResultContainer(sourceExhausted),
1669 "\xf4\x80\x80", true));
1670 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1671 ConvertUTFResultContainer(sourceExhausted),
1672 "\xf4\x8f\xbf", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001673
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1675 ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1676 "\x41\xc2", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001677}
1678