blob: 61ed252d6273d5a9e5347bf8a4f3d4ae7df1b245 [file] [log] [blame]
//===- llvm/unittest/Support/ConvertUTFTest.cpp - ConvertUTF tests --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "llvm/Support/ConvertUTF.h"
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000011#include "llvm/Support/Format.h"
Reid Kleckner7df03c22013-07-16 17:14:33 +000012#include "gtest/gtest.h"
13#include <string>
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +000014#include <utility>
Chandler Carruthd9903882015-01-14 11:23:27 +000015#include <vector>
Reid Kleckner7df03c22013-07-16 17:14:33 +000016
17using namespace llvm;
18
19TEST(ConvertUTFTest, ConvertUTF16LittleEndianToUTF8String) {
20 // Src is the look of disapproval.
21 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
22 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
23 std::string Result;
24 bool Success = convertUTF16ToUTF8String(Ref, Result);
25 EXPECT_TRUE(Success);
26 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
27 EXPECT_EQ(Expected, Result);
28}
29
30TEST(ConvertUTFTest, ConvertUTF16BigEndianToUTF8String) {
31 // Src is the look of disapproval.
32 static const char Src[] = "\xfe\xff\x0c\xa0\x00_\x0c\xa0";
33 ArrayRef<char> Ref(Src, sizeof(Src) - 1);
34 std::string Result;
35 bool Success = convertUTF16ToUTF8String(Ref, Result);
36 EXPECT_TRUE(Success);
37 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
38 EXPECT_EQ(Expected, Result);
39}
40
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000041TEST(ConvertUTFTest, ConvertUTF8ToUTF16String) {
42 // Src is the look of disapproval.
43 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
44 StringRef Ref(Src, sizeof(Src) - 1);
45 SmallVector<UTF16, 5> Result;
46 bool Success = convertUTF8ToUTF16String(Ref, Result);
47 EXPECT_TRUE(Success);
48 static const UTF16 Expected[] = {0x0CA0, 0x005f, 0x0CA0, 0};
Eric Christopher7aebb322015-01-27 01:01:39 +000049 ASSERT_EQ(3u, Result.size());
Reid Klecknerd8cb6b02015-01-26 19:51:00 +000050 for (int I = 0, E = 3; I != E; ++I)
51 EXPECT_EQ(Expected[I], Result[I]);
52}
53
Reid Kleckner7df03c22013-07-16 17:14:33 +000054TEST(ConvertUTFTest, OddLengthInput) {
55 std::string Result;
Craig Toppere1d12942014-08-27 05:25:25 +000056 bool Success = convertUTF16ToUTF8String(makeArrayRef("xxxxx", 5), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000057 EXPECT_FALSE(Success);
58}
59
60TEST(ConvertUTFTest, Empty) {
61 std::string Result;
Marianne Mailhot-Sarrasin7423f402016-03-11 15:59:32 +000062 bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(None), Result);
Reid Kleckner7df03c22013-07-16 17:14:33 +000063 EXPECT_TRUE(Success);
64 EXPECT_TRUE(Result.empty());
65}
66
67TEST(ConvertUTFTest, HasUTF16BOM) {
Craig Toppere1d12942014-08-27 05:25:25 +000068 bool HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xff\xfe", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000069 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000070 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff", 2));
Reid Kleckner7df03c22013-07-16 17:14:33 +000071 EXPECT_TRUE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000072 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff ", 3));
Reid Kleckner7df03c22013-07-16 17:14:33 +000073 EXPECT_TRUE(HasBOM); // Don't care about odd lengths.
Craig Toppere1d12942014-08-27 05:25:25 +000074 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe\xff\x00asdf", 6));
Reid Kleckner7df03c22013-07-16 17:14:33 +000075 EXPECT_TRUE(HasBOM);
76
Craig Toppere1d12942014-08-27 05:25:25 +000077 HasBOM = hasUTF16ByteOrderMark(None);
Reid Kleckner7df03c22013-07-16 17:14:33 +000078 EXPECT_FALSE(HasBOM);
Craig Toppere1d12942014-08-27 05:25:25 +000079 HasBOM = hasUTF16ByteOrderMark(makeArrayRef("\xfe", 1));
Reid Kleckner7df03c22013-07-16 17:14:33 +000080 EXPECT_FALSE(HasBOM);
81}
Dmitri Gribenko1089db02014-06-16 11:09:46 +000082
Marianne Mailhot-Sarrasin7423f402016-03-11 15:59:32 +000083TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
84 // Src is the look of disapproval.
85 static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
86 ArrayRef<UTF16> SrcRef = makeArrayRef((const UTF16 *)Src, 4);
87 std::string Result;
88 bool Success = convertUTF16ToUTF8String(SrcRef, Result);
89 EXPECT_TRUE(Success);
90 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
91 EXPECT_EQ(Expected, Result);
92}
93
94TEST(ConvertUTFTest, ConvertUTF8toWide) {
95 // Src is the look of disapproval.
96 static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
97 std::wstring Result;
98 bool Success = ConvertUTF8toWide((const char*)Src, Result);
99 EXPECT_TRUE(Success);
100 std::wstring Expected(L"\x0ca0_\x0ca0");
101 EXPECT_EQ(Expected, Result);
102 Result.clear();
103 Success = ConvertUTF8toWide(StringRef(Src, 7), Result);
104 EXPECT_TRUE(Success);
105 EXPECT_EQ(Expected, Result);
106}
107
108TEST(ConvertUTFTest, convertWideToUTF8) {
109 // Src is the look of disapproval.
110 static const wchar_t Src[] = L"\x0ca0_\x0ca0";
111 std::string Result;
112 bool Success = convertWideToUTF8(Src, Result);
113 EXPECT_TRUE(Success);
114 std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0");
115 EXPECT_EQ(Expected, Result);
116}
117
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000118struct ConvertUTFResultContainer {
119 ConversionResult ErrorCode;
120 std::vector<unsigned> UnicodeScalars;
121
122 ConvertUTFResultContainer(ConversionResult ErrorCode)
123 : ErrorCode(ErrorCode) {}
124
125 ConvertUTFResultContainer
126 withScalars(unsigned US0 = 0x110000, unsigned US1 = 0x110000,
127 unsigned US2 = 0x110000, unsigned US3 = 0x110000,
128 unsigned US4 = 0x110000, unsigned US5 = 0x110000,
129 unsigned US6 = 0x110000, unsigned US7 = 0x110000) {
130 ConvertUTFResultContainer Result(*this);
131 if (US0 != 0x110000)
132 Result.UnicodeScalars.push_back(US0);
133 if (US1 != 0x110000)
134 Result.UnicodeScalars.push_back(US1);
135 if (US2 != 0x110000)
136 Result.UnicodeScalars.push_back(US2);
137 if (US3 != 0x110000)
138 Result.UnicodeScalars.push_back(US3);
139 if (US4 != 0x110000)
140 Result.UnicodeScalars.push_back(US4);
141 if (US5 != 0x110000)
142 Result.UnicodeScalars.push_back(US5);
143 if (US6 != 0x110000)
144 Result.UnicodeScalars.push_back(US6);
145 if (US7 != 0x110000)
146 Result.UnicodeScalars.push_back(US7);
147 return Result;
148 }
149};
150
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000151std::pair<ConversionResult, std::vector<unsigned>>
152ConvertUTF8ToUnicodeScalarsLenient(StringRef S) {
153 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
154
155 const UTF8 *SourceNext = SourceStart;
156 std::vector<UTF32> Decoded(S.size(), 0);
157 UTF32 *TargetStart = Decoded.data();
158
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000159 auto ErrorCode =
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000160 ConvertUTF8toUTF32(&SourceNext, SourceStart + S.size(), &TargetStart,
161 Decoded.data() + Decoded.size(), lenientConversion);
162
163 Decoded.resize(TargetStart - Decoded.data());
164
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000165 return std::make_pair(ErrorCode, Decoded);
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000166}
167
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000168std::pair<ConversionResult, std::vector<unsigned>>
169ConvertUTF8ToUnicodeScalarsPartialLenient(StringRef S) {
170 const UTF8 *SourceStart = reinterpret_cast<const UTF8 *>(S.data());
171
172 const UTF8 *SourceNext = SourceStart;
173 std::vector<UTF32> Decoded(S.size(), 0);
174 UTF32 *TargetStart = Decoded.data();
175
176 auto ErrorCode = ConvertUTF8toUTF32Partial(
177 &SourceNext, SourceStart + S.size(), &TargetStart,
178 Decoded.data() + Decoded.size(), lenientConversion);
179
180 Decoded.resize(TargetStart - Decoded.data());
181
182 return std::make_pair(ErrorCode, Decoded);
183}
184
185::testing::AssertionResult
186CheckConvertUTF8ToUnicodeScalars(ConvertUTFResultContainer Expected,
187 StringRef S, bool Partial = false) {
188 ConversionResult ErrorCode;
189 std::vector<unsigned> Decoded;
190 if (!Partial)
191 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsLenient(S);
192 else
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000193 std::tie(ErrorCode, Decoded) = ConvertUTF8ToUnicodeScalarsPartialLenient(S);
Dmitri Gribenkocbc7ae22015-01-10 05:03:29 +0000194
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000195 if (Expected.ErrorCode != ErrorCode)
196 return ::testing::AssertionFailure() << "Expected error code "
197 << Expected.ErrorCode << ", actual "
198 << ErrorCode;
199
200 if (Expected.UnicodeScalars != Decoded)
201 return ::testing::AssertionFailure()
202 << "Expected lenient decoded result:\n"
203 << ::testing::PrintToString(Expected.UnicodeScalars) << "\n"
204 << "Actual result:\n" << ::testing::PrintToString(Decoded);
205
206 return ::testing::AssertionSuccess();
207}
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000208
209TEST(ConvertUTFTest, UTF8ToUTF32Lenient) {
210
211 //
212 // 1-byte sequences
213 //
214
215 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000216 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
217 ConvertUTFResultContainer(conversionOK).withScalars(0x0041), "\x41"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000218
219 //
220 // 2-byte sequences
221 //
222
223 // U+0283 LATIN SMALL LETTER ESH
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000224 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
225 ConvertUTFResultContainer(conversionOK).withScalars(0x0283),
226 "\xca\x83"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000227
228 // U+03BA GREEK SMALL LETTER KAPPA
229 // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
230 // U+03C3 GREEK SMALL LETTER SIGMA
231 // U+03BC GREEK SMALL LETTER MU
232 // U+03B5 GREEK SMALL LETTER EPSILON
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000233 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
234 ConvertUTFResultContainer(conversionOK)
235 .withScalars(0x03ba, 0x1f79, 0x03c3, 0x03bc, 0x03b5),
236 "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000237
238 //
239 // 3-byte sequences
240 //
241
242 // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
243 // U+6587 CJK UNIFIED IDEOGRAPH-6587
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000244 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
245 ConvertUTFResultContainer(conversionOK).withScalars(0x4f8b, 0x6587),
246 "\xe4\xbe\x8b\xe6\x96\x87"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000247
248 // U+D55C HANGUL SYLLABLE HAN
249 // U+AE00 HANGUL SYLLABLE GEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000250 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
251 ConvertUTFResultContainer(conversionOK).withScalars(0xd55c, 0xae00),
252 "\xed\x95\x9c\xea\xb8\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000253
254 // U+1112 HANGUL CHOSEONG HIEUH
255 // U+1161 HANGUL JUNGSEONG A
256 // U+11AB HANGUL JONGSEONG NIEUN
257 // U+1100 HANGUL CHOSEONG KIYEOK
258 // U+1173 HANGUL JUNGSEONG EU
259 // U+11AF HANGUL JONGSEONG RIEUL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000260 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
261 ConvertUTFResultContainer(conversionOK)
262 .withScalars(0x1112, 0x1161, 0x11ab, 0x1100, 0x1173, 0x11af),
263 "\xe1\x84\x92\xe1\x85\xa1\xe1\x86\xab\xe1\x84\x80\xe1\x85\xb3"
264 "\xe1\x86\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000265
266 //
267 // 4-byte sequences
268 //
269
270 // U+E0100 VARIATION SELECTOR-17
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000271 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
272 ConvertUTFResultContainer(conversionOK).withScalars(0x000E0100),
273 "\xf3\xa0\x84\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000274
275 //
276 // First possible sequence of a certain length
277 //
278
279 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000280 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
281 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
282 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000283
284 // U+0080 PADDING CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000285 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
286 ConvertUTFResultContainer(conversionOK).withScalars(0x0080),
287 "\xc2\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000288
289 // U+0800 SAMARITAN LETTER ALAF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000290 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
291 ConvertUTFResultContainer(conversionOK).withScalars(0x0800),
292 "\xe0\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000293
294 // U+10000 LINEAR B SYLLABLE B008 A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000295 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
296 ConvertUTFResultContainer(conversionOK).withScalars(0x10000),
297 "\xf0\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000298
299 // U+200000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000300 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
301 ConvertUTFResultContainer(sourceIllegal)
302 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
303 "\xf8\x88\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000304
305 // U+4000000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000306 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
307 ConvertUTFResultContainer(sourceIllegal)
308 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
309 "\xfc\x84\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000310
311 //
312 // Last possible sequence of a certain length
313 //
314
315 // U+007F DELETE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000316 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
317 ConvertUTFResultContainer(conversionOK).withScalars(0x007f), "\x7f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000318
319 // U+07FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000320 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
321 ConvertUTFResultContainer(conversionOK).withScalars(0x07ff),
322 "\xdf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000323
324 // U+FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000325 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
326 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
327 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000328
329 // U+1FFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000330 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
331 ConvertUTFResultContainer(sourceIllegal)
332 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
333 "\xf7\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000334
335 // U+3FFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000336 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
337 ConvertUTFResultContainer(sourceIllegal)
338 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
339 "\xfb\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000340
341 // U+7FFFFFFF (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000342 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
343 ConvertUTFResultContainer(sourceIllegal)
344 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
345 "\xfd\xbf\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000346
347 //
348 // Other boundary conditions
349 //
350
351 // U+D7FF (unassigned)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000352 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
353 ConvertUTFResultContainer(conversionOK).withScalars(0xd7ff),
354 "\xed\x9f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000355
356 // U+E000 (private use)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000357 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
358 ConvertUTFResultContainer(conversionOK).withScalars(0xe000),
359 "\xee\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000360
361 // U+FFFD REPLACEMENT CHARACTER
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000362 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
363 ConvertUTFResultContainer(conversionOK).withScalars(0xfffd),
364 "\xef\xbf\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000365
366 // U+10FFFF (noncharacter)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000367 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
368 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
369 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000370
371 // U+110000 (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000372 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
373 ConvertUTFResultContainer(sourceIllegal)
374 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
375 "\xf4\x90\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000376
377 //
378 // Unexpected continuation bytes
379 //
380
381 // A sequence of unexpected continuation bytes that don't follow a first
382 // byte, every byte is a maximal subpart.
383
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000384 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
385 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\x80"));
386 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
387 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xbf"));
388 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
389 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
390 "\x80\x80"));
391 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
392 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
393 "\x80\xbf"));
394 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
395 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
396 "\xbf\x80"));
397 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
398 ConvertUTFResultContainer(sourceIllegal)
399 .withScalars(0xfffd, 0xfffd, 0xfffd),
400 "\x80\xbf\x80"));
401 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
402 ConvertUTFResultContainer(sourceIllegal)
403 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
404 "\x80\xbf\x80\xbf"));
405 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
406 ConvertUTFResultContainer(sourceIllegal)
407 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
408 "\x80\xbf\x82\xbf\xaa"));
409 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
410 ConvertUTFResultContainer(sourceIllegal)
411 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
412 "\xaa\xb0\xbb\xbf\xaa\xa0"));
413 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
414 ConvertUTFResultContainer(sourceIllegal)
415 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
416 "\xaa\xb0\xbb\xbf\xaa\xa0\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000417
418 // All continuation bytes (0x80--0xbf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000419 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
420 ConvertUTFResultContainer(sourceIllegal)
421 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
422 0xfffd, 0xfffd, 0xfffd, 0xfffd)
423 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
424 0xfffd, 0xfffd, 0xfffd, 0xfffd)
425 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
426 0xfffd, 0xfffd, 0xfffd, 0xfffd)
427 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
428 0xfffd, 0xfffd, 0xfffd, 0xfffd)
429 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
430 0xfffd, 0xfffd, 0xfffd, 0xfffd)
431 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
432 0xfffd, 0xfffd, 0xfffd, 0xfffd)
433 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
434 0xfffd, 0xfffd, 0xfffd, 0xfffd)
435 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
436 0xfffd, 0xfffd, 0xfffd, 0xfffd),
437 "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
438 "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
439 "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
440 "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000441
442 //
443 // Lonely start bytes
444 //
445
446 // Start bytes of 2-byte sequences (0xc0--0xdf).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000447 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
448 ConvertUTFResultContainer(sourceIllegal)
449 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
450 0xfffd, 0xfffd, 0xfffd, 0xfffd)
451 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
452 0xfffd, 0xfffd, 0xfffd, 0xfffd)
453 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
454 0xfffd, 0xfffd, 0xfffd, 0xfffd)
455 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
456 0xfffd, 0xfffd, 0xfffd, 0xfffd),
457 "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
458 "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000459
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000460 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
461 ConvertUTFResultContainer(sourceIllegal)
462 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
463 0xfffd, 0x0020, 0xfffd, 0x0020)
464 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
465 0xfffd, 0x0020, 0xfffd, 0x0020)
466 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
467 0xfffd, 0x0020, 0xfffd, 0x0020)
468 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
469 0xfffd, 0x0020, 0xfffd, 0x0020)
470 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
471 0xfffd, 0x0020, 0xfffd, 0x0020)
472 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
473 0xfffd, 0x0020, 0xfffd, 0x0020)
474 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
475 0xfffd, 0x0020, 0xfffd, 0x0020)
476 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
477 0xfffd, 0x0020, 0xfffd, 0x0020),
478 "\xc0\x20\xc1\x20\xc2\x20\xc3\x20\xc4\x20\xc5\x20\xc6\x20\xc7\x20"
479 "\xc8\x20\xc9\x20\xca\x20\xcb\x20\xcc\x20\xcd\x20\xce\x20\xcf\x20"
480 "\xd0\x20\xd1\x20\xd2\x20\xd3\x20\xd4\x20\xd5\x20\xd6\x20\xd7\x20"
481 "\xd8\x20\xd9\x20\xda\x20\xdb\x20\xdc\x20\xdd\x20\xde\x20\xdf\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000482
483 // Start bytes of 3-byte sequences (0xe0--0xef).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000484 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
485 ConvertUTFResultContainer(sourceIllegal)
486 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
487 0xfffd, 0xfffd, 0xfffd, 0xfffd)
488 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
489 0xfffd, 0xfffd, 0xfffd, 0xfffd),
490 "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000491
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000492 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
493 ConvertUTFResultContainer(sourceIllegal)
494 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
495 0xfffd, 0x0020, 0xfffd, 0x0020)
496 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
497 0xfffd, 0x0020, 0xfffd, 0x0020)
498 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
499 0xfffd, 0x0020, 0xfffd, 0x0020)
500 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
501 0xfffd, 0x0020, 0xfffd, 0x0020),
502 "\xe0\x20\xe1\x20\xe2\x20\xe3\x20\xe4\x20\xe5\x20\xe6\x20\xe7\x20"
503 "\xe8\x20\xe9\x20\xea\x20\xeb\x20\xec\x20\xed\x20\xee\x20\xef\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000504
505 // Start bytes of 4-byte sequences (0xf0--0xf7).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000506 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
507 ConvertUTFResultContainer(sourceIllegal)
508 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd,
509 0xfffd, 0xfffd, 0xfffd, 0xfffd),
510 "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000511
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000512 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
513 ConvertUTFResultContainer(sourceIllegal)
514 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
515 0xfffd, 0x0020, 0xfffd, 0x0020)
516 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
517 0xfffd, 0x0020, 0xfffd, 0x0020),
518 "\xf0\x20\xf1\x20\xf2\x20\xf3\x20\xf4\x20\xf5\x20\xf6\x20\xf7\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000519
520 // Start bytes of 5-byte sequences (0xf8--0xfb).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000521 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
522 ConvertUTFResultContainer(sourceIllegal)
523 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
524 "\xf8\xf9\xfa\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000525
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000526 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
527 ConvertUTFResultContainer(sourceIllegal)
528 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
529 0xfffd, 0x0020, 0xfffd, 0x0020),
530 "\xf8\x20\xf9\x20\xfa\x20\xfb\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000531
532 // Start bytes of 6-byte sequences (0xfc--0xfd).
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000533 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
534 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
535 "\xfc\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000536
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000537 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
538 ConvertUTFResultContainer(sourceIllegal)
539 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020),
540 "\xfc\x20\xfd\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000541
542 //
543 // Other bytes (0xc0--0xc1, 0xfe--0xff).
544 //
545
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000546 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
547 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc0"));
548 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
549 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc1"));
550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
551 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfe"));
552 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
553 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000554
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000555 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
556 ConvertUTFResultContainer(sourceIllegal)
557 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
558 "\xc0\xc1\xfe\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000559
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000560 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
561 ConvertUTFResultContainer(sourceIllegal)
562 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
563 "\xfe\xfe\xff\xff"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000564
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000565 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
566 ConvertUTFResultContainer(sourceIllegal)
567 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
568 "\xfe\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000569
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000570 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
571 ConvertUTFResultContainer(sourceIllegal)
572 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
573 "\xff\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000574
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000575 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
576 ConvertUTFResultContainer(sourceIllegal)
577 .withScalars(0xfffd, 0x0020, 0xfffd, 0x0020,
578 0xfffd, 0x0020, 0xfffd, 0x0020),
579 "\xc0\x20\xc1\x20\xfe\x20\xff\x20"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000580
581 //
582 // Sequences with one continuation byte missing
583 //
584
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000585 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
586 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xc2"));
587 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
588 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xdf"));
589 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
590 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
591 "\xe0\xa0"));
592 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
593 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
594 "\xe0\xbf"));
595 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
596 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
597 "\xe1\x80"));
598 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
599 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
600 "\xec\xbf"));
601 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
602 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
603 "\xed\x80"));
604 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
605 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
606 "\xed\x9f"));
607 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
608 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
609 "\xee\x80"));
610 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
611 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
612 "\xef\xbf"));
613 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
614 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
615 "\xf0\x90\x80"));
616 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
617 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
618 "\xf0\xbf\xbf"));
619 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
620 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
621 "\xf1\x80\x80"));
622 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
623 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
624 "\xf3\xbf\xbf"));
625 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
626 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
627 "\xf4\x80\x80"));
628 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
629 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
630 "\xf4\x8f\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000631
632 // Overlong sequences with one trailing byte missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000633 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
634 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
635 "\xc0"));
636 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
637 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
638 "\xc1"));
639 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
640 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
641 "\xe0\x80"));
642 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
643 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
644 "\xe0\x9f"));
645 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
646 ConvertUTFResultContainer(sourceIllegal)
647 .withScalars(0xfffd, 0xfffd, 0xfffd),
648 "\xf0\x80\x80"));
649 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
650 ConvertUTFResultContainer(sourceIllegal)
651 .withScalars(0xfffd, 0xfffd, 0xfffd),
652 "\xf0\x8f\x80"));
653 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
654 ConvertUTFResultContainer(sourceIllegal)
655 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
656 "\xf8\x80\x80\x80"));
657 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
658 ConvertUTFResultContainer(sourceIllegal)
659 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
660 "\xfc\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000661
662 // Sequences that represent surrogates with one trailing byte missing.
663 // High surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000664 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
665 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
666 "\xed\xa0"));
667 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
668 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
669 "\xed\xac"));
670 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
671 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
672 "\xed\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000673 // Low surrogates
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000674 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
675 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
676 "\xed\xb0"));
677 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
678 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
679 "\xed\xb4"));
680 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
681 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
682 "\xed\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000683
684 // Ill-formed 4-byte sequences.
685 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
686 // U+1100xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000687 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
688 ConvertUTFResultContainer(sourceIllegal)
689 .withScalars(0xfffd, 0xfffd, 0xfffd),
690 "\xf4\x90\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000691 // U+13FBxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000692 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
693 ConvertUTFResultContainer(sourceIllegal)
694 .withScalars(0xfffd, 0xfffd, 0xfffd),
695 "\xf4\xbf\xbf"));
696 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
697 ConvertUTFResultContainer(sourceIllegal)
698 .withScalars(0xfffd, 0xfffd, 0xfffd),
699 "\xf5\x80\x80"));
700 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
701 ConvertUTFResultContainer(sourceIllegal)
702 .withScalars(0xfffd, 0xfffd, 0xfffd),
703 "\xf6\x80\x80"));
704 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
705 ConvertUTFResultContainer(sourceIllegal)
706 .withScalars(0xfffd, 0xfffd, 0xfffd),
707 "\xf7\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000708 // U+1FFBxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000709 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
710 ConvertUTFResultContainer(sourceIllegal)
711 .withScalars(0xfffd, 0xfffd, 0xfffd),
712 "\xf7\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000713
714 // Ill-formed 5-byte sequences.
715 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
716 // U+2000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000717 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
718 ConvertUTFResultContainer(sourceIllegal)
719 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
720 "\xf8\x88\x80\x80"));
721 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
722 ConvertUTFResultContainer(sourceIllegal)
723 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
724 "\xf8\xbf\xbf\xbf"));
725 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
726 ConvertUTFResultContainer(sourceIllegal)
727 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
728 "\xf9\x80\x80\x80"));
729 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
730 ConvertUTFResultContainer(sourceIllegal)
731 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
732 "\xfa\x80\x80\x80"));
733 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
734 ConvertUTFResultContainer(sourceIllegal)
735 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
736 "\xfb\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000737 // U+3FFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000738 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
739 ConvertUTFResultContainer(sourceIllegal)
740 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
741 "\xfb\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000742
743 // Ill-formed 6-byte sequences.
744 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
745 // U+40000xx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000746 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
747 ConvertUTFResultContainer(sourceIllegal)
748 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
749 "\xfc\x84\x80\x80\x80"));
750 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
751 ConvertUTFResultContainer(sourceIllegal)
752 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
753 "\xfc\xbf\xbf\xbf\xbf"));
754 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
755 ConvertUTFResultContainer(sourceIllegal)
756 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
757 "\xfd\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000758 // U+7FFFFFxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000759 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
760 ConvertUTFResultContainer(sourceIllegal)
761 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
762 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000763
764 //
765 // Sequences with two continuation bytes missing
766 //
767
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000768 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
769 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
770 "\xf0\x90"));
771 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
772 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
773 "\xf0\xbf"));
774 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
775 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
776 "\xf1\x80"));
777 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
778 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
779 "\xf3\xbf"));
780 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
781 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
782 "\xf4\x80"));
783 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
784 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd),
785 "\xf4\x8f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000786
787 // Overlong sequences with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000788 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
789 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xe0"));
790 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
791 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
792 "\xf0\x80"));
793 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
794 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
795 "\xf0\x8f"));
796 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
797 ConvertUTFResultContainer(sourceIllegal)
798 .withScalars(0xfffd, 0xfffd, 0xfffd),
799 "\xf8\x80\x80"));
800 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
801 ConvertUTFResultContainer(sourceIllegal)
802 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
803 "\xfc\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000804
805 // Sequences that represent surrogates with two trailing bytes missing.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000806 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
807 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xed"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000808
809 // Ill-formed 4-byte sequences.
810 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
811 // U+110yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000812 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
813 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
814 "\xf4\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000815 // U+13Fyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000816 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
817 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
818 "\xf4\xbf"));
819 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
820 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
821 "\xf5\x80"));
822 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
823 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
824 "\xf6\x80"));
825 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
826 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
827 "\xf7\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000828 // U+1FFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000829 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
830 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
831 "\xf7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000832
833 // Ill-formed 5-byte sequences.
834 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
835 // U+200yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000836 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
837 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
838 "\xf8\x88\x80"));
839 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
840 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
841 "\xf8\xbf\xbf"));
842 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
843 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
844 "\xf9\x80\x80"));
845 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
846 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
847 "\xfa\x80\x80"));
848 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
849 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
850 "\xfb\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000851 // U+3FFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000852 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
853 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
854 "\xfb\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000855
856 // Ill-formed 6-byte sequences.
857 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
858 // U+4000yxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000859 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
860 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
861 "\xfc\x84\x80\x80"));
862 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
863 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
864 "\xfc\xbf\xbf\xbf"));
865 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
866 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
867 "\xfd\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000868 // U+7FFFFyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000869 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
870 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
871 "\xfd\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000872
873 //
874 // Sequences with three continuation bytes missing
875 //
876
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000877 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
878 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
879 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
880 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf1"));
881 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
882 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf2"));
883 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
884 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf3"));
885 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
886 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000887
888 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000889 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
890 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf0"));
891 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
892 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
893 "\xf8\x80"));
894 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
895 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
896 "\xfc\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000897
898 // Ill-formed 4-byte sequences.
899 // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
900 // U+14yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000901 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
902 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf5"));
903 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
904 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000905 // U+1Cyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000906 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
907 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000908
909 // Ill-formed 5-byte sequences.
910 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
911 // U+20yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000912 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
913 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
914 "\xf8\x88"));
915 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
916 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
917 "\xf8\xbf"));
918 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
919 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
920 "\xf9\x80"));
921 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
922 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
923 "\xfa\x80"));
924 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
925 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
926 "\xfb\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000927 // U+3FCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000928 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
929 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
930 "\xfb\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000931
932 // Ill-formed 6-byte sequences.
933 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
934 // U+400yyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000935 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
936 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
937 "\xfc\x84\x80"));
938 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
939 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
940 "\xfc\xbf\xbf"));
941 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
942 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
943 "\xfd\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000944 // U+7FFCyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000945 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
946 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd, 0xfffd),
947 "\xfd\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000948
949 //
950 // Sequences with four continuation bytes missing
951 //
952
953 // Ill-formed 5-byte sequences.
954 // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
955 // U+uzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000956 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
957 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
958 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
959 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf9"));
960 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
961 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfa"));
962 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
963 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000964 // U+3zyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000965 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
966 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000967
968 // Broken overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000969 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
970 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xf8"));
971 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
972 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
973 "\xfc\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000974
975 // Ill-formed 6-byte sequences.
976 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
977 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000978 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
979 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
980 "\xfc\x84"));
981 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
982 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
983 "\xfc\xbf"));
984 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
985 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
986 "\xfd\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000987 // U+7Fzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000988 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
989 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
990 "\xfd\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +0000991
992 //
993 // Sequences with five continuation bytes missing
994 //
995
996 // Ill-formed 6-byte sequences.
997 // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
998 // U+uzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +0000999 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1000 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001001 // U+uuzzyyxx (invalid)
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001002 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1003 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd), "\xfd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001004
1005 //
1006 // Consecutive sequences with trailing bytes missing
1007 //
1008
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001009 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1010 ConvertUTFResultContainer(sourceIllegal)
1011 .withScalars(0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1012 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1013 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd)
1014 .withScalars(0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd)
1015 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd)
1016 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1017 "\xc0" "\xe0\x80" "\xf0\x80\x80"
1018 "\xf8\x80\x80\x80"
1019 "\xfc\x80\x80\x80\x80"
1020 "\xdf" "\xef\xbf" "\xf7\xbf\xbf"
1021 "\xfb\xbf\xbf\xbf"
1022 "\xfd\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001023
1024 //
1025 // Overlong UTF-8 sequences
1026 //
1027
1028 // U+002F SOLIDUS
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001029 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1030 ConvertUTFResultContainer(conversionOK).withScalars(0x002f), "\x2f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001031
1032 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001033 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1034 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1035 "\xc0\xaf"));
1036 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1037 ConvertUTFResultContainer(sourceIllegal)
1038 .withScalars(0xfffd, 0xfffd, 0xfffd),
1039 "\xe0\x80\xaf"));
1040 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1041 ConvertUTFResultContainer(sourceIllegal)
1042 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1043 "\xf0\x80\x80\xaf"));
1044 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1045 ConvertUTFResultContainer(sourceIllegal)
1046 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1047 "\xf8\x80\x80\x80\xaf"));
1048 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1049 ConvertUTFResultContainer(sourceIllegal)
1050 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1051 "\xfc\x80\x80\x80\x80\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001052
1053 // U+0000 NULL
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001054 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1055 ConvertUTFResultContainer(conversionOK).withScalars(0x0000),
1056 StringRef("\x00", 1)));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001057
1058 // Overlong sequences of the above.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001059 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1060 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1061 "\xc0\x80"));
1062 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1063 ConvertUTFResultContainer(sourceIllegal)
1064 .withScalars(0xfffd, 0xfffd, 0xfffd),
1065 "\xe0\x80\x80"));
1066 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1067 ConvertUTFResultContainer(sourceIllegal)
1068 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1069 "\xf0\x80\x80\x80"));
1070 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1071 ConvertUTFResultContainer(sourceIllegal)
1072 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1073 "\xf8\x80\x80\x80\x80"));
1074 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1075 ConvertUTFResultContainer(sourceIllegal)
1076 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1077 "\xfc\x80\x80\x80\x80\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001078
1079 // Other overlong sequences.
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001080 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1081 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1082 "\xc0\xbf"));
1083 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1084 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1085 "\xc1\x80"));
1086 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1087 ConvertUTFResultContainer(sourceIllegal).withScalars(0xfffd, 0xfffd),
1088 "\xc1\xbf"));
1089 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1090 ConvertUTFResultContainer(sourceIllegal)
1091 .withScalars(0xfffd, 0xfffd, 0xfffd),
1092 "\xe0\x9f\xbf"));
1093 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1094 ConvertUTFResultContainer(sourceIllegal)
1095 .withScalars(0xfffd, 0xfffd, 0xfffd),
1096 "\xed\xa0\x80"));
1097 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1098 ConvertUTFResultContainer(sourceIllegal)
1099 .withScalars(0xfffd, 0xfffd, 0xfffd),
1100 "\xed\xbf\xbf"));
1101 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1102 ConvertUTFResultContainer(sourceIllegal)
1103 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1104 "\xf0\x8f\x80\x80"));
1105 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1106 ConvertUTFResultContainer(sourceIllegal)
1107 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd),
1108 "\xf0\x8f\xbf\xbf"));
1109 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1110 ConvertUTFResultContainer(sourceIllegal)
1111 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1112 "\xf8\x87\xbf\xbf\xbf"));
1113 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1114 ConvertUTFResultContainer(sourceIllegal)
1115 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1116 "\xfc\x83\xbf\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001117
1118 //
1119 // Isolated surrogates
1120 //
1121
1122 // Unicode 6.3.0:
1123 //
1124 // D71. High-surrogate code point: A Unicode code point in the range
1125 // U+D800 to U+DBFF.
1126 //
1127 // D73. Low-surrogate code point: A Unicode code point in the range
1128 // U+DC00 to U+DFFF.
1129
1130 // Note: U+E0100 is <DB40 DD00> in UTF16.
1131
1132 // High surrogates
1133
1134 // U+D800
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001135 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1136 ConvertUTFResultContainer(sourceIllegal)
1137 .withScalars(0xfffd, 0xfffd, 0xfffd),
1138 "\xed\xa0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001139
1140 // U+DB40
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001141 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1142 ConvertUTFResultContainer(sourceIllegal)
1143 .withScalars(0xfffd, 0xfffd, 0xfffd),
1144 "\xed\xac\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001145
1146 // U+DBFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001147 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1148 ConvertUTFResultContainer(sourceIllegal)
1149 .withScalars(0xfffd, 0xfffd, 0xfffd),
1150 "\xed\xaf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001151
1152 // Low surrogates
1153
1154 // U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001155 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1156 ConvertUTFResultContainer(sourceIllegal)
1157 .withScalars(0xfffd, 0xfffd, 0xfffd),
1158 "\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001159
1160 // U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001161 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1162 ConvertUTFResultContainer(sourceIllegal)
1163 .withScalars(0xfffd, 0xfffd, 0xfffd),
1164 "\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001165
1166 // U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001167 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1168 ConvertUTFResultContainer(sourceIllegal)
1169 .withScalars(0xfffd, 0xfffd, 0xfffd),
1170 "\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001171
1172 // Surrogate pairs
1173
1174 // U+D800 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001175 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1176 ConvertUTFResultContainer(sourceIllegal)
1177 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1178 "\xed\xa0\x80\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001179
1180 // U+D800 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001181 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1182 ConvertUTFResultContainer(sourceIllegal)
1183 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1184 "\xed\xa0\x80\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001185
1186 // U+D800 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001187 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1188 ConvertUTFResultContainer(sourceIllegal)
1189 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1190 "\xed\xa0\x80\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001191
1192 // U+DB40 U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001193 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1194 ConvertUTFResultContainer(sourceIllegal)
1195 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1196 "\xed\xac\xa0\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001197
1198 // U+DB40 U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001199 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1200 ConvertUTFResultContainer(sourceIllegal)
1201 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1202 "\xed\xac\xa0\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001203
1204 // U+DB40 U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001205 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1206 ConvertUTFResultContainer(sourceIllegal)
1207 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1208 "\xed\xac\xa0\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001209
1210 // U+DBFF U+DC00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001211 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1212 ConvertUTFResultContainer(sourceIllegal)
1213 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1214 "\xed\xaf\xbf\xed\xb0\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001215
1216 // U+DBFF U+DD00
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001217 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1218 ConvertUTFResultContainer(sourceIllegal)
1219 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1220 "\xed\xaf\xbf\xed\xb4\x80"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001221
1222 // U+DBFF U+DFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001223 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1224 ConvertUTFResultContainer(sourceIllegal)
1225 .withScalars(0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd),
1226 "\xed\xaf\xbf\xed\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001227
1228 //
1229 // Noncharacters
1230 //
1231
1232 // Unicode 6.3.0:
1233 //
1234 // D14. Noncharacter: A code point that is permanently reserved for
1235 // internal use and that should never be interchanged. Noncharacters
1236 // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10)
1237 // and the values U+FDD0..U+FDEF.
1238
1239 // U+FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001240 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1241 ConvertUTFResultContainer(conversionOK).withScalars(0xfffe),
1242 "\xef\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001243
1244 // U+FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001245 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1246 ConvertUTFResultContainer(conversionOK).withScalars(0xffff),
1247 "\xef\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001248
1249 // U+1FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001250 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1251 ConvertUTFResultContainer(conversionOK).withScalars(0x1fffe),
1252 "\xf0\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001253
1254 // U+1FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001255 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1256 ConvertUTFResultContainer(conversionOK).withScalars(0x1ffff),
1257 "\xf0\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001258
1259 // U+2FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001260 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1261 ConvertUTFResultContainer(conversionOK).withScalars(0x2fffe),
1262 "\xf0\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001263
1264 // U+2FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001265 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1266 ConvertUTFResultContainer(conversionOK).withScalars(0x2ffff),
1267 "\xf0\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001268
1269 // U+3FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001270 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1271 ConvertUTFResultContainer(conversionOK).withScalars(0x3fffe),
1272 "\xf0\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001273
1274 // U+3FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001275 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1276 ConvertUTFResultContainer(conversionOK).withScalars(0x3ffff),
1277 "\xf0\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001278
1279 // U+4FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001280 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1281 ConvertUTFResultContainer(conversionOK).withScalars(0x4fffe),
1282 "\xf1\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001283
1284 // U+4FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001285 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1286 ConvertUTFResultContainer(conversionOK).withScalars(0x4ffff),
1287 "\xf1\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001288
1289 // U+5FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001290 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1291 ConvertUTFResultContainer(conversionOK).withScalars(0x5fffe),
1292 "\xf1\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001293
1294 // U+5FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001295 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1296 ConvertUTFResultContainer(conversionOK).withScalars(0x5ffff),
1297 "\xf1\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001298
1299 // U+6FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001300 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1301 ConvertUTFResultContainer(conversionOK).withScalars(0x6fffe),
1302 "\xf1\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001303
1304 // U+6FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001305 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1306 ConvertUTFResultContainer(conversionOK).withScalars(0x6ffff),
1307 "\xf1\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001308
1309 // U+7FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001310 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1311 ConvertUTFResultContainer(conversionOK).withScalars(0x7fffe),
1312 "\xf1\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001313
1314 // U+7FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001315 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1316 ConvertUTFResultContainer(conversionOK).withScalars(0x7ffff),
1317 "\xf1\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001318
1319 // U+8FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001320 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1321 ConvertUTFResultContainer(conversionOK).withScalars(0x8fffe),
1322 "\xf2\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001323
1324 // U+8FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001325 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1326 ConvertUTFResultContainer(conversionOK).withScalars(0x8ffff),
1327 "\xf2\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001328
1329 // U+9FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001330 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1331 ConvertUTFResultContainer(conversionOK).withScalars(0x9fffe),
1332 "\xf2\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001333
1334 // U+9FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001335 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1336 ConvertUTFResultContainer(conversionOK).withScalars(0x9ffff),
1337 "\xf2\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001338
1339 // U+AFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001340 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1341 ConvertUTFResultContainer(conversionOK).withScalars(0xafffe),
1342 "\xf2\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001343
1344 // U+AFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001345 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1346 ConvertUTFResultContainer(conversionOK).withScalars(0xaffff),
1347 "\xf2\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001348
1349 // U+BFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001350 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1351 ConvertUTFResultContainer(conversionOK).withScalars(0xbfffe),
1352 "\xf2\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001353
1354 // U+BFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001355 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1356 ConvertUTFResultContainer(conversionOK).withScalars(0xbffff),
1357 "\xf2\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001358
1359 // U+CFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001360 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1361 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffe),
1362 "\xf3\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001363
1364 // U+CFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001365 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1366 ConvertUTFResultContainer(conversionOK).withScalars(0xcfffF),
1367 "\xf3\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001368
1369 // U+DFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001370 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1371 ConvertUTFResultContainer(conversionOK).withScalars(0xdfffe),
1372 "\xf3\x9f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001373
1374 // U+DFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001375 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1376 ConvertUTFResultContainer(conversionOK).withScalars(0xdffff),
1377 "\xf3\x9f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001378
1379 // U+EFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001380 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1381 ConvertUTFResultContainer(conversionOK).withScalars(0xefffe),
1382 "\xf3\xaf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001383
1384 // U+EFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001385 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1386 ConvertUTFResultContainer(conversionOK).withScalars(0xeffff),
1387 "\xf3\xaf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001388
1389 // U+FFFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001390 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1391 ConvertUTFResultContainer(conversionOK).withScalars(0xffffe),
1392 "\xf3\xbf\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001393
1394 // U+FFFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001395 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1396 ConvertUTFResultContainer(conversionOK).withScalars(0xfffff),
1397 "\xf3\xbf\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001398
1399 // U+10FFFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001400 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1401 ConvertUTFResultContainer(conversionOK).withScalars(0x10fffe),
1402 "\xf4\x8f\xbf\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001403
1404 // U+10FFFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001405 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1406 ConvertUTFResultContainer(conversionOK).withScalars(0x10ffff),
1407 "\xf4\x8f\xbf\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001408
1409 // U+FDD0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001410 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1411 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd0),
1412 "\xef\xb7\x90"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001413
1414 // U+FDD1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001415 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1416 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd1),
1417 "\xef\xb7\x91"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001418
1419 // U+FDD2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001420 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1421 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd2),
1422 "\xef\xb7\x92"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001423
1424 // U+FDD3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001425 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1426 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd3),
1427 "\xef\xb7\x93"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001428
1429 // U+FDD4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001430 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1431 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd4),
1432 "\xef\xb7\x94"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001433
1434 // U+FDD5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001435 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1436 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd5),
1437 "\xef\xb7\x95"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001438
1439 // U+FDD6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001440 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1441 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd6),
1442 "\xef\xb7\x96"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001443
1444 // U+FDD7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001445 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1446 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd7),
1447 "\xef\xb7\x97"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001448
1449 // U+FDD8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001450 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1451 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd8),
1452 "\xef\xb7\x98"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001453
1454 // U+FDD9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001455 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1456 ConvertUTFResultContainer(conversionOK).withScalars(0xfdd9),
1457 "\xef\xb7\x99"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001458
1459 // U+FDDA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001460 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1461 ConvertUTFResultContainer(conversionOK).withScalars(0xfdda),
1462 "\xef\xb7\x9a"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001463
1464 // U+FDDB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001465 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1466 ConvertUTFResultContainer(conversionOK).withScalars(0xfddb),
1467 "\xef\xb7\x9b"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001468
1469 // U+FDDC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001470 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1471 ConvertUTFResultContainer(conversionOK).withScalars(0xfddc),
1472 "\xef\xb7\x9c"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001473
1474 // U+FDDD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001475 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1476 ConvertUTFResultContainer(conversionOK).withScalars(0xfddd),
1477 "\xef\xb7\x9d"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001478
1479 // U+FDDE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001480 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1481 ConvertUTFResultContainer(conversionOK).withScalars(0xfdde),
1482 "\xef\xb7\x9e"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001483
1484 // U+FDDF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001485 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1486 ConvertUTFResultContainer(conversionOK).withScalars(0xfddf),
1487 "\xef\xb7\x9f"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001488
1489 // U+FDE0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001490 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1491 ConvertUTFResultContainer(conversionOK).withScalars(0xfde0),
1492 "\xef\xb7\xa0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001493
1494 // U+FDE1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001495 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1496 ConvertUTFResultContainer(conversionOK).withScalars(0xfde1),
1497 "\xef\xb7\xa1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001498
1499 // U+FDE2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001500 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1501 ConvertUTFResultContainer(conversionOK).withScalars(0xfde2),
1502 "\xef\xb7\xa2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001503
1504 // U+FDE3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001505 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1506 ConvertUTFResultContainer(conversionOK).withScalars(0xfde3),
1507 "\xef\xb7\xa3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001508
1509 // U+FDE4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001510 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1511 ConvertUTFResultContainer(conversionOK).withScalars(0xfde4),
1512 "\xef\xb7\xa4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001513
1514 // U+FDE5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001515 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1516 ConvertUTFResultContainer(conversionOK).withScalars(0xfde5),
1517 "\xef\xb7\xa5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001518
1519 // U+FDE6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001520 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1521 ConvertUTFResultContainer(conversionOK).withScalars(0xfde6),
1522 "\xef\xb7\xa6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001523
1524 // U+FDE7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001525 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1526 ConvertUTFResultContainer(conversionOK).withScalars(0xfde7),
1527 "\xef\xb7\xa7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001528
1529 // U+FDE8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001530 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1531 ConvertUTFResultContainer(conversionOK).withScalars(0xfde8),
1532 "\xef\xb7\xa8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001533
1534 // U+FDE9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001535 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1536 ConvertUTFResultContainer(conversionOK).withScalars(0xfde9),
1537 "\xef\xb7\xa9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001538
1539 // U+FDEA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001540 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1541 ConvertUTFResultContainer(conversionOK).withScalars(0xfdea),
1542 "\xef\xb7\xaa"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001543
1544 // U+FDEB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001545 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1546 ConvertUTFResultContainer(conversionOK).withScalars(0xfdeb),
1547 "\xef\xb7\xab"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001548
1549 // U+FDEC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001550 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1551 ConvertUTFResultContainer(conversionOK).withScalars(0xfdec),
1552 "\xef\xb7\xac"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001553
1554 // U+FDED
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001555 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1556 ConvertUTFResultContainer(conversionOK).withScalars(0xfded),
1557 "\xef\xb7\xad"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001558
1559 // U+FDEE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001560 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1561 ConvertUTFResultContainer(conversionOK).withScalars(0xfdee),
1562 "\xef\xb7\xae"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001563
1564 // U+FDEF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001565 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1566 ConvertUTFResultContainer(conversionOK).withScalars(0xfdef),
1567 "\xef\xb7\xaf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001568
1569 // U+FDF0
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001570 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1571 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf0),
1572 "\xef\xb7\xb0"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001573
1574 // U+FDF1
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001575 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1576 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf1),
1577 "\xef\xb7\xb1"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001578
1579 // U+FDF2
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001580 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1581 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf2),
1582 "\xef\xb7\xb2"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001583
1584 // U+FDF3
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001585 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1586 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf3),
1587 "\xef\xb7\xb3"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001588
1589 // U+FDF4
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001590 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1591 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf4),
1592 "\xef\xb7\xb4"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001593
1594 // U+FDF5
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001595 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1596 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf5),
1597 "\xef\xb7\xb5"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001598
1599 // U+FDF6
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001600 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1601 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf6),
1602 "\xef\xb7\xb6"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001603
1604 // U+FDF7
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001605 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1606 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf7),
1607 "\xef\xb7\xb7"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001608
1609 // U+FDF8
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001610 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1611 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf8),
1612 "\xef\xb7\xb8"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001613
1614 // U+FDF9
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001615 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1616 ConvertUTFResultContainer(conversionOK).withScalars(0xfdf9),
1617 "\xef\xb7\xb9"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001618
1619 // U+FDFA
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001620 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1621 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfa),
1622 "\xef\xb7\xba"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001623
1624 // U+FDFB
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001625 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1626 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfb),
1627 "\xef\xb7\xbb"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001628
1629 // U+FDFC
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001630 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1631 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfc),
1632 "\xef\xb7\xbc"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001633
1634 // U+FDFD
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001635 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1636 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfd),
1637 "\xef\xb7\xbd"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001638
1639 // U+FDFE
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001640 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1641 ConvertUTFResultContainer(conversionOK).withScalars(0xfdfe),
1642 "\xef\xb7\xbe"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001643
1644 // U+FDFF
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001645 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1646 ConvertUTFResultContainer(conversionOK).withScalars(0xfdff),
1647 "\xef\xb7\xbf"));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001648}
1649
1650TEST(ConvertUTFTest, UTF8ToUTF32PartialLenient) {
1651 // U+0041 LATIN CAPITAL LETTER A
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001652 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1653 ConvertUTFResultContainer(conversionOK).withScalars(0x0041),
1654 "\x41", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001655
1656 //
1657 // Sequences with one continuation byte missing
1658 //
1659
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001660 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1661 ConvertUTFResultContainer(sourceExhausted),
1662 "\xc2", true));
1663 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1664 ConvertUTFResultContainer(sourceExhausted),
1665 "\xdf", true));
1666 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1667 ConvertUTFResultContainer(sourceExhausted),
1668 "\xe0\xa0", true));
1669 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1670 ConvertUTFResultContainer(sourceExhausted),
1671 "\xe0\xbf", true));
1672 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1673 ConvertUTFResultContainer(sourceExhausted),
1674 "\xe1\x80", true));
1675 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1676 ConvertUTFResultContainer(sourceExhausted),
1677 "\xec\xbf", true));
1678 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1679 ConvertUTFResultContainer(sourceExhausted),
1680 "\xed\x80", true));
1681 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1682 ConvertUTFResultContainer(sourceExhausted),
1683 "\xed\x9f", true));
1684 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1685 ConvertUTFResultContainer(sourceExhausted),
1686 "\xee\x80", true));
1687 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1688 ConvertUTFResultContainer(sourceExhausted),
1689 "\xef\xbf", true));
1690 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1691 ConvertUTFResultContainer(sourceExhausted),
1692 "\xf0\x90\x80", true));
1693 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1694 ConvertUTFResultContainer(sourceExhausted),
1695 "\xf0\xbf\xbf", true));
1696 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1697 ConvertUTFResultContainer(sourceExhausted),
1698 "\xf1\x80\x80", true));
1699 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1700 ConvertUTFResultContainer(sourceExhausted),
1701 "\xf3\xbf\xbf", true));
1702 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1703 ConvertUTFResultContainer(sourceExhausted),
1704 "\xf4\x80\x80", true));
1705 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1706 ConvertUTFResultContainer(sourceExhausted),
1707 "\xf4\x8f\xbf", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001708
Dmitri Gribenkoebdd0a52014-06-17 09:33:24 +00001709 EXPECT_TRUE(CheckConvertUTF8ToUnicodeScalars(
1710 ConvertUTFResultContainer(sourceExhausted).withScalars(0x0041),
1711 "\x41\xc2", true));
Dmitri Gribenko1089db02014-06-16 11:09:46 +00001712}
1713